First shot at nv4 profile. trunk
authorRyan C. Gordon <icculus@icculus.org>
Sun, 06 Jul 2008 22:01:28 -0400
branchtrunk
changeset 431 0d0cbe10db02
parent 430 2756be459ea1
child 432 6c59f6c0456a
First shot at nv4 profile.
mojoshader.c
mojoshader.h
mojoshader_opengl.c
--- a/mojoshader.c	Sun Jul 06 22:01:15 2008 -0400
+++ b/mojoshader.c	Sun Jul 06 22:01:28 2008 -0400
@@ -195,7 +195,8 @@
 
 // one emit function for attributes in each profile.
 typedef void (*emit_attribute)(Context *ctx, RegisterType regtype, int regnum,
-                               MOJOSHADER_usage usage, int index, int wmask);
+                               MOJOSHADER_usage usage, int index, int wmask,
+                               int flags);
 
 // one args function for each possible sequence of opcode arguments.
 typedef int (*args_function)(Context *ctx);
@@ -417,11 +418,13 @@
     int sampler_count;
     RegisterList samplers;
     VariableList *variables;  // variables to register mapping.
+    int centroid_allowed:1;
     int have_ctab:1;
     int determined_constants_arrays:1;
     int predicated:1;
     int support_nv2:1;
     int support_nv3:1;
+    int support_nv4:1;
     int support_glsl120:1;
     int glsl_generated_lit_opcode:1;
 };
@@ -797,12 +800,13 @@
 
 static void add_attribute_register(Context *ctx, const RegisterType rtype,
                                 const int regnum, const MOJOSHADER_usage usage,
-                                const int index, const int writemask)
+                                const int index, const int writemask, int flags)
 {
     RegisterList *item = reglist_insert(ctx, &ctx->attributes, rtype, regnum);
     item->usage = usage;
     item->index = index;
     item->writemask = writemask;
+    item->misc = flags;
 } // add_attribute_register
 
 static inline void add_sampler(Context *ctx, const RegisterType rtype,
@@ -1316,7 +1320,8 @@
 
 
 static void emit_D3D_attribute(Context *ctx, RegisterType regtype, int regnum,
-                               MOJOSHADER_usage usage, int index, int wmask)
+                               MOJOSHADER_usage usage, int index, int wmask,
+                               int flags)
 {
     // no-op.
 } // emit_D3D_attribute
@@ -1705,7 +1710,8 @@
 static void emit_PASSTHROUGH_uniform(Context *ctx, RegisterType t, int n,
                                      int arraybase, int arraysize) {}
 static void emit_PASSTHROUGH_attribute(Context *ctx, RegisterType t, int n,
-                                       MOJOSHADER_usage u, int i, int w) {}
+                                       MOJOSHADER_usage u, int i, int w,
+                                       int f) {}
 
 static const char *get_PASSTHROUGH_varname(Context *ctx, RegisterType rt, int regnum)
 {
@@ -1912,11 +1918,8 @@
 
     // MSDN says MOD_PP is a hint and many implementations ignore it. So do we.
 
-    if (arg->result_mod & MOD_CENTROID)
-    {
-        fail(ctx, "MOD_CENTROID unsupported");  // !!! FIXME
-        return "";
-    } // if
+    // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
+    assert((arg->result_mod & MOD_CENTROID) == 0);
 
     if (ctx->predicated)
     {
@@ -2442,7 +2445,8 @@
 } // emit_GLSL_sampler
 
 static void emit_GLSL_attribute(Context *ctx, RegisterType regtype, int regnum,
-                                MOJOSHADER_usage usage, int index, int wmask)
+                                MOJOSHADER_usage usage, int index, int wmask,
+                                int flags)
 {
     // !!! FIXME: this function doesn't deal with write masks at all yet!
     const char *varname = get_GLSL_varname(ctx, regtype, regnum);
@@ -2451,6 +2455,8 @@
     const char *arrayright = "";
     char index_str[16] = { '\0' };
 
+    //assert((flags & MOD_PP) == 0);  // !!! FIXME: is PP allowed?
+
     if (index != 0)  // !!! FIXME: a lot of these MUST be zero.
         snprintf(index_str, sizeof (index_str), "%u", (uint) index);
 
@@ -2565,6 +2571,12 @@
     {
         // samplers DCLs get handled in emit_GLSL_sampler().
 
+        if (flags & MOD_CENTROID)  // !!! FIXME
+        {
+            failf(ctx, "centroid unsupported in %s profile", ctx->profile->name);
+            return;
+        } // if
+
         if (regtype == REG_TYPE_COLOROUT)
             usage_str = "gl_FragColor";
 
@@ -3675,19 +3687,31 @@
     const DestArgInfo *arg = &ctx->dest_arg;
 
     const char *sat_str = "";
-    // The "_SAT" modifier is only available in fragment shaders, but we'll
-    //  fake it for them later in emit_ARB1_dest_modifiers() ...
-    if ( (arg->result_mod & MOD_SATURATE) && (shader_is_pixel(ctx)) )
-        sat_str = "_SAT";
-
-    // no partial precision (MOD_PP), but that's okay.
-
-    if (arg->result_mod & MOD_CENTROID)
-    {
-        fail(ctx, "dest register MOD_CENTROID currently unsupported in arb1");
-        return "";
+    if (arg->result_mod & MOD_SATURATE)
+    {
+        // nv4 can use ".SAT" in all program types.
+        // For less than nv4, the "_SAT" modifier is only available in
+        //  fragment shaders. Every thing else will fake it later in
+        //  emit_ARB1_dest_modifiers() ...
+        if (ctx->support_nv4)
+            sat_str = ".SAT";
+        else if (shader_is_pixel(ctx))
+            sat_str = "_SAT";
     } // if
 
+    const char *pp_str = "";
+    if (arg->result_mod & MOD_PP)
+    {
+        // Most ARB1 profiles can't do partial precision (MOD_PP), but that's
+        //  okay. The spec says lots of Direct3D implementations ignore the
+        //  flag anyhow.
+        if (ctx->support_nv4)
+            pp_str = "H";
+    } // if
+
+    // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
+    assert((arg->result_mod & MOD_CENTROID) == 0);
+
     char regnum_str[16];
     const char *regtype_str = get_ARB1_register_string(ctx, arg->regtype,
                                                        arg->regnum, regnum_str,
@@ -3726,7 +3750,7 @@
     } // if
 
     char *retval = get_scratch_buffer(ctx);
-    snprintf(retval, SCRATCH_BUFFER_SIZE, "%s %s%s%s", sat_str,
+    snprintf(retval, SCRATCH_BUFFER_SIZE, "%s%s %s%s%s", pp_str, sat_str,
              regtype_str, regnum_str, writemask_str);
     // !!! FIXME: make sure the scratch buffer was large enough.
     return retval;
@@ -3757,13 +3781,16 @@
             output_line(ctx, "MUL%s, %s, %s;", dst, varname, multiplier);
     } // if
 
-    if ( (arg->result_mod & MOD_SATURATE) && (!shader_is_pixel(ctx)) )
-    {
-        const char *varname = get_ARB1_destarg_varname(ctx);
-        const char *dst = make_ARB1_destarg_string(ctx);
-        // pixel shaders just use the "_SAT" modifier here, instead.
-        output_line(ctx, "MIN%s, %s, 1.0;", dst, varname);
-        output_line(ctx, "MAX%s, %s, 0.0;", dst, varname);
+    if (arg->result_mod & MOD_SATURATE)
+    {
+        // nv4 and/or pixel shaders just used the "SAT" modifier, instead.
+        if ( (!ctx->support_nv4) && (!shader_is_pixel(ctx)) )
+        {
+            const char *varname = get_ARB1_destarg_varname(ctx);
+            const char *dst = make_ARB1_destarg_string(ctx);
+            output_line(ctx, "MIN%s, %s, 1.0;", dst, varname);
+            output_line(ctx, "MAX%s, %s, 0.0;", dst, varname);
+        } // if
     } // if
 } // emit_ARB1_dest_modifiers
 
@@ -3890,6 +3917,14 @@
         output_line(ctx, "OPTION NV_%s_program%d;", shader_full_str, ver);
     } // else if
 
+    else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV4) == 0)
+    {
+        ctx->support_nv2 = 1;
+        ctx->support_nv3 = 1;
+        ctx->support_nv4 = 1;
+        output_line(ctx, "!!NV%s4.0", shader_str);
+    } // else if
+
     else
     {
         failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
@@ -3917,7 +3952,8 @@
     for (i = 0; i < ctx->max_scratch_registers; i++)
         output_line(ctx, "TEMP %s;", allocate_ARB1_scratch_reg_name(ctx));
 
-    if ( (ctx->support_nv2) && (!shader_is_pixel(ctx)) )
+    // nv2 fragment programs (and anything nv4) have a real REP/ENDREP.
+    if ( (ctx->support_nv2) && (!shader_is_pixel(ctx)) && (!ctx->support_nv4) )
     {
         // set up temps for nv2 REP/ENDREP emulation through branching.
         for (i = 0; i < ctx->max_reps; i++)
@@ -4027,7 +4063,8 @@
 
 // !!! FIXME: a lot of cut-and-paste here from emit_GLSL_attribute().
 static void emit_ARB1_attribute(Context *ctx, RegisterType regtype, int regnum,
-                                MOJOSHADER_usage usage, int index, int wmask)
+                                MOJOSHADER_usage usage, int index, int wmask,
+                                int flags)
 {
     // !!! FIXME: this function doesn't deal with write masks at all yet!
     const char *varname = get_ARB1_varname(ctx, regtype, regnum);
@@ -4036,6 +4073,8 @@
     const char *arrayright = "";
     char index_str[16] = { '\0' };
 
+    //assert((flags & MOD_PP) == 0);  // !!! FIXME: is PP allowed?
+
     if (index != 0)  // !!! FIXME: a lot of these MUST be zero.
         snprintf(index_str, sizeof (index_str), "%u", (uint) index);
 
@@ -4152,8 +4191,22 @@
     else if (shader_is_pixel(ctx))
     {
         const char *paramtype_str = "ATTRIB";
+
         // samplers DCLs get handled in emit_ARB1_sampler().
 
+        if (flags & MOD_CENTROID)
+        {
+            if (!ctx->support_nv4)  // GL_NV_fragment_program4 adds centroid.
+            {
+                // !!! FIXME: should we just wing it without centroid here?
+                failf(ctx, "centroid unsupported in %s profile",
+                      ctx->profile->name);
+                return;
+            } // if
+
+            paramtype_str = "CENTROID ATTRIB";
+        } // if
+
         if (regtype == REG_TYPE_COLOROUT)
         {
             paramtype_str = "OUTPUT";
@@ -4194,11 +4247,16 @@
             const MiscTypeType mt = (MiscTypeType) regnum;
             if (mt == MISCTYPE_TYPE_FACE)
             {
-                fail(ctx, "Can't handle vFace in arb1 profile"); // !!! FIXME
-                //push_output(ctx, &ctx->globals);
-                //output_line(ctx, "float %s = gl_FrontFacing ? 1.0 : -1.0;",
-                //            varname);
-                //pop_output(ctx);
+                if (ctx->support_nv4)  // FINALLY, a vFace equivalent in nv4!
+                {
+                    index_str[0] = '\0';  // no explicit number.
+                    usage_str = "fragment.facing.x";
+                } // if
+                else
+                {
+                    failf(ctx, "vFace unsupported in %s profile",
+                          ctx->profile->name);
+                } // else
             } // if
             else if (mt == MISCTYPE_TYPE_POSITION)
             {
@@ -4425,8 +4483,8 @@
 
 static void emit_ARB1_NRM(Context *ctx)
 {
-    // nv2 fragment programs have a real NRM.
-    if ( (ctx->support_nv2) && (shader_is_pixel(ctx)) )
+    // nv2 fragment programs (and anything nv4) have a real NRM.
+    if ( (ctx->support_nv4) || ((ctx->support_nv2) && (shader_is_pixel(ctx))) )
         emit_ARB1_opcode_ds(ctx, "NRM");
     else
     {
@@ -4446,8 +4504,8 @@
     // we don't care about the temp registers that <= sm2 demands; ignore them.
     const int mask = ctx->dest_arg.writemask;
 
-    // arb1 fragment shaders have sin/cos/sincos opcodes.
-    if (shader_is_pixel(ctx))
+    // arb1 fragment programs and everything nv4 have sin/cos/sincos opcodes.
+    if ((shader_is_pixel(ctx)) || (ctx->support_nv4))
     {
         const char *dst = make_ARB1_destarg_string(ctx);
         const char *src0 = make_ARB1_srcarg_string(ctx, 0);
@@ -4459,7 +4517,7 @@
             output_line(ctx, "SCS%s, %s;", dst, src0);
     } // if
 
-    // nv2+ shaders have sin and cos opcodes.
+    // nv2+ profiles have sin and cos opcodes.
     else if (ctx->support_nv2)
     {
         const char *dst = get_ARB1_destarg_varname(ctx);
@@ -4537,8 +4595,8 @@
 {
     const char *src0 = make_ARB1_srcarg_string(ctx, 0);
 
-    // nv2 fragment programs have a real REP.
-    if ( (ctx->support_nv2) && (shader_is_pixel(ctx)) )
+    // nv2 fragment programs (and everything nv4) have a real REP.
+    if ( (ctx->support_nv4) || ((ctx->support_nv2) && (shader_is_pixel(ctx))) )
         output_line(ctx, "REP %s;", src0);
 
     else if (ctx->support_nv2)
@@ -4569,8 +4627,8 @@
 
 static void emit_ARB1_ENDREP(Context *ctx)
 {
-    // nv2 fragment programs have a real ENDREP.
-    if ( (ctx->support_nv2) && (shader_is_pixel(ctx)) )
+    // nv2 fragment programs (and everything nv4) have a real ENDREP.
+    if ( (ctx->support_nv4) || ((ctx->support_nv2) && (shader_is_pixel(ctx))) )
         output_line(ctx, "ENDREP;");
 
     else if (ctx->support_nv2)
@@ -4600,7 +4658,8 @@
 static void nv2_if(Context *ctx)
 {
     // The condition code register MUST be set up before this!
-    if (shader_is_pixel(ctx))  // nv2 fragment programs have a real IF.
+    // nv2 fragment programs (and everything nv4) have a real IF.
+    if ( (ctx->support_nv4) || (shader_is_pixel(ctx)) )
         output_line(ctx, "IF EQ.x;");
     else
     {
@@ -4635,8 +4694,8 @@
 
 static void emit_ARB1_ELSE(Context *ctx)
 {
-    // nv2 fragment programs have a real ELSE.
-    if ( (ctx->support_nv2) && (shader_is_pixel(ctx)) )
+    // nv2 fragment programs (and everything nv4) have a real ELSE.
+    if ( (ctx->support_nv4) || ((ctx->support_nv2) && (shader_is_pixel(ctx))) )
         output_line(ctx, "ELSE;");
 
     else if (ctx->support_nv2)
@@ -4665,8 +4724,8 @@
 
 static void emit_ARB1_ENDIF(Context *ctx)
 {
-    // nv2 fragment programs have a real ENDIF.
-    if ( (ctx->support_nv2) && (shader_is_pixel(ctx)) )
+    // nv2 fragment programs (and everything nv4) have a real ENDIF.
+    if ( (ctx->support_nv4) || ((ctx->support_nv2) && (shader_is_pixel(ctx))) )
         output_line(ctx, "ENDIF;");
 
     else if (ctx->support_nv2)
@@ -4686,8 +4745,8 @@
 
 static void emit_ARB1_BREAK(Context *ctx)
 {
-    // nv2 fragment programs have a real BREAK.
-    if ( (ctx->support_nv2) && (shader_is_pixel(ctx)) )
+    // nv2 fragment programs (and everything nv4) have a real BREAK.
+    if ( (ctx->support_nv4) || ((ctx->support_nv2) && (shader_is_pixel(ctx))) )
         output_line(ctx, "BRK;");
 
     else if (ctx->support_nv2)
@@ -4812,18 +4871,23 @@
 
 static void emit_ARB1_DP2ADD(Context *ctx)
 {
-    const char *dst = make_ARB1_destarg_string(ctx);
-    const char *src0 = make_ARB1_srcarg_string(ctx, 0);
-    const char *src1 = make_ARB1_srcarg_string(ctx, 1);
-    const char *src2 = make_ARB1_srcarg_string(ctx, 2);
-    const char *scratch = allocate_ARB1_scratch_reg_name(ctx);
-
-    // DP2ADD is:
-    //  dest = src0.r * src1.r + src0.g * src1.g + src2.replicate_swizzle
-    output_line(ctx, "MUL %s, %s, %s;", scratch, src0, src1);
-    output_line(ctx, "ADD %s, %s.x, %s.y;", scratch, scratch, scratch);
-    output_line(ctx, "ADD%s, %s.x, %s;", dst, scratch, src2);
-    emit_ARB1_dest_modifiers(ctx);
+    if (ctx->support_nv4)  // nv4 has a built-in equivalent to DP2ADD.
+        emit_ARB1_opcode_dsss(ctx, "DP2A");
+    else
+    {
+        const char *dst = make_ARB1_destarg_string(ctx);
+        const char *src0 = make_ARB1_srcarg_string(ctx, 0);
+        const char *src1 = make_ARB1_srcarg_string(ctx, 1);
+        const char *src2 = make_ARB1_srcarg_string(ctx, 2);
+        const char *scratch = allocate_ARB1_scratch_reg_name(ctx);
+
+        // DP2ADD is:
+        //  dst = (src0.r * src1.r) + (src0.g * src1.g) + src2.replicate_swiz
+        output_line(ctx, "MUL %s, %s, %s;", scratch, src0, src1);
+        output_line(ctx, "ADD %s, %s.x, %s.y;", scratch, scratch, scratch);
+        output_line(ctx, "ADD%s, %s.x, %s;", dst, scratch, src2);
+        emit_ARB1_dest_modifiers(ctx);
+    } // else
 } // emit_ARB1_DP2ADD
 
 
@@ -5042,6 +5106,7 @@
     { MOJOSHADER_PROFILE_GLSL120, MOJOSHADER_PROFILE_GLSL },
     { MOJOSHADER_PROFILE_NV2, MOJOSHADER_PROFILE_ARB1 },
     { MOJOSHADER_PROFILE_NV3, MOJOSHADER_PROFILE_ARB1 },
+    { MOJOSHADER_PROFILE_NV4, MOJOSHADER_PROFILE_ARB1 },
 };
 
 
@@ -5147,6 +5212,8 @@
     {
         if (!shader_is_pixel(ctx))
             return fail(ctx, "Centroid result mod in non-pixel shader");
+        else if (!ctx->centroid_allowed)  // only on DCL opcodes!
+            return fail(ctx, "Centroid modifier not allowed here");
     } // if
 
     if ((info->regtype < 0) || (info->regtype > REG_TYPE_MAX))
@@ -5502,9 +5569,12 @@
     if (reserved1 != 0x1)
         return fail(ctx, "Bit #31 in DCL token must be one");
 
+    ctx->centroid_allowed = 1;
     ctx->tokens++;
     ctx->tokencount--;
-    if (parse_destination_token(ctx, &ctx->dest_arg) == FAIL)
+    const int parse_dest_rc = parse_destination_token(ctx, &ctx->dest_arg);
+    ctx->centroid_allowed = 0;
+    if (parse_dest_rc == FAIL)
         return FAIL;
 
     if (ctx->dest_arg.result_shift != 0)  // I'm pretty sure this is illegal...?
@@ -5847,6 +5917,7 @@
     const RegisterType regtype = arg->regtype;
     const int regnum = arg->regnum;
     const int wmask = arg->writemask;
+    const int mods = arg->result_mod;
 
     // parse_args_DCL() does a lot of state checking before we get here.
 
@@ -5865,7 +5936,7 @@
             fail(ctx, "unknown DCL usage");
             return;
         } // if
-        add_attribute_register(ctx, regtype, regnum, usage, index, wmask);
+        add_attribute_register(ctx, regtype, regnum, usage, index, wmask, mods);
     } // if
 
     else if (shader_is_pixel(ctx))
@@ -5876,7 +5947,7 @@
         {
             const MOJOSHADER_usage usage = (MOJOSHADER_usage) ctx->dwords[0];
             const int index = ctx->dwords[1];
-            add_attribute_register(ctx, regtype, regnum, usage, index, wmask);
+            add_attribute_register(ctx, regtype, regnum, usage, index, wmask, mods);
         } // else
     } // else if
 
@@ -7334,7 +7405,7 @@
                     // Apparently this is an attribute that wasn't DCL'd.
                     //  Add it to the attribute list; deal with it later.
                     add_attribute_register(ctx, item->regtype, item->regnum,
-                                           MOJOSHADER_USAGE_UNKNOWN, 0, 0xF);
+                                           MOJOSHADER_USAGE_UNKNOWN, 0, 0xF, 0);
                     break;
 
                 case REG_TYPE_ADDRESS:
@@ -7435,7 +7506,7 @@
             ctx->attribute_count++;
             ctx->profile->attribute_emitter(ctx, item->regtype, item->regnum,
                                             MOJOSHADER_USAGE_POSITION, 0,
-                                            item->writemask);
+                                            item->writemask, item->misc);
             break;
         } // if
     } // for
@@ -7449,7 +7520,7 @@
             ctx->attribute_count++;
             ctx->profile->attribute_emitter(ctx, item->regtype, item->regnum,
                                             item->usage, item->index,
-                                            item->writemask);
+                                            item->writemask, item->misc);
         } // if
     } // for
 } // process_definitions
--- a/mojoshader.h	Sun Jul 06 22:01:15 2008 -0400
+++ b/mojoshader.h	Sun Jul 06 22:01:28 2008 -0400
@@ -363,7 +363,7 @@
  * Profile string for OpenGL ARB 1.0 shaders with Nvidia 4.0 extensions:
  *  NV_gpu_program4
  */
-// Not yet. #define MOJOSHADER_PROFILE_NV4 "nv4"
+#define MOJOSHADER_PROFILE_NV4 "nv4"
 
 
 /*
--- a/mojoshader_opengl.c	Sun Jul 06 22:01:15 2008 -0400
+++ b/mojoshader_opengl.c	Sun Jul 06 22:01:28 2008 -0400
@@ -142,6 +142,7 @@
     int have_GL_NV_vertex_program2_option:1;
     int have_GL_NV_fragment_program2:1;
     int have_GL_NV_vertex_program3:1;
+    int have_GL_NV_gpu_program4:1;
     int have_GL_ARB_shader_objects:1;
     int have_GL_ARB_vertex_shader:1;
     int have_GL_ARB_fragment_shader:1;
@@ -175,6 +176,7 @@
     PFNGLGETPROGRAMIVARBPROC glGetProgramivARB;
     PFNGLGETPROGRAMSTRINGARBPROC glGetProgramStringARB;
     PFNGLPROGRAMLOCALPARAMETER4FVARBPROC glProgramLocalParameter4fvARB;
+    PFNGLPROGRAMLOCALPARAMETERI4IVNVPROC glProgramLocalParameterI4ivNV;
     PFNGLDELETEPROGRAMSARBPROC glDeleteProgramsARB;
     PFNGLGENPROGRAMSARBPROC glGenProgramsARB;
     PFNGLBINDPROGRAMARBPROC glBindProgramARB;
@@ -547,6 +549,25 @@
 } // impl_ARB1_Uniform1i
 
 
+static void impl_NV4_Uniform4iv(const MOJOSHADER_parseData *pd, GLint loc,
+                                 GLsizei siz, GLint *v)
+{
+    int i;
+    const GLenum shader_type = arb1_shader_type(pd->shader_type);
+    for (i = 0; i < siz; i++, v += 4)
+        ctx->glProgramLocalParameterI4ivNV(shader_type, loc + i, v);
+} // impl_NV4_Uniform4iv
+
+
+static void impl_NV4_Uniform1i(const MOJOSHADER_parseData *pd, GLint loc,
+                                GLint _v)
+{
+    const GLenum shader_type = arb1_shader_type(pd->shader_type);
+    GLint v[4] = { _v, _v, _v, _v };
+    ctx->glProgramLocalParameterI4ivNV(shader_type, loc, v);
+} // impl_NV4_Uniform1i
+
+
 static void impl_ARB1_SetSampler(GLint loc, GLuint sampler)
 {
     // no-op in this profile...arb1 uses the texture units as-is.
@@ -622,6 +643,7 @@
     DO_LOOKUP(GL_ARB_vertex_program, PFNGLGENPROGRAMSARBPROC, glGenProgramsARB);
     DO_LOOKUP(GL_ARB_vertex_program, PFNGLBINDPROGRAMARBPROC, glBindProgramARB);
     DO_LOOKUP(GL_ARB_vertex_program, PFNGLPROGRAMSTRINGARBPROC, glProgramStringARB);
+    DO_LOOKUP(GL_NV_gpu_program4, PFNGLPROGRAMLOCALPARAMETERI4IVNVPROC, glProgramLocalParameterI4ivNV);
 
     #undef DO_LOOKUP
 } // lookup_entry_points
@@ -769,6 +791,11 @@
         MUST_HAVE(MOJOSHADER_PROFILE_NV3, GL_NV_vertex_program3);
         MUST_HAVE(MOJOSHADER_PROFILE_NV3, GL_NV_fragment_program2);
     } // else if
+
+    else if (strcmp(profile, MOJOSHADER_PROFILE_NV4) == 0)
+    {
+        MUST_HAVE(MOJOSHADER_PROFILE_NV4, GL_NV_gpu_program4);
+    } // else if
     #endif
 
     #if SUPPORT_PROFILE_GLSL
@@ -810,6 +837,7 @@
     MOJOSHADER_PROFILE_GLSL,
 #endif
 #if SUPPORT_PROFILE_ARB1
+    MOJOSHADER_PROFILE_NV4,
     MOJOSHADER_PROFILE_NV3,
     MOJOSHADER_PROFILE_NV2,
     MOJOSHADER_PROFILE_ARB1,
@@ -919,7 +947,8 @@
 #if SUPPORT_PROFILE_ARB1
     else if ( (strcmp(profile, MOJOSHADER_PROFILE_ARB1) == 0) ||
               (strcmp(profile, MOJOSHADER_PROFILE_NV2) == 0) ||
-              (strcmp(profile, MOJOSHADER_PROFILE_NV3) == 0) )
+              (strcmp(profile, MOJOSHADER_PROFILE_NV3) == 0) ||
+              (strcmp(profile, MOJOSHADER_PROFILE_NV4) == 0) )
     {
         ctx->profileMaxUniforms = impl_ARB1_MaxUniforms;
         ctx->profileCompileShader = impl_ARB1_CompileShader;
@@ -934,6 +963,13 @@
         ctx->profileUniform4iv = impl_ARB1_Uniform4iv;
         ctx->profileUniform1i = impl_ARB1_Uniform1i;
         ctx->profileSetSampler = impl_ARB1_SetSampler;
+
+        // GL_NV_gpu_program4 has integer uniform loading support.
+        if (strcmp(profile, MOJOSHADER_PROFILE_NV4) == 0)
+        {
+            ctx->profileUniform4iv = impl_NV4_Uniform4iv;
+            ctx->profileUniform1i = impl_NV4_Uniform1i;
+        } // if
     } // if
 #endif