mojoshader.c
author Ryan C. Gordon <icculus@icculus.org>
Sun, 29 May 2016 02:59:29 -0400
changeset 1175 9a010116e353
parent 1174 e83fe013f0c5
child 1176 1356c2c32ef6
permissions -rw-r--r--
Don't free ctx->mainfn if context build fails; it wasn't ever set.
icculus@7
     1
/**
icculus@35
     2
 * MojoShader; generate shader programs from bytecode of compiled
icculus@35
     3
 *  Direct3D shaders.
icculus@7
     4
 *
icculus@7
     5
 * Please see the file LICENSE.txt in the source's root directory.
icculus@7
     6
 *
icculus@7
     7
 *  This file written by Ryan C. Gordon.
icculus@7
     8
 */
icculus@7
     9
icculus@322
    10
// !!! FIXME: this file really needs to be split up.
icculus@18
    11
// !!! FIXME: I keep changing coding styles for symbols and typedefs.
icculus@18
    12
icculus@1082
    13
// !!! FIXME: rules from MSDN about temp registers we probably don't check.
icculus@1082
    14
// - There are limited temporaries: vs_1_1 has 12 (ps_1_1 has _2_!).
icculus@1082
    15
// - SM2 apparently was variable, between 12 and 32. Shader Model 3 has 32.
icculus@1082
    16
// - A maximum of three temp registers can be used in a single instruction.
icculus@1082
    17
icculus@464
    18
#define __MOJOSHADER_INTERNAL__ 1
icculus@464
    19
#include "mojoshader_internal.h"
icculus@45
    20
icculus@405
    21
typedef struct ConstantsList
icculus@405
    22
{
icculus@405
    23
    MOJOSHADER_constant constant;
icculus@405
    24
    struct ConstantsList *next;
icculus@405
    25
} ConstantsList;
icculus@405
    26
icculus@399
    27
typedef struct VariableList
icculus@399
    28
{
icculus@399
    29
    MOJOSHADER_uniformType type;
icculus@399
    30
    int index;
icculus@399
    31
    int count;
icculus@405
    32
    ConstantsList *constant;
icculus@402
    33
    int used;
icculus@760
    34
    int emit_position;  // used in some profiles.
icculus@399
    35
    struct VariableList *next;
icculus@399
    36
} VariableList;
icculus@20
    37
icculus@82
    38
typedef struct RegisterList
icculus@82
    39
{
icculus@82
    40
    RegisterType regtype;
icculus@82
    41
    int regnum;
icculus@104
    42
    MOJOSHADER_usage usage;
icculus@798
    43
    unsigned int index;
icculus@104
    44
    int writemask;
icculus@248
    45
    int misc;
icculus@1083
    46
    int written;
icculus@402
    47
    const VariableList *array;
icculus@82
    48
    struct RegisterList *next;
icculus@82
    49
} RegisterList;
icculus@82
    50
icculus@468
    51
typedef struct
icculus@468
    52
{
icculus@468
    53
    const uint32 *token;   // this is the unmolested token in the stream.
icculus@468
    54
    int regnum;
icculus@468
    55
    int swizzle;  // xyzw (all four, not split out).
icculus@468
    56
    int swizzle_x;
icculus@468
    57
    int swizzle_y;
icculus@468
    58
    int swizzle_z;
icculus@468
    59
    int swizzle_w;
icculus@468
    60
    SourceMod src_mod;
icculus@468
    61
    RegisterType regtype;
icculus@468
    62
    int relative;
icculus@468
    63
    RegisterType relative_regtype;
icculus@468
    64
    int relative_regnum;
icculus@468
    65
    int relative_component;
icculus@468
    66
    const VariableList *relative_array;
icculus@468
    67
} SourceArgInfo;
icculus@56
    68
icculus@760
    69
struct Profile;  // predeclare.
icculus@760
    70
icculus@1030
    71
typedef struct CtabData
icculus@1030
    72
{
icculus@1030
    73
    int have_ctab;
icculus@1030
    74
    int symbol_count;
icculus@1030
    75
    MOJOSHADER_symbol *symbols;
icculus@1030
    76
} CtabData;
icculus@1030
    77
icculus@18
    78
// Context...this is state that changes as we parse through a shader...
icculus@760
    79
typedef struct Context
icculus@18
    80
{
icculus@536
    81
    int isfail;
icculus@536
    82
    int out_of_memory;
icculus@35
    83
    MOJOSHADER_malloc malloc;
icculus@35
    84
    MOJOSHADER_free free;
icculus@97
    85
    void *malloc_data;
icculus@947
    86
    int current_position;
icculus@475
    87
    const uint32 *orig_tokens;
icculus@18
    88
    const uint32 *tokens;
icculus@18
    89
    uint32 tokencount;
icculus@1143
    90
    int know_shader_size;
icculus@450
    91
    const MOJOSHADER_swizzle *swizzles;
icculus@450
    92
    unsigned int swizzles_count;
icculus@1104
    93
    const MOJOSHADER_samplerMap *samplermap;
icculus@1104
    94
    unsigned int samplermap_count;
icculus@944
    95
    Buffer *output;
icculus@944
    96
    Buffer *preflight;
icculus@944
    97
    Buffer *globals;
icculus@1156
    98
    Buffer *inputs;
icculus@1156
    99
    Buffer *outputs;
icculus@944
   100
    Buffer *helpers;
icculus@944
   101
    Buffer *subroutines;
icculus@944
   102
    Buffer *mainline_intro;
icculus@1156
   103
    Buffer *mainline_arguments;
icculus@1156
   104
    Buffer *mainline_top;
icculus@944
   105
    Buffer *mainline;
icculus@1156
   106
    Buffer *postflight;
icculus@944
   107
    Buffer *ignore;
icculus@1156
   108
    Buffer *output_stack[3];
icculus@1156
   109
    int indent_stack[3];
icculus@56
   110
    int output_stack_len;
icculus@40
   111
    int indent;
icculus@334
   112
    const char *shader_type_str;
icculus@18
   113
    const char *endline;
icculus@1156
   114
    const char *mainfn;
icculus@18
   115
    int endline_len;
icculus@18
   116
    int profileid;
icculus@760
   117
    const struct Profile *profile;
icculus@96
   118
    MOJOSHADER_shaderType shader_type;
icculus@46
   119
    uint8 major_ver;
icculus@46
   120
    uint8 minor_ver;
icculus@161
   121
    DestArgInfo dest_arg;
icculus@56
   122
    SourceArgInfo source_args[5];
icculus@139
   123
    SourceArgInfo predicate_arg;  // for predicated instructions.
icculus@31
   124
    uint32 dwords[4];
icculus@398
   125
    uint32 version_token;
icculus@46
   126
    int instruction_count;
icculus@28
   127
    uint32 instruction_controls;
icculus@56
   128
    uint32 previous_opcode;
icculus@1093
   129
    int coissue;
icculus@56
   130
    int loops;
icculus@114
   131
    int reps;
icculus@382
   132
    int max_reps;
icculus@118
   133
    int cmps;
icculus@329
   134
    int scratch_registers;
icculus@329
   135
    int max_scratch_registers;
icculus@382
   136
    int branch_labels_stack_index;
icculus@382
   137
    int branch_labels_stack[32];
icculus@382
   138
    int assigned_branch_labels;
icculus@332
   139
    int assigned_vertex_attributes;
icculus@336
   140
    int last_address_reg_component;
icculus@82
   141
    RegisterList used_registers;
icculus@82
   142
    RegisterList defined_registers;
icculus@536
   143
    ErrorList *errors;
icculus@278
   144
    int constant_count;
icculus@278
   145
    ConstantsList *constants;
icculus@95
   146
    int uniform_count;
icculus@760
   147
    int uniform_float4_count;
icculus@760
   148
    int uniform_int4_count;
icculus@760
   149
    int uniform_bool_count;
icculus@95
   150
    RegisterList uniforms;
icculus@104
   151
    int attribute_count;
icculus@104
   152
    RegisterList attributes;
icculus@148
   153
    int sampler_count;
icculus@148
   154
    RegisterList samplers;
icculus@399
   155
    VariableList *variables;  // variables to register mapping.
icculus@463
   156
    int centroid_allowed;
icculus@1030
   157
    CtabData ctab;
icculus@531
   158
    int have_relative_input_registers;
icculus@1048
   159
    int have_multi_color_outputs;
icculus@463
   160
    int determined_constants_arrays;
icculus@463
   161
    int predicated;
icculus@1054
   162
    int uses_pointsize;
icculus@1075
   163
    int uses_fog;
icculus@1156
   164
icculus@1156
   165
    // !!! FIXME: move these into SUPPORT_PROFILE sections.
icculus@1099
   166
    int glsl_generated_lit_helper;
icculus@1020
   167
    int glsl_generated_texldd_setup;
icculus@1099
   168
    int glsl_generated_texm3x3spec_helper;
icculus@1049
   169
    int arb1_wrote_position;
icculus@1156
   170
    // !!! FIXME: move these into SUPPORT_PROFILE sections.
icculus@1156
   171
icculus@1030
   172
    int have_preshader;
icculus@1058
   173
    int ignores_ctab;
icculus@1099
   174
    int reset_texmpad;
icculus@1099
   175
    int texm3x2pad_dst0;
icculus@1099
   176
    int texm3x2pad_src0;
icculus@1099
   177
    int texm3x3pad_dst0;
icculus@1099
   178
    int texm3x3pad_src0;
icculus@1099
   179
    int texm3x3pad_dst1;
icculus@1099
   180
    int texm3x3pad_src1;
icculus@1030
   181
    MOJOSHADER_preshader *preshader;
icculus@808
   182
icculus@808
   183
#if SUPPORT_PROFILE_ARB1_NV
icculus@808
   184
    int profile_supports_nv2;
icculus@808
   185
    int profile_supports_nv3;
icculus@808
   186
    int profile_supports_nv4;
icculus@808
   187
#endif
icculus@808
   188
#if SUPPORT_PROFILE_GLSL120
icculus@808
   189
    int profile_supports_glsl120;
icculus@808
   190
#endif
flibitijibibo@1150
   191
#if SUPPORT_PROFILE_GLSLES
flibitijibibo@1150
   192
    int profile_supports_glsles;
flibitijibibo@1150
   193
#endif
icculus@1156
   194
icculus@1156
   195
#if SUPPORT_PROFILE_METAL
icculus@1156
   196
    int metal_need_header_common;
icculus@1156
   197
    int metal_need_header_math;
icculus@1156
   198
    int metal_need_header_relational;
icculus@1156
   199
    int metal_need_header_geometric;
icculus@1156
   200
    int metal_need_header_graphics;
icculus@1156
   201
    int metal_need_header_texture;
icculus@1156
   202
#endif
icculus@760
   203
} Context;
icculus@760
   204
icculus@808
   205
icculus@808
   206
// Use these macros so we can remove all bits of these profiles from the build.
icculus@808
   207
// NV extensions to the arb1 profile: read the per-context flag when the
//  profile is compiled in, otherwise collapse to a constant zero.
#if SUPPORT_PROFILE_ARB1_NV
#define support_nv2(ctx) ((ctx)->profile_supports_nv2)
#define support_nv3(ctx) ((ctx)->profile_supports_nv3)
#define support_nv4(ctx) ((ctx)->profile_supports_nv4)
#else
#define support_nv2(ctx) (0)
#define support_nv3(ctx) (0)
#define support_nv4(ctx) (0)
#endif

// Same idea for GLSL 1.20...
#if SUPPORT_PROFILE_GLSL120
#define support_glsl120(ctx) ((ctx)->profile_supports_glsl120)
#else
#define support_glsl120(ctx) (0)
#endif

// ...and for GLSL ES.
#if SUPPORT_PROFILE_GLSLES
#define support_glsles(ctx) ((ctx)->profile_supports_glsles)
#else
#define support_glsles(ctx) (0)
#endif
flibitijibibo@1150
   228
icculus@808
   229
icculus@760
   230
// Profile entry points...
icculus@760
   231
icculus@760
   232
// one emit function for each opcode in each profile.
icculus@760
   233
typedef void (*emit_function)(Context *ctx);
icculus@760
   234
icculus@760
   235
// one emit function for starting output in each profile.
icculus@760
   236
typedef void (*emit_start)(Context *ctx, const char *profilestr);
icculus@760
   237
icculus@760
   238
// one emit function for ending output in each profile.
icculus@760
   239
typedef void (*emit_end)(Context *ctx);
icculus@760
   240
icculus@760
   241
// one emit function for phase opcode output in each profile.
icculus@760
   242
typedef void (*emit_phase)(Context *ctx);
icculus@760
   243
icculus@760
   244
// one emit function for finalizing output in each profile.
icculus@760
   245
typedef void (*emit_finalize)(Context *ctx);
icculus@760
   246
icculus@760
   247
// one emit function for global definitions in each profile.
icculus@760
   248
typedef void (*emit_global)(Context *ctx, RegisterType regtype, int regnum);
icculus@760
   249
icculus@760
   250
// one emit function for relative uniform arrays in each profile.
icculus@760
   251
typedef void (*emit_array)(Context *ctx, VariableList *var);
icculus@760
   252
icculus@760
   253
// one emit function for relative constants arrays in each profile.
icculus@760
   254
typedef void (*emit_const_array)(Context *ctx,
icculus@760
   255
                                 const struct ConstantsList *constslist,
icculus@760
   256
                                 int base, int size);
icculus@760
   257
icculus@760
   258
// one emit function for uniforms in each profile.
icculus@760
   259
typedef void (*emit_uniform)(Context *ctx, RegisterType regtype, int regnum,
icculus@760
   260
                             const VariableList *var);
icculus@760
   261
icculus@760
   262
// one emit function for samplers in each profile.
icculus@1090
   263
typedef void (*emit_sampler)(Context *ctx, int stage, TextureType ttype,
icculus@1090
   264
                             int texbem);
icculus@760
   265
icculus@760
   266
// one emit function for attributes in each profile.
icculus@760
   267
typedef void (*emit_attribute)(Context *ctx, RegisterType regtype, int regnum,
icculus@760
   268
                               MOJOSHADER_usage usage, int index, int wmask,
icculus@760
   269
                               int flags);
icculus@760
   270
icculus@760
   271
// one args function for each possible sequence of opcode arguments.
icculus@760
   272
typedef int (*args_function)(Context *ctx);
icculus@760
   273
icculus@760
   274
// one state function for each opcode where we have state machine updates.
icculus@760
   275
typedef void (*state_function)(Context *ctx);
icculus@760
   276
icculus@760
   277
// one function for varnames in each profile.
icculus@760
   278
typedef const char *(*varname_function)(Context *c, RegisterType t, int num);
icculus@760
   279
icculus@760
   280
// one function for const var array in each profile.
icculus@760
   281
typedef const char *(*const_array_varname_function)(Context *c, int base, int size);
icculus@760
   282
icculus@760
   283
typedef struct Profile
icculus@760
   284
{
icculus@760
   285
    const char *name;
icculus@760
   286
    emit_start start_emitter;
icculus@760
   287
    emit_end end_emitter;
icculus@760
   288
    emit_phase phase_emitter;
icculus@760
   289
    emit_global global_emitter;
icculus@760
   290
    emit_array array_emitter;
icculus@760
   291
    emit_const_array const_array_emitter;
icculus@760
   292
    emit_uniform uniform_emitter;
icculus@760
   293
    emit_sampler sampler_emitter;
icculus@760
   294
    emit_attribute attribute_emitter;
icculus@760
   295
    emit_finalize finalize_emitter;
icculus@760
   296
    varname_function get_varname;
icculus@760
   297
    const_array_varname_function get_const_array_varname;
icculus@760
   298
} Profile;
icculus@18
   299
icculus@18
   300
icculus@940
   301
// !!! FIXME: cut and paste between every damned source file follows...
icculus@940
   302
// !!! FIXME: We need to make some sort of ContextBase that applies to all
icculus@940
   303
// !!! FIXME:  files and move this stuff to mojoshader_common.c ...
icculus@940
   304
icculus@542
   305
// Mark the context as failed, specifically because an allocation failed.
static inline void out_of_memory(Context *ctx)
{
    ctx->out_of_memory = 1;
    ctx->isfail = 1;
} // out_of_memory
icculus@194
   309
icculus@307
   310
// Allocate (len) bytes through the context's allocator callback.
//  Returns NULL, and flags the context via out_of_memory(), on failure.
static inline void *Malloc(Context *ctx, const size_t len)
{
    void *ptr = ctx->malloc((int) len, ctx->malloc_data);
    if (!ptr)
        out_of_memory(ctx);
    return ptr;
} // Malloc
icculus@97
   317
icculus@541
   318
// Copy a null-terminated string into memory obtained from Malloc().
//  Returns NULL if the allocation failed (Malloc() flags the context).
static inline char *StrDup(Context *ctx, const char *str)
{
    const size_t bytes = strlen(str) + 1;  // +1 for the null terminator.
    char *copy = (char *) Malloc(ctx, bytes);
    if (copy == NULL)
        return NULL;
    memcpy(copy, str, bytes);
    return copy;
} // StrDup
icculus@97
   325
icculus@194
   326
// Release (ptr) through the context's deallocator callback.
static inline void Free(Context *ctx, void *ptr)
{
    void *userdata = ctx->malloc_data;
    ctx->free(ptr, userdata);
} // Free
icculus@97
   330
flibitijibibo@1150
   331
// Allocator-callback-shaped wrapper over Malloc(); (data) is the Context.
static void * MOJOSHADERCALL MallocBridge(int bytes, void *data)
{
    Context *ctx = (Context *) data;
    return Malloc(ctx, (size_t) bytes);
} // MallocBridge
icculus@939
   335
flibitijibibo@1150
   336
// Deallocator-callback-shaped wrapper over Free(); (data) is the Context.
static void MOJOSHADERCALL FreeBridge(void *ptr, void *data)
{
    Context *ctx = (Context *) data;
    Free(ctx, ptr);
} // FreeBridge
icculus@939
   340
icculus@97
   341
icculus@56
   342
// jump between output sections in the context...
icculus@56
   343
icculus@944
   344
// Make (*section) the context's current output buffer, creating the buffer
//  if this is the first time the section is used. Returns 1 on success, or
//  0 if the buffer couldn't be created (ctx->output is left alone then).
static int set_output(Context *ctx, Buffer **section)
{
    Buffer *buf = *section;
    if (buf == NULL)  // only create output sections on first use.
    {
        buf = buffer_create(256, MallocBridge, FreeBridge, ctx);
        *section = buf;
        if (buf == NULL)
            return 0;
    } // if

    ctx->output = buf;
    return 1;
} // set_output
icculus@944
   357
icculus@944
   358
// Save the current output buffer and indent level, then switch output to
//  (*section) with the indent reset to zero. Undo with pop_output().
static void push_output(Context *ctx, Buffer **section)
{
    assert(ctx->output_stack_len < (int) (STATICARRAYLEN(ctx->output_stack)));
    const int slot = ctx->output_stack_len;
    ctx->output_stack[slot] = ctx->output;
    ctx->indent_stack[slot] = ctx->indent;
    ctx->output_stack_len = slot + 1;

    // if the switch failed, the entry stays pushed but the indent is kept.
    if (set_output(ctx, section))
        ctx->indent = 0;
} // push_output
icculus@56
   368
icculus@56
   369
// Restore the output buffer and indent level saved by the most recent
//  push_output().
static inline void pop_output(Context *ctx)
{
    assert(ctx->output_stack_len > 0);
    const int slot = ctx->output_stack_len - 1;
    ctx->output_stack_len = slot;
    ctx->output = ctx->output_stack[slot];
    ctx->indent = ctx->indent_stack[slot];
} // pop_output
icculus@56
   376
icculus@56
   377
icculus@56
   378
icculus@56
   379
// Shader model version magic...
icculus@56
   380
icculus@43
   381
static inline uint32 ver_ui32(const uint8 major, const uint8 minor)
icculus@43
   382
{
icculus@493
   383
    return ( (((uint32) major) << 16) | (((minor) == 0xFF) ? 1 : (minor)) );
icculus@43
   384
} // version_ui32
icculus@43
   385
icculus@151
   386
static inline int shader_version_supported(const uint8 maj, const uint8 min)
icculus@43
   387
{
icculus@43
   388
    return (ver_ui32(maj,min) <= ver_ui32(MAX_SHADER_MAJOR, MAX_SHADER_MINOR));
icculus@43
   389
} // shader_version_supported
icculus@43
   390
icculus@151
   391
static inline int shader_version_atleast(const Context *ctx, const uint8 maj,
icculus@151
   392
                                         const uint8 min)
icculus@43
   393
{
icculus@43
   394
    return (ver_ui32(ctx->major_ver, ctx->minor_ver) >= ver_ui32(maj, min));
icculus@43
   395
} // shader_version_atleast
icculus@43
   396
icculus@400
   397
static inline int shader_version_exactly(const Context *ctx, const uint8 maj,
icculus@400
   398
                                         const uint8 min)
icculus@400
   399
{
icculus@400
   400
    return ((ctx->major_ver == maj) && (ctx->minor_ver == min));
icculus@400
   401
} // shader_version_exactly
icculus@400
   402
icculus@151
   403
static inline int shader_is_pixel(const Context *ctx)
icculus@151
   404
{
icculus@151
   405
    return (ctx->shader_type == MOJOSHADER_TYPE_PIXEL);
icculus@151
   406
} // shader_is_pixel
icculus@151
   407
icculus@151
   408
static inline int shader_is_vertex(const Context *ctx)
icculus@151
   409
{
icculus@151
   410
    return (ctx->shader_type == MOJOSHADER_TYPE_VERTEX);
icculus@151
   411
} // shader_is_vertex
icculus@151
   412
icculus@43
   413
icculus@344
   414
static inline int isfail(const Context *ctx)
icculus@344
   415
{
icculus@536
   416
    return ctx->isfail;
icculus@344
   417
} // isfail
icculus@344
   418
icculus@56
   419
icculus@542
   420
// Mark the context as failed and record a printf-style error message at
//  the context's current position. Once the context is out of memory, the
//  failure flag is still set but no message is recorded.
static void failf(Context *ctx, const char *fmt, ...) ISPRINTF(2,3);
static void failf(Context *ctx, const char *fmt, ...)
{
    ctx->isfail = 1;
    if (!ctx->out_of_memory)
    {
        // no filename at this level (we pass a NULL to errorlist_add_va()...)
        va_list args;
        va_start(args, fmt);
        errorlist_add_va(ctx->errors, NULL, ctx->current_position, fmt, args);
        va_end(args);
    } // if
} // failf
icculus@12
   433
icculus@12
   434
icculus@542
   435
// Like failf(), but for a fixed message; (reason) is passed as a "%s"
//  argument, so it is never interpreted as a format string.
static inline void fail(Context *ctx, const char *reason)
{
    failf(ctx, "%s", reason);
} // fail
icculus@12
   439
icculus@12
   440
icculus@542
   441
// Append one line to the current output buffer: ctx->indent tab characters,
//  then the printf-style formatted text, then the context's line ending.
//  Does nothing if the context has already failed.
static void output_line(Context *ctx, const char *fmt, ...) ISPRINTF(2,3);
static void output_line(Context *ctx, const char *fmt, ...)
{
    assert(ctx->output != NULL);
    if (isfail(ctx))
        return;  // we failed previously, don't go on...

    const int depth = ctx->indent;
    if (depth > 0)
    {
        char *tabs = (char *) alloca(depth);
        memset(tabs, '\t', depth);
        buffer_append(ctx->output, tabs, depth);
    } // if

    va_list args;
    va_start(args, fmt);
    buffer_append_va(ctx->output, fmt, args);
    va_end(args);

    buffer_append(ctx->output, ctx->endline, ctx->endline_len);
} // output_line
icculus@12
   463
icculus@12
   464
icculus@542
   465
// Append just the context's line ending to the current output buffer.
//  Does nothing if the context has already failed.
static inline void output_blank_line(Context *ctx)
{
    assert(ctx->output != NULL);
    if (isfail(ctx))
        return;
    buffer_append(ctx->output, ctx->endline, ctx->endline_len);
} // output_blank_line
icculus@56
   471
icculus@56
   472
icculus@43
   473
// !!! FIXME: this is sort of nasty.
icculus@72
   474
// Print (f) into (buf) with MOJOSHADER_printFloat(), then trim it: trailing
//  '0' characters after the decimal point are chopped, and if nothing but
//  zeros followed the point, the point goes too. If (leavedecimal) is
//  non-zero, one digit is always kept after the point, and ".0" is
//  appended when the printed text had no point at all.
// Fails the context if the text doesn't leave two spare bytes in (buf).
static void floatstr(Context *ctx, char *buf, size_t bufsize, float f,
                     int leavedecimal)
{
    const size_t len = MOJOSHADER_printFloat(buf, bufsize, f);
    if ((len+2) >= bufsize)
    {
        fail(ctx, "BUG: internal buffer is too small");
        return;
    } // if

    char *dot = strchr(buf, '.');
    if (dot == NULL)
    {
        if (leavedecimal)
            strcat(buf, ".0");
        return;  // done.
    } // if

    // (tail) is where the terminator will go. Walk it backwards over
    //  trailing zeros; it lands on the point itself if that's all there was.
    char *tail = buf + len;
    for (;;)
    {
        char *last = tail - 1;
        if (last == dot)
        {
            tail = dot;
            break;
        } // if

        if (*last != '0')
            break;

        tail = last;
    } // for

    if ((leavedecimal) && (tail == dot))
        tail += 2;  // keep the point and the one digit after it.

    *tail = '\0';  // chop extra '0' or all decimal places off.
} // floatstr
icculus@43
   504
icculus@1104
   505
static inline TextureType cvtMojoToD3DSamplerType(const MOJOSHADER_samplerType type)
icculus@1104
   506
{
icculus@1104
   507
    return (TextureType) (((int) type) + 2);
icculus@1104
   508
} // cvtMojoToD3DSamplerType
icculus@1104
   509
icculus@1104
   510
static inline MOJOSHADER_samplerType cvtD3DToMojoSamplerType(const TextureType type)
icculus@1104
   511
{
icculus@1104
   512
    return (MOJOSHADER_samplerType) (((int) type) - 2);
icculus@1104
   513
} // cvtD3DToMojoSamplerType
icculus@1104
   514
icculus@17
   515
icculus@82
   516
// Deal with register lists...  !!! FIXME: I sort of hate this.
icculus@82
   517
icculus@97
   518
// Free every node from (item) to the end of the list, by calling (f) with
//  each node and (d). A NULL (item) is fine; nothing happens.
static void free_reglist(MOJOSHADER_free f, void *d, RegisterList *item)
{
    RegisterList *next = NULL;
    for (; item != NULL; item = next)
    {
        next = item->next;  // grab this before the node goes away.
        f(item, d);
    } // for
} // free_reglist
icculus@82
   527
icculus@82
   528
static inline uint32 reg_to_ui32(const RegisterType regtype, const int regnum)
icculus@82
   529
{
icculus@82
   530
    return ( ((uint32) regtype) | (((uint32) regnum) << 16) );
icculus@82
   531
} // reg_to_uint32
icculus@82
   532
icculus@940
   533
// !!! FIXME: ditch this for a hash table.
icculus@104
   534
static RegisterList *reglist_insert(Context *ctx, RegisterList *prev,
icculus@104
   535
                                    const RegisterType regtype,
icculus@104
   536
                                    const int regnum)
icculus@82
   537
{
icculus@82
   538
    const uint32 newval = reg_to_ui32(regtype, regnum);
icculus@82
   539
    RegisterList *item = prev->next;
icculus@82
   540
    while (item != NULL)
icculus@82
   541
    {
icculus@82
   542
        const uint32 val = reg_to_ui32(item->regtype, item->regnum);
icculus@82
   543
        if (newval == val)
icculus@104
   544
            return item;  // already set, so we're done.
icculus@82
   545
        else if (newval < val)  // insert it here.
icculus@82
   546
            break;
icculus@82
   547
        else // if (newval > val)
icculus@82
   548
        {
icculus@82
   549
            // keep going, we're not to the insertion point yet.
icculus@82
   550
            prev = item;
icculus@82
   551
            item = item->next;
icculus@82
   552
        } // else
icculus@82
   553
    } // while
icculus@82
   554
icculus@82
   555
    // we need to insert an entry after (prev).
icculus@97
   556
    item = (RegisterList *) Malloc(ctx, sizeof (RegisterList));
icculus@194
   557
    if (item != NULL)
icculus@82
   558
    {
icculus@82
   559
        item->regtype = regtype;
icculus@82
   560
        item->regnum = regnum;
icculus@248
   561
        item->usage = MOJOSHADER_USAGE_UNKNOWN;
icculus@104
   562
        item->index = 0;
icculus@104
   563
        item->writemask = 0;
icculus@248
   564
        item->misc = 0;
icculus@402
   565
        item->array = NULL;
icculus@82
   566
        item->next = prev->next;
icculus@82
   567
        prev->next = item;
icculus@194
   568
    } // if
icculus@104
   569
icculus@104
   570
    return item;
icculus@82
   571
} // reglist_insert
icculus@82
   572
icculus@450
   573
static RegisterList *reglist_find(const RegisterList *prev,
icculus@450
   574
                                  const RegisterType rtype, const int regnum)
icculus@122
   575
{
icculus@122
   576
    const uint32 newval = reg_to_ui32(rtype, regnum);
icculus@122
   577
    RegisterList *item = prev->next;
icculus@82
   578
    while (item != NULL)
icculus@82
   579
    {
icculus@82
   580
        const uint32 val = reg_to_ui32(item->regtype, item->regnum);
icculus@82
   581
        if (newval == val)
icculus@82
   582
            return item;  // here it is.
icculus@82
   583
        else if (newval < val)  // should have been here if it existed.
icculus@82
   584
            return NULL;
icculus@82
   585
        else // if (newval > val)
icculus@82
   586
            item = item->next;
icculus@82
   587
    } // while
icculus@82
   588
icculus@82
   589
    return NULL;  // wasn't in the list.
icculus@122
   590
} // reglist_find
icculus@122
   591
icculus@122
   592
static inline const RegisterList *reglist_exists(RegisterList *prev,
icculus@122
   593
                                                 const RegisterType regtype,
icculus@122
   594
                                                 const int regnum)
icculus@122
   595
{
icculus@122
   596
    return (reglist_find(prev, regtype, regnum));
icculus@82
   597
} // reglist_exists
icculus@82
   598
icculus@1084
   599
// Returns 1 if the register is in the context's used-registers list with
//  its (written) flag set, 0 otherwise.
static inline int register_was_written(Context *ctx, const RegisterType rtype,
                                       const int regnum)
{
    const RegisterList *reg = reglist_find(&ctx->used_registers, rtype, regnum);
    if (reg == NULL)
        return 0;
    return (reg->written != 0);
} // register_was_written
icculus@1084
   605
icculus@1085
   606
static inline RegisterList *set_used_register(Context *ctx,
icculus@1085
   607
                                              const RegisterType regtype,
icculus@1085
   608
                                              const int regnum,
icculus@1085
   609
                                              const int written)
icculus@1083
   610
{
icculus@1083
   611
    RegisterList *reg = NULL;
icculus@1048
   612
    if ((regtype == REG_TYPE_COLOROUT) && (regnum > 0))
icculus@1048
   613
        ctx->have_multi_color_outputs = 1;
icculus@1083
   614
icculus@1083
   615
    reg = reglist_insert(ctx, &ctx->used_registers, regtype, regnum);
icculus@1083
   616
    if (reg && written)
icculus@1083
   617
        reg->written = 1;
icculus@1085
   618
    return reg;
icculus@82
   619
} // set_used_register
icculus@82
   620
icculus@82
   621
// Non-zero if the register is in the context's used-registers list.
static inline int get_used_register(Context *ctx, const RegisterType regtype,
                                    const int regnum)
{
    const RegisterList *found =
        reglist_exists(&ctx->used_registers, regtype, regnum);
    return (found != NULL);
} // get_used_register
icculus@82
   626
icculus@82
   627
// Add the register to the context's defined-registers list, if it isn't
//  there already. The list node is not handed back to the caller.
static inline void set_defined_register(Context *ctx, const RegisterType rtype,
                                        const int regnum)
{
    RegisterList *head = &ctx->defined_registers;
    reglist_insert(ctx, head, rtype, regnum);
} // set_defined_register
icculus@82
   632
icculus@82
   633
// Non-zero if the register is in the context's defined-registers list.
static inline int get_defined_register(Context *ctx, const RegisterType rtype,
                                       const int regnum)
{
    const RegisterList *found =
        reglist_exists(&ctx->defined_registers, rtype, regnum);
    return (found != NULL);
} // get_defined_register
icculus@82
   638
icculus@104
   639
// Add the register to the context's attributes list (or find it, if it's
//  already there) and record its usage, index, writemask and flags; the
//  flags are stored in the node's (misc) field.
//
// An OUTPUT register with POINTSIZE or FOG usage additionally sets
//  ctx->uses_pointsize or ctx->uses_fog.
//
// If the list node can't be allocated, nothing is recorded; Malloc() has
//  already marked the context as failed by then.
static void add_attribute_register(Context *ctx, const RegisterType rtype,
                                const int regnum, const MOJOSHADER_usage usage,
                                const int index, const int writemask, int flags)
{
    RegisterList *item = reglist_insert(ctx, &ctx->attributes, rtype, regnum);
    if (item == NULL)
        return;  // out of memory; don't dereference a NULL node.

    item->usage = usage;
    item->index = index;
    item->writemask = writemask;
    item->misc = flags;

    if ((rtype == REG_TYPE_OUTPUT) && (usage == MOJOSHADER_USAGE_POINTSIZE))
        ctx->uses_pointsize = 1;  // note that we have to check this later.
    else if ((rtype == REG_TYPE_OUTPUT) && (usage == MOJOSHADER_USAGE_FOG))
        ctx->uses_fog = 1;  // note that we have to check this later.
} // add_attribute_register
icculus@104
   654
icculus@1104
   655
static inline void add_sampler(Context *ctx, const int regnum,
icculus@1104
   656
                               TextureType ttype, const int texbem)
icculus@1104
   657
{
icculus@1104
   658
    const RegisterType rtype = REG_TYPE_SAMPLER;
icculus@1104
   659
icculus@297
   660
    // !!! FIXME: make sure it doesn't exist?
icculus@1088
   661
    // !!! FIXME:  (ps_1_1 assume we can add it multiple times...)
icculus@148
   662
    RegisterList *item = reglist_insert(ctx, &ctx->samplers, rtype, regnum);
icculus@1104
   663
icculus@1104
   664
    if (ctx->samplermap != NULL)
icculus@1104
   665
    {
icculus@1104
   666
        unsigned int i;
icculus@1104
   667
        for (i = 0; i < ctx->samplermap_count; i++)
icculus@1104
   668
        {
icculus@1104
   669
            if (ctx->samplermap[i].index == regnum)
icculus@1104
   670
            {
icculus@1104
   671
                ttype = cvtMojoToD3DSamplerType(ctx->samplermap[i].type);
icculus@1104
   672
                break;
icculus@1104
   673
            } // if
icculus@1104
   674
        } // for
icculus@1104
   675
    } // if
icculus@1104
   676
icculus@148
   677
    item->index = (int) ttype;
icculus@1090
   678
    item->misc |= texbem;
icculus@148
   679
} // add_sampler
icculus@148
   680
icculus@104
   681
icculus@295
   682
// Non-zero if (writemask) enables all four components (no explicit mask).
static inline int writemask_xyzw(const int writemask)
{
    const int full_mask = 0xF;  // binary 1111: x, y, z and w.
    return (writemask == full_mask);
} // writemask_xyzw
icculus@295
   686
icculus@295
   687
icculus@295
   688
// Non-zero if (writemask) enables exactly the x, y and z components.
static inline int writemask_xyz(const int writemask)
{
    const int xyz_mask = 0x7;  // binary 0111.
    return (writemask == xyz_mask);
} // writemask_xyz
icculus@295
   692
icculus@295
   693
icculus@295
   694
// Non-zero if (writemask) enables exactly the x and y components.
static inline int writemask_xy(const int writemask)
{
    const int xy_mask = 0x3;  // binary 0011.
    return (writemask == xy_mask);
} // writemask_xy
icculus@295
   698
icculus@295
   699
icculus@295
   700
// Non-zero if (writemask) enables only the x component.
static inline int writemask_x(const int writemask)
{
    const int x_mask = 0x1;  // binary 0001.
    return (writemask == x_mask);
} // writemask_x
icculus@295
   704
icculus@295
   705
icculus@295
   706
// Non-zero if (writemask) enables only the y component.
static inline int writemask_y(const int writemask)
{
    const int y_mask = 0x2;  // binary 0010.
    return (writemask == y_mask);
} // writemask_y
icculus@295
   710
icculus@295
   711
icculus@121
   712
// Non-zero if all four 2-bit selectors packed into the low byte of (swizzle)
//  pick the same channel (".xxxx", ".yyyy", etc). Bits above the low byte
//  are ignored.
static inline int replicate_swizzle(const int swizzle)
{
    const int sel0 = (swizzle >> 0) & 0x3;
    const int sel1 = (swizzle >> 2) & 0x3;
    const int sel2 = (swizzle >> 4) & 0x3;
    const int sel3 = (swizzle >> 6) & 0x3;
    return ((sel0 == sel1) && (sel1 == sel2) && (sel2 == sel3));
} // replicate_swizzle
icculus@121
   718
icculus@121
   719
icculus@292
   720
// Non-zero if (swizzle) is the identity mapping: selectors 0, 1, 2, 3 packed
//  two bits each from the low end, which is binary 11100100 (0xE4).
static inline int no_swizzle(const int swizzle)
{
    const int identity = 0xE4;
    return (swizzle == identity);
} // no_swizzle
icculus@292
   724
icculus@292
   725
icculus@165
   726
// Count how many of the low four bits of (m) are set; that is, how many
//  components the writemask enables.
static inline int vecsize_from_writemask(const int m)
{
    int enabled = 0;
    int bit;
    for (bit = 0; bit < 4; bit++)
        enabled += (m >> bit) & 1;
    return enabled;
} // vecsize_from_writemask
icculus@165
   730
icculus@1077
   731
icculus@1077
   732
// Store (mask) on the destination argument, and split its low four bits
//  into the per-component flags writemask0..writemask3 (each 0 or 1).
static inline void set_dstarg_writemask(DestArgInfo *dst, const int mask)
{
    dst->writemask0 = (mask & 0x1) ? 1 : 0;
    dst->writemask1 = (mask & 0x2) ? 1 : 0;
    dst->writemask2 = (mask & 0x4) ? 1 : 0;
    dst->writemask3 = (mask & 0x8) ? 1 : 0;
    dst->writemask = mask;
} // set_dstarg_writemask
icculus@1077
   740
icculus@1077
   741
icculus@329
   742
// Hand out the next scratch register index and bump the in-use counter.
//  ctx->max_scratch_registers is raised when needed so it always covers
//  the index just returned.
static int allocate_scratch_register(Context *ctx)
{
    const int slot = ctx->scratch_registers;
    ctx->scratch_registers = slot + 1;

    if (ctx->max_scratch_registers <= slot)
        ctx->max_scratch_registers = slot + 1;

    return slot;
} // allocate_scratch_register
icculus@329
   749
icculus@382
   750
// Return the next unused branch label number and advance the counter.
static int allocate_branch_label(Context *ctx)
{
    const int label = ctx->assigned_branch_labels;
    ctx->assigned_branch_labels++;
    return label;
} // allocate_branch_label
icculus@368
   754
icculus@947
   755
// Move the parse cursor by (incr) tokens: the token pointer advances, the
//  remaining-token count shrinks, and the byte position grows by the size
//  of that many uint32 tokens.
static inline void adjust_token_position(Context *ctx, const int incr)
{
    ctx->current_position += incr * sizeof (uint32);
    ctx->tokencount -= incr;
    ctx->tokens += incr;
} // adjust_token_position
icculus@947
   761
icculus@121
   762
icculus@104
   763
// D3D stuff that's used in more than just the d3d profile...
icculus@104
   764
icculus@1054
   765
static int isscalar(Context *ctx, const MOJOSHADER_shaderType shader_type,
icculus@1054
   766
                    const RegisterType rtype, const int rnum)
icculus@1054
   767
{
icculus@1075
   768
    const int uses_psize = ctx->uses_pointsize;
icculus@1075
   769
    const int uses_fog = ctx->uses_fog;
icculus@1075
   770
    if ( (rtype == REG_TYPE_OUTPUT) && ((uses_psize) || (uses_fog)) )
icculus@1054
   771
    {
icculus@1054
   772
        const RegisterList *reg = reglist_find(&ctx->attributes, rtype, rnum);
icculus@1054
   773
        if (reg != NULL)
icculus@1075
   774
        {
icculus@1075
   775
            const MOJOSHADER_usage usage = reg->usage;
icculus@1075
   776
            return ( (uses_psize && (usage == MOJOSHADER_USAGE_POINTSIZE)) ||
icculus@1075
   777
                     (uses_fog && (usage == MOJOSHADER_USAGE_FOG)) );
icculus@1075
   778
        } // if
icculus@1054
   779
    } // if
icculus@1054
   780
icculus@1054
   781
    return scalar_register(shader_type, rtype, rnum);
icculus@1054
   782
} // isscalar
icculus@1054
   783
icculus@141
   784
// Component letters, indexed by a swizzle/component selector (0..3).
//  Not NUL-terminated: this is a four-element char array, not a C string.
static const char swizzle_channels[] = { 'x', 'y', 'z', 'w' };
icculus@141
   785
icculus@141
   786
icculus@104
   787
// Name suffixes for attribute usages, one per line.
// NOTE(review): presumably indexed by MOJOSHADER_usage, so the order must
//  match that enum; confirm against the header before reordering.
static const char *usagestrs[] = {
    "_position",
    "_blendweight",
    "_blendindices",
    "_normal",
    "_psize",
    "_texcoord",
    "_tangent",
    "_binormal",
    "_tessfactor",
    "_positiont",
    "_color",
    "_fog",
    "_depth",
    "_sample"
};
icculus@104
   792
icculus@24
   793
static const char *get_D3D_register_string(Context *ctx,
icculus@34
   794
                                           RegisterType regtype,
icculus@24
   795
                                           int regnum, char *regnum_str,
icculus@24
   796
                                           size_t regnum_size)
icculus@24
   797
{
icculus@24
   798
    const char *retval = NULL;
icculus@31
   799
    int has_number = 1;
icculus@24
   800
icculus@24
   801
    switch (regtype)
icculus@24
   802
    {
icculus@73
   803
        case REG_TYPE_TEMP:
icculus@24
   804
            retval = "r";
icculus@24
   805
            break;
icculus@24
   806
icculus@73
   807
        case REG_TYPE_INPUT:
icculus@24
   808
            retval = "v";
icculus@24
   809
            break;
icculus@24
   810
icculus@73
   811
        case REG_TYPE_CONST:
icculus@31
   812
            retval = "c";
icculus@31
   813
            break;
icculus@31
   814
icculus@73
   815
        case REG_TYPE_ADDRESS:  // (or REG_TYPE_TEXTURE, same value.)
icculus@151
   816
            retval = shader_is_vertex(ctx) ? "a" : "t";
icculus@24
   817
            break;
icculus@24
   818
icculus@73
   819
        case REG_TYPE_RASTOUT:
icculus@34
   820
            switch ((RastOutType) regnum)
icculus@24
   821
            {
icculus@24
   822
                case RASTOUT_TYPE_POSITION: retval = "oPos"; break;
icculus@24
   823
                case RASTOUT_TYPE_FOG: retval = "oFog"; break;
icculus@24
   824
                case RASTOUT_TYPE_POINT_SIZE: retval = "oPts"; break;
icculus@24
   825
            } // switch
icculus@31
   826
            has_number = 0;
icculus@24
   827
            break;
icculus@24
   828
icculus@73
   829
        case REG_TYPE_ATTROUT:
icculus@24
   830
            retval = "oD";
icculus@24
   831
            break;
icculus@24
   832
icculus@73
   833
        case REG_TYPE_OUTPUT: // (or REG_TYPE_TEXCRDOUT, same value.)
icculus@151
   834
            if (shader_is_vertex(ctx) && shader_version_atleast(ctx, 3, 0))
icculus@24
   835
                retval = "o";
icculus@24
   836
            else
icculus@24
   837
                retval = "oT";
icculus@24
   838
            break;
icculus@24
   839
icculus@73
   840
        case REG_TYPE_CONSTINT:
icculus@24
   841
            retval = "i";
icculus@24
   842
            break;
icculus@24
   843
icculus@73
   844
        case REG_TYPE_COLOROUT:
icculus@24
   845
            retval = "oC";
icculus@24
   846
            break;
icculus@24
   847
icculus@73
   848
        case REG_TYPE_DEPTHOUT:
icculus@24
   849
            retval = "oDepth";
icculus@31
   850
            has_number = 0;
icculus@24
   851
            break;
icculus@24
   852
icculus@73
   853
        case REG_TYPE_SAMPLER:
icculus@24
   854
            retval = "s";
icculus@24
   855
            break;
icculus@24
   856
icculus@73
   857
        case REG_TYPE_CONSTBOOL:
icculus@24
   858
            retval = "b";
icculus@24
   859
            break;
icculus@24
   860
icculus@73
   861
        case REG_TYPE_LOOP:
icculus@24
   862
            retval = "aL";
icculus@31
   863
            has_number = 0;
icculus@24
   864
            break;
icculus@24
   865
icculus@73
   866
        case REG_TYPE_MISCTYPE:
icculus@317
   867
            switch ((const MiscTypeType) regnum)
icculus@24
   868
            {
icculus@24
   869
                case MISCTYPE_TYPE_POSITION: retval = "vPos"; break;
icculus@24
   870
                case MISCTYPE_TYPE_FACE: retval = "vFace"; break;
icculus@24
   871
            } // switch
icculus@31
   872
            has_number = 0;
icculus@24
   873
            break;
icculus@24
   874
icculus@73
   875
        case REG_TYPE_LABEL:
icculus@24
   876
            retval = "l";
icculus@24
   877
            break;
icculus@24
   878
icculus@73
   879
        case REG_TYPE_PREDICATE:
icculus@24
   880
            retval = "p";
icculus@24
   881
            break;
icculus@234
   882
icculus@234
   883
        //case REG_TYPE_TEMPFLOAT16:  // !!! FIXME: don't know this asm string
icculus@234
   884
        default:
icculus@234
   885
            fail(ctx, "unknown register type");
icculus@234
   886
            retval = "???";
icculus@234
   887
            has_number = 0;
icculus@234
   888
            break;
icculus@24
   889
    } // switch
icculus@24
   890
icculus@31
   891
    if (has_number)
icculus@31
   892
        snprintf(regnum_str, regnum_size, "%u", (uint) regnum);
icculus@31
   893
    else
icculus@31
   894
        regnum_str[0] = '\0';
icculus@31
   895
icculus@24
   896
    return retval;
icculus@24
   897
} // get_D3D_register_string
icculus@24
   898
icculus@24
   899
icculus@940
   900
// !!! FIXME: can we split the profile code out to separate source files?
icculus@940
   901
icculus@67
   902
#define AT_LEAST_ONE_PROFILE 0
icculus@67
   903
icculus@67
   904
#if !SUPPORT_PROFILE_D3D
icculus@67
   905
#define PROFILE_EMITTER_D3D(op)
icculus@67
   906
#else
icculus@67
   907
#undef AT_LEAST_ONE_PROFILE
icculus@67
   908
#define AT_LEAST_ONE_PROFILE 1
icculus@67
   909
#define PROFILE_EMITTER_D3D(op) emit_D3D_##op,
icculus@67
   910
icculus@165
   911
static const char *make_D3D_srcarg_string_in_buf(Context *ctx,
icculus@943
   912
                                                 const SourceArgInfo *arg,
icculus@943
   913
                                                 char *buf, size_t buflen)
icculus@139
   914
{
icculus@139
   915
    const char *premod_str = "";
icculus@139
   916
    const char *postmod_str = "";
icculus@139
   917
    switch (arg->src_mod)
icculus@139
   918
    {
icculus@139
   919
        case SRCMOD_NEGATE:
icculus@139
   920
            premod_str = "-";
icculus@139
   921
            break;
icculus@139
   922
icculus@139
   923
        case SRCMOD_BIASNEGATE:
icculus@139
   924
            premod_str = "-";
icculus@139
   925
            // fall through.
icculus@139
   926
        case SRCMOD_BIAS:
icculus@139
   927
            postmod_str = "_bias";
icculus@139
   928
            break;
icculus@139
   929
icculus@139
   930
        case SRCMOD_SIGNNEGATE:
icculus@139
   931
            premod_str = "-";
icculus@139
   932
            // fall through.
icculus@139
   933
        case SRCMOD_SIGN:
icculus@139
   934
            postmod_str = "_bx2";
icculus@139
   935
            break;
icculus@139
   936
icculus@139
   937
        case SRCMOD_COMPLEMENT:
icculus@139
   938
            premod_str = "1-";
icculus@139
   939
            break;
icculus@139
   940
icculus@139
   941
        case SRCMOD_X2NEGATE:
icculus@139
   942
            premod_str = "-";
icculus@139
   943
            // fall through.
icculus@139
   944
        case SRCMOD_X2:
icculus@139
   945
            postmod_str = "_x2";
icculus@139
   946
            break;
icculus@139
   947
icculus@139
   948
        case SRCMOD_DZ:
icculus@139
   949
            postmod_str = "_dz";
icculus@139
   950
            break;
icculus@139
   951
icculus@139
   952
        case SRCMOD_DW:
icculus@139
   953
            postmod_str = "_dw";
icculus@139
   954
            break;
icculus@139
   955
icculus@139
   956
        case SRCMOD_ABSNEGATE:
icculus@139
   957
            premod_str = "-";
icculus@139
   958
            // fall through.
icculus@139
   959
        case SRCMOD_ABS:
icculus@139
   960
            postmod_str = "_abs";
icculus@139
   961
            break;
icculus@139
   962
icculus@139
   963
        case SRCMOD_NOT:
icculus@139
   964
            premod_str = "!";
icculus@139
   965
            break;
icculus@139
   966
icculus@139
   967
        case SRCMOD_NONE:
icculus@139
   968
        case SRCMOD_TOTAL:
icculus@139
   969
             break;  // stop compiler whining.
icculus@139
   970
    } // switch
icculus@139
   971
icculus@139
   972
icculus@139
   973
    char regnum_str[16];
icculus@139
   974
    const char *regtype_str = get_D3D_register_string(ctx, arg->regtype,
icculus@139
   975
                                                      arg->regnum, regnum_str,
icculus@139
   976
                                                      sizeof (regnum_str));
icculus@139
   977
icculus@139
   978
    if (regtype_str == NULL)
icculus@139
   979
    {
icculus@139
   980
        fail(ctx, "Unknown source register type.");
icculus@943
   981
        *buf = '\0';
icculus@943
   982
        return buf;
icculus@139
   983
    } // if
icculus@139
   984
icculus@152
   985
    const char *rel_lbracket = "";
icculus@152
   986
    const char *rel_rbracket = "";
icculus@152
   987
    char rel_swizzle[4] = { '\0' };
icculus@152
   988
    char rel_regnum_str[16] = { '\0' };
icculus@152
   989
    const char *rel_regtype_str = "";
icculus@152
   990
    if (arg->relative)
icculus@152
   991
    {
icculus@152
   992
        rel_swizzle[0] = '.';
icculus@152
   993
        rel_swizzle[1] = swizzle_channels[arg->relative_component];
icculus@152
   994
        rel_swizzle[2] = '\0';
icculus@152
   995
        rel_lbracket = "[";
icculus@152
   996
        rel_rbracket = "]";
icculus@152
   997
        rel_regtype_str = get_D3D_register_string(ctx, arg->relative_regtype,
icculus@152
   998
                                                  arg->relative_regnum,
icculus@152
   999
                                                  rel_regnum_str,
icculus@152
  1000
                                                  sizeof (rel_regnum_str));
icculus@152
  1001
icculus@152
  1002
        if (regtype_str == NULL)
icculus@152
  1003
        {
icculus@152
  1004
            fail(ctx, "Unknown relative source register type.");
icculus@943
  1005
            *buf = '\0';
icculus@943
  1006
            return buf;
icculus@152
  1007
        } // if
icculus@152
  1008
    } // if
icculus@152
  1009
icculus@139
  1010
    char swizzle_str[6];
icculus@798
  1011
    size_t i = 0;
icculus@1054
  1012
    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
icculus@316
  1013
    if (!scalar && !no_swizzle(arg->swizzle))
icculus@139
  1014
    {
icculus@139
  1015
        swizzle_str[i++] = '.';
icculus@141
  1016
        swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
icculus@141
  1017
        swizzle_str[i++] = swizzle_channels[arg->swizzle_y];
icculus@141
  1018
        swizzle_str[i++] = swizzle_channels[arg->swizzle_z];
icculus@141
  1019
        swizzle_str[i++] = swizzle_channels[arg->swizzle_w];
icculus@139
  1020
icculus@139
  1021
        // .xyzz is the same as .xyz, .z is the same as .zzzz, etc.
icculus@139
  1022
        while (swizzle_str[i-1] == swizzle_str[i-2])
icculus@139
  1023
            i--;
icculus@139
  1024
    } // if
icculus@139
  1025
    swizzle_str[i] = '\0';
icculus@139
  1026
    assert(i < sizeof (swizzle_str));
icculus@139
  1027
icculus@476
  1028
    // !!! FIXME: c12[a0.x] actually needs to be c[a0.x + 12]
icculus@152
  1029
    snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s",
icculus@152
  1030
             premod_str, regtype_str, regnum_str, postmod_str,
icculus@152
  1031
             rel_lbracket, rel_regtype_str, rel_regnum_str, rel_swizzle,
icculus@152
  1032
             rel_rbracket, swizzle_str);
icculus@139
  1033
    // !!! FIXME: make sure the scratch buffer was large enough.
icculus@139
  1034
    return buf;
icculus@165
  1035
} // make_D3D_srcarg_string_in_buf
icculus@139
  1036
icculus@139
  1037
icculus@943
  1038
// Format the current instruction's destination argument (ctx->dest_arg) as
//  D3D assembly text into (buf): result shift and modifier suffixes first,
//  then a space, an optional "(predicate) " clause, and finally the
//  register name with its writemask.
//  If the register's name can't be determined, a failure is reported on
//  (ctx) and (buf) is set to the empty string. Always returns (buf).
static const char *make_D3D_destarg_string(Context *ctx, char *buf,
                                           const size_t buflen)
{
    const DestArgInfo *dest = &ctx->dest_arg;

    // instruction suffixes: "_x2", "_sat", "_pp", "_centroid", ...
    const char *shift_sfx = "";
    switch (dest->result_shift)
    {
        case 0x1: shift_sfx = "_x2"; break;
        case 0x2: shift_sfx = "_x4"; break;
        case 0x3: shift_sfx = "_x8"; break;
        case 0xD: shift_sfx = "_d8"; break;
        case 0xE: shift_sfx = "_d4"; break;
        case 0xF: shift_sfx = "_d2"; break;
    } // switch

    const char *sat_sfx = "";
    const char *pp_sfx = "";
    const char *centroid_sfx = "";
    if (dest->result_mod & MOD_SATURATE)
        sat_sfx = "_sat";
    if (dest->result_mod & MOD_PP)
        pp_sfx = "_pp";
    if (dest->result_mod & MOD_CENTROID)
        centroid_sfx = "_centroid";

    // register name.
    char number[16];
    const char *name = get_D3D_register_string(ctx, dest->regtype,
                                               dest->regnum, number,
                                               sizeof (number));
    if (name == NULL)
    {
        fail(ctx, "Unknown destination register type.");
        *buf = '\0';
        return buf;
    } // if

    // writemask: omitted for scalar registers and for a full xyzw mask.
    char mask[6];
    size_t len = 0;
    const int scalar = isscalar(ctx, ctx->shader_type, dest->regtype, dest->regnum);
    if (!scalar && !writemask_xyzw(dest->writemask))
    {
        const int enabled[4] = {
            (dest->writemask0 != 0), (dest->writemask1 != 0),
            (dest->writemask2 != 0), (dest->writemask3 != 0)
        };
        int chan;
        mask[len++] = '.';
        for (chan = 0; chan < 4; chan++)
        {
            if (enabled[chan])
                mask[len++] = swizzle_channels[chan];
        } // for
    } // if
    mask[len] = '\0';
    assert(len < sizeof (mask));

    // optional predicate clause.
    char pred[32] = { '\0' };
    const char *pred_open = "";
    const char *pred_close = "";
    if (ctx->predicated)
    {
        pred_open = "(";
        pred_close = ") ";
        make_D3D_srcarg_string_in_buf(ctx, &ctx->predicate_arg,
                                      pred, sizeof (pred));
    } // if

    // may turn out something like "_x2_sat_pp_centroid (!p0.x) r0.xyzw" ...
    snprintf(buf, buflen, "%s%s%s%s %s%s%s%s%s%s",
             shift_sfx, sat_sfx, pp_sfx, centroid_sfx,
             pred_open, pred, pred_close,
             name, number, mask);
    // !!! FIXME: make sure the scratch buffer was large enough.
    return buf;
} // make_D3D_destarg_string
icculus@16
  1102
icculus@16
  1103
icculus@943
  1104
// Format source argument number (idx) of the current instruction into (buf).
//  An out-of-range (idx) reports a failure on (ctx) and produces an empty
//  string. Always returns (buf).
static const char *make_D3D_srcarg_string(Context *ctx, const size_t idx,
                                          char *buf, size_t buflen)
{
    if (idx < STATICARRAYLEN(ctx->source_args))
        return make_D3D_srcarg_string_in_buf(ctx, &ctx->source_args[idx],
                                             buf, buflen);

    fail(ctx, "Too many source args");
    *buf = '\0';
    return buf;
} // make_D3D_srcarg_string
icculus@16
  1117
icculus@943
  1118
// Write the D3D assembly name of register (rt, regnum) into (buf): the
//  register's name prefix followed by its number, if it has one.
//  (len) is the size of (buf) in bytes. Always returns (buf).
//
//  get_D3D_register_string() hands back NULL for a rasterizer-output or
//  misc register whose number it doesn't recognize. That case reports a
//  failure on (ctx) and yields an empty string, rather than passing NULL
//  to snprintf's %s.
static const char *get_D3D_varname_in_buf(Context *ctx, RegisterType rt,
                                           int regnum, char *buf,
                                           const size_t len)
{
    char regnum_str[16];
    const char *regtype_str = get_D3D_register_string(ctx, rt, regnum,
                                              regnum_str, sizeof (regnum_str));
    if (regtype_str == NULL)
    {
        fail(ctx, "Unknown register type.");
        if (len > 0)
            *buf = '\0';
        return buf;
    } // if

    snprintf(buf, len, "%s%s", regtype_str, regnum_str);
    return buf;
} // get_D3D_varname_in_buf
icculus@943
  1128
icculus@943
  1129
icculus@943
  1130
// Build the D3D assembly name of register (rt, regnum) in a local scratch
//  buffer and return a copy made with StrDup().
static const char *get_D3D_varname(Context *ctx, RegisterType rt, int regnum)
{
    char name[64];
    get_D3D_varname_in_buf(ctx, rt, regnum, name, sizeof (name));
    return StrDup(ctx, name);
} // get_D3D_varname
icculus@347
  1136
icculus@943
  1137
icculus@402
  1138
// Build the name "c_array_<base>_<size>" in a local scratch buffer and
//  return a copy made with StrDup().
static const char *get_D3D_const_array_varname(Context *ctx, int base, int size)
{
    char name[64];
    snprintf(name, sizeof (name), "c_array_%d_%d", base, size);
    return StrDup(ctx, name);
} // get_D3D_const_array_varname
icculus@347
  1144
icculus@347
  1145
icculus@361
  1146
// Emit the shader's version line: "<shader type>_<major>_<minor>".
//  The minor part is "sw" for minor version 0xFF, "x" for minor version 1
//  on major versions above 1, and the plain decimal number otherwise.
//  Also sets ctx->ignores_ctab. (profilestr) is not used.
static void emit_D3D_start(Context *ctx, const char *profilestr)
{
    const uint major = (uint) ctx->major_ver;
    const uint minor = (uint) ctx->minor_ver;
    char numeric_minor[16];
    const char *minor_text = numeric_minor;

    (void) profilestr;

    ctx->ignores_ctab = 1;

    if (minor == 0xFF)
        minor_text = "sw";
    else if ((major > 1) && (minor == 1))
        minor_text = "x";  // for >= SM2, apparently this is "x". Weird.
    else
        snprintf(numeric_minor, sizeof (numeric_minor), "%u", (uint) minor);

    output_line(ctx, "%s_%u_%s", ctx->shader_type_str, major, minor_text);
} // emit_D3D_start
icculus@15
  1163
icculus@17
  1164
icculus@18
  1165
// Emit the line "end".
static void emit_D3D_end(Context *ctx)
{
    output_line(ctx, "end");
} // emit_D3D_end
icculus@14
  1169
icculus@17
  1170
icculus@400
  1171
// Emit the line "phase".
static void emit_D3D_phase(Context *ctx)
{
    output_line(ctx, "phase");
} // emit_D3D_phase
icculus@400
  1175
icculus@400
  1176
icculus@95
  1177
// Finalize hook for the D3D profile: intentionally does nothing.
static void emit_D3D_finalize(Context *ctx)
{
    (void) ctx;  // no-op.
} // emit_D3D_finalize
icculus@95
  1181
icculus@95
  1182
icculus@95
  1183
// Global-register hook for the D3D profile: intentionally does nothing.
static void emit_D3D_global(Context *ctx, RegisterType regtype, int regnum)
{
    // no-op.
    (void) ctx;
    (void) regtype;
    (void) regnum;
} // emit_D3D_global
icculus@95
  1187
icculus@95
  1188
icculus@760
  1189
// Array hook for the D3D profile: intentionally does nothing.
static void emit_D3D_array(Context *ctx, VariableList *var)
{
    // no-op.
    (void) ctx;
    (void) var;
} // emit_D3D_array
icculus@402
  1193
icculus@402
  1194
icculus@405
  1195
// Constant-array hook for the D3D profile: intentionally does nothing.
static void emit_D3D_const_array(Context *ctx, const ConstantsList *clist,
                                 int base, int size)
{
    // no-op.
    (void) ctx;
    (void) clist;
    (void) base;
    (void) size;
} // emit_D3D_const_array
icculus@405
  1200
icculus@405
  1201
icculus@402
  1202
static void emit_D3D_uniform(Context *ctx, RegisterType regtype, int regnum,
icculus@760
  1203
                             const VariableList *var)
icculus@95
  1204
{
icculus@95
  1205
    // no-op.
icculus@95
  1206
} // emit_D3D_uniform
icculus@95
  1207
icculus@95
  1208
icculus@1090
  1209
// Sampler hook for the D3D profile: intentionally does nothing.
static void emit_D3D_sampler(Context *ctx, int s, TextureType ttype, int tb)
{
    // no-op.
    (void) ctx;
    (void) s;
    (void) ttype;
    (void) tb;
} // emit_D3D_sampler
icculus@148
  1213
icculus@148
  1214
icculus@104
  1215
static void emit_D3D_attribute(Context *ctx, RegisterType regtype, int regnum,
icculus@431
  1216
                               MOJOSHADER_usage usage, int index, int wmask,
icculus@431
  1217
                               int flags)
icculus@104
  1218
{
icculus@104
  1219
    // no-op.
icculus@104
  1220
} // emit_D3D_attribute
icculus@104
  1221
icculus@104
  1222
icculus@18
  1223
// Emitter for the RESERVED opcode: emits nothing.
static void emit_D3D_RESERVED(Context *ctx)
{
    (void) ctx;  // do nothing; fails in the state machine.
} // emit_D3D_RESERVED
icculus@14
  1227
icculus@17
  1228
icculus@17
  1229
// Generic D3D opcode emitters. A list of macros generate all the entry points
icculus@17
  1230
//  that call into these...
icculus@17
  1231
icculus@30
  1232
// Copy (src) into (dst), converting 'A'..'Z' to 'a'..'z' and leaving every
//  other byte alone. The terminating NUL is copied too, so (dst) needs room
//  for strlen(src) + 1 bytes. Returns (dst).
static char *lowercase(char *dst, const char *src)
{
    char *out = dst;
    for (;;)
    {
        const char ch = *src++;
        const int is_upper = ((ch >= 'A') && (ch <= 'Z'));
        *out++ = is_upper ? (char) (ch + ('a' - 'A')) : ch;
        if (ch == '\0')
            break;  // terminator has been written; we're done.
    } // for
    return dst;
} // lowercase
icculus@30
  1242
icculus@30
  1243
icculus@18
  1244
// Emit an instruction that has only a destination argument.
//  The line is: "+" if the instruction is co-issued, the lowercased opcode,
//  then the destination string (which supplies its own separating space).
static void emit_D3D_opcode_d(Context *ctx, const char *opcode)
{
    char dst[64];
    char *lowered = (char *) alloca(strlen(opcode) + 1);

    make_D3D_destarg_string(ctx, dst, sizeof (dst));
    lowercase(lowered, opcode);

    const char *coissue_str = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s%s", coissue_str, lowered, dst);
} // emit_D3D_opcode_d
icculus@17
  1250
icculus@17
  1251
icculus@18
  1252
// Emit an instruction that has a single source argument and no destination.
//  The line is: "+" if co-issued, the lowercased opcode, a space, then the
//  source string.
static void emit_D3D_opcode_s(Context *ctx, const char *opcode)
{
    char src0[64];
    char *lowered = (char *) alloca(strlen(opcode) + 1);

    make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
    lowercase(lowered, opcode);

    const char *coissue_str = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s %s", coissue_str, lowered, src0);
} // emit_D3D_opcode_s
icculus@17
  1258
icculus@17
  1259
icculus@18
  1260
// Emit an instruction that has two source arguments and no destination.
//  The line is: "+" if co-issued, the lowercased opcode, a space, then the
//  comma-separated source strings.
static void emit_D3D_opcode_ss(Context *ctx, const char *opcode)
{
    char src0[64];
    char src1[64];
    char *lowered = (char *) alloca(strlen(opcode) + 1);

    make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
    make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));
    lowercase(lowered, opcode);

    const char *coissue_str = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s %s, %s", coissue_str, lowered, src0, src1);
} // emit_D3D_opcode_ss
icculus@17
  1267
icculus@17
  1268
icculus@18
  1269
// Emit an instruction that has a destination and one source argument.
//  The line is: "+" if co-issued, the lowercased opcode, the destination
//  string (which supplies its own separating space), then the source.
static void emit_D3D_opcode_ds(Context *ctx, const char *opcode)
{
    char dst[64];
    char src0[64];
    char *lowered = (char *) alloca(strlen(opcode) + 1);

    make_D3D_destarg_string(ctx, dst, sizeof (dst));
    make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
    lowercase(lowered, opcode);

    const char *coissue_str = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s%s, %s", coissue_str, lowered, dst, src0);
} // emit_D3D_opcode_ds
icculus@17
  1276
icculus@17
  1277
icculus@18
  1278
// Emit an instruction that has a destination and two source arguments.
//  The line is: "+" if co-issued, the lowercased opcode, the destination
//  string (which supplies its own separating space), then the sources.
static void emit_D3D_opcode_dss(Context *ctx, const char *opcode)
{
    char dst[64];
    char src0[64];
    char src1[64];
    char *lowered = (char *) alloca(strlen(opcode) + 1);

    make_D3D_destarg_string(ctx, dst, sizeof (dst));
    make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
    make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));
    lowercase(lowered, opcode);

    const char *coissue_str = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s%s, %s, %s", coissue_str,
                lowered, dst, src0, src1);
} // emit_D3D_opcode_dss
icculus@17
  1287
icculus@17
  1288
icculus@18
  1289
// Emit an instruction that has a destination and three source arguments.
//  The line is: "+" if co-issued, the lowercased opcode, the destination
//  string (which supplies its own separating space), then the sources.
static void emit_D3D_opcode_dsss(Context *ctx, const char *opcode)
{
    char dst[64];
    char src0[64];
    char src1[64];
    char src2[64];
    char *lowered = (char *) alloca(strlen(opcode) + 1);

    make_D3D_destarg_string(ctx, dst, sizeof (dst));
    make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
    make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));
    make_D3D_srcarg_string(ctx, 2, src2, sizeof (src2));
    lowercase(lowered, opcode);

    const char *coissue_str = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s%s, %s, %s, %s", coissue_str,
                lowered, dst, src0, src1, src2);
} // emit_D3D_opcode_dsss
icculus@17
  1299
icculus@17
  1300
icculus@18
  1301
// Emit an instruction that has a destination and four source arguments.
//  The line is: "+" if co-issued, the lowercased opcode, the destination
//  string (which supplies its own separating space), then the sources.
static void emit_D3D_opcode_dssss(Context *ctx, const char *opcode)
{
    char dst[64];
    char src0[64];
    char src1[64];
    char src2[64];
    char src3[64];
    char *lowered = (char *) alloca(strlen(opcode) + 1);

    make_D3D_destarg_string(ctx, dst, sizeof (dst));
    make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
    make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));
    make_D3D_srcarg_string(ctx, 2, src2, sizeof (src2));
    make_D3D_srcarg_string(ctx, 3, src3, sizeof (src3));
    lowercase(lowered, opcode);

    const char *coissue_str = ctx->coissue ? "+" : "";
    output_line(ctx,"%s%s%s, %s, %s, %s, %s", coissue_str,
                lowered, dst, src0, src1, src2, src3);
} // emit_D3D_opcode_dssss
icculus@17
  1312
icculus@17
  1313
icculus@30
  1314
// Emit one line of D3D assembly consisting of only the opcode (no operands).
static void emit_D3D_opcode(Context *ctx, const char *opcode)
{
    // lowercase() is handed stack scratch space sized for the opcode plus
    //  its terminator; whatever it returns is what gets printed.
    char *scratch = (char *) alloca(strlen(opcode) + 1);
    const char *lowered = lowercase(scratch, opcode);
    const char *prefix = ctx->coissue ? "+" : "";  // "+" when coissue is set.

    output_line(ctx, "%s%s", prefix, lowered);
} // emit_D3D_opcode
icculus@30
  1319
icculus@30
  1320
icculus@17
  1321
// Generators for the simple one-line D3D emitters. Each macro defines a
//  static emit_D3D_<op>(Context *ctx) that hands the stringized opcode name
//  (#op) to the emit_D3D_opcode* helper carrying the same suffix as the macro.

// No operands.
#define EMIT_D3D_OPCODE_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) \
    { \
        emit_D3D_opcode(ctx, #op); \
    }

// Forwards to emit_D3D_opcode_d().
#define EMIT_D3D_OPCODE_D_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) \
    { \
        emit_D3D_opcode_d(ctx, #op); \
    }

// Forwards to emit_D3D_opcode_s().
#define EMIT_D3D_OPCODE_S_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) \
    { \
        emit_D3D_opcode_s(ctx, #op); \
    }

// Forwards to emit_D3D_opcode_ss().
#define EMIT_D3D_OPCODE_SS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) \
    { \
        emit_D3D_opcode_ss(ctx, #op); \
    }

// Forwards to emit_D3D_opcode_ds().
#define EMIT_D3D_OPCODE_DS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) \
    { \
        emit_D3D_opcode_ds(ctx, #op); \
    }

// Forwards to emit_D3D_opcode_dss().
#define EMIT_D3D_OPCODE_DSS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) \
    { \
        emit_D3D_opcode_dss(ctx, #op); \
    }

// Forwards to emit_D3D_opcode_dsss() (destination plus three sources).
#define EMIT_D3D_OPCODE_DSSS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) \
    { \
        emit_D3D_opcode_dsss(ctx, #op); \
    }

// Forwards to emit_D3D_opcode_dssss() (destination plus four sources).
#define EMIT_D3D_OPCODE_DSSSS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) \
    { \
        emit_D3D_opcode_dssss(ctx, #op); \
    }
icculus@17
  1353
icculus@17
  1354
EMIT_D3D_OPCODE_FUNC(NOP)
icculus@17
  1355
EMIT_D3D_OPCODE_DS_FUNC(MOV)
icculus@17
  1356
EMIT_D3D_OPCODE_DSS_FUNC(ADD)
icculus@17
  1357
EMIT_D3D_OPCODE_DSS_FUNC(SUB)
icculus@17
  1358
EMIT_D3D_OPCODE_DSSS_FUNC(MAD)
icculus@17
  1359
EMIT_D3D_OPCODE_DSS_FUNC(MUL)
icculus@17
  1360
EMIT_D3D_OPCODE_DS_FUNC(RCP)
icculus@17
  1361
EMIT_D3D_OPCODE_DS_FUNC(RSQ)
icculus@17
  1362
EMIT_D3D_OPCODE_DSS_FUNC(DP3)
icculus@17
  1363
EMIT_D3D_OPCODE_DSS_FUNC(DP4)
icculus@17
  1364
EMIT_D3D_OPCODE_DSS_FUNC(MIN)
icculus@17
  1365
EMIT_D3D_OPCODE_DSS_FUNC(MAX)
icculus@17
  1366
EMIT_D3D_OPCODE_DSS_FUNC(SLT)
icculus@17
  1367
EMIT_D3D_OPCODE_DSS_FUNC(SGE)
icculus@17
  1368
EMIT_D3D_OPCODE_DS_FUNC(EXP)
icculus@17
  1369
EMIT_D3D_OPCODE_DS_FUNC(LOG)
icculus@17
  1370
EMIT_D3D_OPCODE_DS_FUNC(LIT)
icculus@17
  1371
EMIT_D3D_OPCODE_DSS_FUNC(DST)
icculus@17
  1372
EMIT_D3D_OPCODE_DSSS_FUNC(LRP)
icculus@17
  1373
EMIT_D3D_OPCODE_DS_FUNC(FRC)
icculus@17
  1374
EMIT_D3D_OPCODE_DSS_FUNC(M4X4)
icculus@17
  1375
EMIT_D3D_OPCODE_DSS_FUNC(M4X3)
icculus@17
  1376
EMIT_D3D_OPCODE_DSS_FUNC(M3X4)
icculus@17
  1377
EMIT_D3D_OPCODE_DSS_FUNC(M3X3)
icculus@17
  1378
EMIT_D3D_OPCODE_DSS_FUNC(M3X2)
icculus@17
  1379
EMIT_D3D_OPCODE_S_FUNC(CALL)
icculus@17
  1380
EMIT_D3D_OPCODE_SS_FUNC(CALLNZ)
icculus@20
  1381
EMIT_D3D_OPCODE_SS_FUNC(LOOP)
icculus@17
  1382
EMIT_D3D_OPCODE_FUNC(RET)
icculus@17
  1383
EMIT_D3D_OPCODE_FUNC(ENDLOOP)
icculus@17
  1384
EMIT_D3D_OPCODE_S_FUNC(LABEL)
icculus@17
  1385
EMIT_D3D_OPCODE_DSS_FUNC(POW)
icculus@17
  1386
EMIT_D3D_OPCODE_DSS_FUNC(CRS)
icculus@17
  1387
EMIT_D3D_OPCODE_DSSS_FUNC(SGN)
icculus@17
  1388
EMIT_D3D_OPCODE_DS_FUNC(ABS)
icculus@17
  1389
EMIT_D3D_OPCODE_DS_FUNC(NRM)
icculus@17
  1390
EMIT_D3D_OPCODE_S_FUNC(REP)
icculus@17
  1391
EMIT_D3D_OPCODE_FUNC(ENDREP)
icculus@17
  1392
EMIT_D3D_OPCODE_S_FUNC(IF)
icculus@17
  1393
EMIT_D3D_OPCODE_FUNC(ELSE)
icculus@17
  1394
EMIT_D3D_OPCODE_FUNC(ENDIF)
icculus@17
  1395
EMIT_D3D_OPCODE_FUNC(BREAK)
icculus@17
  1396
EMIT_D3D_OPCODE_DS_FUNC(MOVA)
icculus@17
  1397
EMIT_D3D_OPCODE_D_FUNC(TEXKILL)
icculus@17
  1398
EMIT_D3D_OPCODE_DS_FUNC(TEXBEM)
icculus@17
  1399
EMIT_D3D_OPCODE_DS_FUNC(TEXBEML)
icculus@17
  1400
EMIT_D3D_OPCODE_DS_FUNC(TEXREG2AR)
icculus@17
  1401
EMIT_D3D_OPCODE_DS_FUNC(TEXREG2GB)
icculus@17
  1402
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2PAD)
icculus@17
  1403
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2TEX)
icculus@17
  1404
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3PAD)
icculus@17
  1405
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3TEX)
icculus@17
  1406
EMIT_D3D_OPCODE_DSS_FUNC(TEXM3X3SPEC)
icculus@17
  1407
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3VSPEC)
icculus@17
  1408
EMIT_D3D_OPCODE_DS_FUNC(EXPP)
icculus@17
  1409
EMIT_D3D_OPCODE_DS_FUNC(LOGP)
icculus@17
  1410
EMIT_D3D_OPCODE_DSSS_FUNC(CND)
icculus@17
  1411
EMIT_D3D_OPCODE_DS_FUNC(TEXREG2RGB)
icculus@17
  1412
EMIT_D3D_OPCODE_DS_FUNC(TEXDP3TEX)
icculus@17
  1413
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2DEPTH)
icculus@17
  1414
EMIT_D3D_OPCODE_DS_FUNC(TEXDP3)
icculus@17
  1415
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3)
icculus@17
  1416
EMIT_D3D_OPCODE_D_FUNC(TEXDEPTH)
icculus@17
  1417
EMIT_D3D_OPCODE_DSSS_FUNC(CMP)
icculus@17
  1418
EMIT_D3D_OPCODE_DSS_FUNC(BEM)
icculus@17
  1419
EMIT_D3D_OPCODE_DSSS_FUNC(DP2ADD)
icculus@17
  1420
EMIT_D3D_OPCODE_DS_FUNC(DSX)
icculus@17
  1421
EMIT_D3D_OPCODE_DS_FUNC(DSY)
icculus@17
  1422
EMIT_D3D_OPCODE_DSSSS_FUNC(TEXLDD)
icculus@17
  1423
EMIT_D3D_OPCODE_DSS_FUNC(TEXLDL)
icculus@20
  1424
EMIT_D3D_OPCODE_S_FUNC(BREAKP)
icculus@17
  1425
icculus@28
  1426
// special cases for comparison opcodes...
icculus@28
  1427
// Map ctx->instruction_controls to the comparison suffix appended to an
//  opcode name ("_gt", "_eq", ...). Index 0 maps to the empty string.
// Returns a pointer to a static string. For an out-of-range control value
//  it reports failure through fail() and returns "".
static const char *get_D3D_comparison_string(Context *ctx)
{
    static const char *suffixes[] = {
        "", "_gt", "_eq", "_ge", "_lt", "_ne", "_le"
    };

    if (ctx->instruction_controls < STATICARRAYLEN(suffixes))
        return suffixes[ctx->instruction_controls];

    fail(ctx, "unknown comparison control");
    return "";
} // get_D3D_comparison_string
icculus@28
  1441
icculus@28
  1442
// Emit "break" plus the current comparison suffix via emit_D3D_opcode_ss().
static void emit_D3D_BREAKC(Context *ctx)
{
    const char *suffix = get_D3D_comparison_string(ctx);
    char opname[16];
    snprintf(opname, sizeof (opname), "break%s", suffix);
    emit_D3D_opcode_ss(ctx, opname);
} // emit_D3D_BREAKC
icculus@28
  1448
icculus@28
  1449
// Emit "if" plus the current comparison suffix via emit_D3D_opcode_ss().
static void emit_D3D_IFC(Context *ctx)
{
    const char *suffix = get_D3D_comparison_string(ctx);
    char opname[16];
    snprintf(opname, sizeof (opname), "if%s", suffix);
    emit_D3D_opcode_ss(ctx, opname);
} // emit_D3D_IFC
icculus@28
  1455
icculus@28
  1456
// Emit "setp" plus the current comparison suffix via emit_D3D_opcode_dss().
static void emit_D3D_SETP(Context *ctx)
{
    const char *suffix = get_D3D_comparison_string(ctx);
    char opname[16];
    snprintf(opname, sizeof (opname), "setp%s", suffix);
    emit_D3D_opcode_dss(ctx, opname);
} // emit_D3D_SETP
icculus@28
  1462
icculus@31
  1463
// Emit a "def" line: the destination register followed by the four float
//  values held in ctx->dwords, each formatted by floatstr().
static void emit_D3D_DEF(Context *ctx)
{
    char dst[64];
    make_D3D_destarg_string(ctx, dst, sizeof (dst));

    // Copy the payload into real float objects. Reading ctx->dwords through
    //  a float pointer (as this used to) breaks C's effective-type rules;
    //  memcpy() moves the same bits without the aliasing violation.
    float val[4];  // !!! FIXME: could be int?
    memcpy(val, ctx->dwords, sizeof (val));

    char val0[32];
    char val1[32];
    char val2[32];
    char val3[32];
    floatstr(ctx, val0, sizeof (val0), val[0], 0);
    floatstr(ctx, val1, sizeof (val1), val[1], 0);
    floatstr(ctx, val2, sizeof (val2), val[2], 0);
    floatstr(ctx, val3, sizeof (val3), val[3], 0);

    output_line(ctx, "def%s, %s, %s, %s, %s", dst, val0, val1, val2, val3);
} // emit_D3D_DEF
icculus@31
  1478
icculus@31
  1479
// Emit a "defi" line: the destination register followed by the four values
//  in ctx->dwords, read as signed 32-bit integers.
static void emit_D3D_DEFI(Context *ctx)
{
    const int32 *vals = (const int32 *) ctx->dwords;
    char dst[64];

    make_D3D_destarg_string(ctx, dst, sizeof (dst));

    const int v0 = (int) vals[0];
    const int v1 = (int) vals[1];
    const int v2 = (int) vals[2];
    const int v3 = (int) vals[3];
    output_line(ctx, "defi%s, %d, %d, %d, %d", dst, v0, v1, v2, v3);
} // emit_D3D_DEFI
icculus@31
  1487
icculus@31
  1488
// Emit a "defb" line: the destination register followed by "true" if
//  ctx->dwords[0] is nonzero, "false" otherwise.
static void emit_D3D_DEFB(Context *ctx)
{
    char dst[64];
    make_D3D_destarg_string(ctx, dst, sizeof (dst));

    const char *boolstr = "false";
    if (ctx->dwords[0])
        boolstr = "true";

    output_line(ctx, "defb%s, %s", dst, boolstr);
} // emit_D3D_DEFB
icculus@31
  1494
icculus@31
  1495
icculus@31
  1496
// Emit a "dcl" line. The text between "dcl" and the destination register
//  depends on the destination's register type:
//   - samplers get a texture-type suffix ("_2d", "_cube", "_volume");
//   - the position/face misc registers get nothing extra;
//   - everything else gets a usage string from usagestrs[], followed by the
//     usage index when that index is nonzero.
// Unknown sampler or misc types report failure via fail() and emit nothing.
static void emit_D3D_DCL(Context *ctx)
{
    char dst[64];
    make_D3D_destarg_string(ctx, dst, sizeof (dst));

    const DestArgInfo *dest = &ctx->dest_arg;
    const char *usage_name = "";
    char usage_index[16] = { '\0' };

    if (dest->regtype == REG_TYPE_SAMPLER)
    {
        const TextureType ttype = (const TextureType) ctx->dwords[0];
        if (ttype == TEXTURE_TYPE_2D)
            usage_name = "_2d";
        else if (ttype == TEXTURE_TYPE_CUBE)
            usage_name = "_cube";
        else if (ttype == TEXTURE_TYPE_VOLUME)
            usage_name = "_volume";
        else
        {
            fail(ctx, "unknown sampler texture type");
            return;
        } // else
    } // if

    else if (dest->regtype == REG_TYPE_MISCTYPE)
    {
        const MiscTypeType misc = (const MiscTypeType) dest->regnum;
        const int known = (misc == MISCTYPE_TYPE_POSITION) ||
                          (misc == MISCTYPE_TYPE_FACE);
        if (!known)
        {
            fail(ctx, "unknown misc register type");
            return;
        } // if

        // usage_name stays empty: just become "dcl vFace" or whatever.
    } // else if

    else
    {
        const uint32 usage = ctx->dwords[0];
        const uint32 index = ctx->dwords[1];
        // NOTE(review): usage indexes usagestrs[] with no bounds check here;
        //  presumably it is validated before emission -- confirm.
        usage_name = usagestrs[usage];
        if (index != 0)
            snprintf(usage_index, sizeof (usage_index), "%u", (uint) index);
    } // else

    output_line(ctx, "dcl%s%s%s", usage_name, usage_index, dst);
} // emit_D3D_DCL
icculus@31
  1538
icculus@28
  1539
icculus@288
  1540
// Emit the texture-coordinate opcode, whose spelling and operand layout
//  depend on the shader model: "texcoord" with only a destination below
//  version 1.4, "texcrd" through emit_D3D_opcode_ds() from 1.4 on.
static void emit_D3D_TEXCRD(Context *ctx)
{
    if (!shader_version_atleast(ctx, 1, 4))
    {
        emit_D3D_opcode_d(ctx, "texcoord");
        return;
    } // if

    emit_D3D_opcode_ds(ctx, "texcrd");
} // emit_D3D_TEXCRD
icculus@43
  1548
icculus@288
  1549
// Emit the texture-load opcode, whose spelling and operand layout depend on
//  the shader model:
//   - 2.0 and later: "texld", "texldp" or "texldb", selected by
//     ctx->instruction_controls and emitted via emit_D3D_opcode_dss();
//     any other control value emits nothing;
//   - 1.4: "texld" via emit_D3D_opcode_ds();
//   - earlier: "tex" with only a destination.
static void emit_D3D_TEXLD(Context *ctx)
{
    if (shader_version_atleast(ctx, 2, 0))
    {
        const char *opname = NULL;
        if (ctx->instruction_controls == CONTROL_TEXLD)
            opname = "texld";
        else if (ctx->instruction_controls == CONTROL_TEXLDP)
            opname = "texldp";
        else if (ctx->instruction_controls == CONTROL_TEXLDB)
            opname = "texldb";

        if (opname != NULL)
            emit_D3D_opcode_dss(ctx, opname);
        return;
    } // if

    if (shader_version_atleast(ctx, 1, 4))
    {
        emit_D3D_opcode_ds(ctx, "texld");
        return;
    } // if

    emit_D3D_opcode_d(ctx, "tex");
} // emit_D3D_TEXLD
icculus@43
  1572
icculus@120
  1573
// Emit "sincos". Shader model 3.0 and later use the destination/source
//  form; sm2 and lower need the extra registers, so they use the
//  destination-plus-three-sources form.
static void emit_D3D_SINCOS(Context *ctx)
{
    if (shader_version_atleast(ctx, 3, 0))
        emit_D3D_opcode_ds(ctx, "sincos");
    else
        emit_D3D_opcode_dsss(ctx, "sincos");
} // emit_D3D_SINCOS
icculus@120
  1581
icculus@43
  1582
icculus@17
  1583
#undef EMIT_D3D_OPCODE_FUNC
icculus@17
  1584
#undef EMIT_D3D_OPCODE_D_FUNC
icculus@17
  1585
#undef EMIT_D3D_OPCODE_S_FUNC
icculus@17
  1586
#undef EMIT_D3D_OPCODE_SS_FUNC
icculus@17
  1587
#undef EMIT_D3D_OPCODE_DS_FUNC
icculus@17
  1588
#undef EMIT_D3D_OPCODE_DSS_FUNC
icculus@17
  1589
#undef EMIT_D3D_OPCODE_DSSS_FUNC
icculus@17
  1590
#undef EMIT_D3D_OPCODE_DSSSS_FUNC
icculus@17
  1591
icculus@14
  1592
#endif  // SUPPORT_PROFILE_D3D
icculus@14
  1593
icculus@14
  1594
icculus@469
  1595
#if !SUPPORT_PROFILE_BYTECODE
icculus@469
  1596
#define PROFILE_EMITTER_BYTECODE(op)
icculus@109
  1597
#else
icculus@109
  1598
#undef AT_LEAST_ONE_PROFILE
icculus@109
  1599
#define AT_LEAST_ONE_PROFILE 1
icculus@469
  1600
#define PROFILE_EMITTER_BYTECODE(op) emit_BYTECODE_##op,
icculus@469
  1601
icculus@469
  1602
// Start hook for the bytecode profile: sets ctx->ignores_ctab and does
//  nothing else. profilestr is not used.
static void emit_BYTECODE_start(Context *ctx, const char *profilestr)
{
    (void) profilestr;
    ctx->ignores_ctab = 1;
} // emit_BYTECODE_start
icculus@1143
  1606
icculus@1143
  1607
// Finalize hook for the bytecode profile: the output is just a copy of the
//  token stream between ctx->orig_tokens and ctx->tokens, appended to
//  ctx->mainline. Nothing is appended if set_output() reports failure.
static void emit_BYTECODE_finalize(Context *ctx)
{
    if (!set_output(ctx, &ctx->mainline))
        return;

    // Token count, converted to a byte count.
    const size_t ntokens = (size_t) (ctx->tokens - ctx->orig_tokens);
    const size_t nbytes = ntokens * sizeof (uint32);
    buffer_append(ctx->mainline, (const char *) ctx->orig_tokens, nbytes);
} // emit_BYTECODE_finalize
icculus@469
  1616
icculus@944
  1617
static void emit_BYTECODE_end(Context *ctx) {}
icculus@469
  1618
static void emit_BYTECODE_phase(Context *ctx) {}
icculus@469
  1619
static void emit_BYTECODE_global(Context *ctx, RegisterType t, int n) {}
icculus@760
  1620
static void emit_BYTECODE_array(Context *ctx, VariableList *var) {}
icculus@1090
  1621
static void emit_BYTECODE_sampler(Context *c, int s, TextureType t, int tb) {}
icculus@469
  1622
static void emit_BYTECODE_const_array(Context *ctx, const ConstantsList *c,
icculus@405
  1623
                                         int base, int size) {}
icculus@469
  1624
static void emit_BYTECODE_uniform(Context *ctx, RegisterType t, int n,
icculus@760
  1625
                                  const VariableList *var) {}
icculus@469
  1626
static void emit_BYTECODE_attribute(Context *ctx, RegisterType t, int n,
icculus@431
  1627
                                       MOJOSHADER_usage u, int i, int w,
icculus@431
  1628
                                       int f) {}
icculus@116
  1629
icculus@469
  1630
// Build a variable name for a register in the bytecode profile: the D3D
//  register-type string immediately followed by the register-number string.
// Returns the result of StrDup() on that name.
static const char *get_BYTECODE_varname(Context *ctx, RegisterType rt, int regnum)
{
    char numstr[16];
    char name[64];
    const char *typestr = get_D3D_register_string(ctx, rt, regnum,
                                                  numstr, sizeof (numstr));

    snprintf(name, sizeof (name), "%s%s", typestr, numstr);
    return StrDup(ctx, name);
} // get_BYTECODE_varname
icculus@469
  1639
icculus@469
  1640
// Build the bytecode-profile name for a constant array,
//  "c_array_<base>_<size>". Returns the result of StrDup() on that name.
static const char *get_BYTECODE_const_array_varname(Context *ctx, int base, int size)
{
    char name[64];

    snprintf(name, sizeof (name), "c_array_%d_%d", base, size);
    return StrDup(ctx, name);
} // get_BYTECODE_const_array_varname
icculus@469
  1646
icculus@469
  1647
#define EMIT_BYTECODE_OPCODE_FUNC(op) \
icculus@469
  1648
    static void emit_BYTECODE_##op(Context *ctx) {}
icculus@469
  1649
icculus@469
  1650
EMIT_BYTECODE_OPCODE_FUNC(RESERVED)
icculus@469
  1651
EMIT_BYTECODE_OPCODE_FUNC(NOP)
icculus@469
  1652
EMIT_BYTECODE_OPCODE_FUNC(MOV)
icculus@469
  1653
EMIT_BYTECODE_OPCODE_FUNC(ADD)
icculus@469
  1654
EMIT_BYTECODE_OPCODE_FUNC(SUB)
icculus@469
  1655
EMIT_BYTECODE_OPCODE_FUNC(MAD)
icculus@469
  1656
EMIT_BYTECODE_OPCODE_FUNC(MUL)
icculus@469
  1657
EMIT_BYTECODE_OPCODE_FUNC(RCP)
icculus@469
  1658
EMIT_BYTECODE_OPCODE_FUNC(RSQ)
icculus@469
  1659
EMIT_BYTECODE_OPCODE_FUNC(DP3)
icculus@469
  1660
EMIT_BYTECODE_OPCODE_FUNC(DP4)
icculus@469
  1661
EMIT_BYTECODE_OPCODE_FUNC(MIN)
icculus@469
  1662
EMIT_BYTECODE_OPCODE_FUNC(MAX)
icculus@469
  1663
EMIT_BYTECODE_OPCODE_FUNC(SLT)
icculus@469
  1664
EMIT_BYTECODE_OPCODE_FUNC(SGE)
icculus@469
  1665
EMIT_BYTECODE_OPCODE_FUNC(EXP)
icculus@469
  1666
EMIT_BYTECODE_OPCODE_FUNC(LOG)
icculus@469
  1667
EMIT_BYTECODE_OPCODE_FUNC(LIT)
icculus@469
  1668
EMIT_BYTECODE_OPCODE_FUNC(DST)
icculus@469
  1669
EMIT_BYTECODE_OPCODE_FUNC(LRP)
icculus@469
  1670
EMIT_BYTECODE_OPCODE_FUNC(FRC)
icculus@469
  1671
EMIT_BYTECODE_OPCODE_FUNC(M4X4)
icculus@469
  1672
EMIT_BYTECODE_OPCODE_FUNC(M4X3)
icculus@469
  1673
EMIT_BYTECODE_OPCODE_FUNC(M3X4)
icculus@469
  1674
EMIT_BYTECODE_OPCODE_FUNC(M3X3)
icculus@469
  1675
EMIT_BYTECODE_OPCODE_FUNC(M3X2)
icculus@469
  1676
EMIT_BYTECODE_OPCODE_FUNC(CALL)
icculus@469
  1677
EMIT_BYTECODE_OPCODE_FUNC(CALLNZ)
icculus@469
  1678
EMIT_BYTECODE_OPCODE_FUNC(LOOP)
icculus@469
  1679
EMIT_BYTECODE_OPCODE_FUNC(RET)
icculus@469
  1680
EMIT_BYTECODE_OPCODE_FUNC(ENDLOOP)
icculus@469
  1681
EMIT_BYTECODE_OPCODE_FUNC(LABEL)
icculus@469
  1682
EMIT_BYTECODE_OPCODE_FUNC(POW)
icculus@469
  1683
EMIT_BYTECODE_OPCODE_FUNC(CRS)
icculus@469
  1684
EMIT_BYTECODE_OPCODE_FUNC(SGN)
icculus@469
  1685
EMIT_BYTECODE_OPCODE_FUNC(ABS)
icculus@469
  1686
EMIT_BYTECODE_OPCODE_FUNC(NRM)
icculus@469
  1687
EMIT_BYTECODE_OPCODE_FUNC(SINCOS)
icculus@469
  1688
EMIT_BYTECODE_OPCODE_FUNC(REP)
icculus@469
  1689
EMIT_BYTECODE_OPCODE_FUNC(ENDREP)
icculus@469
  1690
EMIT_BYTECODE_OPCODE_FUNC(IF)
icculus@469
  1691
EMIT_BYTECODE_OPCODE_FUNC(ELSE)
icculus@469
  1692
EMIT_BYTECODE_OPCODE_FUNC(ENDIF)
icculus@469
  1693
EMIT_BYTECODE_OPCODE_FUNC(BREAK)
icculus@469
  1694
EMIT_BYTECODE_OPCODE_FUNC(MOVA)
icculus@469
  1695
EMIT_BYTECODE_OPCODE_FUNC(TEXKILL)
icculus@469
  1696
EMIT_BYTECODE_OPCODE_FUNC(TEXBEM)
icculus@469
  1697
EMIT_BYTECODE_OPCODE_FUNC(TEXBEML)
icculus@469
  1698
EMIT_BYTECODE_OPCODE_FUNC(TEXREG2AR)
icculus@469
  1699
EMIT_BYTECODE_OPCODE_FUNC(TEXREG2GB)
icculus@469
  1700
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2PAD)
icculus@469
  1701
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2TEX)
icculus@469
  1702
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3PAD)
icculus@469
  1703
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3TEX)
icculus@469
  1704
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3SPEC)
icculus@469
  1705
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3VSPEC)
icculus@469
  1706
EMIT_BYTECODE_OPCODE_FUNC(EXPP)
icculus@469
  1707
EMIT_BYTECODE_OPCODE_FUNC(LOGP)
icculus@469
  1708
EMIT_BYTECODE_OPCODE_FUNC(CND)
icculus@469
  1709
EMIT_BYTECODE_OPCODE_FUNC(TEXREG2RGB)
icculus@469
  1710
EMIT_BYTECODE_OPCODE_FUNC(TEXDP3TEX)
icculus@469
  1711
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2DEPTH)
icculus@469
  1712
EMIT_BYTECODE_OPCODE_FUNC(TEXDP3)
icculus@469
  1713
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3)
icculus@469
  1714
EMIT_BYTECODE_OPCODE_FUNC(TEXDEPTH)
icculus@469
  1715
EMIT_BYTECODE_OPCODE_FUNC(CMP)
icculus@469
  1716
EMIT_BYTECODE_OPCODE_FUNC(BEM)
icculus@469
  1717
EMIT_BYTECODE_OPCODE_FUNC(DP2ADD)
icculus@469
  1718
EMIT_BYTECODE_OPCODE_FUNC(DSX)
icculus@469
  1719
EMIT_BYTECODE_OPCODE_FUNC(DSY)
icculus@469
  1720
EMIT_BYTECODE_OPCODE_FUNC(TEXLDD)
icculus@469
  1721
EMIT_BYTECODE_OPCODE_FUNC(TEXLDL)
icculus@469
  1722
EMIT_BYTECODE_OPCODE_FUNC(BREAKP)
icculus@469
  1723
EMIT_BYTECODE_OPCODE_FUNC(BREAKC)
icculus@469
  1724
EMIT_BYTECODE_OPCODE_FUNC(IFC)
icculus@469
  1725
EMIT_BYTECODE_OPCODE_FUNC(SETP)
icculus@469
  1726
EMIT_BYTECODE_OPCODE_FUNC(DEF)
icculus@469
  1727
EMIT_BYTECODE_OPCODE_FUNC(DEFI)
icculus@469
  1728
EMIT_BYTECODE_OPCODE_FUNC(DEFB)
icculus@469
  1729
EMIT_BYTECODE_OPCODE_FUNC(DCL)
icculus@469
  1730
EMIT_BYTECODE_OPCODE_FUNC(TEXCRD)
icculus@469
  1731
EMIT_BYTECODE_OPCODE_FUNC(TEXLD)
icculus@469
  1732
icculus@469
  1733
#undef EMIT_BYTECODE_OPCODE_FUNC
icculus@469
  1734
icculus@469
  1735
#endif  // SUPPORT_PROFILE_BYTECODE
icculus@109
  1736
icculus@14
  1737
icculus@14
  1738
#if !SUPPORT_PROFILE_GLSL
icculus@15
  1739
#define PROFILE_EMITTER_GLSL(op)
icculus@14
  1740
#else
icculus@14
  1741
#undef AT_LEAST_ONE_PROFILE
icculus@14
  1742
#define AT_LEAST_ONE_PROFILE 1
icculus@15
  1743
#define PROFILE_EMITTER_GLSL(op) emit_GLSL_##op,
icculus@15
  1744
icculus@360
  1745
// Defines a static emit_GLSL_<op>() whose only action is to report, via
//  fail(), that the opcode is unimplemented in the GLSL profile.
#define EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(op) \
    static void emit_GLSL_##op(Context *ctx) \
    { \
        fail(ctx, #op " unimplemented in glsl profile"); \
    }
icculus@360
  1749
icculus@943
  1750
static inline const char *get_GLSL_register_string(Context *ctx,
icculus@943
  1751
                        const RegisterType regtype, const int regnum,
icculus@943
  1752
                        char *regnum_str, const size_t regnum_size)
icculus@943
  1753
{
icculus@943
  1754
    // turns out these are identical at the moment.
icculus@943
  1755
    return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size);
icculus@67
  1756
} // get_GLSL_register_string
icculus@67
  1757
icculus@760
  1758
// Map a constant register type to the GLSL type name used for its uniform:
//  "vec4", "ivec4" or "bool". For any other register type it reports
//  failure through fail() and returns NULL.
static const char *get_GLSL_uniform_type(Context *ctx, const RegisterType rtype)
{
    if (rtype == REG_TYPE_CONST)
        return "vec4";

    if (rtype == REG_TYPE_CONSTINT)
        return "ivec4";

    if (rtype == REG_TYPE_CONSTBOOL)
        return "bool";

    fail(ctx, "BUG: used a uniform we don't know how to define.");
    return NULL;
} // get_GLSL_uniform_type
icculus@760
  1770
icculus@943
  1771
// Write the GLSL variable name for a register into buf (at most len bytes,
//  via snprintf): "<shader type>_<register type><register number>".
// Returns buf.
static const char *get_GLSL_varname_in_buf(Context *ctx, RegisterType rt,
                                           int regnum, char *buf,
                                           const size_t len)
{
    char numstr[16];
    const char *typestr = get_GLSL_register_string(ctx, rt, regnum,
                                                   numstr, sizeof (numstr));

    snprintf(buf, len, "%s_%s%s", ctx->shader_type_str, typestr, numstr);
    return buf;
} // get_GLSL_varname_in_buf
icculus@943
  1781
icculus@943
  1782
icculus@943
  1783
// Build the GLSL variable name for a register in a local scratch buffer
//  and return the result of StrDup() on it.
static const char *get_GLSL_varname(Context *ctx, RegisterType rt, int regnum)
{
    char name[64];
    const char *filled = get_GLSL_varname_in_buf(ctx, rt, regnum,
                                                 name, sizeof (name));
    (void) filled;  // same pointer as name; only the buffer contents matter.
    return StrDup(ctx, name);
} // get_GLSL_varname
icculus@104
  1789
icculus@280
  1790
icculus@943
  1791
// Write the GLSL name of a constant array into buf (at most buflen bytes,
//  via snprintf): "<shader type>_const_array_<base>_<size>". Returns buf.
static inline const char *get_GLSL_const_array_varname_in_buf(Context *ctx,
                                                const int base, const int size,
                                                char *buf, const size_t buflen)
{
    snprintf(buf, buflen, "%s_const_array_%d_%d",
             ctx->shader_type_str, base, size);
    return buf;
} // get_GLSL_const_array_varname_in_buf
icculus@943
  1799
icculus@402
  1800
// Build the GLSL name of a constant array in a local scratch buffer and
//  return the result of StrDup() on it.
static const char *get_GLSL_const_array_varname(Context *ctx, int base, int size)
{
    char name[64];
    const char *filled = get_GLSL_const_array_varname_in_buf(ctx, base, size,
                                                      name, sizeof (name));
    (void) filled;  // same pointer as name; only the buffer contents matter.
    return StrDup(ctx, name);
} // get_GLSL_const_array_varname
icculus@280
  1806
icculus@280
  1807
icculus@943
  1808
// Write the fixed name "vertex_input_array" into buf (at most buflen bytes,
//  via snprintf) and return buf. ctx is not consulted.
static inline const char *get_GLSL_input_array_varname(Context *ctx,
                                                char *buf, const size_t buflen)
{
    static const char name[] = "vertex_input_array";
    snprintf(buf, buflen, "%s", name);
    return buf;
} // get_GLSL_input_array_varname
icculus@531
  1814
icculus@531
  1815
icculus@760
  1816
static const char *get_GLSL_uniform_array_varname(Context *ctx,
icculus@943
  1817
                                                  const RegisterType regtype,
icculus@943
  1818
                                                  char *buf, const size_t len)
icculus@943
  1819
{
icculus@760
  1820
    const char *shadertype = ctx->shader_type_str;
icculus@760
  1821
    const char *type = get_GLSL_uniform_type(ctx, regtype);
icculus@943
  1822
    snprintf(buf, len, "%s_uniforms_%s", shadertype, type);
icculus@943
  1823
    return buf;
icculus@760
  1824
} // get_GLSL_uniform_array_varname
icculus@760
  1825
icculus@943
  1826
// Write the GLSL variable name of the current destination argument
//  (ctx->dest_arg) into buf and return buf.
static const char *get_GLSL_destarg_varname(Context *ctx, char *buf, size_t len)
{
    return get_GLSL_varname_in_buf(ctx, ctx->dest_arg.regtype,
                                   ctx->dest_arg.regnum, buf, len);
} // get_GLSL_destarg_varname
icculus@67
  1831
icculus@943
  1832
// Write the GLSL variable name of source argument idx into buf and return
//  buf. If idx is outside ctx->source_args, report failure through fail()
//  and return buf as an empty string.
static const char *get_GLSL_srcarg_varname(Context *ctx, const size_t idx,
                                           char *buf, size_t len)
{
    if (idx < STATICARRAYLEN(ctx->source_args))
    {
        const SourceArgInfo *src = &ctx->source_args[idx];
        return get_GLSL_varname_in_buf(ctx, src->regtype, src->regnum,
                                       buf, len);
    } // if

    fail(ctx, "Too many source args");
    *buf = '\0';
    return buf;
} // get_GLSL_srcarg_varname
icculus@118
  1845
icculus@67
  1846
icculus@943
  1847
static const char *make_GLSL_destarg_assign(Context *, char *, const size_t,
                                            const char *, ...) ISPRINTF(4,5);

// Build a GLSL assignment statement for the current destination argument
//  (ctx->dest_arg) into buf:
//
//    <shader type>_<register><writemask> = <clamp(><(><operation><)><shift><, lo, hi)>;
//
//  ctx     the translation context; supplies dest_arg, the shader type
//           string and the predication flag.
//  buf     output buffer; always returned.
//  buflen  size of buf in bytes.
//  fmt...  printf-style format producing the right-hand-side expression
//           (limited to a 256-byte scratch buffer).
//
// buf is set to an empty string (and returned) when:
//  - the writemask is zero (the instruction is a no-op; not an error);
//  - the destination is predicated (unsupported; reported via fail());
//  - the formatted operation doesn't fit the scratch buffer (fail());
//  - the finished statement doesn't fit in buflen bytes (fail()).
static const char *make_GLSL_destarg_assign(Context *ctx, char *buf,
                                            const size_t buflen,
                                            const char *fmt, ...)
{
    int need_parens = 0;
    const DestArgInfo *arg = &ctx->dest_arg;

    if (arg->writemask == 0)
    {
        *buf = '\0';
        return buf;  // no writemask? It's a no-op.
    } // if

    // Saturation wraps the result in clamp(); the bounds are scalars for a
    //  one-component write and vecN(...) otherwise.
    char clampbuf[32] = { '\0' };
    const char *clampleft = "";
    const char *clampright = "";
    if (arg->result_mod & MOD_SATURATE)
    {
        const int vecsize = vecsize_from_writemask(arg->writemask);
        clampleft = "clamp(";
        if (vecsize == 1)
            clampright = ", 0.0, 1.0)";
        else
        {
            snprintf(clampbuf, sizeof (clampbuf),
                     ", vec%d(0.0), vec%d(1.0))", vecsize, vecsize);
            clampright = clampbuf;
        } // else
    } // if

    // MSDN says MOD_PP is a hint and many implementations ignore it. So do we.

    // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
    assert((arg->result_mod & MOD_CENTROID) == 0);

    if (ctx->predicated)
    {
        fail(ctx, "predicated destinations unsupported");  // !!! FIXME
        *buf = '\0';
        return buf;
    } // if

    // Format the caller's right-hand-side expression.
    char operation[256];
    va_list ap;
    va_start(ap, fmt);
    const int len = vsnprintf(operation, sizeof (operation), fmt, ap);
    va_end(ap);
    // A negative return (output error) is rejected along with truncation;
    //  spelled out here rather than relying on signed->unsigned conversion.
    if ((len < 0) || (((size_t) len) >= sizeof (operation)))
    {
        fail(ctx, "operation string too large");  // I'm lazy.  :P
        *buf = '\0';
        return buf;
    } // if

    // The result shift becomes a multiply or divide applied to the
    //  (parenthesized) operation.
    const char *result_shift_str = "";
    switch (arg->result_shift)
    {
        case 0x1: result_shift_str = " * 2.0"; break;
        case 0x2: result_shift_str = " * 4.0"; break;
        case 0x3: result_shift_str = " * 8.0"; break;
        case 0xD: result_shift_str = " / 8.0"; break;
        case 0xE: result_shift_str = " / 4.0"; break;
        case 0xF: result_shift_str = " / 2.0"; break;
    } // switch
    need_parens |= (result_shift_str[0] != '\0');

    char regnum_str[16];
    const char *regtype_str = get_GLSL_register_string(ctx, arg->regtype,
                                                       arg->regnum, regnum_str,
                                                       sizeof (regnum_str));

    // A ".xyzw"-style suffix is only needed for non-scalar registers that
    //  aren't writing all four components.
    char writemask_str[6];
    size_t i = 0;
    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
    if (!scalar && !writemask_xyzw(arg->writemask))
    {
        writemask_str[i++] = '.';
        if (arg->writemask0) writemask_str[i++] = 'x';
        if (arg->writemask1) writemask_str[i++] = 'y';
        if (arg->writemask2) writemask_str[i++] = 'z';
        if (arg->writemask3) writemask_str[i++] = 'w';
    } // if
    assert(i < sizeof (writemask_str));
    writemask_str[i] = '\0';

    const char *leftparen = (need_parens) ? "(" : "";
    const char *rightparen = (need_parens) ? ")" : "";

    const int rc = snprintf(buf, buflen, "%s_%s%s%s = %s%s%s%s%s%s;",
             ctx->shader_type_str, regtype_str, regnum_str, writemask_str,
             clampleft, leftparen, operation, rightparen, result_shift_str,
             clampright);

    // A truncated statement would be silently broken GLSL; fail instead.
    if ((rc < 0) || (((size_t) rc) >= buflen))
    {
        fail(ctx, "destination assignment string too large");
        *buf = '\0';
        return buf;
    } // if

    return buf;
} // make_GLSL_destarg_assign
icculus@40
  1944
icculus@40
  1945
icculus@298
  1946
static char *make_GLSL_swizzle_string(char *swiz_str, const size_t strsize,
icculus@298
  1947
                                      const int swizzle, const int writemask)
icculus@298
  1948
{
icculus@298
  1949
    size_t i = 0;
icculus@298
  1950
    if ( (!no_swizzle(swizzle)) || (!writemask_xyzw(writemask)) )
icculus@298
  1951
    {
icculus@909
  1952
        const int writemask0 = (writemask >> 0) & 0x1;
icculus@909
  1953
        const int writemask1 = (writemask >> 1) & 0x1;
icculus@909
  1954
        const int writemask2 = (writemask >> 2) & 0x1;
icculus@909
  1955
        const int writemask3 = (writemask >> 3) & 0x1;
icculus@298
  1956
icculus@298
  1957
        const int swizzle_x = (swizzle >> 0) & 0x3;
icculus@298
  1958
        const int swizzle_y = (swizzle >> 2) & 0x3;
icculus@298
  1959
        const int swizzle_z = (swizzle >> 4) & 0x3;
icculus@298
  1960
        const int swizzle_w = (swizzle >> 6) & 0x3;
icculus@298
  1961
icculus@298
  1962
        swiz_str[i++] = '.';
icculus@909
  1963
        if (writemask0) swiz_str[i++] = swizzle_channels[swizzle_x];
icculus@909
  1964
        if (writemask1) swiz_str[i++] = swizzle_channels[swizzle_y];
icculus@909
  1965
        if (writemask2) swiz_str[i++] = swizzle_channels[swizzle_z];
icculus@909
  1966
        if (writemask3) swiz_str[i++] = swizzle_channels[swizzle_w];
icculus@298
  1967
    } // if
icculus@298
  1968
    assert(i < strsize);
icculus@298
  1969
    swiz_str[i] = '\0';
icculus@298
  1970
    return swiz_str;
icculus@298
  1971
} // make_GLSL_swizzle_string
icculus@298
  1972
icculus@298
  1973
icculus@798
  1974
// Build the GLSL expression text for source argument #idx of the current
//  instruction: source modifier + register name + optional relative
//  addressing + optional swizzle.
//
//  ctx       - translation context; ctx->source_args[idx] is read.
//  idx       - which source argument to format.
//  writemask - per-component enable bits, handed to
//              make_GLSL_swizzle_string() for non-scalar registers.
//  buf       - destination scratch buffer (must hold at least one byte).
//  buflen    - size of buf in bytes.
//
// Always returns buf. On any failure (bad index, unsupported modifier,
//  unknown register type, or a result that does not fit in buflen bytes)
//  fail() is called and buf holds an empty string.
static const char *make_GLSL_srcarg_string(Context *ctx, const size_t idx,
                                           const int writemask, char *buf,
                                           const size_t buflen)
{
    *buf = '\0';

    if (idx >= STATICARRAYLEN(ctx->source_args))
    {
        fail(ctx, "Too many source args");
        return buf;
    } // if

    const SourceArgInfo *arg = &ctx->source_args[idx];

    // Fixed-size scratch space for register names. At most one "main"
    //  register name and one relative-index register name are built per
    //  call, so two buffers are enough (no alloca() needed).
    char regtype_buf[64];
    char rel_regtype_buf[64];

    // The source modifier wraps the register reference as
    //  premod_str <register> postmod_str.
    const char *premod_str = "";
    const char *postmod_str = "";
    switch (arg->src_mod)
    {
        case SRCMOD_NEGATE:
            premod_str = "-";
            break;

        case SRCMOD_BIASNEGATE:
            premod_str = "-(";
            postmod_str = " - 0.5)";
            break;

        case SRCMOD_BIAS:
            premod_str = "(";
            postmod_str = " - 0.5)";
            break;

        case SRCMOD_SIGNNEGATE:
            premod_str = "-((";
            postmod_str = " - 0.5) * 2.0)";
            break;

        case SRCMOD_SIGN:
            premod_str = "((";
            postmod_str = " - 0.5) * 2.0)";
            break;

        case SRCMOD_COMPLEMENT:
            premod_str = "(1.0 - ";
            postmod_str = ")";
            break;

        case SRCMOD_X2NEGATE:
            premod_str = "-(";
            postmod_str = " * 2.0)";
            break;

        case SRCMOD_X2:
            premod_str = "(";
            postmod_str = " * 2.0)";
            break;

        case SRCMOD_DZ:
            fail(ctx, "SRCMOD_DZ unsupported");  // !!! FIXME
            return buf;

        case SRCMOD_DW:
            fail(ctx, "SRCMOD_DW unsupported");  // !!! FIXME
            return buf;

        case SRCMOD_ABSNEGATE:
            premod_str = "-abs(";
            postmod_str = ")";
            break;

        case SRCMOD_ABS:
            premod_str = "abs(";
            postmod_str = ")";
            break;

        case SRCMOD_NOT:
            premod_str = "!";
            break;

        case SRCMOD_NONE:
        case SRCMOD_TOTAL:
             break;  // stop compiler whining.
    } // switch

    const char *regtype_str = NULL;
    const char *rel_lbracket = "";
    char rel_offset[32] = { '\0' };
    const char *rel_rbracket = "";
    char rel_swizzle[4] = { '\0' };
    const char *rel_regtype_str = "";

    if (!arg->relative)
    {
        regtype_str = get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum,
                                              regtype_buf,
                                              sizeof (regtype_buf));
    } // if
    else
    {
        // Relative addressing: emit <array>[<offset> + <reg>.<component>].
        if (arg->regtype == REG_TYPE_INPUT)
        {
            regtype_str = get_GLSL_input_array_varname(ctx, regtype_buf,
                                                       sizeof (regtype_buf));
        } // if
        else
        {
            assert(arg->regtype == REG_TYPE_CONST);
            const int arrayidx = arg->relative_array->index;
            const int offset = arg->regnum - arrayidx;
            assert(offset >= 0);
            if (arg->relative_array->constant)
            {
                // constant arrays get their own variable, so the offset
                //  is relative to the start of that array.
                const int arraysize = arg->relative_array->count;
                regtype_str = get_GLSL_const_array_varname_in_buf(ctx,
                                arrayidx, arraysize, regtype_buf,
                                sizeof (regtype_buf));
                if (offset != 0)
                    snprintf(rel_offset, sizeof (rel_offset), "%d + ", offset);
            } // if
            else
            {
                // other arrays are addressed through the uniform array,
                //  starting at this array's ARRAYBASE_<n> #define.
                regtype_str = get_GLSL_uniform_array_varname(ctx, arg->regtype,
                                                regtype_buf,
                                                sizeof (regtype_buf));
                if (offset == 0)
                {
                    snprintf(rel_offset, sizeof (rel_offset),
                             "ARRAYBASE_%d + ", arrayidx);
                } // if
                else
                {
                    snprintf(rel_offset, sizeof (rel_offset),
                             "(ARRAYBASE_%d + %d) + ", arrayidx, offset);
                } // else
            } // else
        } // else

        rel_lbracket = "[";

        rel_regtype_str = get_GLSL_varname_in_buf(ctx, arg->relative_regtype,
                                                  arg->relative_regnum,
                                                  rel_regtype_buf,
                                                  sizeof (rel_regtype_buf));
        rel_swizzle[0] = '.';
        rel_swizzle[1] = swizzle_channels[arg->relative_component];
        rel_swizzle[2] = '\0';
        rel_rbracket = "]";
    } // else

    // scalar registers never get a swizzle suffix.
    char swiz_str[6] = { '\0' };
    if (!isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum))
    {
        make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str),
                                 arg->swizzle, writemask);
    } // if

    if (regtype_str == NULL)
    {
        fail(ctx, "Unknown source register type.");
        return buf;
    } // if

    const int rc = snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s",
                            premod_str, regtype_str, rel_lbracket, rel_offset,
                            rel_regtype_str, rel_swizzle, rel_rbracket,
                            swiz_str, postmod_str);

    // A negative result or one that doesn't fit means the text was cut off
    //  (some runtimes don't even null-terminate in that case). Report it
    //  instead of handing broken GLSL to the caller.
    if ((rc < 0) || (((size_t) rc) >= buflen))
    {
        *buf = '\0';
        fail(ctx, "BUG: scratch buffer too small for GLSL source argument.");
    } // if

    return buf;
} // make_GLSL_srcarg_string
icculus@165
  2139
icculus@943
  2140
// generate some convenience functions.
icculus@943
  2141
#define MAKE_GLSL_SRCARG_STRING_(mask, bitmask) \
icculus@943
  2142
    static inline const char *make_GLSL_srcarg_string_##mask(Context *ctx, \
icculus@943
  2143
                                                const size_t idx, char *buf, \
icculus@943
  2144
                                                const size_t buflen) { \
icculus@943
  2145
        return make_GLSL_srcarg_string(ctx, idx, bitmask, buf, buflen); \
icculus@943
  2146
    }
icculus@943
  2147
MAKE_GLSL_SRCARG_STRING_(x, (1 << 0))
icculus@943
  2148
MAKE_GLSL_SRCARG_STRING_(y, (1 << 1))
icculus@943
  2149
MAKE_GLSL_SRCARG_STRING_(z, (1 << 2))
icculus@943
  2150
MAKE_GLSL_SRCARG_STRING_(w, (1 << 3))
icculus@943
  2151
MAKE_GLSL_SRCARG_STRING_(scalar, (1 << 0))
icculus@943
  2152
MAKE_GLSL_SRCARG_STRING_(full, 0xF)
icculus@943
  2153
MAKE_GLSL_SRCARG_STRING_(masked, ctx->dest_arg.writemask)
icculus@943
  2154
MAKE_GLSL_SRCARG_STRING_(vec3, 0x7)
icculus@943
  2155
MAKE_GLSL_SRCARG_STRING_(vec2, 0x3)
icculus@943
  2156
#undef MAKE_GLSL_SRCARG_STRING_
icculus@41
  2157
icculus@41
  2158
// special cases for comparison opcodes...
icculus@121
  2159
icculus@121
  2160
// Map ctx->instruction_controls to a GLSL scalar comparison operator.
//  Values past the end of the table call fail() and yield "".
static const char *get_GLSL_comparison_string_scalar(Context *ctx)
{
    // indexed by the comparison control value; slot zero is empty.
    static const char *operators[] = {
        "", ">", "==", ">=", "<", "!=", "<="
    };

    if (ctx->instruction_controls < STATICARRAYLEN(operators))
        return operators[ctx->instruction_controls];

    fail(ctx, "unknown comparison control");
    return "";
} // get_GLSL_comparison_string_scalar
icculus@121
  2171
icculus@121
  2172
// Map ctx->instruction_controls to the name of the GLSL component-wise
//  comparison function. Values past the end of the table call fail()
//  and yield "".
static const char *get_GLSL_comparison_string_vector(Context *ctx)
{
    // indexed by the comparison control value; slot zero is empty.
    static const char *functions[] = {
        "",
        "greaterThan",
        "equal",
        "greaterThanEqual",
        "lessThan",
        "notEqual",
        "lessThanEqual"
    };

    if (ctx->instruction_controls < STATICARRAYLEN(functions))
        return functions[ctx->instruction_controls];

    fail(ctx, "unknown comparison control");
    return "";
} // get_GLSL_comparison_string_vector
icculus@41
  2187
icculus@41
  2188
icculus@361
  2189
// Begin GLSL output for the profile named by profilestr: reject shader
//  types and profiles this emitter can't handle, write the "#version"
//  preamble to the preflight section, open main() in the mainline intro,
//  and leave output pointed at the mainline body with indentation bumped.
static void emit_GLSL_start(Context *ctx, const char *profilestr)
{
    // only vertex and pixel shaders can be emitted here.
    if (!(shader_is_vertex(ctx) || shader_is_pixel(ctx)))
    {
        failf(ctx, "Shader type %u unsupported in this profile.",
              (uint) ctx->shader_type);
        return;
    } // if

    // Profile-specific preamble. The optional profiles are compiled in or
    //  out, so this has to stay a single if/else-if chain.
    if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL) == 0)
    {
        // No gl_FragData[] before GLSL 1.10, so we have to force the version.
        push_output(ctx, &ctx->preflight);
        output_line(ctx, "#version 110");
        pop_output(ctx);
    } // if

    #if SUPPORT_PROFILE_GLSL120
    else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL120) == 0)
    {
        ctx->profile_supports_glsl120 = 1;
        push_output(ctx, &ctx->preflight);
        output_line(ctx, "#version 120");
        pop_output(ctx);
    } // else if
    #endif

    #if SUPPORT_PROFILE_GLSLES
    else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSLES) == 0)
    {
        ctx->profile_supports_glsles = 1;
        push_output(ctx, &ctx->preflight);
        output_line(ctx, "#version 100");

        // default float precision depends on the shader stage.
        if (shader_is_vertex(ctx))
        {
            output_line(ctx, "precision highp float;");
        } // if
        else
        {
            output_line(ctx, "precision mediump float;");
        } // else
        output_line(ctx, "precision mediump int;");

        // Some drivers don't like it when the precision varies between shaders. -ade
        output_line(ctx, "varying highp vec4 v_FrontColor;");
        output_line(ctx, "varying highp vec4 v_FrontSecondaryColor;");
        output_line(ctx, "varying highp vec4 v_TexCoord[10];"); // 10 according to SM3
        pop_output(ctx);
    } // else if
    #endif

    else
    {
        failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
        return;
    } // else

    // open main(); instructions that follow land in the mainline section.
    push_output(ctx, &ctx->mainline_intro);
    output_line(ctx, "void main()");
    output_line(ctx, "{");
    pop_output(ctx);

    set_output(ctx, &ctx->mainline);
    ctx->indent++;
} // emit_GLSL_start
icculus@15
  2249
icculus@56
  2250
static void emit_GLSL_RET(Context *ctx);  // defined later in this file.

// Finish the shader body: emit any last fixup statements, then make sure
//  the instruction stream is terminated with a RET.
static void emit_GLSL_end(Context *ctx)
{
    if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
    {
        // ps_1_* writes color to r0 instead of oC0, so copy it over to the
        //  right place. A RET opcode can't get in the way of this, since
        //  RET isn't available before ps_2_0.
        const char *prefix = ctx->shader_type_str;
        set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1);
        output_line(ctx, "%s_oC0 = %s_r0;", prefix, prefix);
    } // if

    else if (shader_is_vertex(ctx))
    {
        // optional, build-time selected adjustments to gl_Position.
#ifdef MOJOSHADER_FLIP_RENDERTARGET
        output_line(ctx, "gl_Position.y = gl_Position.y * vpFlip;");
#endif
#ifdef MOJOSHADER_DEPTH_CLIPPING
        output_line(ctx, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;");
#endif
    } // else if

    // force a RET opcode if we're at the end of the stream without one.
    if (ctx->previous_opcode != OPCODE_RET)
    {
        emit_GLSL_RET(ctx);
    } // if
} // emit_GLSL_end
icculus@95
  2276
icculus@400
  2277
// Phase hook for the GLSL profile; nothing is emitted for it.
static void emit_GLSL_phase(Context *ctx)
{
    (void) ctx;  // no-op in GLSL.
} // emit_GLSL_phase
icculus@400
  2281
icculus@760
  2282
// Emit "uniform <type> <name>[size];" to the current output for the
//  uniform array that backs registers of type regtype. Nothing is emitted
//  when size is zero or negative. Register types other than CONST,
//  CONSTINT and CONSTBOOL call fail() and emit nothing.
static void output_GLSL_uniform_array(Context *ctx, const RegisterType regtype,
                                      const int size)
{
    if (size <= 0)
        return;  // no registers of this type to declare.

    char arrayname[64];
    get_GLSL_uniform_array_varname(ctx, regtype, arrayname,
                                   sizeof (arrayname));

    const char *elemtype = NULL;
    if (regtype == REG_TYPE_CONST)
        elemtype = "vec4";
    else if (regtype == REG_TYPE_CONSTINT)
        elemtype = "ivec4";
    else if (regtype == REG_TYPE_CONSTBOOL)
        elemtype = "bool";
    else
    {
        fail(ctx, "BUG: used a uniform we don't know how to define.");
        return;
    } // else

    output_line(ctx, "uniform %s %s[%d];", elemtype, arrayname, size);
} // output_GLSL_uniform_array
icculus@760
  2304
icculus@95
  2305
// Last pass of GLSL generation: pad the globals section with a blank line
//  and write the uniform array declarations into the preflight section.
static void emit_GLSL_finalize(Context *ctx)
{
    // throw a blank line after the globals to make the source more readable.
    push_output(ctx, &ctx->globals);
    output_blank_line(ctx);
    pop_output(ctx);

    // !!! FIXME: relative addressing of REG_TYPE_INPUT would need an array
    // !!! FIXME:  built at the start of main(), as GLSL doesn't let you
    // !!! FIXME:  specify arrays of attributes. Something along the lines of
    // !!! FIXME:    vec4 blah_array[BIGGEST_ARRAY];
    // !!! FIXME:  Until that exists, this is reported as an error.
    if (ctx->have_relative_input_registers)
    {
        fail(ctx, "Relative addressing of input registers not supported.");
    } // if

    // one declaration per uniform register type that has a nonzero count.
    push_output(ctx, &ctx->preflight);
    output_GLSL_uniform_array(ctx, REG_TYPE_CONST, ctx->uniform_float4_count);
    output_GLSL_uniform_array(ctx, REG_TYPE_CONSTINT, ctx->uniform_int4_count);
    output_GLSL_uniform_array(ctx, REG_TYPE_CONSTBOOL, ctx->uniform_bool_count);

#ifdef MOJOSHADER_FLIP_RENDERTARGET
    // declares the vpFlip uniform for vertex shaders in builds with
    //  MOJOSHADER_FLIP_RENDERTARGET defined.
    if (shader_is_vertex(ctx))
    {
        output_line(ctx, "uniform float vpFlip;");
    } // if
#endif

    pop_output(ctx);
} // emit_GLSL_finalize
icculus@95
  2329
icculus@95
  2330
// Declare the GLSL global variable backing register regtype/regnum in the
//  globals section. Loop and label registers emit nothing; register types
//  not handled here call fail().
static void emit_GLSL_global(Context *ctx, RegisterType regtype, int regnum)
{
    char name[64];
    get_GLSL_varname_in_buf(ctx, regtype, regnum, name, sizeof (name));

    push_output(ctx, &ctx->globals);

    switch (regtype)
    {
        case REG_TYPE_ADDRESS:
            if (shader_is_vertex(ctx))
            {
                output_line(ctx, "ivec4 %s;", name);
            } // if

            // in a pixel shader, this is actually REG_TYPE_TEXTURE.
            else if (shader_is_pixel(ctx) &&
                     !shader_version_atleast(ctx, 1, 4))
            {
                // We have to map texture registers to temps for ps_1_1, since
                //  they work like temps, initialize with tex coords, and the
                //  ps_1_1 TEX opcode expects to overwrite it.
                const char *texcoords = "gl_TexCoord";
#if SUPPORT_PROFILE_GLSLES
                if (support_glsles(ctx))
                    texcoords = "v_TexCoord";
#endif
                output_line(ctx, "vec4 %s = %s[%d];", name, texcoords, regnum);
            } // else if
            break;

        case REG_TYPE_PREDICATE:
            output_line(ctx, "bvec4 %s;", name);
            break;

        case REG_TYPE_TEMP:
            output_line(ctx, "vec4 %s;", name);
            break;

        case REG_TYPE_LOOP:   // declared in for loops at the moment.
        case REG_TYPE_LABEL:  // if we see it here, it was optimized out.
            break;

        default:
            fail(ctx, "BUG: we used a register we don't know how to define.");
            break;
    } // switch

    pop_output(ctx);
} // emit_GLSL_global
icculus@95
  2375
icculus@760
  2376
// All uniforms (except constant arrays, which only get pushed once at
//  compile time) are packed into a single array so the uniform transfers
//  can be batched. Because of that, nothing array-shaped is declared here;
//  the one, big array is emitted during finalization. What this does emit
//  is a #define ARRAYBASE_<index> giving this array's offset into the big
//  array, for dereferences to use. The same offset is recorded in
//  var->emit_position.
static void emit_GLSL_array(Context *ctx, VariableList *var)
{
    const int packed_offset = ctx->uniform_float4_count;

    push_output(ctx, &ctx->globals);
    output_line(ctx, "#define ARRAYBASE_%d %d", var->index, packed_offset);
    pop_output(ctx);

    var->emit_position = packed_offset;
} // emit_GLSL_array
icculus@402
  2391
icculus@405
  2392
// Declare, in the globals section, the array that holds a block of "size"
//  constants starting at register "base". With the GLSL 1.20 path disabled
//  below, clist is not read and the array is declared as a uniform.
static void emit_GLSL_const_array(Context *ctx, const ConstantsList *clist,
                                  int base, int size)
{
    char arrayname[64];
    get_GLSL_const_array_varname_in_buf(ctx, base, size, arrayname,
                                        sizeof (arrayname));

#if 0
    // !!! FIXME: fails on Nvidia's and Apple's GL, even with #version 120.
    // !!! FIXME:  (the 1.20 spec says it should work, though, I think...)
    if (support_glsl120(ctx))
    {
        // GLSL 1.20 can do constant arrays.
        const char *cstr = NULL;
        push_output(ctx, &ctx->globals);
        output_line(ctx, "const vec4 %s[%d] = vec4[%d](", arrayname, size, size);
        ctx->indent++;

        int i;
        for (i = 0; i < size; i++)
        {
            while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
                clist = clist->next;
            assert(clist->constant.index == (base + i));

            char val0[32];
            char val1[32];
            char val2[32];
            char val3[32];
            floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1);
            floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1);
            floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1);
            floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1);

            output_line(ctx, "vec4(%s, %s, %s, %s)%s", val0, val1, val2, val3,
                        (i < (size-1)) ? "," : "");

            clist = clist->next;
        } // for

        ctx->indent--;
        output_line(ctx, ");");
        pop_output(ctx);
    } // if

    else
#endif
    {
        // stock GLSL 1.0 can't do constant arrays, so declare a uniform
        //  array and have the OpenGL glue assign it at link time. Lame!
        push_output(ctx, &ctx->globals);
        output_line(ctx, "uniform vec4 %s[%d];", arrayname, size);
        pop_output(ctx);
    } // else
} // emit_GLSL_const_array
icculus@405
  2446
icculus@402
  2447
static void emit_GLSL_uniform(Context *ctx, RegisterType regtype, int regnum,
icculus@760
  2448
                              const VariableList *var)
icculus@760
  2449
{
icculus@760
  2450
    // Now that we're pushing all the uniforms as one big array, pack these
icculus@760
  2451
    //  down, so if we only use register c439, it'll actually map to
icculus@760
  2452
    //  glsl_uniforms_vec4[0]. As we push one big array, this will prevent
icculus@760
  2453
    //  uploading unused data.
icculus@760
  2454
icculus@943
  2455
    char varname[64];
icculus@943
  2456
    char name[64];
icculus@760
  2457
    int index = 0;
icculus@232
  2458
icculus@943
  2459
    get_GLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
icculus@943
  2460
icculus@95
  2461
    push_output(ctx, &ctx->globals);
icculus@280
  2462
icculus@760
  2463
    if (var == NULL)
icculus@760
  2464
    {
icculus@943
  2465
        get_GLSL_uniform_array_varname(ctx, regtype, name, sizeof (name));
icculus@760
  2466
icculus@760
  2467
        if (regtype == REG_TYPE_CONST)
icculus@760
  2468
            index = ctx->uniform_float4_count;
icculus@760
  2469
        else if (regtype == REG_TYPE_CONSTINT)
icculus@760
  2470
            index = ctx->uniform_int4_count;
icculus@760
  2471
        else if (regtype == REG_TYPE_CONSTBOOL)
icculus@760
  2472
            index = ctx->uniform_bool_count;
icculus@760
  2473
        else  // get_GLSL_uniform_array_varname() would have called fail().
icculus@760
  2474
            assert(isfail(ctx));
icculus@760
  2475
icculus@943
  2476
        output_line(ctx, "#define %s %s[%d]", varname, name, index);
icculus@760
  2477
    } // if
icculus@760
  2478
icculus@280
  2479
    else
icculus@280
  2480
    {
icculus@760
  2481
        const int arraybase = var->index;
icculus@760
  2482
        if (var->constant)
icculus@760
  2483
        {
icculus@943
  2484
            get_GLSL_const_array_varname_in_buf(ctx, arraybase, var->count,
icculus@943
  2485
                                                name, sizeof (name));
icculus@760
  2486
            index = (regnum - arraybase);
icculus@760
  2487
        } // if
icculus@760
  2488
        else
icculus@760
  2489
        {
icculus@760
  2490
            assert(var->emit_position != -1);
icculus@943
  2491
            get_GLSL_uniform_array_varname(ctx, regtype, name, sizeof (name));
icculus@760
  2492
            index = (regnum - arraybase) + var->emit_position;
icculus@760
  2493
        } // else
icculus@760
  2494
icculus@943
  2495
        output_line(ctx, "#define %s %s[%d]", varname, name, index);
icculus@280
  2496
    } // else
icculus@280
  2497
icculus@95
  2498
    pop_output(ctx);
icculus@95
  2499
} // emit_GLSL_uniform
icculus@14
  2500
icculus@1090
  2501
// Declare the sampler uniform for texture stage "stage" in the globals
//  section. ttype picks the GLSL sampler type; an unrecognized type calls
//  fail(). When tb is nonzero, two slots of the float4 uniform array are
//  also reserved and exposed as <sampler>_texbem and <sampler>_texbeml.
static void emit_GLSL_sampler(Context *ctx,int stage,TextureType ttype,int tb)
{
    const char *samplertype = "";
    if (ttype == TEXTURE_TYPE_2D)
        samplertype = "sampler2D";
    else if (ttype == TEXTURE_TYPE_CUBE)
        samplertype = "samplerCube";
    else if (ttype == TEXTURE_TYPE_VOLUME)
        samplertype = "sampler3D";
    else
        fail(ctx, "BUG: used a sampler we don't know how to define.");

    char samplername[64];
    get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, samplername,
                            sizeof (samplername));

    push_output(ctx, &ctx->globals);
    output_line(ctx, "uniform %s %s;", samplertype, samplername);

    if (tb)  // This sampler used a ps_1_1 TEXBEM opcode?
    {
        char arrayname[64];
        const int firstslot = ctx->uniform_float4_count;
        ctx->uniform_float4_count += 2;  // claim both slots.
        get_GLSL_uniform_array_varname(ctx, REG_TYPE_CONST, arrayname,
                                       sizeof (arrayname));
        output_line(ctx, "#define %s_texbem %s[%d]", samplername, arrayname,
                    firstslot);
        output_line(ctx, "#define %s_texbeml %s[%d]", samplername, arrayname,
                    firstslot+1);
    } // if

    pop_output(ctx);
} // emit_GLSL_sampler
icculus@148
  2528
icculus@104
  2529
static void emit_GLSL_attribute(Context *ctx, RegisterType regtype, int regnum,
icculus@431
  2530
                                MOJOSHADER_usage usage, int index, int wmask,
icculus@431
  2531
                                int flags)
icculus@14
  2532
{
icculus@101
  2533
    // !!! FIXME: this function doesn't deal with write masks at all yet!
icculus@104
  2534
    const char *usage_str = NULL;
icculus@303
  2535
    const char *arrayleft = "";
icculus@303
  2536
    const char *arrayright = "";
icculus@104
  2537
    char index_str[16] = { '\0' };
icculus@943
  2538
    char var[64];
icculus@943
  2539
icculus@943
  2540
    get_GLSL_varname_in_buf(ctx, regtype, regnum, var, sizeof (var));
icculus@104
  2541
icculus@431
  2542
    //assert((flags & MOD_PP) == 0);  // !!! FIXME: is PP allowed?
icculus@431
  2543
icculus@104
  2544
    if (index != 0)  // !!! FIXME: a lot of these MUST be zero.
icculus@104
  2545
        snprintf(index_str, sizeof (index_str), "%u", (uint) index);
icculus@56
  2546
icculus@151
  2547
    if (shader_is_vertex(ctx))
icculus@56
  2548
    {
icculus@101
  2549
        // pre-vs3 output registers.
icculus@104
  2550
        // these don't ever happen in DCL opcodes, I think. Map to vs_3_*
icculus@104
  2551
        //  output registers.
icculus@101
  2552
        if (!shader_version_atleast(ctx, 3, 0))
icculus@56
  2553
        {
icculus@101
  2554
            if (regtype == REG_TYPE_RASTOUT)
icculus@101
  2555
            {
icculus@101
  2556
                regtype = REG_TYPE_OUTPUT;
icculus@156
  2557
                index = regnum;
icculus@104
  2558
                switch ((const RastOutType) regnum)
icculus@101
  2559
                {
icculus@101
  2560
                    case RASTOUT_TYPE_POSITION:
icculus@104
  2561
                        usage = MOJOSHADER_USAGE_POSITION;
icculus@101
  2562
                        break;
icculus@101
  2563
                    case RASTOUT_TYPE_FOG:
icculus@104
  2564
                        usage = MOJOSHADER_USAGE_FOG;
icculus@101
  2565
                        break;
icculus@101
  2566
                    case RASTOUT_TYPE_POINT_SIZE:
icculus@104
  2567
                        usage = MOJOSHADER_USAGE_POINTSIZE;
icculus@101
  2568
                        break;
icculus@101
  2569
                } // switch
icculus@101
  2570
            } // if
icculus@101
  2571
icculus@101
  2572
            else if (regtype == REG_TYPE_ATTROUT)
icculus@101
  2573
            {
icculus@101
  2574
                regtype = REG_TYPE_OUTPUT;
icculus@101
  2575
                usage = MOJOSHADER_USAGE_COLOR;
icculus@156
  2576
                index = regnum;
icculus@101
  2577
            } // else if
icculus@101
  2578
icculus@101
  2579
            else if (regtype == REG_TYPE_TEXCRDOUT)
icculus@101
  2580
            {
icculus@101
  2581
                regtype = REG_TYPE_OUTPUT;
icculus@101
  2582
                usage = MOJOSHADER_USAGE_TEXCOORD;
icculus@156
  2583
                index = regnum;
icculus@101
  2584
            } // else if
icculus@56
  2585
        } // if
icculus@56
  2586
icculus@101
  2587
        // to avoid limitations of various GL entry points for input
icculus@101
  2588
        // attributes (glSecondaryColorPointer() can only take 3 component
icculus@101
  2589
        // items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other
icculus@101
  2590
        // issues), we set up all inputs as generic vertex attributes, so we
icculus@101
  2591
        // can pass data in just about any form, and ignore the built-in GLSL
icculus@101
  2592
        // attributes like gl_SecondaryColor. Output needs to use the
icculus@101
  2593
        // built-ins, though, but we don't have to worry about the GL entry
icculus@101
  2594
        // point limitations there.
icculus@101
  2595
icculus@101
  2596
        if (regtype == REG_TYPE_INPUT)
icculus@103
  2597
        {
icculus@103
  2598
            push_output(ctx, &ctx->globals);
icculus@943
  2599
            output_line(ctx, "attribute vec4 %s;", var);
icculus@103
  2600
            pop_output(ctx);
icculus@103
  2601
        } // if
icculus@103
  2602
icculus@101
  2603
        else if (regtype == REG_TYPE_OUTPUT)
icculus@101
  2604
        {
icculus@101
  2605
            switch (usage)
icculus@101
  2606
            {
icculus@101
  2607
                case MOJOSHADER_USAGE_POSITION:
icculus@101
  2608
                    usage_str = "gl_Position";
icculus@101
  2609
                    break;
icculus@101
  2610
                case MOJOSHADER_USAGE_POINTSIZE:
icculus@101
  2611
                    usage_str = "gl_PointSize";
icculus@101
  2612
                    break;
icculus@102
  2613
                case MOJOSHADER_USAGE_COLOR:
icculus@102
  2614
                    index_str[0] = '\0';  // no explicit number.
icculus@102
  2615
                    if (index == 0)
flibitijibibo@1150
  2616
                    {