From 963fd467cb86f75933f59e37709585689c89dcb0 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 8 Feb 2010 02:42:12 -0500 Subject: [PATCH 01/16] Start of experiment with a basic calculator. This is a throwaway so I can debug some of the HLSL parsing code, and get a better idea of what I want to do there. This will eventually merge back into the default branch with the right bits going into the compiler code. --HG-- branch : calculator-experiment --- CMakeLists.txt | 15 ++ calculator.c | 412 ++++++++++++++++++++++++++++++++++++++++++ calculator.lemon | 185 +++++++++++++++++++ mojoshader_internal.h | 4 + 4 files changed, 616 insertions(+) create mode 100644 calculator.c create mode 100644 calculator.lemon diff --git a/CMakeLists.txt b/CMakeLists.txt index bbc96151..8cf2476b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,6 +76,21 @@ SET_SOURCE_FILES_PROPERTIES( PROPERTIES OBJECT_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/mojoshader_parser_hlsl.h" ) +# Calculator test stuff... +ADD_CUSTOM_COMMAND( + OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/calculator.h" + MAIN_DEPENDENCY "${CMAKE_CURRENT_SOURCE_DIR}/calculator.lemon" + DEPENDS lemon "${CMAKE_CURRENT_SOURCE_DIR}/misc/lempar.c" + COMMAND "${LEMON}" + ARGS -q "-T${CMAKE_CURRENT_SOURCE_DIR}/misc/lempar.c" "${CMAKE_CURRENT_SOURCE_DIR}/calculator.lemon" +) +SET_SOURCE_FILES_PROPERTIES( + calculator.c + PROPERTIES OBJECT_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/calculator.h" +) +ADD_EXECUTABLE(calculator calculator.c) +TARGET_LINK_LIBRARIES(calculator mojoshader) + FIND_PROGRAM(RE2C re2c DOC "Path to re2c command line app: http://re2c.org/") IF(NOT RE2C) MESSAGE(STATUS "re2c missing. You can go on, but can't rebuild the lexer.") diff --git a/calculator.c b/calculator.c new file mode 100644 index 00000000..db01051e --- /dev/null +++ b/calculator.c @@ -0,0 +1,412 @@ +#define __MOJOSHADER_INTERNAL__ 1 +#include "mojoshader_internal.h" + +#if DEBUG_COMPILER_PARSER +#define LEMON_SUPPORT_TRACING 1 +#endif + +typedef struct Context +{ + int isfail; + int out_of_memory; + MOJOSHADER_malloc malloc; + MOJOSHADER_free free; + void *malloc_data; + int error_count; + ErrorList *errors; + Preprocessor *preprocessor; + const char *token; + unsigned int tokenlen; + Token tokenval; + unsigned int parse_errors; +} Context; + + +// Convenience functions for allocators... + +static inline void out_of_memory(Context *ctx) +{ + ctx->isfail = ctx->out_of_memory = 1; +} // out_of_memory + +static inline void *Malloc(Context *ctx, const size_t len) +{ + void *retval = ctx->malloc((int) len, ctx->malloc_data); + if (retval == NULL) + out_of_memory(ctx); + return retval; +} // Malloc + +static inline char *StrDup(Context *ctx, const char *str) +{ + char *retval = (char *) Malloc(ctx, strlen(str) + 1); + if (retval != NULL) + strcpy(retval, str); + return retval; +} // StrDup + +static inline void Free(Context *ctx, void *ptr) +{ + if (ptr != NULL) // check for NULL in case of dumb free() impl. + ctx->free(ptr, ctx->malloc_data); +} // Free + +typedef enum Operator +{ + OP_DEREF_ARRAY, + OP_CALLFUNC, + OP_DEREF_STRUCT, + OP_POSTINCREMENT, + OP_POSTDECREMENT, + OP_COMMA, + OP_PREINCREMENT, + OP_PREDECREMENT, + OP_NEGATE, + OP_COMPLEMENT, + OP_NOT, + OP_MULTIPLY, + OP_DIVIDE, + OP_MODULO, + OP_ADD, + OP_SUBTRACT, + OP_LSHIFT, + OP_RSHIFT, + OP_LESSTHAN, + OP_GREATERTHAN, + OP_LESSTHANOREQUAL, + OP_GREATERTHANOREQUAL, + OP_EQUAL, + OP_NOTEQUAL, + OP_BINARYAND, + OP_BINARYXOR, + OP_BINARYOR, + OP_LOGICALAND, + OP_LOGICALOR, + OP_CONDITIONAL, + OP_ASSIGN, + OP_MULASSIGN, + OP_DIVASSIGN, + OP_MODASSIGN, + OP_ADDASSIGN, + OP_SUBASSIGN, + OP_LSHIFTASSIGN, + OP_RSHIFTASSIGN, + OP_ANDASSIGN, + OP_XORASSIGN, + OP_ORASSIGN, +} Operator; + +typedef struct Expression +{ + Operator op; // operator +} Expression; + +#define NEW_EXPR(cls) \ + cls *retval = Malloc(ctx, sizeof (cls)); \ + if (retval == NULL) { return NULL; } + +typedef struct ExpressionUnary +{ + Operator op; // operator + Expression *operand; +} ExpressionUnary; + +typedef struct ExpressionBinary +{ + Operator op; // operator + Expression *left; + Expression *right; +} ExpressionBinary; + +typedef struct ExpressionTernary +{ + Operator op; // operator + Expression *left; + Expression *center; + Expression *right; +} ExpressionTernary; + +typedef struct ExpressionIdentifier +{ + Operator op; // Always TOKEN_CALC_IDENTIFIER + const char *identifier; +} ExpressionIdentifier; + +typedef struct ExpressionLiteralInt +{ + Operator op; // Always TOKEN_CALC_INT_CONSTANT + int64 value; +} ExpressionLiteralInt; + +typedef struct ExpressionLiteralFloat +{ + Operator op; // Always TOKEN_CALC_FLOAT_CONSTANT + double value; +} ExpressionLiteralFloat; + +typedef struct ExpressionLiteralString +{ + Operator op; // Always TOKEN_CALC_STRING_LITERAL + const char *string; +} ExpressionLiteralString; + +static const char *new_identifier(Context *); +static Expression *new_unary_expr(Context *, const Operator, Expression *); +static Expression *new_binary_expr(Context *, const Operator, Expression *, Expression *); +static Expression *new_ternary_expr(Context *, const Operator, Expression *, Expression *, Expression *); +static Expression *new_identifier_expr(Context *, const char *); +static Expression *new_literal_int_expr(Context *); +static Expression *new_literal_float_expr(Context *); +static Expression *new_literal_string_expr(Context *); + +static void parse_complete(const Expression *expr) +{ + printf("parse complete!\n"); + +} // parse_complete + + +// This is where the actual parsing happens. It's Lemon-generated! +#define __MOJOSHADER_CALC_COMPILER__ 1 +#include "calculator.h" + +static const char *new_identifier(Context *ctx) +{ + // !!! FIXME: this needs to cache strings. + const unsigned int len = ctx->tokenlen; + char *retval = Malloc(ctx, len + 1); + if (retval == NULL) + return NULL; + memcpy(retval, ctx->token, len); + retval[len] = '\0'; + return retval; +} // new_identifier + +static Expression *new_unary_expr(Context *ctx, const Operator op, + Expression *operand) +{ + NEW_EXPR(ExpressionUnary); + retval->op = op; + retval->operand = operand; + return (Expression *) retval; +} // new_unary_expr + +static Expression *new_binary_expr(Context *ctx, const Operator op, + Expression *left, Expression *right) +{ + NEW_EXPR(ExpressionBinary); + retval->op = op; + retval->left = left; + retval->right = right; + return (Expression *) retval; +} // new_binary_expr + +static Expression *new_ternary_expr(Context *ctx, const Operator op, + Expression *left, Expression *center, + Expression *right) +{ + NEW_EXPR(ExpressionTernary); + retval->op = op; + retval->left = left; + retval->center = center; + retval->right = right; + return (Expression *) retval; +} // new_ternary_expr + +static Expression *new_identifier_expr(Context *ctx, const char *identifier) +{ + NEW_EXPR(ExpressionIdentifier); + retval->op = TOKEN_CALC_IDENTIFIER; + retval->identifier = identifier; + return (Expression *) retval; +} // new_identifier_expr + +static inline int64 strtoi64(const char *str, unsigned int len) +{ + int64 retval = 0; + int64 mult = 1; + int i = 0; + + while ((len) && (*str == ' ')) + { + str++; + len--; + } // while + + if ((len) && (*str == '-')) + { + mult = -1; + str++; + len--; + } // if + + while (i < len) + { + const char ch = str[i]; + if ((ch < '0') || (ch > '9')) + break; + i++; + } // while + + while (--i >= 0) + { + const char ch = str[i]; + retval += ((int64) (ch - '0')) * mult; + mult *= 10; + } // while + + return retval; +} // strtoi64 + +static Expression *new_literal_int_expr(Context *ctx) +{ + NEW_EXPR(ExpressionLiteralInt); + retval->op = TOKEN_CALC_INT_CONSTANT; + retval->value = strtoi64(ctx->token, ctx->tokenlen); + return (Expression *) retval; +} // new_literal_int_expr + +static inline double strtodouble(const char *_str, unsigned int len) +{ + // !!! FIXME: laziness prevails. + char *str = (char *) alloca(len+1); + memcpy(str, _str, len); + str[len] = '\0'; + return strtod(str, NULL); +} // strtodouble + +static Expression *new_literal_float_expr(Context *ctx) +{ + NEW_EXPR(ExpressionLiteralFloat); + retval->op = TOKEN_CALC_FLOAT_CONSTANT; + retval->value = strtodouble(ctx->token, ctx->tokenlen); + return (Expression *) retval; +} // new_literal_float_expr + +static Expression *new_literal_string_expr(Context *ctx) +{ + NEW_EXPR(ExpressionLiteralString); + retval->op = TOKEN_CALC_STRING_LITERAL; + retval->string = new_identifier(ctx); + return (Expression *) retval; +} // new_string_literal_expr + + +static int convert_to_lemon_token(const Context *ctx) +{ + switch (ctx->tokenval) + { + case ((Token) ','): return TOKEN_CALC_COMMA; + case ((Token) '='): return TOKEN_CALC_ASSIGN; + case ((Token) TOKEN_ADDASSIGN): return TOKEN_CALC_ADDASSIGN; + case ((Token) TOKEN_SUBASSIGN): return TOKEN_CALC_SUBASSIGN; + case ((Token) TOKEN_MULTASSIGN): return TOKEN_CALC_MULASSIGN; + case ((Token) TOKEN_DIVASSIGN): return TOKEN_CALC_DIVASSIGN; + case ((Token) TOKEN_MODASSIGN): return TOKEN_CALC_MODASSIGN; + case ((Token) TOKEN_LSHIFTASSIGN): return TOKEN_CALC_LSHIFTASSIGN; + case ((Token) TOKEN_RSHIFTASSIGN): return TOKEN_CALC_RSHIFTASSIGN; + case ((Token) TOKEN_ANDASSIGN): return TOKEN_CALC_ANDASSIGN; + case ((Token) TOKEN_ORASSIGN): return TOKEN_CALC_ORASSIGN; + case ((Token) TOKEN_XORASSIGN): return TOKEN_CALC_XORASSIGN; + case ((Token) '?'): return TOKEN_CALC_QUESTION; + case ((Token) TOKEN_OROR): return TOKEN_CALC_OROR; + case ((Token) TOKEN_ANDAND): return TOKEN_CALC_ANDAND; + case ((Token) '|'): return TOKEN_CALC_OR; + case ((Token) '^'): return TOKEN_CALC_XOR; + case ((Token) '&'): return TOKEN_CALC_AND; + case ((Token) TOKEN_EQL): return TOKEN_CALC_EQL; + case ((Token) TOKEN_NEQ): return TOKEN_CALC_NEQ; + case ((Token) '<'): return TOKEN_CALC_LT; + case ((Token) TOKEN_LEQ): return TOKEN_CALC_LEQ; + case ((Token) '>'): return TOKEN_CALC_GT; + case ((Token) TOKEN_GEQ): return TOKEN_CALC_GEQ; + case ((Token) TOKEN_LSHIFT): return TOKEN_CALC_LSHIFT; + case ((Token) TOKEN_RSHIFT): return TOKEN_CALC_RSHIFT; + case ((Token) '+'): return TOKEN_CALC_PLUS; + case ((Token) '-'): return TOKEN_CALC_MINUS; + case ((Token) '*'): return TOKEN_CALC_STAR; + case ((Token) '/'): return TOKEN_CALC_SLASH; + case ((Token) '%'): return TOKEN_CALC_PERCENT; + case ((Token) '!'): return TOKEN_CALC_EXCLAMATION; + case ((Token) '~'): return TOKEN_CALC_COMPLEMENT; + case ((Token) TOKEN_DECREMENT): return TOKEN_CALC_MINUSMINUS; + case ((Token) TOKEN_INCREMENT): return TOKEN_CALC_PLUSPLUS; + case ((Token) '.'): return TOKEN_CALC_DOT; + case ((Token) '['): return TOKEN_CALC_LBRACKET; + case ((Token) ']'): return TOKEN_CALC_RBRACKET; + case ((Token) '('): return TOKEN_CALC_LPAREN; + case ((Token) ')'): return TOKEN_CALC_RPAREN; + case ((Token) TOKEN_INT_LITERAL): return TOKEN_CALC_INT_CONSTANT; + case ((Token) TOKEN_FLOAT_LITERAL): return TOKEN_CALC_FLOAT_CONSTANT; + case ((Token) TOKEN_STRING_LITERAL): return TOKEN_CALC_STRING_LITERAL; + case ((Token) ':'): return TOKEN_CALC_COLON; + //case ((Token) ';'): return TOKEN_CALC_SEMICOLON; + //case ((Token) '{'): return TOKEN_CALC_LBRACE; + //case ((Token) '}'): return TOKEN_CALC_RBRACE; + case ((Token) TOKEN_IDENTIFIER): return TOKEN_CALC_IDENTIFIER; + case TOKEN_EOI: return 0; + case TOKEN_BAD_CHARS: printf("bad chars from lexer\n"); return 0; + case TOKEN_PREPROCESSING_ERROR: printf("error from lexer\n"); return 0; + default: assert(0 && "unexpected token from lexer\n"); return 0; + } // switch + + return 0; +} // convert_to_lemon_token + +static void MOJOSHADER_compile(const char *filename, + const char *source, unsigned int sourcelen, + const MOJOSHADER_preprocessorDefine *defines, + unsigned int define_count, + MOJOSHADER_includeOpen include_open, + MOJOSHADER_includeClose include_close, + MOJOSHADER_malloc m, MOJOSHADER_free f, void *d) +{ + Context ctx; + if (m == NULL) m = MOJOSHADER_internal_malloc; + if (f == NULL) f = MOJOSHADER_internal_free; + + memset(&ctx, '\0', sizeof (Context)); + ctx.malloc = m; + ctx.free = f; + ctx.malloc_data = d; + ctx.preprocessor = preprocessor_start(filename, source, sourcelen, + include_open, include_close, + defines, define_count, 0, m, f, d); + + void *pParser = ParseCalculatorAlloc(m, d); + + #if DEBUG_COMPILER_PARSER + ParseCalculatorTrace(stdout, "COMPILER: "); + #endif + + do { + ctx.token = preprocessor_nexttoken(ctx.preprocessor, + &ctx.tokenlen, + &ctx.tokenval); + ParseCalculator(pParser, convert_to_lemon_token(&ctx), 0, &ctx); + } while (ctx.tokenval != TOKEN_EOI); + ParseCalculatorFree(pParser, f, d); +} + +int main(int argc, char **argv) +{ + const char *ln; + size_t len = 0; + FILE *io = stdin; + const char *filename = ""; + + while ((ln = fgetln(io, &len)) != NULL) + { + if ((len == 5) && (memcmp(ln, "quit\n", 5) == 0)) + break; + + MOJOSHADER_compile(filename, ln, (unsigned int) len, + NULL, 0, NULL, NULL, NULL, NULL, NULL); + } // while + + fclose(io); + return 0; +} // main + +// end of calculator.c ... + diff --git a/calculator.lemon b/calculator.lemon new file mode 100644 index 00000000..69da912f --- /dev/null +++ b/calculator.lemon @@ -0,0 +1,185 @@ +/** + * MojoShader; generate shader programs from bytecode of compiled + * Direct3D shaders. + * + * Please see the file LICENSE.txt in the source's root directory. + * + * This file written by Ryan C. Gordon. + */ + +// This is a Lemon Parser grammar for HLSL. It is based on an ANSI C YACC +// grammar by Jeff Lee: http://www.lysator.liu.se/c/ANSI-C-grammar-y.html + +// Lemon is here: http://www.hwaci.com/sw/lemon/ ... the source is included +// with MojoShader, and built with the library, so you don't have to track +// down the dependency. + +%name ParseCalculator + +// Some shift-reduce conflicts are basically unavoidable, but if the final +// conflict count matches this value, we consider it known and acceptable. +%expect 0 + +%start_symbol calculator +%token_prefix TOKEN_CALC_ +%token_type { int } +%extra_argument { Context *ctx } + +%include { +#ifndef __MOJOSHADER_CALC_COMPILER__ +#error Do not compile this file directly. +#endif +} + +%syntax_error { + fprintf(stderr,"Syntax error\n"); +} + +%parse_failure { + fprintf(stderr,"Giving up. Parser is hopelessly lost...\n"); +} + +%stack_overflow { + fprintf(stderr,"Giving up. Parser stack overflow\n"); +} + +// operator precedence (matches C spec)... + +%left COMMA. +%right ASSIGN ADDASSIGN SUBASSIGN MULASSIGN DIVASSIGN MODASSIGN LSHIFTASSIGN + RSHIFTASSIGN ANDASSIGN ORASSIGN XORASSIGN. +%right QUESTION. +%left OROR. +%left ANDAND. +%left OR. +%left XOR. +%left AND. +%left EQL NEQ. +%left LT LEQ GT GEQ. +%left LSHIFT RSHIFT. +%left PLUS MINUS. +%left STAR SLASH PERCENT. +%right TYPECAST EXCLAMATION COMPLEMENT MINUSMINUS PLUSPLUS. +%left DOT LBRACKET RBRACKET LPAREN RPAREN. + +// bump up the precedence of ELSE, to avoid shift/reduce conflict on the +// usual "dangling else ambiguity" ... +%right ELSE. + + +// The rules... + +%type calculator { Expression * } +calculator(A) ::= expression(B). { A = B; } + +%type identifier { const char * } +%destructor identifier { (void) ctx; } // !!! FIXME: remove this later, it's just to shut up the compiler for now. +identifier(A) ::= IDENTIFIER. { A = new_identifier(ctx); } + +// the expression stuff is based on Jeff Lee's ANSI C grammar. +%type primary_expr { Expression * } +primary_expr(A) ::= identifier(B). { A = new_identifier_expr(ctx, B); } +primary_expr(A) ::= INT_CONSTANT. { A = new_literal_int_expr(ctx); } +primary_expr(A) ::= FLOAT_CONSTANT. { A = new_literal_float_expr(ctx); } +primary_expr(A) ::= STRING_LITERAL. { A = new_literal_string_expr(ctx); } +primary_expr(A) ::= LPAREN expression(B) RPAREN. { A = B; } + +%type postfix_expr { Expression * } +postfix_expr(A) ::= primary_expr(B). { A = B; } +postfix_expr(A) ::= postfix_expr(B) LBRACKET expression(C) RBRACKET. { A = new_binary_expr(ctx, OP_DEREF_ARRAY, B, C); } +postfix_expr(A) ::= postfix_expr(B) LPAREN RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, NULL); } +postfix_expr(A) ::= postfix_expr(B) LPAREN argument_expr_list(C) RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, C); } +//postfix_expr(A) ::= datatype(B) LPAREN argument_expr_list(C) RPAREN. { A = new_constructor_expr(ctx, B, C); } // HLSL constructor +postfix_expr(A) ::= postfix_expr(B) DOT identifier(C). { A = new_binary_expr(ctx, OP_DEREF_STRUCT, B, new_identifier_expr(ctx, C)); } +postfix_expr(A) ::= postfix_expr(B) PLUSPLUS. { A = new_unary_expr(ctx, OP_POSTINCREMENT, B); } +postfix_expr(A) ::= postfix_expr(B) MINUSMINUS. { A = new_unary_expr(ctx, OP_POSTDECREMENT, B); } + +%type argument_expr_list { Expression * } +argument_expr_list(A) ::= assignment_expr(B). { A = B; } +argument_expr_list(A) ::= argument_expr_list(B) COMMA assignment_expr(C). { A = new_binary_expr(ctx, OP_COMMA, B, C); } + +%type unary_expr { Expression * } +unary_expr(A) ::= postfix_expr(B). { A = B; } +unary_expr(A) ::= PLUSPLUS unary_expr(B). { A = new_unary_expr(ctx, OP_PREINCREMENT, B); } +unary_expr(A) ::= MINUSMINUS unary_expr(B). { A = new_unary_expr(ctx, OP_PREDECREMENT, B); } +unary_expr(A) ::= PLUS cast_expr(B). { A = B; } // unary "+x" is always a no-op, so throw it away here. +unary_expr(A) ::= MINUS cast_expr(B). { A = new_unary_expr(ctx, OP_NEGATE, B); } +unary_expr(A) ::= COMPLEMENT cast_expr(B). { A = new_unary_expr(ctx, OP_COMPLEMENT, B); } +unary_expr(A) ::= EXCLAMATION cast_expr(B). { A = new_unary_expr(ctx, OP_NOT, B); } + +%type cast_expr { Expression * } +cast_expr(A) ::= unary_expr(B). { A = B; } +//cast_expr(A) ::= LPAREN datatype(B) RPAREN cast_expr(C). { A = new_cast_expr(ctx, B, C); } + +%type multiplicative_expr { Expression * } +multiplicative_expr(A) ::= cast_expr(B). { A = B; } +multiplicative_expr(A) ::= multiplicative_expr(B) STAR cast_expr(C). { A = new_binary_expr(ctx, OP_MULTIPLY, B, C); } +multiplicative_expr(A) ::= multiplicative_expr(B) SLASH cast_expr(C). { A = new_binary_expr(ctx, OP_DIVIDE, B, C); } +multiplicative_expr(A) ::= multiplicative_expr(B) PERCENT cast_expr(C). { A = new_binary_expr(ctx, OP_MODULO, B, C); } + +%type additive_expr { Expression * } +additive_expr(A) ::= multiplicative_expr(B). { A = B; } +additive_expr(A) ::= additive_expr(B) PLUS multiplicative_expr(C). { A = new_binary_expr(ctx, OP_ADD, B, C); } +additive_expr(A) ::= additive_expr(B) MINUS multiplicative_expr(C). { A = new_binary_expr(ctx, OP_SUBTRACT, B, C); } + +%type shift_expr { Expression * } +shift_expr(A) ::= additive_expr(B). { A = B; } +shift_expr(A) ::= shift_expr(B) LSHIFT additive_expr(C). { A = new_binary_expr(ctx, OP_LSHIFT, B, C); } +shift_expr(A) ::= shift_expr(B) RSHIFT additive_expr(C). { A = new_binary_expr(ctx, OP_RSHIFT, B, C); } + +%type relational_expr { Expression * } +relational_expr(A) ::= shift_expr(B). { A = B; } +relational_expr(A) ::= relational_expr(B) LT shift_expr(C). { A = new_binary_expr(ctx, OP_LESSTHAN, B, C); } +relational_expr(A) ::= relational_expr(B) GT shift_expr(C). { A = new_binary_expr(ctx, OP_GREATERTHAN, B, C); } +relational_expr(A) ::= relational_expr(B) LEQ shift_expr(C). { A = new_binary_expr(ctx, OP_LESSTHANOREQUAL, B, C); } +relational_expr(A) ::= relational_expr(B) GEQ shift_expr(C). { A = new_binary_expr(ctx, OP_GREATERTHANOREQUAL, B, C); } + +%type equality_expr { Expression * } +equality_expr(A) ::= relational_expr(B). { A = B; } +equality_expr(A) ::= equality_expr(B) EQL relational_expr(C). { A = new_binary_expr(ctx, OP_EQUAL, B, C); } +equality_expr(A) ::= equality_expr(B) NEQ relational_expr(C). { A = new_binary_expr(ctx, OP_NOTEQUAL, B, C); } + +%type and_expr { Expression * } +and_expr(A) ::= equality_expr(B). { A = B; } +and_expr(A) ::= and_expr(B) AND equality_expr(C). { A = new_binary_expr(ctx, OP_BINARYAND, B, C); } + +%type exclusive_or_expr { Expression * } +exclusive_or_expr(A) ::= and_expr(B). { A = B; } +exclusive_or_expr(A) ::= exclusive_or_expr(B) XOR and_expr(C). { A = new_binary_expr(ctx, OP_BINARYXOR, B, C); } + +%type inclusive_or_expr { Expression * } +inclusive_or_expr(A) ::= exclusive_or_expr(B). { A = B; } +inclusive_or_expr(A) ::= inclusive_or_expr(B) OR exclusive_or_expr(C). { A = new_binary_expr(ctx, OP_BINARYOR, B, C); } + +%type logical_and_expr { Expression * } +logical_and_expr(A) ::= inclusive_or_expr(B). { A = B; } +logical_and_expr(A) ::= logical_and_expr(B) ANDAND inclusive_or_expr(C). { A = new_binary_expr(ctx, OP_LOGICALAND, B, C); } + +%type logical_or_expr { Expression * } +logical_or_expr(A) ::= logical_and_expr(B). { A = B; } +logical_or_expr(A) ::= logical_or_expr(B) OROR logical_and_expr(C). { A = new_binary_expr(ctx, OP_LOGICALOR, B, C); } + +%type conditional_expr { Expression * } +conditional_expr(A) ::= logical_or_expr(B). { A = B; } +conditional_expr(A) ::= logical_or_expr(B) QUESTION logical_or_expr(C) COLON conditional_expr(D). { A = new_ternary_expr(ctx, OP_CONDITIONAL, B, C, D); } + +%type assignment_expr { Expression * } +assignment_expr(A) ::= conditional_expr(B). { A = B; } +assignment_expr(A) ::= unary_expr(B) ASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_ASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) MULASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_MULASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) DIVASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_DIVASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) MODASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_MODASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) ADDASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_ADDASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) SUBASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_SUBASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) LSHIFTASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_LSHIFTASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) RSHIFTASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_RSHIFTASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) ANDASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_ANDASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) XORASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_XORASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) ORASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_ORASSIGN, B, C); } + +%type expression { Expression * } +expression(A) ::= assignment_expr(B). { A = B; } +expression(A) ::= expression(B) COMMA assignment_expr(C). { A = new_binary_expr(ctx, OP_COMMA, B, C); } + +// end of calculator.lemon ... + diff --git a/mojoshader_internal.h b/mojoshader_internal.h index c81d5de2..bfd870eb 100644 --- a/mojoshader_internal.h +++ b/mojoshader_internal.h @@ -88,7 +88,9 @@ typedef unsigned int uint; // this is a printf() helper. don't use for code. typedef unsigned __int8 uint8; typedef unsigned __int16 uint16; typedef unsigned __int32 uint32; +typedef unsigned __int64 uint64; typedef __int32 int32; +typedef __int64 int64; // Warning Level 4 considered harmful. :) #pragma warning(disable: 4100) // "unreferenced formal parameter" #pragma warning(disable: 4389) // "signed/unsigned mismatch" @@ -98,6 +100,8 @@ typedef uint8_t uint8; typedef uint16_t uint16; typedef uint32_t uint32; typedef int32_t int32; +typedef int64_t int64; +typedef uint64_t uint64; #endif #ifdef sun From e7818ac4cb39fa0c2fd60d81e0ad21d3f327754b Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 8 Feb 2010 03:04:29 -0500 Subject: [PATCH 02/16] Cleaned up operator enumeration, put it into ranges, fixed data ops. --HG-- branch : calculator-experiment --- calculator.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/calculator.c b/calculator.c index db01051e..d284de40 100644 --- a/calculator.c +++ b/calculator.c @@ -53,17 +53,21 @@ static inline void Free(Context *ctx, void *ptr) typedef enum Operator { - OP_DEREF_ARRAY, - OP_CALLFUNC, - OP_DEREF_STRUCT, + OP_START_RANGE_UNARY_OPERATORS, OP_POSTINCREMENT, OP_POSTDECREMENT, - OP_COMMA, OP_PREINCREMENT, OP_PREDECREMENT, OP_NEGATE, OP_COMPLEMENT, OP_NOT, + OP_END_RANGE_UNARY_OPERATORS, + + OP_START_RANGE_BINARY_OPERATORS, + OP_DEREF_ARRAY, + OP_CALLFUNC, + OP_DEREF_STRUCT, + OP_COMMA, OP_MULTIPLY, OP_DIVIDE, OP_MODULO, @@ -82,7 +86,6 @@ typedef enum Operator OP_BINARYOR, OP_LOGICALAND, OP_LOGICALOR, - OP_CONDITIONAL, OP_ASSIGN, OP_MULASSIGN, OP_DIVASSIGN, @@ -94,6 +97,18 @@ typedef enum Operator OP_ANDASSIGN, OP_XORASSIGN, OP_ORASSIGN, + OP_END_RANGE_BINARY_OPERATORS, + + OP_START_RANGE_TERNARY_OPERATORS, + OP_CONDITIONAL, + OP_END_RANGE_TERNARY_OPERATORS, + + OP_START_RANGE_DATA, + OP_IDENTIFIER, + OP_INT_LITERAL, + OP_FLOAT_LITERAL, + OP_STRING_LITERAL, + OP_END_RANGE_DATA, } Operator; typedef struct Expression @@ -128,25 +143,25 @@ typedef struct ExpressionTernary typedef struct ExpressionIdentifier { - Operator op; // Always TOKEN_CALC_IDENTIFIER + Operator op; // Always OP_IDENTIFIER const char *identifier; } ExpressionIdentifier; typedef struct ExpressionLiteralInt { - Operator op; // Always TOKEN_CALC_INT_CONSTANT + Operator op; // Always OP_INT_LITERAL int64 value; } ExpressionLiteralInt; typedef struct ExpressionLiteralFloat { - Operator op; // Always TOKEN_CALC_FLOAT_CONSTANT + Operator op; // Always OP_FLOAT_LITERAL double value; } ExpressionLiteralFloat; typedef struct ExpressionLiteralString { - Operator op; // Always TOKEN_CALC_STRING_LITERAL + Operator op; // Always OP_STRING_LITERAL const char *string; } ExpressionLiteralString; From 699bb516340166f75ffced6cfd361cedc0b7dbb0 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 8 Feb 2010 03:30:48 -0500 Subject: [PATCH 03/16] Make #include callbacks optional. Now fails at runtime if we hit an #include without callbacks defined. If we never need the callbacks, it's silly to assert they must exist. --HG-- branch : calculator-experiment --- mojoshader_preprocessor.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mojoshader_preprocessor.c b/mojoshader_preprocessor.c index 48daf3f3..f465b2aa 100644 --- a/mojoshader_preprocessor.c +++ b/mojoshader_preprocessor.c @@ -656,8 +656,6 @@ Preprocessor *preprocessor_start(const char *fname, const char *source, // the preprocessor is internal-only, so we verify all these are != NULL. assert(m != NULL); assert(f != NULL); - assert(open_callback != NULL); - assert(close_callback != NULL); Context *ctx = (Context *) m(sizeof (Context), d); if (ctx == NULL) @@ -837,6 +835,12 @@ static void handle_pp_include(Context *ctx) const char *newdata = NULL; unsigned int newbytes = 0; + if ((ctx->open_callback == NULL) || (ctx->close_callback == NULL)) + { + fail(ctx, "Saw #include, but no include callbacks defined"); + return; + } // if + if (!ctx->open_callback(incltype, filename, state->source_base, &newdata, &newbytes, ctx->malloc, ctx->free, ctx->malloc_data)) From 759929226a602f857ed65818b66763880c339d84 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 8 Feb 2010 03:33:12 -0500 Subject: [PATCH 04/16] Allow "q" to quit calculator, too. --HG-- branch : calculator-experiment --- calculator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/calculator.c b/calculator.c index d284de40..ef1fee3d 100644 --- a/calculator.c +++ b/calculator.c @@ -414,6 +414,8 @@ int main(int argc, char **argv) { if ((len == 5) && (memcmp(ln, "quit\n", 5) == 0)) break; + else if ((len == 2) && (memcmp(ln, "q\n", 2) == 0)) + break; MOJOSHADER_compile(filename, ln, (unsigned int) len, NULL, 0, NULL, NULL, NULL, NULL, NULL); From c8deb55131029d0f2a3b1180fa217d0329e4c8e8 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 8 Feb 2010 03:33:47 -0500 Subject: [PATCH 05/16] Initial dumping of parse tree once parsing finishes. Other minor tweaks and cleanups, too. --HG-- branch : calculator-experiment --- calculator.c | 196 +++++++++++++++++++++++++++++++++++++---------- calculator.lemon | 3 +- 2 files changed, 158 insertions(+), 41 deletions(-) diff --git a/calculator.c b/calculator.c index ef1fee3d..9cc195e6 100644 --- a/calculator.c +++ b/calculator.c @@ -53,7 +53,7 @@ static inline void Free(Context *ctx, void *ptr) typedef enum Operator { - OP_START_RANGE_UNARY_OPERATORS, + OP_START_RANGE_UNARY, OP_POSTINCREMENT, OP_POSTDECREMENT, OP_PREINCREMENT, @@ -61,9 +61,9 @@ typedef enum Operator OP_NEGATE, OP_COMPLEMENT, OP_NOT, - OP_END_RANGE_UNARY_OPERATORS, + OP_END_RANGE_UNARY, - OP_START_RANGE_BINARY_OPERATORS, + OP_START_RANGE_BINARY, OP_DEREF_ARRAY, OP_CALLFUNC, OP_DEREF_STRUCT, @@ -97,11 +97,11 @@ typedef enum Operator OP_ANDASSIGN, OP_XORASSIGN, OP_ORASSIGN, - OP_END_RANGE_BINARY_OPERATORS, + OP_END_RANGE_BINARY, - OP_START_RANGE_TERNARY_OPERATORS, + OP_START_RANGE_TERNARY, OP_CONDITIONAL, - OP_END_RANGE_TERNARY_OPERATORS, + OP_END_RANGE_TERNARY, OP_START_RANGE_DATA, OP_IDENTIFIER, @@ -111,6 +111,22 @@ typedef enum Operator OP_END_RANGE_DATA, } Operator; +static inline int operator_is_unary(const Operator op) +{ + return ((op > OP_START_RANGE_UNARY) && (op < OP_END_RANGE_UNARY)); +} // operator_is_unary + +static inline int operator_is_binary(const Operator op) +{ + return ((op > OP_START_RANGE_BINARY) && (op < OP_END_RANGE_BINARY)); +} // operator_is_binary + +static inline int operator_is_ternary(const Operator op) +{ + return ((op > OP_START_RANGE_TERNARY) && (op < OP_END_RANGE_TERNARY)); +} // operator_is_ternary + + typedef struct Expression { Operator op; // operator @@ -147,43 +163,23 @@ typedef struct ExpressionIdentifier const char *identifier; } ExpressionIdentifier; -typedef struct ExpressionLiteralInt +typedef struct ExpressionIntLiteral { Operator op; // Always OP_INT_LITERAL int64 value; -} ExpressionLiteralInt; +} ExpressionIntLiteral; -typedef struct ExpressionLiteralFloat +typedef struct ExpressionFloatLiteral { Operator op; // Always OP_FLOAT_LITERAL double value; -} ExpressionLiteralFloat; +} ExpressionFloatLiteral; -typedef struct ExpressionLiteralString +typedef struct ExpressionStringLiteral { Operator op; // Always OP_STRING_LITERAL const char *string; -} ExpressionLiteralString; - -static const char *new_identifier(Context *); -static Expression *new_unary_expr(Context *, const Operator, Expression *); -static Expression *new_binary_expr(Context *, const Operator, Expression *, Expression *); -static Expression *new_ternary_expr(Context *, const Operator, Expression *, Expression *, Expression *); -static Expression *new_identifier_expr(Context *, const char *); -static Expression *new_literal_int_expr(Context *); -static Expression *new_literal_float_expr(Context *); -static Expression *new_literal_string_expr(Context *); - -static void parse_complete(const Expression *expr) -{ - printf("parse complete!\n"); - -} // parse_complete - - -// This is where the actual parsing happens. It's Lemon-generated! -#define __MOJOSHADER_CALC_COMPILER__ 1 -#include "calculator.h" +} ExpressionStringLiteral; static const char *new_identifier(Context *ctx) { @@ -201,6 +197,7 @@ static Expression *new_unary_expr(Context *ctx, const Operator op, Expression *operand) { NEW_EXPR(ExpressionUnary); + assert(operator_is_unary(op)); retval->op = op; retval->operand = operand; return (Expression *) retval; @@ -210,6 +207,7 @@ static Expression *new_binary_expr(Context *ctx, const Operator op, Expression *left, Expression *right) { NEW_EXPR(ExpressionBinary); + assert(operator_is_binary(op)); retval->op = op; retval->left = left; retval->right = right; @@ -221,6 +219,7 @@ static Expression *new_ternary_expr(Context *ctx, const Operator op, Expression *right) { NEW_EXPR(ExpressionTernary); + assert(operator_is_ternary(op)); retval->op = op; retval->left = left; retval->center = center; @@ -231,7 +230,7 @@ static Expression *new_ternary_expr(Context *ctx, const Operator op, static Expression *new_identifier_expr(Context *ctx, const char *identifier) { NEW_EXPR(ExpressionIdentifier); - retval->op = TOKEN_CALC_IDENTIFIER; + retval->op = OP_IDENTIFIER; retval->identifier = identifier; return (Expression *) retval; } // new_identifier_expr @@ -275,8 +274,8 @@ static inline int64 strtoi64(const char *str, unsigned int len) static Expression *new_literal_int_expr(Context *ctx) { - NEW_EXPR(ExpressionLiteralInt); - retval->op = TOKEN_CALC_INT_CONSTANT; + NEW_EXPR(ExpressionIntLiteral); + retval->op = OP_INT_LITERAL; retval->value = strtoi64(ctx->token, ctx->tokenlen); return (Expression *) retval; } // new_literal_int_expr @@ -292,20 +291,139 @@ static inline double strtodouble(const char *_str, unsigned int len) static Expression *new_literal_float_expr(Context *ctx) { - NEW_EXPR(ExpressionLiteralFloat); - retval->op = TOKEN_CALC_FLOAT_CONSTANT; + NEW_EXPR(ExpressionFloatLiteral); + retval->op = OP_FLOAT_LITERAL; retval->value = strtodouble(ctx->token, ctx->tokenlen); return (Expression *) retval; } // new_literal_float_expr static Expression *new_literal_string_expr(Context *ctx) { - NEW_EXPR(ExpressionLiteralString); - retval->op = TOKEN_CALC_STRING_LITERAL; + NEW_EXPR(ExpressionStringLiteral); + retval->op = OP_STRING_LITERAL; retval->string = new_identifier(ctx); return (Expression *) retval; } // new_string_literal_expr +static void print_expr(const Expression *expr, const int depth) +{ + int i; + for (i = 0; i < depth; i++) + printf(" "); + + printf("Expression "); + switch (expr->op) + { + #define PRINT_OP(op) case op: printf("%s\n", #op); break; + PRINT_OP(OP_DEREF_ARRAY); + PRINT_OP(OP_CALLFUNC); + PRINT_OP(OP_DEREF_STRUCT); + PRINT_OP(OP_POSTINCREMENT); + PRINT_OP(OP_POSTDECREMENT); + PRINT_OP(OP_COMMA); + PRINT_OP(OP_PREINCREMENT); + PRINT_OP(OP_PREDECREMENT); + PRINT_OP(OP_NEGATE); + PRINT_OP(OP_COMPLEMENT); + PRINT_OP(OP_NOT); + PRINT_OP(OP_MULTIPLY); + PRINT_OP(OP_DIVIDE); + PRINT_OP(OP_MODULO); + PRINT_OP(OP_ADD); + PRINT_OP(OP_SUBTRACT); + PRINT_OP(OP_LSHIFT); + PRINT_OP(OP_RSHIFT); + PRINT_OP(OP_LESSTHAN); + PRINT_OP(OP_GREATERTHAN); + PRINT_OP(OP_LESSTHANOREQUAL); + PRINT_OP(OP_GREATERTHANOREQUAL); + PRINT_OP(OP_EQUAL); + PRINT_OP(OP_NOTEQUAL); + PRINT_OP(OP_BINARYAND); + PRINT_OP(OP_BINARYXOR); + PRINT_OP(OP_BINARYOR); + PRINT_OP(OP_LOGICALAND); + PRINT_OP(OP_LOGICALOR); + PRINT_OP(OP_CONDITIONAL); + PRINT_OP(OP_ASSIGN); + PRINT_OP(OP_MULASSIGN); + PRINT_OP(OP_DIVASSIGN); + PRINT_OP(OP_MODASSIGN); + PRINT_OP(OP_ADDASSIGN); + PRINT_OP(OP_SUBASSIGN); + PRINT_OP(OP_LSHIFTASSIGN); + PRINT_OP(OP_RSHIFTASSIGN); + PRINT_OP(OP_ANDASSIGN); + PRINT_OP(OP_XORASSIGN); + PRINT_OP(OP_ORASSIGN); + PRINT_OP(OP_INT_LITERAL); + PRINT_OP(OP_FLOAT_LITERAL); + PRINT_OP(OP_STRING_LITERAL); + PRINT_OP(OP_IDENTIFIER); + default: printf("---UNKNOWN!---\n"); return; + } // switch + + if (operator_is_unary(expr->op)) + { + const ExpressionUnary *unary = (const ExpressionUnary *) expr; + print_expr(unary->operand, depth + 1); + } // if + else if (operator_is_binary(expr->op)) + { + const ExpressionBinary *binary = (const ExpressionBinary *) expr; + print_expr(binary->left, depth + 1); + print_expr(binary->right, depth + 1); + } // else if + else if (operator_is_ternary(expr->op)) + { + const ExpressionTernary *ternary = (const ExpressionTernary *) expr; + print_expr(ternary->left, depth + 1); + print_expr(ternary->center, depth + 1); + print_expr(ternary->right, depth + 1); + } // else if + + else + { + for (i = 0; i < (depth + 1); i++) + printf(" "); + + if (expr->op == OP_IDENTIFIER) + { + const ExpressionIdentifier *ident = (const ExpressionIdentifier *) expr; + printf("(%s)\n", ident->identifier); + } // if + else if (expr->op == OP_INT_LITERAL) + { + const ExpressionIntLiteral *lit = (const ExpressionIntLiteral *) expr; + printf("(%lld)\n", (long long) lit->value); + } // if + else if (expr->op == OP_FLOAT_LITERAL) + { + const ExpressionFloatLiteral *lit = (const ExpressionFloatLiteral *) expr; + printf("(%lf)\n", lit->value); + } // if + else if (expr->op == OP_STRING_LITERAL) + { + const ExpressionStringLiteral *lit = (const ExpressionStringLiteral *) expr; + printf("(\"%s\")\n", lit->string); + } // if + else + { + assert(0 && "Shouldn't hit this."); + } // else + } // else +} // print_expr + +static void parse_complete(const Expression *expr) +{ + printf("parse complete!\n"); + print_expr(expr, 0); +} // parse_complete + + +// This is where the actual parsing happens. It's Lemon-generated! +#define __MOJOSHADER_CALC_COMPILER__ 1 +#include "calculator.h" static int convert_to_lemon_token(const Context *ctx) { diff --git a/calculator.lemon b/calculator.lemon index 69da912f..bdb67606 100644 --- a/calculator.lemon +++ b/calculator.lemon @@ -69,8 +69,7 @@ // The rules... -%type calculator { Expression * } -calculator(A) ::= expression(B). { A = B; } +calculator ::= expression(B). { parse_complete(B); } %type identifier { const char * } %destructor identifier { (void) ctx; } // !!! FIXME: remove this later, it's just to shut up the compiler for now. From 7eff1e827f7bb93c378d3289d0476175ea5d392d Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 8 Feb 2010 03:59:00 -0500 Subject: [PATCH 06/16] Fixed literal data getting through the parser in one piece. --HG-- branch : calculator-experiment --- calculator.c | 30 ++++++++++++++++++++---------- calculator.lemon | 10 +++++----- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/calculator.c b/calculator.c index 9cc195e6..f058eb06 100644 --- a/calculator.c +++ b/calculator.c @@ -5,6 +5,12 @@ #define LEMON_SUPPORT_TRACING 1 #endif +typedef struct TokenData +{ + const char *token; + unsigned int tokenlen; +} TokenData; + typedef struct Context { int isfail; @@ -181,14 +187,14 @@ typedef struct ExpressionStringLiteral const char *string; } ExpressionStringLiteral; -static const char *new_identifier(Context *ctx) +static const char *new_identifier(Context *ctx, const TokenData *data) { // !!! FIXME: this needs to cache strings. - const unsigned int len = ctx->tokenlen; + const unsigned int len = data->tokenlen; char *retval = Malloc(ctx, len + 1); if (retval == NULL) return NULL; - memcpy(retval, ctx->token, len); + memcpy(retval, data->token, len); retval[len] = '\0'; return retval; } // new_identifier @@ -272,11 +278,11 @@ static inline int64 strtoi64(const char *str, unsigned int len) return retval; } // strtoi64 -static Expression *new_literal_int_expr(Context *ctx) +static Expression *new_literal_int_expr(Context *ctx, const TokenData *data) { NEW_EXPR(ExpressionIntLiteral); retval->op = OP_INT_LITERAL; - retval->value = strtoi64(ctx->token, ctx->tokenlen); + retval->value = strtoi64(data->token, data->tokenlen); return (Expression *) retval; } // new_literal_int_expr @@ -289,19 +295,19 @@ static inline double strtodouble(const char *_str, unsigned int len) return strtod(str, NULL); } // strtodouble -static Expression *new_literal_float_expr(Context *ctx) +static Expression *new_literal_float_expr(Context *ctx, const TokenData *data) { NEW_EXPR(ExpressionFloatLiteral); retval->op = OP_FLOAT_LITERAL; - retval->value = strtodouble(ctx->token, ctx->tokenlen); + retval->value = strtodouble(data->token, data->tokenlen); return (Expression *) retval; } // new_literal_float_expr -static Expression *new_literal_string_expr(Context *ctx) +static Expression *new_literal_string_expr(Context *ctx, const TokenData *data) { NEW_EXPR(ExpressionStringLiteral); retval->op = OP_STRING_LITERAL; - retval->string = new_identifier(ctx); + retval->string = new_identifier(ctx, data); return (Expression *) retval; } // new_string_literal_expr @@ -516,7 +522,11 @@ static void MOJOSHADER_compile(const char *filename, ctx.token = preprocessor_nexttoken(ctx.preprocessor, &ctx.tokenlen, &ctx.tokenval); - ParseCalculator(pParser, convert_to_lemon_token(&ctx), 0, &ctx); + // !!! FIXME: this can't refer directly to pointers in the stream, + // !!! FIXME: as they can be free()'d before we actually use them + // !!! FIXME: when a rule reduces down later. + TokenData token = { ctx.token, ctx.tokenlen }; + ParseCalculator(pParser, convert_to_lemon_token(&ctx), token, &ctx); } while (ctx.tokenval != TOKEN_EOI); ParseCalculatorFree(pParser, f, d); } diff --git a/calculator.lemon b/calculator.lemon index bdb67606..ad5467ec 100644 --- a/calculator.lemon +++ b/calculator.lemon @@ -22,7 +22,7 @@ %start_symbol calculator %token_prefix TOKEN_CALC_ -%token_type { int } +%token_type { TokenData } %extra_argument { Context *ctx } %include { @@ -73,14 +73,14 @@ calculator ::= expression(B). { parse_complete(B); } %type identifier { const char * } %destructor identifier { (void) ctx; } // !!! FIXME: remove this later, it's just to shut up the compiler for now. -identifier(A) ::= IDENTIFIER. { A = new_identifier(ctx); } +identifier(A) ::= IDENTIFIER(B). { A = new_identifier(ctx, &B); } // the expression stuff is based on Jeff Lee's ANSI C grammar. %type primary_expr { Expression * } primary_expr(A) ::= identifier(B). { A = new_identifier_expr(ctx, B); } -primary_expr(A) ::= INT_CONSTANT. { A = new_literal_int_expr(ctx); } -primary_expr(A) ::= FLOAT_CONSTANT. { A = new_literal_float_expr(ctx); } -primary_expr(A) ::= STRING_LITERAL. { A = new_literal_string_expr(ctx); } +primary_expr(A) ::= INT_CONSTANT(B). { A = new_literal_int_expr(ctx, &B); } +primary_expr(A) ::= FLOAT_CONSTANT(B). { A = new_literal_float_expr(ctx, &B); } +primary_expr(A) ::= STRING_LITERAL(B). { A = new_literal_string_expr(ctx, &B); } primary_expr(A) ::= LPAREN expression(B) RPAREN. { A = B; } %type postfix_expr { Expression * } From 6735007a23b4765c6bc3baeac4c3e5e1548f1b5f Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 8 Feb 2010 03:59:12 -0500 Subject: [PATCH 07/16] Removed debug output. --HG-- branch : calculator-experiment --- calculator.c | 1 - 1 file changed, 1 deletion(-) diff --git a/calculator.c b/calculator.c index f058eb06..94e1148e 100644 --- a/calculator.c +++ b/calculator.c @@ -422,7 +422,6 @@ static void print_expr(const Expression *expr, const int depth) static void parse_complete(const Expression *expr) { - printf("parse complete!\n"); print_expr(expr, 0); } // parse_complete From 8c720f9ec2d698ccd56056e684b95128d1234526 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 8 Feb 2010 03:59:18 -0500 Subject: [PATCH 08/16] Ignore blank lines. --HG-- branch : calculator-experiment --- calculator.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/calculator.c b/calculator.c index 94e1148e..a6a1e8f7 100644 --- a/calculator.c +++ b/calculator.c @@ -539,7 +539,9 @@ int main(int argc, char **argv) while ((ln = fgetln(io, &len)) != NULL) { - if ((len == 5) && (memcmp(ln, "quit\n", 5) == 0)) + if (len == 1) + continue; + else if ((len == 5) && (memcmp(ln, "quit\n", 5) == 0)) break; else if ((len == 2) && (memcmp(ln, "q\n", 2) == 0)) break; From 07e212ad05a2a99cb69a2231ddc02187ef2f24c6 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 8 Feb 2010 04:29:05 -0500 Subject: [PATCH 09/16] Stop parsing if the parser panics. --HG-- branch : calculator-experiment --- calculator.c | 2 +- calculator.lemon | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/calculator.c b/calculator.c index a6a1e8f7..38430e27 100644 --- a/calculator.c +++ b/calculator.c @@ -526,7 +526,7 @@ static void MOJOSHADER_compile(const char *filename, // !!! FIXME: when a rule reduces down later. TokenData token = { ctx.token, ctx.tokenlen }; ParseCalculator(pParser, convert_to_lemon_token(&ctx), token, &ctx); - } while (ctx.tokenval != TOKEN_EOI); + } while ((!ctx.isfail) && (ctx.tokenval != TOKEN_EOI)); ParseCalculatorFree(pParser, f, d); } diff --git a/calculator.lemon b/calculator.lemon index ad5467ec..2d8a154c 100644 --- a/calculator.lemon +++ b/calculator.lemon @@ -36,11 +36,14 @@ } %parse_failure { - fprintf(stderr,"Giving up. Parser is hopelessly lost...\n"); + ctx->isfail = 1; + fprintf(stderr, "Giving up. Parser is hopelessly lost...\n"); + } %stack_overflow { - fprintf(stderr,"Giving up. Parser stack overflow\n"); + ctx->isfail = 1; + fprintf(stderr, "Giving up. Parser stack overflow\n"); } // operator precedence (matches C spec)... From 74bd06697d139742b0a42ce94dc90ff997e82eca Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 8 Feb 2010 04:29:31 -0500 Subject: [PATCH 10/16] Actually run the calculator, walking the parse tree. --HG-- branch : calculator-experiment --- calculator.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/calculator.c b/calculator.c index 38430e27..57fddb1d 100644 --- a/calculator.c +++ b/calculator.c @@ -420,9 +420,87 @@ static void print_expr(const Expression *expr, const int depth) } // else } // print_expr +static double run_expr(const Expression *expr) +{ + if (operator_is_unary(expr->op)) + { + const ExpressionUnary *unary = (const ExpressionUnary *) expr; + if (expr->op == OP_NEGATE) + return -run_expr(unary->operand); + else if (expr->op == OP_COMPLEMENT) + return (double) (~((int64)run_expr(unary->operand))); + else if (expr->op == OP_NOT) + return (run_expr(unary->operand) == 0.0) ? 1.0 : 0.0; + } // if + else if (operator_is_binary(expr->op)) + { + const ExpressionBinary *binary = (const ExpressionBinary *) expr; + if (expr->op == OP_MULTIPLY) + return run_expr(binary->left) * run_expr(binary->right); + else if (expr->op == OP_DIVIDE) + return run_expr(binary->left) / run_expr(binary->right); + else if (expr->op == OP_ADD) + return run_expr(binary->left) + run_expr(binary->right); + else if (expr->op == OP_SUBTRACT) + return run_expr(binary->left) - run_expr(binary->right); + else if (expr->op == OP_LESSTHAN) + return (run_expr(binary->left) < run_expr(binary->right)) ? 1.0 : 0.0; + else if (expr->op == OP_GREATERTHAN) + return (run_expr(binary->left) > run_expr(binary->right)) ? 1.0 : 0.0; + else if (expr->op == OP_LESSTHANOREQUAL) + return (run_expr(binary->left) <= run_expr(binary->right)) ? 1.0 : 0.0; + else if (expr->op == OP_GREATERTHANOREQUAL) + return (run_expr(binary->left) >= run_expr(binary->right)) ? 1.0 : 0.0; + else if (expr->op == OP_EQUAL) + return (run_expr(binary->left) == run_expr(binary->right)) ? 1.0 : 0.0; + else if (expr->op == OP_NOTEQUAL) + return (run_expr(binary->left) == run_expr(binary->right)) ? 1.0 : 0.0; + else if (expr->op == OP_LOGICALAND) + return (((int64)run_expr(binary->left)) && ((int64)run_expr(binary->right))) ? 1.0 : 0.0; + else if (expr->op == OP_LOGICALOR) + return (((int64)run_expr(binary->left)) || ((int64)run_expr(binary->right))) ? 1.0 : 0.0; + else if (expr->op == OP_BINARYAND) + return (double)(((int64)run_expr(binary->left)) & ((int64)run_expr(binary->right))); + else if (expr->op == OP_BINARYOR) + return (double)(((int64)run_expr(binary->left)) | ((int64)run_expr(binary->right))); + else if (expr->op == OP_BINARYXOR) + return (double)(((int64)run_expr(binary->left)) ^ ((int64)run_expr(binary->right))); + else if (expr->op == OP_LSHIFT) + return (double)(((int64)run_expr(binary->left)) << ((int64)run_expr(binary->right))); + else if (expr->op == OP_RSHIFT) + return (double)(((int64)run_expr(binary->left)) >> ((int64)run_expr(binary->right))); + else if (expr->op == OP_MODULO) + return (double)(((int64)run_expr(binary->left)) % ((int64)run_expr(binary->right))); + } // else if + + else if (operator_is_ternary(expr->op)) + { + const ExpressionTernary *ternary = (const ExpressionTernary *) expr; + if (expr->op == OP_CONDITIONAL) + return (run_expr(ternary->left) != 0.0) ? run_expr(ternary->center) : run_expr(ternary->right); + } // else if + + else + { + if (expr->op == OP_INT_LITERAL) + { + const ExpressionIntLiteral *lit = (const ExpressionIntLiteral *) expr; + return ((double) lit->value); + } // if + else if (expr->op == OP_FLOAT_LITERAL) + { + const ExpressionFloatLiteral *lit = (const ExpressionFloatLiteral *) expr; + return lit->value; + } // if + } // else + + return 0.0; // oh well. +} // run_expr + static void parse_complete(const Expression *expr) { print_expr(expr, 0); + printf("Result: %lf\n\n", run_expr(expr)); } // parse_complete From 8e12f4a4ac15287c953f846ca00d7e15be37f7bc Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 8 Feb 2010 04:42:51 -0500 Subject: [PATCH 11/16] Free the parse tree once we're done with it. --HG-- branch : calculator-experiment --- calculator.c | 34 +++++++++++++++++++++++++++++++++- calculator.lemon | 2 +- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/calculator.c b/calculator.c index 57fddb1d..c94bd67a 100644 --- a/calculator.c +++ b/calculator.c @@ -497,10 +497,42 @@ static double run_expr(const Expression *expr) return 0.0; // oh well. } // run_expr -static void parse_complete(const Expression *expr) +static void free_expr(Context *ctx, Expression *expr) +{ + if (operator_is_unary(expr->op)) + { + const ExpressionUnary *unary = (const ExpressionUnary *) expr; + free_expr(ctx, unary->operand); + } // if + else if (operator_is_binary(expr->op)) + { + const ExpressionBinary *binary = (const ExpressionBinary *) expr; + free_expr(ctx, binary->left); + free_expr(ctx, binary->right); + } // else if + else if (operator_is_ternary(expr->op)) + { + const ExpressionTernary *ternary = (const ExpressionTernary *) expr; + free_expr(ctx, ternary->left); + free_expr(ctx, ternary->center); + free_expr(ctx, ternary->right); + } // else if + else if (expr->op == OP_STRING_LITERAL) + { + Free(ctx, (void *) ((ExpressionStringLiteral *)expr)->string); + } // else if + else if (expr->op == OP_IDENTIFIER) + { + Free(ctx, (void *) ((ExpressionIdentifier *)expr)->identifier); + } // else if + Free(ctx, expr); +} // free_expr + +static void parse_complete(Context *ctx, Expression *expr) { print_expr(expr, 0); printf("Result: %lf\n\n", run_expr(expr)); + free_expr(ctx, expr); } // parse_complete diff --git a/calculator.lemon b/calculator.lemon index 2d8a154c..329b3920 100644 --- a/calculator.lemon +++ b/calculator.lemon @@ -72,7 +72,7 @@ // The rules... -calculator ::= expression(B). { parse_complete(B); } +calculator ::= expression(B). { parse_complete(ctx, B); } %type identifier { const char * } %destructor identifier { (void) ctx; } // !!! FIXME: remove this later, it's just to shut up the compiler for now. From 6f1a4c797b79cbe93286c2c384d045a014faa2b0 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 8 Feb 2010 23:51:32 -0500 Subject: [PATCH 12/16] Cleaned up TokenData FIXMEs, and added string cache. --HG-- branch : calculator-experiment --- calculator.c | 164 ++++++++++++++++++++++++++++++++++++----------- calculator.lemon | 9 +-- 2 files changed, 132 insertions(+), 41 deletions(-) diff --git a/calculator.c b/calculator.c index c94bd67a..7a09a6d5 100644 --- a/calculator.c +++ b/calculator.c @@ -5,12 +5,19 @@ #define LEMON_SUPPORT_TRACING 1 #endif -typedef struct TokenData +typedef union TokenData { - const char *token; - unsigned int tokenlen; + int64 i64; + double dbl; + const char *string; } TokenData; +typedef struct StringBucket +{ + char *string; + struct StringBucket *next; +} StringBucket; + typedef struct Context { int isfail; @@ -21,6 +28,8 @@ typedef struct Context int error_count; ErrorList *errors; Preprocessor *preprocessor; + StringBucket *string_hashtable[256]; + // !!! FIXME: do these really need to be in here? const char *token; unsigned int tokenlen; Token tokenval; @@ -187,18 +196,6 @@ typedef struct ExpressionStringLiteral const char *string; } ExpressionStringLiteral; -static const char *new_identifier(Context *ctx, const TokenData *data) -{ - // !!! FIXME: this needs to cache strings. - const unsigned int len = data->tokenlen; - char *retval = Malloc(ctx, len + 1); - if (retval == NULL) - return NULL; - memcpy(retval, data->token, len); - retval[len] = '\0'; - return retval; -} // new_identifier - static Expression *new_unary_expr(Context *ctx, const Operator op, Expression *operand) { @@ -233,11 +230,11 @@ static Expression *new_ternary_expr(Context *ctx, const Operator op, return (Expression *) retval; } // new_ternary_expr -static Expression *new_identifier_expr(Context *ctx, const char *identifier) +static Expression *new_identifier_expr(Context *ctx, const TokenData *data) { NEW_EXPR(ExpressionIdentifier); retval->op = OP_IDENTIFIER; - retval->identifier = identifier; + retval->identifier = data->string; // cached; don't copy string. return (Expression *) retval; } // new_identifier_expr @@ -282,7 +279,7 @@ static Expression *new_literal_int_expr(Context *ctx, const TokenData *data) { NEW_EXPR(ExpressionIntLiteral); retval->op = OP_INT_LITERAL; - retval->value = strtoi64(data->token, data->tokenlen); + retval->value = data->i64; return (Expression *) retval; } // new_literal_int_expr @@ -299,7 +296,7 @@ static Expression *new_literal_float_expr(Context *ctx, const TokenData *data) { NEW_EXPR(ExpressionFloatLiteral); retval->op = OP_FLOAT_LITERAL; - retval->value = strtodouble(data->token, data->tokenlen); + retval->value = data->dbl; return (Expression *) retval; } // new_literal_float_expr @@ -307,7 +304,7 @@ static Expression *new_literal_string_expr(Context *ctx, const TokenData *data) { NEW_EXPR(ExpressionStringLiteral); retval->op = OP_STRING_LITERAL; - retval->string = new_identifier(ctx, data); + retval->string = data->string; // cached; don't copy string. return (Expression *) retval; } // new_string_literal_expr @@ -517,14 +514,9 @@ static void free_expr(Context *ctx, Expression *expr) free_expr(ctx, ternary->center); free_expr(ctx, ternary->right); } // else if - else if (expr->op == OP_STRING_LITERAL) - { - Free(ctx, (void *) ((ExpressionStringLiteral *)expr)->string); - } // else if - else if (expr->op == OP_IDENTIFIER) - { - Free(ctx, (void *) ((ExpressionIdentifier *)expr)->identifier); - } // else if + + // don't need to free extra fields in other types at the moment. + Free(ctx, expr); } // free_expr @@ -536,6 +528,82 @@ static void parse_complete(Context *ctx, Expression *expr) } // parse_complete +// !!! FIXME: sort of cut-and-paste from the preprocessor... + +// this is djb's xor hashing function. +static inline uint32 hash_string_djbxor(const char *str, unsigned int len) +{ + register uint32 hash = 5381; + while (len--) + hash = ((hash << 5) + hash) ^ *(str++); + return hash; +} // hash_string_djbxor + +static inline uint8 hash_string(const char *str, const unsigned int len) +{ + return (uint8) hash_string_djbxor(str, len); +} // hash_string + +static const char *cache_string(Context *ctx, const char *str, + const unsigned int len) +{ + const uint8 hash = hash_string(str, len); + StringBucket *bucket = ctx->string_hashtable[hash]; + StringBucket *prev = NULL; + while (bucket) + { + const char *bstr = bucket->string; + if ((strncmp(bstr, str, len) == 0) && (bstr[len] == 0)) + { + // Matched! Move this to the front of the list. + if (prev != NULL) + { + assert(prev->next == bucket); + prev->next = bucket->next; + bucket->next = ctx->string_hashtable[hash]; + ctx->string_hashtable[hash] = bucket; + } // if + return bstr; // already cached + } // if + prev = bucket; + bucket = bucket->next; + } // while + + // no match, add to the table. + bucket = (StringBucket *) Malloc(ctx, sizeof (StringBucket)); + if (bucket == NULL) + return NULL; + bucket->string = (char *) Malloc(ctx, len + 1); + if (bucket->string == NULL) + { + Free(ctx, bucket); + return NULL; + } // if + memcpy(bucket->string, str, len); + bucket->string[len] = '\0'; + bucket->next = ctx->string_hashtable[hash]; + ctx->string_hashtable[hash] = bucket; + return bucket->string; +} // cache_string + +static void free_string_cache(Context *ctx) +{ + size_t i; + for (i = 0; i < STATICARRAYLEN(ctx->string_hashtable); i++) + { + StringBucket *bucket = ctx->string_hashtable[i]; + ctx->string_hashtable[i] = NULL; + while (bucket) + { + StringBucket *next = bucket->next; + Free(ctx, bucket->string); + Free(ctx, bucket); + bucket = next; + } // while + } // for +} // free_string_cache + + // This is where the actual parsing happens. It's Lemon-generated! #define __MOJOSHADER_CALC_COMPILER__ 1 #include "calculator.h" @@ -610,6 +678,8 @@ static void MOJOSHADER_compile(const char *filename, MOJOSHADER_malloc m, MOJOSHADER_free f, void *d) { Context ctx; + TokenData data; + if (m == NULL) m = MOJOSHADER_internal_malloc; if (f == NULL) f = MOJOSHADER_internal_free; @@ -628,17 +698,37 @@ static void MOJOSHADER_compile(const char *filename, #endif do { - ctx.token = preprocessor_nexttoken(ctx.preprocessor, - &ctx.tokenlen, - &ctx.tokenval); - // !!! FIXME: this can't refer directly to pointers in the stream, - // !!! FIXME: as they can be free()'d before we actually use them - // !!! FIXME: when a rule reduces down later. - TokenData token = { ctx.token, ctx.tokenlen }; - ParseCalculator(pParser, convert_to_lemon_token(&ctx), token, &ctx); + ctx.token = preprocessor_nexttoken(ctx.preprocessor, &ctx.tokenlen, + &ctx.tokenval); + + const int lemon_token = convert_to_lemon_token(&ctx); + switch (lemon_token) + { + case TOKEN_CALC_INT_CONSTANT: + data.i64 = strtoi64(ctx.token, ctx.tokenlen); + break; + + case TOKEN_CALC_FLOAT_CONSTANT: + data.dbl = strtodouble(ctx.token, ctx.tokenlen); + break; + + case TOKEN_CALC_STRING_LITERAL: + case TOKEN_CALC_IDENTIFIER: + data.string = cache_string(&ctx, ctx.token, ctx.tokenlen); + break; + + default: + data.i64 = 0; + break; + } // switch + + ParseCalculator(pParser, lemon_token, data, &ctx); } while ((!ctx.isfail) && (ctx.tokenval != TOKEN_EOI)); + ParseCalculatorFree(pParser, f, d); -} + // !!! FIXME: destruct (ctx) here. + free_string_cache(&ctx); +} // MOJOSHADER_compile int main(int argc, char **argv) { diff --git a/calculator.lemon b/calculator.lemon index 329b3920..3c2f10f6 100644 --- a/calculator.lemon +++ b/calculator.lemon @@ -74,13 +74,14 @@ calculator ::= expression(B). { parse_complete(ctx, B); } -%type identifier { const char * } +// !!! FIXME: why is this a non-terminal? +%type identifier { TokenData } %destructor identifier { (void) ctx; } // !!! FIXME: remove this later, it's just to shut up the compiler for now. -identifier(A) ::= IDENTIFIER(B). { A = new_identifier(ctx, &B); } +identifier(A) ::= IDENTIFIER(B). { A = B; } // the expression stuff is based on Jeff Lee's ANSI C grammar. %type primary_expr { Expression * } -primary_expr(A) ::= identifier(B). { A = new_identifier_expr(ctx, B); } +primary_expr(A) ::= identifier(B). { A = new_identifier_expr(ctx, &B); } primary_expr(A) ::= INT_CONSTANT(B). { A = new_literal_int_expr(ctx, &B); } primary_expr(A) ::= FLOAT_CONSTANT(B). { A = new_literal_float_expr(ctx, &B); } primary_expr(A) ::= STRING_LITERAL(B). { A = new_literal_string_expr(ctx, &B); } @@ -92,7 +93,7 @@ postfix_expr(A) ::= postfix_expr(B) LBRACKET expression(C) RBRACKET. { A = new_b postfix_expr(A) ::= postfix_expr(B) LPAREN RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, NULL); } postfix_expr(A) ::= postfix_expr(B) LPAREN argument_expr_list(C) RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, C); } //postfix_expr(A) ::= datatype(B) LPAREN argument_expr_list(C) RPAREN. { A = new_constructor_expr(ctx, B, C); } // HLSL constructor -postfix_expr(A) ::= postfix_expr(B) DOT identifier(C). { A = new_binary_expr(ctx, OP_DEREF_STRUCT, B, new_identifier_expr(ctx, C)); } +postfix_expr(A) ::= postfix_expr(B) DOT identifier(C). { A = new_binary_expr(ctx, OP_DEREF_STRUCT, B, new_identifier_expr(ctx, &C)); } postfix_expr(A) ::= postfix_expr(B) PLUSPLUS. { A = new_unary_expr(ctx, OP_POSTINCREMENT, B); } postfix_expr(A) ::= postfix_expr(B) MINUSMINUS. { A = new_unary_expr(ctx, OP_POSTDECREMENT, B); } From 29da731bb8bee27ed1a1c6493a56861c93383373 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Tue, 9 Feb 2010 00:08:12 -0500 Subject: [PATCH 13/16] "identifier" shouldn't be a non-terminal. --HG-- branch : calculator-experiment --- calculator.lemon | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/calculator.lemon b/calculator.lemon index 3c2f10f6..e04759eb 100644 --- a/calculator.lemon +++ b/calculator.lemon @@ -72,16 +72,13 @@ // The rules... +%type calculator { int } // !!! FIXME: remove this later. +%destructor calculator { (void) ctx; } // !!! FIXME: remove this later. calculator ::= expression(B). { parse_complete(ctx, B); } -// !!! FIXME: why is this a non-terminal? -%type identifier { TokenData } -%destructor identifier { (void) ctx; } // !!! FIXME: remove this later, it's just to shut up the compiler for now. -identifier(A) ::= IDENTIFIER(B). { A = B; } - // the expression stuff is based on Jeff Lee's ANSI C grammar. %type primary_expr { Expression * } -primary_expr(A) ::= identifier(B). { A = new_identifier_expr(ctx, &B); } +primary_expr(A) ::= IDENTIFIER(B). { A = new_identifier_expr(ctx, &B); } primary_expr(A) ::= INT_CONSTANT(B). { A = new_literal_int_expr(ctx, &B); } primary_expr(A) ::= FLOAT_CONSTANT(B). { A = new_literal_float_expr(ctx, &B); } primary_expr(A) ::= STRING_LITERAL(B). { A = new_literal_string_expr(ctx, &B); } @@ -93,7 +90,7 @@ postfix_expr(A) ::= postfix_expr(B) LBRACKET expression(C) RBRACKET. { A = new_b postfix_expr(A) ::= postfix_expr(B) LPAREN RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, NULL); } postfix_expr(A) ::= postfix_expr(B) LPAREN argument_expr_list(C) RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, C); } //postfix_expr(A) ::= datatype(B) LPAREN argument_expr_list(C) RPAREN. { A = new_constructor_expr(ctx, B, C); } // HLSL constructor -postfix_expr(A) ::= postfix_expr(B) DOT identifier(C). { A = new_binary_expr(ctx, OP_DEREF_STRUCT, B, new_identifier_expr(ctx, &C)); } +postfix_expr(A) ::= postfix_expr(B) DOT IDENTIFIER(C). { A = new_binary_expr(ctx, OP_DEREF_STRUCT, B, new_identifier_expr(ctx, &C)); } postfix_expr(A) ::= postfix_expr(B) PLUSPLUS. { A = new_unary_expr(ctx, OP_POSTINCREMENT, B); } postfix_expr(A) ::= postfix_expr(B) MINUSMINUS. { A = new_unary_expr(ctx, OP_POSTDECREMENT, B); } From a858c522fa497968bbe66c469e2cea2b91676f5e Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Tue, 9 Feb 2010 00:23:09 -0500 Subject: [PATCH 14/16] Removed some Context fields we don't really need. --HG-- branch : calculator-experiment --- calculator.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/calculator.c b/calculator.c index 7a09a6d5..5213da78 100644 --- a/calculator.c +++ b/calculator.c @@ -29,11 +29,6 @@ typedef struct Context ErrorList *errors; Preprocessor *preprocessor; StringBucket *string_hashtable[256]; - // !!! FIXME: do these really need to be in here? - const char *token; - unsigned int tokenlen; - Token tokenval; - unsigned int parse_errors; } Context; @@ -608,9 +603,9 @@ static void free_string_cache(Context *ctx) #define __MOJOSHADER_CALC_COMPILER__ 1 #include "calculator.h" -static int convert_to_lemon_token(const Context *ctx) +static int convert_to_lemon_token(const Context *ctx, const Token tokenval) { - switch (ctx->tokenval) + switch (tokenval) { case ((Token) ','): return TOKEN_CALC_COMMA; case ((Token) '='): return TOKEN_CALC_ASSIGN; @@ -679,6 +674,10 @@ static void MOJOSHADER_compile(const char *filename, { Context ctx; TokenData data; + unsigned int tokenlen; + Token tokenval; + const char *token; + int lemon_token; if (m == NULL) m = MOJOSHADER_internal_malloc; if (f == NULL) f = MOJOSHADER_internal_free; @@ -698,23 +697,21 @@ static void MOJOSHADER_compile(const char *filename, #endif do { - ctx.token = preprocessor_nexttoken(ctx.preprocessor, &ctx.tokenlen, - &ctx.tokenval); - - const int lemon_token = convert_to_lemon_token(&ctx); + token = preprocessor_nexttoken(ctx.preprocessor, &tokenlen, &tokenval); + lemon_token = convert_to_lemon_token(&ctx, tokenval); switch (lemon_token) { case TOKEN_CALC_INT_CONSTANT: - data.i64 = strtoi64(ctx.token, ctx.tokenlen); + data.i64 = strtoi64(token, tokenlen); break; case TOKEN_CALC_FLOAT_CONSTANT: - data.dbl = strtodouble(ctx.token, ctx.tokenlen); + data.dbl = strtodouble(token, tokenlen); break; case TOKEN_CALC_STRING_LITERAL: case TOKEN_CALC_IDENTIFIER: - data.string = cache_string(&ctx, ctx.token, ctx.tokenlen); + data.string = cache_string(&ctx, token, tokenlen); break; default: @@ -723,7 +720,7 @@ static void MOJOSHADER_compile(const char *filename, } // switch ParseCalculator(pParser, lemon_token, data, &ctx); - } while ((!ctx.isfail) && (ctx.tokenval != TOKEN_EOI)); + } while ((!ctx.isfail) && (tokenval != TOKEN_EOI)); ParseCalculatorFree(pParser, f, d); // !!! FIXME: destruct (ctx) here. From 31af9ce1d434057a360d9969258208f370247210 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Tue, 9 Feb 2010 01:52:08 -0500 Subject: [PATCH 15/16] Don't pass the TokenData back out of the lemon code. --HG-- branch : calculator-experiment --- calculator.c | 16 ++++++++-------- calculator.lemon | 10 +++++----- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/calculator.c b/calculator.c index 5213da78..571fa19e 100644 --- a/calculator.c +++ b/calculator.c @@ -225,11 +225,11 @@ static Expression *new_ternary_expr(Context *ctx, const Operator op, return (Expression *) retval; } // new_ternary_expr -static Expression *new_identifier_expr(Context *ctx, const TokenData *data) +static Expression *new_identifier_expr(Context *ctx, const char *string) { NEW_EXPR(ExpressionIdentifier); retval->op = OP_IDENTIFIER; - retval->identifier = data->string; // cached; don't copy string. + retval->identifier = string; // cached; don't copy string! return (Expression *) retval; } // new_identifier_expr @@ -270,11 +270,11 @@ static inline int64 strtoi64(const char *str, unsigned int len) return retval; } // strtoi64 -static Expression *new_literal_int_expr(Context *ctx, const TokenData *data) +static Expression *new_literal_int_expr(Context *ctx, const int64 value) { NEW_EXPR(ExpressionIntLiteral); retval->op = OP_INT_LITERAL; - retval->value = data->i64; + retval->value = value; return (Expression *) retval; } // new_literal_int_expr @@ -287,19 +287,19 @@ static inline double strtodouble(const char *_str, unsigned int len) return strtod(str, NULL); } // strtodouble -static Expression *new_literal_float_expr(Context *ctx, const TokenData *data) +static Expression *new_literal_float_expr(Context *ctx, const double value) { NEW_EXPR(ExpressionFloatLiteral); retval->op = OP_FLOAT_LITERAL; - retval->value = data->dbl; + retval->value = value; return (Expression *) retval; } // new_literal_float_expr -static Expression *new_literal_string_expr(Context *ctx, const TokenData *data) +static Expression *new_literal_string_expr(Context *ctx, const char *string) { NEW_EXPR(ExpressionStringLiteral); retval->op = OP_STRING_LITERAL; - retval->string = data->string; // cached; don't copy string. + retval->string = string; // cached; don't copy string! return (Expression *) retval; } // new_string_literal_expr diff --git a/calculator.lemon b/calculator.lemon index e04759eb..cec0367c 100644 --- a/calculator.lemon +++ b/calculator.lemon @@ -78,10 +78,10 @@ calculator ::= expression(B). { parse_complete(ctx, B); } // the expression stuff is based on Jeff Lee's ANSI C grammar. %type primary_expr { Expression * } -primary_expr(A) ::= IDENTIFIER(B). { A = new_identifier_expr(ctx, &B); } -primary_expr(A) ::= INT_CONSTANT(B). { A = new_literal_int_expr(ctx, &B); } -primary_expr(A) ::= FLOAT_CONSTANT(B). { A = new_literal_float_expr(ctx, &B); } -primary_expr(A) ::= STRING_LITERAL(B). { A = new_literal_string_expr(ctx, &B); } +primary_expr(A) ::= IDENTIFIER(B). { A = new_identifier_expr(ctx, B.string); } +primary_expr(A) ::= INT_CONSTANT(B). { A = new_literal_int_expr(ctx, B.i64); } +primary_expr(A) ::= FLOAT_CONSTANT(B). { A = new_literal_float_expr(ctx, B.dbl); } +primary_expr(A) ::= STRING_LITERAL(B). { A = new_literal_string_expr(ctx, B.string); } primary_expr(A) ::= LPAREN expression(B) RPAREN. { A = B; } %type postfix_expr { Expression * } @@ -90,7 +90,7 @@ postfix_expr(A) ::= postfix_expr(B) LBRACKET expression(C) RBRACKET. { A = new_b postfix_expr(A) ::= postfix_expr(B) LPAREN RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, NULL); } postfix_expr(A) ::= postfix_expr(B) LPAREN argument_expr_list(C) RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, C); } //postfix_expr(A) ::= datatype(B) LPAREN argument_expr_list(C) RPAREN. { A = new_constructor_expr(ctx, B, C); } // HLSL constructor -postfix_expr(A) ::= postfix_expr(B) DOT IDENTIFIER(C). { A = new_binary_expr(ctx, OP_DEREF_STRUCT, B, new_identifier_expr(ctx, &C)); } +postfix_expr(A) ::= postfix_expr(B) DOT IDENTIFIER(C). { A = new_binary_expr(ctx, OP_DEREF_STRUCT, B, new_identifier_expr(ctx, C.string)); } postfix_expr(A) ::= postfix_expr(B) PLUSPLUS. { A = new_unary_expr(ctx, OP_POSTINCREMENT, B); } postfix_expr(A) ::= postfix_expr(B) MINUSMINUS. { A = new_unary_expr(ctx, OP_POSTDECREMENT, B); } From 4923bb2bb815971ae66c7b042c2f98bd6dd04f8a Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Tue, 9 Feb 2010 02:55:38 -0500 Subject: [PATCH 16/16] Moved the calculator experiment work back into the compiler. At least we know the expression parser works! :) Lots of other bits of new infrastructure in there, too. --HG-- branch : calculator-experiment --- mojoshader_compiler.c | 495 ++++++++++++++++++++++++++++++++--- mojoshader_parser_hlsl.lemon | 256 +++++++++--------- 2 files changed, 592 insertions(+), 159 deletions(-) diff --git a/mojoshader_compiler.c b/mojoshader_compiler.c index 384da4dd..298845e7 100644 --- a/mojoshader_compiler.c +++ b/mojoshader_compiler.c @@ -5,46 +5,327 @@ #define LEMON_SUPPORT_TRACING 1 #endif -typedef struct TokenData +typedef union TokenData { - const char *token; - unsigned int tokenlen; + int64 i64; + double dbl; + const char *string; } TokenData; +typedef struct StringBucket +{ + char *string; + struct StringBucket *next; +} StringBucket; + typedef struct Context { + int isfail; + int out_of_memory; + MOJOSHADER_malloc malloc; + MOJOSHADER_free free; + void *malloc_data; + int error_count; + ErrorList *errors; Preprocessor *preprocessor; - const char *token; - unsigned int tokenlen; - Token tokenval; - unsigned int parse_errors; - TokenData usertypes[512]; // !!! FIXME: dynamic allocation - int usertype_count; // !!! FIXME: dynamic allocation + StringBucket *string_hashtable[256]; + const char *usertypes[512]; // !!! FIXME: dynamic allocation + int usertype_count; } Context; -static void add_usertype(Context *ctx, const char *token, unsigned int len) +// Convenience functions for allocators... + +static inline void out_of_memory(Context *ctx) +{ + ctx->isfail = ctx->out_of_memory = 1; +} // out_of_memory + +static inline void *Malloc(Context *ctx, const size_t len) +{ + void *retval = ctx->malloc((int) len, ctx->malloc_data); + if (retval == NULL) + out_of_memory(ctx); + return retval; +} // Malloc + +static inline char *StrDup(Context *ctx, const char *str) +{ + char *retval = (char *) Malloc(ctx, strlen(str) + 1); + if (retval != NULL) + strcpy(retval, str); + return retval; +} // StrDup + +static inline void Free(Context *ctx, void *ptr) +{ + if (ptr != NULL) // check for NULL in case of dumb free() impl. + ctx->free(ptr, ctx->malloc_data); +} // Free + +typedef enum Operator +{ + OP_START_RANGE_UNARY, + OP_POSTINCREMENT, + OP_POSTDECREMENT, + OP_PREINCREMENT, + OP_PREDECREMENT, + OP_NEGATE, + OP_COMPLEMENT, + OP_NOT, + OP_END_RANGE_UNARY, + + OP_START_RANGE_BINARY, + OP_DEREF_ARRAY, + OP_CALLFUNC, + OP_DEREF_STRUCT, + OP_COMMA, + OP_MULTIPLY, + OP_DIVIDE, + OP_MODULO, + OP_ADD, + OP_SUBTRACT, + OP_LSHIFT, + OP_RSHIFT, + OP_LESSTHAN, + OP_GREATERTHAN, + OP_LESSTHANOREQUAL, + OP_GREATERTHANOREQUAL, + OP_EQUAL, + OP_NOTEQUAL, + OP_BINARYAND, + OP_BINARYXOR, + OP_BINARYOR, + OP_LOGICALAND, + OP_LOGICALOR, + OP_ASSIGN, + OP_MULASSIGN, + OP_DIVASSIGN, + OP_MODASSIGN, + OP_ADDASSIGN, + OP_SUBASSIGN, + OP_LSHIFTASSIGN, + OP_RSHIFTASSIGN, + OP_ANDASSIGN, + OP_XORASSIGN, + OP_ORASSIGN, + OP_END_RANGE_BINARY, + + OP_START_RANGE_TERNARY, + OP_CONDITIONAL, + OP_END_RANGE_TERNARY, + + OP_START_RANGE_DATA, + OP_IDENTIFIER, + OP_INT_LITERAL, + OP_FLOAT_LITERAL, + OP_STRING_LITERAL, + OP_END_RANGE_DATA, +} Operator; + +static inline int operator_is_unary(const Operator op) +{ + return ((op > OP_START_RANGE_UNARY) && (op < OP_END_RANGE_UNARY)); +} // operator_is_unary + +static inline int operator_is_binary(const Operator op) +{ + return ((op > OP_START_RANGE_BINARY) && (op < OP_END_RANGE_BINARY)); +} // operator_is_binary + +static inline int operator_is_ternary(const Operator op) +{ + return ((op > OP_START_RANGE_TERNARY) && (op < OP_END_RANGE_TERNARY)); +} // operator_is_ternary + + +typedef struct Expression +{ + Operator op; // operator +} Expression; + +#define NEW_EXPR(cls) \ + cls *retval = Malloc(ctx, sizeof (cls)); \ + if (retval == NULL) { return NULL; } + +typedef struct ExpressionUnary +{ + Operator op; // operator + Expression *operand; +} ExpressionUnary; + +typedef struct ExpressionBinary +{ + Operator op; // operator + Expression *left; + Expression *right; +} ExpressionBinary; + +typedef struct ExpressionTernary +{ + Operator op; // operator + Expression *left; + Expression *center; + Expression *right; +} ExpressionTernary; + +typedef struct ExpressionIdentifier +{ + Operator op; // Always OP_IDENTIFIER + const char *identifier; +} ExpressionIdentifier; + +typedef struct ExpressionIntLiteral +{ + Operator op; // Always OP_INT_LITERAL + int64 value; +} ExpressionIntLiteral; + +typedef struct ExpressionFloatLiteral +{ + Operator op; // Always OP_FLOAT_LITERAL + double value; +} ExpressionFloatLiteral; + +typedef struct ExpressionStringLiteral +{ + Operator op; // Always OP_STRING_LITERAL + const char *string; +} ExpressionStringLiteral; + +static Expression *new_unary_expr(Context *ctx, const Operator op, + Expression *operand) +{ + NEW_EXPR(ExpressionUnary); + assert(operator_is_unary(op)); + retval->op = op; + retval->operand = operand; + return (Expression *) retval; +} // new_unary_expr + +static Expression *new_binary_expr(Context *ctx, const Operator op, + Expression *left, Expression *right) +{ + NEW_EXPR(ExpressionBinary); + assert(operator_is_binary(op)); + retval->op = op; + retval->left = left; + retval->right = right; + return (Expression *) retval; +} // new_binary_expr + +static Expression *new_ternary_expr(Context *ctx, const Operator op, + Expression *left, Expression *center, + Expression *right) +{ + NEW_EXPR(ExpressionTernary); + assert(operator_is_ternary(op)); + retval->op = op; + retval->left = left; + retval->center = center; + retval->right = right; + return (Expression *) retval; +} // new_ternary_expr + +static Expression *new_identifier_expr(Context *ctx, const char *string) +{ + NEW_EXPR(ExpressionIdentifier); + retval->op = OP_IDENTIFIER; + retval->identifier = string; // cached; don't copy string. + return (Expression *) retval; +} // new_identifier_expr + +static inline int64 strtoi64(const char *str, unsigned int len) +{ + int64 retval = 0; + int64 mult = 1; + int i = 0; + + while ((len) && (*str == ' ')) + { + str++; + len--; + } // while + + if ((len) && (*str == '-')) + { + mult = -1; + str++; + len--; + } // if + + while (i < len) + { + const char ch = str[i]; + if ((ch < '0') || (ch > '9')) + break; + i++; + } // while + + while (--i >= 0) + { + const char ch = str[i]; + retval += ((int64) (ch - '0')) * mult; + mult *= 10; + } // while + + return retval; +} // strtoi64 + +static Expression *new_literal_int_expr(Context *ctx, const int64 value) +{ + NEW_EXPR(ExpressionIntLiteral); + retval->op = OP_INT_LITERAL; + retval->value = value; + return (Expression *) retval; +} // new_literal_int_expr + +static inline double strtodouble(const char *_str, unsigned int len) +{ + // !!! FIXME: laziness prevails. + char *str = (char *) alloca(len+1); + memcpy(str, _str, len); + str[len] = '\0'; + return strtod(str, NULL); +} // strtodouble + +static Expression *new_literal_float_expr(Context *ctx, const double dbl) +{ + NEW_EXPR(ExpressionFloatLiteral); + retval->op = OP_FLOAT_LITERAL; + retval->value = dbl; + return (Expression *) retval; +} // new_literal_float_expr + +static Expression *new_literal_string_expr(Context *ctx, const char *string) +{ + NEW_EXPR(ExpressionStringLiteral); + retval->op = OP_STRING_LITERAL; + retval->string = string; // cached; don't copy string. + return (Expression *) retval; +} // new_string_literal_expr + + +static void add_usertype(Context *ctx, const char *sym) { // !!! FIXME: error if this is a reserved keyword. // !!! FIXME: dynamic allocation assert(ctx->usertype_count < STATICARRAYLEN(ctx->usertypes)); - ctx->usertypes[ctx->usertype_count].token = token; - ctx->usertypes[ctx->usertype_count].tokenlen = len; + ctx->usertypes[ctx->usertype_count++] = sym; ctx->usertype_count++; } // add_usertype -static int is_usertype(const Context *ctx) +static int is_usertype(const Context *ctx, const char *token, + const unsigned int tokenlen) { // !!! FIXME: dynamic allocation // !!! FIXME: should probably redesign this anyhow. int i; for (i = 0; i < ctx->usertype_count; i++) { - if (ctx->usertypes[i].tokenlen == ctx->tokenlen) - { - if (memcmp(ctx->usertypes[i].token, ctx->token, ctx->tokenlen)==0) - return 1; - } // if + const char *type = ctx->usertypes[i]; + if (strncmp(type, token, tokenlen) == 0) + return type[tokenlen] == '\0'; } // for return 0; @@ -55,8 +336,112 @@ static int is_usertype(const Context *ctx) #define __MOJOSHADER_HLSL_COMPILER__ 1 #include "mojoshader_parser_hlsl.h" + +static void free_expr(Context *ctx, Expression *expr) +{ + if (operator_is_unary(expr->op)) + { + const ExpressionUnary *unary = (const ExpressionUnary *) expr; + free_expr(ctx, unary->operand); + } // if + else if (operator_is_binary(expr->op)) + { + const ExpressionBinary *binary = (const ExpressionBinary *) expr; + free_expr(ctx, binary->left); + free_expr(ctx, binary->right); + } // else if + else if (operator_is_ternary(expr->op)) + { + const ExpressionTernary *ternary = (const ExpressionTernary *) expr; + free_expr(ctx, ternary->left); + free_expr(ctx, ternary->center); + free_expr(ctx, ternary->right); + } // else if + + // don't need to free extra fields in other types at the moment. + + Free(ctx, expr); +} // free_expr + +// !!! FIXME: sort of cut-and-paste from the preprocessor... + +// this is djb's xor hashing function. +static inline uint32 hash_string_djbxor(const char *str, unsigned int len) +{ + register uint32 hash = 5381; + while (len--) + hash = ((hash << 5) + hash) ^ *(str++); + return hash; +} // hash_string_djbxor + +static inline uint8 hash_string(const char *str, const unsigned int len) +{ + return (uint8) hash_string_djbxor(str, len); +} // hash_string + +static const char *cache_string(Context *ctx, const char *str, + const unsigned int len) +{ + const uint8 hash = hash_string(str, len); + StringBucket *bucket = ctx->string_hashtable[hash]; + StringBucket *prev = NULL; + while (bucket) + { + const char *bstr = bucket->string; + if ((strncmp(bstr, str, len) == 0) && (bstr[len] == 0)) + { + // Matched! Move this to the front of the list. + if (prev != NULL) + { + assert(prev->next == bucket); + prev->next = bucket->next; + bucket->next = ctx->string_hashtable[hash]; + ctx->string_hashtable[hash] = bucket; + } // if + return bstr; // already cached + } // if + prev = bucket; + bucket = bucket->next; + } // while + + // no match, add to the table. + bucket = (StringBucket *) Malloc(ctx, sizeof (StringBucket)); + if (bucket == NULL) + return NULL; + bucket->string = (char *) Malloc(ctx, len + 1); + if (bucket->string == NULL) + { + Free(ctx, bucket); + return NULL; + } // if + memcpy(bucket->string, str, len); + bucket->string[len] = '\0'; + bucket->next = ctx->string_hashtable[hash]; + ctx->string_hashtable[hash] = bucket; + return bucket->string; +} // cache_string + +static void free_string_cache(Context *ctx) +{ + size_t i; + for (i = 0; i < STATICARRAYLEN(ctx->string_hashtable); i++) + { + StringBucket *bucket = ctx->string_hashtable[i]; + ctx->string_hashtable[i] = NULL; + while (bucket) + { + StringBucket *next = bucket->next; + Free(ctx, bucket->string); + Free(ctx, bucket); + bucket = next; + } // while + } // for +} // free_string_cache + + // This does not check correctness (POSITIONT993842 passes, etc). -static int is_semantic(const Context *ctx) +static int is_semantic(const Context *ctx, const char *token, + const unsigned int tokenlen) { static const char *names[] = { "BINORMAL", "BLENDINDICES", "BLENDWEIGHT", @@ -71,9 +456,9 @@ static int is_semantic(const Context *ctx) { const char *name = *i; const size_t namelen = strlen(name); - if (ctx->tokenlen < namelen) + if (tokenlen < namelen) continue; - else if (memcmp(ctx->token, name, namelen) != 0) + else if (memcmp(token, name, namelen) != 0) continue; for (name += namelen; *name; name++) @@ -90,9 +475,10 @@ static int is_semantic(const Context *ctx) } // is_semantic -static int convert_to_lemon_token(const Context *ctx) +static int convert_to_lemon_token(const Context *ctx, const char *token, + unsigned int tokenlen, const Token tokenval) { - switch (ctx->tokenval) + switch (tokenval) { case ((Token) ','): return TOKEN_HLSL_COMMA; case ((Token) '='): return TOKEN_HLSL_ASSIGN; @@ -143,7 +529,7 @@ static int convert_to_lemon_token(const Context *ctx) case ((Token) '}'): return TOKEN_HLSL_RBRACE; case ((Token) TOKEN_IDENTIFIER): - #define tokencmp(t) ((ctx->tokenlen == strlen(t)) && (memcmp(ctx->token, t, ctx->tokenlen) == 0)) + #define tokencmp(t) ((tokenlen == strlen(t)) && (memcmp(token, t, tokenlen) == 0)) //case ((Token) ''): return TOKEN_HLSL_TYPECAST //if (tokencmp("")) return TOKEN_HLSL_TYPE_NAME //if (tokencmp("...")) return TOKEN_HLSL_ELIPSIS @@ -335,9 +721,9 @@ static int convert_to_lemon_token(const Context *ctx) #undef tokencmp - if (is_semantic(ctx)) + if (is_semantic(ctx, token, tokenlen)) return TOKEN_HLSL_SEMANTIC; - else if (is_usertype(ctx)) + else if (is_usertype(ctx, token, tokenlen)) return TOKEN_HLSL_USERTYPE; return TOKEN_HLSL_IDENTIFIER; @@ -352,18 +738,27 @@ static int convert_to_lemon_token(const Context *ctx) void MOJOSHADER_compile(const char *filename, - const char *source, unsigned int sourcelen, - const MOJOSHADER_preprocessorDefine *defines, - unsigned int define_count, - MOJOSHADER_includeOpen include_open, - MOJOSHADER_includeClose include_close, - MOJOSHADER_malloc m, MOJOSHADER_free f, void *d) + const char *source, unsigned int sourcelen, + const MOJOSHADER_preprocessorDefine *defines, + unsigned int define_count, + MOJOSHADER_includeOpen include_open, + MOJOSHADER_includeClose include_close, + MOJOSHADER_malloc m, MOJOSHADER_free f, void *d) { Context ctx; + TokenData data; + unsigned int tokenlen; + Token tokenval; + const char *token; + int lemon_token; + if (m == NULL) m = MOJOSHADER_internal_malloc; if (f == NULL) f = MOJOSHADER_internal_free; memset(&ctx, '\0', sizeof (Context)); + ctx.malloc = m; + ctx.free = f; + ctx.malloc_data = d; ctx.preprocessor = preprocessor_start(filename, source, sourcelen, include_open, include_close, defines, define_count, 0, m, f, d); @@ -375,15 +770,37 @@ void MOJOSHADER_compile(const char *filename, #endif do { - ctx.token = preprocessor_nexttoken(ctx.preprocessor, - &ctx.tokenlen, - &ctx.tokenval); + token = preprocessor_nexttoken(ctx.preprocessor, &tokenlen, &tokenval); + lemon_token = convert_to_lemon_token(&ctx, token, tokenlen, tokenval); + switch (lemon_token) + { + case TOKEN_HLSL_INT_CONSTANT: + data.i64 = strtoi64(token, tokenlen); + break; + + case TOKEN_HLSL_FLOAT_CONSTANT: + data.dbl = strtodouble(token, tokenlen); + break; + + case TOKEN_HLSL_SEMANTIC: + case TOKEN_HLSL_USERTYPE: + case TOKEN_HLSL_STRING_LITERAL: + case TOKEN_HLSL_IDENTIFIER: + data.string = cache_string(&ctx, token, tokenlen); + break; + + default: + data.i64 = 0; + break; + } // switch + + ParseHLSL(pParser, lemon_token, data, &ctx); + } while ((!ctx.isfail) && (tokenval != TOKEN_EOI)); - TokenData token = { ctx.token, ctx.tokenlen }; - ParseHLSL(pParser, convert_to_lemon_token(&ctx), token, &ctx); - } while (ctx.tokenval != TOKEN_EOI); ParseHLSLFree(pParser, f, d); -} + // !!! FIXME: destruct (ctx) here. + free_string_cache(&ctx); +} // MOJOSHADER_compile // end of mojoshader_compiler.c ... diff --git a/mojoshader_parser_hlsl.lemon b/mojoshader_parser_hlsl.lemon index 451dc21b..d01d0c7d 100644 --- a/mojoshader_parser_hlsl.lemon +++ b/mojoshader_parser_hlsl.lemon @@ -39,10 +39,14 @@ } %parse_failure { + // !!! FIXME: make this a proper fail() function. + ctx->isfail = 1; fprintf(stderr,"Giving up. Parser is hopelessly lost...\n"); } %stack_overflow { + // !!! FIXME: make this a proper fail() function. + ctx->isfail = 1; fprintf(stderr,"Giving up. Parser stack overflow\n"); } @@ -72,6 +76,8 @@ // The rules... +%type shader { int } // !!! FIXME: remove this later. +%destructor shader { (void) ctx; } // !!! FIXME: remove this later. shader ::= compilation_units. compilation_units ::= compilation_unit. @@ -92,8 +98,8 @@ function_signature ::= function_storageclass function_details. function_signature ::= function_details semantic. function_signature ::= function_details. -function_details ::= datatype identifier LPAREN function_arguments RPAREN. -function_details ::= VOID identifier LPAREN function_arguments RPAREN. +function_details ::= datatype IDENTIFIER LPAREN function_arguments RPAREN. +function_details ::= VOID IDENTIFIER LPAREN function_arguments RPAREN. // !!! FIXME: there is a "target" storage class that is the name of the // !!! FIXME: platform that this function is meant for...but I don't know @@ -111,22 +117,22 @@ function_arguments ::= . function_argument_list ::= function_argument. function_argument_list ::= function_argument_list COMMA function_argument. -function_argument ::= input_modifier datatype identifier semantic interpolation_mod initializer. -function_argument ::= input_modifier datatype identifier semantic interpolation_mod. -function_argument ::= input_modifier datatype identifier semantic initializer. -function_argument ::= input_modifier datatype identifier semantic. -function_argument ::= input_modifier datatype identifier interpolation_mod initializer. -function_argument ::= input_modifier datatype identifier interpolation_mod. -function_argument ::= input_modifier datatype identifier initializer. -function_argument ::= input_modifier datatype identifier. -function_argument ::= datatype identifier semantic interpolation_mod initializer. -function_argument ::= datatype identifier semantic interpolation_mod. -function_argument ::= datatype identifier semantic initializer. -function_argument ::= datatype identifier semantic. -function_argument ::= datatype identifier interpolation_mod initializer. -function_argument ::= datatype identifier interpolation_mod. -function_argument ::= datatype identifier initializer. -function_argument ::= datatype identifier. +function_argument ::= input_modifier datatype IDENTIFIER semantic interpolation_mod initializer. +function_argument ::= input_modifier datatype IDENTIFIER semantic interpolation_mod. +function_argument ::= input_modifier datatype IDENTIFIER semantic initializer. +function_argument ::= input_modifier datatype IDENTIFIER semantic. +function_argument ::= input_modifier datatype IDENTIFIER interpolation_mod initializer. +function_argument ::= input_modifier datatype IDENTIFIER interpolation_mod. +function_argument ::= input_modifier datatype IDENTIFIER initializer. +function_argument ::= input_modifier datatype IDENTIFIER. +function_argument ::= datatype IDENTIFIER semantic interpolation_mod initializer. +function_argument ::= datatype IDENTIFIER semantic interpolation_mod. +function_argument ::= datatype IDENTIFIER semantic initializer. +function_argument ::= datatype IDENTIFIER semantic. +function_argument ::= datatype IDENTIFIER interpolation_mod initializer. +function_argument ::= datatype IDENTIFIER interpolation_mod. +function_argument ::= datatype IDENTIFIER initializer. +function_argument ::= datatype IDENTIFIER. input_modifier ::= IN. input_modifier ::= INOUT. @@ -172,9 +178,9 @@ variable_declaration_details ::= scalar_or_array. // !!! FIXME: we don't handle full sampler declarations at the moment. -struct_declaration ::= STRUCT identifier(A) LBRACE struct_member_list RBRACE. +struct_declaration ::= STRUCT IDENTIFIER(A) LBRACE struct_member_list RBRACE. { - add_usertype(ctx, A.token, A.tokenlen); + add_usertype(ctx, A.string); } struct_member_list ::= struct_member. @@ -187,7 +193,7 @@ struct_member_details ::= datatype struct_member_item_list SEMICOLON. struct_member_item_list ::= scalar_or_array. struct_member_item_list ::= scalar_or_array semantic. -struct_member_item_list ::= struct_member_item_list COMMA identifier. +struct_member_item_list ::= struct_member_item_list COMMA IDENTIFIER. typedef_statement ::= TYPEDEF CONST datatype scalar_or_array. typedef_statement ::= TYPEDEF datatype scalar_or_array. @@ -196,14 +202,14 @@ variable_lowlevel ::= packoffset register. variable_lowlevel ::= packoffset. variable_lowlevel ::= register. -scalar_or_array ::= identifier LBRACKET RBRACKET. -scalar_or_array ::= identifier LBRACKET expression RBRACKET. -scalar_or_array ::= identifier. +scalar_or_array ::= IDENTIFIER LBRACKET RBRACKET. +scalar_or_array ::= IDENTIFIER LBRACKET expression RBRACKET. +scalar_or_array ::= IDENTIFIER. -packoffset ::= PACKOFFSET LPAREN identifier identifier RPAREN. -packoffset ::= PACKOFFSET LPAREN identifier RPAREN. +packoffset ::= PACKOFFSET LPAREN IDENTIFIER IDENTIFIER RPAREN. +packoffset ::= PACKOFFSET LPAREN IDENTIFIER RPAREN. -register ::= COLON REGISTER LPAREN identifier RPAREN. +register ::= COLON REGISTER LPAREN IDENTIFIER RPAREN. annotations ::= LT annotation_list GT. @@ -485,100 +491,110 @@ switch_case ::= CASE expression COLON. switch_case ::= DEFAULT COLON statement_list. switch_case ::= DEFAULT COLON. -%type identifier { TokenData } -%destructor identifier { (void) ctx; } // !!! FIXME: remove this later, it's just to shut up the compiler for now. -identifier(A) ::= IDENTIFIER(B). { A = B; } - // the expression stuff is based on Jeff Lee's ANSI C grammar. -primary_expr ::= identifier. -primary_expr ::= INT_CONSTANT. -primary_expr ::= FLOAT_CONSTANT. -primary_expr ::= STRING_LITERAL. -primary_expr ::= LPAREN expression RPAREN. - -postfix_expr ::= primary_expr. -postfix_expr ::= postfix_expr LBRACKET expression RBRACKET. -postfix_expr ::= postfix_expr LPAREN RPAREN. -postfix_expr ::= postfix_expr LPAREN argument_expr_list RPAREN. -postfix_expr ::= datatype LPAREN argument_expr_list RPAREN. // HLSL constructor -postfix_expr ::= postfix_expr DOT identifier. -postfix_expr ::= postfix_expr PLUSPLUS. -postfix_expr ::= postfix_expr MINUSMINUS. - -argument_expr_list ::= assignment_expr. -argument_expr_list ::= argument_expr_list COMMA assignment_expr. - -unary_expr ::= postfix_expr. -unary_expr ::= PLUSPLUS unary_expr. -unary_expr ::= MINUSMINUS unary_expr. -unary_expr ::= unary_operator cast_expr. - -unary_operator ::= PLUS. -unary_operator ::= MINUS. -unary_operator ::= COMPLEMENT. -unary_operator ::= EXCLAMATION. - -cast_expr ::= unary_expr. -cast_expr ::= LPAREN datatype RPAREN cast_expr. - -multiplicative_expr ::= cast_expr. -multiplicative_expr ::= multiplicative_expr STAR cast_expr. -multiplicative_expr ::= multiplicative_expr SLASH cast_expr. -multiplicative_expr ::= multiplicative_expr PERCENT cast_expr. - -additive_expr ::= multiplicative_expr. -additive_expr ::= additive_expr PLUS multiplicative_expr. -additive_expr ::= additive_expr MINUS multiplicative_expr. - -shift_expr ::= additive_expr. -shift_expr ::= shift_expr LSHIFT additive_expr. -shift_expr ::= shift_expr RSHIFT additive_expr. - -relational_expr ::= shift_expr. -relational_expr ::= relational_expr LT shift_expr. -relational_expr ::= relational_expr GT shift_expr. -relational_expr ::= relational_expr LEQ shift_expr. -relational_expr ::= relational_expr GEQ shift_expr. - -equality_expr ::= relational_expr. -equality_expr ::= equality_expr EQL relational_expr. -equality_expr ::= equality_expr NEQ relational_expr. - -and_expr ::= equality_expr. -and_expr ::= and_expr AND equality_expr. - -exclusive_or_expr ::= and_expr. -exclusive_or_expr ::= exclusive_or_expr XOR and_expr. - -inclusive_or_expr ::= exclusive_or_expr. -inclusive_or_expr ::= inclusive_or_expr OR exclusive_or_expr. - -logical_and_expr ::= inclusive_or_expr. -logical_and_expr ::= logical_and_expr ANDAND inclusive_or_expr. - -logical_or_expr ::= logical_and_expr. -logical_or_expr ::= logical_or_expr OROR logical_and_expr. - -conditional_expr ::= logical_or_expr. -conditional_expr ::= logical_or_expr QUESTION logical_or_expr COLON conditional_expr. - -assignment_expr ::= conditional_expr. -assignment_expr ::= unary_expr assignment_operator assignment_expr. - -assignment_operator ::= ASSIGN. -assignment_operator ::= MULASSIGN. -assignment_operator ::= DIVASSIGN. -assignment_operator ::= MODASSIGN. -assignment_operator ::= ADDASSIGN. -assignment_operator ::= SUBASSIGN. -assignment_operator ::= LSHIFTASSIGN. -assignment_operator ::= RSHIFTASSIGN. -assignment_operator ::= ANDASSIGN. -assignment_operator ::= XORASSIGN. -assignment_operator ::= ORASSIGN. - -expression ::= assignment_expr. -expression ::= expression COMMA assignment_expr. +%type primary_expr { Expression * } +primary_expr(A) ::= IDENTIFIER(B). { A = new_identifier_expr(ctx, B.string); } +primary_expr(A) ::= INT_CONSTANT(B). { A = new_literal_int_expr(ctx, B.i64); } +primary_expr(A) ::= FLOAT_CONSTANT(B). { A = new_literal_float_expr(ctx, B.dbl); } +primary_expr(A) ::= STRING_LITERAL(B). { A = new_literal_string_expr(ctx, B.string); } +primary_expr(A) ::= LPAREN expression(B) RPAREN. { A = B; } + +%type postfix_expr { Expression * } +postfix_expr(A) ::= primary_expr(B). { A = B; } +postfix_expr(A) ::= postfix_expr(B) LBRACKET expression(C) RBRACKET. { A = new_binary_expr(ctx, OP_DEREF_ARRAY, B, C); } +postfix_expr(A) ::= postfix_expr(B) LPAREN RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, NULL); } +postfix_expr(A) ::= postfix_expr(B) LPAREN argument_expr_list(C) RPAREN. { A = new_binary_expr(ctx, OP_CALLFUNC, B, C); } +//postfix_expr(A) ::= datatype(B) LPAREN argument_expr_list(C) RPAREN. { A = new_constructor_expr(ctx, B, C); } // HLSL constructor +postfix_expr(A) ::= postfix_expr(B) DOT IDENTIFIER(C). { A = new_binary_expr(ctx, OP_DEREF_STRUCT, B, new_identifier_expr(ctx, C.string)); } +postfix_expr(A) ::= postfix_expr(B) PLUSPLUS. { A = new_unary_expr(ctx, OP_POSTINCREMENT, B); } +postfix_expr(A) ::= postfix_expr(B) MINUSMINUS. { A = new_unary_expr(ctx, OP_POSTDECREMENT, B); } + +%type argument_expr_list { Expression * } +argument_expr_list(A) ::= assignment_expr(B). { A = B; } +argument_expr_list(A) ::= argument_expr_list(B) COMMA assignment_expr(C). { A = new_binary_expr(ctx, OP_COMMA, B, C); } + +%type unary_expr { Expression * } +unary_expr(A) ::= postfix_expr(B). { A = B; } +unary_expr(A) ::= PLUSPLUS unary_expr(B). { A = new_unary_expr(ctx, OP_PREINCREMENT, B); } +unary_expr(A) ::= MINUSMINUS unary_expr(B). { A = new_unary_expr(ctx, OP_PREDECREMENT, B); } +unary_expr(A) ::= PLUS cast_expr(B). { A = B; } // unary "+x" is always a no-op, so throw it away here. +unary_expr(A) ::= MINUS cast_expr(B). { A = new_unary_expr(ctx, OP_NEGATE, B); } +unary_expr(A) ::= COMPLEMENT cast_expr(B). { A = new_unary_expr(ctx, OP_COMPLEMENT, B); } +unary_expr(A) ::= EXCLAMATION cast_expr(B). { A = new_unary_expr(ctx, OP_NOT, B); } + +%type cast_expr { Expression * } +cast_expr(A) ::= unary_expr(B). { A = B; } +//cast_expr(A) ::= LPAREN datatype(B) RPAREN cast_expr(C). { A = new_cast_expr(ctx, B, C); } + +%type multiplicative_expr { Expression * } +multiplicative_expr(A) ::= cast_expr(B). { A = B; } +multiplicative_expr(A) ::= multiplicative_expr(B) STAR cast_expr(C). { A = new_binary_expr(ctx, OP_MULTIPLY, B, C); } +multiplicative_expr(A) ::= multiplicative_expr(B) SLASH cast_expr(C). { A = new_binary_expr(ctx, OP_DIVIDE, B, C); } +multiplicative_expr(A) ::= multiplicative_expr(B) PERCENT cast_expr(C). { A = new_binary_expr(ctx, OP_MODULO, B, C); } + +%type additive_expr { Expression * } +additive_expr(A) ::= multiplicative_expr(B). { A = B; } +additive_expr(A) ::= additive_expr(B) PLUS multiplicative_expr(C). { A = new_binary_expr(ctx, OP_ADD, B, C); } +additive_expr(A) ::= additive_expr(B) MINUS multiplicative_expr(C). { A = new_binary_expr(ctx, OP_SUBTRACT, B, C); } + +%type shift_expr { Expression * } +shift_expr(A) ::= additive_expr(B). { A = B; } +shift_expr(A) ::= shift_expr(B) LSHIFT additive_expr(C). { A = new_binary_expr(ctx, OP_LSHIFT, B, C); } +shift_expr(A) ::= shift_expr(B) RSHIFT additive_expr(C). { A = new_binary_expr(ctx, OP_RSHIFT, B, C); } + +%type relational_expr { Expression * } +relational_expr(A) ::= shift_expr(B). { A = B; } +relational_expr(A) ::= relational_expr(B) LT shift_expr(C). { A = new_binary_expr(ctx, OP_LESSTHAN, B, C); } +relational_expr(A) ::= relational_expr(B) GT shift_expr(C). { A = new_binary_expr(ctx, OP_GREATERTHAN, B, C); } +relational_expr(A) ::= relational_expr(B) LEQ shift_expr(C). { A = new_binary_expr(ctx, OP_LESSTHANOREQUAL, B, C); } +relational_expr(A) ::= relational_expr(B) GEQ shift_expr(C). { A = new_binary_expr(ctx, OP_GREATERTHANOREQUAL, B, C); } + +%type equality_expr { Expression * } +equality_expr(A) ::= relational_expr(B). { A = B; } +equality_expr(A) ::= equality_expr(B) EQL relational_expr(C). { A = new_binary_expr(ctx, OP_EQUAL, B, C); } +equality_expr(A) ::= equality_expr(B) NEQ relational_expr(C). { A = new_binary_expr(ctx, OP_NOTEQUAL, B, C); } + +%type and_expr { Expression * } +and_expr(A) ::= equality_expr(B). { A = B; } +and_expr(A) ::= and_expr(B) AND equality_expr(C). { A = new_binary_expr(ctx, OP_BINARYAND, B, C); } + +%type exclusive_or_expr { Expression * } +exclusive_or_expr(A) ::= and_expr(B). { A = B; } +exclusive_or_expr(A) ::= exclusive_or_expr(B) XOR and_expr(C). { A = new_binary_expr(ctx, OP_BINARYXOR, B, C); } + +%type inclusive_or_expr { Expression * } +inclusive_or_expr(A) ::= exclusive_or_expr(B). { A = B; } +inclusive_or_expr(A) ::= inclusive_or_expr(B) OR exclusive_or_expr(C). { A = new_binary_expr(ctx, OP_BINARYOR, B, C); } + +%type logical_and_expr { Expression * } +logical_and_expr(A) ::= inclusive_or_expr(B). { A = B; } +logical_and_expr(A) ::= logical_and_expr(B) ANDAND inclusive_or_expr(C). { A = new_binary_expr(ctx, OP_LOGICALAND, B, C); } + +%type logical_or_expr { Expression * } +logical_or_expr(A) ::= logical_and_expr(B). { A = B; } +logical_or_expr(A) ::= logical_or_expr(B) OROR logical_and_expr(C). { A = new_binary_expr(ctx, OP_LOGICALOR, B, C); } + +%type conditional_expr { Expression * } +conditional_expr(A) ::= logical_or_expr(B). { A = B; } +conditional_expr(A) ::= logical_or_expr(B) QUESTION logical_or_expr(C) COLON conditional_expr(D). { A = new_ternary_expr(ctx, OP_CONDITIONAL, B, C, D); } + +%type assignment_expr { Expression * } +assignment_expr(A) ::= conditional_expr(B). { A = B; } +assignment_expr(A) ::= unary_expr(B) ASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_ASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) MULASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_MULASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) DIVASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_DIVASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) MODASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_MODASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) ADDASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_ADDASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) SUBASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_SUBASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) LSHIFTASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_LSHIFTASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) RSHIFTASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_RSHIFTASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) ANDASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_ANDASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) XORASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_XORASSIGN, B, C); } +assignment_expr(A) ::= unary_expr(B) ORASSIGN assignment_expr(C). { A = new_binary_expr(ctx, OP_ORASSIGN, B, C); } + +%type expression { Expression * } +expression(A) ::= assignment_expr(B). { A = B; } +expression(A) ::= expression(B) COMMA assignment_expr(C). { A = new_binary_expr(ctx, OP_COMMA, B, C); } // end of mojoshader_parser_hlsl.lemon ...