import * as moo from "moo";
import { combineRegex } from "./helper";

/** Localized spellings of the built-in type keywords. */
export interface I18N_LEXER_TYPES {
  RK_INTEGER: string;
  RK_REAL: string;
  RK_VOID: string;
  RK_CHARACTER: string;
  RK_STRING: string;
  RK_BOOLEAN: string;
}

/** Localized spellings of the boolean literals. */
export interface I18N_LEXER_BOOLVAL {
  RK_FALSE: string;
  RK_TRUE: string;
}

/** Localized spellings of the logical operators. */
export interface I18N_LEXER_LOGICOP {
  RK_LOGICAL_NOT: string;
  RK_LOGICAL_AND: string;
  RK_LOGICAL_OR: string;
}

/** Localized spellings of the command / control-flow keywords. */
export interface I18N_LEXER_COMMANDS {
  RK_PROGRAM: string;
  RK_SWITCH: string;
  RK_CASE: string;
  RK_DEFAULT: string;
  RK_CONST: string;
  RK_FUNCTION: string;
  RK_RETURN: string;
  RK_FOR: string;
  RK_FOR_ALT: string;
  RK_FOR_FROM: string;
  RK_FOR_TO: string;
  RK_FOR_PASS: string;
  RK_BREAK: string;
  RK_DO: string;
  RK_DO_UNTIL: string;
  RK_WHILE: string;
  RK_WHILE_ALT: string;
  RK_IF: string;
  RK_ELSE: string;
  RK_REFERENCE: string;
}

/** Localized names of the language libraries. */
export interface I18N_LANG_LIBS {
  $ioLib: string;
  $strLib: string;
  $mathLib: string;
  $langLib: string;
  $arrayLib: string;
}

/** Localized names of the language functions, including the main function. */
export interface I18N_LANG_FUNCS {
  main_function: string;
  $read: string;
  $write: string;
  $numElements: string;
  $matrixLines: string;
  $matrixColumns: string;
  $substring: string;
  $length: string;
  $uppercase: string;
  $lowercase: string;
  $charAt: string;
  $isReal: string;
  $isInt: string;
  $isBool: string;
  $castReal: string;
  $castInt: string;
  $castBool: string;
  $castString: string;
  $castChar: string;
  $sin: string;
  $cos: string;
  $tan: string;
  $sqrt: string;
  $pow: string;
  $log: string;
  $abs: string;
  $negate: string;
  $invert: string;
  $max: string;
  $min: string;
  $rand: string;
}

/** Everything `makeLexer` needs to build a lexer for one locale. */
export interface I18nLexer {
  commands: I18N_LEXER_COMMANDS;
  logicOp: I18N_LEXER_LOGICOP;
  boolVal: I18N_LEXER_BOOLVAL;
  types: I18N_LEXER_TYPES;
  langLibs: I18N_LANG_LIBS;
  langFuncs: I18N_LANG_FUNCS;
}

// General Regex Rules
const ID = "[a-zA-Z_][a-zA-Z0-9_]*";
const HEX_DIGIT = "[0-9a-fA-F]";
const OCTAL_DIGIT = "[0-7]";
// Backslash followed by one to three octal digits (first digit limited to
// 0-3 in the three-digit form).
const ESC_OCTAL = RegExp(
  `\\\\[0-3]${OCTAL_DIGIT}${OCTAL_DIGIT}|\\\\${OCTAL_DIGIT}${OCTAL_DIGIT}|\\\\${OCTAL_DIGIT}`
);
// Backslash, `u`, then exactly four hex digits.
const ESC_UNICODE = RegExp(
  `\\\\u${HEX_DIGIT}${HEX_DIGIT}${HEX_DIGIT}${HEX_DIGIT}`
);
// NOTE(review): kept byte-identical on purpose. The trailing `|` adds an empty
// alternative, and the commas inside the class are literal members (so `\,`
// is accepted). Both look accidental, but tightening the pattern would reject
// input that lexes today - confirm against the parser before changing.
const ESC_SEQ_BASE = /\\[b,t,n,f,r,",',\\]|/;
const ESC_SEQ = combineRegex`${ESC_SEQ_BASE}|${ESC_UNICODE}|${ESC_OCTAL}`;
const STRING_CHARACTER = combineRegex`${ESC_SEQ}|[^"\\\\]`;

/**
 * Bundles a compiled moo lexer with the reserved-key table and the i18n
 * definitions it was built from.
 */
export class IVProgLexer {
  constructor(
    private readonly RKs: Record<string, string>,
    public lexer: moo.Lexer,
    private readonly i18nLexer: I18nLexer
  ) {}

  /** @returns the token-type to keyword-text table used to build the lexer. */
  getReservedKeys(): Record<string, string> {
    return this.RKs;
  }

  /** @returns the localized type keywords. */
  getTypeKeys() {
    return this.i18nLexer.types;
  }

  /** @returns the localized library names. */
  getLangLibs() {
    return this.i18nLexer.langLibs;
  }

  /** @returns the localized function names. */
  getLangFuncs() {
    return this.i18nLexer.langFuncs;
  }
}

/**
 * Builds an `IVProgLexer` for the given localized keyword set.
 *
 * The reserved-key table is filled from `lexer.types`, `lexer.boolVal` and
 * `lexer.commands` (in that order), plus the three logical operators under the
 * fixed token types `AND_OPERATOR`, `OR_OPERATOR` and `NOT_OPERATOR`. The table
 * is passed to `moo.keywords` and attached to the `ID` rule.
 *
 * @param lexer localized keyword, library and function definitions
 * @returns a wrapper holding the compiled moo lexer and its source tables
 */
export function makeLexer(lexer: I18nLexer): IVProgLexer {
  const RKs: Record<string, string> = {};
  for (const group of [lexer.types, lexer.boolVal, lexer.commands]) {
    for (const [key, value] of Object.entries(group)) {
      RKs[key] = value;
    }
  }
  RKs["AND_OPERATOR"] = lexer.logicOp.RK_LOGICAL_AND;
  RKs["OR_OPERATOR"] = lexer.logicOp.RK_LOGICAL_OR;
  RKs["NOT_OPERATOR"] = lexer.logicOp.RK_LOGICAL_NOT;
  const RESERVED_KEYS = moo.keywords(RKs);

  // Rule order is kept exactly as before (e.g. REAL is listed ahead of
  // INTEGER, ASSIGNMENT ahead of RELATIONAL_OPERATOR).
  const moolexer = moo.compile({
    OPEN_BRACE: /\[/,
    CLOSE_BRACE: /\]/,
    OPEN_PARENTHESIS: /\(/,
    CLOSE_PARENTHESIS: /\)/,
    OPEN_CURLY: /\{/,
    CLOSE_CURLY: /\}/,
    COMMA: /,/,
    ASSIGNMENT: /<-|←/,
    REAL: /[0-9]+\.[0-9]*[eE][+-]?[0-9]+|[0-9]+\.[0-9]+/,
    INTEGER: RegExp(`(?:0x|0X)${HEX_DIGIT}+|(?:0b|0B)[0-1]+|[0-9]+`),
    SUM_OP: /[+-]/,
    MULTI_OP: /[*/%]/,
    RELATIONAL_OPERATOR: />=|==|<=|>|<|!=/,
    COLON: /:/,
    STRING: combineRegex`"(?:${STRING_CHARACTER})*?"`,
    CHARACTER: combineRegex`'(?:${ESC_SEQ}|[^'\\\\])'`,
    // LIB_ID: RegExp(`${ID}\.${ID}`), cannot filter keywords here
    EOS: { match: /;\r?\n?|[\r\n]+/, lineBreaks: true },
    WHITESPACE: /(?: |\t)+/,
    // Line comment: `//` up to (not including) the line terminator.
    // Block comment: slash-star up to the first star-slash, across lines.
    // The previous pattern used `[^$]`, which excludes a literal dollar sign
    // rather than meaning "until end of line", so any comment containing `$`
    // failed to lex as a comment.
    COMMENTS: { match: /\/\/.*|\/\*[\s\S]*?\*\//, lineBreaks: true },
    RK_REFERENCE: /&/,
    ID: { match: RegExp(ID), type: RESERVED_KEYS },
    DOT: /\./,
  });
  return new IVProgLexer(RKs, moolexer, lexer);
}