// lexer.ts
  1. import * as moo from "moo";
  2. import { combineRegex } from "./helper";
  3. export interface I18N_LEXER_TYPES {
  4. RK_INTEGER: string;
  5. RK_REAL: string;
  6. RK_VOID: string;
  7. RK_CHARACTER: string;
  8. RK_STRING: string;
  9. RK_BOOLEAN: string;
  10. }
  11. export interface I18N_LEXER_BOOLVAL {
  12. RK_FALSE: string;
  13. RK_TRUE: string;
  14. }
  15. export interface I18N_LEXER_LOGICOP {
  16. RK_LOGICAL_NOT: string;
  17. RK_LOGICAL_AND: string;
  18. RK_LOGICAL_OR: string;
  19. }
  20. export interface I18N_LEXER_COMMANDS {
  21. RK_PROGRAM: string;
  22. RK_SWITCH: string;
  23. RK_CASE: string;
  24. RK_DEFAULT: string;
  25. RK_CONST: string;
  26. RK_FUNCTION: string;
  27. RK_RETURN: string;
  28. RK_FOR: string;
  29. RK_FOR_ALT: string;
  30. RK_FOR_FROM: string;
  31. RK_FOR_TO: string;
  32. RK_FOR_PASS: string;
  33. RK_BREAK: string;
  34. RK_DO: string;
  35. RK_DO_UNTIL: string;
  36. RK_WHILE: string;
  37. RK_WHILE_ALT: string;
  38. RK_IF: string;
  39. RK_ELSE: string;
  40. RK_REFERENCE: string;
  41. }
  42. export interface I18N_LANG_LIBS {
  43. $ioLib: string;
  44. $strLib: string;
  45. $mathLib: string;
  46. $langLib: string;
  47. $arrayLib: string;
  48. }
  49. export interface I18N_LANG_FUNCS {
  50. main_function: string;
  51. $read: string;
  52. $write: string;
  53. $numElements: string;
  54. $matrixLines: string;
  55. $matrixColumns: string;
  56. $substring: string;
  57. $length: string;
  58. $uppercase: string;
  59. $lowercase: string;
  60. $charAt: string;
  61. $isReal: string;
  62. $isInt: string;
  63. $isBool: string;
  64. $castReal: string;
  65. $castInt: string;
  66. $castBool: string;
  67. $castString: string;
  68. $castChar: string;
  69. $sin: string;
  70. $cos: string;
  71. $tan: string;
  72. $sqrt: string;
  73. $pow: string;
  74. $log: string;
  75. $abs: string;
  76. $negate: string;
  77. $invert: string;
  78. $max: string;
  79. $min: string;
  80. $rand: string;
  81. }
  82. export interface I18nLexer {
  83. commands: I18N_LEXER_COMMANDS;
  84. logicOp: I18N_LEXER_LOGICOP;
  85. boolVal: I18N_LEXER_BOOLVAL;
  86. types: I18N_LEXER_TYPES;
  87. langLibs: I18N_LANG_LIBS;
  88. langFuncs: I18N_LANG_FUNCS;
  89. }
  90. // General Regex Rules
  91. const ID = "[a-zA-Z_][a-zA-Z0-9_]*";
  92. const HEX_DIGIT = "[0-9a-fA-F]";
  93. const OCTAL_DIGIT = "[0-7]";
  94. const ESC_OCTAL = RegExp(
  95. `\\\\[0-3]${OCTAL_DIGIT}${OCTAL_DIGIT}|\\\\${OCTAL_DIGIT}${OCTAL_DIGIT}|\\\\${OCTAL_DIGIT}`
  96. );
  97. const ESC_UNICODE = RegExp(
  98. `\\\\u${HEX_DIGIT}${HEX_DIGIT}${HEX_DIGIT}${HEX_DIGIT}`
  99. );
  100. const ESC_SEQ_BASE = /\\[b,t,n,f,r,",',\\]|/;
  101. const ESC_SEQ = combineRegex`${ESC_SEQ_BASE}|${ESC_UNICODE}|${ESC_OCTAL}`;
  102. const STRING_CHARACTER = combineRegex`${ESC_SEQ}|[^"\\\\]`;
  103. export class IVProgLexer {
  104. constructor(
  105. private RKs: Record<string, string>,
  106. public lexer: moo.Lexer,
  107. private i18nLexer: I18nLexer
  108. ) {}
  109. getReservedKeys(): Record<string, string> {
  110. return this.RKs;
  111. }
  112. getTypeKeys() {
  113. return this.i18nLexer.types;
  114. }
  115. getLangLibs() {
  116. return this.i18nLexer.langLibs;
  117. }
  118. getLangFuncs() {
  119. return this.i18nLexer.langFuncs;
  120. }
  121. }
  122. export function makeLexer(lexer: I18nLexer): IVProgLexer {
  123. const RKs: Record<string, string> = {};
  124. Object.entries(lexer.types).forEach(([key, value]) => (RKs[key] = value));
  125. Object.entries(lexer.boolVal).forEach(([key, value]) => (RKs[key] = value));
  126. Object.entries(lexer.commands).forEach(([key, value]) => (RKs[key] = value));
  127. RKs["AND_OPERATOR"] = lexer.logicOp.RK_LOGICAL_AND;
  128. RKs["OR_OPERATOR"] = lexer.logicOp.RK_LOGICAL_OR;
  129. RKs["NOT_OPERATOR"] = lexer.logicOp.RK_LOGICAL_NOT;
  130. const RESERVED_KEYS = moo.keywords(RKs);
  131. const moolexer = moo.compile({
  132. OPEN_BRACE: /\[/,
  133. CLOSE_BRACE: /\]/,
  134. OPEN_PARENTHESIS: /\(/,
  135. CLOSE_PARENTHESIS: /\)/,
  136. OPEN_CURLY: /\{/,
  137. CLOSE_CURLY: /\}/,
  138. COMMA: /,/,
  139. ASSIGNMENT: /<-|←/,
  140. REAL: /[0-9]+\.[0-9]*[eE][+-]?[0-9]+|[0-9]+\.[0-9]+/,
  141. INTEGER: RegExp(`(?:0x|0X)${HEX_DIGIT}+|(?:0b|0B)[0-1]+|[0-9]+`),
  142. SUM_OP: /[+-]/,
  143. MULTI_OP: /[*/%]/,
  144. RELATIONAL_OPERATOR: />=|==|<=|>|<|!=/,
  145. COLON: /:/,
  146. STRING: combineRegex`"(?:${STRING_CHARACTER})*?"`,
  147. CHARACTER: combineRegex`'(?:${ESC_SEQ}|[^'\\\\])'`,
  148. // LIB_ID: RegExp(`${ID}\.${ID}`), cannot filter keywords here
  149. EOS: { match: /;\r?\n?|[\r\n]+/, lineBreaks: true },
  150. WHITESPACE: /(?: |\t)+/,
  151. COMMENTS: { match: /\/\/[^$]*?$|\/\*[^$]*?\*\//, lineBreaks: true },
  152. RK_REFERENCE: /&/,
  153. ID: { match: RegExp(ID), type: RESERVED_KEYS },
  154. DOT: /\./,
  155. });
  156. return new IVProgLexer(RKs, moolexer, lexer);
  157. }