lexer.ts 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. import * as moo from "moo";
  2. import { combineRegex } from "./helper";
  /**
   * Reserved words naming the language's data types.
   *
   * `makeLexer` registers each key of this interface as a token name and
   * passes the associated string to `moo.keywords` as the reserved word.
   */
  export interface I18N_LEXER_TYPES {
    RK_BOOLEAN: string;
    RK_CHARACTER: string;
    RK_INTEGER: string;
    RK_REAL: string;
    RK_STRING: string;
  }
  /**
   * Reserved words for the two boolean literals.
   *
   * `makeLexer` copies both entries into the keyword table it hands to
   * `moo.keywords`.
   */
  export interface I18N_LEXER_BOOLVAL {
    RK_TRUE: string;
    RK_FALSE: string;
  }
  /**
   * Reserved words for the logical operators.
   *
   * `makeLexer` reads these three properties by name when it fills the
   * keyword table.
   */
  export interface I18N_LEXER_LOGICOP {
    RK_LOGICAL_AND: string;
    RK_LOGICAL_OR: string;
    RK_LOGICAL_NOT: string;
  }
  /**
   * Reserved words for statements and declarations.
   *
   * Every entry is registered as a keyword by `makeLexer`. Members are
   * grouped below by what their names suggest; the grouping has no effect
   * on the type.
   */
  export interface I18N_LEXER_COMMANDS {
    // Program and declarations
    RK_PROGRAM: string;
    RK_CONST: string;
    RK_VOID: string;
    RK_FUNCTION: string;
    RK_RETURN: string;

    // Branching
    RK_IF: string;
    RK_ELSE: string;
    RK_SWITCH: string;
    RK_CASE: string;
    RK_DEFAULT: string;

    // Loops
    RK_FOR: string;
    RK_FOR_ALT: string;
    RK_FOR_FROM: string;
    RK_FOR_TO: string;
    RK_FOR_PASS: string;
    RK_WHILE: string;
    RK_WHILE_ALT: string;
    RK_DO: string;
    RK_DO_UNTIL: string;
    RK_BREAK: string;

    // `makeLexer` also compiles this value with `RegExp(...)` into its own
    // lexer rule, so it is interpreted as a regular-expression source.
    RK_REFERENCE: string;
  }
  /**
   * Names of the language's built-in libraries, keyed by internal
   * `$`-prefixed identifiers. Exposed unchanged through
   * `IVProgLexer.getLangLibs()`.
   */
  export interface I18N_LANG_LIBS {
    $arrayLib: string;
    $ioLib: string;
    $langLib: string;
    $mathLib: string;
    $strLib: string;
  }
  /**
   * Names of the program entry point and of the built-in functions, keyed by
   * internal identifiers. Exposed unchanged through
   * `IVProgLexer.getLangFuncs()`.
   *
   * Members are grouped by apparent purpose (judged from their names only);
   * the grouping has no effect on the type.
   */
  export interface I18N_LANG_FUNCS {
    main_function: string;

    // Input / output
    $read: string;
    $write: string;

    // Arrays and matrices
    $numElements: string;
    $matrixLines: string;
    $matrixColumns: string;

    // Strings
    $substring: string;
    $length: string;
    $uppercase: string;
    $lowercase: string;
    $charAt: string;

    // Type checks
    $isReal: string;
    $isInt: string;
    $isBool: string;

    // Type casts
    $castReal: string;
    $castInt: string;
    $castBool: string;
    $castString: string;
    $castChar: string;

    // Math
    $sin: string;
    $cos: string;
    $tan: string;
    $sqrt: string;
    $pow: string;
    $log: string;
    $abs: string;
    $negate: string;
    $invert: string;
    $max: string;
    $min: string;
    $rand: string;
  }
  /**
   * Complete language-specific vocabulary consumed by `makeLexer`: the four
   * reserved-word tables plus the library and function name tables.
   */
  export interface I18nLexer {
    // Reserved-word tables (fed to `moo.keywords` by `makeLexer`).
    types: I18N_LEXER_TYPES;
    boolVal: I18N_LEXER_BOOLVAL;
    logicOp: I18N_LEXER_LOGICOP;
    commands: I18N_LEXER_COMMANDS;

    // Name tables that `IVProgLexer` only stores and returns.
    langLibs: I18N_LANG_LIBS;
    langFuncs: I18N_LANG_FUNCS;
  }
  /**
   * Bundles a compiled moo lexer with the tables it was built from, so that
   * consumers can look up reserved words, rule names and library/function
   * names alongside the lexer itself.
   */
  export class IVProgLexer {
    private RKs: Record<string, string>;
    public lexer: moo.Lexer;
    private rules: I18N_LEXER_RULES;
    private i18nLexer: I18nLexer;

    /**
     * @param RKs reserved-word table (token name -> keyword text)
     * @param lexer compiled moo lexer, exposed as the public `lexer` field
     * @param rules table of rule names
     * @param i18nLexer vocabulary the lexer was generated from
     */
    constructor (
      RKs: Record<string, string>,
      lexer: moo.Lexer,
      rules: I18N_LEXER_RULES,
      i18nLexer: I18nLexer
    ) {
      this.RKs = RKs;
      this.lexer = lexer;
      this.rules = rules;
      this.i18nLexer = i18nLexer;
    }

    /** @returns the reserved-word table given to the constructor. */
    getReservedKeys (): Record<string, string> {
      const reserved = this.RKs;
      return reserved;
    }

    /** @returns the rule-name table given to the constructor. */
    getRules (): I18N_LEXER_RULES {
      const ruleNames = this.rules;
      return ruleNames;
    }

    /** @returns the `types` table of the underlying vocabulary. */
    getTypeKeys (): I18N_LEXER_TYPES {
      const { types } = this.i18nLexer;
      return types;
    }

    /** @returns the `langLibs` table of the underlying vocabulary. */
    getLangLibs (): I18N_LANG_LIBS {
      const { langLibs } = this.i18nLexer;
      return langLibs;
    }

    /** @returns the `langFuncs` table of the underlying vocabulary. */
    getLangFuncs (): I18N_LANG_FUNCS {
      const { langFuncs } = this.i18nLexer;
      return langFuncs;
    }
  }
  /**
   * Names of every token type the lexer can emit: the reserved-word names
   * inherited from the four keyword interfaces plus the structural rules
   * declared here. `makeLexer` fills an object of this shape in which each
   * property holds its own name.
   */
  export interface I18N_LEXER_RULES
    extends I18N_LEXER_TYPES,
      I18N_LEXER_COMMANDS,
      I18N_LEXER_BOOLVAL,
      I18N_LEXER_LOGICOP {
    // Brackets
    OPEN_BRACE: string;
    CLOSE_BRACE: string;
    OPEN_PARENTHESIS: string;
    CLOSE_PARENTHESIS: string;
    OPEN_CURLY: string;
    CLOSE_CURLY: string;

    // Punctuation
    COMMA: string;
    COLON: string;
    DOT: string;
    EOS: string;

    // Operators
    ASSIGNMENT: string;
    SUM_OP: string;
    MULTI_OP: string;
    RELATIONAL_OPERATOR: string;

    // Literals
    REAL: string;
    INTEGER: string;
    STRING: string;
    CHARACTER: string;

    // Identifiers; RK_REFERENCE is also inherited from I18N_LEXER_COMMANDS
    // and is redeclared here with the same type.
    RK_REFERENCE: string;
    ID: string;

    // Trivia and errors
    WHITESPACE: string;
    COMMENTS: string;
    ERROR: string;
  }
  // General Regex Rules

  // Regex source for identifiers: a letter or underscore followed by any
  // number of letters, digits or underscores.
  const ID = "[a-zA-Z_][a-zA-Z0-9_]*";
  // Regex sources for a single hexadecimal / octal digit.
  const HEX_DIGIT = "[0-9a-fA-F]";
  const OCTAL_DIGIT = "[0-7]";
  // Octal escape: a backslash followed by one to three octal digits, where a
  // three-digit form must start with 0-3.
  const ESC_OCTAL = RegExp(
    `\\\\[0-3]${OCTAL_DIGIT}${OCTAL_DIGIT}|\\\\${OCTAL_DIGIT}${OCTAL_DIGIT}|\\\\${OCTAL_DIGIT}`
  );
  // Unicode escape: a backslash, `u`, and exactly four hexadecimal digits.
  const ESC_UNICODE = RegExp(
    `\\\\u${HEX_DIGIT}${HEX_DIGIT}${HEX_DIGIT}${HEX_DIGIT}`
  );
  // Simple escape: a backslash followed by one of b t n f r " ' \.
  // The class must not contain commas (inside [...] a comma is a literal, so
  // `\,` would be accepted) and the pattern must not end in `|` (an empty
  // alternative matches the empty string, which would let an empty character
  // literal through wherever this pattern is embedded).
  const ESC_SEQ_BASE = /\\[btnfr"'\\]/;
  // Any escape sequence: the simple, unicode and octal forms joined as
  // alternatives (`combineRegex` comes from ./helper and presumably splices
  // the interpolated patterns' sources into one RegExp).
  const ESC_SEQ =
    combineRegex`${ESC_SEQ_BASE}|${ESC_UNICODE}|${ESC_OCTAL}`;
  // One unit of a string literal body: an escape sequence, or any single
  // character that is neither a double quote nor a backslash.
  const STRING_CHARACTER =
    combineRegex`${ESC_SEQ}|[^"\\\\]`;
  /**
   * Builds an `IVProgLexer` for the given vocabulary.
   *
   * Collects every reserved word of `lexer` into a keyword table, declares
   * the moo rules for comments, punctuation, literals, operators and
   * identifiers, compiles them, and returns the compiled lexer together with
   * the keyword table and a table of rule names.
   *
   * @param lexer vocabulary (reserved words, library and function names)
   * @returns the compiled lexer wrapped with its lookup tables
   */
  export function makeLexer (lexer: I18nLexer): IVProgLexer {
    // token name -> reserved word text
    const RKs: Record<string, string> = {};
    // rule/token name -> the same name
    const rules: Record<string, string> = {};

    // Register the keyword groups in the order types, boolVal, commands.
    for (const group of [lexer.types, lexer.boolVal, lexer.commands]) {
      for (const [key, value] of Object.entries(group)) {
        RKs[key] = value;
        rules[key] = key;
      }
    }

    // Logical operators are registered by explicit name.
    const logicKeys = [
      "RK_LOGICAL_AND",
      "RK_LOGICAL_OR",
      "RK_LOGICAL_NOT",
    ] as const;
    for (const key of logicKeys) {
      RKs[key] = lexer.logicOp[key];
      rules[key] = key;
    }

    const RESERVED_KEYS = moo.keywords(RKs);
    const lexerRules: moo.Rules = {
      // Line comment: `//` up to (not including) the line terminator.
      // Block comment: `/*` up to the first `*/`, possibly spanning lines.
      // `.` and `[\s\S]` are used instead of `[^$]` because `$` inside a
      // character class is a literal dollar sign, so `[^$]` would reject any
      // comment that contains one.
      COMMENTS: { match: /\/\/.*|\/\*[\s\S]*?\*\//, lineBreaks: true },
      OPEN_BRACE: /\[/,
      CLOSE_BRACE: /\]/,
      OPEN_PARENTHESIS: /\(/,
      CLOSE_PARENTHESIS: /\)/,
      OPEN_CURLY: /\{/,
      CLOSE_CURLY: /\}/,
      COMMA: /,/,
      ASSIGNMENT: /<-|←/,
      // REAL is declared before INTEGER.
      REAL: /[0-9]+\.[0-9]*[eE][+-]?[0-9]+|[0-9]+\.[0-9]+/,
      INTEGER: RegExp(`(?:0x|0X)${HEX_DIGIT}+|(?:0b|0B)[0-1]+|[0-9]+`),
      SUM_OP: /[+-]/,
      MULTI_OP: /[*/%]/,
      RELATIONAL_OPERATOR: />=|==|<=|>|<|!=/,
      COLON: /:/,
      STRING: combineRegex`"(?:${STRING_CHARACTER})*?"`,
      CHARACTER: combineRegex`'(?:${ESC_SEQ}|[^'\\\\])'`,
      // End of statement: a semicolon with an optional line break, or a run
      // of line breaks.
      EOS: { match: /;\r?\n?|[\r\n]+/, lineBreaks: true },
      WHITESPACE: /(?: |\t)+/,
      // The reference marker is compiled as a regular expression and is
      // declared before ID.
      RK_REFERENCE: RegExp(lexer.commands.RK_REFERENCE),
      // Identifiers; the keyword map retypes those that are reserved words.
      ID: { match: RegExp(ID), type: RESERVED_KEYS },
      DOT: /\./,
      ERROR: { match: /[\$?`]/, error: true },
    };

    // Every structural rule name is also recorded in the name table.
    for (const ruleName of Object.keys(lexerRules)) {
      rules[ruleName] = ruleName;
    }

    const moolexer = moo.compile(lexerRules);
    // `rules` is built dynamically, so the compiler cannot see that it has
    // the shape of I18N_LEXER_RULES; hence the double assertion.
    return new IVProgLexer(
      RKs,
      moolexer,
      rules as unknown as I18N_LEXER_RULES,
      lexer
    );
  }