# ccleaner.py
  1. import re
  2. VAR = r"(?:int|float|double|char|long|string)\s*\*?\s+([^;\(\)]+);$"
  3. FUNC = re.compile(r"(?:int|float|double|char|long|string|void)\s*\*?\s+([a-zA-Z0-9_]+)\s*\([^)]*\)\s*(?:{|;)")
  4. FUNC_PARAM = re.compile(r"\(\s*(?:int|float|double|char|long|string)\s*\*?\s+([^;\(\)]+)\)\s*(?:{|;)")
  5. PARAM_DECL = re.compile(r"(?:int|float|double|char|long|string)\s*\*?\s+([a-zA-Z0-9_]+)")
  6. class CodeCleaner:
  7. def __init__ (self):
  8. self.varCount = 0
  9. self.funcCount = 0
  10. self.symbolMap = {}
  11. # def cleanCode (self,text):
  12. # lines = text.splitlines()
  13. # for line in lines:
  14. # if FUNC.search(line) != None:
  15. # self.cleanFunc(line)
  16. # if FUNC_PARAM.search(line) != None:
  17. # self.cleanFuncParam(line)
  18. # elif VAR.search(line) != None:
  19. # self.cleanVar(line)
  20. # regex = {}
  21. # for k in self.symbolMap:
  22. # regex[k] = re.compile(r"[^\"](\W|^)({})(\W)".format(k))
  23. # finalText = []
  24. # for line in lines:
  25. # for k in regex:
  26. # line = regex[k].sub(r"\1{}\3".format(self.symbolMap[k]), line)
  27. # finalText.append(line)
  28. #
  29. # return "\n".join(finalText)
  30. def cleanCode (self,text):
  31. funcs = FUNC.finditer(text)
  32. for _,m in enumerate(funcs, start=1):
  33. #print("Func delc: {}".format(m.group()))
  34. if m == None:
  35. continue
  36. self.cleanFunc(m.group())
  37. params = FUNC_PARAM.finditer(m.group())
  38. #print("p",list(params))
  39. for _, p in enumerate(params, start=1):
  40. if p == None:
  41. continue
  42. #print("Func param: {}".format(p.group()))
  43. self.cleanFuncParam(p.group())
  44. nvars = re.finditer(VAR,text,re.MULTILINE)
  45. for _, m in enumerate(nvars, start=1):
  46. if m == None:
  47. continue
  48. #print("vars: {}".format(m.group()))
  49. self.cleanVar(m.group())
  50. #print(self.symbolMap)
  51. stringMode = False
  52. lineComment = False
  53. blockComment = False
  54. lastChar = ''
  55. output = []
  56. alpha = ""
  57. for c in text:
  58. if stringMode:
  59. stringMode = not (c == '"')
  60. output.append(c)
  61. elif lineComment:
  62. lineComment = c != '\n'
  63. output.append(c)
  64. elif blockComment:
  65. blockComment = (lastChar+c) != '*/'
  66. output.append(c)
  67. elif c == '/':
  68. if lastChar == '/':
  69. lineComment = True
  70. output.append(c)
  71. elif c == '*':
  72. if lastChar == '/':
  73. blockComment = True
  74. output.append(c)
  75. elif c == '"':
  76. alpha = ""
  77. stringMode = True
  78. output.append(c)
  79. elif re.match("[a-zA-Z0-9_]",c) != None:
  80. alpha += c
  81. else:
  82. if len(alpha) > 0 and alpha in self.symbolMap:
  83. #print("Replacing {} with {}.".format(alpha, self.symbolMap[alpha]))
  84. output.append(self.symbolMap[alpha])
  85. alpha = ""
  86. elif len(alpha) > 0:
  87. #print("Reinserting {}".format(alpha))
  88. output.append(alpha)
  89. alpha = ""
  90. output.append(c)
  91. lastChar = c
  92. return "".join(output)
  93. def cleanFunc (self,line):
  94. match = FUNC.search(line)
  95. if match == None:
  96. print(line)
  97. varID = match.group(1)
  98. self.symbolMap[varID] = "f{}".format(self.funcCount)
  99. self.funcCount += 1
  100. def cleanFuncParam (self,line):
  101. match = FUNC_PARAM.findall(line)[0]
  102. ids = match.split(",")
  103. for i in range(len(ids)):
  104. varID = ids[i].strip()
  105. if varID.find(" ") == -1:
  106. varID = re.search("[a-zA-Z_][a-zA-Z0-9_]*",varID).group()
  107. if varID not in self.symbolMap:
  108. self.symbolMap[varID] = "v{}".format(self.varCount)
  109. self.varCount += 1
  110. else:
  111. varID = varID.split(" ")[1].strip()
  112. isMatch = re.search("[a-zA-Z_][a-zA-Z0-9_]*",varID)
  113. if isMatch == None:
  114. print(match, ids[i])
  115. continue
  116. if varID not in self.symbolMap:
  117. self.symbolMap[varID] = "v{}".format(self.varCount)
  118. self.varCount += 1
  119. def cleanVar (self,line):
  120. match = re.findall(VAR, line)[0]
  121. ids = match.split(",")
  122. for i in range(len(ids)):
  123. varID = ids[i].strip()
  124. if varID.find("=") != -1:
  125. varID = varID.split("=")[0].strip()
  126. isMatch = re.search("[a-zA-Z_][a-zA-Z0-9_]*",varID)
  127. if isMatch == None:
  128. continue
  129. varID = isMatch.group()
  130. if varID not in self.symbolMap:
  131. self.symbolMap[varID] = "v{}".format(self.varCount)
  132. self.varCount += 1