123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
- import re
# Shared fragments for the declaration patterns below.
# Base types that may start a variable or parameter declaration.
_VALUE_TYPES = "int|float|double|char|long|string"
# Optional pointer star, then the whitespace separating the type from the name.
_TYPE_GAP = r"\s*\*?\s+"
# Everything up to the next ';' or parenthesis: a raw, comma-separated
# declarator list.
_DECLARATORS = r"([^;\(\)]+)"
# A single identifier-like word.
_WORD = r"([a-zA-Z0-9_]+)"
# A declaration or definition header ends in '{' or ';'.
_HEADER_END = r"\s*(?:{|;)"

# Variable declaration terminated by ';' at the end of the line/string.
# Group 1 is the raw declarator list.  Deliberately left as an uncompiled
# pattern string: callers pass it to module-level ``re`` functions and add
# flags (e.g. ``re.MULTILINE``) themselves.
VAR = r"(?:" + _VALUE_TYPES + r")" + _TYPE_GAP + _DECLARATORS + r";$"

# Function prototype or definition header; also accepts a ``void`` return
# type.  Group 1 is the function name.
FUNC = re.compile(
    r"(?:" + _VALUE_TYPES + r"|void)" + _TYPE_GAP + _WORD
    + r"\s*\([^)]*\)" + _HEADER_END
)

# Parenthesised parameter list whose first parameter starts with a known base
# type.  Group 1 is the list with that leading type (and pointer star) removed.
FUNC_PARAM = re.compile(
    r"\(\s*(?:" + _VALUE_TYPES + r")" + _TYPE_GAP + _DECLARATORS + r"\)"
    + _HEADER_END
)

# A single "<type> <name>" pair; group 1 is the name.
PARAM_DECL = re.compile(r"(?:" + _VALUE_TYPES + r")" + _TYPE_GAP + _WORD)
class CodeCleaner:
    """Replace declared C-style identifiers with positional placeholders.

    Function names matched by ``FUNC`` are mapped to ``f0``, ``f1``, ... and
    parameter / variable names matched by ``FUNC_PARAM`` and ``VAR`` are
    mapped to ``v0``, ``v1``, ....  The mapping lives in ``symbolMap`` and the
    counters in ``funcCount`` / ``varCount``; all three persist across calls
    on the same instance.
    """

    # Type words that can end up in name position (e.g. an unnamed parameter
    # such as ``(int a, char)``); they must never be registered as variables.
    _TYPE_KEYWORDS = frozenset((
        "int", "float", "double", "char", "long", "string", "void",
        "short", "signed", "unsigned", "bool",
    ))

    # The declared name is the last whole identifier of a declarator,
    # optionally followed by array suffixes: ``*buf`` -> ``buf``,
    # ``table[8]`` -> ``table``, ``long total`` -> ``total``.
    _DECLARED_NAME = re.compile(
        r"(?<![A-Za-z0-9_])([A-Za-z_][A-Za-z0-9_]*)\s*(?:\[[^\]]*\]\s*)*$"
    )

    # Lexical tokens of the text being rewritten.  Literals and comments are
    # matched first so that their contents are copied through untouched; only
    # the ``word`` group is looked up in ``symbolMap``.  Words include leading
    # digits so numeric literals such as ``0x1F`` are consumed whole.
    _TOKEN = re.compile(
        r'"(?:\\.|[^"\\])*"?'           # string literal, honouring \" escapes
        r"|'(?:\\.|[^'\\\n])*'?"        # character literal
        r"|//[^\n]*"                    # line comment
        r"|/\*.*?(?:\*/|\Z)"            # block comment, possibly unterminated
        r"|(?P<word>[A-Za-z0-9_]+)",    # identifier / number run
        re.DOTALL,
    )

    def __init__(self):
        self.varCount = 0     # next index for a "v<N>" placeholder
        self.funcCount = 0    # next index for an "f<N>" placeholder
        self.symbolMap = {}   # original identifier -> placeholder

    def cleanCode(self, text):
        """Return ``text`` with every registered identifier replaced.

        First collects function names, their parameters and declared
        variables from ``text`` (adding them to ``symbolMap``), then rewrites
        the text.  String literals, character literals and comments are
        copied verbatim.
        """
        for func in FUNC.finditer(text):
            header = func.group()
            self.cleanFunc(header)
            for params in FUNC_PARAM.finditer(header):
                self.cleanFuncParam(params.group())

        # VAR is anchored with '$', so MULTILINE is needed to match a
        # declaration at the end of any line rather than only the last one.
        for decl in re.finditer(VAR, text, re.MULTILINE):
            self.cleanVar(decl.group())

        # Text between tokens (operators, punctuation, whitespace) is left
        # in place by ``sub``, so no identifier can be lost or reordered
        # around '/', '*' or '"', and a trailing identifier is still emitted.
        return self._TOKEN.sub(self._rename, text)

    def cleanFunc(self, line):
        """Register the function declared in ``line``, if there is one.

        A name that is already mapped keeps its placeholder, so a prototype
        followed by the definition does not consume two indices.  Lines that
        do not match ``FUNC`` are ignored.
        """
        match = FUNC.search(line)
        if match is None:
            return
        name = match.group(1)
        if name not in self.symbolMap:
            self.symbolMap[name] = "f{}".format(self.funcCount)
            self.funcCount += 1

    def cleanFuncParam(self, line):
        """Register every named parameter of the parameter list in ``line``.

        Lines that do not match ``FUNC_PARAM`` are ignored; declarators
        without a usable name (unnamed parameters, ``...``) are skipped.
        """
        match = FUNC_PARAM.search(line)
        if match is None:
            return
        for declarator in match.group(1).split(","):
            self._register_variable(self._declared_name(declarator))

    def cleanVar(self, line):
        """Register every variable declared in ``line``.

        Handles comma-separated declarators with initialisers, pointer stars
        and array suffixes.  Lines that do not match ``VAR`` are ignored.
        """
        match = re.search(VAR, line)
        if match is None:
            return
        for declarator in match.group(1).split(","):
            self._register_variable(self._declared_name(declarator))

    def _declared_name(self, declarator):
        """Return the identifier a single declarator introduces, or None."""
        # Drop any initialiser so names used on the right of '=' are ignored.
        head = declarator.split("=", 1)[0]
        found = self._DECLARED_NAME.search(head)
        if found is None:
            return None
        name = found.group(1)
        if name in self._TYPE_KEYWORDS:
            return None
        return name

    def _register_variable(self, name):
        """Assign the next ``v<N>`` placeholder to ``name`` if it is new."""
        if name is None or name in self.symbolMap:
            return
        self.symbolMap[name] = "v{}".format(self.varCount)
        self.varCount += 1

    def _rename(self, token):
        """``re.sub`` callback: map words through ``symbolMap``, keep the rest."""
        word = token.group("word")
        if word is None:
            # String / character literal or comment: copy through unchanged.
            return token.group()
        return self.symbolMap.get(word, word)
|