"""Rename identifiers declared in C-like source text to placeholder names.

Function names become ``f0``, ``f1``, ... and variable/parameter names become
``v0``, ``v1``, ...; comments and string/character literals are left untouched.
"""
import re

# Declaration patterns.  VAR is deliberately kept as a plain string: it is
# passed to ``re.finditer``/``re.findall`` together with flags at the call site.
VAR = r"(?:int|float|double|char|long|string)\s*\*?\s+([^;\(\)]+);$"
FUNC = re.compile(r"(?:int|float|double|char|long|string|void)\s*\*?\s+([a-zA-Z0-9_]+)\s*\([^)]*\)\s*(?:{|;)")
FUNC_PARAM = re.compile(r"\(\s*(?:int|float|double|char|long|string)\s*\*?\s+([^;\(\)]+)\)\s*(?:{|;)")
PARAM_DECL = re.compile(r"(?:int|float|double|char|long|string)\s*\*?\s+([a-zA-Z0-9_]+)")

_IDENTIFIER = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")

# Reserved type words that can never be the declared name; seeing one in the
# name position means the declarator is unnamed (e.g. the ``int`` in
# ``f(int a, int)``).
_TYPE_KEYWORDS = frozenset({
    "int", "float", "double", "char", "long", "short",
    "unsigned", "signed", "void", "const", "volatile",
})

# One alternative per lexical region.  Only the ``word`` group is eligible for
# renaming; every other alternative is copied to the output verbatim.
_TOKEN = re.compile(
    r"^[ \t]*#[ \t]*include[^\n]*"   # whole #include line (header names)
    r"|//[^\n]*"                     # line comment
    r"|/\*.*?(?:\*/|\Z)"             # block comment, may be unterminated
    r'|"(?:\\.|[^"\\])*(?:"|\Z)'     # string literal with backslash escapes
    r"|'(?:\\.|[^'\\\n])*'"          # character literal, single line only
    r"|(?P<word>[a-zA-Z0-9_]+)",     # identifier, keyword or number
    re.MULTILINE | re.DOTALL,
)


def _declaredName(fragment):
    """Return the identifier declared by one declarator, or None.

    ``fragment`` is a single comma-separated piece of a declaration such as
    ``"*p"``, ``"const char *tag"``, ``"arr[3] = {1"`` or ``"..."``.  The name
    is the last identifier before any initializer (``=``) or array suffix
    (``[``); everything in front of it is type information.
    """
    head = fragment.split("=", 1)[0].split("[", 1)[0]
    words = _IDENTIFIER.findall(head)
    if not words or words[-1] in _TYPE_KEYWORDS:
        return None
    return words[-1]


class CodeCleaner:
    """Collects declared names and rewrites source text with placeholders.

    Attributes:
        varCount: number of variable placeholders handed out so far.
        funcCount: number of function placeholders handed out so far.
        symbolMap: original identifier -> placeholder name.

    The mapping lives on the instance, so repeated ``cleanCode`` calls on the
    same object keep extending (and reusing) the same table.
    """

    def __init__(self):
        self.varCount = 0
        self.funcCount = 0
        self.symbolMap = {}

    def cleanCode(self, text: str) -> str:
        """Return ``text`` with every registered identifier replaced.

        Functions (and their parameter lists) are registered first, then
        variable declarations, so a name declared as both keeps its function
        placeholder.  Comments, string literals, character literals and
        ``#include`` lines are copied through unchanged.
        """
        for func in FUNC.finditer(text):
            declaration = func.group()
            self.cleanFunc(declaration)
            for params in FUNC_PARAM.finditer(declaration):
                self.cleanFuncParam(params.group())

        # MULTILINE lets the trailing ``$`` in VAR anchor at each line end.
        for var in re.finditer(VAR, text, re.MULTILINE):
            self.cleanVar(var.group())

        symbols = self.symbolMap

        def rename(token):
            word = token.group("word")
            if word is None:
                # Comment, literal or #include line: keep verbatim.
                return token.group()
            return symbols.get(word, word)

        # Characters matched by no alternative (operators, whitespace, ...)
        # are copied through by ``sub`` itself.
        return _TOKEN.sub(rename, text)

    def cleanFunc(self, line: str) -> None:
        """Register the function declared in ``line`` under the next ``f<N>``.

        A line that does not match FUNC is printed and ignored.  A name that
        is already in the table keeps its existing placeholder, so a prototype
        followed by its definition consumes a single id.
        """
        match = FUNC.search(line)
        if match is None:
            print(line)
            return
        name = match.group(1)
        if name not in self.symbolMap:
            self.symbolMap[name] = "f{}".format(self.funcCount)
            self.funcCount += 1

    def cleanFuncParam(self, line: str) -> None:
        """Register every named parameter of the parameter list in ``line``.

        Unnamed parameters and the variadic ``...`` are skipped; a line that
        does not match FUNC_PARAM registers nothing.
        """
        found = FUNC_PARAM.findall(line)
        if not found:
            return
        # FUNC_PARAM consumes the first parameter's type, so the first piece
        # is a bare declarator while later pieces still carry their types.
        for piece in found[0].split(","):
            self._registerVariable(_declaredName(piece))

    def cleanVar(self, line: str) -> None:
        """Register every variable declared by the declaration in ``line``.

        Handles comma-separated declarators with optional initializers; a
        line that does not match VAR registers nothing.
        """
        found = re.findall(VAR, line)
        if not found:
            return
        for piece in found[0].split(","):
            self._registerVariable(_declaredName(piece))

    def _registerVariable(self, name):
        """Give ``name`` the next ``v<N>`` placeholder unless already mapped."""
        if name is None or name in self.symbolMap:
            return
        self.symbolMap[name] = "v{}".format(self.varCount)
        self.varCount += 1