import re

# Assignment pattern: an identifier followed by "=" and the rest of the line.
# Kept as a plain string because it is applied with re.MULTILINE at call time.
VAR = r"([a-zA-Z_][a-zA-Z0-9_]*)\s*=[^\n]+$"
# Function header: "def name(params):" with no ")" inside the parameter list.
FUNC = re.compile(r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\([^\)]*\)\s*:\s*")
# Parenthesised parameter list that is followed by a colon.
FUNC_PARAM = re.compile(r"\(([^\)]*)\)\s*:\s*")

# Leading identifier of one parameter, after optional "*" / "**".
_PARAM_NAME = re.compile(r"\*{0,2}\s*([a-zA-Z_][a-zA-Z0-9_]*)")
# ASCII-only word characters, matching the character class used by VAR/FUNC.
_WORD_CHARS = frozenset(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
)
_QUOTES = ('"', "'")


class CodeCleaner:
    """Rename identifiers in source text to positional placeholders.

    Function names found by ``FUNC`` become ``f0, f1, ...``; parameters and
    assignment targets found by ``FUNC_PARAM`` / ``VAR`` become
    ``v0, v1, ...``.  ``#`` comments are removed and string literals are
    copied through untouched.

    The counters and ``symbolMap`` live on the instance, so repeated
    ``cleanCode`` calls on one instance share the same numbering.
    """

    def __init__(self):
        self.varCount = 0    # next index used for a "v<N>" placeholder
        self.funcCount = 0   # next index used for an "f<N>" placeholder
        self.symbolMap = {}  # original identifier -> placeholder

    def cleanCode(self, text):
        """Return ``text`` with known identifiers renamed and comments removed.

        Whitespace-only lines are dropped and the remaining lines are
        concatenated with no separator, so the result is a single line.
        """
        # Pass 1: collect symbols (functions first, then assignment targets).
        for func in FUNC.finditer(text):
            header = func.group()
            self.cleanFunc(header)
            for params in FUNC_PARAM.finditer(header):
                self.cleanFuncParam(params.group())
        for assignment in re.finditer(VAR, text, re.MULTILINE):
            self.cleanVar(assignment.group())

        # Pass 2: character scan that rewrites identifiers outside of strings
        # and comments.
        output = []
        token = ""          # word characters collected since the last flush
        quote = None        # quote character of the string being copied
        escaped = False     # previous string character was a backslash
        in_comment = False

        for c in text:
            if quote is not None:
                output.append(c)
                if escaped:
                    # Character after a backslash never terminates the string.
                    escaped = False
                elif c == "\\":
                    escaped = True
                elif c == quote:
                    quote = None
            elif in_comment:
                # The newline that ends the comment is consumed as well.
                # NOTE(review): kept from the original behaviour.
                in_comment = c != "\n"
            elif c in _WORD_CHARS:
                token += c
            else:
                if token:
                    if c in _QUOTES:
                        # A word glued to a quote is a string prefix
                        # (r"", b"", f""): emit it verbatim, never renamed.
                        output.append(token)
                    else:
                        output.append(self.symbolMap.get(token, token))
                    token = ""
                if c == "#":
                    in_comment = True
                elif c in _QUOTES:
                    quote = c
                    output.append(c)
                else:
                    output.append(c)

        # Flush a token that runs up to the end of the input.
        if token:
            output.append(self.symbolMap.get(token, token))

        cleaned = "".join(output)
        # NOTE(review): lines are joined with "" (not "\n"); preserved because
        # callers may depend on the exact single-line output.
        return "".join(line for line in cleaned.splitlines() if line.strip())

    def cleanFunc(self, line):
        """Map the function name in ``line`` to the next ``f<N>`` placeholder.

        An existing mapping for the same name is overwritten.  Lines that do
        not match ``FUNC`` are ignored.
        """
        match = FUNC.search(line)
        if match is None:
            return
        self.symbolMap[match.group(1)] = "f{}".format(self.funcCount)
        self.funcCount += 1

    def cleanFuncParam(self, line):
        """Register every parameter name in the first ``(...) :`` of ``line``.

        Only the parameter's identifier is registered: leading ``*`` / ``**``
        and any annotation or default value are ignored.  Pieces that do not
        start with an identifier (empty, bare ``*``, ``/``) are skipped.
        """
        found = FUNC_PARAM.search(line)
        if found is None:
            return
        for piece in found.group(1).split(","):
            name = _PARAM_NAME.match(piece.strip())
            if name is not None:
                self._registerVar(name.group(1))

    def cleanVar(self, line):
        """Register every assignment target that ``VAR`` finds in ``line``."""
        for name in re.findall(VAR, line, re.M):
            self._registerVar(name.strip())

    def _registerVar(self, name):
        """Assign the next ``v<N>`` placeholder to ``name`` if it has none."""
        if name not in self.symbolMap:
            self.symbolMap[name] = "v{}".format(self.varCount)
            self.varCount += 1