12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- import re
# Pattern text for "identifier, optional whitespace, '=', rest of the line".
# Kept as a plain string (not compiled) so callers that pass it to ``re``
# functions together with flags keep working.
# NOTE: it also matches the left operand of ``==`` and keyword arguments
# such as ``foo(x=1)``; those names end up registered as variables.
VAR = r"([a-zA-Z_][a-zA-Z0-9_]*)\s*=[^\n]+$"

# A ``def name(params):`` header. The parameter list may not contain ``)``,
# so headers with nested parentheses or a ``-> type`` annotation do not match.
FUNC = re.compile(r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\([^\)]*\)\s*:\s*")

# The parenthesised parameter list of a ``def`` header (group 1 = the
# text between the parentheses).
FUNC_PARAM = re.compile(r"\(([^\)]*)\)\s*:\s*")

# Compiled once; VAR relies on MULTILINE so ``$`` matches at every line end.
_VAR_RE = re.compile(VAR, re.MULTILINE)

# A bare ASCII identifier, used to validate extracted parameter names.
_NAME_RE = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")

# Characters that may appear inside a token handled by the rewriter.
_IDENT_CHARS = frozenset(
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789_"
)

_QUOTES = ('"', "'")


class CodeCleaner:
    """Rename functions and variables in Python source to canonical names.

    Function names found by ``FUNC`` become ``f0``, ``f1``, ...; parameter
    names and assignment targets found by ``FUNC_PARAM`` / ``VAR`` become
    ``v0``, ``v1``, ....  Comments are removed and string literals are copied
    through unchanged.

    The symbol table and both counters live on the instance, so names
    registered by one ``cleanCode`` call are still known in the next one.
    """

    def __init__(self):
        self.varCount = 0    # next number to use for a ``vN`` name
        self.funcCount = 0   # next number to use for an ``fN`` name
        self.symbolMap = {}  # original identifier -> replacement name

    def cleanCode(self, text):
        """Return ``text`` with known identifiers renamed and comments removed.

        Symbols are collected first (every function header together with its
        parameters, then every assignment target), after which the text is
        rewritten character by character.

        NOTE: non-blank output lines are concatenated with no separator, so
        the result is a single line and is not valid Python in general.
        """
        for header in FUNC.finditer(text):
            declaration = header.group()
            self.cleanFunc(declaration)
            for param_list in FUNC_PARAM.finditer(declaration):
                self.cleanFuncParam(param_list.group())

        for assignment in _VAR_RE.finditer(text):
            self.cleanVar(assignment.group())

        rewritten = self._rewrite(text)
        return "".join(
            line for line in rewritten.splitlines() if line.strip()
        )

    def cleanFunc(self, line):
        """Register the function declared in ``line`` under the next ``fN`` name.

        Does nothing when ``line`` contains no ``def`` header.  A name that is
        already registered is overwritten with a fresh ``fN`` name.
        """
        match = FUNC.search(line)
        if match is None:
            return
        self.symbolMap[match.group(1)] = "f{}".format(self.funcCount)
        self.funcCount += 1

    def cleanFuncParam(self, line):
        """Register every parameter name in the first parameter list of ``line``.

        Defaults (``b=2``), annotations (``x: int``) and leading stars
        (``*args``, ``**kwargs``) are stripped so that the bare name is what
        gets registered.  Fragments that are not plain identifiers (a lone
        ``*`` or ``/``, the tail of a default containing a comma) are skipped.
        Does nothing when ``line`` has no parameter list.
        """
        match = FUNC_PARAM.search(line)
        if match is None:
            return
        for fragment in match.group(1).split(","):
            name = fragment.split("=", 1)[0].split(":", 1)[0]
            name = name.strip().lstrip("*").strip()
            if _NAME_RE.fullmatch(name):
                self._register_variable(name)

    def cleanVar(self, line):
        """Register the target of every assignment that ``VAR`` finds in ``line``."""
        for name in _VAR_RE.findall(line):
            self._register_variable(name)

    def _register_variable(self, name):
        """Give ``name`` the next ``vN`` name unless it is already registered."""
        if name not in self.symbolMap:
            self.symbolMap[name] = "v{}".format(self.varCount)
            self.varCount += 1

    def _rewrite(self, text):
        """Copy ``text``, replacing registered tokens and dropping comments."""
        pieces = []
        token = ""
        quote = None        # the opening quote while inside a string literal
        escaped = False     # previous string character was a backslash
        in_comment = False

        for ch in text:
            if quote is not None:
                pieces.append(ch)
                if escaped:
                    escaped = False
                elif ch == "\\":
                    # The next character (possibly a quote) is part of the
                    # literal and must not close it.
                    escaped = True
                elif ch == quote:
                    quote = None
            elif in_comment:
                # The terminating newline is swallowed with the comment.
                in_comment = ch != "\n"
            elif ch in _IDENT_CHARS:
                token += ch
            else:
                if token:
                    if ch in _QUOTES:
                        # A token glued to a quote is a string prefix (or a
                        # keyword); emit it verbatim instead of renaming it.
                        pieces.append(token)
                    else:
                        pieces.append(self.symbolMap.get(token, token))
                    token = ""
                if ch == "#":
                    in_comment = True
                else:
                    if ch in _QUOTES:
                        quote = ch
                    pieces.append(ch)

        # Flush a token that runs up to the very end of the input.
        if token:
            pieces.append(self.symbolMap.get(token, token))
        return "".join(pieces)
|