import sys
from math import trunc
import os
import tempfile
import glob
from ccleaner import CodeCleaner
from pycleaner import CodeCleaner as PyCleaner

# Make the current and parent directories importable for any import that is
# executed after this point.
sys.path.extend(['.', '..'])
import re
import shlex
import subprocess
import numpy as np

# Text removed before counting "useful" lines: `//` comments, `/* ... */`
# comments, blank lines, empty brace pairs, and lines holding only `{` or `}`.
USEFUL_REGEX = r"(//.*)|(/\*[\w\W\n\r]*?\*/)|(^\s*$)|(\{\s*\})|(^\s*\{\s*$)|(^\s*\}\s*$)"
# Accepts paths ending in `.py` or `.c`, ignoring case.
FILE_REGEX = re.compile(r".*\.(py|c)$",re.IGNORECASE)
#VAR_NO_INIT = re.compile(r"(int|float|double|char|long|string)\s*\*?([a-zA-Z0-9_]+)\s*(,\s*[a-zA-Z0-9)]+\s*)*;")
#VAR_INIT = re.compile(r"(int|float|double|char|long|string)\s*\*?([a-zA-Z0-9_]+)[^=]*=\s*[^,](,\s*[a-zA-Z0-9)][^=]*=\s*[^,])*;")


def _findSubmissionFiles(path, exerciseID, submissionID):
    """Locate the submitted .py/.c files of one submission.

    The glob pattern is ``<path><exerciseID>/<submissionID>/submittedfiles/*.*``
    with both IDs truncated to integers. ``path`` is concatenated as-is, so it
    must already end with a path separator.

    Returns:
        A ``(files, pattern)`` tuple: the glob results that match
        ``FILE_REGEX`` (in glob order) and the pattern that was searched.
    """
    pattern = "%s%s/%s/submittedfiles/*.*" % (path, trunc(exerciseID), trunc(submissionID))
    files = [f for f in glob.glob(pattern) if FILE_REGEX.match(f) is not None]
    return files, pattern


def readSubmissionContent(path, exerciseID, submissionID):
    """Return the minified source of the first submitted file of a submission.

    Args:
        path: Base directory prefix (must end with a path separator).
        exerciseID: Numeric exercise identifier; truncated to an integer.
        submissionID: Numeric submission identifier; truncated to an integer.

    Returns:
        The minified code as a string. An empty string is returned (and a
        message printed) when no .py/.c file is found or when minification
        raises.
    """
    files, filePath = _findSubmissionFiles(path, exerciseID, submissionID)
    if not files:
        # Deliberately best-effort: a missing submission is reported, not fatal.
        print("Evaluated code for exercise {}, submission {} doesn't have a single submitted .c/.py file at {}".format(exerciseID, submissionID, filePath))
        return ""

    submittedFile = files[0]
    with open(submittedFile, "r") as handle:
        content = handle.read()

    try:
        # FILE_REGEX is case-insensitive, so the extension check must be too;
        # otherwise e.g. "MAIN.PY" would be sent through the C minifier.
        if submittedFile.lower().endswith(".py"):
            return minifyPython(content)
        return minifyC(content)
    except Exception as e:
        print(e)
        return ""


def minifyC(text):
    """Clean C source with ``CodeCleaner`` and minify it via ``./cminify.sh``.

    The cleaned text is written (UTF-8) to a temporary file whose name is
    passed to ``./cminify.sh``, resolved relative to the current working
    directory.

    Returns:
        The script's standard output. Its exit status and standard error are
        not inspected.
    """
    usefulText = CodeCleaner().cleanCode(text)
    with tempfile.NamedTemporaryFile() as fp:
        fp.write(usefulText.encode("utf-8"))
        # Flush so the script sees the full content when it opens the file.
        fp.flush()
        # Quote the name so a temp directory containing spaces or shell
        # metacharacters cannot break (or alter) the command line.
        command = "./cminify.sh {}".format(shlex.quote(fp.name))
        process = subprocess.run(command, shell=True, capture_output=True, text=True)
    return process.stdout


def minifyPython(text):
    """Return ``text`` cleaned by ``PyCleaner().cleanCode``."""
    return PyCleaner().cleanCode(text)


def getSubmissionFile(path, exerciseID, submissionID):
    """Return the path of the first submitted .py/.c file of a submission.

    Raises:
        AssertionError: If the submission directory holds no .py/.c file.
    """
    files, filePath = _findSubmissionFiles(path, exerciseID, submissionID)
    if not files:
        # Raised explicitly (instead of using `assert`) so the check still
        # runs under `python -O`, while callers see the same exception type.
        raise AssertionError("Evaluated code for exercise {}, submission {} doesn't have a single submitted .c/.py file at {}".format(exerciseID, submissionID, filePath))
    return files[0]


def getUsefulLines(path, exerciseID, submissionID):
    """Count the line breaks left in a submission after stripping noise.

    Everything matched by ``USEFUL_REGEX`` is removed, then empty lines are
    dropped and the remaining lines are re-joined.

    Returns:
        The number of newline characters in the re-joined text.

    Raises:
        AssertionError: If the submission has no .py/.c file.
    """
    subFile = getSubmissionFile(path, exerciseID, submissionID)
    with open(subFile, "r") as fileHandler:
        content = fileHandler.read()

    usefulText = re.sub(USEFUL_REGEX, "", content, flags=re.MULTILINE)
    usefulText = os.linesep.join([s for s in usefulText.splitlines() if s])
    # NOTE(review): this counts separators, so the result is one less than the
    # number of remaining lines (and 0 for both zero and one line). Kept as-is
    # because existing consumers may depend on these values; confirm intent.
    return usefulText.count("\n")


def levenshteinDistance(seq1, seq2):
    """Return the Levenshtein (edit) distance between two sequences.

    Insertions, deletions and substitutions each cost 1. The arguments must
    support ``len()`` and integer indexing.

    Returns:
        The distance as a NumPy float, because the dynamic-programming table
        is a float array.
    """
    size_x = len(seq1) + 1
    size_y = len(seq2) + 1
    matrix = np.zeros((size_x, size_y))
    # Distance from/to the empty prefix is the prefix length.
    matrix[:, 0] = np.arange(size_x)
    matrix[0, :] = np.arange(size_y)

    for x in range(1, size_x):
        for y in range(1, size_y):
            # Matching elements make the diagonal move free.
            cost = 0 if seq1[x-1] == seq2[y-1] else 1
            matrix[x, y] = min(
                matrix[x-1, y] + 1,
                matrix[x-1, y-1] + cost,
                matrix[x, y-1] + 1
            )
    return matrix[size_x - 1, size_y - 1]