Parcourir la source

Implement final version of the analyser

Lucas de Souza il y a 3 ans
Parent
commit
d472cddc6f
11 fichiers modifiés avec 790 ajouts et 29 suppressions
  1. 33 21
      analyser.py
  2. 139 0
      ccleaner.py
  3. 97 8
      cjson.py
  4. 11 0
      cminify.sh
  5. 60 0
      csvParser.py
  6. 14 0
      main.py
  7. 94 0
      pycleaner.py
  8. 107 0
      submissionAnalysis.py
  9. 98 0
      submissionFileReader.py
  10. 43 0
      threadPool.py
  11. 94 0
      vplAnalyzer.py

+ 33 - 21
analyser.py

@@ -19,6 +19,9 @@ VALID_OPS = list(['*', '-', '/', '+', '%', '++', '--', 'p--', 'p++'])
 VALID_OPS.extend(VALID_LOGIC_OPS)
 VALID_OPS.extend(VALID_REL_OPS)
 
+PRINT_COUNT = 0
+SCAN_COUNT = 0
+
 def normalizeType(type):
   if type in INT_TYPES:
     return 'int'
@@ -35,13 +38,13 @@ class DeclarationInfo:
 
   def isVector (self) :
     return self.dimensions == 1
-  
+
   def isMatrix (self):
     return self.dimensions == 2
-  
+
   def isMultiDimension (self):
     return self.dimensions > 2
-  
+
   def isArray (self):
     return self.dimensions > 0
 
@@ -52,7 +55,7 @@ class CommandInfo:
     self.numLogicOps = numLogicOps
     self.numRelOps = numRelOps
     self.opList = opList
-  
+
   def __str__(self):
     return "Type:%s LogicOpCount:%d RelOpCount:%d OpList:%s"%( self.condType, self.numLogicOps, self.numRelOps, self.opList)
 
@@ -61,7 +64,7 @@ class ForCommandInfo (CommandInfo):
     CommandInfo.__init__(self, cmdCount, condType, numLogicOps, numRelOps, opList)
     self.useAssignment = useAssignment
     self.useNext = useNext
-  
+
   def __str__(self):
     return "hasInit:%s hasNext:%s Type:%s LogicOpCount:%d RelOpCount:%d OpList:%s"%(self.useAssignment, self.useNext, self.condType, self.numLogicOps, self.numRelOps, self.opList)
 
@@ -87,10 +90,10 @@ class ASTAnalyser:
     self.declarationsPointers = dict()
     self.declarationsVectors = dict()
     self.declarationsMatrixes = dict()
-  
+
   def conditionCommandStr (self) :
     return [ s.__str__() for s in self.conditionCommandData]
-  
+
   def forCommandStr (self) :
     return [ s.__str__() for s in self.forCommandData]
 
@@ -139,7 +142,7 @@ class ASTAnalyser:
         self.declarations[type] += 1
       else:
         self.declarations[type] = 1
-  
+
   def proccessDecl (self, node):
     type = node.type
     dimensions = 0
@@ -160,7 +163,7 @@ class ASTAnalyser:
           self.constantInitCount[init.value] += 1
         else:
           self.constantInitCount[init.value] = 1
-  
+
   def proccessFuncDef (self, node):
     name = node.__class__.__name__
     self.incCmdCount(name)
@@ -169,24 +172,29 @@ class ASTAnalyser:
       self.proccessCommand(cmd)
 
  def proccessFuncCall (self, node):
    """Count a FuncCall AST node and the operators used in its arguments.

    Also tallies calls to printf/scanf in the module-level PRINT_COUNT /
    SCAN_COUNT counters (only direct calls by name are seen here).

    NOTE(review): PRINT_COUNT/SCAN_COUNT are shared, unsynchronized module
    globals — if this analyser ever runs on the project's ThreadPool the
    counts may race; confirm the analysis is single-threaded per module.
    """
    global PRINT_COUNT, SCAN_COUNT
    name = node.__class__.__name__
    if node.name.name == "printf":
      PRINT_COUNT = PRINT_COUNT + 1
    elif node.name.name == "scanf":
      SCAN_COUNT = SCAN_COUNT + 1
    self.incCmdCount(name)
    # (sic) "epxrs" is a typo for "exprs"; kept verbatim in this
    # documentation-only pass.
    epxrs = node.args.exprs
    for e in epxrs:
      self.countOperators(e)
-    
+
   def proccessAssignment (self, node):
     name = node.__class__.__name__
     self.incCmdCount(name)
     epxr = node.rvalue
     self.countOperators(epxr)
-  
+
   def proccessReturn (self, node):
     name = node.__class__.__name__
     self.incCmdCount(name)
     epxr = node.expr
     self.countOperators(epxr)
-  
+
   def proccessSwitch (self, node):
     name = node.__class__.__name__
     self.incCmdCount(name)
@@ -195,7 +203,7 @@ class ASTAnalyser:
     cmdList = node.stmt.block_items
     for cmd in cmdList:
       self.proccessCommand(cmd)
-  
+
   def proccessDoWhile (self, name, node):
     self.incCmdCount(name)
     epxr = node.cond
@@ -216,7 +224,7 @@ class ASTAnalyser:
     else:
       self.proccessCommand(cmdList)
     self.conditionCommandData.append(CommandInfo(self.cmdCountStack.pop(), condType,logicCount, relCount, opList))
-  
+
   def proccessFor (self, node):
     name = node.__class__.__name__
     self.incCmdCount(name)
@@ -233,7 +241,7 @@ class ASTAnalyser:
     hasInit = node.init.__class__.__name__ != 'NoneType'
     if hasInit:
       self.proccessCommand(node.init)
-    
+
     hasNext = node.next.__class__.__name__ != 'NoneType'
     if hasNext:
       self.proccessCommand(node.next)
@@ -249,7 +257,7 @@ class ASTAnalyser:
     elif name != 'NoneType':
       self.proccessCommand(cmdList)
     self.forCommandData.append(ForCommandInfo(hasInit, hasNext, self.cmdCountStack.pop(), condType, logicCount, relCount, opList))
-  
+
   def proccessIf (self, node):
     name = node.__class__.__name__
     self.incCmdCount(name)
@@ -271,17 +279,21 @@ class ASTAnalyser:
         self.proccessCommand(cmd)
     else:
       self.proccessCommand(iftrue)
-
     iffalse = node.iffalse
     ifCompound = iffalse.__class__.__name__
     if ifCompound == 'Compound':
+      # TODO contar else's
+      self.cmdCountStack.append(0)
+      self.incCmdCount('Else')
       cmdList = iffalse.block_items
       for cmd in cmdList:
         self.proccessCommand(cmd)
-    elif name != 'NoneType':
+    elif iffalse != None and name != 'NoneType':
+      self.cmdCountStack.append(0)
+      self.incCmdCount('Else')
       self.proccessCommand(iffalse)
     self.conditionCommandData.append(CommandInfo(self.cmdCountStack.pop(), condType, logicCount, relCount, opList))
-  
+
   def proccessCase (self, node):
     try:
       epxr = node.expr
@@ -335,7 +347,7 @@ class ASTAnalyser:
       self.commandCount[cmd] += 1
     else:
       self.commandCount[cmd] = 1
-  
+
   def checkCondType (self, expr):
     name = expr.__class__.__name__
     if name == 'BinaryOp':
@@ -385,4 +397,4 @@ class ASTAnalyser:
       self.proccessFor(node)
     else:
       self.cmdCountStack[-1] -= 1
-      #print("Unknown Command: %s" % name )
+      #print("Unknown Command: %s" % name )

+ 139 - 0
ccleaner.py

@@ -0,0 +1,139 @@
import re

# Very rough grammar for the C identifiers we want to anonymize.
VAR = r"(?:int|float|double|char|long|string)\s*\*?\s+([^;\(\)]+);$"
FUNC = re.compile(r"(?:int|float|double|char|long|string|void)\s*\*?\s+([a-zA-Z0-9_]+)\s*\([^)]*\)\s*(?:{|;)")
FUNC_PARAM = re.compile(r"\(\s*(?:int|float|double|char|long|string)\s*\*?\s+([^;\(\)]+)\)\s*(?:{|;)")
PARAM_DECL = re.compile(r"(?:int|float|double|char|long|string)\s*\*?\s+([a-zA-Z0-9_]+)")

class CodeCleaner:
    """Anonymizes C source for plagiarism/edit-distance comparison.

    Function names become f0, f1, ... and variable/parameter names become
    v0, v1, ... in declaration order; string literals and comments are
    copied through untouched.
    """

    def __init__ (self):
        self.varCount = 0
        self.funcCount = 0
        self.symbolMap = {}  # original identifier -> anonymized name

    def cleanCode (self, text):
        """Return *text* with every known identifier renamed.

        Pass 1 harvests function, parameter and variable names into
        symbolMap; pass 2 is a character scanner that rewrites identifiers
        while leaving strings and comments alone.
        """
        for m in FUNC.finditer(text):
            self.cleanFunc(m.group())
            for p in FUNC_PARAM.finditer(m.group()):
                self.cleanFuncParam(p.group())
        for m in re.finditer(VAR, text, re.MULTILINE):
            self.cleanVar(m.group())

        stringMode = False
        lineComment = False
        blockComment = False
        lastChar = ''
        output = []
        alpha = ""  # identifier characters accumulated so far

        def flush():
            # Emit any pending identifier, renamed when known.
            nonlocal alpha
            if alpha:
                output.append(self.symbolMap.get(alpha, alpha))
                alpha = ""

        for c in text:
            if stringMode:
                # NOTE(review): an escaped quote (\") terminates the string
                # early — acceptable for the distance-metric use case.
                stringMode = c != '"'
                output.append(c)
            elif lineComment:
                lineComment = c != '\n'
                output.append(c)
            elif blockComment:
                blockComment = (lastChar + c) != '*/'
                output.append(c)
            elif c == '/':
                # Fix: flush the pending identifier first — the original kept
                # accumulating, so "a/b" was emitted as "/ab".
                flush()
                if lastChar == '/':
                    lineComment = True
                output.append(c)
            elif c == '*':
                flush()  # fix: same out-of-order emission as '/' above
                if lastChar == '/':
                    blockComment = True
                output.append(c)
            elif c == '"':
                # Fix: the original silently discarded a pending identifier
                # that appeared right before a string literal.
                flush()
                stringMode = True
                output.append(c)
            elif re.match(r"[a-zA-Z0-9_]", c):
                alpha += c
            else:
                flush()
                output.append(c)
            lastChar = c
        # Fix: an identifier at end-of-text was dropped by the original.
        flush()

        return "".join(output)

    def cleanFunc (self, line):
        """Register the function name of a matched declaration as f<N>."""
        match = FUNC.search(line)
        if match is None:
            print(line)
            return  # fix: used to fall through and crash on match.group(1)
        varID = match.group(1)
        self.symbolMap[varID] = "f{}".format(self.funcCount)
        self.funcCount += 1

    def cleanFuncParam (self, line):
        """Register every parameter of a matched parameter list as v<N>."""
        match = FUNC_PARAM.findall(line)[0]
        ids = match.split(",")
        for i in range(len(ids)):
            varID = ids[i].strip()
            if varID.find(" ") == -1:
                # Bare name (type consumed by the outer regex).
                varID = re.search("[a-zA-Z_][a-zA-Z0-9_]*", varID).group()
                if varID not in self.symbolMap:
                    self.symbolMap[varID] = "v{}".format(self.varCount)
                    self.varCount += 1
            else:
                # "type name" pair — keep only the name token.
                varID = varID.split(" ")[1].strip()
                isMatch = re.search("[a-zA-Z_][a-zA-Z0-9_]*", varID)
                if isMatch is None:
                    print(match, ids[i])
                    continue
                if varID not in self.symbolMap:
                    self.symbolMap[varID] = "v{}".format(self.varCount)
                    self.varCount += 1

    def cleanVar (self, line):
        """Register every declared variable ("int a, b = 1;") as v<N>."""
        match = re.findall(VAR, line)[0]
        ids = match.split(",")
        for i in range(len(ids)):
            varID = ids[i].strip()
            if varID.find("=") != -1:
                varID = varID.split("=")[0].strip()  # drop the initializer
            isMatch = re.search("[a-zA-Z_][a-zA-Z0-9_]*", varID)
            if isMatch is None:
                continue
            varID = isMatch.group()
            if varID not in self.symbolMap:
                self.symbolMap[varID] = "v{}".format(self.varCount)
                self.varCount += 1

+ 97 - 8
cjson.py

@@ -1,11 +1,9 @@
-from __future__ import print_function
-
 import sys
 import os
 sys.path.extend(['.', '..'])
 from pycparser import parse_file, c_ast
 
-from Queue import Queue
+import queue
 from threading import Thread
 
 import re
@@ -13,6 +11,7 @@ import copy
 
 import files
 import analyser
+from submissionFileReader import getSubmissionFile
 
 COMMENT_REGEX = r"(//.*)|(/\*[\w\W\n\r]*?\*/)"
 USEFUL_REGEX = r"(//.*)|(/\*[\w\W\n\r]*?\*/)|(^\s*$)|(\{\s*\})|(^\s*\{\s*$)|(^\s*\}\s*$)"
@@ -40,7 +39,7 @@ class Worker (Thread):
         self.tasks = tasks
         self.daemon = True
         self.start()
-    
+
     def run (self):
         while True:
             func, args, kargs = self.tasks.get()
@@ -52,7 +51,7 @@ class Worker (Thread):
 class ThreadPool:
     """Pool of threads consuming tasks from a queue"""
     def __init__ (self, num_threads):
-        self.tasks = Queue(num_threads)
+        self.tasks = queue.Queue(num_threads)
         for _ in range(num_threads): Worker(self.tasks)
 
     def add_task (self, func, *args, **kargs):
@@ -131,13 +130,103 @@ def saveToFile (filePath, data):
     file.write(data)
     file.close()
 
def processDataFromCSV (parser, vplFolder):
    """End-to-end pipeline driven by the activity CSV.

    For every exercise known to *parser*, fetch each student's last valid
    submission file from *vplFolder*, analyse them on a thread pool, then
    aggregate into data.csv, for_structure.csv, cond_structure.csv and
    const_init.csv.

    NOTE(review): relies on module-level state — processStudentData is
    presumed to append its (astInfo, ...) tuples to finalDataList, and
    initEmptyDict / COMMANDS / DECLARATIONS / CSV_HEADER / FOR_CSV_HEADER /
    COND_CSV_HEADER / saveToFile are module globals; confirm in the rest of
    cjson.py.
    """
    assingments = parser.exercises  # (sic) typo for "assignments"
    data = {}
    # Phase 1: pick each student's latest submission file per exercise.
    for e in assingments:
        (allSubs, students) = parser.getSubmissions(e)
        studentData = []
        for student in students:
            submissions = parser.getStudentValidSubmissions(allSubs, student)
            try:
                assert len(submissions) > 0
                submissions.sort(key = lambda x : x.submission_id)
                lastSub = submissions[-1]
                content = getSubmissionFile(vplFolder, e, lastSub.submission_id)
                studentData.append((student, content, len(submissions)))
            except Exception:
                # Best-effort: students without a readable submission still
                # appear, with an empty file path.
                studentData.append((student, "", len(submissions)))
        data[e] = studentData[:]

    # Phase 2: analyse every (student, file) pair concurrently.
    pool = ThreadPool(10)
    for a in data:
        for studentData in data[a]:
            pool.add_task(processStudentData, studentData, a)
    pool.wait_completion()
    mainCSVFile = ""
    forCSVFile = ""
    condCSVFile = ""
    assignmentList = dict()
    constantInitCount = dict()
    # Group the worker results by assignment.
    # NOTE(review): this loop variable shadows the outer `data` dict (which
    # is no longer needed at this point).
    for data in finalDataList:
        if data[0].assignment in assignmentList:
            assignmentList[data[0].assignment].append(data)
        else:
            assignmentList[data[0].assignment] = list()
            assignmentList[data[0].assignment].append(data)
    # Phase 3: flatten per-student AST statistics into CSV rows.
    for assignmentKey in assignmentList:
        for studentData in assignmentList[assignmentKey]:
            astInfo = studentData[0]
            # Accumulate global constant-initializer histogram.
            for k in astInfo.constantInitCount:
                if k in constantInitCount:
                    constantInitCount[k] += astInfo.constantInitCount[k]
                else:
                    constantInitCount[k] = astInfo.constantInitCount[k]
            # Fixed-column dicts so every row has the same column order.
            studentOpData = initEmptyDict(analyser.VALID_OPS)
            for key in astInfo.operatorsCount:
                studentOpData[key] = astInfo.operatorsCount[key]
            studentCommandData = initEmptyDict(COMMANDS)
            for key in astInfo.commandCount:
                studentCommandData[key] = astInfo.commandCount[key]
            studentDeclarationData = initEmptyDict(DECLARATIONS)
            for key in astInfo.declarations:
                studentDeclarationData[key] = astInfo.declarations[key]
            for key in astInfo.declarationsPointers:
                studentDeclarationData["pointer_" + key] = astInfo.declarationsPointers[key]
            for key in astInfo.declarationsVectors:
                studentDeclarationData["vector_" + key] = astInfo.declarationsVectors[key]
            for key in astInfo.declarationsMatrixes:
                studentDeclarationData["matrix_" + key] = astInfo.declarationsMatrixes[key]
            mainCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, studentData[1], studentData[4], studentData[2], studentData[3])
            mainCSVFile += "," + ','.join([str(v) for v in studentOpData.values()])
            mainCSVFile += "," + ",".join([str(v) for v in studentCommandData.values()])
            mainCSVFile += "," + ",".join([str(v) for v in studentDeclarationData.values()])
            mainCSVFile += "\n"
            #For_structure.csv
            for i in astInfo.forCommandData:
                forCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, i.cmdCount, i.condType, i.numLogicOps, i.numRelOps)
                opData = initEmptyDict(analyser.VALID_OPS)
                for op in i.opList:
                    opData[op] += 1
                forCSVFile += "," + ','.join([str(v) for v in opData.values()])
                forCSVFile += ",%s,%s\n" % (i.useAssignment, i.useNext)
            #condition_structure.csv
            for i in astInfo.conditionCommandData:
                condCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, i.cmdCount, i.condType, i.numLogicOps, i.numRelOps)
                opData = initEmptyDict(analyser.VALID_OPS)
                for op in i.opList:
                    opData[op] += 1
                condCSVFile += "," + ','.join([str(v) for v in opData.values()])
                condCSVFile += "\n"
    # Phase 4: prepend headers and persist all four reports.
    mainCSVFile = ','.join(CSV_HEADER) + '\n' + mainCSVFile
    saveToFile("data.csv", mainCSVFile)
    forCSVFile = ','.join(FOR_CSV_HEADER) + '\n' + forCSVFile
    saveToFile("for_structure.csv", forCSVFile)
    condCSVFile = ','.join(COND_CSV_HEADER) + '\n' + condCSVFile
    saveToFile("cond_structure.csv", condCSVFile)
    constantInitFile = "constant,count\n"
    for k in constantInitCount:
        constantInitFile += "%s,%s\n" % (k, str(constantInitCount[k]))
    saveToFile("const_init.csv", constantInitFile)
    # NOTE(review): the labels look swapped — "Entrada" (input) is printed
    # with PRINT_COUNT (printf = output) and "Saida" with SCAN_COUNT
    # (scanf = input); confirm intent before relying on this log line.
    print("Entrada: {}, Saida: {}".format(analyser.PRINT_COUNT,analyser.SCAN_COUNT))
+
 #--- run ---#
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         file = sys.argv[1]
         if file == "-f" and len(sys.argv) > 2:
             print(processFile(sys.argv[2]))
-        elif file != "-f":    
+        elif file != "-f":
             raiz = "./" + sys.argv[1]
             data = loadAssignments(raiz)
             pool = ThreadPool(10)
@@ -209,8 +298,8 @@ if __name__ == "__main__":
             constantInitFile = "constant,count\n"
             for k in constantInitCount:
                 constantInitFile += "%s,%s\n" % (k, str(constantInitCount[k]))
-            saveToFile("const_init.csv", constantInitFile) 
+            saveToFile("const_init.csv", constantInitFile)
         else:
-            print("cjson -f file | cjon folder/")    
+            print("cjson -f file | cjon folder/")
     else:
         print("cjson -f file | cjon folder/")

+ 11 - 0
cminify.sh

@@ -0,0 +1,11 @@
#!/bin/bash
# Minify a C source file for edit-distance comparison:
#   1. drop runs of six spaces (indentation),
#   2. strip // line comments (must run while newlines still exist),
#   3. remove all newlines,
#   4. strip /* ... */ block comments on the resulting single line,
#   5. re-insert a newline after each #include directive.
# NOTE(review): step 4 uses a greedy .* — code between two block comments on
# the joined line is removed too; acceptable for distance metrics only.
if [[ -z "$1" ]]
then
  echo "No param provided!"
  exit 1
fi
# Fix: quote "$1" so file paths containing spaces do not word-split.
sed -rb 's/ {6}//g' "$1" |
sed -rb 's/\/\/.*$//g' |
tr -d '\n' |
sed -rb 's/\/\*.*\*\///g' |
sed -rb 's/(#include.*>)/\1\n/g'

+ 60 - 0
csvParser.py

@@ -0,0 +1,60 @@
+import pandas as pd
+from dateutil.tz import tzoffset
+from dateutil.parser import parse
+from dateutil.utils import default_tzinfo
+
class CSVParser:
    """Loads a VPL activity-log CSV and answers queries about submissions.

    The dataframe is expected to contain at least the columns:
    exercise_id, user_id, submission_id, action, time, grade.
    """

    def __init__ (self, path):
        self.df = pd.read_csv(path)
        # Timestamps are parsed assuming UTC-3 ("saw") and stored as epoch
        # seconds so they can be subtracted directly.
        tz = tzoffset(name="saw", offset=-10800)
        self.df['time'] = self.df['time'].transform(lambda x: default_tzinfo(parse(x), tz).timestamp())
        self.exercises = self.df['exercise_id'].unique().tolist()
        self.exercises.sort()

    def getSubmissions (self, exercise_id):
        """Return (dataframe, sorted student ids) for one exercise.

        When the sentinel grade 0.01 is present the whole column is assumed
        to be on a 0-1 scale and is rescaled to 0-10 (1.0 and NaN excepted).
        """
        # Fix: operate on an explicit copy — the original assigned into a
        # filtered slice of self.df, which raises SettingWithCopyWarning and
        # is unreliable under pandas copy-on-write.
        submissionData = self.df[self.df['exercise_id'] == exercise_id].copy()
        if 0.01 in submissionData['grade'].values:
            submissionData['grade'] = submissionData['grade'].transform(lambda x: x*10 if x != 1 and not pd.isna(x) else x)
        students = submissionData['user_id'].unique().tolist()
        students.sort()
        return (submissionData, students)

    def getStudentFirstInteraction (self, submissionData, studentID):
        """Timestamp of the 'view_description' event that precedes the
        student's first upload, or -1 when it cannot be determined."""
        # Filter and time-sort the student's events, as named tuples.
        subTuple = submissionData[submissionData['user_id'] == studentID].sort_values(by='time').itertuples(index=False, name="Submission")
        subList = list(subTuple)
        size = len(subList)
        for i in range(size):
            if subList[i].action == 'uploaded_submission':
                foundPos = self._backtrackViewDescription(subList, i)
                if foundPos >= 0:
                    return subList[foundPos].time
                else:
                    return -1
        # Fix: corrected the typos in this log message ("firt intereaction").
        print("first interaction not found: %i"%size)
        return -1

    def getStudentValidSubmissions (self, submissionData, studentID):
        """All deduplicated 'uploaded_submission' rows of one student,
        sorted by time (rows containing NaN are dropped)."""
        filteredDF = submissionData[(submissionData['user_id'] == studentID) & (submissionData['action'] == 'uploaded_submission')].dropna().sort_values(by='time')
        filteredDF = filteredDF.drop_duplicates(subset="submission_id")
        subList = list(filteredDF.itertuples(index=False, name="Submission"))
        return subList

    def getStudentLastSubmission (self, submissionData, studentID):
        """Same rows as getStudentValidSubmissions but sorted by
        submission_id ascending.

        NOTE(review): despite the name it returns the whole list, not just
        the last element — callers take [-1] themselves, presumably.
        """
        filteredDF = submissionData[(submissionData['user_id'] == studentID) & (submissionData['action'] == 'uploaded_submission')].dropna().sort_values(by='submission_id')
        filteredDF = filteredDF.drop_duplicates(subset="submission_id")
        subList = list(filteredDF.itertuples(index=False, name="Submission"))
        return subList

    def _backtrackViewDescription (self, subList, start_pos):
        """Walk backwards from start_pos-1 looking for a 'view_description'
        event; returns its index or -1."""
        i = start_pos - 1
        while (i >= 0):
            if subList[i].action == 'view_description':
                return i
            i = i - 1
        print("Backtrack view not found")
        return -1

+ 14 - 0
main.py

@@ -0,0 +1,14 @@
from csvParser import CSVParser
from cjson import processDataFromCSV
import sys


def bootstrap (csvPath, vplFolder):
    """Parse the activity CSV and run the full submission analysis over the
    VPL data folder."""
    parser = CSVParser(csvPath)
    processDataFromCSV(parser, vplFolder)


#--- run ---#
if __name__ == "__main__":
    # Fix: argument validation used `assert`, which is silently stripped
    # under `python -O`; fail explicitly with a usage message instead.
    if len(sys.argv) != 3:
        print("usage: main.py <submissions.csv> <vpl_data_folder>", file=sys.stderr)
        sys.exit(1)
    bootstrap(sys.argv[1], sys.argv[2])

+ 94 - 0
pycleaner.py

@@ -0,0 +1,94 @@
import re

# Very rough grammar for the Python identifiers we want to anonymize.
VAR = r"([a-zA-Z_][a-zA-Z0-9_]*)\s*=[^\n]+$"
FUNC = re.compile(r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\([^\)]*\)\s*:\s*")
FUNC_PARAM = re.compile(r"\(([^\)]*)\)\s*:\s*")

class CodeCleaner:
    """Anonymizes Python source for edit-distance comparison.

    Function names become f0, f1, ... and variable/parameter names become
    v0, v1, ...; comments and blank lines are removed, string literals are
    copied through untouched.
    """

    def __init__ (self):
        self.varCount = 0
        self.funcCount = 0
        self.symbolMap = {}  # original identifier -> anonymized name

    def cleanCode (self, text):
        """Return *text* minified with every known identifier renamed.

        Pass 1 harvests def-names, parameters and assignment targets into
        symbolMap; pass 2 is a character scanner that rewrites identifiers
        while skipping strings, and drops comments.
        """
        for m in FUNC.finditer(text):
            self.cleanFunc(m.group())
            for p in FUNC_PARAM.finditer(m.group()):
                self.cleanFuncParam(p.group())
        for m in re.finditer(VAR, text, re.MULTILINE):
            self.cleanVar(m.group())

        stringMode = False
        openChar = None  # quote character that opened the current string
        lineComment = False
        output = []
        alpha = ""  # identifier characters accumulated so far

        def flush():
            # Emit any pending identifier, renamed when known.
            nonlocal alpha
            if alpha:
                output.append(self.symbolMap.get(alpha, alpha))
                alpha = ""

        for c in text:
            if stringMode:
                # NOTE(review): an escaped quote (\' or \") terminates the
                # string early — acceptable for the distance-metric use case.
                stringMode = not (c == openChar)
                output.append(c)
            elif lineComment:
                # Comments are dropped entirely (their newline included).
                lineComment = c != '\n'
            elif c == '#':
                # Fix: flush first — a pending identifier used to leak past
                # the dropped comment into the following line.
                flush()
                lineComment = True
            elif c == '"' or c == '\'':
                # Fix: the original silently discarded a pending identifier
                # that appeared right before a string literal.
                flush()
                stringMode = True
                openChar = c
                output.append(c)
            elif re.match(r"[a-zA-Z0-9_]", c):
                alpha += c
            else:
                flush()
                output.append(c)
        # Fix: an identifier at end-of-text was dropped by the original.
        flush()

        joined = "".join(output)
        # Squash non-blank lines together with no separator: this mirrors
        # the C pipeline (which strips all newlines); the result is only
        # used for edit-distance comparison, never executed.
        return "".join([l for l in joined.splitlines() if len(l.strip()) > 0])

    def cleanFunc (self, line):
        """Register the function name of a matched 'def' as f<N>."""
        match = FUNC.search(line)
        if match is None:
            return  # defensive: caller always passes a FUNC match
        varID = match.group(1)
        self.symbolMap[varID] = "f{}".format(self.funcCount)
        self.funcCount += 1

    def cleanFuncParam (self, line):
        """Register every parameter of a matched '(...)' list as v<N>."""
        match = FUNC_PARAM.findall(line)[0]
        if len(match.strip()) <= 0:
            return
        for raw in match.split(","):
            varID = raw.strip()
            # NOTE(review): defaults/annotations ("a=1", "a: int") are kept
            # verbatim as map keys and simply never match during the scan.
            if varID not in self.symbolMap:
                self.symbolMap[varID] = "v{}".format(self.varCount)
                self.varCount += 1

    def cleanVar (self, line):
        """Register assignment targets ('name = ...') as v<N>."""
        for name in re.findall(VAR, line, re.M):
            varID = name.strip()
            if varID not in self.symbolMap:
                self.symbolMap[varID] = "v{}".format(self.varCount)
                self.varCount += 1

+ 107 - 0
submissionAnalysis.py

@@ -0,0 +1,107 @@
+from submissionFileReader import readSubmissionContent, levenshteinDistance
+from collections import namedtuple
+
+
class SubmissionAnalysis:
    """Computes per-submission effort metrics for one exercise.

    Per submission:
      TES - seconds elapsed since the previous submission (since the first
            interaction, for the first submission);
      DES - Levenshtein distance between consecutive submission contents
            (distance from "" for the first one);
      D/T - DES divided by TES (0 when either is 0).
    """

    def __init__ (self, exerciseID):
        self.exerciseID = exerciseID
        self.data = list()
        # Field names get an "f" prefix so they are valid namedtuple
        # identifiers even when exerciseID is numeric; headerMap translates
        # them back to the human-readable CSV header.
        self.SubmissionData = namedtuple("SubmissionData", "student_id f{0}_TES f{0}_DES f{0}_grade f{0}_DT f{0}_timestamp".format(exerciseID))
        headerMap = {}
        headerMap["f{0}_TES".format(exerciseID)] = "{0}_TES".format(exerciseID)
        headerMap["f{0}_DES".format(exerciseID)] = "{0}_DES".format(exerciseID)
        headerMap["f{0}_grade".format(exerciseID)] = "{0}_grade".format(exerciseID)
        headerMap["f{0}_DT".format(exerciseID)] = "{0}_D/T".format(exerciseID)
        headerMap["student_id"] = "student_id"
        headerMap["f{0}_timestamp".format(exerciseID)] = "{0}_timestamp".format(exerciseID)
        self.headerMap = headerMap

    def analyze (self, submissions, firstTimestamp, path):
        """Builds the metric tuples for one student's submission sequence.

        Returns (exerciseID, [SubmissionData, ...]).
        Raises AssertionError when no submission has readable source.
        """
        submissions = self.fixTimestamps(submissions)
        resultList = []
        submissionsWithFiles = []
        # Keep only submissions whose source file could be read and minified.
        for s in submissions:
            content = readSubmissionContent(path, self.exerciseID, s.submission_id)
            if len(content) > 0:
                submissionsWithFiles.append((s,content))
        assert len(submissionsWithFiles) >= 1, "No valid code submitted to exercise {} from student {}".format(self.exerciseID,submissions[0].user_id)
        total = len(submissionsWithFiles)
        # First submission: measured against the first interaction and "".
        firstTuple = submissionsWithFiles[0]
        first = firstTuple[0]
        firstTES = first.time - firstTimestamp
        firstContent = firstTuple[1]
        firstDES = levenshteinDistance("", firstContent)
        firstDT = 0 if firstTES == 0 or firstDES == 0 else firstDES/firstTES
        resultList.append(self.SubmissionData(first.user_id, firstTES, firstDES, first.grade, firstDT, first.time))
        # Remaining submissions: measured against their predecessor.
        for i in range(1, total):
            subTuple = submissionsWithFiles[i]
            sub = subTuple[0]
            tes = sub.time - submissionsWithFiles[i-1][0].time
            subContent = subTuple[1]
            prevContent = submissionsWithFiles[i-1][1]
            des = levenshteinDistance(prevContent,subContent)
            dt = 0 if tes == 0 or des == 0 else des/tes
            resultList.append(self.SubmissionData(sub.user_id, tes, des, sub.grade, dt, sub.time))
        return (self.exerciseID, resultList)

    def fixTimestamps (self, submissions):
        """De-duplicates identical consecutive timestamps by spreading them
        out, then re-sorts the list by time.

        NOTE(review): the `continue` below is a no-op (nothing follows it in
        the loop body); kept verbatim in this documentation-only pass.
        """
        sameTS = list()
        repeated = 0
        # Collect runs of indexes whose time equals their predecessor's.
        for i in range(1,len(submissions)):
            prev = i - 1
            if submissions[prev].time == submissions[i].time:
                sameTS.append(i)
            else:
                if len(sameTS) > 0:
                    repeated += len(sameTS)
                    self.spreadTSEvenly(sameTS, submissions)
                    sameTS = list()
                continue
        # Handle a run that extends to the end of the list.
        if len(sameTS) > 0:
            repeated += len(sameTS)
            self.spreadTSEvenly(sameTS, submissions)
        # we need to sort
        submissions.sort(key=lambda x: x.time)
        if repeated > 0:
            sub = submissions[0]
            print("{0} repeated {1} TS for exercise {2}".format(sub.user_id,repeated,self.exerciseID))
        return submissions

    def spreadTSEvenly (self, indexes, submissions):
        """Mutates *submissions* in place, nudging duplicated timestamps
        forward by fixed offsets (+30s for a single duplicate, +20s steps
        for a run).

        NOTE(review): the offsets are heuristic and may collide with later
        genuine timestamps — presumably acceptable; confirm.
        """
        print("repeated ts")
        if len(indexes) == 1:
            sub = submissions[indexes[0]]
            submissions[indexes[0]] = sub._replace(time=sub.time + 30)
        else:
            if len(indexes) > 2:
                print("We have a problem...")
            first = indexes[0]
            sub = submissions[first]
            submissions[first] = sub._replace(time=sub.time + 20)
            for i in range(1, len(indexes)):
                current = indexes[i]
                prev = indexes[i-1]
                sub = submissions[current]
                submissions[current] = sub._replace(time=submissions[prev].time + 20)


    def addData (self, submissionData):
        """Accumulates one student's result tuple for later export."""
        self.data.append(submissionData)

    def saveToCSV (self, folder, dataset):
        """Writes *dataset* (iterable of SubmissionData) to
        <folder>/<exerciseID>.csv with the translated header row."""
        with open("{}/{}.csv".format(folder, self.exerciseID),"w", encoding='utf-8') as file:
            fields = self.SubmissionData._fields
            translatedFields = [self.headerMap[x] for x in fields]
            header = ",".join(translatedFields)
            file.write(header)
            file.write('\n')
            for data in dataset:
                lineData = [getattr(data, x) for x in fields]
                line = ",".join(str(e) for e in lineData)
                file.write(line)
                file.write('\n')
            file.close()  # redundant: the 'with' block closes the file

+ 98 - 0
submissionFileReader.py

@@ -0,0 +1,98 @@
+import sys
+from math import trunc
+import os
+import tempfile
+import glob
+from ccleaner import CodeCleaner
+from pycleaner import CodeCleaner as PyCleaner
+sys.path.extend(['.', '..'])
+import re
+import subprocess
+import numpy as np
+
+USEFUL_REGEX = r"(//.*)|(/\*[\w\W\n\r]*?\*/)|(^\s*$)|(\{\s*\})|(^\s*\{\s*$)|(^\s*\}\s*$)"
+FILE_REGEX = re.compile(r".*\.(py|c)$",re.IGNORECASE)
+#VAR_NO_INIT = re.compile(r"(int|float|double|char|long|string)\s*\*?([a-zA-Z0-9_]+)\s*(,\s*[a-zA-Z0-9)]+\s*)*;")
+#VAR_INIT = re.compile(r"(int|float|double|char|long|string)\s*\*?([a-zA-Z0-9_]+)[^=]*=\s*[^,](,\s*[a-zA-Z0-9)][^=]*=\s*[^,])*;")
+
def readSubmissionContent (path, exerciseID, submissionID):
    """Locate, read and minify the first .c/.py file of a submission.

    Returns the minified source, or "" when no suitable file exists or
    minification fails (best-effort: the pipeline treats it as empty).
    """
    filePath = "%s%s/%s/submittedfiles/*.*"%(path, trunc(exerciseID), trunc(submissionID))
    files = glob.glob(filePath)
    files = [f for f in files if FILE_REGEX.match(f) != None]
    if len(files) == 0:
        print("Evaluated code for exercise {}, submission {} doesn't have a single submitted .c/.py file at {}".format(exerciseID, submissionID, filePath))
        return ""
    # Only the first matching file is analysed.
    file = files[0]
    # Fix: use a context manager — the original `open(...).read()` leaked
    # the file handle.
    with open(file, "r") as handle:
        content = handle.read()
    try:
        if file.endswith("py") :
            #python
            return minifyPython(content)
        else:
            #c code
            return minifyC(content)
    except Exception as e:
        print(e)
        return ""
+
def minifyC (text):
    """Minify C source: anonymize identifiers with CodeCleaner, then pipe
    the result through the external ./cminify.sh script (strips comments
    and newlines).

    Returns the script's stdout ("" when it produced no output).
    """
    usefulText = CodeCleaner().cleanCode(text)
    #print(usefulText)
    out = ""
    # NOTE(review): reopening a NamedTemporaryFile by name while it is still
    # open works on POSIX but not on Windows — presumably Linux-only tooling.
    with tempfile.NamedTemporaryFile() as fp:
        fp.write(usefulText.encode("utf-8"))
        fp.flush()
        # cminify.sh must be executable and in the current working directory.
        process = subprocess.run("./cminify.sh {}".format(fp.name),shell=True, capture_output=True, text=True)
        out = process.stdout
        fp.close()  # redundant: the 'with' block closes fp anyway
    return out
+
def minifyPython (text):
    """Return *text* minified by the Python CodeCleaner (identifiers
    anonymized, comments and blank lines removed)."""
    return PyCleaner().cleanCode(text)
+
def getSubmissionFile (path, exerciseID, submissionID):
    """Return the path of the first .c/.py file in the submission's
    'submittedfiles' folder.

    Raises AssertionError when none exists.
    NOTE(review): `assert` is stripped under `python -O`; callers catch the
    exception broadly, so confirm -O is never used in deployment.
    """
    filePath = "%s%s/%s/submittedfiles/*.*"%(path, trunc(exerciseID), trunc(submissionID))
    files = glob.glob(filePath)
    files = [f for f in files if FILE_REGEX.match(f) != None]
    assert len(files) > 0, "Evaluated code for exercise {}, submission {} doesn't have a single submitted .c file at {}".format(exerciseID, submissionID, filePath)
    return files[0]
+
def getUsefulLines (path, exerciseID, submissionID):
    """Count the useful lines of a submission's source file.

    "Useful" excludes comments, blank lines and lines that are only braces
    (see USEFUL_REGEX). The count is the number of line separators joining
    the remaining lines, i.e. kept lines minus one.
    """
    subFile = getSubmissionFile(path, exerciseID, submissionID)
    with open(subFile, "r") as fileHandler:
        raw = fileHandler.read()
    stripped = re.sub(USEFUL_REGEX, "", raw, flags=re.MULTILINE)
    kept = [line for line in stripped.splitlines() if line]
    return os.linesep.join(kept).count("\n")
+
def levenshteinDistance (seq1, seq2):
    """Classic dynamic-programming Levenshtein (edit) distance.

    Returns the minimum number of insertions, deletions and substitutions
    needed to turn seq1 into seq2, as a numpy float.
    """
    rows = len(seq1) + 1
    cols = len(seq2) + 1
    dist = np.zeros((rows, cols))
    # Transforming from/to the empty prefix costs its length.
    dist[:, 0] = np.arange(rows)
    dist[0, :] = np.arange(cols)

    for r in range(1, rows):
        for c in range(1, cols):
            substitution = 0 if seq1[r - 1] == seq2[c - 1] else 1
            dist[r, c] = min(
                dist[r - 1, c] + 1,              # deletion
                dist[r, c - 1] + 1,              # insertion
                dist[r - 1, c - 1] + substitution,
            )
    return dist[rows - 1, cols - 1]
+

+ 43 - 0
threadPool.py

@@ -0,0 +1,43 @@
+from queue import Queue
+from threading import Thread
+from joblib import Parallel, delayed
+import multiprocessing as mb
+import traceback
+
def createParallelPool (func, inputs):
    """Run func over every (task, workload) pair in parallel; return the results.

    Fixes two defects in the original:
    - `mb.cpu_count()/2` is a float under Python 3; joblib's n_jobs must be
      an int, so integer division (floored at 1) is used instead.
    - `Parallel` should be invoked once with a generator of delayed calls;
      re-entering the instance per item and returning the instance itself
      discarded the actual results.
    """
    n_jobs = max(1, mb.cpu_count() // 2)
    return Parallel(n_jobs=n_jobs)(
        delayed(func)(task, workload) for (task, workload) in inputs
    )
+
class Worker (Thread):
    """Daemon thread that consumes and executes tasks from a shared queue."""

    def __init__ (self, tasks):
        # daemon=True: the worker must not block interpreter shutdown
        super().__init__(daemon=True)
        self.tasks = tasks
        self.start()

    def run (self):
        while True:
            func, args, kwargs = self.tasks.get()
            try:
                func(*args, **kwargs)
            except Exception as err:
                # report and keep consuming; a failing task must not kill the worker
                print(type(err), err)
            self.tasks.task_done()
+
class ThreadPool:
    """Fixed-size pool of Worker threads fed through a bounded queue."""

    def __init__ (self, num_threads):
        self.tasks = Queue(num_threads)
        for _ in range(num_threads):
            Worker(self.tasks)

    def add_task (self, func, *args, **kargs):
        """Enqueue func(*args, **kargs) for execution by a worker."""
        self.tasks.put((func, args, kargs))

    def wait_completion (self):
        """Block until every queued task has been processed."""
        self.tasks.join()

+ 94 - 0
vplAnalyzer.py

@@ -0,0 +1,94 @@
+from csvParser import CSVParser
+import sys
+import traceback
+from submissionAnalysis import SubmissionAnalysis
+from submissionFileReader import getUsefulLines
+from joblib import Parallel, delayed
+
def processSubmission (subAnalisys:SubmissionAnalysis, workload:'tuple[int,list,int,str]'):
    """Analyze one student's submissions; return the result or None on failure."""
    (studentID, submissions, firstTimestamp, path) = workload
    # guard: nothing to analyze without a first interaction and submissions
    if firstTimestamp < 0 or not submissions:
        print("No valid submission for exercise {} from student {}".format(subAnalisys.exerciseID, studentID))
        return None
    try:
        return subAnalisys.analyze(submissions, firstTimestamp, path)
    except Exception as e:
        # boundary handler: log and keep the batch going
        print(e)
        traceback.print_exc()
        return None
+
def countUsefulLines (subAnalysis:SubmissionAnalysis, workload):
    """Count useful lines of a student's last submission; None when unavailable."""
    (studentID, submissions, path) = workload
    if not submissions:
        print("No valid submission for exercise {} from student {}".format(subAnalysis.exerciseID, studentID))
        return None
    try:
        lineCount = getUsefulLines(path,subAnalysis.exerciseID,submissions[-1].submission_id)
        return (subAnalysis.exerciseID, studentID, lineCount)
    except Exception as e:
        # boundary handler: log and keep the batch going
        print(e)
        traceback.print_exc()
        return None
+
+
def add_task (pool, task, workload):
    """Append a (task, workload) pair to the pending-work list."""
    pool.append((task, workload))
+
def bootstrap (csvPath, vplFolder, outputFolder):
    """Analyze every student submission per exercise and write one CSV each.

    Builds a (task, workload) pool covering all exercises and students,
    runs the analyses on 8 joblib workers, groups the results per exercise
    and delegates CSV output to each exercise's SubmissionAnalysis.
    """
    parser = CSVParser(csvPath)
    pool = []
    tasks = {}
    for e in parser.exercises:
        (submissions, students) = parser.getSubmissions(e)
        studentsSub = [ (s, parser.getStudentValidSubmissions(submissions, s), parser.getStudentFirstInteraction(submissions, s), vplFolder) for s in students]
        task = SubmissionAnalysis(e)
        tasks[e] = task
        for workload in studentsSub:
            add_task(pool, task, workload)
    result = Parallel(8)(delayed(processSubmission)(t,w) for (t,w) in pool)
    exerciseMap = {}
    for r in result:
        if r is None:  # failed/empty analyses are skipped
            continue
        (e, data) = r
        # group result rows per exercise
        exerciseMap.setdefault(e, []).extend(data)
    for e in exerciseMap:
        tasks[e].saveToCSV(outputFolder, exerciseMap[e])
+
def checkUsefulLines (csvPath, vplFolder, _):
    """Count useful lines of each student's last submission; dump a CSV.

    Results are written to useful_count.csv as exercise,student,count rows.
    """
    parser = CSVParser(csvPath)
    pool = []
    for e in parser.exercises:
        # NOTE(review): hard-coded exercise filter kept from the original run
        if e not in [5035,4988]:
            continue
        (submissions, students) = parser.getSubmissions(e)
        studentsSub = [ (s, parser.getStudentLastSubmission(submissions, s), vplFolder) for s in students]
        task = SubmissionAnalysis(e)
        for workload in studentsSub:
            add_task(pool, task, workload)
    result = Parallel(8)(delayed(countUsefulLines)(t,w) for (t,w) in pool)
    output = ''
    for r in result:
        if r is None:  # skip failed counts
            continue
        (exercise, student, count) = r
        output += f'{exercise},{student},{count}\n'
    with open("useful_count.csv",'w') as fileHandler:
        # the context manager closes the file; explicit close() removed
        fileHandler.write(output)
+        fileHandler.close()
+
+
CHECK_LINES = True  # toggle: count useful lines instead of running the full analysis

#--- run ---#
if __name__ == "__main__":
    # `assert` is stripped under `python -O`; validate argv explicitly instead
    if len(sys.argv) != 4:
        raise SystemExit("You must provide the following: path to the csv, the folder with vpl data and the output folder path respectively")
    if not CHECK_LINES:
        bootstrap(sys.argv[1], sys.argv[2], sys.argv[3])
    else:
        checkUsefulLines(sys.argv[1], sys.argv[2], sys.argv[3])
+