import sys import os sys.path.extend(['.', '..']) from pycparser import parse_file, c_ast import queue from threading import Thread import re import copy import files import analyser from submissionFileReader import getSubmissionFile COMMENT_REGEX = r"(//.*)|(/\*[\w\W\n\r]*?\*/)" USEFUL_REGEX = r"(//.*)|(/\*[\w\W\n\r]*?\*/)|(^\s*$)|(\{\s*\})|(^\s*\{\s*$)|(^\s*\}\s*$)" CSV_HEADER = ['assignment_id', 'student_id', 'total_submissions', 'code_lines','total_lines','comments'] OP_HEADER = ['!', '<=', '%', '>=', '++', '+', '*', '-', '/', '<', '--', '&&', 'p++', '\'==\'', 'p--', '!=', '||', '>'] CSV_HEADER.extend(OP_HEADER) COMMANDS = ['Return', 'For', 'FuncCall', 'Assignment', 'Switch', 'DoWhile', 'While', 'FuncDef', 'If'] CSV_HEADER.extend(COMMANDS) DECLARATIONS = ['vector_int', 'vector_float', 'matrix_string', 'string', 'int', 'pointer_int', 'float', 'matrix_int', 'pointer_float', 'matrix_float'] CSV_HEADER.extend(DECLARATIONS) COND_CSV_HEADER = ['assignment_id', 'student_id', 'commands_count', 'cond_type', 'logic_op_count', 'rel_op_count'] COND_CSV_HEADER.extend(OP_HEADER) FOR_CSV_HEADER = copy.deepcopy(COND_CSV_HEADER) FOR_CSV_HEADER.extend(['use_assignment','use_next_loop']) finalDataList = list() class Worker (Thread): """Thread executing tasks from a given tasks queue""" def __init__ (self, tasks): Thread.__init__(self) self.tasks = tasks self.daemon = True self.start() def run (self): while True: func, args, kargs = self.tasks.get() try: func(*args, **kargs) except Exception as e: print(e) self.tasks.task_done() class ThreadPool: """Pool of threads consuming tasks from a queue""" def __init__ (self, num_threads): self.tasks = queue.Queue(num_threads) for _ in range(num_threads): Worker(self.tasks) def add_task (self, func, *args, **kargs): """Add a task to the queue""" self.tasks.put((func, args, kargs)) def wait_completion (self): """Wait for completion of all the tasks in the queue""" self.tasks.join() def processFile (filename): if len(filename) == 0: return list() ast = parse_file(filename, use_cpp=True, cpp_path='gcc', cpp_args=['-E', r'-Iutils/fake_libc_include']) nodeList = [node for (_, node) in ast.children() if node.__class__.__name__ is 'FuncDef' or node.__class__.__name__ is 'Decl'] return nodeList def processStudentData (studentData, assignment): try: result = processFile(studentData[1]) if len(result) == 0: return data = analyser.ASTAnalyser(assignment, studentData[0], result) data.beginAnalysis() fileText = open(studentData[1], 'r').read() totalLines = fileText.count("\n") totalComments = len(re.findall(COMMENT_REGEX, fileText, re.MULTILINE)) usefulText = re.sub(USEFUL_REGEX, "", fileText, flags=re.MULTILINE) usefulText = os.linesep.join([s for s in usefulText.splitlines() if s]) usefulLines = usefulText.count("\n") finalDataList.append((data, studentData[2], totalLines, totalComments, usefulLines)) print("Processing data from student %s at assignment %s \n Commands: %s \n Declarations:\n\tBasic:%s\n\tMatrix:%s\n\tPointers:%s\n\tVectors:%s \n Operators: %s \n Constants: %s \n For data: %s \n Condition data: %s \n Lines:%s, Comments:%s, Useful:%s" % (studentData[0], assignment, data.commandCount, data.declarations, data.declarationsMatrixes, data.declarationsPointers, data.declarationsVectors, data.operatorsCount, data.constantInitCount, data.forCommandStr(), data.conditionCommandStr(), totalLines, totalComments, usefulLines)) except Exception as e: print("%s! Failed to process file: %s" % (e, studentData[1])) def loadStudentsFolders (raiz, folder): userDataPath = os.path.join(raiz, folder) return files.filesFromFolder(userDataPath, "usersdata") def loadStudentFiles (raiz, folder, studentsFolders): studentsData = [] for s in studentsFolders: path = os.path.join(raiz, folder, "usersdata", s) totalSub = files.countFolders(path) finalFolder = files.highestFileName(path) cFileFolder = os.path.join(path, finalFolder, "submittedfiles") cFiles = files.getFilesInFolder(cFileFolder, "*[cC]*") if len(cFiles) == 0: studentsData.append((s, "", totalSub)) else: studentsData.append((s, cFiles[0], totalSub)) return studentsData def loadAssignments (raiz): assignmentsFolders = files.filesFromFolder(raiz, "") assignments = {} for a in assignmentsFolders: studentsFolders = loadStudentsFolders(raiz, a) if(len(studentsFolders) == 0): assignments[a] = [] continue studentsData = loadStudentFiles(raiz, a, studentsFolders) assignments[a] = studentsData return assignments def initEmptyDict (list): result = dict() for k in list: result[k] = 0 return result def saveToFile (filePath, data): file = open(filePath, "w+") file.write(data) file.close() def processDataFromCSV (parser, vplFolder): assingments = parser.exercises data = {} for e in assingments: (allSubs, students) = parser.getSubmissions(e) studentData = [] for student in students: submissions = parser.getStudentValidSubmissions(allSubs, student) try: assert len(submissions) > 0 submissions.sort(key = lambda x : x.submission_id) lastSub = submissions[-1] content = getSubmissionFile(vplFolder, e, lastSub.submission_id) studentData.append((student, content, len(submissions))) except Exception: studentData.append((student, "", len(submissions))) data[e] = studentData[:] pool = ThreadPool(10) for a in data: for studentData in data[a]: pool.add_task(processStudentData, studentData, a) pool.wait_completion() mainCSVFile = "" forCSVFile = "" condCSVFile = "" assignmentList = dict() constantInitCount = dict() for data in finalDataList: if data[0].assignment in assignmentList: assignmentList[data[0].assignment].append(data) else: assignmentList[data[0].assignment] = list() assignmentList[data[0].assignment].append(data) for assignmentKey in assignmentList: for studentData in assignmentList[assignmentKey]: astInfo = studentData[0] for k in astInfo.constantInitCount: if k in constantInitCount: constantInitCount[k] += astInfo.constantInitCount[k] else: constantInitCount[k] = astInfo.constantInitCount[k] studentOpData = initEmptyDict(analyser.VALID_OPS) for key in astInfo.operatorsCount: studentOpData[key] = astInfo.operatorsCount[key] studentCommandData = initEmptyDict(COMMANDS) for key in astInfo.commandCount: studentCommandData[key] = astInfo.commandCount[key] studentDeclarationData = initEmptyDict(DECLARATIONS) for key in astInfo.declarations: studentDeclarationData[key] = astInfo.declarations[key] for key in astInfo.declarationsPointers: studentDeclarationData["pointer_" + key] = astInfo.declarationsPointers[key] for key in astInfo.declarationsVectors: studentDeclarationData["vector_" + key] = astInfo.declarationsVectors[key] for key in astInfo.declarationsMatrixes: studentDeclarationData["matrix_" + key] = astInfo.declarationsMatrixes[key] mainCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, studentData[1], studentData[4], studentData[2], studentData[3]) mainCSVFile += "," + ','.join([str(v) for v in studentOpData.values()]) mainCSVFile += "," + ",".join([str(v) for v in studentCommandData.values()]) mainCSVFile += "," + ",".join([str(v) for v in studentDeclarationData.values()]) mainCSVFile += "\n" #For_structure.csv for i in astInfo.forCommandData: forCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, i.cmdCount, i.condType, i.numLogicOps, i.numRelOps) opData = initEmptyDict(analyser.VALID_OPS) for op in i.opList: opData[op] += 1 forCSVFile += "," + ','.join([str(v) for v in opData.values()]) forCSVFile += ",%s,%s\n" % (i.useAssignment, i.useNext) #condition_structure.csv for i in astInfo.conditionCommandData: condCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, i.cmdCount, i.condType, i.numLogicOps, i.numRelOps) opData = initEmptyDict(analyser.VALID_OPS) for op in i.opList: opData[op] += 1 condCSVFile += "," + ','.join([str(v) for v in opData.values()]) condCSVFile += "\n" mainCSVFile = ','.join(CSV_HEADER) + '\n' + mainCSVFile saveToFile("data.csv", mainCSVFile) forCSVFile = ','.join(FOR_CSV_HEADER) + '\n' + forCSVFile saveToFile("for_structure.csv", forCSVFile) condCSVFile = ','.join(COND_CSV_HEADER) + '\n' + condCSVFile saveToFile("cond_structure.csv", condCSVFile) constantInitFile = "constant,count\n" for k in constantInitCount: constantInitFile += "%s,%s\n" % (k, str(constantInitCount[k])) saveToFile("const_init.csv", constantInitFile) print("Entrada: {}, Saida: {}".format(analyser.PRINT_COUNT,analyser.SCAN_COUNT)) #--- run ---# if __name__ == "__main__": if len(sys.argv) > 1: file = sys.argv[1] if file == "-f" and len(sys.argv) > 2: print(processFile(sys.argv[2])) elif file != "-f": raiz = "./" + sys.argv[1] data = loadAssignments(raiz) pool = ThreadPool(10) for a in data: for studentData in data[a]: pool.add_task(processStudentData, studentData, a) pool.wait_completion() mainCSVFile = "" forCSVFile = "" condCSVFile = "" assignmentList = dict() constantInitCount = dict() for data in finalDataList: if data[0].assignment in assignmentList: assignmentList[data[0].assignment].append(data) else: assignmentList[data[0].assignment] = list() assignmentList[data[0].assignment].append(data) for assignmentKey in assignmentList: for studentData in assignmentList[assignmentKey]: astInfo = studentData[0] for k in astInfo.constantInitCount: if k in constantInitCount: constantInitCount[k] += astInfo.constantInitCount[k] else: constantInitCount[k] = astInfo.constantInitCount[k] studentOpData = initEmptyDict(analyser.VALID_OPS) for key in astInfo.operatorsCount: studentOpData[key] = astInfo.operatorsCount[key] studentCommandData = initEmptyDict(COMMANDS) for key in astInfo.commandCount: studentCommandData[key] = astInfo.commandCount[key] studentDeclarationData = initEmptyDict(DECLARATIONS) for key in astInfo.declarations: studentDeclarationData[key] = astInfo.declarations[key] for key in astInfo.declarationsPointers: studentDeclarationData["pointer_" + key] = astInfo.declarationsPointers[key] for key in astInfo.declarationsVectors: studentDeclarationData["vector_" + key] = astInfo.declarationsVectors[key] for key in astInfo.declarationsMatrixes: studentDeclarationData["matrix_" + key] = astInfo.declarationsMatrixes[key] mainCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, studentData[1], studentData[4], studentData[2], studentData[3]) mainCSVFile += "," + ','.join([str(v) for v in studentOpData.values()]) mainCSVFile += "," + ",".join([str(v) for v in studentCommandData.values()]) mainCSVFile += "," + ",".join([str(v) for v in studentDeclarationData.values()]) mainCSVFile += "\n" #For_structure.csv for i in astInfo.forCommandData: forCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, i.cmdCount, i.condType, i.numLogicOps, i.numRelOps) opData = initEmptyDict(analyser.VALID_OPS) for op in i.opList: opData[op] += 1 forCSVFile += "," + ','.join([str(v) for v in opData.values()]) forCSVFile += ",%s,%s\n" % (i.useAssignment, i.useNext) #condition_structure.csv for i in astInfo.conditionCommandData: condCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, i.cmdCount, i.condType, i.numLogicOps, i.numRelOps) opData = initEmptyDict(analyser.VALID_OPS) for op in i.opList: opData[op] += 1 condCSVFile += "," + ','.join([str(v) for v in opData.values()]) condCSVFile += "\n" mainCSVFile = ','.join(CSV_HEADER) + '\n' + mainCSVFile saveToFile("data.csv", mainCSVFile) forCSVFile = ','.join(FOR_CSV_HEADER) + '\n' + forCSVFile saveToFile("for_structure.csv", forCSVFile) condCSVFile = ','.join(COND_CSV_HEADER) + '\n' + condCSVFile saveToFile("cond_structure.csv", condCSVFile) constantInitFile = "constant,count\n" for k in constantInitCount: constantInitFile += "%s,%s\n" % (k, str(constantInitCount[k])) saveToFile("const_init.csv", constantInitFile) else: print("cjson -f file | cjon folder/") else: print("cjson -f file | cjon folder/")