123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305 |
- import sys
- import os
- sys.path.extend(['.', '..'])
- from pycparser import parse_file, c_ast
- import queue
- from threading import Thread
- import re
- import copy
- import files
- import analyser
- from submissionFileReader import getSubmissionFile
# Matches C comments: `// ...` to end of line, or a (possibly multi-line) `/* ... */`.
COMMENT_REGEX = r"(//.*)|(/\*[\w\W\n\r]*?\*/)"
# Everything stripped before counting "useful" lines: comments, blank lines,
# empty `{}` pairs, and lines that hold only an opening or closing brace.
USEFUL_REGEX = COMMENT_REGEX + r"|(^\s*$)|(\{\s*\})|(^\s*\{\s*$)|(^\s*\}\s*$)"

# Column groups shared by the CSV reports.
OP_HEADER = [
    '!', '<=', '%', '>=', '++', '+', '*', '-', '/', '<', '--', '&&',
    'p++', '\'==\'', 'p--', '!=', '||', '>',
]
COMMANDS = [
    'Return', 'For', 'FuncCall', 'Assignment', 'Switch',
    'DoWhile', 'While', 'FuncDef', 'If',
]
DECLARATIONS = [
    'vector_int', 'vector_float', 'matrix_string', 'string', 'int',
    'pointer_int', 'float', 'matrix_int', 'pointer_float', 'matrix_float',
]

# data.csv: per-student summary followed by operator, command and declaration counts.
CSV_HEADER = [
    'assignment_id', 'student_id', 'total_submissions',
    'code_lines', 'total_lines', 'comments',
] + OP_HEADER + COMMANDS + DECLARATIONS

# cond_structure.csv: one row per analysed condition command.
COND_CSV_HEADER = [
    'assignment_id', 'student_id', 'commands_count',
    'cond_type', 'logic_op_count', 'rel_op_count',
] + OP_HEADER

# for_structure.csv: same columns as the condition report plus two `for`-specific flags.
# Concatenation yields a new list, so COND_CSV_HEADER is left untouched.
FOR_CSV_HEADER = COND_CSV_HEADER + ['use_assignment', 'use_next_loop']

# Results appended by processStudentData (from worker threads) and read by the report writers.
finalDataList = []
class Worker(Thread):
    """Daemon thread that executes tasks taken from a shared queue.

    Each queue item is a ``(func, args, kargs)`` tuple. The thread starts
    itself on construction and loops forever.
    """

    def __init__(self, tasks):
        """Store the task queue and start the thread immediately.

        Args:
            tasks: queue object providing ``get()`` and ``task_done()``.
        """
        super().__init__()
        self.tasks = tasks
        # Daemon threads do not keep the interpreter alive at exit.
        self.daemon = True
        self.start()

    def run(self):
        """Consume and execute tasks until the process exits."""
        while True:
            func, args, kargs = self.tasks.get()
            try:
                func(*args, **kargs)
            except Exception as e:
                # Best effort: report the failure and keep serving tasks.
                print(e)
            finally:
                # Always balance get() with task_done(); otherwise a task that
                # escapes the handler above would make Queue.join() wait forever.
                self.tasks.task_done()
class ThreadPool:
    """Fixed-size group of Worker threads fed through one shared queue."""

    def __init__(self, num_threads):
        """Create the task queue and spawn ``num_threads`` workers.

        The queue capacity equals ``num_threads``, so ``add_task`` blocks
        while that many tasks are already waiting.
        """
        self.tasks = queue.Queue(maxsize=num_threads)
        for _unused in range(num_threads):
            # Workers start themselves; no reference needs to be kept.
            Worker(self.tasks)

    def add_task(self, func, *args, **kargs):
        """Queue ``func(*args, **kargs)`` for execution by a worker."""
        job = (func, args, kargs)
        self.tasks.put(job)

    def wait_completion(self):
        """Block until every queued task has been marked done."""
        self.tasks.join()
def processFile(filename):
    """Parse a C source file and return its top-level definitions.

    The file is preprocessed with ``gcc -E`` (using the fake libc headers in
    ``utils/fake_libc_include``) and parsed with pycparser.

    Args:
        filename: path of the C file; an empty or ``None`` value yields ``[]``.

    Returns:
        A list of the top-level AST nodes whose class is named ``FuncDef``
        or ``Decl``, in source order.
    """
    if not filename:
        return []
    ast = parse_file(filename, use_cpp=True,
                     cpp_path='gcc',
                     cpp_args=['-E', r'-Iutils/fake_libc_include'])
    # Compare class names by value: the previous `is 'FuncDef'` identity test
    # only worked thanks to string interning and warns on Python 3.8+.
    wanted = ('FuncDef', 'Decl')
    return [node for (_, node) in ast.children()
            if type(node).__name__ in wanted]
def processStudentData(studentData, assignment):
    """Analyse one student's submission and record the result.

    Args:
        studentData: tuple indexed as ``[0]`` student identifier, ``[1]`` path
            of the C file to analyse, ``[2]`` value stored next to the
            analysis (written later as the ``total_submissions`` column).
        assignment: assignment identifier passed on to the analyser.

    On success a tuple ``(analysis, studentData[2], totalLines,
    totalComments, usefulLines)`` is appended to ``finalDataList`` and a
    summary is printed. Any failure is reported on stdout and swallowed so
    that one bad submission does not stop the batch.
    """
    try:
        result = processFile(studentData[1])
        if len(result) == 0:
            return
        data = analyser.ASTAnalyser(assignment, studentData[0], result)
        data.beginAnalysis()
        # Context manager closes the handle even if read() fails.
        with open(studentData[1], 'r') as source:
            fileText = source.read()
        totalLines = fileText.count("\n")
        totalComments = len(re.findall(COMMENT_REGEX, fileText, re.MULTILINE))
        # Drop comments, blank lines and brace-only lines, then count what is left.
        usefulText = re.sub(USEFUL_REGEX, "", fileText, flags=re.MULTILINE)
        usefulText = os.linesep.join([s for s in usefulText.splitlines() if s])
        usefulLines = usefulText.count("\n")
        finalDataList.append((data, studentData[2], totalLines, totalComments, usefulLines))
        print(
            "Processing data from student %s at assignment %s \n"
            " Commands: %s \n"
            " Declarations:\n\tBasic:%s\n\tMatrix:%s\n\tPointers:%s\n\tVectors:%s \n"
            " Operators: %s \n"
            " Constants: %s \n"
            " For data: %s \n"
            " Condition data: %s \n"
            " Lines:%s, Comments:%s, Useful:%s"
            % (studentData[0], assignment, data.commandCount, data.declarations,
               data.declarationsMatrixes, data.declarationsPointers,
               data.declarationsVectors, data.operatorsCount,
               data.constantInitCount, data.forCommandStr(),
               data.conditionCommandStr(), totalLines, totalComments, usefulLines)
        )
    except Exception as e:
        # Deliberate best effort: log and continue with the next submission.
        print("%s! Failed to process file: %s" % (e, studentData[1]))
def loadStudentsFolders(raiz, folder):
    """Return ``files.filesFromFolder`` applied to ``<raiz>/<folder>`` and "usersdata".

    Presumably this lists the per-student folders of one assignment;
    the exact result depends on the project's ``files`` helper.
    """
    return files.filesFromFolder(os.path.join(raiz, folder), "usersdata")
def loadStudentFiles(raiz, folder, studentsFolders):
    """Build one ``(student, file, submission_count)`` tuple per student folder.

    For each name in ``studentsFolders`` the function looks under
    ``<raiz>/<folder>/usersdata/<student>``, asks the ``files`` helper for the
    folder count and the highest-named entry, and takes the first file found
    in that entry's ``submittedfiles`` directory. The file element is ``""``
    when nothing matched.
    """
    collected = []
    for student in studentsFolders:
        studentDir = os.path.join(raiz, folder, "usersdata", student)
        submissionCount = files.countFolders(studentDir)
        latest = files.highestFileName(studentDir)
        submittedDir = os.path.join(studentDir, latest, "submittedfiles")
        # NOTE(review): "*[cC]*" matches any name containing c/C, not only
        # "*.c" files - confirm this is intended.
        candidates = files.getFilesInFolder(submittedDir, "*[cC]*")
        chosen = candidates[0] if len(candidates) != 0 else ""
        collected.append((student, chosen, submissionCount))
    return collected
def loadAssignments(raiz):
    """Map every assignment folder under ``raiz`` to its list of student tuples.

    Assignments for which no student folders are found map to an empty list;
    the others map to the result of ``loadStudentFiles``.
    """
    assignments = {}
    for name in files.filesFromFolder(raiz, ""):
        roster = loadStudentsFolders(raiz, name)
        if len(roster) == 0:
            assignments[name] = []
        else:
            assignments[name] = loadStudentFiles(raiz, name, roster)
    return assignments
def initEmptyDict(list):
    """Return a new dict mapping every item of ``list`` to 0, in iteration order.

    The parameter name shadows the builtin; it is kept so existing callers
    remain valid.
    """
    return dict.fromkeys(list, 0)
def saveToFile(filePath, data):
    """Write ``data`` to ``filePath``, replacing any existing content.

    Args:
        filePath: destination path; the file is created if missing.
        data: text to write.
    """
    # The context manager closes the file even when write() raises.
    with open(filePath, "w+") as target:
        target.write(data)
def _countsRow(counts):
    """Render the values of ``counts`` as comma-separated text, in insertion order."""
    return ",".join(str(v) for v in counts.values())


def _mergeCounts(target, counts, prefix=None):
    """Copy every entry of ``counts`` into ``target``, optionally prefixing the key."""
    for key in counts:
        target[key if prefix is None else prefix + key] = counts[key]


def _operatorColumns(opList):
    """Count the operators in ``opList`` and render one CSV column per valid operator."""
    opData = initEmptyDict(analyser.VALID_OPS)
    for op in opList:
        opData[op] += 1
    return _countsRow(opData)


def _runAnalysis(data):
    """Run processStudentData for every student of every assignment on a 10-thread pool."""
    pool = ThreadPool(10)
    for a in data:
        for studentData in data[a]:
            pool.add_task(processStudentData, studentData, a)
    pool.wait_completion()


def _writeReports():
    """Write data.csv, for_structure.csv, cond_structure.csv and const_init.csv.

    Reads everything accumulated in ``finalDataList``. Rows are grouped by
    assignment, in the order each assignment first appears in that list.
    """
    assignmentList = {}
    for entry in finalDataList:
        assignmentList.setdefault(entry[0].assignment, []).append(entry)

    # Rows are collected in lists and joined once, instead of growing one
    # string per row.
    mainRows = []
    forRows = []
    condRows = []
    constantInitCount = {}
    for assignmentKey, entries in assignmentList.items():
        for studentData in entries:
            astInfo = studentData[0]

            # Constant-initialisation totals are summed over all students.
            for k in astInfo.constantInitCount:
                if k in constantInitCount:
                    constantInitCount[k] += astInfo.constantInitCount[k]
                else:
                    constantInitCount[k] = astInfo.constantInitCount[k]

            # Start from zeroed columns so every row has the same layout.
            studentOpData = initEmptyDict(analyser.VALID_OPS)
            _mergeCounts(studentOpData, astInfo.operatorsCount)
            studentCommandData = initEmptyDict(COMMANDS)
            _mergeCounts(studentCommandData, astInfo.commandCount)
            studentDeclarationData = initEmptyDict(DECLARATIONS)
            _mergeCounts(studentDeclarationData, astInfo.declarations)
            _mergeCounts(studentDeclarationData, astInfo.declarationsPointers, "pointer_")
            _mergeCounts(studentDeclarationData, astInfo.declarationsVectors, "vector_")
            _mergeCounts(studentDeclarationData, astInfo.declarationsMatrixes, "matrix_")

            # Tuple layout comes from processStudentData:
            # (analysis, submissions, totalLines, totalComments, usefulLines).
            mainRows.append("%s,%s,%s,%s,%s,%s,%s,%s,%s\n" % (
                assignmentKey, astInfo.student, studentData[1],
                studentData[4], studentData[2], studentData[3],
                _countsRow(studentOpData),
                _countsRow(studentCommandData),
                _countsRow(studentDeclarationData)))

            # for_structure.csv
            for i in astInfo.forCommandData:
                forRows.append("%s,%s,%s,%s,%s,%s,%s,%s,%s\n" % (
                    assignmentKey, astInfo.student, i.cmdCount, i.condType,
                    i.numLogicOps, i.numRelOps, _operatorColumns(i.opList),
                    i.useAssignment, i.useNext))

            # cond_structure.csv
            for i in astInfo.conditionCommandData:
                condRows.append("%s,%s,%s,%s,%s,%s,%s\n" % (
                    assignmentKey, astInfo.student, i.cmdCount, i.condType,
                    i.numLogicOps, i.numRelOps, _operatorColumns(i.opList)))

    saveToFile("data.csv", ','.join(CSV_HEADER) + '\n' + ''.join(mainRows))
    saveToFile("for_structure.csv", ','.join(FOR_CSV_HEADER) + '\n' + ''.join(forRows))
    saveToFile("cond_structure.csv", ','.join(COND_CSV_HEADER) + '\n' + ''.join(condRows))
    constantRows = ["%s,%s\n" % (k, str(constantInitCount[k])) for k in constantInitCount]
    saveToFile("const_init.csv", "constant,count\n" + ''.join(constantRows))


def processDataFromCSV(parser, vplFolder):
    """Analyse the last valid submission of every student listed by ``parser``.

    Args:
        parser: object exposing ``exercises``, ``getSubmissions(exercise)``
            (returning ``(allSubs, students)``) and
            ``getStudentValidSubmissions(allSubs, student)``.
        vplFolder: passed to ``getSubmissionFile`` to locate submission files.

    For each student the submission with the highest ``submission_id`` is
    resolved through ``getSubmissionFile``. Students without a usable
    submission are recorded with an empty file entry, which the analysis step
    skips. The CSV reports are then written and the analyser counters printed.
    """
    data = {}
    for e in parser.exercises:
        (allSubs, students) = parser.getSubmissions(e)
        studentData = []
        for student in students:
            submissions = parser.getStudentValidSubmissions(allSubs, student)
            content = ""
            try:
                # Explicit check instead of `assert`, which is stripped under -O.
                if len(submissions) > 0:
                    submissions.sort(key=lambda x: x.submission_id)
                    lastSub = submissions[-1]
                    content = getSubmissionFile(vplFolder, e, lastSub.submission_id)
            except Exception:
                # Best effort: an unreadable submission counts as "no file".
                content = ""
            studentData.append((student, content, len(submissions)))
        data[e] = studentData

    _runAnalysis(data)
    _writeReports()
    # NOTE(review): "Entrada" (input) is paired with PRINT_COUNT and "Saida"
    # (output) with SCAN_COUNT - confirm the labels are not swapped.
    print("Entrada: {}, Saida: {}".format(analyser.PRINT_COUNT, analyser.SCAN_COUNT))


def main():
    """Command-line entry point.

    ``-f <file>`` prints the parsed top-level nodes of one C file; any other
    first argument is taken as a folder (relative to the current directory)
    containing assignments to analyse. Anything else prints the usage line.
    """
    usage = "cjson -f file | cjson folder/"
    if len(sys.argv) <= 1:
        print(usage)
        return
    firstArg = sys.argv[1]
    if firstArg == "-f":
        if len(sys.argv) > 2:
            print(processFile(sys.argv[2]))
        else:
            print(usage)
        return
    raiz = "./" + firstArg
    _runAnalysis(loadAssignments(raiz))
    _writeReports()


#--- run ---#
if __name__ == "__main__":
    main()
|