12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- from csvParser import CSVParser
- import sys
- import traceback
- from submissionAnalysis import SubmissionAnalysis
- from submissionFileReader import getUsefulLines
- from joblib import Parallel, delayed
def processSubmission(subAnalisys: SubmissionAnalysis, workload: 'tuple[int,list,int,str]'):
    """Run ``subAnalisys.analyze`` on one student's workload.

    Args:
        subAnalisys: Object exposing ``exerciseID`` and
            ``analyze(submissions, firstTimestamp, path)``.
        workload: ``(studentID, submissions, firstTimestamp, path)``.

    Returns:
        Whatever ``analyze`` returns, or ``None`` when the workload is
        unusable (negative ``firstTimestamp`` or empty ``submissions``) or
        when ``analyze`` raised.
    """
    studentID, submissions, firstTimestamp, path = workload

    # Guard clause: nothing to analyze for this student.
    if firstTimestamp < 0 or len(submissions) == 0:
        message = "No valid submission for exercise {} from student {}"
        print(message.format(subAnalisys.exerciseID, studentID))
        return None

    try:
        outcome = subAnalisys.analyze(submissions, firstTimestamp, path)
    except Exception as error:
        # Report the failure and turn it into a ``None`` result instead of
        # letting the exception propagate to the caller.
        print(error)
        traceback.print_exc()
        outcome = None
    return outcome
def countUsefulLines(subAnalysis: SubmissionAnalysis, workload):
    """Count the useful lines of the last submission in a workload.

    Args:
        subAnalysis: Object exposing ``exerciseID``.
        workload: ``(studentID, submissions, path)``; the last element of
            ``submissions`` must expose ``submission_id``.

    Returns:
        ``(exerciseID, studentID, count)`` where ``count`` is the value
        returned by ``getUsefulLines``, or ``None`` when ``submissions`` is
        empty or the lookup raised.
    """
    studentID, submissions, path = workload

    # Guard clause: nothing to count for this student.
    if len(submissions) == 0:
        message = "No valid submission for exercise {} from student {}"
        print(message.format(subAnalysis.exerciseID, studentID))
        return None

    try:
        lastSubmission = submissions[-1]
        usefulLines = getUsefulLines(
            path, subAnalysis.exerciseID, lastSubmission.submission_id
        )
        outcome = (subAnalysis.exerciseID, studentID, usefulLines)
    except Exception as error:
        # Report the failure and turn it into a ``None`` result instead of
        # letting the exception propagate to the caller.
        print(error)
        traceback.print_exc()
        outcome = None
    return outcome
def add_task(pool, task, workload):
    """Append the ``(task, workload)`` pair to ``pool`` in place."""
    entry = (task, workload)
    pool.append(entry)
def bootstrap(csvPath, vplFolder, outputFolder, n_jobs=8):
    """Analyze every student's submissions for every exercise and save them.

    For each exercise in ``CSVParser(csvPath).exercises`` one
    ``SubmissionAnalysis`` is created and one workload per student is queued.
    The workloads are run through ``processSubmission`` with joblib, the
    non-``None`` results are grouped by exercise, and each group is handed to
    that exercise's ``saveToCSV``.

    Args:
        csvPath: Path given to ``CSVParser``.
        vplFolder: Folder passed along in every workload as the ``path``.
        outputFolder: Folder passed to ``saveToCSV``.
        n_jobs: Worker count passed to ``joblib.Parallel``. Defaults to 8,
            the value that used to be hard-coded.
    """
    parser = CSVParser(csvPath)
    pool = []
    tasks = {}
    for e in parser.exercises:
        (submissions, students) = parser.getSubmissions(e)
        studentsSub = [
            (
                s,
                parser.getStudentValidSubmissions(submissions, s),
                parser.getStudentFirstInteraction(submissions, s),
                vplFolder,
            )
            for s in students
        ]
        task = SubmissionAnalysis(e)
        tasks[e] = task
        for workload in studentsSub:
            add_task(pool, task, workload)

    results = Parallel(n_jobs)(delayed(processSubmission)(t, w) for (t, w) in pool)

    # Group the per-student results by the exercise key each result carries.
    exerciseMap = {}
    for r in results:
        # processSubmission returns None for skipped or failed workloads.
        if r is None:
            continue
        (e, data) = r
        if e in exerciseMap:
            exerciseMap[e].extend(data)
        else:
            exerciseMap[e] = data

    for e, rows in exerciseMap.items():
        tasks[e].saveToCSV(outputFolder, rows)
def checkUsefulLines(csvPath, vplFolder, _, n_jobs=8):
    """Write the useful-line count of selected exercises to a CSV file.

    For the exercises listed in ``selectedExercises`` below, one workload per
    student is queued and run through ``countUsefulLines`` with joblib. Every
    non-``None`` result becomes one ``exercise,student,count`` line of
    ``useful_count.csv``.

    Args:
        csvPath: Path given to ``CSVParser``.
        vplFolder: Folder passed along in every workload as the ``path``.
        _: Ignored. The output always goes to ``useful_count.csv``, opened
            with a relative path.
        n_jobs: Worker count passed to ``joblib.Parallel``. Defaults to 8,
            the value that used to be hard-coded.
    """
    # Only these exercise IDs are inspected; every other one is skipped.
    selectedExercises = (5035, 4988)

    parser = CSVParser(csvPath)
    pool = []
    for e in parser.exercises:
        if e not in selectedExercises:
            continue
        (submissions, students) = parser.getSubmissions(e)
        studentsSub = [
            (s, parser.getStudentLastSubmission(submissions, s), vplFolder)
            for s in students
        ]
        task = SubmissionAnalysis(e)
        for workload in studentsSub:
            add_task(pool, task, workload)

    results = Parallel(n_jobs)(delayed(countUsefulLines)(t, w) for (t, w) in pool)

    # countUsefulLines returns None for skipped or failed workloads.
    # Collect the lines in a list rather than growing one string with `+=`.
    lines = [
        f'{exercise},{student},{count}\n'
        for (exercise, student, count) in (r for r in results if r is not None)
    ]

    # The `with` block closes the file; no explicit close() is needed.
    with open("useful_count.csv", 'w') as fileHandler:
        fileHandler.writelines(lines)
# Selects what the script does when executed directly: True runs
# checkUsefulLines, False runs bootstrap.
CHECK_LINES = True

#--- run ---#
if __name__ == "__main__":
    # Validate the command line explicitly: an `assert` is removed when
    # Python runs with -O, which would let a bad invocation through.
    if len(sys.argv) != 4:
        sys.exit("You must provide the following: path to the csv, the folder with vpl data and the output folder path respectively")

    if not CHECK_LINES:
        bootstrap(sys.argv[1], sys.argv[2], sys.argv[3])
    else:
        checkUsefulLines(sys.argv[1], sys.argv[2], sys.argv[3])
|