# vplAnalyzer.py (3.3 KB)

  1. from csvParser import CSVParser
  2. import sys
  3. import traceback
  4. from submissionAnalysis import SubmissionAnalysis
  5. from submissionFileReader import getUsefulLines
  6. from joblib import Parallel, delayed
  7. def processSubmission (subAnalisys:SubmissionAnalysis, workload:'tuple[int,list,int,str]'):
  8. (studentID, submissions, firstTimestamp, path) = workload
  9. if firstTimestamp < 0 or len(submissions) == 0:
  10. print("No valid submission for exercise {} from student {}".format(subAnalisys.exerciseID, studentID))
  11. return None
  12. try:
  13. result = subAnalisys.analyze(submissions, firstTimestamp, path)
  14. return result
  15. except Exception as e:
  16. print(e)
  17. traceback.print_exc()
  18. return None
  19. def countUsefulLines (subAnalysis:SubmissionAnalysis, workload):
  20. (studentID, submissions, path) = workload
  21. if len(submissions) == 0:
  22. print("No valid submission for exercise {} from student {}".format(subAnalysis.exerciseID, studentID))
  23. return None
  24. try:
  25. count = getUsefulLines(path,subAnalysis.exerciseID,submissions[-1].submission_id)
  26. return (subAnalysis.exerciseID, studentID, count)
  27. except Exception as e:
  28. print(e)
  29. traceback.print_exc()
  30. return None
  31. def add_task (pool, task, workload):
  32. pool.append((task, workload))
  33. def bootstrap (csvPath, vplFolder, outputFolder):
  34. parser = CSVParser(csvPath)
  35. pool = []
  36. tasks = {}
  37. for e in parser.exercises:
  38. (submissions, students) = parser.getSubmissions(e)
  39. studentsSub = [ (s, parser.getStudentValidSubmissions(submissions, s), parser.getStudentFirstInteraction(submissions, s), vplFolder) for s in students]
  40. task = SubmissionAnalysis(e)
  41. tasks[e] = task
  42. for workload in studentsSub:
  43. add_task(pool, task, workload)
  44. result = Parallel(8)(delayed(processSubmission)(t,w) for (t,w) in pool)
  45. exerciseMap = {}
  46. for r in result:
  47. if r == None:
  48. continue
  49. (e, data) = r
  50. if e in exerciseMap:
  51. exerciseMap[e].extend(data)
  52. else:
  53. exerciseMap[e] = data
  54. for e in exerciseMap:
  55. task = tasks[e]
  56. task.saveToCSV(outputFolder, exerciseMap[e])
  57. def checkUsefulLines (csvPath, vplFolder, _):
  58. parser = CSVParser(csvPath)
  59. pool = []
  60. for e in parser.exercises:
  61. if e not in [5035,4988]:
  62. continue
  63. (submissions, students) = parser.getSubmissions(e)
  64. studentsSub = [ (s, parser.getStudentLastSubmission(submissions, s), vplFolder) for s in students]
  65. task = SubmissionAnalysis(e)
  66. for workload in studentsSub:
  67. add_task(pool, task, workload)
  68. result = Parallel(8)(delayed(countUsefulLines)(t,w) for (t,w) in pool)
  69. output = ''
  70. for r in result:
  71. if r == None:
  72. continue
  73. (exercise, student, count) = r
  74. output += f'{exercise},{student},{count}\n'
  75. with open("useful_count.csv",'w') as fileHandler:
  76. fileHandler.write(output)
  77. fileHandler.close()
  78. CHECK_LINES = True
  79. #--- run ---#
  80. if __name__ == "__main__":
  81. assert len(sys.argv) == 4, "You must provide the following: path to the csv, the folder with vpl data and the output folder path respectively"
  82. if not CHECK_LINES:
  83. bootstrap(sys.argv[1], sys.argv[2], sys.argv[3])
  84. else:
  85. checkUsefulLines(sys.argv[1], sys.argv[2], sys.argv[3])