submissionAnalysis.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
import csv
from collections import namedtuple

from submissionFileReader import readSubmissionContent, levenshteinDistance
  3. class SubmissionAnalysis:
  4. def __init__ (self, exerciseID):
  5. self.exerciseID = exerciseID
  6. self.data = list()
  7. self.SubmissionData = namedtuple("SubmissionData", "student_id f{0}_TES f{0}_DES f{0}_grade f{0}_DT f{0}_timestamp".format(exerciseID))
  8. headerMap = {}
  9. headerMap["f{0}_TES".format(exerciseID)] = "{0}_TES".format(exerciseID)
  10. headerMap["f{0}_DES".format(exerciseID)] = "{0}_DES".format(exerciseID)
  11. headerMap["f{0}_grade".format(exerciseID)] = "{0}_grade".format(exerciseID)
  12. headerMap["f{0}_DT".format(exerciseID)] = "{0}_D/T".format(exerciseID)
  13. headerMap["student_id"] = "student_id"
  14. headerMap["f{0}_timestamp".format(exerciseID)] = "{0}_timestamp".format(exerciseID)
  15. self.headerMap = headerMap
  16. def analyze (self, submissions, firstTimestamp, path):
  17. submissions = self.fixTimestamps(submissions)
  18. resultList = []
  19. submissionsWithFiles = []
  20. for s in submissions:
  21. content = readSubmissionContent(path, self.exerciseID, s.submission_id)
  22. if len(content) > 0:
  23. submissionsWithFiles.append((s,content))
  24. assert len(submissionsWithFiles) >= 1, "No valid code submitted to exercise {} from student {}".format(self.exerciseID,submissions[0].user_id)
  25. total = len(submissionsWithFiles)
  26. firstTuple = submissionsWithFiles[0]
  27. first = firstTuple[0]
  28. firstTES = first.time - firstTimestamp
  29. firstContent = firstTuple[1]
  30. firstDES = levenshteinDistance("", firstContent)
  31. firstDT = 0 if firstTES == 0 or firstDES == 0 else firstDES/firstTES
  32. resultList.append(self.SubmissionData(first.user_id, firstTES, firstDES, first.grade, firstDT, first.time))
  33. for i in range(1, total):
  34. subTuple = submissionsWithFiles[i]
  35. sub = subTuple[0]
  36. tes = sub.time - submissionsWithFiles[i-1][0].time
  37. subContent = subTuple[1]
  38. prevContent = submissionsWithFiles[i-1][1]
  39. des = levenshteinDistance(prevContent,subContent)
  40. dt = 0 if tes == 0 or des == 0 else des/tes
  41. resultList.append(self.SubmissionData(sub.user_id, tes, des, sub.grade, dt, sub.time))
  42. return (self.exerciseID, resultList)
  43. def fixTimestamps (self, submissions):
  44. sameTS = list()
  45. repeated = 0
  46. for i in range(1,len(submissions)):
  47. prev = i - 1
  48. if submissions[prev].time == submissions[i].time:
  49. sameTS.append(i)
  50. else:
  51. if len(sameTS) > 0:
  52. repeated += len(sameTS)
  53. self.spreadTSEvenly(sameTS, submissions)
  54. sameTS = list()
  55. continue
  56. if len(sameTS) > 0:
  57. repeated += len(sameTS)
  58. self.spreadTSEvenly(sameTS, submissions)
  59. # we need to sort
  60. submissions.sort(key=lambda x: x.time)
  61. if repeated > 0:
  62. sub = submissions[0]
  63. print("{0} repeated {1} TS for exercise {2}".format(sub.user_id,repeated,self.exerciseID))
  64. return submissions
  65. def spreadTSEvenly (self, indexes, submissions):
  66. print("repeated ts")
  67. if len(indexes) == 1:
  68. sub = submissions[indexes[0]]
  69. submissions[indexes[0]] = sub._replace(time=sub.time + 30)
  70. else:
  71. if len(indexes) > 2:
  72. print("We have a problem...")
  73. first = indexes[0]
  74. sub = submissions[first]
  75. submissions[first] = sub._replace(time=sub.time + 20)
  76. for i in range(1, len(indexes)):
  77. current = indexes[i]
  78. prev = indexes[i-1]
  79. sub = submissions[current]
  80. submissions[current] = sub._replace(time=submissions[prev].time + 20)
  81. def addData (self, submissionData):
  82. self.data.append(submissionData)
  83. def saveToCSV (self, folder, dataset):
  84. with open("{}/{}.csv".format(folder, self.exerciseID),"w", encoding='utf-8') as file:
  85. fields = self.SubmissionData._fields
  86. translatedFields = [self.headerMap[x] for x in fields]
  87. header = ",".join(translatedFields)
  88. file.write(header)
  89. file.write('\n')
  90. for data in dataset:
  91. lineData = [getattr(data, x) for x in fields]
  92. line = ",".join(str(e) for e in lineData)
  93. file.write(line)
  94. file.write('\n')
  95. file.close()