csvParser.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. import pandas as pd
  2. from dateutil.tz import tzoffset
  3. from dateutil.parser import parse
  4. from dateutil.utils import default_tzinfo
  5. class CSVParser:
  6. def __init__ (self, path):
  7. self.df = pd.read_csv(path)
  8. tz = tzoffset(name="saw", offset=-10800)
  9. self.df['time'] = self.df['time'].transform(lambda x: default_tzinfo(parse(x), tz).timestamp())
  10. self.exercises = self.df['exercise_id'].unique().tolist()
  11. self.exercises.sort()
  12. def getSubmissions (self, exercise_id):
  13. dataFrame:pd.DataFrame = self.df
  14. submissionData = dataFrame[dataFrame['exercise_id'] == exercise_id]
  15. if 0.01 in submissionData['grade'].values :
  16. submissionData['grade'] = submissionData['grade'].transform(lambda x: x*10 if x != 1 and not pd.isna(x) else x)
  17. students = submissionData['user_id'].unique().tolist()
  18. students.sort()
  19. return (submissionData, students)
  20. def getStudentFirstInteraction (self, submissionData, studentID):
  21. # filter and sort user_id submission and turn them in tuples
  22. subTuple = submissionData[submissionData['user_id'] == studentID].sort_values(by='time').itertuples(index=False, name="Submission")
  23. subList = list(subTuple)
  24. size = len(subList)
  25. for i in range(size):
  26. if subList[i].action == 'uploaded_submission':
  27. foundPos = self._backtrackViewDescription(subList, i)
  28. if foundPos >= 0:
  29. return subList[foundPos].time
  30. else:
  31. return -1
  32. print("firt intereaction not found: %i"%size)
  33. return -1
  34. def getStudentValidSubmissions (self, submissionData, studentID):
  35. # filter and sort user_id submission and turn them in tuples
  36. filteredDF = submissionData[(submissionData['user_id'] == studentID) & (submissionData['action'] == 'uploaded_submission')].dropna().sort_values(by='time')
  37. filteredDF = filteredDF.drop_duplicates(subset="submission_id")
  38. subList = list(filteredDF.itertuples(index=False, name="Submission"))
  39. return subList
  40. def getStudentLastSubmission (self, submissionData, studentID):
  41. filteredDF = submissionData[(submissionData['user_id'] == studentID) & (submissionData['action'] == 'uploaded_submission')].dropna().sort_values(by='submission_id')
  42. filteredDF = filteredDF.drop_duplicates(subset="submission_id")
  43. subList = list(filteredDF.itertuples(index=False, name="Submission"))
  44. return subList
  45. def _backtrackViewDescription (self, subList, start_pos):
  46. i = start_pos - 1
  47. while (i >= 0):
  48. if subList[i].action == 'view_description':
  49. return i
  50. i = i - 1
  51. print("Backtrack view not found")
  52. return -1