import pandas as pd
from dateutil.tz import tzoffset
from dateutil.parser import parse
from dateutil.utils import default_tzinfo


class CSVParser:
    """Load a submissions log from CSV and answer per-exercise/per-student queries.

    The methods below read the columns ``time``, ``exercise_id``, ``user_id``,
    ``action``, ``grade`` and ``submission_id`` from the loaded file.

    Attributes:
        df: The full log, with ``time`` converted to POSIX timestamps.
        exercises: Sorted list of the distinct ``exercise_id`` values.
    """

    def __init__(self, path):
        """Read the CSV at *path* and normalize its ``time`` column.

        Args:
            path: Anything accepted by ``pandas.read_csv`` (file path or
                file-like object).
        """
        self.df = pd.read_csv(path)

        # Fixed offset of -10800 s (UTC-03:00). It is only a default: it is
        # applied to parsed values that carry no timezone of their own.
        tz = tzoffset(name="saw", offset=-10800)
        self.df['time'] = self.df['time'].map(
            lambda raw: default_tzinfo(parse(raw), tz).timestamp()
        )

        self.exercises = sorted(self.df['exercise_id'].unique().tolist())

    def getSubmissions(self, exercise_id):
        """Return the log rows of one exercise and the students that appear in it.

        If any grade of the exercise equals ``0.01``, every grade other than
        ``1`` is multiplied by 10 (missing grades stay missing).
        NOTE(review): presumably this normalizes exercises graded on a
        different scale; confirm the intent with the data owner.

        Args:
            exercise_id: Value to match against the ``exercise_id`` column.

        Returns:
            A ``(submissionData, students)`` tuple: an independent DataFrame
            holding the exercise's rows, and the sorted list of distinct
            ``user_id`` values in it.
        """
        # Explicit copy: the grade column may be rewritten below, and that must
        # neither touch self.df nor trigger pandas' SettingWithCopyWarning.
        submissionData = self.df[self.df['exercise_id'] == exercise_id].copy()

        grades = submissionData['grade']
        if 0.01 in grades.values:
            # Keep grades equal to 1; scale everything else (NaN * 10 is NaN).
            submissionData['grade'] = grades.where(grades == 1, grades * 10)

        students = sorted(submissionData['user_id'].unique().tolist())
        return (submissionData, students)

    def getStudentFirstInteraction(self, submissionData, studentID):
        """Return the time of the view that precedes a student's first upload.

        The student's rows are ordered by ``time``; from the first
        ``uploaded_submission`` row the log is scanned backwards for the
        nearest ``view_description`` row.

        Args:
            submissionData: DataFrame of log rows (e.g. from ``getSubmissions``).
            studentID: Value to match against the ``user_id`` column.

        Returns:
            The ``time`` of that ``view_description`` row, or ``-1`` when the
            student has no upload or no view precedes the first upload.
        """
        studentRows = submissionData[submissionData['user_id'] == studentID]
        # Stable sort keeps the file order for rows with identical timestamps.
        studentRows = studentRows.sort_values(by='time', kind='mergesort')
        subList = list(studentRows.itertuples(index=False, name="Submission"))

        for pos, row in enumerate(subList):
            if row.action != 'uploaded_submission':
                continue
            # Only the first upload is considered; the search does not move on
            # to later uploads when no view precedes this one.
            foundPos = self._backtrackViewDescription(subList, pos)
            return subList[foundPos].time if foundPos >= 0 else -1

        print("firt intereaction not found: %i" % len(subList))
        return -1

    def getStudentValidSubmissions(self, submissionData, studentID):
        """Return a student's uploads ordered by time, one per ``submission_id``.

        Args:
            submissionData: DataFrame of log rows.
            studentID: Value to match against the ``user_id`` column.

        Returns:
            List of ``Submission`` named tuples (one field per column).
        """
        return self._uploadedSubmissions(submissionData, studentID, 'time')

    def getStudentLastSubmission(self, submissionData, studentID):
        """Return a student's uploads ordered by ``submission_id``.

        Despite the name, the whole list is returned, not only its last
        element.

        Args:
            submissionData: DataFrame of log rows.
            studentID: Value to match against the ``user_id`` column.

        Returns:
            List of ``Submission`` named tuples, one per ``submission_id``.
        """
        return self._uploadedSubmissions(submissionData, studentID, 'submission_id')

    def _uploadedSubmissions(self, submissionData, studentID, sort_by):
        """Collect one student's complete, de-duplicated upload rows sorted by *sort_by*."""
        isStudentUpload = (
            (submissionData['user_id'] == studentID)
            & (submissionData['action'] == 'uploaded_submission')
        )
        # dropna() discards rows with a missing value in any column.
        uploads = submissionData[isStudentUpload].dropna()
        # Stable sort, so that for equal keys drop_duplicates keeps the row
        # that appears first in the input.
        uploads = uploads.sort_values(by=sort_by, kind='mergesort')
        uploads = uploads.drop_duplicates(subset="submission_id")
        return list(uploads.itertuples(index=False, name="Submission"))

    def _backtrackViewDescription(self, subList, start_pos):
        """Find the nearest ``view_description`` entry strictly before *start_pos*.

        Args:
            subList: Sequence of rows exposing an ``action`` attribute.
            start_pos: Index to search backwards from (exclusive).

        Returns:
            The index of the match, or ``-1`` if there is none.
        """
        for pos in range(start_pos - 1, -1, -1):
            if subList[pos].action == 'view_description':
                return pos

        print("Backtrack view not found")
        return -1