# cjson.py
import sys
import os

# Allow importing the local helper modules (files, analyser, ...) when the
# script is run from a sibling/parent directory.  Must run before the local
# imports below.
sys.path.extend(['.', '..'])

import copy
import queue
import re
from threading import Thread

from pycparser import parse_file, c_ast

import analyser
import files
from submissionFileReader import getSubmissionFile
  12. COMMENT_REGEX = r"(//.*)|(/\*[\w\W\n\r]*?\*/)"
  13. USEFUL_REGEX = r"(//.*)|(/\*[\w\W\n\r]*?\*/)|(^\s*$)|(\{\s*\})|(^\s*\{\s*$)|(^\s*\}\s*$)"
  14. CSV_HEADER = ['assignment_id', 'student_id', 'total_submissions', 'code_lines','total_lines','comments']
  15. OP_HEADER = ['!', '<=', '%', '>=', '++', '+', '*', '-', '/', '<', '--', '&&', 'p++', '\'==\'', 'p--', '!=', '||', '>']
  16. CSV_HEADER.extend(OP_HEADER)
  17. COMMANDS = ['Return', 'For', 'FuncCall', 'Assignment', 'Switch', 'DoWhile', 'While', 'FuncDef', 'If']
  18. CSV_HEADER.extend(COMMANDS)
  19. DECLARATIONS = ['vector_int', 'vector_float', 'matrix_string', 'string', 'int', 'pointer_int', 'float', 'matrix_int', 'pointer_float', 'matrix_float']
  20. CSV_HEADER.extend(DECLARATIONS)
  21. COND_CSV_HEADER = ['assignment_id', 'student_id', 'commands_count', 'cond_type', 'logic_op_count', 'rel_op_count']
  22. COND_CSV_HEADER.extend(OP_HEADER)
  23. FOR_CSV_HEADER = copy.deepcopy(COND_CSV_HEADER)
  24. FOR_CSV_HEADER.extend(['use_assignment','use_next_loop'])
  25. finalDataList = list()
  26. class Worker (Thread):
  27. """Thread executing tasks from a given tasks queue"""
  28. def __init__ (self, tasks):
  29. Thread.__init__(self)
  30. self.tasks = tasks
  31. self.daemon = True
  32. self.start()
  33. def run (self):
  34. while True:
  35. func, args, kargs = self.tasks.get()
  36. try: func(*args, **kargs)
  37. except Exception as e:
  38. print(e)
  39. self.tasks.task_done()
  40. class ThreadPool:
  41. """Pool of threads consuming tasks from a queue"""
  42. def __init__ (self, num_threads):
  43. self.tasks = queue.Queue(num_threads)
  44. for _ in range(num_threads): Worker(self.tasks)
  45. def add_task (self, func, *args, **kargs):
  46. """Add a task to the queue"""
  47. self.tasks.put((func, args, kargs))
  48. def wait_completion (self):
  49. """Wait for completion of all the tasks in the queue"""
  50. self.tasks.join()
  51. def processFile (filename):
  52. if len(filename) == 0:
  53. return list()
  54. ast = parse_file(filename, use_cpp=True,
  55. cpp_path='gcc',
  56. cpp_args=['-E', r'-Iutils/fake_libc_include'])
  57. nodeList = [node for (_, node) in ast.children() if node.__class__.__name__ is 'FuncDef' or node.__class__.__name__ is 'Decl']
  58. return nodeList
  59. def processStudentData (studentData, assignment):
  60. try:
  61. result = processFile(studentData[1])
  62. if len(result) == 0:
  63. return
  64. data = analyser.ASTAnalyser(assignment, studentData[0], result)
  65. data.beginAnalysis()
  66. fileText = open(studentData[1], 'r').read()
  67. totalLines = fileText.count("\n")
  68. totalComments = len(re.findall(COMMENT_REGEX, fileText, re.MULTILINE))
  69. usefulText = re.sub(USEFUL_REGEX, "", fileText, flags=re.MULTILINE)
  70. usefulText = os.linesep.join([s for s in usefulText.splitlines() if s])
  71. usefulLines = usefulText.count("\n")
  72. finalDataList.append((data, studentData[2], totalLines, totalComments, usefulLines))
  73. print("Processing data from student %s at assignment %s \n Commands: %s \n Declarations:\n\tBasic:%s\n\tMatrix:%s\n\tPointers:%s\n\tVectors:%s \n Operators: %s \n Constants: %s \n For data: %s \n Condition data: %s \n Lines:%s, Comments:%s, Useful:%s" % (studentData[0], assignment, data.commandCount, data.declarations, data.declarationsMatrixes, data.declarationsPointers, data.declarationsVectors, data.operatorsCount, data.constantInitCount, data.forCommandStr(), data.conditionCommandStr(), totalLines, totalComments, usefulLines))
  74. except Exception as e:
  75. print("%s! Failed to process file: %s" % (e, studentData[1]))
  76. def loadStudentsFolders (raiz, folder):
  77. userDataPath = os.path.join(raiz, folder)
  78. return files.filesFromFolder(userDataPath, "usersdata")
  79. def loadStudentFiles (raiz, folder, studentsFolders):
  80. studentsData = []
  81. for s in studentsFolders:
  82. path = os.path.join(raiz, folder, "usersdata", s)
  83. totalSub = files.countFolders(path)
  84. finalFolder = files.highestFileName(path)
  85. cFileFolder = os.path.join(path, finalFolder, "submittedfiles")
  86. cFiles = files.getFilesInFolder(cFileFolder, "*[cC]*")
  87. if len(cFiles) == 0:
  88. studentsData.append((s, "", totalSub))
  89. else:
  90. studentsData.append((s, cFiles[0], totalSub))
  91. return studentsData
  92. def loadAssignments (raiz):
  93. assignmentsFolders = files.filesFromFolder(raiz, "")
  94. assignments = {}
  95. for a in assignmentsFolders:
  96. studentsFolders = loadStudentsFolders(raiz, a)
  97. if(len(studentsFolders) == 0):
  98. assignments[a] = []
  99. continue
  100. studentsData = loadStudentFiles(raiz, a, studentsFolders)
  101. assignments[a] = studentsData
  102. return assignments
  103. def initEmptyDict (list):
  104. result = dict()
  105. for k in list:
  106. result[k] = 0
  107. return result
  108. def saveToFile (filePath, data):
  109. file = open(filePath, "w+")
  110. file.write(data)
  111. file.close()
  112. def processDataFromCSV (parser, vplFolder):
  113. assingments = parser.exercises
  114. data = {}
  115. for e in assingments:
  116. (allSubs, students) = parser.getSubmissions(e)
  117. studentData = []
  118. for student in students:
  119. submissions = parser.getStudentValidSubmissions(allSubs, student)
  120. try:
  121. assert len(submissions) > 0
  122. submissions.sort(key = lambda x : x.submission_id)
  123. lastSub = submissions[-1]
  124. content = getSubmissionFile(vplFolder, e, lastSub.submission_id)
  125. studentData.append((student, content, len(submissions)))
  126. except Exception:
  127. studentData.append((student, "", len(submissions)))
  128. data[e] = studentData[:]
  129. pool = ThreadPool(10)
  130. for a in data:
  131. for studentData in data[a]:
  132. pool.add_task(processStudentData, studentData, a)
  133. pool.wait_completion()
  134. mainCSVFile = ""
  135. forCSVFile = ""
  136. condCSVFile = ""
  137. assignmentList = dict()
  138. constantInitCount = dict()
  139. for data in finalDataList:
  140. if data[0].assignment in assignmentList:
  141. assignmentList[data[0].assignment].append(data)
  142. else:
  143. assignmentList[data[0].assignment] = list()
  144. assignmentList[data[0].assignment].append(data)
  145. for assignmentKey in assignmentList:
  146. for studentData in assignmentList[assignmentKey]:
  147. astInfo = studentData[0]
  148. for k in astInfo.constantInitCount:
  149. if k in constantInitCount:
  150. constantInitCount[k] += astInfo.constantInitCount[k]
  151. else:
  152. constantInitCount[k] = astInfo.constantInitCount[k]
  153. studentOpData = initEmptyDict(analyser.VALID_OPS)
  154. for key in astInfo.operatorsCount:
  155. studentOpData[key] = astInfo.operatorsCount[key]
  156. studentCommandData = initEmptyDict(COMMANDS)
  157. for key in astInfo.commandCount:
  158. studentCommandData[key] = astInfo.commandCount[key]
  159. studentDeclarationData = initEmptyDict(DECLARATIONS)
  160. for key in astInfo.declarations:
  161. studentDeclarationData[key] = astInfo.declarations[key]
  162. for key in astInfo.declarationsPointers:
  163. studentDeclarationData["pointer_" + key] = astInfo.declarationsPointers[key]
  164. for key in astInfo.declarationsVectors:
  165. studentDeclarationData["vector_" + key] = astInfo.declarationsVectors[key]
  166. for key in astInfo.declarationsMatrixes:
  167. studentDeclarationData["matrix_" + key] = astInfo.declarationsMatrixes[key]
  168. mainCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, studentData[1], studentData[4], studentData[2], studentData[3])
  169. mainCSVFile += "," + ','.join([str(v) for v in studentOpData.values()])
  170. mainCSVFile += "," + ",".join([str(v) for v in studentCommandData.values()])
  171. mainCSVFile += "," + ",".join([str(v) for v in studentDeclarationData.values()])
  172. mainCSVFile += "\n"
  173. #For_structure.csv
  174. for i in astInfo.forCommandData:
  175. forCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, i.cmdCount, i.condType, i.numLogicOps, i.numRelOps)
  176. opData = initEmptyDict(analyser.VALID_OPS)
  177. for op in i.opList:
  178. opData[op] += 1
  179. forCSVFile += "," + ','.join([str(v) for v in opData.values()])
  180. forCSVFile += ",%s,%s\n" % (i.useAssignment, i.useNext)
  181. #condition_structure.csv
  182. for i in astInfo.conditionCommandData:
  183. condCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, i.cmdCount, i.condType, i.numLogicOps, i.numRelOps)
  184. opData = initEmptyDict(analyser.VALID_OPS)
  185. for op in i.opList:
  186. opData[op] += 1
  187. condCSVFile += "," + ','.join([str(v) for v in opData.values()])
  188. condCSVFile += "\n"
  189. mainCSVFile = ','.join(CSV_HEADER) + '\n' + mainCSVFile
  190. saveToFile("data.csv", mainCSVFile)
  191. forCSVFile = ','.join(FOR_CSV_HEADER) + '\n' + forCSVFile
  192. saveToFile("for_structure.csv", forCSVFile)
  193. condCSVFile = ','.join(COND_CSV_HEADER) + '\n' + condCSVFile
  194. saveToFile("cond_structure.csv", condCSVFile)
  195. constantInitFile = "constant,count\n"
  196. for k in constantInitCount:
  197. constantInitFile += "%s,%s\n" % (k, str(constantInitCount[k]))
  198. saveToFile("const_init.csv", constantInitFile)
  199. print("Entrada: {}, Saida: {}".format(analyser.PRINT_COUNT,analyser.SCAN_COUNT))
  200. #--- run ---#
  201. if __name__ == "__main__":
  202. if len(sys.argv) > 1:
  203. file = sys.argv[1]
  204. if file == "-f" and len(sys.argv) > 2:
  205. print(processFile(sys.argv[2]))
  206. elif file != "-f":
  207. raiz = "./" + sys.argv[1]
  208. data = loadAssignments(raiz)
  209. pool = ThreadPool(10)
  210. for a in data:
  211. for studentData in data[a]:
  212. pool.add_task(processStudentData, studentData, a)
  213. pool.wait_completion()
  214. mainCSVFile = ""
  215. forCSVFile = ""
  216. condCSVFile = ""
  217. assignmentList = dict()
  218. constantInitCount = dict()
  219. for data in finalDataList:
  220. if data[0].assignment in assignmentList:
  221. assignmentList[data[0].assignment].append(data)
  222. else:
  223. assignmentList[data[0].assignment] = list()
  224. assignmentList[data[0].assignment].append(data)
  225. for assignmentKey in assignmentList:
  226. for studentData in assignmentList[assignmentKey]:
  227. astInfo = studentData[0]
  228. for k in astInfo.constantInitCount:
  229. if k in constantInitCount:
  230. constantInitCount[k] += astInfo.constantInitCount[k]
  231. else:
  232. constantInitCount[k] = astInfo.constantInitCount[k]
  233. studentOpData = initEmptyDict(analyser.VALID_OPS)
  234. for key in astInfo.operatorsCount:
  235. studentOpData[key] = astInfo.operatorsCount[key]
  236. studentCommandData = initEmptyDict(COMMANDS)
  237. for key in astInfo.commandCount:
  238. studentCommandData[key] = astInfo.commandCount[key]
  239. studentDeclarationData = initEmptyDict(DECLARATIONS)
  240. for key in astInfo.declarations:
  241. studentDeclarationData[key] = astInfo.declarations[key]
  242. for key in astInfo.declarationsPointers:
  243. studentDeclarationData["pointer_" + key] = astInfo.declarationsPointers[key]
  244. for key in astInfo.declarationsVectors:
  245. studentDeclarationData["vector_" + key] = astInfo.declarationsVectors[key]
  246. for key in astInfo.declarationsMatrixes:
  247. studentDeclarationData["matrix_" + key] = astInfo.declarationsMatrixes[key]
  248. mainCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, studentData[1], studentData[4], studentData[2], studentData[3])
  249. mainCSVFile += "," + ','.join([str(v) for v in studentOpData.values()])
  250. mainCSVFile += "," + ",".join([str(v) for v in studentCommandData.values()])
  251. mainCSVFile += "," + ",".join([str(v) for v in studentDeclarationData.values()])
  252. mainCSVFile += "\n"
  253. #For_structure.csv
  254. for i in astInfo.forCommandData:
  255. forCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, i.cmdCount, i.condType, i.numLogicOps, i.numRelOps)
  256. opData = initEmptyDict(analyser.VALID_OPS)
  257. for op in i.opList:
  258. opData[op] += 1
  259. forCSVFile += "," + ','.join([str(v) for v in opData.values()])
  260. forCSVFile += ",%s,%s\n" % (i.useAssignment, i.useNext)
  261. #condition_structure.csv
  262. for i in astInfo.conditionCommandData:
  263. condCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, i.cmdCount, i.condType, i.numLogicOps, i.numRelOps)
  264. opData = initEmptyDict(analyser.VALID_OPS)
  265. for op in i.opList:
  266. opData[op] += 1
  267. condCSVFile += "," + ','.join([str(v) for v in opData.values()])
  268. condCSVFile += "\n"
  269. mainCSVFile = ','.join(CSV_HEADER) + '\n' + mainCSVFile
  270. saveToFile("data.csv", mainCSVFile)
  271. forCSVFile = ','.join(FOR_CSV_HEADER) + '\n' + forCSVFile
  272. saveToFile("for_structure.csv", forCSVFile)
  273. condCSVFile = ','.join(COND_CSV_HEADER) + '\n' + condCSVFile
  274. saveToFile("cond_structure.csv", condCSVFile)
  275. constantInitFile = "constant,count\n"
  276. for k in constantInitCount:
  277. constantInitFile += "%s,%s\n" % (k, str(constantInitCount[k]))
  278. saveToFile("const_init.csv", constantInitFile)
  279. else:
  280. print("cjson -f file | cjon folder/")
  281. else:
  282. print("cjson -f file | cjon folder/")