Sfoglia il codice sorgente

Implement loop command counts and conditional structures csv files

Implement CSV file for constant initial values
Lucas de Souza 5 anni fa
parent
commit
115ccfaa7e
3 file modificati con 94 aggiunte e 29 eliminazioni
  1. 26 17
      analyser.py
  2. 55 12
      cjson.py
  3. 13 0
      cleanData.py

+ 26 - 17
analyser.py

@@ -1,7 +1,7 @@
 from __future__ import print_function
 
 import os
-import copy;
+import copy
 from pycparser import c_ast
 
 INT_TYPES = ['int', 'short', 'float']
@@ -46,7 +46,8 @@ class DeclarationInfo:
     return self.dimensions > 0
 
 class CommandInfo:
-  def __init__ (self, condType, numLogicOps, numRelOps, opList):
+  def __init__ (self, cmdCount, condType, numLogicOps, numRelOps, opList):
+    self.cmdCount = cmdCount
     self.condType = condType
     self.numLogicOps = numLogicOps
     self.numRelOps = numRelOps
@@ -56,8 +57,8 @@ class CommandInfo:
     return "Type:%s LogicOpCount:%d RelOpCount:%d OpList:%s"%( self.condType, self.numLogicOps, self.numRelOps, self.opList)
 
 class ForCommandInfo (CommandInfo):
-  def __init__ (self, useAssignment, useNext, condType, numLogicOps, numRelOps, opList):
-    CommandInfo.__init__(self, condType, numLogicOps, numRelOps, opList)
+  def __init__ (self, useAssignment, useNext, cmdCount, condType, numLogicOps, numRelOps, opList):
+    CommandInfo.__init__(self, cmdCount, condType, numLogicOps, numRelOps, opList)
     self.useAssignment = useAssignment
     self.useNext = useNext
   
@@ -80,6 +81,7 @@ class ASTAnalyser:
     self.logicOpCount = 0
     self.relOpCount = 0
     self.opList = list()
+    self.cmdCountStack = [0]
     #helpers />
     self.declarations = dict()
     self.declarationsPointers = dict()
@@ -207,12 +209,13 @@ class ASTAnalyser:
     opList = copy.deepcopy(self.opList)
     cmdList = node.stmt
     cmdName = cmdList.__class__.__name__
+    self.cmdCountStack.append(0)
     if cmdName == 'Compound':
       for cmd in cmdList:
         self.proccessCommand(cmd)
     else:
       self.proccessCommand(cmdList)
-    self.conditionCommandData.append(CommandInfo(condType,logicCount, relCount, opList))
+    self.conditionCommandData.append(CommandInfo(self.cmdCountStack.pop(), condType,logicCount, relCount, opList))
   
   def proccessFor (self, node):
     name = node.__class__.__name__
@@ -237,6 +240,7 @@ class ASTAnalyser:
       if node.next.__class__.__name__ != 'FuncCall':
         self.countOperators(node.next)
 
+    self.cmdCountStack.append(0)
     cmdList = node.stmt
     cmdName = cmdList.__class__.__name__
     if cmdName == 'Compound':
@@ -244,7 +248,7 @@ class ASTAnalyser:
         self.proccessCommand(cmd)
     elif name != 'NoneType':
       self.proccessCommand(cmdList)
-    self.forCommandData.append(ForCommandInfo(hasInit, hasNext, condType, logicCount, relCount, opList))
+    self.forCommandData.append(ForCommandInfo(hasInit, hasNext, self.cmdCountStack.pop(), condType, logicCount, relCount, opList))
   
   def proccessIf (self, node):
     name = node.__class__.__name__
@@ -258,7 +262,7 @@ class ASTAnalyser:
     logicCount = self.logicOpCount
     relCount = self.relOpCount
     opList = copy.deepcopy(self.opList)
-
+    self.cmdCountStack.append(0)
     iftrue = node.iftrue
     ifCompound = iftrue.__class__.__name__
     if ifCompound == 'Compound':
@@ -276,7 +280,7 @@ class ASTAnalyser:
         self.proccessCommand(cmd)
     elif name != 'NoneType':
       self.proccessCommand(iffalse)
-    self.conditionCommandData.append(CommandInfo(condType, logicCount, relCount, opList))
+    self.conditionCommandData.append(CommandInfo(self.cmdCountStack.pop(), condType, logicCount, relCount, opList))
   
   def proccessCase (self, node):
     try:
@@ -303,8 +307,9 @@ class ASTAnalyser:
       self.countOperators(expr.expr)
     elif name == 'BinaryOp':
       op = expr.op
-      if op in VALID_OPS:
-        self.incOpCount(op)
+      if not op in VALID_OPS:
+        return
+      self.incOpCount(op)
       if op in VALID_LOGIC_OPS:
         self.logicOpCount += 1
       elif op in VALID_REL_OPS:
@@ -336,28 +341,31 @@ class ASTAnalyser:
     if name == 'BinaryOp':
       op = expr.op
       if op in VALID_LOGIC_OPS:
-        return 'LogicOp'
+        return 'logic_expression'
       elif op in VALID_REL_OPS:
-        return 'RelationalOp'
+        return 'relational_expression'
       else:
-        return 'OtherOp'
+        return 'other_expression'
     elif name == 'UnaryOp':
       op = expr.op
       if op in VALID_LOGIC_OPS:
-        return 'LogicOp'
+        return 'logic_expression'
       else:
-        return 'OtherOp'
+        return 'other_expression'
     elif name == 'Cast':
       return self.checkCondType(expr.expr)
     else:
-      return 'BooleanValue'
+      return 'boolean_value'
 
   def proccessCommand (self, node):
     name = node.__class__.__name__
+    self.cmdCountStack[-1] += 1
     if name == 'Decl':
       self.proccessDecl(node)
+      self.cmdCountStack[-1] -= 1
     elif name == 'FuncDef':
       self.proccessFuncDef(node)
+      self.cmdCountStack[-1] -= 1
     elif name == 'FuncCall':
       self.proccessFuncCall(node)
     elif name == 'Assignment':
@@ -368,6 +376,7 @@ class ASTAnalyser:
       self.proccessSwitch(node)
     elif name == 'Case' or name == 'Default':
       self.proccessCase(node)
+      self.cmdCountStack[-1] -= 1
     elif name == 'If':
       self.proccessIf(node)
     elif name == 'DoWhile' or name == 'While':
@@ -375,5 +384,5 @@ class ASTAnalyser:
     elif name == 'For':
       self.proccessFor(node)
     else:
-      pass
+      self.cmdCountStack[-1] -= 1
       #print("Unknown Command: %s" % name )

+ 55 - 12
cjson.py

@@ -9,6 +9,7 @@ from Queue import Queue
 from threading import Thread
 
 import re
+import copy
 
 import files
 import analyser
@@ -17,12 +18,19 @@ COMMENT_REGEX = r"(//.*)|(/\*[\w\W\n\r]*?\*/)"
 USEFUL_REGEX = r"(//.*)|(/\*[\w\W\n\r]*?\*/)|(^\s*$)|(\{\s*\})|(^\s*\{\s*$)|(^\s*\}\s*$)"
 
 CSV_HEADER = ['assignment_id', 'student_id', 'total_submissions', 'code_lines','total_lines','comments']
-CSV_HEADER.extend(['!', '<=', '%', '>=', '++', '+', '*', '-', '/', '<', '--', '&&', 'p++', '\'==\'', 'p--', '!=', '||', '>'])
+OP_HEADER = ['!', '<=', '%', '>=', '++', '+', '*', '-', '/', '<', '--', '&&', 'p++', '\'==\'', 'p--', '!=', '||', '>']
+CSV_HEADER.extend(OP_HEADER)
 COMMANDS = ['Return', 'For', 'FuncCall', 'Assignment', 'Switch', 'DoWhile', 'While', 'FuncDef', 'If']
 CSV_HEADER.extend(COMMANDS)
 DECLARATIONS = ['vector_int', 'vector_float', 'matrix_string', 'string', 'int', 'pointer_int', 'float', 'matrix_int', 'pointer_float', 'matrix_float']
 CSV_HEADER.extend(DECLARATIONS)
 
+COND_CSV_HEADER = ['assignment_id', 'student_id', 'commands_count', 'cond_type', 'logic_op_count', 'rel_op_count']
+COND_CSV_HEADER.extend(OP_HEADER)
+
+FOR_CSV_HEADER = copy.deepcopy(COND_CSV_HEADER)
+FOR_CSV_HEADER.extend(['use_assignment','use_next_loop'])
+
 finalDataList = list()
 
 class Worker (Thread):
@@ -118,6 +126,11 @@ def initEmptyDict (list):
         result[k] = 0
     return result
 
+def saveToFile (filePath, data):
+    file = open(filePath, "w+")
+    file.write(data)
+    file.close()
+
 #--- run ---#
 if __name__ == "__main__":
     if len(sys.argv) > 1:
@@ -132,8 +145,11 @@ if __name__ == "__main__":
                 for studentData in data[a]:
                     pool.add_task(processStudentData, studentData, a)
             pool.wait_completion()
-            csvfile = ""
+            mainCSVFile = ""
+            forCSVFile = ""
+            condCSVFile = ""
             assignmentList = dict()
+            constantInitCount = dict()
             for data in finalDataList:
                 if data[0].assignment in assignmentList:
                     assignmentList[data[0].assignment].append(data)
@@ -143,6 +159,11 @@ if __name__ == "__main__":
             for assignmentKey in assignmentList:
                 for studentData in assignmentList[assignmentKey]:
                     astInfo = studentData[0]
+                    for k in astInfo.constantInitCount:
+                        if k in constantInitCount:
+                            constantInitCount[k] += astInfo.constantInitCount[k]
+                        else:
+                            constantInitCount[k] = astInfo.constantInitCount[k]
                     studentOpData = initEmptyDict(analyser.VALID_OPS)
                     for key in astInfo.operatorsCount:
                         studentOpData[key] = astInfo.operatorsCount[key]
@@ -158,16 +179,38 @@ if __name__ == "__main__":
                         studentDeclarationData["vector_" + key] = astInfo.declarationsVectors[key]
                     for key in astInfo.declarationsMatrixes:
                         studentDeclarationData["matrix_" + key] = astInfo.declarationsMatrixes[key]
-                    csvfile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, studentData[1], studentData[4], studentData[2], studentData[3])
-                    csvfile += "," + ','.join([str(v) for v in studentOpData.values()])
-                    csvfile += "," + ",".join([str(v) for v in studentCommandData.values()])
-                    csvfile += "," + ",".join([str(v) for v in studentDeclarationData.values()])
-                    csvfile += "\n"
-            csvfile = ','.join(CSV_HEADER) + '\n' + csvfile
-            file = open("data.csv", "w+")
-            file.write(csvfile)
-            file.close()
+                    mainCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, studentData[1], studentData[4], studentData[2], studentData[3])
+                    mainCSVFile += "," + ','.join([str(v) for v in studentOpData.values()])
+                    mainCSVFile += "," + ",".join([str(v) for v in studentCommandData.values()])
+                    mainCSVFile += "," + ",".join([str(v) for v in studentDeclarationData.values()])
+                    mainCSVFile += "\n"
+                    #For_structure.csv
+                    for i in astInfo.forCommandData:
+                        forCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, i.cmdCount, i.condType, i.numLogicOps, i.numRelOps)
+                        opData = initEmptyDict(analyser.VALID_OPS)
+                        for op in i.opList:
+                            opData[op] += 1
+                        forCSVFile += "," + ','.join([str(v) for v in opData.values()])
+                        forCSVFile += ",%s,%s\n" % (i.useAssignment, i.useNext)
+                    #condition_structure.csv
+                    for i in astInfo.conditionCommandData:
+                        condCSVFile += "%s,%s,%s,%s,%s,%s" % (assignmentKey, astInfo.student, i.cmdCount, i.condType, i.numLogicOps, i.numRelOps)
+                        opData = initEmptyDict(analyser.VALID_OPS)
+                        for op in i.opList:
+                            opData[op] += 1
+                        condCSVFile += "," + ','.join([str(v) for v in opData.values()])
+                        condCSVFile += "\n"
+            mainCSVFile = ','.join(CSV_HEADER) + '\n' + mainCSVFile
+            saveToFile("data.csv", mainCSVFile)
+            forCSVFile = ','.join(FOR_CSV_HEADER) + '\n' + forCSVFile
+            saveToFile("for_structure.csv", forCSVFile)
+            condCSVFile = ','.join(COND_CSV_HEADER) + '\n' + condCSVFile
+            saveToFile("cond_structure.csv", condCSVFile)
+            constantInitFile = "constant,count\n"
+            for k in constantInitCount:
+                constantInitFile += "%s,%s\n" % (k, str(constantInitCount[k]))
+            saveToFile("const_init.csv", constantInitFile) 
         else:
             print("cjson -f file | cjon folder/")    
     else:
-        print("cjson -f file | cjon folder/")
+        print("cjson -f file | cjon folder/")

+ 13 - 0
cleanData.py

@@ -0,0 +1,13 @@
+import pandas as pd
+
+data = pd.read_csv("data.csv")
+data = data.query("not student_id in [3375,3486,3382,3369,1767,1760,3]")
+data.to_csv("clean_data.csv")
+
+data = pd.read_csv("cond_structure.csv")
+data = data.query("not student_id in [3375,3486,3382,3369,1767,1760,3]")
+data.to_csv("clean_cond_structure.csv")
+
+data = pd.read_csv("for_structure.csv")
+data = data.query("not student_id in [3375,3486,3382,3369,1767,1760,3]")
+data.to_csv("clean_for_structure.csv")