# submissionFileReader.py (3.6 KB)
import glob
import os
import re
import shlex
import subprocess
import sys
import tempfile
from math import trunc

import numpy as np

from ccleaner import CodeCleaner
from pycleaner import CodeCleaner as PyCleaner

sys.path.extend(['.', '..'])
  12. USEFUL_REGEX = r"(//.*)|(/\*[\w\W\n\r]*?\*/)|(^\s*$)|(\{\s*\})|(^\s*\{\s*$)|(^\s*\}\s*$)"
  13. FILE_REGEX = re.compile(r".*\.(py|c)$",re.IGNORECASE)
  14. #VAR_NO_INIT = re.compile(r"(int|float|double|char|long|string)\s*\*?([a-zA-Z0-9_]+)\s*(,\s*[a-zA-Z0-9)]+\s*)*;")
  15. #VAR_INIT = re.compile(r"(int|float|double|char|long|string)\s*\*?([a-zA-Z0-9_]+)[^=]*=\s*[^,](,\s*[a-zA-Z0-9)][^=]*=\s*[^,])*;")
  16. def readSubmissionContent (path, exerciseID, submissionID):
  17. filePath = "%s%s/%s/submittedfiles/*.*"%(path, trunc(exerciseID), trunc(submissionID))
  18. files = glob.glob(filePath)
  19. files = [f for f in files if FILE_REGEX.match(f) != None]
  20. #assert len(files) > 0, "Evaluated code for exercise {}, submission {} doesn't have a single submitted .c file at {}".format(exerciseID, submissionID, filePath)
  21. if len(files) == 0:
  22. print("Evaluated code for exercise {}, submission {} doesn't have a single submitted .c/.py file at {}".format(exerciseID, submissionID, filePath))
  23. return ""
  24. # check type
  25. file = files[0]
  26. content = open(file, "r").read()
  27. try:
  28. if file.endswith("py") :
  29. #python
  30. return minifyPython(content)
  31. else:
  32. #c code
  33. return minifyC(content)
  34. except Exception as e:
  35. print(e)
  36. return ""
  37. def minifyC (text):
  38. usefulText = CodeCleaner().cleanCode(text)
  39. #print(usefulText)
  40. out = ""
  41. with tempfile.NamedTemporaryFile() as fp:
  42. fp.write(usefulText.encode("utf-8"))
  43. fp.flush()
  44. process = subprocess.run("./cminify.sh {}".format(fp.name),shell=True, capture_output=True, text=True)
  45. out = process.stdout
  46. fp.close()
  47. return out
  48. def minifyPython (text):
  49. usefulText = PyCleaner().cleanCode(text)
  50. return usefulText
  51. def getSubmissionFile (path, exerciseID, submissionID):
  52. filePath = "%s%s/%s/submittedfiles/*.*"%(path, trunc(exerciseID), trunc(submissionID))
  53. files = glob.glob(filePath)
  54. files = [f for f in files if FILE_REGEX.match(f) != None]
  55. assert len(files) > 0, "Evaluated code for exercise {}, submission {} doesn't have a single submitted .c file at {}".format(exerciseID, submissionID, filePath)
  56. return files[0]
  57. def getUsefulLines (path, exerciseID, submissionID):
  58. subFile = getSubmissionFile(path, exerciseID, submissionID)
  59. usefulLines = 0
  60. with open(subFile, "r") as fileHandler:
  61. content = fileHandler.read()
  62. fileHandler.close()
  63. usefulText = re.sub(USEFUL_REGEX, "", content, flags=re.MULTILINE)
  64. usefulText = os.linesep.join([s for s in usefulText.splitlines() if s])
  65. usefulLines = usefulText.count("\n")
  66. return usefulLines
  67. def levenshteinDistance (seq1, seq2):
  68. size_x = len(seq1) + 1
  69. size_y = len(seq2) + 1
  70. matrix = np.zeros ((size_x, size_y))
  71. for x in range(size_x):
  72. matrix [x, 0] = x
  73. for y in range(size_y):
  74. matrix [0, y] = y
  75. for x in range(1, size_x):
  76. for y in range(1, size_y):
  77. if seq1[x-1] == seq2[y-1]:
  78. matrix [x,y] = min(
  79. matrix[x-1, y] + 1,
  80. matrix[x-1, y-1],
  81. matrix[x, y-1] + 1
  82. )
  83. else:
  84. matrix [x,y] = min(
  85. matrix[x-1,y] + 1,
  86. matrix[x-1,y-1] + 1,
  87. matrix[x,y-1] + 1
  88. )
  89. return (matrix[size_x - 1, size_y - 1])