helpers.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. import csv
  2. import sys
  3. import os
  4. import shutil
  5. import datetime
  6. import tkinter as tk
  7. from tkinter import messagebox
  8. import hashlib
  9. import pickle
  10. import filters.ieeeFilter as IEEE, filters.scopusFilter as Scopus, filters.webScienceFilter as WebOfScience
  11. # titulo, abstract, autores, arquivo&linha, DOI, palavras-chave,palavras-chave extras
  12. FINAL_CSV_HEADERS = ['titulo', 'abstract', 'autores', 'doi', 'keywords','extra_keywords', 'fonte']
  13. def loadFiles (files):
  14. loadedFiles = []
  15. for file in files:
  16. loadedFiles.append((csv.DictReader(open(file['path']), delimiter=file['separator']), file))
  17. return loadedFiles
  18. def saveAcceptedCSV (data):
  19. doBackup('accepted.csv')
  20. file = open('./accepted.csv', mode='w')
  21. headers = data['header']
  22. writer = csv.writer(file, delimiter=',')
  23. writer.writerow(headers)
  24. data.pop('header')
  25. for row in data.values():
  26. writer.writerow(row)
  27. data['header'] = headers
  28. file.flush()
  29. file.close()
  30. def saveRejectedCSV (data):
  31. doBackup('rejected.csv')
  32. file = open('./rejected.csv', mode='w')
  33. headers = data['header']
  34. writer = csv.writer(file, delimiter=',')
  35. writer.writerow(headers)
  36. data.pop('header')
  37. for row in data.values():
  38. writer.writerow(row)
  39. data['header'] = headers
  40. file.flush()
  41. file.close()
  42. def writeFinalLog (data):
  43. file = open("./log.txt", mode='w')
  44. file.write("accepted: {0}\nrejected: {1}\nduplicated: {2}\n".format(data['accepted'],data['rejected'],data['duplicate']))
  45. file.flush()
  46. file.close()
  47. def removeStateFile ():
  48. try:
  49. os.remove('./.csvstate')
  50. except Exception:
  51. pass
  52. def saveState (data):
  53. doBackup(".csvstate")
  54. file = open('./.csvstate', mode='wb')
  55. pickle.dump(data, file)
  56. file.close()
  57. def doBackup (fileName):
  58. filePath = './bak/{0}'.format(fileName)
  59. shutil.copy('./{0}'.format(fileName), "{0}.{1}.bak".format(filePath, datetime.datetime.now()))
  60. def loadState (root):
  61. try:
  62. file = open('./.csvstate', mode='rb')
  63. answer = tk.messagebox.askyesno("State lading","Load last session state?")
  64. if answer:
  65. data = pickle.load(file)
  66. file.close()
  67. return data
  68. except Exception:
  69. pass
  70. return {'file_index':0, 'row_index':0, 'accepted':0, 'rejected':0, 'duplicate': 0}
  71. def loadConfigFile ():
  72. files = list()
  73. try:
  74. file = open('./csvconfig.txt')
  75. rawData = file.read()
  76. lines = rawData.split('\n')
  77. for line in lines:
  78. parameters = line.split(':')
  79. separator = parameters[2].encode('utf-8').decode("unicode_escape")
  80. fileConfig = {'path': parameters[0], 'filter': parameters[1], 'separator': separator}
  81. files.append(fileConfig)
  82. file.close()
  83. return files
  84. except FileNotFoundError:
  85. return list()
  86. def getFilter (file):
  87. if file['filter'] == 'ieee':
  88. return IEEE.IEEEFilter()
  89. elif file['filter'] == 'scopus':
  90. return Scopus.ScopusFilter()
  91. elif file['filter'] == 'webscience':
  92. return WebOfScience.WebScience()
  93. else:
  94. raise Exception('Undefined filter '+file['filter'])
  95. def restoreAcceptedCSV ():
  96. try:
  97. file = open('./accepted.csv')
  98. rows = list(csv.reader(file))
  99. data = {}
  100. if len(rows) == 0:
  101. return data
  102. data['header'] = rows[0]
  103. for i in range(1,len(rows)):
  104. digest = hashlib.sha256(str.encode(rows[i][0])).hexdigest()
  105. data[digest] = rows[i]
  106. return data
  107. except FileNotFoundError:
  108. return {}
  109. def restoreRejectedCSV ():
  110. try:
  111. file = open('./rejected.csv')
  112. rows = list(csv.reader(file))
  113. data = {}
  114. if len(rows) == 0:
  115. return data
  116. data['header'] = rows[0]
  117. for i in range(1,len(rows)):
  118. digest = hashlib.sha256(str.encode(rows[i][0])).hexdigest()
  119. data[digest] = rows[i]
  120. return data
  121. except FileNotFoundError:
  122. return {}