import csv
import sys
import os
import shutil
import datetime
import tkinter as tk
from tkinter import messagebox
import hashlib
import pickle

import filters.ieeeFilter as IEEE
import filters.scopusFilter as Scopus
import filters.webScienceFilter as WebOfScience

# title, abstract, authors, file & line, DOI, keywords, extra keywords
FINAL_CSV_HEADERS = ['titulo', 'abstract', 'autores', 'doi', 'keywords', 'extra_keywords', 'fonte']

def loadFiles(files):
    # Pair each configured CSV export with a DictReader that uses the file's declared separator.
    loadedFiles = []
    for file in files:
        loadedFiles.append((csv.DictReader(open(file['path']), delimiter=file['separator']), file))
    return loadedFiles

def saveAcceptedCSV(data):
    doBackup('accepted.csv')
    # newline='' keeps the csv module from writing blank lines between rows on Windows.
    file = open('./accepted.csv', mode='w', newline='')
    headers = data['header']
    writer = csv.writer(file, delimiter=',')
    writer.writerow(headers)
    # Temporarily drop the header entry so only data rows are iterated, then restore it.
    data.pop('header')
    for row in data.values():
        writer.writerow(row)
    data['header'] = headers
    file.flush()
    file.close()

def saveRejectedCSV(data):
    doBackup('rejected.csv')
    file = open('./rejected.csv', mode='w', newline='')
    headers = data['header']
    writer = csv.writer(file, delimiter=',')
    writer.writerow(headers)
    data.pop('header')
    for row in data.values():
        writer.writerow(row)
    data['header'] = headers
    file.flush()
    file.close()

def writeFinalLog(data):
    file = open('./log.txt', mode='w')
    file.write("accepted: {0}\nrejected: {1}\nduplicated: {2}\n".format(data['accepted'], data['rejected'], data['duplicate']))
    file.flush()
    file.close()

def removeStateFile():
    try:
        os.remove('./.csvstate')
    except Exception:
        pass

def saveState(data):
    doBackup('.csvstate')
    file = open('./.csvstate', mode='wb')
    pickle.dump(data, file)
    file.close()

def doBackup(fileName):
    if not os.path.exists('./{0}'.format(fileName)):
        return  # nothing to back up yet (first run)
    os.makedirs('./bak', exist_ok=True)
    filePath = './bak/{0}'.format(fileName)
    shutil.copy('./{0}'.format(fileName), "{0}.{1}.bak".format(filePath, datetime.datetime.now()))

def loadState(root):
    try:
        file = open('./.csvstate', mode='rb')
        answer = messagebox.askyesno("State loading", "Load last session state?")
        if answer:
            data = pickle.load(file)
            file.close()
            return data
        file.close()
    except Exception:
        pass
    return {'file_index': 0, 'row_index': 0, 'accepted': 0, 'rejected': 0, 'duplicate': 0}

def loadConfigFile():
    # csvconfig.txt holds one "path:filter:separator" entry per line.
    files = list()
    try:
        file = open('./csvconfig.txt')
        rawData = file.read()
        file.close()
        lines = rawData.split('\n')
        for line in lines:
            if not line.strip():
                continue  # skip blank lines, e.g. a trailing newline
            parameters = line.split(':')
            # Decode escape sequences such as "\t" so they can be used as CSV separators.
            separator = parameters[2].encode('utf-8').decode("unicode_escape")
            fileConfig = {'path': parameters[0], 'filter': parameters[1], 'separator': separator}
            files.append(fileConfig)
        return files
    except FileNotFoundError:
        return list()
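
# Illustrative csvconfig.txt contents (an assumption, not a file shipped with the
# project); each line is path:filter:separator, and the filter name must be one of
# the values handled by getFilter() below:
#   ./exports/ieee.csv:ieee:,
#   ./exports/scopus.csv:scopus:;
#   ./exports/webscience.csv:webscience:\t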

def getFilter(file):
    if file['filter'] == 'ieee':
        return IEEE.IEEEFilter()
    elif file['filter'] == 'scopus':
        return Scopus.ScopusFilter()
    elif file['filter'] == 'webscience':
        return WebOfScience.WebScience()
    else:
        raise Exception('Undefined filter ' + file['filter'])

def restoreAcceptedCSV():
    try:
        file = open('./accepted.csv')
        rows = list(csv.reader(file))
        file.close()
        data = {}
        if len(rows) == 0:
            return data
        data['header'] = rows[0]
        for i in range(1, len(rows)):
            # Index each data row by the SHA-256 digest of its title column.
            digest = hashlib.sha256(str.encode(rows[i][0])).hexdigest()
            data[digest] = rows[i]
        return data
    except FileNotFoundError:
        return {}

def restoreRejectedCSV():
    try:
        file = open('./rejected.csv')
        rows = list(csv.reader(file))
        file.close()
        data = {}
        if len(rows) == 0:
            return data
        data['header'] = rows[0]
        for i in range(1, len(rows)):
            # Index each data row by the SHA-256 digest of its title column.
            digest = hashlib.sha256(str.encode(rows[i][0])).hexdigest()
            data[digest] = rows[i]
        return data
    except FileNotFoundError:
        return {}
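
# Minimal usage sketch (illustrative only, not part of the original module): shows one
# way the helpers above could be wired together; the hidden Tk root and the printed
# message are assumptions.
if __name__ == '__main__':
    root = tk.Tk()
    root.withdraw()  # the confirmation dialog does not need a visible main window
    state = loadState(root)
    readers = loadFiles(loadConfigFile())
    accepted = restoreAcceptedCSV()
    rejected = restoreRejectedCSV()
    print('resuming at file {0}, row {1}'.format(state['file_index'], state['row_index']))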