|
@@ -12,6 +12,7 @@ csvFiles = None
|
|
|
state = None
|
|
|
acceptedDataCSV = None
|
|
|
rejectedDataCSV = None
|
|
|
+nextCount = 0
|
|
|
|
|
|
def findDuplicate (hashDict, title):
|
|
|
if title in hashDict:
|
|
@@ -27,7 +28,7 @@ def acceptPaper (file, csvRow, index):
|
|
|
digest = hashlib.sha256(str.encode(fileFilter.getTitle(csvRow))).hexdigest()
|
|
|
duplicate = findDuplicate(rejectedDataCSV, digest) or findDuplicate(acceptedDataCSV, digest)
|
|
|
if duplicate:
|
|
|
- state['duplihttps://mail.google.com/mail/u/0/#inboxcate'] += 1
|
|
|
+ state['duplicate'] += 1
|
|
|
else:
|
|
|
data.append("{0} - {1}".format(file['path'],index))
|
|
|
acceptedDataCSV[digest] = data
|
|
@@ -69,7 +70,6 @@ def acceptButtonHandler (*arg):
|
|
|
nextPaper()
|
|
|
return
|
|
|
|
|
|
-
|
|
|
def updateAndClear (textField, *args):
|
|
|
textField.config(state=NORMAL)
|
|
|
textField.delete('1.0', END)
|
|
@@ -95,7 +95,6 @@ def loadFiles ():
|
|
|
if len(rejectedDataCSV) == 0:
|
|
|
rejectedDataCSV['header'] = helpers.FINAL_CSV_HEADERS
|
|
|
csvFiles = [(list(f[0]), f[1]) for f in helpers.loadFiles(filesInfo)]
|
|
|
- setPaperFromState()
|
|
|
|
|
|
def setPaperFromState ():
|
|
|
global titleLabel
|
|
@@ -121,6 +120,7 @@ def nextPaper ():
|
|
|
global titleLabel
|
|
|
global text
|
|
|
global root
|
|
|
+ global nextCount
|
|
|
fileIndex = state['file_index']
|
|
|
if fileIndex >= len(filesInfo):
|
|
|
helpers.removeStateFile()
|
|
@@ -136,6 +136,9 @@ def nextPaper ():
|
|
|
state['row_index'] = -1
|
|
|
nextPaper()
|
|
|
else:
|
|
|
+ nextCount += 1
|
|
|
+ if nextCount%10 == 0:
|
|
|
+ saveCurrentData()
|
|
|
file = fileList[index]
|
|
|
fileFilter = helpers.getFilter(csvFiles[fileIndex][1])
|
|
|
title = fileFilter.getTitle(file)
|
|
@@ -144,15 +147,47 @@ def nextPaper ():
|
|
|
text.set(abstract)
|
|
|
|
|
|
def onClosing():
    """Handle the window-close request: persist everything, then tear down the UI.

    Registered in main() as the WM_DELETE_WINDOW protocol handler.
    """
    # Save first so nothing is lost if destroying the window ends the process.
    saveCurrentData()
    # `root` is only read here, so no `global` declaration is required.
    root.destroy()
|
|
|
+
|
|
|
def saveCurrentData():
    """Write the review state and both result tables out through `helpers`.

    Called on window close, after every tenth paper in nextPaper(), and at the
    end of updateDup(). The module globals are only read here, so no `global`
    declarations are needed.
    """
    helpers.saveState(state)
    helpers.saveAcceptedCSV(acceptedDataCSV)
    helpers.saveRejectedCSV(rejectedDataCSV)
|
|
|
|
|
|
def _splitHeader(table):
    """Split one result table into (header_row, data_rows).

    The header is looked up by its 'header' key when present, so the result
    does not depend on where the header sits in the dict's insertion order.
    If there is no 'header' key, the first stored row is treated as the header
    (the positional behaviour this code originally relied on). An empty table
    yields (None, []).
    """
    if 'header' in table:
        dataRows = [row for key, row in table.items() if key != 'header']
        return table['header'], dataRows
    rows = list(table.values())
    if rows:
        return rows[0], rows[1:]
    return None, []


def _dedupeRows(rows, label, alreadySeen=None):
    """Return (unique_rows_by_digest, duplicate_count) for `rows`.

    Each row is keyed by the SHA-256 hex digest of its lower-cased first
    column. A row counts as a duplicate when its digest was already produced
    by an earlier row in `rows` or is present in `alreadySeen`.
    """
    unique = {}
    duplicates = 0
    for row in rows:
        digest = hashlib.sha256(str.encode(row[0].lower())).hexdigest()
        if digest in unique or (alreadySeen is not None and digest in alreadySeen):
            print("Found duplicate in {0}: {1}".format(label, row[0]))
            duplicates += 1
        else:
            unique[digest] = row
    return unique, duplicates


def _withHeader(header, uniqueRows):
    """Build a table with the header (if any) inserted before the data rows."""
    table = {}
    if header is not None:
        # Header goes in first so insertion order stays header-then-rows.
        table['header'] = header
    table.update(uniqueRows)
    return table


def updateDup():
    """Remove duplicate papers from the accepted and rejected tables and save.

    Rejected rows are de-duplicated among themselves. Accepted rows are
    de-duplicated among themselves and against the surviving rejected rows,
    so a paper present in both tables is kept only in the rejected one.
    Every dropped row increments state['duplicate']; state['accepted'] and
    state['rejected'] are reset to the number of surviving data rows. The
    rebuilt tables replace the module globals and are persisted via
    saveCurrentData().

    Compared with the previous implementation, the header row is re-inserted
    first rather than last (so repeated runs no longer mistake a data row for
    the header), and empty tables no longer raise IndexError.
    """
    global acceptedDataCSV
    global rejectedDataCSV
    global state

    headerR, rowsR = _splitHeader(rejectedDataCSV)
    headerA, rowsA = _splitHeader(acceptedDataCSV)

    # Rejected first: accepted rows are then checked against the rejected set.
    uniqueR, dupR = _dedupeRows(rowsR, "rejected")
    uniqueA, dupA = _dedupeRows(rowsA, "accepted", alreadySeen=uniqueR)

    state['duplicate'] += dupR + dupA
    state['accepted'] = len(uniqueA)
    state['rejected'] = len(uniqueR)

    acceptedDataCSV = _withHeader(headerA, uniqueA)
    rejectedDataCSV = _withHeader(headerR, uniqueR)
    saveCurrentData()
|
|
|
|
|
|
def main ():
|
|
|
global root
|
|
@@ -177,7 +212,13 @@ def main ():
|
|
|
text.trace('w', lambda *arg: updateAndClear(textField))
|
|
|
textField.config(state=DISABLED)
|
|
|
root.protocol("WM_DELETE_WINDOW", onClosing)
|
|
|
+ setPaperFromState()
|
|
|
+ #updateDup()
|
|
|
+ print("Duplicate count {0}".format(state['duplicate']))
|
|
|
+ print("Accepted count {0}".format(state['accepted']))
|
|
|
+ print("Rejected count {0}".format(state['rejected']))
|
|
|
root.mainloop()
|
|
|
|
|
|
# Run the tool only when executed as a script, so importing this module
# has no side effects.
if __name__ == "__main__":
    loadFiles()
    main()
|