Lots of improvements ;-)

This commit is contained in:
Gregory Trullemans 2022-03-08 19:54:16 +01:00
parent a40ca27c24
commit 3c77dc20e4
6 changed files with 184 additions and 49 deletions

Binary file not shown.

View File

@ -2,8 +2,46 @@ from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfpage import PDFPage
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
import io
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Font, numbers
from openpyxl.formatting.rule import ColorScaleRule
from openpyxl.worksheet.table import Table, TableStyleInfo
from string import printable
class RoutineScore:
    """Component scores of one routine together with their sum.

    Every attribute is a float initialised to zero: ``execution``,
    ``difficulty``, ``hd``, ``time_of_flight``, ``penality`` and ``total``.
    ``total`` is only refreshed when :meth:`compute_total` (or
    :meth:`display`, which calls it) is invoked.
    """

    def __init__(self):
        # Same attributes, same creation order, all starting at zero.
        for component in (
            "execution",
            "difficulty",
            "hd",
            "time_of_flight",
            "penality",
            "total",
        ):
            setattr(self, component, 0.000)

    def compute_total(self):
        """Store the sum of the five component scores in ``self.total``."""
        # Left-to-right addition in this exact order keeps float results stable.
        subtotal = self.execution + self.difficulty
        subtotal = subtotal + self.hd
        subtotal = subtotal + self.time_of_flight
        self.total = subtotal + self.penality

    def display(self):
        """Refresh the total, then print all six values separated by ' | '."""
        self.compute_total()
        fields = (
            self.difficulty,
            self.time_of_flight,
            self.execution,
            self.hd,
            self.penality,
            self.total,
        )
        print(' | '.join(str(value) for value in fields))
class CompetitionScore:
    """Three routine scores and their combined total.

    ``first_routine``, ``second_routine`` and ``third_routine`` are
    ``RoutineScore`` instances; ``total_score`` starts at zero and is only
    refreshed by :meth:`compute_total`.
    """

    def __init__(self):
        self.first_routine = RoutineScore()
        self.second_routine = RoutineScore()
        self.third_routine = RoutineScore()
        self.total_score = 0.000

    def compute_total(self):
        """Recompute each routine's total, then store their sum in ``total_score``."""
        routines = (self.first_routine, self.second_routine, self.third_routine)
        # Refresh every routine first so the sum below uses up-to-date totals.
        for routine in routines:
            routine.compute_total()
        first, second, third = routines
        self.total_score = first.total + second.total + third.total
def pdf2txt(inPDFfile, outTXTFile):
inFile = open(inPDFfile, "rb")
@ -20,7 +58,7 @@ def pdf2txt(inPDFfile, outTXTFile):
f.write(txt)
def clean_generated_text_file(text_file):
def generate_clean_text_file(text_file):
file = open(text_file, "r")
cleaned_file = open(text_file[:-4] + "_cleaned.txt", "w")
@ -41,14 +79,11 @@ def clean_generated_text_file(text_file):
and not "" in line
and not "\x00" in line
):
# print("ligne rejetée")
pass
else:
if "REEKS" in line:
# print("ligne rejetée 2")
pass
elif len(line) >= 5:
# print(line)
# on remplace les caractères illisibles
new_line = line.replace("", "0")
new_line = new_line.replace("", "1")
@ -61,22 +96,115 @@ def clean_generated_text_file(text_file):
new_line = new_line.replace("", "8")
new_line = new_line.replace("", "9")
new_line = new_line.replace("\x00", ".")
# for c in new_line:
# print(c)
cleaned_file.write(new_line)
# print(line)
else:
if "TU TRA - Provinciale voorronde" in line:
pass
# elif " - ALLROUND " in line:
# write = False
# elif " - ALLROUND" in line:
# write = True
else:
cleaned_file.write(line)
cleaned_file.write(line.replace(" JAAR", "").replace(" - ALLROUND", ""))
return text_file[:-4] + "_cleaned.txt"
# Paths of the input PDF and of the text file to produce.
# NOTE(review): both names are assigned again near the end of the file with
# ./examples/ paths; these two lines look like the older version of that
# assignment -- confirm which pair is meant to stay.
inPDFfile = "2022_PV2.pdf" # your file path
outTXTFile = "test.txt" # what ever the name you want enjoy!
def write_header(xl_sheet, cell, text) -> None:
    """Write a header into an Excel sheet.

    Stores ``text`` in ``xl_sheet[cell]`` and then sets that cell's font to
    bold.
    """
    xl_sheet[cell] = text
    # Fetch the cell again after the assignment, then style it.
    header_cell = xl_sheet[cell]
    header_cell.font = Font(bold=True)
def generete_excel_file(cleaned_text_file, max_lines=1000):
    """Build an Excel workbook from a cleaned score text file.

    The file is scanned line by line:

    * A line containing ``"A - "`` or ``"B - "`` is a title line. A new sheet
      named after it (trailing whitespace stripped) is created, with bold
      headers Diff / ToF / Exe / HD / Pen / Total in columns A-F and again in
      columns H-M, plus ``TOTAL`` in O1. Row counters restart for that sheet.
    * Any other line must parse as a float. A value above 22 is treated as a
      sub-total or total and skipped. Otherwise the line starts a record of
      five consecutive values (difficulty, time of flight, execution, HD,
      penalty) and the scan advances by six lines, so the line following the
      five values is not read.
    * A record whose difficulty is 0 goes to columns A-F, any other record to
      columns H-M. Each column group fills rows from 2 downward on its own.

    Records found before the first title line land on the initial sheet,
    which is titled "Dummy sheet". An empty input file yields a workbook that
    only contains that sheet.

    Args:
        cleaned_text_file: Path of the cleaned text file. The workbook is
            saved under the same path with its last four characters replaced
            by ``.xlsx``.
        max_lines: A new title or record is only started at a line index
            below this value. Defaults to 1000, the limit that used to be
            hard-coded. Pass ``None`` to process the whole file.

    Raises:
        ValueError: If a non-title line cannot be converted to a float.
        IndexError: If the file ends in the middle of a five-value record.
    """
    # Read everything up front so the handle is released even when parsing
    # fails further down.
    with open(cleaned_text_file, "r") as cleaned_file:
        lines = cleaned_file.readlines()

    wb = Workbook()
    ws_score = wb.active
    ws_score.title = "Dummy sheet"

    stop = len(lines) if max_lines is None else min(max_lines, len(lines))

    headers = ("Diff", "ToF", "Exe", "HD", "Pen", "Total")
    r1_columns = "ABCDEF"  # records with difficulty == 0
    r2_columns = "HIJKLM"  # all other records
    score_line = 2  # first data row, right below the headers
    number_of_r1_score = 0
    number_of_r2_score = 0

    i = 0
    while i < stop:
        line = lines[i]
        print("LINE : " + line)
        if "A - " in line or "B - " in line:
            # Title line: start a new sheet and write its headers.
            title = line.rstrip()
            print('Création de la feuille --||' + title + '||--')
            ws_score = wb.create_sheet(title)
            for columns in (r1_columns, r2_columns):
                for column, header in zip(columns, headers):
                    write_header(ws_score, column + "1", header)
            write_header(ws_score, "O1", "TOTAL")
            number_of_r1_score = 0
            number_of_r2_score = 0
            i += 1
        elif float(line) > 22.000:
            # Value above 22 -> sub-total or total, not a record start.
            i += 1
        else:
            score = RoutineScore()
            score.difficulty = float(lines[i])
            score.time_of_flight = float(lines[i + 1])
            score.execution = float(lines[i + 2])
            score.hd = float(lines[i + 3])
            score.penality = float(lines[i + 4])
            score.compute_total()
            score.display()

            if score.difficulty == 0:
                columns = r1_columns
                index = score_line + number_of_r1_score
                number_of_r1_score += 1
            else:
                columns = r2_columns
                index = score_line + number_of_r2_score
                number_of_r2_score += 1

            values = (
                score.difficulty,
                score.time_of_flight,
                score.execution,
                score.hd,
                score.penality,
                score.total,
            )
            for column, value in zip(columns, values):
                ws_score[column + str(index)] = value

            # Five values were read; the sixth line of the record is skipped.
            i += 6

    wb.save(filename=cleaned_text_file[:-4] + ".xlsx")
# Script section: these statements run as soon as the module is executed or
# imported (there is no __main__ guard).
# Pipeline: PDF -> text file -> cleaned text file -> Excel workbook.
inPDFfile = "./examples/2022_PV2.pdf" # your file path
outTXTFile = "./examples/test.txt" # what ever the name you want enjoy!
pdf2txt(inPDFfile, outTXTFile)
# NOTE(review): this call and the next one look like the old and the new name
# of the same cleaning step -- confirm which of the two should remain.
clean_generated_text_file(outTXTFile)
# generate_clean_text_file returns the path of the "_cleaned.txt" file it wrote.
cleaned_text_file = generate_clean_text_file(outTXTFile)
generete_excel_file(cleaned_text_file)

View File

@ -1,4 +1,4 @@
A - 11 JAAR MEISJES - ALLROUND
A - 11 MEISJES
0.000
12.265
16.700
@ -233,7 +233,7 @@ A - 11 JAAR MEISJES - ALLROUND
0.000
10.798
47.203
A - 11 JAAR MEISJES - ALLROUND ANTWERPEN
A - 11 MEISJES ANTWERPEN
0.000
11.485
12.100
@ -247,7 +247,7 @@ A - 11 JAAR MEISJES - ALLROUND ANTWERPEN
0.000
36.180
69.465
A - 11 JAAR JONGENS - ALLROUND
A - 11 JONGENS
0.000
12.375
14.400
@ -326,7 +326,7 @@ A - 11 JAAR JONGENS - ALLROUND
72.685
72.315
63.535
A - 11 JAAR JONGENS - ALLROUND ANTWERPEN
A - 11 JONGENS ANTWERPEN
0.000
12.375
14.400
@ -340,7 +340,7 @@ A - 11 JAAR JONGENS - ALLROUND ANTWERPEN
0.000
40.242
76.067
A - 12 JAAR MEISJES - ALLROUND
A - 12 MEISJES
0.000
13.130
17.600
@ -484,7 +484,7 @@ A - 12 JAAR MEISJES - ALLROUND
76.390
73.110
71.935
A - 12 JAAR JONGENS - ALLROUND
A - 12 JONGENS
0.000
13.070
18.300
@ -537,7 +537,7 @@ A - 12 JAAR JONGENS - ALLROUND
80.985
77.225
76.585
A - 13-14 JAAR MEISJES - ALLROUND
A - 13-14 MEISJES
0.000
13.550
15.800
@ -733,7 +733,7 @@ A - 13-14 JAAR MEISJES - ALLROUND
61.220
61.050
44.395
A - 13-14 JAAR MEISJES - ALLROUND ANTWERPEN
A - 13-14 MEISJES ANTWERPEN
0.000
12.580
13.400
@ -747,7 +747,7 @@ A - 13-14 JAAR MEISJES - ALLROUND ANTWERPEN
0.000
38.385
73.865
A - 13-14 JAAR JONGENS - ALLROUND
A - 13-14 JONGENS
0.000
13.340
16.100
@ -852,7 +852,7 @@ A - 13-14 JAAR JONGENS - ALLROUND
61.175
51.885
49.535
A - 15-16 JAAR MEISJES - ALLROUND
A - 15-16 MEISJES
0.000
15.765
17.600
@ -970,7 +970,7 @@ A - 15-16 JAAR MEISJES - ALLROUND
0.000
8.430
32.175
A - 15-16 JAAR JONGENS - ALLROUND
A - 15-16 JONGENS
0.000
14.885
17.800
@ -1023,7 +1023,7 @@ A - 15-16 JAAR JONGENS - ALLROUND
86.955
72.600
71.780
A - SENIORES MEISJES - ALLROUND
A - SENIORES MEISJES
11.200
14.750
14.400
@ -1115,7 +1115,7 @@ A - SENIORES MEISJES - ALLROUND
46.290
44.965
41.825
A - SENIORES JONGENS - ALLROUND
A - SENIORES JONGENS
15.600
16.620
15.400
@ -1259,7 +1259,7 @@ A - SENIORES JONGENS - ALLROUND
39.305
17.110
11.220
B - 11 JAAR MEISJES - ALLROUND
B - 11 MEISJES
0.000
11.915
16.300
@ -1600,7 +1600,7 @@ GYM 90 BERINGEN
48.044
47.488
34.389
B - 11 JAAR MEISJES - ALLROUND ANTWERPEN
B - 11 MEISJES ANTWERPEN
0.000
11.545
14.000
@ -1653,7 +1653,7 @@ B - 11 JAAR MEISJES - ALLROUND ANTWERPEN
71.545
66.960
52.275
B - 11 JAAR JONGENS - ALLROUND
B - 11 JONGENS
0.000
10.690
14.600
@ -1745,7 +1745,7 @@ B - 11 JAAR JONGENS - ALLROUND
65.575
57.005
44.605
B - 11 JAAR JONGENS - ALLROUND ANTWERPEN
B - 11 JONGENS ANTWERPEN
0.000
11.075
13.200
@ -1759,7 +1759,7 @@ B - 11 JAAR JONGENS - ALLROUND ANTWERPEN
0.000
39.170
72.145
B - 12 JAAR MEISJES - ALLROUND
B - 12 MEISJES
0.000
12.905
14.900
@ -2204,7 +2204,7 @@ GYM 90 BERINGEN
62.850
59.225
52.400
B - 12 JAAR MEISJES - ALLROUND ANTWERPEN
B - 12 MEISJES ANTWERPEN
0.000
12.700
14.500
@ -2270,7 +2270,7 @@ B - 12 JAAR MEISJES - ALLROUND ANTWERPEN
74.355
73.810
68.425
B - 12 JAAR JONGENS - ALLROUND
B - 12 JONGENS
0.000
11.845
16.600
@ -2362,7 +2362,7 @@ B - 12 JAAR JONGENS - ALLROUND
74.395
63.495
29.640
B - 12 JAAR JONGENS - ALLROUND ANTWERPEN
B - 12 JONGENS ANTWERPEN
0.000
12.110
15.600
@ -2389,7 +2389,7 @@ B - 12 JAAR JONGENS - ALLROUND ANTWERPEN
0.000
19.440
29.640
B - 13 JAAR MEISJES - ALLROUND
B - 13 MEISJES
0.000
12.600
15.400
@ -2611,7 +2611,7 @@ B - 13 JAAR MEISJES - ALLROUND
73.705
73.620
73.085
B - 13 JAAR MEISJES - ALLROUND ANTWERPEN
B - 13 MEISJES ANTWERPEN
0.000
10.295
13.600
@ -2690,7 +2690,7 @@ B - 13 JAAR MEISJES - ALLROUND ANTWERPEN
79.320
77.995
75.865
B - 14 JAAR MEISJES - ALLROUND
B - 14 MEISJES
0.000
13.355
16.700
@ -3133,7 +3133,7 @@ B - 14 JAAR MEISJES - ALLROUND
54.215
51.720
47.330
B - 14 JAAR MEISJES - ALLROUND ANTWERPEN
B - 14 MEISJES ANTWERPEN
0.000
12.280
15.500
@ -3186,7 +3186,7 @@ B - 14 JAAR MEISJES - ALLROUND ANTWERPEN
74.800
72.920
71.385
B - 13-14 JAAR JONGENS - ALLROUND
B - 13-14 JONGENS
0.000
12.750
15.100
@ -3369,7 +3369,7 @@ B - 13-14 JAAR JONGENS - ALLROUND
67.295
61.875
56.080
B - 13-14 JAAR JONGENS - ALLROUND ANTWERPEN
B - 13-14 JONGENS ANTWERPEN
0.000
14.035
13.500
@ -3383,7 +3383,7 @@ B - 13-14 JAAR JONGENS - ALLROUND ANTWERPEN
0.000
37.240
73.875
B - 15-16 JAAR MEISJES - ALLROUND
B - 15-16 MEISJES
0.000
14.160
16.800
@ -3931,7 +3931,7 @@ GYM 90 BERINGEN
52.385
51.910
38.970
B - 15-16 JAAR MEISJES - ALLROUND ANTWERPEN
B - 15-16 MEISJES ANTWERPEN
0.000
12.840
13.900
@ -3958,7 +3958,7 @@ B - 15-16 JAAR MEISJES - ALLROUND ANTWERPEN
0.000
39.795
74.820
B - 15-16 JAAR JONGENS - ALLROUND
B - 15-16 JONGENS
0.000
14.190
16.700
@ -4193,7 +4193,7 @@ B - 15-16 JAAR JONGENS - ALLROUND
0.000
4.460
40.005
B - 17+ JAAR MEISJES - ALLROUND
B - 17+ MEISJES
GYM 90 BERINGEN
1.800
14.375
@ -4513,7 +4513,7 @@ GYM 90 BERINGEN
55.687
46.790
45.235
B - 17+ JAAR MEISJES - ALLROUND ANTWERPEN
B - 17+ MEISJES ANTWERPEN
11.365
0.900
12.905
@ -4574,7 +4574,7 @@ B - 17+ JAAR MEISJES - ALLROUND ANTWERPEN
78.910
69.040
11.365
B - 17+ JAAR JONGENS - ALLROUND
B - 17+ JONGENS
2.300
14.930
17.300

BIN
examples/test_cleaned.xlsx Normal file

Binary file not shown.

View File

@ -1 +1,8 @@
PyPDF2==1.26.0
black==22.1.0
cffi==1.15.0
chardet==4.0.0
cryptography==36.0.1
et-xmlfile==1.1.0
openpyxl==3.0.9
pdfminer.six==20211012
pycparser==2.21