259 lines
8.5 KiB
Python
259 lines
8.5 KiB
Python
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
|
from pdfminer.pdfpage import PDFPage
|
|
from pdfminer.converter import TextConverter
|
|
from pdfminer.layout import LAParams
|
|
|
|
import io
|
|
|
|
from openpyxl import Workbook, load_workbook
|
|
from openpyxl.styles import Font, numbers
|
|
from openpyxl.formatting.rule import ColorScaleRule
|
|
from openpyxl.worksheet.table import Table, TableStyleInfo
|
|
|
|
from string import printable
|
|
|
|
|
|
class RoutineScore:
    """
    Marks recorded for a single routine, together with their sum.

    Every mark starts at 0.0. ``total`` is only refreshed when compute_total() runs
    (display() calls it before printing).
    """

    def __init__(self):
        # Individual marks, all zero until filled in by the caller.
        self.execution = 0.0
        self.difficulty = 0.0
        self.hd = 0.0
        self.time_of_flight = 0.0
        self.penality = 0.0
        # Sum of the five marks above, see compute_total().
        self.total = 0.0

    def compute_total(self):
        """
        Store in ``total`` the sum of the five marks.
        """
        # Additions are kept in this exact left-to-right order so the floating-point
        # result does not depend on how the sum is written.
        running = self.execution + self.difficulty
        running = running + self.hd
        running = running + self.time_of_flight
        self.total = running + self.penality

    def display(self):
        """
        Refresh the total, then print the marks on one line separated by " | ".

        Printed order: difficulty, time of flight, execution, hd, penalty, total.
        """
        self.compute_total()
        columns = (
            self.difficulty,
            self.time_of_flight,
            self.execution,
            self.hd,
            self.penality,
            self.total,
        )
        print(" | ".join(str(value) for value in columns))
|
|
|
|
|
|
class CompetitionScore:
    """
    Three routine scores and the sum of their totals.
    """

    def __init__(self):
        self.first_routine = RoutineScore()
        self.second_routine = RoutineScore()
        self.third_routine = RoutineScore()
        # Sum of the three routine totals, see compute_total().
        self.total_score = 0.0

    def compute_total(self):
        """
        Refresh each routine's total, then store their sum in ``total_score``.
        """
        routines = (self.first_routine, self.second_routine, self.third_routine)
        for routine in routines:
            routine.compute_total()

        first, second, third = (routine.total for routine in routines)
        self.total_score = first + second + third
|
|
|
|
|
|
def pdf_to_txt(pdf_file, txt_file):
    """
    Extract all the text that can be recovered from a PDF file and store it, unprocessed,
    in a text file.

    Args:
        pdf_file: path of the PDF to read.
        txt_file: path of the text file to write (overwritten if it already exists).
    """
    extracted = io.StringIO()
    resource_manager = PDFResourceManager()
    txt_converter = TextConverter(resource_manager, extracted, laparams=LAParams())
    interpreter = PDFPageInterpreter(resource_manager, txt_converter)

    # The context manager guarantees the PDF handle is released, even when a page
    # fails to parse.
    with open(pdf_file, "rb") as input_file:
        for page in PDFPage.get_pages(input_file):
            interpreter.process_page(page)

    with open(txt_file, "w") as f:
        f.write(extracted.getvalue())
|
|
|
|
|
|
def generate_clean_text_file(text_file):
    """
    Generate a new, cleaned text file from the raw text file (see pdf_to_txt()).

    Line handling:
      * blank lines are dropped;
      * title lines (containing "A - " or "B - ") are kept, minus the " JAAR" and
        " - ALLROUND" fragments, unless they contain "TU TRA - Provinciale voorronde";
      * other lines are kept only when they contain at least one of the unreadable
        glyphs listed below, do not contain "REEKS" and are at least 5 characters long
        (newline included); the glyphs are replaced by the characters they stand for.

    Args:
        text_file: path of the raw text file; its last four characters (expected to be
            an extension such as ".txt") are replaced by "_cleaned.txt" for the output.

    Returns:
        The path of the cleaned text file.
    """
    cleaned_path = text_file[:-4] + "_cleaned.txt"

    # Unreadable glyph -> character it stands for, applied in this order.
    # NOTE(review): in the copy of the source under review the first ten glyph literals
    # show up as empty strings; they are presumably non-printable characters that were
    # lost in extraction. Confirm against the original file before merging this block.
    glyph_replacements = (
        ("", "0"),
        ("", "1"),
        ("", "2"),
        ("", "3"),
        ("", "4"),
        ("", "5"),
        ("", "6"),
        ("", "7"),
        ("", "8"),
        ("", "9"),
        ("\x00", "."),
    )

    # Both files are closed (and the output flushed) before the path is returned, so the
    # next processing step can safely re-open the cleaned file.
    with open(text_file, "r") as raw_file, open(cleaned_path, "w") as cleaned_file:
        for line in raw_file:
            if not line.rstrip():
                continue

            # Titles are kept
            if "A - " in line or "B - " in line:
                if "TU TRA - Provinciale voorronde" not in line:
                    cleaned_file.write(
                        line.replace(" JAAR", "").replace(" - ALLROUND", "")
                    )
                continue

            # Lines without any unreadable glyph are ignored.
            if not any(glyph in line for glyph, _ in glyph_replacements):
                continue

            if "REEKS" in line or len(line) < 5:
                continue

            # Replace the unreadable characters
            new_line = line
            for glyph, replacement in glyph_replacements:
                new_line = new_line.replace(glyph, replacement)
            cleaned_file.write(new_line)

    return cleaned_path
|
|
|
|
|
|
def write_header(xl_sheet, cell, text) -> None:
    """
    Write a header into an Excel sheet: put ``text`` in ``cell`` and make it bold.

    Args:
        xl_sheet: worksheet to write into.
        cell: cell reference such as "A1".
        text: header label.
    """
    xl_sheet[cell] = text
    header_cell = xl_sheet[cell]
    header_cell.font = Font(bold=True)
|
|
|
|
|
|
def generete_excel_file(cleaned_text_file):
    """
    Generate an Excel file from the cleaned text file (see generate_clean_text_file()).

    The file is read line by line:
      * a title line (containing "A - " or "B - ") starts a new sheet named after it,
        with two groups of headers (columns A-F and H-M) and a "TOTAL" header in O1;
      * a numeric line greater than 22 is treated as a sub-total or total and skipped;
      * any other numeric line starts a score: five consecutive lines holding
        difficulty, time of flight, execution, HD and penalty. The line that follows
        them is skipped as well. Scores with a difficulty of 0 go to columns A-F, the
        others to columns H-M, each group filling its own rows from row 2 downwards.

    Scores met before the first title line land on the initial "Dummy sheet".
    The workbook is saved next to the input, with the last four characters of its path
    replaced by ".xlsx".

    Args:
        cleaned_text_file: path of the cleaned text file.

    Raises:
        ValueError: if a non-title line cannot be parsed as a float.
        IndexError: if the file ends in the middle of a five-line score.
    """
    # Read everything up front so the handle is closed even if parsing fails later.
    with open(cleaned_text_file, "r") as cleaned_file:
        lines = cleaned_file.readlines()

    headers = (
        ("A1", "Diff"),
        ("B1", "ToF"),
        ("C1", "Exe"),
        ("D1", "HD"),
        ("E1", "Pen"),
        ("F1", "Total"),
        ("H1", "Diff"),
        ("I1", "ToF"),
        ("J1", "Exe"),
        ("K1", "HD"),
        ("L1", "Pen"),
        ("M1", "Total"),
        ("O1", "TOTAL"),
    )
    r1_columns = "ABCDEF"  # scores whose difficulty is 0
    r2_columns = "HIJKLM"  # all other scores

    wb = Workbook()
    ws_score = wb.active
    ws_score.title = "Dummy sheet"

    i = 0
    score_line = 2  # first row below the headers
    number_of_r1_score = 0
    number_of_r2_score = 0

    # Bounded by the real number of lines: an empty file no longer raises IndexError and
    # files longer than 1000 lines are no longer silently truncated.
    while i < len(lines):
        print("LINE : " + lines[i])
        if "A - " in lines[i] or "B - " in lines[i]:
            # Title line: create a new Excel sheet and its headers.
            ws_score = wb.create_sheet(lines[i].rstrip())
            for cell, label in headers:
                write_header(ws_score, cell, label)
            # Each sheet fills its rows from the top again.
            number_of_r1_score = 0
            number_of_r2_score = 0
            i += 1
        elif float(lines[i]) > 22.000:
            # Numeric line above 22 -> sub-total or total, skipped.
            i += 1
        else:
            score = RoutineScore()
            score.difficulty = float(lines[i])
            score.time_of_flight = float(lines[i + 1])
            score.execution = float(lines[i + 2])
            score.hd = float(lines[i + 3])
            score.penality = float(lines[i + 4])
            score.compute_total()
            score.display()

            if score.difficulty == 0:
                columns = r1_columns
                row = score_line + number_of_r1_score
                number_of_r1_score += 1
            else:
                columns = r2_columns
                row = score_line + number_of_r2_score
                number_of_r2_score += 1

            values = (
                score.difficulty,
                score.time_of_flight,
                score.execution,
                score.hd,
                score.penality,
                score.total,
            )
            for column, value in zip(columns, values):
                ws_score[column + str(row)] = value

            # Five score lines plus the line that follows them.
            i += 6

    wb.save(filename=cleaned_text_file[:-4] + ".xlsx")
|
|
|
|
|
|
if __name__ == "__main__":
    # Imported here because the script reads sys.argv and the module-level imports of
    # this file do not provide sys.
    import sys

    # TODO: use clize for argument parsing? Also validate that the argument ends with
    # ".pdf" and points to an existing file.
    if len(sys.argv) < 2:
        # No argument given: fall back to the bundled example.
        pdf_file = "./examples/2022_PV2.pdf"
    else:
        pdf_file = sys.argv[1]

    txt_file = "./examples/test.txt"  # intermediate raw text dump, any name will do
    pdf_to_txt(pdf_file, txt_file)
    cleaned_txt_file = generate_clean_text_file(txt_file)
    generete_excel_file(cleaned_txt_file)
|