Competition_scores_analyses/analyse_scores.py

259 lines
8.5 KiB
Python

import io
import sys
from string import printable

from openpyxl import Workbook, load_workbook
from openpyxl.formatting.rule import ColorScaleRule
from openpyxl.styles import Font, numbers
from openpyxl.worksheet.table import Table, TableStyleInfo
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfpage import PDFPage


class RoutineScore:
    """
    Marks of a single routine and their sum.

    Attributes (all floats, initialised to 0.0):
        execution, difficulty, hd, time_of_flight, penality: the five marks.
        total: sum of the five marks, refreshed by compute_total().
    """

    def __init__(self):
        self.execution = 0.000
        self.difficulty = 0.000
        self.hd = 0.000
        self.time_of_flight = 0.000
        self.penality = 0.000
        self.total = 0.000

    def compute_total(self):
        """Store the sum of the five marks in ``self.total``."""
        # Same left-to-right addition order as before, so float rounding is unchanged.
        running = self.execution + self.difficulty
        running = running + self.hd
        running = running + self.time_of_flight
        self.total = running + self.penality

    def display(self):
        """Refresh the total, then print all marks on one ' | '-separated line."""
        self.compute_total()
        columns = (
            self.difficulty,
            self.time_of_flight,
            self.execution,
            self.hd,
            self.penality,
            self.total,
        )
        print(" | ".join(str(value) for value in columns))
class CompetitionScore:
    """
    Three routine scores and their combined total.

    Attributes:
        first_routine, second_routine, third_routine: RoutineScore objects.
        total_score: sum of the three routine totals, refreshed by compute_total().
    """

    def __init__(self):
        self.first_routine = RoutineScore()
        self.second_routine = RoutineScore()
        self.third_routine = RoutineScore()
        self.total_score = 0.000

    def compute_total(self):
        """Refresh each routine's total, then store their sum in ``self.total_score``."""
        routines = (self.first_routine, self.second_routine, self.third_routine)
        for routine in routines:
            routine.compute_total()
        first, second, third = routines
        self.total_score = first.total + second.total + third.total
def pdf_to_txt(pdf_file, txt_file):
    """
    Extract all the text that can be read from a PDF file and store it, unprocessed,
    in a text file.

    Args:
        pdf_file: Path of the PDF file to read.
        txt_file: Path of the text file to write (opened in "w" mode, so an
            existing file is overwritten).
    """
    # The PDF handle is now closed deterministically, even if parsing fails.
    with open(pdf_file, "rb") as input_file:
        retData = io.StringIO()
        resource_manager = PDFResourceManager()
        txt_converter = TextConverter(resource_manager, retData, laparams=LAParams())
        try:
            interpreter = PDFPageInterpreter(resource_manager, txt_converter)
            for page in PDFPage.get_pages(input_file):
                interpreter.process_page(page)
        finally:
            # Release the converter device once every page has been processed.
            txt_converter.close()
    txt = retData.getvalue()
    with open(txt_file, "w") as f:
        f.write(txt)
def generate_clean_text_file(text_file):
    """
    Generate a new, cleaned text file from the raw text file (see pdf_to_txt()).

    Line handling:
      * Blank lines are dropped.
      * Title lines (containing "A - " or "B - ") are kept with " JAAR" and
        " - ALLROUND" removed, except lines containing
        "TU TRA - Provinciale voorronde", which are dropped.
      * Any other line is kept only if it contains at least one of the glyph
        marker strings (or "\\x00"), does not contain "REEKS" and is at least
        5 characters long (newline included). In kept lines the markers are
        replaced by the digits 0-9 and "\\x00" by ".".

    Args:
        text_file: Path of the raw text file. The output path is built by
            replacing its last four characters with "_cleaned.txt".

    Returns:
        The path of the cleaned text file.
    """
    cleaned_path = text_file[:-4] + "_cleaned.txt"
    # Both files are closed (and the output flushed) before returning, so the
    # caller can safely read the cleaned file right away.
    with open(text_file, "r") as file, open(cleaned_path, "w") as cleaned_file:
        for line in file:
            if line.rstrip():
                # Title lines are kept
                if "A - " not in line and "B - " not in line:
                    # NOTE(review): the ten marker literals below (and in the
                    # replace() calls further down) appear empty in this copy of
                    # the file; presumably they originally held the unreadable
                    # glyph characters standing for the digits 0-9. Confirm
                    # against the original source before relying on this code.
                    if (
                        not "" in line
                        and not "" in line
                        and not "" in line
                        and not "" in line
                        and not "" in line
                        and not "" in line
                        and not "" in line
                        and not "" in line
                        and not "" in line
                        and not "" in line
                        and not "\x00" in line
                    ):
                        pass
                    else:
                        if "REEKS" in line:
                            pass
                        elif len(line) >= 5:
                            # Replace the unreadable characters
                            new_line = line.replace("", "0")
                            new_line = new_line.replace("", "1")
                            new_line = new_line.replace("", "2")
                            new_line = new_line.replace("", "3")
                            new_line = new_line.replace("", "4")
                            new_line = new_line.replace("", "5")
                            new_line = new_line.replace("", "6")
                            new_line = new_line.replace("", "7")
                            new_line = new_line.replace("", "8")
                            new_line = new_line.replace("", "9")
                            new_line = new_line.replace("\x00", ".")
                            cleaned_file.write(new_line)
                else:
                    if "TU TRA - Provinciale voorronde" in line:
                        pass
                    else:
                        cleaned_file.write(
                            line.replace(" JAAR", "").replace(" - ALLROUND", "")
                        )
    return cleaned_path
def write_header(xl_sheet, cell, text) -> None:
    """
    Write a header into an Excel sheet.

    Stores ``text`` in the cell addressed by ``cell`` (e.g. "A1") and gives
    that cell a bold font.
    """
    xl_sheet[cell] = text
    header_cell = xl_sheet[cell]
    header_cell.font = Font(bold=True)
def generete_excel_file(cleaned_text_file):
    """
    Generate an Excel file from the cleaned text file (see generate_clean_text_file()).

    Every title line (one containing "A - " or "B - ") starts a new worksheet
    named after it, with one header row. Every other line must parse as a
    float: a value above 22 is treated as a sub-total or total and skipped;
    otherwise it starts a six-line record whose first five lines are
    difficulty, time of flight, execution, HD and penalty. Records with a
    difficulty of 0 fill columns A-F, all others fill columns H-M. The
    workbook is saved under the input path with its last four characters
    replaced by ".xlsx".

    Args:
        cleaned_text_file: Path of the cleaned text file.

    Raises:
        ValueError: If a non-title line cannot be parsed as a float.
        IndexError: If the file ends in the middle of a score record.
    """
    with open(cleaned_text_file, "r") as cleaned_file:
        lines = cleaned_file.readlines()

    headers = ("Diff", "ToF", "Exe", "HD", "Pen", "Total")
    r1_columns = "ABCDEF"
    r2_columns = "HIJKLM"

    wb = Workbook()
    # Scores found before the first title line land in this default sheet.
    ws_score = wb.active
    ws_score.title = "Dummy sheet"
    i = 0
    score_line = 2  # first row below the header row
    number_of_r1_score = 0
    number_of_r2_score = 0
    # Walk the whole file: the former hard-coded bound of 1000 silently
    # truncated longer inputs and raised IndexError on an empty file.
    while i < len(lines):
        print("LINE : " + lines[i])
        if "A - " in lines[i] or "B - " in lines[i]:
            # Title line: create a new Excel sheet and its headers
            ws_score = wb.create_sheet(lines[i].rstrip())
            for column, text in zip(r1_columns + r2_columns, headers * 2):
                write_header(ws_score, column + "1", text)
            write_header(ws_score, "O1", "TOTAL")
            number_of_r1_score = 0
            number_of_r2_score = 0
            i += 1
        elif float(lines[i]) > 22.000:
            # Score above 22 -> sub-total or total, skipped
            i += 1
        else:
            # Number line: start of a score record
            score = RoutineScore()
            score.difficulty = float(lines[i])
            score.time_of_flight = float(lines[i + 1])
            score.execution = float(lines[i + 2])
            score.hd = float(lines[i + 3])
            score.penality = float(lines[i + 4])
            score.compute_total()
            score.display()
            values = (
                score.difficulty,
                score.time_of_flight,
                score.execution,
                score.hd,
                score.penality,
                score.total,
            )
            if score.difficulty == 0:
                columns = r1_columns
                row = score_line + number_of_r1_score
                number_of_r1_score += 1
            else:
                columns = r2_columns
                row = score_line + number_of_r2_score
                number_of_r2_score += 1
            for column, value in zip(columns, values):
                ws_score[column + str(row)] = value
            # Five lines were read; the sixth is skipped unread
            # (presumably the printed routine total -- TODO confirm).
            i += 6
    wb.save(filename=cleaned_text_file[:-4] + ".xlsx")
if __name__ == "__main__":
    # Convert the PDF named on the command line, or the bundled example when
    # no argument is given. Previously a supplied argument left pdf_file
    # undefined.
    # TODO: validate the argument (".pdf" extension, file exists); use clize?
    if len(sys.argv) < 2:
        pdf_file = "./examples/2022_PV2.pdf"
    else:
        pdf_file = sys.argv[1]
    # Intermediate raw-text dump; any file name works here.
    txt_file = "./examples/test.txt"
    pdf_to_txt(pdf_file, txt_file)
    cleaned_txt_file = generate_clean_text_file(txt_file)
    generete_excel_file(cleaned_txt_file)