Update readme file, remove unused files, ...
This commit is contained in:
parent
6c25496a28
commit
044f544a5f
|
@ -0,0 +1,4 @@
|
|||
*.pdf
|
||||
|
||||
# MacOS
|
||||
.DS_Store
|
13
ReadMe.md
13
ReadMe.md
|
@ -2,15 +2,14 @@
|
|||
|
||||
|
||||
|
||||
This application reads a ***CSV file*** which contains trampoline routines and performs some analyses:
|
||||
This application reads a ***PDF file*** which contains trampoline competition scores and performs some analyses:
|
||||
|
||||
- number of routines,
|
||||
- number of unfinished routines,
|
||||
- number of skills,
|
||||
- number of tucked/piked/straight skills,
|
||||
- number of categories,
|
||||
- number of scores,
|
||||
- mean score by category,
|
||||
- …
|
||||
|
||||
All these results are exported in two CSV files.
|
||||
All these results are exported in an Excel file.
|
||||
|
||||
|
||||
|
||||
|
@ -22,5 +21,5 @@ First, create a virtual env. and install the requirements :
|
|||
|
||||
Then, you can run the application :
|
||||
|
||||
`python3 analyse.py for_exemple.csv`
|
||||
`python3 analyse.py exemple.pdf`
|
||||
|
||||
|
|
|
@ -1,31 +0,0 @@
|
|||
import io
|
||||
|
||||
from pdfminer.converter import TextConverter
|
||||
from pdfminer.pdfinterp import PDFPageInterpreter
|
||||
from pdfminer.pdfinterp import PDFResourceManager
|
||||
from pdfminer.pdfpage import PDFPage
|
||||
|
||||
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file using pdfminer.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The concatenated text of all pages, or ``None`` when no text
        could be extracted.

    Raises:
        OSError: If ``pdf_path`` cannot be opened for reading.
    """
    resource_manager = PDFResourceManager()

    # The in-memory buffer receives the text emitted by the converter; the
    # ``with`` block guarantees it is released even if parsing fails.
    with io.StringIO() as text_buffer:
        converter = TextConverter(resource_manager, text_buffer, codec='utf-8')
        try:
            page_interpreter = PDFPageInterpreter(resource_manager, converter)

            with open(pdf_path, 'rb') as fh:
                for page in PDFPage.get_pages(fh, caching=True, check_extractable=True):
                    page_interpreter.process_page(page)

            # Read the buffer before closing anything: the value is no
            # longer accessible once the StringIO is closed.
            text = text_buffer.getvalue()
        finally:
            # Close the converter on both the success and the error path.
            converter.close()

    if text:
        return text
    return None
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: dump the extracted text of a sample PDF to stdout.
    extracted_text = extract_text_from_pdf('2022_PV2.pdf')
    print(extracted_text)
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
# Filename : analyse.py
|
||||
|
||||
import sys
|
||||
import os.path
|
||||
from os import path
|
||||
import PyPDF2
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: validate the argument, then print the number
    # of pages and the text of the first page of the given PDF file.
    # Every validation failure prints a message and exits with status 1 so
    # that calling shells and scripts can detect the error.
    if len(sys.argv) < 2:
        # use clize ?
        print("Please, give a PDF file.")
        sys.exit(1)

    filename = sys.argv[1]
    # Case-insensitive comparison so that "SCORES.PDF" is accepted as well.
    if not filename.lower().endswith(".pdf"):
        print("Must be a PDF file.")
        sys.exit(1)

    if not os.path.isfile(filename):
        print("File does not exist.")
        sys.exit(1)

    # The context manager closes the file even if PyPDF2 raises while parsing.
    with open(filename, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfFileReader(pdf_file)

        print(pdf_reader.numPages)
        first_page = pdf_reader.getPage(0)
        print(first_page.extractText())
|
|
@ -1,32 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
# Filename : analyse.py
|
||||
|
||||
import sys
|
||||
import os.path
|
||||
from os import path
|
||||
import PyPDF2
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: check that a readable ".pdf" path was given,
    # then print the page count followed by the text of the first page.
    # Each failed check reports the problem and exits with status 1 so the
    # failure is visible to the caller.
    if len(sys.argv) < 2:
        # use clize ?
        print("Please, give a PDF file.")
        sys.exit(1)

    filename = sys.argv[1]
    # Compare the extension case-insensitively (".PDF" is valid too).
    if not filename.lower().endswith(".pdf"):
        print("Must be a PDF file.")
        sys.exit(1)

    if not os.path.isfile(filename):
        print("File does not exist.")
        sys.exit(1)

    # Keep the file open only while the reader needs it; the original passed
    # an anonymous open() result to the reader and never closed it.
    with open(filename, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfFileReader(pdf_file)

        print(pdf_reader.numPages)
        first_page = pdf_reader.getPage(0)
        print(first_page.extractText())
|
Loading…
Reference in New Issue