-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTable in PDF to CSV file.py
29 lines (22 loc) · 1023 Bytes
/
Table in PDF to CSV file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import csv

import pdfplumber
def extract_table_to_csv(pdf_path, output_csv_path):
    """Extract the first table on the first page of a PDF and save it as CSV.

    Only the first page of the PDF is inspected, and only the first table
    reported for that page is exported. When no table is found, no file is
    written and a message is printed instead.

    Fields are written with the ``csv`` module, so cells that contain commas,
    double quotes or line breaks are quoted/escaped and the output remains a
    valid CSV file. Empty cells (``None``) become empty fields.

    Args:
        pdf_path: Path of the PDF file to read.
        output_csv_path: Path of the CSV file to create; an existing file at
            this path is overwritten.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Assumption: the table of interest is on the first page.
        tables = pdf.pages[0].extract_tables()

    if not tables:
        print("No tables found on the page.")
        return

    # newline="" disables newline translation so the csv writer has full
    # control over line endings (required for cells with embedded newlines).
    with open(output_csv_path, "w", encoding="utf-8", newline="") as f:
        # Keep "\n" as the row separator, as the previous output format did.
        writer = csv.writer(f, lineterminator="\n")
        # csv.writer emits None as an empty field and quotes any cell that
        # contains the delimiter, a quote character or a line break.
        writer.writerows(tables[0])

    print(f"Table data extracted to: {output_csv_path}")
# Replace these with the real path of the PDF and the desired path of the
# output CSV file.
output_csv_path = "extracted_table_data.csv"
pdf_file_path = "arq.PDF"
extract_table_to_csv(pdf_path=pdf_file_path, output_csv_path=output_csv_path)