-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add initdb feature for local geocoding
- Loading branch information
Showing
3 changed files
with
110 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import pandas as pd | ||
import sqlite3 | ||
|
||
|
||
def import_csv_to_sqlite(csv_file_path, sqlite_db_path, table_name, separator, chunksize=10000, verbose=True):
    """
    Import a large CSV file into a SQLite database in chunks.

    Parameters:
    - csv_file_path (str): The file path of the CSV file to import.
    - sqlite_db_path (str): The file path of the SQLite database.
    - table_name (str): The name of the table to insert the data into.
    - separator (str): The delimiter to use for separating entries in the CSV file.
    - chunksize (int): The number of rows per chunk to process at a time. A larger chunksize can
      be faster for writing data, but it may also consume more memory.
    - verbose (bool): When true, print progress messages to stdout.

    Notes:
    - The table is rebuilt on every call: the first chunk replaces any existing table with the
      same name, and every following chunk is appended to it. Running the import twice therefore
      does not duplicate rows.
    - The table is created by pandas from the first chunk, so no schema has to exist beforehand.
    - The database connection is closed even when reading or writing fails.
    """
    # Create a connection to the SQLite database
    conn = sqlite3.connect(sqlite_db_path)
    try:
        if verbose:
            print(f"[+] Importing {csv_file_path} into SQLite database {sqlite_db_path}...")

        # Iterate over the CSV file in chunks
        chunks = pd.read_csv(csv_file_path, chunksize=chunksize, sep=separator)
        for chunk_index, chunk in enumerate(chunks):
            # Using 'replace' for every chunk would drop the rows written by the previous ones
            # and leave only the last chunk in the table; only the first chunk may replace.
            write_mode = 'replace' if chunk_index == 0 else 'append'
            chunk.to_sql(name=table_name, con=conn, if_exists=write_mode, index=False)

        if verbose:
            print(f"[+] Database {sqlite_db_path} with table {table_name} created succesfully !")
    finally:
        # Close the connection to the SQLite database
        conn.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import gzip | ||
import shutil | ||
|
||
import requests | ||
|
||
|
||
def uncompress_gz_to_csv(gz_file_path, csv_file_path, verbose):
    """
    Decompress a gzip archive into a plain file on disk.

    Parameters:
    - gz_file_path (str): The file path of the .gz file to read.
    - csv_file_path (str): The file path the decompressed bytes are written to.
    - verbose (bool): When true, print a progress message before decompressing.
    """
    if verbose:
        print(f"[+] Uncompressing {gz_file_path} to {csv_file_path}")

    # Both handles are binary, so the bytes are copied through without any decoding.
    with gzip.open(gz_file_path, 'rb') as compressed, open(csv_file_path, 'wb') as target:
        shutil.copyfileobj(compressed, target)
|
||
|
||
def download_csv(url, output_path, verbose, timeout=60):
    """
    Download a CSV file from a given URL and save it to the specified path.

    The response body is streamed to disk block by block, so the whole file is never held in
    memory at once.

    Parameters:
    - url (str): The URL of the CSV file to download.
    - output_path (str): The file path where the CSV will be saved.
    - verbose (bool): When true, print progress messages to stdout.
    - timeout (float or tuple): Timeout in seconds passed to requests. Without one, a server
      that stops responding would block the call forever.

    Notes:
    - A response with a status code other than 200 is reported on stdout and nothing is written.
    - Network errors raised by requests (including timeouts) propagate to the caller.
    """
    if verbose:
        print(f"[+] Downloading BAN datasheet from {url}")

    # stream=True defers fetching the body until it is iterated below
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check if the request was successful
        if response.status_code != 200:
            print(f"[!] Failed to download CSV file. HTTP Status Code: {response.status_code}")
            return

        # Write the content of the response to a file, one block at a time
        with open(output_path, 'wb') as file:
            for block in response.iter_content(chunk_size=1024 * 1024):
                file.write(block)

    if verbose:
        print(f"[+] File downloaded successfully: {output_path}")