forked from ronaldokun/anateldb
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(extract): Added new extract.py script
- Adds a new script `extract.py` that encapsulates the data extraction process
- Includes functionality to:
  - Load environment variables
  - Connect to SQL Server and MongoDB data sources
  - Extract data from the sources
  - Save the extracted data to a specified destination folder
- Provides a command-line interface using the `typer` library
- Loading branch information
Ronaldo S.A. Batista
committed
Sep 12, 2024
1 parent
d8cf854
commit fd4acc5
Showing
5 changed files
with
129 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import json | ||
import os | ||
import shutil | ||
import warnings | ||
from datetime import datetime | ||
import sys | ||
|
||
import pandas as pd | ||
import typer | ||
from dotenv import find_dotenv, load_dotenv | ||
from fastcore.xtras import Path | ||
|
||
from extracao.stations import Estacoes | ||
|
||
# Populate os.environ from the .env file located by find_dotenv(); with
# override=True, values from the file are asked to take precedence.
load_dotenv(find_dotenv(), override=True)
# Suppress every warning for the remainder of the process.
warnings.simplefilter('ignore')

# Connection settings handed to `Estacoes` for the SQL Server source.
# NOTE: SQL_TIMEOUT must be set; int(None) raises TypeError at import time.
SQLSERVER_PARAMS = {
    'driver': os.environ.get('SQL_DRIVER'),
    'server': os.environ.get('SQL_SERVER'),
    'database': os.environ.get('SQL_DATABASE'),
    'trusted_conn': True,
    'mult_results': True,
    'encrypt': False,
    'timeout': int(os.environ.get('SQL_TIMEOUT')),
}

# On Linux, macOS and Cygwin the trusted-connection settings are switched off
# and explicit credentials are read from the USERNAME / PASSWORD variables.
if sys.platform in ('linux', 'darwin', 'cygwin'):
    SQLSERVER_PARAMS['trusted_conn'] = False
    SQLSERVER_PARAMS['mult_results'] = False
    SQLSERVER_PARAMS['username'] = os.environ.get('USERNAME')
    SQLSERVER_PARAMS['password'] = os.environ.get('PASSWORD')

# MongoDB connection string; None when the variable is not defined.
MONGO_URI: str = os.environ.get('MONGO_URI')
|
||
|
||
def get_db(
    path: str = os.environ.get('DESTINATION'),  # Folder where the files are saved
    limit: int = 0,  # Maximum number of records to extract from each MongoDB source, 0: no limit
    parallel: bool = True,  # If true, the requests to each data source are made in parallel
    read_cache: bool = False,  # If true, reads the already existing data, otherwise updates the data
    reprocess_sources: bool = False,  # Forwarded unchanged to `Estacoes`
) -> None:
    """Instantiate `Estacoes`, update its data and copy the result to `path`.

    The extraction is delegated to `Estacoes(...).update()`. When `path` is
    not None, the contents of `data.folder` are copied into it (the folder is
    created if needed and existing files are overwritten). The elapsed
    wall-clock time is printed at the end. Nothing is returned.
    """
    # Local imports keep this function self-contained, as with `time` before.
    import shutil
    import time

    start = time.perf_counter()
    data = Estacoes(SQLSERVER_PARAMS, MONGO_URI, limit, parallel, read_cache, reprocess_sources)
    data.update()
    if path is not None:
        destination = Path(path)
        destination.mkdir(parents=True, exist_ok=True)
        print(f'Salvando dados em {destination}')
        # The previous implementation shelled out to PowerShell/robocopy through
        # `subprocess`, which was never imported (NameError) and only exists on
        # Windows. copytree performs the same recursive, overwriting copy on
        # every platform.
        shutil.copytree(data.folder, destination, dirs_exist_ok=True)
    print(f'Elapsed time: {time.perf_counter() - start} seconds')
|
||
|
||
# When executed as a script, hand `get_db` to typer so that it is run as a
# command-line program.
if __name__ == '__main__':
    typer.run(get_db)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters