Skip to content

Commit

Permalink
fixup! fixup! Copy PostgresSQL to GCS as CSV
Browse files Browse the repository at this point in the history
  • Loading branch information
mik-laj committed Apr 17, 2024
1 parent be70de3 commit 5eb413c
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 32 deletions.
18 changes: 16 additions & 2 deletions .github/workflows/bi_transfer_pola_backend_to_gcs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ on:
pull_request:
branches: ['master']


permissions:
id-token: write # This is required for requesting the JWT
contents: read # This is required for actions/checkout

env:
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_ORGANIZATION: ${{ github.repository_owner }}
Expand All @@ -34,6 +39,13 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 2
- id: 'auth'
name: 'Authenticate to GCP'
uses: 'google-github-actions/auth@v0.3.1'
with:
# yamllint disable-line rule:line-length
workload_identity_provider: 'projects/354540873199/locations/global/workloadIdentityPools/github-action-pool/providers/github-action-provider'
service_account: 'pola-bi-github-action@pola-bi.iam.gserviceaccount.com'
- name: "Setup Python"
uses: actions/setup-python@v5
with:
Expand All @@ -44,5 +56,7 @@ jobs:
source <(python ./pola-bi/dev.py --environment "prod")
printenv | grep POLA_APP | cut -d "=" -f 2- | xargs -n 1 -I {} echo "::add-mask::{}"
printenv | grep POLA_APP >> $GITHUB_ENV;
- run: ./pola-bi/postgres_to_gcs/postgres_to_csv.py
- run: ls -lah pola-bi/postgres_to_gcs/output
- run: ./pola-bi/postgres_to_gcs/postgres_to_csv.py --output-dir /tmp/csv-files/
- run: gcloud storage ls --recursive 'gs://pola_app_pola_backend_postgres_export/**'
- run: gsutil rsync /tmp/csv-files/ 'gs://pola_app_pola_backend_postgres_export'
- run: gcloud storage ls --recursive 'gs://pola_app_pola_backend_postgres_export/**'
83 changes: 53 additions & 30 deletions pola-bi/postgres_to_gcs/postgres_to_csv.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
import argparse
import logging
import os
import shlex
Expand All @@ -7,6 +8,8 @@

# Emit INFO-level records so executed commands and export progress are visible.
logging.basicConfig(level=logging.INFO)

# Module-level logger used by the functions in this script.
log = logging.getLogger(__name__)

# Docker image whose bundled `psql` client is used to run the export.
postgres_docker_image = "postgres:13.1"
# Database endpoint, read from the environment; the port falls back to 5432.
pg_host = os.getenv('POLA_APP_HOST')
pg_port = os.getenv('POLA_APP_PORT', '5432')
Expand All @@ -20,38 +23,39 @@


# Names of the tables to export; each active entry is dumped to
# `<table_name>.csv` in the output directory. Commented-out entries are skipped.
tables_to_export = [
"ai_pics_aiattachment",
"ai_pics_aipics",
"bi_companies_by_query_group",
"bi_companies_with_count_group",
"bi_new_product_by_hour",
"bi_popular_not_verified_products",
"bi_product_by_time",
"bi_queries_by_time",
"bi_queries_stats_intervals",
"bi_stats_queries_uq_users_by_week",
# "ai_pics_aiattachment",
# "ai_pics_aipics",
# "bi_companies_by_query_group",
# "bi_companies_with_count_group",
# "bi_new_product_by_hour",
# "bi_popular_not_verified_products",
# "bi_product_by_time",
# "bi_queries_by_time",
# "bi_queries_stats_intervals",
# "bi_stats_queries_uq_users_by_week",
"company_brand",
"company_company",
"gpc_brick",
"gpc_class",
"gpc_family",
"gpc_segment",
"pola_query",
"pola_searchquery",
"pola_stats",
"product_product",
"report_attachment",
"report_report",
# "gpc_brick",
# "gpc_class",
# "gpc_family",
# "gpc_segment",
# "pola_query",
# "pola_searchquery",
# "pola_stats",
# "product_product",
# "report_attachment",
# "report_report",
]


def execute_verbose(command, **kwargs):
    """Log a command line, then run it, raising if it exits with an error.

    Args:
        command: Argument list (program followed by its arguments).
        **kwargs: Forwarded unchanged to ``subprocess.run``.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True``).
    """
    # shlex.join renders the argument list as one shell-quoted line.
    # Log once, through the module logger, so each command appears a single time.
    log.info("Executing command: %s", shlex.join(command))
    subprocess.run(command, check=True, **kwargs)


def run_sql(sql_statement):
logging.info("Running SQL statement: %s", sql_statement)
def dump_table(table_name, output_directory):
sql_statement = fr"\COPY (SELECT * FROM {table_name}) TO '/output/{table_name}.csv' WITH CSV HEADER"
log.info("Running SQL statement: %s", sql_statement)
psql_command = [
"docker",
"run",
Expand All @@ -67,11 +71,11 @@ def run_sql(sql_statement):
"-e",
"PGPASSWORD",
"-v",
f"{root_dir}/output:/output",
f"{output_directory}/:/output/",
postgres_docker_image,
"psql",
"-c",
sql,
sql_statement,
]

# Run the command
Expand All @@ -86,8 +90,27 @@ def run_sql(sql_statement):
execute_verbose(psql_command, env=env)


for no, table_name in enumerate(tables_to_export, start=1):
logging.info("Exporting table %s (%s/%s)", table_name, no, len(tables_to_export))
sql = fr"\COPY (SELECT * FROM {table_name}) TO '/output/{table_name}.csv' WITH CSV HEADER"
run_sql(sql)
logging.info("")
def get_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the export script.

    Returns:
        A parser with a single ``--output-dir`` option whose default is the
        ``output`` directory under ``root_dir``.
    """
    # The description states what this script does: export tables to CSV.
    parser = argparse.ArgumentParser(
        description='Export PostgreSQL tables to CSV files.',
    )
    parser.add_argument(
        '--output-dir',
        help='Directory the CSV files are written to (default: %(default)s)',
        default=str(root_dir / 'output'),
    )
    return parser


def main():
    """Export every table listed in ``tables_to_export`` to a CSV file.

    Parses the command line, makes sure the output directory exists, pulls
    the Postgres Docker image, then dumps the tables one by one.

    Raises:
        subprocess.CalledProcessError: If pulling the image or a table dump
            exits with a non-zero status.
    """
    args = get_parser().parse_args()
    # The directory is handed to `docker run -v`; Docker only treats an
    # absolute host path as a bind mount, so resolve relative input first.
    output_dir = os.path.abspath(args.output_dir)

    # Standard-library equivalent of `mkdir -p`, without spawning a process.
    os.makedirs(output_dir, exist_ok=True)
    execute_verbose(['docker', 'pull', postgres_docker_image])

    total = len(tables_to_export)
    for no, table_name in enumerate(tables_to_export, start=1):
        log.info("Exporting table %s (%s/%s)", table_name, no, total)
        dump_table(table_name, output_dir)
        # Blank record to visually separate the output of consecutive tables.
        log.info("")

    log.info("Export completed successfully")

if __name__ == "__main__":
main()

0 comments on commit 5eb413c

Please sign in to comment.