diff --git a/.devcontainer/devcontainer.env b/.devcontainer/devcontainer.env new file mode 100644 index 0000000..31bf13c --- /dev/null +++ b/.devcontainer/devcontainer.env @@ -0,0 +1,4 @@ +PORT_PREFIX=${PORT_PREFIX} +CONTAINER_PREFIX=${USER} + +GOOGLE_SHEETS_CLIENT_SECRET=${GOOGLE_SHEETS_CLIENT_SECRET} diff --git a/.devcontainer/python-3.10/devcontainer.json b/.devcontainer/python-3.10/devcontainer.json new file mode 100644 index 0000000..696ea7d --- /dev/null +++ b/.devcontainer/python-3.10/devcontainer.json @@ -0,0 +1,57 @@ +{ + "name": "python-3.10", + "dockerComposeFile": [ + "./docker-compose.yml" + ], + "service": "python-3.10", + "forwardPorts": [], + "shutdownAction": "stopCompose", + "workspaceFolder": "/workspaces/google-sheets", + "remoteEnv": {}, + "features": { + "ghcr.io/devcontainers/features/common-utils:2": { + "installZsh": true, + "installOhMyZsh": true, + "configureZshAsDefaultShell": true, + "username": "vscode", + "userUid": "1000", + "userGid": "1000" + }, + "ghcr.io/devcontainers/features/node:1": {}, + "ghcr.io/devcontainers/features/git:1": { + "version": "latest", + "ppa": true + }, + "ghcr.io/devcontainers/features/git-lfs:1": {}, + "ghcr.io/robbert229/devcontainer-features/postgresql-client:1": {} + }, + "updateContentCommand": "bash .devcontainer/setup.sh", + "postCreateCommand": [], + "customizations": { + "vscode": { + "settings": { + "python.linting.enabled": true, + "python.testing.pytestEnabled": true, + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": "always" + }, + "[python]": { + "editor.defaultFormatter": "ms-python.vscode-pylance" + }, + "editor.rulers": [ + 80 + ] + }, + "extensions": [ + "ms-python.python", + "ms-toolsai.jupyter", + "ms-toolsai.vscode-jupyter-cell-tags", + "ms-toolsai.jupyter-keymap", + "ms-toolsai.jupyter-renderers", + "ms-toolsai.vscode-jupyter-slideshow", + "ms-python.vscode-pylance" + ] + } + } +} diff --git a/.devcontainer/python-3.10/docker-compose.yml 
b/.devcontainer/python-3.10/docker-compose.yml new file mode 100644 index 0000000..422be1d --- /dev/null +++ b/.devcontainer/python-3.10/docker-compose.yml @@ -0,0 +1,30 @@ +version: '3' + +services: + python-3.10: # nosemgrep + image: mcr.microsoft.com/devcontainers/python:3.10 + container_name: $USER-python-3.10-google-sheets + volumes: + - ../../:/workspaces/google-sheets:cached + command: sleep infinity + environment: + - DATABASE_URL=postgresql://admin:password@${USER}-postgres-py310-google-sheets:5432/google-sheets + env_file: + - ../devcontainer.env + networks: + - google-sheets-network + postgres-google-sheets: # nosemgrep + image: postgres:latest + container_name: $USER-postgres-py310-google-sheets + environment: + POSTGRES_USER: admin + POSTGRES_PASSWORD: password # pragma: allowlist secret + POSTGRES_DB: google-sheets + ports: + - "${PORT_PREFIX}5432:5432" + networks: + - google-sheets-network + +networks: + google-sheets-network: + name: "${USER}-google-sheets-network" diff --git a/.devcontainer/python-3.11/devcontainer.json b/.devcontainer/python-3.11/devcontainer.json new file mode 100644 index 0000000..4511947 --- /dev/null +++ b/.devcontainer/python-3.11/devcontainer.json @@ -0,0 +1,57 @@ +{ + "name": "python-3.11", + "dockerComposeFile": [ + "./docker-compose.yml" + ], + "service": "python-3.11", + "forwardPorts": [], + "shutdownAction": "stopCompose", + "workspaceFolder": "/workspaces/google-sheets", + "remoteEnv": {}, + "features": { + "ghcr.io/devcontainers/features/common-utils:2": { + "installZsh": true, + "installOhMyZsh": true, + "configureZshAsDefaultShell": true, + "username": "vscode", + "userUid": "1000", + "userGid": "1000" + }, + "ghcr.io/devcontainers/features/node:1": {}, + "ghcr.io/devcontainers/features/git:1": { + "version": "latest", + "ppa": true + }, + "ghcr.io/devcontainers/features/git-lfs:1": {}, + "ghcr.io/robbert229/devcontainer-features/postgresql-client:1": {} + }, + "updateContentCommand": "bash .devcontainer/setup.sh", + 
"postCreateCommand": [], + "customizations": { + "vscode": { + "settings": { + "python.linting.enabled": true, + "python.testing.pytestEnabled": true, + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": "always" + }, + "[python]": { + "editor.defaultFormatter": "ms-python.vscode-pylance" + }, + "editor.rulers": [ + 80 + ] + }, + "extensions": [ + "ms-python.python", + "ms-toolsai.jupyter", + "ms-toolsai.vscode-jupyter-cell-tags", + "ms-toolsai.jupyter-keymap", + "ms-toolsai.jupyter-renderers", + "ms-toolsai.vscode-jupyter-slideshow", + "ms-python.vscode-pylance" + ] + } + } +} diff --git a/.devcontainer/python-3.11/docker-compose.yml b/.devcontainer/python-3.11/docker-compose.yml new file mode 100644 index 0000000..8301642 --- /dev/null +++ b/.devcontainer/python-3.11/docker-compose.yml @@ -0,0 +1,30 @@ +version: '3' + +services: + python-3.11: # nosemgrep + image: mcr.microsoft.com/devcontainers/python:3.11 + container_name: $USER-python-3.11-google-sheets + volumes: + - ../../:/workspaces/google-sheets:cached + command: sleep infinity + environment: + - DATABASE_URL=postgresql://admin:password@${USER}-postgres-py311-google-sheets:5432/google-sheets + env_file: + - ../devcontainer.env + networks: + - google-sheets-network + postgres-google-sheets: # nosemgrep + image: postgres:latest + container_name: $USER-postgres-py311-google-sheets + environment: + POSTGRES_USER: admin + POSTGRES_PASSWORD: password # pragma: allowlist secret + POSTGRES_DB: google-sheets + ports: + - "${PORT_PREFIX}5432:5432" + networks: + - google-sheets-network + +networks: + google-sheets-network: + name: "${USER}-google-sheets-network" diff --git a/.devcontainer/python-3.12/devcontainer.json b/.devcontainer/python-3.12/devcontainer.json new file mode 100644 index 0000000..4f177cd --- /dev/null +++ b/.devcontainer/python-3.12/devcontainer.json @@ -0,0 +1,57 @@ +{ + "name": "python-3.12", + "dockerComposeFile": [ + "./docker-compose.yml" + ], + "service": 
"python-3.12", + "forwardPorts": [], + "shutdownAction": "stopCompose", + "workspaceFolder": "/workspaces/google-sheets", + "remoteEnv": {}, + "features": { + "ghcr.io/devcontainers/features/common-utils:2": { + "installZsh": true, + "installOhMyZsh": true, + "configureZshAsDefaultShell": true, + "username": "vscode", + "userUid": "1000", + "userGid": "1000" + }, + "ghcr.io/devcontainers/features/node:1": {}, + "ghcr.io/devcontainers/features/git:1": { + "version": "latest", + "ppa": true + }, + "ghcr.io/devcontainers/features/git-lfs:1": {}, + "ghcr.io/robbert229/devcontainer-features/postgresql-client:1": {} + }, + "updateContentCommand": "bash .devcontainer/setup.sh", + "postCreateCommand": [], + "customizations": { + "vscode": { + "settings": { + "python.linting.enabled": true, + "python.testing.pytestEnabled": true, + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": "always" + }, + "[python]": { + "editor.defaultFormatter": "ms-python.vscode-pylance" + }, + "editor.rulers": [ + 80 + ] + }, + "extensions": [ + "ms-python.python", + "ms-toolsai.jupyter", + "ms-toolsai.vscode-jupyter-cell-tags", + "ms-toolsai.jupyter-keymap", + "ms-toolsai.jupyter-renderers", + "ms-toolsai.vscode-jupyter-slideshow", + "ms-python.vscode-pylance" + ] + } + } +} diff --git a/.devcontainer/python-3.12/docker-compose.yml b/.devcontainer/python-3.12/docker-compose.yml new file mode 100644 index 0000000..875bcbe --- /dev/null +++ b/.devcontainer/python-3.12/docker-compose.yml @@ -0,0 +1,30 @@ +version: '3' + +services: + python-3.12: # nosemgrep + image: mcr.microsoft.com/devcontainers/python:3.12 + container_name: $USER-python-3.12-google-sheets + volumes: + - ../../:/workspaces/google-sheets:cached + command: sleep infinity + environment: + - DATABASE_URL=postgresql://admin:password@${USER}-postgres-py39-google-sheets:5432/google-sheets + env_file: + - ../devcontainer.env + networks: + - google-sheets-network + postgres-google-sheets: # 
nosemgrep + image: postgres:latest + container_name: $USER-postgres-py39-google-sheets + environment: + POSTGRES_USER: admin + POSTGRES_PASSWORD: password # pragma: allowlist secret + POSTGRES_DB: google-sheets + ports: + - "${PORT_PREFIX}5432:5432" + networks: + - google-sheets-network + +networks: + google-sheets-network: + name: "${USER}-google-sheets-network" diff --git a/.devcontainer/python-3.9/devcontainer.json b/.devcontainer/python-3.9/devcontainer.json new file mode 100644 index 0000000..fe45a9e --- /dev/null +++ b/.devcontainer/python-3.9/devcontainer.json @@ -0,0 +1,57 @@ +{ + "name": "python-3.9", + "dockerComposeFile": [ + "./docker-compose.yml" + ], + "service": "python-3.9", + "forwardPorts": [], + "shutdownAction": "stopCompose", + "workspaceFolder": "/workspaces/google-sheets", + "remoteEnv": {}, + "features": { + "ghcr.io/devcontainers/features/common-utils:2": { + "installZsh": true, + "installOhMyZsh": true, + "configureZshAsDefaultShell": true, + "username": "vscode", + "userUid": "1000", + "userGid": "1000" + }, + "ghcr.io/devcontainers/features/node:1": {}, + "ghcr.io/devcontainers/features/git:1": { + "version": "latest", + "ppa": true + }, + "ghcr.io/devcontainers/features/git-lfs:1": {}, + "ghcr.io/robbert229/devcontainer-features/postgresql-client:1": {} + }, + "updateContentCommand": "bash .devcontainer/setup.sh", + "postCreateCommand": [], + "customizations": { + "vscode": { + "settings": { + "python.linting.enabled": true, + "python.testing.pytestEnabled": true, + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": "always" + }, + "[python]": { + "editor.defaultFormatter": "ms-python.vscode-pylance" + }, + "editor.rulers": [ + 80 + ] + }, + "extensions": [ + "ms-python.python", + "ms-toolsai.jupyter", + "ms-toolsai.vscode-jupyter-cell-tags", + "ms-toolsai.jupyter-keymap", + "ms-toolsai.jupyter-renderers", + "ms-toolsai.vscode-jupyter-slideshow", + "ms-python.vscode-pylance" + ] + } + } +} diff 
--git a/.devcontainer/python-3.9/docker-compose.yml b/.devcontainer/python-3.9/docker-compose.yml new file mode 100644 index 0000000..f1a52b1 --- /dev/null +++ b/.devcontainer/python-3.9/docker-compose.yml @@ -0,0 +1,30 @@ +version: '3' + +services: + python-3.9: # nosemgrep + image: mcr.microsoft.com/devcontainers/python:3.9 + container_name: $USER-python-3.9-google-sheets + volumes: + - ../../:/workspaces/google-sheets:cached + command: sleep infinity + environment: + - DATABASE_URL=postgresql://admin:password@${USER}-postgres-py39-google-sheets:5432/google-sheets + env_file: + - ../devcontainer.env + networks: + - google-sheets-network + postgres-google-sheets: # nosemgrep + image: postgres:latest + container_name: $USER-postgres-py39-google-sheets + environment: + POSTGRES_USER: admin + POSTGRES_PASSWORD: password # pragma: allowlist secret + POSTGRES_DB: google-sheets + ports: + - "${PORT_PREFIX}5432:5432" + networks: + - google-sheets-network + +networks: + google-sheets-network: + name: "${USER}-google-sheets-network" diff --git a/.devcontainer/setup.sh b/.devcontainer/setup.sh new file mode 100644 index 0000000..3a9afaa --- /dev/null +++ b/.devcontainer/setup.sh @@ -0,0 +1,26 @@ +# update pip +pip install --upgrade pip + +# install dev packages +pip install -e ".[dev]" + +# install pre-commit hook if not installed already +pre-commit install + +prisma migrate deploy +prisma generate + +echo '{ + "web": { + "client_id": "1027914582771-g0bcsn4fhd6a59pp3d4n1ntjc03r1k9s.apps.googleusercontent.com", + "project_id": "captn-sheets-dev", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_secret": "'${GOOGLE_SHEETS_CLIENT_SECRET}'", + "redirect_uris": [ + "http://localhost:8000/login/callback" + ] + + } +}' > client_secret.json diff --git a/google_sheets/app.py b/google_sheets/app.py index 2088496..2971840 100644 --- 
a/google_sheets/app.py +++ b/google_sheets/app.py @@ -1,14 +1,17 @@ import json import logging +from datetime import datetime from os import environ -from typing import Annotated, Dict, List, Union +from typing import Annotated, Any, Dict, List, Optional, Union import httpx -from fastapi import FastAPI, HTTPException, Query, Request, Response, status +import pandas as pd +from fastapi import Body, FastAPI, HTTPException, Query, Response, status from fastapi.responses import RedirectResponse from googleapiclient.errors import HttpError from . import __version__ +from .data_processing import process_data_f, validate_input_data, validate_output_data from .db_helpers import get_db_connection from .google_api import ( build_service, @@ -51,35 +54,53 @@ async def is_authenticated_for_ads(user_id: int) -> bool: # Route 1: Redirect to Google OAuth -@app.get("/login") +@app.get("/login", description="Get the URL to log in with Google") async def get_login_url( - request: Request, - user_id: int = Query(title="User ID"), - force_new_login: bool = Query(title="Force new login", default=False), + user_id: Annotated[ + int, Query(description="The user ID for which the data is requested") + ], + conv_uuid: Annotated[ + Optional[str], Query(description="The conversation UUID") + ] = None, + force_new_login: Annotated[bool, Query(description="Force new login")] = False, ) -> Dict[str, str]: + _check_parameters_are_not_none({"conv_uuid": conv_uuid}) if not force_new_login: is_authenticated = await is_authenticated_for_ads(user_id=user_id) if is_authenticated: return {"login_url": "User is already authenticated"} - google_oauth_url = get_google_oauth_url(user_id) + google_oauth_url = get_google_oauth_url(user_id, conv_uuid) # type: ignore markdown_url = f"To navigate Google Ads waters, I require access to your account. Please [click here]({google_oauth_url}) to grant permission." 
return {"login_url": markdown_url} -@app.get("/login/success") -async def get_login_success() -> Dict[str, str]: - return {"login_success": "You have successfully logged in"} +def _check_parameters_are_not_none(kwargs: Dict[str, Any]) -> None: + error_message = "The following parameters are required: " + missing_parameters = [key for key, value in kwargs.items() if value is None] + if missing_parameters: + error_message += ", ".join(missing_parameters) + raise HTTPException(status_code=400, detail=error_message) + + +REDIRECT_DOMAIN = environ.get("REDIRECT_DOMAIN", "http://localhost:3000") # Route 2: Save user credentials/token to a JSON file @app.get("/login/callback") async def login_callback( - code: str = Query(title="Authorization Code"), state: str = Query(title="State") + code: Annotated[ + str, + Query(description="The authorization code received after successful login"), + ], + state: Annotated[Optional[str], Query(description="State")] = None, ) -> RedirectResponse: - if not state.isdigit(): + _check_parameters_are_not_none({"state": state}) + user_id_and_chat_uuid = state.split(":") # type: ignore + if not user_id_and_chat_uuid[0].isdigit(): # type: ignore raise HTTPException(status_code=400, detail="User ID must be an integer") - user_id = int(state) + user_id = int(user_id_and_chat_uuid[0]) + chat_uuid = user_id_and_chat_uuid[1] token_request_data = get_token_request_data(code) @@ -115,12 +136,9 @@ async def login_callback( }, ) - # redirect_domain = environ.get("REDIRECT_DOMAIN", "https://captn.ai") - # logged_in_message = "I have successfully logged in" - # redirect_uri = f"{redirect_domain}/chat/{chat_uuid}?msg={logged_in_message}" - # return RedirectResponse(redirect_uri) - # redirect to success page - return RedirectResponse(url=f"{base_url}/login/success") + logged_in_message = "I have successfully logged in" + redirect_uri = f"{REDIRECT_DOMAIN}/chat/{chat_uuid}?msg={logged_in_message}" + return RedirectResponse(redirect_uri) 
@app.get("/get-sheet", description="Get data from a Google Sheet") @@ -129,22 +147,25 @@ async def get_sheet( int, Query(description="The user ID for which the data is requested") ], spreadsheet_id: Annotated[ - str, Query(description="ID of the Google Sheet to fetch data from") - ], + Optional[str], Query(description="ID of the Google Sheet to fetch data from") + ] = None, title: Annotated[ - str, + Optional[str], Query(description="The title of the sheet to fetch data from"), - ], -) -> Union[str, List[List[str]]]: + ] = None, +) -> Union[str, GoogleSheetValues]: + _check_parameters_are_not_none({"spreadsheet_id": spreadsheet_id, "title": title}) service = await build_service(user_id=user_id, service_name="sheets", version="v4") values = await get_sheet_f( - service=service, spreadsheet_id=spreadsheet_id, range=title + service=service, + spreadsheet_id=spreadsheet_id, # type: ignore + range=title, # type: ignore ) if not values: return "No data found." - return values # type: ignore[no-any-return] + return GoogleSheetValues(values=values) @app.post( @@ -156,22 +177,28 @@ async def update_sheet( int, Query(description="The user ID for which the data is requested") ], spreadsheet_id: Annotated[ - str, Query(description="ID of the Google Sheet to fetch data from") - ], + Optional[str], Query(description="ID of the Google Sheet to fetch data from") + ] = None, title: Annotated[ - str, + Optional[str], Query(description="The title of the sheet to update"), - ], - sheet_values: GoogleSheetValues, + ] = None, + sheet_values: Annotated[ + Optional[GoogleSheetValues], + Body(embed=True, description="Values to be written to the Google Sheet"), + ] = None, ) -> Response: + _check_parameters_are_not_none( + {"spreadsheet_id": spreadsheet_id, "title": title, "sheet_values": sheet_values} + ) service = await build_service(user_id=user_id, service_name="sheets", version="v4") try: await update_sheet_f( service=service, - spreadsheet_id=spreadsheet_id, - range=title, - 
sheet_values=sheet_values, + spreadsheet_id=spreadsheet_id, # type: ignore + range=title, # type: ignore + sheet_values=sheet_values, # type: ignore ) except HttpError as e: raise HTTPException(status_code=e.status_code, detail=e._get_reason()) from e @@ -195,17 +222,20 @@ async def create_sheet( int, Query(description="The user ID for which the data is requested") ], spreadsheet_id: Annotated[ - str, Query(description="ID of the Google Sheet to fetch data from") - ], + Optional[str], Query(description="ID of the Google Sheet to fetch data from") + ] = None, title: Annotated[ - str, + Optional[str], Query(description="The title of the new sheet"), - ], + ] = None, ) -> Response: + _check_parameters_are_not_none({"spreadsheet_id": spreadsheet_id, "title": title}) service = await build_service(user_id=user_id, service_name="sheets", version="v4") try: await create_sheet_f( - service=service, spreadsheet_id=spreadsheet_id, title=title + service=service, + spreadsheet_id=spreadsheet_id, # type: ignore + title=title, # type: ignore ) except HttpError as e: if ( @@ -250,13 +280,15 @@ async def get_all_sheet_titles( int, Query(description="The user ID for which the data is requested") ], spreadsheet_id: Annotated[ - str, Query(description="ID of the Google Sheet to fetch data from") - ], + Optional[str], Query(description="ID of the Google Sheet to fetch data from") + ] = None, ) -> List[str]: + _check_parameters_are_not_none({"spreadsheet_id": spreadsheet_id}) service = await build_service(user_id=user_id, service_name="sheets", version="v4") try: sheets = await get_all_sheet_titles_f( - service=service, spreadsheet_id=spreadsheet_id + service=service, + spreadsheet_id=spreadsheet_id, # type: ignore ) except HttpError as e: raise HTTPException(status_code=e.status_code, detail=e._get_reason()) from e @@ -265,3 +297,213 @@ async def get_all_sheet_titles( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e) ) from e return sheets + + 
+NEW_CAMPAIGN_MANDATORY_COLUMNS = ["Country", "Station From", "Station To"] +MANDATORY_AD_TEMPLATE_COLUMNS = [ + "Campaign", + "Ad Group", + "Headline 1", + "Headline 2", + "Headline 3", + "Description Line 1", + "Description Line 2", + "Final Url", +] + +MANDATORY_KEYWORD_TEMPLATE_COLUMNS = [ + "Campaign", + "Ad Group", + "Keyword", + "Criterion Type", + "Max CPC", +] + + +def _validate_target_resource(target_resource: Optional[str]) -> None: + if target_resource not in ["ad", "keyword"]: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="The target resource should be either 'ad' or 'keyword'.", + ) + + +@app.post( + "/process-data", + description="Process data to generate new ads or keywords based on the template", +) +async def process_data( + template_sheet_values: Annotated[ + Optional[GoogleSheetValues], + Body( + embed=True, + description="Template values to be used for generating new ads or keywords", + ), + ] = None, + new_campaign_sheet_values: Annotated[ + Optional[GoogleSheetValues], + Body( + embed=True, + description="New campaign values to be used for generating new ads or keywords", + ), + ] = None, + target_resource: Annotated[ + Optional[str], + Query( + description="The target resource to be updated. 
This can be 'ad' or 'keyword'" + ), + ] = None, +) -> GoogleSheetValues: + _check_parameters_are_not_none( + { + "template_sheet_values": template_sheet_values, + "new_campaign_sheet_values": new_campaign_sheet_values, + "target_resource": target_resource, + } + ) + _validate_target_resource(target_resource) + if ( + len(template_sheet_values.values) < 2 # type: ignore + or len(new_campaign_sheet_values.values) < 2 # type: ignore + ): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Both template and new campaign data should have at least two rows (header and data).", + ) + try: + template_df = pd.DataFrame( + template_sheet_values.values[1:], # type: ignore + columns=template_sheet_values.values[0], # type: ignore + ) + new_campaign_df = pd.DataFrame( + new_campaign_sheet_values.values[1:], # type: ignore + columns=new_campaign_sheet_values.values[0], # type: ignore + ) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid data format. 
Please provide data in the correct format: {e}", + ) from e + + validation_error_msg = validate_input_data( + df=new_campaign_df, + mandatory_columns=NEW_CAMPAIGN_MANDATORY_COLUMNS, + name="new campaign", + ) + + if target_resource == "ad": + validation_error_msg += validate_input_data( + df=template_df, + mandatory_columns=MANDATORY_AD_TEMPLATE_COLUMNS, + name="ads template", + ) + else: + validation_error_msg += validate_input_data( + df=template_df, + mandatory_columns=MANDATORY_KEYWORD_TEMPLATE_COLUMNS, + name="keyword template", + ) + if validation_error_msg: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail=validation_error_msg + ) + + processed_df = process_data_f(template_df, new_campaign_df) + + validated_df = validate_output_data( + processed_df, + target_resource, # type: ignore + ) + + values = [validated_df.columns.tolist(), *validated_df.values.tolist()] + + return GoogleSheetValues(values=values) + + +@app.post( + "/process-spreadsheet", + description="Process data to generate new ads or keywords based on the template", +) +async def process_spreadsheet( + user_id: Annotated[ + int, Query(description="The user ID for which the data is requested") + ], + template_spreadsheet_id: Annotated[ + Optional[str], + Query(description="ID of the Google Sheet with the template data"), + ] = None, + template_sheet_title: Annotated[ + Optional[str], + Query(description="The title of the sheet with the template data"), + ] = None, + new_campaign_spreadsheet_id: Annotated[ + Optional[str], + Query(description="ID of the Google Sheet with the new campaign data"), + ] = None, + new_campaign_sheet_title: Annotated[ + Optional[str], + Query(description="The title of the sheet with the new campaign data"), + ] = None, + target_resource: Annotated[ + Optional[str], + Query( + description="The target resource to be updated, options: 'ad' or 'keyword'" + ), + ] = None, +) -> str: + _check_parameters_are_not_none( + { + "template_spreadsheet_id": 
template_spreadsheet_id, + "template_sheet_title": template_sheet_title, + "new_campaign_spreadsheet_id": new_campaign_spreadsheet_id, + "new_campaign_sheet_title": new_campaign_sheet_title, + "target_resource": target_resource, + } + ) + _validate_target_resource(target_resource) + template_values = await get_sheet( + user_id=user_id, + spreadsheet_id=template_spreadsheet_id, + title=template_sheet_title, + ) + new_campaign_values = await get_sheet( + user_id=user_id, + spreadsheet_id=new_campaign_spreadsheet_id, + title=new_campaign_sheet_title, + ) + + if not isinstance(template_values, GoogleSheetValues) or not isinstance( + new_campaign_values, GoogleSheetValues + ): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"""Invalid data format. +template_values: {template_values} + +new_campaign_values: {new_campaign_values} + +Please provide data in the correct format.""", + ) + + processed_values = await process_data( + template_sheet_values=template_values, + new_campaign_sheet_values=new_campaign_values, + target_resource=target_resource, + ) + + title = ( + f"Captn - {target_resource.capitalize()}s {datetime.now():%Y-%m-%d %H:%M:%S}" # type: ignore + ) + await create_sheet( + user_id=user_id, + spreadsheet_id=new_campaign_spreadsheet_id, + title=title, + ) + await update_sheet( + user_id=user_id, + spreadsheet_id=new_campaign_spreadsheet_id, + title=title, + sheet_values=processed_values, + ) + + return f"Sheet with the name 'Captn - {target_resource.capitalize()}s' has been created successfully." 
# type: ignore diff --git a/google_sheets/data_processing/__init__.py b/google_sheets/data_processing/__init__.py new file mode 100644 index 0000000..03f8bd5 --- /dev/null +++ b/google_sheets/data_processing/__init__.py @@ -0,0 +1,3 @@ +from .processing import process_data_f, validate_input_data, validate_output_data + +__all__ = ["process_data_f", "validate_input_data", "validate_output_data"] diff --git a/google_sheets/data_processing/processing.py b/google_sheets/data_processing/processing.py new file mode 100644 index 0000000..fe50755 --- /dev/null +++ b/google_sheets/data_processing/processing.py @@ -0,0 +1,145 @@ +from typing import List, Literal + +import pandas as pd + +__all__ = ["process_data_f", "validate_input_data", "validate_output_data"] + + +def validate_input_data( + df: pd.DataFrame, mandatory_columns: List[str], name: str +) -> str: + error_msg = "" + if len(df.columns) != len(set(df.columns)): + error_msg = f"""Duplicate columns found in the {name} data. +Please provide unique column names. +""" + if not all(col in df.columns for col in mandatory_columns): + error_msg += f"""Mandatory columns missing in the {name} data. 
+Please provide the following columns: {mandatory_columns} +""" + if error_msg: + return error_msg + return "" + + +INSERT_STATION_FROM = "INSERT_STATION_FROM" +INSERT_STATION_TO = "INSERT_STATION_TO" + + +def process_data_f( + template_df: pd.DataFrame, new_campaign_df: pd.DataFrame +) -> pd.DataFrame: + final_df = pd.DataFrame(columns=template_df.columns) + for _, template_row in template_df.iterrows(): + for _, new_campaign_row in new_campaign_df.iterrows(): + campaign = f"{new_campaign_row['Country']} - {new_campaign_row['Station From']} - {new_campaign_row['Station To']}" + stations = [ + { + "Station From": new_campaign_row["Station From"], + "Station To": new_campaign_row["Station To"], + }, + # Reverse the order of the stations + { + "Station From": new_campaign_row["Station To"], + "Station To": new_campaign_row["Station From"], + }, + ] + for station in stations: + new_row = template_row.copy() + new_row["Campaign"] = campaign + new_row["Ad Group"] = ( + f"{station['Station From']} - {station['Station To']}" + ) + + # Replace the placeholders in all columns with the actual station names INSERT_STATION_FROM + new_row = new_row.str.replace( + INSERT_STATION_FROM, station["Station From"] + ) + new_row = new_row.str.replace(INSERT_STATION_TO, station["Station To"]) + + final_df = pd.concat( + [final_df, pd.DataFrame([new_row])], ignore_index=True + ) + + return final_df + + +MIN_HEADLINES = 3 +MAX_HEADLINES = 15 +MIN_DESCRIPTIONS = 2 +MAX_DESCRIPTIONS = 4 + +MAX_HEADLINE_LENGTH = 30 +MAX_DESCRIPTION_LENGTH = 90 + + +def _validate_output_data_ad(df: pd.DataFrame) -> pd.DataFrame: # noqa: C901 + df["Issues"] = "" + headline_columns = [col for col in df.columns if "Headline" in col] + description_columns = [col for col in df.columns if "Description" in col] + + for index, row in df.iterrows(): + # Check for duplicate headlines and descriptions + if len(set(row[headline_columns])) != len(row[headline_columns]): + df.loc[index, "Issues"] += "Duplicate headlines 
found.\n" + if len(set(row[description_columns])) != len(row[description_columns]): + df.loc[index, "Issues"] += "Duplicate descriptions found.\n" + + # Check for the number of headlines and descriptions + headline_count = len( + [headline for headline in row[headline_columns] if headline] + ) + if headline_count < MIN_HEADLINES: + df.loc[index, "Issues"] += ( + f"Minimum {MIN_HEADLINES} headlines are required, found {headline_count}.\n" + ) + elif headline_count > MAX_HEADLINES: + df.loc[index, "Issues"] += ( + f"Maximum {MAX_HEADLINES} headlines are allowed, found {headline_count}.\n" + ) + + description_count = len( + [description for description in row[description_columns] if description] + ) + if description_count < MIN_DESCRIPTIONS: + df.loc[index, "Issues"] += ( + f"Minimum {MIN_DESCRIPTIONS} descriptions are required, found {description_count}.\n" + ) + elif description_count > MAX_DESCRIPTIONS: + df.loc[index, "Issues"] += ( + f"Maximum {MAX_DESCRIPTIONS} descriptions are allowed, found {description_count}.\n" + ) + + # Check for the length of headlines and descriptions + for headline_column in headline_columns: + headline = row[headline_column] + if len(headline) > MAX_HEADLINE_LENGTH: + df.loc[index, "Issues"] += ( + f"Headline length should be less than {MAX_HEADLINE_LENGTH} characters, found {len(headline)} in column {headline_column}.\n" + ) + + for description_column in description_columns: + description = row[description_column] + if len(description) > MAX_DESCRIPTION_LENGTH: + df.loc[index, "Issues"] += ( + f"Description length should be less than {MAX_DESCRIPTION_LENGTH} characters, found {len(description)} in column {description_column}.\n" + ) + + # TODO: Check for the final URL + # if not row["Final URL"]: + # df.loc[index, "Issues"] += "Final URL is missing.\n" + + if not df["Issues"].any(): + df = df.drop(columns=["Issues"]) + + return df + + +def validate_output_data( + df: pd.DataFrame, target_resource: Literal["ad", "keyword"] +) -> 
pd.DataFrame: + if target_resource == "keyword": + # No validation required for keyword data currently + return df + + return _validate_output_data_ad(df) diff --git a/google_sheets/google_api/oauth_settings.py b/google_sheets/google_api/oauth_settings.py index fc55d69..f0cf8a5 100644 --- a/google_sheets/google_api/oauth_settings.py +++ b/google_sheets/google_api/oauth_settings.py @@ -20,12 +20,13 @@ } -def get_google_oauth_url(user_id: int) -> str: +def get_google_oauth_url(user_id: int, conv_uuid: str) -> str: + state = f"{user_id}:{conv_uuid}" google_oauth_url = ( f"{oauth2_settings['auth_uri']}?client_id={oauth2_settings['clientId']}" f"&redirect_uri={oauth2_settings['redirectUri']}&response_type=code" f"&scope={urllib.parse.quote_plus('email https://www.googleapis.com/auth/spreadsheets https://www.googleapis.com/auth/drive.metadata.readonly')}" - f"&access_type=offline&prompt=consent&state={user_id}" + f"&access_type=offline&prompt=consent&state={state}" ) return google_oauth_url diff --git a/pyproject.toml b/pyproject.toml index 2c9712d..4ff95d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ dependencies = [ "prisma==0.13.1", "google-api-python-client==2.133.0", "asyncify==0.10.0", + "pandas==2.2.2" ] [project.optional-dependencies] diff --git a/tests/app/test_app.py b/tests/app/test_app.py index 96d982c..f60b806 100644 --- a/tests/app/test_app.py +++ b/tests/app/test_app.py @@ -1,12 +1,13 @@ -from typing import Optional, Union +from typing import Any, Dict, Optional, Union from unittest.mock import MagicMock, patch import pytest +from fastapi import HTTPException from fastapi.testclient import TestClient from googleapiclient.errors import HttpError -from google_sheets import __version__ as version -from google_sheets.app import app +from google_sheets.app import _check_parameters_are_not_none, app +from google_sheets.model import GoogleSheetValues client = TestClient(app) @@ -17,14 +18,14 @@ def test_get_sheet(self) -> None: 
"google_sheets.google_api.service._load_user_credentials", return_value={"refresh_token": "abcdf"}, ) as mock_load_user_credentials: - excepted = [ + values = [ ["Campaign", "Ad Group", "Keyword"], ["Campaign A", "Ad group A", "Keyword A"], ["Campaign A", "Ad group A", "Keyword B"], ["Campaign A", "Ad group A", "Keyword C"], ] with patch( - "google_sheets.app.get_sheet_f", return_value=excepted + "google_sheets.app.get_sheet_f", return_value=values ) as mock_get_sheet: response = client.get( "/get-sheet?user_id=123&spreadsheet_id=abc&title=Sheet1" @@ -32,6 +33,8 @@ def test_get_sheet(self) -> None: mock_load_user_credentials.assert_called_once() mock_get_sheet.assert_called_once() assert response.status_code == 200 + + excepted = GoogleSheetValues(values=values).model_dump() assert response.json() == excepted @@ -126,7 +129,9 @@ def test_update_sheet( ) as mock_update_sheet, ): json_data = { - "values": [["Campaign", "Ad Group"], ["Campaign A", "Ad group A"]] + "sheet_values": { + "values": [["Campaign", "Ad Group"], ["Campaign A", "Ad group A"]] + } } response = client.post( "/update-sheet?user_id=123&spreadsheet_id=abc&title=Sheet1", @@ -160,474 +165,146 @@ def test_get_all_file_names(self) -> None: assert response.json() == expected -class TestOpenAPIJSON: - def test_openapi(self) -> None: - expected = { - "openapi": "3.1.0", - "info": {"title": "google-sheets", "version": version}, - "servers": [ - { - "url": "http://localhost:8000", - "description": "Google Sheets app server", - } - ], - "paths": { - "/login": { - "get": { - "summary": "Get Login Url", - "operationId": "get_login_url_login_get", - "parameters": [ - { - "name": "user_id", - "in": "query", - "required": True, - "schema": {"type": "integer", "title": "User ID"}, - }, - { - "name": "force_new_login", - "in": "query", - "required": False, - "schema": { - "type": "boolean", - "title": "Force new login", - "default": False, - }, - }, - ], - "responses": { - "200": { - "description": "Successful 
Response", - "content": { - "application/json": { - "schema": { - "type": "object", - "additionalProperties": {"type": "string"}, - "title": "Response Get Login Url Login Get", - } - } - }, - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - }, - }, - }, - } - }, - "/login/success": { - "get": { - "summary": "Get Login Success", - "operationId": "get_login_success_login_success_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": { - "additionalProperties": {"type": "string"}, - "type": "object", - "title": "Response Get Login Success Login Success Get", - } - } - }, - } - }, - } - }, - "/login/callback": { - "get": { - "summary": "Login Callback", - "operationId": "login_callback_login_callback_get", - "parameters": [ - { - "name": "code", - "in": "query", - "required": True, - "schema": { - "type": "string", - "title": "Authorization Code", - }, - }, - { - "name": "state", - "in": "query", - "required": True, - "schema": {"type": "string", "title": "State"}, - }, - ], - "responses": { - "200": { - "description": "Successful Response", - "content": {"application/json": {"schema": {}}}, - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - }, - }, - }, - } - }, - "/get-sheet": { - "get": { - "summary": "Get Sheet", - "description": "Get data from a Google Sheet", - "operationId": "get_sheet_get_sheet_get", - "parameters": [ - { - "name": "user_id", - "in": "query", - "required": True, - "schema": { - "type": "integer", - "description": "The user ID for which the data is requested", - "title": "User Id", - }, - "description": "The user ID for which the data is requested", - }, - { - "name": "spreadsheet_id", - "in": "query", - "required": True, - "schema": { - "type": 
"string", - "description": "ID of the Google Sheet to fetch data from", - "title": "Spreadsheet Id", - }, - "description": "ID of the Google Sheet to fetch data from", - }, - { - "name": "title", - "in": "query", - "required": True, - "schema": { - "type": "string", - "description": "The title of the sheet to fetch data from", - "title": "Title", - }, - "description": "The title of the sheet to fetch data from", - }, - ], - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": { - "anyOf": [ - {"type": "string"}, - { - "type": "array", - "items": { - "type": "array", - "items": {"type": "string"}, - }, - }, - ], - "title": "Response Get Sheet Get Sheet Get", - } - } - }, - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - }, - }, - }, - } - }, - "/update-sheet": { - "post": { - "summary": "Update Sheet", - "description": "Update data in a Google Sheet within the existing spreadsheet", - "operationId": "update_sheet_update_sheet_post", - "parameters": [ - { - "name": "user_id", - "in": "query", - "required": True, - "schema": { - "type": "integer", - "description": "The user ID for which the data is requested", - "title": "User Id", - }, - "description": "The user ID for which the data is requested", - }, - { - "name": "spreadsheet_id", - "in": "query", - "required": True, - "schema": { - "type": "string", - "description": "ID of the Google Sheet to fetch data from", - "title": "Spreadsheet Id", - }, - "description": "ID of the Google Sheet to fetch data from", - }, - { - "name": "title", - "in": "query", - "required": True, - "schema": { - "type": "string", - "description": "The title of the sheet to update", - "title": "Title", - }, - "description": "The title of the sheet to update", - }, +class TestProcessData: + @pytest.mark.parametrize( + ("template_sheet_values", 
"new_campaign_sheet_values", "status_code", "detail"), + [ + ( + GoogleSheetValues( + values=[ + ["Campaign", "Ad Group", "Keyword"], + ] + ), + GoogleSheetValues( + values=[ + ["Country", "Station From", "Station To"], + ["India", "Delhi", "Mumbai"], + ] + ), + 400, + "Both template and new campaign data should have at least two rows", + ), + ( + GoogleSheetValues( + values=[ + ["Campaign", "Ad Group", "Keyword"], + ["Campaign A", "Ad group A", "Keyword A"], + ] + ), + GoogleSheetValues( + values=[ + ["Country", "Station From", "Station To"], + ["India", "Delhi", "Mumbai"], + ] + ), + 400, + "Mandatory columns missing in the keyword template data.", + ), + ( + GoogleSheetValues( + values=[ + [ + "Campaign", + "Ad Group", + "Keyword", + "Criterion Type", + "Max CPC", ], - "requestBody": { - "required": True, - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GoogleSheetValues" - } - } - }, - }, - "responses": { - "200": { - "description": "Successful Response", - "content": {"application/json": {"schema": {}}}, - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - }, - }, - }, - } - }, - "/create-sheet": { - "post": { - "summary": "Create Sheet", - "description": "Create a new Google Sheet within the existing spreadsheet", - "operationId": "create_sheet_create_sheet_post", - "parameters": [ - { - "name": "user_id", - "in": "query", - "required": True, - "schema": { - "type": "integer", - "description": "The user ID for which the data is requested", - "title": "User Id", - }, - "description": "The user ID for which the data is requested", - }, - { - "name": "spreadsheet_id", - "in": "query", - "required": True, - "schema": { - "type": "string", - "description": "ID of the Google Sheet to fetch data from", - "title": "Spreadsheet Id", - }, - "description": "ID of the Google Sheet to fetch data from", - }, - { - "name": 
"title", - "in": "query", - "required": True, - "schema": { - "type": "string", - "description": "The title of the new sheet", - "title": "Title", - }, - "description": "The title of the new sheet", - }, + ["Campaign A", "Ad group A", "Keyword A", "Exact", "1"], + ] + ), + GoogleSheetValues( + values=[ + ["Country", "Station From", "Station To"], + ["India", "Delhi", "Mumbai"], + ] + ), + 200, + GoogleSheetValues( + values=[ + [ + "Campaign", + "Ad Group", + "Keyword", + "Criterion Type", + "Max CPC", ], - "responses": { - "200": { - "description": "Successful Response", - "content": {"application/json": {"schema": {}}}, - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - }, - }, - }, - } - }, - "/get-all-file-names": { - "get": { - "summary": "Get All File Names", - "description": "Get all sheets associated with the user", - "operationId": "get_all_file_names_get_all_file_names_get", - "parameters": [ - { - "name": "user_id", - "in": "query", - "required": True, - "schema": { - "type": "integer", - "description": "The user ID for which the data is requested", - "title": "User Id", - }, - "description": "The user ID for which the data is requested", - } + [ + "India - Delhi - Mumbai", + "Delhi - Mumbai", + "Keyword A", + "Exact", + "1", ], - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": { - "type": "object", - "additionalProperties": {"type": "string"}, - "title": "Response Get All File Names Get All File Names Get", - } - } - }, - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - }, - }, - }, - } - }, - "/get-all-sheet-titles": { - "get": { - "summary": "Get All Sheet Titles", - "description": "Get all sheet titles within a Google Spreadsheet", - "operationId": 
"get_all_sheet_titles_get_all_sheet_titles_get", - "parameters": [ - { - "name": "user_id", - "in": "query", - "required": True, - "schema": { - "type": "integer", - "description": "The user ID for which the data is requested", - "title": "User Id", - }, - "description": "The user ID for which the data is requested", - }, - { - "name": "spreadsheet_id", - "in": "query", - "required": True, - "schema": { - "type": "string", - "description": "ID of the Google Sheet to fetch data from", - "title": "Spreadsheet Id", - }, - "description": "ID of the Google Sheet to fetch data from", - }, + [ + "India - Delhi - Mumbai", + "Mumbai - Delhi", + "Keyword A", + "Exact", + "1", ], - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": { - "type": "array", - "items": {"type": "string"}, - "title": "Response Get All Sheet Titles Get All Sheet Titles Get", - } - } - }, - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - }, - }, - }, - } - }, - }, - "components": { - "schemas": { - "GoogleSheetValues": { - "properties": { - "values": { - "items": {"items": {}, "type": "array"}, - "type": "array", - "title": "Values", - "description": "Values to be written to the Google Sheet.", - } - }, - "type": "object", - "required": ["values"], - "title": "GoogleSheetValues", - }, - "HTTPValidationError": { - "properties": { - "detail": { - "items": { - "$ref": "#/components/schemas/ValidationError" - }, - "type": "array", - "title": "Detail", - } - }, - "type": "object", - "title": "HTTPValidationError", - }, - "ValidationError": { - "properties": { - "loc": { - "items": { - "anyOf": [{"type": "string"}, {"type": "integer"}] - }, - "type": "array", - "title": "Location", - }, - "msg": {"type": "string", "title": "Message"}, - "type": {"type": "string", "title": "Error Type"}, - }, - "type": "object", - "required": 
["loc", "msg", "type"], - "title": "ValidationError", - }, - } + ], + ), + ), + ], + ) + def test_process_data( + self, + template_sheet_values: GoogleSheetValues, + new_campaign_sheet_values: GoogleSheetValues, + status_code: int, + detail: Union[str, GoogleSheetValues], + ) -> None: + response = client.post( + "/process-data?target_resource=keyword", + json={ + "template_sheet_values": template_sheet_values.model_dump(), + "new_campaign_sheet_values": new_campaign_sheet_values.model_dump(), }, - } + ) + + assert response.status_code == status_code + if isinstance(detail, GoogleSheetValues): + assert response.json() == detail.model_dump() + else: + assert detail in response.json()["detail"] + + +class TestOpenAPIJSON: + def test_openapi(self) -> None: response = client.get("/openapi.json") assert response.status_code == 200 - resp_json = response.json() - assert resp_json == expected + paths = response.json()["paths"] + expected_path_keys = [ + "/login", + "/login/callback", + "/get-sheet", + "/update-sheet", + "/create-sheet", + "/get-all-file-names", + "/get-all-sheet-titles", + "/process-data", + "/process-spreadsheet", + ] + + for key in expected_path_keys: + assert key in paths + + +class TestHelperFunctions: + @pytest.mark.parametrize( + ("endpoint_params", "raises_exception"), + [ + ({"user_id": "123", "spreadsheet_id": "abc", "title": "Sheet1"}, False), + ({"user_id": "123", "spreadsheet_id": "abc", "title": None}, True), + ], + ) + def test_check_parameters_are_not_none( + self, endpoint_params: Dict[str, Any], raises_exception: bool + ) -> None: + if raises_exception: + with pytest.raises(HTTPException): + _check_parameters_are_not_none(endpoint_params) + else: + _check_parameters_are_not_none(endpoint_params) diff --git a/tests/data_processing/test_processing.py b/tests/data_processing/test_processing.py new file mode 100644 index 0000000..089a397 --- /dev/null +++ b/tests/data_processing/test_processing.py @@ -0,0 +1,177 @@ +from typing import List, 
Optional + +import pandas as pd +import pytest + +from google_sheets.data_processing.processing import ( + process_data_f, + validate_input_data, + validate_output_data, +) + + +@pytest.mark.parametrize( + ("df", "expected"), + [ + ( + pd.DataFrame( + { + "Country": ["USA", "USA"], + "Station From": ["A", "B"], + "Station To": ["B", "A"], + } + ), + "", + ), + ( + pd.DataFrame( + { + "Country": ["USA", "USA"], + "Station From": ["A", "B"], + } + ), + """Mandatory columns missing in the name data. +Please provide the following columns: ['Country', 'Station From', 'Station To'] +""", + ), + ( + pd.DataFrame( + [["USA", "A", "B", "B"], ["USA", "B", "A", "C"]], + columns=["Country", "Station From", "Station To", "Station To"], + ), + """Duplicate columns found in the name data. +Please provide unique column names. +""", + ), + ], +) +def test_validate_input_data(df: pd.DataFrame, expected: str) -> None: + mandatory_columns = ["Country", "Station From", "Station To"] + assert validate_input_data(df, mandatory_columns, "name") == expected + + +@pytest.mark.parametrize( + ("template_df", "new_campaign_df", "expected"), + [ + ( + pd.DataFrame( + { + "Campaign": ["", ""], + "Ad Group": ["", ""], + "Keyword": ["k1", "k2"], + "Max CPC": ["", ""], + } + ), + pd.DataFrame( + { + "Country": ["USA", "USA"], + "Station From": ["A", "B"], + "Station To": ["C", "D"], + } + ), + pd.DataFrame( + { + "Campaign": [ + "USA - A - C", + "USA - A - C", + "USA - B - D", + "USA - B - D", + "USA - A - C", + "USA - A - C", + "USA - B - D", + "USA - B - D", + ], + "Ad Group": [ + "A - C", + "C - A", + "B - D", + "D - B", + "A - C", + "C - A", + "B - D", + "D - B", + ], + "Keyword": ["k1", "k1", "k1", "k1", "k2", "k2", "k2", "k2"], + "Max CPC": ["", "", "", "", "", "", "", ""], + } + ), + ), + ( + pd.DataFrame( + { + "Campaign": ["", ""], + "Ad Group": ["", ""], + "Keyword": ["k1 INSERT_STATION_FROM", "k2"], + "Max CPC": ["", ""], + } + ), + pd.DataFrame( + { + "Country": ["USA", "USA"], + 
"Station From": ["A", "B"], + "Station To": ["C", "D"], + } + ), + pd.DataFrame( + { + "Campaign": [ + "USA - A - C", + "USA - A - C", + "USA - B - D", + "USA - B - D", + "USA - A - C", + "USA - A - C", + "USA - B - D", + "USA - B - D", + ], + "Ad Group": [ + "A - C", + "C - A", + "B - D", + "D - B", + "A - C", + "C - A", + "B - D", + "D - B", + ], + "Keyword": ["k1 A", "k1 C", "k1 B", "k1 D", "k2", "k2", "k2", "k2"], + "Max CPC": ["", "", "", "", "", "", "", ""], + } + ), + ), + ], +) +def test_process_data_f( + template_df: pd.DataFrame, new_campaign_df: pd.DataFrame, expected: List[List[str]] +) -> None: + process_data_f(template_df, new_campaign_df).equals(expected) + + +@pytest.mark.parametrize( + "issues_column", + [ + [ + "Duplicate headlines found.\nDuplicate descriptions found.\n", + "Duplicate descriptions found.\n", + "", + "Minimum 3 headlines are required, found 2.\nMinimum 2 descriptions are required, found 1.\nHeadline length should be less than 30 characters, found 31 in column Headline 2.\n", + ], + None, + ], +) +def test_validate_output_data(issues_column: Optional[List[str]]) -> None: + df = pd.DataFrame( + { + "Headline 1": ["H1", "H1", "H1", "H1"], + "Headline 2": ["H1", "H2", "H2", ("H" * 31)], + "Headline 3": ["H3", "H3", "H3", ""], + "Description 1": ["D1", "D1", "D2", "D3"], + "Description 2": ["D1", "D1", "D3", ""], + } + ) + result = validate_output_data(df, "ad") + expected = df.copy() + if issues_column: + expected["Issues"] = issues_column + + assert result.equals(expected)