Merge pull request #30 from artefactory/dev

NCK v1.1
artefactory · Jun 15, 2020 · 5957f7e · 5957f7e
2 parents 897c362 + 53b0cab
commit 5957f7e
Show file tree

Hide file tree

Showing 15 changed files with 1,691 additions and 395 deletions.
diff --git a/.env b/.env
@@ -1,4 +1,4 @@
 PROJECT_ID=artefact-docker-containers
-DOCKER_IMAGE=nautilus-connector-kit
-DOCKER_TAG=1.4.0
+DOCKER_IMAGE=nautilus-connectors-kit-dev
+DOCKER_TAG=v1.1
 DOCKER_REGISTRY=eu.gcr.io
diff --git a/.github/workflows/buildtogcp.yml b/.github/workflows/buildtogcp.yml
@@ -34,13 +34,12 @@ on:
 
 # Environment variables available to all jobs and steps in this workflow
 env:
-  GCP_PROJECT: ${{ secrets.GCP_PROJECT }}
   GCP_EMAIL: ${{ secrets.GCP_EMAIL }}
   PROJECT_ID: ${{ secrets.PROJECT_ID }}
   DOCKER_TAG: ${{ github.run_id }}
   DOCKER_REGISTRY: ${{ secrets.DOCKER_REGISTRY }}
   DOCKER_IMAGE: ${{ secrets.DOCKER_IMAGE }}-${{ github.ref }}
-
+  CLOUDSDK_PYTHON_SITEPACKAGES: 1
 
 jobs:
   setup-build-publish:
@@ -55,7 +54,7 @@ jobs:
     # Setup gcloud CLI
     - uses: GoogleCloudPlatform/github-actions/setup-gcloud@master
       with:
-        version: '270.0.0'
+        version: '290.0.1'
         service_account_email: ${{ secrets.GCP_EMAIL }}
         service_account_key: ${{ secrets.GCP_KEY }}
 

diff --git a/README.md b/README.md
@@ -6,23 +6,25 @@ Nautilus connectors kit is a tool which aim is getting raw data from different s
 
 ### Readers
 
-- Google DoubleClick Manager (DBM / DV360)
-- Google Campaign Manager (CM / DCM)
-- Google Search Ads 360 (SA360)
+- Adobe Analytics 1.4
+- Adobe Analytics 2.0
+- Amazon S3
+- Facebook Marketing
+- Google Ads
 - Google Analytics
-- Google Search Console
-- Google Sheets
 - Google Cloud Storage
-- Google Adwords
+- Google Campaign Manager
+- Google Display & Video 360
+- Google Search Ads 360
 - Google Search Console
-- Facebook Business Manager
-- Amazon S3
+- Google Sheets
 - Oracle
-- SalesForce
 - MySQL
 - Radarly
-- Adobe Analytics 1.4
-- Yandex
+- SalesForce
+- Twitter Ads
+- Yandex Campaign
+- Yandex Statistics
 
 ### Writers
 
@@ -97,4 +99,4 @@ It is advised to do the following in a virtual env
 
 * https://manikos.github.io/a-tour-on-python-packaging
 * http://lucumr.pocoo.org/2014/1/27/python-on-wheels/
-* https://pip.readthedocs.io/en/1.4.1/cookbook.html#controlling-setup-requires
+* https://pip.readthedocs.io/en/1.4.1/cookbook.html#controlling-setup-requires
diff --git a/nck/clients/adobe_client.py b/nck/clients/adobe_client.py
@@ -0,0 +1,81 @@
+# GNU Lesser General Public License v3.0 only
+# Copyright (C) 2020 Artefact
+# licence-information@artefact.com
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 3 of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+import logging
+from datetime import datetime, timedelta
+import requests
+import jwt
+from tenacity import retry, wait_exponential, stop_after_delay
+
+# Adobe IMS host, used to build the JWT claims ("aud" and the scope claim).
+IMS_HOST = "ims-na1.adobelogin.com"
+# Endpoint to which the signed JWT is posted in exchange for an access token.
+IMS_EXCHANGE = "https://ims-na1.adobelogin.com/ims/exchange/jwt"
+
+# Logging is configured at import time, with level INFO.
+logging.basicConfig(level="INFO")
+# getLogger() called without a name returns the root logger.
+logger = logging.getLogger()
+
+
+class AdobeClient:
+    """
+    Create an Adobe Client for JWT Authentication.
+
+    A short-lived JWT is signed with the private key of the technical account,
+    then exchanged for an access token at the IMS exchange endpoint. The
+    access token is used to build the headers of Adobe Analytics API 2.0 requests.
+
+    Doc: https://github.com/AdobeDocs/adobeio-auth/blob/stage/JWT/JWT.md
+    Most of the code is taken from this repo:
+    https://github.com/AdobeDocs/analytics-2.0-apis/tree/master/examples/jwt/python
+
+    Attributes set by the constructor:
+    - client_id, client_secret, tech_account_id, org_id, private_key: stored as given
+    - jwt_token: last JWT that was signed
+    - access_token: token returned by the IMS exchange endpoint
+    """
+
+    # Lifetime of each signed JWT, in seconds.
+    JWT_VALIDITY_SECONDS = 30
+    # Upper bound, in seconds, on a single call to the IMS exchange endpoint.
+    REQUEST_TIMEOUT_SECONDS = 60
+
+    def __init__(self, client_id, client_secret, tech_account_id, org_id, private_key):
+        self.client_id = client_id
+        self.client_secret = client_secret
+        self.tech_account_id = tech_account_id
+        self.org_id = org_id
+        self.private_key = private_key
+
+        # Creating jwt_token attribute
+        logging.info("Getting jwt_token.")
+        self.jwt_token = self._build_jwt_token()
+
+        # Creating access_token attribute
+        logging.info("Getting access_token.")
+        self.access_token = self.get_access_token()
+
+    def _build_jwt_token(self):
+        """
+        Sign a fresh JWT (RS256) expiring JWT_VALIDITY_SECONDS from now.
+        """
+        return jwt.encode(
+            {
+                "exp": datetime.utcnow() + timedelta(seconds=self.JWT_VALIDITY_SECONDS),
+                "iss": self.org_id,
+                "sub": self.tech_account_id,
+                f"https://{IMS_HOST}/s/ent_analytics_bulk_ingest_sdk": True,
+                "aud": f"https://{IMS_HOST}/c/{self.client_id}",
+            },
+            self.private_key,
+            algorithm="RS256",
+        )
+
+    @retry(wait=wait_exponential(multiplier=60, min=60, max=1200), stop=stop_after_delay(3600))
+    def get_access_token(self):
+        """
+        Exchange a JWT for an access token at the IMS exchange endpoint.
+
+        Any exception raised here (HTTP error, timeout, unexpected payload)
+        triggers a new attempt through the retry decorator, until the
+        3600 seconds stop delay is reached.
+        """
+        # Retries wait at least 60 seconds whereas a JWT is only valid for
+        # JWT_VALIDITY_SECONDS: the token has to be re-signed on every
+        # attempt, otherwise all retries would be sent with an expired JWT.
+        self.jwt_token = self._build_jwt_token()
+
+        post_body = {"client_id": self.client_id, "client_secret": self.client_secret, "jwt_token": self.jwt_token}
+        # Without a timeout, a stalled connection would block forever and
+        # the retry policy would never get a chance to kick in.
+        response = requests.post(IMS_EXCHANGE, data=post_body, timeout=self.REQUEST_TIMEOUT_SECONDS)
+        # Surfacing HTTP errors explicitly, rather than through a KeyError
+        # on a missing "access_token" key.
+        response.raise_for_status()
+        return response.json()["access_token"]
+
+    def build_request_headers(self, global_company_id):
+        """
+        Build request headers to be used to interact with Adobe Analytics APIs 2.0.
+        """
+        return {
+            "Accept": "application/json",
+            "Authorization": f"Bearer {self.access_token}",
+            "Content-Type": "application/json",
+            "x-api-key": self.client_id,
+            "x-proxy-global-company-id": global_company_id,
+        }
diff --git a/nck/helpers/adobe_helper_2_0.py b/nck/helpers/adobe_helper_2_0.py
@@ -0,0 +1,116 @@
+# GNU Lesser General Public License v3.0 only
+# Copyright (C) 2020 Artefact
+# licence-information@artefact.com
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 3 of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+import logging
+from datetime import datetime
+
+# Logging is configured at import time, with level INFO.
+logging.basicConfig(level="INFO")
+# getLogger() called without a name returns the root logger.
+logger = logging.getLogger()
+
+
+class APIRateLimitError(Exception):
+    """
+    Exception logging its own message at ERROR level when instantiated.
+    """
+
+    def __init__(self, message):
+        # The log entry is written on creation, whether or not
+        # the exception ends up being raised.
+        super(APIRateLimitError, self).__init__(message)
+        logging.error(message)
+
+
+def add_metric_container_to_report_description(
+    rep_desc, dimensions, metrics, breakdown_item_ids
+):
+    """
+    Filling the metricContainer section of a report description (in place):
+    - "metricFilters": 1 breakdown filter per (metric, dimension breakdown) pair,
+      with ids numbered from 0, metric after metric
+    - "metrics": 1 entry per metric, referencing the ids of its own filters
+
+    Returns the updated report description (same object as rep_desc).
+    """
+
+    breakdown_count = len(breakdown_item_ids)
+    metric_count = len(metrics)
+
+    # Step 1: filters. The filter of breakdown b for metric m
+    # gets the id (b + m * breakdown_count).
+    breakdown_filters = []
+    for metric_pos in range(metric_count):
+        for breakdown_pos in range(breakdown_count):
+            breakdown_filters.append(
+                {
+                    "id": breakdown_pos + metric_pos * breakdown_count,
+                    "type": "breakdown",
+                    "dimension": f"variables/{dimensions[breakdown_pos]}",
+                    "itemId": breakdown_item_ids[breakdown_pos],
+                }
+            )
+    rep_desc["metricContainer"]["metricFilters"] = breakdown_filters
+
+    # Step 2: metrics, each one pointing to its own range of filter ids.
+    filtered_metrics = []
+    for metric_pos in range(metric_count):
+        first_filter_id = metric_pos * breakdown_count
+        filter_ids = [
+            first_filter_id + breakdown_pos
+            for breakdown_pos in range(breakdown_count)
+        ]
+        filtered_metrics.append(
+            {"id": f"metrics/{metrics[metric_pos]}", "filters": filter_ids}
+        )
+    rep_desc["metricContainer"]["metrics"] = filtered_metrics
+
+    return rep_desc
+
+
+def get_node_values_from_response(response):
+    """
+    Extracting dimension values from a report response,
+    and returning them into a dictionary of nodes: {name_itemId: value}
+    For instance: {'daterangeday_1200201': 'Mar 1, 2020'}
+    """
+
+    # The dimension id looks like "<prefix>/<name>": only the name is kept.
+    dimension_id = response["columns"]["dimension"]["id"]
+    node_name = dimension_id.split("/")[1]
+
+    nodes = {}
+    for row in response["rows"]:
+        nodes[f"{node_name}_{row['itemId']}"] = row["value"]
+    return nodes
+
+
+def get_item_ids_from_nodes(list_of_strings):
+    """
+    Extracting item_ids from a list of nodes,
+    each node being expressed as 'name_itemId'.
+    Empty nodes are skipped.
+    For instance: ['daterangeday_1200201', ''] -> ['1200201']
+    """
+
+    # Splitting on the LAST underscore only: if the name itself contains
+    # underscores, a plain split would return a piece of the name
+    # instead of the item_id.
+    return [node.rsplit("_", 1)[1] for node in list_of_strings if node]
+
+
+def format_date(date_string):
+    """
+    Converting a date expressed as "<abbreviated month> <day>, <year>"
+    into the "YYYY-MM-DD" format.
+    Input: "Jan 1, 2020"
+    Output: "2020-01-01"
+    """
+    parsed_date = datetime.strptime(date_string, "%b %d, %Y")
+    return parsed_date.strftime("%Y-%m-%d")
+
+
+def parse_response(response, metrics, parent_dim_parsed):
+    """
+    Parsing a raw JSON response into the following format:
+    {dimension: value, metric: value} (1 dictionary per row)
+
+    Each yielded row contains, in this order: the already parsed parent
+    dimensions, the dimension of the response, and the metrics.
+    The value of the "daterangeday" key, if any, is reformatted with format_date.
+    """
+
+    dimension_id = response["columns"]["dimension"]["id"]
+    dimension = dimension_id.split("variables/")[1]
+
+    for row in response["rows"]:
+        # Metric values are matched to metric names by position.
+        metric_values = dict(zip(metrics, row["data"]))
+
+        record = dict(parent_dim_parsed)
+        record[dimension] = row["value"]
+        record.update(metric_values)
+
+        if "daterangeday" in record:
+            record["daterangeday"] = format_date(record["daterangeday"])
+
+        yield record