Merge pull request #67 from ssenart/release/1.2.2
Release 1.2.2
ssenart authored May 8, 2024
2 parents 9e14d7f + 227ac40 commit 0c38d63
Showing 5 changed files with 142 additions and 56 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [1.2.2](https://github.com/ssenart/PyGazpar/compare/1.2.1...1.2.2) - 2024-05-08
+
+### Fixed
+- [#65](https://github.com/ssenart/PyGazpar/issues/65): [Bug] PermissionError happens when loading data from Excel file.
+
+## [1.2.1](https://github.com/ssenart/PyGazpar/compare/1.2.0...1.2.1) - 2024-05-04
+
+### Fixed
+- [#64](https://github.com/ssenart/PyGazpar/issues/64): [Issue] Captcha failed issue.
+
+- [#63](https://github.com/ssenart/PyGazpar/issues/63): [Bug] If the latest received consumption is Sunday, then the last weekly period is duplicated.
+
## [1.2.0](https://github.com/ssenart/PyGazpar/compare/1.1.6...1.2.0) - 2022-12-16

### Changed
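The #65 fix is about Windows file locking: openpyxl can still hold the downloaded workbook open when cleanup runs, so `os.remove` raises `PermissionError`. The change (visible in `pygazpar/datasource.py` below) simply tolerates the locked file and leaves it for the next run. A minimal sketch of the pattern, with a hypothetical helper name:

```python
import os

def remove_if_unlocked(path: str) -> None:
    """Best-effort cleanup: a file still locked by another handle is skipped."""
    try:
        os.remove(path)
    except PermissionError:
        pass
```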
4 changes: 3 additions & 1 deletion README.md
@@ -1,6 +1,8 @@
# PyGazpar

-## $\text{\color{red}{!!! This library is broken since CAPTCHA is mandatory on GrDF site !!!}}$
+## <span style="color:green">!!! This library is working again. CAPTCHA has been removed !!!</span>
+
+## <span style="color:red">~~!!! This library is broken since CAPTCHA is mandatory on GrDF site !!!~~</span>

PyGazpar is a Python library for getting natural gas consumption from GrDF French provider.
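For context, a minimal usage sketch of the library after this release. The import paths are assumed from the package layout, the `loadSince` call is taken from the test files at the end of this diff, and the PCE identifier is a placeholder:

```python
from pygazpar.client import Client
from pygazpar.datasource import JsonWebDataSource
from pygazpar.enum import Frequency

client = Client(JsonWebDataSource("user@example.com", "password"))

# Last 30 days of daily readings for a given PCE (placeholder identifier).
data = client.loadSince("12345678901234", 30, [Frequency.DAILY])

# data[Frequency.DAILY.value] is then a list of reading dicts, one per day.
```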

174 changes: 123 additions & 51 deletions pygazpar/datasource.py
@@ -2,7 +2,9 @@
import glob
import os
import json
+import time
import pandas as pd
+import http.cookiejar
from abc import ABC, abstractmethod
from typing import Any, List, Dict, cast, Optional
from requests import Session
@@ -11,15 +13,22 @@
from pygazpar.excelparser import ExcelParser
from pygazpar.jsonparser import JsonParser

-AUTH_NONCE_URL = "https://monespace.grdf.fr/client/particulier/accueil"
-LOGIN_URL = "https://login.monespace.grdf.fr/sofit-account-api/api/v1/auth"
-LOGIN_HEADER = {"domain": "grdf.fr"}
-LOGIN_PAYLOAD = """{{
-    "email": "{0}",
-    "password": "{1}",
-    "capp": "meg",
-    "goto": "https://sofa-connexion.grdf.fr:443/openam/oauth2/externeGrdf/authorize?response_type=code&scope=openid%20profile%20email%20infotravaux%20%2Fv1%2Faccreditation%20%2Fv1%2Faccreditations%20%2Fdigiconso%2Fv1%20%2Fdigiconso%2Fv1%2Fconsommations%20new_meg&client_id=prod_espaceclient&state=0&redirect_uri=https%3A%2F%2Fmonespace.grdf.fr%2F_codexch&nonce={2}&by_pass_okta=1&capp=meg"}}"""
+SESSION_TOKEN_URL = "https://connexion.grdf.fr/api/v1/authn"
+SESSION_TOKEN_PAYLOAD = """{{
+    "username": "{0}",
+    "password": "{1}",
+    "options": {{
+        "multiOptionalFactorEnroll": "false",
+        "warnBeforePasswordExpired": "false"
+    }}
+}}"""
+
+AUTH_TOKEN_URL = "https://connexion.grdf.fr/login/sessionCookieRedirect"
+AUTH_TOKEN_PARAMS = """{{
+    "checkAccountSetupComplete": "true",
+    "token": "{0}",
+    "redirectUrl": "https://monespace.grdf.fr"
+}}"""

Logger = logging.getLogger(__name__)
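The new endpoints describe a two-step login that looks like Okta's standard flow: POST the credentials to `/api/v1/authn` to obtain a short-lived `sessionToken`, then redeem it at `sessionCookieRedirect` so the `auth_token` session cookie gets set. A condensed sketch (`username` and `password` are placeholders; the constants are the ones defined above):

```python
import json
import requests

# Step 1: primary authentication returns a one-time sessionToken.
response = requests.post(SESSION_TOKEN_URL, data=SESSION_TOKEN_PAYLOAD.format(username, password))
session_token = response.json()["sessionToken"]

# Step 2: redeeming the sessionToken sets the auth_token cookie on the redirect.
session = requests.Session()
session.get(AUTH_TOKEN_URL, params=json.loads(AUTH_TOKEN_PARAMS.format(session_token)), allow_redirects=True)
auth_token = session.cookies.get("auth_token", domain="monespace.grdf.fr")
```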

@@ -50,56 +59,59 @@ def __init__(self, username: str, password: str):
    # ------------------------------------------------------
    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:

-        session = Session()
-
-        session.headers.update(LOGIN_HEADER)
-
-        self._login(session, self.__username, self.__password)
+        auth_token = self._login(self.__username, self.__password)

-        res = self._loadFromSession(session, pceIdentifier, startDate, endDate, frequencies)
+        res = self._loadFromSession(auth_token, pceIdentifier, startDate, endDate, frequencies)

        Logger.debug("The data update terminates normally")

        return res

    # ------------------------------------------------------
-    def _login(self, session: Session, username: str, password: str):
+    def _login(self, username: str, password: str) -> str:

-        # Get auth_nonce token.
-        session.get(AUTH_NONCE_URL)
-        if "auth_nonce" not in session.cookies:
-            raise Exception("Login error: Cannot get auth_nonce token")
-        auth_nonce = session.cookies.get("auth_nonce")
+        session = Session()
+        session.headers.update({"domain": "grdf.fr"})
+        session.headers.update({"Content-Type": "application/json"})
+        session.headers.update({"X-Requested-With": "XMLHttpRequest"})

        # Build the login payload as a json string.
-        payload = LOGIN_PAYLOAD.format(username, password, auth_nonce)
+        payload = SESSION_TOKEN_PAYLOAD.format(username, password)

-        # Build the login payload as a python object.
-        data = json.loads(payload)
+        response = session.post(SESSION_TOKEN_URL, data=payload)

-        # Send the login command.
-        response = session.post(LOGIN_URL, data=data)
+        if response.status_code != 200:
+            raise Exception(f"An error occurred while logging in. Status code: {response.status_code} - {response.text}")

-        # Check login result.
-        loginData = response.json()
+        session_token = response.json().get("sessionToken")

-        response.raise_for_status()
+        Logger.debug("Session token: %s", session_token)

-        if "status" in loginData and "error" in loginData and loginData["status"] >= 400:
-            raise Exception(f"{loginData['error']} ({loginData['status']})")
+        jar = http.cookiejar.CookieJar()

-        if "state" in loginData and loginData["state"] != "SUCCESS":
-            raise Exception(loginData["error"])
+        session = Session()
+        session.headers.update({"Content-Type": "application/json"})
+        session.headers.update({"X-Requested-With": "XMLHttpRequest"})
+
+        params = json.loads(AUTH_TOKEN_PARAMS.format(session_token))
+
+        response = session.get(AUTH_TOKEN_URL, params=params, allow_redirects=True, cookies=jar)
+
+        if response.status_code != 200:
+            raise Exception(f"An error occurred while getting the auth token. Status code: {response.status_code} - {response.text}")
+
+        auth_token = session.cookies.get("auth_token", domain="monespace.grdf.fr")
+
+        return auth_token

    @abstractmethod
-    def _loadFromSession(self, session: Session, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
+    def _loadFromSession(self, auth_token: str, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        pass
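With this change, `_loadFromSession` receives the bare `auth_token` string instead of a shared `Session`; each download attempt in the subclasses below rebuilds a fresh session from it. The recurring setup, factored out here as a sketch (the helper name is hypothetical, the diff inlines these lines):

```python
from requests import Session

def make_session(auth_token: str) -> Session:
    """Rebuild a session from the auth_token; values mirror the retry loops below."""
    session = Session()
    session.headers.update({"Host": "monespace.grdf.fr"})
    session.headers.update({"Domain": "grdf.fr"})
    session.headers.update({"X-Requested-With": "XMLHttpRequest"})
    session.headers.update({"Accept": "application/json"})
    session.cookies.set("auth_token", auth_token, domain="monespace.grdf.fr")
    return session
```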


# ------------------------------------------------------------------------------------------------------------
class ExcelWebDataSource(WebDataSource):

-    DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives/telecharger?dateDebut={0}&dateFin={1}&frequence={3}&pceList%5B%5D={2}"
+    DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives/telecharger?dateDebut={0}&dateFin={1}&frequence={3}&pceList[]={2}"

    DATE_FORMAT = "%Y-%m-%d"

@@ -121,7 +133,7 @@ def __init__(self, username: str, password: str, tmpDirectory: str):
        self.__tmpDirectory = tmpDirectory

    # ------------------------------------------------------
-    def _loadFromSession(self, session: Session, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
+    def _loadFromSession(self, auth_token: str, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:

        res = {}

@@ -132,7 +144,10 @@ def _loadFromSession(self, session: Session, pceIdentifier: str, startDate: date
        file_list = glob.glob(data_file_path_pattern)
        for filename in file_list:
            if os.path.isfile(filename):
-                os.remove(filename)
+                try:
+                    os.remove(filename)
+                except PermissionError:
+                    pass

        if frequencies is None:
            # Transform Enum in List.
@@ -145,11 +160,31 @@ def _loadFromSession(self, auth_token: str, pceIdentifier: str, startDate: date
            # Inject parameters.
            downloadUrl = ExcelWebDataSource.DATA_URL.format(startDate.strftime(ExcelWebDataSource.DATE_FORMAT), endDate.strftime(ExcelWebDataSource.DATE_FORMAT), pceIdentifier, ExcelWebDataSource.FREQUENCY_VALUES[frequency])

-            session.get(downloadUrl)  # First request does not return anything: strange...

            Logger.debug(f"Loading data of frequency {ExcelWebDataSource.FREQUENCY_VALUES[frequency]} from {startDate.strftime(ExcelWebDataSource.DATE_FORMAT)} to {endDate.strftime(ExcelWebDataSource.DATE_FORMAT)}")

-            self.__downloadFile(session, downloadUrl, self.__tmpDirectory)
+            # Retry mechanism.
+            retry = 10
+            while retry > 0:
+
+                # Create a session.
+                session = Session()
+                session.headers.update({"Host": "monespace.grdf.fr"})
+                session.headers.update({"Domain": "grdf.fr"})
+                session.headers.update({"X-Requested-With": "XMLHttpRequest"})
+                session.headers.update({"Accept": "application/json"})
+                session.cookies.set("auth_token", auth_token, domain="monespace.grdf.fr")
+
+                try:
+                    self.__downloadFile(session, downloadUrl, self.__tmpDirectory)
+                    break
+                except Exception as e:
+
+                    if retry == 1:
+                        raise e
+
+                    Logger.error("An error occurred while loading data. Retry in 3 seconds.")
+                    time.sleep(3)
+                    retry -= 1

            # Load the XLSX file into the data structure
            file_list = glob.glob(data_file_path_pattern)
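Both download paths now share the same retry shape: up to 10 attempts, 3 seconds apart, re-raising the last failure. A generic sketch of that loop (hypothetical helper; the diff inlines it in both `ExcelWebDataSource` and `JsonWebDataSource`):

```python
import time

def with_retries(action, attempts: int = 10, delay_seconds: int = 3):
    """Run action() until it succeeds; re-raise its exception on the final attempt."""
    for attempt in range(1, attempts + 1):
        try:
            return action()
        except Exception:
            if attempt == attempts:
                raise
            time.sleep(delay_seconds)
```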
@@ -159,7 +194,11 @@ def _loadFromSession(self, auth_token: str, pceIdentifier: str, startDate: date

            for filename in file_list:
                res[frequency.value] = ExcelParser.parse(filename, frequency if frequency != Frequency.YEARLY else Frequency.DAILY)
-                os.remove(filename)
+                try:
+                    # openpyxl does not close the file properly.
+                    os.remove(filename)
+                except PermissionError:
+                    pass

            # We compute yearly from daily data.
            if frequency == Frequency.YEARLY:
@@ -172,6 +211,12 @@ def __downloadFile(self, session: Session, url: str, path: str):

        response = session.get(url)

+        if "text/html" in response.headers.get("Content-Type"):
+            raise Exception("An error occurred while loading data. Please check your credentials.")
+
+        if response.status_code != 200:
+            raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")
+
        response.raise_for_status()

        filename = response.headers["Content-Disposition"].split("filename=")[1]
@@ -210,7 +255,7 @@ def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies:
# ------------------------------------------------------------------------------------------------------------
class JsonWebDataSource(WebDataSource):

-    DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives?dateDebut={0}&dateFin={1}&pceList%5B%5D={2}"
+    DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives?dateDebut={0}&dateFin={1}&pceList[]={2}"

    TEMPERATURES_URL = "https://monespace.grdf.fr/api/e-conso/pce/{0}/meteo?dateFinPeriode={1}&nbJours={2}"

@@ -222,7 +267,7 @@ def __init__(self, username: str, password: str):

        super().__init__(username, password)

-    def _loadFromSession(self, session: Session, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
+    def _loadFromSession(self, auth_token: str, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:

        res = {}

@@ -237,11 +282,38 @@ def _loadFromSession(self, session: Session, pceIdentifier: str, startDate: date
        # Data URL: Inject parameters.
        downloadUrl = JsonWebDataSource.DATA_URL.format(startDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), pceIdentifier)

-        # First request never returns data.
-        session.get(downloadUrl)
+        # Retry mechanism.
+        retry = 10
+        while retry > 0:
+
+            # Create a session.
+            session = Session()
+            session.headers.update({"Host": "monespace.grdf.fr"})
+            session.headers.update({"Domain": "grdf.fr"})
+            session.headers.update({"X-Requested-With": "XMLHttpRequest"})
+            session.headers.update({"Accept": "application/json"})
+            session.cookies.set("auth_token", auth_token, domain="monespace.grdf.fr")
+
+            try:
+                response = session.get(downloadUrl)
+
+                if "text/html" in response.headers.get("Content-Type"):
+                    raise Exception("An error occurred while loading data. Please check your credentials.")
+
+                if response.status_code != 200:
+                    raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")
+
+                break
+            except Exception as e:
+
+                if retry == 1:
+                    raise e
+
+                Logger.error("An error occurred while loading data. Retry in 3 seconds.")
+                time.sleep(3)
+                retry -= 1

        # Get consumption data.
-        data = session.get(downloadUrl).text
+        data = response.text

        # Temperatures URL: Inject parameters.
        endDate = date.today() - timedelta(days=1) if endDate >= date.today() else endDate
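One detail worth flagging in the guard above: an expired or missing `auth_token` does not necessarily yield an error status, since the server may answer with an HTML login page instead of the expected payload. That is why the `Content-Type` check treats HTML as a credentials problem. In isolation:

```python
# Sketch of the guard's intent. Note one assumption the diff makes:
# response.headers.get("Content-Type") can return None when the header is
# absent, which would make the membership test itself raise.
content_type = response.headers.get("Content-Type", "")
if "text/html" in content_type:
    raise Exception("An error occurred while loading data. Please check your credentials.")
```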
@@ -394,7 +466,7 @@ def computeWeekly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        df = df.sort_values(by=['first_day_of_week'])

        # Select rows where we have a full week (7 days) except for the current week.
-        df = pd.concat([df[(df["count"] == 7)], df.tail(1)])
+        df = pd.concat([df[(df["count"] >= 7)], df.tail(1)[df["count"] < 7]])

        # Select target columns.
        df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]
@@ -422,7 +494,7 @@ def computeMonthly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        df = df.sort_values(by=['first_day_of_month'])

        # Select rows where we have a full month (more than 27 days) except for the current month.
-        df = pd.concat([df[(df["count"] >= 28)], df.tail(1)])
+        df = pd.concat([df[(df["count"] >= 28)], df.tail(1)[df["count"] < 28]])

        # Rename columns for their target names.
        df = df.rename(columns={"month_year": "time_period"})
@@ -452,8 +524,8 @@ def computeYearly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        # Sort rows by month ascending.
        df = df.sort_values(by=['year'])

-        # Select rows where we have almost a full year (more than 360) except for the current month.
-        df = pd.concat([df[(df["count"] >= 360)], df.tail(1)])
+        # Select rows where we have almost a full year (more than 360) except for the current year.
+        df = pd.concat([df[(df["count"] >= 360)], df.tail(1)[df["count"] < 360]])

        # Rename columns for their target names.
        df = df.rename(columns={"year": "time_period"})
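The three `pd.concat` one-liners above all fix the same defect (#63): the old code appended `df.tail(1)` unconditionally, so when the final period was already complete it was selected twice, once by the count filter and once as the tail. The new form only keeps the tail when it is incomplete. A tiny self-contained reproduction:

```python
import pandas as pd

# Two weeks of aggregated data, both complete (count == 7 days).
df = pd.DataFrame({"time_period": ["W1", "W2"], "count": [7, 7]})

old = pd.concat([df[df["count"] == 7], df.tail(1)])                   # -> W1, W2, W2 (duplicate)
new = pd.concat([df[df["count"] >= 7], df.tail(1)[df["count"] < 7]])  # -> W1, W2

print(old["time_period"].tolist(), new["time_period"].tolist())
```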
4 changes: 2 additions & 2 deletions tests/test_client.py
@@ -72,10 +72,10 @@ def test_monthly_jsonweb(self):

        data = client.loadSince(self.__pceIdentifier, 365, [Frequency.MONTHLY])

-        assert (len(data[Frequency.MONTHLY.value]) >= 12 and len(data[Frequency.MONTHLY.value]) <= 13)
+        assert (len(data[Frequency.MONTHLY.value]) >= 11 and len(data[Frequency.MONTHLY.value]) <= 13)

    def test_yearly_jsonweb(self):
-        client = Client(ExcelWebDataSource(self.__username, self.__password, self.__tmp_directory))
+        client = Client(JsonWebDataSource(self.__username, self.__password))

        data = client.loadSince(self.__pceIdentifier, 365, [Frequency.YEARLY])

4 changes: 2 additions & 2 deletions tests/test_datasource.py
@@ -77,7 +77,7 @@ def test_jsonfile_sample(self):

        assert (len(data[Frequency.WEEKLY.value]) == 155)

-        assert (len(data[Frequency.MONTHLY.value]) == 37)
+        assert (len(data[Frequency.MONTHLY.value]) == 36)

        assert (len(data[Frequency.YEARLY.value]) == 3)

@@ -143,7 +143,7 @@ def test_jsonweb(self):

        assert (len(data[Frequency.WEEKLY.value]) >= 51 and len(data[Frequency.WEEKLY.value]) <= 54)

-        assert (len(data[Frequency.MONTHLY.value]) >= 12 and len(data[Frequency.MONTHLY.value]) <= 13)
+        assert (len(data[Frequency.MONTHLY.value]) >= 11 and len(data[Frequency.MONTHLY.value]) <= 13)

        assert (len(data[Frequency.YEARLY.value]) == 1)

