Skip to content

Commit

Permalink
Merge branch 'dev' into improve-docu
Browse files Browse the repository at this point in the history
  • Loading branch information
bergnerjonas authored Jun 29, 2024
2 parents b0c8cd6 + 6f5c153 commit 8949062
Show file tree
Hide file tree
Showing 36 changed files with 62,455 additions and 17,304 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ Example for downloading a Table:
from pystatis import Table

t = Table(name="21311-0001") # data is not yet downloaded
t.get_data() # Only now the data is either fetched from GENESIS or loaded from cache. If the data is downloaded from online, it will be also cached, so next time the data is loaded from cache.
t.data # prettified data stored as pandas data frame
t.get_data() # only now the data is either fetched from GENESIS or loaded from cache. If the data is downloaded, it is also cached, so the next time it is loaded from cache. The default language of the data is German, but it can be set to either German (de) or English (en) using the language parameter of get_data().
t.data # prettified data stored as pandas DataFrame
```

For more details, please study the provided sample notebook for [tables](https://github.com/CorrelAid/pystatis/blob/main/nb/table.ipynb).
Expand Down
67 changes: 34 additions & 33 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 44 additions & 0 deletions src/pystatis/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,50 @@
"zensus": re.compile(r"^\d{4}[A-Z]-\d{4}$"),
"regio": re.compile(r"^((\d{5}-.{1,2}($|-.*$))|(A.*$)|([0-9A-Z]{10}$)|(\d{5}\w-Z-\d{1,2}))"),
}
# Nested lookup of column names. The levels are:
#   1. database key ("genesis-regio" or "zensus"),
#   2. language code ("de" or "en"),
#   3. a language-independent identifier (e.g. "time", "value_code", "ags").
# NOTE(review): the innermost values are presumably the column names found in
# the data returned for that database/language -- confirm against the code
# that consumes this mapping.
LANG_TO_COL_MAPPING = {
"genesis-regio": {
"de": {
"time_label": "Zeit_Label",
"time": "Zeit",
"variable_label": "Merkmal_Label",
"value_label": "Auspraegung_Label",
"value_code": "Auspraegung_Code",
"ags": "Amtlicher Gemeindeschlüssel (AGS)",
},
"en": {
"time_label": "time_label",
"time": "time",
"variable_label": "variable_label",
# NOTE(review): the ".2"/".1" suffixes look like de-duplicated column
# names rather than dedicated value columns -- confirm they are intended.
"value_label": "variable_code.2",
"value_code": "variable_code.1",
"ags": "Official municipality key (AGS)",
},
},
# Currently, the response does not change column names between languages
# (only the "ars" entry below differs between "de" and "en").
# Keep this dictionary for consistency and future proofing.
"zensus": {
"de": {
"time_label": "time_label",
"time": "time",
"variable_label": "variable_label",
"variable_attribute_label": "variable_attribute_label",
"value_variable_label": "value_variable_label",
"value": "value",
"value_unit": "value_unit",
"ars": "Amtlicher Regionalschlüssel (ARS)",
},
"en": {
"time_label": "time_label",
"time": "time",
"variable_label": "variable_label",
"variable_attribute_label": "variable_attribute_label",
"value_variable_label": "value_variable_label",
"value": "value",
"value_unit": "value_unit",
"ars": "Official regional key (ARS)",
},
},
}
ZENSUS_AGS_CODES = [
"GEOBL1",
"GEOBL3",
Expand Down
42 changes: 37 additions & 5 deletions src/pystatis/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,60 @@

from pystatis import config
from pystatis.cache import normalize_name
from pystatis.exception import PystatisConfigError

logger = logging.getLogger(__name__)


def identify_db(name: str) -> list[str]:
"""Identify the required database by matching the item code to the database regex.
def identify_db_matches(table_name: str) -> list[str]:
"""Identify possible databases by matching the item code to the database regex.
Args:
name (str): Query parameter 'name' corresponding to the item code.
Returns:
db_match (list[str]): List of matching databases.
Raises:
ValueError: If no db match was found.
"""
regex_db = config.get_db_identifiers()

# Strip optional leading * and trailing job id
name = normalize_name(name).lstrip("*")
table_name = normalize_name(table_name).lstrip("*")

# Get list of matching dbs
db_match = [db_name for db_name, reg in regex_db.items() if reg.match(name)]
db_matches = [db_name for db_name, reg in regex_db.items() if reg.match(table_name)]

if db_matches:
return db_matches
else:
raise ValueError(f"Could not determine the database for the table '{table_name}'.")


return db_match
def select_db_by_credentials(db_matches: list[str]) -> str:
    """Pick the first candidate database for which credentials are available.

    Args:
        db_matches (list[str]): Candidate databases, checked in the given order.

    Returns:
        db_name (str): First candidate for which `check_credentials` succeeds.

    Raises:
        PystatisConfigError: If none of the candidates passes `check_credentials`.
    """
    for candidate in db_matches:
        if check_credentials(candidate):
            # Stop at the first candidate with usable credentials.
            break
    else:
        # The loop finished without a hit (also the case for an empty list).
        raise PystatisConfigError(
            "Missing credentials!\n"
            f"To access this item you need to be a registered user of: {db_matches} \n"
            "Please run setup_credentials()."
        )

    return candidate


def get_host(db_name: str) -> str:
Expand Down
33 changes: 5 additions & 28 deletions src/pystatis/http_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from pystatis import config, db
from pystatis.cache import cache_data, hit_in_cash, normalize_name, read_from_cache
from pystatis.exception import DestatisStatusError, PystatisConfigError
from pystatis.exception import DestatisStatusError

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -100,33 +100,10 @@ def get_data_from_endpoint(endpoint: str, method: str, params: dict, db_name: st

# Determine database by matching regex to item code
if db_name is None:
name = params.get("name", params.get("selection", ""))

if name is not None:
db_match = db.identify_db(name)

# Check credentials (Note: we might want to do this also for explicitly specified db_names?)
# If more than one db matches it must be a Cube (provided all regexing works as intended).
# --> Choose db based on available credentials.
if db_match:
for name in db_match:
if db.check_credentials(name):
db_name = name
break
else:
raise PystatisConfigError(
"Missing credentials!\n"
f"To access this item you need to be a registered user of: {db_match} \n"
"Please run setup_credentials()."
)

if not db_name:
raise ValueError(
"Could not determine the database for this request. "
"Please specify a database using the `db_name` parameter "
"or make sure that the `params` dictionary has a key 'name' "
"with a proper object number."
)
table_name = params.get("name", params.get("selection", ""))

db_matches = db.identify_db_matches(table_name)
db_name = db.select_db_by_credentials(db_matches)

db_host, db_user, db_pw = db.get_settings(db_name)
url = f"{db_host}{endpoint}/{method}"
Expand Down
Loading

0 comments on commit 8949062

Please sign in to comment.