diff --git a/nb/presentation.ipynb b/nb/presentation.ipynb deleted file mode 100644 index 764779e..0000000 --- a/nb/presentation.ipynb +++ /dev/null @@ -1,122 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "id": "571e9d1b", - "metadata": {}, - "outputs": [], - "source": [ - "import pystatis" - ] - }, - { - "cell_type": "markdown", - "id": "d580984f", - "metadata": {}, - "source": [ - "# Pystatis presentation\n", - "\n", - "`pystatis` is a small Python library to conveniently wrap the different GENESIS web services (APIs) in a centralized and user-friendly manner.\n", - "\n", - "It allows users to browse the different databases and download the desired tables from all supported databases in a convenient `pandas` `DataFrame` object, suited for further analysis." - ] - }, - { - "cell_type": "markdown", - "id": "62b9f397", - "metadata": {}, - "source": [ - "## Setup\n", - "\n", - "We won't cover the initial only-once setup here because the user has to enter their credentials for the supported databases (GENESIS, Regionalstatistik, Zensus). But there is a dedicated notebook [Setup](./00_Setup.ipynb) with examples and explanations." - ] - }, - { - "cell_type": "markdown", - "id": "cbe657a8", - "metadata": {}, - "source": [ - "## Main Use Cases" - ] - }, - { - "cell_type": "markdown", - "id": "90350387", - "metadata": {}, - "source": [ - "### Find" - ] - }, - { - "cell_type": "markdown", - "id": "354d61d3", - "metadata": {}, - "source": [ - "### Table" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "fe99d7cd", - "metadata": {}, - "outputs": [], - "source": [ - "t = pystatis.Table(name=\"12111-01-01-5-B\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "69b87edb", - "metadata": {}, - "outputs": [ - { - "ename": "PystatisConfigError", - "evalue": "No active database set! Please run `set_db()`.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mPystatisConfigError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/git/github/CorrelAid/pystatis/src/pystatis/table.py:37\u001b[0m, in \u001b[0;36mTable.get_data\u001b[0;34m(self, area, **kwargs)\u001b[0m\n\u001b[1;32m 33\u001b[0m params \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marea\u001b[39m\u001b[38;5;124m\"\u001b[39m: area, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mformat\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mffcsv\u001b[39m\u001b[38;5;124m\"\u001b[39m}\n\u001b[1;32m 35\u001b[0m params \u001b[38;5;241m|\u001b[39m\u001b[38;5;241m=\u001b[39m kwargs\n\u001b[0;32m---> 37\u001b[0m raw_data \u001b[38;5;241m=\u001b[39m \u001b[43mload_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdata\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtablefile\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mas_json\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 39\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(raw_data, \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;66;03m# nosec assert_used\u001b[39;00m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw_data \u001b[38;5;241m=\u001b[39m raw_data\n", - "File \u001b[0;32m~/git/github/CorrelAid/pystatis/src/pystatis/http_helper.py:51\u001b[0m, in \u001b[0;36mload_data\u001b[0;34m(endpoint, method, params, as_json)\u001b[0m\n\u001b[1;32m 49\u001b[0m data \u001b[38;5;241m=\u001b[39m read_from_cache(cache_dir, name, params)\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 51\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mget_data_from_endpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 52\u001b[0m data \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mtext\n\u001b[1;32m 54\u001b[0m \u001b[38;5;66;03m# status code 98 means that the table is too big\u001b[39;00m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# we have to start a job and wait for it to be ready\u001b[39;00m\n", - "File \u001b[0;32m~/git/github/CorrelAid/pystatis/src/pystatis/http_helper.py:95\u001b[0m, in \u001b[0;36mget_data_from_endpoint\u001b[0;34m(endpoint, method, params)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_data_from_endpoint\u001b[39m(\n\u001b[1;32m 81\u001b[0m endpoint: \u001b[38;5;28mstr\u001b[39m, method: \u001b[38;5;28mstr\u001b[39m, params: \u001b[38;5;28mdict\u001b[39m\n\u001b[1;32m 82\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m requests\u001b[38;5;241m.\u001b[39mResponse:\n\u001b[1;32m 83\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;124;03m Wrapper method which constructs an url for querying data from Destatis and\u001b[39;00m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124;03m sends a GET request.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;124;03m requests.Response: the response object holding the response from calling the Destatis endpoint.\u001b[39;00m\n\u001b[1;32m 94\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 95\u001b[0m db_host, db_user, db_pw \u001b[38;5;241m=\u001b[39m \u001b[43mdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_db_settings\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 96\u001b[0m url \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdb_host\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mendpoint\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmethod\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;66;03m# params is used to calculate hash for caching so don't alter params dict here!\u001b[39;00m\n", - "File \u001b[0;32m~/git/github/CorrelAid/pystatis/src/pystatis/db.py:61\u001b[0m, in \u001b[0;36mget_db_settings\u001b[0;34m()\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_db_settings\u001b[39m() \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mtuple\u001b[39m[\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mstr\u001b[39m]:\n\u001b[1;32m 60\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Get the active database settings (host, user, password).\"\"\"\u001b[39;00m\n\u001b[0;32m---> 61\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mget_db_host\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m, get_db_user(), get_db_pw()\n", - "File \u001b[0;32m~/git/github/CorrelAid/pystatis/src/pystatis/db.py:43\u001b[0m, in \u001b[0;36mget_db_host\u001b[0;34m()\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_db_host\u001b[39m() \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n\u001b[0;32m---> 43\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config\u001b[38;5;241m.\u001b[39mconfig[\u001b[43mget_db\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbase_url\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n", - "File \u001b[0;32m~/git/github/CorrelAid/pystatis/src/pystatis/db.py:35\u001b[0m, in \u001b[0;36mget_db\u001b[0;34m()\u001b[0m\n\u001b[1;32m 32\u001b[0m active_db \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msettings\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mactive_db\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m active_db:\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m PystatisConfigError(\n\u001b[1;32m 36\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo active database set! Please run `set_db()`.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 37\u001b[0m )\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m active_db\n", - "\u001b[0;31mPystatisConfigError\u001b[0m: No active database set! Please run `set_db()`." - ] - } - ], - "source": [ - "t.get_data()" - ] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,py:percent" - }, - "kernelspec": { - "display_name": "pystatis", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}