diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..c4b05dd --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,104 @@ +# This workflow will create a Python package and upload it to testPyPi or PyPi +# Then, it installs pandahub from there and all dependencies and runs tests with different Python versions + +name: release + +# Controls when the action will run. +on: + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + upload_server: + description: 'upload server' + required: true + default: 'testpypi' + type: choice + options: + - 'testpypi' + - 'pypi' + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + upload: + # The type of runner that the job will run on + runs-on: ubuntu-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v3 + + # Sets up python3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + # Installs and upgrades pip, installs other dependencies and installs the package from setup.py + - name: Install dependencies + run: | + # Upgrade pip + python3 -m pip install --upgrade pip + # Install twine + python3 -m pip install setuptools wheel twine + + # Upload to TestPyPI + - name: Build and Upload to TestPyPI + if: inputs.upload_server == 'testpypi' + run: | + python3 setup.py sdist --formats=zip + twine check dist/* --strict + python3 -m twine upload dist/* + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.TESTPYPI }} + TWINE_REPOSITORY: testpypi + + # Upload to PyPI + - name: Build and Upload to PyPI + if: inputs.upload_server == 'pypi' + run: | + python3 setup.py sdist --formats=zip + twine check dist/* --strict + python3 -m twine upload dist/* + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI }} + TWINE_REPOSITORY: pypi + # Make sure package is available on pypi + - name: Sleep for 300s to make release available + uses: juliangruber/sleep-action@v1 + with: + time: 300s + + + build: + + runs-on: ${{ matrix.os }} + needs: upload + strategy: + matrix: + python-version: ['3.8', '3.9', '3.10', '3.11'] + os: [ ubuntu-latest, windows-latest ] + steps: + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install pytest python-igraph pytest-split + if ${{ matrix.python-version != '3.11' }}; then python -m pip install numba; fi + - name: Install pandahub from TestPyPI + if: ${{ inputs.upload_server == 'testpypi'}} + run: | + pip install --no-cache-dir -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple pandahub + - name: Install pandahub from PyPI + if: ${{ inputs.upload_server == 'pypi'}} + run: | + pip install pandahub + - name: List all installed packages + run: | + pip list + - name: Test with pytest + run: | + pytest --pyargs pandahub.test diff --git a/.gitignore b/.gitignore index dd35ff0..3f427dd 100644 --- a/.gitignore +++ b/.gitignore @@ -107,4 +107,7 @@ ENV/ # Pycharm .idea +# Credentials +settings.py + !pandahub/lib diff --git a/CHANGELOG.md b/CHANGELOG.md index 2163955..3846ca9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Change Log +## [0.2.4] + - BREAKING drops index argument from 
create_variant() function + + ## [0.2.3]- 2022-08-04 - ADDED version property in project data diff --git a/CHANGELOG.rst b/CHANGELOG.rst new file mode 100644 index 0000000..3f49273 --- /dev/null +++ b/CHANGELOG.rst @@ -0,0 +1,23 @@ +Changelog +============= + +[XX.XX.XX] - 2023-XX-XX +------------------------------- +- [ADDED] +- [IMPROVED] +- [CHANGED] +- [REMOVED] + +[0.2.3] - 2022-08-04 +------------------------------- +- [ADDED] version property in project data +- [ADDED] method to migrate projects to latest version +- [ADDED] option to disable registration +- [ADDED] option to use a separate mongodb instance as global database +- [ADDED] geo mode to handle geojson columns +- [ADDED] tutorials +- [IMPROVED] collections for element tables now start with :code:`net_` +- [IMPROVED] project IDs now can be any name +- [IMPROVED] compatibility with python < 3.9 +- [IMPROVED] project settings API +- [IMPROVED] timeseries handling diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst new file mode 100644 index 0000000..4dd009e --- /dev/null +++ b/CONTRIBUTING.rst @@ -0,0 +1,158 @@ +Get in Touch! +=============== + +You have found a bug in pandahub or have a suggestion for a new functionality? Then get in touch with us by opening up an issue on the pandahub issue board to discuss possible new developments with the community and the maintainers. + + +Set up your git repository +============================== + +**Note**: *The following setup is just a suggestion of how to set up your repository and is supposed to make contributing easier, especially for newcomers. If you have a different setup that you are more comfortable with, you do not have to adopt this setup.* + +If you want to contribute for the first time, you can set up your environment like this: + +#. If you have not done it yet: install git and create a GitHub account +#. Create a fork of the official pandahub repository by clicking on "Fork" in the official pandahub repository (see https://help.github.com/articles/fork-a-repo/) +#. Clone the forked repository to your local machine: :: + + git clone https://github.com/YOUR-USERNAME/pandahub.git + +#. Copy the following configuration to the bottom of the pandahub/.git/config file (the .git folder is hidden, so you might have to enable showing hidden folders) and insert your GitHub username: :: + + [remote "origin"] + url = https://github.com/e2nIEE/pandahub.git + fetch = +refs/heads/*:refs/remotes/ph/* + pushurl = https://github.com/YOUR-USERNAME/pandahub.git + [remote "ph"] + url = https://github.com/e2nIEE/pandahub.git + fetch = +refs/heads/*:refs/remotes/ph/* + [remote "ph_fork"] + url = https://github.com/YOUR-USERNAME/pandahub.git + fetch = +refs/heads/*:refs/remotes/ph_fork/* + [branch "develop"] + remote = origin + merge = refs/heads/develop + +The develop branch is now configured to automatically track the official pandahub develop branch. So if you are on the develop branch and use: :: + + git pull + +your local repository will be updated with the newest changes in the official pandahub repository. + +Since you cannot push directly to the official pandahub repository, if you are on develop and do: :: + + git push + +your push is by default routed to your own fork instead of the official pandahub repository with the setting as defined above.
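+ +To double-check the setup, you can list the configured remotes: :: + + git remote -v + +With the configuration above, "origin" fetches from the official repository but pushes to your fork, while "ph" points to the official repository and "ph_fork" to your fork.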
+ +If this is too implicit for you, you can always explicitly use the remotes "ph" and "ph_fork" to push and pull from the different repositories: :: + + git pull ph develop + git push ph_fork develop + +Contribute +===================================== + +All contributions to the pandahub repository are made through pull requests to the develop branch. You can either submit a pull request from the develop branch of your fork or create a special feature branch that you keep the changes on. A feature branch is the way to go if you have multiple issues that you are working on in parallel and want to submit with separate pull requests. If you only have small, one-time changes to submit, you can also use the develop branch to submit your pull request. + +**Note**: *The following guide assumes the remotes are set up as described above. If you have a different setup, you will have to adapt the commands accordingly.* + +Contribute from your develop branch +------------------------------------ + +#. Check out the develop branch on your local machine: :: + + git checkout develop + +#. Update your local copy to the most recent version of the pandahub develop branch: :: + + git pull + +#. Make changes in the code + +#. Add and commit your changes: :: + + git add --all + git commit -m"commit message" + + If there is an open issue that the commit belongs to, reference the issue in the commit message, for example for issue 3: :: + + git commit -m"commit message #3" + +#. Push your changes to your fork: :: + + git push + +#. Open a pull request to the main repository: https://help.github.com/articles/creating-a-pull-request-from-a-fork/ + +#. If you want to amend the pull request (for example because tests are failing in CI, or because the community/maintainers have asked for modifications), simply push more commits to the branch: :: + + git add --all + git commit -m"I have updated the pull request after discussions #3" + git push + + The pull request will be automatically updated. + +Contribute from a feature branch +------------------------------------ + +#. Check out the develop branch on your local machine: :: + + git checkout develop + +#. Update your local copy to the most recent version of the pandahub develop branch: :: + + git pull + +#. Create a new feature branch: :: + + git checkout -b my_branch + +#. Make changes in the code + +#. Add and commit your change: :: + + git add --all + git commit -m"commit message" + + If there is an open issue that the commit belongs to, reference the issue in the commit message, for example for issue 3: :: + + git commit -m"commit message #3" + +#. Push your changes to your fork: :: + + git push -u ph_fork my_branch + + This pushes the new branch to your fork and also sets up the remote tracking. + +#. Open a pull request to the official repository (see https://help.github.com/articles/creating-a-pull-request-from-a-fork/) + +#. If you want to amend the pull request (for example because tests are failing in CI, or because the community/maintainers have asked for modifications), simply push more commits to the branch. Since the remote tracking branch has been set up, this is as easy as: :: + + git add --all + git commit -m"I have updated the pull request after discussions #3" + git push + +#. If the pull request was merged and you don't expect further development on this feature, you can delete the feature branch to keep your repository clean. + +Test Suite +================ + +pandahub uses pytest for automatic software testing.
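+ +The whole test suite can also be invoked directly with pytest, which is how the release workflow runs it (assuming pandahub is installed in the active environment): :: + + pytest --pyargs pandahub.test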
+ +Making sure you don't break anything +--------------------------------------- + +If you make changes to pandahub that you plan to submit, first make sure that all tests are still passing. You can do this locally with: :: + + import pandahub.test + pandahub.test.run_all_tests() + +When you submit a pull request, the CI pipeline will run the same tests with all supported Python versions (currently 3.8 to 3.11). In most cases, if tests pass for you locally, they will also pass in CI. But it can also happen that the tests pass for you locally, but still fail in CI, because the new code is not compatible with all Python versions. +In this case you will have to update your pull request until the tests pass in all Python versions. Pull requests that lead to failing tests will not be accepted. + + +Adding Tests for new functionality +----------------------------------- + +If you have added new functionality, you should also add a new function that tests this functionality. pytest automatically detects all functions in the pandahub/test folder that start with 'test' and are located in a file that also starts with 'test' as relevant test cases. diff --git a/VARIANTS.md b/VARIANTS.md new file mode 100644 index 0000000..008d67f --- /dev/null +++ b/VARIANTS.md @@ -0,0 +1,40 @@ +# Variants + +A variant is a version of a power network with changes recorded relative to the base network (element addition and +deletion or parameter value changes). Changes to the base network automatically propagate to variants (see caveats below). + +All elements have a field "var_type" with possible values "base", "change" and "addition": + +* *base*: base-version of the element +* *change*: variant-version of an element with changes compared to base +* *addition*: element which only exists in a variant + +Elements with var_type "base" have an additional array field "not_in_var" holding indices of +variants in which the element has been removed or changed. + +Elements with var_types "change" and "addition" have an additional field "variant" referencing the variant index for the change/addition. + +`PandaHub.get_variant_filter(variants)` can be used to generate a MongoDB query filter mixin which restricts query results to a given variant or variants. + +Example variant filter for a single variant with index 1: + + {"$or": [{"var_type": "base", "not_in_var": {"$ne": 1}}, + {"var_type": {"$in": ["change", "addition"]}, "variant": 1}]} + +Filter for the base variant: + + {"$or": [{"var_type": {"$exists": False}}, + {"var_type": "base"}]} + +## Caveats + +### Conflicts + +* an element that was changed for a variant is changed in the base variant: + * field(s) changed in variant are changed to different values + * field(s) previously identical with variant are changed +* an element in the base variant is changed breaking the variant: + * net cannot be loaded completely - e.g.
bus of line added in variant is deleted + +### Restrictions +* bulk operations are difficult to implement for variants diff --git a/pandahub/__init__.py b/pandahub/__init__.py index 26652c1..26bcf66 100644 --- a/pandahub/__init__.py +++ b/pandahub/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.2.3" +__version__ = "0.2.4" from pandahub.lib.PandaHub import PandaHub, PandaHubError from pandahub.client.PandaHubClient import PandaHubClient diff --git a/pandahub/api/internal/db.py b/pandahub/api/internal/db.py index 563345a..236f40d 100644 --- a/pandahub/api/internal/db.py +++ b/pandahub/api/internal/db.py @@ -7,9 +7,11 @@ from pandahub.api.internal import settings from pandahub.api.internal.models import AccessToken, UserDB -client = motor.motor_asyncio.AsyncIOMotorClient( - settings.MONGODB_URL, uuidRepresentation="standard" -) +mongo_client_args = {"host": settings.MONGODB_URL, "uuidRepresentation": "standard", "connect": False} +if settings.MONGODB_USER: + mongo_client_args |= {"username": settings.MONGODB_USER, "password": settings.MONGODB_PASSWORD} + +client = motor.motor_asyncio.AsyncIOMotorClient(**mongo_client_args) client.get_io_loop = asyncio.get_event_loop @@ -19,8 +21,29 @@ async def get_user_db(): - yield MongoDBUserDatabase(UserDB, collection) + if settings.COSMOSDB_COMPAT: + yield MongoDBUserDatabaseCosmos(UserDB, collection) + else: + yield MongoDBUserDatabase(UserDB, collection) async def get_access_token_db(): yield MongoDBAccessTokenDatabase(AccessToken, access_tokens_collection) + +class MongoDBUserDatabaseCosmos(MongoDBUserDatabase): + from typing import Optional + from fastapi_users.models import UD + async def get_by_email(self, email: str) -> Optional[UD]: + await self._initialize() + + user = await self.collection.find_one( + {"email": email} + ) + return self.user_db_model(**user) if user else None + + async def _initialize(self): + if not self.initialized: + if "email_1" not in await self.collection.index_information(): + await self.collection.create_index("id", unique=True) + await self.collection.create_index("email", unique=True) + self.initialized = True diff --git a/pandahub/api/internal/models.py b/pandahub/api/internal/models.py index d0c8a97..7f9dfec 100644 --- a/pandahub/api/internal/models.py +++ b/pandahub/api/internal/models.py @@ -1,9 +1,11 @@ from fastapi_users import models from fastapi_users.authentication.strategy.db import BaseAccessToken +from pandahub.api.internal.settings import REGISTRATION_ADMIN_APPROVAL + class User(models.BaseUser): - pass + is_active: bool = not REGISTRATION_ADMIN_APPROVAL class UserCreate(models.BaseUserCreate): diff --git a/pandahub/api/internal/settings.py b/pandahub/api/internal/settings.py index 045176b..07ed322 100644 --- a/pandahub/api/internal/settings.py +++ b/pandahub/api/internal/settings.py @@ -11,11 +11,25 @@ def settings_bool(var_name, default=None): return False return False +def get_secret(key, default=None): + secret = os.getenv(key, default) + if secret and os.path.isfile(secret): + with open(secret) as f: + secret = f.read() + return secret + # load variables from .env to environment variables load_dotenv() -MONGODB_URL = os.getenv("MONGODB_URL") or "mongodb://localhost:27017" -MONGODB_URL_GLOBAL_DATABASE = os.getenv("MONGODB_URL_GLOBAL_DATABASE") or None +MONGODB_URL = get_secret("MONGODB_URL") or "mongodb://localhost:27017" +MONGODB_USER = get_secret("MONGODB_USER") or None +MONGODB_PASSWORD = get_secret("MONGODB_PASSWORD") or None + +MONGODB_GLOBAL_DATABASE_URL = get_secret("MONGODB_GLOBAL_DATABASE_URL") 
or None +MONGODB_GLOBAL_DATABASE_USER = get_secret("MONGODB_GLOBAL_DATABASE_USER") or None +MONGODB_GLOBAL_DATABASE_PASSWORD = get_secret("MONGODB_GLOBAL_DATABASE_PASSWORD") or None +if not MONGODB_GLOBAL_DATABASE_URL: + MONGODB_GLOBAL_DATABASE_URL = os.getenv("MONGODB_URL_GLOBAL_DATABASE") or None EMAIL_VERIFICATION_REQUIRED = settings_bool("EMAIL_VERIFICATION_REQUIRED") @@ -28,8 +42,10 @@ def settings_bool(var_name, default=None): PASSWORD_RESET_URL = os.getenv("PASSWORD_RESET_URL") or "" EMAIL_VERIFY_URL = os.getenv("EMAIL_VERIFY_URL") or "" -SECRET = os.getenv("SECRET") +SECRET = get_secret("SECRET") or None REGISTRATION_ENABLED = settings_bool("REGISTRATION_ENABLED", default=True) +REGISTRATION_ADMIN_APPROVAL = settings_bool("REGISTRATION_ADMIN_APPROVAL", default=False) DATATYPES_MODULE = os.getenv("DATATYPES_MODULE") or "pandahub.lib.datatypes" +COSMOSDB_COMPAT = settings_bool("COSMOSDB_COMPAT", default=False) diff --git a/pandahub/api/main.py b/pandahub/api/main.py index 41112db..8fd49b8 100644 --- a/pandahub/api/main.py +++ b/pandahub/api/main.py @@ -4,7 +4,7 @@ from fastapi.responses import JSONResponse from pandahub.lib.PandaHub import PandaHubError -from pandahub.api.routers import net, projects, timeseries, users, auth +from pandahub.api.routers import net, projects, timeseries, users, auth, variants app = FastAPI() @@ -25,6 +25,7 @@ app.include_router(timeseries.router) app.include_router(users.router) app.include_router(auth.router) +app.include_router(variants.router) @app.exception_handler(PandaHubError) diff --git a/pandahub/api/routers/net.py b/pandahub/api/routers/net.py index af49e8c..3bf8d39 100644 --- a/pandahub/api/routers/net.py +++ b/pandahub/api/routers/net.py @@ -38,7 +38,6 @@ class WriteNetwork(BaseModel): @router.post("/write_network_to_db") def write_network_to_db(data: WriteNetwork, ph=Depends(pandahub)): - print("WRITING NET", data) params = data.dict() params["net"] = pp.from_json_string(params["net"]) ph.write_network_to_db(**params) diff --git a/pandahub/api/routers/projects.py b/pandahub/api/routers/projects.py index 53b2926..aadc16d 100644 --- a/pandahub/api/routers/projects.py +++ b/pandahub/api/routers/projects.py @@ -52,7 +52,6 @@ class SetActiveProjectModel(BaseModel): @router.post("/set_active_project") def set_active_project(data: SetActiveProjectModel, ph=Depends(pandahub)): ph.set_active_project(**data.dict()) - print("ACTIVATED PROJECT", ph.active_project) return str(ph.active_project["_id"]) @@ -74,7 +73,6 @@ class SetProjectSettingsModel(BaseModel): @router.post("/set_project_settings") def set_project_settings(data: SetProjectSettingsModel, ph=Depends(pandahub)): - print("SET PROJECT SETTINGS", data.dict()) ph.set_project_settings(**data.dict()) class SetProjectSettingsValueModel(BaseModel): diff --git a/pandahub/api/routers/timeseries.py b/pandahub/api/routers/timeseries.py index 31d596f..ab226bc 100644 --- a/pandahub/api/routers/timeseries.py +++ b/pandahub/api/routers/timeseries.py @@ -29,7 +29,6 @@ class GetTimeSeriesModel(BaseModel): def get_timeseries_from_db(data: GetTimeSeriesModel, ph=Depends(pandahub)): if data.timestamp_range is not None: data.timestamp_range = [pd.Timestamp(t) for t in data.timestamp_range] - print("GETTING TIMESERIES FROM DB", data) ts = ph.get_timeseries_from_db(**data.dict()) return ts.to_json(date_format="iso") @@ -46,14 +45,28 @@ class MultiGetTimeSeriesModel(BaseModel): def multi_get_timeseries_from_db(data: MultiGetTimeSeriesModel, ph=Depends(pandahub)): if data.timestamp_range is not None: 
data.timestamp_range = [pd.Timestamp(t) for t in data.timestamp_range] - print("GETTING TS", data) - ts = ph.multi_get_timeseries_from_db(**data.dict()) + ts = ph.multi_get_timeseries_from_db(**data.dict(), include_metadata=True) for i, data in enumerate(ts): - print("DATA", i, data) ts[i]["timeseries_data"] = data["timeseries_data"].to_json(date_format="iso") return ts +class GetTimeseriesMetadataModel(BaseModel): + project_id: str + filter_document: Optional[dict] = {} + global_database: Optional[bool] = False + +@router.post("/get_timeseries_metadata") +def get_timeseries_metadata(data: GetTimeseriesMetadataModel, ph=Depends(pandahub)): + ph.set_active_project_by_id(data.project_id) + ts = ph.get_timeseries_metadata( + filter_document=data.filter_document, + global_database=data.global_database + ) + ts = json.loads(ts.to_json(orient="index")) + return ts + + class WriteTimeSeriesModel(BaseModel): timeseries: str project_id: Optional[str] = None @@ -69,7 +82,6 @@ class WriteTimeSeriesModel(BaseModel): @router.post("/write_timeseries_to_db") def write_timeseries_to_db(data: WriteTimeSeriesModel, ph=Depends(pandahub)): data.timeseries = pd.Series(json.loads(data.timeseries)) - print("WRITING TS", data.timeseries) data.timeseries.index = pd.to_datetime(data.timeseries.index) ph.write_timeseries_to_db(**data.dict()) return True diff --git a/pandahub/api/routers/variants.py b/pandahub/api/routers/variants.py new file mode 100644 index 0000000..91321f4 --- /dev/null +++ b/pandahub/api/routers/variants.py @@ -0,0 +1,68 @@ +import json + +import pandas as pd +from fastapi import APIRouter, Depends +from pydantic import BaseModel +from pydantic.typing import Optional + +from pandahub.api.dependencies import pandahub + +router = APIRouter( + prefix="/variants", + tags=["variants"] +) + + +# ------------------------------- +# ROUTES +# ------------------------------- + +class GetVariantsModel(BaseModel): + project_id: str + net_id: int + +@router.post("/get_variants") +def get_variants(data: GetVariantsModel, ph=Depends(pandahub)): + project_id = data.project_id + ph.set_active_project_by_id(project_id) + db = ph._get_project_database() + + variants = db["variant"].find({"net_id": data.net_id}, projection={"_id": 0}) + response = {} + for var in variants: + response[var.pop("index")] = var + return response + + +class CreateVariantModel(BaseModel): + project_id: str + variant_data: dict + +@router.post("/create_variant") +def create_variant(data: CreateVariantModel, ph=Depends(pandahub)): + project_id = data.project_id + ph.set_active_project_by_id(project_id) + return ph.create_variant(data.variant_data) + +class DeleteVariantModel(BaseModel): + project_id: str + net_id: int + index: int + +@router.post("/delete_variant") +def delete_variant(data: DeleteVariantModel, ph=Depends(pandahub)): + project_id = data.project_id + ph.set_active_project_by_id(project_id) + return ph.delete_variant(data.net_id, data.index) + +class UpdateVariantModel(BaseModel): + project_id: str + net_id: int + index: int + data: dict + +@router.post("/update_variant") +def update_variant(data: UpdateVariantModel, ph=Depends(pandahub)): + project_id = data.project_id + ph.set_active_project_by_id(project_id) + return ph.update_variant(data.net_id, data.index, data.data) diff --git a/pandahub/client/user_management.py b/pandahub/client/user_management.py index 2c10459..132cc6c 100644 --- a/pandahub/client/user_management.py +++ b/pandahub/client/user_management.py @@ -27,8 +27,6 @@ def login(): _login(url, email, password, 
cert_path, key_path) def _login(url, email, password, cert_path=None, key_path=None): - print("Logging into pandbase at {}".format(url)) - print("pandahub API URL {}".format(url)) path = url + "/auth/login" cert = None @@ -39,9 +37,7 @@ def _login(url, email, password, cert_path=None, key_path=None): if r.status_code == 200: token = r.json()["access_token"] - print("Writing access token to config file...") write_config(url, token, cert_path, key_path) - print("Login succesfull.") elif r.status_code == 400: if "required SSL certificate" in r.text: print("Login failed - no client certificate provided") diff --git a/pandahub/lib/PandaHub.py b/pandahub/lib/PandaHub.py index 6d147d3..c51b994 100644 --- a/pandahub/lib/PandaHub.py +++ b/pandahub/lib/PandaHub.py @@ -1,26 +1,35 @@ # -*- coding: utf-8 -*- +import builtins +import importlib +import json +import logging +import traceback +from inspect import signature, _empty +from typing import Optional + import numpy as np import pandas as pd -import pandapower as pp -import pandapipes as pps -from pandahub.lib.database_toolbox import create_timeseries_document, convert_timeseries_to_subdocuments, convert_dataframes_to_dicts, decompress_timeseries_data, convert_geojsons -from pandahub.lib.datatypes import datatypes -from pandahub.lib.database_toolbox import create_timeseries_document, convert_timeseries_to_subdocuments, convert_dataframes_to_dicts, json_to_object -from pandahub.api.internal import settings -from pymongo import MongoClient, ReplaceOne, DESCENDING, GEOSPHERE -from pandapower.io_utils import JSONSerializableClass -from pandapower.std_types import load_std_type +from bson.errors import InvalidId from bson.objectid import ObjectId from pydantic.types import UUID4 -from typing import Optional -from inspect import signature, _empty -import traceback -import logging -import importlib -import builtins +from pymongo import MongoClient, ReplaceOne, DESCENDING + +import pandapipes as pps +from pandapipes import from_json_string as from_json_pps +import pandapower as pp +import pandapower.io_utils as io_pp +from pandahub.api.internal import settings +from pandahub.lib.database_toolbox import create_timeseries_document, convert_timeseries_to_subdocuments, \ + convert_element_to_dict, json_to_object, serialize_object_data, get_dtypes +from pandahub.lib.database_toolbox import decompress_timeseries_data, convert_geojsons + logger = logging.getLogger(__name__) from pandahub import __version__ +from packaging import version + + + # ------------------------- # Exceptions # ------------------------- @@ -48,19 +57,26 @@ class PandaHub: # Initialization # ------------------------- - def __init__(self, connection_url=None, check_server_available=False, user_id=None): - if connection_url is None: - connection_url = settings.MONGODB_URL - if not connection_url.startswith('mongodb://'): - raise PandaHubError("Connection URL needs to point to a mongodb instance: 'mongodb://..'") - self.mongo_client = MongoClient(host=connection_url, uuidRepresentation="standard") + def __init__(self, connection_url=settings.MONGODB_URL, connection_user = settings.MONGODB_USER, + connection_password=settings.MONGODB_PASSWORD, check_server_available=False, user_id=None): + + mongo_client_args = {"host": connection_url, "uuidRepresentation": "standard", "connect":False} + if connection_user: + mongo_client_args |= {"username": connection_user, "password": connection_password} + self.mongo_client = MongoClient(**mongo_client_args) self.mongo_client_global_db = None 
self.active_project = None self.user_id = user_id + self.base_variant_filter = { + "$or": [ + {"var_type": {"$exists": False}}, + {"var_type": "base"}, + {"var_type": np.nan}, + ] + } if check_server_available: self.server_is_available() - # ------------------------- # Database connection checks # ------------------------- @@ -88,7 +104,6 @@ def check_connection_status(self): except (ServerSelectionTimeoutError, timeout) as e: return "connection timeout" - # ------------------------- # Permission check # ------------------------- @@ -124,7 +139,6 @@ def get_permissions_by_role(self, role): permissions.append(perm) return permissions - # ------------------------- # User handling # ------------------------- @@ -145,7 +159,6 @@ def _get_user(self): ) return user - # ------------------------- # Project handling # ------------------------- @@ -155,7 +168,7 @@ def create_project(self, name, settings=None, realm=None, metadata=None, project if self.project_exists(name, realm): raise PandaHubError("Project already exists") if settings is None: - settings = {} + settings = {} if metadata is None: metadata = {} project_data = {"name": name, @@ -166,7 +179,7 @@ def create_project(self, name, settings=None, realm=None, metadata=None, project if project_id: project_data["_id"] = project_id if self.user_id is not None: - project_data["users"] = {self.user_id: "owner"} + project_data["users"] = {self.user_id: "owner"} self.mongo_client["user_management"]["projects"].insert_one(project_data) if activate: self.set_active_project(name, realm) @@ -178,7 +191,8 @@ def delete_project(self, i_know_this_action_is_final=False, project_id=None): project_id = self.active_project["_id"] self.check_permission("write") if not i_know_this_action_is_final: - raise PandaHubError("Calling this function will delete the whole project and all the nets stored within. It can not be reversed. Add 'i_know_this_action_is_final=True' to confirm.") + raise PandaHubError( + "Calling this function will delete the whole project and all the nets stored within. It can not be reversed. 
Add 'i_know_this_action_is_final=True' to confirm.") self.mongo_client.drop_database(str(project_id)) self.mongo_client.user_management.projects.delete_one({"_id": project_id}) self.active_project = None @@ -191,7 +205,7 @@ def get_projects(self): else: filter_dict = {"users.{}".format(self.user_id): {"$exists": True}} else: - filter_dict = {"users": {"$exists": False}} + filter_dict = {"users": {"$exists": False}} db = self.mongo_client["user_management"] projects = list(db["projects"].find(filter_dict)) return [{ @@ -217,9 +231,10 @@ def set_active_project(self, project_name, realm=None): def set_active_project_by_id(self, project_id): try: - self.active_project = self._get_project_document({"_id": ObjectId(project_id)}) - except: - self.active_project = self._get_project_document({"_id": project_id}) + project_id = ObjectId(project_id) + except InvalidId: + pass + self.active_project = self._get_project_document({"_id": project_id}) def rename_project(self, project_name): self.has_permission("write") @@ -244,7 +259,7 @@ def change_realm(self, realm): def lock_project(self): db = self.mongo_client["user_management"]["projects"] result = db.update_one( - {"_id": self.active_project["_id"],}, + {"_id": self.active_project["_id"], }, {"$set": {"locked": True, "locked_by": self.user_id}} ) return result.acknowledged and result.modified_count > 0 @@ -267,7 +282,6 @@ def force_unlock_project(self, project_id): else: raise PandaHubError("You don't have rights to access this project", 403) - def project_exists(self, project_name=None, realm=None): project_collection = self.mongo_client["user_management"].projects project = project_collection.find_one({"name": project_name, "realm": realm}) @@ -276,13 +290,14 @@ def project_exists(self, project_name=None, realm=None): def _get_project_document(self, filter_dict: dict) -> Optional[dict]: project_collection = self.mongo_client["user_management"].projects projects = list(project_collection.find(filter_dict)) - if len(projects) == 0: #project doesn't exist + if len(projects) == 0: # project doesn't exist return None if len(projects) > 1: - raise PandaHubError("Duplicate Project detected. This should never happen if you create projects through the API. Remove duplicate projects manually in the database.") + raise PandaHubError( + "Duplicate Project detected. This should never happen if you create projects through the API. 
Remove duplicate projects manually in the database.") project_doc = projects[0] if "users" not in project_doc: - return project_doc #project is not user protected + return project_doc # project is not user protected user = self._get_user() if not user["is_superuser"] and self.user_id not in project_doc["users"].keys(): @@ -296,42 +311,61 @@ def _get_project_database(self): return self.mongo_client[str(self.active_project["_id"])] def _get_global_database(self): - if self.mongo_client_global_db is None and not settings.MONGODB_URL_GLOBAL_DATABASE is None: - self.mongo_client_global_db = MongoClient( - host=settings.MONGODB_URL_GLOBAL_DATABASE, uuidRepresentation="standard" - ) + if self.mongo_client_global_db is None and settings.MONGODB_GLOBAL_DATABASE_URL is not None: + mongo_client_args = {"host": settings.MONGODB_GLOBAL_DATABASE_URL, "uuidRepresentation": "standard"} + if settings.MONGODB_GLOBAL_DATABASE_USER: + mongo_client_args |= {"username": settings.MONGODB_GLOBAL_DATABASE_USER, + "password": settings.MONGODB_GLOBAL_DATABASE_PASSWORD} + self.mongo_client_global_db = MongoClient(**mongo_client_args) if self.mongo_client_global_db is None: return self.mongo_client["global_data"] else: return self.mongo_client_global_db["global_data"] - def get_project_version(self): return self.active_project.get("version", "0.2.2") def upgrade_project_to_latest_version(self): - from packaging import version - # TODO check that user has right to write user_management # TODO these operations should be encapsulated in a transaction in order to avoid # inconsistent Database states in case of occuring errors - if version.parse(self.get_project_version()) <= version.parse("0.2.2"): + if version.parse(self.get_project_version()) < version.parse("0.2.4"): db = self._get_project_database() all_collection_names = db.list_collection_names() old_net_collections = [name for name in all_collection_names if not name.startswith("_") and - not name=="timeseries" and not name.startswith("net_")] + not name == "timeseries" and not name.startswith("net_")] for element in old_net_collections: db[element].rename(self._collection_name_of_element(element)) - project_collection = self.mongo_client["user_management"].projects - project_collection.find_one_and_update({"_id": self.active_project["_id"]}, - {"$set": {"version": __version__}}) - logger.info(f"upgraded projekt '{self.active_project['name']}' from version" - f" {self.get_project_version()} to version {__version__}") - self.active_project["version"] = __version__ + if version.parse(self.get_project_version()) < version.parse("0.2.4"): + db = self._get_project_database() + # for all networks + for d in list(db["_networks"].find({}, projection={"sector":1, "data":1})): + # load old format + if d.get("sector", "power") == "power": + data = dict((k, json.loads(v, cls=io_pp.PPJSONDecoder)) for k, v in d['data'].items()) + else: + data = dict((k, from_json_pps(v)) for k, v in d['data'].items()) + # save new format + for key, dat in data.items(): + try: + json.dumps(dat) + except: + dat = f"serialized_{json.dumps(data, cls=io_pp.PPJSONEncoder)}" + data[key] = dat + + db["_networks"].find_one_and_update({"_id":d["_id"]}, + {"$set": {"data": data}}) + + project_collection = self.mongo_client["user_management"].projects + project_collection.find_one_and_update({"_id": self.active_project["_id"]}, + {"$set": {"version": __version__}}) + logger.info(f"upgraded projekt '{self.active_project['name']}' from version" + f" {self.get_project_version()} to version {__version__}") + 
self.active_project["version"] = __version__ # ------------------------- # Project settings and metadata @@ -381,6 +415,7 @@ def restore_empty(data): t = getattr(builtins, val.replace("_empty_", "")) val = t() data[key] = val + restore_empty(metadata) return metadata @@ -407,19 +442,19 @@ def replace_empty(updated, data): updated[key] = sub_upd else: updated[key] = val + update_metadata = dict() replace_empty(update_metadata, new_metadata) self.mongo_client.user_management.projects.update_one( {"_id": project_data['_id']}, [ - {"$unset": "metadata"}, # deletion needed because set won't delete not existing fields + {"$unset": "metadata"}, # deletion needed because set won't delete not existing fields {"$set": {"metadata": update_metadata}} ] ) self.active_project["metadata"] = update_metadata - # ------------------------- # Project user management # ------------------------- @@ -428,7 +463,7 @@ def get_project_users(self): self.check_permission("user_management") project_users = self.active_project["users"] users = self.mongo_client["user_management"]["users"].find( - {"id": { "$in": [UUID4(user_id) for user_id in project_users.keys()] }} + {"id": {"$in": [UUID4(user_id) for user_id in project_users.keys()]}} ) enriched_users = [] for user in users: @@ -475,13 +510,12 @@ def remove_user_from_project(self, email): {"$unset": {f"users.{user_id}": ""}} ) - # ------------------------- # Net handling # ------------------------- - def get_net_from_db(self, name, include_results=True, only_tables=None, project_id=None, - geo_mode="string"): + def get_net_from_db(self, name, include_results=True, only_tables=None, project_id=None, + geo_mode="string", variants=[]): if project_id: self.set_active_project_by_id(project_id) self.check_permission("read") @@ -489,118 +523,174 @@ def get_net_from_db(self, name, include_results=True, only_tables=None, project_ _id = self._get_id_from_name(name, db) if _id is None: return None - return self.get_net_from_db_by_id(_id, include_results, only_tables, geo_mode=geo_mode) + return self.get_net_from_db_by_id(_id, include_results, only_tables, geo_mode=geo_mode, variants=variants) def get_net_from_db_by_id(self, id, include_results=True, only_tables=None, convert=True, - geo_mode="string"): + geo_mode="string", variants=[]): self.check_permission("read") return self._get_net_from_db_by_id(id, include_results, only_tables, convert=convert, - geo_mode=geo_mode) + geo_mode=geo_mode, variants=variants) + + def _get_net_from_db_by_id(self, id_, include_results=True, only_tables=None, convert=True, + geo_mode="string", variants=[]): + db = self._get_project_database() + meta = self._get_network_metadata(db, id_) + + package = pp if meta.get("sector", "power") == "power" else pps + net = package.create_empty_network() + + # add all elements that are stored as dataframes + collection_names = self._get_net_collections(db) + for collection_name in collection_names: + el = self._element_name_of_collection(collection_name) + self._add_element_from_collection(net, db, el, id_, include_results=include_results, + only_tables=only_tables, geo_mode=geo_mode, + variants=variants) + # add data that is not stored in dataframes + self.deserialize_and_update_data(net, meta) + + if convert: + package.convert_format(net) + + return net + + def deserialize_and_update_data(self, net, meta): + if version.parse(self.get_project_version()) <= version.parse("0.2.4"): + if meta.get("sector", "power") == "power": + data = dict((k, json.loads(v, cls=io_pp.PPJSONDecoder)) for k, v in 
meta['data'].items()) + net.update(data) + else: + data = dict((k, from_json_pps(v)) for k, v in meta['data'].items()) + net.update(data) + else: + for key, value in meta["data"].items(): + if type(value) == str and value.startswith("serialized_"): + value = json.loads(value[11:], cls=io_pp.PPJSONDecoder) + net[key] = value def get_subnet_from_db(self, name, bus_filter=None, include_results=True, - add_edge_branches=True, geo_mode="string"): + add_edge_branches=True, geo_mode="string", variants=[]): self.check_permission("read") db = self._get_project_database() _id = self._get_id_from_name(name, db) if _id is None: return None + return self.get_subnet_from_db_by_id(_id, bus_filter=bus_filter, include_results=include_results, + add_edge_branches=add_edge_branches, geo_mode=geo_mode, variants=variants) - meta = self._get_network_metadata(db, _id) + def get_subnet_from_db_by_id(self, net_id, bus_filter=None, include_results=True, + add_edge_branches=True, geo_mode="string", variants=[], + ignore_elements=[]): + db = self._get_project_database() + meta = self._get_network_metadata(db, net_id) + dtypes = db["_networks"].find_one({"_id": net_id}, projection={"dtypes"}) net = pp.create_empty_network() + if db[self._collection_name_of_element("bus")].count_documents({}) == 0: + net["empty"] = True + # Add buses with filter if bus_filter is not None: - self._add_element_from_collection(net, db, "bus", _id, bus_filter, geo_mode=geo_mode) + self._add_element_from_collection(net, db, "bus", net_id, bus_filter, geo_mode=geo_mode, + variants=variants, dtypes=dtypes) buses = net.bus.index.tolist() branch_operator = "$or" if add_edge_branches else "$and" # Add branch elements connected to at least one bus - self._add_element_from_collection(net, db, "line", _id, + self._add_element_from_collection(net, db, "line", net_id, {branch_operator: [ {"from_bus": {"$in": buses}}, - {"to_bus": {"$in": buses}}]}, geo_mode=geo_mode) - self._add_element_from_collection(net, db, "trafo", _id, + {"to_bus": {"$in": buses}}]}, geo_mode=geo_mode, + variants=variants, dtypes=dtypes) + self._add_element_from_collection(net, db, "trafo", net_id, {branch_operator: [ {"hv_bus": {"$in": buses}}, - {"lv_bus": {"$in": buses}}]}, geo_mode=geo_mode) - self._add_element_from_collection(net, db, "trafo3w", _id, + {"lv_bus": {"$in": buses}}]}, geo_mode=geo_mode, + variants=variants, dtypes=dtypes) + self._add_element_from_collection(net, db, "trafo3w", net_id, {branch_operator: [ {"hv_bus": {"$in": buses}}, {"mv_bus": {"$in": buses}}, - {"lv_bus": {"$in": buses}}]}, geo_mode=geo_mode) + {"lv_bus": {"$in": buses}}]}, geo_mode=geo_mode, + variants=variants, dtypes=dtypes) - self._add_element_from_collection(net, db, "switch", _id, + self._add_element_from_collection(net, db, "switch", net_id, {"$and": [ {"et": "b"}, {branch_operator: [ {"bus": {"$in": buses}}, {"element": {"$in": buses}} - ]} - ] - }, geo_mode=geo_mode) + ]} + ] + }, geo_mode=geo_mode, variants=variants, dtypes=dtypes) if add_edge_branches: # Add buses on the other side of the branches branch_buses = set(net.trafo.hv_bus.values) | set(net.trafo.lv_bus.values) | \ - set(net.line.from_bus) | set(net.line.to_bus) | \ - set(net.trafo3w.hv_bus.values) | set(net.trafo3w.mv_bus.values) | \ - set(net.trafo3w.lv_bus.values) | set(net.switch.bus) | set(net.switch.element) + set(net.line.from_bus) | set(net.line.to_bus) | \ + set(net.trafo3w.hv_bus.values) | set(net.trafo3w.mv_bus.values) | \ + set(net.trafo3w.lv_bus.values) | set(net.switch.bus) | set(net.switch.element) 
branch_buses_outside = [int(b) for b in branch_buses - set(buses)] - self._add_element_from_collection(net, db, "bus", _id, geo_mode=geo_mode, - filter={"index": {"$in": branch_buses_outside}}) + self._add_element_from_collection(net, db, "bus", net_id, geo_mode=geo_mode, variants=variants, + filter={"index": {"$in": branch_buses_outside}}, + dtypes=dtypes) buses = net.bus.index.tolist() switch_filter = {"$or": [ - {"$and": [ - {"et": "t"}, - {"element": {"$in": net.trafo.index.tolist()}} - ] - }, - {"$and": [ - {"et": "l"}, - {"element": {"$in": net.line.index.tolist()}} - ] - }, - {"$and": [ - {"et": "t3"}, - {"element": {"$in": net.trafo3w.index.tolist()}} - ] - } - ] - } - self._add_element_from_collection(net, db,"switch", _id, switch_filter, geo_mode=geo_mode) - - #add node elements + {"$and": [ + {"et": "t"}, + {"element": {"$in": net.trafo.index.tolist()}} + ] + }, + {"$and": [ + {"et": "l"}, + {"element": {"$in": net.line.index.tolist()}} + ] + }, + {"$and": [ + {"et": "t3"}, + {"element": {"$in": net.trafo3w.index.tolist()}} + ] + } + ] + } + self._add_element_from_collection(net, db, "switch", net_id, switch_filter, + geo_mode=geo_mode, variants=variants, dtypes=dtypes) + + # add node elements node_elements = ["load", "sgen", "gen", "ext_grid", "shunt", "xward", "ward", "motor", "storage"] branch_elements = ["trafo", "line", "trafo3w", "switch", "impedance"] all_elements = node_elements + branch_elements + ["bus"] + all_elements = list(set(all_elements) - set(ignore_elements)) - #add all node elements that are connected to buses within the network + # add all node elements that are connected to buses within the network for element in node_elements: filter = {"bus": {"$in": buses}} - self._add_element_from_collection(net, db, element, _id, + self._add_element_from_collection(net, db, element, net_id, filter=filter, geo_mode=geo_mode, - include_results=include_results) + include_results=include_results, + variants=variants, dtypes=dtypes) - #add all other collections + # add all other collections collection_names = self._get_net_collections(db) for collection in collection_names: table_name = self._element_name_of_collection(collection) - #skip all element tables that we have already added - if table_name in all_elements: + # skip all element tables that we have already added + if table_name in all_elements or table_name in ignore_elements: continue - #for tables that share an index with an element (e.g. load->res_load) load only relevant entries + # for tables that share an index with an element (e.g. load->res_load) load only relevant entries for element in all_elements: if table_name.startswith(element + "_") or table_name.startswith("net_res_" + element): filter = {"index": {"$in": net[element].index.tolist()}} break else: - #all other tables (e.g. std_types) are loaded without filter + # all other tables (e.g. 
std_types) are loaded without filter filter = None - self._add_element_from_collection(net, db, table_name, _id, + self._add_element_from_collection(net, db, table_name, net_id, filter=filter, geo_mode=geo_mode, - include_results=include_results) - net.update(meta["data"]) + include_results=include_results, + variants=variants, dtypes=dtypes) + self.deserialize_and_update_data(net, meta) return net def _collection_name_of_element(self, element): @@ -609,18 +699,16 @@ def _collection_name_of_element(self, element): def _element_name_of_collection(self, collection): return collection[4:] # remove "net_" prefix - def write_network_to_db(self, net, name, overwrite=True, project_id=None): + def write_network_to_db(self, net, name, sector="power", overwrite=True, project_id=None, + metadata=None): if project_id: self.set_active_project_by_id(project_id) self.check_permission("write") db = self._get_project_database() - if isinstance(net, pp.pandapowerNet): - net_type = "power" - elif isinstance(net, pps.pandapipesNet): - net_type = "pipe" - else: - raise PandaHubError("net must be a pandapower or pandapipes object") +# if not isinstance(net, pp.pandapowerNet) and not isinstance(net, pps.pandapipesNet): +# raise PandaHubError("net must be a pandapower or pandapipes object") + if self._network_with_name_exists(name, db): if overwrite: self.delete_net_from_db(name) @@ -629,27 +717,67 @@ def write_network_to_db(self, net, name, overwrite=True, project_id=None): max_id_network = db["_networks"].find_one(sort=[("_id", -1)]) _id = 0 if max_id_network is None else max_id_network["_id"] + 1 - dataframes, other_parameters, types = convert_dataframes_to_dicts(net, _id, self._datatypes) + data = {} + dtypes = {} + version_ = version.parse(self.get_project_version()) + for element, element_data in net.items(): + if element.startswith("_") or element.startswith("res"): + continue + if isinstance(element_data, pd.core.frame.DataFrame): + # create type lookup + dtypes[element] = get_dtypes(element_data, self._datatypes.get(element)) + if element_data.empty: + continue + # convert pandapower dataframe object to dict and save to db + element_data = convert_element_to_dict(element_data.copy(deep=True), _id, self._datatypes.get(element)) + self._write_element_to_db(db, element, element_data) - self._write_net_collections_to_db(db, dataframes) + else: + element_data = serialize_object_data(element, element_data, version_) + if element_data: + data[element] = element_data + # write network metadata net_dict = {"_id": _id, "name": name, - "dtypes": types, - "net_type": net_type, - "data": other_parameters} + "sector": sector, + "dtypes": dtypes, + "data": data} + + if metadata is not None: + net_dict.update(metadata) db["_networks"].insert_one(net_dict) def _write_net_collections_to_db(self, db, collections): - for key, item in collections.items(): - if len(item) > 0: - collection_name = self._collection_name_of_element(key) - try: - db[collection_name].insert_many(item, ordered=True) - db[collection_name].create_index([("net_id", DESCENDING)]) - except: - traceback.print_exc() - print(f"\nFAILED TO WRITE TABLE '{key}' TO DATABASE! 
(details above)") + for element, element_data in collections.items(): + self._write_element_to_db(db, element, element_data) + + def _write_element_to_db(self, db, element, element_data): + existing_collections = set(db.list_collection_names()) + def add_index(element): + columns = {"bus": ["net_id", "index"], + "line": ["net_id", "index", "from_bus", "to_bus"], + "trafo": ["net_id", "index", "hv_bus", "lv_bus"], + "switch": ["net_id", "index", "bus", "element", "et"], + "substation": ["net_id", "index"], + "area": ["net_id", "index", "name"]}.get(element, []) + if element in ["load", "sgen", "gen", "ext_grid", "shunt", "xward", "ward", "motor", + "storage"]: + columns = ["net_id", "bus"] + for c in columns: + logger.info(f"creating index on '{c}' in collection '{element}'") + db[self._collection_name_of_element(element)].create_index([(c, DESCENDING)]) + + + collection_name = self._collection_name_of_element(element) + if len(element_data) > 0: + try: + db[collection_name].insert_many(element_data, ordered=False) + if collection_name not in existing_collections: + add_index(element) + except: + traceback.print_exc() + print(f"\nFAILED TO WRITE TABLE '{element}' TO DATABASE! (details above)") def delete_net_from_db(self, name): self.check_permission("write") @@ -657,7 +785,7 @@ def delete_net_from_db(self, name): _id = self._get_id_from_name(name, db) if _id is None: raise PandaHubError("Network does not exist", 404) - collection_names = self._get_net_collections(db) #TODO + collection_names = self._get_net_collections(db) # TODO for collection_name in collection_names: db[collection_name].delete_many({'net_id': _id}) db["_networks"].delete_one({"_id": _id}) @@ -674,7 +802,7 @@ def load_networks_meta(self, networks, load_area=False): proj = {"net": 0} if not load_area: proj["area_geojson"] = 0 - nets = pd.DataFrame(list(db.find(fi,rojection=proj))) + nets = pd.DataFrame(list(db.find(fi, projection=proj))) return nets def _get_metadata_from_name(self, name, db): @@ -689,45 +817,39 @@ def _get_id_from_name(self, name, db): def _network_with_name_exists(self, name, db): return self._get_id_from_name(name, db) is not None - def _get_net_collections(self, db): - all_collection_names = db.list_collection_names() - return [name for name in all_collection_names if self._element_name_of_collection(name)] + def _get_net_collections(self, db, with_areas=True): + if with_areas: + collection_filter = {'name': {'$regex': '^net_'}} + else: + collection_filter = {'name': {'$regex': '^net_.*(? 
1: + variants = [int(var) for var in variants] # make sure variants are of type int + return {"$or": [{"var_type": "base", "not_in_var": {"$nin": variants}}, + {"var_type": {"$in": ["change", "addition"]}, "variant": {"$in": variants}}]} + else: + variants = variants[0] + if variants: + variants = int(variants) + return {"$or": [{"var_type": "base", "not_in_var": {"$ne": variants}}, + {"var_type": {"$in": ["change", "addition"]}, "variant": variants}]} + else: + return self.base_variant_filter # ------------------------- # Bulk operations @@ -950,27 +1234,26 @@ def bulk_update_in_db(self, data, document_ids, collection_name="tasks", global_ i = 0 for d in data: operations["UpdateOne"].append({ - "filter": {"_id": document_ids[i]}, - "update": {"$push": d}, - "upsert": False - }) - i+=1 + "filter": {"_id": document_ids[i]}, + "update": {"$push": d}, + "upsert": False + }) + i += 1 db[collection_name].bulk_write(operations) - # ------------------------- # Timeseries # ------------------------- - def write_timeseries_to_db(self, - timeseries, + def write_timeseries_to_db(self, + timeseries, data_type, ts_format="timestamp_value", compress_ts_data=False, - global_database=False, + global_database=False, collection_name="timeseries", - project_id=None, + project_id=None, **kwargs): """ This function can be used to write a timeseries to a MongoDB database. @@ -1038,7 +1321,9 @@ def write_timeseries_to_db(self, upsert=True ) logger.debug("document with _id {document['_id']} added to database") - + if kwargs.get("return_id"): + return document["_id"] + return None def bulk_write_timeseries_to_db(self, timeseries, data_type, meta_frame=None, @@ -1108,7 +1393,7 @@ def bulk_write_timeseries_to_db(self, timeseries, data_type, return [d["_id"] for d in documents] def update_timeseries_in_db(self, new_ts_content, document_id, collection_name="timeseries", - global_database=False): + global_database=False): """ This function can be used to append a timeseries to an existing timseries @@ -1145,7 +1430,7 @@ def update_timeseries_in_db(self, new_ts_content, document_id, collection_name=" {"$push": ts_update}, upsert=False ) - #logger.info("document updated in database") + # logger.info("document updated in database") def bulk_update_timeseries_in_db(self, new_ts_content, document_ids, collection_name="timeseries", global_database=False): @@ -1186,8 +1471,7 @@ def bulk_update_timeseries_in_db(self, new_ts_content, document_ids, collection_ self.bulk_update_in_db(documents, document_ids, project=project, collection_name="timeseries", global_database=global_database) - #logger.debug(f"{len(documents)} documents added to database") - + # logger.debug(f"{len(documents)} documents added to database") def get_timeseries_from_db(self, filter_document={}, timestamp_range=None, ts_format="timestamp_value", @@ -1251,16 +1535,21 @@ def get_timeseries_from_db(self, filter_document={}, timestamp_range=None, if timestamp_range: pipeline.append({"$project": {"timeseries_data": {"$filter": {"input": "$timeseries_data", "as": "timeseries_data", - "cond": {"$and": [{"$gte": ["$$timeseries_data.timestamp", timestamp_range[0]]}, - {"$lt": ["$$timeseries_data.timestamp", timestamp_range[1]]}]}}}}}) + "cond": {"$and": [{"$gte": [ + "$$timeseries_data.timestamp", + timestamp_range[0]]}, + {"$lt": [ + "$$timeseries_data.timestamp", + timestamp_range[ + 1]]}]}}}}}) pipeline.append({"$addFields": {"timestamps": "$timeseries_data.timestamp", - "values": "$timeseries_data.value" }}) + "values": "$timeseries_data.value"}}) if 
include_metadata: pipeline.append({"$project": {"timeseries_data": 0}}) else: - pipeline.append({"$project": {"timestamps":1, - "values":1, - "_id":0}}) + pipeline.append({"$project": {"timestamps": 1, + "values": 1, + "_id": 0}}) elif ts_format == "array": if not include_metadata: pipeline.append({"$project": {"timeseries_data": 1}}) @@ -1278,7 +1567,7 @@ def get_timeseries_from_db(self, filter_document={}, timestamp_range=None, timeseries_data = decompress_timeseries_data(data["timeseries_data"], ts_format) else: if ts_format == "timestamp_value": - timeseries_data = pd.Series(data["values"], + timeseries_data = pd.Series(data["values"], index=data["timestamps"], dtype="float64") elif ts_format == "array": @@ -1332,7 +1621,7 @@ def get_timeseries_metadata(self, filter_document, collection_name="timeseries", match_filter.append({key: filter_value}) if match_filter: pipeline.append({"$match": {"$and": match_filter}}) - projection = {"$project":{"timeseries_data":0}} + projection = {"$project": {"timeseries_data": 0}} pipeline.append(projection) metadata = list(db[collection_name].aggregate(pipeline)) df_metadata = pd.DataFrame(metadata) @@ -1352,7 +1641,7 @@ def add_metadata(self, filter_document, add_meta, global_database=False, meta_before = self.get_timeseries_metadata(filter_document, global_database=global_database, collection_name=collection_name) # add the new information to the metadata dict of the existing timeseries - if len(meta_before) > 1: #TODO is this the desired behaviour? Needs to specified + if len(meta_before) > 1: # TODO is this the desired behaviour? Needs to specified raise PandaHubError meta_copy = {**meta_before.iloc[0].to_dict(), **add_meta} # write new metadata to mongo db @@ -1360,14 +1649,14 @@ def add_metadata(self, filter_document, add_meta, global_database=False, meta_copy, upsert=True) return meta_copy - def multi_get_timeseries_from_db(self, filter_document={}, + def multi_get_timeseries_from_db(self, filter_document={}, timestamp_range=None, exclude_timestamp_range=None, include_metadata=False, ts_format="timestamp_value", compressed_ts_data=False, global_database=False, collection_name="timeseries", - project_id=None,**kwargs): + project_id=None, **kwargs): if project_id: self.set_active_project_by_id(project_id) if global_database: @@ -1391,14 +1680,18 @@ def multi_get_timeseries_from_db(self, filter_document={}, if timestamp_range: projection = {"timeseries_data": {"$filter": {"input": "$timeseries_data", "as": "timeseries_data", - "cond": {"$and": [{"$gte": ["$$timeseries_data.timestamp", timestamp_range[0]]}, - {"$lt": ["$$timeseries_data.timestamp", timestamp_range[1]]}]}}}} + "cond": {"$and": [{"$gte": ["$$timeseries_data.timestamp", + timestamp_range[0]]}, + {"$lt": ["$$timeseries_data.timestamp", + timestamp_range[1]]}]}}}} pipeline.append({"$project": projection}) if exclude_timestamp_range: projection = {"timeseries_data": {"$filter": {"input": "$timeseries_data", "as": "timeseries_data", - "cond": {"$or": [{"$lt": ["$$timeseries_data.timestamp", timestamp_range[0]]}, - {"$gte": ["$$timeseries_data.timestamp", timestamp_range[1]]}]}}}} + "cond": {"$or": [{"$lt": ["$$timeseries_data.timestamp", + timestamp_range[0]]}, + {"$gte": ["$$timeseries_data.timestamp", + timestamp_range[1]]}]}}}} pipeline.append({"$project": projection}) if not include_metadata: pipeline.append({"$project": {"timeseries_data": 1}}) @@ -1420,7 +1713,7 @@ def multi_get_timeseries_from_db(self, filter_document={}, if include_metadata: timeseries.append(ts) if 
exclude_timestamp_range is not None or timestamp_range is not None: - #TODO: Second query to get the metadata, since metadata is not returned if a projection on the subfield is used + # TODO: Second query to get the metadata, since metadata is not returned if a projection on the subfield is used metadata = db[collection_name].find_one({"_id": ts["_id"]}, projection={"timeseries_data": 0}) ts.update(metadata) else: @@ -1509,15 +1802,19 @@ def bulk_get_timeseries_from_db(self, filter_document={}, timestamp_range=None, if timestamp_range: projection = {"timeseries_data": {"$filter": {"input": "$timeseries_data", "as": "timeseries_data", - "cond": {"$and": [{"$gte": ["$$timeseries_data.timestamp", timestamp_range[0]]}, - {"$lt": ["$$timeseries_data.timestamp", timestamp_range[1]]}]}}}} + "cond": {"$and": [{"$gte": ["$$timeseries_data.timestamp", + timestamp_range[0]]}, + {"$lt": ["$$timeseries_data.timestamp", + timestamp_range[1]]}]}}}} projection = {**projection, **custom_projection} pipeline.append({"$project": projection}) if exclude_timestamp_range: projection = {"timeseries_data": {"$filter": {"input": "$timeseries_data", "as": "timeseries_data", - "cond": {"$or": [{"$lt": ["$$timeseries_data.timestamp", timestamp_range[0]]}, - {"$gte": ["$$timeseries_data.timestamp", timestamp_range[1]]}]}}}} + "cond": {"$or": [{"$lt": ["$$timeseries_data.timestamp", + timestamp_range[0]]}, + {"$gte": ["$$timeseries_data.timestamp", + timestamp_range[1]]}]}}}} projection = {**projection, **custom_projection} pipeline.append({"$project": projection}) pipeline.append({"$unwind": "$timeseries_data"}) @@ -1537,10 +1834,9 @@ def bulk_get_timeseries_from_db(self, filter_document={}, timestamp_range=None, timeseries = timeseries.pivot(columns=pivot_by_column, values="value") return timeseries - def delete_timeseries_from_db(self, element_type, data_type, netname=None, element_index=None, collection_name="timeseries", - **kwargs): + **kwargs): """ This function can be used to delete a single timeseries that matches the provided metadata from a MongoDB database. 
The element_type and data_type @@ -1588,7 +1884,6 @@ def delete_timeseries_from_db(self, element_type, data_type, netname=None, del_res = db[collection_name].delete_one(filter_document) return del_res - def bulk_del_timeseries_from_db(self, filter_document, collection_name="timeseries"): """ @@ -1644,7 +1939,6 @@ def bulk_del_timeseries_from_db(self, filter_document, # r = self.delete_project(project_name, i_know_this_action_is_final=True) # self.create_project(project_name) - # r = self.delete_project(project_name, i_know_this_action_is_final=True) # print(r.json()) diff --git a/pandahub/lib/database_toolbox.py b/pandahub/lib/database_toolbox.py index a424c65..634c330 100644 --- a/pandahub/lib/database_toolbox.py +++ b/pandahub/lib/database_toolbox.py @@ -9,6 +9,8 @@ import json import importlib logger = logging.getLogger(__name__) +from pandapower.io_utils import PPJSONEncoder +from packaging import version def get_document_hash(task): @@ -80,8 +82,8 @@ def add_timestamp_info_to_document(document, timeseries, ts_format): document["first_timestamp"] = timeseries.index[0] document["last_timestamp"] = timeseries.index[-1] document["num_timestamps"] = len(timeseries.index) - document["max_value"] = timeseries.max().item() - document["min_value"] = timeseries.min().item() + document["max_value"] = timeseries.max() + document["min_value"] = timeseries.min() return document @@ -111,7 +113,7 @@ def convert_timeseries_to_subdocuments(timeseries): def compress_timeseries_data(timeseries_data, ts_format): import blosc if ts_format == "timestamp_value": - timeseries_data = np.array([timeseries_data.index.astype(int), + timeseries_data = np.array([timeseries_data.index.astype(int), timeseries_data.values]) return blosc.compress(timeseries_data.tobytes(), shuffle=blosc.SHUFFLE, @@ -125,17 +127,17 @@ def compress_timeseries_data(timeseries_data, ts_format): def decompress_timeseries_data(timeseries_data, ts_format): import blosc if ts_format == "timestamp_value": - data = np.frombuffer(blosc.decompress(timeseries_data), - dtype=np.float64).reshape((35040,2), + data = np.frombuffer(blosc.decompress(timeseries_data), + dtype=np.float64).reshape((35040,2), order="F") return pd.Series(data[:,1], index=pd.to_datetime(data[:,0])) elif ts_format == "array": - return np.frombuffer(blosc.decompress(timeseries_data), + return np.frombuffer(blosc.decompress(timeseries_data), dtype=np.float64) - -def create_timeseries_document(timeseries, - data_type, + +def create_timeseries_document(timeseries, + data_type, ts_format="timestamp_value", compress_ts_data=False, **kwargs): @@ -185,17 +187,51 @@ def create_timeseries_document(timeseries, if not "_id" in document: # IDs set by users will not be overwritten document["_id"] = get_document_hash(document) if compress_ts_data: - document["timeseries_data"] = compress_timeseries_data(timeseries, + document["timeseries_data"] = compress_timeseries_data(timeseries, ts_format) else: if ts_format == "timestamp_value": document["timeseries_data"] = convert_timeseries_to_subdocuments(timeseries) elif ts_format == "array": document["timeseries_data"] = list(timeseries.values) - + return document -def convert_dataframes_to_dicts(net, _id, datatypes=None): +def convert_element_to_dict(element_data, net_id, default_dtypes=None): + ''' + Converts a pandapower pandas.DataFrame element into dictonary, casting columns to default dtypes. 
+ * Columns of type Object are serialized to json + * Columns named "geo" or "*_geo" containing strings are parsed into dicts + * net_id and index (from element_data df index) are added as values + + Parameters + ---------- + element_data: pandas.DataFrame + pandapower element table to convert to dict + net_id: int + Network id + default_dtypes: dict + Default dtypes for columns in element_data + + Returns + ------- + dict + Record-orientated dict representation of element_data + + ''' + if default_dtypes is not None: + for column in element_data.columns: + if column in default_dtypes: + element_data[column] = element_data[column].astype(default_dtypes[column], errors="ignore") + + if "object" in element_data.columns: + element_data["object"] = element_data["object"].apply(object_to_json) + element_data["index"] = element_data.index + element_data["net_id"] = net_id + load_geojsons(element_data) + return element_data.to_dict(orient="records") + +def convert_dataframes_to_dicts(net, net_id, version_, datatypes=None): if datatypes is None: datatypes = getattr(importlib.import_module(settings.DATATYPES_MODULE), "datatypes") @@ -206,68 +242,130 @@ def convert_dataframes_to_dicts(net, _id, datatypes=None): if key.startswith("_") or key.startswith("res"): continue if isinstance(data, pd.core.frame.DataFrame): - # ------------ # create type lookup - - types[key] = dict() - default_dtypes = datatypes.get(key) - if default_dtypes is not None: - types[key].update({key: dtype.__name__ for key, dtype in default_dtypes.items()}) - types[key].update( - { - column: str(dtype) for column, dtype in net[key].dtypes.items() - if column not in types[key] - } - ) + types[key] = get_dtypes(key, data, datatypes.get(key)) if data.empty: continue - # ------------ # convert pandapower objects in dataframes to dict - - df = net[key].copy(deep=True) - - # ------------ - # cast all columns with their default datatype - - if default_dtypes is not None: - for column in df.columns: - if column in default_dtypes: - df[column] = df[column].astype(default_dtypes[column], errors="ignore") - - if "object" in df.columns: - df["object"] = df["object"].apply(object_to_json) - df["index"] = df.index - df["net_id"] = _id - load_geojsons(df) - dataframes[key] = df.to_dict(orient="records") + dataframes[key] = convert_element_to_dict(net[key].copy(deep=True), net_id, datatypes.get(key)) else: - try: - json.dumps(data) - except: - print("Data in net[{}] is not JSON serializable and was therefore omitted on import".format(key)) - else: + data = serialize_object_data(key, data, version_) + if data: other_parameters[key] = data + return dataframes, other_parameters, types +def serialize_object_data(element, element_data, version_): + ''' + Serialize a pandapower element which is not of type pandas.DataFrame into json. 
+ + Parameters + ---------- + element: str + Name of the pandapower element + element_data: object + pandapower element data + version_: + pandahub version to target for serialization + + Returns + ------- + json + A json representation of the pandapower element + ''' + if version_ <= version.parse("0.2.4"): + try: + element_data = json.dumps(element_data, cls=PPJSONEncoder) + except: + print( + "Data in net[{}] is not JSON serializable and was therefore omitted on import".format(element)) + else: + return element_data + else: + try: + json.dumps(element_data) + except: + element_data = f"serialized_{json.dumps(element_data, cls=PPJSONEncoder)}" + return element_data + + +def get_dtypes(element_data, default_dtypes): + ''' + Construct data types from a pandas.DataFrame, with given defaults taking precedence. + + Parameters + ---------- + element_data: pandas.DataFrame + Input dataframe + default_dtypes: dict + Default datatypes definition + + Returns + ------- + dict + Datatypes for all columns present in element_data. Column type is taken from default_dtypes if defined, + otherwise directly from element_data + + ''' + types = {} + if default_dtypes is not None: + types.update({key: dtype.__name__ for key, dtype in default_dtypes.items()}) + types.update( + { + column: str(dtype) for column, dtype in element_data.dtypes.items() + if column not in types + } + ) + return types + + def load_geojsons(df): for column in df.columns: if column == "geo" or column.endswith("_geo"): df[column] = df[column].apply(lambda a: json.loads(a) if isinstance(a, str) else a) def convert_geojsons(df, geo_mode="string"): + + def to_dict(geo): + if isinstance(geo, dict): + return geo + elif isinstance(geo, str): + return json.loads(geo) + elif hasattr(geo, "coords"): + return {"type": geo.type, "coordinates": geo.coords} + + def to_string(geo): + if isinstance(geo, str): + return geo + elif isinstance(geo, dict): + return json.dumps(geo) + elif hasattr(geo, "coords"): + return json.dumps({"type": geo.type, "coordinates": geo.coords}) + + def to_shapely(geo): + from shapely.geometry import shape + if hasattr(geo, "coords"): + return geo + elif isinstance(geo, str): + return shape(json.loads(geo)) + elif isinstance(geo, dict): + return shape(geo) + + conv_func = None if geo_mode == "dict": - return + conv_func = to_dict + elif geo_mode == "string": + conv_func = to_string + elif geo_mode == "shapely": + conv_func = to_shapely + else: + raise NotImplementedError("Unknown geo_mode {}".format(geo_mode)) + for column in df.columns: if column == "geo" or column.endswith("_geo"): - if geo_mode == "string": - df[column] = df[column].apply(lambda a: json.dumps(a) if isinstance(a, dict) else a) - elif geo_mode == "shapely": - from shapely.geometry import shape - df[column] = df[column].apply(lambda a: shape(a) if isinstance(a, dict) else a) - else: - raise NotImplementedError("Unknown geo_mode {}".format(geo_mode)) + df[column] = df[column].apply(conv_func) def json_to_object(js): _module = importlib.import_module(js["_module"]) @@ -279,4 +377,4 @@ def object_to_json(obj): "_module": obj.__class__.__module__, "_class": obj.__class__.__name__, "_object": obj.to_json() - } \ No newline at end of file + } diff --git a/pandahub/test/test_networks.py b/pandahub/test/test_networks.py index 257f72d..06c4881 100644 --- a/pandahub/test/test_networks.py +++ b/pandahub/test/test_networks.py @@ -1,8 +1,12 @@ -import pandapower.networks as nw -from pandahub import PandaHubError, PandaHub -import pandapower as pp import pytest +import 
pandapipes as pps +import pandapipes.networks as nw_pps +import pandapower as pp +import pandapower.networks as nw_pp +from pandahub import PandaHubError +from pandapipes.toolbox import nets_equal + def test_network_io(ph): ph.set_active_project("pytest") @@ -11,7 +15,7 @@ def test_network_io(ph): for cname in db.list_collection_names(): db.drop_collection(cname) - net1 = nw.mv_oberrhein() + net1 = nw_pp.mv_oberrhein() name1 = "oberrhein_network" # for some unknown reason the format of mv_oberrhein does not match the latest pandapower format net1.gen.rename(columns={"qmax_mvar": "max_q_mvar", "qmin_mvar": "min_q_mvar"}, inplace=True) @@ -20,11 +24,11 @@ def test_network_io(ph): del net1.impedance["x_pu"] del net1.dcline["cost_per_mw"] - net2 = nw.simple_four_bus_system() - pp.create_bus(net2, vn_kv=20, index=10) #check non-consecutive indices + net2 = nw_pp.simple_four_bus_system() + pp.create_bus(net2, vn_kv=20, index=10) # check non-consecutive indices name2 = "simple_network" - #we check storing two different networks consecutively to ensure the nets are properly separated + # we check storing two different networks consecutively to ensure the nets are properly separated for net, name in [(net1, name1), (net2, name2)]: if ph.network_with_name_exists(name): ph.delete_net_from_db(name) @@ -46,11 +50,11 @@ def test_network_io(ph): assert len(net3.line) == 0 assert len(net3.load) == 0 - #delete first network + # delete first network ph.delete_net_from_db(name1) assert ph.network_with_name_exists(name1) == False - #check that second network is still in database + # check that second network is still in database assert ph.network_with_name_exists(name2) == True net2_loaded = ph.get_net_from_db(name2) pp.runpp(net2_loaded) @@ -62,7 +66,7 @@ def test_load_subnetwork(ph): name = "oberrhein_network" if not ph.network_with_name_exists(name): - net = nw.mv_oberrhein() + net = nw_pp.mv_oberrhein() ph.write_network_to_db(net, name) subnet = ph.get_subnet_from_db(name, bus_filter={"vn_kv": 110}) @@ -73,7 +77,7 @@ def test_load_subnetwork(ph): assert len(subnet["res_" + element]) == size subnet = ph.get_subnet_from_db(name, bus_filter={"vn_kv": 110}, - include_results=False) + include_results=False) for element, size in expected_sizes: assert len(subnet[element]) == size assert len(subnet["res_" + element]) == 0 @@ -91,7 +95,7 @@ def test_access_and_set_single_values(ph): ph.set_active_project("pytest") name = "oberrhein_network" - net = nw.mv_oberrhein() + net = nw_pp.mv_oberrhein() if not ph.network_with_name_exists(name): ph.write_network_to_db(net, name) @@ -114,13 +118,39 @@ def test_access_and_set_single_values(ph): assert index not in net[element].index +def test_pandapipes(ph): + ph.set_active_project('Awesome') + net = nw_pps.gas_versatility() + ph.write_network_to_db(net, 'versatility') + net2 = ph.get_net_from_db('versatility') + pps.pipeflow(net) + pps.pipeflow(net2) + assert nets_equal(net, net2, check_only_results=True) + + +def test_get_set_single_value(ph): + ph.set_active_project('pytest') + net = nw_pp.mv_oberrhein() + ph.write_network_to_db(net, 'oberrhein') + val = ph.get_net_value_from_db('oberrhein', 'load', 0, 'p_mw') + assert val == net.load.at[0, 'p_mw'] + ph.set_net_value_in_db('oberrhein', 'load', 0, 'p_mw', 0.5) + val = ph.get_net_value_from_db('oberrhein', 'load', 0, 'p_mw') + assert val == 0.5 + + if __name__ == '__main__': from pandahub import PandaHub ph = PandaHub(connection_url="mongodb://localhost:27017") - - test_network_io(ph) - 0/0 + ph.create_project('Awesome') + 
net = nw_pps.gas_versatility() + ph.write_network_to_db(net, 'versatility') + net2 = ph.get_net_from_db('versatility') + pps.pipeflow(net) + pps.pipeflow(net2) + # test_network_io(ph) + # 0 / 0 # project_name = "pytest" # if ph.project_exists(project_name): @@ -128,8 +158,8 @@ def test_access_and_set_single_values(ph): # ph.delete_project(i_know_this_action_is_final=True) # ph.create_project(project_name) - ph.set_active_project("pytest") - net = ph.get_net_from_db("oberrhein_network") + # ph.set_active_project("pytest") + # net = ph.get_net_from_db("oberrhein_network") # name = "oberrhein_network" # net = nw.mv_oberrhein() @@ -153,5 +183,3 @@ def test_access_and_set_single_values(ph): # ph.get_net_value_from_db(name, element, index, parameter) # net = ph.get_net_from_db(name) # assert index not in net[element].index - - diff --git a/pandahub/test/test_projects.py b/pandahub/test/test_projects.py index 56aef40..0426fe8 100644 --- a/pandahub/test/test_projects.py +++ b/pandahub/test/test_projects.py @@ -4,7 +4,7 @@ import pytest from pandahub.lib.database_toolbox import convert_dataframes_to_dicts from pymongo import DESCENDING - +from packaging import version def test_project_management(ph): project = "pytest2" @@ -57,7 +57,7 @@ def write_network_to_db(self, net, name, overwrite=True, project_id=None): raise pandahub.PandaHubError("Network name already exists") max_id_network = db["_networks"].find_one(sort=[("_id", -1)]) _id = 0 if max_id_network is None else max_id_network["_id"] + 1 - dataframes, other_parameters, types = convert_dataframes_to_dicts(net, _id) + dataframes, other_parameters, types = convert_dataframes_to_dicts(net, _id, version.parse("0.2.1")) self._write_net_collections_to_db(db, dataframes) net_dict = {"_id": _id, "name": name, "dtypes": types, diff --git a/requirements.txt b/requirements.txt index 007fab7..eac539b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ -uvicorn==0.17.4 -fastapi==0.73.0 -fastapi-users[mongodb]==9.2.4 -fastapi-mail==1.0.4 -pandapower==2.8.0 -pandapipes==0.6.0 +uvicorn>=0.17.4 +fastapi~=0.75.0 +fastapi-users[mongodb]~=9.0 +fastapi-mail>=1.0.4 +pandapower>=2.10.1 +pandapipes>=0.7.0 pymongo pydantic simplejson diff --git a/setup.py b/setup.py index 5f18585..6a7fd07 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ name='pandahub', packages=find_packages(), url='https://github.com/e2nIEE/pandahub', - version='0.2.3', + version='0.2.4', include_package_data=True, long_description_content_type='text/markdown', zip_safe=False, diff --git a/tutorials/01_pandahub API - call a MongoDB database.ipynb b/tutorials/01_pandahub API - call a MongoDB database.ipynb index d895b08..a87f630 100644 --- a/tutorials/01_pandahub API - call a MongoDB database.ipynb +++ b/tutorials/01_pandahub API - call a MongoDB database.ipynb @@ -218,10 +218,9 @@ "metadata": {}, "outputs": [], "source": [ - "try:\n", - " ph_api.create_project('MyAwesomeFirstProject')\n", - "except TypeError:\n", - " print(\"Don't be confused by this exception. 
The project is created anyway.\")" + "from pandahub import PandaHubError\n", + "\n", + "ph_api.create_project('MyAwesomeFirstProject', project_id=\"Awesome\")" ] }, { diff --git a/tutorials/02_pandahub IO methods - interactions with a MongoDB database.ipynb b/tutorials/02_pandahub IO methods - interactions with a MongoDB database.ipynb index 3fb66ac..6bf2a65 100644 --- a/tutorials/02_pandahub IO methods - interactions with a MongoDB database.ipynb +++ b/tutorials/02_pandahub IO methods - interactions with a MongoDB database.ipynb @@ -6,33 +6,25 @@ "source": [ "## pandahub IO methods\n", "\n", - "This tutorial briefly demonstrates how to use the pandahub IO methods. This includes high level functions for commonly used database interactions like reading and writing grid data or timeseries data from/to a MongoDB database. pandahub IO methods do not need any additional configuration. Just call a pandahub API object (see the '01_pandahub API - interaction with a MongoDB database tutorial') and directly use its IO interaction methods.\n", + "This tutorial briefly demonstrates how to use the pandahub IO methods. This includes high level functions for commonly used database interactions like reading and writing grid data or timeseries data from/to a MongoDB database. pandahub IO methods do not need any additional configuration. Just call a pandahub API object (see the `01_pandahub API - interaction with a MongoDB database tutorial`) and directly use its IO interaction methods.\n", "\n", ">**Note**\n", ">\n", - ">To run this tutorial you need a config file containing the database server URL and authentification information (For more information check out the tutorial '01_pandahub API - interaction with a MongoDB database tutorial')" + ">To run this tutorial you need a config file containing the database server URL and authentification information (For more information check out the tutorial `01_pandahub API - interaction with a MongoDB database tutorial`)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# 1. Call pandahub api" + "# 1. Call pandahub api and activate project" ] }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "hp.pandapower.plotting.plotly.traces - INFO: Failed to import plotly - interactive plotting will not be available\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# At first we need to call a pandahub API object\n", "import pandahub as ph\n", @@ -45,36 +37,29 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In a next step, check if your project, you want to interact with, exists. If so, we activate the particular project:" + "In a next step, check if your project, you want to interact with, exists. If so, we activate the particular project. \n", + "\n", + ">**Note**\n", + ">\n", + ">If not, you need to create the project first. 
If you don't know how to do that, please check out '01_pandahub API - call a MongoDB database'):" ] }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "ph_api.project_exists('MyAwesomeFirstProject')" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "ph_api.set_active_project_by_id('62628cc101c2825d4fcc7c27')" + "ph_api.set_active_project('MyAwesomeFirstProject')" ] }, { @@ -95,7 +80,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This is an example how to write and read a pandapower/pandapipes net as well as timeseries data. If return_id is set to True the method returns the unique identifier of the document that is written to the database. You can also add custom kwargs." + "This is an example how to write and read a pandapower/pandapipes net as well as timeseries data. If `return_id` is set to True, the method returns the unique identifier of the document that is written to the database. You can also add custom kwargs." ] }, { @@ -105,12 +90,263 @@ "### Write/Read a pandapower/pandapipes net" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this subsection, we explain how to write and read pandapower and pandapipes networks." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**pandapower**\n", + "\n", + "At first we need to load a pandapower net. In our example we load the `mv_oberrhein` network." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandapower import networks as nws\n", + "\n", + "net_orig = nws.mv_oberrhein()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This net looks like follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandapower.plotting import simple_plot\n", + "\n", + "simple_plot(net_orig)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As next step, we write the net to our database we just activated before. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ph_api.write_network_to_db(net_orig, 'MyAwesomeNet')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Wrting a net to our database, each pandas DataFrame is written into a corresponding collection. General information of a net is saved under `_networks`.\n", + "\n", + "Besides the way described above, there is another alternative way to write a network to the database. \n", + "Instead of activating your project like described in section 1, you are also able to directly pass the `project id` within the `write_network_to_db` function. Doing so, the project gets activated automatically. To show this, we again call the pandahub api first." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ph_api = ph.PandaHub(connection_url=MONGODB_URL)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you try to write the net to the database now, you get the error message, that the project is not activated:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ph_api.write_network_to_db(net_orig, 'MyAwesomeNet')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, as soon as you pass a `project id`, the project gets activated on the fly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ph_api.write_network_to_db(net_orig, 'MyAwesomeNet', project_id='Awesome')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The net is now saved in the database and can be called:" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "net_db = ph_api.get_net_from_db('MyAwesomeNet')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Comparing the original net with the net retrieved from the database shows that both nets are equal:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandapower as pp\n", + "from pandapower.toolbox import nets_equal\n", + "\n", + "pp.runpp(net_db)\n", + "pp.runpp(net_orig)\n", + "\n", + "nets_equal(net_orig, net_db, check_only_results=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Instead of writing a net to or reading a net from a database, it is also possible to just call and modify single values. Reading a single value can be done by calling following function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ph_api.get_net_value_from_db('MyAwesomeNet', 'load', 0, 'p_mw')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To change this value, you need to call following function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ph_api.set_net_value_in_db('MyAwesomeNet', 'load', 0, 'p_mw', 0.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calling the value again, shows that the value has been modified:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ph_api.get_net_value_from_db('MyAwesomeNet', 'load', 0, 'p_mw')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**pandapipes**\n", + "\n", + "Likewise as in the pandapower example, we need to load a pandapipes networks first. We could have chosen a network with georeferenced data points, however, there is no network example given covering most of the pandapipes components. Therefore we chose a generic network." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandapipes import networks as nws\n", + "\n", + "net_orig = nws.gas_versatility()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As next step we upload the net to our database:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ph_api.write_network_to_db(net_orig, 'MyAwesomePandapipesNet')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Retrieving this net from the database again and comparing it to the original networks leads to same results:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandapipes as pps\n", + "from pandapipes.toolbox import nets_equal\n", + "\n", + "net_db = ph_api.get_net_from_db('MyAwesomePandapipesNet')\n", + "\n", + "pps.pipeflow(net_db)\n", + "pps.pipeflow(net_orig)\n", + "\n", + "nets_equal(net_orig, net_db, check_only_results=True)" + ] }, { "cell_type": "markdown", @@ -119,177 +355,175 @@ "### Write/Read a time series" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The procedure to call a single or multiple time series diverge. The differences are hightlighted in the following:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Single time series**\n", + "\n", + "At first, we call the pandahub api again:" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "ph_api = ph.PandaHub(connection_url=MONGODB_URL)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to write a time series to the pandahub database, you need to ask yourself, if the time series shall be globally or project specific available. If you define it globally, you can access the time series without calling a specific project. \n", + "\n", + "In the following, we show how to upload a randomly created time series for global use. If you assign it project specific, you need to pass a `project_id` or activate a project beforehand." 
+ ] }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "ename": "PandaHubError", - "evalue": "No project is activated", - "output_type": "error", - "traceback": [ - "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", - "\u001B[1;31mPandaHubError\u001B[0m Traceback (most recent call last)", - "Input \u001B[1;32mIn [6]\u001B[0m, in \u001B[0;36m\u001B[1;34m()\u001B[0m\n\u001B[0;32m 2\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mnumpy\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m \u001B[38;5;21;01mnp\u001B[39;00m\n\u001B[0;32m 3\u001B[0m timeseries_data \u001B[38;5;241m=\u001B[39m pd\u001B[38;5;241m.\u001B[39mSeries(np\u001B[38;5;241m.\u001B[39mrandom\u001B[38;5;241m.\u001B[39mrandom(\u001B[38;5;241m35040\u001B[39m))\n\u001B[1;32m----> 4\u001B[0m document_id \u001B[38;5;241m=\u001B[39m \u001B[43mph_api\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mwrite_timeseries_to_db\u001B[49m\u001B[43m(\u001B[49m\u001B[43mtimeseries_data\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdata_type\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mp_mw\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m 5\u001B[0m \u001B[43m \u001B[49m\u001B[43mcollection_name\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mtimeseries_data\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mreturn_id\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m)\u001B[49m\n", - "File \u001B[1;32m~\\git\\pandahub\\pandahub\\lib\\PandaHub.py:922\u001B[0m, in \u001B[0;36mPandaHub.write_timeseries_to_db\u001B[1;34m(self, timeseries, data_type, element_type, netname, element_index, name, global_database, collection_name, project_id, **kwargs)\u001B[0m\n\u001B[0;32m 920\u001B[0m db \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_get_global_database()\n\u001B[0;32m 921\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 922\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcheck_permission\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mwrite\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[0;32m 923\u001B[0m db \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_get_project_database()\n\u001B[0;32m 924\u001B[0m document \u001B[38;5;241m=\u001B[39m create_timeseries_document(timeseries\u001B[38;5;241m=\u001B[39mtimeseries,\n\u001B[0;32m 925\u001B[0m data_type\u001B[38;5;241m=\u001B[39mdata_type,\n\u001B[0;32m 926\u001B[0m element_type\u001B[38;5;241m=\u001B[39melement_type,\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 929\u001B[0m name\u001B[38;5;241m=\u001B[39mname,\n\u001B[0;32m 930\u001B[0m \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n", - "File \u001B[1;32m~\\git\\pandahub\\pandahub\\lib\\PandaHub.py:93\u001B[0m, in \u001B[0;36mPandaHub.check_permission\u001B[1;34m(self, permission)\u001B[0m\n\u001B[0;32m 91\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mcheck_permission\u001B[39m(\u001B[38;5;28mself\u001B[39m, permission):\n\u001B[0;32m 92\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mactive_project \u001B[38;5;129;01mis\u001B[39;00m 
\u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m---> 93\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m PandaHubError(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mNo project is activated\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m 94\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mhas_permission(permission):\n\u001B[0;32m 95\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m PandaHubError(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mYou don\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mt have \u001B[39m\u001B[38;5;132;01m{}\u001B[39;00m\u001B[38;5;124m rights on this project\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;241m.\u001B[39mformat(permission), \u001B[38;5;241m403\u001B[39m)\n", - "\u001B[1;31mPandaHubError\u001B[0m: No project is activated" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", + "\n", "timeseries_data = pd.Series(np.random.random(35040))\n", - "document_id = ph_api.write_timeseries_to_db(timeseries_data, data_type=\"p_mw\",\n", - " collection_name=\"timeseries_data\", return_id=True)" + "ph_api.write_timeseries_to_db(timeseries_data, \n", + " data_type='p_mw', \n", + " collection_name=\"timeseries\", \n", + " global_database=True, \n", + " compress_ts_data=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Passing the time series data you need to define a data type. The recommended format is `_`. Moreover, you can also compress your time series data. The main advantages of compressing your data are, that you can read time series from and write time series to the data base much faster. Furthermore, the time series occupies much less space. However, you should also always keep the drawbacks in mind. To make use of the advantages, the time series are saved as bytestring. Thus, compressed they are not human readable anymore. Furthmore, filter functionalities are not working. Therefore, you always need to call the entire time series. You cannot only read some parts of it. Especially, for web applications is usually does not make much sense to compress your data.\n", + "\n", + "To call the time series, use following function:" ] }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0.381588\n", - "1 0.035058\n", - "2 0.975626\n", - "3 0.708254\n", - "4 0.622388\n", - " ... \n", - "35035 0.182994\n", - "35036 0.095744\n", - "35037 0.814569\n", - "35038 0.081234\n", - "35039 0.121295\n", - "Length: 35040, dtype: float64" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# we can retrieve this timeseries by querying for a specific combination of metadata...\n", - "ph_api.get_timeseries_from_db(data_type=\"p_mw\", timeseries_name=\"test_timeseries\", collection_name=\"timeseries_data\")" + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ph_api.get_timeseries_from_db(filter_document={'data_type':'p_mw'}, \n", + " collection_name=\"timeseries\", \n", + " global_database=True,\n", + " compressed_ts_data=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Within the `filter_document` dict you can pass everything you want. The database matching the filter criteria are extracted accordingly." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want, you can also extract the time series metadata. Just set `include_metadata` equal to True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ph_api.get_timeseries_from_db(filter_document={'data_type':'p_mw'}, \n", + " collection_name=\"timeseries\", \n", + " global_database=True,\n", + " compressed_ts_data=False, \n", + " include_metadata=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Et voilĂ , you are now able to save and call time series in pandahub!\n", + "\n", + "**multiple time series**\n", + "\n", + "In case of calling multiple time series the procedure is slightly different compare to a single time series. At first, we add a second time series:" ] }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0.381588\n", - "1 0.035058\n", - "2 0.975626\n", - "3 0.708254\n", - "4 0.622388\n", - " ... \n", - "35035 0.182994\n", - "35036 0.095744\n", - "35037 0.814569\n", - "35038 0.081234\n", - "35039 0.121295\n", - "Length: 35040, dtype: float64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# ...or by _id.\n", - "ph_api.get_timeseries_from_db(_id=document_id, collection_name=\"timeseries_data\")" + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "timeseries_data = pd.Series(np.random.random(35040))\n", + "ph_api.write_timeseries_to_db(timeseries_data, \n", + " data_type='p_mw', \n", + " collection_name=\"timeseries\", \n", + " global_database=True, \n", + " compress_ts_data=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Trying to get the time series with the function above leads to following error:" ] }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0.381588\n", - "1 0.035058\n", - "2 0.975626\n", - "3 0.708254\n", - "4 0.622388\n", - " ... \n", - "35035 0.182994\n", - "35036 0.095744\n", - "35037 0.814569\n", - "35038 0.081234\n", - "35039 0.121295\n", - "Length: 35040, dtype: float64" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# It is also possible to provide a single filter document\n", - "ph_api.get_timeseries_from_db(filter_document={\"data_type\": \"p_mw\",\n", - " \"timeseries_name\": \"test_timeseries\"},\n", - " collection_name=\"timeseries_data\")" + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ph_api.get_timeseries_from_db(filter_document={'data_type':'p_mw'}, \n", + " collection_name=\"timeseries\", \n", + " global_database=True,\n", + " compressed_ts_data=False, \n", + " include_metadata=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you still want to retrieve both time series, you need to call another function:" ] }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'_id': 'FrpbXkqTOzOvrc6ie-RofDma_5R7bbHLQN6f5T9OSBw=',\n", - " 'data_type': 'p_mw',\n", - " 'first_timestamp': 0,\n", - " 'last_timestamp': 35039,\n", - " 'num_timestamps': 35040,\n", - " 'timeseries_name': 'test_timeseries',\n", - " 'timeseries_data': 0 0.381588\n", - " 1 0.035058\n", - " 2 0.975626\n", - " 3 0.708254\n", - " 4 0.622388\n", - " ... 
\n", - " 35035 0.182994\n", - " 35036 0.095744\n", - " 35037 0.814569\n", - " 35038 0.081234\n", - " 35039 0.121295\n", - " Length: 35040, dtype: float64}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# ...or to inlude the document's metadata in the query result.\n", - "ph_api.get_timeseries_from_db(filter_document={\"data_type\": \"p_mw\",\n", - " \"timeseries_name\": \"test_timeseries\"},\n", - " collection_name=\"timeseries_data\",\n", - " include_metadata=True)" + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ph_api.multi_get_timeseries_from_db(filter_document={'data_type':'p_mw'}, \n", + " collection_name=\"timeseries\", \n", + " global_database=True,\n", + " compressed_ts_data=False, \n", + " include_metadata=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the function above, you are now able to get all time series matching the filter criteria above and do your postprocessing apart from the database." ] } ], @@ -314,4 +548,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/tutorials/03_mongodb_user_management.ipynb b/tutorials/03_mongodb_user_management.ipynb new file mode 100755 index 0000000..d00e75a --- /dev/null +++ b/tutorials/03_mongodb_user_management.ipynb @@ -0,0 +1,160 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "18ffb553-b9f9-4a96-90d5-362f4f8b2a34", + "metadata": {}, + "source": [ + "Pandahub comes with an application level user management. All necessary data is stored in the collection \"user_management\". If pandahub is used via its REST interface, this allows for user authentification via https." + ] + }, + { + "cell_type": "markdown", + "id": "8154e1c3-a4c1-4dbe-aeb5-894aff5b7646", + "metadata": {}, + "source": [ + "***However***, you can also use the pandahub API ***without*** running the REST interface. Instead, you can use it just as an convenient API to store network data into a mongo DB.\n", + "\n", + "If you still need some control over the access to the data you must fall back on the mongoDB user access management. In the following is a short introduction how this can be accomplished.\n", + "\n", + "The following is only necessary if you do not rely on the pandahub application level user management!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dc30396-90b0-415a-b365-b6a2d9898596", + "metadata": {}, + "outputs": [], + "source": [ + "MONGODB_IP = \"127.0.0.1\"\n", + "MONGODB_PORT = 27017\n", + "# we need a user that has the role \"userAdminAnyDatabase\" to be able to see current users and create new users\n", + "MONGODB_ROOT_USER = \"root\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "631c4cbb-ea3e-4699-be4e-3a692e12df28", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import pandahub\n", + "print(f\"insert password for user {MONGODB_ROOT_USER}\")\n", + "MONGODB_URL = f\"mongodb://{MONGODB_ROOT_USER}:{getpass.getpass()}@{MONGODB_IP}:{MONGODB_PORT}\"\n", + "ph = pandahub.PandaHub(connection_url=MONGODB_URL)" + ] + }, + { + "cell_type": "markdown", + "id": "1c64e197-abca-40a0-8c9d-3a3d50dfaf2a", + "metadata": {}, + "source": [ + "In order to list existing MongoDB users and theirs respective access rights to the different project databases:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a807d84-afd1-4a37-8808-86b0c04449a4", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "userlist = ph.mongo_client[\"admin\"][\"system.users\"].find({}, projection={\"user\": 1, \"roles\": 1})\n", + "users = pd.DataFrame([(l[\"user\"], r[\"db\"], r[\"role\"]) for l in userlist for r in l[\"roles\"]], columns=[\"user\", \"db\", \"role\"])\n", + "users[~users.db.isin([\"admin\", \"user_management\"])] # we can exclude some databases for better clarity" + ] + }, + { + "cell_type": "markdown", + "id": "e46c09db-c0bb-4b32-8a07-fd6e916eca51", + "metadata": {}, + "source": [ + "We can create new users for existing projects:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0a8244e-d4eb-4b42-a003-876e2d02fee0", + "metadata": {}, + "outputs": [], + "source": [ + "user = \"\"\n", + "pwd = \"\"\n", + "# create a user that has read access to 'user_management' of pandahub\n", + "ph.mongo_client.admin.command('createUser', user, pwd=pwd, roles=[{'role': 'read', 'db': 'user_management'}])" + ] + }, + { + "cell_type": "markdown", + "id": "204e5b7d-853f-4e73-9f01-8f1f4032c1a2", + "metadata": {}, + "source": [ + "Now we can grant readWrite access to the project database(s)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "edc7d857-52c6-45bc-90da-6f16d555dc1f", + "metadata": {}, + "outputs": [], + "source": [ + "project_id = \"\"\n", + "ph.mongo_client.admin.command('grantRolesToUser', user, roles=[{'role': 'readWrite', 'db': project_id}])" + ] + }, + { + "cell_type": "markdown", + "id": "f4f26f86-f144-4564-b30d-c51f7571ac2b", + "metadata": {}, + "source": [ + "Remove users entirely via" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "549b7e2b-0915-4b38-ab30-a437c7f23fa6", + "metadata": {}, + "outputs": [], + "source": [ + "user = \"\"\n", + "ph.mongo_client.admin.command(\"dropUser\", user)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60eaf7fe-df95-4679-9238-dd2d7b866f7d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/config.py b/tutorials/config.py index 7b4f76f..9ca6c06 100644 --- a/tutorials/config.py +++ b/tutorials/config.py @@ -1 +1 @@ -MONGODB_URL="mongodb://localhost:27017" +MONGODB_URL="mongodb://localhost:27017/"
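
For orientation, here is a minimal end-to-end sketch of the workflow the updated tutorials in this patch walk through: writing a pandapower net, reading and setting single values, and storing a global timeseries with the new return_id option. This is an illustration under assumptions rather than part of the patch itself — it presumes a MongoDB instance reachable at mongodb://localhost:27017/ and an already created project named 'MyAwesomeFirstProject' (see tutorial 01); all calls and keyword arguments mirror the tutorial cells and API changes shown above. ::

    import numpy as np
    import pandas as pd
    import pandapower.networks as nw
    from pandahub import PandaHub

    # Assumptions: MongoDB runs on localhost:27017 and a project named
    # 'MyAwesomeFirstProject' already exists (created as in tutorial 01).
    ph_api = PandaHub(connection_url="mongodb://localhost:27017/")
    ph_api.set_active_project("MyAwesomeFirstProject")

    # Store a pandapower net in the project database and read it back.
    net_orig = nw.mv_oberrhein()
    ph_api.write_network_to_db(net_orig, "MyAwesomeNet")
    net_db = ph_api.get_net_from_db("MyAwesomeNet")

    # Access and modify a single value without loading the whole net.
    p_mw = ph_api.get_net_value_from_db("MyAwesomeNet", "load", 0, "p_mw")
    ph_api.set_net_value_in_db("MyAwesomeNet", "load", 0, "p_mw", 0.5)

    # Store a global timeseries; return_id=True (added in this release) makes
    # write_timeseries_to_db return the _id of the inserted document.
    ts = pd.Series(np.random.random(35040))
    doc_id = ph_api.write_timeseries_to_db(ts, data_type="p_mw",
                                           collection_name="timeseries",
                                           global_database=True,
                                           return_id=True)

    # Read one matching timeseries back; if several documents match the filter,
    # multi_get_timeseries_from_db returns all of them instead.
    series = ph_api.get_timeseries_from_db(filter_document={"data_type": "p_mw"},
                                           collection_name="timeseries",
                                           global_database=True)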