Merge pull request #1175 from openego/fixes/#350-osm-bld-load-assignment

Fixes/#350 osm bld load assignment
openego · Jan 13, 2025 · d31bd82 · d31bd82
2 parents a2dbc97 + 4a5ebf9
commit d31bd82
Show file tree

Hide file tree

Showing 6 changed files with 277 additions and 32 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -264,6 +264,8 @@ Added
   `#207 <https://github.com/openego/powerd-data/pull/207>`_
 * Add MaStR geocoding and handling of conventional generators
   `#1095 <https://github.com/openego/eGon-data/issues/1095>`_
+* Improve building household load assignment
+  `#350 <https://github.com/openego/powerd-data/issues/350>`_
 
 .. _PR #159: https://github.com/openego/eGon-data/pull/159
 .. _PR #703: https://github.com/openego/eGon-data/pull/703

diff --git a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py
@@ -50,6 +50,45 @@ class HouseholdElectricityProfilesOfBuildings(Base):
     profile_id = Column(String, index=True)
 
 
+class HouseholdElectricityProfilesOfBuildingsStats(Base):
+    """
+    Class definition of table `demand.egon_household_electricity_profile_of_buildings_stats`.
+
+    Contains the number of households per building, in total and per
+    household type, derived from table
+    `demand.egon_household_electricity_profile_of_buildings`.
+    The table is filled by `create_buildings_profiles_stats()`, which takes
+    the household type from the first two characters of each `profile_id`.
+
+    All count columns are nullable; `building_id` is the primary key.
+
+    Columns
+    -------
+    building_id: Building id as used in tables `openstreetmap.osm_buildings_*`,
+        primary key
+    households_total: total count of households
+    SR: count of household type SR single retiree
+    SO: count of household type SO single adults
+    PR: count of household type PR pair retiree
+    PO: count of household type PO pair adults
+    SK: count of household type SK single n children
+    P1: count of household type P1 pair 1 child
+    P2: count of household type P2 pair 2 children
+    P3: count of household type P3 pair 3 children
+    OR: count of household type OR multi retiree n children
+    OO: count of household type OO multi adults n children
+    """
+
+    __tablename__ = "egon_household_electricity_profile_of_buildings_stats"
+    __table_args__ = {"schema": "demand"}
+
+    building_id = Column(Integer, primary_key=True)
+    households_total = Column(Integer, nullable=True)
+    # One count column per two-letter household type code (see docstring)
+    SR = Column(Integer, nullable=True)
+    SO = Column(Integer, nullable=True)
+    PR = Column(Integer, nullable=True)
+    PO = Column(Integer, nullable=True)
+    SK = Column(Integer, nullable=True)
+    P1 = Column(Integer, nullable=True)
+    P2 = Column(Integer, nullable=True)
+    P3 = Column(Integer, nullable=True)
+    OR = Column(Integer, nullable=True)
+    OO = Column(Integer, nullable=True)
+
+
 class OsmBuildingsSynthetic(Base):
     """
     Class definition of table demand.osm_buildings_synthetic.
@@ -437,9 +476,9 @@ def create_pool(buildings, profiles):
         .reset_index()
     )
     # add profile position as attribute by number of entries per cell (*)
-    mapping_profiles_to_buildings[
-        "profile"
-    ] = mapping_profiles_to_buildings.groupby(["cell_id"]).cumcount()
+    mapping_profiles_to_buildings["profile"] = (
+        mapping_profiles_to_buildings.groupby(["cell_id"]).cumcount()
+    )
     # get multiindex of profiles in cells (*)
     index_profiles = mapping_profiles_to_buildings.set_index(
         ["cell_id", "profile"]
@@ -454,9 +493,9 @@ def create_pool(buildings, profiles):
         profile_ids_per_cell_reduced.explode().reset_index()
     )
     # assign profile position by order of list
-    profile_ids_per_cell_reduced[
-        "profile"
-    ] = profile_ids_per_cell_reduced.groupby(["cell_id"]).cumcount()
+    profile_ids_per_cell_reduced["profile"] = (
+        profile_ids_per_cell_reduced.groupby(["cell_id"]).cumcount()
+    )
     profile_ids_per_cell_reduced = profile_ids_per_cell_reduced.set_index(
         ["cell_id", "profile"]
     )
@@ -591,9 +630,11 @@ def ve(s):
         iterate_over = (
             "nuts3"
             if dataset == "Everything"
-            else "cell_id"
-            if dataset == "Schleswig-Holstein"
-            else ve(f"'{dataset}' is not a valid dataset boundary.")
+            else (
+                "cell_id"
+                if dataset == "Schleswig-Holstein"
+                else ve(f"'{dataset}' is not a valid dataset boundary.")
+            )
         )
 
         df_building_peak_loads = pd.DataFrame()
@@ -657,13 +698,13 @@ def ve(s):
 
 def map_houseprofiles_to_buildings():
     """
-    Cencus hh demand profiles are assigned to buildings via osm ids. If no OSM
-    ids available, synthetic buildings are generated. A list of the generated
-    buildings and supplementary data as well as the mapping table is stored
+    Census hh demand profiles are assigned to residential buildings via osm ids.
+    If no OSM ids are available, synthetic buildings are generated. A list of the
+    generated buildings and supplementary data as well as the mapping table is stored
     in the db.
 
-    Tables:
-    ----------
+    Tables
+    ------
     synthetic_buildings:
         schema: openstreetmap
         tablename: osm_buildings_synthetic
@@ -675,26 +716,93 @@ def map_houseprofiles_to_buildings():
     Notes
     -----
     """
-    #
-    egon_map_zensus_buildings_residential = Table(
-        "egon_map_zensus_buildings_residential",
+    # ========== Get census cells ==========
+    egon_census_cells = Table(
+        "egon_destatis_zensus_apartment_building_population_per_ha",
         Base.metadata,
-        schema="boundaries",
+        schema="society",
     )
-    # get table metadata from db by name and schema
-    inspect(engine).reflecttable(egon_map_zensus_buildings_residential, None)
+    inspect(engine).reflecttable(egon_census_cells, None)
 
     with db.session_scope() as session:
-        cells_query = session.query(egon_map_zensus_buildings_residential)
-    egon_map_zensus_buildings_residential = pd.read_sql(
-        cells_query.statement, cells_query.session.bind, index_col=None
+        cells_query = session.query(
+            egon_census_cells.c.zensus_population_id,
+            egon_census_cells.c.population,
+            egon_census_cells.c.geom,
+        ).order_by(egon_census_cells.c.zensus_population_id)
+        gdf_egon_census_cells = gpd.read_postgis(
+            cells_query.statement, cells_query.session.bind, geom_col="geom"
+        )
+
+    # ========== Get residential buildings ==========
+    egon_osm_buildings_residential = Table(
+        "osm_buildings_residential",
+        Base.metadata,
+        schema="openstreetmap",
+    )
+    inspect(engine).reflecttable(egon_osm_buildings_residential, None)
+
+    with db.session_scope() as session:
+        cells_query = session.query(
+            egon_osm_buildings_residential.c.id.label("building_id"),
+            egon_osm_buildings_residential.c.geom_building,
+        ).order_by(egon_osm_buildings_residential.c.id)
+        gdf_egon_osm_buildings = gpd.read_postgis(
+            cells_query.statement,
+            cells_query.session.bind,
+            geom_col="geom_building",
+        )
+
+    # ========== Clip buildings with census cells ==========
+
+    # Clip to create new build parts as buildings
+    gdf_egon_osm_buildings_census_cells = gdf_egon_census_cells.overlay(
+        gdf_egon_osm_buildings, how="intersection"
+    )
+    # gdf_egon_osm_buildings_census_cells["population"] = gdf_egon_osm_buildings_census_cells.population.fillna(0)
+    gdf_egon_osm_buildings_census_cells["geom_point"] = (
+        gdf_egon_osm_buildings_census_cells.centroid
+    )
+
+    # Add column with unique building ids using suffixes (building parts split by clipping)
+    gdf_egon_osm_buildings_census_cells["building_id_temp"] = (
+        gdf_egon_osm_buildings_census_cells["building_id"].astype(str)
+    )
+    g = (
+        gdf_egon_osm_buildings_census_cells.groupby("building_id_temp")
+        .cumcount()
+        .add(1)
+        .astype(str)
+    )
+    gdf_egon_osm_buildings_census_cells["building_id_temp"] += "_" + g
+
+    # Check
+    try:
+        assert len(
+            gdf_egon_osm_buildings_census_cells.building_id_temp.unique()
+        ) == len(gdf_egon_osm_buildings_census_cells)
+    except AssertionError:
+        print(
+            "The length of split buildings do not match with original count."
+        )
+
+    egon_map_zensus_buildings_residential = (
+        gdf_egon_osm_buildings_census_cells[
+            ["zensus_population_id", "building_id_temp"]
+        ].rename(
+            columns={
+                "zensus_population_id": "cell_id",
+                "building_id_temp": "id",
+            }
+        )
     )
 
+    # Get household profile to census cells allocations
     with db.session_scope() as session:
         cells_query = session.query(HouseholdElectricityProfilesInCensusCells)
     egon_hh_profile_in_zensus_cell = pd.read_sql(
         cells_query.statement, cells_query.session.bind, index_col=None
-    )  # index_col="cell_id")
+    )
 
     # Match OSM and zensus data to define missing buildings
     missing_buildings = match_osm_and_zensus_data(
@@ -722,12 +830,20 @@ def map_houseprofiles_to_buildings():
         egon_hh_profile_in_zensus_cell,
     )
 
+    # remove suffixes from buildings split into parts before to merge them back together
+    mapping_profiles_to_buildings["building_id"] = (
+        mapping_profiles_to_buildings.building_id.astype(str).apply(
+            lambda s: s.split("_")[0] if "_" in s else s
+        )
+    )
+    mapping_profiles_to_buildings["building_id"] = (
+        mapping_profiles_to_buildings["building_id"].astype(int)
+    )
+
     # reduce list to only used synthetic buildings
     synthetic_buildings = reduce_synthetic_buildings(
         mapping_profiles_to_buildings, synthetic_buildings
     )
-    # TODO remove unused code
-    # synthetic_buildings = synthetic_buildings.drop(columns=["grid_id"])
     synthetic_buildings["n_amenities_inside"] = 0
 
     OsmBuildingsSynthetic.__table__.drop(bind=engine, checkfirst=True)
@@ -766,6 +882,54 @@ def map_houseprofiles_to_buildings():
         )
 
 
+def create_buildings_profiles_stats():
+    """
+    Create DB table `demand.egon_household_electricity_profile_of_buildings_stats`
+    with household profile type counts per building.
+
+    The table is dropped and recreated on every call. All rows of
+    `HouseholdElectricityProfilesOfBuildings` are read, the household type is
+    taken from the first two characters of `profile_id`, and the rows are
+    counted per `building_id` and household type. The counts are pivoted to
+    one column per household type (0 where a building has no household of a
+    type), a `households_total` column holding the row sum is added, and the
+    result is appended to the freshly created table.
+
+    Returns
+    -------
+    None
+    """
+
+    # Drop and recreate table if existing
+    HouseholdElectricityProfilesOfBuildingsStats.__table__.drop(
+        bind=engine, checkfirst=True
+    )
+    HouseholdElectricityProfilesOfBuildingsStats.__table__.create(
+        bind=engine, checkfirst=True
+    )
+
+    # Query final profile table (one row per household profile and building)
+    with db.session_scope() as session:
+        cells_query = session.query(
+            HouseholdElectricityProfilesOfBuildings,
+        ).order_by(HouseholdElectricityProfilesOfBuildings.id)
+
+        df_buildings_and_profiles = pd.read_sql(
+            cells_query.statement, cells_query.session.bind, index_col="id"
+        )
+
+    # Extract household type prefix (first two characters of the profile id)
+    df_buildings_and_profiles = df_buildings_and_profiles.assign(
+        household_type=df_buildings_and_profiles.profile_id.str[:2]
+    )
+
+    # Count profiles per building and household type, then unstack the
+    # household type into columns; combinations that do not occur become 0
+    df_buildings_and_profiles = (
+        df_buildings_and_profiles.groupby("building_id")
+        .value_counts(["household_type"])
+        .unstack(fill_value=0)
+    )
+    # Total is the sum over all household type columns of a building
+    df_buildings_and_profiles["households_total"] = (
+        df_buildings_and_profiles.sum(axis=1)
+    )
+
+    # Write to DB: append into the table created above so the ORM-defined
+    # schema is kept; the index (building_id) is written as a column.
+    # NOTE(review): household types that never occur in the data produce no
+    # column here and presumably stay NULL in the table -- confirm.
+    df_buildings_and_profiles.to_sql(
+        name=HouseholdElectricityProfilesOfBuildingsStats.__table__.name,
+        schema=HouseholdElectricityProfilesOfBuildingsStats.__table__.schema,
+        con=engine,
+        if_exists="append",
+    )
+
+
 class setup(Dataset):
     """
     Household electricity demand time series for scenarios in 2035 and 2050
@@ -886,9 +1050,13 @@ class setup(Dataset):
     #:
     name: str = "Demand_Building_Assignment"
     #:
-    version: str = "0.0.5"
+    version: str = "0.0.6"
     #:
-    tasks = (map_houseprofiles_to_buildings, get_building_peak_loads)
+    tasks = (
+        map_houseprofiles_to_buildings,
+        create_buildings_profiles_stats,
+        get_building_peak_loads,
+    )
 
     def __init__(self, dependencies):
         super().__init__(

diff --git a/src/egon/data/datasets/osm_buildings_streets/__init__.py b/src/egon/data/datasets/osm_buildings_streets/__init__.py
@@ -40,6 +40,14 @@ def filter_buildings_residential():
     execute_sql_script("osm_buildings_filter_residential.sql")
 
 
+def extend_buildings_residential():
+    """
+    Extend table `openstreetmap.osm_buildings_residential`.
+
+    Prints a progress message and runs the SQL script
+    `osm_buildings_extend_residential.sql`, which inserts commercial, retail,
+    office and hotel buildings from `openstreetmap.osm_buildings_filtered`
+    for census cells that no residential building intersects.
+    """
+    print(
+        "Extend residential buildings by commercial/retail/office/hotel "
+        "buildings in cells with census population but without buildings..."
+    )
+    execute_sql_script("osm_buildings_extend_residential.sql")
+
+
 def create_buildings_filtered_zensus_mapping():
     print(
         "Create census mapping table for filtered buildings in populated areas..."
@@ -134,6 +142,11 @@ class OsmBuildingsStreets(Dataset):
       * All buildings: `openstreetmap.osm_buildings`
       * Filtered buildings: `openstreetmap.osm_buildings_filtered`
       * Residential buildings: `openstreetmap.osm_buildings_residential`
+        * 1st step: Filter by tags (see `osm_buildings_filter_residential.sql`)
+        * 2nd step: Table is extended by finding census cells with population
+          but no residential buildings and extended by commercial/retail/office/
+          hotel buildings (see `osm_buildings_extend_residential.sql`) since they
+          often include apartments as well.
     * Extract amenities and filter using relevant tags, e.g. shops and restaurants,
       see script `osm_amenities_shops_preprocessing.sql` for the full list of tags.
       Resulting table: `openstreetmap.osm_amenities_shops_filtered`
@@ -171,7 +184,7 @@ class OsmBuildingsStreets(Dataset):
     #:
     name: str = "OsmBuildingsStreets"
     #:
-    version: str = "0.0.6"
+    version: str = "0.0.7"
 
     def __init__(self, dependencies):
         super().__init__(
@@ -181,6 +194,7 @@ def __init__(self, dependencies):
             tasks=(
                 preprocessing,
                 {filter_buildings, filter_buildings_residential},
+                extend_buildings_residential,
                 extract_buildings_filtered_amenities,
                 {
                     create_buildings_filtered_zensus_mapping,

diff --git a/src/egon/data/datasets/osm_buildings_streets/osm_buildings_extend_residential.sql b/src/egon/data/datasets/osm_buildings_streets/osm_buildings_extend_residential.sql
@@ -0,0 +1,37 @@
+/*
+ * Original Author: nesnoj (jonathan.amme@rl-institut.de)
+*/
+
+--------------------------------------------------------------------------------
+-- Extend residential buildings by finding census cells with population but   --
+-- no residential buildings after osm_buildings_filter_residential.sql ran.   --
+-- Mark commercial, retail, office, hotel buildings as residential in those   --
+-- cells.                                                                     --
+--------------------------------------------------------------------------------
+
+-- NOTE(review): INSERT ... SELECT * without a column list maps columns by
+-- position, so this relies on osm_buildings_filtered and
+-- osm_buildings_residential sharing the same column order; please confirm.
+INSERT INTO openstreetmap.osm_buildings_residential
+	SELECT *
+	FROM openstreetmap.osm_buildings_filtered
+	WHERE id IN (
+		SELECT id FROM (
+			-- get buildings from filtered table in census cells (by centroid);
+			-- the filter on zensus.zensus_population_id below discards rows
+			-- without a matching cell, so the LEFT JOIN acts like an inner join
+			SELECT
+				bld.id,
+				zensus.grid_id,
+				zensus.zensus_population_id AS cell_id
+			FROM openstreetmap.osm_buildings_filtered bld
+			LEFT JOIN society.egon_destatis_zensus_apartment_building_population_per_ha zensus
+			ON ST_Within(bld.geom_point, zensus.geom)
+			WHERE building in ('commercial', 'retail', 'office', 'hotel')
+			AND zensus.zensus_population_id in (
+				-- census cell ids that no residential building geometry
+				-- intersects (anti-join: keep cells without a match).
+				-- NOTE(review): there is no explicit population filter here;
+				-- presumably the census table only lists populated cells.
+				SELECT zensus.zensus_population_id
+				FROM society.egon_destatis_zensus_apartment_building_population_per_ha zensus
+				LEFT OUTER JOIN openstreetmap.osm_buildings_residential bld
+				ON ST_Intersects(bld.geom_building, zensus.geom)
+				WHERE bld.id IS NULL
+			)
+		) bld2
+		-- keep only rows that carry both a building id and a census grid id
+		WHERE bld2.id IS NOT NULL AND bld2.grid_id IS NOT NULL
+	)
+;