From 018cf74f4cc71af10dd2f1cf1b1f1e10fbe95899 Mon Sep 17 00:00:00 2001
From: Michael Aydinbas
Date: Thu, 1 Feb 2024 12:31:22 +0100
Subject: [PATCH] add genesis parse code for regio, too, for the moment.

---
Review notes (commentary only; placed after the three-dash separator so it is
not part of the commit message or of the diff payload below):

* Replaces the `pass` stub of `Table.parse_regio_table` with an implementation
  that reshapes an ffcsv DataFrame into one frame of time, attribute and value
  columns.
* Time: a one-column frame whose header is the first value of `Zeit_Label` and
  whose values are taken from `Zeit`.
* Attributes: every column whose name contains "Auspraegung_Label" is kept and
  renamed, by position, to the first-row values of the columns whose name
  contains "Merkmal_Label".
* Values: every column whose name contains "__" is kept and renamed to the
  second "__"-separated segment of its name, dropping the leading code and the
  trailing unit.
* The three pieces are concatenated column-wise (`axis=1`) and returned.
* NOTE(review): all new headers are read from the first row only (`.iloc[0]`),
  so an empty frame raises instead of returning an empty result, and labels
  are assumed to be constant across rows -- confirm this holds for every
  Regionalstatistik table.
* NOTE(review): the positional rename assumes the "Merkmal_Label" and
  "Auspraegung_Label" columns come in equal number and matching order --
  confirm against the ffcsv layout.
* NOTE(review): per the subject this presumably mirrors the existing GENESIS
  parser "for the moment"; a shared helper may be the eventual home -- confirm.

 src/pystatis/table.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/pystatis/table.py b/src/pystatis/table.py
index cad56b2..f5cb395 100644
--- a/src/pystatis/table.py
+++ b/src/pystatis/table.py
@@ -125,4 +125,20 @@ def parse_zensus_table(data: pd.DataFrame) -> pd.DataFrame:
     @staticmethod
     def parse_regio_table(data: pd.DataFrame) -> pd.DataFrame:
         """Parse Regionalstatistik table ffcsv format into a more readable format"""
-        pass
+        # Extracts time column with name from first element of Zeit_Label column
+        time = pd.DataFrame({data["Zeit_Label"].iloc[0]: data["Zeit"]})
+
+        # Extracts new column names from first values of the Merkmal_Label columns
+        # and assigns these to the relevant attribute columns (Auspraegung_Label)
+        attributes = data.filter(like="Auspraegung_Label")
+        attributes.columns = data.filter(like="Merkmal_Label").iloc[0].tolist()
+
+        # Selects all columns containing the values
+        values = data.filter(like="__")
+
+        # Given a name like BEV036__Bevoelkerung_in_Hauptwohnsitzhaushalten__1000
+        # extracts the readable label and omit both the code and the unit
+        values.columns = [name.split("__")[1] for name in values.columns]
+
+        pretty_data = pd.concat([time, attributes, values], axis=1)
+        return pretty_data