From c469a924a80ae01266988146cef039ca4499469d Mon Sep 17 00:00:00 2001 From: ekatef Date: Sun, 5 Nov 2023 22:27:25 +0300 Subject: [PATCH 01/47] Add a headers option for direct download --- scripts/_helpers.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index ea98d629b..b1d45cb7a 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -386,7 +386,9 @@ def aggregate_costs(n, flatten=False, opts=None, existing_only=False): return costs -def progress_retrieve(url, file, data=None, disable_progress=False, roundto=1.0): +def progress_retrieve( + url, file, data=None, headers=None, disable_progress=False, roundto=1.0 +): """ Function to download data from a url with a progress bar progress in retrieving data. @@ -418,6 +420,11 @@ def dlProgress(count, blockSize, totalSize, roundto=roundto): if data is not None: data = urllib.parse.urlencode(data).encode() + if headers: + opener = urllib.request.build_opener() + opener.addheaders = [("User-agent", "Mozilla/5.0")] + urllib.request.install_opener(opener) + urllib.request.urlretrieve(url, file, reporthook=dlProgress, data=data) From 4e7cb638138ebfc14b558f96ea4dfb4fe2691410 Mon Sep 17 00:00:00 2001 From: ekatef Date: Sun, 5 Nov 2023 22:28:25 +0300 Subject: [PATCH 02/47] Use a headers option when loading data --- scripts/retrieve_databundle_light.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 7b9b6e125..dff5a6666 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -337,7 +337,9 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F try: logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") - progress_retrieve(url, file_path, disable_progress=disable_progress) + progress_retrieve( + url, file_path, headers=True, disable_progress=disable_progress + ) # if the file is a zipfile and unzip is enabled # then unzip it and remove the original file @@ -394,7 +396,11 @@ def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=Fal logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") progress_retrieve( - url, file_path, data=postdata, disable_progress=disable_progress + url, + file_path, + data=postdata, + header=header, + disable_progress=disable_progress, ) # if the file is a zipfile and unzip is enabled From b40230c737a4021982544ccbf8ece1a4286137eb Mon Sep 17 00:00:00 2001 From: ekatef Date: Sun, 5 Nov 2023 22:28:56 +0300 Subject: [PATCH 03/47] Read unzip option from the file name --- scripts/retrieve_databundle_light.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index dff5a6666..c8ee31f3d 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -343,7 +343,7 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F # if the file is a zipfile and unzip is enabled # then unzip it and remove the original file - if config.get("unzip", False): + if config.get("unzip", False) or bool(re.search(".zip$", file_path)): with ZipFile(file_path, "r") as zipfile: zipfile.extractall(config["destination"]) From 7e49b26bcada70498d4ab22c3e7035c46c0439bf Mon Sep 17 00:00:00 2001 From: ekatef Date: Sun, 5 Nov 2023 22:34:04 +0300 Subject: [PATCH 04/47] Add load config parameters for hydrobasins --- 
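An aside on the download plumbing introduced in PATCHes 01-02 above: urllib's urlretrieve() cannot send custom headers by itself, so the patch installs a module-level opener first; a later patch in this series uses it to send a browser-like User-agent, which data.hydrosheds.org appears to require. A minimal self-contained sketch of the same pattern (the helper name `fetch` is illustrative, not part of the patches):

import urllib.request


def fetch(url, file, headers=None):
    # urlretrieve() accepts no headers argument, so install a global opener
    # whose headers are attached to every subsequent urllib request
    if headers:
        opener = urllib.request.build_opener()
        opener.addheaders = headers
        urllib.request.install_opener(opener)
    urllib.request.urlretrieve(url, file)


fetch(
    "https://data.hydrosheds.org/file/HydroBASINS/standard/hybas_af_lev06_v1c.zip",
    "hybas_af_lev06_v1c.zip",
    headers=[("User-agent", "Mozilla/5.0")],
)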
configs/bundle_config.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/configs/bundle_config.yaml b/configs/bundle_config.yaml index 987b2c524..62bbb289e 100644 --- a/configs/bundle_config.yaml +++ b/configs/bundle_config.yaml @@ -141,6 +141,17 @@ databundles: - data/hydrobasins/hybas_world_lev04_v1c.shp - data/hydrobasins/hybas_world_lev05_v1c.shp + # global data for hydrobasins + bundle_hydrobasins: + countries: [Earth] + tutorial: false + category: common + destination: "data/hydrobasins" + urls: + direct: https://data.hydrosheds.org/file/HydroBASINS/standard/hybas_af_lev06_v1c.zip + output: + - data/hydrobasins/hybas_world_lev06_v1c.shp + # data bundle containing the data of the data folder common to all regions of the world bundle_data_earth: countries: [Earth] From 3645dc88550935ea2cf30e0624f185908057c618 Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 8 Nov 2023 02:18:46 +0300 Subject: [PATCH 05/47] Add unzip argument --- configs/bundle_config.yaml | 1 + scripts/retrieve_databundle_light.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/bundle_config.yaml b/configs/bundle_config.yaml index 62bbb289e..f2f22b742 100644 --- a/configs/bundle_config.yaml +++ b/configs/bundle_config.yaml @@ -149,6 +149,7 @@ databundles: destination: "data/hydrobasins" urls: direct: https://data.hydrosheds.org/file/HydroBASINS/standard/hybas_af_lev06_v1c.zip + unzip: true output: - data/hydrobasins/hybas_world_lev06_v1c.shp diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index c8ee31f3d..dff5a6666 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -343,7 +343,7 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F # if the file is a zipfile and unzip is enabled # then unzip it and remove the original file - if config.get("unzip", False) or bool(re.search(".zip$", file_path)): + if config.get("unzip", False): with ZipFile(file_path, "r") as zipfile: zipfile.extractall(config["destination"]) From cc51b7ec3400c1b5abf9fd1f46056b36ca548b85 Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 8 Nov 2023 02:19:21 +0300 Subject: [PATCH 06/47] Replace hard-coding --- scripts/_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index b1d45cb7a..fa06771c8 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -422,7 +422,7 @@ def dlProgress(count, blockSize, totalSize, roundto=roundto): if headers: opener = urllib.request.build_opener() - opener.addheaders = [("User-agent", "Mozilla/5.0")] + opener.addheaders = headers urllib.request.install_opener(opener) urllib.request.urlretrieve(url, file, reporthook=dlProgress, data=data) From 6798cb1726cb17a7f79f2be58bba760cb49d3294 Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 8 Nov 2023 02:19:49 +0300 Subject: [PATCH 07/47] Add a dedicated function to retrieve hydrobasins --- scripts/retrieve_databundle_light.py | 58 ++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index dff5a6666..4bed0607c 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -356,6 +356,64 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F return True +def download_and_unzip_hydrobasins( + config, rootpath, hot_run=True, disable_progress=False +): + """ + download_and_unzip_basins(config, rootpath, 
dest_path, hot_run=True, + disable_progress=False) + + Function to download and unzip the data for hydrobasins. + + Inputs + ------ + config : Dict + Configuration data for the category to download + rootpath : str + Absolute path of the repository + hot_run : Bool (default True) + When true the data are downloaded + When false, the workflow is run without downloading and unzipping + disable_progress : Bool (default False) + When true the progress bar to download data is disabled + + Outputs + ------- + True when download is successful, False otherwise + """ + resource = config["category"] + url = config["urls"]["hydrobasins"] + + file_path = os.path.join(config["destination"], os.path.basename(url)) + + if hot_run: + if os.path.exists(file_path): + os.remove(file_path) + + try: + logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") + progress_retrieve( + url, + file_path, + headers=[("User-agent", "Mozilla/5.0")], + disable_progress=disable_progress, + ) + + # if the file is a zipfile and unzip is enabled + # then unzip it and remove the original file + if config.get("unzip", False): + with ZipFile(file_path, "r") as zipfile: + zipfile.extractall(config["destination"]) + + os.remove(file_path) + logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") + except: + logger.warning(f"Failed download resource '{resource}' from cloud '{url}'.") + return False + + return True + + def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=False): """ download_and_unzip_post(config, rootpath, dest_path, hot_run=True, From 52c901692786bfdc3212e392b9ed8cf6be3de508 Mon Sep 17 00:00:00 2001 From: ekatef Date: Thu, 9 Nov 2023 02:19:57 +0300 Subject: [PATCH 08/47] Generalize hydrobasins url --- configs/bundle_config.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/bundle_config.yaml b/configs/bundle_config.yaml index f2f22b742..ee07271a0 100644 --- a/configs/bundle_config.yaml +++ b/configs/bundle_config.yaml @@ -148,7 +148,8 @@ databundles: category: common destination: "data/hydrobasins" urls: - direct: https://data.hydrosheds.org/file/HydroBASINS/standard/hybas_af_lev06_v1c.zip + hydrobasins: https://data.hydrosheds.org/file/HydroBASINS/standard/ + suffixes: ["af", "ar", "as", "au", "eu", "gr", "na", "sa", "si"] unzip: true output: - data/hydrobasins/hybas_world_lev06_v1c.shp From f3f32d816c64297c2889ed333dea7a82b83a61c7 Mon Sep 17 00:00:00 2001 From: ekatef Date: Sat, 11 Nov 2023 22:53:43 +0300 Subject: [PATCH 09/47] Generalize hydrobasing retrieval --- scripts/retrieve_databundle_light.py | 53 +++++++++++++++------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 4bed0607c..efaab2568 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -382,34 +382,39 @@ def download_and_unzip_hydrobasins( True when download is successful, False otherwise """ resource = config["category"] - url = config["urls"]["hydrobasins"] - - file_path = os.path.join(config["destination"], os.path.basename(url)) - - if hot_run: - if os.path.exists(file_path): - os.remove(file_path) + url_templ = config["urls"]["hydrobasins"] # + "hybas_af_lev01_v1c.zip" + suffix_list = config["urls"]["suffixes"] + # url = url + "hybas_" + suffix_list[2] + "_lev01_v1c.zip" + + for rg in suffix_list: + url = url_templ + "hybas_" + rg + "_lev01_v1c.zip" + file_path = os.path.join(config["destination"], os.path.basename(url)) + 
if hot_run: + if os.path.exists(file_path): + os.remove(file_path) - try: - logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") - progress_retrieve( - url, - file_path, - headers=[("User-agent", "Mozilla/5.0")], - disable_progress=disable_progress, - ) + try: + logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") + progress_retrieve( + url, + file_path, + headers=[("User-agent", "Mozilla/5.0")], + disable_progress=disable_progress, + ) - # if the file is a zipfile and unzip is enabled - # then unzip it and remove the original file - if config.get("unzip", False): - with ZipFile(file_path, "r") as zipfile: - zipfile.extractall(config["destination"]) + # if the file is a zipfile and unzip is enabled + # then unzip it and remove the original file + if config.get("unzip", False): + with ZipFile(file_path, "r") as zipfile: + zipfile.extractall(config["destination"]) os.remove(file_path) - logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") - except: - logger.warning(f"Failed download resource '{resource}' from cloud '{url}'.") - return False + logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") + except: + logger.warning( + f"Failed download resource '{resource}' from cloud '{url}'." + ) + return False return True From 79a071bbe50bc96171bcd36005b023762bca6047 Mon Sep 17 00:00:00 2001 From: ekatef Date: Sun, 12 Nov 2023 01:48:24 +0300 Subject: [PATCH 10/47] Add a basin level --- scripts/retrieve_databundle_light.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index efaab2568..967792083 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -382,12 +382,16 @@ def download_and_unzip_hydrobasins( True when download is successful, False otherwise """ resource = config["category"] - url_templ = config["urls"]["hydrobasins"] # + "hybas_af_lev01_v1c.zip" + url_templ = config["urls"]["hydrobasins"] suffix_list = config["urls"]["suffixes"] - # url = url + "hybas_" + suffix_list[2] + "_lev01_v1c.zip" + + basins_fl = snakemake.config["renewable"]["hydro"]["resource"]["hydrobasins"] + level_pattern = r".*?lev(.*)_.*" + level_code = re.findall(level_pattern, basins_fl) for rg in suffix_list: - url = url_templ + "hybas_" + rg + "_lev01_v1c.zip" + # lev01_v1c.zip + url = url_templ + "hybas_" + rg + "_lev" + level_code + "_v1c.zip" file_path = os.path.join(config["destination"], os.path.basename(url)) if hot_run: if os.path.exists(file_path): From a644742136d5e7c54e35ddf9d38f0cd29d54f1e2 Mon Sep 17 00:00:00 2001 From: Ekaterina Date: Wed, 15 Nov 2023 03:31:18 +0300 Subject: [PATCH 11/47] Release a restriction of xarray version (#924) --- envs/environment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envs/environment.yaml b/envs/environment.yaml index a4f3068bd..6730fa74c 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -32,7 +32,7 @@ dependencies: - pandas - geopandas>=0.11.0 - fiona!=1.8.22 -- xarray<=2023.8.0 +- xarray - netcdf4 - networkx - scipy From ef578941263a22ee34ab95a3906b83172bccecab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Crist=C3=B3v=C3=A3o=20Neves=20Ferreira?= Date: Wed, 15 Nov 2023 11:32:31 +0100 Subject: [PATCH 12/47] fix: :bug: Fix IDs of 9 countries, using the smallest geometry that contains them (#921) * fix: :bug: Fix IDs of 9 countries, using the smallest geometry that contains them * fix: :bug: Remove commas from YAML 
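Stepping back to the hydrobasins retrieval generalized in PATCHes 08-10 above: the per-region archives all follow one naming scheme, so the whole download list can be derived from the base URL, the region suffixes and the basin level. A sketch of that scheme, with illustrative variable names (Python's `:02d` format spec produces the same zero-padding as the manual `"0" + str(level)` branch that a later patch in the series introduces and then replaces):

# Build the per-region HydroBASINS archive URLs for one basin level.
base_url = "https://data.hydrosheds.org/file/HydroBASINS/standard/"
suffixes = ["af", "ar", "as", "au", "eu", "gr", "na", "sa", "si"]
level = 6  # hydrobasins level; file names zero-pad it to two digits

urls = [f"{base_url}hybas_{rg}_lev{level:02d}_v1c.zip" for rg in suffixes]
# urls[0] == ".../standard/hybas_af_lev06_v1c.zip"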
--- configs/osm_config.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/configs/osm_config.yaml b/configs/osm_config.yaml index 8c14e8cb5..fe78b699b 100644 --- a/configs/osm_config.yaml +++ b/configs/osm_config.yaml @@ -367,3 +367,12 @@ iso_to_geofk_dict: VC: "central-america" # Saint Vincent e Grenadine KN: "central-america" # Saint Kitts e Nevis GD: "central-america" # Grenada + AW: "central-america" # Aruba + AX: "finland" # Aland + BM: "north-america" # Bermuda + CW: "central-america" # Curaçao + KY: "central-america" # Cayman Islands + HK: "china" # Hong Kong + MO: "china" # Macau + SX: "central-america" # Sint Maarten + TC: "central-america" # Turks and Caicos Islands From a19ab90f0fccaa441d6fcbf28464e1b64dfd7353 Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 15 Nov 2023 20:41:23 +0300 Subject: [PATCH 13/47] Remove an unnecessary headers argument --- scripts/retrieve_databundle_light.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 967792083..dc189078a 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -337,9 +337,7 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F try: logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") - progress_retrieve( - url, file_path, headers=True, disable_progress=disable_progress - ) + progress_retrieve(url, file_path, disable_progress=disable_progress) # if the file is a zipfile and unzip is enabled # then unzip it and remove the original file @@ -466,7 +464,6 @@ def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=Fal url, file_path, data=postdata, - header=header, disable_progress=disable_progress, ) From de5432182c4cb9e81c0f8a34564be6105d40d7dd Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 15 Nov 2023 21:13:24 +0300 Subject: [PATCH 14/47] Replace a path to hydrobasins with a level parameter --- config.default.yaml | 2 +- config.tutorial.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index c977d8f96..08b5ba88c 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -288,7 +288,7 @@ renewable: cutout: cutout-2013-era5 resource: method: hydro - hydrobasins: data/hydrobasins/hybas_world_lev06_v1c.shp + hydrobasins_level: 6 flowspeed: 1.0 # m/s # weight_with_height: false # show_progress: true diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 7d0ba04c9..3c99fddea 100644 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -286,7 +286,7 @@ renewable: cutout: cutout-2013-era5-tutorial resource: method: hydro - hydrobasins: data/hydrobasins/hybas_world_lev04_v1c.shp + hydrobasins_level: 4 flowspeed: 1.0 # m/s # weight_with_height: false # show_progress: true From bc1e02d2e0b5a767676d47bac2348f4c9db06bb2 Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 15 Nov 2023 21:14:54 +0300 Subject: [PATCH 15/47] Add hydrobasins license to the docstring --- scripts/retrieve_databundle_light.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index dc189078a..5d2ca133f 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -361,7 +361,18 @@ def download_and_unzip_hydrobasins( download_and_unzip_basins(config, rootpath, dest_path, hot_run=True, disable_progress=False) - Function to download and 
unzip the data for hydrobasins.
+    Function to download and unzip the data for hydrobasins from the HydroBASINS
+    database available via https://www.hydrosheds.org/products/hydrobasins
+
+    We are using data from the HydroSHEDS version 1 database
+    which is © World Wildlife Fund, Inc. (2006-2022) and has been used herein under license.
+    WWF has not evaluated our data pipeline and therefore gives no warranty regarding its
+    accuracy, completeness, currency or suitability for any particular purpose.
+    Portions of the HydroSHEDS v1 database incorporate data which are the intellectual property
+    rights of © USGS (2006-2008), NASA (2000-2005), ESRI (1992-1998), CIAT (2004-2006),
+    UNEP-WCMC (1993), WWF (2004), Commonwealth of Australia (2007), and Her Royal Majesty
+    and the British Crown and are used under license. The HydroSHEDS v1 database and
+    more information are available at https://www.hydrosheds.org.
 
     Inputs
     ------

From 46309ade954ee9288c0a0ae342c169042b051e31 Mon Sep 17 00:00:00 2001
From: ekatef
Date: Wed, 15 Nov 2023 21:17:07 +0300
Subject: [PATCH 16/47] Update reading a hydrobasins level in a hydrobasins
 loading script

---
 scripts/retrieve_databundle_light.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py
index 5d2ca133f..4cd8f6b74 100644
--- a/scripts/retrieve_databundle_light.py
+++ b/scripts/retrieve_databundle_light.py
@@ -394,9 +394,11 @@ def download_and_unzip_hydrobasins(
     url_templ = config["urls"]["hydrobasins"]
     suffix_list = config["urls"]["suffixes"]
 
-    basins_fl = snakemake.config["renewable"]["hydro"]["resource"]["hydrobasins"]
-    level_pattern = r".*?lev(.*)_.*"
-    level_code = re.findall(level_pattern, basins_fl)
+    level_code = snakemake.config["renewable"]["hydro"]["resource"]["hydrobasins_level"]
+    if level_code <= 9:
+        level_code = "0" + str(level_code)
+    else:
+        level_code = str(level_code)
 
     for rg in suffix_list:
-        # lev01_v1c.zip
         url = url_templ + "hybas_" + rg + "_lev" + level_code + "_v1c.zip"
         file_path = os.path.join(config["destination"], os.path.basename(url))

From 7aa5f007160f990187d675f1272020d2cf1c72a2 Mon Sep 17 00:00:00 2001
From: ekatef
Date: Wed, 15 Nov 2023 21:17:37 +0300
Subject: [PATCH 17/47] Remove a redundant check

---
 scripts/retrieve_databundle_light.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py
index 4cd8f6b74..daad7852c 100644
--- a/scripts/retrieve_databundle_light.py
+++ b/scripts/retrieve_databundle_light.py
@@ -417,11 +417,8 @@ def download_and_unzip_hydrobasins(
                 disable_progress=disable_progress,
             )
 
-            # if the file is a zipfile and unzip is enabled
-            # then unzip it and remove the original file
-            if config.get("unzip", False):
-                with ZipFile(file_path, "r") as zipfile:
-                    zipfile.extractall(config["destination"])
+            with ZipFile(file_path, "r") as zipfile:
+                zipfile.extractall(config["destination"])
 
             os.remove(file_path)

From 84cef6e1e128d9189103cfb2c44bc36e3c225ceb Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 16 Nov 2023 17:00:41 +0100
Subject: [PATCH 18/47] docs(contributor): contrib-readme-action has updated
 readme (#925)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 README.md | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index c8852142b..b0b264309 100644
--- a/README.md
+++ b/README.md
@@ -354,6 +354,13 @@ The documentation
is available here: [documentation](https://pypsa-earth.readthe
[contributor-grid diff: the HTML card markup of this README table is not
recoverable here; the hunks add a card for AndreCNF (André Cristóvão Neves
Ferreira) and reflow the existing cards for EmreYorat, GridGrapher,
HanaElattar, jarry7, Sylvain Quoilin, juli-a-ko and stephenjlee]

From e72cfff948c8ba53033e41bee98576e78c7ad4c9 Mon Sep 17 00:00:00 2001
From: ekatef
Date: Sat, 18 Nov 2023 01:54:38 +0300
Subject: [PATCH 19/47] Merge regional hydrobasins files into a single file

---
 scripts/retrieve_databundle_light.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py
index daad7852c..0b82a699a 100644
--- a/scripts/retrieve_databundle_light.py
+++ b/scripts/retrieve_databundle_light.py
@@ -762,6 +762,33 @@ def datafiles_retrivedatabundle(config):
         if not downloaded_bundle:
             logger.error(f"Bundle {b_name} cannot be downloaded")
 
+    basins_path = config_bundles["bundle_hydrobasins"]["destination"]
+
+    files_in_dir = [
+        f
+        for f in os.listdir(basins_path)
+        if os.path.isfile(os.path.join(basins_path, f))
+    ]
+
+    hydrobasins_level = snakemake.config["renewable"]["hydro"]["resource"][
+        "hydrobasins_level"
+    ]
+    if hydrobasins_level <= 9:
+        hydrobasins_level = "0" + str(hydrobasins_level)
+    else:
+        hydrobasins_level = str(hydrobasins_level)
+
+    regex_to_look_for = re.compile(".*_lev" + hydrobasins_level + "_v1c.shp$")
+    files_to_merge = list(filter(regex_to_look_for.match, files_in_dir))
+
+    gpdf_list = [None] * len(files_to_merge)
+    for i in range(0, len(files_to_merge)):
+        gpdf_list[i] = gpd.read_file(os.path.join(basins_path, files_to_merge[i]))
+    fl_merged = gpd.GeoDataFrame(pd.concat(gpdf_list))
+    fl_merged.to_file(
+        os.path.join(basins_path, "hybas_world_lev" + hydrobasins_level + "_v1c.shp")
+    )
+
     logger.info(
         "Bundle successfully loaded and unzipped:\n\t"
         + "\n\t".join(bundles_to_download)

From 0d3621818c96db08de154f012e493069856efe6f Mon Sep 17 00:00:00 2001
From: ekatef
Date: Sat, 18 Nov 2023 01:55:15 +0300
Subject: [PATCH 20/47] Add imports

---
 scripts/retrieve_databundle_light.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py
index 0b82a699a..d16e6fe67 100644
--- a/scripts/retrieve_databundle_light.py
+++ b/scripts/retrieve_databundle_light.py
@@ -85,6 +85,8 @@
 import re
 from zipfile import ZipFile
 
+import geopandas as gpd
+import pandas as pd
 import yaml
 from _helpers import (
     configure_logging,

From bd5877a0630ca9f36d87656d6d54685403ab886e Mon Sep 17 00:00:00 2001
From: ekatef
Date: Sat, 18 Nov 2023 02:00:37 +0300
Subject: [PATCH 21/47] Fix bundle config for hydrobasins

---
 configs/bundle_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/bundle_config.yaml b/configs/bundle_config.yaml
index ee07271a0..2bef92dc8 100644
--- a/configs/bundle_config.yaml
+++ b/configs/bundle_config.yaml
@@ -152,7 +152,7 @@ databundles:
       suffixes: ["af", "ar", "as", "au", "eu", "gr", "na", "sa", "si"]
     unzip: true
     output:
-      - data/hydrobasins/hybas_world_lev06_v1c.shp
+      - data/hydrobasins/*.shp
 
   # data bundle containing the data of the data folder common to all regions of the world
   bundle_data_earth:

From 3b2153f5bcb5afb2b2f6e5fde06dd7d7bcdd77a5 Mon Sep 17 00:00:00 2001
From: ekatef
Date: Sat, 18 Nov 2023 11:02:42 +0300
Subject: [PATCH 22/47] Improve an info message

---
 scripts/retrieve_databundle_light.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py
index d16e6fe67..55390b94e 100644
--- a/scripts/retrieve_databundle_light.py
+++ b/scripts/retrieve_databundle_light.py
@@ -411,7 +411,9 @@ def download_and_unzip_hydrobasins(
                 os.remove(file_path)
 
         try:
-
logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") + logger.info( + f"Downloading resource '{resource}' for hydrobasins in '{rg}' from cloud '{url}'." + ) progress_retrieve( url, file_path, From 4db8f471573336381cc451af25a4cfc9af444bc8 Mon Sep 17 00:00:00 2001 From: ekatef Date: Sat, 18 Nov 2023 11:03:12 +0300 Subject: [PATCH 23/47] Put merge into a function --- scripts/retrieve_databundle_light.py | 59 ++++++++++++++++------------ 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 55390b94e..2d4359a14 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -701,6 +701,36 @@ def datafiles_retrivedatabundle(config): return listoutputs + def merge_hydrobasins_shape(config): + basins_path = config_bundles["bundle_hydrobasins"]["destination"] + + files_in_dir = [ + f + for f in os.listdir(basins_path) + if os.path.isfile(os.path.join(basins_path, f)) + ] + + hydrobasins_level = snakemake.config["renewable"]["hydro"]["resource"][ + "hydrobasins_level" + ] + if hydrobasins_level <= 9: + hydrobasins_level = "0" + str(hydrobasins_level) + else: + hydrobasins_level = str(hydrobasins_level) + + regex_to_look_for = re.compile(".*_lev" + hydrobasins_level + "_v1c.shp$") + files_to_merge = list(filter(regex_to_look_for.match, files_in_dir)) + + gpdf_list = [None] * len(files_to_merge) + for i in range(0, len(files_to_merge)): + gpdf_list[i] = gpd.read_file(os.path.join(basins_path, files_to_merge[i])) + fl_merged = gpd.GeoDataFrame(pd.concat(gpdf_list)) + fl_merged.to_file( + os.path.join( + basins_path, "hybas_world_lev" + hydrobasins_level + "_v1c.shp" + ) + ) + if __name__ == "__main__": if "snakemake" not in globals(): @@ -766,32 +796,9 @@ def datafiles_retrivedatabundle(config): if not downloaded_bundle: logger.error(f"Bundle {b_name} cannot be downloaded") - basins_path = config_bundles["bundle_hydrobasins"]["destination"] - - files_in_dir = [ - f - for f in os.listdir(basins_path) - if os.path.isfile(os.path.join(basins_path, f)) - ] - - hydrobasins_level = snakemake.config["renewable"]["hydro"]["resource"][ - "hydrobasins_level" - ] - if hydrobasins_level <= 9: - hydrobasins_level = "0" + str(hydrobasins_level) - else: - hydrobasins_level = str(hydrobasins_level) - - regex_to_look_for = re.compile(".*_lev" + hydrobasins_level + "_v1c.shp$") - files_to_merge = list(filter(regex_to_look_for.match, files_in_dir)) - - gpdf_list = [None] * len(files_to_merge) - for i in range(0, len(files_to_merge)): - gpdf_list[i] = gpd.read_file(os.path.join(basins_path, files_to_merge[i])) - fl_merged = gpd.GeoDataFrame(pd.concat(gpdf_list)) - fl_merged.to_file( - os.path.join(basins_path, "hybas_world_lev" + hydrobasins_level + "_v1c.shp") - ) + if "bundle_hydrobasins" in bundles_to_download: + logger.info("Merging regional hydrobasins files into a global shapefile") + merge_hydrobasins_shape(config=config_bundles["bundle_hydrobasins"]) logger.info( "Bundle successfully loaded and unzipped:\n\t" From 9501492320db747718ac70b6e520e6843130e95c Mon Sep 17 00:00:00 2001 From: ekatef Date: Sun, 19 Nov 2023 18:47:58 +0300 Subject: [PATCH 24/47] Assess files names using the format function --- scripts/retrieve_databundle_light.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 2d4359a14..138e3d111 100644 --- a/scripts/retrieve_databundle_light.py +++ 
b/scripts/retrieve_databundle_light.py @@ -397,13 +397,9 @@ def download_and_unzip_hydrobasins( suffix_list = config["urls"]["suffixes"] level_code = snakemake.config["renewable"]["hydro"]["resource"]["hydrobasins_level"] - if level_code <= 9: - level_code = "0" + str(level_code) - else: - level_code = str(level_code) + level_code = "{:02d}".format(int(level_code)) for rg in suffix_list: - # lev01_v1c.zip url = url_templ + "hybas_" + rg + "_lev" + level_code + "_v1c.zip" file_path = os.path.join(config["destination"], os.path.basename(url)) if hot_run: From 2c2d2fb2840ad2cab3bbf1e9b2f0609a97b70eaa Mon Sep 17 00:00:00 2001 From: ekatef Date: Sun, 19 Nov 2023 18:48:47 +0300 Subject: [PATCH 25/47] Improve basins merge --- scripts/retrieve_databundle_light.py | 51 ++++++++++++++-------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 138e3d111..c0e64ca71 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -697,35 +697,34 @@ def datafiles_retrivedatabundle(config): return listoutputs - def merge_hydrobasins_shape(config): - basins_path = config_bundles["bundle_hydrobasins"]["destination"] - files_in_dir = [ - f - for f in os.listdir(basins_path) - if os.path.isfile(os.path.join(basins_path, f)) - ] +def merge_hydrobasins_shape(config): + basins_path = config_bundles["bundle_hydrobasins"]["destination"] + hydrobasins_level = snakemake.config["renewable"]["hydro"]["resource"][ + "hydrobasins_level" + ] - hydrobasins_level = snakemake.config["renewable"]["hydro"]["resource"][ - "hydrobasins_level" - ] - if hydrobasins_level <= 9: - hydrobasins_level = "0" + str(hydrobasins_level) - else: - hydrobasins_level = str(hydrobasins_level) - - regex_to_look_for = re.compile(".*_lev" + hydrobasins_level + "_v1c.shp$") - files_to_merge = list(filter(regex_to_look_for.match, files_in_dir)) - - gpdf_list = [None] * len(files_to_merge) - for i in range(0, len(files_to_merge)): - gpdf_list[i] = gpd.read_file(os.path.join(basins_path, files_to_merge[i])) - fl_merged = gpd.GeoDataFrame(pd.concat(gpdf_list)) - fl_merged.to_file( - os.path.join( - basins_path, "hybas_world_lev" + hydrobasins_level + "_v1c.shp" - ) + mask_file = os.path.join( + basins_path, "hybas_*_lev{:02d}_v1c.shp".format(int(hydrobasins_level)) + ) + files_to_merge = glob.glob(mask_file) + + gpdf_list = [None] * len(files_to_merge) + logger.info("Reading hydrobasins files \n\r") + for i in tqdm(range(0, len(files_to_merge))): + gpdf_list[i] = gpd.read_file(files_to_merge[i]) + fl_merged = gpd.GeoDataFrame(pd.concat(gpdf_list)) + logger.info( + "Merging single files into:\n\t" + + "hybas_world_lev" + + str(hydrobasins_level) + + "_v1c.shp" + ) + fl_merged.to_file( + os.path.join( + basins_path, "hybas_world_lev{:02d}_v1c.shp".format(int(hydrobasins_level)) ) + ) if __name__ == "__main__": From 3793fb4b49187495c5cf85bcccc2ebad5bc0dbed Mon Sep 17 00:00:00 2001 From: ekatef Date: Sun, 19 Nov 2023 18:48:58 +0300 Subject: [PATCH 26/47] Add imports --- scripts/retrieve_databundle_light.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index c0e64ca71..bf4c03021 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -80,6 +80,7 @@ - ``cutouts``: input data unzipped into the cutouts folder """ +import glob import logging import os import re @@ -96,6 +97,7 @@ sets_path_to_root, ) from 
google_drive_downloader import GoogleDriveDownloader as gdd +from tqdm import tqdm logger = create_logger(__name__) From 4273c4312631ad89887d7ad655d84da7760a9dce Mon Sep 17 00:00:00 2001 From: ekatef Date: Mon, 20 Nov 2023 00:30:50 +0300 Subject: [PATCH 27/47] Fix configuration setup for basins level --- config.default.yaml | 2 +- config.tutorial.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index 08b5ba88c..d6ee37e98 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -286,9 +286,9 @@ renewable: extendable: true hydro: cutout: cutout-2013-era5 + hydrobasins_level: 6 resource: method: hydro - hydrobasins_level: 6 flowspeed: 1.0 # m/s # weight_with_height: false # show_progress: true diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 3c99fddea..3532101a1 100644 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -284,9 +284,9 @@ renewable: extendable: true hydro: cutout: cutout-2013-era5-tutorial + hydrobasins_level: 4 resource: method: hydro - hydrobasins_level: 4 flowspeed: 1.0 # m/s # weight_with_height: false # show_progress: true From 74d0402e483c25c52fb60f00995642b01b2a57ef Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 22 Nov 2023 00:21:55 +0300 Subject: [PATCH 28/47] Fix duplicates --- scripts/retrieve_databundle_light.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index bf4c03021..f79b97523 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -715,7 +715,9 @@ def merge_hydrobasins_shape(config): logger.info("Reading hydrobasins files \n\r") for i in tqdm(range(0, len(files_to_merge))): gpdf_list[i] = gpd.read_file(files_to_merge[i]) - fl_merged = gpd.GeoDataFrame(pd.concat(gpdf_list)) + fl_merged = gpd.GeoDataFrame(pd.concat(gpdf_list)).drop_duplicates( + subset="HYBAS_ID", ignore_index=True + ) logger.info( "Merging single files into:\n\t" + "hybas_world_lev" From 075727a7b632c0462eedf87ba2cb016709f6e9de Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 22 Nov 2023 00:22:56 +0300 Subject: [PATCH 29/47] Fix reading from config --- scripts/retrieve_databundle_light.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index f79b97523..fb8bb8801 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -702,9 +702,7 @@ def datafiles_retrivedatabundle(config): def merge_hydrobasins_shape(config): basins_path = config_bundles["bundle_hydrobasins"]["destination"] - hydrobasins_level = snakemake.config["renewable"]["hydro"]["resource"][ - "hydrobasins_level" - ] + hydrobasins_level = snakemake.config["renewable"]["hydro"]["hydrobasins_level"] mask_file = os.path.join( basins_path, "hybas_*_lev{:02d}_v1c.shp".format(int(hydrobasins_level)) From e21c150a71148f1283e61f636351ab707fa0df26 Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 22 Nov 2023 00:46:33 +0300 Subject: [PATCH 30/47] Get hydrobasins back to the config --- config.default.yaml | 1 + config.tutorial.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/config.default.yaml b/config.default.yaml index d6ee37e98..5709244aa 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -289,6 +289,7 @@ renewable: hydrobasins_level: 6 resource: method: hydro + hydrobasins: data/hydrobasins/hybas_world_lev{hydrobasins_level06_v1c.shp flowspeed: 1.0 # m/s # 
weight_with_height: false # show_progress: true diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 3532101a1..358176790 100644 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -287,6 +287,7 @@ renewable: hydrobasins_level: 4 resource: method: hydro + hydrobasins: data/hydrobasins/hybas_world_lev{hydrobasins_level04_v1c.shp flowspeed: 1.0 # m/s # weight_with_height: false # show_progress: true From 5f77c9814d942250371fb8959869370818a8aa48 Mon Sep 17 00:00:00 2001 From: Emre_Yorat <62134151+Emre-Yorat89@users.noreply.github.com> Date: Wed, 22 Nov 2023 17:59:39 +0300 Subject: [PATCH 31/47] Retrieve databundle light data size check (#911) * retrieve_databundle size check commit #1 * retrieve_databundle size check commit #2 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Revised get_best_bundles_by_category() function * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * retrieve_databundle_light PR update * release note is added to PR911 --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Davide Fioriti <67809479+davide-f@users.noreply.github.com> --- doc/release_notes.rst | 2 ++ scripts/retrieve_databundle_light.py | 37 ++++++++++++++++------------ 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 5a48de0c3..3f50f3920 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -18,6 +18,8 @@ E.g. if a new rule becomes available describe how to use it `snakemake -j1 run_t * Function added in clean_osm_data script to allow the use of custom network data instead or on-top of OSM data. `PR #842 <'https://github.com/pypsa-meets-earth/pypsa-earth/pull/842>`__ +* Improve retrieve_databundle to prioritize smallest databundles `PR #911 `__ + PyPSA-Earth 0.2.3 ================= diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 7b9b6e125..c05586642 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -85,6 +85,7 @@ import re from zipfile import ZipFile +import pandas as pd import yaml from _helpers import ( configure_logging, @@ -483,28 +484,32 @@ def get_best_bundles_by_category( List of bundles to download """ # dictionary with the number of match by configuration for tutorial/non-tutorial configurations - dict_n_matched = { - bname: config_bundles[bname]["n_matched"] - for bname in config_bundles - if config_bundles[bname]["category"] == category - and config_bundles[bname].get("tutorial", False) == tutorial - and _check_disabled_by_opt(config_bundles[bname], config_enable) != ["all"] - } + df_matches = pd.DataFrame(columns=["bundle_name", "bundle_size", "n_matched"]) + + for bname, bvalue in config_bundles.items(): + if ( + bvalue["category"] == category + and bvalue.get("tutorial", False) == tutorial + and _check_disabled_by_opt(bvalue, config_enable) != ["all"] + ): + df_matches.loc[bname] = [ + bname, + len(bvalue["countries"]), + bvalue["n_matched"], + ] - returned_bundles = [] + df_matches["neg_bundle_size"] = -df_matches["bundle_size"] + df_matches.sort_values( + by=["n_matched", "neg_bundle_size"], inplace=True, ascending=False + ) - # check if non-empty dictionary - if dict_n_matched: - # if non-empty, then pick bundles until all countries are selected - # or no more bundles are found - dict_sort = sorted(dict_n_matched.items(), key=lambda d: 
d[1]) + returned_bundles = [] + if not df_matches.empty: current_matched_countries = [] remaining_countries = set(country_list) - for d_val in dict_sort: - bname = d_val[0] - + for bname in df_matches.index: cbundle_list = set(config_bundles[bname]["countries"]) # list of countries in the bundle that are not yet matched From 06a834083e4da21e4adb3c2f8194f31bc7095eff Mon Sep 17 00:00:00 2001 From: ekatef Date: Thu, 23 Nov 2023 01:31:36 +0300 Subject: [PATCH 32/47] Fix typos in a name of hydrobasins file --- config.default.yaml | 2 +- config.tutorial.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index 5709244aa..ba6a2a51d 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -289,7 +289,7 @@ renewable: hydrobasins_level: 6 resource: method: hydro - hydrobasins: data/hydrobasins/hybas_world_lev{hydrobasins_level06_v1c.shp + hydrobasins: data/hydrobasins/hybas_world_lev06_v1c.shp flowspeed: 1.0 # m/s # weight_with_height: false # show_progress: true diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 358176790..01702e73a 100644 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -287,7 +287,7 @@ renewable: hydrobasins_level: 4 resource: method: hydro - hydrobasins: data/hydrobasins/hybas_world_lev{hydrobasins_level04_v1c.shp + hydrobasins: data/hydrobasins/hybas_world_lev04_v1c.shp flowspeed: 1.0 # m/s # weight_with_height: false # show_progress: true From 2b908ffb9c5ba881d79bc2d7196efaddc08b965d Mon Sep 17 00:00:00 2001 From: Ekaterina Date: Thu, 23 Nov 2023 18:20:58 +0300 Subject: [PATCH 33/47] Implement Davide's suggestion Co-authored-by: Davide Fioriti <67809479+davide-f@users.noreply.github.com> --- configs/bundle_config.yaml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/configs/bundle_config.yaml b/configs/bundle_config.yaml index 2bef92dc8..8a9a7e0df 100644 --- a/configs/bundle_config.yaml +++ b/configs/bundle_config.yaml @@ -148,8 +148,15 @@ databundles: category: common destination: "data/hydrobasins" urls: - hydrobasins: https://data.hydrosheds.org/file/HydroBASINS/standard/ - suffixes: ["af", "ar", "as", "au", "eu", "gr", "na", "sa", "si"] + bundle_hydrobasins: + countries: [Earth] + tutorial: false + category: common + destination: "data/hydrobasins" + urls: + hydrobasins: + base_url: https://data.hydrosheds.org/file/HydroBASINS/standard/ + suffixes: ["af", "ar", "as", "au", "eu", "gr", "na", "sa", "si"] unzip: true output: - data/hydrobasins/*.shp From 89cd5d1739b5220877a3218905cc47e9a425b4fd Mon Sep 17 00:00:00 2001 From: ekatef Date: Thu, 23 Nov 2023 22:52:53 +0300 Subject: [PATCH 34/47] Update parameters reading --- scripts/retrieve_databundle_light.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 99e38b8a3..b19c73d51 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -395,8 +395,8 @@ def download_and_unzip_hydrobasins( True when download is successful, False otherwise """ resource = config["category"] - url_templ = config["urls"]["hydrobasins"] - suffix_list = config["urls"]["suffixes"] + url_templ = config["urls"]["hydrobasins"]["base_url"] + suffix_list = config["urls"]["hydrobasins"]["suffixes"] level_code = snakemake.config["renewable"]["hydro"]["resource"]["hydrobasins_level"] level_code = "{:02d}".format(int(level_code)) From 706001d4509d8def1b7a265140f513ee7841a895 Mon Sep 17 00:00:00 2001 From: 
ekatef Date: Fri, 24 Nov 2023 14:53:12 +0300 Subject: [PATCH 35/47] Fix typo --- configs/bundle_config.yaml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/configs/bundle_config.yaml b/configs/bundle_config.yaml index 8a9a7e0df..346f74d6c 100644 --- a/configs/bundle_config.yaml +++ b/configs/bundle_config.yaml @@ -142,12 +142,6 @@ databundles: - data/hydrobasins/hybas_world_lev05_v1c.shp # global data for hydrobasins - bundle_hydrobasins: - countries: [Earth] - tutorial: false - category: common - destination: "data/hydrobasins" - urls: bundle_hydrobasins: countries: [Earth] tutorial: false @@ -155,8 +149,8 @@ databundles: destination: "data/hydrobasins" urls: hydrobasins: - base_url: https://data.hydrosheds.org/file/HydroBASINS/standard/ - suffixes: ["af", "ar", "as", "au", "eu", "gr", "na", "sa", "si"] + base_url: https://data.hydrosheds.org/file/HydroBASINS/standard/ + suffixes: ["af", "ar", "as", "au", "eu", "gr", "na", "sa", "si"] unzip: true output: - data/hydrobasins/*.shp From f357d53a1d3c037cfbb7f388ab054a178e8e57be Mon Sep 17 00:00:00 2001 From: ekatef Date: Sat, 25 Nov 2023 19:02:18 +0300 Subject: [PATCH 36/47] Wrap-up download and unzip into a function --- scripts/retrieve_databundle_light.py | 116 +++++++++++++++++---------- 1 file changed, 72 insertions(+), 44 deletions(-) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index b19c73d51..a3ebf3d62 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -306,20 +306,24 @@ def download_and_unzip_protectedplanet( return True -def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=False): +def download_and_unpack( + url, + file_path, + resource, + destination, + headers=None, + hot_run=True, + unzip=True, + disable_progress=False, +): """ - download_and_unzip_direct(config, rootpath, dest_path, hot_run=True, - disable_progress=False) + download_and_unpack( url, file_path, resource, destination, headers=None, + hot_run=True, unzip=True, disable_progress=False) - Function to download the data by category from a direct url with no processing. - If in the configuration file the unzip is specified True, then the downloaded data is unzipped. 
+ A helper function to encapsulate retrieval and unzip Inputs ------ - config : Dict - Configuration data for the category to download - rootpath : str - Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded When false, the workflow is run without downloading and unzipping @@ -330,24 +334,21 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F ------- True when download is successful, False otherwise """ - resource = config["category"] - url = config["urls"]["direct"] - - file_path = os.path.join(config["destination"], os.path.basename(url)) - if hot_run: if os.path.exists(file_path): os.remove(file_path) try: logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") - progress_retrieve(url, file_path, disable_progress=disable_progress) + progress_retrieve( + url, file_path, headers=headers, disable_progress=disable_progress + ) # if the file is a zipfile and unzip is enabled # then unzip it and remove the original file - if config.get("unzip", False): + if unzip: with ZipFile(file_path, "r") as zipfile: - zipfile.extractall(config["destination"]) + zipfile.extractall(destination) os.remove(file_path) logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") @@ -358,6 +359,48 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F return True +def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=False): + """ + download_and_unzip_direct(config, rootpath, dest_path, hot_run=True, + disable_progress=False) + + Function to download the data by category from a direct url with no processing. + If in the configuration file the unzip is specified True, then the downloaded data is unzipped. + + Inputs + ------ + config : Dict + Configuration data for the category to download + rootpath : str + Absolute path of the repository + hot_run : Bool (default True) + When true the data are downloaded + When false, the workflow is run without downloading and unzipping + disable_progress : Bool (default False) + When true the progress bar to download data is disabled + + Outputs + ------- + True when download is successful, False otherwise + """ + resource = config["category"] + destination = config["destination"] + url = config["urls"]["direct"] + + file_path = os.path.join(config["destination"], os.path.basename(url)) + + unzip = config.get("unzip", False) + + download_and_unpack( + url=url, + file_path=file_path, + resource=resource, + hot_run=hot_run, + unzip=unzip, + disable_progress=disable_progress, + ) + + def download_and_unzip_hydrobasins( config, rootpath, hot_run=True, disable_progress=False ): @@ -395,42 +438,27 @@ def download_and_unzip_hydrobasins( True when download is successful, False otherwise """ resource = config["category"] + destination = config["destination"] url_templ = config["urls"]["hydrobasins"]["base_url"] suffix_list = config["urls"]["hydrobasins"]["suffixes"] - level_code = snakemake.config["renewable"]["hydro"]["resource"]["hydrobasins_level"] + level_code = snakemake.config["renewable"]["hydro"]["hydrobasins_level"] level_code = "{:02d}".format(int(level_code)) for rg in suffix_list: url = url_templ + "hybas_" + rg + "_lev" + level_code + "_v1c.zip" file_path = os.path.join(config["destination"], os.path.basename(url)) - if hot_run: - if os.path.exists(file_path): - os.remove(file_path) - - try: - logger.info( - f"Downloading resource '{resource}' for hydrobasins in '{rg}' from cloud '{url}'." 
- ) - progress_retrieve( - url, - file_path, - headers=[("User-agent", "Mozilla/5.0")], - disable_progress=disable_progress, - ) - - with ZipFile(file_path, "r") as zipfile: - zipfile.extractall(config["destination"]) - os.remove(file_path) - logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") - except: - logger.warning( - f"Failed download resource '{resource}' from cloud '{url}'." - ) - return False - - return True + download_and_unpack( + url=url, + file_path=file_path, + resource=resource, + destination=destination, + headers=[("User-agent", "Mozilla/5.0")], + hot_run=hot_run, + unzip=True, + disable_progress=disable_progress, + ) def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=False): From 9052d7923655277b4790f9100530fbd9b2b6b48c Mon Sep 17 00:00:00 2001 From: Davide Fioriti Date: Wed, 29 Nov 2023 12:41:21 +0100 Subject: [PATCH 37/47] Revise hydrobasin files to merge --- scripts/retrieve_databundle_light.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 2b0a23986..580ba20f7 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -736,15 +736,15 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): basins_path = config_hydrobasin["destination"] output_fl = config_hydrobasin["output"][0] - mask_file = os.path.join( - basins_path, "hybas_??_lev{:02d}_v1c.shp".format(int(hydrobasins_level)) - ) - files_to_merge = glob.glob(mask_file) + files_to_merge = [ + "hybas_{0:s}_lev{1:02d}_v1c.shp".format(suffix, hydrobasins_level) + for suffix in config_hydrobasin["urls"]["hydrobasins"]["suffixes"] + ] gpdf_list = [None] * len(files_to_merge) logger.info("Reading hydrobasins files \n\r") - for i in tqdm(range(0, len(files_to_merge))): - gpdf_list[i] = gpd.read_file(files_to_merge[i]) + for i, f_name in tqdm(enumerate(files_to_merge)): + gpdf_list[i] = gpd.read_file(os.path.join(basins_path, f_name)) fl_merged = gpd.GeoDataFrame(pd.concat(gpdf_list)).drop_duplicates( subset="HYBAS_ID", ignore_index=True ) From 35ada5e949a82985af7e9ef3adb9931323bdc9c7 Mon Sep 17 00:00:00 2001 From: Anton Achhammer <132910766+doneachh@users.noreply.github.com> Date: Wed, 29 Nov 2023 12:43:38 +0100 Subject: [PATCH 38/47] feat: remove "scripts." in snakefile (#926) * feat: remove "scripts." 
in snakefile * feat: adjust get_last_commit_message() * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- Snakefile | 10 +++++----- scripts/_helpers.py | 28 ++++++++++++++++++---------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/Snakefile b/Snakefile index 24899f6b6..e2b0b27c9 100644 --- a/Snakefile +++ b/Snakefile @@ -11,9 +11,9 @@ from shutil import copyfile, move from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider -from scripts._helpers import create_country_list, get_last_commit_message -from scripts.build_demand_profiles import get_load_paths_gegis -from scripts.retrieve_databundle_light import datafiles_retrivedatabundle +from _helpers import create_country_list, get_last_commit_message +from build_demand_profiles import get_load_paths_gegis +from retrieve_databundle_light import datafiles_retrivedatabundle from pathlib import Path HTTP = HTTPRemoteProvider() @@ -28,7 +28,7 @@ if "config" not in globals() or not config: # skip when used as sub-workflow configfile: "configs/bundle_config.yaml" -config.update({"git_commit": get_last_commit_message()}) +config.update({"git_commit": get_last_commit_message(".")}) # convert country list according to the desired region config["countries"] = create_country_list(config["countries"]) @@ -1043,7 +1043,7 @@ rule run_scenario: resources: mem_mb=5000, run: - from scripts.build_test_configs import create_test_config + from build_test_configs import create_test_config import yaml # get base configuration file from diff config diff --git a/scripts/_helpers.py b/scripts/_helpers.py index ea98d629b..b3bb8e90f 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -816,21 +816,29 @@ def filter_codes(c_list, iso_coding=True): return full_codes_list -def get_last_commit_message(): +def get_last_commit_message(path): """ - Function to get the last Git commit message. + Function to get the last PyPSA-Earth Git commit message. 
Returns ------- result : string """ + _logger = logging.getLogger(__name__) + last_commit_message = None + backup_cwd = os.getcwd() try: - result = subprocess.run( - ["git", "log", "-1", "--pretty=format:%H %s"], - capture_output=True, - text=True, + os.chdir(path) + last_commit_message = ( + subprocess.check_output( + ["git", "log", "-n", "1", "--pretty=format:%H %s"], + stderr=subprocess.STDOUT, + ) + .decode() + .strip() ) - return result.stdout.strip() - except Exception as e: - logger.warning(f"Error getting the last commit message: {e}") - return "" + except subprocess.CalledProcessError as e: + _logger.warning(f"Error executing Git: {e}") + + os.chdir(backup_cwd) + return last_commit_message From 1790d1881b24b958c3a58adc547f09bbd9a56dfa Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 29 Nov 2023 17:13:24 +0300 Subject: [PATCH 39/47] Fix file names --- config.default.yaml | 2 +- config.tutorial.yaml | 2 +- scripts/retrieve_databundle_light.py | 7 ++----- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index 285b441a6..dd05a9fe2 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -295,7 +295,7 @@ renewable: hydrobasins_level: 6 resource: method: hydro - hydrobasins: data/hydrobasins/hybas_world_lev06_v1c.shp + hydrobasins: data/hydrobasins/hybas_world.shp flowspeed: 1.0 # m/s # weight_with_height: false # show_progress: true diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 54552d3d2..6aba3e46c 100644 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -293,7 +293,7 @@ renewable: hydrobasins_level: 4 resource: method: hydro - hydrobasins: data/hydrobasins/hybas_world_lev04_v1c.shp + hydrobasins: data/hydrobasins/hybas_world.shp flowspeed: 1.0 # m/s # weight_with_height: false # show_progress: true diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index a3ebf3d62..1ba1b8938 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -735,6 +735,7 @@ def datafiles_retrivedatabundle(config): def merge_hydrobasins_shape(config): basins_path = config_bundles["bundle_hydrobasins"]["destination"] hydrobasins_level = snakemake.config["renewable"]["hydro"]["hydrobasins_level"] + output_fl = config_bundles["bundle_hydrobasins"]["output"][0] mask_file = os.path.join( basins_path, "hybas_*_lev{:02d}_v1c.shp".format(int(hydrobasins_level)) @@ -754,11 +755,7 @@ def merge_hydrobasins_shape(config): + str(hydrobasins_level) + "_v1c.shp" ) - fl_merged.to_file( - os.path.join( - basins_path, "hybas_world_lev{:02d}_v1c.shp".format(int(hydrobasins_level)) - ) - ) + fl_merged.to_file(output_fl) if __name__ == "__main__": From 2f61d41b864e74b71a107d28b09e257e37ab50fe Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 29 Nov 2023 17:15:33 +0300 Subject: [PATCH 40/47] Add a hydrobasins tutorial databundle --- configs/bundle_config.yaml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/configs/bundle_config.yaml b/configs/bundle_config.yaml index 346f74d6c..8063c983a 100644 --- a/configs/bundle_config.yaml +++ b/configs/bundle_config.yaml @@ -68,6 +68,20 @@ databundles: - data/gebco/GEBCO_2021_TID.nc - data/copernicus/PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif + # load tutorial hydrobasins bundle for Africa only + bundle_tutorial_hydrobasins: + countries: [Africa] + tutorial: true + category: common + destination: "data/hydrobasins" + urls: + hydrobasins: + base_url: 
https://data.hydrosheds.org/file/HydroBASINS/standard/ + suffixes: ["af"] + unzip: true + output: + - data/hydrobasins/hybas_world.shp + # tutorial bundle specific for Nigeria and Benin only bundle_cutouts_tutorial_NGBJ: countries: [NG, BJ] @@ -153,7 +167,7 @@ databundles: suffixes: ["af", "ar", "as", "au", "eu", "gr", "na", "sa", "si"] unzip: true output: - - data/hydrobasins/*.shp + - data/hydrobasins/hybas_world.shp # data bundle containing the data of the data folder common to all regions of the world bundle_data_earth: From 79268e99fcd17333615ea2f946739760e37744ab Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 29 Nov 2023 17:37:38 +0300 Subject: [PATCH 41/47] Fix naming --- config.default.yaml | 2 +- config.tutorial.yaml | 2 +- configs/bundle_config.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index dd05a9fe2..9db88d72a 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -295,7 +295,7 @@ renewable: hydrobasins_level: 6 resource: method: hydro - hydrobasins: data/hydrobasins/hybas_world.shp + hydrobasins: data/hydrobasins/hybas_world_v1c.shp flowspeed: 1.0 # m/s # weight_with_height: false # show_progress: true diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 6aba3e46c..873c0585d 100644 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -293,7 +293,7 @@ renewable: hydrobasins_level: 4 resource: method: hydro - hydrobasins: data/hydrobasins/hybas_world.shp + hydrobasins: data/hydrobasins/hybas_world_v1c.shp flowspeed: 1.0 # m/s # weight_with_height: false # show_progress: true diff --git a/configs/bundle_config.yaml b/configs/bundle_config.yaml index 8063c983a..4d53d6452 100644 --- a/configs/bundle_config.yaml +++ b/configs/bundle_config.yaml @@ -80,7 +80,7 @@ databundles: suffixes: ["af"] unzip: true output: - - data/hydrobasins/hybas_world.shp + - data/hydrobasins/hybas_world_v1c.shp # tutorial bundle specific for Nigeria and Benin only bundle_cutouts_tutorial_NGBJ: From 972b493c1ebde9c64ccc0a360ad4bf6e070f63b4 Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 29 Nov 2023 17:38:14 +0300 Subject: [PATCH 42/47] Rename tutorial hydrobasins --- scripts/retrieve_databundle_light.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 1ba1b8938..55e547534 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -758,6 +758,19 @@ def merge_hydrobasins_shape(config): fl_merged.to_file(output_fl) +def rename_hydrobasins_tutorial(config): + basins_path = config_bundles["bundle_tutorial_hydrobasins"]["destination"] + hydrobasins_level = snakemake.config["renewable"]["hydro"]["hydrobasins_level"] + output_fl = config_bundles["bundle_tutorial_hydrobasins"]["output"][0] + + mask_file = os.rename( + os.path.join( + basins_path, "hybas_af_lev{:02d}_v1c.shp".format(int(hydrobasins_level)) + ), + output_fl, + ) + + if __name__ == "__main__": if "snakemake" not in globals(): os.chdir(os.path.dirname(os.path.abspath(__file__))) @@ -826,6 +839,11 @@ def merge_hydrobasins_shape(config): logger.info("Merging regional hydrobasins files into a global shapefile") merge_hydrobasins_shape(config=config_bundles["bundle_hydrobasins"]) + if "bundle_tutorial_hydrobasins" in bundles_to_download: + rename_hydrobasins_tutorial( + config=config_bundles["bundle_tutorial_hydrobasins"] + ) + logger.info( "Bundle successfully loaded and unzipped:\n\t" + "\n\t".join(bundles_to_download) 
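Returning briefly to the bundle selection reworked in PATCH 31: sorting on a negated helper column with a single ascending=False flag is equivalent to passing per-column sort directions to sort_values. A small self-contained illustration of the same ordering, using made-up data rather than the real bundle config:

import pandas as pd

# Prefer bundles matching more of the requested countries; break ties in
# favour of the smaller bundle (fewer countries to download).
df = pd.DataFrame(
    {"bundle_size": [10, 3, 3], "n_matched": [2, 2, 1]},
    index=["bundle_big", "bundle_small", "bundle_other"],
)
ranked = df.sort_values(by=["n_matched", "bundle_size"], ascending=[False, True])
print(list(ranked.index))  # ['bundle_small', 'bundle_big', 'bundle_other']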
From 484b7fc380f302452ff25cc3b61082a41263819f Mon Sep 17 00:00:00 2001
From: ekatef
Date: Wed, 29 Nov 2023 17:42:13 +0300
Subject: [PATCH 43/47] Add a hydrobasins level as a parameter

---
 Snakefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Snakefile b/Snakefile
index 24899f6b6..5a6978f87 100644
--- a/Snakefile
+++ b/Snakefile
@@ -150,6 +150,7 @@ if config["enable"].get("retrieve_databundle", True):
         params:
             countries=config["countries"],
             tutorial=config["tutorial"],
+            hydrobasins_level=config["renewable"]["hydro"]["hydrobasins_level"],
         output:  #expand(directory('{file}') if isdir('{file}') else '{file}', file=datafiles)
             expand("{file}", file=datafiles_retrivedatabundle(config)),
            directory("data/landcover"),

From 56307aef737ba93299f3f5428011c4e879ff2f07 Mon Sep 17 00:00:00 2001
From: Davide Fioriti
Date: Wed, 29 Nov 2023 16:33:42 +0100
Subject: [PATCH 44/47] Improve helper efficiency

---
 scripts/_helpers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/_helpers.py b/scripts/_helpers.py
index fa06771c8..d503c96d7 100644
--- a/scripts/_helpers.py
+++ b/scripts/_helpers.py
@@ -530,7 +530,7 @@ def make_accessable(*ios):
     return snakemake
 
 
-def getContinent(code):
+def getContinent(code, world_iso=read_osm_config("world_iso")):
     """
     Returns continent names that contains list of iso-code countries.
 
@@ -554,7 +554,6 @@
     continent_list = []
     code_set = set(code)
 
-    world_iso = read_osm_config("world_iso")
     for continent in world_iso.keys():
         single_continent_set = set(world_iso[continent])
         if code_set.intersection(single_continent_set):
@@ -794,9 +793,10 @@ def filter_codes(c_list, iso_coding=True):
 
     full_codes_list = []
 
+    world_iso, continent_regions = read_osm_config("world_iso", "continent_regions")
+
     for value1 in input:
         codes_list = []
-        world_iso, continent_regions = read_osm_config("world_iso", "continent_regions")
         # extract countries in world
         if value1 == "Earth":
             for continent in world_iso.keys():
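A remark on the new `getContinent` signature above: a default argument such as `world_iso=read_osm_config("world_iso")` is evaluated once, when the module is imported, not on every call — which is exactly where the efficiency gain comes from, but it also means that edits to the osm config file on disk are not picked up by later calls in the same session. A standalone illustration of this Python behavior (nothing here is project code):

    import time


    def stamp(ts=time.time()):
        # time.time() runs once, at function definition time; every call
        # without an explicit argument sees the same frozen value.
        return ts


    first = stamp()
    time.sleep(0.5)
    second = stamp()
    assert first == second  # the default was fixed at definition time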
From 29ee7a05865f6be6b8be8f5e4e90f39c727b9940 Mon Sep 17 00:00:00 2001
From: Davide Fioriti
Date: Wed, 29 Nov 2023 16:39:13 +0100
Subject: [PATCH 45/47] Finalize automatic download of hydrobasins

---
 config.default.yaml                  |  2 +-
 config.tutorial.yaml                 |  2 +-
 scripts/retrieve_databundle_light.py | 44 ++++++++--------------------
 3 files changed, 15 insertions(+), 33 deletions(-)

diff --git a/config.default.yaml b/config.default.yaml
index 9db88d72a..dd05a9fe2 100644
--- a/config.default.yaml
+++ b/config.default.yaml
@@ -295,7 +295,7 @@ renewable:
     hydrobasins_level: 6
     resource:
       method: hydro
-      hydrobasins: data/hydrobasins/hybas_world_v1c.shp
+      hydrobasins: data/hydrobasins/hybas_world.shp
       flowspeed: 1.0 # m/s
       # weight_with_height: false
       # show_progress: true
diff --git a/config.tutorial.yaml b/config.tutorial.yaml
index 873c0585d..6aba3e46c 100644
--- a/config.tutorial.yaml
+++ b/config.tutorial.yaml
@@ -293,7 +293,7 @@ renewable:
     hydrobasins_level: 4
     resource:
       method: hydro
-      hydrobasins: data/hydrobasins/hybas_world_v1c.shp
+      hydrobasins: data/hydrobasins/hybas_world.shp
       flowspeed: 1.0 # m/s
       # weight_with_height: false
       # show_progress: true
diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py
index 55e547534..2b0a23986 100644
--- a/scripts/retrieve_databundle_light.py
+++ b/scripts/retrieve_databundle_light.py
@@ -732,13 +732,12 @@ def datafiles_retrivedatabundle(config):
     return listoutputs
 
 
-def merge_hydrobasins_shape(config):
-    basins_path = config_bundles["bundle_hydrobasins"]["destination"]
-    hydrobasins_level = snakemake.config["renewable"]["hydro"]["hydrobasins_level"]
-    output_fl = config_bundles["bundle_hydrobasins"]["output"][0]
+def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level):
+    basins_path = config_hydrobasin["destination"]
+    output_fl = config_hydrobasin["output"][0]
 
     mask_file = os.path.join(
-        basins_path, "hybas_*_lev{:02d}_v1c.shp".format(int(hydrobasins_level))
+        basins_path, "hybas_??_lev{:02d}_v1c.shp".format(int(hydrobasins_level))
     )
     files_to_merge = glob.glob(mask_file)
 
@@ -749,26 +748,8 @@ def merge_hydrobasins_shape(config):
     fl_merged = gpd.GeoDataFrame(pd.concat(gpdf_list)).drop_duplicates(
         subset="HYBAS_ID", ignore_index=True
     )
-    logger.info(
-        "Merging single files into:\n\t"
-        + "hybas_world_lev"
-        + str(hydrobasins_level)
-        + "_v1c.shp"
-    )
-    fl_merged.to_file(output_fl)
-
-
-def rename_hydrobasins_tutorial(config):
-    basins_path = config_bundles["bundle_tutorial_hydrobasins"]["destination"]
-    hydrobasins_level = snakemake.config["renewable"]["hydro"]["hydrobasins_level"]
-    output_fl = config_bundles["bundle_tutorial_hydrobasins"]["output"][0]
-
-    mask_file = os.rename(
-        os.path.join(
-            basins_path, "hybas_af_lev{:02d}_v1c.shp".format(int(hydrobasins_level))
-        ),
-        output_fl,
-    )
+    logger.info("Merging single files into:\n\t" + output_fl)
+    fl_merged.to_file(output_fl, driver="ESRI Shapefile")
 
 
 if __name__ == "__main__":
@@ -835,13 +816,14 @@ def rename_hydrobasins_tutorial(config):
         if not downloaded_bundle:
             logger.error(f"Bundle {b_name} cannot be downloaded")
 
-    if "bundle_hydrobasins" in bundles_to_download:
+    hydrobasin_bundles = [
+        b_name for b_name in bundles_to_download if "hydrobasins" in b_name
+    ]
+    if len(hydrobasin_bundles) > 0:
         logger.info("Merging regional hydrobasins files into a global shapefile")
-        merge_hydrobasins_shape(config=config_bundles["bundle_hydrobasins"])
-
-    if "bundle_tutorial_hydrobasins" in bundles_to_download:
-        rename_hydrobasins_tutorial(
-            config=config_bundles["bundle_tutorial_hydrobasins"]
+        hydrobasins_level = snakemake.params["hydrobasins_level"]
+        merge_hydrobasins_shape(
+            config_bundles[hydrobasin_bundles[0]], hydrobasins_level
         )
 
     logger.info(
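The quiet change of the glob mask from `hybas_*` to `hybas_??` above is worth spelling out: `*` would also match a previously merged world file sitting in the same folder, feeding one run's output back into the next, whereas `??` admits only the two-letter HydroBASINS region codes. Illustrated with `fnmatch`, which implements the same wildcard rules as `glob`, and made-up file names:

    import fnmatch

    files = [
        "hybas_af_lev06_v1c.shp",
        "hybas_eu_lev06_v1c.shp",
        "hybas_world_lev06_v1c.shp",  # merged artifact from an earlier run
    ]

    # '*' greedily matches the merged world file as well ...
    print(fnmatch.filter(files, "hybas_*_lev06_v1c.shp"))
    # ['hybas_af_lev06_v1c.shp', 'hybas_eu_lev06_v1c.shp', 'hybas_world_lev06_v1c.shp']

    # ... while '??' only admits the two-letter regional codes.
    print(fnmatch.filter(files, "hybas_??_lev06_v1c.shp"))
    # ['hybas_af_lev06_v1c.shp', 'hybas_eu_lev06_v1c.shp']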
From 3fce893a4c78502fc4a472e69252224d347467b8 Mon Sep 17 00:00:00 2001
From: Davide Fioriti
Date: Wed, 29 Nov 2023 16:41:25 +0100
Subject: [PATCH 46/47] update bundle links

---
 configs/bundle_config.yaml | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/configs/bundle_config.yaml b/configs/bundle_config.yaml
index 4d53d6452..2fa1c74f9 100644
--- a/configs/bundle_config.yaml
+++ b/configs/bundle_config.yaml
@@ -36,7 +36,7 @@ databundles:
     category: data
     destination: "data"
     urls:
-      zenodo: https://sandbox.zenodo.org/record/1223907/files/tutorial_data_NGBJ.zip?download=1
+      zenodo: https://sandbox.zenodo.org/records/3853/files/bundle_tutorial_NGBJ.zip?download=1
       gdrive: https://drive.google.com/file/d/1Vb1ISjhy7iwTTZYeezGd6S4nLt-EDGme/view?usp=drive_link
     output:
       - data/gebco/GEBCO_2021_TID.nc
      - data/copernicus/PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif
@@ -49,7 +49,7 @@ databundles:
     category: data
     destination: "data"
     urls:
-      zenodo: https://sandbox.zenodo.org/record/1223907/files/tutorial_data_BW.zip?download=1
+      zenodo: https://sandbox.zenodo.org/records/3853/files/bundle_tutorial_BW.zip?download=1
       gdrive: https://drive.google.com/file/d/19IXvTD8gVSzgTInL85ta7QjaNI8ZPCCY/view?usp=drive_link
     output:
       - data/gebco/GEBCO_2021_TID.nc
      - data/copernicus/PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif
@@ -62,7 +62,7 @@ databundles:
     category: data
     destination: "data"
     urls:
-      zenodo: https://sandbox.zenodo.org/record/1223907/files/tutorial_data_MA.zip?download=1
+      zenodo: https://sandbox.zenodo.org/records/3853/files/bundle_tutorial_MA.zip?download=1
       gdrive: https://drive.google.com/file/d/1VGzE8ZJHAvAQ9X44QNSX4rH3QF7Yi37D/view?usp=drive_link
     output:
       - data/gebco/GEBCO_2021_TID.nc
@@ -72,7 +72,7 @@ databundles:
   bundle_tutorial_hydrobasins:
     countries: [Africa]
     tutorial: true
-    category: common
+    category: hydrobasins
     destination: "data/hydrobasins"
     urls:
       hydrobasins:
@@ -80,7 +80,7 @@ databundles:
         suffixes: ["af"]
     unzip: true
     output:
-      - data/hydrobasins/hybas_world_v1c.shp
+      - data/hydrobasins/hybas_world.shp
 
   # tutorial bundle specific for Nigeria and Benin only
   bundle_cutouts_tutorial_NGBJ:
@@ -89,7 +89,7 @@ databundles:
     category: cutouts
     destination: "cutouts"
     urls:
-      zenodo: https://sandbox.zenodo.org/record/1223907/files/tutorial_cutouts_NGBJ.zip?download=1
+      zenodo: https://sandbox.zenodo.org/records/3853/files/bundle_cutouts_tutorial_NGBJ.zip?download=1
       gdrive: https://drive.google.com/file/d/1xnomHdXf_c5STrf7jtDiuRlN2zW0FSVC/view?usp=drive_link
     output: [cutouts/cutout-2013-era5-tutorial.nc]
     disable_by_opt:
@@ -102,7 +102,7 @@ databundles:
     category: cutouts
     destination: "cutouts"
     urls:
-      zenodo: https://sandbox.zenodo.org/record/1223907/files/tutorial_cutouts_BW.zip?download=1
+      zenodo: https://sandbox.zenodo.org/records/3853/files/bundle_cutouts_tutorial_BW.zip?download=1
       gdrive: https://drive.google.com/file/d/1DDQAtnIDM0FNC3vCldfHeH__IpTbyIJt/view?usp=drive_link
     output: [cutouts/cutout-2013-era5-tutorial.nc]
     disable_by_opt:
@@ -115,7 +115,7 @@ databundles:
     category: cutouts
     destination: "cutouts"
     urls:
-      zenodo: https://sandbox.zenodo.org/record/1223907/files/tutorial_cutouts_MA.zip?download=1
+      zenodo: https://sandbox.zenodo.org/records/3853/files/bundle_cutouts_tutorial_MA.zip?download=1
       gdrive: https://drive.google.com/file/d/1j5v2f4E756jmDMa707QvdNJq3xM4bYUk/view?usp=drive_link
     output: [cutouts/cutout-2013-era5-tutorial.nc]
     disable_by_opt:
@@ -139,12 +139,9 @@ databundles:
     category: common
     destination: "data"
     urls:
-      zenodo: https://sandbox.zenodo.org/record/1183583/files/tutorial_data_general.zip?download=1
+      zenodo: https://sandbox.zenodo.org/records/3853/files/tutorial_data_general.zip?download=1
       gdrive: https://drive.google.com/file/d/1nRLrs_kP0qVl-IHC4BFLjpoKa3HLk2Py/view
     output:
-      - data/costs.csv
-      - data/hydro_capacities.csv
-      - data/custom_powerplants.csv
       - data/eez/eez_v11.gpkg
       - data/ssp2-2.6/2030/era5_2013/Africa.nc
       - data/ssp2-2.6/2030/era5_2013/Asia.nc
       - data/ssp2-2.6/2030/era5_2013/NorthAmerica.nc
       - data/ssp2-2.6/2030/era5_2013/SouthAmerica.nc
       - data/ssp2-2.6/2030/era5_2013/Oceania.nc
-      - data/hydrobasins/hybas_world_lev04_v1c.shp
-      - data/hydrobasins/hybas_world_lev05_v1c.shp
 
   # global data for hydrobasins
   bundle_hydrobasins:
     countries: [Earth]
     tutorial: false
-    category: common
+    category: hydrobasins
     destination: "data/hydrobasins"
     urls:
       hydrobasins:
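Since this commit touches nothing but URLs, a quick reachability pass over the new links is a natural sanity check. A rough sketch, assuming it runs from the repository root, that PyYAML is available, and that the hosts answer HEAD requests (some servers only honour GET, in which case a ranged GET is the usual fallback):

    import urllib.request

    import yaml  # PyYAML, already a dependency of the workflow

    with open("configs/bundle_config.yaml") as f:
        bundles = yaml.safe_load(f)["databundles"]

    for name, bundle in bundles.items():
        url = bundle.get("urls", {}).get("zenodo")
        if url is None:
            continue  # e.g. hydrobasins bundles, which use base_url/suffixes
        req = urllib.request.Request(url, method="HEAD")
        try:
            with urllib.request.urlopen(req, timeout=10) as resp:
                print(f"{name}: HTTP {resp.status}")
        except Exception as err:
            print(f"{name}: unreachable ({err})")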
From 482cabbedb466037c6c2544b9e4b340d6b0f1d52 Mon Sep 17 00:00:00 2001
From: ekatef
Date: Wed, 29 Nov 2023 19:54:08 +0300
Subject: [PATCH 47/47] Add a release note

---
 doc/release_notes.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index 3f50f3920..121c8fd6d 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -20,6 +20,8 @@ E.g. if a new rule becomes available describe how to use it `snakemake -j1 run_t
 
 * Improve retrieve_databundle to prioritize smallest databundles `PR #911 `__
 
+* Add functionality to load shapefiles for hydrobasins directly from the data source `PR #919 `__
+
 PyPSA-Earth 0.2.3
 =================