generated from worldbank/template
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
eec4151
commit d07f0c8
Showing
40 changed files
with
10,639 additions
and
2,047 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Commit to github | ||
|
||
# Installation testing | ||
# Create a conda environment named urban_test from requirements.txt (note the Windows-style .\ path), using the conda-forge channel | ||
conda create -n urban_test --file .\requirements.txt -c conda-forge | ||
conda activate urban_test | ||
# Install ipykernel and register the environment as a per-user Jupyter kernel named urban_test | ||
conda install ipykernel | ||
python -m ipykernel install --user --name=urban_test | ||
# Install the package in the current directory without build isolation and without pulling its dependencies | ||
pip install --no-build-isolation --no-deps . |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,346 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Summarize files and folders in AWS\n", | ||
"\n", | ||
"This notebook provides an example of how to list the files in an AWS S3 bucket (optionally restricted to a key prefix) and group them according to folder definitions." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import sys, os, importlib\n", | ||
"import rasterio, geojson, h3, boto3\n", | ||
"\n", | ||
"import pandas as pd\n", | ||
"import geopandas as gpd\n", | ||
"\n", | ||
"from botocore.config import Config\n", | ||
"from botocore import UNSIGNED" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"metadata": { | ||
"scrolled": true | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Completed loop: 0\n", | ||
"Completed loop: 1\n", | ||
"Completed loop: 2\n", | ||
"Completed loop: 3\n", | ||
"Completed loop: 4\n", | ||
"Completed loop: 5\n", | ||
"Completed loop: 6\n", | ||
"Completed loop: 7\n", | ||
"Completed loop: 8\n", | ||
"Completed loop: 9\n", | ||
"Completed loop: 10\n", | ||
"Completed loop: 11\n", | ||
"Completed loop: 12\n", | ||
"Completed loop: 13\n", | ||
"Completed loop: 14\n", | ||
"Completed loop: 15\n", | ||
"Completed loop: 16\n", | ||
"Completed loop: 17\n", | ||
"Completed loop: 18\n", | ||
"Completed loop: 19\n", | ||
"Completed loop: 20\n", | ||
"Completed loop: 21\n", | ||
"Completed loop: 22\n", | ||
"Completed loop: 23\n", | ||
"Completed loop: 24\n", | ||
"Completed loop: 25\n", | ||
"Completed loop: 26\n", | ||
"Completed loop: 27\n", | ||
"Completed loop: 28\n", | ||
"Completed loop: 29\n", | ||
"Completed loop: 30\n", | ||
"Completed loop: 31\n", | ||
"Completed loop: 32\n", | ||
"Completed loop: 33\n", | ||
"Completed loop: 34\n", | ||
"Completed loop: 35\n", | ||
"Completed loop: 36\n", | ||
"Completed loop: 37\n", | ||
"Completed loop: 38\n", | ||
"Completed loop: 39\n", | ||
"Completed loop: 40\n", | ||
"Completed loop: 41\n", | ||
"Completed loop: 42\n", | ||
"Completed loop: 43\n", | ||
"Completed loop: 44\n", | ||
"Completed loop: 45\n", | ||
"Completed loop: 46\n", | ||
"Completed loop: 47\n", | ||
"Completed loop: 48\n", | ||
"Completed loop: 49\n", | ||
"Completed loop: 50\n", | ||
"Completed loop: 51\n", | ||
"Completed loop: 52\n", | ||
"Completed loop: 53\n", | ||
"Completed loop: 54\n", | ||
"Completed loop: 55\n", | ||
"Completed loop: 56\n", | ||
"Completed loop: 57\n", | ||
"Completed loop: 58\n", | ||
"Completed loop: 59\n", | ||
"Completed loop: 60\n", | ||
"Completed loop: 61\n", | ||
"Completed loop: 62\n", | ||
"Completed loop: 63\n", | ||
"Completed loop: 64\n", | ||
"Completed loop: 65\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Listing parameters: target bucket, key prefix to filter on, and AWS region\n", | ||
"bucket = 'wbg-geography01'\n", | ||
"prefix = 'sylvera'\n", | ||
"region = 'us-east-1'\n", | ||
"s3client = boto3.client('s3', region_name=region)\n", | ||
"\n", | ||
"# Page through the listing with a paginator: it carries the continuation token\n", | ||
"# between requests and sends the same Prefix on every page, including the first,\n", | ||
"# so no leftover `token` variable from a previous run can affect the result.\n", | ||
"paginator = s3client.get_paginator('list_objects_v2')\n", | ||
"\n", | ||
"all_res = []\n", | ||
"for loops, page in enumerate(paginator.paginate(Bucket=bucket, Prefix=prefix)):\n", | ||
"    # 'Contents' is absent from a page when no key matches, so default to an empty list\n", | ||
"    all_res.extend(page.get('Contents', []))\n", | ||
"    print(f\"Completed loop: {loops}\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style scoped>\n", | ||
" .dataframe tbody tr th:only-of-type {\n", | ||
" vertical-align: middle;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>Key</th>\n", | ||
" <th>LastModified</th>\n", | ||
" <th>ETag</th>\n", | ||
" <th>Size</th>\n", | ||
" <th>StorageClass</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>0</th>\n", | ||
" <td>.DS_Store</td>\n", | ||
" <td>2020-09-24 18:15:42+00:00</td>\n", | ||
" <td>\"098b7ee247e2688d3c110358e95be940\"</td>\n", | ||
" <td>24580</td>\n", | ||
" <td>STANDARD</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>1</th>\n", | ||
" <td>AIS/</td>\n", | ||
" <td>2021-06-17 16:09:49+00:00</td>\n", | ||
" <td>\"bd752504006b667e1139c9383472e928\"</td>\n", | ||
" <td>0</td>\n", | ||
" <td>INTELLIGENT_TIERING</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2</th>\n", | ||
" <td>AIS_outputs/205762000.geojson</td>\n", | ||
" <td>2021-11-30 23:13:40+00:00</td>\n", | ||
" <td>\"36406a0f5b1d20b5f734ef775feb8b39\"</td>\n", | ||
" <td>630475</td>\n", | ||
" <td>STANDARD</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>3</th>\n", | ||
" <td>AIS_outputs/219833000.geojson</td>\n", | ||
" <td>2021-11-30 23:13:40+00:00</td>\n", | ||
" <td>\"e049985ba5d6327f1aafb33e8e92707a\"</td>\n", | ||
" <td>355694</td>\n", | ||
" <td>STANDARD</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>4</th>\n", | ||
" <td>AIS_outputs/229380000.geojson</td>\n", | ||
" <td>2021-11-30 23:13:40+00:00</td>\n", | ||
" <td>\"fb6c2da55db0cf1e7022d7d04c8de236\"</td>\n", | ||
" <td>539934</td>\n", | ||
" <td>STANDARD</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>...</th>\n", | ||
" <td>...</td>\n", | ||
" <td>...</td>\n", | ||
" <td>...</td>\n", | ||
" <td>...</td>\n", | ||
" <td>...</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>65246</th>\n", | ||
" <td>sylvera/MSL/0007MZ/UAV-LS/GIL06/L3A/0007MZ_GIL...</td>\n", | ||
" <td>2022-12-27 12:13:45+00:00</td>\n", | ||
" <td>\"cc1d2ab954a43f1fda2902eb547703aa-4\"</td>\n", | ||
" <td>28143740</td>\n", | ||
" <td>STANDARD</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>65247</th>\n", | ||
" <td>sylvera/MSL/0007MZ/UAV-LS/GIL06/L3A/0007MZ_GIL...</td>\n", | ||
" <td>2022-12-27 12:13:44+00:00</td>\n", | ||
" <td>\"9fbaf7579c5c7ec7cc263bc161da743c-2\"</td>\n", | ||
" <td>14073598</td>\n", | ||
" <td>STANDARD</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>65248</th>\n", | ||
" <td>sylvera/MSL/0007MZ/UAV-LS/GIL06/L3A/0007MZ_GIL...</td>\n", | ||
" <td>2022-12-27 12:13:46+00:00</td>\n", | ||
" <td>\"9dfcc537eabdc7c09ea4fd687c024787\"</td>\n", | ||
" <td>1972843</td>\n", | ||
" <td>STANDARD</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>65249</th>\n", | ||
" <td>sylvera/MSL/0007MZ/UAV-LS/GIL06/L3B/0007MZ_GIL...</td>\n", | ||
" <td>2022-12-27 13:16:38+00:00</td>\n", | ||
" <td>\"0c2ad1ebb1ed02bf783a042d6bcc3308-84\"</td>\n", | ||
" <td>703036683</td>\n", | ||
" <td>STANDARD</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>65250</th>\n", | ||
" <td>sylvera/MSL/0007MZ/UAV-LS/GIL06/L4/0007MZ_GIL0...</td>\n", | ||
" <td>2022-12-27 18:16:51+00:00</td>\n", | ||
" <td>\"44a8c09c25d43c499568b28b85b1c0fe\"</td>\n", | ||
" <td>564814</td>\n", | ||
" <td>STANDARD</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"<p>65251 rows × 5 columns</p>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" Key \\\n", | ||
"0 .DS_Store \n", | ||
"1 AIS/ \n", | ||
"2 AIS_outputs/205762000.geojson \n", | ||
"3 AIS_outputs/219833000.geojson \n", | ||
"4 AIS_outputs/229380000.geojson \n", | ||
"... ... \n", | ||
"65246 sylvera/MSL/0007MZ/UAV-LS/GIL06/L3A/0007MZ_GIL... \n", | ||
"65247 sylvera/MSL/0007MZ/UAV-LS/GIL06/L3A/0007MZ_GIL... \n", | ||
"65248 sylvera/MSL/0007MZ/UAV-LS/GIL06/L3A/0007MZ_GIL... \n", | ||
"65249 sylvera/MSL/0007MZ/UAV-LS/GIL06/L3B/0007MZ_GIL... \n", | ||
"65250 sylvera/MSL/0007MZ/UAV-LS/GIL06/L4/0007MZ_GIL0... \n", | ||
"\n", | ||
" LastModified ETag \\\n", | ||
"0 2020-09-24 18:15:42+00:00 \"098b7ee247e2688d3c110358e95be940\" \n", | ||
"1 2021-06-17 16:09:49+00:00 \"bd752504006b667e1139c9383472e928\" \n", | ||
"2 2021-11-30 23:13:40+00:00 \"36406a0f5b1d20b5f734ef775feb8b39\" \n", | ||
"3 2021-11-30 23:13:40+00:00 \"e049985ba5d6327f1aafb33e8e92707a\" \n", | ||
"4 2021-11-30 23:13:40+00:00 \"fb6c2da55db0cf1e7022d7d04c8de236\" \n", | ||
"... ... ... \n", | ||
"65246 2022-12-27 12:13:45+00:00 \"cc1d2ab954a43f1fda2902eb547703aa-4\" \n", | ||
"65247 2022-12-27 12:13:44+00:00 \"9fbaf7579c5c7ec7cc263bc161da743c-2\" \n", | ||
"65248 2022-12-27 12:13:46+00:00 \"9dfcc537eabdc7c09ea4fd687c024787\" \n", | ||
"65249 2022-12-27 13:16:38+00:00 \"0c2ad1ebb1ed02bf783a042d6bcc3308-84\" \n", | ||
"65250 2022-12-27 18:16:51+00:00 \"44a8c09c25d43c499568b28b85b1c0fe\" \n", | ||
"\n", | ||
" Size StorageClass \n", | ||
"0 24580 STANDARD \n", | ||
"1 0 INTELLIGENT_TIERING \n", | ||
"2 630475 STANDARD \n", | ||
"3 355694 STANDARD \n", | ||
"4 539934 STANDARD \n", | ||
"... ... ... \n", | ||
"65246 28143740 STANDARD \n", | ||
"65247 14073598 STANDARD \n", | ||
"65248 1972843 STANDARD \n", | ||
"65249 703036683 STANDARD \n", | ||
"65250 564814 STANDARD \n", | ||
"\n", | ||
"[65251 rows x 5 columns]" | ||
] | ||
}, | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# One row per listed S3 object (Key, LastModified, ETag, Size, StorageClass)\n", | ||
"inD = pd.DataFrame(all_res)\n", | ||
"# Folder label: the key's path components without the trailing file name, joined by '_'.\n", | ||
"# NOTE(review): the original slice was `[:]`, which kept the file name too; confirm the intended depth.\n", | ||
"inD['folder'] = inD['Key'].apply(lambda x: \"_\".join(x.split(\"/\")[:-1]))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Earth Engine", | ||
"language": "python", | ||
"name": "ee" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.