Skip to content

Commit

Permalink
transfer from bpstewar:gostrocks
Browse files Browse the repository at this point in the history
  • Loading branch information
andresfchamorro committed Feb 23, 2024
1 parent eec4151 commit d07f0c8
Show file tree
Hide file tree
Showing 40 changed files with 10,639 additions and 2,047 deletions.
253 changes: 9 additions & 244 deletions README.md

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions build_commands.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Commit to GitHub
# (no commands are recorded under this heading)

# Installation testing
# Create a conda environment named urban_test from requirements.txt, using the conda-forge channel.
conda create -n urban_test --file .\requirements.txt -c conda-forge
conda activate urban_test
# Install ipykernel and register the environment as a Jupyter kernel named urban_test.
conda install ipykernel
python -m ipykernel install --user --name=urban_test
# Install the package in the current directory, without build isolation and without installing dependencies.
pip install --no-build-isolation --no-deps .
346 changes: 346 additions & 0 deletions notebooks/AWS_Summarize.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,346 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Summarize files and folders in AWS\n",
"\n",
"This notebook shows how to list the files in an AWS S3 bucket (optionally restricted to a key prefix) and group them according to folder definitions."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import sys, os, importlib\n",
"import rasterio, geojson, h3, boto3\n",
"\n",
"import pandas as pd\n",
"import geopandas as gpd\n",
"\n",
"from botocore.config import Config\n",
"from botocore import UNSIGNED"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Completed loop: 0\n",
"Completed loop: 1\n",
"Completed loop: 2\n",
"Completed loop: 3\n",
"Completed loop: 4\n",
"Completed loop: 5\n",
"Completed loop: 6\n",
"Completed loop: 7\n",
"Completed loop: 8\n",
"Completed loop: 9\n",
"Completed loop: 10\n",
"Completed loop: 11\n",
"Completed loop: 12\n",
"Completed loop: 13\n",
"Completed loop: 14\n",
"Completed loop: 15\n",
"Completed loop: 16\n",
"Completed loop: 17\n",
"Completed loop: 18\n",
"Completed loop: 19\n",
"Completed loop: 20\n",
"Completed loop: 21\n",
"Completed loop: 22\n",
"Completed loop: 23\n",
"Completed loop: 24\n",
"Completed loop: 25\n",
"Completed loop: 26\n",
"Completed loop: 27\n",
"Completed loop: 28\n",
"Completed loop: 29\n",
"Completed loop: 30\n",
"Completed loop: 31\n",
"Completed loop: 32\n",
"Completed loop: 33\n",
"Completed loop: 34\n",
"Completed loop: 35\n",
"Completed loop: 36\n",
"Completed loop: 37\n",
"Completed loop: 38\n",
"Completed loop: 39\n",
"Completed loop: 40\n",
"Completed loop: 41\n",
"Completed loop: 42\n",
"Completed loop: 43\n",
"Completed loop: 44\n",
"Completed loop: 45\n",
"Completed loop: 46\n",
"Completed loop: 47\n",
"Completed loop: 48\n",
"Completed loop: 49\n",
"Completed loop: 50\n",
"Completed loop: 51\n",
"Completed loop: 52\n",
"Completed loop: 53\n",
"Completed loop: 54\n",
"Completed loop: 55\n",
"Completed loop: 56\n",
"Completed loop: 57\n",
"Completed loop: 58\n",
"Completed loop: 59\n",
"Completed loop: 60\n",
"Completed loop: 61\n",
"Completed loop: 62\n",
"Completed loop: 63\n",
"Completed loop: 64\n",
"Completed loop: 65\n"
]
}
],
"source": [
"# Listing parameters: only keys in `bucket` that start with `prefix` are collected.\n",
"bucket = 'wbg-geography01'\n",
"prefix = 'sylvera'\n",
"region = 'us-east-1'\n",
"s3client = boto3.client('s3', region_name=region)\n",
"\n",
"# list_objects_v2 returns a limited number of keys per request, so the listing has to be\n",
"# paged. The paginator carries the continuation token between requests and sends the same\n",
"# Prefix on every request, including the first one, so no manual token state is needed.\n",
"paginator = s3client.get_paginator('list_objects_v2')\n",
"\n",
"# Collect one dict per object (Key, LastModified, ETag, Size, StorageClass).\n",
"all_res = []\n",
"for loops, page in enumerate(paginator.paginate(Bucket=bucket, Prefix=prefix)):\n",
"    # 'Contents' is absent from a response when no keys match, so default to an empty list.\n",
"    all_res.extend(page.get('Contents', []))\n",
"    print(f\"Completed loop: {loops}\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Key</th>\n",
" <th>LastModified</th>\n",
" <th>ETag</th>\n",
" <th>Size</th>\n",
" <th>StorageClass</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>.DS_Store</td>\n",
" <td>2020-09-24 18:15:42+00:00</td>\n",
" <td>\"098b7ee247e2688d3c110358e95be940\"</td>\n",
" <td>24580</td>\n",
" <td>STANDARD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>AIS/</td>\n",
" <td>2021-06-17 16:09:49+00:00</td>\n",
" <td>\"bd752504006b667e1139c9383472e928\"</td>\n",
" <td>0</td>\n",
" <td>INTELLIGENT_TIERING</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>AIS_outputs/205762000.geojson</td>\n",
" <td>2021-11-30 23:13:40+00:00</td>\n",
" <td>\"36406a0f5b1d20b5f734ef775feb8b39\"</td>\n",
" <td>630475</td>\n",
" <td>STANDARD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>AIS_outputs/219833000.geojson</td>\n",
" <td>2021-11-30 23:13:40+00:00</td>\n",
" <td>\"e049985ba5d6327f1aafb33e8e92707a\"</td>\n",
" <td>355694</td>\n",
" <td>STANDARD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>AIS_outputs/229380000.geojson</td>\n",
" <td>2021-11-30 23:13:40+00:00</td>\n",
" <td>\"fb6c2da55db0cf1e7022d7d04c8de236\"</td>\n",
" <td>539934</td>\n",
" <td>STANDARD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65246</th>\n",
" <td>sylvera/MSL/0007MZ/UAV-LS/GIL06/L3A/0007MZ_GIL...</td>\n",
" <td>2022-12-27 12:13:45+00:00</td>\n",
" <td>\"cc1d2ab954a43f1fda2902eb547703aa-4\"</td>\n",
" <td>28143740</td>\n",
" <td>STANDARD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65247</th>\n",
" <td>sylvera/MSL/0007MZ/UAV-LS/GIL06/L3A/0007MZ_GIL...</td>\n",
" <td>2022-12-27 12:13:44+00:00</td>\n",
" <td>\"9fbaf7579c5c7ec7cc263bc161da743c-2\"</td>\n",
" <td>14073598</td>\n",
" <td>STANDARD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65248</th>\n",
" <td>sylvera/MSL/0007MZ/UAV-LS/GIL06/L3A/0007MZ_GIL...</td>\n",
" <td>2022-12-27 12:13:46+00:00</td>\n",
" <td>\"9dfcc537eabdc7c09ea4fd687c024787\"</td>\n",
" <td>1972843</td>\n",
" <td>STANDARD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65249</th>\n",
" <td>sylvera/MSL/0007MZ/UAV-LS/GIL06/L3B/0007MZ_GIL...</td>\n",
" <td>2022-12-27 13:16:38+00:00</td>\n",
" <td>\"0c2ad1ebb1ed02bf783a042d6bcc3308-84\"</td>\n",
" <td>703036683</td>\n",
" <td>STANDARD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65250</th>\n",
" <td>sylvera/MSL/0007MZ/UAV-LS/GIL06/L4/0007MZ_GIL0...</td>\n",
" <td>2022-12-27 18:16:51+00:00</td>\n",
" <td>\"44a8c09c25d43c499568b28b85b1c0fe\"</td>\n",
" <td>564814</td>\n",
" <td>STANDARD</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>65251 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" Key \\\n",
"0 .DS_Store \n",
"1 AIS/ \n",
"2 AIS_outputs/205762000.geojson \n",
"3 AIS_outputs/219833000.geojson \n",
"4 AIS_outputs/229380000.geojson \n",
"... ... \n",
"65246 sylvera/MSL/0007MZ/UAV-LS/GIL06/L3A/0007MZ_GIL... \n",
"65247 sylvera/MSL/0007MZ/UAV-LS/GIL06/L3A/0007MZ_GIL... \n",
"65248 sylvera/MSL/0007MZ/UAV-LS/GIL06/L3A/0007MZ_GIL... \n",
"65249 sylvera/MSL/0007MZ/UAV-LS/GIL06/L3B/0007MZ_GIL... \n",
"65250 sylvera/MSL/0007MZ/UAV-LS/GIL06/L4/0007MZ_GIL0... \n",
"\n",
" LastModified ETag \\\n",
"0 2020-09-24 18:15:42+00:00 \"098b7ee247e2688d3c110358e95be940\" \n",
"1 2021-06-17 16:09:49+00:00 \"bd752504006b667e1139c9383472e928\" \n",
"2 2021-11-30 23:13:40+00:00 \"36406a0f5b1d20b5f734ef775feb8b39\" \n",
"3 2021-11-30 23:13:40+00:00 \"e049985ba5d6327f1aafb33e8e92707a\" \n",
"4 2021-11-30 23:13:40+00:00 \"fb6c2da55db0cf1e7022d7d04c8de236\" \n",
"... ... ... \n",
"65246 2022-12-27 12:13:45+00:00 \"cc1d2ab954a43f1fda2902eb547703aa-4\" \n",
"65247 2022-12-27 12:13:44+00:00 \"9fbaf7579c5c7ec7cc263bc161da743c-2\" \n",
"65248 2022-12-27 12:13:46+00:00 \"9dfcc537eabdc7c09ea4fd687c024787\" \n",
"65249 2022-12-27 13:16:38+00:00 \"0c2ad1ebb1ed02bf783a042d6bcc3308-84\" \n",
"65250 2022-12-27 18:16:51+00:00 \"44a8c09c25d43c499568b28b85b1c0fe\" \n",
"\n",
" Size StorageClass \n",
"0 24580 STANDARD \n",
"1 0 INTELLIGENT_TIERING \n",
"2 630475 STANDARD \n",
"3 355694 STANDARD \n",
"4 539934 STANDARD \n",
"... ... ... \n",
"65246 28143740 STANDARD \n",
"65247 14073598 STANDARD \n",
"65248 1972843 STANDARD \n",
"65249 703036683 STANDARD \n",
"65250 564814 STANDARD \n",
"\n",
"[65251 rows x 5 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One row per entry collected in all_res.\n",
"inD = pd.DataFrame(all_res)\n",
"# Build a folder label by joining the '/'-separated parts of each key with '_'.\n",
"# NOTE(review): the `[:]` slice keeps every part, including the file name; narrow it\n",
"# (e.g. `[:-1]`) to drop the file name - confirm the intended grouping depth.\n",
"inD['folder'] = inD['Key'].apply(lambda x: \"_\".join(x.split(\"/\")[:]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Earth Engine",
"language": "python",
"name": "ee"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit d07f0c8

Please sign in to comment.