diff --git a/.ipynb_checkpoints/build-env-checkpoint.sh b/.ipynb_checkpoints/build-env-checkpoint.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ad880ad972ba1ad9be70a08f6ac940c2f6fb8456
--- /dev/null
+++ b/.ipynb_checkpoints/build-env-checkpoint.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+#
+# Create the osgeo-env conda environment from the environment.yml that sits
+# next to this script, then install maap-py v3.0.1 into it in editable mode.
+# Each step stops the build on failure so a broken setup is never reported
+# as installed.
+
+# source activate base
+# Resolve the directory of this script so environment.yml is found from any cwd.
+basedir=$(cd -- "$(dirname -- "$0")" && pwd -P) || exit 1
+echo installing environment...
+mamba env create --name osgeo-env -f "${basedir}/environment.yml" || exit 1
+
+# Install the maap.py environment
+echo trying to install maap-py...
+source activate osgeo-env || exit 1
+git clone --single-branch --branch v3.0.1 https://github.com/MAAP-Project/maap-py.git || exit 1
+# A failed cd must stop here: otherwise pip would install whatever is in the cwd.
+cd maap-py || exit 1
+pip install -e . || exit 1
+echo installed maap-py package!
\ No newline at end of file
diff --git a/.ipynb_checkpoints/environment-checkpoint.yml b/.ipynb_checkpoints/environment-checkpoint.yml
new file mode 100644
index 0000000000000000000000000000000000000000..158209d75b5f92e09178deb8654829f253d97149
--- /dev/null
+++ b/.ipynb_checkpoints/environment-checkpoint.yml
@@ -0,0 +1,16 @@
+name: osgeo-env
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - boto3
+  - botocore
+  - fsspec
+  - s3fs
+  - geopandas
+  - geopandas-base
+  - h5py
+  - numpy
+  - pandas
+  - python
+  - scipy
diff --git a/.ipynb_checkpoints/run-checkpoint.sh b/.ipynb_checkpoints/run-checkpoint.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b2a38f4414072ad3d88c9f3baef48200b4f4559b
--- /dev/null
+++ b/.ipynb_checkpoints/run-checkpoint.sh
@@ -0,0 +1,20 @@
+source activate osgeo-env || exit 1
+
+# Get the directory that contains this script
+basedir=$( cd "$(dirname "$0")" && pwd -P ) || exit 1
+
+# Create output directory to store outputs.
+# The name is output as required by the DPS.
+# Note how we do not provide an absolute path
+# but instead a relative one as the DPS creates
+# a temp working directory for our code.
+
+mkdir -p output
+
+# INPUT_FILES
+L1B_URL=$1
+L2A_URL=$2 # e.g. 
GEDI01_B or GEDI02_A + + # Call the script using the absolute paths; every argument is quoted so a URL containing spaces or glob characters reaches main.py unchanged +# Any output written to the stdout and stderr streams will be automatically captured and placed in the output dir +python "${basedir}/main.py" "${L1B_URL}" "${L2A_URL}" output \ No newline at end of file diff --git a/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb index 363fcab7ed6e9634e198cf5555ceb88932c9a245..bff62e83cd78fe4f074faf8d21b1be3b9094b230 100644 --- a/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb @@ -1,6 +1,358 @@ { - "cells": [], - "metadata": {}, + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "id": "9803c6a0-d547-4112-8edc-eb62680360e2", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "from maap.maap import MAAP\n", + "import re\n", + "# maap = MAAP(maap_host='api.maap-project.org')\n", + "maap = MAAP(maap_host='api.maap-project.org')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "96d5cdce-a2cf-4ec5-8671-f9edd500ab8b", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "<Response [200]>" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#maap.register_algorithm_from_yaml_file(\"/projects/arojas_download_gedi_data.yml\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "54d3df6a-e22a-4f15-ab22-859df5d4b367", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "on file num: 400\r" + ] + } + ], + "source": [ + "# Change to appropriate filepaths for urls\n", + "# l1b_urls_fpath = \"/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt\"\n", + "# l2a_urls_fpath = \"/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt\"\n", + "l1b_urls_fpath = 
\"./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt\"\n", + "l2a_urls_fpath = \"./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt\"\n", + "\n", + "with open(l1b_urls_fpath) as f:\n", + " l1b_fpaths = f.read().splitlines()\n", + "with open(l2a_urls_fpath) as f:\n", + " l2a_fpaths = f.read().splitlines()\n", + "\n", + "# Get matching string pattern and run main.py\n", + "jobs_list = []\n", + "counter=1\n", + "for l1b_fp in l1b_fpaths:\n", + " # Get string pattern\n", + " str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n", + " try:\n", + " l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]\n", + " except:\n", + " print(\"No matching L2 file!\", l1b_fp)\n", + " continue\n", + " # print(l1b_fp)\n", + " # print(l2a_fp)\n", + " job = maap.submitJob(identifier=\"SouthAmericaGEDI-run-20240117\",\n", + " algo_id=\"arojas_download_gedi_data\",\n", + " version=\"master\",\n", + " username=\"arojearthdata\",\n", + " queue=\"maap-dps-worker-8gb\",\n", + " L1B_URL=l1b_fp,\n", + " L2A_URL=l2a_fp)\n", + "\n", + " jobs_list.append(job)\n", + " \n", + " if counter%100==0:\n", + " print(\"on file num: \", counter, end='\\r')\n", + " counter+=1" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running\n", + "9efde8c0-b4aa-4d4b-9972-55b72be802a6\n", + "Running\n", + "d101a1b9-c44e-44e8-931a-6fe9f436e03d\n", + "Running\n", + "acc12502-5cfb-4f05-b05b-2f57657d2f53\n", + "Running\n", + "126548ea-2fd0-434a-b040-66a0a45d8f9c\n", + "Running\n", + "325641a2-f0f5-40a7-8904-c023d71a7b55\n", + "Running\n", + "a9039b0e-f678-403a-8ff2-576adb138199\n", + "Accepted\n", + "a99753e5-3ceb-4b25-b1e5-e1c154d2235d\n", + "Accepted\n", + "b985c8ff-77ae-41c9-8b00-d1e9ee0cc717\n", + "Accepted\n", + "d70e4917-323f-48e8-976d-6c94b08277a2\n", + "Accepted\n", + "0fe1d4f4-cb0c-41ae-99ad-0e9d28f7e695\n" + ] + } + ], + "source": [ + "for job in 
jobs_list[:10]:\n", + " print(job.retrieve_status())\n", + " print(job.id)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e5f0edb0-ffe4-4f8e-bb4f-d51686ee47b4", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "89d3a568-4801-4168-891f-04c65091d336", + "metadata": {}, + "outputs": [], + "source": [ + "# Some files didnt download, lets check\n", + "import glob\n", + "indir = \"/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master\"\n", + "file_list = []\n", + "for subdir, dirs, files in os.walk(indir):\n", + " for file in files:\n", + " if file.endswith(\".h5\"):\n", + " # fp = os.path.join(subdir, file)\n", + " # print(fp)\n", + " file_list.append(file)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "55675d70-413e-4f1e-8666-3bb4f38e3359", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "74" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(file_list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7513ce41-f4ea-44e2-9d76-ea3b857c8069", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "cd11283a-63f1-4893-828e-6cd72f646ef1", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "starting loop\n", + "Doneile num: 400\n" + ] + } + ], + "source": [ + "# rerun and get missed files\n", + "\n", + "# Get all CSV files\n", + "indir = \"/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master\"\n", + "file_list = []\n", + "for subdir, dirs, files in os.walk(indir):\n", + " for file in files:\n", + " if file.endswith(\".h5\"):\n", + " # fp = os.path.join(subdir, file)\n", + " # print(fp)\n", + " file_list.append(file)\n", + "\n", + "\n", + "# Change to appropriate filepaths for 
urls\n", + "l1b_urls_fpath = \"./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt\"\n", + "l2a_urls_fpath = \"./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt\"\n", + "\n", + "with open(l1b_urls_fpath) as f:\n", + " l1b_fpaths = f.readlines()\n", + "with open(l2a_urls_fpath) as f:\n", + " l2a_fpaths = f.readlines()\n", + "print(\"starting loop\")\n", + "# Get matching string pattern and run main.py\n", + "jobs_list = []\n", + "counter=1\n", + "for l1b_fp in l1b_fpaths:\n", + " # Get string pattern\n", + " str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n", + " try:\n", + " l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]\n", + " except:\n", + " print(\"No matching L2 file!\", l1b_fp)\n", + " continue\n", + " \n", + " # Check if file was already downloaded\n", + " date_str = os.path.basename(l1b_fp).split(\"_\")[2]\n", + " if any(date_str in x for x in file_list):\n", + " continue\n", + " \n", + " # Submit job\n", + " job = maap.submitJob(identifier=\"SouthAmericaGEDI\",\n", + " algo_id=\"arojas_download_gedi_data\",\n", + " version=\"master\",\n", + " username=\"arojearthdata\",\n", + " queue=\"maap-dps-worker-8gb\",\n", + " L1B_URL=l1b_fp,\n", + " L2A_URL=l2a_fp)\n", + " jobs_list.append(job)\n", + " \n", + " if counter%100==0:\n", + " print(\"on file num: \", counter, end='\\r')\n", + " counter+=1\n", + "\n", + "print('Done')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f17583e-20da-4dea-998c-9e0139700400", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35ff40f2-65f8-414d-a53f-e8e0f7487557", + "metadata": {}, + "outputs": [], + "source": [ + "## Condense files into new single folder" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c980c00-9823-4043-b235-4a837b8b92fa", + "metadata": {}, + "outputs": [], + "source": [ + "import shutil\n", + "# shutil.move(\"path/to/current/file.foo\", \"path/to/new/destination/for/file.foo\")" + ] 
+ }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d80ac20-b71e-486f-b153-6d19c3fd1fc2", + "metadata": {}, + "outputs": [], + "source": [ + "outdir = \"/projects/my-private-bucket/GEDI/raw/SouthAmerica\"\n", + "for fp in csv_list:\n", + " basename = os.path.basename(fp)\n", + " if \"GEDI01_B\" in basename:\n", + " outfp = os.path.join(outdir, \"L1B\", basename)\n", + " elif \"GEDI02_A\" in basename:\n", + " outfp = os.path.join(outdir, \"L2A\", basename)\n", + " shutil.move(fp, outfp)\n", + "print(\"DONE\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c3f7b66-e482-427f-85ba-19c6d2786e16", + "metadata": {}, + "outputs": [], + "source": [ + "## Check all files in new folder!\n", + "indir = \"/projects/my-private-bucket/GEDI/biomass/2022\"\n", + "csv_list = glob.glob(os.path.join(indir, \"*\"))\n", + "print(len(csv_list))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f63c64c5-4688-43f4-8139-bfc68236d4f4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, "nbformat": 4, "nbformat_minor": 5 } diff --git a/notebooks/access-gedi.ipynb b/notebooks/access-gedi.ipynb index b1b0b1f4ea572af07086326b781e4cc7cdd71495..2d0033be6e28c7078da1d5ee5aafd455183ce3c9 100644 --- a/notebooks/access-gedi.ipynb +++ b/notebooks/access-gedi.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "id": "9803c6a0-d547-4112-8edc-eb62680360e2", "metadata": {}, "outputs": [], @@ -17,21 +17,36 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "54d3df6a-e22a-4f15-ab22-859df5d4b367", 
+ "execution_count": 9, + "id": "96d5cdce-a2cf-4ec5-8671-f9edd500ab8b", "metadata": { - "scrolled": true, "tags": [] }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "on file num: 400\r" - ] + "data": { + "text/plain": [ + "<Response [200]>" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" } ], + "source": [ + "#maap.register_algorithm_from_yaml_file(\"/projects/arojas_download_gedi_data.yml\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "54d3df6a-e22a-4f15-ab22-859df5d4b367", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], "source": [ "# Change to appropriate filepaths for urls\n", "# l1b_urls_fpath = \"/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt\"\n", @@ -47,7 +62,7 @@ "# Get matching string pattern and run main.py\n", "jobs_list = []\n", "counter=1\n", - "for l1b_fp in l1b_fpaths:\n", + "for l1b_fp in l1b_fpaths[:10]:\n", " # Get string pattern\n", " str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n", " try:\n", @@ -57,7 +72,7 @@ " continue\n", " # print(l1b_fp)\n", " # print(l2a_fp)\n", - " job = maap.submitJob(identifier=\"SouthAmericaGEDI\",\n", + " job = maap.submitJob(identifier=\"SouthAmericaGEDI-run-20240117\",\n", " algo_id=\"arojas_download_gedi_data\",\n", " version=\"master\",\n", " username=\"arojearthdata\",\n", @@ -74,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "id": "d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d", "metadata": {}, "outputs": [ @@ -82,16 +97,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "Failed\n", - "353e658d-8bfa-4b7d-b508-24e8d42d3483\n" + "Accepted\n", + "84175819-3e25-4fee-a6c0-4e52b2dca88f\n", + "Accepted\n", + "7a478af7-e3e5-4206-a033-4fd23a1e3c81\n", + "Accepted\n", + "04d8a2dc-e54f-461d-8ace-39a381dd69d1\n", + "Accepted\n", + "c6bd0fc6-8f77-4f17-8cc1-4508232366d4\n", + "Accepted\n", + "630b5127-7b1f-4d2a-95a2-f571a6a32d6d\n", + 
"Accepted\n", + "747dbd20-d920-4658-aa0d-fe3c47838f49\n", + "Accepted\n", + "fa394b4b-0009-484f-af99-e96cfcc8d97b\n", + "Accepted\n", + "854aa24b-0861-42fa-9ff7-ae501791448b\n", + "Accepted\n", + "a28e565b-03a4-4c89-b4de-ad2c7d824fec\n", + "Accepted\n", + "3dc475c2-c87e-43a3-a876-1118815ff82f\n" ] } ], "source": [ - "for job in jobs_list:\n", + "for job in jobs_list[:10]:\n", " print(job.retrieve_status())\n", - " print(job.id)\n", - " break" + " print(job.id)" ] }, {