diff --git a/.ipynb_checkpoints/get_gedi_data-checkpoint.py b/.ipynb_checkpoints/get_gedi_data-checkpoint.py index 13606fb6013507f1221ebd448abcd2141499673c..95d42c9805a62e38b5cac3afe23a7d0b8d031818 100644 --- a/.ipynb_checkpoints/get_gedi_data-checkpoint.py +++ b/.ipynb_checkpoints/get_gedi_data-checkpoint.py @@ -12,6 +12,7 @@ import os def lpdaac_gedi_https_to_s3(url): dir_comps = url.split("/") + print(dir_comps) return f"s3://lp-prod-protected/{dir_comps[6]}/{dir_comps[8].strip('.h5')}/{dir_comps[8]}" def get_gedi_data(url): diff --git a/.ipynb_checkpoints/run-checkpoint.sh b/.ipynb_checkpoints/run-checkpoint.sh index b2a38f4414072ad3d88c9f3baef48200b4f4559b..dd83744d2320c6e2ee22765d3c3f98115859663e 100644 --- a/.ipynb_checkpoints/run-checkpoint.sh +++ b/.ipynb_checkpoints/run-checkpoint.sh @@ -1,4 +1,6 @@ -source activate osgeo-env +# source activate osgeo-env +source activate /projects/env/osgeo-env +# conda activate /projects/env/osgeo-env # Get current location of build script basedir=$( cd "$(dirname "$0")" ; pwd -P ) diff --git a/__pycache__/get_gedi_data.cpython-312.pyc b/__pycache__/get_gedi_data.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..488a1d2a2de7587e8e14be6fd7355f550c902bba Binary files /dev/null and b/__pycache__/get_gedi_data.cpython-312.pyc differ diff --git a/get_gedi_data.py b/get_gedi_data.py index 13606fb6013507f1221ebd448abcd2141499673c..95d42c9805a62e38b5cac3afe23a7d0b8d031818 100644 --- a/get_gedi_data.py +++ b/get_gedi_data.py @@ -12,6 +12,7 @@ import os def lpdaac_gedi_https_to_s3(url): dir_comps = url.split("/") + print(dir_comps) return f"s3://lp-prod-protected/{dir_comps[6]}/{dir_comps[8].strip('.h5')}/{dir_comps[8]}" def get_gedi_data(url): diff --git a/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb index bff62e83cd78fe4f074faf8d21b1be3b9094b230..7babf4ec348cd6787b2a269f4a8c764531738e53 100644 --- a/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "id": "9803c6a0-d547-4112-8edc-eb62680360e2", "metadata": {}, "outputs": [], @@ -12,49 +12,31 @@ "from maap.maap import MAAP\n", "import re\n", "# maap = MAAP(maap_host='api.maap-project.org')\n", - "maap = MAAP(maap_host='api.maap-project.org')" + "maap = MAAP(maap_host='api.maap-project.org')\n", + "import time" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 2, "id": "96d5cdce-a2cf-4ec5-8671-f9edd500ab8b", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "<Response [200]>" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#maap.register_algorithm_from_yaml_file(\"/projects/arojas_download_gedi_data.yml\")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "id": "54d3df6a-e22a-4f15-ab22-859df5d4b367", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "on file num: 400\r" - ] - } - ], + "outputs": [], "source": [ "# Change to appropriate filepaths for urls\n", "# l1b_urls_fpath = \"/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt\"\n", @@ -70,7 +52,7 @@ "# Get matching string pattern and run main.py\n", "jobs_list = []\n", "counter=1\n", - "for l1b_fp in l1b_fpaths:\n", + "for l1b_fp in l1b_fpaths[:10]:\n", " # Get string pattern\n", " str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n", " try:\n", @@ -84,20 +66,58 @@ " algo_id=\"arojas_download_gedi_data\",\n", " version=\"master\",\n", " username=\"arojearthdata\",\n", - " queue=\"maap-dps-worker-8gb\",\n", + " queue=\"maap-dps-worker-16gb\",\n", " L1B_URL=l1b_fp,\n", " L2A_URL=l2a_fp)\n", "\n", " jobs_list.append(job)\n", - " \n", " if counter%100==0:\n", " print(\"on file num: \", counter, end='\\r')\n", + " time.sleep(900)\n", " counter+=1" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 16, + "id": "d71d17cf-9a79-4508-9047-dade9b64201b", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI01_B.002/2020.08.31/GEDI01_B_2020244155754_O09740_04_T06708_02_005_01_V002.h5\n", + "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_A.002/2020.08.31/GEDI02_A_2020244155754_O09740_04_T06708_02_003_01_V002.h5\n" + ] + } + ], + "source": [ + "for l1b_fp in l1b_fpaths[:10]:\n", + " \n", + " # Get string pattern\n", + " str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n", + " l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]\n", + " print(l1b_fp)\n", + " print(l2a_fp)\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1dce543-f916-4561-990b-ae8c3fdb7ad7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, "id": "d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d", "metadata": {}, "outputs": [ @@ -105,31 +125,51 @@ "name": "stdout", "output_type": "stream", "text": [ - "Running\n", - "9efde8c0-b4aa-4d4b-9972-55b72be802a6\n", - "Running\n", - "d101a1b9-c44e-44e8-931a-6fe9f436e03d\n", - "Running\n", - "acc12502-5cfb-4f05-b05b-2f57657d2f53\n", - "Running\n", - "126548ea-2fd0-434a-b040-66a0a45d8f9c\n", - "Running\n", - "325641a2-f0f5-40a7-8904-c023d71a7b55\n", - "Running\n", - "a9039b0e-f678-403a-8ff2-576adb138199\n", - "Accepted\n", - "a99753e5-3ceb-4b25-b1e5-e1c154d2235d\n", - "Accepted\n", - "b985c8ff-77ae-41c9-8b00-d1e9ee0cc717\n", - "Accepted\n", - "d70e4917-323f-48e8-976d-6c94b08277a2\n", - "Accepted\n", - "0fe1d4f4-cb0c-41ae-99ad-0e9d28f7e695\n" + "Succeeded\n", + "f70b1586-e3ac-4155-a73e-49f5386c7e2c\n", + "Succeeded\n", + "10279c23-3225-460e-8cf9-6364fa0ec2a0\n", + "Succeeded\n", + "fe212a29-9da4-4a72-a02d-94a37750e1fa\n", + "Succeeded\n", + "7855a6a3-0416-41f6-b546-11eab9097adc\n", + "Succeeded\n", + "8c343834-6622-404e-9b83-d4dd771a151f\n", + "Succeeded\n", + "cc412cd5-1491-4dad-bc19-63ae70f9b6af\n", + "Succeeded\n", + "86b301cf-5b37-43af-b2f5-cb51d5f6b1c3\n", + "Succeeded\n", + "ec0e4005-b511-4d93-8fac-a0ff34682b1f\n", + "Succeeded\n", + "30f06fb8-aab6-4e74-aba4-e7a5d76ac100\n", + "Succeeded\n", + "30c44930-66fa-4215-b877-517a1e6472b1\n", + "Succeeded\n", + "c1e9fd05-241a-4bea-8a85-3fc8992cbf67\n", + "Succeeded\n", + "984eab57-449c-4fce-b490-b6ef05cd80f2\n", + "Succeeded\n", + "da048416-13f5-4a00-b7e0-76bff20bfa12\n", + "Succeeded\n", + "cc8d47fc-7615-4ba2-adbc-276b5f45c75a\n", + "Succeeded\n", + "c42d1afe-3d55-40d3-af4a-8ed98bc98492\n", + "Succeeded\n", + "e39ebed1-9694-4dfb-bafb-5249c7fca41e\n", + "Succeeded\n", + "656d0e68-ab9e-42a2-95b4-7fa45503cd8a\n", + "Succeeded\n", + "5c36624d-c33b-4756-9436-cac71ebb7ae9\n", + "Succeeded\n", + "1a5b59c1-c26e-4028-b6d9-54b7676f3c67\n", + "Succeeded\n", + "c2f3e181-22c5-460c-a4ac-d2fd2694f96e\n" ] } ], "source": [ - "for job in jobs_list[:10]:\n", + "for job in jobs_list[:20]:\n", " print(job.retrieve_status())\n", " print(job.id)" ] @@ -144,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 17, "id": "89d3a568-4801-4168-891f-04c65091d336", "metadata": {}, "outputs": [], @@ -158,22 +198,24 @@ " if file.endswith(\".h5\"):\n", " # fp = os.path.join(subdir, file)\n", " # print(fp)\n", - " file_list.append(file)" + " file_list.append(os.path.join(subdir, file))" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "55675d70-413e-4f1e-8666-3bb4f38e3359", - "metadata": {}, + "execution_count": 24, + "id": "7dde8135-d6b6-43ce-b2c4-fd8d0cda4077", + "metadata": { + "tags": [] + }, "outputs": [ { "data": { "text/plain": [ - "74" + "918" ] }, - "execution_count": 9, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -285,7 +327,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "7c980c00-9823-4043-b235-4a837b8b92fa", "metadata": {}, "outputs": [], diff --git a/notebooks/access-gedi.ipynb b/notebooks/access-gedi.ipynb index 2dd5deaec654c95b0c8e59505a34358ffc3c2109..df842d8d972d4bdcad11cad440b9c74e62dc36d9 100644 --- a/notebooks/access-gedi.ipynb +++ b/notebooks/access-gedi.ipynb @@ -12,7 +12,8 @@ "from maap.maap import MAAP\n", "import re\n", "# maap = MAAP(maap_host='api.maap-project.org')\n", - "maap = MAAP(maap_host='api.maap-project.org')" + "maap = MAAP(maap_host='api.maap-project.org')\n", + "import time" ] }, { @@ -29,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "id": "54d3df6a-e22a-4f15-ab22-859df5d4b367", "metadata": { "scrolled": true, @@ -70,15 +71,53 @@ " L2A_URL=l2a_fp)\n", "\n", " jobs_list.append(job)\n", - " \n", " if counter%100==0:\n", " print(\"on file num: \", counter, end='\\r')\n", + " time.sleep(900)\n", " counter+=1" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 16, + "id": "d71d17cf-9a79-4508-9047-dade9b64201b", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI01_B.002/2020.08.31/GEDI01_B_2020244155754_O09740_04_T06708_02_005_01_V002.h5\n", + "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_A.002/2020.08.31/GEDI02_A_2020244155754_O09740_04_T06708_02_003_01_V002.h5\n" + ] + } + ], + "source": [ + "for l1b_fp in l1b_fpaths[:10]:\n", + " \n", + " # Get string pattern\n", + " str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n", + " l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]\n", + " print(l1b_fp)\n", + " print(l2a_fp)\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1dce543-f916-4561-990b-ae8c3fdb7ad7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, "id": "d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d", "metadata": {}, "outputs": [ @@ -86,31 +125,51 @@ "name": "stdout", "output_type": "stream", "text": [ - "Accepted\n", - "54dc2f4c-eb12-4720-a83b-5b2e3548a94f\n", - "Accepted\n", - "6cc81082-0bc8-4d31-b295-62c4135b9b4b\n", - "Accepted\n", - "1b7bf749-78f6-49ee-9e84-71783e8d5449\n", - "Accepted\n", - "6e09b33b-8159-4449-9654-ccf29e6b6486\n", - "Accepted\n", - "774d6139-212f-4841-8f1b-13a5eadda4bc\n", - "Accepted\n", - "bbdb0764-3dc2-458c-aa8d-2151e03125c5\n", - "Accepted\n", - "1c5faed3-2c03-4260-ac4e-7478007c0b0d\n", - "Accepted\n", - "c92b2edf-2c1b-4b76-8e4d-d18634b7603d\n", - "Accepted\n", - "67ed03d8-7d9a-41a7-8858-55f58012a00e\n", - "Accepted\n", - "85c052ca-1269-4335-9559-63b2475a069f\n" + "Succeeded\n", + "f70b1586-e3ac-4155-a73e-49f5386c7e2c\n", + "Succeeded\n", + "10279c23-3225-460e-8cf9-6364fa0ec2a0\n", + "Succeeded\n", + "fe212a29-9da4-4a72-a02d-94a37750e1fa\n", + "Succeeded\n", + "7855a6a3-0416-41f6-b546-11eab9097adc\n", + "Succeeded\n", + "8c343834-6622-404e-9b83-d4dd771a151f\n", + "Succeeded\n", + "cc412cd5-1491-4dad-bc19-63ae70f9b6af\n", + "Succeeded\n", + "86b301cf-5b37-43af-b2f5-cb51d5f6b1c3\n", + "Succeeded\n", + "ec0e4005-b511-4d93-8fac-a0ff34682b1f\n", + "Succeeded\n", + "30f06fb8-aab6-4e74-aba4-e7a5d76ac100\n", + "Succeeded\n", + "30c44930-66fa-4215-b877-517a1e6472b1\n", + "Succeeded\n", + "c1e9fd05-241a-4bea-8a85-3fc8992cbf67\n", + "Succeeded\n", + "984eab57-449c-4fce-b490-b6ef05cd80f2\n", + "Succeeded\n", + "da048416-13f5-4a00-b7e0-76bff20bfa12\n", + "Succeeded\n", + "cc8d47fc-7615-4ba2-adbc-276b5f45c75a\n", + "Succeeded\n", + "c42d1afe-3d55-40d3-af4a-8ed98bc98492\n", + "Succeeded\n", + "e39ebed1-9694-4dfb-bafb-5249c7fca41e\n", + "Succeeded\n", + "656d0e68-ab9e-42a2-95b4-7fa45503cd8a\n", + "Succeeded\n", + "5c36624d-c33b-4756-9436-cac71ebb7ae9\n", + "Succeeded\n", + "1a5b59c1-c26e-4028-b6d9-54b7676f3c67\n", + "Succeeded\n", + "c2f3e181-22c5-460c-a4ac-d2fd2694f96e\n" ] } ], "source": [ - "for job in jobs_list[:10]:\n", + "for job in jobs_list[:20]:\n", " print(job.retrieve_status())\n", " print(job.id)" ] @@ -125,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 31, "id": "89d3a568-4801-4168-891f-04c65091d336", "metadata": {}, "outputs": [], @@ -139,22 +198,24 @@ " if file.endswith(\".h5\"):\n", " # fp = os.path.join(subdir, file)\n", " # print(fp)\n", - " file_list.append(file)" + " file_list.append(os.path.join(subdir, file))" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "55675d70-413e-4f1e-8666-3bb4f38e3359", - "metadata": {}, + "execution_count": 32, + "id": "7dde8135-d6b6-43ce-b2c4-fd8d0cda4077", + "metadata": { + "tags": [] + }, "outputs": [ { "data": { "text/plain": [ - "74" + "0" ] }, - "execution_count": 9, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -256,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "35ff40f2-65f8-414d-a53f-e8e0f7487557", "metadata": {}, "outputs": [], @@ -266,7 +327,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "7c980c00-9823-4043-b235-4a837b8b92fa", "metadata": {}, "outputs": [], @@ -283,7 +344,7 @@ "outputs": [], "source": [ "outdir = \"/projects/my-private-bucket/GEDI/raw/SouthAmerica\"\n", - "for fp in csv_list:\n", + "for fp in file_list:\n", " basename = os.path.basename(fp)\n", " if \"GEDI01_B\" in basename:\n", " outfp = os.path.join(outdir, \"L1B\", basename)\n", diff --git a/run.sh b/run.sh index b2a38f4414072ad3d88c9f3baef48200b4f4559b..dd83744d2320c6e2ee22765d3c3f98115859663e 100644 --- a/run.sh +++ b/run.sh @@ -1,4 +1,6 @@ -source activate osgeo-env +# source activate osgeo-env +source activate /projects/env/osgeo-env +# conda activate /projects/env/osgeo-env # Get current location of build script basedir=$( cd "$(dirname "$0")" ; pwd -P )