From 0e7907e66ad576c2bba29becb0598b296b22ccfd Mon Sep 17 00:00:00 2001 From: Alex Rojas <a.rojas8907@gmail.com> Date: Sat, 14 Sep 2024 09:49:44 -0700 Subject: [PATCH] updated to add print statements --- .../get_gedi_data-checkpoint.py | 1 + .ipynb_checkpoints/run-checkpoint.sh | 4 +- __pycache__/get_gedi_data.cpython-312.pyc | Bin 0 -> 1956 bytes get_gedi_data.py | 1 + .../access-gedi-checkpoint.ipynb | 158 +++++++++++------- notebooks/access-gedi.ipynb | 131 +++++++++++---- run.sh | 4 +- 7 files changed, 204 insertions(+), 95 deletions(-) create mode 100644 __pycache__/get_gedi_data.cpython-312.pyc diff --git a/.ipynb_checkpoints/get_gedi_data-checkpoint.py b/.ipynb_checkpoints/get_gedi_data-checkpoint.py index 13606fb..95d42c9 100644 --- a/.ipynb_checkpoints/get_gedi_data-checkpoint.py +++ b/.ipynb_checkpoints/get_gedi_data-checkpoint.py @@ -12,6 +12,7 @@ import os def lpdaac_gedi_https_to_s3(url): dir_comps = url.split("/") + print(dir_comps) return f"s3://lp-prod-protected/{dir_comps[6]}/{dir_comps[8].strip('.h5')}/{dir_comps[8]}" def get_gedi_data(url): diff --git a/.ipynb_checkpoints/run-checkpoint.sh b/.ipynb_checkpoints/run-checkpoint.sh index b2a38f4..dd83744 100644 --- a/.ipynb_checkpoints/run-checkpoint.sh +++ b/.ipynb_checkpoints/run-checkpoint.sh @@ -1,4 +1,6 @@ -source activate osgeo-env +# source activate osgeo-env +source activate /projects/env/osgeo-env +# conda activate /projects/env/osgeo-env # Get current location of build script basedir=$( cd "$(dirname "$0")" ; pwd -P ) diff --git a/__pycache__/get_gedi_data.cpython-312.pyc b/__pycache__/get_gedi_data.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..488a1d2a2de7587e8e14be6fd7355f550c902bba GIT binary patch literal 1956 zcmah~O>7%Q6rR~%uYcmyX`3bmlHv;0qOpbCP=TUKB~XZ(gi7TSQ)+E?H?dvYyWLr1 z64}T|m1v|&Y1KoFDnz(}(nGoRSm08Li!B$$Ou~hVLvN*tR4FIktg}fPD)B78H}9Ko z-_Fc?Z|ApA$d5pN9{78v#Uu2XIi7&;%Ek<sC8Q%A=TWL-aSFRWm*QN@r+C*2DZ#a3 zN(9U0rGlK2G2)OR>-=3|1$RQGd@f5#DY|$KrTn@CIxvT0@)JgwjA1HFoIQJC{-A1Q z;{{c<hAi968JZI}?OB*r7?#eM#L>1xx`AcTY}p$%P)n$Sa(ib1W1?ek8CC3fn?#iz z6vM(RSm*nIdk5Gs7T7o^vZr4ZUG!q@W!^#GN*7T~n%u&P`CgKIH<8G@JJ8uS9N31T zC!R<U>WL53JRi>t$9O6dE1z|!XxZ5!7~<GjE5=d2Wap`(XYI6R7A!(Icw}&JYdo=i zBqTANH47@)u|u<lo*mN6Vu>V(ttE7`T+ExQ-obU%Q4_O<lV+?m^WxSV?af=ds%jlD z<2V*cJ7$_B?J&G81b=cKR1LMedzO@2N<DhJt2VYSDA!|;goEwS@#U!>u6}=MdGg+| zdt>+2pT<{Dook*t_n_1|HPJ{;K2#eQrW&bFn-?xU{IU_a+z>8rKsfaB<ZVLImXN~; zuH@K`_sL6>v6DhbEGq?9a_orr$!>zZ<boAhlJ80@LWf-C9Je=^mkV~Vy*kHEW}j4$ zR}NKPmn%FJ0#_BB10F3G@kYD0!sQO_dfv$C^+vlfb>GVNN$83<s*3*;UC{j=cNkTr zikM^D+$SsB{`=^&g9ue+U>)5@0k<MoBrg_5Hx3I(VP6CI?V|AHtN(q{i;q!7xQL?Y zEktmc!)OX&_}w`A1wVz#I2M?kf5)voXc)IJJ4G2cRNKigK+BsYJzi9a8lN>kPmrW$ z8@f?+vTB}CjwES7)ii^U^TynmPQ%22dFQML(IA1jtXcfTykZog;rJ`Y9F;mODmo0E zm(7x6m7E0S>}iT^ip#Mu71-y7@@kpTBRh+M?nv)_zcfRLWoXntlg%4sjyOhvawegI zr8*hvn^uWYR0{?bGQ-et!Z~13!L*E`+hbIK#Q<3|VN!&R5+mlbS>4gVp10Vng6t@R zMOf*KMdeN(6Uytvp}aYrBdp>Fx5e&a+|UaRZzlj_fyiH=YG^%jc<IEg6RVMAGm>ok zMmKn_N2;CQ^dW!us?y(7`rE!>JJ?$fHG^-o!~Kn;C!67+c2{3L-t3BPDxzPiy|)=e z-I2xeLU}bj&<qbO^R4jl+DGlKLpL)wQ;pzY?Su8M$YOROTj#!$zmXpep1#kw21oAe zts|okF1EVHY9FpEp~b$1zMI4KXiItRDdNQFa<tvkzcg}d<o20o2rJR+qwQeN;`qXN zy{E4I9(?^D9>C{3FrP|D41PWF)x@=r+kx)d_;ZDUEXEdL$N0;eCr0D^uW@nov>Xdk z9x`Mf2b_x2reh`{L0~k~HtYe0`fTHB$si8t+%@hcWV74Ceub%HOtCM3dzL>UyPJ*K zS#SA93Z`Dl8)xjd!DSZ^NrHOHV~igo{~8Lcq2L+{ub~j=KT-c5a-ebe<Rf{gCafXl PnZ)BX-tb580XOKsk|NH) literal 0 HcmV?d00001 diff --git a/get_gedi_data.py b/get_gedi_data.py index 13606fb..95d42c9 100644 --- a/get_gedi_data.py +++ b/get_gedi_data.py @@ -12,6 +12,7 @@ import os def lpdaac_gedi_https_to_s3(url): dir_comps = url.split("/") + print(dir_comps) return f"s3://lp-prod-protected/{dir_comps[6]}/{dir_comps[8].strip('.h5')}/{dir_comps[8]}" def get_gedi_data(url): diff --git a/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb index bff62e8..7babf4e 100644 --- a/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "id": "9803c6a0-d547-4112-8edc-eb62680360e2", "metadata": {}, "outputs": [], @@ -12,49 +12,31 @@ "from maap.maap import MAAP\n", "import re\n", "# maap = MAAP(maap_host='api.maap-project.org')\n", - "maap = MAAP(maap_host='api.maap-project.org')" + "maap = MAAP(maap_host='api.maap-project.org')\n", + "import time" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 2, "id": "96d5cdce-a2cf-4ec5-8671-f9edd500ab8b", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "<Response [200]>" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#maap.register_algorithm_from_yaml_file(\"/projects/arojas_download_gedi_data.yml\")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "id": "54d3df6a-e22a-4f15-ab22-859df5d4b367", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "on file num: 400\r" - ] - } - ], + "outputs": [], "source": [ "# Change to appropriate filepaths for urls\n", "# l1b_urls_fpath = \"/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt\"\n", @@ -70,7 +52,7 @@ "# Get matching string pattern and run main.py\n", "jobs_list = []\n", "counter=1\n", - "for l1b_fp in l1b_fpaths:\n", + "for l1b_fp in l1b_fpaths[:10]:\n", " # Get string pattern\n", " str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n", " try:\n", @@ -84,20 +66,58 @@ " algo_id=\"arojas_download_gedi_data\",\n", " version=\"master\",\n", " username=\"arojearthdata\",\n", - " queue=\"maap-dps-worker-8gb\",\n", + " queue=\"maap-dps-worker-16gb\",\n", " L1B_URL=l1b_fp,\n", " L2A_URL=l2a_fp)\n", "\n", " jobs_list.append(job)\n", - " \n", " if counter%100==0:\n", " print(\"on file num: \", counter, end='\\r')\n", + " time.sleep(900)\n", " counter+=1" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 16, + "id": "d71d17cf-9a79-4508-9047-dade9b64201b", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI01_B.002/2020.08.31/GEDI01_B_2020244155754_O09740_04_T06708_02_005_01_V002.h5\n", + "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_A.002/2020.08.31/GEDI02_A_2020244155754_O09740_04_T06708_02_003_01_V002.h5\n" + ] + } + ], + "source": [ + "for l1b_fp in l1b_fpaths[:10]:\n", + " \n", + " # Get string pattern\n", + " str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n", + " l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]\n", + " print(l1b_fp)\n", + " print(l2a_fp)\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1dce543-f916-4561-990b-ae8c3fdb7ad7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, "id": "d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d", "metadata": {}, "outputs": [ @@ -105,31 +125,51 @@ "name": "stdout", "output_type": "stream", "text": [ - "Running\n", - "9efde8c0-b4aa-4d4b-9972-55b72be802a6\n", - "Running\n", - "d101a1b9-c44e-44e8-931a-6fe9f436e03d\n", - "Running\n", - "acc12502-5cfb-4f05-b05b-2f57657d2f53\n", - "Running\n", - "126548ea-2fd0-434a-b040-66a0a45d8f9c\n", - "Running\n", - "325641a2-f0f5-40a7-8904-c023d71a7b55\n", - "Running\n", - "a9039b0e-f678-403a-8ff2-576adb138199\n", - "Accepted\n", - "a99753e5-3ceb-4b25-b1e5-e1c154d2235d\n", - "Accepted\n", - "b985c8ff-77ae-41c9-8b00-d1e9ee0cc717\n", - "Accepted\n", - "d70e4917-323f-48e8-976d-6c94b08277a2\n", - "Accepted\n", - "0fe1d4f4-cb0c-41ae-99ad-0e9d28f7e695\n" + "Succeeded\n", + "f70b1586-e3ac-4155-a73e-49f5386c7e2c\n", + "Succeeded\n", + "10279c23-3225-460e-8cf9-6364fa0ec2a0\n", + "Succeeded\n", + "fe212a29-9da4-4a72-a02d-94a37750e1fa\n", + "Succeeded\n", + "7855a6a3-0416-41f6-b546-11eab9097adc\n", + "Succeeded\n", + "8c343834-6622-404e-9b83-d4dd771a151f\n", + "Succeeded\n", + "cc412cd5-1491-4dad-bc19-63ae70f9b6af\n", + "Succeeded\n", + "86b301cf-5b37-43af-b2f5-cb51d5f6b1c3\n", + "Succeeded\n", + "ec0e4005-b511-4d93-8fac-a0ff34682b1f\n", + "Succeeded\n", + "30f06fb8-aab6-4e74-aba4-e7a5d76ac100\n", + "Succeeded\n", + "30c44930-66fa-4215-b877-517a1e6472b1\n", + "Succeeded\n", + "c1e9fd05-241a-4bea-8a85-3fc8992cbf67\n", + "Succeeded\n", + "984eab57-449c-4fce-b490-b6ef05cd80f2\n", + "Succeeded\n", + "da048416-13f5-4a00-b7e0-76bff20bfa12\n", + "Succeeded\n", + "cc8d47fc-7615-4ba2-adbc-276b5f45c75a\n", + "Succeeded\n", + "c42d1afe-3d55-40d3-af4a-8ed98bc98492\n", + "Succeeded\n", + "e39ebed1-9694-4dfb-bafb-5249c7fca41e\n", + "Succeeded\n", + "656d0e68-ab9e-42a2-95b4-7fa45503cd8a\n", + "Succeeded\n", + "5c36624d-c33b-4756-9436-cac71ebb7ae9\n", + "Succeeded\n", + "1a5b59c1-c26e-4028-b6d9-54b7676f3c67\n", + "Succeeded\n", + "c2f3e181-22c5-460c-a4ac-d2fd2694f96e\n" ] } ], "source": [ - "for job in jobs_list[:10]:\n", + "for job in jobs_list[:20]:\n", " print(job.retrieve_status())\n", " print(job.id)" ] @@ -144,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 17, "id": "89d3a568-4801-4168-891f-04c65091d336", "metadata": {}, "outputs": [], @@ -158,22 +198,24 @@ " if file.endswith(\".h5\"):\n", " # fp = os.path.join(subdir, file)\n", " # print(fp)\n", - " file_list.append(file)" + " file_list.append(os.path.join(subdir, file))" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "55675d70-413e-4f1e-8666-3bb4f38e3359", - "metadata": {}, + "execution_count": 24, + "id": "7dde8135-d6b6-43ce-b2c4-fd8d0cda4077", + "metadata": { + "tags": [] + }, "outputs": [ { "data": { "text/plain": [ - "74" + "918" ] }, - "execution_count": 9, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -285,7 +327,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "7c980c00-9823-4043-b235-4a837b8b92fa", "metadata": {}, "outputs": [], diff --git a/notebooks/access-gedi.ipynb b/notebooks/access-gedi.ipynb index 2dd5dea..df842d8 100644 --- a/notebooks/access-gedi.ipynb +++ b/notebooks/access-gedi.ipynb @@ -12,7 +12,8 @@ "from maap.maap import MAAP\n", "import re\n", "# maap = MAAP(maap_host='api.maap-project.org')\n", - "maap = MAAP(maap_host='api.maap-project.org')" + "maap = MAAP(maap_host='api.maap-project.org')\n", + "import time" ] }, { @@ -29,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "id": "54d3df6a-e22a-4f15-ab22-859df5d4b367", "metadata": { "scrolled": true, @@ -70,15 +71,53 @@ " L2A_URL=l2a_fp)\n", "\n", " jobs_list.append(job)\n", - " \n", " if counter%100==0:\n", " print(\"on file num: \", counter, end='\\r')\n", + " time.sleep(900)\n", " counter+=1" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 16, + "id": "d71d17cf-9a79-4508-9047-dade9b64201b", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI01_B.002/2020.08.31/GEDI01_B_2020244155754_O09740_04_T06708_02_005_01_V002.h5\n", + "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_A.002/2020.08.31/GEDI02_A_2020244155754_O09740_04_T06708_02_003_01_V002.h5\n" + ] + } + ], + "source": [ + "for l1b_fp in l1b_fpaths[:10]:\n", + " \n", + " # Get string pattern\n", + " str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n", + " l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]\n", + " print(l1b_fp)\n", + " print(l2a_fp)\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1dce543-f916-4561-990b-ae8c3fdb7ad7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, "id": "d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d", "metadata": {}, "outputs": [ @@ -86,31 +125,51 @@ "name": "stdout", "output_type": "stream", "text": [ - "Accepted\n", - "54dc2f4c-eb12-4720-a83b-5b2e3548a94f\n", - "Accepted\n", - "6cc81082-0bc8-4d31-b295-62c4135b9b4b\n", - "Accepted\n", - "1b7bf749-78f6-49ee-9e84-71783e8d5449\n", - "Accepted\n", - "6e09b33b-8159-4449-9654-ccf29e6b6486\n", - "Accepted\n", - "774d6139-212f-4841-8f1b-13a5eadda4bc\n", - "Accepted\n", - "bbdb0764-3dc2-458c-aa8d-2151e03125c5\n", - "Accepted\n", - "1c5faed3-2c03-4260-ac4e-7478007c0b0d\n", - "Accepted\n", - "c92b2edf-2c1b-4b76-8e4d-d18634b7603d\n", - "Accepted\n", - "67ed03d8-7d9a-41a7-8858-55f58012a00e\n", - "Accepted\n", - "85c052ca-1269-4335-9559-63b2475a069f\n" + "Succeeded\n", + "f70b1586-e3ac-4155-a73e-49f5386c7e2c\n", + "Succeeded\n", + "10279c23-3225-460e-8cf9-6364fa0ec2a0\n", + "Succeeded\n", + "fe212a29-9da4-4a72-a02d-94a37750e1fa\n", + "Succeeded\n", + "7855a6a3-0416-41f6-b546-11eab9097adc\n", + "Succeeded\n", + "8c343834-6622-404e-9b83-d4dd771a151f\n", + "Succeeded\n", + "cc412cd5-1491-4dad-bc19-63ae70f9b6af\n", + "Succeeded\n", + "86b301cf-5b37-43af-b2f5-cb51d5f6b1c3\n", + "Succeeded\n", + "ec0e4005-b511-4d93-8fac-a0ff34682b1f\n", + "Succeeded\n", + "30f06fb8-aab6-4e74-aba4-e7a5d76ac100\n", + "Succeeded\n", + "30c44930-66fa-4215-b877-517a1e6472b1\n", + "Succeeded\n", + "c1e9fd05-241a-4bea-8a85-3fc8992cbf67\n", + "Succeeded\n", + "984eab57-449c-4fce-b490-b6ef05cd80f2\n", + "Succeeded\n", + "da048416-13f5-4a00-b7e0-76bff20bfa12\n", + "Succeeded\n", + "cc8d47fc-7615-4ba2-adbc-276b5f45c75a\n", + "Succeeded\n", + "c42d1afe-3d55-40d3-af4a-8ed98bc98492\n", + "Succeeded\n", + "e39ebed1-9694-4dfb-bafb-5249c7fca41e\n", + "Succeeded\n", + "656d0e68-ab9e-42a2-95b4-7fa45503cd8a\n", + "Succeeded\n", + "5c36624d-c33b-4756-9436-cac71ebb7ae9\n", + "Succeeded\n", + "1a5b59c1-c26e-4028-b6d9-54b7676f3c67\n", + "Succeeded\n", + "c2f3e181-22c5-460c-a4ac-d2fd2694f96e\n" ] } ], "source": [ - "for job in jobs_list[:10]:\n", + "for job in jobs_list[:20]:\n", " print(job.retrieve_status())\n", " print(job.id)" ] @@ -125,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 31, "id": "89d3a568-4801-4168-891f-04c65091d336", "metadata": {}, "outputs": [], @@ -139,22 +198,24 @@ " if file.endswith(\".h5\"):\n", " # fp = os.path.join(subdir, file)\n", " # print(fp)\n", - " file_list.append(file)" + " file_list.append(os.path.join(subdir, file))" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "55675d70-413e-4f1e-8666-3bb4f38e3359", - "metadata": {}, + "execution_count": 32, + "id": "7dde8135-d6b6-43ce-b2c4-fd8d0cda4077", + "metadata": { + "tags": [] + }, "outputs": [ { "data": { "text/plain": [ - "74" + "0" ] }, - "execution_count": 9, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -256,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "35ff40f2-65f8-414d-a53f-e8e0f7487557", "metadata": {}, "outputs": [], @@ -266,7 +327,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "7c980c00-9823-4043-b235-4a837b8b92fa", "metadata": {}, "outputs": [], @@ -283,7 +344,7 @@ "outputs": [], "source": [ "outdir = \"/projects/my-private-bucket/GEDI/raw/SouthAmerica\"\n", - "for fp in csv_list:\n", + "for fp in file_list:\n", " basename = os.path.basename(fp)\n", " if \"GEDI01_B\" in basename:\n", " outfp = os.path.join(outdir, \"L1B\", basename)\n", diff --git a/run.sh b/run.sh index b2a38f4..dd83744 100644 --- a/run.sh +++ b/run.sh @@ -1,4 +1,6 @@ -source activate osgeo-env +# source activate osgeo-env +source activate /projects/env/osgeo-env +# conda activate /projects/env/osgeo-env # Get current location of build script basedir=$( cd "$(dirname "$0")" ; pwd -P ) -- GitLab