From 633d842c2faab8f86f17e3f19ff2774a1a6bfa7c Mon Sep 17 00:00:00 2001 From: Alex Rojas <a.rojas8907@gmail.com> Date: Sun, 3 Dec 2023 00:50:10 +0000 Subject: [PATCH] getting gedi data using fsspec with MAAP credentials. Created new file get_gedi_data_fsspec.py --- .../get_gedi_data_fsspec.cpython-312.pyc | Bin 0 -> 2644 bytes get_gedi_data_fsspec.py | 61 ++++++++++++ main.py | 6 +- notebooks/run-gedi-biomass.ipynb | 93 ++++++------------ 4 files changed, 97 insertions(+), 63 deletions(-) create mode 100644 __pycache__/get_gedi_data_fsspec.cpython-312.pyc create mode 100644 get_gedi_data_fsspec.py diff --git a/__pycache__/get_gedi_data_fsspec.cpython-312.pyc b/__pycache__/get_gedi_data_fsspec.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1dd145893831fe9024ae578c30736fba7982492c GIT binary patch literal 2644 zcmai0O>7fK6rTO}X8n^85<>zc5Cpg)v1pREs`wEN4GoZh3M#^-Sj+B0Z0xL=SvO7O zRIXG+qgDzCsR=?A>7f!89O#WB?FosCEmPr6XjO?`xTT~LQciud_S#8H%UJu~%=??2 zx9@%9hk-x?f^q5L1?x8%p{H!&4WT+&!Z4XdCNjAcid8rl<J>hL<6SPq1ec33(dAN1 z0?w!8v=UP|#3M^Fg*)PHt`almbFI{vYD%A@Sc548)l3Db-}C_uOmGqP88aS<a8w-L zzyH|eT0N8KN$Yy1JA>_{WjH-HJ`Y-z$>O6nag1tscM}mPJHiqLGK-v=F3G$4RpfSc zHO1w)q_j%qE=<nkZ&&@M_#B^9{%w;(?h!pJAt#v9dN4C(MyJI8h^%-?nUZo+QhiBW z(e&L>YrD%i*=%qlDmmpiifAKLAS6AhW=Z$BMV#)}Pegbsj_7HN22UoO(F2x&Co)dL z9;50p9fRDlFqJ;gQ&|gK9YRAVvW|@tdg{1E2-tzY^1hw2_Tw=cVw0-ejj0V_%e2NE zFeed-N@s1y?xjlAgvv%L0a_Y3Z#nTyZ97dTB%8M47@|ddbPc?5T^k=`FRPHs?idOY zQMK+Hs!ih`vzNviGia_vG~JMK`z)&nsu}f{BR(+etd8Zw(Q4I@=-Gsw2IubcmYL`_ z?6E9~5^TIo%+A29;->EC@pFV^ETbngLBlIf6JN0<-T+$$;31cR<k26&=9%<#`b#^1 za8X9unwg&Ip1GEJt<<nHKTvLLe%!dZ*toeI=qQKV7bVoRWeEvQa(-yhhr*rnTCsKK zt&_#p{`}Du-lc{P@NL_8GkQH*ZtN(pi-2eQYv9=~7sT*l5H)X`PZ!tp<q!X^wHCzI zC3qFIsx?^gx~(w?lUKEd$w6WBtl(IISGmae9qBe}G8j;X@(ATgFV*%N27JcD*2IuW zL&u@?ET<--{%Vz+urF9+5rGO9tO+U;*NRGxE9I$NNi=OZmq=M;f;d(>!r?a9hp;B@ z#O*+7pkfu*-3{>$kZorsyMW};6Rqj7wxOtPDEs_ne|RoX^lvMN+6wJEi=pmvaQ$3Q zF&OzvmHhG|k|Y^#1h1FpC<&TbID4`NARwp?ldCl^07Q<f<mUz;CCuF%B|Q}NT!5RB z=e5%E2Kq*J^SI1KC&Q%o?PxURLY>L*&VlN-%;+=L8_!r_PYOMw{m`fq$)pkvFb5|x zuokj7MO8C_<A$Bi5W?W7vzoGSDr4%pQ7P$B$H|bmW5-D^W;u((Lk5845me@^dR3k4 zx)#h2l>=MnPu;rs?dkcEyPNM0-P3<K^0;rX*f)4TTk0Dw^o~5x3&&0sVjmWdoqq6X zK|50r&-_n1zlJofEIFr^pn5XB^h|M1wZITcvXlQ~yc8`_&6|f$!b4j5DZyco;f|!= zo7FiWbEv-ZW=@kg%hh6fu}z_d$Koqu3#RDV_9MV}G1=_Jt#j}GIe<P|3wMPKRxNd{ zH_OR6$&1CIuUbW<vg;Urbt!m;J^MQM;&W&Mb)j9*NaH+*P9Y9{U$uQAoI>MVL>ifF zi6-=P)V*XG<aJ|9H!VD=*jXo&b)uBRlpBxu-EMQij`B94Vn%mHsqd^#EEmbe(S86) zaS+;<igv~ta|?`$(CuJs*r1e*lufLNf?3&7mGoB86~D>W66|*_LzN2l5-OO)p#oq$ zVbE2D^ePOj!f4D2xECJs9T0$l#*L3Q^%ve4E^QhqH6G0mJqb0<j8Bh04s{em9rHpd zv^D=;IoNV#^h&JY@65mZB)DcKF`bxGZw9XiA8p@zkCe6#l-3=%Z<K<E@&})&ftmHw z>#y|Bb(Pdji-?!H=DW(xZL_<t?!LD7IpWl=F9yo~=9weYN9LO6jGz5m{uDs?LI6tu zamjyq__N_lM_(vx&D@s>E65-B?09FN@Y6nNz^6ola4Sy`{1(V@BM`_e+_8p@ExgTL zV(VfSZhBJr>*M|>!mN#`$i5uzXO8S9s(dEa+-_dJM{Zxbd0Y1FPTOWSW$nd>K+mv^ z>;bYUa2)p(H7p=)0r?kDXaNO){)*auQM5wq&PPgjUR*%xb2-d~pZCd}{JfFp;@pzY J$E|h4{|$V_b^8DS literal 0 HcmV?d00001 diff --git a/get_gedi_data_fsspec.py b/get_gedi_data_fsspec.py new file mode 100644 index 0000000..0ee0fbb --- /dev/null +++ b/get_gedi_data_fsspec.py @@ -0,0 +1,61 @@ + +import sys +import h5py +import boto3 +import botocore +import fsspec +import requests +from maap.maap import MAAP +maap = MAAP(maap_host="api.maap-project.org") +import os + + +def assume_role_credentials(ssm_parameter_name): + # Create a session using your current credentials + session = boto3.Session() + + # Retrieve the SSM parameter + ssm = session.client('ssm', "us-west-2") + parameter = ssm.get_parameter( + Name=ssm_parameter_name, + WithDecryption=True + ) + parameter_value = parameter['Parameter']['Value'] + + # Assume the DAAC access role + sts = session.client('sts') + assumed_role_object = sts.assume_role( + RoleArn=parameter_value, + RoleSessionName='TutorialSession' + ) + + # From the response that contains the assumed role, get the temporary + # credentials that can be used to make subsequent API calls + credentials = assumed_role_object['Credentials'] + + return credentials + +# We can pass assumed role credentials into fsspec +def fsspec_access(credentials): + return fsspec.filesystem( + "s3", + key=credentials['AccessKeyId'], + secret=credentials['SecretAccessKey'], + token=credentials['SessionToken'] + ) + +def lpdaac_gedi_https_to_s3(url): + dir_comps = url.split("/") + return f"s3://lp-prod-protected/{dir_comps[6]}/{dir_comps[8].strip('.h5')}/{dir_comps[8]}" + +def get_gedi_data(url): + s3_fsspec = fsspec_access(assume_role_credentials("/iam/maap-data-reader")) + basename = os.path.basename(url) + outfp = f"output/{basename}" + gedi_ds = h5py.File(s3_fsspec.open(lpdaac_gedi_https_to_s3(url)), "r") + with h5py.File(outfp, 'w') as dst: + for obj in gedi_ds.keys(): + gedi_ds.copy(obj, dst) + gedi_ds.close() + # Return filepath! + return outfp diff --git a/main.py b/main.py index 7b7ae49..b0f0536 100644 --- a/main.py +++ b/main.py @@ -20,7 +20,9 @@ from itertools import repeat from pgap import GapDS, wf_smooth # import custom functions, etc. # from download_gedi import download_gedi -from get_gedi_data import get_gedi_data +# from get_gedi_data import get_gedi_data +from get_gedi_data_fsspec import get_gedi_data + ## GET CWD of file to locate path CWD = os.path.dirname(os.path.abspath(__file__)) @@ -152,7 +154,7 @@ if __name__ == '__main__': if not isinstance(idx, list): idx = [idx] - # run function in parallel + # run function in parallel (not available in MAAP) # pool = mp.Pool(10) # results = pool.map(gedi_bioindex, idx) # results = pool.starmap(gedi_bioindex, zip(idx, repeat(l1b_ds), repeat(l2a_ds))) diff --git a/notebooks/run-gedi-biomass.ipynb b/notebooks/run-gedi-biomass.ipynb index 4c55089..4b6438d 100644 --- a/notebooks/run-gedi-biomass.ipynb +++ b/notebooks/run-gedi-biomass.ipynb @@ -53,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "id": "b39e76ac-d765-4b8c-baf1-fe41f3296de9", "metadata": {}, "outputs": [ @@ -63,17 +63,6 @@ "text": [ "on file num: 100\r" ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[3], line 35\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m counter\u001b[38;5;241m%\u001b[39m\u001b[38;5;241m100\u001b[39m\u001b[38;5;241m==\u001b[39m\u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mon file num: \u001b[39m\u001b[38;5;124m\"\u001b[39m, counter, end\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;130;01m\\r\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 35\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m800\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 36\u001b[0m counter\u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] } ], "source": [ @@ -89,7 +78,7 @@ "# Get matching string pattern and run main.py\n", "jobs_list = []\n", "counter=1\n", - "for l1b_fp in l1b_fpaths[100:]:\n", + "for l1b_fp in l1b_fpaths[200:]:\n", " # Get string pattern\n", " str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n", " try:\n", @@ -111,7 +100,8 @@ " \n", " if counter%100==0:\n", " print(\"on file num: \", counter, end='\\r')\n", - " time.sleep(800)\n", + " break\n", + " time.sleep(1200)\n", " counter+=1" ] }, @@ -133,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "7fe1c6f4-37c5-4fe7-b9ff-51e17b7a5cf0", "metadata": {}, "outputs": [ @@ -141,8 +131,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Accepted\n", - "d974c84b-2691-4569-b496-1cc06380ec84\n" + "Succeeded\n", + "62fd5122-a199-4ae7-bbd1-d86e11a04a7e\n" ] } ], @@ -230,7 +220,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 3, "id": "9ecb7ffc-319f-40c9-8f6e-d18b6ea1c890", "metadata": {}, "outputs": [], @@ -250,17 +240,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 4, "id": "f3ae4e2c-fa90-41ae-8fa2-9e3fd1241157", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "13" + "169" ] }, - "execution_count": 10, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -271,7 +261,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 5, "id": "5bb204a4-e7d4-4a83-898f-81db03d080aa", "metadata": {}, "outputs": [ @@ -366,55 +356,36 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 3, "id": "b364772a", "metadata": {}, - "outputs": [], - "source": [ - "# Change to appropriate filepaths for urls\n", - "l1b_urls_fpath = \"/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt\"\n", - "l2a_urls_fpath = \"/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt\"\n", - "\n", - "with open(l1b_urls_fpath) as f:\n", - " l1b_fpaths = f.read().splitlines()\n", - "with open(l2a_urls_fpath) as f:\n", - " l2a_fpaths = f.read().splitlines()\n", - "\n", - "# Get matching string pattern and run main.py\n", - "jobs_list = []\n", - "counter=1\n", - "for l1b_fp in l1b_fpaths[1:]:\n", - " # Get string pattern\n", - " str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n", - " try:\n", - " l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]\n", - " except:\n", - " print(\"No matching L2 file!\", l1b_fp)\n", - " continue\n", - " break\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "4a6460a7-4d65-4311-901c-e5685daeb2f0", - "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI01_B.002/2022.08.31/GEDI01_B_2022243125140_O21055_03_T02667_02_005_02_V002.h5\n", - "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_A.002/2022.08.31/GEDI02_A_2022243125140_O21055_03_T02667_02_003_02_V002.h5\n" + "<KeysViewHDF5 ['BEAM0000', 'BEAM0001', 'BEAM0010', 'BEAM0011', 'BEAM0101', 'BEAM0110', 'BEAM1000', 'BEAM1011', 'METADATA']>\n", + "[243.6951 244.02603 244.26328 244.186 243.79663 243.32755 243.1004\n", + " 243.24414 243.63077 243.96375]\n" ] } ], "source": [ - "print(l1b_fp)\n", - "print(l2a_fp)" + "import h5py\n", + "fp = \"../output/GEDI01_B_2022243142430_O21056_03_T06784_02_005_02_V002.h5\"\n", + "with h5py.File(fp, \"r\") as f:\n", + " print(f.keys())\n", + " print(f['BEAM0000'][\"rxwaveform\"][:10])" ] }, + { + "cell_type": "code", + "execution_count": 13, + "id": "4a6460a7-4d65-4311-901c-e5685daeb2f0", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -426,9 +397,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "osgeo-env", "language": "python", - "name": "python3" + "name": "osgeo-env" }, "language_info": { "codemirror_mode": { @@ -440,7 +411,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.12.0" } }, "nbformat": 4, -- GitLab