From 633d842c2faab8f86f17e3f19ff2774a1a6bfa7c Mon Sep 17 00:00:00 2001
From: Alex Rojas <a.rojas8907@gmail.com>
Date: Sun, 3 Dec 2023 00:50:10 +0000
Subject: [PATCH] Get GEDI data using fsspec with MAAP credentials; add new
 file get_gedi_data_fsspec.py

---
 .../get_gedi_data_fsspec.cpython-312.pyc      | Bin 0 -> 2644 bytes
 get_gedi_data_fsspec.py                       |  61 ++++++++++++
 main.py                                       |   6 +-
 notebooks/run-gedi-biomass.ipynb              |  93 ++++++------------
 4 files changed, 97 insertions(+), 63 deletions(-)
 create mode 100644 __pycache__/get_gedi_data_fsspec.cpython-312.pyc
 create mode 100644 get_gedi_data_fsspec.py

diff --git a/__pycache__/get_gedi_data_fsspec.cpython-312.pyc b/__pycache__/get_gedi_data_fsspec.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1dd145893831fe9024ae578c30736fba7982492c
GIT binary patch
literal 2644
zcmai0O>7fK6rTO}X8n^85<>zc5Cpg)v1pREs`wEN4GoZh3M#^-Sj+B0Z0xL=SvO7O
zRIXG+qgDzCsR=?A>7f!89O#WB?FosCEmPr6XjO?`xTT~LQciud_S#8H%UJu~%=??2
zx9@%9hk-x?f^q5L1?x8%p{H!&4WT+&!Z4XdCNjAcid8rl<J>hL<6SPq1ec33(dAN1
z0?w!8v=UP|#3M^Fg*)PHt`almbFI{vYD%A@Sc548)l3Db-}C_uOmGqP88aS<a8w-L
zzyH|eT0N8KN$Yy1JA>_{WjH-HJ`Y-z$>O6nag1tscM}mPJHiqLGK-v=F3G$4RpfSc
zHO1w)q_j%qE=<nkZ&&@M_#B^9{%w;(?h!pJAt#v9dN4C(MyJI8h^%-?nUZo+QhiBW
z(e&L>YrD%i*=%qlDmmpiifAKLAS6AhW=Z$BMV#)}Pegbsj_7HN22UoO(F2x&Co)dL
z9;50p9fRDlFqJ;gQ&|gK9YRAVvW|@tdg{1E2-tzY^1hw2_Tw=cVw0-ejj0V_%e2NE
zFeed-N@s1y?xjlAgvv%L0a_Y3Z#nTyZ97dTB%8M47@|ddbPc?5T^k=`FRPHs?idOY
zQMK+Hs!ih`vzNviGia_vG~JMK`z)&nsu}f{BR(+etd8Zw(Q4I@=-Gsw2IubcmYL`_
z?6E9~5^TIo%+A29;->EC@pFV^ETbngLBlIf6JN0<-T+$$;31cR<k26&=9%<#`b#^1
za8X9unwg&Ip1GEJt<<nHKTvLLe%!dZ*toeI=qQKV7bVoRWeEvQa(-yhhr*rnTCsKK
zt&_#p{`}Du-lc{P@NL_8GkQH*ZtN(pi-2eQYv9=~7sT*l5H)X`PZ!tp<q!X^wHCzI
zC3qFIsx?^gx~(w?lUKEd$w6WBtl(IISGmae9qBe}G8j;X@(ATgFV*%N27JcD*2IuW
zL&u@?ET<--{%Vz+urF9+5rGO9tO+U;*NRGxE9I$NNi=OZmq=M;f;d(>!r?a9hp;B@
z#O*+7pkfu*-3{>$kZorsyMW};6Rqj7wxOtPDEs_ne|RoX^lvMN+6wJEi=pmvaQ$3Q
zF&OzvmHhG|k|Y^#1h1FpC<&TbID4`NARwp?ldCl^07Q<f<mUz;CCuF%B|Q}NT!5RB
z=e5%E2Kq*J^SI1KC&Q%o?PxURLY>L*&VlN-%;+=L8_!r_PYOMw{m`fq$)pkvFb5|x
zuokj7MO8C_<A$Bi5W?W7vzoGSDr4%pQ7P$B$H|bmW5-D^W;u((Lk5845me@^dR3k4
zx)#h2l>=MnPu;rs?dkcEyPNM0-P3<K^0;rX*f)4TTk0Dw^o~5x3&&0sVjmWdoqq6X
zK|50r&-_n1zlJofEIFr^pn5XB^h|M1wZITcvXlQ~yc8`_&6|f$!b4j5DZyco;f|!=
zo7FiWbEv-ZW=@kg%hh6fu}z_d$Koqu3#RDV_9MV}G1=_Jt#j}GIe<P|3wMPKRxNd{
zH_OR6$&1CIuUbW<vg;Urbt!m;J^MQM;&W&Mb)j9*NaH+*P9Y9{U$uQAoI>MVL>ifF
zi6-=P)V*XG<aJ|9H!VD=*jXo&b)uBRlpBxu-EMQij`B94Vn%mHsqd^#EEmbe(S86)
zaS+;<igv~ta|?`$(CuJs*r1e*lufLNf?3&7mGoB86~D>W66|*_LzN2l5-OO)p#oq$
zVbE2D^ePOj!f4D2xECJs9T0$l#*L3Q^%ve4E^QhqH6G0mJqb0<j8Bh04s{em9rHpd
zv^D=;IoNV#^h&JY@65mZB)DcKF`bxGZw9XiA8p@zkCe6#l-3=%Z<K<E@&})&ftmHw
z>#y|Bb(Pdji-?!H=DW(xZL_<t?!LD7IpWl=F9yo~=9weYN9LO6jGz5m{uDs?LI6tu
zamjyq__N_lM_(vx&D@s>E65-B?09FN@Y6nNz^6ola4Sy`{1(V@BM`_e+_8p@ExgTL
zV(VfSZhBJr>*M|>!mN#`$i5uzXO8S9s(dEa+-_dJM{Zxbd0Y1FPTOWSW$nd>K+mv^
z>;bYUa2)p(H7p=)0r?kDXaNO){)*auQM5wq&PPgjUR*%xb2-d~pZCd}{JfFp;@pzY
J$E|h4{|$V_b^8DS

literal 0
HcmV?d00001

diff --git a/get_gedi_data_fsspec.py b/get_gedi_data_fsspec.py
new file mode 100644
index 0000000..0ee0fbb
--- /dev/null
+++ b/get_gedi_data_fsspec.py
@@ -0,0 +1,61 @@
+
+import sys
+import h5py
+import boto3
+import botocore
+import fsspec
+import requests
+from maap.maap import MAAP
+maap = MAAP(maap_host="api.maap-project.org")
+import os
+
+
+def assume_role_credentials(ssm_parameter_name):
+    """Return temporary STS credentials for the role ARN stored in SSM.
+
+    Reads the decrypted value of the SSM parameter ``ssm_parameter_name``
+    (region us-west-2), passes it to STS ``assume_role`` as the role ARN, and
+    returns the ``Credentials`` entry of that response.
+    """
+    # Session built from the credentials already available to boto3.
+    aws = boto3.Session()
+
+    # The parameter value is used as the ARN of the role to assume.
+    ssm_client = aws.client('ssm', "us-west-2")
+    ssm_response = ssm_client.get_parameter(
+        Name=ssm_parameter_name, WithDecryption=True
+    )
+    role_arn = ssm_response['Parameter']['Value']
+
+    # Assume the DAAC access role to obtain temporary credentials.
+    sts_client = aws.client('sts')
+    assumed = sts_client.assume_role(
+        RoleArn=role_arn, RoleSessionName='TutorialSession'
+    )
+
+    return assumed['Credentials']
+
+def fsspec_access(credentials):
+    """Return an fsspec "s3" filesystem authenticated with assumed-role credentials."""
+    s3_auth = {
+        "key": credentials['AccessKeyId'],
+        "secret": credentials['SecretAccessKey'],
+        "token": credentials['SessionToken'],
+    }
+    return fsspec.filesystem("s3", **s3_auth)
+
+def lpdaac_gedi_https_to_s3(url):
+    """Map https://<host>//<a>/<b>/<product>/<date>/<file> to s3://lp-prod-protected/<product>/<stem>/<file>."""
+    dir_comps = url.split("/")  # removesuffix below: strip('.h5') would also eat a trailing 5/h/. of the stem
+    return f"s3://lp-prod-protected/{dir_comps[6]}/{dir_comps[8].removesuffix('.h5')}/{dir_comps[8]}"
+def get_gedi_data(url):
+    """Copy the GEDI granule behind ``url`` from S3 into output/ and return the local path."""
+    s3_fsspec = fsspec_access(assume_role_credentials("/iam/maap-data-reader"))
+    outfp = f"output/{os.path.basename(url)}"
+    # Context managers close the S3 handle and both HDF5 files even when a copy
+    # raises; previously the S3 handle was never closed and gedi_ds leaked on error.
+    with s3_fsspec.open(lpdaac_gedi_https_to_s3(url)) as remote:
+        with h5py.File(remote, "r") as gedi_ds, h5py.File(outfp, 'w') as dst:
+            for obj in gedi_ds.keys():
+                gedi_ds.copy(obj, dst)
+    return outfp
diff --git a/main.py b/main.py
index 7b7ae49..b0f0536 100644
--- a/main.py
+++ b/main.py
@@ -20,7 +20,9 @@ from itertools import repeat
 from pgap import GapDS, wf_smooth
 # import custom functions, etc.
 # from download_gedi import download_gedi
-from get_gedi_data import get_gedi_data
+# from get_gedi_data import get_gedi_data
+from get_gedi_data_fsspec import get_gedi_data
+
 
 ## GET CWD of file to locate path
 CWD = os.path.dirname(os.path.abspath(__file__))
@@ -152,7 +154,7 @@ if __name__ == '__main__':
         if not isinstance(idx, list):
             idx = [idx]
 
-        # run function in parallel
+        # run function in parallel (not available in MAAP)
         # pool = mp.Pool(10)
         # results = pool.map(gedi_bioindex, idx)
         # results = pool.starmap(gedi_bioindex, zip(idx, repeat(l1b_ds), repeat(l2a_ds)))
diff --git a/notebooks/run-gedi-biomass.ipynb b/notebooks/run-gedi-biomass.ipynb
index 4c55089..4b6438d 100644
--- a/notebooks/run-gedi-biomass.ipynb
+++ b/notebooks/run-gedi-biomass.ipynb
@@ -53,7 +53,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 8,
    "id": "b39e76ac-d765-4b8c-baf1-fe41f3296de9",
    "metadata": {},
    "outputs": [
@@ -63,17 +63,6 @@
      "text": [
       "on file num:  100\r"
      ]
-    },
-    {
-     "ename": "KeyboardInterrupt",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[3], line 35\u001b[0m\n\u001b[1;32m     33\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m counter\u001b[38;5;241m%\u001b[39m\u001b[38;5;241m100\u001b[39m\u001b[38;5;241m==\u001b[39m\u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m     34\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mon file num: \u001b[39m\u001b[38;5;124m\"\u001b[39m, counter, end\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;130;01m\\r\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 35\u001b[0m     \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m800\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m     36\u001b[0m counter\u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m\n",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
-     ]
     }
    ],
    "source": [
@@ -89,7 +78,7 @@
     "# Get matching string pattern and run main.py\n",
     "jobs_list = []\n",
     "counter=1\n",
-    "for l1b_fp in l1b_fpaths[100:]:\n",
+    "for l1b_fp in l1b_fpaths[200:]:\n",
     "    # Get string pattern\n",
     "    str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n",
     "    try:\n",
@@ -111,7 +100,8 @@
     "    \n",
     "    if counter%100==0:\n",
     "        print(\"on file num: \", counter, end='\\r')\n",
-    "        time.sleep(800)\n",
+    "        break\n",
+    "        time.sleep(1200)\n",
     "    counter+=1"
    ]
   },
@@ -133,7 +123,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "id": "7fe1c6f4-37c5-4fe7-b9ff-51e17b7a5cf0",
    "metadata": {},
    "outputs": [
@@ -141,8 +131,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Accepted\n",
-      "d974c84b-2691-4569-b496-1cc06380ec84\n"
+      "Succeeded\n",
+      "62fd5122-a199-4ae7-bbd1-d86e11a04a7e\n"
      ]
     }
    ],
@@ -230,7 +220,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 3,
    "id": "9ecb7ffc-319f-40c9-8f6e-d18b6ea1c890",
    "metadata": {},
    "outputs": [],
@@ -250,17 +240,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 4,
    "id": "f3ae4e2c-fa90-41ae-8fa2-9e3fd1241157",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "13"
+       "169"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -271,7 +261,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 5,
    "id": "5bb204a4-e7d4-4a83-898f-81db03d080aa",
    "metadata": {},
    "outputs": [
@@ -366,55 +356,36 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 3,
    "id": "b364772a",
    "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Change to appropriate filepaths for urls\n",
-    "l1b_urls_fpath = \"/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt\"\n",
-    "l2a_urls_fpath = \"/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt\"\n",
-    "\n",
-    "with open(l1b_urls_fpath) as f:\n",
-    "    l1b_fpaths = f.read().splitlines()\n",
-    "with open(l2a_urls_fpath) as f:\n",
-    "    l2a_fpaths = f.read().splitlines()\n",
-    "\n",
-    "# Get matching string pattern and run main.py\n",
-    "jobs_list = []\n",
-    "counter=1\n",
-    "for l1b_fp in l1b_fpaths[1:]:\n",
-    "    # Get string pattern\n",
-    "    str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n",
-    "    try:\n",
-    "        l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]\n",
-    "    except:\n",
-    "        print(\"No matching L2 file!\", l1b_fp)\n",
-    "        continue\n",
-    "    break\n",
-    "    "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "4a6460a7-4d65-4311-901c-e5685daeb2f0",
-   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI01_B.002/2022.08.31/GEDI01_B_2022243125140_O21055_03_T02667_02_005_02_V002.h5\n",
-      "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_A.002/2022.08.31/GEDI02_A_2022243125140_O21055_03_T02667_02_003_02_V002.h5\n"
+      "<KeysViewHDF5 ['BEAM0000', 'BEAM0001', 'BEAM0010', 'BEAM0011', 'BEAM0101', 'BEAM0110', 'BEAM1000', 'BEAM1011', 'METADATA']>\n",
+      "[243.6951  244.02603 244.26328 244.186   243.79663 243.32755 243.1004\n",
+      " 243.24414 243.63077 243.96375]\n"
      ]
     }
    ],
    "source": [
-    "print(l1b_fp)\n",
-    "print(l2a_fp)"
+    "import h5py\n",
+    "fp = \"../output/GEDI01_B_2022243142430_O21056_03_T06784_02_005_02_V002.h5\"\n",
+    "with h5py.File(fp, \"r\") as f:\n",
+    "    print(f.keys())\n",
+    "    print(f['BEAM0000'][\"rxwaveform\"][:10])"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "4a6460a7-4d65-4311-901c-e5685daeb2f0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -426,9 +397,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "osgeo-env",
    "language": "python",
-   "name": "python3"
+   "name": "osgeo-env"
   },
   "language_info": {
    "codemirror_mode": {
@@ -440,7 +411,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.8"
+   "version": "3.12.0"
   }
  },
  "nbformat": 4,
-- 
GitLab