From 06c17ec9fa4fb7b94082d0195768a337651d7798 Mon Sep 17 00:00:00 2001
From: Alex Rojas <a.rojas8907@gmail.com>
Date: Thu, 30 Nov 2023 06:06:23 +0000
Subject: [PATCH] Using S3 bucket access for gedi data.

---
 get_gedi_data.py                 | 27 +++++++++++++++++++++++++++
 main.py                          | 15 +++++++++------
 notebooks/run-gedi-biomass.ipynb |  2 +-
 3 files changed, 37 insertions(+), 7 deletions(-)
 create mode 100644 get_gedi_data.py

diff --git a/get_gedi_data.py b/get_gedi_data.py
new file mode 100644
index 0000000..fd77e7e
--- /dev/null
+++ b/get_gedi_data.py
@@ -0,0 +1,56 @@
+import sys
+import h5py
+import boto3
+import botocore
+import fsspec
+from maap.maap import MAAP
+maap = MAAP(maap_host="api.maap-project.org")
+
+def lpdaac_gedi_https_to_s3(url):
+    """Translate an LP DAAC HTTPS granule URL into its s3:// location.
+
+    The "/"-separated components at index 6 (collection directory) and
+    index 8 (granule file name) are reused to build
+    ``s3://lp-prod-protected/<collection>/<granule stem>/<granule file>``.
+    NOTE(review): these indices presumably match a
+    ``https://host//a/b/<collection>/<date>/<file>`` layout -- confirm.
+
+    Raises IndexError if the URL has fewer than nine "/"-separated parts.
+    """
+    dir_comps = url.split("/")
+    collection, filename = dir_comps[6], dir_comps[8]
+    # str.strip('.h5') removes any of the characters '.', 'h', '5' from both
+    # ends, not the literal suffix, so drop the extension explicitly.
+    stem = filename[:-len(".h5")] if filename.endswith(".h5") else filename
+    return f"s3://lp-prod-protected/{collection}/{stem}/{filename}"
+
+def get_gedi_data(url):
+    """Open a GEDI HDF5 granule directly from the LP DAAC S3 bucket.
+
+    Temporary S3 credentials are requested through the MAAP client, the
+    HTTPS ``url`` is mapped to its s3:// path, and the object is opened
+    with h5py in read-only mode.
+
+    The S3 file object is deliberately left open: h5py reads from it
+    lazily, so closing it before returning would hand the caller an
+    unusable dataset.
+
+    Returns the open ``h5py.File``.
+    """
+    credentials = maap.aws.earthdata_s3_credentials(
+        'https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials'
+    )
+
+    s3 = fsspec.filesystem(
+        "s3",
+        key=credentials['accessKeyId'],
+        secret=credentials['secretAccessKey'],
+        token=credentials['sessionToken']
+    )
+    s3_file = s3.open(lpdaac_gedi_https_to_s3(url), "rb")
+    try:
+        return h5py.File(s3_file, "r")
+    except Exception:
+        # Do not leak the S3 handle when the object cannot be opened as HDF5.
+        s3_file.close()
+        raise
\ No newline at end of file
diff --git a/main.py b/main.py
index 5bf146f..025e500 100644
--- a/main.py
+++ b/main.py
@@ -20,6 +20,7 @@ from itertools import repeat
 from pgap import GapDS, wf_smooth
 # import custom functions, etc.
 from download_gedi import download_gedi
+from get_gedi_data import get_gedi_data
 
 ## GET CWD of file to locate path
 CWD = os.path.dirname(os.path.abspath(__file__))
@@ -91,14 +92,13 @@ def gedi_bioindex(index,l1b_ds,l2a_ds, beam, beam_filt, allom_df):
 ##########
 if __name__ == '__main__':
     
+    # File handling
     l1b_url = sys.argv[1] # first index is python file name, second is arg1, etc
     l2a_url = sys.argv[2] # e.g. 'GEDI01_B' or 'GEDI02_A'
     outdir = sys.argv[3]
-    # Call function
-    # main(l1b_fp,l2a_fp,outdir)
     #Download L1B and L2a
-    download_gedi(l1b_url,"GEDI01_B")
-    download_gedi(l2a_url,"GEDI02_A")
+    # download_gedi(l1b_url,"GEDI01_B")
+    # download_gedi(l2a_url,"GEDI02_A")
     # Get filenames for downloaded gedi
     l1b_basename = os.path.basename(l1b_url)
     l2a_basename = os.path.basename(l2a_url)
@@ -115,8 +115,11 @@ if __name__ == '__main__':
         print(l1b_basename)
         print(l2a_basename)
         CWD = os.path.dirname(os.path.abspath(__file__))
-        l1b_ds = h5py.File(os.path.join(CWD, f"{l1b_basename}"))
-        l2a_ds = h5py.File(os.path.join(CWD, f"{l2a_basename}"))
+        l1b_ds = get_gedi_data(l1b_url)
+        l2a_ds = get_gedi_data(l2a_url)
+        
+        # l1b_ds = h5py.File(os.path.join(CWD, f"{l1b_basename}"))
+        # l2a_ds = h5py.File(os.path.join(CWD, f"{l2a_basename}"))
     except Exception as e:
         # Some raw L1B files are corrupt?
         print("Corrupt file: ", l1b_basename)
diff --git a/notebooks/run-gedi-biomass.ipynb b/notebooks/run-gedi-biomass.ipynb
index 561ba0c..fcfacd9 100644
--- a/notebooks/run-gedi-biomass.ipynb
+++ b/notebooks/run-gedi-biomass.ipynb
@@ -17,7 +17,7 @@
     }
    ],
    "source": [
-    "assimport os\n",
+    "import os\n",
     "import re\n",
     "from maap.maap import MAAP\n",
     "import re\n",
-- 
GitLab