Changed to access data using the maap.py package with temporary credentials.

f8514385 · Alex Rojas · 78466653 · f8514385 · f8514385 · f8514385
Commit f8514385 authored 1 year ago by Alex Rojas
--- a/__pycache__/get_gedi_data.cpython-312.pyc
+++ b/__pycache__/get_gedi_data.cpython-312.pyc
--- a/get_gedi_data.py
+++ b/get_gedi_data.py
 import sys
 import h5py
 import boto3
 import botocore
 import fsspec
+import requests
 from maap.maap import MAAP
 maap = MAAP(maap_host="api.maap-project.org")
+import os
 def lpdaac_gedi_https_to_s3(url):
    dir_comps = url.split("/")
@@ -21,8 +25,11 @@ def get_gedi_data(url):
        secret=credentials['secretAccessKey'],
        token=credentials['sessionToken']
    )
-    # with s3.open(lpdaac_gedi_https_to_s3(url), "rb") as f:
+    basename = os.path.basename(url)
-    #     gedi_ds = h5py.File(f, "r")
+    outfp = f"output/{basename}.h5"
-    #     return gedi_ds
    gedi_ds = h5py.File(s3.open(lpdaac_gedi_https_to_s3(url), "rb"), "r")
-    return gedi_ds
+    with h5py.File(outfp, 'w') as dst:
\ No newline at end of file
+        for obj in gedi_ds.keys():        
+            gedi_ds.copy(obj, dst)   
+    gedi_ds.close()
+    return outfp
\ No newline at end of file
--- a/main.py
+++ b/main.py
@@ -94,9 +94,7 @@ if __name__ == '__main__':
    l1b_url = sys.argv[1] # first index is python file name, second is arg1, etc
    l2a_url = sys.argv[2] # e.g. 'GEDI01_B' or 'GEDI02_A'
    outdir = sys.argv[3]
-    #Download L1B and L2a
-    # download_gedi(l1b_url,"GEDI01_B")
-    # download_gedi(l2a_url,"GEDI02_A")
    # Get filenames for downloaded gedi
    l1b_basename = os.path.basename(l1b_url)
    l2a_basename = os.path.basename(l2a_url)
@@ -113,11 +111,10 @@ if __name__ == '__main__':
        print(l1b_basename)
        print(l2a_basename)
        CWD = os.path.dirname(os.path.abspath(__file__))
-        l1b_ds = get_gedi_data(l1b_url)
+        l1b_fp = get_gedi_data(l1b_url)
-        l2a_ds = get_gedi_data(l2a_url)
+        l2a_fp = get_gedi_data(l2a_url)
+        l1b_ds = h5py.File(l1b_fp, "r")
-        # l1b_ds = h5py.File(os.path.join(CWD, f"{l1b_basename}"))
+        l2a_ds = h5py.File(l2a_fp, "r")
-        # l2a_ds = h5py.File(os.path.join(CWD, f"{l2a_basename}"))
    except Exception as e:
        # Some raw L1B files are corrupt?
        print("Corrupt file: ", l1b_basename)
@@ -127,7 +124,7 @@ if __name__ == '__main__':
    orbit_num = re.findall("O[0-9]{5}", l1b_basename)[0]
    track_num = re.findall("T[0-9]{5}", l1b_basename)[0]
    date_str = re.findall("[0-9]{13}", l1b_basename)[0]
-    # outfp = os.path.join(outdir, "output",f"GEDI_bioindex_{date_str}_{orbit_num}_{track_num}.csv") # already saves to output
+    # outfp = os.path.join(outdir, "output",f"GEDI_bioindex_{date_str}_{orbit_num}_{track_num}.csv")
    outfp = os.path.join(outdir, f"GEDI_bioindex_{date_str}_{orbit_num}_{track_num}.csv")
@@ -199,14 +196,19 @@ if __name__ == '__main__':
        # append to df_list
        df_list.append(new_df)
-# Save
+    # Save
-try:
+    try:
-    out_df = pd.concat(df_list, axis=0, ignore_index=True)
+        out_df = pd.concat(df_list, axis=0, ignore_index=True)
-    out_df.to_csv(outfp, index=False)  
+        out_df.to_csv(outfp, index=False)
-except Exception as e:
+        # Close the h5py files; the local copies were meant to be deleted so they are not saved, but the os.remove calls below are commented out.
-    print("Couldnt save file: ", l1b_basename)
+        l1b_ds.close()
-    print("outdir: ", outdir)
+        l2a_ds.close()
-    print("outfp: ", outfp)
+        # os.remove(l1b_fp)
-    print(e)
+        # os.remove(l2a_fp)
-    sys.exit()
+    except Exception as e:
+        print("Couldnt save file: ", l1b_basename)
+        print("outdir: ", outdir)
+        print("outfp: ", outfp)
+        print(e)
+        sys.exit()
--- a/run.sh
+++ b/run.sh
@@ -7,10 +7,11 @@ basedir=$( cd "$(dirname "$0")" ; pwd -P)
 source activate osgeo-env
 # Create output dir
-OUTPUTDIR="${PWD}/output"
+# OUTPUTDIR="${PWD}/output"
-mkdir -p ${OUTPUTDIR}
+mkdir -p output
 # INPUT_FILES
 L1B_URL=$1
 L2A_URL=$2 # e.g. GEDI01_B or GEDI02_A
 # YR=$3
-python ${basedir}/main.py ${L1B_URL} ${L2A_URL} ${OUTPUTDIR}
+# python ${basedir}/main.py ${L1B_URL} ${L2A_URL} ${OUTPUTDIR}
+python ${basedir}/main.py ${L1B_URL} ${L2A_URL} output