Skip to content
Snippets Groups Projects
Commit f8514385 authored by Alex Rojas's avatar Alex Rojas
Browse files

Changed to access data using the maap.py package with temporary credentials.

parent 78466653
No related branches found
No related tags found
No related merge requests found
No preview for this file type
import sys import sys
import h5py import h5py
import boto3 import boto3
import botocore import botocore
import fsspec import fsspec
import requests
from maap.maap import MAAP from maap.maap import MAAP
maap = MAAP(maap_host="api.maap-project.org") maap = MAAP(maap_host="api.maap-project.org")
import os
def lpdaac_gedi_https_to_s3(url): def lpdaac_gedi_https_to_s3(url):
dir_comps = url.split("/") dir_comps = url.split("/")
...@@ -21,8 +25,11 @@ def get_gedi_data(url): ...@@ -21,8 +25,11 @@ def get_gedi_data(url):
secret=credentials['secretAccessKey'], secret=credentials['secretAccessKey'],
token=credentials['sessionToken'] token=credentials['sessionToken']
) )
# with s3.open(lpdaac_gedi_https_to_s3(url), "rb") as f: basename = os.path.basename(url)
# gedi_ds = h5py.File(f, "r") outfp = f"output/{basename}.h5"
# return gedi_ds
gedi_ds = h5py.File(s3.open(lpdaac_gedi_https_to_s3(url), "rb"), "r") gedi_ds = h5py.File(s3.open(lpdaac_gedi_https_to_s3(url), "rb"), "r")
return gedi_ds with h5py.File(outfp, 'w') as dst:
\ No newline at end of file for obj in gedi_ds.keys():
gedi_ds.copy(obj, dst)
gedi_ds.close()
return outfp
\ No newline at end of file
...@@ -94,9 +94,7 @@ if __name__ == '__main__': ...@@ -94,9 +94,7 @@ if __name__ == '__main__':
l1b_url = sys.argv[1] # first index is python file name, second is arg1, etc l1b_url = sys.argv[1] # first index is python file name, second is arg1, etc
l2a_url = sys.argv[2] # e.g. 'GEDI01_B' or 'GEDI02_A' l2a_url = sys.argv[2] # e.g. 'GEDI01_B' or 'GEDI02_A'
outdir = sys.argv[3] outdir = sys.argv[3]
#Download L1B and L2a
# download_gedi(l1b_url,"GEDI01_B")
# download_gedi(l2a_url,"GEDI02_A")
# Get filenames for downloaded gedi # Get filenames for downloaded gedi
l1b_basename = os.path.basename(l1b_url) l1b_basename = os.path.basename(l1b_url)
l2a_basename = os.path.basename(l2a_url) l2a_basename = os.path.basename(l2a_url)
...@@ -113,11 +111,10 @@ if __name__ == '__main__': ...@@ -113,11 +111,10 @@ if __name__ == '__main__':
print(l1b_basename) print(l1b_basename)
print(l2a_basename) print(l2a_basename)
CWD = os.path.dirname(os.path.abspath(__file__)) CWD = os.path.dirname(os.path.abspath(__file__))
l1b_ds = get_gedi_data(l1b_url) l1b_fp = get_gedi_data(l1b_url)
l2a_ds = get_gedi_data(l2a_url) l2a_fp = get_gedi_data(l2a_url)
l1b_ds = h5py.File(l1b_fp, "r")
# l1b_ds = h5py.File(os.path.join(CWD, f"{l1b_basename}")) l2a_ds = h5py.File(l2a_fp, "r")
# l2a_ds = h5py.File(os.path.join(CWD, f"{l2a_basename}"))
except Exception as e: except Exception as e:
# Some raw L1B files are corrupt? # Some raw L1B files are corrupt?
print("Corrupt file: ", l1b_basename) print("Corrupt file: ", l1b_basename)
...@@ -127,7 +124,7 @@ if __name__ == '__main__': ...@@ -127,7 +124,7 @@ if __name__ == '__main__':
orbit_num = re.findall("O[0-9]{5}", l1b_basename)[0] orbit_num = re.findall("O[0-9]{5}", l1b_basename)[0]
track_num = re.findall("T[0-9]{5}", l1b_basename)[0] track_num = re.findall("T[0-9]{5}", l1b_basename)[0]
date_str = re.findall("[0-9]{13}", l1b_basename)[0] date_str = re.findall("[0-9]{13}", l1b_basename)[0]
# outfp = os.path.join(outdir, "output",f"GEDI_bioindex_{date_str}_{orbit_num}_{track_num}.csv") # already saves to output # outfp = os.path.join(outdir, "output",f"GEDI_bioindex_{date_str}_{orbit_num}_{track_num}.csv")
outfp = os.path.join(outdir, f"GEDI_bioindex_{date_str}_{orbit_num}_{track_num}.csv") outfp = os.path.join(outdir, f"GEDI_bioindex_{date_str}_{orbit_num}_{track_num}.csv")
...@@ -199,14 +196,19 @@ if __name__ == '__main__': ...@@ -199,14 +196,19 @@ if __name__ == '__main__':
# append to df_list # append to df_list
df_list.append(new_df) df_list.append(new_df)
# Save # Save
try: try:
out_df = pd.concat(df_list, axis=0, ignore_index=True) out_df = pd.concat(df_list, axis=0, ignore_index=True)
out_df.to_csv(outfp, index=False) out_df.to_csv(outfp, index=False)
except Exception as e: # Close h5py files and delete so they are not saved!
print("Couldnt save file: ", l1b_basename) l1b_ds.close()
print("outdir: ", outdir) l2a_ds.close()
print("outfp: ", outfp) # os.remove(l1b_fp)
print(e) # os.remove(l2a_fp)
sys.exit() except Exception as e:
print("Couldnt save file: ", l1b_basename)
print("outdir: ", outdir)
print("outfp: ", outfp)
print(e)
sys.exit()
...@@ -7,10 +7,11 @@ basedir=$( cd "$(dirname "$0")" ; pwd -P) ...@@ -7,10 +7,11 @@ basedir=$( cd "$(dirname "$0")" ; pwd -P)
source activate osgeo-env source activate osgeo-env
# Create output dir # Create output dir
OUTPUTDIR="${PWD}/output" # OUTPUTDIR="${PWD}/output"
mkdir -p ${OUTPUTDIR} mkdir -p output
# INPUT_FILES # INPUT_FILES
L1B_URL=$1 L1B_URL=$1
L2A_URL=$2 # e.g. GEDI01_B or GEDI02_A L2A_URL=$2 # e.g. GEDI01_B or GEDI02_A
# YR=$3 # YR=$3
python ${basedir}/main.py ${L1B_URL} ${L2A_URL} ${OUTPUTDIR} # python ${basedir}/main.py ${L1B_URL} ${L2A_URL} ${OUTPUTDIR}
python ${basedir}/main.py ${L1B_URL} ${L2A_URL} output
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment