diff --git a/__pycache__/get_gedi_data.cpython-312.pyc b/__pycache__/get_gedi_data.cpython-312.pyc index fde21e3b2669504f0372e45cd2a8817f1f243326..bb8e2b51b5bf9b27091d82bf6cb2c4d04f0e57ab 100644 Binary files a/__pycache__/get_gedi_data.cpython-312.pyc and b/__pycache__/get_gedi_data.cpython-312.pyc differ diff --git a/get_gedi_data.py b/get_gedi_data.py index 4ab61b4ae2d2f9c2d49fa1d2f9845c49619f905b..33e3a06b11dc2b0e6ca8a9aa4454f0ac138445cd 100644 --- a/get_gedi_data.py +++ b/get_gedi_data.py @@ -1,10 +1,14 @@ + import sys import h5py import boto3 import botocore import fsspec +import requests from maap.maap import MAAP maap = MAAP(maap_host="api.maap-project.org") +import os + def lpdaac_gedi_https_to_s3(url): dir_comps = url.split("/") @@ -21,8 +25,11 @@ def get_gedi_data(url): secret=credentials['secretAccessKey'], token=credentials['sessionToken'] ) - # with s3.open(lpdaac_gedi_https_to_s3(url), "rb") as f: - # gedi_ds = h5py.File(f, "r") - # return gedi_ds + basename = os.path.basename(url) + outfp = f"output/{basename}.h5" gedi_ds = h5py.File(s3.open(lpdaac_gedi_https_to_s3(url), "rb"), "r") - return gedi_ds \ No newline at end of file + with h5py.File(outfp, 'w') as dst: + for obj in gedi_ds.keys(): + gedi_ds.copy(obj, dst) + gedi_ds.close() + return outfp \ No newline at end of file diff --git a/main.py b/main.py index 1d7ce1a11f1325ab09d5775b1e01c781aa474274..e96daeeea320a737b032d8c11a28462778cbec93 100644 --- a/main.py +++ b/main.py @@ -94,9 +94,7 @@ if __name__ == '__main__': l1b_url = sys.argv[1] # first index is python file name, second is arg1, etc l2a_url = sys.argv[2] # e.g. 
'GEDI01_B' or 'GEDI02_A' outdir = sys.argv[3] - #Download L1B and L2a - # download_gedi(l1b_url,"GEDI01_B") - # download_gedi(l2a_url,"GEDI02_A") + # Get filenames for downloaded gedi l1b_basename = os.path.basename(l1b_url) l2a_basename = os.path.basename(l2a_url) @@ -113,11 +111,10 @@ if __name__ == '__main__': print(l1b_basename) print(l2a_basename) CWD = os.path.dirname(os.path.abspath(__file__)) - l1b_ds = get_gedi_data(l1b_url) - l2a_ds = get_gedi_data(l2a_url) - - # l1b_ds = h5py.File(os.path.join(CWD, f"{l1b_basename}")) - # l2a_ds = h5py.File(os.path.join(CWD, f"{l2a_basename}")) + l1b_fp = get_gedi_data(l1b_url) + l2a_fp = get_gedi_data(l2a_url) + l1b_ds = h5py.File(l1b_fp, "r") + l2a_ds = h5py.File(l2a_fp, "r") except Exception as e: # Some raw L1B files are corrupt? print("Corrupt file: ", l1b_basename) @@ -127,7 +124,7 @@ if __name__ == '__main__': orbit_num = re.findall("O[0-9]{5}", l1b_basename)[0] track_num = re.findall("T[0-9]{5}", l1b_basename)[0] date_str = re.findall("[0-9]{13}", l1b_basename)[0] - # outfp = os.path.join(outdir, "output",f"GEDI_bioindex_{date_str}_{orbit_num}_{track_num}.csv") # already saves to output + # outfp = os.path.join(outdir, "output",f"GEDI_bioindex_{date_str}_{orbit_num}_{track_num}.csv") outfp = os.path.join(outdir, f"GEDI_bioindex_{date_str}_{orbit_num}_{track_num}.csv") @@ -199,14 +196,19 @@ if __name__ == '__main__': # append to df_list df_list.append(new_df) -# Save -try: - out_df = pd.concat(df_list, axis=0, ignore_index=True) - out_df.to_csv(outfp, index=False) -except Exception as e: - print("Couldnt save file: ", l1b_basename) - print("outdir: ", outdir) - print("outfp: ", outfp) - print(e) - sys.exit() + # Save + try: + out_df = pd.concat(df_list, axis=0, ignore_index=True) + out_df.to_csv(outfp, index=False) + # Close h5py files and delete so they are not saved!
+ l1b_ds.close() + l2a_ds.close() + # os.remove(l1b_fp) + # os.remove(l2a_fp) + except Exception as e: + print("Couldnt save file: ", l1b_basename) + print("outdir: ", outdir) + print("outfp: ", outfp) + print(e) + sys.exit() diff --git a/run.sh b/run.sh index 1572e6a553d2e666712b9b5d2425ef8893ffed27..047fbb54049c4ec327b937634a0187c682f4a9ef 100644 --- a/run.sh +++ b/run.sh @@ -7,10 +7,11 @@ basedir=$( cd "$(dirname "$0")" ; pwd -P) source activate osgeo-env # Create output dir -OUTPUTDIR="${PWD}/output" -mkdir -p ${OUTPUTDIR} +# OUTPUTDIR="${PWD}/output" +mkdir -p output # INPUT_FILES L1B_URL=$1 L2A_URL=$2 # e.g. GEDI01_B or GEDI02_A # YR=$3 -python ${basedir}/main.py ${L1B_URL} ${L2A_URL} ${OUTPUTDIR} +# python ${basedir}/main.py ${L1B_URL} ${L2A_URL} ${OUTPUTDIR} +python ${basedir}/main.py ${L1B_URL} ${L2A_URL} output