From f85143859adc0e99ced7e4208435ed97ddf1d716 Mon Sep 17 00:00:00 2001 From: Alex Rojas <a.rojas8907@gmail.com> Date: Fri, 1 Dec 2023 04:59:57 +0000 Subject: [PATCH] changed to access data using maap.py package with temporary credentials. --- __pycache__/get_gedi_data.cpython-312.pyc | Bin 1400 -> 1920 bytes get_gedi_data.py | 15 +++++--- main.py | 40 ++++++++++++---------- run.sh | 7 ++-- 4 files changed, 36 insertions(+), 26 deletions(-) diff --git a/__pycache__/get_gedi_data.cpython-312.pyc b/__pycache__/get_gedi_data.cpython-312.pyc index fde21e3b2669504f0372e45cd2a8817f1f243326..bb8e2b51b5bf9b27091d82bf6cb2c4d04f0e57ab 100644 GIT binary patch delta 992 zcmZuw-%ry}6h61NUDvH;Fn(nqpp2+X&}j&24B-VZBtm@fi5Ww>wZP^^=WUmX+1xZF zNTPwI50V%YUJd>OKB|wtV9+=_d{O@Zghb4qyceyDiMQ!F-#zC$-|amo{pl+Ql(&lF z1G?&yN&Qtt38A^lsdE=mT8If6Xk6Np5o00(4s=n&FZeCQ$N(`f&7!mvlQdx&Vm{47 zt-o^b3#!{HdMpGeN3jpk9QOm+H@KCaqeEC_N@n)oWh2~HDu63q#F|*ZPeJoO!$sa| zamlHmiyFSbrP>=i7g?PyYN%?`7WX@<`CL>K{;HSYzw*51cge$0^b~|t$dzvRtv_%a zZngmwMat@KWL;Dc3m#Vs!J`fyB%ocS4~vd78Uk9{;}$f$djQSz*C7n&!9;lu!3?dS zQr&m(4CGNnwkPm|uDha<v$8oWN+hb_a3t;^;-GvaU~l8gCC)G%KC4;_jyJBF`l6cA z9e!aryX5d!l4**KtiDLt03H68zC_6hgC>PU+A#Hqm$3Oc{shrAebGv)X_K(AkT44i z^yIm$BhKqua#k}PteKXBjd;rR()4k*`squW7+IOut=Zp9)lx~6#@MLMZ+42mdQyhw z;97n)|1r?_F3`7$cLD?D$!b&k`oel_!`zm`<%!*<p#75XE$vFmT4*)2KD-g$k$U%l z6T+L}YHQb%k;;fYx(`SSKOC#dt!q=OQyZ-ti4SuBcT9tam@@Y~Ajm7z_otVyRsGH7 zsY8hcnMU1YSA9KrdMt|HM1`?SVnlYZxnz<7_W45Gu#6$v#1i!hgXpA>g}p?-ozqRr zbfk=`X3sK1FwKx-%$#6`HTUSg&w(^E%=%IHj${lim)1wg1-fNFnmI(x9!3a#1z!#P oHIQo%sDVQ5XXyGQ`nNj<-*<{<%6tu^eGi9b(SbLJT8>nI0Xyl|MF0Q* delta 455 zcmYL_J4?e*6vyu?uck4y1;vUFL?{(2qPU1Sx`<r_hb}d3u6@`x?M)miibVun6iPmY z)NkVA;Gl^v4$h*BIC*bM@eKcS&+k0BJ@Y4KJQzj{aJycu(tO{Tg!}!>N(MEgrd*IA zK!D0Nz9$1HiU24mwsZ*!sx5QX`nT`!#P7fxX^iJIAEQI`0sJF$rRz~SmwMb!;M4@F zn<Mh?y@cPvsVQZEhRjw+uQPm#_dr(jDY?)B#7Y5VJc8230jaLHy8+p*HE2MbHf=Gn zJWH5Z8R%t3ZQAl`R>NfpL6i!$Yen0236JKuF+*UZLcQWJWqB4`;7oAW@n5AKk_2YT z-<4<1pY>!f&&2K7Sb*KG%VtHlRB}9LEl`CkJB+evLA9o3wU>o3%wv`oBCd<Z-+^dO zwEkD!Y&!OFgKn`!o)^FC=D2)f2w@0fA<#n*55c?{0^<#&UljdueCbKa^~e`Nun50Z I6Q-hhf7beDY5)KL diff --git a/get_gedi_data.py b/get_gedi_data.py index 4ab61b4..33e3a06 100644 --- a/get_gedi_data.py +++ b/get_gedi_data.py @@ -1,10 +1,14 @@ + import sys import h5py import boto3 import botocore import fsspec +import requests from maap.maap import MAAP maap = MAAP(maap_host="api.maap-project.org") +import os + def lpdaac_gedi_https_to_s3(url): dir_comps = url.split("/") @@ -21,8 +25,11 @@ def get_gedi_data(url): secret=credentials['secretAccessKey'], token=credentials['sessionToken'] ) - # with s3.open(lpdaac_gedi_https_to_s3(url), "rb") as f: - # gedi_ds = h5py.File(f, "r") - # return gedi_ds + basename = os.path.basename(url) + outfp = f"output/{basename}.h5" gedi_ds = h5py.File(s3.open(lpdaac_gedi_https_to_s3(url), "rb"), "r") - return gedi_ds \ No newline at end of file + with h5py.File(outfp, 'w') as dst: + for obj in gedi_ds.keys(): + gedi_ds.copy(obj, dst) + gedi_ds.close() + return outfp \ No newline at end of file diff --git a/main.py b/main.py index 1d7ce1a..e96daee 100644 --- a/main.py +++ b/main.py @@ -94,9 +94,7 @@ if __name__ == '__main__': l1b_url = sys.argv[1] # first index is python file name, second is arg1, etc l2a_url = sys.argv[2] # e.g. 'GEDI01_B' or 'GEDI02_A' outdir = sys.argv[3] - #Download L1B and L2a - # download_gedi(l1b_url,"GEDI01_B") - # download_gedi(l2a_url,"GEDI02_A") + # Get filenames for downloaded gedi l1b_basename = os.path.basename(l1b_url) l2a_basename = os.path.basename(l2a_url) @@ -113,11 +111,10 @@ if __name__ == '__main__': print(l1b_basename) print(l2a_basename) CWD = os.path.dirname(os.path.abspath(__file__)) - l1b_ds = get_gedi_data(l1b_url) - l2a_ds = get_gedi_data(l2a_url) - - # l1b_ds = h5py.File(os.path.join(CWD, f"{l1b_basename}")) - # l2a_ds = h5py.File(os.path.join(CWD, f"{l2a_basename}")) + l1b_fp = get_gedi_data(l1b_url) + l2a_fp = get_gedi_data(l2a_url) + l1b_ds = h5py.File(l1b_fp, "r") + l2a_ds = h5py.File(l2a_fp, "r") except Exception as e: # Some raw L1B files are corrupt? print("Corrupt file: ", l1b_basename) @@ -127,7 +124,7 @@ if __name__ == '__main__': orbit_num = re.findall("O[0-9]{5}", l1b_basename)[0] track_num = re.findall("T[0-9]{5}", l1b_basename)[0] date_str = re.findall("[0-9]{13}", l1b_basename)[0] - # outfp = os.path.join(outdir, "output",f"GEDI_bioindex_{date_str}_{orbit_num}_{track_num}.csv") # already saves to output + # outfp = os.path.join(outdir, "output",f"GEDI_bioindex_{date_str}_{orbit_num}_{track_num}.csv") outfp = os.path.join(outdir, f"GEDI_bioindex_{date_str}_{orbit_num}_{track_num}.csv") @@ -199,14 +196,19 @@ if __name__ == '__main__': # append to df_list df_list.append(new_df) -# Save -try: - out_df = pd.concat(df_list, axis=0, ignore_index=True) - out_df.to_csv(outfp, index=False) -except Exception as e: - print("Couldnt save file: ", l1b_basename) - print("outdir: ", outdir) - print("outfp: ", outfp) - print(e) - sys.exit() + # Save + try: + out_df = pd.concat(df_list, axis=0, ignore_index=True) + out_df.to_csv(outfp, index=False) + # Close h5py files and delete so they are note saved! + l1b_ds.close() + l2a_ds.close() + # os.remove(l1b_fp) + # os.remove(l2a_fp) + except Exception as e: + print("Couldnt save file: ", l1b_basename) + print("outdir: ", outdir) + print("outfp: ", outfp) + print(e) + sys.exit() diff --git a/run.sh b/run.sh index 1572e6a..047fbb5 100644 --- a/run.sh +++ b/run.sh @@ -7,10 +7,11 @@ basedir=$( cd "$(dirname "$0")" ; pwd -P) source activate osgeo-env # Create output dir -OUTPUTDIR="${PWD}/output" -mkdir -p ${OUTPUTDIR} +# OUTPUTDIR="${PWD}/output" +mkdir -p output # INPUT_FILES L1B_URL=$1 L2A_URL=$2 # e.g. GEDI01_B or GEDI02_A # YR=$3 -python ${basedir}/main.py ${L1B_URL} ${L2A_URL} ${OUTPUTDIR} +# python ${basedir}/main.py ${L1B_URL} ${L2A_URL} ${OUTPUTDIR} +python ${basedir}/main.py ${L1B_URL} ${L2A_URL} output -- GitLab