Skip to content
Snippets Groups Projects
Commit 68d0ba49 authored by Alex Rojas's avatar Alex Rojas
Browse files

updated notebooks

parent 9f1cb68e
No related branches found
No related tags found
No related merge requests found
import sys
import h5py
import boto3
import botocore
import fsspec
import requests
from maap.maap import MAAP
maap = MAAP(maap_host="api.maap-project.org")
import os
def lpdaac_gedi_https_to_s3(url):
    """Translate an LP DAAC GEDI HTTPS URL into its ``lp-prod-protected`` S3 URI.

    The URL is split on "/" and two components are used: index 6 becomes the
    first key segment and index 8 is the granule file name. The granule file
    name without its ``.h5`` extension becomes the second key segment.
    # NOTE(review): indices 6 and 8 presumably correspond to the product
    # folder and the file name of the URLs listed in the GEDI URL text files;
    # confirm against the actual URL layout.

    Args:
        url: HTTPS URL of a GEDI granule; must contain at least nine
            "/"-separated components.

    Returns:
        The ``s3://lp-prod-protected/...`` URI as a string.

    Raises:
        IndexError: if the URL has fewer than nine "/"-separated components.
    """
    dir_comps = url.split("/")
    product = dir_comps[6]
    filename = dir_comps[8]
    # str.strip('.h5') removes any run of the characters '.', 'h' and '5' from
    # both ends (e.g. "..._V005.h5" -> "..._V00"), so drop the extension
    # explicitly instead.
    suffix = ".h5"
    granule = filename[:-len(suffix)] if filename.endswith(suffix) else filename
    return f"s3://lp-prod-protected/{product}/{granule}/{filename}"
def get_gedi_data(url):
    """Copy a GEDI HDF5 granule from the LP DAAC S3 bucket to ``output/``.

    Temporary S3 credentials are requested through the module-level ``maap``
    client, the granule is opened remotely via fsspec, and every top-level
    object of the remote file is copied into a new local HDF5 file named
    after the URL's basename.

    Args:
        url: HTTPS URL of the granule; converted to an S3 URI with
            ``lpdaac_gedi_https_to_s3``.

    Returns:
        Relative path of the local copy (``output/<basename of url>``).
    """
    credentials = maap.aws.earthdata_s3_credentials(
        'https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials'
    )
    s3 = fsspec.filesystem(
        "s3",
        key=credentials['accessKeyId'],
        secret=credentials['secretAccessKey'],
        token=credentials['sessionToken'],
    )

    outfp = f"output/{os.path.basename(url)}"
    # h5py cannot create missing parent directories, so make sure it exists.
    os.makedirs(os.path.dirname(outfp), exist_ok=True)

    # Context managers guarantee that the remote handle and both HDF5 files
    # are closed even if opening the destination or a copy step fails.
    with s3.open(lpdaac_gedi_https_to_s3(url), "rb") as remote, \
            h5py.File(remote, "r") as gedi_ds, \
            h5py.File(outfp, 'w') as dst:
        # NOTE: only the members of the root group are copied; attributes set
        # on the root group itself are not transferred by this loop.
        for obj in gedi_ds.keys():
            gedi_ds.copy(obj, dst)
    return outfp
\ No newline at end of file
import sys
import h5py
import boto3
import botocore
import fsspec
import requests
from maap.maap import MAAP
maap = MAAP(maap_host="api.maap-project.org")
import os
from get_gedi_data import get_gedi_data
if __name__ == '__main__':
    # Usage: python <this file> L1B_URL L2A_URL [OUTDIR]
    # sys.argv[0] is the script name; the positional arguments follow it.
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} L1B_URL L2A_URL [OUTDIR]")

    l1b_url = sys.argv[1]  # URL of the L1B granule
    l2a_url = sys.argv[2]  # URL of the L2A granule
    # OUTDIR is still accepted so existing invocations keep working, but it is
    # optional: nothing below reads it, and get_gedi_data writes to "output/".
    outdir = sys.argv[3] if len(sys.argv) > 3 else None

    # get_gedi_data returns the local file path of each downloaded granule.
    l1b_fp = get_gedi_data(l1b_url)
    l2a_fp = get_gedi_data(l2a_url)
\ No newline at end of file
%% Cell type:code id:9803c6a0-d547-4112-8edc-eb62680360e2 tags: %% Cell type:code id:9803c6a0-d547-4112-8edc-eb62680360e2 tags:
``` python ``` python
import os import os
import re import re
from maap.maap import MAAP from maap.maap import MAAP
import re import re
# maap = MAAP(maap_host='api.maap-project.org') # maap = MAAP(maap_host='api.maap-project.org')
maap = MAAP(maap_host='api.maap-project.org') maap = MAAP(maap_host='api.maap-project.org')
``` ```
%% Output
Unable to load config file from source maap.cfg
Unable to load config file from source ./maap.cfg
Unable to load config file from source /projects/maap.cfg
%% Cell type:code id:54d3df6a-e22a-4f15-ab22-859df5d4b367 tags: %% Cell type:code id:54d3df6a-e22a-4f15-ab22-859df5d4b367 tags:
``` python ``` python
# Change to appropriate filepaths for urls # Change to appropriate filepaths for urls
# l1b_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt" # l1b_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt"
# l2a_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt" # l2a_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt"
l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt" l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt" l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"
with open(l1b_urls_fpath) as f: with open(l1b_urls_fpath) as f:
l1b_fpaths = f.read().splitlines() l1b_fpaths = f.read().splitlines()
with open(l2a_urls_fpath) as f: with open(l2a_urls_fpath) as f:
l2a_fpaths = f.read().splitlines() l2a_fpaths = f.read().splitlines()
# Get matching string pattern and run main.py # Get matching string pattern and run main.py
jobs_list = [] jobs_list = []
counter=1 counter=1
for l1b_fp in l1b_fpaths[:99]: for l1b_fp in l1b_fpaths:
# Get string pattern # Get string pattern
str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0] str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
try: try:
l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0] l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
except: except:
print("No matching L2 file!", l1b_fp) print("No matching L2 file!", l1b_fp)
continue continue
# print(l1b_fp) # print(l1b_fp)
# print(l2a_fp) # print(l2a_fp)
job = maap.submitJob(identifier="SouthAmericaGEDI", job = maap.submitJob(identifier="SouthAmericaGEDI",
algo_id="arojas_download_gedi_data", algo_id="arojas_download_gedi_data",
version="master", version="master",
username="arojearthdata", username="arojearthdata",
queue="maap-dps-worker-8gb", queue="maap-dps-worker-8gb",
L1B_URL=l1b_fp, L1B_URL=l1b_fp,
L2A_URL=l2a_fp) L2A_URL=l2a_fp)
jobs_list.append(job) jobs_list.append(job)
if counter%100==0: if counter%100==0:
print("on file num: ", counter, end='\r') print("on file num: ", counter, end='\r')
counter+=1 counter+=1
``` ```
%% Output
on file num: 400
%% Cell type:code id:d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d tags: %% Cell type:code id:d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d tags:
``` python ``` python
for job in jobs_list: for job in jobs_list:
print(job.retrieve_status()) print(job.retrieve_status())
print(job.id) print(job.id)
break break
``` ```
%% Output %% Output
Deleted Failed
204d1914-c180-49c1-8d40-1c732b1b82f3 353e658d-8bfa-4b7d-b508-24e8d42d3483
%% Cell type:code id:e5f0edb0-ffe4-4f8e-bb4f-d51686ee47b4 tags: %% Cell type:code id:e5f0edb0-ffe4-4f8e-bb4f-d51686ee47b4 tags:
``` python ``` python
``` ```
%% Cell type:code id:89d3a568-4801-4168-891f-04c65091d336 tags: %% Cell type:code id:89d3a568-4801-4168-891f-04c65091d336 tags:
``` python ``` python
# Some files didn't download, let's check # Some files didn't download, let's check
import glob import glob
indir = "/projects/my-private-bucket/dps_output/arojas_access_gedi/master" indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
file_list = [] file_list = []
for subdir, dirs, files in os.walk(indir): for subdir, dirs, files in os.walk(indir):
for file in files: for file in files:
if file.endswith(".h5"): if file.endswith(".h5"):
# fp = os.path.join(subdir, file) # fp = os.path.join(subdir, file)
# print(fp) # print(fp)
file_list.append(file) file_list.append(file)
``` ```
%% Cell type:code id:55675d70-413e-4f1e-8666-3bb4f38e3359 tags: %% Cell type:code id:55675d70-413e-4f1e-8666-3bb4f38e3359 tags:
``` python ``` python
len(file_list) len(file_list)
``` ```
%% Output %% Output
840 74
%% Cell type:code id:7513ce41-f4ea-44e2-9d76-ea3b857c8069 tags: %% Cell type:code id:7513ce41-f4ea-44e2-9d76-ea3b857c8069 tags:
``` python ``` python
``` ```
%% Cell type:code id:cd11283a-63f1-4893-828e-6cd72f646ef1 tags: %% Cell type:code id:cd11283a-63f1-4893-828e-6cd72f646ef1 tags:
``` python ``` python
# rerun and get missed files
# print("Waiting some time to run this cell...")
# time.sleep(3600)
print("Running this cell now!")
# Get all .h5 files # Get all .h5 files
indir = "/projects/my-private-bucket/dps_output/arojas_access_gedi/master" indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
file_list = [] file_list = []
for subdir, dirs, files in os.walk(indir): for subdir, dirs, files in os.walk(indir):
for file in files: for file in files:
if file.endswith(".h5"): if file.endswith(".h5"):
# fp = os.path.join(subdir, file) # fp = os.path.join(subdir, file)
# print(fp) # print(fp)
file_list.append(file) file_list.append(file)
# Change to appropriate filepaths for urls # Change to appropriate filepaths for urls
l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt" l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt" l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"
with open(l1b_urls_fpath) as f: with open(l1b_urls_fpath) as f:
l1b_fpaths = f.readlines() l1b_fpaths = f.readlines()
with open(l2a_urls_fpath) as f: with open(l2a_urls_fpath) as f:
l2a_fpaths = f.readlines() l2a_fpaths = f.readlines()
print("starting loop")
# Get matching string pattern and run main.py # Get matching string pattern and run main.py
jobs_list = [] jobs_list = []
counter=1 counter=1
for l1b_fp in l1b_fpaths: for l1b_fp in l1b_fpaths:
# Get string pattern # Get string pattern
str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0] str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
try: try:
l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0] l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
except: except:
print("No matching L2 file!", l1b_fp) print("No matching L2 file!", l1b_fp)
continue continue
# Check if file was already downloaded # Check if file was already downloaded
date_str = os.path.basename(l1b_fp).split("_")[2] date_str = os.path.basename(l1b_fp).split("_")[2]
if any(date_str in x for x in file_list): if any(date_str in x for x in file_list):
continue continue
# Submit job # Submit job
job = maap.submitJob(identifier="SouthAmericaGEDI", job = maap.submitJob(identifier="SouthAmericaGEDI",
algo_id="arojas_access_gedi", algo_id="arojas_download_gedi_data",
version="master", version="master",
username="arojearthdata", username="arojearthdata",
queue="maap-dps-worker-8gb", queue="maap-dps-worker-8gb",
L1B_URL=l1b_fp, L1B_URL=l1b_fp,
L2A_URL=l2a_fp) L2A_URL=l2a_fp)
jobs_list.append(job) jobs_list.append(job)
if counter%100==0: if counter%100==0:
print("on file num: ", counter, end='\r') print("on file num: ", counter, end='\r')
counter+=1 counter+=1
print('Done') print('Done')
``` ```
%% Output %% Output
Running this cell now! starting loop
Done Doneile num: 400
%% Cell type:code id:0f17583e-20da-4dea-998c-9e0139700400 tags: %% Cell type:code id:0f17583e-20da-4dea-998c-9e0139700400 tags:
``` python ``` python
``` ```
%% Cell type:code id:35ff40f2-65f8-414d-a53f-e8e0f7487557 tags: %% Cell type:code id:35ff40f2-65f8-414d-a53f-e8e0f7487557 tags:
``` python ``` python
## Condense files into new single folder ## Condense files into new single folder
``` ```
%% Cell type:code id:7c980c00-9823-4043-b235-4a837b8b92fa tags: %% Cell type:code id:7c980c00-9823-4043-b235-4a837b8b92fa tags:
``` python ``` python
import shutil import shutil
# shutil.move("path/to/current/file.foo", "path/to/new/destination/for/file.foo") # shutil.move("path/to/current/file.foo", "path/to/new/destination/for/file.foo")
``` ```
%% Cell type:code id:3d80ac20-b71e-486f-b153-6d19c3fd1fc2 tags: %% Cell type:code id:3d80ac20-b71e-486f-b153-6d19c3fd1fc2 tags:
``` python ``` python
outdir = "/projects/my-private-bucket/GEDI/raw/SouthAmerica" outdir = "/projects/my-private-bucket/GEDI/raw/SouthAmerica"
for fp in csv_list: for fp in csv_list:
basename = os.path.basename(fp) basename = os.path.basename(fp)
if "GEDI01_B" in basename: if "GEDI01_B" in basename:
outfp = os.path.join(outdir, "L1B", basename) outfp = os.path.join(outdir, "L1B", basename)
elif "GEDI02_A" in basename: elif "GEDI02_A" in basename:
outfp = os.path.join(outdir, "L2A", basename) outfp = os.path.join(outdir, "L2A", basename)
shutil.move(fp, outfp) shutil.move(fp, outfp)
print("DONE") print("DONE")
``` ```
%% Cell type:code id:4c3f7b66-e482-427f-85ba-19c6d2786e16 tags: %% Cell type:code id:4c3f7b66-e482-427f-85ba-19c6d2786e16 tags:
``` python ``` python
## Check all files in new folder! ## Check all files in new folder!
indir = "/projects/my-private-bucket/GEDI/biomass/2022" indir = "/projects/my-private-bucket/GEDI/biomass/2022"
csv_list = glob.glob(os.path.join(indir, "*")) csv_list = glob.glob(os.path.join(indir, "*"))
print(len(csv_list)) print(len(csv_list))
``` ```
%% Cell type:code id:f63c64c5-4688-43f4-8139-bfc68236d4f4 tags: %% Cell type:code id:f63c64c5-4688-43f4-8139-bfc68236d4f4 tags:
``` python ``` python
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment