Skip to content
Snippets Groups Projects
Commit 0e7907e6 authored by Alex Rojas's avatar Alex Rojas
Browse files

updated to add print statements

parent 66021774
No related branches found
No related tags found
No related merge requests found
...@@ -12,6 +12,7 @@ import os ...@@ -12,6 +12,7 @@ import os
def lpdaac_gedi_https_to_s3(url): def lpdaac_gedi_https_to_s3(url):
dir_comps = url.split("/") dir_comps = url.split("/")
print(dir_comps)
return f"s3://lp-prod-protected/{dir_comps[6]}/{dir_comps[8].strip('.h5')}/{dir_comps[8]}" return f"s3://lp-prod-protected/{dir_comps[6]}/{dir_comps[8].strip('.h5')}/{dir_comps[8]}"
def get_gedi_data(url): def get_gedi_data(url):
......
source activate osgeo-env # source activate osgeo-env
source activate /projects/env/osgeo-env
# conda activate /projects/env/osgeo-env
# Get current location of build script # Get current location of build script
basedir=$( cd "$(dirname "$0")" ; pwd -P ) basedir=$( cd "$(dirname "$0")" ; pwd -P )
......
File added
...@@ -12,6 +12,7 @@ import os ...@@ -12,6 +12,7 @@ import os
def lpdaac_gedi_https_to_s3(url): def lpdaac_gedi_https_to_s3(url):
dir_comps = url.split("/") dir_comps = url.split("/")
print(dir_comps)
return f"s3://lp-prod-protected/{dir_comps[6]}/{dir_comps[8].strip('.h5')}/{dir_comps[8]}" return f"s3://lp-prod-protected/{dir_comps[6]}/{dir_comps[8].strip('.h5')}/{dir_comps[8]}"
def get_gedi_data(url): def get_gedi_data(url):
......
%% Cell type:code id:9803c6a0-d547-4112-8edc-eb62680360e2 tags: %% Cell type:code id:9803c6a0-d547-4112-8edc-eb62680360e2 tags:
``` python ``` python
import os import os
import re import re
from maap.maap import MAAP from maap.maap import MAAP
import re import re
# maap = MAAP(maap_host='api.maap-project.org') # maap = MAAP(maap_host='api.maap-project.org')
maap = MAAP(maap_host='api.maap-project.org') maap = MAAP(maap_host='api.maap-project.org')
import time
``` ```
%% Cell type:code id:96d5cdce-a2cf-4ec5-8671-f9edd500ab8b tags: %% Cell type:code id:96d5cdce-a2cf-4ec5-8671-f9edd500ab8b tags:
``` python ``` python
#maap.register_algorithm_from_yaml_file("/projects/arojas_download_gedi_data.yml") #maap.register_algorithm_from_yaml_file("/projects/arojas_download_gedi_data.yml")
``` ```
%% Output
<Response [200]>
%% Cell type:code id:54d3df6a-e22a-4f15-ab22-859df5d4b367 tags: %% Cell type:code id:54d3df6a-e22a-4f15-ab22-859df5d4b367 tags:
``` python ``` python
# Change to appropriate filepaths for urls # Change to appropriate filepaths for urls
# l1b_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt" # l1b_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt"
# l2a_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt" # l2a_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt"
l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt" l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt" l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"
with open(l1b_urls_fpath) as f: with open(l1b_urls_fpath) as f:
l1b_fpaths = f.read().splitlines() l1b_fpaths = f.read().splitlines()
with open(l2a_urls_fpath) as f: with open(l2a_urls_fpath) as f:
l2a_fpaths = f.read().splitlines() l2a_fpaths = f.read().splitlines()
# Get matching string pattern and run main.py # Get matching string pattern and run main.py
jobs_list = [] jobs_list = []
counter=1 counter=1
for l1b_fp in l1b_fpaths: for l1b_fp in l1b_fpaths[:10]:
# Get string pattern # Get string pattern
str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0] str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
try: try:
l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0] l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
except: except:
print("No matching L2 file!", l1b_fp) print("No matching L2 file!", l1b_fp)
continue continue
# print(l1b_fp) # print(l1b_fp)
# print(l2a_fp) # print(l2a_fp)
job = maap.submitJob(identifier="SouthAmericaGEDI-run-20240117", job = maap.submitJob(identifier="SouthAmericaGEDI-run-20240117",
algo_id="arojas_download_gedi_data", algo_id="arojas_download_gedi_data",
version="master", version="master",
username="arojearthdata", username="arojearthdata",
queue="maap-dps-worker-8gb", queue="maap-dps-worker-16gb",
L1B_URL=l1b_fp, L1B_URL=l1b_fp,
L2A_URL=l2a_fp) L2A_URL=l2a_fp)
jobs_list.append(job) jobs_list.append(job)
if counter%100==0: if counter%100==0:
print("on file num: ", counter, end='\r') print("on file num: ", counter, end='\r')
time.sleep(900)
counter+=1 counter+=1
``` ```
%% Cell type:code id:d71d17cf-9a79-4508-9047-dade9b64201b tags:
``` python
for l1b_fp in l1b_fpaths[:10]:
# Get string pattern
str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
print(l1b_fp)
print(l2a_fp)
break
```
%% Output %% Output
on file num: 400 https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI01_B.002/2020.08.31/GEDI01_B_2020244155754_O09740_04_T06708_02_005_01_V002.h5
https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_A.002/2020.08.31/GEDI02_A_2020244155754_O09740_04_T06708_02_003_01_V002.h5
%% Cell type:code id:f1dce543-f916-4561-990b-ae8c3fdb7ad7 tags:
``` python
```
%% Cell type:code id:d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d tags: %% Cell type:code id:d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d tags:
``` python ``` python
for job in jobs_list[:10]: for job in jobs_list[:20]:
print(job.retrieve_status()) print(job.retrieve_status())
print(job.id) print(job.id)
``` ```
%% Output %% Output
Running Succeeded
9efde8c0-b4aa-4d4b-9972-55b72be802a6 f70b1586-e3ac-4155-a73e-49f5386c7e2c
Running Succeeded
d101a1b9-c44e-44e8-931a-6fe9f436e03d 10279c23-3225-460e-8cf9-6364fa0ec2a0
Running Succeeded
acc12502-5cfb-4f05-b05b-2f57657d2f53 fe212a29-9da4-4a72-a02d-94a37750e1fa
Running Succeeded
126548ea-2fd0-434a-b040-66a0a45d8f9c 7855a6a3-0416-41f6-b546-11eab9097adc
Running Succeeded
325641a2-f0f5-40a7-8904-c023d71a7b55 8c343834-6622-404e-9b83-d4dd771a151f
Running Succeeded
a9039b0e-f678-403a-8ff2-576adb138199 cc412cd5-1491-4dad-bc19-63ae70f9b6af
Accepted Succeeded
a99753e5-3ceb-4b25-b1e5-e1c154d2235d 86b301cf-5b37-43af-b2f5-cb51d5f6b1c3
Accepted Succeeded
b985c8ff-77ae-41c9-8b00-d1e9ee0cc717 ec0e4005-b511-4d93-8fac-a0ff34682b1f
Accepted Succeeded
d70e4917-323f-48e8-976d-6c94b08277a2 30f06fb8-aab6-4e74-aba4-e7a5d76ac100
Accepted Succeeded
0fe1d4f4-cb0c-41ae-99ad-0e9d28f7e695 30c44930-66fa-4215-b877-517a1e6472b1
Succeeded
c1e9fd05-241a-4bea-8a85-3fc8992cbf67
Succeeded
984eab57-449c-4fce-b490-b6ef05cd80f2
Succeeded
da048416-13f5-4a00-b7e0-76bff20bfa12
Succeeded
cc8d47fc-7615-4ba2-adbc-276b5f45c75a
Succeeded
c42d1afe-3d55-40d3-af4a-8ed98bc98492
Succeeded
e39ebed1-9694-4dfb-bafb-5249c7fca41e
Succeeded
656d0e68-ab9e-42a2-95b4-7fa45503cd8a
Succeeded
5c36624d-c33b-4756-9436-cac71ebb7ae9
Succeeded
1a5b59c1-c26e-4028-b6d9-54b7676f3c67
Succeeded
c2f3e181-22c5-460c-a4ac-d2fd2694f96e
%% Cell type:code id:e5f0edb0-ffe4-4f8e-bb4f-d51686ee47b4 tags: %% Cell type:code id:e5f0edb0-ffe4-4f8e-bb4f-d51686ee47b4 tags:
``` python ``` python
``` ```
%% Cell type:code id:89d3a568-4801-4168-891f-04c65091d336 tags: %% Cell type:code id:89d3a568-4801-4168-891f-04c65091d336 tags:
``` python ``` python
# Some files didnt download, lets check # Some files didnt download, lets check
import glob import glob
indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master" indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
file_list = [] file_list = []
for subdir, dirs, files in os.walk(indir): for subdir, dirs, files in os.walk(indir):
for file in files: for file in files:
if file.endswith(".h5"): if file.endswith(".h5"):
# fp = os.path.join(subdir, file) # fp = os.path.join(subdir, file)
# print(fp) # print(fp)
file_list.append(file) file_list.append(os.path.join(subdir, file))
``` ```
%% Cell type:code id:55675d70-413e-4f1e-8666-3bb4f38e3359 tags: %% Cell type:code id:7dde8135-d6b6-43ce-b2c4-fd8d0cda4077 tags:
``` python ``` python
len(file_list) len(file_list)
``` ```
%% Output %% Output
74 918
%% Cell type:code id:7513ce41-f4ea-44e2-9d76-ea3b857c8069 tags: %% Cell type:code id:7513ce41-f4ea-44e2-9d76-ea3b857c8069 tags:
``` python ``` python
``` ```
%% Cell type:code id:cd11283a-63f1-4893-828e-6cd72f646ef1 tags: %% Cell type:code id:cd11283a-63f1-4893-828e-6cd72f646ef1 tags:
``` python ``` python
# rerun and get missed files # rerun and get missed files
# Get all CSV files # Get all CSV files
indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master" indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
file_list = [] file_list = []
for subdir, dirs, files in os.walk(indir): for subdir, dirs, files in os.walk(indir):
for file in files: for file in files:
if file.endswith(".h5"): if file.endswith(".h5"):
# fp = os.path.join(subdir, file) # fp = os.path.join(subdir, file)
# print(fp) # print(fp)
file_list.append(file) file_list.append(file)
# Change to appropriate filepaths for urls # Change to appropriate filepaths for urls
l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt" l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt" l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"
with open(l1b_urls_fpath) as f: with open(l1b_urls_fpath) as f:
l1b_fpaths = f.readlines() l1b_fpaths = f.readlines()
with open(l2a_urls_fpath) as f: with open(l2a_urls_fpath) as f:
l2a_fpaths = f.readlines() l2a_fpaths = f.readlines()
print("starting loop") print("starting loop")
# Get matching string pattern and run main.py # Get matching string pattern and run main.py
jobs_list = [] jobs_list = []
counter=1 counter=1
for l1b_fp in l1b_fpaths: for l1b_fp in l1b_fpaths:
# Get string pattern # Get string pattern
str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0] str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
try: try:
l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0] l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
except: except:
print("No matching L2 file!", l1b_fp) print("No matching L2 file!", l1b_fp)
continue continue
# Check if file was already downloaded # Check if file was already downloaded
date_str = os.path.basename(l1b_fp).split("_")[2] date_str = os.path.basename(l1b_fp).split("_")[2]
if any(date_str in x for x in file_list): if any(date_str in x for x in file_list):
continue continue
# Submit job # Submit job
job = maap.submitJob(identifier="SouthAmericaGEDI", job = maap.submitJob(identifier="SouthAmericaGEDI",
algo_id="arojas_download_gedi_data", algo_id="arojas_download_gedi_data",
version="master", version="master",
username="arojearthdata", username="arojearthdata",
queue="maap-dps-worker-8gb", queue="maap-dps-worker-8gb",
L1B_URL=l1b_fp, L1B_URL=l1b_fp,
L2A_URL=l2a_fp) L2A_URL=l2a_fp)
jobs_list.append(job) jobs_list.append(job)
if counter%100==0: if counter%100==0:
print("on file num: ", counter, end='\r') print("on file num: ", counter, end='\r')
counter+=1 counter+=1
print('Done') print('Done')
``` ```
%% Output %% Output
starting loop starting loop
Doneile num: 400 Doneile num: 400
%% Cell type:code id:0f17583e-20da-4dea-998c-9e0139700400 tags: %% Cell type:code id:0f17583e-20da-4dea-998c-9e0139700400 tags:
``` python ``` python
``` ```
%% Cell type:code id:35ff40f2-65f8-414d-a53f-e8e0f7487557 tags: %% Cell type:code id:35ff40f2-65f8-414d-a53f-e8e0f7487557 tags:
``` python ``` python
## Condense files into new single folder ## Condense files into new single folder
``` ```
%% Cell type:code id:7c980c00-9823-4043-b235-4a837b8b92fa tags: %% Cell type:code id:7c980c00-9823-4043-b235-4a837b8b92fa tags:
``` python ``` python
import shutil import shutil
# shutil.move("path/to/current/file.foo", "path/to/new/destination/for/file.foo") # shutil.move("path/to/current/file.foo", "path/to/new/destination/for/file.foo")
``` ```
%% Cell type:code id:3d80ac20-b71e-486f-b153-6d19c3fd1fc2 tags: %% Cell type:code id:3d80ac20-b71e-486f-b153-6d19c3fd1fc2 tags:
``` python ``` python
outdir = "/projects/my-private-bucket/GEDI/raw/SouthAmerica" outdir = "/projects/my-private-bucket/GEDI/raw/SouthAmerica"
for fp in csv_list: for fp in csv_list:
basename = os.path.basename(fp) basename = os.path.basename(fp)
if "GEDI01_B" in basename: if "GEDI01_B" in basename:
outfp = os.path.join(outdir, "L1B", basename) outfp = os.path.join(outdir, "L1B", basename)
elif "GEDI02_A" in basename: elif "GEDI02_A" in basename:
outfp = os.path.join(outdir, "L2A", basename) outfp = os.path.join(outdir, "L2A", basename)
shutil.move(fp, outfp) shutil.move(fp, outfp)
print("DONE") print("DONE")
``` ```
%% Cell type:code id:4c3f7b66-e482-427f-85ba-19c6d2786e16 tags: %% Cell type:code id:4c3f7b66-e482-427f-85ba-19c6d2786e16 tags:
``` python ``` python
## Check all files in new folder! ## Check all files in new folder!
indir = "/projects/my-private-bucket/GEDI/biomass/2022" indir = "/projects/my-private-bucket/GEDI/biomass/2022"
csv_list = glob.glob(os.path.join(indir, "*")) csv_list = glob.glob(os.path.join(indir, "*"))
print(len(csv_list)) print(len(csv_list))
``` ```
%% Cell type:code id:f63c64c5-4688-43f4-8139-bfc68236d4f4 tags: %% Cell type:code id:f63c64c5-4688-43f4-8139-bfc68236d4f4 tags:
``` python ``` python
``` ```
......
%% Cell type:code id:9803c6a0-d547-4112-8edc-eb62680360e2 tags: %% Cell type:code id:9803c6a0-d547-4112-8edc-eb62680360e2 tags:
``` python ``` python
import os import os
import re import re
from maap.maap import MAAP from maap.maap import MAAP
import re import re
# maap = MAAP(maap_host='api.maap-project.org') # maap = MAAP(maap_host='api.maap-project.org')
maap = MAAP(maap_host='api.maap-project.org') maap = MAAP(maap_host='api.maap-project.org')
import time
``` ```
%% Cell type:code id:96d5cdce-a2cf-4ec5-8671-f9edd500ab8b tags: %% Cell type:code id:96d5cdce-a2cf-4ec5-8671-f9edd500ab8b tags:
``` python ``` python
#maap.register_algorithm_from_yaml_file("/projects/arojas_download_gedi_data.yml") #maap.register_algorithm_from_yaml_file("/projects/arojas_download_gedi_data.yml")
``` ```
%% Cell type:code id:54d3df6a-e22a-4f15-ab22-859df5d4b367 tags: %% Cell type:code id:54d3df6a-e22a-4f15-ab22-859df5d4b367 tags:
``` python ``` python
# Change to appropriate filepaths for urls # Change to appropriate filepaths for urls
# l1b_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt" # l1b_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt"
# l2a_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt" # l2a_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt"
l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt" l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt" l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"
with open(l1b_urls_fpath) as f: with open(l1b_urls_fpath) as f:
l1b_fpaths = f.read().splitlines() l1b_fpaths = f.read().splitlines()
with open(l2a_urls_fpath) as f: with open(l2a_urls_fpath) as f:
l2a_fpaths = f.read().splitlines() l2a_fpaths = f.read().splitlines()
# Get matching string pattern and run main.py # Get matching string pattern and run main.py
jobs_list = [] jobs_list = []
counter=1 counter=1
for l1b_fp in l1b_fpaths[:10]: for l1b_fp in l1b_fpaths[:10]:
# Get string pattern # Get string pattern
str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0] str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
try: try:
l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0] l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
except: except:
print("No matching L2 file!", l1b_fp) print("No matching L2 file!", l1b_fp)
continue continue
# print(l1b_fp) # print(l1b_fp)
# print(l2a_fp) # print(l2a_fp)
job = maap.submitJob(identifier="SouthAmericaGEDI-run-20240117", job = maap.submitJob(identifier="SouthAmericaGEDI-run-20240117",
algo_id="arojas_download_gedi_data", algo_id="arojas_download_gedi_data",
version="master", version="master",
username="arojearthdata", username="arojearthdata",
queue="maap-dps-worker-16gb", queue="maap-dps-worker-16gb",
L1B_URL=l1b_fp, L1B_URL=l1b_fp,
L2A_URL=l2a_fp) L2A_URL=l2a_fp)
jobs_list.append(job) jobs_list.append(job)
if counter%100==0: if counter%100==0:
print("on file num: ", counter, end='\r') print("on file num: ", counter, end='\r')
time.sleep(900)
counter+=1 counter+=1
``` ```
%% Cell type:code id:d71d17cf-9a79-4508-9047-dade9b64201b tags:
``` python
for l1b_fp in l1b_fpaths[:10]:
# Get string pattern
str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
print(l1b_fp)
print(l2a_fp)
break
```
%% Output
https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI01_B.002/2020.08.31/GEDI01_B_2020244155754_O09740_04_T06708_02_005_01_V002.h5
https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_A.002/2020.08.31/GEDI02_A_2020244155754_O09740_04_T06708_02_003_01_V002.h5
%% Cell type:code id:f1dce543-f916-4561-990b-ae8c3fdb7ad7 tags:
``` python
```
%% Cell type:code id:d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d tags: %% Cell type:code id:d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d tags:
``` python ``` python
for job in jobs_list[:10]: for job in jobs_list[:20]:
print(job.retrieve_status()) print(job.retrieve_status())
print(job.id) print(job.id)
``` ```
%% Output %% Output
Accepted Succeeded
54dc2f4c-eb12-4720-a83b-5b2e3548a94f f70b1586-e3ac-4155-a73e-49f5386c7e2c
Accepted Succeeded
6cc81082-0bc8-4d31-b295-62c4135b9b4b 10279c23-3225-460e-8cf9-6364fa0ec2a0
Accepted Succeeded
1b7bf749-78f6-49ee-9e84-71783e8d5449 fe212a29-9da4-4a72-a02d-94a37750e1fa
Accepted Succeeded
6e09b33b-8159-4449-9654-ccf29e6b6486 7855a6a3-0416-41f6-b546-11eab9097adc
Accepted Succeeded
774d6139-212f-4841-8f1b-13a5eadda4bc 8c343834-6622-404e-9b83-d4dd771a151f
Accepted Succeeded
bbdb0764-3dc2-458c-aa8d-2151e03125c5 cc412cd5-1491-4dad-bc19-63ae70f9b6af
Accepted Succeeded
1c5faed3-2c03-4260-ac4e-7478007c0b0d 86b301cf-5b37-43af-b2f5-cb51d5f6b1c3
Accepted Succeeded
c92b2edf-2c1b-4b76-8e4d-d18634b7603d ec0e4005-b511-4d93-8fac-a0ff34682b1f
Accepted Succeeded
67ed03d8-7d9a-41a7-8858-55f58012a00e 30f06fb8-aab6-4e74-aba4-e7a5d76ac100
Accepted Succeeded
85c052ca-1269-4335-9559-63b2475a069f 30c44930-66fa-4215-b877-517a1e6472b1
Succeeded
c1e9fd05-241a-4bea-8a85-3fc8992cbf67
Succeeded
984eab57-449c-4fce-b490-b6ef05cd80f2
Succeeded
da048416-13f5-4a00-b7e0-76bff20bfa12
Succeeded
cc8d47fc-7615-4ba2-adbc-276b5f45c75a
Succeeded
c42d1afe-3d55-40d3-af4a-8ed98bc98492
Succeeded
e39ebed1-9694-4dfb-bafb-5249c7fca41e
Succeeded
656d0e68-ab9e-42a2-95b4-7fa45503cd8a
Succeeded
5c36624d-c33b-4756-9436-cac71ebb7ae9
Succeeded
1a5b59c1-c26e-4028-b6d9-54b7676f3c67
Succeeded
c2f3e181-22c5-460c-a4ac-d2fd2694f96e
%% Cell type:code id:e5f0edb0-ffe4-4f8e-bb4f-d51686ee47b4 tags: %% Cell type:code id:e5f0edb0-ffe4-4f8e-bb4f-d51686ee47b4 tags:
``` python ``` python
``` ```
%% Cell type:code id:89d3a568-4801-4168-891f-04c65091d336 tags: %% Cell type:code id:89d3a568-4801-4168-891f-04c65091d336 tags:
``` python ``` python
# Some files didnt download, lets check # Some files didnt download, lets check
import glob import glob
indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master" indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
file_list = [] file_list = []
for subdir, dirs, files in os.walk(indir): for subdir, dirs, files in os.walk(indir):
for file in files: for file in files:
if file.endswith(".h5"): if file.endswith(".h5"):
# fp = os.path.join(subdir, file) # fp = os.path.join(subdir, file)
# print(fp) # print(fp)
file_list.append(file) file_list.append(os.path.join(subdir, file))
``` ```
%% Cell type:code id:55675d70-413e-4f1e-8666-3bb4f38e3359 tags: %% Cell type:code id:7dde8135-d6b6-43ce-b2c4-fd8d0cda4077 tags:
``` python ``` python
len(file_list) len(file_list)
``` ```
%% Output %% Output
74 0
%% Cell type:code id:7513ce41-f4ea-44e2-9d76-ea3b857c8069 tags: %% Cell type:code id:7513ce41-f4ea-44e2-9d76-ea3b857c8069 tags:
``` python ``` python
``` ```
%% Cell type:code id:cd11283a-63f1-4893-828e-6cd72f646ef1 tags: %% Cell type:code id:cd11283a-63f1-4893-828e-6cd72f646ef1 tags:
``` python ``` python
# rerun and get missed files # rerun and get missed files
# Get all CSV files # Get all CSV files
indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master" indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
file_list = [] file_list = []
for subdir, dirs, files in os.walk(indir): for subdir, dirs, files in os.walk(indir):
for file in files: for file in files:
if file.endswith(".h5"): if file.endswith(".h5"):
# fp = os.path.join(subdir, file) # fp = os.path.join(subdir, file)
# print(fp) # print(fp)
file_list.append(file) file_list.append(file)
# Change to appropriate filepaths for urls # Change to appropriate filepaths for urls
l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt" l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt" l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"
with open(l1b_urls_fpath) as f: with open(l1b_urls_fpath) as f:
l1b_fpaths = f.readlines() l1b_fpaths = f.readlines()
with open(l2a_urls_fpath) as f: with open(l2a_urls_fpath) as f:
l2a_fpaths = f.readlines() l2a_fpaths = f.readlines()
print("starting loop") print("starting loop")
# Get matching string pattern and run main.py # Get matching string pattern and run main.py
jobs_list = [] jobs_list = []
counter=1 counter=1
for l1b_fp in l1b_fpaths: for l1b_fp in l1b_fpaths:
# Get string pattern # Get string pattern
str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0] str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
try: try:
l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0] l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
except: except:
print("No matching L2 file!", l1b_fp) print("No matching L2 file!", l1b_fp)
continue continue
# Check if file was already downloaded # Check if file was already downloaded
date_str = os.path.basename(l1b_fp).split("_")[2] date_str = os.path.basename(l1b_fp).split("_")[2]
if any(date_str in x for x in file_list): if any(date_str in x for x in file_list):
continue continue
# Submit job # Submit job
job = maap.submitJob(identifier="SouthAmericaGEDI", job = maap.submitJob(identifier="SouthAmericaGEDI",
algo_id="arojas_download_gedi_data", algo_id="arojas_download_gedi_data",
version="master", version="master",
username="arojearthdata", username="arojearthdata",
queue="maap-dps-worker-8gb", queue="maap-dps-worker-8gb",
L1B_URL=l1b_fp, L1B_URL=l1b_fp,
L2A_URL=l2a_fp) L2A_URL=l2a_fp)
jobs_list.append(job) jobs_list.append(job)
if counter%100==0: if counter%100==0:
print("on file num: ", counter, end='\r') print("on file num: ", counter, end='\r')
counter+=1 counter+=1
print('Done') print('Done')
``` ```
%% Output %% Output
starting loop starting loop
Doneile num: 400 Doneile num: 400
%% Cell type:code id:0f17583e-20da-4dea-998c-9e0139700400 tags: %% Cell type:code id:0f17583e-20da-4dea-998c-9e0139700400 tags:
``` python ``` python
``` ```
%% Cell type:code id:35ff40f2-65f8-414d-a53f-e8e0f7487557 tags: %% Cell type:code id:35ff40f2-65f8-414d-a53f-e8e0f7487557 tags:
``` python ``` python
## Condense files into new single folder ## Condense files into new single folder
``` ```
%% Cell type:code id:7c980c00-9823-4043-b235-4a837b8b92fa tags: %% Cell type:code id:7c980c00-9823-4043-b235-4a837b8b92fa tags:
``` python ``` python
import shutil import shutil
# shutil.move("path/to/current/file.foo", "path/to/new/destination/for/file.foo") # shutil.move("path/to/current/file.foo", "path/to/new/destination/for/file.foo")
``` ```
%% Cell type:code id:3d80ac20-b71e-486f-b153-6d19c3fd1fc2 tags: %% Cell type:code id:3d80ac20-b71e-486f-b153-6d19c3fd1fc2 tags:
``` python ``` python
outdir = "/projects/my-private-bucket/GEDI/raw/SouthAmerica" outdir = "/projects/my-private-bucket/GEDI/raw/SouthAmerica"
for fp in csv_list: for fp in file_list:
basename = os.path.basename(fp) basename = os.path.basename(fp)
if "GEDI01_B" in basename: if "GEDI01_B" in basename:
outfp = os.path.join(outdir, "L1B", basename) outfp = os.path.join(outdir, "L1B", basename)
elif "GEDI02_A" in basename: elif "GEDI02_A" in basename:
outfp = os.path.join(outdir, "L2A", basename) outfp = os.path.join(outdir, "L2A", basename)
shutil.move(fp, outfp) shutil.move(fp, outfp)
print("DONE") print("DONE")
``` ```
%% Cell type:code id:4c3f7b66-e482-427f-85ba-19c6d2786e16 tags: %% Cell type:code id:4c3f7b66-e482-427f-85ba-19c6d2786e16 tags:
``` python ``` python
## Check all files in new folder! ## Check all files in new folder!
indir = "/projects/my-private-bucket/GEDI/biomass/2022" indir = "/projects/my-private-bucket/GEDI/biomass/2022"
csv_list = glob.glob(os.path.join(indir, "*")) csv_list = glob.glob(os.path.join(indir, "*"))
print(len(csv_list)) print(len(csv_list))
``` ```
%% Cell type:code id:f63c64c5-4688-43f4-8139-bfc68236d4f4 tags: %% Cell type:code id:f63c64c5-4688-43f4-8139-bfc68236d4f4 tags:
``` python ``` python
``` ```
......
source activate osgeo-env # source activate osgeo-env
source activate /projects/env/osgeo-env
# conda activate /projects/env/osgeo-env
# Get current location of build script # Get current location of build script
basedir=$( cd "$(dirname "$0")" ; pwd -P ) basedir=$( cd "$(dirname "$0")" ; pwd -P )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment