Skip to content
Snippets Groups Projects
Commit 0bd63d63 authored by Alex Rojas's avatar Alex Rojas
Browse files

updated files

parent 68d0ba49
No related branches found
No related tags found
No related merge requests found
#!/bin/bash
# Build script: creates the "osgeo-env" conda environment from the
# environment.yml that sits next to this script, then installs the maap-py
# client (tag v3.0.1) into that environment in editable mode.
# source activate base

# Absolute directory containing this script, independent of the caller's cwd.
basedir=$( cd "$(dirname "$0")" ; pwd -P)

echo installing environment...
# Quote the path so a checkout location containing spaces still works, and
# stop if the environment could not be created.
mamba env create --name osgeo-env -f "${basedir}/environment.yml" || exit 1

# Install the maap.py environment
echo trying to install maap-py...
source activate osgeo-env
git clone --single-branch --branch v3.0.1 https://github.com/MAAP-Project/maap-py.git || exit 1
# Without this guard a failed clone/cd would run "pip install -e ." in the
# wrong directory.
cd maap-py || exit 1
pip install -e . || exit 1
echo installed maap-py package!
\ No newline at end of file
# Conda environment specification for the environment named "osgeo-env".
name: osgeo-env
# Package channels, listed with conda-forge ahead of defaults.
channels:
- conda-forge
- defaults
# No package below carries a version constraint.
dependencies:
- boto3
- botocore
- fsspec
- s3fs
- geopandas
- geopandas-base
- h5py
- numpy
- pandas
- python
- scipy
# Run script: activates osgeo-env and calls main.py with the two GEDI granule
# URLs given as positional arguments, writing results into ./output.
source activate osgeo-env
# Get current location of build script
basedir=$( cd "$(dirname "$0")" ; pwd -P )
# Create output directory to store outputs.
# The name is output as required by the DPS.
# Note how we dont provide an absolute path
# but instead a relative one as the DPS creates
# a temp working directory for our code.
mkdir -p output
# INPUT_FILES
L1B_URL=$1  # URL of the L1B (GEDI01_B) granule
L2A_URL=$2  # URL of the L2A (GEDI02_A) granule
# Call the script using the absolute paths
# Any output written to the stdout and stderr streams will be automatically captured and placed in the output dir
# The expansions are quoted so that characters such as "?", "*" or spaces in
# a URL or in the script path are passed through literally instead of being
# glob-expanded or split into several arguments.
python "${basedir}/main.py" "${L1B_URL}" "${L2A_URL}" output
\ No newline at end of file
%% Cell type:code id:9803c6a0-d547-4112-8edc-eb62680360e2 tags:
``` python
import os
import re

from maap.maap import MAAP

# Client object for the MAAP API host; other cells call maap.submitJob on it.
maap = MAAP(maap_host='api.maap-project.org')
```
%% Cell type:code id:96d5cdce-a2cf-4ec5-8671-f9edd500ab8b tags:
``` python
#maap.register_algorithm_from_yaml_file("/projects/arojas_download_gedi_data.yml")
```
%% Output
<Response [200]>
%% Cell type:code id:54d3df6a-e22a-4f15-ab22-859df5d4b367 tags:
``` python
# Submit one DPS job per L1B granule URL, paired with the L2A URL whose name
# contains the same 13-digit identifier.

# Change to appropriate filepaths for urls
# l1b_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt"
# l2a_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt"
l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"

# One URL per line; splitlines() drops the line terminators.
with open(l1b_urls_fpath) as f:
    l1b_fpaths = f.read().splitlines()
with open(l2a_urls_fpath) as f:
    l2a_fpaths = f.read().splitlines()

# Get matching string pattern and run main.py
jobs_list = []
counter = 1  # counts submitted jobs only; skipped URLs do not advance it
for l1b_fp in l1b_fpaths:
    # Get string pattern: the first run of 13 digits in the file name.
    id_match = re.search(r"[0-9]{13}", os.path.basename(l1b_fp))
    if id_match is None:
        # Blank or malformed lines used to raise IndexError and abort the loop.
        print("No 13-digit identifier in L1B file name, skipping:", l1b_fp)
        continue
    str_pattern = id_match.group(0)

    # First L2A URL containing the same identifier, or None when there is none.
    l2a_fp = next((s for s in l2a_fpaths if str_pattern in s), None)
    if l2a_fp is None:
        print("No matching L2 file!", l1b_fp)
        continue

    # print(l1b_fp)
    # print(l2a_fp)
    job = maap.submitJob(identifier="SouthAmericaGEDI-run-20240117",
                         algo_id="arojas_download_gedi_data",
                         version="master",
                         username="arojearthdata",
                         queue="maap-dps-worker-8gb",
                         L1B_URL=l1b_fp,
                         L2A_URL=l2a_fp)
    jobs_list.append(job)
    # Progress indicator, rewritten in place every 100 submissions.
    if counter % 100 == 0:
        print("on file num: ", counter, end='\r')
    counter += 1
```
%% Output
on file num: 400
%% Cell type:code id:d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d tags:
``` python
# Show the status and id of the first ten submitted jobs, one value per line.
first_ten = jobs_list[:10]
for job in first_ten:
    print(job.retrieve_status())
    print(job.id)
```
%% Output
Running
9efde8c0-b4aa-4d4b-9972-55b72be802a6
Running
d101a1b9-c44e-44e8-931a-6fe9f436e03d
Running
acc12502-5cfb-4f05-b05b-2f57657d2f53
Running
126548ea-2fd0-434a-b040-66a0a45d8f9c
Running
325641a2-f0f5-40a7-8904-c023d71a7b55
Running
a9039b0e-f678-403a-8ff2-576adb138199
Accepted
a99753e5-3ceb-4b25-b1e5-e1c154d2235d
Accepted
b985c8ff-77ae-41c9-8b00-d1e9ee0cc717
Accepted
d70e4917-323f-48e8-976d-6c94b08277a2
Accepted
0fe1d4f4-cb0c-41ae-99ad-0e9d28f7e695
%% Cell type:code id:e5f0edb0-ffe4-4f8e-bb4f-d51686ee47b4 tags:
``` python
```
%% Cell type:code id:89d3a568-4801-4168-891f-04c65091d336 tags:
``` python
# Some files didnt download, lets check
import glob

indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"

# Bare file names (no directory part) of every .h5 file found under indir,
# at any depth.
file_list = [
    fname
    for _root, _subdirs, fnames in os.walk(indir)
    for fname in fnames
    if fname.endswith(".h5")
]
```
%% Cell type:code id:55675d70-413e-4f1e-8666-3bb4f38e3359 tags:
``` python
len(file_list)
```
%% Output
74
%% Cell type:code id:7513ce41-f4ea-44e2-9d76-ea3b857c8069 tags:
``` python
```
%% Cell type:code id:cd11283a-63f1-4893-828e-6cd72f646ef1 tags:
``` python
# rerun and get missed files
# Resubmit a job for every L1B/L2A URL pair whose date string does not appear
# in the name of any .h5 file already present in the DPS output folder.

# Get all .h5 files already downloaded (bare file names only)
indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
file_list = []
for subdir, dirs, files in os.walk(indir):
    for file in files:
        if file.endswith(".h5"):
            # fp = os.path.join(subdir, file)
            # print(fp)
            file_list.append(file)

# Change to appropriate filepaths for urls
l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"

# splitlines() drops the line terminator that readlines() would leave on each
# URL, so the values handed to submitJob carry no trailing "\n".
with open(l1b_urls_fpath) as f:
    l1b_fpaths = f.read().splitlines()
with open(l2a_urls_fpath) as f:
    l2a_fpaths = f.read().splitlines()

print("starting loop")
# Get matching string pattern and run main.py
jobs_list = []
counter = 1  # counts submitted jobs only
for l1b_fp in l1b_fpaths:
    l1b_name = os.path.basename(l1b_fp)

    # Get string pattern: the first run of 13 digits in the file name.
    id_match = re.search(r"[0-9]{13}", l1b_name)
    if id_match is None:
        # Blank or malformed lines used to raise IndexError and abort the loop.
        print("No 13-digit identifier in L1B file name, skipping:", l1b_fp)
        continue
    str_pattern = id_match.group(0)

    # First L2A URL containing the same identifier, or None when there is none.
    l2a_fp = next((s for s in l2a_fpaths if str_pattern in s), None)
    if l2a_fp is None:
        print("No matching L2 file!", l1b_fp)
        continue

    # Check if file was already downloaded: the third "_"-separated field of
    # the L1B name is looked up inside the downloaded file names.
    name_parts = l1b_name.split("_")
    if len(name_parts) < 3:
        print("Unexpected L1B file name, skipping:", l1b_fp)
        continue
    date_str = name_parts[2]
    if any(date_str in x for x in file_list):
        continue

    # Submit job
    job = maap.submitJob(identifier="SouthAmericaGEDI",
                         algo_id="arojas_download_gedi_data",
                         version="master",
                         username="arojearthdata",
                         queue="maap-dps-worker-8gb",
                         L1B_URL=l1b_fp,
                         L2A_URL=l2a_fp)
    jobs_list.append(job)
    # Progress indicator, rewritten in place every 100 submissions.
    if counter % 100 == 0:
        print("on file num: ", counter, end='\r')
    counter += 1

# Start on a fresh line so "Done" does not overwrite the "\r" progress text.
print('\nDone')
```
%% Output
starting loop
Doneile num: 400
%% Cell type:code id:0f17583e-20da-4dea-998c-9e0139700400 tags:
``` python
```
%% Cell type:code id:35ff40f2-65f8-414d-a53f-e8e0f7487557 tags:
``` python
## Condense files into new single folder
```
%% Cell type:code id:7c980c00-9823-4043-b235-4a837b8b92fa tags:
``` python
import shutil
# shutil.move("path/to/current/file.foo", "path/to/new/destination/for/file.foo")
```
%% Cell type:code id:3d80ac20-b71e-486f-b153-6d19c3fd1fc2 tags:
``` python
# Move each file in csv_list into an "L1B" or "L2A" subfolder of outdir,
# chosen by the GEDI product tag found in its file name.
# NOTE(review): csv_list is not assigned in this cell; the only visible
# assignment is in a later cell and globs a different directory — confirm the
# intended input list before running.
outdir = "/projects/my-private-bucket/GEDI/raw/SouthAmerica"
for fp in csv_list:
    basename = os.path.basename(fp)
    if "GEDI01_B" in basename:
        product_dir = os.path.join(outdir, "L1B")
    elif "GEDI02_A" in basename:
        product_dir = os.path.join(outdir, "L2A")
    else:
        # Without this branch the file would be moved to the previous
        # iteration's destination (overwriting it) or raise NameError on the
        # first iteration.
        print("Skipping file with unrecognized product tag:", fp)
        continue
    # shutil.move does not create missing parent directories.
    os.makedirs(product_dir, exist_ok=True)
    outfp = os.path.join(product_dir, basename)
    shutil.move(fp, outfp)
print("DONE")
```
%% Cell type:code id:4c3f7b66-e482-427f-85ba-19c6d2786e16 tags:
``` python
## Check all files in new folder!
# NOTE(review): this directory is not the ".../GEDI/raw/SouthAmerica" outdir
# that the move cell writes to — confirm which folder is meant.
indir = "/projects/my-private-bucket/GEDI/biomass/2022"
# Paths directly inside indir that match the "*" pattern (not recursive).
csv_list = glob.glob(os.path.join(indir, "*"))
print(len(csv_list))
```
%% Cell type:code id:f63c64c5-4688-43f4-8139-bfc68236d4f4 tags:
``` python
```
......
%% Cell type:code id:9803c6a0-d547-4112-8edc-eb62680360e2 tags:
``` python
import os
import re

from maap.maap import MAAP

# Client object for the MAAP API host; other cells call maap.submitJob on it.
maap = MAAP(maap_host='api.maap-project.org')
```
%% Cell type:code id:96d5cdce-a2cf-4ec5-8671-f9edd500ab8b tags:
``` python
#maap.register_algorithm_from_yaml_file("/projects/arojas_download_gedi_data.yml")
```
%% Output
<Response [200]>
%% Cell type:code id:54d3df6a-e22a-4f15-ab22-859df5d4b367 tags:
``` python
# Change to appropriate filepaths for urls
# l1b_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt"
# l2a_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt"
l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"
with open(l1b_urls_fpath) as f:
l1b_fpaths = f.read().splitlines()
with open(l2a_urls_fpath) as f:
l2a_fpaths = f.read().splitlines()
# Get matching string pattern and run main.py
jobs_list = []
counter=1
for l1b_fp in l1b_fpaths:
for l1b_fp in l1b_fpaths[:10]:
# Get string pattern
str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
try:
l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
except:
print("No matching L2 file!", l1b_fp)
continue
# print(l1b_fp)
# print(l2a_fp)
job = maap.submitJob(identifier="SouthAmericaGEDI",
job = maap.submitJob(identifier="SouthAmericaGEDI-run-20240117",
algo_id="arojas_download_gedi_data",
version="master",
username="arojearthdata",
queue="maap-dps-worker-8gb",
L1B_URL=l1b_fp,
L2A_URL=l2a_fp)
jobs_list.append(job)
if counter%100==0:
print("on file num: ", counter, end='\r')
counter+=1
```
%% Output
on file num: 400
%% Cell type:code id:d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d tags:
``` python
for job in jobs_list:
for job in jobs_list[:10]:
print(job.retrieve_status())
print(job.id)
break
```
%% Output
Failed
353e658d-8bfa-4b7d-b508-24e8d42d3483
Accepted
84175819-3e25-4fee-a6c0-4e52b2dca88f
Accepted
7a478af7-e3e5-4206-a033-4fd23a1e3c81
Accepted
04d8a2dc-e54f-461d-8ace-39a381dd69d1
Accepted
c6bd0fc6-8f77-4f17-8cc1-4508232366d4
Accepted
630b5127-7b1f-4d2a-95a2-f571a6a32d6d
Accepted
747dbd20-d920-4658-aa0d-fe3c47838f49
Accepted
fa394b4b-0009-484f-af99-e96cfcc8d97b
Accepted
854aa24b-0861-42fa-9ff7-ae501791448b
Accepted
a28e565b-03a4-4c89-b4de-ad2c7d824fec
Accepted
3dc475c2-c87e-43a3-a876-1118815ff82f
%% Cell type:code id:e5f0edb0-ffe4-4f8e-bb4f-d51686ee47b4 tags:
``` python
```
%% Cell type:code id:89d3a568-4801-4168-891f-04c65091d336 tags:
``` python
# Some files didnt download, lets check
import glob

indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"

# Bare file names (no directory part) of every .h5 file found under indir,
# at any depth.
file_list = [
    fname
    for _root, _subdirs, fnames in os.walk(indir)
    for fname in fnames
    if fname.endswith(".h5")
]
```
%% Cell type:code id:55675d70-413e-4f1e-8666-3bb4f38e3359 tags:
``` python
len(file_list)
```
%% Output
74
%% Cell type:code id:7513ce41-f4ea-44e2-9d76-ea3b857c8069 tags:
``` python
```
%% Cell type:code id:cd11283a-63f1-4893-828e-6cd72f646ef1 tags:
``` python
# rerun and get missed files
# Resubmit a job for every L1B/L2A URL pair whose date string does not appear
# in the name of any .h5 file already present in the DPS output folder.

# Get all .h5 files already downloaded (bare file names only)
indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
file_list = []
for subdir, dirs, files in os.walk(indir):
    for file in files:
        if file.endswith(".h5"):
            # fp = os.path.join(subdir, file)
            # print(fp)
            file_list.append(file)

# Change to appropriate filepaths for urls
l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"

# splitlines() drops the line terminator that readlines() would leave on each
# URL, so the values handed to submitJob carry no trailing "\n".
with open(l1b_urls_fpath) as f:
    l1b_fpaths = f.read().splitlines()
with open(l2a_urls_fpath) as f:
    l2a_fpaths = f.read().splitlines()

print("starting loop")
# Get matching string pattern and run main.py
jobs_list = []
counter = 1  # counts submitted jobs only
for l1b_fp in l1b_fpaths:
    l1b_name = os.path.basename(l1b_fp)

    # Get string pattern: the first run of 13 digits in the file name.
    id_match = re.search(r"[0-9]{13}", l1b_name)
    if id_match is None:
        # Blank or malformed lines used to raise IndexError and abort the loop.
        print("No 13-digit identifier in L1B file name, skipping:", l1b_fp)
        continue
    str_pattern = id_match.group(0)

    # First L2A URL containing the same identifier, or None when there is none.
    l2a_fp = next((s for s in l2a_fpaths if str_pattern in s), None)
    if l2a_fp is None:
        print("No matching L2 file!", l1b_fp)
        continue

    # Check if file was already downloaded: the third "_"-separated field of
    # the L1B name is looked up inside the downloaded file names.
    name_parts = l1b_name.split("_")
    if len(name_parts) < 3:
        print("Unexpected L1B file name, skipping:", l1b_fp)
        continue
    date_str = name_parts[2]
    if any(date_str in x for x in file_list):
        continue

    # Submit job
    job = maap.submitJob(identifier="SouthAmericaGEDI",
                         algo_id="arojas_download_gedi_data",
                         version="master",
                         username="arojearthdata",
                         queue="maap-dps-worker-8gb",
                         L1B_URL=l1b_fp,
                         L2A_URL=l2a_fp)
    jobs_list.append(job)
    # Progress indicator, rewritten in place every 100 submissions.
    if counter % 100 == 0:
        print("on file num: ", counter, end='\r')
    counter += 1

# Start on a fresh line so "Done" does not overwrite the "\r" progress text.
print('\nDone')
```
%% Output
starting loop
Doneile num: 400
%% Cell type:code id:0f17583e-20da-4dea-998c-9e0139700400 tags:
``` python
```
%% Cell type:code id:35ff40f2-65f8-414d-a53f-e8e0f7487557 tags:
``` python
## Condense files into new single folder
```
%% Cell type:code id:7c980c00-9823-4043-b235-4a837b8b92fa tags:
``` python
import shutil
# shutil.move("path/to/current/file.foo", "path/to/new/destination/for/file.foo")
```
%% Cell type:code id:3d80ac20-b71e-486f-b153-6d19c3fd1fc2 tags:
``` python
# Move each file in csv_list into an "L1B" or "L2A" subfolder of outdir,
# chosen by the GEDI product tag found in its file name.
# NOTE(review): csv_list is not assigned in this cell; the only visible
# assignment is in a later cell and globs a different directory — confirm the
# intended input list before running.
outdir = "/projects/my-private-bucket/GEDI/raw/SouthAmerica"
for fp in csv_list:
    basename = os.path.basename(fp)
    if "GEDI01_B" in basename:
        product_dir = os.path.join(outdir, "L1B")
    elif "GEDI02_A" in basename:
        product_dir = os.path.join(outdir, "L2A")
    else:
        # Without this branch the file would be moved to the previous
        # iteration's destination (overwriting it) or raise NameError on the
        # first iteration.
        print("Skipping file with unrecognized product tag:", fp)
        continue
    # shutil.move does not create missing parent directories.
    os.makedirs(product_dir, exist_ok=True)
    outfp = os.path.join(product_dir, basename)
    shutil.move(fp, outfp)
print("DONE")
```
%% Cell type:code id:4c3f7b66-e482-427f-85ba-19c6d2786e16 tags:
``` python
## Check all files in new folder!
# NOTE(review): this directory is not the ".../GEDI/raw/SouthAmerica" outdir
# that the move cell writes to — confirm which folder is meant.
indir = "/projects/my-private-bucket/GEDI/biomass/2022"
# Paths directly inside indir that match the "*" pattern (not recursive).
csv_list = glob.glob(os.path.join(indir, "*"))
print(len(csv_list))
```
%% Cell type:code id:f63c64c5-4688-43f4-8139-bfc68236d4f4 tags:
``` python
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment