updated to add print statements

0e7907e6 · Alex Rojas · 66021774 · 0e7907e6 · 0e7907e6 · 0e7907e6
Commit 0e7907e6 authored 10 months ago by Alex Rojas
--- a/.ipynb_checkpoints/get_gedi_data-checkpoint.py
+++ b/.ipynb_checkpoints/get_gedi_data-checkpoint.py
@@ -12,6 +12,7 @@ import os
 def lpdaac_gedi_https_to_s3(url):
    dir_comps = url.split("/")
+    print(dir_comps)
    return f"s3://lp-prod-protected/{dir_comps[6]}/{dir_comps[8].strip('.h5')}/{dir_comps[8]}"
 def get_gedi_data(url):

--- a/.ipynb_checkpoints/run-checkpoint.sh
+++ b/.ipynb_checkpoints/run-checkpoint.sh
-source activate osgeo-env
+# source activate osgeo-env
+source activate /projects/env/osgeo-env
+# conda activate /projects/env/osgeo-env
 # Get current location of build script
 basedir=$( cd "$(dirname "$0")" ; pwd -P )

--- a/__pycache__/get_gedi_data.cpython-312.pyc
+++ b/__pycache__/get_gedi_data.cpython-312.pyc
--- a/get_gedi_data.py
+++ b/get_gedi_data.py
@@ -12,6 +12,7 @@ import os
 def lpdaac_gedi_https_to_s3(url):
    dir_comps = url.split("/")
+    print(dir_comps)
    return f"s3://lp-prod-protected/{dir_comps[6]}/{dir_comps[8].strip('.h5')}/{dir_comps[8]}"
 def get_gedi_data(url):

--- a/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb
+++ b/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 1,
   "id": "9803c6a0-d547-4112-8edc-eb62680360e2",
   "metadata": {},
   "outputs": [],
@@ -12,49 +12,31 @@
    "from maap.maap import MAAP\n",
    "import re\n",
    "# maap = MAAP(maap_host='api.maap-project.org')\n",
-    "maap = MAAP(maap_host='api.maap-project.org')"
+    "maap = MAAP(maap_host='api.maap-project.org')\n",
+    "import time"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 2,
   "id": "96d5cdce-a2cf-4ec5-8671-f9edd500ab8b",
   "metadata": {
    "tags": []
   },
-   "outputs": [
+   "outputs": [],
-    {
-     "data": {
-      "text/plain": [
-       "<Response [200]>"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
   "source": [
    "#maap.register_algorithm_from_yaml_file(\"/projects/arojas_download_gedi_data.yml\")"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
   "id": "54d3df6a-e22a-4f15-ab22-859df5d4b367",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
-   "outputs": [
+   "outputs": [],
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "on file num:  400\r"
-     ]
-    }
-   ],
   "source": [
    "# Change to appropriate filepaths for urls\n",
    "# l1b_urls_fpath = \"/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt\"\n",
@@ -70,7 +52,7 @@
    "# Get matching string pattern and run main.py\n",
    "jobs_list = []\n",
    "counter=1\n",
-    "for l1b_fp in l1b_fpaths:\n",
+    "for l1b_fp in l1b_fpaths[:10]:\n",
    "    # Get string pattern\n",
    "    str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n",
    "    try:\n",
@@ -84,20 +66,58 @@
    "                         algo_id=\"arojas_download_gedi_data\",\n",
    "                         version=\"master\",\n",
    "                         username=\"arojearthdata\",\n",
-    "                         queue=\"maap-dps-worker-8gb\",\n",
+    "                         queue=\"maap-dps-worker-16gb\",\n",
    "                         L1B_URL=l1b_fp,\n",
    "                         L2A_URL=l2a_fp)\n",
    "\n",
    "    jobs_list.append(job)\n",
-    "    \n",
    "    if counter%100==0:\n",
    "        print(\"on file num: \", counter, end='\\r')\n",
+    "        time.sleep(900)\n",
    "    counter+=1"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 16,
+   "id": "d71d17cf-9a79-4508-9047-dade9b64201b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI01_B.002/2020.08.31/GEDI01_B_2020244155754_O09740_04_T06708_02_005_01_V002.h5\n",
+      "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_A.002/2020.08.31/GEDI02_A_2020244155754_O09740_04_T06708_02_003_01_V002.h5\n"
+     ]
+    }
+   ],
+   "source": [
+    "for l1b_fp in l1b_fpaths[:10]:\n",
+    "    \n",
+    "    # Get string pattern\n",
+    "    str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n",
+    "    l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]\n",
+    "    print(l1b_fp)\n",
+    "    print(l2a_fp)\n",
+    "    break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f1dce543-f916-4561-990b-ae8c3fdb7ad7",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
   "id": "d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d",
   "metadata": {},
   "outputs": [
@@ -105,31 +125,51 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Running\n",
+      "Succeeded\n",
-      "9efde8c0-b4aa-4d4b-9972-55b72be802a6\n",
+      "f70b1586-e3ac-4155-a73e-49f5386c7e2c\n",
-      "Running\n",
+      "Succeeded\n",
-      "d101a1b9-c44e-44e8-931a-6fe9f436e03d\n",
+      "10279c23-3225-460e-8cf9-6364fa0ec2a0\n",
-      "Running\n",
+      "Succeeded\n",
-      "acc12502-5cfb-4f05-b05b-2f57657d2f53\n",
+      "fe212a29-9da4-4a72-a02d-94a37750e1fa\n",
-      "Running\n",
+      "Succeeded\n",
-      "126548ea-2fd0-434a-b040-66a0a45d8f9c\n",
+      "7855a6a3-0416-41f6-b546-11eab9097adc\n",
-      "Running\n",
+      "Succeeded\n",
-      "325641a2-f0f5-40a7-8904-c023d71a7b55\n",
+      "8c343834-6622-404e-9b83-d4dd771a151f\n",
-      "Running\n",
+      "Succeeded\n",
-      "a9039b0e-f678-403a-8ff2-576adb138199\n",
+      "cc412cd5-1491-4dad-bc19-63ae70f9b6af\n",
-      "Accepted\n",
+      "Succeeded\n",
-      "a99753e5-3ceb-4b25-b1e5-e1c154d2235d\n",
+      "86b301cf-5b37-43af-b2f5-cb51d5f6b1c3\n",
-      "Accepted\n",
+      "Succeeded\n",
-      "b985c8ff-77ae-41c9-8b00-d1e9ee0cc717\n",
+      "ec0e4005-b511-4d93-8fac-a0ff34682b1f\n",
-      "Accepted\n",
+      "Succeeded\n",
-      "d70e4917-323f-48e8-976d-6c94b08277a2\n",
+      "30f06fb8-aab6-4e74-aba4-e7a5d76ac100\n",
-      "Accepted\n",
+      "Succeeded\n",
-      "0fe1d4f4-cb0c-41ae-99ad-0e9d28f7e695\n"
+      "30c44930-66fa-4215-b877-517a1e6472b1\n",
+      "Succeeded\n",
+      "c1e9fd05-241a-4bea-8a85-3fc8992cbf67\n",
+      "Succeeded\n",
+      "984eab57-449c-4fce-b490-b6ef05cd80f2\n",
+      "Succeeded\n",
+      "da048416-13f5-4a00-b7e0-76bff20bfa12\n",
+      "Succeeded\n",
+      "cc8d47fc-7615-4ba2-adbc-276b5f45c75a\n",
+      "Succeeded\n",
+      "c42d1afe-3d55-40d3-af4a-8ed98bc98492\n",
+      "Succeeded\n",
+      "e39ebed1-9694-4dfb-bafb-5249c7fca41e\n",
+      "Succeeded\n",
+      "656d0e68-ab9e-42a2-95b4-7fa45503cd8a\n",
+      "Succeeded\n",
+      "5c36624d-c33b-4756-9436-cac71ebb7ae9\n",
+      "Succeeded\n",
+      "1a5b59c1-c26e-4028-b6d9-54b7676f3c67\n",
+      "Succeeded\n",
+      "c2f3e181-22c5-460c-a4ac-d2fd2694f96e\n"
     ]
    }
   ],
   "source": [
-    "for job in jobs_list[:10]:\n",
+    "for job in jobs_list[:20]:\n",
    "    print(job.retrieve_status())\n",
    "    print(job.id)"
   ]
@@ -144,7 +184,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 17,
   "id": "89d3a568-4801-4168-891f-04c65091d336",
   "metadata": {},
   "outputs": [],
@@ -158,22 +198,24 @@
    "        if file.endswith(\".h5\"):\n",
    "            # fp = os.path.join(subdir, file)\n",
    "            # print(fp)\n",
-    "            file_list.append(file)"
+    "            file_list.append(os.path.join(subdir, file))"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 24,
-   "id": "55675d70-413e-4f1e-8666-3bb4f38e3359",
+   "id": "7dde8135-d6b6-43ce-b2c4-fd8d0cda4077",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "74"
+       "918"
      ]
     },
-     "execution_count": 9,
+     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -285,7 +327,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
   "id": "7c980c00-9823-4043-b235-4a837b8b92fa",
   "metadata": {},
   "outputs": [],

 %% Cell type:code id:9803c6a0-d547-4112-8edc-eb62680360e2 tags:
 ``` python
 import os
 import re
 from maap.maap import MAAP
 import re
 # maap = MAAP(maap_host='api.maap-project.org')
 maap = MAAP(maap_host='api.maap-project.org')
+import time
 ```
 %% Cell type:code id:96d5cdce-a2cf-4ec5-8671-f9edd500ab8b tags:
 ``` python
 #maap.register_algorithm_from_yaml_file("/projects/arojas_download_gedi_data.yml")
 ```
-%% Output
-    <Response [200]>
 %% Cell type:code id:54d3df6a-e22a-4f15-ab22-859df5d4b367 tags:
 ``` python
 # Change to appropriate filepaths for urls
 # l1b_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt"
 # l2a_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt"
 l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
 l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"
 with open(l1b_urls_fpath) as f:
    l1b_fpaths = f.read().splitlines()
 with open(l2a_urls_fpath) as f:
    l2a_fpaths = f.read().splitlines()
 # Get matching string pattern and run main.py
 jobs_list = []
 counter=1
-for l1b_fp in l1b_fpaths:
+for l1b_fp in l1b_fpaths[:10]:
    # Get string pattern
    str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
    try:
        l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
    except:
        print("No matching L2 file!", l1b_fp)
        continue
    # print(l1b_fp)
    # print(l2a_fp)
    job = maap.submitJob(identifier="SouthAmericaGEDI-run-20240117",
                         algo_id="arojas_download_gedi_data",
                         version="master",
                         username="arojearthdata",
-                         queue="maap-dps-worker-8gb",
+                         queue="maap-dps-worker-16gb",
                         L1B_URL=l1b_fp,
                         L2A_URL=l2a_fp)
    jobs_list.append(job)
    if counter%100==0:
        print("on file num: ", counter, end='\r')
+        time.sleep(900)
    counter+=1
 ```
+%% Cell type:code id:d71d17cf-9a79-4508-9047-dade9b64201b tags:
+``` python
+for l1b_fp in l1b_fpaths[:10]:
+    # Get string pattern
+    str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
+    l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
+    print(l1b_fp)
+    print(l2a_fp)
+    break
+```
 %% Output
-    on file num:  400
+    https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI01_B.002/2020.08.31/GEDI01_B_2020244155754_O09740_04_T06708_02_005_01_V002.h5
+    https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_A.002/2020.08.31/GEDI02_A_2020244155754_O09740_04_T06708_02_003_01_V002.h5
+%% Cell type:code id:f1dce543-f916-4561-990b-ae8c3fdb7ad7 tags:
+``` python
+```
 %% Cell type:code id:d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d tags:
 ``` python
-for job in jobs_list[:10]:
+for job in jobs_list[:20]:
    print(job.retrieve_status())
    print(job.id)
 ```
 %% Output
-    Running
+    Succeeded
-    9efde8c0-b4aa-4d4b-9972-55b72be802a6
+    f70b1586-e3ac-4155-a73e-49f5386c7e2c
-    Running
+    Succeeded
-    d101a1b9-c44e-44e8-931a-6fe9f436e03d
+    10279c23-3225-460e-8cf9-6364fa0ec2a0
-    Running
+    Succeeded
-    acc12502-5cfb-4f05-b05b-2f57657d2f53
+    fe212a29-9da4-4a72-a02d-94a37750e1fa
-    Running
+    Succeeded
-    126548ea-2fd0-434a-b040-66a0a45d8f9c
+    7855a6a3-0416-41f6-b546-11eab9097adc
-    Running
+    Succeeded
-    325641a2-f0f5-40a7-8904-c023d71a7b55
+    8c343834-6622-404e-9b83-d4dd771a151f
-    Running
+    Succeeded
-    a9039b0e-f678-403a-8ff2-576adb138199
+    cc412cd5-1491-4dad-bc19-63ae70f9b6af
-    Accepted
+    Succeeded
-    a99753e5-3ceb-4b25-b1e5-e1c154d2235d
+    86b301cf-5b37-43af-b2f5-cb51d5f6b1c3
-    Accepted
+    Succeeded
-    b985c8ff-77ae-41c9-8b00-d1e9ee0cc717
+    ec0e4005-b511-4d93-8fac-a0ff34682b1f
-    Accepted
+    Succeeded
-    d70e4917-323f-48e8-976d-6c94b08277a2
+    30f06fb8-aab6-4e74-aba4-e7a5d76ac100
-    Accepted
+    Succeeded
-    0fe1d4f4-cb0c-41ae-99ad-0e9d28f7e695
+    30c44930-66fa-4215-b877-517a1e6472b1
+    Succeeded
+    c1e9fd05-241a-4bea-8a85-3fc8992cbf67
+    Succeeded
+    984eab57-449c-4fce-b490-b6ef05cd80f2
+    Succeeded
+    da048416-13f5-4a00-b7e0-76bff20bfa12
+    Succeeded
+    cc8d47fc-7615-4ba2-adbc-276b5f45c75a
+    Succeeded
+    c42d1afe-3d55-40d3-af4a-8ed98bc98492
+    Succeeded
+    e39ebed1-9694-4dfb-bafb-5249c7fca41e
+    Succeeded
+    656d0e68-ab9e-42a2-95b4-7fa45503cd8a
+    Succeeded
+    5c36624d-c33b-4756-9436-cac71ebb7ae9
+    Succeeded
+    1a5b59c1-c26e-4028-b6d9-54b7676f3c67
+    Succeeded
+    c2f3e181-22c5-460c-a4ac-d2fd2694f96e
 %% Cell type:code id:e5f0edb0-ffe4-4f8e-bb4f-d51686ee47b4 tags:
 ``` python
 ```
 %% Cell type:code id:89d3a568-4801-4168-891f-04c65091d336 tags:
 ``` python
 # Some files didn't download, let's check
 import glob
 indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
 file_list = []
 for subdir, dirs, files in os.walk(indir):
    for file in files:
        if file.endswith(".h5"):
            # fp = os.path.join(subdir, file)
            # print(fp)
-            file_list.append(file)
+            file_list.append(os.path.join(subdir, file))
 ```
-%% Cell type:code id:55675d70-413e-4f1e-8666-3bb4f38e3359 tags:
+%% Cell type:code id:7dde8135-d6b6-43ce-b2c4-fd8d0cda4077 tags:
 ``` python
 len(file_list)
 ```
 %% Output
-    74
+    918
 %% Cell type:code id:7513ce41-f4ea-44e2-9d76-ea3b857c8069 tags:
 ``` python
 ```
 %% Cell type:code id:cd11283a-63f1-4893-828e-6cd72f646ef1 tags:
 ``` python
 # rerun and get missed files
 # Get all downloaded .h5 files
 indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
 file_list = []
 for subdir, dirs, files in os.walk(indir):
    for file in files:
        if file.endswith(".h5"):
            # fp = os.path.join(subdir, file)
            # print(fp)
            file_list.append(file)
 # Change to appropriate filepaths for urls
 l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
 l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"
 with open(l1b_urls_fpath) as f:
    l1b_fpaths = f.readlines()
 with open(l2a_urls_fpath) as f:
    l2a_fpaths = f.readlines()
 print("starting loop")
 # Get matching string pattern and run main.py
 jobs_list = []
 counter=1
 for l1b_fp in l1b_fpaths:
    # Get string pattern
    str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
    try:
        l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
    except:
        print("No matching L2 file!", l1b_fp)
        continue
    # Check if file was already downloaded
    date_str = os.path.basename(l1b_fp).split("_")[2]
    if any(date_str in x for x in file_list):
        continue
    # Submit job
    job = maap.submitJob(identifier="SouthAmericaGEDI",
                         algo_id="arojas_download_gedi_data",
                         version="master",
                         username="arojearthdata",
                         queue="maap-dps-worker-8gb",
                         L1B_URL=l1b_fp,
                         L2A_URL=l2a_fp)
    jobs_list.append(job)
    if counter%100==0:
        print("on file num: ", counter, end='\r')
    counter+=1
 print('Done')
 ```
 %% Output
    starting loop
    Doneile num:  400
 %% Cell type:code id:0f17583e-20da-4dea-998c-9e0139700400 tags:
 ``` python
 ```
 %% Cell type:code id:35ff40f2-65f8-414d-a53f-e8e0f7487557 tags:
 ``` python
 ## Condense files into new single folder
 ```
 %% Cell type:code id:7c980c00-9823-4043-b235-4a837b8b92fa tags:
 ``` python
 import shutil
 # shutil.move("path/to/current/file.foo", "path/to/new/destination/for/file.foo")
 ```
 %% Cell type:code id:3d80ac20-b71e-486f-b153-6d19c3fd1fc2 tags:
 ``` python
 outdir = "/projects/my-private-bucket/GEDI/raw/SouthAmerica"
 for fp in csv_list:
    basename = os.path.basename(fp)
    if "GEDI01_B" in basename:
        outfp = os.path.join(outdir, "L1B", basename)
    elif "GEDI02_A" in basename:
        outfp = os.path.join(outdir, "L2A", basename)
    shutil.move(fp, outfp)
 print("DONE")
 ```
 %% Cell type:code id:4c3f7b66-e482-427f-85ba-19c6d2786e16 tags:
 ``` python
 ## Check all files in new folder!
 indir = "/projects/my-private-bucket/GEDI/biomass/2022"
 csv_list = glob.glob(os.path.join(indir, "*"))
 print(len(csv_list))
 ```
 %% Cell type:code id:f63c64c5-4688-43f4-8139-bfc68236d4f4 tags:
 ``` python
 ```

--- a/notebooks/access-gedi.ipynb
+++ b/notebooks/access-gedi.ipynb
@@ -12,7 +12,8 @@
    "from maap.maap import MAAP\n",
    "import re\n",
    "# maap = MAAP(maap_host='api.maap-project.org')\n",
-    "maap = MAAP(maap_host='api.maap-project.org')"
+    "maap = MAAP(maap_host='api.maap-project.org')\n",
+    "import time"
   ]
  },
  {
@@ -29,7 +30,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 8,
   "id": "54d3df6a-e22a-4f15-ab22-859df5d4b367",
   "metadata": {
    "scrolled": true,
@@ -70,15 +71,53 @@
    "                         L2A_URL=l2a_fp)\n",
    "\n",
    "    jobs_list.append(job)\n",
-    "    \n",
    "    if counter%100==0:\n",
    "        print(\"on file num: \", counter, end='\\r')\n",
+    "        time.sleep(900)\n",
    "    counter+=1"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 16,
+   "id": "d71d17cf-9a79-4508-9047-dade9b64201b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI01_B.002/2020.08.31/GEDI01_B_2020244155754_O09740_04_T06708_02_005_01_V002.h5\n",
+      "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_A.002/2020.08.31/GEDI02_A_2020244155754_O09740_04_T06708_02_003_01_V002.h5\n"
+     ]
+    }
+   ],
+   "source": [
+    "for l1b_fp in l1b_fpaths[:10]:\n",
+    "    \n",
+    "    # Get string pattern\n",
+    "    str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n",
+    "    l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]\n",
+    "    print(l1b_fp)\n",
+    "    print(l2a_fp)\n",
+    "    break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f1dce543-f916-4561-990b-ae8c3fdb7ad7",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
   "id": "d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d",
   "metadata": {},
   "outputs": [
@@ -86,31 +125,51 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Accepted\n",
+      "Succeeded\n",
-      "54dc2f4c-eb12-4720-a83b-5b2e3548a94f\n",
+      "f70b1586-e3ac-4155-a73e-49f5386c7e2c\n",
-      "Accepted\n",
+      "Succeeded\n",
-      "6cc81082-0bc8-4d31-b295-62c4135b9b4b\n",
+      "10279c23-3225-460e-8cf9-6364fa0ec2a0\n",
-      "Accepted\n",
+      "Succeeded\n",
-      "1b7bf749-78f6-49ee-9e84-71783e8d5449\n",
+      "fe212a29-9da4-4a72-a02d-94a37750e1fa\n",
-      "Accepted\n",
+      "Succeeded\n",
-      "6e09b33b-8159-4449-9654-ccf29e6b6486\n",
+      "7855a6a3-0416-41f6-b546-11eab9097adc\n",
-      "Accepted\n",
+      "Succeeded\n",
-      "774d6139-212f-4841-8f1b-13a5eadda4bc\n",
+      "8c343834-6622-404e-9b83-d4dd771a151f\n",
-      "Accepted\n",
+      "Succeeded\n",
-      "bbdb0764-3dc2-458c-aa8d-2151e03125c5\n",
+      "cc412cd5-1491-4dad-bc19-63ae70f9b6af\n",
-      "Accepted\n",
+      "Succeeded\n",
-      "1c5faed3-2c03-4260-ac4e-7478007c0b0d\n",
+      "86b301cf-5b37-43af-b2f5-cb51d5f6b1c3\n",
-      "Accepted\n",
+      "Succeeded\n",
-      "c92b2edf-2c1b-4b76-8e4d-d18634b7603d\n",
+      "ec0e4005-b511-4d93-8fac-a0ff34682b1f\n",
-      "Accepted\n",
+      "Succeeded\n",
-      "67ed03d8-7d9a-41a7-8858-55f58012a00e\n",
+      "30f06fb8-aab6-4e74-aba4-e7a5d76ac100\n",
-      "Accepted\n",
+      "Succeeded\n",
-      "85c052ca-1269-4335-9559-63b2475a069f\n"
+      "30c44930-66fa-4215-b877-517a1e6472b1\n",
+      "Succeeded\n",
+      "c1e9fd05-241a-4bea-8a85-3fc8992cbf67\n",
+      "Succeeded\n",
+      "984eab57-449c-4fce-b490-b6ef05cd80f2\n",
+      "Succeeded\n",
+      "da048416-13f5-4a00-b7e0-76bff20bfa12\n",
+      "Succeeded\n",
+      "cc8d47fc-7615-4ba2-adbc-276b5f45c75a\n",
+      "Succeeded\n",
+      "c42d1afe-3d55-40d3-af4a-8ed98bc98492\n",
+      "Succeeded\n",
+      "e39ebed1-9694-4dfb-bafb-5249c7fca41e\n",
+      "Succeeded\n",
+      "656d0e68-ab9e-42a2-95b4-7fa45503cd8a\n",
+      "Succeeded\n",
+      "5c36624d-c33b-4756-9436-cac71ebb7ae9\n",
+      "Succeeded\n",
+      "1a5b59c1-c26e-4028-b6d9-54b7676f3c67\n",
+      "Succeeded\n",
+      "c2f3e181-22c5-460c-a4ac-d2fd2694f96e\n"
     ]
    }
   ],
   "source": [
-    "for job in jobs_list[:10]:\n",
+    "for job in jobs_list[:20]:\n",
    "    print(job.retrieve_status())\n",
    "    print(job.id)"
   ]
@@ -125,7 +184,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 31,
   "id": "89d3a568-4801-4168-891f-04c65091d336",
   "metadata": {},
   "outputs": [],
@@ -139,22 +198,24 @@
    "        if file.endswith(\".h5\"):\n",
    "            # fp = os.path.join(subdir, file)\n",
    "            # print(fp)\n",
-    "            file_list.append(file)"
+    "            file_list.append(os.path.join(subdir, file))"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 32,
-   "id": "55675d70-413e-4f1e-8666-3bb4f38e3359",
+   "id": "7dde8135-d6b6-43ce-b2c4-fd8d0cda4077",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "74"
+       "0"
      ]
     },
-     "execution_count": 9,
+     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -256,7 +317,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 28,
   "id": "35ff40f2-65f8-414d-a53f-e8e0f7487557",
   "metadata": {},
   "outputs": [],
@@ -266,7 +327,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 29,
   "id": "7c980c00-9823-4043-b235-4a837b8b92fa",
   "metadata": {},
   "outputs": [],
@@ -283,7 +344,7 @@
   "outputs": [],
   "source": [
    "outdir = \"/projects/my-private-bucket/GEDI/raw/SouthAmerica\"\n",
-    "for fp in csv_list:\n",
+    "for fp in file_list:\n",
    "    basename = os.path.basename(fp)\n",
    "    if \"GEDI01_B\" in basename:\n",
    "        outfp = os.path.join(outdir, \"L1B\", basename)\n",

 %% Cell type:code id:9803c6a0-d547-4112-8edc-eb62680360e2 tags:
 ``` python
 import os
 import re
 from maap.maap import MAAP
 import re
 # maap = MAAP(maap_host='api.maap-project.org')
 maap = MAAP(maap_host='api.maap-project.org')
+import time
 ```
 %% Cell type:code id:96d5cdce-a2cf-4ec5-8671-f9edd500ab8b tags:
 ``` python
 #maap.register_algorithm_from_yaml_file("/projects/arojas_download_gedi_data.yml")
 ```
 %% Cell type:code id:54d3df6a-e22a-4f15-ab22-859df5d4b367 tags:
 ``` python
 # Change to appropriate filepaths for urls
 # l1b_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt"
 # l2a_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt"
 l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
 l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"
 with open(l1b_urls_fpath) as f:
    l1b_fpaths = f.read().splitlines()
 with open(l2a_urls_fpath) as f:
    l2a_fpaths = f.read().splitlines()
 # Get matching string pattern and run main.py
 jobs_list = []
 counter=1
 for l1b_fp in l1b_fpaths[:10]:
    # Get string pattern
    str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
    try:
        l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
    except:
        print("No matching L2 file!", l1b_fp)
        continue
    # print(l1b_fp)
    # print(l2a_fp)
    job = maap.submitJob(identifier="SouthAmericaGEDI-run-20240117",
                         algo_id="arojas_download_gedi_data",
                         version="master",
                         username="arojearthdata",
                         queue="maap-dps-worker-16gb",
                         L1B_URL=l1b_fp,
                         L2A_URL=l2a_fp)
    jobs_list.append(job)
    if counter%100==0:
        print("on file num: ", counter, end='\r')
+        time.sleep(900)
    counter+=1
 ```
+%% Cell type:code id:d71d17cf-9a79-4508-9047-dade9b64201b tags:
+``` python
+for l1b_fp in l1b_fpaths[:10]:
+    # Get string pattern
+    str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
+    l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
+    print(l1b_fp)
+    print(l2a_fp)
+    break
+```
+%% Output
+    https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI01_B.002/2020.08.31/GEDI01_B_2020244155754_O09740_04_T06708_02_005_01_V002.h5
+    https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_A.002/2020.08.31/GEDI02_A_2020244155754_O09740_04_T06708_02_003_01_V002.h5
+%% Cell type:code id:f1dce543-f916-4561-990b-ae8c3fdb7ad7 tags:
+``` python
+```
 %% Cell type:code id:d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d tags:
 ``` python
-for job in jobs_list[:10]:
+for job in jobs_list[:20]:
    print(job.retrieve_status())
    print(job.id)
 ```
 %% Output
-    Accepted
+    Succeeded
-    54dc2f4c-eb12-4720-a83b-5b2e3548a94f
+    f70b1586-e3ac-4155-a73e-49f5386c7e2c
-    Accepted
+    Succeeded
-    6cc81082-0bc8-4d31-b295-62c4135b9b4b
+    10279c23-3225-460e-8cf9-6364fa0ec2a0
-    Accepted
+    Succeeded
-    1b7bf749-78f6-49ee-9e84-71783e8d5449
+    fe212a29-9da4-4a72-a02d-94a37750e1fa
-    Accepted
+    Succeeded
-    6e09b33b-8159-4449-9654-ccf29e6b6486
+    7855a6a3-0416-41f6-b546-11eab9097adc
-    Accepted
+    Succeeded
-    774d6139-212f-4841-8f1b-13a5eadda4bc
+    8c343834-6622-404e-9b83-d4dd771a151f
-    Accepted
+    Succeeded
-    bbdb0764-3dc2-458c-aa8d-2151e03125c5
+    cc412cd5-1491-4dad-bc19-63ae70f9b6af
-    Accepted
+    Succeeded
-    1c5faed3-2c03-4260-ac4e-7478007c0b0d
+    86b301cf-5b37-43af-b2f5-cb51d5f6b1c3
-    Accepted
+    Succeeded
-    c92b2edf-2c1b-4b76-8e4d-d18634b7603d
+    ec0e4005-b511-4d93-8fac-a0ff34682b1f
-    Accepted
+    Succeeded
-    67ed03d8-7d9a-41a7-8858-55f58012a00e
+    30f06fb8-aab6-4e74-aba4-e7a5d76ac100
-    Accepted
+    Succeeded
-    85c052ca-1269-4335-9559-63b2475a069f
+    30c44930-66fa-4215-b877-517a1e6472b1
+    Succeeded
+    c1e9fd05-241a-4bea-8a85-3fc8992cbf67
+    Succeeded
+    984eab57-449c-4fce-b490-b6ef05cd80f2
+    Succeeded
+    da048416-13f5-4a00-b7e0-76bff20bfa12
+    Succeeded
+    cc8d47fc-7615-4ba2-adbc-276b5f45c75a
+    Succeeded
+    c42d1afe-3d55-40d3-af4a-8ed98bc98492
+    Succeeded
+    e39ebed1-9694-4dfb-bafb-5249c7fca41e
+    Succeeded
+    656d0e68-ab9e-42a2-95b4-7fa45503cd8a
+    Succeeded
+    5c36624d-c33b-4756-9436-cac71ebb7ae9
+    Succeeded
+    1a5b59c1-c26e-4028-b6d9-54b7676f3c67
+    Succeeded
+    c2f3e181-22c5-460c-a4ac-d2fd2694f96e
 %% Cell type:code id:e5f0edb0-ffe4-4f8e-bb4f-d51686ee47b4 tags:
 ``` python
 ```
 %% Cell type:code id:89d3a568-4801-4168-891f-04c65091d336 tags:
 ``` python
 # Some files didn't download, let's check
 import glob
 indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
 file_list = []
 for subdir, dirs, files in os.walk(indir):
    for file in files:
        if file.endswith(".h5"):
            # fp = os.path.join(subdir, file)
            # print(fp)
-            file_list.append(file)
+            file_list.append(os.path.join(subdir, file))
 ```
-%% Cell type:code id:55675d70-413e-4f1e-8666-3bb4f38e3359 tags:
+%% Cell type:code id:7dde8135-d6b6-43ce-b2c4-fd8d0cda4077 tags:
 ``` python
 len(file_list)
 ```
 %% Output
-    74
+    0
 %% Cell type:code id:7513ce41-f4ea-44e2-9d76-ea3b857c8069 tags:
 ``` python
 ```
 %% Cell type:code id:cd11283a-63f1-4893-828e-6cd72f646ef1 tags:
 ``` python
 # rerun and get missed files
 # Get all downloaded .h5 files
 indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
 file_list = []
 for subdir, dirs, files in os.walk(indir):
    for file in files:
        if file.endswith(".h5"):
            # fp = os.path.join(subdir, file)
            # print(fp)
            file_list.append(file)
 # Change to appropriate filepaths for urls
 l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
 l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"
 with open(l1b_urls_fpath) as f:
    l1b_fpaths = f.readlines()
 with open(l2a_urls_fpath) as f:
    l2a_fpaths = f.readlines()
 print("starting loop")
 # Get matching string pattern and run main.py
 jobs_list = []
 counter=1
 for l1b_fp in l1b_fpaths:
    # Get string pattern
    str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
    try:
        l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
    except:
        print("No matching L2 file!", l1b_fp)
        continue
    # Check if file was already downloaded
    date_str = os.path.basename(l1b_fp).split("_")[2]
    if any(date_str in x for x in file_list):
        continue
    # Submit job
    job = maap.submitJob(identifier="SouthAmericaGEDI",
                         algo_id="arojas_download_gedi_data",
                         version="master",
                         username="arojearthdata",
                         queue="maap-dps-worker-8gb",
                         L1B_URL=l1b_fp,
                         L2A_URL=l2a_fp)
    jobs_list.append(job)
    if counter%100==0:
        print("on file num: ", counter, end='\r')
    counter+=1
 print('Done')
 ```
 %% Output
    starting loop
    Doneile num:  400
 %% Cell type:code id:0f17583e-20da-4dea-998c-9e0139700400 tags:
 ``` python
 ```
 %% Cell type:code id:35ff40f2-65f8-414d-a53f-e8e0f7487557 tags:
 ``` python
 ## Condense files into new single folder
 ```
 %% Cell type:code id:7c980c00-9823-4043-b235-4a837b8b92fa tags:
 ``` python
 import shutil
 # shutil.move("path/to/current/file.foo", "path/to/new/destination/for/file.foo")
 ```
 %% Cell type:code id:3d80ac20-b71e-486f-b153-6d19c3fd1fc2 tags:
 ``` python
 outdir = "/projects/my-private-bucket/GEDI/raw/SouthAmerica"
-for fp in csv_list:
+for fp in file_list:
    basename = os.path.basename(fp)
    if "GEDI01_B" in basename:
        outfp = os.path.join(outdir, "L1B", basename)
    elif "GEDI02_A" in basename:
        outfp = os.path.join(outdir, "L2A", basename)
    shutil.move(fp, outfp)
 print("DONE")
 ```
 %% Cell type:code id:4c3f7b66-e482-427f-85ba-19c6d2786e16 tags:
 ``` python
 ## Check all files in new folder!
 indir = "/projects/my-private-bucket/GEDI/biomass/2022"
 csv_list = glob.glob(os.path.join(indir, "*"))
 print(len(csv_list))
 ```
 %% Cell type:code id:f63c64c5-4688-43f4-8139-bfc68236d4f4 tags:
 ``` python
 ```

--- a/run.sh
+++ b/run.sh
-source activate osgeo-env
+# source activate osgeo-env
+source activate /projects/env/osgeo-env
+# conda activate /projects/env/osgeo-env
 # Get current location of build script
 basedir=$( cd "$(dirname "$0")" ; pwd -P )