updated files

0bd63d63 · Alex Rojas · 68d0ba49 · 0bd63d63 · 0bd63d63 · 0bd63d63
Commit 0bd63d63 authored 1 year ago by Alex Rojas
--- a/.ipynb_checkpoints/build-env-checkpoint.sh
+++ b/.ipynb_checkpoints/build-env-checkpoint.sh
+#!/bin/bash
+
+# Build script: creates the osgeo-env conda environment from the
+# environment.yml located next to this script, then clones maap-py v3.0.1
+# and installs it into that environment in editable mode.
+
+# source activate base
+# Absolute directory of this script, so environment.yml is found no matter
+# which working directory the script is launched from.
+basedir=$( cd "$(dirname "$0")" ; pwd -P)
+echo installing environment...
+# Path is quoted so a checkout location containing spaces still works.
+mamba env create --name osgeo-env -f "${basedir}/environment.yml"
+
+# Install the maap-py package into the new environment
+echo trying to install maap-py...
+source activate osgeo-env
+git clone --single-branch --branch v3.0.1 https://github.com/MAAP-Project/maap-py.git
+# Stop here if the clone directory is missing; otherwise pip would try to
+# install whatever project happens to live in the current directory.
+cd maap-py || exit 1
+pip install -e .
+echo installed maap-py package!
\ No newline at end of file
--- a/.ipynb_checkpoints/environment-checkpoint.yml
+++ b/.ipynb_checkpoints/environment-checkpoint.yml
+name: osgeo-env
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - boto3
+  - botocore
+  - fsspec
+  - s3fs
+  - geopandas
+  - geopandas-base
+  - h5py
+  - numpy
+  - pandas
+  - python
+  - scipy
--- a/.ipynb_checkpoints/run-checkpoint.sh
+++ b/.ipynb_checkpoints/run-checkpoint.sh
+source activate osgeo-env
+
+# Get current location of build script
+basedir=$( cd "$(dirname "$0")" ; pwd -P )
+
+# Create output directory to store outputs.
+# The name is output as required by the DPS.
+# Note how we dont provide an absolute path
+# but instead a relative one as the DPS creates
+# a temp working directory for our code.
+
+mkdir -p output
+
+# INPUT_FILES
+L1B_URL=$1
+L2A_URL=$2 # e.g. GEDI01_B or GEDI02_A
+
+# Call the script using the absolute paths
+# Any output written to the stdout and stderr streams will be automatically captured and placed in the output dir
+python ${basedir}/main.py ${L1B_URL} ${L2A_URL} output
\ No newline at end of file
--- a/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb
+++ b/notebooks/.ipynb_checkpoints/access-gedi-checkpoint.ipynb
 {
- "cells": [],
- "metadata": {},
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "9803c6a0-d547-4112-8edc-eb62680360e2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import re\n",
+    "from maap.maap import MAAP\n",
+    "import re\n",
+    "# maap = MAAP(maap_host='api.maap-project.org')\n",
+    "maap = MAAP(maap_host='api.maap-project.org')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "96d5cdce-a2cf-4ec5-8671-f9edd500ab8b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<Response [200]>"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#maap.register_algorithm_from_yaml_file(\"/projects/arojas_download_gedi_data.yml\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "54d3df6a-e22a-4f15-ab22-859df5d4b367",
+   "metadata": {
+    "scrolled": true,
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "on file num:  400\r"
+     ]
+    }
+   ],
+   "source": [
+    "# Change to appropriate filepaths for urls\n",
+    "# l1b_urls_fpath = \"/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt\"\n",
+    "# l2a_urls_fpath = \"/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt\"\n",
+    "l1b_urls_fpath = \"./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt\"\n",
+    "l2a_urls_fpath = \"./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt\"\n",
+    "\n",
+    "with open(l1b_urls_fpath) as f:\n",
+    "    l1b_fpaths = f.read().splitlines()\n",
+    "with open(l2a_urls_fpath) as f:\n",
+    "    l2a_fpaths = f.read().splitlines()\n",
+    "\n",
+    "# Get matching string pattern and run main.py\n",
+    "jobs_list = []\n",
+    "counter=1\n",
+    "for l1b_fp in l1b_fpaths:\n",
+    "    # Get string pattern\n",
+    "    str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n",
+    "    try:\n",
+    "        l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]\n",
+    "    except:\n",
+    "        print(\"No matching L2 file!\", l1b_fp)\n",
+    "        continue\n",
+    "    # print(l1b_fp)\n",
+    "    # print(l2a_fp)\n",
+    "    job = maap.submitJob(identifier=\"SouthAmericaGEDI-run-20240117\",\n",
+    "                         algo_id=\"arojas_download_gedi_data\",\n",
+    "                         version=\"master\",\n",
+    "                         username=\"arojearthdata\",\n",
+    "                         queue=\"maap-dps-worker-8gb\",\n",
+    "                         L1B_URL=l1b_fp,\n",
+    "                         L2A_URL=l2a_fp)\n",
+    "\n",
+    "    jobs_list.append(job)\n",
+    "    \n",
+    "    if counter%100==0:\n",
+    "        print(\"on file num: \", counter, end='\\r')\n",
+    "    counter+=1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running\n",
+      "9efde8c0-b4aa-4d4b-9972-55b72be802a6\n",
+      "Running\n",
+      "d101a1b9-c44e-44e8-931a-6fe9f436e03d\n",
+      "Running\n",
+      "acc12502-5cfb-4f05-b05b-2f57657d2f53\n",
+      "Running\n",
+      "126548ea-2fd0-434a-b040-66a0a45d8f9c\n",
+      "Running\n",
+      "325641a2-f0f5-40a7-8904-c023d71a7b55\n",
+      "Running\n",
+      "a9039b0e-f678-403a-8ff2-576adb138199\n",
+      "Accepted\n",
+      "a99753e5-3ceb-4b25-b1e5-e1c154d2235d\n",
+      "Accepted\n",
+      "b985c8ff-77ae-41c9-8b00-d1e9ee0cc717\n",
+      "Accepted\n",
+      "d70e4917-323f-48e8-976d-6c94b08277a2\n",
+      "Accepted\n",
+      "0fe1d4f4-cb0c-41ae-99ad-0e9d28f7e695\n"
+     ]
+    }
+   ],
+   "source": [
+    "for job in jobs_list[:10]:\n",
+    "    print(job.retrieve_status())\n",
+    "    print(job.id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "e5f0edb0-ffe4-4f8e-bb4f-d51686ee47b4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "89d3a568-4801-4168-891f-04c65091d336",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Some files didnt download, lets check\n",
+    "import glob\n",
+    "indir = \"/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master\"\n",
+    "file_list = []\n",
+    "for subdir, dirs, files in os.walk(indir):\n",
+    "    for file in files:\n",
+    "        if file.endswith(\".h5\"):\n",
+    "            # fp = os.path.join(subdir, file)\n",
+    "            # print(fp)\n",
+    "            file_list.append(file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "55675d70-413e-4f1e-8666-3bb4f38e3359",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "74"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(file_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7513ce41-f4ea-44e2-9d76-ea3b857c8069",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "cd11283a-63f1-4893-828e-6cd72f646ef1",
+   "metadata": {
+    "scrolled": true,
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "starting loop\n",
+      "Doneile num:  400\n"
+     ]
+    }
+   ],
+   "source": [
+    "# rerun and get missed files\n",
+    "\n",
+    "# Get all CSV files\n",
+    "indir = \"/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master\"\n",
+    "file_list = []\n",
+    "for subdir, dirs, files in os.walk(indir):\n",
+    "    for file in files:\n",
+    "        if file.endswith(\".h5\"):\n",
+    "            # fp = os.path.join(subdir, file)\n",
+    "            # print(fp)\n",
+    "            file_list.append(file)\n",
+    "\n",
+    "\n",
+    "# Change to appropriate filepaths for urls\n",
+    "l1b_urls_fpath = \"./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt\"\n",
+    "l2a_urls_fpath = \"./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt\"\n",
+    "\n",
+    "with open(l1b_urls_fpath) as f:\n",
+    "    l1b_fpaths = f.readlines()\n",
+    "with open(l2a_urls_fpath) as f:\n",
+    "    l2a_fpaths = f.readlines()\n",
+    "print(\"starting loop\")\n",
+    "# Get matching string pattern and run main.py\n",
+    "jobs_list = []\n",
+    "counter=1\n",
+    "for l1b_fp in l1b_fpaths:\n",
+    "    # Get string pattern\n",
+    "    str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n",
+    "    try:\n",
+    "        l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]\n",
+    "    except:\n",
+    "        print(\"No matching L2 file!\", l1b_fp)\n",
+    "        continue\n",
+    "        \n",
+    "    # Check if file was already downloaded\n",
+    "    date_str = os.path.basename(l1b_fp).split(\"_\")[2]\n",
+    "    if any(date_str in x for x in file_list):\n",
+    "        continue\n",
+    "        \n",
+    "    # Submit job\n",
+    "    job = maap.submitJob(identifier=\"SouthAmericaGEDI\",\n",
+    "                         algo_id=\"arojas_download_gedi_data\",\n",
+    "                         version=\"master\",\n",
+    "                         username=\"arojearthdata\",\n",
+    "                         queue=\"maap-dps-worker-8gb\",\n",
+    "                         L1B_URL=l1b_fp,\n",
+    "                         L2A_URL=l2a_fp)\n",
+    "    jobs_list.append(job)\n",
+    "    \n",
+    "    if counter%100==0:\n",
+    "        print(\"on file num: \", counter, end='\\r')\n",
+    "    counter+=1\n",
+    "\n",
+    "print('Done')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0f17583e-20da-4dea-998c-9e0139700400",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "35ff40f2-65f8-414d-a53f-e8e0f7487557",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Condense files into new single folder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7c980c00-9823-4043-b235-4a837b8b92fa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import shutil\n",
+    "# shutil.move(\"path/to/current/file.foo\", \"path/to/new/destination/for/file.foo\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3d80ac20-b71e-486f-b153-6d19c3fd1fc2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "outdir = \"/projects/my-private-bucket/GEDI/raw/SouthAmerica\"\n",
+    "for fp in csv_list:\n",
+    "    basename = os.path.basename(fp)\n",
+    "    if \"GEDI01_B\" in basename:\n",
+    "        outfp = os.path.join(outdir, \"L1B\", basename)\n",
+    "    elif \"GEDI02_A\" in basename:\n",
+    "        outfp = os.path.join(outdir, \"L2A\", basename)\n",
+    "    shutil.move(fp, outfp)\n",
+    "print(\"DONE\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4c3f7b66-e482-427f-85ba-19c6d2786e16",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Check all files in new folder!\n",
+    "indir = \"/projects/my-private-bucket/GEDI/biomass/2022\"\n",
+    "csv_list = glob.glob(os.path.join(indir, \"*\"))\n",
+    "print(len(csv_list))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f63c64c5-4688-43f4-8139-bfc68236d4f4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
 "nbformat": 4,
 "nbformat_minor": 5
 }
+%% Cell type:code id:9803c6a0-d547-4112-8edc-eb62680360e2 tags:
+
+``` python
+import os
+import re
+from maap.maap import MAAP
+import re
+# maap = MAAP(maap_host='api.maap-project.org')
+maap = MAAP(maap_host='api.maap-project.org')
+```
+
+%% Cell type:code id:96d5cdce-a2cf-4ec5-8671-f9edd500ab8b tags:
+
+``` python
+#maap.register_algorithm_from_yaml_file("/projects/arojas_download_gedi_data.yml")
+```
+
+%% Output
+
+    <Response [200]>
+
+%% Cell type:code id:54d3df6a-e22a-4f15-ab22-859df5d4b367 tags:
+
+``` python
+# Change to appropriate filepaths for urls
+# l1b_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt"
+# l2a_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt"
+l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
+l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"
+
+with open(l1b_urls_fpath) as f:
+    l1b_fpaths = f.read().splitlines()
+with open(l2a_urls_fpath) as f:
+    l2a_fpaths = f.read().splitlines()
+
+# Get matching string pattern and run main.py
+jobs_list = []
+counter=1
+for l1b_fp in l1b_fpaths:
+    # Get string pattern
+    str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
+    try:
+        l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
+    except:
+        print("No matching L2 file!", l1b_fp)
+        continue
+    # print(l1b_fp)
+    # print(l2a_fp)
+    job = maap.submitJob(identifier="SouthAmericaGEDI-run-20240117",
+                         algo_id="arojas_download_gedi_data",
+                         version="master",
+                         username="arojearthdata",
+                         queue="maap-dps-worker-8gb",
+                         L1B_URL=l1b_fp,
+                         L2A_URL=l2a_fp)
+
+    jobs_list.append(job)
+
+    if counter%100==0:
+        print("on file num: ", counter, end='\r')
+    counter+=1
+```
+
+%% Output
+
+    on file num:  400
+
+%% Cell type:code id:d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d tags:
+
+``` python
+for job in jobs_list[:10]:
+    print(job.retrieve_status())
+    print(job.id)
+```
+
+%% Output
+
+    Running
+    9efde8c0-b4aa-4d4b-9972-55b72be802a6
+    Running
+    d101a1b9-c44e-44e8-931a-6fe9f436e03d
+    Running
+    acc12502-5cfb-4f05-b05b-2f57657d2f53
+    Running
+    126548ea-2fd0-434a-b040-66a0a45d8f9c
+    Running
+    325641a2-f0f5-40a7-8904-c023d71a7b55
+    Running
+    a9039b0e-f678-403a-8ff2-576adb138199
+    Accepted
+    a99753e5-3ceb-4b25-b1e5-e1c154d2235d
+    Accepted
+    b985c8ff-77ae-41c9-8b00-d1e9ee0cc717
+    Accepted
+    d70e4917-323f-48e8-976d-6c94b08277a2
+    Accepted
+    0fe1d4f4-cb0c-41ae-99ad-0e9d28f7e695
+
+%% Cell type:code id:e5f0edb0-ffe4-4f8e-bb4f-d51686ee47b4 tags:
+
+``` python
+```
+
+%% Cell type:code id:89d3a568-4801-4168-891f-04c65091d336 tags:
+
+``` python
+# Some files didnt download, lets check
+import glob
+indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
+file_list = []
+for subdir, dirs, files in os.walk(indir):
+    for file in files:
+        if file.endswith(".h5"):
+            # fp = os.path.join(subdir, file)
+            # print(fp)
+            file_list.append(file)
+```
+
+%% Cell type:code id:55675d70-413e-4f1e-8666-3bb4f38e3359 tags:
+
+``` python
+len(file_list)
+```
+
+%% Output
+
+    74
+
+%% Cell type:code id:7513ce41-f4ea-44e2-9d76-ea3b857c8069 tags:
+
+``` python
+```
+
+%% Cell type:code id:cd11283a-63f1-4893-828e-6cd72f646ef1 tags:
+
+``` python
+# rerun and get missed files
+
+# Get all CSV files
+indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
+file_list = []
+for subdir, dirs, files in os.walk(indir):
+    for file in files:
+        if file.endswith(".h5"):
+            # fp = os.path.join(subdir, file)
+            # print(fp)
+            file_list.append(file)
+
+
+# Change to appropriate filepaths for urls
+l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
+l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"
+
+with open(l1b_urls_fpath) as f:
+    l1b_fpaths = f.readlines()
+with open(l2a_urls_fpath) as f:
+    l2a_fpaths = f.readlines()
+print("starting loop")
+# Get matching string pattern and run main.py
+jobs_list = []
+counter=1
+for l1b_fp in l1b_fpaths:
+    # Get string pattern
+    str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
+    try:
+        l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
+    except:
+        print("No matching L2 file!", l1b_fp)
+        continue
+
+    # Check if file was already downloaded
+    date_str = os.path.basename(l1b_fp).split("_")[2]
+    if any(date_str in x for x in file_list):
+        continue
+
+    # Submit job
+    job = maap.submitJob(identifier="SouthAmericaGEDI",
+                         algo_id="arojas_download_gedi_data",
+                         version="master",
+                         username="arojearthdata",
+                         queue="maap-dps-worker-8gb",
+                         L1B_URL=l1b_fp,
+                         L2A_URL=l2a_fp)
+    jobs_list.append(job)
+
+    if counter%100==0:
+        print("on file num: ", counter, end='\r')
+    counter+=1
+
+print('Done')
+```
+
+%% Output
+
+    starting loop
+    Doneile num:  400
+
+%% Cell type:code id:0f17583e-20da-4dea-998c-9e0139700400 tags:
+
+``` python
+```
+
+%% Cell type:code id:35ff40f2-65f8-414d-a53f-e8e0f7487557 tags:
+
+``` python
+## Condense files into new single folder
+```
+
+%% Cell type:code id:7c980c00-9823-4043-b235-4a837b8b92fa tags:
+
+``` python
+import shutil
+# shutil.move("path/to/current/file.foo", "path/to/new/destination/for/file.foo")
+```
+
+%% Cell type:code id:3d80ac20-b71e-486f-b153-6d19c3fd1fc2 tags:
+
+``` python
+outdir = "/projects/my-private-bucket/GEDI/raw/SouthAmerica"
+for fp in csv_list:
+    basename = os.path.basename(fp)
+    if "GEDI01_B" in basename:
+        outfp = os.path.join(outdir, "L1B", basename)
+    elif "GEDI02_A" in basename:
+        outfp = os.path.join(outdir, "L2A", basename)
+    shutil.move(fp, outfp)
+print("DONE")
+```
+
+%% Cell type:code id:4c3f7b66-e482-427f-85ba-19c6d2786e16 tags:
+
+``` python
+## Check all files in new folder!
+indir = "/projects/my-private-bucket/GEDI/biomass/2022"
+csv_list = glob.glob(os.path.join(indir, "*"))
+print(len(csv_list))
+```
+
+%% Cell type:code id:f63c64c5-4688-43f4-8139-bfc68236d4f4 tags:
+
+``` python
+```

--- a/notebooks/access-gedi.ipynb
+++ b/notebooks/access-gedi.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 5,
   "id": "9803c6a0-d547-4112-8edc-eb62680360e2",
   "metadata": {},
   "outputs": [],
@@ -17,21 +17,36 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
-   "id": "54d3df6a-e22a-4f15-ab22-859df5d4b367",
+   "execution_count": 9,
+   "id": "96d5cdce-a2cf-4ec5-8671-f9edd500ab8b",
   "metadata": {
-    "scrolled": true,
    "tags": []
   },
   "outputs": [
    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "on file num:  400\r"
-     ]
+     "data": {
+      "text/plain": [
+       "<Response [200]>"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
    }
   ],
+   "source": [
+    "#maap.register_algorithm_from_yaml_file(\"/projects/arojas_download_gedi_data.yml\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "54d3df6a-e22a-4f15-ab22-859df5d4b367",
+   "metadata": {
+    "scrolled": true,
+    "tags": []
+   },
+   "outputs": [],
   "source": [
    "# Change to appropriate filepaths for urls\n",
    "# l1b_urls_fpath = \"/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt\"\n",
@@ -47,7 +62,7 @@
    "# Get matching string pattern and run main.py\n",
    "jobs_list = []\n",
    "counter=1\n",
-    "for l1b_fp in l1b_fpaths:\n",
+    "for l1b_fp in l1b_fpaths[:10]:\n",
    "    # Get string pattern\n",
    "    str_pattern = re.findall(\"[0-9]{13}\", os.path.basename(l1b_fp))[0] \n",
    "    try:\n",
@@ -57,7 +72,7 @@
    "        continue\n",
    "    # print(l1b_fp)\n",
    "    # print(l2a_fp)\n",
-    "    job = maap.submitJob(identifier=\"SouthAmericaGEDI\",\n",
+    "    job = maap.submitJob(identifier=\"SouthAmericaGEDI-run-20240117\",\n",
    "                         algo_id=\"arojas_download_gedi_data\",\n",
    "                         version=\"master\",\n",
    "                         username=\"arojearthdata\",\n",
@@ -74,7 +89,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 13,
   "id": "d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d",
   "metadata": {},
   "outputs": [
@@ -82,16 +97,33 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Failed\n",
-      "353e658d-8bfa-4b7d-b508-24e8d42d3483\n"
+      "Accepted\n",
+      "84175819-3e25-4fee-a6c0-4e52b2dca88f\n",
+      "Accepted\n",
+      "7a478af7-e3e5-4206-a033-4fd23a1e3c81\n",
+      "Accepted\n",
+      "04d8a2dc-e54f-461d-8ace-39a381dd69d1\n",
+      "Accepted\n",
+      "c6bd0fc6-8f77-4f17-8cc1-4508232366d4\n",
+      "Accepted\n",
+      "630b5127-7b1f-4d2a-95a2-f571a6a32d6d\n",
+      "Accepted\n",
+      "747dbd20-d920-4658-aa0d-fe3c47838f49\n",
+      "Accepted\n",
+      "fa394b4b-0009-484f-af99-e96cfcc8d97b\n",
+      "Accepted\n",
+      "854aa24b-0861-42fa-9ff7-ae501791448b\n",
+      "Accepted\n",
+      "a28e565b-03a4-4c89-b4de-ad2c7d824fec\n",
+      "Accepted\n",
+      "3dc475c2-c87e-43a3-a876-1118815ff82f\n"
     ]
    }
   ],
   "source": [
-    "for job in jobs_list:\n",
+    "for job in jobs_list[:10]:\n",
    "    print(job.retrieve_status())\n",
-    "    print(job.id)\n",
-    "    break"
+    "    print(job.id)"
   ]
  },
  {

 %% Cell type:code id:9803c6a0-d547-4112-8edc-eb62680360e2 tags:

 ``` python
 import os
 import re
 from maap.maap import MAAP
 import re
 # maap = MAAP(maap_host='api.maap-project.org')
 maap = MAAP(maap_host='api.maap-project.org')
 ```

+%% Cell type:code id:96d5cdce-a2cf-4ec5-8671-f9edd500ab8b tags:
+
+``` python
+#maap.register_algorithm_from_yaml_file("/projects/arojas_download_gedi_data.yml")
+```
+
+%% Output
+
+    <Response [200]>
+
 %% Cell type:code id:54d3df6a-e22a-4f15-ab22-859df5d4b367 tags:

 ``` python
 # Change to appropriate filepaths for urls
 # l1b_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L1B-2022-URLS.txt"
 # l2a_urls_fpath = "/projects/biomass-gedi-conus/data/GEDI-L2A-2022-URLS.txt"
 l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
 l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"

 with open(l1b_urls_fpath) as f:
    l1b_fpaths = f.read().splitlines()
 with open(l2a_urls_fpath) as f:
    l2a_fpaths = f.read().splitlines()

 # Get matching string pattern and run main.py
 jobs_list = []
 counter=1
-for l1b_fp in l1b_fpaths:
+for l1b_fp in l1b_fpaths[:10]:
    # Get string pattern
    str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
    try:
        l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
    except:
        print("No matching L2 file!", l1b_fp)
        continue
    # print(l1b_fp)
    # print(l2a_fp)
-    job = maap.submitJob(identifier="SouthAmericaGEDI",
+    job = maap.submitJob(identifier="SouthAmericaGEDI-run-20240117",
                         algo_id="arojas_download_gedi_data",
                         version="master",
                         username="arojearthdata",
                         queue="maap-dps-worker-8gb",
                         L1B_URL=l1b_fp,
                         L2A_URL=l2a_fp)

    jobs_list.append(job)

    if counter%100==0:
        print("on file num: ", counter, end='\r')
    counter+=1
 ```

-%% Output
-
-    on file num:  400
-
 %% Cell type:code id:d88c4aa3-62bc-4a9e-8ab9-2c3a74de3f4d tags:

 ``` python
-for job in jobs_list:
+for job in jobs_list[:10]:
    print(job.retrieve_status())
    print(job.id)
-    break
 ```

 %% Output

-    Failed
-    353e658d-8bfa-4b7d-b508-24e8d42d3483
+    Accepted
+    84175819-3e25-4fee-a6c0-4e52b2dca88f
+    Accepted
+    7a478af7-e3e5-4206-a033-4fd23a1e3c81
+    Accepted
+    04d8a2dc-e54f-461d-8ace-39a381dd69d1
+    Accepted
+    c6bd0fc6-8f77-4f17-8cc1-4508232366d4
+    Accepted
+    630b5127-7b1f-4d2a-95a2-f571a6a32d6d
+    Accepted
+    747dbd20-d920-4658-aa0d-fe3c47838f49
+    Accepted
+    fa394b4b-0009-484f-af99-e96cfcc8d97b
+    Accepted
+    854aa24b-0861-42fa-9ff7-ae501791448b
+    Accepted
+    a28e565b-03a4-4c89-b4de-ad2c7d824fec
+    Accepted
+    3dc475c2-c87e-43a3-a876-1118815ff82f

 %% Cell type:code id:e5f0edb0-ffe4-4f8e-bb4f-d51686ee47b4 tags:

 ``` python
 ```

 %% Cell type:code id:89d3a568-4801-4168-891f-04c65091d336 tags:

 ``` python
 # Some files didnt download, lets check
 import glob
 indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
 file_list = []
 for subdir, dirs, files in os.walk(indir):
    for file in files:
        if file.endswith(".h5"):
            # fp = os.path.join(subdir, file)
            # print(fp)
            file_list.append(file)
 ```

 %% Cell type:code id:55675d70-413e-4f1e-8666-3bb4f38e3359 tags:

 ``` python
 len(file_list)
 ```

 %% Output

    74

 %% Cell type:code id:7513ce41-f4ea-44e2-9d76-ea3b857c8069 tags:

 ``` python
 ```

 %% Cell type:code id:cd11283a-63f1-4893-828e-6cd72f646ef1 tags:

 ``` python
 # rerun and get missed files

 # Get all CSV files
 indir = "/projects/my-private-bucket/dps_output/arojas_download_gedi_data/master"
 file_list = []
 for subdir, dirs, files in os.walk(indir):
    for file in files:
        if file.endswith(".h5"):
            # fp = os.path.join(subdir, file)
            # print(fp)
            file_list.append(file)


 # Change to appropriate filepaths for urls
 l1b_urls_fpath = "./GEDI-L1B-URLS-2020-MAYtoAUG-SA_AOI.txt"
 l2a_urls_fpath = "./GEDI-L2A-URLS-2020-MAYtoAUG-SA_AOI.txt"

 with open(l1b_urls_fpath) as f:
    l1b_fpaths = f.readlines()
 with open(l2a_urls_fpath) as f:
    l2a_fpaths = f.readlines()
 print("starting loop")
 # Get matching string pattern and run main.py
 jobs_list = []
 counter=1
 for l1b_fp in l1b_fpaths:
    # Get string pattern
    str_pattern = re.findall("[0-9]{13}", os.path.basename(l1b_fp))[0]
    try:
        l2a_fp = [s for s in l2a_fpaths if str_pattern in s][0]
    except:
        print("No matching L2 file!", l1b_fp)
        continue

    # Check if file was already downloaded
    date_str = os.path.basename(l1b_fp).split("_")[2]
    if any(date_str in x for x in file_list):
        continue

    # Submit job
    job = maap.submitJob(identifier="SouthAmericaGEDI",
                         algo_id="arojas_download_gedi_data",
                         version="master",
                         username="arojearthdata",
                         queue="maap-dps-worker-8gb",
                         L1B_URL=l1b_fp,
                         L2A_URL=l2a_fp)
    jobs_list.append(job)

    if counter%100==0:
        print("on file num: ", counter, end='\r')
    counter+=1

 print('Done')
 ```

 %% Output

    starting loop
    Doneile num:  400

 %% Cell type:code id:0f17583e-20da-4dea-998c-9e0139700400 tags:

 ``` python
 ```

 %% Cell type:code id:35ff40f2-65f8-414d-a53f-e8e0f7487557 tags:

 ``` python
 ## Condense files into new single folder
 ```

 %% Cell type:code id:7c980c00-9823-4043-b235-4a837b8b92fa tags:

 ``` python
 import shutil
 # shutil.move("path/to/current/file.foo", "path/to/new/destination/for/file.foo")
 ```

 %% Cell type:code id:3d80ac20-b71e-486f-b153-6d19c3fd1fc2 tags:

 ``` python
 outdir = "/projects/my-private-bucket/GEDI/raw/SouthAmerica"
 for fp in csv_list:
    basename = os.path.basename(fp)
    if "GEDI01_B" in basename:
        outfp = os.path.join(outdir, "L1B", basename)
    elif "GEDI02_A" in basename:
        outfp = os.path.join(outdir, "L2A", basename)
    shutil.move(fp, outfp)
 print("DONE")
 ```

 %% Cell type:code id:4c3f7b66-e482-427f-85ba-19c6d2786e16 tags:

 ``` python
 ## Check all files in new folder!
 indir = "/projects/my-private-bucket/GEDI/biomass/2022"
 csv_list = glob.glob(os.path.join(indir, "*"))
 print(len(csv_list))
 ```

 %% Cell type:code id:f63c64c5-4688-43f4-8139-bfc68236d4f4 tags:

 ``` python
 ```