From 98a01b977b81a413b68cc2a54f36afdc7a509fda Mon Sep 17 00:00:00 2001
From: Duflot <arthur.duflot@capgemini.com>
Date: Wed, 21 Feb 2024 16:52:36 +0100
Subject: [PATCH] synch

---
 .gitlab-ci.yml                                |  37 +++++
 Dockerfile                                    |  24 ++-
 README.md                                     |   4 +-
 app/get_dem.py                                | 146 ++++++++++++++++++
 build/entrypoint.sh                           |  29 +---
 .../workflow-checkpoint.cwl                   |  81 ++++++++++
 cwl/workflow.cwl                              |  65 ++++++++
 7 files changed, 346 insertions(+), 40 deletions(-)
 create mode 100644 .gitlab-ci.yml
 create mode 100644 app/get_dem.py
 create mode 100644 cwl/.ipynb_checkpoints/workflow-checkpoint.cwl
 create mode 100644 cwl/workflow.cwl

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000..16e8637
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,37 @@
+image: docker:18.09.7
+
+stages:
+  - build
+  - test
+
+services:
+  - docker:18.09.7-dind
+
+variables:
+    DOCKER_DRIVER: overlay
+    DOCKER_HOST: tcp://localhost:2375
+
+# Build the get-dem image and push it to the project registry (main branch only).
+push-image-to-registry:
+  image: docker:18.09.7-dind
+  stage: build
+  script:
+    # Feed the password on stdin so it never appears on the command line / process list.
+    - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"
+    - docker info
+    # NOTE(review): the Dockerfile declares no ARG, so these AWS_* build args look unused - confirm before passing secrets to the build.
+    - docker build --build-arg AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID --build-arg AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY --build-arg AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION --network host --pull -t "$CI_REGISTRY/$CI_PROJECT_PATH:latest" .
+    - docker push "$CI_REGISTRY/$CI_PROJECT_PATH:latest"
+  only:
+    - main
+
+# Publish the CWL descriptors to the public S3 bucket (main branch only).
+push-data-to-s3:
+  image: python:3.7
+  stage: test
+  script:
+    - pip3 install awscli
+    - echo "Going to push data to s3"
+    - aws s3 cp cwl s3://s3public/cwl/get-dem/ --recursive --exclude ".ipynb_checkpoints/*" --endpoint-url https://oss.eu-west-0.prod-cloud-ocb.orange-business.com
+  only:
+    - main
diff --git a/Dockerfile b/Dockerfile
index 2e8e8e4..7c8c121 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,25 +1,19 @@
-FROM python:3.12.1-bookworm
+FROM ghcr.io/osgeo/gdal:alpine-normal-3.8.2
 
 # Update package lists and install necessary packages
-RUN rm -rf /var/lib/apt/lists/*
-RUN apt-get update && apt-get install -y \
-    libproj-dev libgeos-dev libgdal-dev  \
-	wget vim curl\
+RUN apk add --no-cache py3-pip
 
 
-# Create a virtual environment and activate it
-RUN python3 -m venv venv
-# to know GDAL version use gdal-config --version
-RUN . venv/bin/activate && pip3 install geopandas matplotlib scipy scikit-image wheel GDAL==3.0.4 scikit-learn rasterio
+# Install sardem (the DEM tool wrapped by get_dem.py); no pip cache is kept in the image layer
+RUN pip install --no-cache-dir sardem
 
 COPY ./build/entrypoint.sh /opt
-RUN chmod +x /opt/entrypoint.sh 
-
-# Add application
+RUN chmod +x /opt/entrypoint.sh
 
+RUN mkdir -p /opt/get-dem
+COPY ./app/* /opt/get-dem/
+RUN chmod -R +x /opt/get-dem
 
-# Clean up the image
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
 
 # Set the entry point or command if needed
-ENTRYPOINT ["/opt/entrypoint.sh"]
\ No newline at end of file
+ENTRYPOINT ["/opt/entrypoint.sh"]
diff --git a/README.md b/README.md
index 31859c2..9cc1bad 100644
--- a/README.md
+++ b/README.md
@@ -15,14 +15,14 @@ Already a pro? Just edit this README.md and make it your own. Want to make it ea
 
 ```
 cd existing_repo
-git remote add origin https://repo.maap-project.org/arthur.duflot/get-dem.git
+git remote add origin https://repo.uat.maap-project.org/esa-maap-dev/get-dem.git
 git branch -M main
 git push -uf origin main
 ```
 
 ## Integrate with your tools
 
-- [ ] [Set up project integrations](https://repo.maap-project.org/arthur.duflot/get-dem/-/settings/integrations)
+- [ ] [Set up project integrations](https://repo.uat.maap-project.org/esa-maap-dev/get-dem/-/settings/integrations)
 
 ## Collaborate with your team
 
diff --git a/app/get_dem.py b/app/get_dem.py
new file mode 100644
index 0000000..8b49f4e
--- /dev/null
+++ b/app/get_dem.py
@@ -0,0 +1,146 @@
+import os
+import argparse
+import subprocess
+from osgeo import gdal
+import numpy as np
+from time import time
+
+__version__ = "0.2.0"
+
+def get_dem(bbox: str, out_dir: str) -> str:
+    """
+    Generate a COP DEM Gtiff for the given bounding box.
+
+    Parameters
+    ----------
+    bbox : str
+        lat/lon bounding box, with orientation: [left  bottom  right top].
+        Example: '-156 18.8 -154.7 20.3'.
+    out_dir : str
+        Path to an existing directory to store the generated DEM Gtiff in.
+
+    Returns
+    -------
+    dem_file : str
+        Filepath to the generated DEM Gtiff.
+        In practice, this will be: "<out_dir>/dem.tif"
+
+    Raises
+    ------
+    subprocess.CalledProcessError
+        If the `sardem` command exits with a non-zero status.
+    FileNotFoundError
+        If the `sardem` executable cannot be found.
+    """
+    dem_file = os.path.join(out_dir, "dem.tif")
+
+    # Step 1: Make sure PROJ_DATA is exported for sardem.
+    #     rasterio cannot find the PROJ_DATA directory when running in the
+    #     NASA MAAP ADE (e.g. /opt/conda/share/proj in the conda base env).
+    #     Read it from the environment directly (no shell needed) and only
+    #     re-export a non-empty value, so an unset variable stays unset.
+    proj_data_path = os.environ.get("PROJ_DATA", "").strip()
+    if proj_data_path:
+        os.environ["PROJ_DATA"] = proj_data_path
+
+    # Step 2: Run sardem with an argument list (no shell), so bbox and out_dir
+    #     are never interpreted by a shell; check=True surfaces a failed run
+    #     instead of returning the path of a file that was never written.
+    cmd = ["sardem", "--bbox", *bbox.split(), "--data-source", "COP",
+           "-o", dem_file, "--output-format", "GTiff"]
+    start = time()
+    subprocess.run(cmd, check=True)
+    print(f"Time to fetch and create dem.tif: {time()-start} seconds")
+
+    # Warning: in a Jupyter notebook on NASA MAAP ADE, Steps 1 and 2 must be combined:
+    #     !PROJ_DATA={proj_data_path} sardem --bbox {bbox} --data-source COP -o {dem_file} --output-format GTiff
+    return dem_file
+
+def do_computations(dem_file: str) -> None:
+    """
+    Open the DEM raster and do compute-intensive, multicore computations.
+
+    This function produces no meaningful output and does NOT modify the dem
+    file. It exercises the compute nodes by using the maximum number of CPUs
+    allowed by BLAS for an extended period, with significant memory use.
+    By default BLAS uses all available CPUs; to set this manually, from CLI:
+        export OPENBLAS_NUM_THREADS=20
+
+    Parameters
+    ----------
+    dem_file : str
+        Filepath to the generated DEM Gtiff ("<out_dir>/dem.tif").
+
+    Raises
+    ------
+    RuntimeError
+        If GDAL cannot open `dem_file`.
+    """
+    ds = gdal.Open(dem_file)
+    if ds is None:  # gdal.Open reports failure as None unless GDAL exceptions are enabled
+        raise RuntimeError(f"GDAL could not open DEM file: {dem_file}")
+    dem = ds.GetRasterBand(1).ReadAsArray()
+    min_edge = min(np.shape(dem))  # truncate to a square array so it can be inverted
+    dem = dem[:min_edge, :min_edge]
+    print("Number of CPU cores available on instance: ", os.cpu_count())
+    start = time()
+    try:
+        np.dot(np.linalg.inv(dem), dem)  # multi-core section; the product is discarded
+    except np.linalg.LinAlgError as err:  # e.g. a flat raster yields a singular matrix
+        print(f"Matrix inversion skipped: {err}")
+    print(f"Time to perform multicore computations: {time()-start} seconds")
+
+
+if __name__ == "__main__":
+    """
+    Take a bounding box and output a geotiff DEM.
+
+    This is a thin wrapper around `sardem`: https://github.com/scottstanie/sardem
+
+    This script is meant to test the MAAP processing pipeline; it is
+    hardcoded to fetch the Copernicus DEM from the AWS Open Data registry.
+    See: https://registry.opendata.aws/copernicus-dem/
+
+    The code will fetch the necessary DEM tiles, stitch them together with GDAL,
+    and create a single geotiff DEM in the `out_dir` directory, named `dem.tif`.
+
+    If `--compute` is given (bare, or as `--compute TRUE`), it will open the generated dem.tif
+    file and do compute-intensive, multi-core linear algebra computations
+    on that DEM raster. There are no changes made to the dem.tif; this command
+    is simply for benchmarking compute.
+
+    Example cmd line call:
+    python get_dem.py
+        --bbox -156 18.8 -154.7 20.3  # bounding box: [left  bottom  right top]
+        --out_dir output
+
+    python get_dem.py
+        --bbox -156 18.8 -154.7 20.3  # bounding box: [left  bottom  right top]
+        --compute  # flag to have the compute node perform intense, multi-core computations
+        --out_dir output
+    """
+
+    # Step 1: Parse Arguments (--bbox and --out_dir are mandatory: both are used unconditionally below)
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("-v", "--version", action="version", version=__version__)
+
+    msg = "lat/lon bounding box, with orientation: [left  bottom  right top]. Example: '--bbox -156 18.8 -154.7 20.3'."
+    parser.add_argument("-b", "--bbox", type=str, help=msg, nargs=4, required=True)
+
+    msg = "Flag to crunch numbers, exercise multiple cores, and use a LOT of memory. Bare flag or TRUE enables it; any other value disables it."
+    parser.add_argument("-c", "--compute", type=str, help=msg, nargs="?", const="TRUE", default="FALSE")  # bare flag -> "TRUE"; omitted -> "FALSE"
+
+    msg = "Path for an existing output directory. The output DEM geotiff will be saved in here."
+    parser.add_argument("-o", "--out_dir", type=str, help=msg, required=True)
+
+    args = parser.parse_args()
+
+    bbox = " ".join(args.bbox)
+
+    # Step 2: Make dem.tif
+    dem_file = get_dem(bbox, args.out_dir)
+
+    # Step 3: Perform compute-intensive, multicore operations (value is matched case-insensitively)
+    if args.compute.upper() == "TRUE":
+        do_computations(dem_file)
\ No newline at end of file
diff --git a/build/entrypoint.sh b/build/entrypoint.sh
index 8482102..e14b0aa 100644
--- a/build/entrypoint.sh
+++ b/build/entrypoint.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
 
 
 set -x
@@ -6,28 +6,11 @@ export LC_ALL=C.UTF-8
 export LANG=C.UTF-8
 
 
-VALID_ARGS=$(getopt -o b:c: --long bbox:compute -- "$@")
-if [[ $? -ne 0 ]]; then
-    exit 1;
-fi
+# Creating output folder
+mkdir -p /projects/data/output
 
-eval set -- "$VALID_ARGS"
-while [ : ]; do
-  case "$1" in
-    -b | --bbox)
-        BBOX=$2
-        shift 2
-        ;;
-    -c | --compute)
-        COMPUTE=$2
-        shift 2
-        ;;
-    --) shift; 
-        break 
-        ;;
-  esac
-done
+cd /projects
 
-echo ${BBOX}
+python3 /opt/get-dem/get_dem.py -o /projects/data/output ${@} || exit $?  # propagate failure to the caller; ${@} stays unquoted because the CWL tool passes the bbox as one string and get_dem.py expects 4 values
 
-echo ${COMPUTE}
\ No newline at end of file
+find /projects/data/output -type f
\ No newline at end of file
diff --git a/cwl/.ipynb_checkpoints/workflow-checkpoint.cwl b/cwl/.ipynb_checkpoints/workflow-checkpoint.cwl
new file mode 100644
index 0000000..cc43b4c
--- /dev/null
+++ b/cwl/.ipynb_checkpoints/workflow-checkpoint.cwl
@@ -0,0 +1,81 @@
+$graph:
+# NOTE(review): this file sits under cwl/.ipynb_checkpoints and describes an "S1-Tiling" workflow, not get-dem; presumably a Jupyter autosave committed by accident - confirm and drop it (the CI job copies cwl/ recursively to S3).
+- class: Workflow
+  doc: Launch S1-Tiling Algorithm
+  id: s1tiling-demo
+  requirements:
+  - class: ScatterFeatureRequirement
+  inputs:
+    input_s1_l1_grd:
+      doc: Folder containing input S1 L1 GRD product
+      label: S1L1GRD products folder
+      type: string[]
+    input_srtm_folder:
+      doc: Folder containing SRTM files
+      label: SRTM files folder
+      type: string[]
+    s_roi_id:
+      doc: Name of the S2 Tiles to process
+      label: ROI
+      type: string[]
+  label: s expressions
+  outputs:
+  - id: wf_outputs
+    outputSource:
+    - output
+    type:
+      Directory[]
+
+  steps:
+    step_1:
+      in:
+        input_s1_l1_grd: input_s1_l1_grd
+        input_srtm_folder: input_srtm_folder
+        s_roi_id: s_roi_id
+      out:
+      - output
+      run: '#clt'
+      scatter: [input_s1_l1_grd, input_srtm_folder, s_roi_id]
+      scatterMethod: parallel
+
+
+- baseCommand: /argosay
+  class: CommandLineTool
+
+  id: clt
+
+  arguments:
+  - echo
+  - valueFrom: $( inputs.input_reference )
+
+  inputs:
+    input_reference:
+      type: string
+    s_expression:
+      type: string
+    cbn:
+      type: string
+
+  outputs:
+    results:
+      outputBinding:
+        glob: .
+      type: Directory
+  requirements:
+    EnvVarRequirement:
+      envDef:
+        PATH: /srv/conda/envs/env_app_snuggs/bin:/srv/conda/bin:/srv/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+    ResourceRequirement: {}
+    InlineJavascriptRequirement: {}
+    DockerRequirement:
+      dockerPull: argoproj/argosay:v2
+  #stderr: std.err
+  #stdout: std.out
+
+cwlVersion: v1.0
+
+$namespaces:
+  s: https://schema.org/
+s:softwareVersion: 0.3.0
+schemas:
+- http://schema.org/version/9.0/schemaorg-current-http.rdf
\ No newline at end of file
diff --git a/cwl/workflow.cwl b/cwl/workflow.cwl
new file mode 100644
index 0000000..c94717d
--- /dev/null
+++ b/cwl/workflow.cwl
@@ -0,0 +1,65 @@
+$graph:
+
+- class: Workflow
+  doc: Get and merge DEM over a BBOx area
+  id: get-dem
+  inputs:
+    s_bbox:
+      doc: Bounding box
+      label:  Bounding box
+      type: string
+    s_compute:
+      doc: TRUE to enable heavy computation
+      label: Compute
+      type: string
+  label: s expressions
+  outputs:
+  - id: wf_outputs
+    # outputSource must reference a step output as <step>/<output id>, not a container path
+    outputSource:
+    - step_1/results
+    type: Directory
+  steps:
+    step_1:
+      in:
+        s_bbox: s_bbox
+        s_compute: s_compute
+      out:
+      - results
+      # must match the id of the CommandLineTool below
+      run: '#driver-command'
+
+- baseCommand: /opt/entrypoint.sh
+  class: CommandLineTool
+  id: driver-command
+  arguments:
+  - --bbox
+  - valueFrom: $( inputs.s_bbox )
+  - --compute
+  - valueFrom: $( inputs.s_compute )
+  inputs:
+    s_bbox:
+      type: string
+    s_compute:
+      type: string
+  # NOTE(review): the entrypoint writes dem.tif to /projects/data/output; confirm that path ends up inside the tool's working directory, otherwise this glob collects nothing.
+  outputs:
+    results:
+      outputBinding:
+        glob: .
+      type: Directory
+  requirements:
+    EnvVarRequirement:
+      envDef:
+        PATH: /srv/conda/envs/env_app_snuggs/bin:/srv/conda/bin:/srv/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+    ResourceRequirement: {}
+    InlineJavascriptRequirement: {}
+    DockerRequirement:
+      dockerPull: registry.eu-west-0.prod-cloud-ocb.orange-business.com/esa-maap.org/esa-maap-dev/get-dem:latest
+
+cwlVersion: v1.0
+$namespaces:
+  s: https://schema.org/
+s:softwareVersion: 0.3.0
+schemas:
+- http://schema.org/version/9.0/schemaorg-current-http.rdf
-- 
GitLab