From 98a01b977b81a413b68cc2a54f36afdc7a509fda Mon Sep 17 00:00:00 2001 From: Duflot <arthur.duflot@capgemini.com> Date: Wed, 21 Feb 2024 16:52:36 +0100 Subject: [PATCH] synch --- .gitlab-ci.yml | 37 +++++ Dockerfile | 24 ++- README.md | 4 +- app/get_dem.py | 146 ++++++++++++++++++ build/entrypoint.sh | 29 +--- .../workflow-checkpoint.cwl | 81 ++++++++++ cwl/workflow.cwl | 65 ++++++++ 7 files changed, 346 insertions(+), 40 deletions(-) create mode 100644 .gitlab-ci.yml create mode 100644 app/get_dem.py create mode 100644 cwl/.ipynb_checkpoints/workflow-checkpoint.cwl create mode 100644 cwl/workflow.cwl diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..16e8637 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,37 @@ +image: docker:18.09.7 + +stages: + - build + - test + +services: + - docker:18.09.7-dind + +variables: + DOCKER_DRIVER: overlay + DOCKER_HOST: tcp://localhost:2375 + + +push-image-to-registry: + image: docker:18.09.7-dind + stage: build + + script: + - docker login -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" $CI_REGISTRY + - docker info + - docker build --build-arg AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID --build-arg AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY --build-arg AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION --network host --pull -t "$CI_REGISTRY/$CI_PROJECT_PATH:latest" . 
+ - docker push "$CI_REGISTRY/$CI_PROJECT_PATH:latest" + + only: + - main + +push-data-to-s3: + image: python:3.7 + stage: test + + script: + - pip3 install awscli + - echo "Going to push data to s3" + - aws s3 cp cwl s3://s3public/cwl/get-dem/ --recursive --endpoint https://oss.eu-west-0.prod-cloud-ocb.orange-business.com + only: + - main diff --git a/Dockerfile b/Dockerfile index 2e8e8e4..7c8c121 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,25 +1,19 @@ -FROM python:3.12.1-bookworm +FROM ghcr.io/osgeo/gdal:alpine-normal-3.8.2 # Update package lists and install necessary packages -RUN rm -rf /var/lib/apt/lists/* -RUN apt-get update && apt-get install -y \ - libproj-dev libgeos-dev libgdal-dev \ - wget vim curl\ +RUN apk add py3-pip -# Create a virtual environment and activate it -RUN python3 -m venv venv -# to know GDAL version use gdal-config --version -RUN . venv/bin/activate && pip3 install geopandas matplotlib scipy scikit-image wheel GDAL==3.0.4 scikit-learn rasterio +# Add application +RUN pip install sardem COPY ./build/entrypoint.sh /opt -RUN chmod +x /opt/entrypoint.sh - -# Add application +RUN chmod +x /opt/entrypoint.sh +RUN mkdir -p /opt/get-dem +COPY ./app/* /opt/get-dem/ +RUN chmod -R +x /opt/get-dem -# Clean up the image -RUN apt-get clean && rm -rf /var/lib/apt/lists/* # Set the entry point or command if needed -ENTRYPOINT ["/opt/entrypoint.sh"] \ No newline at end of file +ENTRYPOINT ["/opt/entrypoint.sh"] diff --git a/README.md b/README.md index 31859c2..9cc1bad 100644 --- a/README.md +++ b/README.md @@ -15,14 +15,14 @@ Already a pro? Just edit this README.md and make it your own. 
Want to make it ea ``` cd existing_repo -git remote add origin https://repo.maap-project.org/arthur.duflot/get-dem.git +git remote add origin https://repo.uat.maap-project.org/esa-maap-dev/get-dem.git git branch -M main git push -uf origin main ``` ## Integrate with your tools -- [ ] [Set up project integrations](https://repo.maap-project.org/arthur.duflot/get-dem/-/settings/integrations) +- [ ] [Set up project integrations](https://repo.uat.maap-project.org/esa-maap-dev/get-dem/-/settings/integrations) ## Collaborate with your team diff --git a/app/get_dem.py b/app/get_dem.py new file mode 100644 index 0000000..8b49f4e --- /dev/null +++ b/app/get_dem.py @@ -0,0 +1,146 @@ +import os +import argparse +import subprocess +from osgeo import gdal +import numpy as np +from time import time + +__version__ = "0.2.0" + +def get_dem(bbox: str, out_dir: str) -> str: + """ + Generate a COP DEM Gtiff for the given bounding box. + + Parameters + ---------- + bbox : str + lat/lon bounding box, with orientation: [left bottom right top]. + Example: '-156 18.8 -154.7 20.3'. + out_dir : str + Path to an existing directory to store the generated DEM Gtiff in. + + Returns + ------- + dem_file : str + Filepath to the generated DEM Gtiff. + In practice, this will be: "<out_dir>/dem.tif" + """ + + dem_file = os.path.join(out_dir, "dem.tif") + + # Annoyingly, rasterio cannot find the PROJ_DATA directory + # when running in the NASA MAAP ADE. + # So, we need to manually set the environment variable, and + # then run sardem + + # Step 1: Get the path to PROJ_DATA. 
+ # From Command Line, use the command: echo $PROJ_DATA + # Example outputs: + # In conda base environment in MAAP ADE, this produces: /opt/conda/share/proj + # In a custom conda environment named 'dem', this produces: '/opt/conda/envs/dem/share/proj' + result = subprocess.run(['echo $PROJ_DATA'], stdout=subprocess.PIPE, shell=True) + proj_data_path = result.stdout.decode('utf-8').strip() + + os.environ['PROJ_DATA'] = proj_data_path + + # Step 2: Run sardem + start = time() + + os.system(f"sardem --bbox {bbox} --data-source COP -o {dem_file} --output-format GTiff") + + print(f"Time to fetch and create dem.tif: {time()-start} seconds") + + # Warning: in a Jupyter notebook on NASA MAAP ADE, Steps 1 and 2 must be combined: + # !PROJ_DATA={proj_data_path} sardem --bbox {bbox} --data-source COP -o {dem_file} --output-format GTiff + + + return dem_file + +def do_computations(dem_file: str) -> None: + """ + Open the DEM raster and do compute-intensive, multicore computations. + + This function produces no meaningful output, and does NOT + modify the dem file. What it does do is exercise the underlying + compute nodes by using the maximum number of CPUs allowed by BLAS, + for an extended period of time, and using a significant amount of memory. + + By default, BLAS uses all available CPUs on a system. + To set this manually, from CLI use: + export OPENBLAS_NUM_THREADS=20 + + Parameters + ---------- + dem_file : str + Filepath to the generated DEM Gtiff. 
+ In practice, this will be: "<out_dir>/dem.tif" + """ + # Read the DEM into a numpy array + ds = gdal.Open(dem_file) + dem = ds.GetRasterBand(1).ReadAsArray() + + # Truncate to make it a square array + min_edge = min(np.shape(dem)) + dem = dem[:min_edge, :min_edge] + + print("Number of CPU cores available on instance: ", os.cpu_count()) + + # Multi-core section: + start = time() + result = np.dot(np.linalg.inv(dem), dem) + print(f"Time to perform multicore computations: {time()-start} seconds") + + +if __name__ == "__main__": + """ + Take a bounding box and output a geotiff DEM. + + This is a thin wrapper around `sardem`: https://github.com/scottstanie/sardem + + This script is meant to test the MAAP processing pipeline; it is + hardcoded to fetch the Copernicus DEM from the AWS Open Data registry. + See: https://registry.opendata.aws/copernicus-dem/ + + The code will fetch the necessary DEM tiles, stitch them together with GDAL, + and create a single geotiff DEM in the `out_dir` directory, named `dem.tif`. + + If `--compute TRUE` is passed, it will open the generated dem.tif + file and do compute-intensive, multi-core linear algebra computations + on that DEM raster. There are no changes made to the dem.tif; this command + is simply for benchmarking compute. + + Example cmd line call: + python get_dem.py + --bbox -156 18.8 -154.7 20.3 # bounding box: [left bottom right top] + --out_dir output + + python get_dem.py + --bbox -156 18.8 -154.7 20.3 # bounding box: [left bottom right top] + --compute TRUE # have the compute node perform intense, multi-core computations + --out_dir output + """ + + # Step 1: Parse Arguments + parser = argparse.ArgumentParser() + + parser.add_argument("-v", "--version", action="version", version=__version__) + + msg = "lat/lon bounding box, with orientation: [left bottom right top]. Example: '--bbox -156 18.8 -154.7 20.3'." 
+ parser.add_argument("-b", "--bbox", type=str, help=msg, nargs=4) + + msg = "Flag to crunch numbers, exercise multiple cores, and use a LOT of memory." + parser.add_argument("-c", "--compute", type=str, help=msg) # defaults to None; computations run only when the value is exactly TRUE + + msg = "Path for an existing output directory. The output DEM geotiff will be saved in here." + parser.add_argument("-o", "--out_dir", type=str, help=msg) + + args = parser.parse_args() + + bbox = " ".join(args.bbox) + + # Step 2: Make dem.tif + dem_file = get_dem(bbox, args.out_dir) + + # Step 3: Perform compute-intensive, multicore operations + if args.compute == "TRUE": + do_computations(dem_file) \ No newline at end of file diff --git a/build/entrypoint.sh b/build/entrypoint.sh index 8482102..e14b0aa 100644 --- a/build/entrypoint.sh +++ b/build/entrypoint.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh set -x @@ -6,28 +6,11 @@ export LC_ALL=C.UTF-8 export LANG=C.UTF-8 -VALID_ARGS=$(getopt -o b:c: --long bbox:compute -- "$@") -if [[ $? -ne 0 ]]; then - exit 1; -fi +# Creating output folder +mkdir -p /projects/data/output -eval set -- "$VALID_ARGS" -while [ : ]; do - case "$1" in - -b | --bbox) - BBOX=$2 - shift 2 - ;; - -c | --compute) - COMPUTE=$2 - shift 2 - ;; - --) shift; - break - ;; - esac -done +cd /projects -echo ${BBOX} +python3 /opt/get-dem/get_dem.py -o /projects/data/output ${@} -echo ${COMPUTE} \ No newline at end of file +find /projects/data/output -type f \ No newline at end of file diff --git a/cwl/.ipynb_checkpoints/workflow-checkpoint.cwl b/cwl/.ipynb_checkpoints/workflow-checkpoint.cwl new file mode 100644 index 0000000..cc43b4c --- /dev/null +++ b/cwl/.ipynb_checkpoints/workflow-checkpoint.cwl @@ -0,0 +1,81 @@ +$graph: + +- class: Workflow + doc: Launch S1-Tiling Algorithm + id: s1tiling-demo + requirements: + - class: ScatterFeatureRequirement + inputs: + input_s1_l1_grd: + doc: Folder containing input S1 L1 GRD product + label: S1L1GRD products folder + type: string[] + input_srtm_folder: + doc: Folder containing 
SRTM files + label: SRTM files folder + type: string[] + s_roi_id: + doc: Name of the S2 Tiles to process + label: ROI + type: string[] + label: s expressions + outputs: + - id: wf_outputs + outputSource: + - output + type: + Directory[] + + steps: + step_1: + in: + input_s1_l1_grd: input_s1_l1_grd + input_srtm_folder: input_srtm_folder + s_roi_id: s_roi_id + out: + - output + run: '#clt' + scatter: [input_s1_l1_grd, input_srtm_folder, s_roi_id] + scatterMethod: parallel + + +- baseCommand: /argosay + class: CommandLineTool + + id: clt + + arguments: + - echo + - valueFrom: $( inputs.input_reference ) + + inputs: + input_reference: + type: string + s_expression: + type: string + cbn: + type: string + + outputs: + results: + outputBinding: + glob: . + type: Directory + requirements: + EnvVarRequirement: + envDef: + PATH: /srv/conda/envs/env_app_snuggs/bin:/srv/conda/bin:/srv/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + ResourceRequirement: {} + InlineJavascriptRequirement: {} + DockerRequirement: + dockerPull: argoproj/argosay:v2 + #stderr: std.err + #stdout: std.out + +cwlVersion: v1.0 + +$namespaces: + s: https://schema.org/ +s:softwareVersion: 0.3.0 +schemas: +- http://schema.org/version/9.0/schemaorg-current-http.rdf \ No newline at end of file diff --git a/cwl/workflow.cwl b/cwl/workflow.cwl new file mode 100644 index 0000000..c94717d --- /dev/null +++ b/cwl/workflow.cwl @@ -0,0 +1,65 @@ +$graph: + +- class: Workflow + doc: Get and merge DEM over a BBOx area + id: get-dem + inputs: + s_bbox: + doc: Bounding box + label: Bounding box + type: string + s_compute: + doc: TRUE to enable heavy computation + label: Compute + type: string + label: s expressions + outputs: + - id: wf_outputs + outputSource: + - /projects/data/output + type: + Directory[] + + steps: + step_1: + in: + s_bbox: s_bbox + s_compute: s_compute + run: '#command' + +- baseCommand: /opt/entrypoint.sh + class: CommandLineTool + + id: driver-command + + arguments: + - 
--bbox + - valueFrom: $( inputs.s_bbox ) + - --compute + - valueFrom: $( inputs.s_compute ) + + inputs: + s_bbox: + type: string + s_compute: + type: string + + + requirements: + EnvVarRequirement: + envDef: + PATH: /srv/conda/envs/env_app_snuggs/bin:/srv/conda/bin:/srv/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + ResourceRequirement: {} + InlineJavascriptRequirement: {} + DockerRequirement: + dockerPull: registry.eu-west-0.prod-cloud-ocb.orange-business.com/esa-maap.org/esa-maap-dev/get-dem:latest + #stderr: std.err + #stdout: std.out + +cwlVersion: v1.0 + +$namespaces: + s: https://schema.org/ +s:softwareVersion: 0.3.0 +schemas: +- http://schema.org/version/9.0/schemaorg-current-http.rdf -- GitLab