diff --git a/processing/apply_k_allom_adjustment.sql b/processing/apply_k_allom_adjustment.sql
new file mode 100644
index 0000000000000000000000000000000000000000..0dd116855635d7ff7f3435c85420acc3751ced48
--- /dev/null
+++ b/processing/apply_k_allom_adjustment.sql
@@ -0,0 +1,18 @@
+BEGIN;
+-- Add new adjusted-value columns to the menlove_healey_biohex table
+ALTER TABLE menlove_healey_biohex
+ADD COLUMN biwf_temp_conifer_na_adj NUMERIC,
+ADD COLUMN biwf_temp_broadleaf_na_adj NUMERIC,
+ADD COLUMN biwf_temp_conifer_all_adj NUMERIC,
+ADD COLUMN biwf_temp_broadleaf_all_adj NUMERIC;
+
+-- Populate the new columns with the original values scaled by the
+-- per-group allometric adjustment factors
+UPDATE menlove_healey_biohex
+SET biwf_temp_conifer_na_adj = biwf_temp_conifer_na * 2.56,
+    biwf_temp_broadleaf_na_adj = biwf_temp_broadleaf_na * 0.58,
+    biwf_temp_conifer_all_adj = biwf_temp_conifer_all * 1.47,
+    biwf_temp_broadleaf_all_adj = biwf_temp_broadleaf_all * 1;
+
+COMMIT;
diff --git a/processing/calc_cell_averages.sql b/processing/calc_cell_averages.sql
new file mode 100644
index 0000000000000000000000000000000000000000..9ec5d63b383032ea83313a34ed3c509750aaf4a1
--- /dev/null
+++ b/processing/calc_cell_averages.sql
@@ -0,0 +1,30 @@
+-- Set the number of parallel workers (adjust as needed)
+SET max_parallel_workers_per_gather = 16;
+SET max_parallel_workers = 16;
+
+-- Create a temporary table to store the results
+BEGIN;
+CREATE TEMPORARY TABLE temp_results AS
+SELECT
+    conus_cells.fid AS cell_id,
+    AVG(fr.biwf) AS avg_biwf
+FROM
+    conus_cells
+    JOIN fourth_run_results fr ON ST_Contains(conus_cells.geom, fr.geom)
+GROUP BY
+    conus_cells.fid;
+COMMIT;
+
+BEGIN;
+-- Add a new column to the conus_cells table
+ALTER TABLE conus_cells ADD COLUMN biwf_fourth_run NUMERIC;
+
+-- Update the new column with the calculated average values
+UPDATE conus_cells c
+SET biwf_fourth_run = tr.avg_biwf
+FROM temp_results tr
+WHERE c.fid = tr.cell_id;
+
+-- Drop the temporary table
+DROP TABLE temp_results;
+COMMIT;
diff --git a/processing/calc_menlove_averages.sql b/processing/calc_menlove_averages.sql
new file mode 100644
index 0000000000000000000000000000000000000000..49bfeca8f9d64a2af2480beb9442f887721d7271
--- /dev/null
+++ b/processing/calc_menlove_averages.sql
@@ -0,0 +1,37 @@
+-- Set the number of parallel workers (adjust as needed)
+SET max_parallel_workers_per_gather = 16;
+SET max_parallel_workers = 16;
+
+-- Create a temporary table to store the results
+BEGIN;
+CREATE TEMPORARY TABLE temp_results AS
+SELECT
+    mhb.ushexes_id AS hex_id,
+    AVG(results.biwf) AS avg_biwf
+FROM
+    menlove_healey_biohex mhb
+JOIN
+    conus_raster_params results
+ON
+    ST_Contains(mhb.geom, results.geom)
+GROUP BY
+    mhb.ushexes_id;
+
+COMMIT;
+
+BEGIN;
+-- Add a new column to the menlove_healey_biohex table
+ALTER TABLE menlove_healey_biohex
+ADD COLUMN biwf_conus_raster_params NUMERIC;
+
+-- Update the new column with the calculated average values
+UPDATE menlove_healey_biohex mhb
+SET
+    biwf_conus_raster_params = tr.avg_biwf
+FROM temp_results tr
+WHERE mhb.ushexes_id = tr.hex_id;
+
+-- Drop the temporary table
+DROP TABLE temp_results;
+
+COMMIT;
diff --git a/processing/calc_menlove_count.sql b/processing/calc_menlove_count.sql
new file mode 100644
index 0000000000000000000000000000000000000000..fa57f7c6910ad3a3b5f767dd99d7a53766e65a98
--- /dev/null
+++ b/processing/calc_menlove_count.sql
@@ -0,0 +1,13 @@
+-- Calculate the count of fifth_run_results points within
+-- each menlove_healey_biohex polygon and store the results
+
+BEGIN;
+CREATE TEMPORARY TABLE temp_results AS
+SELECT
+    mhb.ushexes_id AS hex_id,
+    COUNT(fr.biwf) AS count_biwf_fifth_run,
+    COUNT(fr.l4_agbd) AS count_l4_agbd
+FROM menlove_healey_biohex mhb
+JOIN fifth_run_results fr ON ST_Contains(mhb.geom, fr.geom)
+GROUP BY mhb.ushexes_id;
+COMMIT;
diff --git a/processing/load_into_postgis.sh b/processing/load_into_postgis.sh
new file mode 100755
index 0000000000000000000000000000000000000000..e1c3da87fded7f52aa0f42c096fbe0fafc0d8c30
--- /dev/null
+++ b/processing/load_into_postgis.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+# Check if correct number of arguments is provided
+if [ "$#" -ne 2 ]; then
+    echo "Usage: $0 <input_directory> <target_table>"
+    exit 1
+fi
+
+INPUT_DIR="$1"
+TARGET_TABLE="$2"
+BATCH_SIZE=10
+
+# Set your PostgreSQL connection parameters
+PG_HOST="localhost"
+PG_PORT="5432"
+PG_DB="nmbim_results"
+PG_USER="ian"
+PG_PASS="grant"
+
+# Check if input directory exists
+if [ ! -d "$INPUT_DIR" ]; then
+    echo "Error: Input directory does not exist."
+    exit 1
+fi
+
+# Debugging: Check for .gpkg files
+echo "Checking for .gpkg files in $INPUT_DIR"
+find "$INPUT_DIR" -name "*.gpkg" | head -n 5
+
+# Debugging: List all .gpkg files and count
+echo "Listing found .gpkg files:"
+find "$INPUT_DIR" -name "*.gpkg"
+echo "Number of .gpkg files found: $(find "$INPUT_DIR" -name "*.gpkg" | wc -l)"
+
+# Debugging: Check for hidden characters in filenames
+echo "Listing files with special characters visible:"
+find "$INPUT_DIR" -name "*.gpkg" -print0 | xargs -0 ls -b
+
+# Function to merge and load a batch of GeoPackages
+process_batch() {
+    local batch_num="$1"
+    shift
+    local input_gpkgs=("$@")
+
+    # /vsimem is private to each GDAL process, so the merged file must be
+    # written to disk where the separate ogr2ogr process can read it
+    local output_gpkg="/tmp/merged_batch_${batch_num}.gpkg"
+    echo "Merging batch $batch_num"
+    # Use ogrmerge.py to merge the GeoPackages into a single layer
+    ogrmerge.py -o "$output_gpkg" -f GPKG "${input_gpkgs[@]}" -overwrite_ds -single
+    if [ $? -ne 0 ]; then
+        echo "Error merging batch $batch_num"
+        return 1
+    fi
+
+    echo "Loading merged GeoPackage into PostGIS"
+    ogr2ogr -f PostgreSQL PG:"host=$PG_HOST port=$PG_PORT dbname=$PG_DB user=$PG_USER password=$PG_PASS" \
+        "$output_gpkg" \
+        -nln "$TARGET_TABLE" \
+        -append \
+        -update \
+        -lco COPY_WKB=YES \
+        -skipfailures
+    if [ $? -ne 0 ]; then
+        echo "Error loading batch $batch_num into PostGIS"
+        return 1
+    fi
+
+    # Clean up the merged batch file
+    rm -f "$output_gpkg"
+    echo "Batch $batch_num completed"
+}
+
+export -f process_batch
+export PG_HOST PG_PORT PG_DB PG_USER PG_PASS TARGET_TABLE
+
+# Find all GeoPackages (compatible with older Bash versions)
+IFS=$'\n' read -r -d '' -a gpkg_files < <(find "$INPUT_DIR" -name "*.gpkg" && printf '\0')
+total_files=${#gpkg_files[@]}
+
+# Debugging: Check array content
+echo "Number of files in gpkg_files array: ${#gpkg_files[@]}"
+echo "First few files:"
+printf '%s\n' "${gpkg_files[@]:0:5}"
+
+# Debugging: Check that GDAL can read the files
+if [ ${#gpkg_files[@]} -gt 0 ]; then
+    echo "Testing ogrinfo with first file:"
+    first_file="${gpkg_files[0]}"
+    ogrinfo -so "$first_file"
+else
+    echo "No .gpkg files found in the array"
+fi
+
+# Debugging: Verify GDAL installation
+echo "GDAL version:"
+gdalinfo --version
+
+# Determine number of CPU cores and set max parallel jobs
+max_jobs=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)
+echo "Using up to $max_jobs parallel jobs"
+
+# Feed the file list to GNU parallel, which groups it into batches of
+# BATCH_SIZE paths; the exported process_batch function runs in each
+# child shell with the batch number and the batch's file paths
+printf '%s\n' "${gpkg_files[@]}" | \
+parallel --jobs "$max_jobs" -N "$BATCH_SIZE" --halt now,fail=1 --joblog parallel.log --eta \
+    process_batch {#} {}
+
+echo "All GeoPackages processed and loaded into PostGIS table: $TARGET_TABLE"
diff --git a/processing/load_into_postgis_non_parallel.sh b/processing/load_into_postgis_non_parallel.sh
new file mode 100755
index 0000000000000000000000000000000000000000..d5213fbd2abb472e844052a344d38e02e6397bf6
--- /dev/null
+++ b/processing/load_into_postgis_non_parallel.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+
+# Check if correct number of arguments is provided
+if [ "$#" -ne 2 ]; then
+    echo "Usage: $0 <input_directory> <target_table>"
+    exit 1
+fi
+
+INPUT_DIR="$1"
+TARGET_TABLE="$2"
+BATCH_SIZE=10
+
+# Set your PostgreSQL connection parameters
+PG_HOST="localhost"
+PG_PORT="5432"
+PG_DB="nmbim_results"
+PG_USER="ian"
+PG_PASS="grant"
+
+# Check if input directory exists
+if [ ! -d "$INPUT_DIR" ]; then
+    echo "Error: Input directory does not exist."
+    exit 1
+fi
+
+# Debugging: Check for .gpkg files
+echo "Checking for .gpkg files in $INPUT_DIR"
+find "$INPUT_DIR" -name "*.gpkg" | head -n 5
+
+# Debugging: List all .gpkg files and count
+echo "Listing found .gpkg files:"
+find "$INPUT_DIR" -name "*.gpkg"
+echo "Number of .gpkg files found: $(find "$INPUT_DIR" -name "*.gpkg" | wc -l)"
+
+# Debugging: Check for hidden characters in filenames
+echo "Listing files with special characters visible:"
+find "$INPUT_DIR" -name "*.gpkg" -print0 | xargs -0 ls -b
+
+# Function to merge and load a batch of GeoPackages
+process_batch() {
+    local batch_num="$1"
+    shift
+    local input_gpkgs=("$@")
+
+    local output_gpkg="merged_batch_${batch_num}.gpkg"
+    echo "Merging batch $batch_num"
+    echo "Input files for this batch:"
+    printf '%s\n' "${input_gpkgs[@]}"
+
+    # Check file permissions and existence
+    for file in "${input_gpkgs[@]}"; do
+        if [ ! -r "$file" ]; then
+            echo "Error: Cannot read file $file"
+        fi
+        if [ ! -s "$file" ]; then
-s "$file" ]; then + echo "Error: File $file is empty" + fi + done + + # Use ogrinfo to check each file + for file in "${input_gpkgs[@]}"; do + echo "Checking $file with ogrinfo:" + ogrinfo -so "$file" + done + + # Use ogrmerge.py to merge the GeoPackages + echo "Running ogrmerge.py command:" + echo ogrmerge.py -o "$output_gpkg" -f GPKG "${input_gpkgs[@]}" -overwrite_ds -single + ogrmerge.py -o "$output_gpkg" -f GPKG "${input_gpkgs[@]}" -overwrite_ds -single + if [ $? -ne 0 ]; then + echo "Error merging batch $batch_num" + return 1 + fi + ogrinfo $output_gpkg + + echo "Loading in-memory GeoPackage into PostGIS" + ogr2ogr -f PostgreSQL PG:"host=$PG_HOST port=$PG_PORT dbname=$PG_DB user=$PG_USER password=$PG_PASS" \ + "$output_gpkg" \ + -nln "$TARGET_TABLE" \ + -append \ + -update \ + -lco COPY_WKB=YES \ + -skipfailures + if [ $? -ne 0 ]; then + echo "Error loading batch $batch_num into PostGIS" + return 1 + fi + + # Cleanup in-memory file + gdal_translate -f MEM /vsimem/null "$output_gpkg" -q + gdal_translate -f MEM /vsimem/null /vsimem/null -q + echo "Batch $batch_num completed" +} + +# Find all GeoPackages +mapfile -d $'\0' gpkg_files < <(find "$INPUT_DIR" -name "*.gpkg" -print0) +total_files=${#gpkg_files[@]} + +# Debugging: Check array content +echo "Number of files in gpkg_files array: ${#gpkg_files[@]}" +echo "First few files:" +printf '%s\n' "${gpkg_files[@]:0:5}" + +# Debugging: Check if ogrinfo can see the files +if [ ${#gpkg_files[@]} -gt 0 ]; then + echo "Testing ogrinfo with first file:" + first_file="${gpkg_files[0]}" + ogrinfo -so "$first_file" +else + echo "No .gpkg files found in the array" +fi + +# Debugging: Verify GDAL installation +echo "GDAL version:" +gdalinfo --version + +# Process batches sequentially +for ((i=0; i<total_files; i+=BATCH_SIZE)); do + batch_num=$((i / BATCH_SIZE + 1)) + end=$((i + BATCH_SIZE)) + if [ $end -gt $total_files ]; then + end=$total_files + fi + batch_files=("${gpkg_files[@]:i:BATCH_SIZE}") + process_batch "$batch_num" "${batch_files[@]}" + if [ $? -ne 0 ]; then + echo "Error processing batch $batch_num. Stopping execution." + exit 1 + fi +done + +echo "All GeoPackages processed and loaded into PostGIS table: $TARGET_TABLE"