Commit 5d334c86 authored by Ian

added post-processing scripts

BEGIN;
-- Add adjusted-value columns to the menlove_healey_biohex table
ALTER TABLE menlove_healey_biohex
    ADD COLUMN biwf_temp_conifer_na_adj NUMERIC,
    ADD COLUMN biwf_temp_broadleaf_na_adj NUMERIC,
    ADD COLUMN biwf_temp_conifer_all_adj NUMERIC,
    ADD COLUMN biwf_temp_broadleaf_all_adj NUMERIC;
-- Populate the new columns from the originals, applying per-class adjustment factors
UPDATE menlove_healey_biohex
SET biwf_temp_conifer_na_adj = biwf_temp_conifer_na * 2.56,
    biwf_temp_broadleaf_na_adj = biwf_temp_broadleaf_na * 0.58,
    biwf_temp_conifer_all_adj = biwf_temp_conifer_all * 1.47,
    biwf_temp_broadleaf_all_adj = biwf_temp_broadleaf_all * 1;
COMMIT;
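-- Optional sanity check (a sketch, not part of the original script):
-- spot-check a few rows to confirm the adjustment factors were applied
SELECT biwf_temp_conifer_na, biwf_temp_conifer_na_adj,
       biwf_temp_broadleaf_na, biwf_temp_broadleaf_na_adj
FROM menlove_healey_biohex
LIMIT 5;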
-- Set the number of parallel workers for this session (adjust as needed)
SET max_parallel_workers_per_gather = 16;
SET max_parallel_workers = 16;
-- Create a temporary table to store the results
BEGIN;
CREATE TEMPORARY TABLE temp_results AS
SELECT
    conus_cells.fid AS cell_id,
    AVG(fr.biwf) AS avg_biwf
FROM
    conus_cells
    JOIN fourth_run_results fr ON ST_Contains(conus_cells.geom, fr.geom)
GROUP BY
    conus_cells.fid;
COMMIT;
BEGIN;
-- Add a new column to the conus_cells table
ALTER TABLE conus_cells ADD COLUMN biwf_fourth_run NUMERIC;
-- Update the new column with the calculated average values
UPDATE conus_cells c
SET biwf_fourth_run = tr.avg_biwf
FROM temp_results tr
WHERE c.fid = tr.cell_id;
-- Drop the temporary table
DROP TABLE temp_results;
COMMIT;
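-- The ST_Contains joins in these scripts scan geometry columns and run far
-- faster with GiST indexes. A sketch (assumes the indexes do not already
-- exist; index names are illustrative, and the other joined tables would
-- benefit from the same treatment):
CREATE INDEX IF NOT EXISTS conus_cells_geom_gist
    ON conus_cells USING GIST (geom);
CREATE INDEX IF NOT EXISTS fourth_run_results_geom_gist
    ON fourth_run_results USING GIST (geom);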
-- Set the number of parallel workers for this session (adjust as needed)
SET max_parallel_workers_per_gather = 16;
SET max_parallel_workers = 16;
-- Create a temporary table to store the results
BEGIN;
CREATE TEMPORARY TABLE temp_results AS
SELECT
    mhb.ushexes_id AS hex_id,
    AVG(results.biwf) AS avg_biwf
FROM
    menlove_healey_biohex mhb
    JOIN conus_raster_params results
        ON ST_Contains(mhb.geom, results.geom)
GROUP BY
    mhb.ushexes_id;
COMMIT;
BEGIN;
-- Add a new column to the menlove_healey_biohex table
ALTER TABLE menlove_healey_biohex
ADD COLUMN biwf_conus_raster_params NUMERIC;
-- Update the new column with the calculated average values
UPDATE menlove_healey_biohex mhb
SET biwf_conus_raster_params = tr.avg_biwf
FROM temp_results tr
WHERE mhb.ushexes_id = tr.hex_id;
-- Drop the temporary table
DROP TABLE temp_results;
COMMIT;
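-- Optional check (a sketch): hexes with no intersecting points keep a NULL
-- average, so counting them gives a quick coverage summary
SELECT COUNT(*) AS hexes_without_points
FROM menlove_healey_biohex
WHERE biwf_conus_raster_params IS NULL;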
-- Calculate the count of fifth_run_results points within
-- each menlove_healey_biohex polygon and store the results
BEGIN;
CREATE TEMPORARY TABLE temp_results AS
SELECT
    mhb.ushexes_id AS hex_id,
    COUNT(fr.biwf) AS count_biwf_fifth_run,
    COUNT(fr.l4_agbd) AS count_l4_agbd
FROM
    menlove_healey_biohex mhb
    JOIN fifth_run_results fr ON ST_Contains(mhb.geom, fr.geom)
GROUP BY
    mhb.ushexes_id;
COMMIT;
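-- The source file is cut off at this point; the following is a sketch of the
-- presumable continuation, mirroring the update pattern of the blocks above
-- (column names assumed from the counts computed in the temp table):
BEGIN;
ALTER TABLE menlove_healey_biohex
    ADD COLUMN count_biwf_fifth_run NUMERIC,
    ADD COLUMN count_l4_agbd NUMERIC;
UPDATE menlove_healey_biohex mhb
SET count_biwf_fifth_run = tr.count_biwf_fifth_run,
    count_l4_agbd = tr.count_l4_agbd
FROM temp_results tr
WHERE mhb.ushexes_id = tr.hex_id;
DROP TABLE temp_results;
COMMIT;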
#!/bin/bash
# Check if correct number of arguments is provided
if [ "$#" -ne 2 ]; then
echo "Usage: $0 <input_directory> <target_table>"
exit 1
fi
INPUT_DIR="$1"
TARGET_TABLE="$2"
BATCH_SIZE=10
# Set your PostgreSQL connection parameters
PG_HOST="localhost"
PG_PORT="5432"
PG_DB="nmbim_results"
PG_USER="ian"
PG_PASS="grant"
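# Note: hardcoding a password in a script is risky. A common alternative
# (an assumption about your setup, not part of the original script) is a
# ~/.pgpass file, or exporting the libpq environment variable instead:
#   export PGPASSWORD="$PG_PASS"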
# Check if input directory exists
if [ ! -d "$INPUT_DIR" ]; then
    echo "Error: Input directory does not exist."
    exit 1
fi
# Debugging: Check for .gpkg files
echo "Checking for .gpkg files in $INPUT_DIR"
find "$INPUT_DIR" -name "*.gpkg" | head -n 5
# Debugging: List all .gpkg files and count
echo "Listing found .gpkg files:"
find "$INPUT_DIR" -name "*.gpkg"
echo "Number of .gpkg files found: $(find "$INPUT_DIR" -name "*.gpkg" | wc -l)"
# Debugging: Check for hidden characters in filenames
echo "Listing files with special characters visible:"
find "$INPUT_DIR" -name "*.gpkg" -print0 | xargs -0 ls -b
# Function to merge and load a batch of GeoPackages
process_batch() {
    local batch_num="$1"
    shift
    local input_gpkgs=("$@")
    # /vsimem/ is private to each GDAL process, so a file written there by
    # ogrmerge.py would not be visible to the separate ogr2ogr process below;
    # merge to a temporary on-disk file instead
    local output_gpkg="/tmp/merged_batch_${batch_num}.gpkg"
    echo "Merging batch $batch_num"
    # Use ogrmerge.py to merge the GeoPackages into a single layer
    ogrmerge.py -o "$output_gpkg" -f GPKG "${input_gpkgs[@]}" -overwrite_ds -single
    if [ $? -ne 0 ]; then
        echo "Error merging batch $batch_num"
        return 1
    fi
    echo "Loading merged GeoPackage into PostGIS"
    ogr2ogr -f PostgreSQL PG:"host=$PG_HOST port=$PG_PORT dbname=$PG_DB user=$PG_USER password=$PG_PASS" \
        "$output_gpkg" \
        -nln "$TARGET_TABLE" \
        -append \
        -update \
        -lco COPY_WKB=YES \
        -skipfailures
    if [ $? -ne 0 ]; then
        echo "Error loading batch $batch_num into PostGIS"
        rm -f "$output_gpkg"
        return 1
    fi
    # Remove the merged batch file
    rm -f "$output_gpkg"
    echo "Batch $batch_num completed"
}
export -f process_batch
export PG_HOST PG_PORT PG_DB PG_USER PG_PASS TARGET_TABLE
# Find all GeoPackages (compatible with older Bash versions)
IFS=$'\n' read -r -d '' -a gpkg_files < <(find "$INPUT_DIR" -name "*.gpkg" && printf '\0')
total_files=${#gpkg_files[@]}
# Debugging: Check array content
echo "Number of files in gpkg_files array: ${#gpkg_files[@]}"
echo "First few files:"
printf '%s\n' "${gpkg_files[@]:0:5}"
# Debugging: Check if ogrmerge.py can see the files
if [ ${#gpkg_files[@]} -gt 0 ]; then
    echo "Testing ogrinfo with first file:"
    first_file="${gpkg_files[0]}"
    ogrinfo -so "$first_file"
else
    echo "No .gpkg files found in the array"
fi
# Debugging: Verify GDAL installation
echo "GDAL version:"
gdalinfo --version
# Determine number of CPU cores and set max parallel jobs
max_jobs=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)
echo "Using up to $max_jobs parallel jobs"
# Create batches and process in parallel.
# Bash arrays are not inherited by GNU parallel's child shells, so the file
# list is piped in and grouped into batches of BATCH_SIZE with -N; {#} is the
# job (batch) number and {} expands to the batch's file arguments.
printf '%s\n' "${gpkg_files[@]}" | \
parallel --jobs "$max_jobs" -N "$BATCH_SIZE" --halt now,fail=1 --joblog parallel.log --eta \
    'process_batch {#} {}'
echo "All GeoPackages processed and loaded into PostGIS table: $TARGET_TABLE"
#!/bin/bash
# Check if correct number of arguments is provided
if [ "$#" -ne 2 ]; then
echo "Usage: $0 <input_directory> <target_table>"
exit 1
fi
INPUT_DIR="$1"
TARGET_TABLE="$2"
BATCH_SIZE=10
# Set your PostgreSQL connection parameters
PG_HOST="localhost"
PG_PORT="5432"
PG_DB="nmbim_results"
PG_USER="ian"
PG_PASS="grant"
# Check if input directory exists
if [ ! -d "$INPUT_DIR" ]; then
    echo "Error: Input directory does not exist."
    exit 1
fi
# Debugging: Check for .gpkg files
echo "Checking for .gpkg files in $INPUT_DIR"
find "$INPUT_DIR" -name "*.gpkg" | head -n 5
# Debugging: List all .gpkg files and count
echo "Listing found .gpkg files:"
find "$INPUT_DIR" -name "*.gpkg"
echo "Number of .gpkg files found: $(find "$INPUT_DIR" -name "*.gpkg" | wc -l)"
# Debugging: Check for hidden characters in filenames
echo "Listing files with special characters visible:"
find "$INPUT_DIR" -name "*.gpkg" -print0 | xargs -0 ls -b
# Function to merge and load a batch of GeoPackages
process_batch() {
    local batch_num="$1"
    shift
    local input_gpkgs=("$@")
    local output_gpkg="merged_batch_${batch_num}.gpkg"
    echo "Merging batch $batch_num"
    echo "Input files for this batch:"
    printf '%s\n' "${input_gpkgs[@]}"
    # Check file permissions and existence
    for file in "${input_gpkgs[@]}"; do
        if [ ! -r "$file" ]; then
            echo "Error: Cannot read file $file"
        fi
        if [ ! -s "$file" ]; then
            echo "Error: File $file is empty"
        fi
    done
    # Use ogrinfo to check each file
    for file in "${input_gpkgs[@]}"; do
        echo "Checking $file with ogrinfo:"
        ogrinfo -so "$file"
    done
    # Use ogrmerge.py to merge the GeoPackages
    echo "Running ogrmerge.py command:"
    echo ogrmerge.py -o "$output_gpkg" -f GPKG "${input_gpkgs[@]}" -overwrite_ds -single
    ogrmerge.py -o "$output_gpkg" -f GPKG "${input_gpkgs[@]}" -overwrite_ds -single
    if [ $? -ne 0 ]; then
        echo "Error merging batch $batch_num"
        return 1
    fi
    ogrinfo "$output_gpkg"
    echo "Loading merged GeoPackage into PostGIS"
    ogr2ogr -f PostgreSQL PG:"host=$PG_HOST port=$PG_PORT dbname=$PG_DB user=$PG_USER password=$PG_PASS" \
        "$output_gpkg" \
        -nln "$TARGET_TABLE" \
        -append \
        -update \
        -lco COPY_WKB=YES \
        -skipfailures
    if [ $? -ne 0 ]; then
        echo "Error loading batch $batch_num into PostGIS"
        rm -f "$output_gpkg"
        return 1
    fi
    # Remove the merged batch file
    rm -f "$output_gpkg"
    echo "Batch $batch_num completed"
}
# Find all GeoPackages
mapfile -t -d '' gpkg_files < <(find "$INPUT_DIR" -name "*.gpkg" -print0)
total_files=${#gpkg_files[@]}
# Debugging: Check array content
echo "Number of files in gpkg_files array: ${#gpkg_files[@]}"
echo "First few files:"
printf '%s\n' "${gpkg_files[@]:0:5}"
# Debugging: Check if ogrinfo can see the files
if [ ${#gpkg_files[@]} -gt 0 ]; then
    echo "Testing ogrinfo with first file:"
    first_file="${gpkg_files[0]}"
    ogrinfo -so "$first_file"
else
    echo "No .gpkg files found in the array"
fi
# Debugging: Verify GDAL installation
echo "GDAL version:"
gdalinfo --version
# Process batches sequentially
for ((i=0; i<total_files; i+=BATCH_SIZE)); do
    batch_num=$((i / BATCH_SIZE + 1))
    # The array slice stops at the end of the list automatically
    batch_files=("${gpkg_files[@]:i:BATCH_SIZE}")
    if ! process_batch "$batch_num" "${batch_files[@]}"; then
        echo "Error processing batch $batch_num. Stopping execution."
        exit 1
    fi
done
echo "All GeoPackages processed and loaded into PostGIS table: $TARGET_TABLE"