Newer
Older
""" FireIO
This module include functions used to read and save data
"""
# ------------------------------------------------------------------------------
#%% Read and filter active fire data
# ------------------------------------------------------------------------------
# Try to read a Geopandas file several times. Sometimes, the read fails the
# first time for mysterious reasons.
def gpd_read_file(filename, parquet=False, **kwargs):
import geopandas as gpd
itry = 0
maxtries = 5
fun = gpd.read_parquet if parquet else gpd.read_file
while itry < maxtries:
try:
return dat
except Exception as e:
itry += 1
print(f"Attempt {itry}/{maxtries} failed.")
if not itry < maxtries:
raise e
def os_path_exists(filename):
"""Alternative to os.path.exists that also works with S3 paths."""
if filename.startswith("s3://"):
import s3fs
s3 = s3fs.S3FileSystem(anon=False)
return s3.exists(filename)
else:
import os
def viirs_pixel_size(sample, band="i", rtSCAN_ANGLE=False):
"""calculate approximate size of i-band (375-m) or m-band (750-m) VIIRS pixel
Adapted from L. Giolio's IDL code
Usage: DS, DT = viirs_pixel_size(200,band='m',rtSCAN_ANGLE=False)
sample : int
sample number
band : str, 'i'|'m'
i (default) or m band
rtSCAN_ANGLE : bool, default = False
flag to return scan angle
DT : float
length in along-track dimension [km]
DS : float
length in along-scan dimension [km]
earth_radius = 6371.0 # earth radius [km]
h = 824.0 # SUOMI-NPP orbit altitude [km]
pt = 0.361 # nadir pixel resolution [km]
scan_rate = 56.06 / 6304 # scan rate (deg)
st1, st2, st3 = 1184, 1920, 3200 # sample tier steps
sb1, sb2, sb3 = 0, 3552, 5024 # sample base for each tier
ps_const = 0.128 # constant to convert zone number to along-scan deg for 1 pixel
# adjust constants for m band
pt *= 2
scan_rate *= 2
st1 /= 2
st2 /= 2
st3 /= 2
sb1 /= 2
sb2 /= 2
sb3 /= 2
ps_const *= 2
# derive more constants
st = pt / h # along-track deg for 1 pixel[rad]
r = earth_radius + h # r for satellite[km]
abs_sample = (sample >= st3) * (sample + 1 - st3) + (sample < st3) * (st3 - sample)
zone2 = np.logical_and(abs_sample > st1, abs_sample <= st2)
ps = ps_const * (3 * zone1 + 2 * zone2 + zone3)
ss = ps / h # along-scan deg for 1 pixel[rad]
abs_scan = (
zone1 * (abs_sample * 3 * scan_rate)
+ zone2 * ((sb2 + ((abs_sample - st1) * 2)) * scan_rate)
+ zone3 * ((sb3 + (abs_sample - st2)) * scan_rate)
) # scan angle, in degree
theta = np.deg2rad(abs_scan) # scan angle, in radian
cos_theta = np.cos(theta)
# calculate pixel size DS and DT
temp = (earth_radius / r) ** 2.0 - np.sin(theta) ** 2.0
DS = earth_radius * ss * (cos_theta / sqrt_temp - 1.0)
DT = r * st * (cos_theta - sqrt_temp)
if rtSCAN_ANGLE == True:
scan_angle = np.rad2deg(theta)
return DS, DT, scan_angle
def read_geojson_nv_CA(y0=2012, y1=2019):
""" read non vegetation fire data in California
Returns
-------
gdf : GeoDataFrame
the points of non vegetation fire location
import geopandas as gpd
from FireConsts import dirextdata
fnm = dirextdata + "CA/Calnvf/FCs_nv_" + str(y0) + "-" + str(y1) + ".geojson"
gdf = gpd_read_file(fnm)
def read_VNP14IMGML(yr, mo, ver="C1.05"):
""" read monthly VNP14IMGML data
Parameters
----------
yr : int
year
mo : int
month
Returns
-------
df : dataframe
the monthly dataframe of VIIRS active fires
from FireConsts import dirextdata
import os
import pandas as pd
# set monthly file name
"VIIRS","VNP14IMGML",
"VNP14IMGML." + str(yr) + str(mo).zfill(2) + "." + ver + ".txt",)
"YYYYMMDD",
"HHMM",
"Lat",
"Lon",
"Line",
"Sample",
"FRP",
"Confidence",
"Type",
"DNFlag",
]
if os_path_exists(fnmFC):
parse_dates=[["YYYYMMDD", "HHMM"]],
# sometimes the FRP value is '*******' and cause incorrect dtype, need to correct this
df = df.replace('*******',0)
df['FRP'] = df['FRP'].astype('float')
df["DT"], df["DS"] = viirs_pixel_size(df["Sample"].values)
df = df.drop(columns=["Sample", "Line"])
print('No data available for file',fnmFC)
Parameters
----------
t : tuple, (int,int,int,str)
the year, month, day and 'AM'|'PM' during the intialization
Returns
-------
vlist : list
(lat,lon) tuple of all daily active fires
from FireConsts import dirextdata
import pandas as pd
import os
from datetime import date
d = date(*t[:-1])
# derive monthly file name with path
dirFC = os.path.join(dirextdata,'VIIRS', "VNP14IMGTDL") + "/"
fnmFC = os.path.join(
dirFC, "SUOMI_VIIRS_C2_Global_VNP14IMGTDL_NRT_" + d.strftime("%Y%j") + ".txt"
)
usecols = [
"latitude",
"longitude",
"daynight",
"frp",
"confidence",
"scan",
"track",
"acq_date",
"acq_time",
]
if os_path_exists(fnmFC):
df = pd.read_csv(
fnmFC,
parse_dates=[["acq_date", "acq_time"]],
usecols=usecols,
skipinitialspace=True,
)
df = df.rename(
columns={
"latitude": "Lat",
"longitude": "Lon",
"frp": "FRP",
"scan": "DS",
"track": "DT",
"acq_date_acq_time": "YYYYMMDD_HHMM",
}
)
def read_VJ114IMGML(yr, mo):
""" read monthly VNP14IMGML data
Parameters
----------
yr : int
year
mo : int
month
Returns
-------
df : dataframe
the monthly dataframe of VIIRS active fires
from FireConsts import dirextdata
import os
import pandas as pd
# set monthly file name
fnmFC = os.path.join(
dirextdata,
"VJ114IMGML",
str(yr),
"VJ114IMGML_" + str(yr) + str(mo).zfill(2) + ".txt",
)
usecols = [
"year",
"month",
"day",
"hh",
"mm",
"lon",
"lat",
"mask",
"line",
"sample",
"frp",
]
def parser(yr, mo, dy, h, m):
return pd.to_datetime(
yr + "-" + mo + "-" + dy + " " + h + ":" + m, format="%Y-%m-%d %H:%M"
)
if os_path_exists(fnmFC):
# df = pd.read_csv(fnmFC,usecols=usecols,skipinitialspace=True)
df = pd.read_csv(
fnmFC,
parse_dates={"YYYYMMDD_HHMM": ["year", "month", "day", "hh", "mm"]},
date_parser=parser,
usecols=usecols,
skipinitialspace=True,
)
df = df.rename(
columns={
"lat": "Lat",
"lon": "Lon",
"frp": "FRP",
"line": "Line",
"sample": "Sample",
}
)
df["DT"], df["DS"] = viirs_pixel_size(df["Sample"].values)
""" Read daily NRT NOAA20 VIIRS fire location data
Parameters
----------
t : tuple, (int,int,int,str)
the year, month, day and 'AM'|'PM' during the intialization
Returns
-------
vlist : list
(lat,lon) tuple of all daily active fires
from FireConsts import dirextdata
import pandas as pd
import os
from datetime import date
d = date(*t[:-1])
# derive monthly file name with path
dirFC = os.path.join(dirextdata,'VIIRS', "VJ114IMGTDL") + "/"
fnmFC = os.path.join(
dirFC, "J1_VIIRS_C2_Global_VJ114IMGTDL_NRT_" + d.strftime("%Y%j") + ".txt"
)
# usecols = ['latitude','longitude','daynight','frp','confidence']
usecols = [
"latitude",
"longitude",
"daynight",
"frp",
"confidence",
"scan",
"track",
"acq_date",
"acq_time",
]
if os_path_exists(fnmFC):
#df = pd.read_csv(
# fnmFC,
# parse_dates=[["acq_date", "acq_time"]],
# usecols=usecols,
# skipinitialspace=True,
#)
df = pd.read_csv(fnmFC)
df['acq_date'] = str(d)
df["acq_date_acq_time"] = pd.to_datetime(df['acq_date'] +' '+ df['acq_time'])
#print(df['acq_date_acq_time'])
df = df.rename(
columns={
"latitude": "Lat",
"longitude": "Lon",
"frp": "FRP",
"scan": "DS",
"track": "DT",
"acq_date_acq_time": "YYYYMMDD_HHMM",
}
)
def AFP_regfilter(df, shp_Reg, strlat="Lat", strlon="Lon"):
""" filter fire pixels using a given shp_Reg
Parameters
----------
df : pandas DataFrame
fire pixel data with lat and lon information
shp_Reg : geometry
the geometry of the region shape
strlat : str
the column name for the latitude
strlon : str
the column name for the longitude
Returns
-------
df_filtered : pandas DataFrame
the filtered fire pixels
Tempest McCabe
committed
from FireConsts import remove_static_sources_bool
from shapely.geometry import Point
import geopandas as gpd
import pandas as pd
# preliminary spatial filter and quality filter
regext = shp_Reg.bounds
newfirepixels = df.loc[
(df[strlat] >= regext[1])
& (df[strlat] <= regext[3])
& (df[strlon] >= regext[0])
& (df[strlon] <= regext[2])
]
point_data = [Point(xy) for xy in zip(newfirepixels[strlon], newfirepixels[strlat])]
gdf_filtered = gpd.GeoDataFrame(newfirepixels, geometry=point_data, crs=4326)
# if shp_Reg is not a rectangle, do detailed filtering (within shp_Reg)
import math
Tempest McCabe
committed
if ((not math.isclose(shp_Reg.minimum_rotated_rectangle.area, shp_Reg.area)) | remove_static_sources_bool):
gdf_filtered = gdf_filtered[gdf_filtered["geometry"].within(shp_Reg)]
#print('current proj:',gdf_filtered.crs)
#print('converting to proj:',epsg)
# df_filtered = pd.DataFrame(gdf_filtered.drop(columns='geometry'))
""" Function used to filter non-static VIIRS active fire data
Parameters
----------
df : pandas DataFrame
Returns
-------
df_filtered : pandas DataFrame
the filtered fire pixels
# filter non-veg fires using a pre-derived mask
gdf_nv = read_geojson_nv_CA()
buf_nv = 0.0071 # buffer for each nv point (now set to 0.0071 deg = 0.71 km)
df_filtered = df[
(df.geometry.within(gdf_nv.iloc[0].geometry.buffer(0.005)) == False)
]
""" Function to set ampm column (using local hour) and update the df
Parameters
----------
df : pandas DataFrame
fire pixel data, with 'Lon' and 'YYYYMMDD_HHMM' column
df_withampm : pandas DataFrame
the DataFrame with 'ampm' column
import pandas as pd
import numpy as np
# calculate local hour using the longitude and YYYYMMDD_HHMM column
localhour = (
pd.to_timedelta(df.Lon / 15, unit="hours") + df["YYYYMMDD_HHMM"]
).dt.hour
# set am/pm flag based on local hour
df_withampm = df.assign(
ampm=np.where(((localhour > 6) & (localhour < 18)), "PM", "AM")
)
def AFP_toprj(gdf, epsg=32610):
"""Transforms lat/lon coordinates to projected coordinate system for computations in m
for global studies probably proj:cea would be a good choice
for the boreals we use North Pole LAEA (epsg:3571)
for CA may use WGS 84 / UTM zone 10N (epsg: 32610)
for US may use US National Atlas Equal Area (epsg: 9311)"""
import pandas as pd
gdf = gdf.to_crs(epsg=epsg)
gdf["x"] = gdf.geometry.x
gdf["y"] = gdf.geometry.y
df = pd.DataFrame(gdf.drop(columns="geometry"))
def save_AFPtmp(df, d, head="VNP14IMGML.", tail=".pkl"):
""" Function used to save regional filtered data to a temporary directory
"""
from FireConsts import dirtmpdata
import os
import pickle
fnm_tmp = os.path.join(dirtmpdata, head + d.strftime("%Y%m") + tail)
def load_AFPtmp(d, head="VNP14IMGML.", tail=".pkl"):
""" Function used to load regional filtered data to a temporary directory
"""
from FireConsts import dirtmpdata
import os
import pickle
# temporary file name
fnm_tmp = os.path.join(dirtmpdata, head + d.strftime("%Y%m") + tail)
if os_path_exists(fnm_tmp):
df = pickle.load(f)
return df
else:
print(f"Temporary file not found: {fnm_tmp}.")
def read_AFPVIIRS(
t,
region,
sat="SNPP",
cols=["Lat", "Lon", "FRP", "Sat", "DT", "DS", "YYYYMMDD_HHMM", "ampm", "x", "y"],
):
""" Read half-daily fire location for a region from monthly NOAA20 VIIRS fire data.
For the monthly VIIRS data, in order to reduce repeat calculation, we
do the spatial and quality filtering once and save the filtered data to
files in a temporary directory. For most time steps, we read the pre-saved
VIIRS data.
t : tuple, (int,int,int,str)
the year, month, day and 'AM'|'PM' during the intialization
region : 2-element tuple [regnm, regshp].
Regnm is the name of the region; Regshp can be
- a geometry
- a four-element list showing the extent of the region [lonmin,latmin,lonmax,latmax]
- a country name
cols : list
the column names of the output dataframe
df_AFP : pandas DataFrame
list of active fire pixels (filtered) from NOAA20
# read from pre-saved file
d = date(*t[:-1])
if sat == "SNPP":
sathead = "VNP14IMGML"
elif sat == "NOAA20":
sathead = "VJ114IMGML"
df = load_AFPtmp(d, head=region[0] + "_" + sathead + ".")
# if no pre-saved file, read from original VNP14IMGML file and do/save spatial filtering
if df is None:
# read or form shape used for filtering active fires
shp_Reg = get_reg_shp(region[1])
if sat == "SNPP":
df = read_VNP14IMGML(t[0], t[1])
df = df.loc[df["Type"] == 0] # type filtering
elif sat == "NOAA20":
df = read_VJ114IMGML(t[0], t[1])
df = df.loc[df["mask"] >= 7]
# set ampm
df = AFP_setampm(df)
save_AFPtmp(df, d, head=region[0] + "_" + sathead + ".")
# extract active pixels at current time step (day and ampm filter)
df = df.loc[(df["YYYYMMDD_HHMM"].dt.day == t[2]) & (df["ampm"] == t[-1])]
# return selected columns; need to change column names first
df_AFP = df[cols]
def read_AFPVIIRSNRT(
t,
region,
sat="SNPP",
cols=["Lat", "Lon", "FRP", "Sat", "DT", "DS", "YYYYMMDD_HHMM", "ampm", "x", "y"],):
""" Read half-daily active fire pixels in a region from daily NRT NOAA20 VIIRS
Parameters
----------
t : tuple, (int,int,int,str)
the year, month, day and 'AM'|'PM' during the intialization
region : 2-element tuple [regnm, regshp].
Regnm is the name of the region; Regshp can be
- a geometry
- a four-element list showing the extent of the region [lonmin,latmin,lonmax,latmax]
- a country name
cols : list
the column names of the output dataframe
Returns
-------
df_AFP : pandas DataFrame
list of active fire pixels (filtered) from SNPPNRT
from datetime import date
# read from VNP14IMGTDL file
df = read_VJ114IMGTDL(t)
else:
if df is None:
return None
# extract active pixes at current time step
# do regional filtering
shp_Reg = get_reg_shp(region[1])
# do non-static filtering; now only available at CA
# df = AFP_nonstatfilter(df)
# add the satellite information
# return selected columns; need to change column names first
df_AFP = df[cols]
return df_AFP
def read_AFP(t, src="SNPP", nrt=False, region=None):
""" The wrapper function used to read and extract half-daily active fire
pixels in a region.
Parameters
----------
t : tuple, (int,int,int,str)
the year, month, day and 'AM'|'PM' during the intialization
src : str
'SNPP' | 'NOAA20' | 'VIIRS' | 'BAMOD'
nrt : bool
if set to True, read NRT data instead of monthly data
region : 2-element tuple [regnm, regshp].
Regnm is the name of the region; Regshp can be
- a geometry
- a four-element list showing the extent of the region [lonmin,latmin,lonmax,latmax]
- a country name
Returns
-------
vlist : pandas DataFrame
extracted active fire pixel data used for fire tracking
vlist_SNPP = read_AFPVIIRSNRT(t, region, sat="SNPP")
vlist_NOAA20 = read_AFPVIIRSNRT(t, region, sat="NOAA20")
vlist = pd.concat([vlist_NOAA20, vlist_SNPP], ignore_index=True)
vlist_SNPP = read_AFPVIIRS(t, region, sat="SNPP")
vlist_NOAA20 = read_AFPVIIRS(t, region, sat="NOAA20")
vlist = pd.concat([vlist_NOAA20, vlist_SNPP], ignore_index=True)
elif src == "SNPP":
vlist = read_AFPVIIRS(t, region, sat="SNPP")
elif src == "NOAA20":
vlist = read_AFPVIIRS(t, region, sat="NOAA20")
elif src == "BAMOD":
vlist = read_BAMOD(t, region)
if vlist is None:
print("No data available for this source for time",t)
return
return vlist.reset_index(drop=True)
# ------------------------------------------------------------------------------
#%% Read other datasets
# ------------------------------------------------------------------------------
def read_mcd64_pixels(year, ext=[]):
"""Reads all Modis burned area pixels of a year from text file
year: int
ext: list or tuple, (minx, miny, maxx, maxy) extent for filtering pixels
df: pd.dataframe, dataframe containing lat, lon and day of burning
# import glob
import numpy as np
import pandas as pd
# filelist_viirs = glob.glob(dirpjdata+str(year)+'/Snapshot/*_NFP.txt')
# df = pd.concat([pd.read_csv(i, dtype = {'lat':np.float64, 'lon':np.float64},
# usecols = ['lat', 'lon'])
# for i in filelist_viirs], ignore_index=True)
# df['sensor'] = 'viirs'
filelist_mcd64 = os.path.join(mcd64dir, "ba_centroids_" + str(year) + ".csv")
df = pd.read_csv(
filelist_mcd64,
dtype={"lat": np.float64, "lon": np.float64},
usecols=["lat", "lon", "doy"],
)
# filter by bounding box
if len(ext) == 4:
minx, miny, maxx, maxy = ext
df = df.loc[
(df["lat"] >= miny)
& (df["lat"] <= maxy)
& (df["lon"] >= minx)
& (df["lon"] <= maxx)
]
def load_mcd64(year, xoff=0, yoff=0, xsize=None, ysize=None):
"""get annual circumpolar modis burned/unburned tif
Parameters
----------
year: int
xoff, yoff: int, UL pixel coordinates for clipping
xsize,ysize: int, pixels in x and y direction for clipping
Returns
-------
from FireConsts import dirextdata
import gdal
import os
mcd64dir = os.path.join(dirextdata, "MCD64A1")
fnm = os.path.join(mcd64dir, "mcd64_" + str(year) + ".tif")
arr = ds.ReadAsArray(xoff=xoff, yoff=yoff, xsize=xsize, ysize=ysize)
return arr
""" get shapefile of any region given the input file name
Parameters
----------
filename : str
the shapefile names saved in the directory dirextdata/shapefiles/
from FireConsts import dirextdata
import geopandas as gpd
import os
# find the california shapefile
dirshape = os.path.join(dirextdata, "shapefiles")
statefnm = os.path.join(dirshape, filename)
shp = gpd_read_file(statefnm).iloc[0].geometry
!!! this function can be realized using get_reg_shp(); still keep here for convenience...;
will be deleted or modified later !!!
from FireConsts import dirextdata
import geopandas as gpd
import os
# find the california shapefile
statefnm = os.path.join(dirextdata, "CA", "Calshape", "California.shp")
shp_Cal = gpd_read_file(statefnm).iloc[0].geometry
Parameters
----------
ctr : str
country name
import geopandas as gpd
import os
from FireConsts import dirextdata
ctyfnm = os.path.join(dirextdata, "World", "country.shp")
gdf_cty = gpd_read_file(ctyfnm)
g = gdf_cty[gdf_cty.CNTRY_NAME == ctr].iloc[0].geometry
return g
else:
return None
""" return the shape of a region, given an optional reg input
reg : object
region definition, one of the following
- a geometry
- a four-element list showing the extent of the region [lonmin,latmin,lonmax,latmax]
- a country name
shp_Reg : geometry
the geometry of the region shape
from shapely.geometry import Point, Polygon
import shapely
# read or form shape used for filtering active fires
if isinstance(reg, shapely.geometry.base.BaseGeometry):
shp_Reg = reg
shp_Reg = get_Cty_shp(reg)
if shp_Reg is None:
elif isinstance(reg, list):
shp_Reg = Polygon(
[
[reg[0], reg[1]],
[reg[0], reg[3]],
[reg[2], reg[3]],
[reg[2], reg[1]],
[reg[0], reg[1]],
]
)
print(
"Please use geometry, country name (in str), or [lonmin,latmin,lonmax,latmax] list for the parameter region"
)
Parameters
----------
locs : list of lists (nx2)
lat and lon values for each active fire detection
Returns
-------
vLCT : list of ints
land cover types for all input active fires
fnmLCT = os.path.join(dirextdata, "CA", "nlcd_510m.tif")
dataset = rasterio.open(fnmLCT)
transformer = pyproj.Transformer.from_crs("epsg:4326", dataset.crs)
locs_crs_x, locs_crs_y = transformer.transform(
# NOTE: EPSG 4326 expected coordinate order latitude, longitude, but
# `locs` is x (longitude), y (latitude). That's why `l[1]`, then `l[0]`
# here.
[l[1] for l in locs],
[l[0] for l in locs]
)
locs_crs = list(zip(locs_crs_x, locs_crs_y))
samps = list(dataset.sample(locs_crs))
vLCT = [int(s) for s in samps]
""" Get land cover type for active fires - CONUS scale.
This is the same function as get_LCT but with a CONUS wide file.
Parameters
----------
locs : list of lists (nx2)
lat and lon values for each active fire detection
Returns
-------
vLCT : list of ints
land cover types for all input active fires
"""
from FireConsts import dirextdata
import rasterio
import pyproj
import os
# read NLCD 500m data
fnmLCT = os.path.join(dirextdata, "NLCD", "nlcd_export_510m_simplified.tif")
dataset = rasterio.open(fnmLCT)
transformer = pyproj.Transformer.from_crs("epsg:4326", dataset.crs)
locs_crs_x, locs_crs_y = transformer.transform(
# NOTE: EPSG 4326 expected coordinate order latitude, longitude, but
# `locs` is x (longitude), y (latitude). That's why `l[1]`, then `l[0]`