"Table[nrow(Table) + 1,1] = \"Fay-Herriot R-squared = \" # R2 indicates the proportion of variation in MAP minus NFI estimated AGBD explained by terrain-slope \n",
# Function to compute the maximum likelihood estimates (MLEs) for a Fay-Herriot model.
# The function asks for
# x = covariates,
# y = AGB map estimates,
# vardir = sampling variance of map estimates
# If the variance associated with x (i.e. NFI estimates) is available, it may be ingested too; see example 2 below.
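# A minimal simulated example (an illustration, not from the original; run it
# only after FHfit is defined below, and note that all values are made up):
# n = 100
# x = runif(n)                 # e.g. terrain slope
# vardir = runif(n, 0.5, 2)    # known sampling variances of the map estimates
# y = 2 + 3*x + rnorm(n, sd=1) + rnorm(n, sd=sqrt(vardir))# simulated AGB map estimates
# fit = FHfit(y, x, vardir)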
FHfit=function(y,x,vardir){
n=length(y)# sample size
x1=cbind(rep(1,n),x)# Appends a column of 1's to the covariate matrix to allow an intercept
p=ncol(x1)# total number of regression coefficients (including the intercept)
# Below is the negative log-likelihood, dependent on log(\sigma^2),
# where \sigma^2 is the variance of the regression error
# i.e. the error that would remain even with 0 sampling error.
# The MLEs for regression coefficients are available in closed form
# GIVEN a fixed \sigma^2, so the likelihood only depends on \sigma^2.
# The log transformation removes the strict positivity constraint
# on \sigma^2. This eases the numerical optimization with respect to \sigma^2.
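# Concretely (standard Gaussian likelihood algebra, stated here for reference;
# constants and the factor of 2 are dropped since they do not move the optimum):
# -2 log L(\sigma^2) \propto \sum_i log(\sigma^2 + vardir_i)
#                          + \sum_i (y_i - x1_i' \hat\beta_{GLS})^2 / (\sigma^2 + vardir_i)
# which is exactly the quantity `obj` computed below.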
nll=function(log.s2,y,x1,vardir){
s2=exp(log.s2)# backtransform to yield \sigma^2
S=s2+vardir# Compute the diagonal of $\Sigma$, the covariance matrix for the total error (sampling + regression error).
x1s=x1/S# Compute \Sigma^{-1}x1
ys=y/S# Compute \Sigma^{-1}y
Coefficient.hat=solve(crossprod(x1s,x1),crossprod(x1s,y))# Use the previous two quantities to compute the GLS estimate \hat\beta = (x1'\Sigma^{-1}x1)^{-1} x1'\Sigma^{-1}y
res=y-x1%*%Coefficient.hat# The GLS residuals
ress=ys-x1s%*%Coefficient.hat# \Sigma^{-1} times the GLS residuals
obj=sum(log(S))+crossprod(res,ress)[1,1]# negative log-likelihood (up to an additive constant and a factor of 2): log-determinant of the error covariance matrix + the residual quadratic form res' \Sigma^{-1} res
return(obj)# return the negative log-likelihood
}
# The following line computes the MLE for \sigma^2 by minimizing the negative log-likelihood
# (minimizing the negative log-likelihood is the same as maximizing the log-likelihood).
# A lower bound of \sigma^2 = 0.001 and an
# upper bound of \sigma^2 = the sample variance of y
# are placed to avoid potential numerical issues in the optimization.
# The upper bound is almost certainly always reasonable
# (the regression variance should be less than the total variance)
# but, depending on the units, the lower bound can be changed.
# Note that if the units are Mg/ha biomass, this lower bound corresponds
# to an error standard deviation of ~0.03 Mg/ha, implausibly low for most applications.
# However, if the returned MLE for s2 (\sigma^2) is equal to the lower bound,
# you should lower this lower bound (see the optional boundary check after the optimize() call below).
fit=optimize(interval=c(log(0.001),# Lower bound for search
log(var(y))),# Upper bound for search
f=nll,# The function to be minimized
y=y,x1=x1,vardir=vardir)# The variables nll() needs.
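# Optional sanity check (an illustrative addition, not in the original):
# warn if the optimizer landed on the lower search bound, per the note above.
if(fit$minimum <= log(0.001) + 1e-6){
warning("MLE for sigma^2 hit the lower search bound; consider lowering it")
}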
s2=exp(fit$minimum)# Undo the log transform to yield the MLE for \sigma^2
S=s2+vardir# Compute the diagonal of $\Sigma$, the covariance matrix for the total error (sampling + regression error).
x1s=x1/S# Compute \Sigma^{-1}x1
ys=y/S# Compute \Sigma^{-1}y
Coefficient.hat=solve(crossprod(x1s,x1),crossprod(x1s,y))# Use the previous two quantities to compute the GLS estimates for the regression coefficients
Coefficient.var=chol2inv(chol(crossprod(x1s,x1)))#covariance matrix of the reg. coefficients
Coefficient.sd=sqrt(diag(Coefficient.var))# standard errors of the regression coefficients
p.vals=2*pt(abs(Coefficient.hat/Coefficient.sd),df=n-p,lower.tail=FALSE)# two-sided p-values, computed using a t distribution
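# Each test statistic t_j = Coefficient.hat[j]/Coefficient.sd[j] is compared
# against a t distribution with n - p degrees of freedom; taking abs() and
# doubling the upper-tail probability gives the two-sided p-value P(|T| > |t_j|).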