Trend surface estimation

Prepare the data for analysis

First, we download the spatial data:

  1. The GeoDatabase containing the spatial layers for the study area
  2. The CABIN data for Hamilton Harbour

Import the data in R

library(sf)
# The RDS file is an R object and can be read with the following command:
hamilton_habitat_desc <- readRDS("../data/post_process/hamilton_habitat.rds")
lake <- st_read("../data/raw/hamilton_harbor.gdb", layer="waterbody_2")
## Reading layer `waterbody_2' from data source `/home/steve/Documents/Git/PRStats/PR-GSFE01/data/raw/hamilton_harbor.gdb' using driver `OpenFileGDB'
## Simple feature collection with 45 features and 32 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -8899166 ymin: 5349406 xmax: -8878953 ymax: 5360658
## epsg (SRID):    3857
## proj4string:    +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs
# Making sure both are on the same CRS
st_crs(lake) == st_crs(hamilton_habitat_desc)
## [1] FALSE
hamilton_habitat_desc <- st_transform(hamilton_habitat_desc, st_crs(lake))

Filter the variables of interest: total phosphorus (ppm), lake bottom temperature, and depth

# The data contain replicates (several years and samples), so we want to average
# all measurements of total phosphorus by site (see the sketch after these chunks).
library(dplyr)

# Total Phosphorus in sediment
hamilton_tp <- hamilton_habitat_desc %>% 
  filter(Variable == "TP") %>% 
  group_by(Site.Site) 

# Lake Bottom Temperature
hamilton_lbt <- hamilton_habitat_desc %>% 
  filter(Variable == "General-TempLakeBottom") %>% 
  group_by(Site.Site) 

# Depth from Surface
hamilton_depth <- hamilton_habitat_desc %>% 
  filter(Variable == "Depth-Lake") %>% 
  group_by(Site.Site) 
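
The chunks above only group the replicate measurements. A minimal sketch of the averaging step described in the comment could look like the following (the name mean_tp is illustrative; the analysis below keeps the individual measurements):

# Sketch: average the replicate TP measurements by site
# (assumes the measured value is stored in Value.Valeur, as above)
hamilton_tp_mean <- hamilton_tp %>%
  summarise(mean_tp = mean(Value.Valeur, na.rm = TRUE))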

We perform a visual check of the measurements on an interactive map

library(mapview)
mapview(hamilton_tp, zcol='Year.Année', cex = "Value.Valeur")   
mapview(hamilton_lbt, zcol='Year.Année', cex = "Value.Valeur")  
mapview(hamilton_depth, zcol='Year.Année', cex = "Value.Valeur")

We need to convert the sf spatial objects to sp objects, because the gstat package does not support the sf class.

# Organise into SpatialPointsDataFrame objects
hml_tp <- as(hamilton_tp, "Spatial")
hml_depth <- as(hamilton_depth, "Spatial")
hml_lbt <- as(hamilton_lbt, "Spatial")

# Organise into a single SpatialPointsDataFrame
hml <- hml_tp
colnames(hml@data)[17] <- "tp"
hml$depth <- hml_depth$Value.Valeur
hml$lbt <- hml_lbt$Value.Valeur

# A data.frame of the non-spatial data
tp <- hml@data$tp

# SpatialPolygons
lake <- as(lake, "Spatial")

Build a reference raster onto which the trend surface will be projected

library(raster)
r_ref <- raster(lake, res = 100)

Find the pixel locations of the sampling sites on the raster. In the process, it is important to standardize the coordinates.

sampleLoc <- cellFromXY(r_ref, hml)

xy <- scale(coordinates(r_ref))
xySample <- xy[sampleLoc,]

# Add x and y to hmlDF
hmlDF <- data.frame(tp = tp, x = xySample[,1], y = xySample[,2])

The data are now ready for analysis. The packages needed for each model are loaded in the corresponding sections below.

Trend surface analysis using lm

Trend surface analysis of degree 2
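
Using the standardized coordinates \(x\) and \(y\), the model fitted below is the full second-order polynomial surface

\[Z(\mathbf{s}) = \beta_0 + \beta_1 x + \beta_2 y + \beta_3 x^2 + \beta_4 y^2 + \beta_5 xy + \varepsilon(\mathbf{s}).\]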

lmTp <- lm(tp ~ x + y + I(x^2) + I(y^2) + I(x*y), data = hmlDF)
summary(lmTp)
## 
## Call:
## lm(formula = tp ~ x + y + I(x^2) + I(y^2) + I(x * y), data = hmlDF)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1839.80  -362.14   -14.65   393.16  1891.33 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2305.47      98.32  23.450  < 2e-16 ***
## x            -300.38     349.03  -0.861  0.39131    
## y            1590.85     321.49   4.948 2.69e-06 ***
## I(x^2)        808.93     375.81   2.152  0.03352 *  
## I(y^2)      -1250.06     393.61  -3.176  0.00193 ** 
## I(x * y)     -777.88     471.59  -1.649  0.10188    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 716.3 on 111 degrees of freedom
## Multiple R-squared:  0.2007, Adjusted R-squared:  0.1646 
## F-statistic: 5.573 on 5 and 111 DF,  p-value: 0.0001282
rTrend <- r_ref
rTrend[1:length(rTrend)] <- predict(lmTp, newdata = as.data.frame(xy))

rTrendMask <- mask(rTrend, lake)
plot(rTrendMask)

Calculate the residual sum of squares

\[\text{RSS} = \sum_{j=1}^T\sum_{i=1}^m \left(Z(\mathbf{s}_i, t_j) - \widehat{Z}(\mathbf{s}_i, t_j)\right)^2\]

RSSLM <- sum((hmlDF$tp - predict(lmTp))^2)

Calculate model variance

\[\widehat\sigma^2_\varepsilon = \frac{\text{RSS}}{mT-p-1}.\]

m <- nrow(hmlDF)
ncoef <- length(coef(lmTp))
varResidLM <- RSSLM/(m - ncoef)
varResidLM
## [1] 513142.7

Trend surface analysis using additive models
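
In the additive model, the parametric polynomial is replaced by a smooth surface of the coordinates; the model fitted below with mgcv is

\[Z(\mathbf{s}) = \beta_0 + f(x, y) + \varepsilon(\mathbf{s}),\]

where \(f\) is a two-dimensional smooth term (a thin plate regression spline, the default basis for s(x,y) in mgcv).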

library(mgcv)
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:raster':
## 
##     getData
## The following object is masked from 'package:dplyr':
## 
##     collapse
## This is mgcv 1.8-29. For overview type 'help("mgcv-package")'.
AMTp <- gam(tp ~ s(x,y), data = hmlDF, family = "gaussian")
summary(AMTp)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## tp ~ s(x, y)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2278.9       52.3   43.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##          edf Ref.df     F  p-value    
## s(x,y) 21.94  26.13 4.121 3.86e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.479   Deviance explained = 57.7%
## GCV = 3.9814e+05  Scale est. = 3.2009e+05  n = 117
rAMTrend <- r_ref
rAMTrend[1:length(rAMTrend)] <- predict(AMTp, newdata = as.data.frame(xy))

rAMTrendMask <- mask(rAMTrend, lake)
plot(rAMTrendMask)

Calculate the residual sum of squares

RSSAM <- sum((hmlDF$tp - predict(AMTp))^2)

Calculate model variance

m <- nrow(hmlDF)
ncoef <- length(coef(AMTp))
varResidAM <- RSSAM/(m - ncoef)
varResidAM
## [1] 346068.6

Question:

Which model is the “best” one based on these statistics?
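
A quick way to put the two statistics side by side, using the objects computed above:

# Residual variance of the two trend surfaces
c(lm = varResidLM, gam = varResidAM)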

Moran’s \(I\)
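
Moran’s \(I\) measures the spatial autocorrelation of a variable \(Z\) observed at \(m\) sites, given spatial weights \(w_{ij}\) (here built from the distance-based neighbours below):

\[I = \frac{m}{\sum_{i=1}^m \sum_{j=1}^m w_{ij}} \cdot \frac{\sum_{i=1}^m \sum_{j=1}^m w_{ij}\,(Z_i - \bar{Z})(Z_j - \bar{Z})}{\sum_{i=1}^m (Z_i - \bar{Z})^2}\]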

library(spdep)

# Construct neighbours
dnn <- dnearneigh(coordinates(hml), 0, 1500)
plot(dnn, coordinates(hml))

MoranI <- moran.mc(tp, nb2listw(dnn), nsim = 999,  alternative = "greater", zero.policy = TRUE)

MoranI
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  tp 
## weights: nb2listw(dnn)  
## number of simulations + 1: 1000 
## 
## statistic = 0.076833, observed rank = 981, p-value = 0.019
## alternative hypothesis: greater

Exercise:

Recalculate Moran’s \(I\) at different distances. Think about the number of permutations to use.
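
A minimal sketch of one way to approach this (the distance thresholds, in map units, and the number of simulations are arbitrary examples):

# Sketch: recompute Moran's I for several neighbourhood distances
for (d in c(500, 1000, 1500, 2000, 3000)) {
  nb <- dnearneigh(coordinates(hml), 0, d)
  mi <- moran.mc(tp, nb2listw(nb, zero.policy = TRUE), nsim = 999,
                 alternative = "greater", zero.policy = TRUE)
  cat("d =", d, ": I =", round(mi$statistic, 4), ", p =", mi$p.value, "\n")
}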