Trend surface estimation

Prepare the data for analysis

First, we download the spatial data:

  1. The GeoDatabase containing the spatial layers for the study area
  2. The CABIN data for Hamilton Harbour

Import the data in R

library(sf)
# The RDS file is an R object and can be read with the following command:
hamilton_habitat_desc <- readRDS("../data/post_process/hamilton_habitat.rds")
lake <- st_read("../data/raw/hamilton_harbor.gdb", layer="waterbody_2")
## Reading layer `waterbody_2' from data source `/home/steve/Documents/Git/PRStats/PR-GSFE01/data/raw/hamilton_harbor.gdb' using driver `OpenFileGDB'
## Simple feature collection with 45 features and 32 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -8899166 ymin: 5349406 xmax: -8878953 ymax: 5360658
## epsg (SRID):    3857
## proj4string:    +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs
# Making sure both are on the same CRS
st_crs(lake) == st_crs(hamilton_habitat_desc)
## [1] FALSE
hamilton_habitat_desc <- st_transform(hamilton_habitat_desc, st_crs(lake))

Filter the variables of interest: total phosphorus (ppm), lake bottom temperature, and depth

# The data contain replicates (several years and samples), so we want to average
# all measurements of total phosphorus by site (see the sketch after these chunks).
library(dplyr)

# Total Phosphorus in sediment
hamilton_tp <- hamilton_habitat_desc %>% 
  filter(Variable == "TP") %>% 
  group_by(Site.Site) 

# Lake Bottom Temperature
hamilton_lbt <- hamilton_habitat_desc %>% 
  filter(Variable == "General-TempLakeBottom") %>% 
  group_by(Site.Site) 

# Depth from Surface
hamilton_depth <- hamilton_habitat_desc %>% 
  filter(Variable == "Depth-Lake") %>% 
  group_by(Site.Site) 
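
The chunks above only group the replicate measurements. A minimal sketch of the averaging step described in the comment could look like the following (the name mean_tp is illustrative; the analysis below keeps the individual measurements):

# Sketch: average the replicate TP measurements by site
# (assumes the measured value is stored in Value.Valeur, as above)
hamilton_tp_mean <- hamilton_tp %>%
  summarise(mean_tp = mean(Value.Valeur, na.rm = TRUE))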

We perform a visual check of the measurements on an interactive map

library(mapview)
mapview(hamilton_tp, zcol='Year.Année', cex = "Value.Valeur")   
mapview(hamilton_lbt, zcol='Year.Année', cex = "Value.Valeur")  
mapview(hamilton_depth, zcol='Year.Année', cex = "Value.Valeur")

We need to convert the sf spatial objects to sp objects, because the gstat package does not support the sf class.

# Organise into SpatialPointsDataFrame objects
hml_tp <- as(hamilton_tp, "Spatial")
hml_depth <- as(hamilton_depth, "Spatial")
hml_lbt <- as(hamilton_lbt, "Spatial")

# Organise into a single SpatialPointsDataFrame
hml <- hml_tp
colnames(hml@data)[17] <- "tp"
hml$depth <- hml_depth$Value.Valeur
hml$lbt <- hml_lbt$Value.Valeur

# A data.frame of the non-spatial data
tp <- hml@data$tp

# SpatialPolygons
lake <- as(lake, "Spatial")

Build a reference raster onto which the trend surface will be projected

library(raster)
r_ref <- raster(lake, res = 100)

Find the pixel locations of the sampling sites on the raster. In the process, it is important to standardize the coordinates.

sampleLoc <- cellFromXY(r_ref, hml)

xy <- scale(coordinates(r_ref))
xySample <- xy[sampleLoc,]

# Add x and y to hmlDF
hmlDF <- data.frame(tp = tp, x = xySample[,1], y = xySample[,2])

The data are now ready for analysis. The packages needed for each model are loaded in the corresponding sections below.

Trend surface analysis using lm

Trend surface analysis of degree 2
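
Using the standardized coordinates \(x\) and \(y\), the model fitted below is the full second-order polynomial surface

\[Z(\mathbf{s}) = \beta_0 + \beta_1 x + \beta_2 y + \beta_3 x^2 + \beta_4 y^2 + \beta_5 xy + \varepsilon(\mathbf{s}).\]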

lmTp <- lm(tp ~ x + y + I(x^2) + I(y^2) + I(x*y), data = hmlDF)
summary(lmTp)
## 
## Call:
## lm(formula = tp ~ x + y + I(x^2) + I(y^2) + I(x * y), data = hmlDF)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1839.80  -362.14   -14.65   393.16  1891.33 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2305.47      98.32  23.450  < 2e-16 ***
## x            -300.38     349.03  -0.861  0.39131    
## y            1590.85     321.49   4.948 2.69e-06 ***
## I(x^2)        808.93     375.81   2.152  0.03352 *  
## I(y^2)      -1250.06     393.61  -3.176  0.00193 ** 
## I(x * y)     -777.88     471.59  -1.649  0.10188    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 716.3 on 111 degrees of freedom
## Multiple R-squared:  0.2007, Adjusted R-squared:  0.1646 
## F-statistic: 5.573 on 5 and 111 DF,  p-value: 0.0001282
rTrend <- r_ref
rTrend[1:length(rTrend)] <- predict(lmTp, newdata = as.data.frame(xy))

rTrendMask <- mask(rTrend, lake)
plot(rTrendMask)

Calculate the residual sum of squares

\[\text{RSS} = \sum_{j=1}^T\sum_{i=1}^m \left(Z(\mathbf{s}_i, t_j) - \widehat{Z}(\mathbf{s}_i, t_j)\right)^2\]

RSSLM <- sum((hmlDF$tp - predict(lmTp))^2)

Calculate model variance

\[\widehat\sigma^2_\varepsilon = \frac{\text{RSS}}{mT-p-1}.\]

m <- nrow(hmlDF)
ncoef <- length(coef(lmTp))
varResidLM <- RSSLM/(m - ncoef)
varResidLM
## [1] 513142.7

Trend surface analysis using additive models
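
In the additive model, the parametric polynomial is replaced by a smooth surface of the coordinates; the model fitted below with mgcv is

\[Z(\mathbf{s}) = \beta_0 + f(x, y) + \varepsilon(\mathbf{s}),\]

where \(f\) is a two-dimensional smooth term (a thin plate regression spline, the default basis for s(x,y) in mgcv).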

library(mgcv)
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:raster':
## 
##     getData
## The following object is masked from 'package:dplyr':
## 
##     collapse
## This is mgcv 1.8-29. For overview type 'help("mgcv-package")'.
AMTp <- gam(tp ~ s(x,y), data = hmlDF, family = "gaussian")
summary(AMTp)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## tp ~ s(x, y)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2278.9       52.3   43.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##          edf Ref.df     F  p-value    
## s(x,y) 21.94  26.13 4.121 3.86e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.479   Deviance explained = 57.7%
## GCV = 3.9814e+05  Scale est. = 3.2009e+05  n = 117
rAMTrend <- r_ref
rAMTrend[1:length(rAMTrend)] <- predict(AMTp, newdata = as.data.frame(xy))

rAMTrendMask <- mask(rAMTrend, lake)
plot(rAMTrendMask)

Calculate the residual sum of squares

RSSAM <- sum((hmlDF$tp - predict(AMTp))^2)

Calculate model variance

m <- nrow(hmlDF)
ncoef <- length(coef(AMTp))
varResidAM <- RSSAM/(m - ncoef)
varResidAM
## [1] 346068.6

Question:

Which model is the “best” one based on these statistics?
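
A quick way to put the two statistics side by side, using the objects computed above:

# Residual variance of the two trend surfaces
c(lm = varResidLM, gam = varResidAM)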

Moran’s \(I\)
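
Moran’s \(I\) measures the spatial autocorrelation of a variable \(Z\) observed at \(m\) sites, given spatial weights \(w_{ij}\) (here built from the distance-based neighbours below):

\[I = \frac{m}{\sum_{i=1}^m \sum_{j=1}^m w_{ij}} \cdot \frac{\sum_{i=1}^m \sum_{j=1}^m w_{ij}\,(Z_i - \bar{Z})(Z_j - \bar{Z})}{\sum_{i=1}^m (Z_i - \bar{Z})^2}\]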

library(spdep)

# Construct neighbours
dnn <- dnearneigh(coordinates(hml), 0, 1500)
plot(dnn, coordinates(hml))

MoranI <- moran.mc(tp, nb2listw(dnn), nsim = 999,  alternative = "greater", zero.policy = TRUE)

MoranI
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  tp 
## weights: nb2listw(dnn)  
## number of simulations + 1: 1000 
## 
## statistic = 0.076833, observed rank = 981, p-value = 0.019
## alternative hypothesis: greater

Exercise:

Recalculate Moran’s \(I\) at different distances. Think about the number of permutations to use.
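
A minimal sketch of one way to approach this (the distance thresholds, in map units, and the number of simulations are arbitrary examples):

# Sketch: recompute Moran's I for several neighbourhood distances
for (d in c(500, 1000, 1500, 2000, 3000)) {
  nb <- dnearneigh(coordinates(hml), 0, d)
  mi <- moran.mc(tp, nb2listw(nb, zero.policy = TRUE), nsim = 999,
                 alternative = "greater", zero.policy = TRUE)
  cat("d =", d, ": I =", round(mi$statistic, 4), ", p =", mi$p.value, "\n")
}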