First, we download the spatial data.
Import the data in R
library(sf)
# The RDS file is an R object and can be read with the following command:
hamilton_habitat_desc <- readRDS("../data/post_process/hamilton_habitat.rds")
lake <- st_read("../data/raw/hamilton_harbor.gdb", layer="waterbody_2")
## Reading layer `waterbody_2' from data source `/home/steve/Documents/Git/PRStats/PR-GSFE01/data/raw/hamilton_harbor.gdb' using driver `OpenFileGDB'
## Simple feature collection with 45 features and 32 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -8899166 ymin: 5349406 xmax: -8878953 ymax: 5360658
## epsg (SRID): 3857
## proj4string: +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs
Filter the variable total phosphorus (ppm)
# Because the data contains replicates (several years and samples), we want to average all
# measurements of total phosphorus by site.
library(dplyr)
# Total Phosphorus in sediment
hamilton_tp <- hamilton_habitat_desc %>%
  filter(Variable == "TP") %>%
  group_by(Site.Site)
# Lake Bottom Temperature
hamilton_lbt <- hamilton_habitat_desc %>%
  filter(Variable == "General-TempLakeBottom") %>%
  group_by(Site.Site)
# Depth from Surface
hamilton_depth <- hamilton_habitat_desc %>%
  filter(Variable == "Depth-Lake") %>%
  group_by(Site.Site)
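The averaging step itself is not shown in this excerpt; a minimal sketch of how the per-site means could be computed from the grouped data, assuming the measurement column is Value.Valeur (as used further below):
# Sketch only: collapse replicates to one mean value per site
# (assumes the measurement column is Value.Valeur)
hamilton_tp_mean <- hamilton_tp %>%
  summarise(Value.Valeur = mean(Value.Valeur, na.rm = TRUE))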
We perform a visual check of the mean of each variable.
We need to transform the sf spatial object to sp, because the gstat package doesn't support the sf class.
# Organise into SpatialPointsDataFrame
hml_tp <- as(hamilton_tp, "Spatial")
hml_depth <- as(hamilton_depth, "Spatial")
hml_lbt <- as(hamilton_lbt, "Spatial")
# Organise into a single SpatialPointsDataFrame
hml <- hml_tp
# Rename the measurement column (column 17) to "tp"
colnames(hml@data)[17] <- "tp"
hml$depth <- hml_depth$Value.Valeur
hml$lbt <- hml_lbt$Value.Valeur
# Extract the TP measurements from the non-spatial data
tp <- hml@data$tp
# SpatialPolygons
lake <- as(lake, "Spatial")
Build a reference raster onto which to project the predictions
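The construction of r_ref is not shown in this excerpt; a minimal sketch using the raster package, where the 100 m resolution is an assumption:
library(raster)
# Sketch only: empty reference raster covering the lake, in the same CRS
# (the 100 m pixel size is an assumption, not from the original)
r_ref <- raster(extent(lake), crs = proj4string(lake))
res(r_ref) <- 100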
Find the pixel location of each sample in the raster. In the process, it is important to standardize the coordinates: centring and scaling keeps the polynomial terms of the trend surface numerically well behaved.
# Raster cell containing each sample point
sampleLoc <- cellFromXY(r_ref, hml)
# Standardized coordinates of every raster cell
xy <- scale(coordinates(r_ref))
xySample <- xy[sampleLoc, ]
# Add x and y to hmlDF
hmlDF <- data.frame(tp = tp, x = xySample[,1], y = xySample[,2])
The data is now ready for analysis. We first load the required packages.
Trend surface analysis of degree 2 with lm
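The fitting call is not shown in this excerpt; it can be reconstructed from the Call line of the summary below:
# Second-degree polynomial trend surface on the standardized coordinates
lmTp <- lm(tp ~ x + y + I(x^2) + I(y^2) + I(x * y), data = hmlDF)
summary(lmTp)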
##
## Call:
## lm(formula = tp ~ x + y + I(x^2) + I(y^2) + I(x * y), data = hmlDF)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1839.80 -362.14 -14.65 393.16 1891.33
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2305.47 98.32 23.450 < 2e-16 ***
## x -300.38 349.03 -0.861 0.39131
## y 1590.85 321.49 4.948 2.69e-06 ***
## I(x^2) 808.93 375.81 2.152 0.03352 *
## I(y^2) -1250.06 393.61 -3.176 0.00193 **
## I(x * y) -777.88 471.59 -1.649 0.10188
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 716.3 on 111 degrees of freedom
## Multiple R-squared: 0.2007, Adjusted R-squared: 0.1646
## F-statistic: 5.573 on 5 and 111 DF, p-value: 0.0001282
rTrend <- r_ref
# Fill every raster cell with the polynomial trend prediction
rTrend[1:length(rTrend)] <- predict(lmTp, newdata = as.data.frame(xy))
# Clip the prediction to the lake polygon
rTrendMask <- mask(rTrend, lake)
plot(rTrendMask)
Both trend surface models are fit by least squares, minimizing the residual sum of squares over the \(m\) sites \(\mathbf{s}_i\) (and, in the general spatio-temporal case, the \(T\) time points \(t_j\)):
\[\text{RSS} = \sum_{j=1}^T\sum_{i=1}^m \left(Z(\mathbf{s}_i, t_j) - \widehat{Z}(\mathbf{s}_i, t_j)\right)^2\]
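We fit a smooth spatial surface with the mgcv package; attaching it produces the messages below.
library(mgcv)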
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:raster':
##
## getData
## The following object is masked from 'package:dplyr':
##
## collapse
## This is mgcv 1.8-29. For overview type 'help("mgcv-package")'.
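The fitting call that produced the following summary is not shown; a sketch reconstructed from the Formula line and the later use of AMTp, assuming default s() settings:
# Additive model: isotropic thin-plate smooth of the standardized coordinates
# (default basis dimension assumed; the original may have set k explicitly)
AMTp <- gam(tp ~ s(x, y), data = hmlDF)
summary(AMTp)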
##
## Family: gaussian
## Link function: identity
##
## Formula:
## tp ~ s(x, y)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2278.9 52.3 43.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(x,y) 21.94 26.13 4.121 3.86e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.479 Deviance explained = 57.7%
## GCV = 3.9814e+05 Scale est. = 3.2009e+05 n = 117
rAMTrend <- r_ref
# Fill every raster cell with the additive-model prediction
rAMTrend[1:length(rAMTrend)] <- predict(AMTp, newdata = as.data.frame(xy))
# Clip the prediction to the lake polygon
rAMTrendMask <- mask(rAMTrend, lake)
plot(rAMTrendMask)
library(spdep)
# Construct neighbours: link all pairs of sites within 1500 m
dnn <- dnearneigh(coordinates(hml), 0, 1500)
plot(dnn, coordinates(hml))
MoranI <- moran.mc(tp, nb2listw(dnn), nsim = 999, alternative = "greater", zero.policy = TRUE)
MoranI
##
## Monte-Carlo simulation of Moran I
##
## data: tp
## weights: nb2listw(dnn)
## number of simulations + 1: 1000
##
## statistic = 0.076833, observed rank = 981, p-value = 0.019
## alternative hypothesis: greater
Exercise: recalculate Moran's \(I\) at different distances, and think about the number of permutations to use; a sketch of one approach follows.
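A minimal sketch, looping over upper distance bounds (the 500 m steps and the 3000 m maximum are assumptions for illustration):
# Hypothetical distance bands, in metres; adjust to the data
dists <- seq(500, 3000, by = 500)
moranByDist <- sapply(dists, function(d) {
  nb <- dnearneigh(coordinates(hml), 0, d)
  moran.mc(tp, nb2listw(nb, zero.policy = TRUE), nsim = 999,
           alternative = "greater", zero.policy = TRUE)$statistic
})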