Prepare the R environment

We load the packages needed for this practice.

# Spatial
library(sf)
## Linking to GEOS 3.6.2, GDAL 2.2.3, PROJ 4.9.3
library(sp) 
# Manipulation
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Analysis
library(gstat)
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo

The first step is to load the data from the the CABIN Canadian Aquatic Biomonitoring Network in the Hamilton Harbour. We then want to project in the NAD83 / Ontario MNR Lambert (more accurate and suitable for the study area). If you want to read more about it: http://epsg.io/3161

Load data

The .rds can be load in the R environment using the function readRDS(). The file contains an R object: the CABIN data for the Hamilton Harbour.

hamilton <- readRDS("../data/post_process/hamilton_habitat.rds")
# Reproject the spatial data in NAD83 / Ontario MNR Lambert
ham_sf <- st_transform(hamilton, crs = 3161)

Filter the data

Here, we focus on the dependant variable TP and the year 2007.

ham_sf_2005 <- ham_sf %>% 
        filter(Year.Année == 2005 & Variable == "TP")

ham_sf_2005Depth <- ham_sf %>% 
        filter(Year.Année == 2005 & Variable == "Depth-Lake")

ham_sf_2005_NoDup <- ham_sf_2005[!duplicated(ham_sf_2005$Site.Site),]

# Transform into an sp object needed for the variogram function
ham_SP_2005 <- as(ham_sf_2005_NoDup, "Spatial")

#---------------------------------------------------
## Another way to work around the duplicated samples
#---------------------------------------------------
ham_sf_2005 <- ham_sf %>% 
        filter(Year.Année == 2005 & Variable == "TP")

ham_sf_2005Depth <- ham_sf %>% 
        filter(Year.Année == 2005 & Variable == "Depth-Lake")

ham_sf_2005_WithDepth <- ham_sf_2005
ham_sf_2005_WithDepth$depth <- ham_sf_2005Depth$Value.Valeur

# Transform into an sp object needed for the variogram function
ham_SP_2005_DepthSP <- as(ham_sf_2005_WithDepth, "Spatial")
ham_SP_2005_DepthSP@coords <- jitter(ham_SP_2005_DepthSP@coords,factor = 0.015)

Cloud variogram

Build the cloud variogram

maxDist <- max(dist(coordinates(ham_SP_2005)))
varioCloud <- variogram(Value.Valeur ~ 1,
                   data = ham_SP_2005,
                   cloud= TRUE,
                   cutoff = maxDist)

Plot the cloud variogram

par(mar = c(5,6.5,0.5,0.5))
plot(varioCloud$dist, varioCloud$gamma,
     xlab = "Distance (m)",
     ylab = "Variance",
     las = 1)

Directional variogram

To convert the variogram into a directional one, we have to set the argument alpha. We pass a vector of angles through the alpha parameters: c(0, 45, 90, 135).

Empirical variogram

Emprical sometimes referred to sample variogram.

Step 1 - Study spatial autocorrelation

plot(varioCloud$dist, varioCloud$gamma,
     xlab = "Distance (m)",
     ylab = "Variance",
     las = 1)

Visual autocorrelation check

hscat(Value.Valeur ~ 1,
      data = ham_SP_2005,
      breaks = seq(0,maxDist, by = 500))

Step 2 - Cloud variogram

Construct a cloud variogram with a maximum distance of 1500m.

Step 3 - Check for outliers

Select point pairs by digitizing a region with the mouse (left mouse button adds a point, right mouse button ends)

# This is easier to run outside RStudio as such:
# windows() # For windows users
# quartz() # For mac users

outliers <- plot(varioCloud, digitize = TRUE)
outliers

Ids of the row for the pair of points

##   head tail
## 1    4    6
## 2    5    6

Step 4 - Remove outlier

ham_SP_2005_clean <- ham_SP_2005[-6,]

Step 5 - Redraw the cloud variogram

varioCloud <- variogram(Value.Valeur ~ 1, data = ham_SP_2005_clean,
                   cloud= TRUE,cutoff = 1500)
plot(varioCloud)

Step 6 - Define bin limits

binLimit <- c(seq(0,1000,by = 100),
              seq(1200,1500, by = 200))
# Construct variogram
varioEmpGood <- variogram(Value.Valeur ~ 1,
                          data = ham_SP_2005_clean,
                          cutoff = 1500,
                          boundaries = binLimit)
plot(varioEmpGood, cex = 1, pch = 19)

Residual empirical variogram

It is also possible to study the autocorrelation structure in the data after accounting for one or more covariates.

In the example below, we evaluate the autocorrelation structure after accounting for sampling effort.

varioEmpResid <- variogram(Value.Valeur ~ SampleNumber.Numérod.échantillon.,
                           data = ham_SP_2005_clean,
                           cutoff = 1500,
                          boundaries = binLimit)
## Warning in variogram.default(y, locations, X, trend.beta = beta, grid =
## grid, : linear model has singular covariance matrix
plot(varioEmpResid, cex = 1, pch = 19, col = "orange")

Model variogram

In regard to step 1 and step 2, we choose to fit an exponential model variogram with the restricted maximum likelihood (REML).

# Fit using cloud variogram
modelIni <-  vgm(psill = 4e5, model = "Exp",
                 range = 800, nugget = 50000)

modelVario <- fit.variogram.reml(Value.Valeur ~ 1,
                                 data = ham_SP_2005_clean,
                                 model = modelIni)

# Fit using empirical variogram
modelIni <-  vgm(psill = 3.5e5, model = "Exp",
                 range = 600, nugget = 1e5)

# plot(varioEmpGood,model = modelIni)
#Do this step a few times to make sure you have good starting values

modelVarioEmp <- fit.variogram(varioEmpGood,
                               model = modelIni,
                               fit.method = 7)

Model visualization

plot(varioCloud, model = modelVario, lwd = 3)

plot(varioEmpGood, model = modelVarioEmp, lwd = 3)

Exercice

  1. Build a model variogram using the empirical variogram
  • This means using another model variogram fitting method
  1. Build a model variogram with another variable
  • This may require using another model variogram
  1. Build a model variogram on your own data
  • Think of the question you want to answer
  • The size of your data will influence the choice of model fitting procedure

Kriging

Using the model variogram defined above we can now build kriging maps

Step 1 - Define a grid

library(sf)
library(raster)
## 
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
## 
##     select
lake <- st_read("../data/raw/hamilton_harbor.gdb", layer="waterbody_2", quiet = TRUE)

# Align the projections
lake <- st_transform(lake, st_crs(ham_SP_2005_clean))

# Create the reference grid for model projection (using the lake extent)
r_ref <- raster(lake,res=100)
r_ref
## class      : RasterLayer 
## dimensions : 82, 148, 12136  (nrow, ncol, ncell)
## resolution : 100, 100  (x, y)
## extent     : 1341058, 1355858, 11862377, 11870577  (xmin, xmax, ymin, ymax)
## crs        : +proj=lcc +lat_1=44.5 +lat_2=53.5 +lat_0=0 +lon_0=-85 +x_0=930000 +y_0=6430000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs

As for inverse distance weighting, we build a grid of \(100 \times 100\) m.

Simple kriging

###
# Performing kriging using the interpolate function
###
# Simple kriging
skModel <- gstat(formula = Value.Valeur ~ 1, location = ham_SP_2005_clean,
            model = modelVario, beta = 2)

# Build interpolation map
skMap <- interpolate(object = r_ref, model = skModel, debug = 0)

###
# Performing kriging using the krige function
###

# Organise raster into a dara.frame
r_refDF<-as.data.frame(r_ref, xy = TRUE)

# SpatialPointsDataFrame
coordinates(r_refDF) <- ~ x+ y

# SpatialPixelDataFrame
gridded(r_refDF) <- TRUE

# Add the projection
proj4string(r_refDF) <- crs("+proj=lcc +lat_1=44.5 +lat_2=53.5 +lat_0=0 +lon_0=-85 +x_0=930000 +y_0=6430000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")

# Build our model
skModelKrige <- krige(formula = Value.Valeur ~ 1, location = ham_SP_2005_clean, model = modelVario, beta = 2, newdata = r_refDF)
## [using simple kriging]
# Plot prediction
spplot(skModelKrige["var1.pred"])

# Plot variation
spplot(skModelKrige["var1.var"])

# Building 95% confidence interval around the model
sdsk <- sqrt(skModelKrige$var1.var)
CI.025 <- qnorm(0.025) * sdsk + skModelKrige$var1.pred
CI.975 <- qnorm(0.975) * sdsk + skModelKrige$var1.pred

# Build raster for 95% confidence interval
skMap$CI.025 <- CI.025
skMap$CI.975 <- CI.975

Draw the map

library(mapview)
# Mask 
skMapMask <- mask(skMap, lake) 
plot(skMapMask$CI.025, zlim = c(-2000, 4000))

plot(skMapMask$var1.pred, zlim = c(-2000, 4000))

plot(skMapMask$CI.975, zlim = c(-2000, 4000))

Ordinary kriging

# Ordinary kriging
okModel <- gstat(formula = Value.Valeur ~ 1, location = ham_SP_2005_clean,
            model = modelVario)

# Build interpolation map
okMap <- interpolate(object = r_ref, model = okModel, debug = 0)

Draw the map

library(mapview)
# Mask 
okMapMask <- mask(okMap, lake) 
mapview(okMapMask)

Exercice

  1. Build a kriging maps with another variable
  • This means using another model variogram
  1. Perform universal kriging
  2. Build a kriging maps on your own data
  • Think of the question you want to answer
  • The size of your data will influence the choice of model fitting procedure