Visualizing spatio-temporal data

install & load packages

library(robis)
library(sf)
library(dplyr)
library(ggplot2)
library(raster)
library(sf)
library(spacetime)
library(sp)
library(mapview)

# if needed
# install.packages(c("robis","sf","dplyr","ggplot2","raster","sf","spacetime","sp"))
# install_github("andrewzm/STRbook")

Gathering occurrences from OBIS

Explore OBIS database and get species occurrences for Halibut (Hippoglossus hippoglossus).

halibut <- robis::occurrence(scientificname = "Hippoglossus hippoglossus", 
               geometry="POLYGON((-64.84626027288095 48.91641792598304,-64.01129933538095 50.49356505646415,-68.82331105413095 50.11464395818477,-71.04254933538095 46.80876488134784,-64.84626027288095 48.91641792598304))"
          )
## Retrieved 228 records of approximately 228 (100%)

Convert halibut dataframe to a sf object

sf_halibut <- st_as_sf(halibut, 
                    coords = c("decimalLongitude","decimalLatitude"),
                    crs = 4326)

The OBIS database provide a lot of context on the records.

names(sf_halibut)
##  [1] "rightsHolder"             "country"                 
##  [3] "date_year"                "institutionID"           
##  [5] "scientificNameID"         "year"                    
##  [7] "scientificName"           "dynamicProperties"       
##  [9] "dropped"                  "aphiaID"                 
## [11] "language"                 "phylumid"                
## [13] "familyid"                 "catalogNumber"           
## [15] "basisOfRecord"            "terrestrial"             
## [17] "superclass"               "modified"                
## [19] "maximumDepthInMeters"     "id"                      
## [21] "day"                      "order"                   
## [23] "superclassid"             "dataset_id"              
## [25] "collectionCode"           "speciesid"               
## [27] "date_end"                 "occurrenceID"            
## [29] "license"                  "date_start"              
## [31] "month"                    "genus"                   
## [33] "eventDate"                "brackish"                
## [35] "scientificNameAuthorship" "absence"                 
## [37] "subfamily"                "genusid"                 
## [39] "originalScientificName"   "marine"                  
## [41] "minimumDepthInMeters"     "subphylumid"             
## [43] "subfamilyid"              "institutionCode"         
## [45] "countryCode"              "date_mid"                
## [47] "eventTime"                "class"                   
## [49] "orderid"                  "datasetName"             
## [51] "kingdom"                  "specificEpithet"         
## [53] "classid"                  "phylum"                  
## [55] "species"                  "subphylum"               
## [57] "family"                   "category"                
## [59] "kingdomid"                "node_id"                 
## [61] "individualCount"          "fieldNumber"             
## [63] "type"                     "rights"                  
## [65] "recordedBy"               "datasetID"               
## [67] "locality"                 "taxonRank"               
## [69] "depth"                    "geometry"

We perform a visual check of the occurrences.

mapview(sf_halibut)

Visual representation per year

We can now build a representation of the occurrences per year, but firsts we have to extract the polygon for the area around the estuary.

poly_ca <- raster::getData('GADM', country='CA', level=1)
sf_ca <- st_as_sf(poly_ca)
sf_estuary <- st_crop(sf_ca, st_as_sfc(st_bbox(sf_halibut)))
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant
## throughout all geometries

Following, we can visualize the data

ggplot() + 
     geom_sf(data = sf_estuary, fill = "grey50", col = "white") + 
     geom_sf(data = filter(sf_halibut, year > 2006), aes(col = depth)) + 
     facet_wrap(~year)

Count observations per year with rasterize

If you have a large dataset with a high density of points, it is a good idea to summarize the informations using a grid of abundance counts.

# get number of years
years <- unique(sf_halibut$year)

# Create a reference grid
ref_grid <- raster(sf_halibut, res=0.1)

# Open empty stack
stack_count <- stack()

for(i in 1:length(years)){
     # Filter for one year (years[i])
     sf_halibut_yr <- filter(sf_halibut, year == years[i])
     
     # Prepare the geometry
     sp_halibut_yr <- as(st_geometry(sf_halibut_yr), "Spatial")

     # Count points per cell
     rs_count <- rasterize(sp_halibut_yr, ref_grid, fun = 'count')

     # Add the layer to the stack
     stack_count <- addLayer(stack_count,rs_count)
}

names(stack_count) <- years
mapview(stack_count[["X2011"]])

Compute mean values with rasterize

We load the air data PM10 air quality data.

raw_air <- readRDS("./assets/data/air.rds") 
# Transforming data to sf
sf_air <- st_as_sf(raw_air, coords=c("lng", "lat"), crs = 4326)

We select the variable that we want to compute the mean.

sf_air_pm10 <- sf_air[,"pm10"]

Then, we create the reference grid and perform the rasterize on the points.

ref_rs <- raster(sf_air_pm10, res = 0.5)
rs_mean <- rasterize(sf_air_pm10, ref_rs, fun = mean)
mapview(rs_mean[["pm10"]])

We can now compute the mean values per year.

# Create the year column
sf_air$year <- format(sf_air$dates, "%Y")
# Create vector of all year
years <- unique(sf_air$year)

# Create reference grid
ref_rs <- raster(sf_air, res = 0.5)
# create empty stack
st_mean_by_yr <- stack()

# Loop over year and compute the mean per cells
for(y in 1:length(years)){
     sf_air_yr <- subset(sf_air, year == years[y])[,"pm10"]
     st_mean_by_yr <- addLayer(st_mean_by_yr,rasterize(sf_air_yr, ref_rs, fun = mean)[['pm10']])
}

Mapping rasterStack

So now, we have rasterStack where each layer is the mean values gridded for a specific year. Because it’s multiple layers, you have to transform the rasterStack to a star object to be able to plot it with ggplot().

Mapping rasterStack through times with package ggplot2

library(raster)
library(ggplot2)
library(tidyr)

# Convert the rasterStack to data.frame
z <- as.data.frame(st_mean_by_yr)
names(z) <- years
# Merge coordinates with values
df_mean_by_yr <- data.frame(coordinates(st_mean_by_yr), z)
# Melt the data.frame to a tidy format
df_mean_by_yr <- gather(df_mean_by_yr, year, pm10, -x, -y, na.rm = TRUE)
head(df_mean_by_yr)
##           x        y  year     pm10
## 13 12.53107 54.67497 X2001 11.77922
## 25 10.03107 54.17497 X2001 17.72159
## 36  7.03107 53.67497 X2001 34.38259
## 40  9.03107 53.67497 X2001 26.88333
## 41  9.53107 53.67497 X2001 26.84869
## 46 12.03107 53.67497 X2001 15.39233
# remove the X in the year column 
df_mean_by_yr$year <- as.numeric(gsub("X", "", df_mean_by_yr$year))
head(df_mean_by_yr)
##           x        y year     pm10
## 13 12.53107 54.67497 2001 11.77922
## 25 10.03107 54.17497 2001 17.72159
## 36  7.03107 53.67497 2001 34.38259
## 40  9.03107 53.67497 2001 26.88333
## 41  9.53107 53.67497 2001 26.84869
## 46 12.03107 53.67497 2001 15.39233
# Plot raster with ggplot
ggplot() + geom_raster(data = df_mean_by_yr, aes(x = x, y = y, fill = pm10)) + facet_wrap(~year)

Mapping rasterStack through times with packages stars & ggplot2

library(stars)

# Before transforming the raster, you add the time as the Z value of the rasterStack
st_mean_by_yr <- setZ(st_mean_by_yr,as.Date(paste(years,"01","01", sep="-")))

# Convert rasterStack to stars object (x, y, time)
strs_mean <- st_as_stars(st_mean_by_yr) 

# Plot grids through time.
ggplot() + geom_stars(data=strs_mean) + coord_equal() + facet_wrap("band") 

Practice

  1. Collect the occurences of your favorite species on OBIS.
    • robis::occurrence(scientificname = "Hippoglossus hippoglossus")
  2. Visualize the distribution of this species across several years using ggplot2.
  3. Produce count grids per year for this species.

Spatial variability

Hovmöller plot

data("NOAA_df_1990", package = "STRbook")

Tmax <- filter(NOAA_df_1990,     # subset the data
              proc == "Tmax" &   # only max temperature
              month %in% 5:9 &   # May to September
              year == 1993)      # year of 1993

lim_lat <- range(Tmax$lat)        # latitude range

lat_axis <- seq(lim_lat[1],       # latitude axis
                lim_lat[2],
                length=25)

Tmax$t <- Tmax$julian - 728049     # create a new time variable
Tmax_grid <- Tmax
dists <- abs(outer(Tmax$lat, lat_axis, "-"))
Tmax_grid$lat <- lat_axis[apply(dists, 1, which.min)]

Tmax_lat_Hov <- group_by(Tmax_grid, lat, t) %>%
                summarise(z = mean(z))

library(STRbook)
Hovmoller_lat <- ggplot(Tmax_lat_Hov) +            # take data
        geom_tile(aes(x = lat, y = t, fill = z)) + # plot
        fill_scale(name = "degF") +     # add color scale
        scale_y_reverse() +             # rev y scale
        ylab("Day number (days)") +     # add y label
        xlab("Latitude (degrees)") +    # add x label
        theme_bw()                      # change theme

Hovmoller_lat

Temporal variability

Trends plots by stations

data("NOAA_df_1990", package = "STRbook")

Tmax <- filter(NOAA_df_1990,     # subset the data
              proc == "Tmax" &   # only max temperature
              month %in% 5:9 &   # May to September
              year == 1993)      # year of 1993

Tmax_av <- group_by(Tmax, date) %>%
           summarise(meanTmax = mean(z))

gTmaxav <-
    ggplot() +
    geom_line(data = Tmax,aes(x = date, y = z, group = id),
              colour = "blue", alpha = 0.04) +
    geom_line(data = Tmax_av, aes(x = date, y = meanTmax)) +
    xlab("Month") + ylab("Maximum temperature (degF)") +
    theme_bw()

gTmaxav

Transform your data into spacetime classes

To perform spatio-temporal variograms with gstat, we need to convert our data into one of the following different spacetime classes of object. To build these objects, the best way is to use the stConstruct function in the spacetime package.

spacetime classes

  • full grid (STF), a combination of any sp object and any xts object to represent all possible locations on the implied space-time lattice;
  • sparse grid (STS), as STF, but contains only the non-missing space-time combinations on a space-time lattice;
  • irregular (STI), an irregular space-time data structure, where each point is allocated a spatial coordinate and a time stamp;
  • simple trajectories (STT), a sequence of space-time points that form trajectories.

Get access to the data

Download PM10 air quality data

library(spacetime)

raw_air <- readRDS("./assets/data/air.rds")
head(raw_air)
##   station_id   pm10      dates      lng      lat
## 1    DEBB051  8.688 2001-06-08 14.05746 52.54136
## 2    DEBB051  7.354 2001-09-10 14.05746 52.54136
## 3    DEBB051 15.729 2001-10-10 14.05746 52.54136
## 4    DEBB051 26.312 2001-05-03 14.05746 52.54136
## 5    DEBB051 13.958 2001-10-28 14.05746 52.54136
## 6    DEBB051 39.188 2001-10-16 14.05746 52.54136
spaceTime_air <- stConstruct(raw_air, c("lng","lat"),"dates", crs = CRS("+init=epsg:4326"))
class(spaceTime_air)
## [1] "STIDF"
## attr(,"package")
## [1] "spacetime"
# subset 
raw_air_062005 <- subset(raw_air, dates > as.Date("2005-06-01") & dates < as.Date("2005-06-30"))

raw_air_062005 <- stConstruct(raw_air_062005, c("lng","lat"),"dates", crs = CRS("+init=epsg:4326"))

# Coerce the STI class to STF
STFDF_air_062005 <- as(raw_air_062005, "STFDF")
summary(STFDF_air_062005)
## Object of class STFDF
##  with Dimensions (s, t, attr): (46, 28, 2)
## [[Spatial:]]
## Object of class SpatialPoints
## Coordinates:
##          min      max
## lng  6.28107 14.78617
## lat 47.80847 54.92497
## Is projected: FALSE 
## proj4string :
## [+init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84
## +towgs84=0,0,0]
## Number of points: 46
## [[Temporal:]]
##      Index              timeIndex    
##  Min.   :2005-06-02   Min.   : 1.00  
##  1st Qu.:2005-06-08   1st Qu.: 7.75  
##  Median :2005-06-15   Median :14.50  
##  Mean   :2005-06-15   Mean   :14.50  
##  3rd Qu.:2005-06-22   3rd Qu.:21.25  
##  Max.   :2005-06-29   Max.   :28.00  
## [[Data attributes:]]
##    station_id        pm10       
##  DESH001:  28   Min.   : 2.708  
##  DENI063:  28   1st Qu.:10.739  
##  DEUB038:  28   Median :14.191  
##  DEBE056:  28   Mean   :15.100  
##  DEBE032:  28   3rd Qu.:18.583  
##  (Other):1108   Max.   :53.292  
##  NA's   :  40   NA's   :40
# Hovmöller plot
stplot(STFDF_air_062005, mode = "xt")

# Temporal-space variability
stplot(STFDF_air_062005[,,"pm10"], mode = "xy")