Wilke et al. diversification rates and ages of rate shifts

The influence of species richness and climate and environmental conditions on diversification rates may change over time, for instance, due to inherent uncaptured changes of the system affecting the dynamic of the species assemblage (Silvestro & Schnitzler 2018). Such confounding effects were incorporated by time‑stratifying the analysis of covariate influence on speciation and extinction rates. We estimate the temporal heterogeneity of speciation and extinction rates in endemic species from three random samples of speciation and extinction times drawn from each of the 100 BDS analyses.

# Load required packages
library(coda)
library(foreach)
library(doParallel)

# Auxiliary functions
source("AuxiliaryFunctions/extractAgesFromMcmcLog.R")

Due to computational limits we will only extract 2 random sets out of the speciation and extinction times inferred by each of the 2 BDS analyses.

# 
set.seed(23) # Reproducibility
BDS <- 1:2 # Number of BDS analyses
RanSamp <- 2 # Random samples of speciation and extinction times
for(i in BDS){
    McmcLog <- read.table(paste0("Results/BDS/pyrate_mcmc_logs/TrimBounded_",
                                 BDS[i], "_GBD1-1_mcmc.log"), 
                          header = TRUE, sep = "\t")
    extractAgesFromMcmcLog(McmcLog, N = RanSamp, 
                           Out = paste0("Results/BD/Fix_", BDS[i], "_")) 
}

We analysed the times of origination and extinction using a reversible jump (rj) MCMC (Silvestro et al. 2019) analysis. Marginal speciation and extinction rates were obtained from 2,000,000 rj-MCMC iterations with sampling every 2,500 iterations after discarding the first 50,000 iterations as burn-in.

runPyRateShift <- function(Fix, i){
        system( paste("python2 PyRate/PyRate.py",
                    paste0("-d Results/BD/", Fix[i], ".txt"),
                    "-n 500001",  # Generations to run
                    "-s 2500", # Sampling frequency 
                    "-b 50000", # Burn-in
                    "-p 500000", # Print frequency
                    "-A 4", 
                    "-edgeShift 13.65 0",
                    "-log_marginal_rates 0",
                    paste0("-wd Results/BD"))) 
}


Fix <- paste(paste0("Fix_", BDS), rep(1:RanSamp, each = length(BDS)), sep = "_")
Fix <- sort(Fix)

# Run on 2 cores in parallel
Ncores <- 2
registerDoParallel(Ncores)
PyRate <- foreach(iter = 1:4, .inorder = FALSE) %dopar% runPyRateShift(Fix, iter)
stopImplicitCluster()

Check effective sampling sizes

McmcLogFiles <- list.files("Results/BD/pyrate_mcmc_logs", pattern = "rj_mcmc.log")
McmcLogs <- vector(mode = "list", length(McmcLogFiles)) 
for(i in 1:length(McmcLogFiles)){
  McmcLogs[[i]] <- read.table(paste0("Results/BD/pyrate_mcmc_logs/", McmcLogFiles[i]), 
                              header = TRUE, sep = "\t")
}
lapply( McmcLogs, function(x) apply(x, 2, function(y) round(effectiveSize(as.mcmc(y)))))

## [[1]]
##         it  posterior      prior     BD_lik    k_birth    k_death 
##          0        181         57        181        116         67 
##      RJ_hp   root_age  death_age tot_length 
##        181          0          0          0 
## 
## [[2]]
##         it  posterior      prior     BD_lik    k_birth    k_death 
##          0        181         97        181        137         40 
##      RJ_hp   root_age  death_age tot_length 
##        181          0          0          0 
## 
## [[3]]
##         it  posterior      prior     BD_lik    k_birth    k_death 
##          0         93        108         82        113        114 
##      RJ_hp   root_age  death_age tot_length 
##        181          0          0          0 
## 
## [[4]]
##         it  posterior      prior     BD_lik    k_birth    k_death 
##          0        181        114        124        181         86 
##      RJ_hp   root_age  death_age tot_length 
##        181          0          0          0

Summarize from the posterior distribution the number of shifts in speciation and extinction rates and their ages.

SpRj <- list.files("Results/BD/pyrate_mcmc_logs/", pattern = "sp_rates")
ExRj <- list.files("Results/BD/pyrate_mcmc_logs/", pattern = "ex_rates")
EdgeShift <- 13.65
TimeSeq <- seq(EdgeShift, 0, by = -0.05) 
SpTmp <- read.table(paste0("Results/BD/pyrate_mcmc_logs/", SpRj[1]), 
                    sep = "\t", fill = NA, col.names = paste0("V", 1:20))
Nrow <- nrow(SpTmp)
ExThroughTime <- SpThroughTime <- matrix(NA_real_, 
                                         ncol = length(TimeSeq), 
                                         nrow = length(SpRj)*Nrow)
ExShifts <- SpShifts <- matrix(NA_real_, ncol = 8, nrow = length(SpRj) * Nrow)
Counter <- 1

for(i in 1:length(SpRj)){
  # Give a high number of col.names because each line may differ in length
  SpTmp <- read.table(paste0("Results/BD/pyrate_mcmc_logs/", SpRj[i]), 
                      sep = "\t", fill = NA, col.names = paste0("V", 1:20))
  ExTmp <- read.table(paste0("Results/BD/pyrate_mcmc_logs/", ExRj[i]), 
                      sep = "\t", fill = NA, col.names = paste0("V", 1:20))
  for(y in 1:nrow(SpTmp)){
    # Speciation
    Sp1Tmp <- unlist( SpTmp[y, ] )
    # Rarely there is a shift inferred earlier then 13.65
    if (all(Sp1Tmp <= EdgeShift, na.rm = TRUE)){
      # Which element is > 136 (i.e., the constrained edgeShift)
      W136 <- which(Sp1Tmp >= EdgeShift)
      ShiftTimesTmp <- na.omit(Sp1Tmp[W136:length(Sp1Tmp)])
      # Get the rates for each time step:
      SpThroughTime[Counter, ] <- approx(x = c(ShiftTimesTmp, 0), y = Sp1Tmp[1:(W136-1)], 
                                         xout = TimeSeq, method = "constant")$y
      SpShifts[Counter, 1:length(ShiftTimesTmp)] <- ShiftTimesTmp
    }
    # Extinction
    Ex1Tmp <- unlist( ExTmp[y, ] )
    if (all(Ex1Tmp <= EdgeShift, na.rm = TRUE)){
      # Which element is > 136 (i.e., the constrained edgeShift)
      W136 <- which(Ex1Tmp >= EdgeShift)
      ShiftTimesTmp <- na.omit(Ex1Tmp[W136:length(Ex1Tmp)])
      # Get the rates for each time step:
      ExThroughTime[Counter, ] <- approx(x = c(ShiftTimesTmp, 0), y = Ex1Tmp[1:(W136-1)], 
                                         xout = TimeSeq, method = "constant")$y
      ExShifts[Counter, 1:length(ShiftTimesTmp)] <- ShiftTimesTmp
    }
    Counter <- Counter + 1
  }
}

# Omit the fixed edge shift of 13.65 million years
SpThroughTime <- SpThroughTime[, -1]
ExThroughTime <- ExThroughTime[, -1]
SpShifts <- SpShifts[, -1]
ExShifts <- ExShifts[, -1]
# Omit all rows with occasional NAs 
Keep <- 1:(Counter-1) # Easier
SpThroughTime <- SpThroughTime[Keep, ]
ExThroughTime <- ExThroughTime[Keep, ]
SpShifts <- SpShifts[Keep, ]
ExShifts <- ExShifts[Keep, ]
# Rescale rates from 13.6 million years to 1.36
SpThroughTime <- SpThroughTime * 10
ExThroughTime <- ExThroughTime * 10
NetDivThroughTime <- SpThroughTime - ExThroughTime
SpShifts <- SpShifts / 10
ExShifts <- ExShifts / 10

# Fequency of shifts found across all birth-death analyses
PerSpShift <- apply(SpShifts, 2, function(x) sum(!is.na(x)) / length(x))
PerExShift <- apply(ExShifts, 2, function(x) sum(!is.na(x)) / length(x))
# 
MedSpShift <- apply(SpShifts, 2, median, na.rm = TRUE) 
MedExShift <- apply(ExShifts, 2, median, na.rm = TRUE) 

ExShiftsHpd <- SpShiftsHpd <- matrix(NA_real_, nrow = 2, ncol = ncol(SpShifts))
for(i in 1:ncol(SpShifts)){
  TmpShift <- na.omit(SpShifts[, i])
  if(length(TmpShift) > 1){
    SpShiftsHpd[, i] <- HPDinterval(coda:::mcmc(TmpShift))
  }
  TmpShift <- na.omit(ExShifts[, i])
  if(length(TmpShift) > 1){
    ExShiftsHpd[, i] <- HPDinterval(coda:::mcmc(TmpShift))
  }
}

SummarySpShift <- rbind(PerSpShift, MedSpShift, SpShiftsHpd) 
SummarySpShift <- SummarySpShift[, SummarySpShift[1, ] != 0] 
rownames(SummarySpShift) <- c("Frequency", "Age", "LwrHPD", "UprHPD")
colnames(SummarySpShift) <- paste("Shift", 1:ncol(SummarySpShift))
SummarySpShift

##            Shift 1   Shift 2   Shift 3     Shift 4
## Frequency 1.000000 0.9889503 0.1077348 0.004143646
## Age       1.242096 1.0304314 0.6242546 0.612175085
## LwrHPD    1.194638 0.5455362 0.1591756 0.578141248
## UprHPD    1.264968 1.1704639 1.1509796 0.793681392

SummaryExShift <- rbind(PerExShift, MedExShift, ExShiftsHpd) 
SummaryExShift <- SummaryExShift[, SummaryExShift[1, ] != 0] 
rownames(SummaryExShift) <- c("Frequency", "Age", "LwrHPD", "UprHPD")
colnames(SummaryExShift) <- paste("Shift", 1:ncol(SummaryExShift))
SummaryExShift

##            Shift 1      Shift 2
## Frequency 0.788674 0.0704419890
## Age       1.174994 0.0086376024
## LwrHPD    1.018517 0.0006749044
## UprHPD    1.264901 1.1041635518

Plot speciation and extinction rates (including shifts) through time.

Prob <- seq(0.95, 0.05, by = -0.05)
NetDivThroughTimeHPD <- ExThroughTimeHPD <- SpThroughTimeHPD <- vector(mode = "list", length(Prob)) 
for(i in 1:length(Prob)){
  SpThroughTimeHPD[[i]] <- apply(SpThroughTime, 2,
                                 function(x) HPDinterval(coda:::mcmc(x), prob = Prob[i]))
  ExThroughTimeHPD[[i]] <- apply(ExThroughTime, 2,
                                 function(x) HPDinterval(coda:::mcmc(x), prob = Prob[i]))
  NetDivThroughTimeHPD[[i]] <- apply(NetDivThroughTime, 2,
                                     function(x) HPDinterval(coda:::mcmc(x), prob = Prob[i]))
}
# Means
SpThroughTimeMean <- apply(SpThroughTime, 2, median)
ExThroughTimeMean <- apply(ExThroughTime, 2, median)

TimeSeq <- TimeSeq[-1]

par(las = 1, mar = c(5.1, 4.5, 0.1, 0.8))
plot(1, 1, type = "n", 
     xlim = c(14, 0), ylim = c(0, max(unlist(SpThroughTimeHPD))), 
     xaxs = "i", xaxt = "n", 
     xlab = "Age (Myr ago)", 
     ylab = expression(paste("Speciation rate (events ", lineage^{-1}, " ", Myr^{-1}, ")")))
axis(side = 1, at = seq(14, 0, by = -2), labels = seq(14, 0, by = -2)/10)
abline(v = SummarySpShift[2, SummarySpShift[1, ] > 0.75]*10, lty = 2, col = "dodgerblue")
for(i in 1:length(Prob)){
  polygon(c(TimeSeq, rev(TimeSeq)), 
          c(SpThroughTimeHPD[[i]][1, ], rev(SpThroughTimeHPD[[i]][2, ])), 
          col = adjustcolor("dodgerblue", alpha = 1/length(Prob)), border = NA)
}
lines(TimeSeq, SpThroughTimeMean, col = "dodgerblue")

par(las = 1, mar = c(5.1, 4.5, 0.1, 0.8))
plot(1, 1, type = "n", 
     xlim = c(14, 0), ylim = c(0, max(unlist(ExThroughTimeHPD))), 
     xaxs = "i", xaxt = "n", 
     xlab = "Age (Myr ago)", 
     ylab = expression(paste("Extinction rate (events ", lineage^{-1}, " ", Myr^{-1}, ")")))
axis(side = 1, at = seq(14, 0, by = -2), labels = seq(14, 0, by = -2)/10)
abline(v = SummaryExShift[2, SummaryExShift[1, ] > 0.75]*10, lty = 2, col = "red")
for(i in 1:length(Prob)){
  polygon(c(TimeSeq, rev(TimeSeq)), 
          c(ExThroughTimeHPD[[i]][1, ], rev(ExThroughTimeHPD[[i]][2, ])), 
          col = adjustcolor("red", alpha = 1/length(Prob)), border = NA)
}
lines(TimeSeq, ExThroughTimeMean, col = "red")

Plattform and resources

Sys.time()

## [1] "2020-01-03 22:45:44 CET"

sessionInfo()

## R version 3.6.2 (2019-12-12)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=de_DE.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=de_DE.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=de_DE.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] doParallel_1.0.15 iterators_1.0.12  foreach_1.4.7     coda_0.19-3      
## [5] knitr_1.25       
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.2       codetools_0.2-16 lattice_0.20-38  digest_0.6.21   
##  [5] grid_3.6.2       magrittr_1.5     evaluate_0.14    rlang_0.4.0     
##  [9] stringi_1.4.3    rmarkdown_1.15   tools_3.6.2      stringr_1.4.0   
## [13] xfun_0.10        yaml_2.2.0       compiler_3.6.2   htmltools_0.4.0