library(phytools)
library(car)
library(mnormt)
library(caper)
library(geiger)
library(nlme)
library(phylolm)
library(phangorn)
library(castor)
library(diversitree)
library(mvSLOUCH)

Evolution of continuous traits

Simulation exercises

This little simulation shows how BM generates normal distribution and covariance

tree<-pbtree(n=6,tip.label=LETTERS[1:6])
X<-fastBM(tree,nsim=100)
plot(rotateNodes(tree,"all"),edge.width=2,no.margin=TRUE)
plot of chunk unnamed-chunk-2
scatterplotMatrix(t(X))
plot of chunk unnamed-chunk-2
par(mfrow=c(1,2))
hist(t(X)[,"A"], main="variance across simulations")
hist(X[,1], main="variance across species")
plot of chunk unnamed-chunk-2

We will simulate data under different models of trait evolution for a predefined number of species

nspec=100
tree<-pbtree(n=nspec,scale=100)
x1<-fastBM(tree,a=2,sig2=0.01, internal=TRUE) #BM model
x2<-fastBM(tree,a=2,sig2=0.01, mu=0.05, internal=TRUE) #BM with a trend
x3<-fastBM(tree,a=2,theta=2,alpha=0.8,sig2=0.1, internal=TRUE, model = "OU") #OU model

par(mfrow=c(1,3))
phenogram(tree,x1, spread.labels=F, ylim=c(0, max(as.numeric(x3),as.numeric(x2),as.numeric(x1))), main="BM")
phenogram(tree,x2, spread.labels=F, ylim=c(0, max(as.numeric(x3),as.numeric(x2),as.numeric(x1))), main="BM with trend")
phenogram(tree,x3, spread.labels=F, ylim=c(0, max(as.numeric(x3),as.numeric(x2),as.numeric(x1))), main="OU")
plot of chunk unnamed-chunk-3

check out how different models fit with the simulated data

fit1=fitContinuous(tree,x1[1:nspec])
fit2=fitContinuous(tree,x1[1:nspec],model="trend")
fit3=fitContinuous(tree,x1[1:nspec],model="OU")
setNames(c(fit1$opt$aicc,fit2$opt$aicc,fit3$opt$aicc),c("BM","trend","OU"))
##       BM    trend       OU 
## 136.8844 138.9788 139.0106

Exercise: Try to play with the above code i.e. by varying the parameters and the number of species. When do you have confident support for one model over another?

Now we are going to work with the Pagel's parameters. Let us start with lambda

nspec=100
tree<-pbtree(n=nspec,scale=100)

##transform lambda
tree1=rescale(tree, "lambda", 0)
tree2=rescale(tree, "lambda", 0.5)

#plot
par(mfrow=c(1,3))
plot(tree1, edge.width=1.5, cex=1.2); mtext("Lambda = 0")
plot(tree2, edge.width=1.5, cex=1.2); mtext("Lambda = 0.5")
plot(tree, edge.width=1.5, cex=1.2); mtext("Lambda = 1 (default)")
plot of chunk unnamed-chunk-5
#simulate data
x<-fastBM(tree,a=0,sig2=0.01, internal=F)
x1<-fastBM(tree1,a=0,sig2=0.01, internal=F)
x2<-fastBM(tree2,a=0,sig2=0.01, internal=F)

#estimate lambda for the simulated data
fitContinuous(tree,x, model="lambda")$opt$lambda
## [1] 0.9979673
fitContinuous(tree,x1, model="lambda")$opt$lambda
## [1] 4.160831e-217
fitContinuous(tree,x2, model="lambda")$opt$lambda
## [1] 0.8445418

To draw likelihood surfaces, we will fit PGLS models with intercept only

#check likelihood surface
xdata=data.frame(x)
xdata$species=rownames(xdata)
tree.x=comparative.data(phy = tree, data = xdata, names.col =species)
res<-pgls(x~ 1, data = tree.x, lambda='ML')

xdata1=data.frame(x1)
xdata1$species=rownames(xdata1)
tree.x1=comparative.data(phy = tree, data = xdata1, names.col =species)
res1<-pgls(x1~ 1, data = tree.x, lambda='ML')

xdata2=data.frame(x2)
xdata2$species=rownames(xdata2)
tree.x2=comparative.data(phy = tree, data = xdata2, names.col =species)
res2<-pgls(x2~ 1, data = tree.x2, lambda='ML')

par(mfrow=c(1,3))
plot(pgls.profile(res, 'lambda'))
plot(pgls.profile(res1, 'lambda'))
plot(pgls.profile(res2, 'lambda'))
plot of chunk unnamed-chunk-6

Exercise: Try to vary the number of species and check how the likelihood surface changes

To continue with the phylogenetic signal we will estimate the K and lambda statsistic

phylosig(tree,x,method="lambda")$lambda
## [1] 0.9979644
phylosig(tree,x1,method="lambda")$lambda
## [1] 6.622674e-05
phylosig(tree,x2,method="lambda")$lambda
## [1] 0.8445479
phylosig(tree,x,method="K", test=T)
## $K
## [1] 0.9199781
## 
## $P
## [1] 0.001
phylosig(tree,x1,method="K", test=T)
## $K
## [1] 0.04061124
## 
## $P
## [1] 0.7
phylosig(tree,x2,method="K", test=T)
## $K
## [1] 0.1998206
## 
## $P
## [1] 0.001

This are very similar exercises focusing on kappa and delta

#delta transformations
tree1=rescale(tree, "delta", 0.1)
tree2=rescale(tree, "delta", 2)

par(mfrow=c(1,3))
plot(tree1, edge.width=1.5, cex=1.2); mtext("delta = 0.1")
plot(tree, edge.width=1.5, cex=1.2); mtext("delta = 1 (default)")
plot(tree2, edge.width=1.5, cex=1.2); mtext("delta = 2")
plot of chunk unnamed-chunk-8
#kappa transformations
tree1=rescale(tree, "kappa", 0)
tree2=rescale(tree, "kappa", 2)

par(mfrow=c(1,3))
plot(tree1, edge.width=1.5, cex=1.2); mtext("kappa = 0")
plot(tree, edge.width=1.5, cex=1.2); mtext("kappa = 1 (default)")
plot(tree2, edge.width=1.5, cex=1.2); mtext("kappa = 2")
plot of chunk unnamed-chunk-8

Exercise: Try to simulate data with the trees tranformed above and repeat the model fits to see if the generated parametres can be recovered

Evolution of discrete traits

Simulation exercises

We will simulate data under different evolutionary scenarios then fit models to see how different parameters can be estimated

nspec=100
tree<-pbtree(n=nspec,scale=100)
x1<-rTraitDisc(tree, model = "ER")
x2<-rTraitDisc(tree, model = "SYM")
x3<-rTraitDisc(tree, model = "ARD", rate=c(0.1,0.4))

fit1<-fitDiscrete(tree,x1,model="ER")
fit2<-fitDiscrete(tree,x1,model="SYM")
fit3<-fitDiscrete(tree,x1,model="ARD")
fit4<-fitDiscrete(tree,x1,model=matrix(c(0,0,1,0),2,2))
## Warning in fitDiscrete(tree, x1, model = matrix(c(0, 0, 1, 0), 2, 2)): Parameter estimates appear at bounds:
## 	q21
par(mfrow=c(1,4))
plot(fit1, signif=5, main=paste("ER model, LL =", round(fit1$opt$lnL,3)))
plot(fit2, signif=5, main=paste("SYM model, LL =", round(fit2$opt$lnL,3)))
plot(fit3, signif=5, main=paste("ARD model, LL =", round(fit3$opt$lnL,3)))
plot(fit4, signif=5, main=paste("IRR model, LL =", round(fit4$opt$lnL,3)))
plot of chunk unnamed-chunk-9
#model comparison with AIC
aicc<-setNames(
    c(fit1$opt$aicc,fit2$opt$aicc,fit3$opt$aicc,fit4$opt$aicc),
    c("ER","SYM","ARD","IRR"))
aicc
##       ER      SYM      ARD      IRR 
## 140.2142 140.2142 142.0018 214.2515
#model comparison with likelihood ratio test
LRT=2*(fit1$opt$lnL-fit3$opt$lnL)
pchisq(abs(LRT), 1, lower.tail=FALSE)
## [1] 0.5867853
LRT=2*(fit1$opt$lnL-fit4$opt$lnL)
pchisq(abs(LRT), 2, lower.tail=FALSE)
## [1] 8.375725e-17

It is possible to model evolution of traits with k>2 states. We will still use fitDiscrete from geiger, but note that fitMk from phytools can also be used, which is actually faster. Below, you can set up the number of states then you appropriately define rates from a random (uniform) distribution between 0.1 and 1.

k=3
tree<-pbtree(n=nspec,scale=100)
x1<-rTraitDisc(tree, k=k,model = "ER")
x2<-rTraitDisc(tree, k=k, model = "SYM", rate=sample(seq(0.1, 1, by=0.1), k*(k - 1)/2))
x3<-rTraitDisc(tree, k=k, model = "ARD", rate=sample(seq(0.1, 1, by=0.1), k*(k - 1)))

See how different models fit with the simulated data.

fit1<-fitDiscrete(tree,x2,model="ER")
fit2<-fitDiscrete(tree,x2,model="SYM")
fit3<-fitDiscrete(tree,x2,model="ARD")

par(mfrow=c(1,3))
plot(fit1, signif=5, main="ER")
plot(fit2, signif=5, main="SYM")
plot(fit3, signif=5, main="ARD")
plot of chunk unnamed-chunk-11

Note 1: with castor you can generate Q matrix

Q = castor::get_random_mk_transition_matrix(k, rate_model="ER")
tip_states = LETTERS[simulate_mk_model(tree, Q)$tip_states]
names(tip_states)=tree$tip.label

Note 2: you can also define your own matrix and use evaluate fit

model<-matrix(c(0,1,0,0, #this will be an ordered matrix
    2,0,3,0,
    0,4,0,5,
    0,0,6,0),4,4)
x1<-rTraitDisc(tree, k=4,model = "ER")
fit1<-fitDiscrete(tree,x1,model="ER")
fit4<-fitDiscrete(tree,x1,model=model)
## Warning in fitDiscrete(tree, x1, model = model): Parameter estimates appear at bounds:
## 	q13
## 	q14
## 	q23
## 	q24
## 	q31
## 	q34
## 	q41
## 	q42
## 	q43
par(mfrow=c(1,2))
plot(fit1, signif=5, main="ER")
plot(fit2, signif=5, main="ordered")
plot of chunk unnamed-chunk-13
LRT=2*(fit1$opt$lnL-fit4$opt$lnL)
pchisq(abs(LRT), 1, lower.tail=FALSE)
## [1] 0.02492401

Exercise: Check model outpouts, try to interpret Q matrix and other estimates

Ancestral state estimation

Continiuous characters

We will use real data for this exercise (primate body mass)

#import tree and data
tree=read.tree("~/Desktop/data/primate/primate_tree.phy")
xdata=read.table(file="~/Desktop/data/primate/primate_spec.txt",header=T,sep="\t",fill=T, strip.white=T, dec = ".")

#to define the trait vector, you need to sort the data as in the tree
rownames(xdata)=xdata$species
xdata=xdata[tree$tip.label,]
x=xdata$lg.body
names(x)=rownames(xdata)

Ancestral state estimation and ploting

fit<-fastAnc(tree,x,vars=TRUE,CI=TRUE)
print(fit,printlen=10)
## Ancestral character estimates using fastAnc:
##        87       88       89       90       91       92       93       94
##  7.452362 7.800204 8.711863 8.614069 8.294638 8.006528 7.971666 8.022384
##       95       96     
##  8.03683 7.980642 ....
## 
## Variances on ancestral states:
##        87       88       89       90       91       92       93       94
##  1.555912 0.803708 0.449232 0.276079 0.161237 0.133447 0.127501 0.104562
##        95       96     
##  0.101878 0.111492 ....
## 
## Lower & upper 95% CIs:
##       lower     upper
## 87 5.007533  9.897191
## 88 6.043069  9.557339
## 89 7.398178 10.025548
## 90 7.584223  9.643915
## 91 7.507614  9.081662
## 92 7.290532  8.722525
## 93 7.271803  8.671529
## 94 7.388598   8.65617
## 95 7.411231  8.662429
## 96 7.326189  8.635095
##        ....      ....

Inspect tho output and check the precision of estimate at the root

range(x); mean(x)
## [1]  4.056817 11.613533
## [1] 7.899106
fit$CI95[1,]
## [1] 5.007533 9.897191

Create a plot to visualise trait evolution along the tree

to_plot<-contMap(tree,x,plot=F)
plot(to_plot,type="fan",legend=0.7*max(nodeHeights(tree)),sig=2,fsize=c(0.7,0.9))
plot of chunk unnamed-chunk-17

Look it in morphosace

phenogram(tree,x)
## Optimizing the positions of the tip labels...
plot of chunk unnamed-chunk-18

We may not have paleontological data to check how well the ancestral state reconstruction performed. To check the reliability of the method we are gonna look at some simulated data with known ancestral states

#simulation part
nspec=100
tree<-pbtree(n=nspec,scale=1)
x1<-fastBM(tree,internal=TRUE)
a<-x1[as.character(1:tree$Nnode+Ntip(tree))] #these will be the true ancestral states
x1<-x1[tree$tip.label]   #this is the tip data

#estimate ancestral states
fit=ace(x1, tree) #from ape
plot(a,fit$ace,xlab="True states",ylab="Estimated states",bg="black",cex=1.4,pch=21)
lines(range(c(x,a)),range(c(x,a)),lty="dashed",col="red") ## 1:1 line
segments(x0=a, y0=fit$CI95[,1], x1=a, y1=fit$CI95[,2])
plot of chunk unnamed-chunk-19

Exercise: how would you interpret this outcome

Discrete characters

We will use real data for this exercise (female song in Estrildid finches)

#load tree and data
trees=read.nexus("~/Desktop/data/Estrildid/23755.nex")
xdata=read.table(file="~/Desktop/data/Estrildid/dt(85species).txt",header=T,sep="\t",fill=T, strip.white=T, dec = ".")

tree=trees[[1]] #this is a multitree object, we will only need one 

#we need to create the trait vector with appropriate sorting
rownames(xdata)=xdata$name
xdata=xdata[tree$tip.label,]
x=xdata$Fsongbi
names(x)=rownames(xdata)

plot the tree with tip values, then estimate ancestral states and also add on figure

#plot tree and tips
plotTree(tree, type = "fan", fsize = 0.7, ftype="i")
cols <- setNames(palette()[1:length(unique(x))], sort(unique(x)))
tiplabels(pie = x, piecol = cols, cex = 0.2)

#fit ancestral states and add to the nodes
fit=ace(x, tree, model = "ER")
#fit=rerootingMethod(tree, x, model = "ER") if you want marginal likelihoods
nodelabels(node = as.numeric(names(fit$ace)), pie = fit$ace, piecol = cols, cex = 0.5)
add.simmap.legend(leg= c("song", "no song"),colors = cols, prompt = FALSE, x = 0.9 * par()$usr[1], y = -max(nodeHeights(tree)))
plot of chunk unnamed-chunk-21

using the same data we will perform stochastic character mapping

#single map
mtree <- make.simmap(tree, x, model = "ER")
## make.simmap is sampling character histories conditioned on the transition matrix
## 
## Q =
##             0           1
## 0 -0.03429279  0.03429279
## 1  0.03429279 -0.03429279
## (estimated using likelihood);
## and (mean) root node prior probabilities
## pi =
##   0   1 
## 0.5 0.5
## Done.
plotSimmap(mtree, cols, type = "fan", fsize = 0.9, ftype = "i")
plot of chunk unnamed-chunk-22
#simulate several and make inference from them
mtrees <- make.simmap(tree, x, model = "ER", nsim = 25)
## make.simmap is sampling character histories conditioned on the transition matrix
## 
## Q =
##             0           1
## 0 -0.03429279  0.03429279
## 1  0.03429279 -0.03429279
## (estimated using likelihood);
## and (mean) root node prior probabilities
## pi =
##   0   1 
## 0.5 0.5
## Done.
par(mfrow = c(5, 5))
plotSimmap(mtrees, cols, lwd = 1, ftype = "off")
plot of chunk unnamed-chunk-22
par(mfrow = c(1, 1))
pd <- describe.simmap(mtrees, plot = F)
plotSimmap(mtree, cols, type = "fan", fsize = 0.9, ftype = "i")
nodelabels(pie = pd$ace, piecol = cols, cex = 0.5)
add.simmap.legend(leg= c("no song", "song"),colors = cols, prompt = FALSE, x = 0.9 * par()$usr[1], y = -max(nodeHeights(tree)))
plot of chunk unnamed-chunk-22