library(phytools) library(car) library(mnormt) library(caper) library(geiger) library(nlme) library(phylolm) library(phangorn) library(castor) library(diversitree) library(mvSLOUCH)
This little simulation shows how BM generates normal distribution and covariance
tree<-pbtree(n=6,tip.label=LETTERS[1:6]) X<-fastBM(tree,nsim=100) plot(rotateNodes(tree,"all"),edge.width=2,no.margin=TRUE)
scatterplotMatrix(t(X))
par(mfrow=c(1,2)) hist(t(X)[,"A"], main="variance across simulations") hist(X[,1], main="variance across species")
We will simulate data under different models of trait evolution for a predefined number of species
nspec=100 tree<-pbtree(n=nspec,scale=100) x1<-fastBM(tree,a=2,sig2=0.01, internal=TRUE) #BM model x2<-fastBM(tree,a=2,sig2=0.01, mu=0.05, internal=TRUE) #BM with a trend x3<-fastBM(tree,a=2,theta=2,alpha=0.8,sig2=0.1, internal=TRUE, model = "OU") #OU model par(mfrow=c(1,3)) phenogram(tree,x1, spread.labels=F, ylim=c(0, max(as.numeric(x3),as.numeric(x2),as.numeric(x1))), main="BM") phenogram(tree,x2, spread.labels=F, ylim=c(0, max(as.numeric(x3),as.numeric(x2),as.numeric(x1))), main="BM with trend") phenogram(tree,x3, spread.labels=F, ylim=c(0, max(as.numeric(x3),as.numeric(x2),as.numeric(x1))), main="OU")
check out how different models fit with the simulated data
fit1=fitContinuous(tree,x1[1:nspec]) fit2=fitContinuous(tree,x1[1:nspec],model="trend") fit3=fitContinuous(tree,x1[1:nspec],model="OU") setNames(c(fit1$opt$aicc,fit2$opt$aicc,fit3$opt$aicc),c("BM","trend","OU"))
## BM trend OU ## 136.8844 138.9788 139.0106
Now we are going to work with the Pagel's parameters. Let us start with lambda
nspec=100 tree<-pbtree(n=nspec,scale=100) ##transform lambda tree1=rescale(tree, "lambda", 0) tree2=rescale(tree, "lambda", 0.5) #plot par(mfrow=c(1,3)) plot(tree1, edge.width=1.5, cex=1.2); mtext("Lambda = 0") plot(tree2, edge.width=1.5, cex=1.2); mtext("Lambda = 0.5") plot(tree, edge.width=1.5, cex=1.2); mtext("Lambda = 1 (default)")
#simulate data x<-fastBM(tree,a=0,sig2=0.01, internal=F) x1<-fastBM(tree1,a=0,sig2=0.01, internal=F) x2<-fastBM(tree2,a=0,sig2=0.01, internal=F) #estimate lambda for the simulated data fitContinuous(tree,x, model="lambda")$opt$lambda
## [1] 0.9979673
fitContinuous(tree,x1, model="lambda")$opt$lambda
## [1] 4.160831e-217
fitContinuous(tree,x2, model="lambda")$opt$lambda
## [1] 0.8445418
To draw likelihood surfaces, we will fit PGLS models with intercept only
#check likelihood surface xdata=data.frame(x) xdata$species=rownames(xdata) tree.x=comparative.data(phy = tree, data = xdata, names.col =species) res<-pgls(x~ 1, data = tree.x, lambda='ML') xdata1=data.frame(x1) xdata1$species=rownames(xdata1) tree.x1=comparative.data(phy = tree, data = xdata1, names.col =species) res1<-pgls(x1~ 1, data = tree.x, lambda='ML') xdata2=data.frame(x2) xdata2$species=rownames(xdata2) tree.x2=comparative.data(phy = tree, data = xdata2, names.col =species) res2<-pgls(x2~ 1, data = tree.x2, lambda='ML') par(mfrow=c(1,3)) plot(pgls.profile(res, 'lambda')) plot(pgls.profile(res1, 'lambda')) plot(pgls.profile(res2, 'lambda'))
To continue with the phylogenetic signal we will estimate the K and lambda statsistic
phylosig(tree,x,method="lambda")$lambda
## [1] 0.9979644
phylosig(tree,x1,method="lambda")$lambda
## [1] 6.622674e-05
phylosig(tree,x2,method="lambda")$lambda
## [1] 0.8445479
phylosig(tree,x,method="K", test=T)
## $K ## [1] 0.9199781 ## ## $P ## [1] 0.001
phylosig(tree,x1,method="K", test=T)
## $K ## [1] 0.04061124 ## ## $P ## [1] 0.7
phylosig(tree,x2,method="K", test=T)
## $K ## [1] 0.1998206 ## ## $P ## [1] 0.001
This are very similar exercises focusing on kappa and delta
#delta transformations tree1=rescale(tree, "delta", 0.1) tree2=rescale(tree, "delta", 2) par(mfrow=c(1,3)) plot(tree1, edge.width=1.5, cex=1.2); mtext("delta = 0.1") plot(tree, edge.width=1.5, cex=1.2); mtext("delta = 1 (default)") plot(tree2, edge.width=1.5, cex=1.2); mtext("delta = 2")
#kappa transformations tree1=rescale(tree, "kappa", 0) tree2=rescale(tree, "kappa", 2) par(mfrow=c(1,3)) plot(tree1, edge.width=1.5, cex=1.2); mtext("kappa = 0") plot(tree, edge.width=1.5, cex=1.2); mtext("kappa = 1 (default)") plot(tree2, edge.width=1.5, cex=1.2); mtext("kappa = 2")
We will simulate data under different evolutionary scenarios then fit models to see how different parameters can be estimated
nspec=100 tree<-pbtree(n=nspec,scale=100) x1<-rTraitDisc(tree, model = "ER") x2<-rTraitDisc(tree, model = "SYM") x3<-rTraitDisc(tree, model = "ARD", rate=c(0.1,0.4)) fit1<-fitDiscrete(tree,x1,model="ER") fit2<-fitDiscrete(tree,x1,model="SYM") fit3<-fitDiscrete(tree,x1,model="ARD") fit4<-fitDiscrete(tree,x1,model=matrix(c(0,0,1,0),2,2))
## Warning in fitDiscrete(tree, x1, model = matrix(c(0, 0, 1, 0), 2, 2)): Parameter estimates appear at bounds: ## q21
par(mfrow=c(1,4)) plot(fit1, signif=5, main=paste("ER model, LL =", round(fit1$opt$lnL,3))) plot(fit2, signif=5, main=paste("SYM model, LL =", round(fit2$opt$lnL,3))) plot(fit3, signif=5, main=paste("ARD model, LL =", round(fit3$opt$lnL,3))) plot(fit4, signif=5, main=paste("IRR model, LL =", round(fit4$opt$lnL,3)))
#model comparison with AIC aicc<-setNames( c(fit1$opt$aicc,fit2$opt$aicc,fit3$opt$aicc,fit4$opt$aicc), c("ER","SYM","ARD","IRR")) aicc
## ER SYM ARD IRR ## 140.2142 140.2142 142.0018 214.2515
#model comparison with likelihood ratio test LRT=2*(fit1$opt$lnL-fit3$opt$lnL) pchisq(abs(LRT), 1, lower.tail=FALSE)
## [1] 0.5867853
LRT=2*(fit1$opt$lnL-fit4$opt$lnL) pchisq(abs(LRT), 2, lower.tail=FALSE)
## [1] 8.375725e-17
It is possible to model evolution of traits with k>2 states. We will still use fitDiscrete from geiger, but note that fitMk from phytools can also be used, which is actually faster. Below, you can set up the number of states then you appropriately define rates from a random (uniform) distribution between 0.1 and 1.
k=3 tree<-pbtree(n=nspec,scale=100) x1<-rTraitDisc(tree, k=k,model = "ER") x2<-rTraitDisc(tree, k=k, model = "SYM", rate=sample(seq(0.1, 1, by=0.1), k*(k - 1)/2)) x3<-rTraitDisc(tree, k=k, model = "ARD", rate=sample(seq(0.1, 1, by=0.1), k*(k - 1)))
See how different models fit with the simulated data.
fit1<-fitDiscrete(tree,x2,model="ER") fit2<-fitDiscrete(tree,x2,model="SYM") fit3<-fitDiscrete(tree,x2,model="ARD") par(mfrow=c(1,3)) plot(fit1, signif=5, main="ER") plot(fit2, signif=5, main="SYM") plot(fit3, signif=5, main="ARD")
Note 1: with castor you can generate Q matrix
Q = castor::get_random_mk_transition_matrix(k, rate_model="ER") tip_states = LETTERS[simulate_mk_model(tree, Q)$tip_states] names(tip_states)=tree$tip.label
Note 2: you can also define your own matrix and use evaluate fit
model<-matrix(c(0,1,0,0, #this will be an ordered matrix 2,0,3,0, 0,4,0,5, 0,0,6,0),4,4) x1<-rTraitDisc(tree, k=4,model = "ER") fit1<-fitDiscrete(tree,x1,model="ER") fit4<-fitDiscrete(tree,x1,model=model)
## Warning in fitDiscrete(tree, x1, model = model): Parameter estimates appear at bounds: ## q13 ## q14 ## q23 ## q24 ## q31 ## q34 ## q41 ## q42 ## q43
par(mfrow=c(1,2)) plot(fit1, signif=5, main="ER") plot(fit2, signif=5, main="ordered")
LRT=2*(fit1$opt$lnL-fit4$opt$lnL) pchisq(abs(LRT), 1, lower.tail=FALSE)
## [1] 0.02492401
We will use real data for this exercise (primate body mass)
#import tree and data tree=read.tree("~/Desktop/data/primate/primate_tree.phy") xdata=read.table(file="~/Desktop/data/primate/primate_spec.txt",header=T,sep="\t",fill=T, strip.white=T, dec = ".") #to define the trait vector, you need to sort the data as in the tree rownames(xdata)=xdata$species xdata=xdata[tree$tip.label,] x=xdata$lg.body names(x)=rownames(xdata)
Ancestral state estimation and ploting
fit<-fastAnc(tree,x,vars=TRUE,CI=TRUE) print(fit,printlen=10)
## Ancestral character estimates using fastAnc: ## 87 88 89 90 91 92 93 94 ## 7.452362 7.800204 8.711863 8.614069 8.294638 8.006528 7.971666 8.022384 ## 95 96 ## 8.03683 7.980642 .... ## ## Variances on ancestral states: ## 87 88 89 90 91 92 93 94 ## 1.555912 0.803708 0.449232 0.276079 0.161237 0.133447 0.127501 0.104562 ## 95 96 ## 0.101878 0.111492 .... ## ## Lower & upper 95% CIs: ## lower upper ## 87 5.007533 9.897191 ## 88 6.043069 9.557339 ## 89 7.398178 10.025548 ## 90 7.584223 9.643915 ## 91 7.507614 9.081662 ## 92 7.290532 8.722525 ## 93 7.271803 8.671529 ## 94 7.388598 8.65617 ## 95 7.411231 8.662429 ## 96 7.326189 8.635095 ## .... ....
Inspect tho output and check the precision of estimate at the root
range(x); mean(x)
## [1] 4.056817 11.613533
## [1] 7.899106
fit$CI95[1,]
## [1] 5.007533 9.897191
Create a plot to visualise trait evolution along the tree
to_plot<-contMap(tree,x,plot=F) plot(to_plot,type="fan",legend=0.7*max(nodeHeights(tree)),sig=2,fsize=c(0.7,0.9))
Look it in morphosace
phenogram(tree,x)
## Optimizing the positions of the tip labels...
We may not have paleontological data to check how well the ancestral state reconstruction performed. To check the reliability of the method we are gonna look at some simulated data with known ancestral states
#simulation part nspec=100 tree<-pbtree(n=nspec,scale=1) x1<-fastBM(tree,internal=TRUE) a<-x1[as.character(1:tree$Nnode+Ntip(tree))] #these will be the true ancestral states x1<-x1[tree$tip.label] #this is the tip data #estimate ancestral states fit=ace(x1, tree) #from ape plot(a,fit$ace,xlab="True states",ylab="Estimated states",bg="black",cex=1.4,pch=21) lines(range(c(x,a)),range(c(x,a)),lty="dashed",col="red") ## 1:1 line segments(x0=a, y0=fit$CI95[,1], x1=a, y1=fit$CI95[,2])
We will use real data for this exercise (female song in Estrildid finches)
#load tree and data trees=read.nexus("~/Desktop/data/Estrildid/23755.nex") xdata=read.table(file="~/Desktop/data/Estrildid/dt(85species).txt",header=T,sep="\t",fill=T, strip.white=T, dec = ".") tree=trees[[1]] #this is a multitree object, we will only need one #we need to create the trait vector with appropriate sorting rownames(xdata)=xdata$name xdata=xdata[tree$tip.label,] x=xdata$Fsongbi names(x)=rownames(xdata)
plot the tree with tip values, then estimate ancestral states and also add on figure
#plot tree and tips plotTree(tree, type = "fan", fsize = 0.7, ftype="i") cols <- setNames(palette()[1:length(unique(x))], sort(unique(x))) tiplabels(pie = x, piecol = cols, cex = 0.2) #fit ancestral states and add to the nodes fit=ace(x, tree, model = "ER") #fit=rerootingMethod(tree, x, model = "ER") if you want marginal likelihoods nodelabels(node = as.numeric(names(fit$ace)), pie = fit$ace, piecol = cols, cex = 0.5) add.simmap.legend(leg= c("song", "no song"),colors = cols, prompt = FALSE, x = 0.9 * par()$usr[1], y = -max(nodeHeights(tree)))
using the same data we will perform stochastic character mapping
#single map mtree <- make.simmap(tree, x, model = "ER")
## make.simmap is sampling character histories conditioned on the transition matrix ## ## Q = ## 0 1 ## 0 -0.03429279 0.03429279 ## 1 0.03429279 -0.03429279 ## (estimated using likelihood); ## and (mean) root node prior probabilities ## pi = ## 0 1 ## 0.5 0.5
plotSimmap(mtree, cols, type = "fan", fsize = 0.9, ftype = "i")
#simulate several and make inference from them mtrees <- make.simmap(tree, x, model = "ER", nsim = 25)
## make.simmap is sampling character histories conditioned on the transition matrix ## ## Q = ## 0 1 ## 0 -0.03429279 0.03429279 ## 1 0.03429279 -0.03429279 ## (estimated using likelihood); ## and (mean) root node prior probabilities ## pi = ## 0 1 ## 0.5 0.5
par(mfrow = c(5, 5)) plotSimmap(mtrees, cols, lwd = 1, ftype = "off")
par(mfrow = c(1, 1)) pd <- describe.simmap(mtrees, plot = F) plotSimmap(mtree, cols, type = "fan", fsize = 0.9, ftype = "i") nodelabels(pie = pd$ace, piecol = cols, cex = 0.5) add.simmap.legend(leg= c("no song", "song"),colors = cols, prompt = FALSE, x = 0.9 * par()$usr[1], y = -max(nodeHeights(tree)))