Phylogenetic tree
# update all installed R packages
update.packages()
# download and install the R ape package
install.packages('ape')
2) Get pairwise distances between taxa
# activate ape package
library(ape)
# read phylogenetic tree from file (Newick format)
mytree <- read.tree('mytree.tre')
# or, create simple example tree
mytree <- read.tree(text='((A:3.7,B:3.7):4.5,C:8.2);')
# get pairwise taxa-taxa distance matrix
d=cophenetic(mytree)
d
A B C
A 0.0 7.4 16.4
B 7.4 0.0 16.4
C 16.4 16.4 0.0
# print distance matrix 'd' to text file
write.table(d, file = 'taxa_pairwise_dist.txt',sep = '\t', quote = FALSE, col.names=NA)
3) Plot tree
# plot phylogenetic tree
plot(mytree, edge.width = 2)
# save figure to file myfigure.pdf
dev.copy(pdf, 'myfigure.pdf', width = 6, height = 4)
dev.off()
add.scale.bar() # add tree distance scale bar
tiplabels() # show tip IDs
nodelabels() # show node IDs
4) Delete nodes from tree
# get taxa names
mytree$tip.label
[1] "A" "B" "C"
# remove taxa 'A' and 'C'
pruned_tree = drop.tip(mytree, c('A','C'))
pruned_tree$tip.label
[1] "B"
# keep taxa 'A' and 'C', remove all other leafs (tree tips)
keep_taxa <- c('A','C')
setdiff(mytree$tip.label, keep_taxa)
remove_taxa = setdiff(mytree$tip.label, keep_taxa) # prepare taxa list to delete from tree (taxa: B)
pruned_tree = drop.tip(mytree, remove_taxa)
pruned_tree$tip.label
[1] "A" "C"
5) Write tree to file
write.tree(mytree)
[1] "((A:3.7,B:3.7):4.5,C:8.2);"
write.tree( mytree, file = 'mytree.tre') # write file mytree.tre (Newick file format)
write.nexus(mytree, file = 'mytree.nex') # write file mytree.tre (NEXUS file format)
6) Get tree statistics
summary(mytree)
Phylogenetic tree: mytree
Number of tips: 3
Number of nodes: 2
Branch lengths:
mean: 5.025
variance: 4.6225
distribution summary:
Min. 1st Qu. Median 3rd Qu. Max.
3.700 3.700 4.100 5.425 8.200
No root edge.
Tip labels: A
B
C
No node labels.
# total tree size: sum of all edge lengths
sum(mytree$edge.length)
[1] 20.1
7) Re-root tree
tree_rerooted = root(mytree,'A')
plot(tree_rerooted)
8) Get inner-node names (branch or clade names)
clades = mytree$node.label
clades[clades != ""] # remove empty node-names
[1] "Bacteria" "Archaea" "Eukarya"
9) Extract a subtree
# Select the clade or branch rooted by inner node-label 'Archaea'
tree_branch = extract.clade(mytree, 'Archaea')
10) Add new taxa (new tip / tree-leaf)
# find inner node number
plot(mytree)
nodelabels() # show inner node IDs
# add new leaf 'X' to inner node number 5
library(phytools)
newtree = bind.tip(mytree, 'X', edge.length=3.7, where=5, position=0)
# X - new leaf name (new tip-label)
# where=5 - attached to inner node number 5 (num tips (3) + second (2) inner node = 5)
# edge.length=3.7 - length between inner node 5 and new leaf X
# position=0 - branch at same level as inner node (>0 branch more close to parent nodes)
# add new leaf to a labeled node
# add label 'Archaea' to second inner node
mytree$node.label[2] = 'Archaea'
mytree$node.label[is.na(mytree$node.label)] = '' # remove 'NA's
write.tree(mytree)
[1] "((A:3.7,B:3.7)Archaea:4.5,C:8.2);"
# add new leaf 'X' to inner node 'Archaea'
nodeID = length(mytree$tip) + which(mytree$node.label == 'Archaea') # nodeID=5 (2+3tips)
newtree = bind.tip(mytree, 'X', edge.length=3.7, where=nodeID, position=0)
write.tree(newtree)
[1] "((X:3.7,A:3.7,B:3.7)Archaea:4.5,C:8.2);"
plot(newtree)
read more
http://ape-package.ird.fr/ape_installation.html
http://www.phytools.org/eqg/Exercise_3.2/
https://rstudio-pubs-static.s3.amazonaws.com/144342_acf2743a187d47589d11e85be28206ed.html
https://www.r-phylo.org/wiki/HowTo/DataTreeManipulation
http://schmitzlab.info/phylo.html
https://cran.r-project.org/web/packages/dendextend/vignettes/Cluster_Analysis.html