Phylogenetic tree

1) Install ape R package

# update all installed R packages

update.packages()

# download and install the R ape package

install.packages('ape')

2) Get pairwise distances between taxa

# activate ape package

library(ape)

# read phylogenetic tree from file (Newick format)

mytree <- read.tree('mytree.tre')

# or, create simple example tree

mytree <- read.tree(text='((A:3.7,B:3.7):4.5,C:8.2);')

# get pairwise taxa-taxa distance matrix

d=cophenetic(mytree)

d

A B C

A 0.0 7.4 16.4

B 7.4 0.0 16.4

C 16.4 16.4 0.0

# print distance matrix 'd' to text file

write.table(d, file = 'taxa_pairwise_dist.txt',sep = '\t', quote = FALSE, col.names=NA)

3) Plot tree

# plot phylogenetic tree

plot(mytree, edge.width = 2)

# save figure to file myfigure.pdf

dev.copy(pdf, 'myfigure.pdf', width = 6, height = 4)

dev.off()

add.scale.bar() # add tree distance scale bar

tiplabels() # show tip IDs

nodelabels() # show node IDs

4) Delete nodes from tree

# get taxa names

mytree$tip.label

[1] "A" "B" "C"

# remove taxa 'A' and 'C'

pruned_tree = drop.tip(mytree, c('A','C'))

pruned_tree$tip.label

[1] "B"

# keep taxa 'A' and 'C', remove all other leafs (tree tips)

keep_taxa <- c('A','C')

setdiff(mytree$tip.label, keep_taxa)

remove_taxa = setdiff(mytree$tip.label, keep_taxa) # prepare taxa list to delete from tree (taxa: B)

pruned_tree = drop.tip(mytree, remove_taxa)

pruned_tree$tip.label

[1] "A" "C"

5) Write tree to file

write.tree(mytree)

[1] "((A:3.7,B:3.7):4.5,C:8.2);"

write.tree( mytree, file = 'mytree.tre') # write file mytree.tre (Newick file format)

write.nexus(mytree, file = 'mytree.nex') # write file mytree.tre (NEXUS file format)

6) Get tree statistics

summary(mytree)

Phylogenetic tree: mytree

Number of tips: 3

Number of nodes: 2

Branch lengths:

mean: 5.025

variance: 4.6225

distribution summary:

Min. 1st Qu. Median 3rd Qu. Max.

3.700 3.700 4.100 5.425 8.200

No root edge.

Tip labels: A

B

C

No node labels.

# total tree size: sum of all edge lengths

sum(mytree$edge.length)

[1] 20.1

7) Re-root tree

tree_rerooted = root(mytree,'A')

plot(tree_rerooted)

8) Get inner-node names (branch or clade names)

clades = mytree$node.label

clades[clades != ""] # remove empty node-names

[1] "Bacteria" "Archaea" "Eukarya"

9) Extract a subtree

# Select the clade or branch rooted by inner node-label 'Archaea'

tree_branch = extract.clade(mytree, 'Archaea')

10) Add new taxa (new tip / tree-leaf)

# find inner node number

plot(mytree)

nodelabels() # show inner node IDs

# add new leaf 'X' to inner node number 5

library(phytools)

newtree = bind.tip(mytree, 'X', edge.length=3.7, where=5, position=0)

# X - new leaf name (new tip-label)

# where=5 - attached to inner node number 5 (num tips (3) + second (2) inner node = 5)

# edge.length=3.7 - length between inner node 5 and new leaf X

# position=0 - branch at same level as inner node (>0 branch more close to parent nodes)


# add new leaf to a labeled node

# add label 'Archaea' to second inner node

mytree$node.label[2] = 'Archaea'

mytree$node.label[is.na(mytree$node.label)] = '' # remove 'NA's

write.tree(mytree)

[1] "((A:3.7,B:3.7)Archaea:4.5,C:8.2);"

# add new leaf 'X' to inner node 'Archaea'

nodeID = length(mytree$tip) + which(mytree$node.label == 'Archaea') # nodeID=5 (2+3tips)

newtree = bind.tip(mytree, 'X', edge.length=3.7, where=nodeID, position=0)

write.tree(newtree)

[1] "((X:3.7,A:3.7,B:3.7)Archaea:4.5,C:8.2);"

plot(newtree)


read more

http://ape-package.ird.fr/ape_installation.html

http://www.phytools.org/eqg/Exercise_3.2/

https://rstudio-pubs-static.s3.amazonaws.com/144342_acf2743a187d47589d11e85be28206ed.html

https://www.r-phylo.org/wiki/HowTo/DataTreeManipulation

http://schmitzlab.info/phylo.html


https://cran.r-project.org/web/packages/dendextend/vignettes/Cluster_Analysis.html

http://www.statmethods.net/advstats/cluster.html

https://rpubs.com/gaston/dendrograms