Bioinformatics‎ > ‎

Phylogenetic tree

1) Install ape R package

# update all installed R packages
update.packages()

# download and install the R ape package
install.packages('ape')

2) Get pairwise distances between taxa

# activate ape package
library(ape)

# Read phylogenetic tree from file (Newick format)
mytree <- read.tree('mytree.tre')

# or, create simple example tree
mytree <- read.tree(text='((A:3.7,B:3.7):4.5,C:8.2);')


# get pairwise taxa-taxa distance matrix
d=cophenetic(mytree)
d
      A    B    C
  A  0.0  7.4 16.4
  B  7.4  0.0 16.4
  C 16.4 16.4  0.0


# print distance matrix d to text file
write.table(d, file = 'taxa_pairwise_dist.txt',sep = '\t', quote = FALSE, col.names=NA)

3) Plot tree


# plot phylogenetic tree
plot(mytree, edge.width = 2)
# save figure to file myfigure.pdf
dev.copy(pdf, 'myfigure.pdf', width = 6, height = 4)
dev.off()



add.scale.bar() # add tree distance scale bar
tiplabels()     # show tip IDs
nodelabels()    # show node IDs

4) Delete nodes from tree


# get taxa names
mytree$tip.label
 [1] "A" "B" "C"

# remove taxa 'A' and 'C'
pruned_tree = drop.tip(mytree, c('A','C'))
pruned_tree$tip.label
 [1] "B"

# keep taxa 'A' and 'C', remove all other leafs (tree tips)
keep_taxa <- c('A','C')
setdiff(mytree$tip.label, keep_taxa)
remove_taxa = setdiff(mytree$tip.label, keep_taxa)  # get set of taxa to delete from tree (taxa: B)
pruned_tree = drop.tip(mytree, remove_taxa)
pruned_tree$tip.label
 [1] "A" "C"

5) Write tree to file

write.tree(mytree)
 [1] "((A:3.7,B:3.7):4.5,C:8.2);"
write.tree( mytree, file = 'mytree.tre') # write file mytree.tre (Newick file format)
write.nexus(mytree, file = 'mytree.nex') # write file mytree.tre (NEXUS file format)

6) Get tree statistics


summary(mytree)
Phylogenetic tree: mytree

  Number of tips: 3
  Number of nodes: 2
  Branch lengths:
    mean: 5.025
    variance: 4.6225
    distribution summary:
   Min. 1st Qu.  Median 3rd Qu.    Max.
  3.700   3.700   4.100   5.425   8.200
  No root edge.
  Tip labels: A
              B
              C
  No node labels.


# total tree size: sum of all edge lengths
sum(mytree$edge.length)
 [1] 20.1

7) Re-root tree

tree_rerooted = root(mytree,'A') 
plot(tree_rerooted)

8) Get inner-node names (branch or clade names)


clades = mytree$node.label
clades[clades != ""]  # remove empty node-names
 [1] "Bacteria"  "Archaea"  "Eukarya"

9) Extract a subtree (the clade or branch rooted by inner node-label 'Archaea')


tree_branch = extract.clade(mytree, 'Archaea')


10) Add new taxa (new tip / tree-leaf)

# find inner node number
plot(mytree)
nodelabels() # show inner node IDs

# add new leaf 'X' to inner node number 5
library(phytools)
newtree = bind.tip(mytree, 'X', edge.length=3.7, where=5, position=0)

  # X - new leaf name (new tip-label)
  # where=5  - attached to inner node number 5 (num tips (3) + second (2) inner node = 5)
  # edge.length=3.7   - length between inner node 5 and new leaf X
  # position=0 - branch at same level as inner node (>0 branch more close to parent nodes)  


# add new leaf to a labeled node

# add label 'Archaea' to second inner node
mytree$node.label[2] = 'Archaea'
mytree$node.label[is.na(mytree$node.label)] = ''  # remove 'NA's
write.tree(mytree)
 [1] "((A:3.7,B:3.7)Archaea:4.5,C:8.2);"

# add new leaf to inner node 'Archaea'
nodeID  = length(mytree$tip) + which(mytree$node.label == 'Archaea') # nodeID=5 (2+3tips)
newtree = bind.tip(mytree, 'X', edge.length=3.7, where=nodeID, position=0)
write.tree(newtree)
 [1] "((X:3.7,A:3.7,B:3.7)Archaea:4.5,C:8.2);"

plot(newtree)







read more