#======================================================================
# Program name:
# matrogram
# Summary:
# S-Plus program for a two-dimensional representation of a dendrogram.
# Author:
# Marcel Hendrickx
# E-mail:
# hendrickx.marcel@compaqnet.be
# You may contact me when you have questions related to this
# contribution.
# Creation date:
# 19/01/2000
# Copyright:
# This software can be freely distributed. It may be used for
# commercial as well as non-commercial purposes.
#
#======================================================================
# Contents of this file:
# This file is a text file. It contains 1) a description of how
# a dendrogram can be represented as a matrix, 2) the S-Plus
# functions to do so and 3) a demo program.
# The file can be directly executed with S-Plus. Just type
# 'source(filename of this file)' at the S-Plus command line.
# The demo program generates 8 plots. After each plot you must enter
# a return to continue. Set first the colors to 'Black on White').
# The first plot shows the dendrogram for the swiss data set, that
# is included in the S-Plus distribution. The other 7 plots are
# matrograms for this dendrogram.
#
#======================================================================
# Matrograms: a two dimensional representation for dendrograms.
#
# Suppose we have a binary tree. At the highest level we have
#
# T = (T1,T2)
#
# where T1 and T2 are binary subtrees. This tree can be represented
# as a matrix
#
# --------- ---------
# | M1 | | | | M1 |
# M = |---------| or M = |---------|
# | | M2 | | M2 | |
# --------- ---------
#
# where M1 and M2 are the matrix representations of the subtrees.
# If we construct M1 and M2 in the same way as we constructed M
# then it can easily be seen that we arrive at diagonal matrices.
# In the first case we obtain a matrix with only elements on the
# first diagonal (we represent end nodes of the tree as 'x' in the
# matrix). In the second case we obtain a matrix with only elements
# on the second diagonal. This is obviously not what we want.
# The solution is to use alternatively the first and the second
# construction.
#
# Suppose we have the following dendrogram, which is a perfectly
# binary tree:
#
# _______|_______
# ___|___ ___|___
# _|_ _|_ _|_ _|_
# | | | | | | | |
# 1 2 3 4 5 6 7 8
#
#
# At the highest level we apply the second construction:
# we use the upper right and lower left boxes and leave the upper
# left and lower right boxes empty. At the second level we apply
# the first construction: we use the upper left and lower right
# (sub)boxes and leave the upper right and lower left (sub)boxes
# empty. We then obtain the following matrogram:
#
# ------------------------
# 1 | x |
# 2 | x |
# 3 | x |
# 4 | x |
# 5 | x |
# 6 | x |
# 7 | x |
# 8 | x |
# ------------------------
# 6 5 8 7 2 1 4 3
#
# The numbers on the axes denote the objects.
#
# Instead of indicating an object with an 'x' and leaving the rest
# of the matrix empty we could use grey values that represent the
# distance (or some other measure of correlation) between the objects.
# That is what the demo program does. The objects themselves
# (distance = 0) are then represented by black squares.
#
# It should be noted that the tree structure of the dendrogram can be
# recovered from the matrogram but not the heights of the branches.
#======================================================================
# Function name: traverse
#
# Parameters:
# merge: the result of applying hclust i.e. hclust()$merge
# (from S-PLUS help for hclust: 'an (n-1) by 2 matrix, if
# there were n objects in the original data. Row i of
# merge describes the merging of clusters at step i of
# the clustering. If an element j in the row is negative,
# then object -j was merged at this stage. If j is positive,
# then the merge was with the cluster formed at the
# (earlier) stage j of the algorithm.')
# index: merge[index,] represents the final cluster
# level: depth in the dendrogram, starts at 1
# res: integer vector of length n-1
# res[1:(count-1)] = list of objects already found
# count: number of objects already added to the output list
#
# Return value:
# traverse()$res[1:(traverse$count-1)]=
# final list of objects = numbers as they appear on the
# vertical axis of the matrogram
#
# Synopsis:
# The tree is traversed top down and alternatively right-left
# and left-right. When a leaf is encountered, the object is
# assigned to res[count] and count is incremented by 1.
#
#======================================================================
traverse <- function(merge,index,res,count,level)
{
if(index < 0)
{ # 'index' represents the object with sequence number (-index)
# append the object to the result array and return
res[count] <- -index
count <- count + 1
list(res = res, count = count)
}
else
{ # 'index' represents a cluster
# merge[index,1] represents the left subtree
# merge[index,2] represents the right subtree
if (level%%2 == 0)
{ # level=even: traverse first right subtree and then left subtree
result <- traverse(merge,merge[index,2],res,count,level+1)
result <- traverse(merge,merge[index,1],result$res,result$count,level+1)
}
else
{ # level=uneven: traverse first left subtree and then right subtree
result <- traverse(merge,merge[index,1],res,count,level+1)
result <- traverse(merge,merge[index,2],result$res,result$count,level+1)
}
list(res = result$res, count = result$count)
}
}
#======================================================================
# Function name: dist2full
#
# Parameters:
# dis: a distance structure from S-PLUS like swiss.x from the
# swiss dataset
#
# Return value:
# a matrix
#
# Synopsis:
# The function converts a distance structure to a matrix.
# The code is taken from the S-PLUS help on 'dist'.
#
#======================================================================
dist2full <- function(dis)
{
n <- attr(dis, "Size")
full <- matrix(0, n, n)
full[lower.tri(full)] <- dis
full + t(full)
}
#======================================================================
# Function name: compl
#
# Parameters:
# m: a matrix with grey values
#
# Return value:
# matrix with the complementary grey values
# (black->white and white->black)
#
#======================================================================
compl <- function(m)
{
max(m)-m
}
#======================================================================
# Function name: displayobjects
#
# Parameters:
# hor : list of objects as they appear on the horizontal axis
# vert: list of objects as they appear on the vertical axis
#
# Synopsis:
# Displays a two-dimensional plot of the objects.
#
#======================================================================
displayobjects <- function(hor,vert)
{
# open window
win.graph()
# plot the labels
plot(hor,vert,type="n",axes=F)
n <- length(hor)
axis(1,at=1:n,labels=order(hor),cex=0.8)
axis(2,at=1:n,labels=order(vert),cex=0.8)
# plot the object numbers
text(hor,vert,cex=0.8)
}
#======================================================================
# Function name: displayimage
#
# Parameters:
# mat : matrix of distances, output of dist2full
# hor : list of objects as they appear on the horizontal axis
# vert: list of objects as they appear on the vertical axis
#
# Synopsis:
# The matrix contains the distances between the objects:
# mat[i,j] is the distance between object i and object j. These
# distances are displayed as grey values. Black corresponds to a
# distance=0 and thus represents the objects themselves.
#
# Remark:
# Use 'Black on White' colors.
#
#======================================================================
displayimage <- function(mat,hor,vert)
{
# open window
win.graph()
# display image
image(mat[order(hor),order(vert)],axes=F)
# plot the labels
plot(hor,vert,type="n",axes=F)
n <- length(hor)
axis(1,at=1:n,labels=order(hor),cex=0.8)
axis(2,at=1:n,labels=order(vert),cex=0.8)
}
#======================================================================
# Function name: test
#
# Synopsis:
# This demo program performs a clustering on the swiss dataset,
# converts the resulting dendrogram to a matrogram and displays
# the matrogram. The first diagram displays the numbered objects.
# The others diagrams display the matrogram as a grey level image
# where the grey level corresponds to the distance between objects.
# The first image displays the objects as black squares with the
# rest of the matrogram blanc. In this diagram the objects are
# clearly visible but the relation (i.e. distance) between the
# objects is not. The last diagram displays the distances as
# grey levels. The black boxes (distance=0) represent the objects.
# Displaying the distances makes it more easy to evaluate the
# quality of the clustering. A good clustering gives dark regions
# (small distances between the objects of one cluster or region)
# separated by white space (large distances between objects of
# different clusters or regions). The other diagrams use a
# threshold to eliminate 'noise'. (I'm not sure this makes sense
# but it looks interesting to me).
#
# Remark:
# Use 'Black on White' colors
#
#======================================================================
test <- function()
{
# cluster swiss dataset
h <- hclust(dist(swiss.x),method="connected")
# create matrogram
# order(h$order) lists the objects in the same order as they appear
# in the dendrogram plcust(h)
hor <- order(h$order)
res <- hor
vert <- order(traverse(h$merge,length(hor)-1,res,1,1)$res)
# convert distance function to matrix
swiss.mat <- dist2full(dist(swiss.x))
# display the dendrogram
win.graph()
plclust(h)
cat("Enter return to continue")
readline()
# display numbered objects
displayobjects(hor,vert)
cat("Enter return to continue")
readline()
# display greylevel images with different thresholds
# for technical reasons the threshold is applied to the
# complement of the image
threshold <- max(swiss.mat)
while (threshold > 0)
{
mat <- compl(compl(swiss.mat)*(compl(swiss.mat) >= threshold))
threshold <- threshold - 30
displayimage(mat,hor,vert)
cat("Enter return to continue")
readline()
}
}
#======================================================================
# Execute demo
#======================================================================
test()