###### published on:
#   http://www.r-statistics.com/2010/06/clustergram-visualization-and-diagnostics-for-cluster-analysis-r-code/
## Main author of the function:  Tal Galili (tal.galili@gmail.com)


clustergram.kmeans <- function(Data, k, ...)
{
	# this is the type of function that the clustergram
	# 	function takes for the clustering.
	# 	using similar structure will allow implementation of different clustering algorithms
 
	#	It returns a list with two elements:
	#	cluster = a vector of length of n (the number of subjects/items)
	#				indicating to which cluster each item belongs.
	#	centers = a k dimensional vector.  Each element is 1 number that represent that cluster
	#				In our case, we are using the weighted mean of the cluster dimensions by 
	#				Using the first component (loading) of the PCA of the Data.
 
	cl <- kmeans(Data, k,...)
 
	cluster <- cl$cluster
	centers <- cl$centers %*% princomp(Data)$loadings[,1]	# 1 number per center
												# here we are using the weighted mean for each
 
	return(list(
				cluster = cluster,
				centers = centers
			))
}		
 
clustergram.plot.matlines <- function(X,Y, k.range, 
											x.range, y.range , COL, 
											add.center.points , centers.points)
	{
		plot(0,0, col = "white", xlim = x.range, ylim = y.range,
			axes = F,
			xlab = "Number of clusters (k)", ylab = "PCA weighted Mean of the clusters", main = c("Clustergram of the PCA-weighted Mean of" ,"the clusters k-mean clusters vs number of clusters (k)"))
		axis(side =1, at = k.range)
		axis(side =2)
		abline(v = k.range, col = "grey")
 
		matlines(t(X), t(Y), pch = 19, col = COL, lty = 1, lwd = 1.5)
 
		if(add.center.points)
		{
			require(plyr)
 
			xx <- ldply(centers.points, rbind)
			points(xx$y~xx$x, pch = 19, col = "red", cex = 1.3)
 
			# add points	
			# temp <- l_ply(centers.points, function(xx) {
									# with(xx,points(y~x, pch = 19, col = "red", cex = 1.3))
									# points(xx$y~xx$x, pch = 19, col = "red", cex = 1.3)
									# return(1)
									# })
						# We assign the lapply to a variable (temp) only to suppress the lapply "NULL" output
		}	
	}
 
 
clustergram <- function(Data, k.range = 2:10 , 
							clustering.function = clustergram.kmeans,
							clustergram.plot = clustergram.plot.matlines, 
							line.width = .004, add.center.points = T)
{
	# Data - should be a scales matrix.  Where each column belongs to a different dimension of the observations
	# k.range - is a vector with the number of clusters to plot the clustergram for
	# clustering.function - this is not really used, but offers a bases to later extend the function to other algorithms 
	#			Although that would  more work on the code
	# line.width - is the amount to lift each line in the plot so they won't superimpose eachother
	# add.center.points - just assures that we want to plot points of the cluster means
 
	n <- dim(Data)[1]
 
	PCA.1 <- Data %*% princomp(Data)$loadings[,1]	# first principal component of our data
 
	if(require(colorspace)) {
			COL <- heat_hcl(n)[order(PCA.1)]	# line colors
		} else {
			COL <- rainbow(n)[order(PCA.1)]	# line colors
			warning('Please consider installing the package "colorspace" for prittier colors')
		}
 
	line.width <- rep(line.width, n)
 
	Y <- NULL	# Y matrix
	X <- NULL	# X matrix
 
	centers.points <- list()
 
	for(k in k.range)
	{
		k.clusters <- clustering.function(Data, k)
 
		clusters.vec <- k.clusters$cluster
			# the.centers <- apply(cl$centers,1, mean)
		the.centers <- k.clusters$centers 
 
		noise <- unlist(tapply(line.width, clusters.vec, cumsum))[order(seq_along(clusters.vec)[order(clusters.vec)])]	
		# noise <- noise - mean(range(noise))
		y <- the.centers[clusters.vec] + noise
		Y <- cbind(Y, y)
		x <- rep(k, length(y))
		X <- cbind(X, x)
 
		centers.points[[k]] <- data.frame(y = the.centers , x = rep(k , k))	
	#	points(the.centers ~ rep(k , k), pch = 19, col = "red", cex = 1.5)
	}
 
 
	x.range <- range(k.range)
	y.range <- range(PCA.1)
 
	clustergram.plot(X,Y, k.range, 
											x.range, y.range , COL, 
											add.center.points , centers.points)
 
 
}


if(F) {

#Examples:

png("d:\\clustergram_plots_%03d.png",650,650, pointsize  = 15)

data(iris)
set.seed(250)
par(cex.lab = 1.5, cex.main = 1.2)
Data <- scale(iris[,-5]) # notice I am scaling the vectors)
clustergram(Data, k.range = 2:8, line.width = 0.004) # notice how I am using line.width.  Play with it on your problem, according to the scale of Y.

set.seed(500)
Data <- scale(iris[,-5]) # notice I am scaling the vectors)
par(cex.lab = 1.2, cex.main = .7)
par(mfrow = c(3,2))
for(i in 1:6) clustergram(Data, k.range = 2:8 , line.width = .004, add.center.points = T)
par(mfrow = c(1,1))

set.seed(250)
Data <- rbind(
				cbind(rnorm(100,0, sd = 0.3),rnorm(100,0, sd = 0.3),rnorm(100,0, sd = 0.3)),
				cbind(rnorm(100,1, sd = 0.3),rnorm(100,1, sd = 0.3),rnorm(100,1, sd = 0.3)),
				cbind(rnorm(100,2, sd = 0.3),rnorm(100,2, sd = 0.3),rnorm(100,2, sd = 0.3))
				)				
clustergram(Data, k.range = 2:5 , line.width = .004, add.center.points = T)

set.seed(250)
Data <- rbind(
				cbind(rnorm(100,1, sd = 0.3),rnorm(100,0, sd = 0.3),rnorm(100,0, sd = 0.3),rnorm(100,0, sd = 0.3)),
				cbind(rnorm(100,0, sd = 0.3),rnorm(100,1, sd = 0.3),rnorm(100,0, sd = 0.3),rnorm(100,0, sd = 0.3)),
				cbind(rnorm(100,0, sd = 0.3),rnorm(100,1, sd = 0.3),rnorm(100,1, sd = 0.3),rnorm(100,0, sd = 0.3)),
				cbind(rnorm(100,0, sd = 0.3),rnorm(100,0, sd = 0.3),rnorm(100,0, sd = 0.3),rnorm(100,1, sd = 0.3))
				)				
clustergram(Data, k.range = 2:8 , line.width = .004, add.center.points = T)


dev.off()
}