#Input:  (1) A binary vector of length m (binary.vec), (2) a natural number k (start.site).
#Output:  a binary vector of length m (output.vec) that has only one contiguous string of 1's, namely the one that 
#appeared in binary.vec at start.site.
#Description:  For convenience we write k instead of start.site.  The kth entry of output.vec is 
#defined to be 1.  Starting at j = k + 1 and continuing for all j > k, the jth entry of output.vec 
#is 1 if and only if (1) the jth entry of binary.vec is 1, and (2) for all l such that k <= l <= j, 
#the lth entry of binary.vec is 1.  The case j < k is handled similarly.
#Requires these functions:  none.
#Is called by these functions:  peeling.
zero.make = function(binary.vec, start.site)
	{
	#Constants and vectors used in the function
	m = length(binary.vec)
	output.vec = rep(0, m)

	#Define the entries of output.vec
	#Move to the right of start.site
	temp.spot = start.site
	temp.value = binary.vec[temp.spot]
	while ((temp.value > 0) && (temp.spot < m))
		{
		output.vec[temp.spot] = 1
		temp.spot = temp.spot + 1
		temp.value = binary.vec[temp.spot]
		}
	if ((temp.spot == m) && (temp.value > 0))
		{
		output.vec[temp.spot] = 1
		}

	#Move to the left of start.site
	temp.spot = start.site
	temp.value = binary.vec[temp.spot]
	while ((temp.value > 0) && (temp.spot > 1))
		{
		output.vec[temp.spot] = 1
		temp.spot = temp.spot - 1
		temp.value = binary.vec[temp.spot]
		}
	if ((temp.spot == 1) && (temp.value > 0))
		{
		output.vec[temp.spot] = 1
		}
	return(output.vec)
	}

#Input:  (1) An m x r data frame of marker data (marker.data), and (2) a four-column cytoband annotation
#file (annot.file).
#Output:  A vector of length m that contains the cytoband location of each marker (output.vec).
#Description:  This function finds the cytoband location of each marker.
#Requires these functions:  none.
#Is called by these functions:  peeling.
make.cytoband = function(marker.data, annot.file)
	{
	#Constants and vectors used in the function
	n = dim(marker.data)[1]
	output.vec = rep(-23, n)

	#Find the cytoband for each marker based on its genomic position
	for (i in (1:n))
		{
		temp = as.numeric(marker.data[i, 1:2])
		chrom = temp[1]
		position = temp[2]
		vec1 = as.numeric(as.numeric(annot.file[,1]) == chrom)
		vec2 = as.numeric(as.numeric(annot.file[,2]) <= position)
		vec3 = as.numeric(as.numeric(annot.file[,3]) > position)
		annot.row = which((vec1 + vec2 + vec3) == 3)
		output.vec[i] = annot.file[annot.row, 4]
		}

	#Return the output
	return(output.vec)
	}

#Input:  (1) an n x m data matrix (x), (2) a natural number (num.perms)
#Output:  A num.perms x 2 matrix (output)
#Description:  This function uses num.perms iterations of the cyclic shift procedure to find an empirical null 
#distribution for T(X) = the maximum and minimum column sum of X.  The null distribution of the minimum column 
#sum appears in column 1 of output, whereas the null distribution for the maximum column sum appears in column 
#2 of output.
#Requires these functions:  none.
#Is called by these functions:  detailed.look, quick.look.
find.null = function(x, num.perms, random.seed)
	{
	#Set random seed, if apppropriate
	if (length(random.seed) > 0)
		{
		set.seed(random.seed)
		}

	#Constants and vectors used in the function
	n = dim(x)[1]
	m = dim(x)[2]
	shift.vec = rep(23, num.perms)

	#Perform the cyclic shift procedure, and record the minimum and maximum column sum after each
	#cyclic shift.
	for (j in (1:num.perms))
		{
		cutpoints = sample(c(1:m), size = n, replace = TRUE)
	 	perm.x = matrix(0, n, m)
	 	for (i in (1:n))
			{
			index = cutpoints[i]
			if (index == 1)
				{
				second = c()
				}
			if (index > 1)
				{
				second = x[i, 1:(index - 1)]
				}			
			first = x[i, index:m]
	  		perm.x[i,] = c(first, second)
	  		}
		perm.col.sums = colSums(perm.x)
		shift.vec[j] = max(colSums(perm.x))		
 		}

	#Define and return the output
	return(shift.vec)
	}

#Input:  (1) An n x m data matrix (x), (2) an m x r data frame of marker data (marker.data), 
#(3) a vector of length m (cytoband) that contains the cytoband for each marker, and
#(4) a natural number (k).
#Output:  A list containing the following items:  (1) An n x m data matrix, (2) the peak interval.
#Description:  This function applies the peeling procedure to the input data matrix x starting at 
#the most aberrant marker k.
#Requires these functions:  zero.make.
#Is called by these functions:  detailed.look, quick.look.
peeling = function(x, marker.data, cytoband, k)
	{
	n = dim(x)[1]
 	m = dim(x)[2]
	rowmeans = rowMeans(x)	
	grandmean = mean(rowmeans)
	
	#This section of code creates exceed, a binary n x chr.m matrix, where chr.m is the number of markers
	#in the chromosome containing k, the marker where the peeling procedure begins.  An entry of exceed is 1
	#if the corresponding entry of x belongs to the peak interval
	chr = marker.data[k, 1]
	chr.start = min(which(marker.data[,1] == chr))
	chr.end = max(which(marker.data[,1] == chr))
	chr.m =  chr.end - chr.start + 1
	chr.k = k - chr.start + 1
	chr.data = x[,chr.start:chr.end]
	col.means = colMeans(chr.data)
	col.indicator = as.numeric(col.means > grandmean)
	col.indicator = zero.make(col.indicator, chr.k)
	exceed = rep(1, n) %o% col.indicator
	which.band = substr(cytoband[k], 1, 1)
	band.vec = substr(cytoband[chr.start:chr.end], 1, 1)
	band.ind.vec = as.numeric(band.vec == which.band)
	band.matrix = rep(1, n) %o% band.ind.vec
	exceed = exceed * band.matrix
	
	#This section of code find the markers that make up the peak interval
	peak.interval = as.numeric(colSums(exceed) > 0)
	chr.left.side = min(which(peak.interval == 1))
	chr.right.side = max(which(peak.interval == 1))
	left.side = chr.left.side + chr.start - 1
	right.side = chr.right.side + chr.start - 1

	#These will be used below
 	columnmean = mean(chr.data[,chr.k])
	
	#This section of code creates exceed.running, a binary n x chr.m matrix.  An entry of exceed.running
	#is 1 if the corresponding entry of x will be peeled, otherwise it is zero.
 	exceed.running = matrix(0, n, chr.m)
 	exceed.running[,chr.k] = as.numeric(chr.data[,chr.k] > rowmeans)
	if (chr.k > 1)
		{
 		for (j in (chr.k - 1):1)
			{
  			exceed.running[,j] = exceed.running[,(j + 1)] * exceed[,j] * as.numeric(chr.data[,j] > rowmeans)
  			}
		}
	if (chr.k < chr.m)
		{
 		for (j in (chr.k + 1):chr.m)
			{
  			exceed.running[,j] = exceed.running[,(j - 1)] * exceed[,j] * as.numeric(chr.data[,j] > rowmeans)
  			}
		}

	#This section of code implements a multiplicative-based shrinkage approach at column
	not.imputed = chr.data * (1 - exceed.running)
	to.be.imputed = chr.data * exceed.running
	num.term1 = n * grandmean
	num.term2 = sum(not.imputed[,chr.k])
	num.term3 = sum(exceed.running[,chr.k] * rowmeans)
	den.term = sum(to.be.imputed[,chr.k]) - num.term3
	tau = (num.term1 - num.term2 - num.term3)/den.term
	imputed = tau * (to.be.imputed - (matrix(rowmeans, n, chr.m) * exceed.running)) + 
		(matrix(rowmeans, n, chr.m) * exceed.running)
	final.matrix = imputed + not.imputed
		
	#Create the output
	x[,chr.start:chr.end] = final.matrix
	output.list = list(x, c(left.side, right.side))	
		
	#Return the output
	return(output.list)
	}

#Input:  (1) An n x m data matrix (x), (2) a m x r data frame of marker data (marker.data) as defined in
#cytoband.function, (3) a cytoband annotation file (annot.file), (4) a natural number (num.perms), (5) a
#natural number (num.iters), (6) a character string - either "gain" or "loss" - used to indicate whether
#gains or losses, respectively, are analyzed, and (7) an optional random seed.
#Output:  A list containing two elements:  (1) a num.iters x (r + 4) data frame, and (2) an two-dimensional
#array of lists.  See DiNAMIC_README.pdf for more information about the output.
#Description:  This function applies the Detailed Look procedure to the input data matrix x.  Empirical
#null distributions of T(X) = maximum or minimum column sum are created based on num.perms cyclic shifts.
#The function finds the num.iters most aberrant markers, assesses their statistical significance, peels
#them, and finds the various peak intervals.  By default the function considers both the minimum and 
#maximum column sum (high.only = FALSE), but it can also assess the significance of only the maximum
#column sum (high.only = TRUE).
#Requires these functions:  find.null, marker.finder, peeling, zero.make, make.cytoband.
#Is called by these functions:  none.
detailed.look = function(x, marker.data, annot.file, num.perms, num.iters, 
	gain.loss = "gain", random.seed = NULL)
	{
	#Constants and vectors used in the function
	n = dim(x)[1]
 	m = dim(x)[2]
	r = dim(marker.data)[2]
	small.marker.data = as.matrix(marker.data[,1:2])
	chrom.vec = small.marker.data[,1]
	cytoband = make.cytoband(small.marker.data, annot.file)
	marker.matrix = as.matrix(marker.data)
	gain.loss.ind = as.numeric(gain.loss == "gain") - as.numeric(gain.loss == "loss")
	
	#Perform the Detailed Look procedure
	data.matrix = x * gain.loss.ind
	output.matrix = matrix(23, num.iters, r + 1)
	colnames(output.matrix) = c(colnames(marker.matrix), "p-Value")
	output.list = array(list(), c(num.iters, r))
	colnames(output.list) = colnames(marker.matrix)

	for (i in (1:num.iters))
		{
		null.dist = find.null(data.matrix, num.perms, random.seed)
		col.sums = colSums(data.matrix)
		obs.max = max(col.sums)
		k = which.max(col.sums)
		p.val = min(mean(obs.max <null.dist) + 1/num.perms, 1)		
		peeling.data = peeling(data.matrix, marker.matrix, cytoband, k)
		data.matrix = peeling.data[[1]]
		interval = peeling.data[[2]]
		output.matrix[i,] = c(marker.matrix[k,], p.val)
		output.list[i, 1] = list(marker.data[k, 1])
		output.list[i, 2] = list(c(marker.data[interval[1], 2], marker.data[interval[2], 2]))
		if (r > 2)
			{
			for (j in (3:r))
				{
				output.list[i, j] = list(marker.matrix[interval[1]:interval[2], j])
				}
			}
		}

	output.matrix = as.data.frame(output.matrix)	

	output = list(output.matrix, output.list)
	#Return output
	return(output)
	}

#Input:  (1) An n x m data matrix (x), (2) a m x r data frame of marker data (marker.data) as defined in
#cytoband.function, (3) a cytoband annotation file (annot.file), (4) a natural number (num.perms), (5) a
#natural number (num.iters), (6) a character string - either "gain" or "loss" - used to indicate whether
#gains or losses, respectively, are analyzed, and (7) an optional random seed.
#Output:  A list containing two elements:  (1) a num.iters x (r + 4) data frame, and (2) an two-dimensional
#array of lists.  See DiNAMIC_README.pdf for more information about the output.
#Description:  This function applies the Detailed Look procedure to the input data matrix x.  Empirical
#null distributions of T(X) = maximum or minimum column sum are created based on num.perms cyclic shifts.
#The function finds the num.iters most aberrant markers, assesses their statistical significance, peels
#them, and finds the various peak intervals.  By default the function considers both the minimum and 
#maximum column sum (high.only = FALSE), but it can also assess the significance of only the maximum
#column sum (high.only = TRUE).
#Requires these functions:  find.null, marker.finder, peeling, zero.make, make.cytoband.
#Is called by these functions:  none.
quick.look = function(x, marker.data, annot.file, num.perms, num.iters, 
	gain.loss = "gain", random.seed = NULL)
	{
	#Constants and vectors used in the function
	n = dim(x)[1]
 	m = dim(x)[2]
	r = dim(marker.data)[2]
	small.marker.data = as.matrix(marker.data[,1:2])
	chrom.vec = small.marker.data[,1]
	cytoband = make.cytoband(small.marker.data, annot.file)
	marker.matrix = as.matrix(marker.data)
	gain.loss.ind = as.numeric(gain.loss == "gain") - as.numeric(gain.loss == "loss")
	
	#Perform the Detailed Look procedure
	data.matrix = x * gain.loss.ind
	output.matrix = matrix(23, num.iters, r + 1)
	colnames(output.matrix) = c(colnames(marker.matrix), "p-Value")
	output.list = array(list(), c(num.iters, r))
	colnames(output.list) = colnames(marker.matrix)
	null.dist = find.null(data.matrix, num.perms, random.seed)

	for (i in (1:num.iters))
		{
		col.sums = colSums(data.matrix)
		obs.max = max(col.sums)
		k = which.max(col.sums)
		p.val = min(mean(obs.max <null.dist) + 1/num.perms, 1)		
		peeling.data = peeling(data.matrix, marker.matrix, cytoband, k)
		data.matrix = peeling.data[[1]]
		interval = peeling.data[[2]]
		output.matrix[i,] = c(marker.matrix[k,], p.val)
		output.list[i, 1] = list(marker.data[k, 1])
		output.list[i, 2] = list(c(marker.data[interval[1], 2], marker.data[interval[2], 2]))
		if (r > 2)
			{
			for (j in (3:r))
				{
				output.list[i, j] = list(marker.matrix[interval[1]:interval[2], j])
				}
			}
		}

	output.matrix = as.data.frame(output.matrix)	

	output = list(output.matrix, output.list)
	#Return output
	return(output)
	}

#Input:  (1) A six-column matrix (seg.matrix) produced by the segmentation algorithm DNAcopy, (2) an m x r 
#data frame of marker data (marker.matrix).
#Output:  An n x m matrix.
#Description:  This function converts the six-column output of DNAcopy to an n x m matrix that can be 
#analyzed with the cyclic shift procedure.  Here n represents the number of subjects, and m is the number
#of markers.
#Requires these functions:  none.
#Is called by these functions:  bias.correction.
make.shift.array = function(seg.matrix, marker.data)
	{
	#Remove the first column of seg.matrix
	seg.matrix = as.matrix(seg.matrix[,2:6])

	#Find the marker where each subject's segmentation begins
	marker.matrix = as.matrix(marker.data[,1:2])
	marker.vec = marker.matrix[,2]
	marker.vec = cumsum(as.numeric(marker.vec))
	first.marker = marker.vec[1]

	#Find the number of subjects
	n = sum(as.numeric(as.numeric(seg.matrix[,2]) == first.marker))

	#Find the number of markers
	m = length(marker.vec)

	#Find the rows of seg.matrix for each subject
	start.rows = which(as.numeric(seg.matrix[,2]) == first.marker)
	end.rows = c((start.rows[2:n] - 1), dim(seg.matrix)[1])
	location.matrix = cbind(start.rows, end.rows)

	#Create a blank n x m matrix
	data.matrix = matrix(23, n, m)

	#Fill data.matrix one row at a time
	subset = seg.matrix[,4:5]
	for (i in (1:n))
		{
		start = location.matrix[i, 1]
		stop = location.matrix[i, 2]
		temp.matrix = matrix(as.numeric(subset[start:stop,]), stop - start + 1, 2)
		temp.row = c()
		for (j in (1:dim(temp.matrix)[1]))
			{
			temp.row = c(temp.row, rep(temp.matrix[j, 2], temp.matrix[j, 1]))
			}
		data.matrix[i,] = temp.row
		}

	#Return the output
	return(data.matrix)
	}

#Input:  (1) An n x m data matrix (x), (2) an m x r data frame of marker data (marker.data).
#Output:  An n x m matrix.
#Description:  This function applies the bias-correction procedure to the input matrix x.
#Requires these functions:  make.shift.array.  Also, the package DNAcopy is used.
#Is called by these functions:  none.
bias.correction = function(x, marker.data)
	{
	#Load DNAcopy
	library(DNAcopy)

	#Constants and vectors used in the function
	marker.matrix = as.matrix(marker.data[,1:2])
	chr.vec = marker.matrix[,1]
	start.sites = marker.matrix[,2]
	n = dim(x)[1]
	m = dim(x)[2]

	#Use DNAcopy to create a segmented version of x
	cna.first = CNA(t(x), chr.vec, start.sites, data.type = "logratio")
	seg.first = segment(cna.first)
	output.first = seg.first$output
	num.cols = dim(output.first)[2]
	seg.values.first = as.numeric(output.first[,num.cols])
	output.first[,num.cols] = seg.values.first
	segmented.first = make.shift.array(output.first, marker.data)

	#Estimate and remove the bias
	resid = x - segmented.first
	bias.est = colMeans(resid)
	bias.matrix = matrix(bias.est, n, m, byrow = TRUE)
	adjusted.matrix = x - bias.matrix
	
	#Create the bias-corrected version of x
	cna.second = CNA(t(adjusted.matrix), chr.vec, start.sites, data.type = "logratio")
	seg.second = segment(cna.second)
	output.second = seg.second$output
	seg.values.second = as.numeric(output.second[,num.cols])
	output.second[,num.cols] = seg.values.second
	segmented.second = make.shift.array(output.second, marker.data)

	#Return the output
	return(segmented.second)
	}