#! /usr/bin/Rscript

###########################################################################################
########################             READ ARGUMENTS              ##########################
###########################################################################################
args=commandArgs(trailingOnly=TRUE)
Prof.file=args[1]
Probes.file=args[2]
Reference.file=args[3]
th.bkp=as.numeric(args[4])


###########################################################################################
####################             INTERMEDIATE FUNCTIONS              ######################
###########################################################################################
# Input: A vector of integers
# Action: Fragment v into a series of continuous intervals
# Output: A data frame giving start and end of each interval
intervals=function(v)
{
  l=length(v)
  from=v[1];to=c()
  k=1
  while(k < l)
  {
    while(v[k+1]==v[k]+1 & (k+1) < l){k=k+1}
    if(k+1==l)
    {
      if(v[k+1]==v[k]+1){to=c(to,v[l]);k=l}
      else{to=c(to,v[k],v[l]);from=c(from,v[l]);k=l}
    }
    else{to=c(to,v[k]);from=c(from,v[k+1]);k=k+1}
  }
  if(l==1){from=v;to=v}
  if(l==0){from=c();to=c()}
  return(data.frame(from,to))
}


# Input: A nsample x nchr matrix Schr giving the copy number for each segment of a given chromosome in each samples
# Action: Compute the nsample x (nchr+1) matrix Bchr giving the shift between each segment in each sample
# Output: The breakpoints matrix Bchr for this chromosome
S2B <- function(Schr)
{
  if(class(Schr)=="numeric"){Schr=matrix(Schr,nrow=1)}
  nsample=nrow(Schr)
  nchr=ncol(Schr)
  Bchr=matrix(nrow=nsample,ncol=nchr+1)  
  for(i in 1:nsample)
  {
     Bchr[i,1]=Schr[i,1]
     if(nchr > 1){
  	    for(j in 2:nchr)
  	    {
  	      Bchr[i,j]=Schr[i,j]-Schr[i,j-1]
       }
  	   }
  	   Bchr[i,nchr+1]=-Schr[i,nchr]
  }
  if(nsample==1){Bchr=as.numeric(Bchr)}else{rownames(Bchr)=rownames(Schr)}
  return(Bchr)
}


# Input: A nsample x (nchr+1) matrix Bchr giving the shift between each segment of a given chromosome in each sample
# Action: Compute the nsample x nchr matrix Schr giving the copy number for each segment in each sample
# Output: The segments matrix Schr
B2S <- function(Bchr)
{
  if(class(Bchr)=="numeric"){Bchr=matrix(Bchr,nrow=1)}
  nsample=nrow(Bchr)
  nchr=ncol(Bchr)-1
  Schr=matrix(nrow=nsample,ncol=nchr)
  for(i in 1:nsample)
  {
    Schr[i,1]=Bchr[i,1]
    if(nchr > 1)
    {
      for(j in 2:nchr)
      {
 	      Schr[i,j]=Schr[i,j-1]+Bchr[i,j]
      }
    }
  }
  if(nsample==1){Schr=as.numeric(Schr)}else{rownames(Schr)=rownames(Bchr)}
  return(Schr)
}


# Input: A nsample x (nchr+1) matrix Bchr giving the shift between each segment of a given chromosome in each sample
# Action: Compute the nsample x (2*nchr+2) matrix Achr giving the amplitude matrix for the chromosome
# Output: The amplitude matrix Achr
B2A <- function(Bchr)
{
  if(class(Bchr)=="numeric"){Bchr=matrix(Bchr,nrow=1)}
  nsample=nrow(Bchr)
  nbkp=ncol(Bchr)
  Achr=matrix(nrow=nsample,ncol=2*nbkp)
  for(i in 1:nsample)
  {
    for(j in 1:nbkp)
    {
      if(Bchr[i,j] >= 0){Achr[i,j]=Bchr[i,j];Achr[i,nbkp+j]=0}
      if(Bchr[i,j] <= 0){Achr[i,nbkp+j]=-Bchr[i,j];Achr[i,j]=0}      
    }
  }
  if(nsample==1){Achr=as.numeric(Achr)}else{rownames(Achr)=rownames(Bchr)}
  return(Achr)
}


# Input: The amplitude matrix Achr
# Action: Computes the breakpoint matrix corresponding to the amplitude matrix given
# Output: The breakpoint matrix
A2B <- function(Achr)
{
  if(class(Achr)=="numeric"){Achr=matrix(Achr,nrow=1)}
  nsample=nrow(Achr)
  nbkp=ncol(Achr)/2
  Bchr=matrix(nrow=nsample,ncol=nbkp)
  for(i in 1:nsample)
  {
    for(j in 1:nbkp)
    {
      Bchr[i,j]=Achr[i,j]-Achr[i,nbkp+j]
    }
  }
  if(nsample==1){Bchr=as.numeric(Bchr)}else{rownames(Bchr)=rownames(Achr)}
  return(Bchr)
}


# Input: A vector giving the chromosome for each probe, the y-coordinate at which to write the chromosome name and the size cx of the labels
# Action: Add the chromosome limits on the active plot
affchrom=function(chr,ord,cx,texte=TRUE)
{
  # Détection des limites entre les chromosomes
  nprobe=length(chr)
  limchrom=c()
  for(i in 1:(nprobe-1))
  {
    if(chr[i]!=chr[i+1]){limchrom=c(limchrom,i+0.5)}
  }
  
  # Traçage des limites sur le plot
  for(i in 1:(max(chr)-1))
  {
    abline(v=limchrom[i],col="darkgrey",lwd=1)
  }

  # Ecriture du numéro de chaque chromosome
  labchr=as.character(unique(chr));labchr[labchr=="23"]="X";labchr[labchr=="24"]="Y"
  xcord=rep(-9,max(chr))
  xcord[1]=limchrom[1]/2
  for(i in 2:(max(chr)-1)){xcord[i]=(limchrom[i]+limchrom[i-1])/2}
  xcord[max(chr)]=(nprobe+limchrom[max(chr)-1])/2
  ycord=rep(ord,max(chr))
  if(texte==T){text(labels=labchr,x=xcord,y=ycord,cex=cx,col="darkgrey")}
}


################################################################################
###############             SEGMENTATION            ############################
################################################################################
# Input: A nsample x nprobe P matrix containing the discretized copy number profiles of all samples from one patient
# Action: Merge breakpoints of the same sign that are closer than a given threshold
# Output: The P matrix in which very close breakpoints have been merged
merge.breakpoints=function(P,th.bkp,chr)
{
  number.merged=0
  for(c in unique(chr))
  {
    p=S2B(P[,chr==c])
    Bup=which(p > 0,arr.ind=TRUE)
    Bdown=which(p < 0,arr.ind=TRUE)
    if(nrow(Bup) > 0)
    {
      for(i in 1:nrow(Bup))
      {
        d=abs(Bup[,2]-Bup[i,2])
        close=which(d > 0 & d <= th.bkp & Bup[,1]!=Bup[i,1])
        if(length(close) > 0)
        {
          middle=round(mean(c(Bup[i,2],Bup[close,2])))
          for(k in c(i,close))
          {
            if(setequal(unique(p[Bup[k,1],(Bup[k,2]:middle)[-1]]),0)){
            	p[Bup[k,1],middle]=p[Bup[k,1],Bup[k,2]]
              p[Bup[k,1],Bup[k,2]]=0
              Bup[k,2]=middle
              number.merged=number.merged+1
            }
          }
        }
      }
    }
    if(nrow(Bdown) > 0)
    {
      for(i in 1:nrow(Bdown))
      {
        d=abs(Bdown[,2]-Bdown[i,2])
        close=which(d > 0 & d <= th.bkp & Bdown[,1]!=Bdown[i,1])
        if(length(close) > 0)
        { 
          middle=round(mean(c(Bdown[i,2],Bdown[close,2])))
          for(k in c(i,close))
          {
            if(setequal(unique(p[Bdown[k,1],(Bdown[k,2]:middle)[-1]]),0)){
              p[Bdown[k,1],middle]=p[Bdown[k,1],Bdown[k,2]]
              p[Bdown[k,1],Bdown[k,2]]=0
              Bdown[k,2]=middle
              number.merged=number.merged+1
            }
          }
        }
      }
    }
    P[,chr==c]=B2S(p)
  }
  print(paste("->",number.merged,"breakpoints closer than",th.bkp,"probes were identified and were merged."))
  return(P)
}


# Input: A nsample x nprobe P matrix containing the discretized copy number profiles of all samples from one patient
# Action: Partition all chromosomes into segments delineated by all breakpoints found in at least one sample
# Output: A data frame describing each segment
segments.delineation=function(P,chr,pos,name,cytostart,cytoend,lobound=NULL,upbound=NULL)
{
  nsample=nrow(P);nprobe=ncol(P)
  Segment.ID=c()
  Start.probe.number=c();End.probe.number=c()
  for(i in 1:max(chr))
  {
    bkp=c()
    x=which(chr==i)
    if(length(x) > 1)
    {
      for(e in 1:nsample)
      {
        for(j in 2:length(x))
        {
      	  if(P[e,x[j]]!=P[e,x[j-1]] & j%in%bkp==FALSE){bkp=c(bkp,j)}
        }      
      }
    }
    if(length(bkp) > 0){bkp=sort(bkp)}
    Start.probe.number=c(Start.probe.number,x[c(1,bkp)])
    End.probe.number=c(End.probe.number,x[c(bkp-1,length(x))])
    Segment.ID=c(Segment.ID,paste(i,".",1:(length(bkp)+1),sep=""))
  }
  Chromosome=chr[Start.probe.number] # Note: all probes of the segment are necessarily on the same chromosome
  Start.probe.name=name[Start.probe.number];End.probe.name=name[End.probe.number]
  if(!is.null(lobound) & !is.null(upbound)){
    Start.position=lobound[Start.probe.number]
    End.position=upbound[End.probe.number]}else{
      Start.position=pos[Start.probe.number];End.position=pos[End.probe.number]}
  Size=End.position-Start.position
  Start.cytoband=cytostart[Start.probe.number];End.cytoband=cytoend[End.probe.number]
  return(data.frame(Segment.ID,Start.probe.name,End.probe.name,Start.probe.number,End.probe.number,Chromosome,Start.position,End.position,Size,Start.cytoband,End.cytoband))
}


# Input: the nsample x nprobe P matrix and the data frame describing all segments
# Action: Make the matrix containing the copy number value for each segment in each sample
# Output: the nsample x nsegm segments matrix
profile.segmentation=function(P,segments.table)
{
  nsample=nrow(P);nsegm=nrow(segments.table)
  S=matrix(nrow=nsample,ncol=nsegm)
  for(s in 1:nsegm)
  {
  	  for(e in 1:nsample)
  	  {
  	  	  S[e,s]=P[e,segments.table$Start.probe.number[s]]  # Note: copy number is the same on any probe of the segment
  	  }
  }
  rownames(S)=rownames(P);colnames(S)=rownames(segments)
  return(S)
}


###################################################################################
#############             TUMORS' TREE RECONSTRUCTION              ################
###################################################################################
# Input: 2 vectors containing the segmented profiles of 2 samples, the data frame describing all segments and 2 lists containing the frequencies of breakpoints of each sign in the reference data set
# Action: Compute the partial identity score between the 2 samples
# Output: the partial identity score between the 2 samples
identity.score <- function(sampleA,sampleB,segments.table,Fup,Fdown)
{
  CBS=0;D=0
  for(c in unique(segments.table$Chromosome))
  {
    indc=which(segments.table$Chromosome==c)
    Schr=rbind(sampleA[indc],sampleB[indc])
    Bchr=S2B(Schr);nbkp=ncol(Bchr)
    Bcp=numeric(nbkp)
    for(j in 1:nbkp)
	 {
  	   s=sign(Bchr[1,j])
  	   if(s!=0 & sign(Bchr[2,j])==s){Bcp[j]=s*min(abs(Bchr[,j]))}
	 }
    for(j in 1:nbkp)
    {
      if(Bcp[j] > 0){CBS=CBS+abs(Bcp[j])*(1-Fup[[c]][j])}
      if(Bcp[j] < 0){CBS=CBS+abs(Bcp[j])*(1-Fdown[[c]][j])}
    }
	 Bdiff=Bchr-matrix(rep(Bcp,2),nrow=2,byrow=TRUE)
	 for(i in 1:2)
    {
      for(j in 1:nbkp)
      {
        	if(Bdiff[i,j] > 0){D=D+Bdiff[i,j]*(1-Fup[[c]][j])}
        	if(Bdiff[i,j] < 0){D=D+abs(Bdiff[i,j])*(1-Fdown[[c]][j])}
      }
    }
  }
  return(list(CBS=CBS,D=D))
}


# Input: a 3 x nchr scenario matrix containing segmented profiles of a hypothetical common precursor (line 1) and its two descending tumors (lines 2 and 3)
# Action: Evaluate the likelihood of this hypothetical scenario
# Output: 0 if the scenario is impossible, 0 otherwise
check.CP <- function(scenario)
{
    scenario.segm=B2S(scenario)
    ok=1
    for(k in which(scenario.segm[1,]!=0))
    {
      if(scenario.segm[1,k] < -2 | scenario.segm[1,k] > 1){ok=0;break} # One segment of the common precursor goes out of authorized values
      if(scenario.segm[1,k]==-2 & sum(scenario.segm[2:3,k]!=0)!=0){ok=0;break} # A segment with status -2 (homozygous deletion) in the common precursor changes of status later
    }
    return(ok)
}


# Input: a 3 x nchr scenario matrix containing segmented profiles of a hypothetical common precursor (line 1) and its two descending tumors (lines 2 and 3)
# Action: Evaluate the likelyhood of this hypothetical scenario
# Output: the penalty score associated to the scenario
distance.scenario <- function(scenario,Bchr,a,b,c,segments.table,Fup,Fdown)
{
  d=0
  #breakpoints that will remain in the front after the CP's profile reconstruction
  fr=rbind(scenario[1,],Bchr[-c(a,b),])
  for(j in 1:ncol(fr))
  {
    if(sum(fr[,j] < 0) > 0){d=d+(1-max(abs(fr[fr[,j] < 0,j]))*Fdown[[c]][j])}
    if(sum(fr[,j] > 0) > 0){d=d+(1-max(fr[fr[,j] > 0,j])*Fup[[c]][j])}
  }
  #breakpoints necessary to go from the CP to each tumor
  spe=scenario[2:3,]
  bkp=which(spe!=0,arr.ind=TRUE)
  for(i in 1:nrow(bkp))
  {
    val=spe[bkp[i,1],bkp[i,2]]
    if(val > 0){F=Fup[[c]]}else{F=Fdown[[c]]}
    d=d+abs(val)*(1-F[bkp[i,2]])
  }
  return(d)
}


# Input: the front of the tree, indices of the 2 tumors with the most breakpoints in common, and the segments table
# Action: compute the most likely profile of the 2 tumors' common precursor
# Output: the common precursor's profile, and the aberrations that occured on each edge
CP.reconstruction <- function(front,closest,segments.table,Fup,Fdown)
{
  a=closest[1];b=closest[2]
  
  # THE PARTICULAR CASE OF AMPLICONS
  # Common amplicons and amplicons specific to one sample are stored
  ac=which(front[a,]==2 & front[b,]==2)
  aspeA=which(front[a,]==2 & front[b,]!=2)
  aspeB=which(front[a,]!=2 & front[b,]==2)
  # Amplicons are then removed from the profile and asigned the value of the closest segment
  subst=which(front==2,arr.ind=T)
  if(nrow(subst)!=0){
    for(i in 1:nrow(subst))
    {
       x=subst[i,1];y=subst[i,2];before=y;after=y;before.val=NA;after.val=NA
       if(y > 1){
     	    before=y-1;while(front[x,before]==2 & before > 1){before=before-1}
     	    if(segments.table$Chromosome[before]==segments.table$Chromosome[y] & front[x,before]!=2){before.val=front[x,before]}}
       if(y < ncol(front)){
     	    after=y+1;while(front[x,after]==2 & after < ncol(front)){after=after+1}   #recherche du segment suivant non égal à 2
     	    if(segments.table$Chromosome[after]==segments.table$Chromosome[y] & front[x,after]!=2){after.val=front[x,after]}}
       if(segments.table$Start.position[y]-segments.table$End.position[before] < segments.table$Start.position[after]-segments.table$End.position[y]){front[x,y]=before.val}else{front[x,y]=after.val}
       if(is.na(front[x,y])){front[x,y]=0}
      }
  }
  
  # RECONSTRUCTION OF THE COMMON PRECURSOR'S PROFILE
  CP.profile=c();speA=c();speB=c() 
  for(c in unique(segments.table$Chromosome))
  {
     Schr=as.matrix(front[,segments.table$Chromosome ==c])
     nchr=ncol(Schr)
     Bchr=S2B(Schr)
     Achr=B2A(Bchr)
     #Compute the common breakpoints amplitude vector Acp and the common breakpoints Bcp
     Acp=apply(Achr[closest,],2,min)
     Bcp=A2B(Acp)
	  #If common breakpoints don't fully define the CP's profile, complete it
	  if(sum(Bcp)!=0)
	  {
	    delta=sum(Bcp)
	    spe=Bchr[closest,]-matrix(rep(Bcp,2),nrow=2,byrow=TRUE)
	    poss=unique(c(which(sign(spe)==-sign(delta),arr.ind=T)[,2],which(sign(Bcp)==delta)))
	    if(length(poss) > 1){poss=combn(poss,abs(delta))}else{poss=matrix(poss,nrow=1)}
  		 #Verify each possibility gives a realistic scenario and compute the overall distance associated to each
  		 scen.OK=numeric(ncol(poss));scen.dist=numeric(ncol(poss))
		 for(i in 1:ncol(poss))
		 {
		   bkphyp=Bcp
		   bkphyp[poss[,i]]=bkphyp[poss[,i]]-sign(delta)
		   scenario=rbind(bkphyp,Bchr[c(a,b),]-matrix(rep(bkphyp,2),nrow=2,byrow=T))
		   scen.OK[i]=check.CP(scenario)
		   scen.dist[i]=distance.scenario(scenario,Bchr,a,b,c,segments.table,Fup,Fdown)
		 }
		 #Keep the possible scenario that minimizes the distance
		 keep=which(scen.OK==1)[which.min(scen.dist[scen.OK==1])]
		 Bcp[poss[,keep]]=Bcp[poss[,keep]]-sign(delta)
	  }
	  spe=Bchr[closest,]-matrix(rep(Bcp,2),nrow=2,byrow=TRUE)
 	  #Go back to segments
 	  Scp=B2S(Bcp)
  	  CP.profile=c(CP.profile,Scp)
  	  speA=c(speA,B2S(spe[1,]))
  	  speB=c(speB,B2S(spe[2,]))   
  	}
  	CP.profile[ac]=2;speA[aspeA]=2;speB[aspeB]=2
  	list(CPprof=CP.profile,indiv1=speA,indiv2=speB)
}


# Input: indices a and b of 2 segments
# Action: formats the interval between segments a and b in a compact way
# Output: a character string representing the genomic interval between segments a and b
format.cytoband=function(a,b,segments.table)
{
  chr.a=segments.table$Chromosome[a]
  chr.b=segments.table$Chromosome[b]
  cyto.a=as.character(segments.table$Start.cytoband[a])
  cyto.b=as.character(segments.table$End.cytoband[b])
  tmp=unlist(strsplit(cyto.a,""));arm.a=tmp[grep("[p,q]",tmp)]
  tmp=unlist(strsplit(cyto.b,""));arm.b=tmp[grep("[p,q]",tmp)]
  if(a > 1){
    chr.bef=segments.table$Chromosome[a-1]
  	 tmp=unlist(strsplit(as.character(segments.table$End.cytoband[a-1]),""));arm.bef=tmp[grep("[p,q]",tmp)]}else{chr.bef=0;arm.bef=""}
  if(b < nrow(segments.table)){
  	 chr.aft=segments.table$Chromosome[b+1]
  	 tmp=unlist(strsplit(as.character(segments.table$Start.cytoband[b+1]),""));arm.aft=tmp[grep("[p,q]",tmp)]}else{chr.aft=0;arm.aft=""}
  	if(chr.bef!=chr.a & chr.aft!=chr.b){ # the aberration spans a whole chromosome
  	  if(chr.a==23){chr.a="X"};if(chr.a==24){chr.a="Y"};return(as.character(chr.a))}else{
  	  	if((chr.bef!=chr.a | arm.bef!=arm.a) & (chr.aft!=chr.b | arm.aft!=arm.b)){ # the aberration spans a whole chromosome arm
  	  	  return(paste(chr.a,arm.a,sep=""))
  	  	}else{
  		  aband=unlist(strsplit(cyto.a,"\\."))[1]
  		  bband=unlist(strsplit(cyto.b,"\\."))[1]
  		  if(aband==bband){return(aband)}else{
  	       bband=unlist(strsplit(bband,"[p,q]"))[2]
  	       if(arm.a!=arm.b){bband=paste(arm.b,bband,sep="")}
  	       return(paste(aband,bband,sep="-"))
  		  }
  	  	}
  	}
}


# Input: a vector ev of the segments modifications that occur on an edge
# Action: format the events associated to the edge
# Output: a list of chromosomal events that occur on the edge
format.events <- function(ev,segments.table)
{
  cytostart=as.character(segments.table$Start.cytoband);cytoend=as.character(segments.table$End.cytoband)
  aber=matrix(nrow=0,ncol=3,dimnames=list(c(),c("Start","End","Type")))
  for(c in unique(segments.table$Chromosome))
  {
    g=as.matrix(intervals(which(ev > 0 & segments.table$Chromosome==c)));if(nrow(g) > 0){g=cbind(g,rep(1,nrow(g)))}else{g=matrix(nrow=0,ncol=3)}
    a=as.matrix(intervals(which(ev==2 & segments.table$Chromosome==c)));if(nrow(a) > 0){a=cbind(a,rep(2,nrow(a)))}else{a=matrix(nrow=0,ncol=3)}
    l=as.matrix(intervals(which(ev < 0 & segments.table$Chromosome==c)));if(nrow(l) > 0){l=cbind(l,rep(-1,nrow(l)))}else{l=matrix(nrow=0,ncol=3)}
    d=as.matrix(intervals(which(ev==-2 & segments.table$Chromosome==c)));if(nrow(d) > 0){d=cbind(d,rep(-2,nrow(d)))}else{d=matrix(nrow=0,ncol=3)}
    rmg=numeric(0);rml=numeric(0)
    if(nrow(a) > 0){
     for(i in 1:nrow(g)){if(sum(a[,1]==g[i,1] & a[,2]==g[i,2])!=0){rmg=c(rmg,i)}}
     }
    if(nrow(d) > 0){
     for(i in 1:nrow(l)){if(sum(d[,1]==l[i,1] & d[,2]==l[i,2])!=0){rml=c(rml,i)}}
     }
    if(length(rmg) > 0){g=g[-rmg,]};if(length(rml) > 0){l=l[-rml,]}
    aber=rbind(aber,g,a,l,d)
  }
  listev.segments=listev.cyto=character(0)
  if(nrow(aber) > 0)
  {
    for(i in 1:nrow(aber))
    {
      if(aber[i,3]==1){signe="+"}else{if(aber[i,3]==-1){signe="-"}else{if(aber[i,3]==2){signe="++"}else{if(aber[i,3]==-2){signe="--"}}}}
      if(cytostart[aber[i,1]]!=cytoend[aber[i,2]]){listev.cyto=c(listev.cyto,paste(signe,format.cytoband(aber[i,1],aber[i,2],segments.table),sep=""))}else{listev.cyto=c(listev.cyto,paste(signe,unlist(strsplit(cytostart[aber[i,1]],"\\."))[1],sep=""))}
      if(aber[i,1]!=aber[i,2]){listev.segments=c(listev.segments,paste(signe,paste(segments.table$Segment.ID[aber[i,1]],segments.table$Segment.ID[aber[i,2]],sep=":"),sep=""))}else{listev.segments=c(listev.segments,paste(signe,segments.table$Segment.ID[aber[i,1]],sep=""))}
    }
  }
  list(segments=listev.segments,cyto=listev.cyto)
}


# Input: the matrix of segmented profiles S and the table describing segments
# Action: reconstructs the tumors'tree and the sequence of chromosomal aberrations involved
# Output: a list of 3 vectors: the nodes, the edges, and the events associated to each edge
tree.reconstruction=function(S,segments.table,Fup,Fdown) {
    # Initiation
    Nodes=list();for(i in 1:nrow(S)){Nodes[[i]]=S[i,]};names(Nodes)=rownames(S)
    Edges=data.frame(0,0);colnames(Edges)=c("from","to")
    Events.cyto=Events.segments=list()
    left=1:length(Nodes) # indices of nodes that are in the front
    ncp=0                # number of common precursors inferred
    while(length(left) > 1)
    {
    	 # Find the two nodes in the front with the most breakpoints in common
    	 Front=Nodes[left]
		 nn=length(Front)
		 C=matrix(nrow=nn,ncol=nn);D=matrix(nrow=nn,ncol=nn)
		 for(i in 1:(nn-1))
		 {
 		  for(j in (i+1):nn)
		   {
		     idscore=identity.score(Front[[i]],Front[[j]],segments.table,Fup,Fdown)
		     C[i,j]=idscore$CBS
		     D[i,j]=idscore$D
		   }
		 }
		 closest=which(C==max(C,na.rm=TRUE),arr.ind=TRUE)[which.min(D[which.max(C)]),]
	    # Reconstruct their common precursor's profile
		 cp=CP.reconstruction(t(as.matrix(as.data.frame(Front))),closest,segments.table,Fup,Fdown) 
		 # Add the node, edges and events to the tree
		 if(setequal(cp$indiv1,0)){                  # if one of the node is the common precursor, draw an edge from it to the other node
		 	Edges=rbind(Edges,match(names(Front)[closest],names(Nodes)))
		 	formatted.events=format.events(cp$indiv2,segments.table)
		    Events.cyto=append(Events.cyto,list(formatted.events$cyto))
		    Events.segments=append(Events.segments,list(formatted.events$segments))
	  	 }else{if(setequal(cp$indiv2,0)){
	  		 	 Edges=rbind(Edges,match(names(Front)[closest[2:1]],names(Nodes)))
	  		 	 formatted.events=format.events(cp$indiv1,segments.table)
	  		 	 Events.cyto=append(Events.cyto,list(formatted.events$cyto))
	  		 	 Events.segments=append(Events.segments,list(formatted.events$segments))
	  		  }else{
	  		 		  Nodes[[length(Nodes)+1]]=cp$CPprof
	  		 		  ncp=ncp+1;names(Nodes)[length(Nodes)]=ncp
		 			  Edges=rbind(Edges,c(length(Nodes),which(names(Nodes)==names(Front)[closest[1]])))
		 			  formatted.events=format.events(cp$indiv1,segments.table)
		 			  Events.cyto=append(Events.cyto,list(formatted.events$cyto))
		 			  Events.segments=append(Events.segments,list(formatted.events$segments))
		 			  Edges=rbind(Edges,c(length(Nodes),which(names(Nodes)==names(Front)[closest[2]])))
		 			  formatted.events=format.events(cp$indiv2,segments.table)
		 			  Events.cyto=append(Events.cyto,list(formatted.events$cyto))
		 			  Events.segments=append(Events.segments,list(formatted.events$segments))
	  		 }
	  	 }	
		 left=setdiff((1:length(Nodes)),Edges$to)
    }
    if(ncp > 1){names(Nodes)[(length(Nodes)):(length(Nodes)-ncp+1)]=c("Common precursor",paste("Common precursor",2:ncp))}else{if(ncp==1){names(Nodes)[length(Nodes)]="Common precursor"}}
    # Add the "normal tissue" node
    if(setequal(unique(Nodes[[length(Nodes)]]),0)){names(Nodes)[length(Nodes)]="Normal tissue"}else{
      Nodes=append(list(root=numeric(ncol(S))),Nodes);Edges=Edges+1;names(Nodes)[1]="Normal tissue"
      LCA=setdiff(1:length(Nodes),Edges$to)
      Edges=rbind(Edges,c(1,LCA))
      formatted.events=format.events(Nodes[[LCA]],segments.table)
      Events.cyto=append(Events.cyto,list(formatted.events$cyto))
      Events.segments=append(Events.segments,list(formatted.events$segments))
    }
    # Format edges as a list
    Edges.list=list()
    for(i in 2:nrow(Edges))
    {
      Edges.list=append(Edges.list,list(c(from=Edges[i,1],to=Edges[i,2])))
    }
    list(nodes=Nodes, edges=Edges.list, events.cyto=Events.cyto, events.segments=Events.segments)
} 


####################################################################
#############              EXPORT RESULTS              #############
####################################################################
export2dot=function(tumors.tree, dot.file.segments, dot.file.cyto) {
    # Export the tree with cytoband labels
    unlink(dot.file.cyto)
    sink(dot.file.cyto)
    Edges=tumors.tree$edges
    Nodes=tumors.tree$nodes
    Events.cyto=tumors.tree$events.cyto
    cat("digraph G { \n")
    cat("\t ratio=fill\n")
    cat("\t size=\"20,20\"\n")
    cat("\t","node[fontsize=100,style=\"setlinewidth(8)\"]")
    n=length(Edges)
    for (i in 1:n) {
      from=Nodes[Edges[[i]]["from"]]
      labfrom=names(from)
      to=Nodes[Edges[[i]]["to"]]
      labto=names(to)
      evt=Events.cyto[[i]]
      # Do a new line for edge labels every 4 aberrations
      labevt=character(0)
      maxline=40
      rest=evt
      while(length(rest)!=0)
      {
        eol=max(which(cumsum(nchar(rest)) <= maxline))
        labevt=paste(c(labevt,paste(rest[1:eol],collapse=","),"\\l"),collapse="")
        if(eol!=length(rest)){rest=rest[(eol+1):length(rest)]}else{rest=c()}
      }
      if (length(labevt)==0) labevt="No event"
      cat ("\t \"",labfrom, "\"->\"", labto, "\"[label=\"",labevt,"\"",",fontsize=100,style=\"setlinewidth(8)\",arrowsize=2","]\n", sep="")
    }
    cat("}\n")
    sink()
    # Export the tree with segments labels
    unlink(dot.file.segments)
    sink(dot.file.segments)
    Edges=tumors.tree$edges
    Nodes=tumors.tree$nodes
    Events.segments=tumors.tree$events.segments
    cat("digraph G { \n")
    cat("\t ratio=fill\n")
    cat("\t size=\"20,20\"\n")
    cat("\t","node[fontsize=100,style=\"setlinewidth(8)\"]")
    n=length(Edges)
    for (i in 1:n) {
      from=Nodes[Edges[[i]]["from"]]
      labfrom=names(from)
      to=Nodes[Edges[[i]]["to"]]
      labto=names(to)
      evt=Events.segments[[i]]
      # Do a new line for edge labels every 4 aberrations
      labevt=character(0)
      maxline=40
      rest=evt
      while(length(rest)!=0)
      {
        eol=max(which(cumsum(nchar(rest)) <= maxline))
        labevt=paste(c(labevt,paste(rest[1:eol],collapse=","),"\\l"),collapse="")
        if(eol!=length(rest)){rest=rest[(eol+1):length(rest)]}else{rest=c()}
      }
      if (length(labevt)==0) labevt="No event"
      cat ("\t \"",labfrom, "\"->\"", labto, "\"[label=\"",labevt,"\"",",fontsize=100,style=\"setlinewidth(8)\",arrowsize=2","]\n", sep="")
    }
    cat("}\n")
    sink()
}


plotbkp=function(tumors.tree,segments.table,chr,arm,cyto,P.val,P,bkp.file)
{
  # Find cytoband coordinates
  tmp=unlist(strsplit(unique(cyto),split="\\."))
  cytolab=unique(tmp[grep("[p,q]",tmp)])
  tel=union(grep("ptel",cytolab),grep("qtel",cytolab))
  if(length(tel) > 0){cytolab=cytolab[-tel]}
  ncyto=length(cytolab);colcyto=rep(c("white","grey"),ncyto)
  cytodeb=cytofin=numeric(ncyto)
  for(i in 1:ncyto)
  {
    int=intervals(grep(paste("^",cytolab[i],sep=""),cyto));if(nrow(int) > 1){print(i)}
    cytodeb[i]=int$from
    cytofin[i]=int$to
  }

  # Identification of common breakpoints
  nsample=nrow(P);nprobe=ncol(P)
  normal.node=which(names(tumors.tree$nodes)=='Normal tissue')
  normal.edge=which(unlist(tumors.tree$edges)==normal.node)
  if(length(normal.edge)==1){LCA=unlist(tumors.tree$edges)[normal.edge+1]}else{LCA=normal.node} # index of the last common ancestor
  v=tumors.tree$nodes[[LCA]]
  if(setequal(unique(v),0)){combkp.start.visible=combkp.end.visible=combkp.start.inferred=combkp.end.inferred=c()}else{
    if(v[1]!=0){start=1;end=1}else{start=c();end=c()}
    for(i in 2:nrow(segments.table)){if(v[i]!=0){if(v[i]==v[i-1] & segments.table$Chromosome[i]==segments.table$Chromosome[i-1]){end[length(end)]=i}else{start=c(start,i);end=c(end,i)}}}
    combkp.start=sort(segments.table$Start.probe.number[start])
    combkp.end=sort(segments.table$End.probe.number[end])
  # Differenciating common breakpoints still visible in the profiles from common breakpoints inferred by TuMult to balance chromosomes
  combkp.start.visible=combkp.start.inferred=combkp.end.visible=combkp.end.inferred=c()
  for(k in 1:length(combkp.start)){
  	 if(combkp.start[k]==1 | chr[max(1,combkp.start[k]-1)]!=chr[combkp.start[k]]){bef=rep(0,nsample)}else{bef=P[,combkp.start[k]-1]}
  	 if(abs(sum(sign(P[,combkp.start[k]]-bef)))==nsample){combkp.start.visible=c(combkp.start.visible,combkp.start[k])}else{combkp.start.inferred=c(combkp.start.inferred,combkp.start[k])}}
  for(k in 1:length(combkp.end)){
  	 if(combkp.end[k]==nprobe | chr[min(nprobe,combkp.end[k]+1)]!=chr[combkp.end[k]]){aft=rep(0,nsample)}else{aft=P[,combkp.end[k]+1]}
  	 if(abs(sum(sign(aft-P[,combkp.end[k]])))==nsample){combkp.end.visible=c(combkp.end.visible,combkp.end[k])}else{combkp.end.inferred=c(combkp.end.inferred,combkp.end[k])}}
  }
  
  
  pdf(bkp.file,width=12,height=8)
  # Page 1: Entire profiles
  combkp.visible=c(combkp.start.visible-0.5,combkp.end.visible+0.5);combkp.inferred=c(combkp.start.inferred-0.5,combkp.end.inferred+0.5)
  M=max(abs(P.val),na.rm=TRUE);my.ylim=c(-M,M)
  mypch="."
  cx=1;cx.points=5*nsample/log10(nprobe);ld=1;colchrom=rep(c("white","grey"),12)
  m=matrix(1:nsample,nsample)
  layout(m)
  par(mai=c(0,par()$mai[2],0,par()$mai[4]))
  mycol=c("blue","green","yellow","red","purple")[P[1,]+3]  
  plot(P.val[1,],col=mycol,pch=mypch,xlab="",ylab=rownames(P)[1],xaxt="n",cex.axis=cx,cex.lab=cx,cex=cx.points,ylim=my.ylim,bty="n",las=1)
  for(c in unique(chr))
  {
    p.arm=intervals(which(chr==c & arm=="p"));q.arm=intervals(which(chr==c & arm=="q"))
 	 if(nrow(p.arm)!=0){rect(p.arm$from,my.ylim[1],p.arm$to,my.ylim[1]+0.2,col=colchrom[c])}
 	 if(nrow(q.arm)!=0){rect(q.arm$from,my.ylim[1],q.arm$to,my.ylim[1]+0.2,col=colchrom[c])}
  }
  affchrom(chr,my.ylim[2]-0.1,cx)
  if(length(combkp.visible) > 0){for(k in 1:length(combkp.visible)){segments(combkp.visible[k],par("usr")[3],combkp.visible[k],my.ylim[2]-0.3,lwd=ld,lty=2)}}
  if(length(combkp.inferred) > 0){for(k in 1:length(combkp.inferred)){segments(combkp.inferred[k],par("usr")[3],combkp.inferred[k],my.ylim[2]-0.3,lwd=ld,lty=3)}}
  if(nsample > 2){
    for(i in 2:(nsample-1))
    {
      mycol=c("blue","green","yellow","red","purple")[P[i,]+3]  
      plot(P.val[i,],col=mycol,pch=mypch,xlab="",ylab=rownames(P)[i],xaxt="n",cex.axis=cx,cex.lab=cx,cex=cx.points,ylim=my.ylim,bty="n",las=1)
      for(c in unique(chr))
      {
        p.arm=intervals(which(chr==c & arm=="p"));q.arm=intervals(which(chr==c & arm=="q"))
 	     if(nrow(p.arm)!=0){rect(p.arm$from,my.ylim[1],p.arm$to,my.ylim[1]+0.2,col=colchrom[c])}
 	     if(nrow(q.arm)!=0){rect(q.arm$from,my.ylim[1],q.arm$to,my.ylim[1]+0.2,col=colchrom[c])}
      }
      affchrom(chr,my.ylim[1]+0.4,cx,texte=FALSE)
      abline(v=combkp.visible,lwd=ld,lty=2);abline(v=combkp.inferred,lwd=ld,lty=3)
    }	
  }
  mycol=c("blue","green","yellow","red","purple")[P[nsample,]+3]  
  plot(P.val[nsample,],col=mycol,pch=mypch,xlab="",ylab=rownames(P)[nsample],xaxt="n",cex.axis=cx,cex.lab=cx,cex=cx.points,ylim=my.ylim,bty="n",las=1)
  for(c in unique(chr))
  {
    p.arm=intervals(which(chr==c & arm=="p"));q.arm=intervals(which(chr==c & arm=="q"))
 	 if(nrow(p.arm)!=0){rect(p.arm$from,my.ylim[1],p.arm$to,my.ylim[1]+0.2,col=colchrom[c])}
 	 if(nrow(q.arm)!=0){rect(q.arm$from,my.ylim[1],q.arm$to,my.ylim[1]+0.2,col=colchrom[c])}
  }
  affchrom(chr,my.ylim[1]-0.4,cx,texte=FALSE)
  if(length(combkp.visible) > 0){for(k in 1:length(combkp.visible)){segments(combkp.visible[k],my.ylim[1]+0.2,combkp.visible[k],par("usr")[4],lwd=ld,lty=2)}}
  if(length(combkp.inferred) > 0){for(k in 1:length(combkp.inferred)){segments(combkp.inferred[k],my.ylim[1]+0.2,combkp.inferred[k],par("usr")[4],lwd=ld,lty=3)}}

  
  # One page per chromosome
  for(c in unique(chr))
  {
    sel=which(chr==c);P.chr=P[,sel];P.val.chr=P.val[,sel]
    cx=1;cx.points=5*nrow(P.chr)/log10(ncol(P.chr));ld=1;colchrom=rep(c("white","grey"),12)
    m=matrix(1:nrow(P.chr),nrow(P.chr))
    layout(m)
    par(mai=c(0,par()$mai[2],0,par()$mai[4]))
    mycol=c("blue","green","yellow","red","purple")[P.chr[1,]+3]  
    plot(P.val.chr[1,],col=mycol,pch=mypch,xlab="",ylab=rownames(P.chr)[1],xaxt="n",cex.axis=cx,cex.lab=cx,cex=cx.points,ylim=my.ylim,bty="n",las=1)
	 cytochrom=which(cytodeb%in%sel)
	 for(k in cytochrom){rect(match(cytodeb[k],sel)-0.5,my.ylim[1],match(cytofin[k],sel)+0.5,my.ylim[1]+0.3,col=colcyto[k])}
    axis(side=1,at=apply(cbind(match(cytodeb[cytochrom],sel),match(cytofin[cytochrom],sel)),1,mean),labels=cytolab[cytochrom],pos=my.ylim[1],las=3,cex.axis=0.8*cx)
    centro=match(which(chr==c & arm=="q")[1],sel)-0.5;arrows(centro,my.ylim[1],centro,my.ylim[1]+0.8,lwd=2*ld,angle=120,length=.15)
 	 lab=c(1:22,"X","Y")[c];title(paste("Chromosome",lab),line=-1)
    combkp.visible=c(match(combkp.start.visible,sel)-0.5,match(combkp.end.visible,sel)+0.5)
    combkp.inferred=c(match(combkp.start.inferred,sel)-0.5,match(combkp.end.inferred,sel)+0.5)
    if(length(combkp.visible) > 0){for(k in 1:length(combkp.visible)){segments(combkp.visible[k],par("usr")[3],combkp.visible[k],my.ylim[2]-0.3,lwd=ld,lty=2)}}
    if(length(combkp.inferred) > 0){for(k in 1:length(combkp.inferred)){segments(combkp.inferred[k],par("usr")[3],combkp.inferred[k],my.ylim[2]-0.3,lwd=ld,lty=3)}}
    if(nsample > 2){
      for(i in 2:(nsample-1))
      {
        mycol=c("blue","green","yellow","red","purple")[P.chr[i,]+3]  
        plot(P.val.chr[i,],col=mycol,pch=mypch,xlab="",ylab=rownames(P.chr)[i],xaxt="n",cex.axis=cx,cex.lab=cx,cex=cx.points,ylim=my.ylim,bty="n",las=1)
	     cytochrom=which(cytodeb%in%sel)
	     for(k in cytochrom){rect(match(cytodeb[k],sel)-0.5,my.ylim[1],match(cytofin[k],sel)+0.5,my.ylim[1]+0.3,col=colcyto[k])}
        axis(side=1,at=apply(cbind(match(cytodeb[cytochrom],sel),match(cytofin[cytochrom],sel)),1,mean),labels=cytolab[cytochrom],pos=my.ylim[1],las=3,cex.axis=0.8*cx)
        centro=match(which(chr==c & arm=="q")[1],sel)-0.5;arrows(centro,my.ylim[1],centro,my.ylim[1]+0.8,lwd=2*ld,angle=120,length=.15)
        abline(v=combkp.visible,lwd=ld,lty=2);abline(v=combkp.inferred,lwd=ld,lty=3)
      } 	
    }
    par(mai=c(0.5,par()$mai[2],0,par()$mai[4]))
    mycol=c("blue","green","yellow","red","purple")[P.chr[nrow(P.chr),]+3]  
    plot(P.val.chr[nrow(P.chr),],col=mycol,pch=mypch,xlab="",ylab=rownames(P.chr)[nrow(P.chr)],xaxt="n",cex.axis=cx,cex.lab=cx,cex=cx.points,ylim=my.ylim,bty="n",las=1)
	 cytochrom=which(cytodeb%in%sel)
	 for(k in cytochrom){rect(match(cytodeb[k],sel)-0.5,my.ylim[1],match(cytofin[k],sel)+0.5,my.ylim[1]+0.3,col=colcyto[k])}
    axis(side=1,at=apply(cbind(match(cytodeb[cytochrom],sel),match(cytofin[cytochrom],sel)),1,mean),labels=cytolab[cytochrom],pos=my.ylim[1],las=3,cex.axis=0.8*cx)
    centro=match(which(chr==c & arm=="q")[1],sel)-0.5;arrows(centro,my.ylim[1],centro,my.ylim[1]+0.8,lwd=2*ld,angle=120,length=.15)
    if(length(combkp.visible) > 0){for(k in 1:length(combkp.visible)){segments(combkp.visible[k],my.ylim[1]+0.2,combkp.visible[k],par("usr")[4],lwd=ld,lty=2)}}
    if(length(combkp.inferred) > 0){for(k in 1:length(combkp.inferred)){segments(combkp.inferred[k],my.ylim[1]+0.2,combkp.inferred[k],par("usr")[4],lwd=ld,lty=3)}}
  }
  dev.off()
}


####################################################################
##############              MAIN PROGRAM              ##############
####################################################################
# Read probes information 
print("Loading the probes table...")
probes.table=read.csv(Probes.file,as.is=TRUE,fill=FALSE)
chr=probes.table$Chromosome;lobound=probes.table$StartPosition;upbound=probes.table$EndPosition;pos=probes.table$Position;name=probes.table$Name
chr[chr=="X"]=23;chr[chr=="Y"]=24;chr=as.numeric(chr)
if(is.null(pos)){pos=(lobound+upbound)/2}
cyto=probes.table[,grep("Cytoband",colnames(probes.table))[1]]
arm=character(nrow(probes.table))
arm[grep("p",cyto)]="p";arm[grep("q",cyto)]="q"
if("StartCytoband"%in%colnames(probes.table) & "StartCytoband"%in%colnames(probes.table)){cytostart=probes.table$StartCytoband;cytoend=probes.table$EndCytoband}else{cytostart=cytoend=cyto}
print(paste("->","Table of",nrow(probes.table),"probes read."))

# Load the reference dataset
print("Loading the reference data set...")
load(Reference.file)
nref=nrow(ref)
print(paste("->",nref,"reference profiles read."))

# Output files
pat=unlist(strsplit(Prof.file,split=".csv"))[1]
dot.file.segments=paste(pat,"_tree_segments.dot",sep="")
dot.file.cyto=paste(pat,"_tree_cytobands.dot",sep="")
segments.file=paste(pat,"_segments.csv",sep="")
tree.file=paste(pat,"_tree.Rdata",sep="")
bkp.file=paste(pat,"_common_breakpoints.pdf",sep="")
    
# Read the tumors profile table
print("Loading the tumors profiles...")
profile=as.matrix(read.csv(Prof.file,check.names=FALSE,fill=FALSE))
P=profile[,grep("status",colnames(profile))]
P.val=profile[,grep("value",colnames(profile))]
sample.names=unlist(strsplit(colnames(P),".status"))
colnames(P)=sample.names;colnames(P.val)=sample.names
for(i in 1:ncol(P)){P[,i]=as.numeric(P[,i]);P.val[,i]=as.numeric(P.val[,i])}
P=t(P);P.val=t(P.val)
    
# Replace NAs by the value of the closest probe
number.NA=sum(is.na(P))
for(i in 1:nrow(P))
{
  for(j in 1:ncol(P))
  {
    if(is.na(P[i,j])){
      x=which(chr==chr[j] & !is.na(P[i,]))
      P[i,j]=P[i,x[which.min(abs(pos[x]-pos[j]))]]
    }	
  }	
}
print(paste("->",nrow(P),"profiles read."))
    
# Merge breakpoints closer than the threshold th.bkp
print(paste("Merging breakpoints closer than",th.bkp,"probes..."))
P=merge.breakpoints(P,th.bkp,chr)
    
# Segmentation of the tumors profiles
print("Delineating homogenous segments...")
segments.table=segments.delineation(P,chr,pos,name,cytostart,cytoend,lobound,upbound)
S=profile.segmentation(P,segments.table)
print(paste("->",nrow(segments.table),"homogenous segments delineated."))
    
# Generation of the amplitude matrix and calculation of the breakpoints frequencies in the reference data set
A=matrix(nrow=nrow(P),ncol=0);F=c();Fup=list();Fdown=list()
for(c in unique(chr))
{
  indc=which(segments.table$Chromosome==c)
  Schr=as.matrix(S[,indc])
  Bchr=S2B(Schr)
  Achr=B2A(Bchr)
  A=cbind(A,Achr)
  Fup[[c]]=sum(ref[,segments.table$Start.probe.number[indc[1]]] > 0,na.rm=TRUE)/nref
  Fdown[[c]]=sum(ref[,segments.table$Start.probe.number[indc[1]]] < 0,na.rm=TRUE)/nref
  if(length(indc) > 1)
  {
    for(i in 2:length(indc))
    {
      Fup[[c]]=c(Fup[[c]],sum(ref[,segments.table$Start.probe.number[indc[i]]] > ref[,segments.table$End.probe.number[indc[i-1]]],na.rm=TRUE)/nref)
      Fdown[[c]]=c(Fdown[[c]],sum(ref[,segments.table$Start.probe.number[indc[i]]] < ref[,segments.table$End.probe.number[indc[i-1]]],na.rm=TRUE)/nref)
    }
  }
  Fup[[c]]=c(Fup[[c]],sum(ref[,segments.table$End.probe.number[length(indc)]] < 0,na.rm=TRUE)/nref)
  Fdown[[c]]=c(Fdown[[c]],sum(ref[,segments.table$End.probe.number[length(indc)]] > 0,na.rm=TRUE)/nref)
  F=c(F,Fup[[c]],Fdown[[c]])
}

# Reconstruct tumors lineage and the sequence of chromosomal aberrations
print("Reconstructing the tumor progression tree.")
tumors.tree=tree.reconstruction(S,segments.table,Fup,Fdown)
print("-> Tumor progression tree reconstructed.")
          
# Export results
print("Exporting results.")
export2dot(tumors.tree,dot.file.segments,dot.file.cyto) # tree in .dot format
write.csv(cbind(Number=1:nrow(segments.table),segments.table),segments.file,row.names=FALSE,quote=FALSE) # segments.table in .csv format
save(tumors.tree,file=tree.file) # Rdata of the tumors tree   
plotbkp(tumors.tree,segments.table,chr,arm,cyto,P.val,P,bkp.file) # Plot of the profiles and the common breakpoints
print("Done.")