#$Author: sinnwell $
#$Date: 2011/02/25 16:37:23 $
#$Header: /projects/genetics/cvs/cvsroot/mend.err/R/mend.err.q,v 1.2 2011/02/25 16:37:23 sinnwell Exp $
#$Locker:  $
#$Log: mend.err.q,v $
#Revision 1.2  2011/02/25 16:37:23  sinnwell
#long to int, add x.sexcheck, T to TRUE
#
#Revision 1.1.1.1  2011/02/16 14:33:20  sinnwell
#initial for mend.err package
#
#Revision 1.3  2009/02/25 22:13:17  sinnwell
#add keywords
#
#      This program, mend.err(), processes a data frame ped that contains
# multiple pedigrees.  An optional parameter geno may also be passed
# containing the genotypes for each pedigree if these genotypes are not
# already contained in ped.
#
#      Each pedigree in ped will have genotype elimination performed upon
# it, and pedigrees containing Mendelian Errors will be jackknifed to
# determine which members, if any, may be at the root of a Mendelian
# Inconsistency.  At program conclusion, an optional summary is printed
# to display the final results in a tabular fashion.

mend.err <- function(ped,                   # Pedigree data
                     geno=NULL,             # Genotype data (if not in ped)
                     pedigree.column=1,     # Pedigree ID column
                     person.column=2,       # Person ID column
                     labID.column=3,        # Lab ID column
                     father.column=4,       # Father ID column
                     mother.column=5,       # Mother ID column
                     sex.column=6,          # Sex ID column
                     locus.start=7,         # Starting column of the loci
                     locus.end=ncol(ped),   # Ending column of the loci
                     miss.val=c(NA,0),      # Missing Value code
                     male.code=1,           # Male sex code
                     female.code=2,         # Female sex code
                     warn.untyped=FALSE,    # Warn if a locus is untyped
                     check.missing=TRUE,    # Check if all alleles are missing
                     print.no.error=TRUE,   # List pedigrees with no errors
                     print.error=TRUE,      # Print jackknife results
                     print.summary=FALSE,   # Summary of mendelian errors
                     print.num.processed=FALSE, # Print number of pedigrees handled
                     sort.pedigrees=FALSE,  # Process pedigrees in sorted order
                     batch.size=NULL,       # Number of loci to process at once
                     locus.labels=NULL)	    # Optional lables for each locus
{
 # Check if two datasets are used instead of one:
 if(!is.null(geno))
   {
    locus.start <- ncol(ped)+1
    ped <- cbind(ped,geno)
    locus.end <- ncol(ped)
   }

 # Convert to a matrix:
 null.names <- FALSE
 if(is.data.frame(ped))
   {
    if(is.null(names(ped)))
      {
       names(ped) <- 1:ncol(ped)
       null.names <- TRUE
      } 
    ped <- as.matrix.data.frame(ped, justify.format="none")
   }

 # Enumerate the distinct pedigrees and process them in sorted order
 # if requested:
 distinct.pedigrees <- unique(ped[,pedigree.column])
 if(sort.pedigrees)
   distinct.pedigrees <- sort(distinct.pedigrees)

 ret       <- list(NULL)
 ret$n.ped <- 0
 ret$n.err <- 0

 # Use the locus labels specified by the user or the user's data set:
 if(is.null(locus.labels))
   {
    locus.labels <- dimnames(ped)[[2]]
    if(!is.null(locus.labels) && !null.names)
       { 
        locus.labels  <- locus.labels[-c(pedigree.column, person.column, 
                                         labID.column,    father.column,
                                         mother.column,   sex.column)]
        locus.labels  <- locus.labels[as.logical(c(1:length(locus.labels) 
                                                   %% 2))] 
        summary.names <- c("Pedigree", locus.labels)
       }         
    else
       {
       locus.labels   <- 1:((locus.end - locus.start + 1)/2)
       summary.names  <- c("Pedigree", paste("Loc",locus.labels,sep=""))
       }
   }
 else
   {
    summary.names     <- c("Pedigree", locus.labels)
   }

 # Process each pedigree:

 ###################################
 ### Do all loci at once:        ###
 ###################################
 if(is.null(batch.size))
   {
    # Process each pedigree:
    for(pedID in distinct.pedigrees)
      {
       err.tmp <- mend.err.engine(ped = ped[ped[,pedigree.column] == pedID,],
                                  geno=NULL,
                                  pedigree.column=pedigree.column,
                                  person.column=person.column,
                                  labID.column=labID.column,
                                  father.column=father.column,
                                  mother.column=mother.column,
                                  sex.column=sex.column,
                                  locus.start=locus.start,
                                  locus.end=locus.end,
                                  miss.val=miss.val,
                                  male.code=male.code,
                                  female.code=female.code,
                                  warn.untyped=warn.untyped,
                                  check.missing=check.missing,
                                  print.no.error=print.no.error,
                                  print.error=print.error,
                                  print.summary=print.summary,
                                  print.num.processed=print.num.processed,
                                  sort.pedigrees=FALSE,
                                  locus.labels=locus.labels,
                                  summary.names=summary.names)

       # Store return value for this pedigree in the appropriate table:
       if(!is.null(err.tmp$ped.no.err))
          ret$ped.no.err <- rbind(ret$ped.no.err,err.tmp$ped.no.err)

       if(!is.null(err.tmp$ped.err))
          ret$ped.err <- rbind(ret$ped.err,err.tmp$ped.err)

       if(!is.null(err.tmp$ped.missing))
          ret$ped.missing <- rbind(ret$ped.missing,err.tmp$ped.missing)

       # Increment number of pedigrees processed and number of erroneous
       # pedigrees:
       ret$n.ped <- ret$n.ped + 1
       ret$n.err <- ret$n.err + err.tmp$n.err
       
      } # end for(pedID)               ###
   }   ## end if(is.null(batch.size))  ###

 ###################################
 ### Do batch.size loci at once: ###
 ###################################
 if(!is.null(batch.size))
  {
   nLoci <- (locus.end - locus.start + 1)/2
   # Check for illegal batch size:
   if(batch.size <= 0)
     {
      warning("A positive integer batch.size must be specified")
      return(NULL)
     }
   # Check for batch.size > nLoci:
   if(batch.size > nLoci)
     batch.size <- nLoci

   # Columns of the pedigree:
   columns <- c(pedigree.column, person.column, father.column,   
                mother.column,   sex.column,    labID.column)

   # Reorder ped to have pedigree-genotypes order:
   ped <- ped[,c(columns,locus.start:locus.end)]

   # Account for both a present and missing lab ID column:
   if(is.null(labID.column))
     {
      locus.start  <- 6
      locus.end    <- ncol(ped)
      columns      <- 1:5
     }
   else
     {
      labID.column <- 6
      locus.start  <- 7
      locus.end    <- ncol(ped)
      columns      <- 1:6
     }

   # Set up quot and rem such that nLoci = batch.size*quot + rem:
   quot  <- nLoci %/% batch.size
   rem   <- nLoci %%  batch.size

   # Process each pedigree:
   for(pedID in distinct.pedigrees)
     {
      # Starting column in ped of the batch of loci being processed:
      start            <- locus.start

      # Index used for passing batches of locus labels and summary names
      # to mend.err.engine:
      start.labels <- 1
      
      # Variables and flags to keep track of the return value for 
      # an entire pedigree:
      ret.tmp          <- pedID
      ped.no.err.flag  <- TRUE
      ped.err.flag     <- FALSE
      ped.missing.flag <- TRUE

      # Process the quot batches of loci of size batch.size that we have:
      for(i in 1:quot)
        {
         # Output the pedigree and loci being processed:
         cat("\n#################################\n")
         cat("#################################\n")          
         cat(paste("PEDIGREE ",
                   pedID,
                   ":\n",sep=""))
         if(batch.size==1) cat(paste("Locus ", locus.labels[i], ":\n",sep=""))
         else cat(paste("Loci ", locus.labels[(i-1)*batch.size + 1], 
                        "-", locus.labels[i*batch.size],":\n",sep=""))

         # loci contains the columns of ped belonging to the current batch: 
         loci    <- start:(start+2*batch.size-1)
         
         # Call
         err.tmp <- mend.err.engine(ped = ped[ped[,1] == pedID,
                                              c(columns,loci)],
                                    geno=NULL,
                                    pedigree.column=1,
                                    person.column=2,
                                    labID.column=labID.column,
                                    father.column=3,
                                    mother.column=4,
                                    sex.column=5,
                                    locus.start=locus.start,
                                    locus.end=locus.start + 2*batch.size - 1,
                                    miss.val=miss.val,
                                    male.code=male.code,
                                    female.code=female.code,
                                    warn.untyped=warn.untyped,
                                    check.missing=check.missing,
                                    print.no.error=print.no.error,
                                    print.error=print.error,
                                    print.summary=print.summary,
                                    print.num.processed=print.num.processed,
                                    sort.pedigrees=FALSE,
                                    locus.labels=locus.labels[
                                                 start.labels:
                                                 (start.labels+batch.size-1)
                                                             ],
                                    summary.names=summary.names[
                                                  c(1,
                                                   (start.labels+1):
                                                   (start.labels+batch.size))
                                                                ]
                                    )

         # Concatenate partial results and update state flags that will
         # eventually tell which table the total result belongs in:
         if(!is.null(err.tmp$ped.no.err))
           {
            ret.tmp             <- cbind(ret.tmp,t(rep(0,batch.size)))
            ped.no.err.flag     <- ped.no.err.flag && TRUE
            ped.missing.flag    <- FALSE
           }
         if(!is.null(err.tmp$ped.err))
           {
            ret.tmp             <- cbind(ret.tmp,as.matrix(err.tmp$ped.err[-1]))
            ped.err.flag        <- TRUE
            ped.no.err.flag     <- FALSE
            ped.missing.flag    <- FALSE
           }
         if(!is.null(err.tmp$ped.missing))
           {
            ret.tmp             <- cbind(ret.tmp,t(rep(0,batch.size)))
            ped.missing.flag    <- ped.missing.flag && TRUE
           }
         
         # Update start to the next batch of loci (2 columns per locus):
         start <- start + 2*batch.size

         # Update start.labels to the next batch of labels (1 entry per locus):
         start.labels <- start.labels + batch.size
         
        } # for(i) ###

      # Process remaining loci (sub-batch of size rem):
      if(rem > 0)
        {
         # Output the pedigree and loci being processed:          
         cat("\n#################################\n")
         cat("#################################\n")          
         cat(paste("PEDIGREE ",
                   pedID,
                   ":\n",sep=""))
         if(rem > 1)
             cat(paste("Loci ",
                      locus.labels[nLoci-rem+1],
                      "-",
                      locus.labels[nLoci],
                      ":\n",sep=""))
         else
             cat(paste("Locus ",
                      locus.labels[nLoci],
                      ":\n",sep=""))


         loci <- (locus.end - 2*rem+1):locus.end
         err.tmp <- mend.err.engine(ped = ped[ped[,1] == pedID,
                                              c(columns,loci)],
                                    geno=NULL,
                                    pedigree.column=1,
                                    person.column=2,
                                    labID.column=labID.column,
                                    father.column=3,
                                    mother.column=4,
                                    sex.column=5,
                                    locus.start=locus.start,
                                    locus.end=locus.start + 2*rem - 1,
                                    miss.val=miss.val,
                                    male.code=male.code,
                                    female.code=female.code,
                                    warn.untyped=warn.untyped,
                                    check.missing=check.missing,
                                    print.no.error=print.no.error,
                                    print.error=print.error,
                                    print.summary=print.summary,
                                    print.num.processed=print.num.processed,
                                    sort.pedigrees=FALSE,
                                    locus.labels=locus.labels[
                                                 start.labels:
                                                 (start.labels+rem-1)
                                                             ],
                                    summary.names=summary.names[
                                                  c(1,
                                                   (start.labels+1):
                                                   (start.labels+rem))
                                                               ]
                                    )

         # Concatenate partial results and update state flags that will
         # eventually tell which table the total result belongs in:         
         if(!is.null(err.tmp$ped.no.err))
           {
            ret.tmp <- cbind(ret.tmp,t(rep(0,rem)))
            ped.no.err.flag      <- ped.no.err.flag && TRUE
            ped.missing.flag     <- FALSE
           }
         if(!is.null(err.tmp$ped.err))
           {
            ret.tmp <- cbind(ret.tmp,as.matrix(err.tmp$ped.err[-1]))
            ped.err.flag         <- TRUE
            ped.no.err.flag      <- FALSE
            ped.missing.flag     <- FALSE
           }
         if(!is.null(err.tmp$ped.missing))
           {
            ret.tmp <- cbind(ret.tmp,t(rep(0,rem)))
            ped.missing.flag     <- ped.missing.flag && TRUE
           }
        } # end if(rem > 0) ###

      # Add this pedigree to the appropriate return value slot:  
      if(ped.no.err.flag && !ped.missing.flag)
         ret$ped.no.err <- rbind(ret$ped.no.err,ret.tmp)

      if(ped.err.flag)
        {
         ret$ped.err <- rbind(ret$ped.err,ret.tmp)
         ret$n.err <- ret$n.err + 1
        }

      if(ped.missing.flag)
         ret$ped.missing <- rbind(ret$ped.missing,ret.tmp[1])

      ret$n.ped <- ret$n.ped + 1
     } # end for(pedID) ###

      # Rename the loci so that they number loc1,loc2,...,locN:
#      summary.names <- c("Pedigree",paste("Loc",1:nLoci,sep=""))
      old.options                   <- options(warn=-1)
      on.exit(options(old.options))
      if(!is.null(ret$ped.no.err))
        {
         # Pedigrees without Mendelian Errors:
         ped.no.err            <- data.frame(ret$ped.no.err)
         labels.ped            <- list(1:nrow(ped.no.err),summary.names)
         dimnames(ped.no.err)  <- labels.ped
         ret$ped.no.err        <- ped.no.err
        }
      if(!is.null(ret$ped.err))
        {
         # Pedigrees with Mendelian Errors:        
         ped.err               <- data.frame(ret$ped.err)
         labels.ped            <- list(1:nrow(ped.err),summary.names)
         dimnames(ped.err)     <- labels.ped
         ret$ped.err           <- ped.err
        }
      if(!is.null(ret$ped.missing))
        {
         # Pedigrees with no allele data:        
         ped.missing           <- data.frame(ret$ped.missing)
         labels.ped            <- list(1:nrow(ped.missing),"Pedigree")
         dimnames(ped.missing) <- labels.ped
         ret$ped.missing       <- ped.missing
        }
      options(old.options)
  } # end if(!is.null(batch.size)) ###

 if(exists("is.R") && is.function(is.R) && is.R())
    class(ret) <- "mend.err"   
 else
    oldClass(ret) <- "mend.err"

 return(ret)
}
