#$Author: sinnwell $
#$Date: 2011/02/25 16:37:23 $
#$Header: /projects/genetics/cvs/cvsroot/mend.err/R/sge.q,v 1.3 2011/02/25 16:37:23 sinnwell Exp $
#$Locker:  $
#$Log: sge.q,v $
#Revision 1.3  2011/02/25 16:37:23  sinnwell
#long to int, add x.sexcheck, T to TRUE
#
#Revision 1.2  2011/02/16 16:40:00  sinnwell
#fixed a small bug in sge.q, small fixes to c code, but still error
#
#Revision 1.1.1.1  2011/02/16 14:33:20  sinnwell
#initial for mend.err package
#
#Revision 1.4  2009/03/02 22:28:57  sinnwell
#add PACKAGE to .C
#
#Revision 1.3  2009/02/25 22:13:49  sinnwell
#add keywords
#
sge <- function(ped,                # Data frame containing pedigree data
                geno,               # Data frame containing genotypes
                miss.val = c(NA,0), # Missing value codes 
                male.code = 1,      # Male sex code
                female.code = 2,    # Female sex code2
                pedigree.column=1,  # Pedigree ID column
                person.column=2,    # Person ID column
                labID.column=NULL,  # Lab ID column
                father.column=3,    # Father ID column
                mother.column=4,    # Mother ID column
                sex.column=5,       # Sex ID column
                warn.untyped=TRUE,     # Warn if a locus is untyped
                sort.loci = TRUE,      # Sort loci into lexicographic order
                print.stdio = FALSE,    # Useful for debugging
                locus.labels=NULL)  # Optional lables for each locus
{        
  pedigree <- ped[,pedigree.column]
  person   <- ped[,person.column]
  father   <- ped[,father.column]
  mother   <- ped[,mother.column]
  sex      <- ped[,sex.column]
  
  # Proper sex not specified.
  if(any((sex != male.code) & (sex != female.code)))
    warning("One or more subjects do not have there sex correctly specified.")
  
  if(is.factor(sex))
    {
     sex <- as.character(sex)
    }

  # Unspecified sex defaults to female
  sex[(sex != male.code) & (sex != female.code)] <- 0

  # Match male to sex code 1, and females to sex code 0
  sex[sex == male.code] <-   1
  sex[sex == female.code] <- 0

  # We will well-order the person, father, and mother vectors by considering
  # the entries of the person vector to be an ordering and then mapping this
  # ordering to 1,2,...,n where n = length(person):
    
  if(is.factor(person))
    {
     # Convert factors to characters
     person  <- as.character(person)
     father  <- as.character(father)
     mother  <- as.character(mother)
     
     # All the codes involved in the pedigree should be present the in
     # person array:
     lev <- unique(person)
     IDcodes <- as.numeric(factor(c(father,mother,person),
                                  levels=lev,exclude=miss.val))
     IDcodes[is.na(IDcodes)] <- 0
    }  
  else
    {     
     # All the codes involved in the pedigree should be present the in
     # person array:
     lev     <- unique(person)
     IDcodes <- as.numeric(factor(c(father,mother,person),
                                  levels=lev,exclude=miss.val))
     IDcodes[is.na(IDcodes)] <- 0
   }
       
  n              <- length(IDcodes)
  n              <- n/3
  father         <- IDcodes[1:n]
  mother         <- IDcodes[(n+1):(2*n)]
  person         <- IDcodes[(2*n+1):(3*n)]

  # Figure out the number of loci that we have, and set up some initial
  # stubs:
  
  nAllele         <- integer(0)
  nLoci           <- (ncol(geno)) / 2
  loci            <- 1:nLoci
  pheno           <- NULL
  phenoCats       <- list(NULL)
  genoMaps        <- list(NULL)
  lengthPhenoCats <- NULL
  lengthGenoMaps  <- NULL
  genoSize        <- 0
  k1              <- 1
  k2              <- 1
  allele.labels   <- list(NULL)
  loci.used       <- NULL

  i               <- 1
  for(j in loci)
    {
     tmp1 <- geno[,2*j-1]
     tmp2 <- geno[,2*j]
     loc  <- locus(allele1=tmp1, allele2=tmp2, 
                   male.code=male.code, female.code=female.code,
                   miss.val=miss.val)
     
     tmp  <- c(loc[,1],loc[,2])
     if(any(tmp[!is.na(tmp)] == 0))
       tmp[!is.na(tmp)] <- tmp[!is.na(tmp)] + 1
     
     tmp[is.na(tmp)] <- 0
     tmp1 <- tmp[1:n]
     tmp2 <- tmp[(n+1):(2*n)]

     # a1 < a2
     a1   <- ifelse(tmp1 < tmp2, tmp1, tmp2)
     a2   <- ifelse(tmp1 < tmp2, tmp2, tmp1)
     a1a2 <- cbind(a1,a2)
     
     # Missing two alleles:
     missingTwo  <-  a2 == 0

     # The following is a placeholder to prevent segmentation faults. It will
     # need to be replaced by a smarter solution when one is devised.
     if(all(missingTwo))
       {
        if(warn.untyped)
          {
           msg <- paste("All sujects in pedigree ",pedigree[1],
                        " are untyped at locus ",j,
                        ".\nNo genotype elimination is possible at ",
                        "this locus.\n",
                     sep="")
           warning(msg)
          }
        next
       }

     loci.used <- c(loci.used,j)
     ##allele.labels[[i]] <- loc@allele.labels
     allele.labels[[i]] <- attributes(loc)$allele.labels
     allele.labels[[i]][length(allele.labels[[i]]) + 1] <-"-1"

     # Tells if any subjects are missing two alleles:
     if(any(missingTwo)) m2 <- TRUE else m2 <- FALSE
         
     missingOne  <- (a1 == 0) & (!(a2 == 0))
     # Tells if any subjects are missing one allele:                        
     if(any(missingOne)) m1 <- TRUE else m1 <- FALSE
                       
     missingZero <- (!missingOne) & (!missingTwo)
     # Tells if any subjects are missing zero alleles:
     if(any(missingZero)) mz <- TRUE else mz <- FALSE
   
     if(all(missingZero))
        { nAllele[i] <- length(unique(c(a1,a2))) }
     else
        { nAllele[i] <- length(unique(c(a1,a2))) - 1 }

     phenoTmp <- gcode(a1,a2)

     if(m1)
       {
        # maxmzp1 means maximum missing zero plus 1
        maxmzp1 = max(phenoTmp) + 1
        phenoTmp[missingOne] = phenoTmp[missingOne] + maxmzp1 
       }

     # The genotypes now all have unique phenotype numbers
     lev      <- unique(phenoTmp)
     phenoTmp <- as.numeric(factor(phenoTmp, levels=lev)) - 1
     pheno    <- cbind(pheno, phenoTmp)

     # Form k-by-2 and k-by-3 matrices with column one containing the phenotype 
     # code and the remaining columns containing the allele codes:

     # Missing zero alleles:
                       
     if(mz)
       {
        strictGenotypes  <- cbind(pheno[missingZero, i], 
                                  matrix(a1a2[missingZero,], ncol=2))

        if(is.vector(strictGenotypes)) 
          strictGenotypes <- matrix(strictGenotypes, nrow=1)
       }

     # Missing one allele:
                       
     if(m1)
       {
        halfGenotypes    <- cbind(pheno[missingOne, i], a2[missingOne])
        if(is.vector(halfGenotypes)) 
          halfGenotypes <- matrix(halfGenotypes, nrow=1)
       }

     # Missing two alleles:
                       
     if(m2)
       {
        missingGenotypes <- cbind(pheno[missingTwo, i], 
                                  matrix(a1a2[missingTwo,], ncol=2))
        if(is.vector(missingGenotypes)) 
          missingGenotypes <- matrix(missingGenotypes, nrow=1)
       }         

     # Need to subset the above three matrices using column 1 as a key and 
     # finding the subset of rows with unique entries in column 1:

     # Missing zero alleles:
     
     if(mz)
       {
        index <- match(unique(strictGenotypes[,1]), strictGenotypes[,1])
        strictGenotypes <- strictGenotypes[index,]
        if(is.vector(strictGenotypes)) 
          strictGenotypes <- matrix(strictGenotypes, nrow=1)
       }

     # Missing one allele:
     
     if(m1)
       {
        index <- match(unique(halfGenotypes[,1]), halfGenotypes[,1])
        halfGenotypes <- halfGenotypes[index,]
        if(is.vector(halfGenotypes)) 
          halfGenotypes <- matrix(halfGenotypes, nrow=1)
       }

     # Missing two alleles:

     if(m2)
       {
        index <- match(unique(missingGenotypes[,1]), missingGenotypes[,1])
        missingGenotypes <- missingGenotypes[index,]
        if(is.vector(missingGenotypes)) 
          missingGenotypes <- matrix(missingGenotypes, nrow=1)
       }

     phenoCats[[k1]]  <- c(if(m2) missingGenotypes[,1],
                          if(m1) halfGenotypes[,1],
                          if(mz) strictGenotypes[,1])
   
     lengthPhenoCats[i] <- length(phenoCats[[k1]])
     k1 <- k1 + 1

     # Form the genotype mappings:

     # Missing two alleles:

     if(m2)
       {
        alleles <- 1:nAllele[i]
        tmp <- expand.grid(alleles,alleles)
        t1 <- tmp[,2]
        t2 <- tmp[,1]
        tmp <- cbind(t1,t2)
        tmp <- tmp[tmp[,1] <= tmp[,2],]
        genoMaps[[k2]] <- as.vector(t(tmp))
        lengthGenoMaps[[k2]] <- length(genoMaps[[k2]])
        k2 <- k2 + 1
       }

     # Missing one allele:

     if(m1)
       {
        alleles <- 1:nAllele[i]
        tmp <- expand.grid(alleles, halfGenotypes[,2])
        genoMaps[[k2]] <- as.vector(t(tmp))
        lengthGenoMaps[[k2]] <- list(rep(2*nAllele[i],
                                     length(halfGenotypes[,1])))
        k2 <- k2 + 1
       }

     # Missing zero alleles:

     if(mz)
       {
        tmp <- strictGenotypes[,2:3]
        genoMaps[[k2]] <- as.vector(t(tmp))
        lengthGenoMaps[[k2]] <- list(rep(2,length(strictGenotypes[,1])))
        k2 <- k2 + 1
       }
    # Size of the memory passed to C for return size:

    genoSize <- genoSize +
                2*sum(as.numeric(missingZero)) +
                2*nAllele[i]*sum(as.numeric(missingOne)) +
                nAllele[i] * (nAllele[i]+1) * sum(as.numeric(missingTwo))

    i <- i + 1
    }

  # Transform the arguments to vectors for passage to the C program
  # "genotype_elimination:
  
  pheno            <- as.vector(pheno)
  phenoCats        <- unlist(phenoCats)
  nLengthPhenoCats <- length(lengthPhenoCats)

  genoMaps         <- unlist(genoMaps)
  lengthGenoMaps   <- unlist(lengthGenoMaps)
  nLengthGenoMaps  <- length(lengthGenoMaps)

  flags            <- c(sort.loci, print.stdio)
  nLoci            <- length(loci.used)

  ans <- 
  .C("genotype_elimination",
     as.integer(person),
     as.integer(father),
     as.integer(mother),
     as.integer(sex),
     as.integer(n),
     as.integer(nLoci),
     as.integer(nAllele),
     as.integer(pheno),
     as.integer(phenoCats),
     as.integer(lengthPhenoCats),
     as.integer(nLengthPhenoCats),
     as.integer(genoMaps),
     as.integer(lengthGenoMaps),
     as.integer(nLengthGenoMaps),
     as.integer(flags),
     err.locus    = as.integer(integer(nLoci)),
     err.relation = as.integer(integer(nLoci)),
     err.spouse   = as.integer(integer(nLoci)),
     err.fatal    = integer(1),
     returnIndex  = as.integer(integer(n*nLoci)),
     returnValue  = as.integer(integer(genoSize)),
     PACKAGE="mend.err")

  if(ans$err.fatal)
    { # Got a memory-related error:
      msg <- paste("Fatal program error detected.  Try decreasing batch.size ",
                   "or checking the\npedigree format of your data.  If the ",
                   "problem persists, please report\nthis error.\n", sep="")
      stop(msg)
    }
     
  genoRet <- list(NULL)
  x = 0;

  for(i in 1:(n*nLoci))
    {
     genoRet[[i]] <- ans$returnValue[(x+1):(x+ans$returnIndex[i])]
     x <- x + ans$returnIndex[i]
    }

  ret <- list(list(NULL))
  names(ret) <- "person"
  
  for(i in 1:n)
    {
     ret$person[[i]] <- list(genoRet[((i-1)*nLoci+1):(i*nLoci)])
     names(ret$person[[i]]) <-"locus"
    }

  names(ret$person)   <- person
  ret$n               <- n
  ret$nLoci           <- nLoci
  ret$loci.used       <- loci.used
  ret$genoLengths     <- ans$returnIndex
  ret$ped             <- ped
  ret$geno            <- geno
  ret$miss.val        <- miss.val
  ret$male.code       <- male.code
  ret$female.code     <- female.code
  ret$pedigree.column <- pedigree.column
  ret$person.column   <- person.column
  ret$labID.column    <- labID.column
  ret$father.column   <- father.column
  ret$mother.column   <- mother.column
  ret$sex.column      <- sex.column
  ret$allele.labels   <- allele.labels
  ret$locus.labels    <- locus.labels
  ret$err.locus       <- ans$err.locus
  ret$err.relation    <- ans$err.relation
  ret$err.spouse      <- ans$err.spouse
 
if(exists("is.R") && is.function(is.R) && is.R())
   class(ret) <- "sge"   
else
   oldClass(ret) <- "sge"

  return(ret)
 }
  
