%- $Author: sinnwell $
%- $Date: 2011/02/25 16:37:23 $
%- $Header: /projects/genetics/cvs/cvsroot/mend.err/man/mend.err.Rd,v 1.3 2011/02/25 16:37:23 sinnwell Exp $
%- $Locker:  $
% 
% $Log: mend.err.Rd,v $
% Revision 1.3  2011/02/25 16:37:23  sinnwell
% long to int, add x.sexcheck, T to TRUE
%
% Revision 1.2  2011/02/16 15:08:20  sinnwell
% add keywords and Folie test suite
%
% 
\name{mend.err}
\alias{mend.err}
\alias{print.mend.err}
\alias{mend.err.engine}
\alias{sge.jackknife}
\alias{sge}
\title{
Check for Mendelian Errors in Pedigree Data
}
\description{
The mend.err() program checks pedigrees for Mendelian Errors and, when
errors are found, systematically jackknifes every typed pedigree member
to determine if eliminating this member will remove all Mendelian 
Errors from the pedigree.
}
\usage{
mend.err(ped,                   geno=NULL,
         pedigree.column=1,     person.column=2,
         labID.column=3,        father.column=4,
         mother.column=5,       sex.column=6,
         locus.start=7,         locus.end=ncol(ped),
         miss.val=c(NA, 0),     male.code=1,
         female.code=2,         warn.untyped=FALSE,
         check.missing=TRUE,       print.no.error=TRUE,
         print.error=TRUE,         print.summary=TRUE,
         print.num.processed=TRUE, sort.pedigree=FALSE,
         batch.size=NULL,       locus.labels=NULL)
}
\arguments{
\item{ped}{
A data.frame or matrix containing the pedigree data.  This data consists
of columns that give the pedigree ID, person ID, lab ID, father ID, 
mother ID, and sex classification.  All of these columns are required 
with the exception of the column listing the lab ID's, which is optional 
(SEE labID.column BELOW).   An arbitrary number of columns containing 
genotype data may also be included in ped, with two columns used per 
locus.  The only requirement is that these genotype columns must be 
consecutive (such as columns 7-12).  If the genotype data is included 
in the value passed to ped, then the argument geno described below must 
be left as NULL.  Currently, only pedigrees without loops that contain a
single pair of ancestral founders are supported.
}
\item{geno}{
A data.frame or matrix containing genotype data (two columns of alleles
per locus). Each row in geno must match each row in ped in a one-to-one 
fashion or erroneous results will be obtained.  If ped contains genotype
data, geno should  be left as NULL.
}
\item{pedigree.column }{
The column index of the pedigree ID column in ped.
}
\item{person.column }{
The column index of the person ID column in ped.
}
\item{labID.column }{
The column index of the lab ID column in ped.  If the pedigree value
assigned to ped does not contain a lab ID column, then labID.column
should be set equal to NULL.
}
\item{father.column }{
The column index of the father ID column in ped.
}
\item{mother.column }{
The column index of the mother ID column in ped.
}
\item{sex.column }{
The column index of the sex classification column in ped.
}
\item{locus.start }{
The column index of the first column in ped that comprises the genotype
data.  If the genotype data is contained in geno rather than ped, the
locus.start value does not need to be altered by the user.
}
\item{locus.end }{
The column index of the last column in ped that comprises the genotype
data.  If the genotype data is contained in geno rather than ped, the
locus.end value does not need to be altered by the user.
}
\item{miss.val }{
Codes that denote missing values in the pedigree data given by ped and
by geno.
}
\item{male.code }{
The code that denotes a sex classification of "male".
}
\item{female.code }{
The code that denotes a sex classification of "female".
}
\item{warn.untyped }{
A logical variable indicating whether a warning should be issued when
all pedigree members are unknown at a locus.  If a locus is unknown for 
all pedigree members, specifying warn.untyped=TRUE will result in a warning
being issued that displays which locus is unknown and in which pedigree 
it resides.  It is recommended that warn.untyped be left as FALSE, since 
unknown loci will be handled appropriately and setting warn.untyped to TRUE
could result in a large number of spurious warnings - especially when 
mend.err() is incorporated into another program or an automated 
environment.
}
\item{check.missing }{
A logical variable indicating whether each pedigree should be checked
for all pedigree members being unknown at all loci.  If check.missing is
specified as FALSE, the user is responsible for ensuring that degenerate 
pedigrees are not passed to mend.err().
}
\item{print.no.error }{
A logical variable indicating whether or not a message should be 
displayed stating that a processed pedigree contained no Mendelian 
Errors.
}
\item{print.error }{
A logical variable that indicates whether the results obtained by 
jackknifing pedigrees with Mendelian Errors should be displayed.
}
\item{print.summary }{
A logical variable that indicates whether summary tables should be 
printed that list the pedigrees with no Mendelian Errors, pedigrees 
with Mendelian Errors, and pedigrees that are missing all genotype 
data.
}
\item{print.num.processed }{
A logical variable that indicates whether the number of erroneous 
pedigrees found and the total number of pedigrees processed should  
be displayed upon program exit.
}
\item{sort.pedigree }{
A logical variable that indicates whether pedigrees should be processed
in sorted order.  If sort.pedigree=TRUE, the pedigrees are sorted based 
upon their pedigree ID's before they are processed and the results are
displayed in sorted order.  If sort.pedigree=FALSE, the pedigrees are 
processed in the order in which they appear in ped, and the results are
also displayed as such.
}
\item{batch.size }{
An integer variable that indicates the number of loci that should be
processed at a time.  The default value of NULL causes all loci to be
handled at once.  Usually, it is desirable to process all loci at one
time, since this leads to more intuitive program output.  But in
situations where computer resources such as memory are constrained, setting
batch.size to a value smaller than the total number of loci will allow
larger pedigree data to be processed.
}
\item{locus.labels}{
A vector of labels that will be used to display the loci at which 
Mendelian Errors are detected.  This vector should be of length equal
to one-half the number of allele columns in the data set since each locus
genotype consists of a pair of alleles.  If locus.labels is left as NULL,
then the column labels of the allele columns will be used for locus display
purposes under the convention that the label of the first of two allele
columns denoting a locus will be used as the label.  If the data set has no
column labels, then the loci will be labeled using the first k positive 
integers on jack-knife result printouts, k being the number of loci present
in the data, and using Loc1, Loc2, ..., Lock (that is, "Loc" followed by
the locus number) on the summary tables returned by the mend.err program.
}
}
\value{
The return value of mend.err() is an object of class "mend.err".  This
object is primarily a list consisting of the following list elements:

\item{ped.no.err}{A data.frame listing the pedigrees with no Mendelian
Errors.}

\item{ped.err}{A data.frame listing the pedigrees with Mendelian
Errors and a printout of the loci with an indicator
as to which loci do and do not have Mendelian Errors.}

\item{ped.missing}{A data.frame listing any pedigrees that were passed to
mend.err() with no genotype/marker data.}

\item{n.ped}{An integer giving the number of pedigrees processed,
including pedigrees with no genotype/marker data.}

\item{n.err}{An integer giving the total number of pedigrees that
were found to have Mendelian Errors.}
}
\section{Side Effects}{
Verbose output detailing which pedigrees (and which loci in these 
pedigrees) contain Mendelian Errors and which do not is printed to the 
screen.  For pedigrees with Mendelian Errors, the pedigree members
whose jackknifing eliminates these errors are also listed.
At the conclusion of mend.err(), several tables are displayed giving 
an overall picture of which pedigrees and loci have genetic 
inconsistencies.  The verbosity of this output may be controlled by
the arguments print.no.error, print.error, print.summary, and
print.num.processed (SEE ABOVE).
}
\details{

}
\section{Notes and warnings}{
1.  The mend.err program should handle both character and numeric values
for all fields (columns) in the data specified as the "ped" and "geno" 
arguments.  It would be advisable, though, to have the person, father,
and mother fields be all one type: either all numeric or all character.

2.  If your data contains character fields and you request that pedigrees
be processed in sorted order by setting sort.pedigree=TRUE, then the pedigree
ID will be sorted in lexicographic order, commonly referred to as
alphabetical or dictionary order.  Therefore, pedigrees with pedigree ID values
that appear to be numeric may not be processed in the expected numeric
ordering when other data fields are of type character.

3.  Mend.err places no hard limits on the length of the numeric or
character data that may be used in the fields in your data.  The only
limitations are the limitations inherent in the S-Plus data modeling
environment, and these limitations are quite generous.

4.  You may use more than one code for missing values in your data.  For 
instance, you may use all four of 0, -2, NA, and "missing" as missing 
values in your data set, but don't use a value as a missing value code 
in one field that is not considered a missing value in other fields.  As 
an example, if you use 0 as a missing value code in the allele fields, 
but 0 is the numeric label of an actual person in the person field, then 
this may potentially cause problems.  Also, the four missing value codes 
just mentioned (0, -2, NA, and "missing") are arbitrary examples; any
code may be used to denote missing values.

5.  A commonly occurring code used to specify missing allele/marker data
is 0, but when single nucleotide polymorphisms (SNP's) are being analyzed,
the values used to represent a typed subject's allele data are often 0 
and 1.  Thus, zero is frequently not intended to denote a missing value when
processing SNP's.  Therefore, when SNP's are present in pedigree data, 
exercise caution to avoid the accidental use of zero as a missing value code.

6.  Mend.err has no predefined limits set upon how much data may be processed
at each invocation of the program - everything scales dynamically at runtime
to handle the amount of data specified by the user.  Unfortunately, computer
hardware does not come equipped with an infinite supply of memory.  It is
therefore possible to exhaust the total amount of memory that a computer 
system will allocate to mend.err, causing the program to terminate before 
successfully completing (this is usually referred to in colloquial terms 
as "crash badly").  The amount of data that will cause the preceding 
condition is highly platform dependent, and it is therefore not possible
to give a general estimate of how much data is too much.  This warning is
not meant as an urgent alert, since it usually takes a very significant amount
of pedigree data to exhaust all available memory.  But if mend.err fails to
run on large volumes of data, it would be advisable to try partitioning one 
large data set into several smaller data sets and using separate runs of 
mend.err on each of these smaller data sets.

7.  To use mend.err to process SAS data sets, please read the sas.get() 
help file in the S-Plus online help system.
}
\section{References}{
[1] Jeffrey R. O'Connell and Daniel E. Weeks.  An Optimal Algorithm for 
    Automatic Genotype Elimination.  "American Journal of Human Genetics,"
    65:1733-1740, 1999.

[2] Kenneth Lange and Tushar Madhu Goradia.  An Algorithm for Automatic
    Genotype Elimination.  "American Journal of Human Genetics," 
    40:250-256, 1987.
}
\seealso{
\code{\link{locus}},
\code{\link{gcode}},
\code{\link{sas.get}}
}
\examples{
data("famMendErr")

### FAM1: THREE LOCI, NO ERRORS: ###
fam1 <- famMendErr[famMendErr$Pedigree==1001,]
fam1
#  Pedigree Person LabID Father Mother Sex a1.1 a2.1 a1.2 a2.2 a1.3 a2.3 
#1     1001      1    11      0      0   1    0    0    1    1    1    0
#2     1001      2    12      0      0   2    0    0    1    2    2    0
#3     1001      3    13      1      2   1    1    2    1    2    3    0
#4     1001      4    14      1      2   1    1    0    1    1    4    0

err1 <- mend.err(fam1)

# PEDIGREE 1001:  No Mendelian Errors detected.

print(err1)

#########################################################################
######################## MENDELIAN ERROR RESULTS ########################
#########################################################################

#<0 = No Mendelian Error, 1 = Mendelian Error>:
#
#The following pedigrees had no Mendelian Errors:
#
#  Pedigree a1.1 a1.2 a1.3 
#1     1001    0    0    0
#
#No pedigrees contained Mendelian Errors.
#A total of 1 pedigree was processed.


### FAM2: THREE LOCI, ONE ERROR: ###
fam2 <- famMendErr[famMendErr$Pedigree==1002,]
fam2
#  Pedigree Person LabID Father Mother Sex a1.1 a2.1 a1.2 a2.2 a1.3 a2.3 
#1     1002      1    11      0      0   1    0    0    2    2    1    0
#2     1002      2    12      0      0   2    0    0    2    2    2    0
#3     1002      3    13      1      2   1    1    2    1    1    3    0
#4     1002      4    14      1      2   1    1    0    1    2    4    0

err2 <- mend.err(fam2)


#PEDIGREE 1002:
#
#At the following loci in pedigree 1002, jackknifing one of the 
#listed subjects will eliminate Mendelian Errors:
#
########################### LOCUS a1.2 ###########################
#Locus a1.2:
#No single subject can be jackknifed to eliminate Mendelian Errors in
#pedigree 1002.

err2

#########################################################################
######################## MENDELIAN ERROR RESULTS ########################
#########################################################################
#
#<0 = No Mendelian Error, 1 = Mendelian Error>:
#
#The following pedigrees had Mendelian Errors:
#
#  Pedigree a1.1 a1.2 a1.3 
#1     1002    0    1    0
#
#There was 1 pedigree with Mendelian Errors.
#A total of 1 pedigree was processed.
#

 ### FAM3: THREE LOCI, THREE ERRORS: ###
fam3 <- famMendErr[famMendErr$Pedigree==1003,]
fam3
#  Pedigree Person LabID Father Mother Sex a1.1 a2.1 a1.2 a2.2 a1.3 a2.3 
#1     1003      1    11      0      0   1    2    2    2    2    2    2
#2     1003      2    12      0      0   2    2    2    2    2    2    2
#3     1003      3    13      1      2   1    1    1    1    1    1    1
#4     1003      4    14      1      2   1    1    2    1    2    1    2

err3 <- mend.err(fam3)


# PEDIGREE 1003:
#
# At the following loci in pedigree 1003, jackknifing one of the 
# listed subjects will eliminate Mendelian Errors:
#
########################### LOCUS a1.1 ###########################
#Locus a1.1:
#No single subject can be jackknifed to eliminate Mendelian Errors in
#pedigree 1003.
#
########################### LOCUS a1.2 ###########################
#Locus a1.2:
#No single subject can be jackknifed to eliminate Mendelian Errors in
#pedigree 1003.
#
########################### LOCUS a1.3 ###########################
#Locus a1.3:
#No single subject can be jackknifed to eliminate Mendelian Errors in
#pedigree 1003.

err3

#########################################################################
######################## MENDELIAN ERROR RESULTS ########################
#########################################################################
#
#<0 = No Mendelian Error, 1 = Mendelian Error>:
#
#The following pedigrees had Mendelian Errors:
#
#  Pedigree a1.1 a1.2 a1.3 
#1     1003    1    1    1
#
#There was 1 pedigree with Mendelian Errors.
#A total of 1 pedigree was processed.


### THREE PEDIGREES, THREE LOCI EACH: ###
famMendErr

#   Pedigree Person LabID Father Mother Sex a1.1 a2.1 a1.2 a2.2 a1.3 a2.3 
# 1     1001      1    11      0      0   1    0    0    1    1    1    0
# 2     1001      2    12      0      0   2    0    0    1    2    2    0
# 3     1001      3    13      1      2   1    1    2    1    2    3    0
# 4     1001      4    14      1      2   1    1    0    1    1    4    0
# 5     1002      1    11      0      0   1    0    0    2    2    1    0
# 6     1002      2    12      0      0   2    0    0    2    2    2    0
# 7     1002      3    13      1      2   1    1    2    1    1    3    0
# 8     1002      4    14      1      2   1    1    0    1    2    4    0
# 9     1003      1    11      0      0   1    2    2    2    2    2    2
#10     1003      2    12      0      0   2    2    2    2    2    2    2
#11     1003      3    13      1      2   1    1    1    1    1    1    1
#12     1003      4    14      1      2   1    1    2    1    2    1    2

errAll <- mend.err(famMendErr)

# PEDIGREE 1001:  No Mendelian Errors detected.
#
# PEDIGREE 1002:
#
# At the following loci in pedigree 1002, jackknifing one of the 
# listed subjects will eliminate Mendelian Errors:
#
########################### LOCUS a1.2 ###########################
# Locus a1.2:
# No single subject can be jackknifed to eliminate Mendelian Errors in
# pedigree 1002.
#
# PEDIGREE 1003:
#
# At the following loci in pedigree 1003, jackknifing one of the 
# listed subjects will eliminate Mendelian Errors:
#
########################### LOCUS a1.1 ###########################
# Locus a1.1:
# No single subject can be jackknifed to eliminate Mendelian Errors in
# pedigree 1003.
#
########################### LOCUS a1.2 ###########################
# Locus a1.2:
# No single subject can be jackknifed to eliminate Mendelian Errors in
# pedigree 1003.
#
########################### LOCUS a1.3 ###########################
# Locus a1.3:
# No single subject can be jackknifed to eliminate Mendelian Errors in
# pedigree 1003.

errAll

#########################################################################
######################## MENDELIAN ERROR RESULTS ########################
#########################################################################
#
#<0 = No Mendelian Error, 1 = Mendelian Error>:
#
#The following pedigrees had no Mendelian Errors:
#
#  Pedigree a1.1 a1.2 a1.3 
#1     1001    0    0    0
#
#The following pedigrees had Mendelian Errors:
#
#  Pedigree a1.1 a1.2 a1.3 
#1     1002    0    1    0
#2     1003    1    1    1
#
#There were 2 pedigrees with Mendelian Errors.
#A total of 3 pedigrees were processed.


### THREE PEDIGREES, THREE LOCI EACH (SUMMARY ONLY): ###
errSummary <- mend.err(famMendErr, print.no.error=FALSE, print.error=FALSE)
errSummary


### THREE PEDIGREES, THREE LOCI EACH, TWO DATASETS (SUMMARY ONLY): ###
# Retrieve the pedigree data:
ped.all <- famMendErr[,1:6]
ped.all

# Retrieve the genotype data:
geno.all <- famMendErr[,7:12]

geno.all

errAll <- mend.err(ped=ped.all, geno=geno.all,
                   print.no.error=FALSE, print.error=FALSE)
errAll


### THREE PEDIGREES, NO LabID COLUMN : ###
famAll.noLab <- famMendErr[,c(1,2,4:12)]
names(famAll.noLab)

#   Pedigree Person Father Mother Sex a1.1 a2.1 a1.2 a2.2 a1.3 a2.3 

err.noLab <- mend.err(famAll.noLab,
                     pedigree.column=1,
                     person.column=2,
                     labID.column=NULL,
                     father.column=3,
                     mother.column=4,
                     sex.column=5,
                     locus.start=6,
                     print.no.error=FALSE,
                     print.error=FALSE)
err.noLab
}
\keyword{}
% docclass is function
% Converted by Sd2Rd version 43267.
