#!/usr/local/bin/perl
#$Author: ecarlson $
#$Date: 2006/05/23 15:34:54 $
#$Header: /people/biostat3/sinnwell/Projects/arp.gee/Make/RCS/parse.merlin.pl,v 1.5 2006/05/23 15:34:54 ecarlson Exp $
#$Locker:  $
#$Log: parse.merlin.pl,v $
#Revision 1.5  2006/05/23 15:34:54  ecarlson
#Fixed a few minor errors
#
#Revision 1.4  2006/05/16 14:36:37  ecarlson
#Modified to allow for output from Merlin 1.0.1 parametric and non-parametric analysis
#
#Revision 1.3  2005/12/15 15:59:44  m000135
#*** empty log message ***
#
#Revision 1.2  2005/12/14 22:38:36  m000135
#Cleaned up some of the file processing code
#This script will now handle merlin output with or without info content
#
#Revision 1.1  2003/01/15 16:30:46  schaid
#Initial revision
#

# Convert MERLIN output file to a structured file that can be input
# to Splus

($filein)  = @ARGV;
$start=0;
$nrec=0;
$parametric=0;
$gotInfo=0;

open (FILEIN, "< $filein" ) || die "Input file $filein could not be opened\n";

while(<FILEIN>){
  chomp(@Fld = split(' ', $_));

  if (?Data File?) {
    $DataFile = $Fld[3];
    next;
  }

  elsif (?Pedigree File?) {
    $PedFile=$Fld[3];
    next;
  }

  elsif(?Missing Value Code?){
    $MissValCode=$Fld[4];
    next;
  }

  elsif(?Map File?){
    $MapFile=$Fld[3];
    next;
  }

  elsif(?Allele Frequencies?){
    $AFreq= $Fld[3].$Fld[4];
    next;
  }

  if($Fld[0] eq "Parametric" && $Fld[1] eq "Analysis,"){
    ## indicate parametric analysis was run;
    $parametric=1;
  }

  if(/Position\s+Info/){
    ## indicate program was run with info content;
    $gotInfo=1;

    $_ = <FILEIN>;  # skip header, then read position & info
    chomp(@Fld = split(' ', $_));

    while (/^\s*\d+\.\d+/) {
      # while line starts with a decimal number (padded by white space);

      push(@info, $Fld[1]);

      $_ = <FILEIN>;  # process next line
      chomp(@Fld = split(' ', $_));
    }
  }

  if($parametric==1){
    # skip 3 header lines
    chomp(@Fld = split(' ', $_));
    while (/^\s*\d+\.\d+/) {
      # while line starts with a decimal number (padded by white space);

      push(@pos, $Fld[0]);
      push(@lod, $Fld[1]);
      push(@alpha, $Fld[2]);
      push(@hlod, $Fld[3]);

      $_ = <FILEIN>;  # process next line
      chomp(@Fld = split(' ', $_));
    }
  }

  elsif (/Pos\s+Zmean\s+pvalue/i){
    # skip 3 header lines
    $_ = <FILEIN>;
    $_ = <FILEIN>;
    $_ = <FILEIN>;
    chomp(@Fld = split(' ', $_));

    while (/^\s*\d+\.\d+/) {
      # while line starts with a decimal number (padded by white space);

      push(@pos, $Fld[0]);
      push(@zmean, $Fld[1]);
      push(@zmeanPval, $Fld[2]);
      push(@delta, $Fld[3]);
      push(@lod, $Fld[4]);
      push(@lodPval, $Fld[5]);

      $_ = <FILEIN>;  # process next line
      chomp(@Fld = split(' ', $_));
    }
  }

}
close(FILEIN);

$nrec = @pos;

print "s.class merlin\n";
print "s.vector dataFile 1 $DataFile \n";
print "s.vector pedFile 1 $PedFile\n";
print "s.vector missValCode 1 $MissValCode\n";
print "s.vector mapFile 1 $MapFile\n";
print "s.vector aFreq 1 $AFreq\n";

if($parametric==1) {
  if($gotInfo==1) {
    print "s.matrix linkage $nrec 5 pos lod alpha hlod info\n";
    for($i=0;$i<$nrec;$i++) {
      printf("%7.3f %7.3f %7.3f %7.3f %7.3f\n",
	     $pos[$i],$lod[$i],$alpha[$i],$hlod[$i],$info[$i]);
    }
  } else {
    print "s.matrix linkage $nrec 4 pos lod alpha hlod\n";
    for($i=0;$i<$nrec;$i++){
      printf("%7.3f %7.3f %7.3f %7.3f\n",
	     $pos[$i],$lod[$i],$alpha[$i],$hlod[$i]);
    }
  }
} else {
  if($gotInfo==1) {
    print "s.matrix linkage $nrec 7 pos zmean zmeanPval delta lod lodPval info\n";
    for($i=0;$i<$nrec;$i++){
      printf("%7.3f %7.3f %7.3f %7.3f %7.3f %7.3f %7.4f\n",
	     $pos[$i],$zmean[$i],$zmeanPval[$i],$delta[$i],$lod[$i],$lodPval[$i],$info[$i]);
    }
  } else {
    print "s.matrix linkage $nrec 6 pos zmean zmeanPval delta lod lodPval\n";
    for($i=0;$i<$nrec;$i++){
      printf("%7.3f %7.3f %7.3f %7.3f %7.3f %7.3f\n",
	     $pos[$i],$zmean[$i],$zmeanPval[$i],$delta[$i],$lod[$i],$lodPval[$i]);
    }
  }
}

