#!/usr/bin/perl
use strict;
use Getopt::Long;
# first example requires 3906 tagSNPs with 2 OPAs.
# - Analyze Results for 1536,(GoldenGate)
# - Analyze Results for Infinium 7600, 28880,60800 tag SNPs

# Panel sizes (number of SNPs, or number of beads with --infinium) at which a
# snapshot of the cumulative statistics is reported.
my @maxSNPs = (768, 1536, 3072, 7600, 28880, 60800);

# One slot per entry of @maxSNPs; a slot stays 0 until the running total
# first reaches the corresponding panel size.
my @maxUtility = (0) x scalar(@maxSNPs);
my @maxNSNPs   = (0) x scalar(@maxSNPs);
my @maxBINs    = (0) x scalar(@maxSNPs);
my @maxBEADs   = (0) x scalar(@maxSNPs);

# Score-related tallies.
my ($above_score, $below_score_and_in_solution) = (0, 0);

# $nconflicts    : number of conflicting tagSNP (or obligate) before allowing multiple OPAs.
# $nopaconflicts : number of conflicts after allowing spread to multiple OPAs.
my ($nconflicts, $nopaconflicts) = (0, 0);


#			Tabulate, number of bins, number of tags, number of conflicts,utility 
#							Compare utility of top K tags
#								-one tag/bin

# Only works for SNPPicker output.
# Need to run SNPPicker with --computeutilityonly, nOPA==0 to create a file which reflects the other programs.


# Input file naming: one file per chromosome, "<report_prefix>_chr<CHR><report_suffix>".
my $report_prefix ="poland/poland";
my $report_suffix =".out";


my $addlowscoretoexcludes=''; # by default, do not put in exclude list snps below the cutoff... and do not consider them
                              # as a source of conflict (since they would not be selected).
                              # (Parsed from the command line; not read anywhere else in this script.)
my $minscore=0.4;             # SNPs whose score column is >= this value count as "above score".
my $help='';
my $result;
my $position_flag='';
my $infinium_flag=''; # if running an infinium assays .. will count utility using number of beads.
my $maxOPA=1;         # --nOPA; a negative value makes the "after OPA spreading" count equal the "before" count.

# The input file structure must be
#  directory_for_one_pop/source/binfile

# There can be multiple directories for the same pop, e.g. as in the case when one makes one directory
#  per chromosome.

# --allinonefile: when set, this single file is read for every chromosome
# instead of the per-chromosome files.
my $inmayofile='';

$result = GetOptions("help"=> \$help,
                     "report_prefix=s" =>\$report_prefix,
                     "report_suffix=s" =>\$report_suffix,
                     "minscore=f" => \$minscore,
                     "addlowscoretoexcludes" => \$addlowscoretoexcludes,
                     "nOPA=i" => \$maxOPA,
                     "infinium" => \$infinium_flag,
                     "allinonefile=s" => \$inmayofile
                     );
# GetOptions returns false when it met an unknown or malformed option
# (it has already printed the details on STDERR).
die "Error processing input parameters" unless $result;

if($help) {
	usage();
	# Asking for help is not an error: leave with a success status instead of
	# dying with a bare "Died at ..." message.
	exit 0;
}

# Global tallies filled while reading the report files.
my %BINS;        # "chr:binid" -> number of SNP lines seen for that bin
my %UNSEL_BINS;  # bin id -> 1 for bins listed on No_SNP_Selected lines
my %BINHASTAG;   # "chr:binid" -> 1 when a selected tag/obligate covers the bin
my %CATEGORIES;  # note (disposition) string -> number of lines
my %NSITES;      # "chr:binid" -> nSites value of the last line seen for that bin
my %ADDOBLIG;    # obligate name -> raw line, for obligates without a chromosome


#gene name       gene_id snp source      ld bin  snp_id  rsid    chromosome      position        score   num_beads       maf     alleles location
# Illumina Score  nSites  note    populations     OPAid   Pops    utility incrementalUtility
# NOTE(review): the 0-based indices below do not obviously line up with the
# header quoted above -- confirm against a real SNPPicker report.
my $CHR_COL=4;
my $POS_COL=5; # awk column 6
my $OBLIG_NAME_COL=5;
my $NOTE_COL =17;
my $OPA_COL=19; # AWK column 20
my $BINS_COL=15;
my $NUMBEADS_COL = 14;
my $SNPID_COL=6;
my $DBSNPID_COL=7;

my @totallist=(); # list with all selected tagSNPs and obligates.

my $should_have_lines=0;
my $totalTagSNP=0;
my $totalInputSnps=0;
my $totalUnselectedBins=0;

my @chrs = ("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","X","Y");
my @emptybins;   # "chr:bins" entries taken from No_SNP_Selected lines
my @alllist=();  # every processed SNP line (array refs), selected or not
my %HAVESNP2;    # snp id / "chr:pos" -> line ref of selected SNPs already seen
my %HAVESNP;     # snp id / "chr:pos" / dbSNP id -> 1 once a line was accepted
my %DUPSNP;      # selected SNP ids seen more than once
my %HAVESCORE;   # SNP ids with score >= $minscore
my $nredundant=0;
my $binexcludedFromScore=0;
print "maxOPA=$maxOPA\n";

# Side outputs written to the current directory.  Fail loudly when they cannot
# be created; otherwise every later print to these handles is silently lost.
open(FSCORE, ">", "kept_score") or die "Cannot open kept_score for writing: $!";
open(FTOT, ">", "allinput") or die "Cannot open allinput for writing: $!";
# Main pass: one iteration per chromosome in @chrs.
#
# For each chromosome the matching report file
# ("<report_prefix>_chr<CHR><report_suffix>", or the single --allinonefile file)
# is read and
#   1. every data line is normalised (ids, chromosome) and classified by its
#      note column, feeding the global tallies (%CATEGORIES, %BINS, %NSITES,
#      %BINHASTAG, %UNSEL_BINS, @emptybins, @alllist, ...);
#   2. the kept lines are sorted by position and scanned for selected SNPs whose
#      positions are less than 60 apart (position conflicts), updating
#      $nconflicts and $nopaconflicts;
#   3. the selected tags/obligates are appended to @totallist.
for(my $ichr =0;$ichr<=$#chrs;$ichr++) {
	my $tchr = $chrs[$ichr];
	$tchr =~ tr/a-z/A-Z/;
	my $chrfile = "$report_prefix" . "_chr$tchr$report_suffix";
	# --allinonefile overrides the per-chromosome name; the same file is then
	# re-read for every chromosome and filtered on its chromosome column.
	my $mayofile;
	if(length($inmayofile)>0) {
		$mayofile=$inmayofile;
	} else {
		$mayofile = $chrfile;
	}
	if((-f $mayofile) &&( -e $mayofile)) {
		# Three-argument open so the file name is never parsed as a mode, and a
		# visible warning instead of silently treating the file as empty.
		if(!open(FIN, "<", $mayofile)) {
			warn "Cannot open $mayofile: $!\n";
			next;
		}
		my $header=<FIN>; # first line is the column header and is discarded
		if($header) {
			my $l="";
			my @lines;          # lines kept for the conflict scan of this file
			my $nlines=0;
			my $nlines_kept=0;
			while ($l=<FIN>) {
				$nlines++;
				chomp $l;
				my @a=split(/\t/,$l);
				my $processSNP=0;
				my $ochr = $a[$CHR_COL];
				$ochr =~ tr/a-z/A-Z/;
				$a[$CHR_COL]=$ochr;
				my $pos_id = $ochr . ":" . $a[$POS_COL];
				my $snpid = $a[$SNPID_COL];
				$snpid =~ s/ //g;
				my $dbsnpid = $a[$DBSNPID_COL];
				$dbsnpid =~ s/ //g;
				# "null" and "NA" both mean "no identifier".
				if($snpid eq "null") {
					$snpid="";
				}
				if($snpid eq "NA") {
					$snpid="";
				}
				if($dbsnpid eq "null") {
					$dbsnpid="";
				}
				if($dbsnpid eq "NA") {
					$dbsnpid="";
				}

				# Fill a missing id from the other column: a missing SNP id takes
				# the dbSNP id; a missing dbSNP id takes the SNP id only when that
				# looks like an rs number.
				if(length($snpid)==0 && length($dbsnpid)>0) {
					$snpid=$dbsnpid;
				} elsif (length($dbsnpid)==0 && length($snpid)>0 && $snpid =~ /^rs[0-9]+$/) {
					$dbsnpid = $snpid;
				}
				$pos_id =~ s/ //g;

				# Last resort: identify the SNP by "chr:pos".
				if(length($snpid)==0) {
					$snpid=$pos_id;
				}
				$a[$SNPID_COL]=$snpid;
				$a[$DBSNPID_COL]=$dbsnpid;

				# if both snpid and dbsnpid==null ==> No_SNP_Selected line
				my $tagFlag = $a[$NOTE_COL];
				# if same chromosome or unknown chromosome and unknown SNPid or SNPid we have not processed.
				if(($ochr eq $tchr  || length($ochr)==0) && (!($ochr eq "-99") ) && ( $tagFlag =~ /^No_SNP_Selected.*/ || !exists $HAVESNP{$snpid})) {
					if(length($snpid)>0 && !($tagFlag =~ /^No_SNP_Selected.*/)) {
						$HAVESNP{$snpid}=1;$HAVESNP{$pos_id}=1;$HAVESNP{$dbsnpid}=1;
					}

					my $oblig_name = $a[$OBLIG_NAME_COL]; # name is position
					my $note = $a[$NOTE_COL];
					if($note =~ /^obligate.*/) { # Account for obligate only once.
						if($ochr eq $tchr) {
							$processSNP=1;
							push(@alllist,\@a);
						} elsif(length($ochr)==0 || $ochr eq "-99") {
							if(exists $ADDOBLIG{$oblig_name}) {
								$processSNP=0;
							} else { # only process first occurence of tag SNP without chromosome.
								$ADDOBLIG{$oblig_name}=$l;
								$processSNP=1;
								$a[$CHR_COL]=$tchr; ### Needed in report below..
								push(@alllist,\@a);
							}
						} else  {
							$processSNP=0;
						}
					} elsif (!($tagFlag =~ /^No_SNP_Selected.*/)) {
						$processSNP=1;
						$a[$CHR_COL]=$tchr; # force it.
						push(@alllist,\@a);
					}
					if($processSNP ==1 || ($tagFlag =~ /^No_SNP_Selected.*/)) {
						$nlines_kept++;
						if(length($tagFlag)==0) {
							$tagFlag = "not a tag";
						} elsif ($tagFlag eq "tagSNP") {
							$totalTagSNP++;
						}
						if(!($tagFlag =~ /^No_SNP_Selected.*/)) {
							$totalInputSnps++;
							print FTOT "$pos_id\n";

						} elsif (($tagFlag =~ /^No_SNP_Selected.*/) && length($a[$BINS_COL])>0 && !($a[$BINS_COL] =~ /^obligate.*/ ) ) {
							# Empty bin: the note is "No_SNP_Selected" followed by
							# comma-separated reasons.
							my @unsb = split(",",$a[$BINS_COL]);
							my @alltags = split(",",$tagFlag);
							my $nexcl_from_score=0;
							for(my $it=1;$it<=$#alltags;$it++) {
								my $ttag =$alltags[$it];
								if ($ttag eq "excluded_lowscore" || $ttag eq "excludedbadilluminascore") {
									$nexcl_from_score++;
								}
							}
							# Every listed reason was a score exclusion.
							if($nexcl_from_score>0 && $nexcl_from_score == $#alltags) {
								$binexcludedFromScore++;
							}
							for(my $ii=0;$ii<=$#unsb;$ii++) {
								my $ubin = $unsb[$ii];
								my @binspl = split(";",$ubin);
								my $tbin = $binspl[1]; # bin id is the second ";" field
								if(!exists $UNSEL_BINS{$tbin}) {
									$totalUnselectedBins++; # unselected bins that are not fake obligate bins";
									# Bug.. this may count some bins twice
									$UNSEL_BINS{$tbin} =1;
									push(@emptybins,$a[$CHR_COL] . ":" . $a[$BINS_COL]);
								}
							}
						}

						my $ncat = 0+$CATEGORIES{$tagFlag};

						$CATEGORIES{$tagFlag} = 1+$ncat;

						if(length($snpid)>0 && (! ($tagFlag =~ /^No_SNP_Selected.*/))) { # e.g. only real SNPs, no empty bins aka No_SNP_Selected
							my $score =$a[13];
							if(length($score)==0) {
								$score = $minscore; # a missing score passes the cutoff
							}
							if($score>=$minscore) {
								push(@lines,\@a);
								if(! exists $HAVESCORE{$snpid}) {
									$above_score++;
									print FSCORE "$snpid\n";
									$HAVESCORE{$snpid}=1;
								}
							} else {
								if($tagFlag eq "tagSNP") {
									$below_score_and_in_solution++; # that's a bug.
									push(@lines,\@a);
								}
							}


							# To get accurate statistics, we need information about the number of sites/bin
							# in addition to the tag-related information.
							my $chr = $a[$CHR_COL];
							$chr =~ tr/a-z/A-Z/;
							$a[$CHR_COL]=$chr;
							my @binids = split(",",$a[15]);
							for(my $ii=0;$ii<=$#binids;$ii++) {
								my $ubin = $binids[$ii];
								my @binspl = split(";",$ubin);
								my $tbin = $binspl[1];
								$binids[$ii]=$tbin;
							}

							my @hugos = split(",",$a[0]);
							my @geneids = split(",",$a[1]);
							my @sources = split(",",$a[2]);
							my @nsites = split(",",$a[16]);
							if($#binids>=0) {
								# A single value in a per-bin column applies to every bin of the line.
								if($#hugos==0) { my $hugo0 = $hugos[0];@hugos = map {$hugo0} @binids;}
								if($#geneids==0) { my $geneid0 = $geneids[0];@geneids = map {$geneid0} @binids;}
								if($#sources==0) { my $source0 = $sources[0];@sources = map {$source0} @binids;}
								if($#nsites==0) { my $nsite0 = $nsites[0];@nsites = map {$nsite0} @binids;}

								for(my $kk=0;$kk<=$#binids;$kk++) {
									my $binid = $binids[$kk];
									if(length($binid)>0 && !($binid =~ /^obligate.*/)) {
										my $uniq_binid =  $chr . ":" . $binid;
										my $ntags4bin=0;
										if(exists $BINS{$uniq_binid}) {
											$ntags4bin = $BINS{$uniq_binid};
										}
										$BINS{$uniq_binid} = 1+$ntags4bin;
										$NSITES{$uniq_binid} = $nsites[$kk];
										if(!($tagFlag =~ /^No_SNP_Selected.*/)) {
											if(($tagFlag eq "obligate") || ($tagFlag eq "fixed_proximity") || ($tagFlag eq "obligate_fixed_proximity")
											   || ($tagFlag eq "multi-SNP test") || ($tagFlag eq "tagSNP")  ) {
												$BINHASTAG{$uniq_binid}=1;
											}
										}
									}
								}
							}
						}
					}
				} # chromosome.
			} # while FIN
			print "read nlines = " . $nlines . ", Kept = $nlines_kept  lines\n";
			close FIN;

			# Sort all SNPS (tags and obligates) by chromosome position and by position.
			# Every line kept above carries the current chromosome, so in practice
			# only the position comparison decides the order.
			my @slines = sort {if($a->[$CHR_COL] eq $b->[$CHR_COL]) {
						if($a->[$POS_COL] == $b->[$POS_COL]) {
							return 0;
						} elsif($a->[$POS_COL]<$b->[$POS_COL]) {
							return -1;
						} else {
						   return 1;
						}
					     } elsif ($a->[$CHR_COL]<$b->[$CHR_COL]) {return 1;} else {return -1;}
				      }
				   @lines;

			my $lastchr="";
			my @lastfewpos=();    # positions of preceding selected SNPs still within range
			my @lastfewOPA=();    # meant to parallel @lastfewpos; see NOTE(review) below
			my %cOPAs=();         # OPA id -> conflict count within the current cluster
			my @taglist=();       # selected tags/obligates of this file
			my $countPerCluster=0;

			# Close the current cluster of neighbouring SNPs: add its counts to the
			# global totals and reset the per-cluster state.
			my $flush_cluster = sub {
				for my $opa (keys %cOPAs) {
					if($cOPAs{$opa}>1) {
						$nopaconflicts+=$cOPAs{$opa};
					}
				}
				if($countPerCluster>0) {
					# +1 for the first SNP of the cluster, which saw no neighbour itself.
					$nconflicts+=(1+$countPerCluster);
				}
				%cOPAs = ();
				$countPerCluster=0;
			};

			for(my $i=0;$i<=$#slines;$i++) {
				my $line_ref = $slines[$i];
				my $schr = $line_ref->[$CHR_COL];
				my $pos = $line_ref->[$POS_COL];
				my $snpid = $line_ref->[$SNPID_COL];
				my $pos_id = $schr . ":" . $pos;
				my $tagFlag = $line_ref->[$NOTE_COL];
				my $opaID = $line_ref->[$OPA_COL];
				if(!($schr eq $lastchr)) {
					@lastfewpos=();
				}
				$lastchr = $schr;
				if(length($snpid)==0) {
					$snpid=$pos_id;
				}
				# Only selected SNPs take part in the conflict scan.
				if((($tagFlag eq "tagSNP") || ($tagFlag =~ /^fixed.*/) || ($tagFlag =~ /^obligate.*/)) && !( $tagFlag =~ /.*excluded.*/)) {
					push (@taglist,$line_ref);
					if(exists $HAVESNP2{$snpid}) {
						print "DUP::" . join(",",@$line_ref) . "\n";
						$DUPSNP{$snpid}="1";
						$nredundant+=1;
					}
					$HAVESNP2{$snpid}=$line_ref;
					$HAVESNP2{$pos_id}=$line_ref;

					my $conflict=0;
					# No auto-increment: $j only advances when the entry is kept.
					for(my $j=0;$j<=$#lastfewpos;) {
						if($pos-$lastfewpos[$j]<60) {
							$conflict+=1; # Count additiont to the pair
							print "conflict = $snpid at " . join(";",@lastfewpos) . "\n";
							# NOTE(review): @lastfewOPA is never filled (the push
							# at the end of this block is commented out), so this
							# compares $opaID with undef and is only true when
							# $opaID is numerically 0.  Confirm whether the push
							# should be restored.
							if($opaID == $lastfewOPA[$j]) {
								my $nConflictsInOPA = $cOPAs{$opaID};
								if($nConflictsInOPA +1 ==1) {
									# first conflict in this OPA involves two SNPs
									$nConflictsInOPA=2;
								} else {
									$nConflictsInOPA+=1;
								}
								$cOPAs{$opaID}=$nConflictsInOPA;
							}
							$j++;
						} else {
							# Delete from list, since "out of range"
							splice(@lastfewpos,$j,1);
							splice(@lastfewOPA,$j,1);
						}
					}
					if($conflict>0) {
						$countPerCluster++;
					}
					# No neighbour left in range: the previous cluster is complete.
					if($#lastfewpos == -1) {
						$flush_cluster->();
					}
					push(@lastfewpos,$pos);
					#push(@lastfewOPA,$opaID);
				} # if tagSNP fixed obligate

			}
			# Flush the cluster that is still open at the end of the file.  This
			# used to happen only when the very last sorted line was itself a
			# selected SNP, so trailing conflicts could go uncounted.
			$flush_cluster->();

			if($#taglist>=0) {
				$should_have_lines += (1 + $#taglist);
				push(@totallist,@taglist);
			}
		} else {
			close FIN;
		}


	} # if file exists
} # loop over chromosomes

# With a negative --nOPA the "after OPA spreading" count is simply the raw
# conflict count.
$nopaconflicts = $nconflicts if $maxOPA < 0;

# Now resort by incremental utility as set SNP totals.
#.. for selected tag SNPs.

# Selected obligates come first in the solution ...
my @soln_lines = grep { $_->[$NOTE_COL] =~ /\A(?:obligate|obligate_fixed_proximity)\z/ } @totallist;
# ... rejected obligates are only counted ...
my @lost_obl = grep { $_->[$NOTE_COL] =~ /^obligate_excluded/ } @alllist;
# ... and freely chosen tags follow, ordered below.
my @goodlines = grep { $_->[$NOTE_COL] =~ /\A(?:tagSNP|fixed_proximity)\z/ } @totallist;

# Descending numeric order on column 21.
my @ulines = sort {
		  $a->[21] == $b->[21] ? 0
		: $a->[21] <  $b->[21] ? 1
		:                        -1
	} @goodlines;

print "total SNPs in report = $totalInputSnps\n";
print "total SNPs with status in report = " . scalar(@alllist) . "\n";

print "total selected SNPs in report = " . scalar(@totallist) . "( and should be ==)$should_have_lines)\n";

print "found selected obligates = " . scalar(@soln_lines) . "\n";
print "found rejected obligates = " . scalar(@lost_obl) . "\n";
print "found tagSNPs+fixed_proximity = " . scalar(@ulines) . "\n";
print "total freely selected tags-only SNPs = $totalTagSNP\n";
# From here on @soln_lines is the full solution: obligates, then tags.
push(@soln_lines, @ulines);
print "found tags+obligates+fixed_proximity = " . scalar(@soln_lines) . "\n";
# Walk the solution in order, accumulating utility, beads and covered bins,
# and take a snapshot each time a panel size of @maxSNPs is first reached.
my $tot_util=0;
my $totbeads=0;
my $totsnps=0;
my $nredundant2=0;

my %NBINS;    # number of bins touched -> number of SNPs touching that many
my %CURBINS;  # "chr:binid" -> 1 for every bin covered so far
my %USEDSNP;  # SNP ids already counted
my %DUPSNPS;  # SNP ids met a second time in the solution
SNP: for my $idx (0 .. $#soln_lines) {
	my $row = $soln_lines[$idx];
	my $id  = $row->[$SNPID_COL];

	# A SNP listed twice only contributes once.
	if (exists $USEDSNP{$id}) {
		$DUPSNPS{$id} = "1";
		$nredundant2 += 1;
		next SNP;
	}

	$tot_util += $row->[23];

	# Column 15 holds comma-separated entries whose second ";" field is the bin id.
	my $chrom = $row->[$CHR_COL];
	my $nbins = 0;
	for my $entry (split(",", $row->[15])) {
		my @parts = split(";", $entry);
		my $binid = $parts[1];
		next unless length($binid) > 0;
		next if $binid =~ /^obligate.*/; # don't count as bins obligate-only singleton bins.
		$CURBINS{$chrom . ":" . $binid} = 1; # do not need to add the source for unique name.
		$nbins++;
	}

	# Without --infinium every SNP costs exactly one bead.
	$totbeads += $infinium_flag ? $row->[$NUMBEADS_COL] : 1;
	$totsnps++;

	# do not count empty bins as a SNP .
	my $disposition = $row->[$NOTE_COL];
	if ($nbins != 0 && $disposition !~ /^No_SNP_Selected.*/ && $disposition !~ /^non_tag.*/) {
		$NBINS{$nbins} = 1 + (0 + $NBINS{$nbins});
	}

	my $reached = $infinium_flag ? $totbeads : $totsnps;
	for my $slot (0 .. $#maxSNPs) {
		next unless $reached >= $maxSNPs[$slot];
		# A recorded utility of zero means "slot not filled yet".
		next unless $maxUtility[$slot] + 1 == 1;
		$maxUtility[$slot] = $tot_util;
		$maxNSNPs[$slot]   = $idx + 1;
		$maxBINs[$slot]    = scalar(keys %CURBINS);
		$maxBEADs[$slot]   = $totbeads;
	}
	$USEDSNP{$id} = 1;
}

#
#
# print report
#
#
# The side files are complete once all input has been read.
close FSCORE;
close FTOT;

for my $stat (
	[ "SNPs above score",                              $above_score ],
	[ "SNPs below score and in solution",              $below_score_and_in_solution ],
	[ "Position conflict SNPs (before OPA spreading)", $nconflicts ],    # before allowing multiple OPAs
	[ "Position conflict SNPs (after OPA spreading)",  $nopaconflicts ], # after spreading to multiple OPAs
) {
	print $stat->[0] . " = " . $stat->[1] . "\n";
}


# One line per panel size, from the snapshots taken while accumulating utility.
for my $j (0 .. $#maxSNPs) {
	my $nbeads  = $infinium_flag ? $maxBEADs[$j] : $maxSNPs[$j];
	my $utility = $nbeads > 0 ? $maxUtility[$j] / $nbeads : 0;
	print "For panel of size=$maxSNPs[$j], nBEADS = $maxBEADs[$j], selected nSNPS= $maxNSNPs[$j], with expected total utility= $maxUtility[$j] , and  specific utility = $utility, and bins covered = $maxBINs[$j]\n";
}

# Overall utility per bead (--infinium) or per SNP.
my $utility=0;
if($totbeads>0) {
	$utility = $infinium_flag ? $tot_util/$totbeads : $tot_util/$totsnps;
}

my @numtagged = keys %BINHASTAG;
my @numbins = keys %BINS;
my @selectedbins = keys %CURBINS;
my @dupsnps1 = keys %DUPSNPS; # duplicates found while accumulating utility
my @dupsnp1 = keys %DUPSNP;   # duplicates found while reading the report files
my $ndups1 = scalar(@dupsnps1);
my $ndups2 = scalar(@dupsnp1);

printf "Total: nBEADS = %s, unique selected nSNPs= %s, with expected total utility= %s, and specific utility = %s , and bins covered = %s\n",
	$totbeads, $totsnps, $tot_util, $utility, scalar(@numbins);

printf "Number of bins = %s\n", scalar(@numbins);
printf "Number of tagged bins = %s[%s]\n", scalar(@numtagged), scalar(@selectedbins);
printf "Number of untagged bins = %s\n", scalar(@numbins) - scalar(@numtagged);
printf "Number of unselected bins =%s\n", $totalUnselectedBins;
printf "Number of bins excluded from low scoring or bad scoring SNPs = %s\n", $binexcludedFromScore;

# The next two pairs of lines share a label: the first of each pair comes from
# the utility accumulation, the second from the file-reading pass.
printf "Number of duplicate selected (uniq) SNPS = %s\n", $ndups1;
printf "Number of duplicate selected (uniq) SNPS = %s\n", $ndups2;
printf "Number of duplicate selected redundant SNPS =%s\n", $nredundant;
printf "Number of duplicate selected redundant SNPS =%s\n", $nredundant2;
printf "Dups = %s\n", join(",", @dupsnps1);

# Return, in sorted order, the non-empty strings that occur exactly once in
# the argument list.
my $seen_once = sub {
	my %times;
	$times{$_}++ for @_;
	return grep { $times{$_} == 1 && $_ ne "" } sort keys %times;
};

# A bin that is also a key of %BINHASTAG shows up twice in the combined list;
# whatever shows up once is a bin without a tag.
my @combined = sort(@numbins, @numtagged);
my @nobins = $seen_once->(@combined);

# Merge with the bins reported on No_SNP_Selected lines and again keep only
# the entries that occur once.
my @bothuntagged = sort(@nobins, @emptybins);
print "---------------Untagged Bins------------\n";
for my $bin ($seen_once->(@bothuntagged)) {
	print $bin . "\n";
}


#XXXXXXXXXXXX  print CATEGORIES REPORT
# Per-disposition line counts.  Keys are sorted so that the report is identical
# from run to run (Perl's hash order is not stable).
my @cats = sort keys %CATEGORIES;
for my $cat (@cats) {
	print "Snp disposition: " . $cat . " = " . $CATEGORIES{$cat} . "\n";
}



# Distribution of the number of bins covered per selected SNP, in ascending
# numeric order of the bin count.
my @bincats = sort { $a <=> $b } keys %NBINS;


for my $nb (@bincats) {
	if($nb ==0) {
		print "Number of snps touching  0 bins [obligate in proximity to other obligates got assigned a fake bin] = " . $NBINS{$nb} . "\n";
	} else {
		print "Number of snps including " . $nb . " bins = " . $NBINS{$nb} . "\n";
	}
}


# Dump the ids of all SNPs that passed the score cutoff to the file "f1",
# one per line (no trailing newline), in sorted order.
my @goodscores = sort keys %HAVESCORE;
open(F1, ">", "f1") or die "Cannot open f1 for writing: $!";
print F1 join("\n",@goodscores);
# Buffered write errors only surface at close time.
close(F1) or die "Cannot close f1: $!";

# Print a summary of the command-line options to STDOUT.
# Takes no arguments; the return value is not meaningful.
# (Declared without a prototype: the sub is called before its definition is
# seen, so a prototype would be ignored anyway.)
sub usage {
	print "Usage: $0 [options]\n";
	print "Summarizes SNPPicker report files, one per chromosome (1-22, X, Y).\n";
	print "Options:\n";
	print "  --help                   print this message and exit\n";
	print "  --report_prefix=STRING   input files are <prefix>_chr<CHR><suffix> (default: poland/poland)\n";
	print "  --report_suffix=STRING   suffix of the input files (default: .out)\n";
	print "  --allinonefile=FILE      read this single file for every chromosome instead\n";
	print "  --minscore=FLOAT         score cutoff used for the 'above score' counts (default: 0.4)\n";
	print "  --nOPA=INT               number of OPAs (default: 1); a negative value reports the raw\n";
	print "                           conflict count as the count after OPA spreading\n";
	print "  --infinium               measure panel size and utility in beads instead of SNPs\n";
	print "  --addlowscoretoexcludes  accepted, but not used by this script\n";
	return;
}
