#!/usr/bin/perl 
use strict;
use warnings;
if(@ARGV<4){
	die "Usage: perl strandbias_extract.pl <input file>  <r_cutoff> <r_cutoff2> <pileup file>or gunzip -c pileup | perl fetch_pos.pl <locus.txt> - \n";

}

my @pos;
my $ls;
my $FP;
open($FP,$ARGV[0]) || die "cannot open location file\n";
# The input file should be 1_324353_A_G 

while($ls=<$FP>){
   if($ls=~m/_/){
	$ls=~s/^chr//i;
	$ls=~s/^X/23/i;
	$ls=~s/^Y/24/i;
	$ls=~s/\r|\n//g; #remove all carriage or return
	$ls=~s/_/\t/g;
	push @pos, $ls;
   }
}

my $r_cutoff="$ARGV[1]";
my $r_cutoff2="$ARGV[2]";
close($FP);

print STDERR "finish read in input file, and we have ", scalar(@pos) ," entries\n";
my $stdin=0;
if($ARGV[3] eq "-"){
	# read from stadard input stdin;
	$FP=*STDIN;
	$stdin++;
	print STDERR "STDIN: $stdin\n";
}else{
	open($FP,$ARGV[3]) || die "cannot open pileup file\n";
}
print STDERR "===========================================================\n";
my $ind=0;
my $cur_chr="";
$|=1;
#open(FP2, ">$ARGV[0].original.pileup") || die "cannot open $ARGV[0].original.pileup\n";


my @des=split(/\t/,$pos[0]);
my $des_p=$des[0]."\t".$des[1];
my ($loci,$newdes_p);
my $string;
my (@p,@pileup,@newdes);
print "ID\ttotal_DP\tObs\tA+\tA-\tC+\tC-\tG+\tG-\tT+\tT-\tN+\tN-\tSBR_ref\tSBR_alt\tBias\n";
while($ls=<$FP>){
	#print STDERR ".";
	$ls=~s/^chr//;
	$ls=~s/^X/23/i;
	$ls=~s/^Y/24/i;
	@pileup=split(/\t/,$ls);
	if($pileup[0] ne $cur_chr) {$cur_chr=$pileup[0];print STDERR "\n$cur_chr\n";} 
	$loci=$pileup[0]."\t".$pileup[1];
	if($loci eq $des_p){
SUCCESS:	print STDERR "!";
		#print STDERR "found. current $ind: $des_p;\n";
		@p=split("\t",$ls);
		# input sample : 1    12153   T       1       ^~.     b       ~	
		# whatever in the input file (id \t extra info) \t  counts \t pileup sequence  
		# $pos[$ind] sample: 1	12153	A	G	low(extra)
		$string=$pos[$ind]."\t".$p[3]."\t".$p[4];
		# $string sample: 1       12153   A       G	low(extra)	 1	^~.	
		str_process($string);
		#print STDERR "\nstring:",$string,"\n";
		#print FP2 $ls;
		#print STDERR "$des_p\n";
		# get_next_ind() will not update the value of @des and $des_p, just return next valid $ind;
		#print STDERR "$string\nF:before getnext $ind; $pos[$ind]\n";
		#print STDERR "\nF:before getnext $ind; $pos[$ind]\n";
		while(!get_next_ind($ind,$string)){$ind++;}
		$ind++;
		if($ind == @pos) {print STDERR "Done!\n"; last;} 
		#print STDERR "F:after getnext $ind; $pos[$ind]\n";
                @des=split(/\t/,$pos[$ind]);
		$des_p=$des[0]."\t".$des[1];
		
	}else{
		#TEST:
			
	 	if( $des[0] < $pileup[0]  || ( $des[0] == $pileup[0] &&  $des[1]<$pileup[1]) ) {
			#print STDERR "\ndes[$ind]: $des[0], $des[1];  pileup: $pileup[0], $pileup[1] \n";
			#my $nf=$pos[$ind]; 
			#$nf=~ s/\t/_/g ;
			#print"$nf\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\n";
                	print STDERR "\nx@des\n";
			$string=$pos[$ind]."\t"."NA"."\t"."NA";	
			str_process($string);
			#print STDERR "lla: $des_p\n";
			#print STDERR "NF:before getnext $ind; $pos[$ind]\n";
			while(!get_next_ind($ind,$string)){$ind++;} 
			$ind++;
			#print STDERR "$des_p to ";	
			if($ind >= @pos) {print STDERR "Done!\n"; goto DONE;} 
                	@des=split(/\t/,$pos[$ind]);
			$des_p=$des[0]."\t".$des[1];
			#print STDERR "$des_p , $pileup[0], $pileup[1]\n";	
			while( $des[0] < $pileup[0] || ( $des[0] == $pileup[0] && $des[1] <$pileup[1] )){	
				str_process($pos[$ind]."\t"."NA"."\t"."NA");
				while(!get_next_ind($ind,$string)){$ind++;} 
				$ind++;
				#print STDERR "$des_p to ";	
				if($ind >= @pos) {print STDERR "Done!\n"; goto DONE;} 
                		@des=split(/\t/,$pos[$ind]);
				$des_p=$des[0]."\t".$des[1];
				#print STDERR "$des_p , $pileup[0], $pileup[1]\n";	
			}
			if($des[0] == $pileup[0] && $des[1] == $pileup[1]){
				#print STDERR "success, $des[0], $des[1] \n";
				goto SUCCESS;}
			#print STDERR "NF:after getnext $ind; $pos[$ind]\n";
		}
		# if not the above case just go to the next line of the pileup files   
	}
}
DONE: close($FP) unless $stdin;
#if($ind<@pos){print STDERR "starting from the ", $ind+1," th entry, nothing is found in the pileup file: $pos[$ind]\n";}
# if $ind<@pos; print the rest out using NA;
for(my $l=$ind; $l<@pos;$l++){
                	print STDERR "\nx$l:$pos[$l]";
			$string=$pos[$l]."\t"."NA"."\t"."NA";	
			str_process($string);
}
print STDERR "\n";

sub str_process{
	my @bases=("A","C","G","T","N");
	my $ii;	
	my @a=split(/\t/,$_[0]);
	my (%plus,%minus);
	for($ii=0;$ii<5;$ii++){
		$plus{$bases[$ii]}=0;
		$minus{$bases[$ii]}=0;
	}
	my $chr=shift(@a);
	#print STDERR "chr:",$chr,"\n";
	if ($chr==23) {$chr="X";}
	elsif ($chr==24) {$chr="Y";}
	my $pos=shift(@a);
	my $ref=uc(shift(@a)); 
	my $alt=uc(shift(@a));
	my @reads=split(/|/,pop(@a));
	my $total=sprintf("%s",pop(@a));
	my $rest=join("\t",@a); # $rest is extra;
	if($total eq "NA") {
		print "${chr}_${pos}_${ref}_${alt}\t";
		if($rest ne "") { print "$rest\t";}
		print "NA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\n"; return;
	}
	#print "reads: @reads\n";
	for($ii=0; $ii<@reads; $ii++){
		if($reads[$ii] eq "."){
			#print "dot ";
			$plus{$ref}++;		
		}elsif($reads[$ii] eq ","){
			$minus{$ref}++;
		}elsif($reads[$ii]=~/[ACGTN]/ ){
			$plus{$reads[$ii]}++; 
		}elsif($reads[$ii]=~/[acgtn]/){
			$reads[$ii]=uc $reads[$ii]; 
			$minus{$reads[$ii]}++; 

		}elsif($reads[$ii] eq "^"){
                        $ii++; # there was a mapping quality character after ^.
                }elsif($reads[$ii] eq "+" || $reads[$ii] eq "-"  ){
			#while($reads[$ii] ne "." && $ii<@reads){$ii++;}
			my @digit=();
			$ii++;
			while($reads[$ii]=~/[0-9]/){unshift @digit, $reads[$ii]; $ii++;}
			my $d=0; 
			for(my $k=0; $k<@digit; $k++){$d+=$digit[$k]*(10**$k);}
			$ii+=$d-1;
			#print "currnt: $reads[$ii]\n"
		}
	}
	#print "plus: $ref $plus{$ref} \n";	
	print  "${chr}_${pos}_${ref}_$alt\t";
	if($rest ne "") { print "$rest\t";}
	my @seq=("A","C","G","T","N");
	my $tot_depth=0; my $tstr=""; my $base="";
	for(my $l=0; $l<@seq; $l++){
		my $tot_k=$plus{$seq[$l]}+$minus{$seq[$l]}; 
		$tstr=$tstr."\t".$plus{$seq[$l]}."\t".$minus{$seq[$l]};
		$tot_depth+=$tot_k;
	}
	if($plus{"A"}!=0 || $minus{"A"} !=0){$base=$base."A/";}
	if($plus{"C"}!=0 || $minus{"C"} !=0){$base=$base."C/";}
	if($plus{"G"}!=0 || $minus{"G"} !=0){$base=$base."G/";}
	if($plus{"T"}!=0 || $minus{"T"} !=0){$base=$base."T/";}
	if($tot_depth==0){$base="-";}
	my ($sbr_ref,$sbr_alt);
	if($plus{$ref}==0 && $minus{$ref}==0 ){$sbr_ref=1;}
	else{$sbr_ref=($plus{$ref}>$minus{$ref})?$minus{$ref}/$plus{$ref}:$plus{$ref}/$minus{$ref};}
	if($plus{$alt}==0 && $minus{$alt}==0 ){$sbr_alt=0;}
	else{$sbr_alt=($plus{$alt}>$minus{$alt})?$minus{$alt}/$plus{$alt}:$plus{$alt}/$minus{$alt};}
	my $sbr_cutoff;
	if($tot_depth<100) {$sbr_cutoff = $r_cutoff;}
	else {$sbr_cutoff = $r_cutoff2;} 
	print $tot_depth,"\t",$base,$tstr,"\t",sprintf("%.3f",$sbr_ref),"\t",sprintf("%.3f",$sbr_alt),"\t",($sbr_alt>$sbr_cutoff )?"N":"Y","\n";

}

sub get_next_ind{
	my $cur_ind=$_[0];
	my $cur_str=$_[1];

	my $next_ind=$cur_ind+1;
	if ($next_ind >= @pos) { return 1;}  # cur_ind already last one, exit;	
        my @next_des=split(/\t/,$pos[$next_ind]);
	my $ndes_p=$next_des[0]."\t".$next_des[1];
	if($ndes_p eq $des_p){
		$cur_str=~s/$pos[$cur_ind]/$pos[$next_ind]/;
		#print STDERR "NA: $cur_str\n";
		str_process($cur_str);
		return 0;
	}
	else{return 1;}
}

