#!/usr/bin/perl
# Jeff Nie
# 08/16/2013
# allow both input files with duplicated keys (see example files at end)
use strict;
use warnings;

# Annotate a tab-delimited input file with the matching rows of a
# tab-delimited reference file. Rows are joined on a key column; keys may
# repeat in either file, and each input row is written once per reference
# row that shares its key. Input rows without a match are written once,
# padded with four "0" columns. Lines starting with "#" are skipped in both
# files, and trailing carriage returns are stripped.

# Options arrive as flag/value pairs. A dangling flag is padded with undef so
# the hash assignment stays balanced.
my @cli = @ARGV;
push @cli, undef if @cli % 2;
my %args = @cli;

my $infile     = $args{'-i'};
my $rec_file   = $args{'-r'};
my $infile_col = $args{'-ic'} || 0;    # key column of the input file, 0-based
my $rec_col    = $args{'-rc'} || 0;    # key column of the reference file, 0-based

unless ($infile and $rec_file) {
    usage();
    exit;
}

# Column numbers are used as array indices, so anything that is not an
# integer would silently fall back to column 0. A negative value counts from
# the last column, as with any Perl array index.
for my $col ($infile_col, $rec_col) {
    die "column number '$col' is not an integer!\n"
        unless $col =~ /\A-?\d+\z/;
}

my $outfile = $args{'-o'} || $infile . "_ann.txt";

# Three-argument open keeps characters such as ">" or "|" in a file name from
# being interpreted as an open mode. Both inputs are opened before the output
# so a missing input never truncates an existing output file.
open(my $in_fh,  '<', $infile)   or die "cannot open infile $infile: $!\n";
open(my $rec_fh, '<', $rec_file) or die "cannot open record $rec_file: $!\n";
open(my $out_fh, '>', $outfile)  or die "cannot open output file $outfile: $!\n";

# Index the reference file: key => list of complete reference lines.
my %rows_for;
while (my $rec_line = <$rec_fh>) {
    chomp $rec_line;
    next if $rec_line =~ /^\#/;
    $rec_line =~ s/\cM*$//;

    # Limit -1 keeps trailing empty fields, so an empty last column is a
    # defined (empty) key rather than a missing one.
    my @rec_fields = split /\t/, $rec_line, -1;
    my $rec_key    = $rec_fields[$rec_col];

    # A row that has no key column at all (e.g. a blank line) cannot be
    # joined; storing it would file it under the empty-string key.
    next unless defined $rec_key;

    push @{ $rows_for{$rec_key} }, $rec_line;
}

while (my $line = <$in_fh>) {
    chomp $line;
    next if $line =~ /^\#/;
    $line =~ s/\cM*$//;

    my @columns = split /\t/, $line, -1;
    my $key     = $columns[$infile_col];

    if (defined $key and exists $rows_for{$key}) {
        # Write the original line rather than re-joining the split fields so
        # that trailing empty columns survive and matched rows stay aligned
        # with unmatched ones.
        for my $match (@{ $rows_for{$key} }) {
            print {$out_fh} "$line\t$match\n";
        }
    }
    else {
        print {$out_fh} "$line\t0\t0\t0\t0\n";
    }
}

close $in_fh;
close $rec_fh;

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so this close is checked.
close $out_fh or die "cannot close output file $outfile: $!\n";
# Print the command-line help text to STDOUT.
# Takes no arguments and returns nothing; the caller decides whether to exit.
# The defaults described here mirror the script's option handling: both key
# columns default to 0 and the output file defaults to "<infile>_ann.txt".
sub usage {
    print <<"USAGE";
Usage:
  $0  -i infile -r referenceFile [-ic keyColInInfile] [-rc keyColInRefFile] [-o outputFile]
      
      -i input file path
      -ic column number (starting from 0) of the input file that holds the key for comparison. default 0.
      -r reference file path
      -rc column number (starting from 0) of the reference file that holds the key for comparison. default 0.
      -o output file path and name, optional. default <infile>_ann.txt (input file path with "_ann.txt" appended).

Example:   $0 -i /tmp/test -r /tmp/reffile
    or:    $0 -i /tmp/test -ic 2 -r  /tmp/reffile -rc 8  -o test2.mapped 
    

End of Usage.
USAGE
    return;
}

