#!/usr/bin/perl
# AA2CD - Program to map AAChange data to protein domain annotations
# K Shameer, X Tang, KR Kalari, JP Kocher 
# Requires two accessory files: a mapping file that maps RefSeq IDs to UniProt IDs, and a table of domain annotations for the human proteome (hmmer / Pfam)
# Version 1 - Nov 1 2012 Developed for mapping SNVs to domains 
# Version 2 - Jan 28 2013 Adapted for inclusion in the eSNV pipeline
# Version 2.1 - Feb 26 : Output was changed as suggested by X Tang 
### Added a parameter to pass the reference file (by Saurabh)
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# ---------------------------------------------------------------------------
# Main program
#
# Arguments (all four required, in this order):
#   1. output folder
#   2. input file from the eSNV pipeline (tab-delimited; first line is a
#      header; the 5th column holds the AAChange value)
#   3. RefSeq -> UniProt mapping file (whitespace-delimited; the 2nd column
#      is used as the accession looked up in the domain table)
#   4. domain coordinate file (whitespace-delimited; columns 2 and 3 are the
#      alignment start/end, column 7 is the hmm name)
#
# STDOUT: one record per input data row, each record *preceded* by a newline,
# plus one final newline at the very end:
#   - "NA\tNA"                                   for an empty AAChange
#   - "<id>\tRefSeq ID not mapped to Uniprot ID" when no mapping line matches
#   - "<AAChange>\t" followed by "<acc>:<hmm name>; " for every domain that
#     contains the residue, or "<acc>:Not in a domain region; " otherwise.
#
# Side files (appended to, under the output folder):
#   <input>_AAC2D_Refseq_no_mapping.txt  - one line per unmapped ID
#   <input>_AAC2D_full_annotation.txt    - AAChange + full domain row per hit
#
# NOTE(review): an AAChange value is split on ':'; field 0 is used as the
# RefSeq lookup key and field 2 as the protein change (e.g. "p.K41R") -
# presumably "<RefSeq>:<cDNA change>:<protein change>"; confirm with the
# upstream eSNV output.
# ---------------------------------------------------------------------------

if (@ARGV != 4) {
    # A usage problem is an error: report it on STDERR with a non-zero exit
    # status so that it cannot be mistaken for (or captured as) real output.
    print STDERR "AA2CD.pl v1.1 :: Program to map AAChange data to protein domain annotations\n";
    print STDERR "Input Error :: AA2CD.pl <output folder> <input_file_from_eSNV_pipieline> <refseq uniport mapping file> <human domain coordinate file>\n";
    exit 1;
}

my ($outdir, $sample_name, $refseq_uniport, $human_domain) = @ARGV;

# Read every input once, in-process. The previous implementation shelled out
# to cat/awk/grep with unquoted, interpolated arguments, which broke on paths
# containing spaces or shell metacharacters and started one grep process per
# record.
my @aa_changes    = _read_aachange_column($sample_name);
my $mapping_lines = _read_lines($refseq_uniport);
my $domain_lines  = _read_lines($human_domain);

# The side files are best-effort: if they cannot be opened (for example when
# the input name contains directories that do not exist below the output
# folder) a warning is issued and the STDOUT report is still produced.
my $no_mapping_fh = _open_append("$outdir/${sample_name}_AAC2D_Refseq_no_mapping.txt");
my $annotation_fh = _open_append("$outdir/${sample_name}_AAC2D_full_annotation.txt");

# Header is newline-terminated so that it is not fused with the first row.
_append($annotation_fh,
    "#AAChange\t<seq id>\t<alignment start>\t<alignment end>\t<envelope start>\t<envelope end>\t<hmm acc>\t<hmm name>\t<type>\t<hmm start>\t<hmm end>\t<hmm length>\t<bit score>\t<E-value>\t<clan>\n");

# Lookup results are cached per term, so each file is scanned at most once
# for any given identifier.
my (%mapping_cache, %domain_cache);

for my $aa_change (@aa_changes) {
    if ($aa_change eq '') {
        print "\nNA\tNA";
        next;
    }

    my @parts     = split /:/, $aa_change;
    my $refseq_id = defined $parts[0] ? $parts[0] : '';
    my $position  = _residue_position($parts[2]);

    my @mappings = _lines_with_word($mapping_lines, $refseq_id, \%mapping_cache);
    if (!@mappings) {
        print "\n$refseq_id\tRefSeq ID not mapped to Uniprot ID";
        _append($no_mapping_fh, "$refseq_id\tRefSeq ID not mapped to Uniprot ID\n");
        next;
    }

    print "\n$aa_change\t";
    for my $mapping (@mappings) {
        # Second whitespace-delimited column of the mapping line.
        my $accession = (split /\s+/, $mapping)[1];
        $accession = '' unless defined $accession;

        my $domain_found = 0;
        for my $domain_row (_lines_with_word($domain_lines, $accession, \%domain_cache)) {
            my @cols = split /\s+/, $domain_row;
            my ($start, $end, $hmm_name) = @cols[1, 2, 6];

            # Ignore rows without usable numeric coordinates.
            next unless looks_like_number($start) && looks_like_number($end);
            next if $position < $start || $position > $end;

            $hmm_name = '' unless defined $hmm_name;
            print "$accession:$hmm_name; ";

            # One tab-delimited row per hit, matching the header columns
            # (AAChange followed by the complete domain table row).
            _append($annotation_fh, "$aa_change\t$domain_row\n");
            $domain_found = 1;
        }
        print "$accession:Not in a domain region; " if !$domain_found;
    }
}

for my $fh (grep { defined } $no_mapping_fh, $annotation_fh) {
    close $fh or warn "AA2CD.pl: error closing output file: $!\n";
}

print "\n";

# Return the 5th tab-delimited column of every data line of $path (the first
# line is treated as a header and dropped). Lines with fewer than five
# columns yield an empty string. Dies if the file cannot be read.
sub _read_aachange_column {
    my ($path) = @_;
    open my $in, '<', $path
        or die "AA2CD.pl: cannot read input file '$path': $!\n";
    my @values;
    while (my $line = <$in>) {
        chomp $line;
        my @cols = split /\t/, $line, -1;    # -1 keeps trailing empty columns
        push @values, defined $cols[4] ? $cols[4] : '';
    }
    close $in;
    shift @values;                           # drop the header row
    return @values;
}

# Return a reference to the chomped lines of $path. Dies if unreadable.
sub _read_lines {
    my ($path) = @_;
    open my $in, '<', $path
        or die "AA2CD.pl: cannot read reference file '$path': $!\n";
    my @lines = <$in>;
    close $in;
    chomp @lines;
    return \@lines;
}

# Return the lines in @$lines that contain $word as a whole word, i.e. not
# directly preceded or followed by a word character (the rule `grep -w`
# applies). $word is matched literally, so '.' and other regex
# metacharacters inside an identifier cannot match unrelated text. An empty
# $word matches nothing. Results are memoised in %$cache.
sub _lines_with_word {
    my ($lines, $word, $cache) = @_;
    return () if $word eq '';
    if (!exists $cache->{$word}) {
        my $whole_word = qr/(?<!\w)\Q$word\E(?!\w)/;
        $cache->{$word} = [ grep { $_ =~ $whole_word } @{$lines} ];
    }
    return @{ $cache->{$word} };
}

# Reduce a protein-change token such as "p.K41R" to its leading number by
# deleting every 'p', '.' and upper-case letter. Returns 0 when the token is
# missing or does not start with digits after stripping.
sub _residue_position {
    my ($protein_change) = @_;
    return 0 unless defined $protein_change;
    (my $stripped = $protein_change) =~ tr/p.A-Z//d;
    return $stripped =~ /\A\s*(\d+)/ ? $1 : 0;
}

# Open $path for appending. Returns the filehandle, or undef (after a
# warning) when the file cannot be opened.
sub _open_append {
    my ($path) = @_;
    my $fh;
    if (!open $fh, '>>', $path) {
        warn "AA2CD.pl: cannot append to '$path': $!\n";
        return;
    }
    return $fh;
}

# Write $text to $fh; a no-op when $fh is undef (side file unavailable).
sub _append {
    my ($fh, $text) = @_;
    return unless $fh;
    print {$fh} $text or warn "AA2CD.pl: write failed: $!\n";
    return;
}
