#!/usr/local/biotools/perl/5.14.2/bin/perl

=head1 NAME
   stitch_cdna.pl - stitch exon sequences into one cDNA FASTA file per transcript

=head1 SYNOPSIS

    USAGE: stitch_cdna.pl -t=transcripts_list_file -f=exon_fasta_file -o=output_dir

=head1 OPTIONS


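B<--transcript, -t>
	Required. Tab-delimited list file with one transcript per line
	(columns: chr, start position, end position, transcript name).
	Lines starting with # are skipped.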

B<--fasta, -f>
	Required.  Multi FASTA nucleotide file of all exons.

B<--output_dir, -o>
	Required. Output directory where each transcript's cDNA FASTA is written.

B<--help,-h>

=head1 DESCRIPTION
	Stitch the exon sequences of each transcript together into a single cDNA sequence.

=head1 INPUT
	List of transcripts of interest and all exon fasta file

=head1 OUTPUT
	cDNA fasta file per transcript id.

=head1 VERSION
	1.0

=head1  CONTACT
  bjaysheel@gmail.com


=head1 EXAMPLE
	./stitch_cdna.pl -t=transcript_list_file -f=exon_fasta_file -o=output_dir

=cut

use strict;
use warnings;

use Data::Dumper;
use File::Path qw(make_path);
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use Pod::Usage;

#### option specifications handed to GetOptions; parsed values are stored
#### in %options under the long option name
my @option_specs = qw(
	transcript|t=s
	fasta|f=s
	output_dir|o=s
	log|l=s
	debug=s
	help|h
);

my %options = ();

#### print the usage message when GetOptions reports a parsing failure
my $results = GetOptions(\%options, @option_specs) || pod2usage();

#### display the full documentation on STDERR and exit 0 when help is requested
pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} ) if ( $options{'help'} );

#############################################################################
#### main: validate the options, make sure the output directory exists, then
#### write one stitched cDNA FASTA file per transcript listed in the
#### transcript file.

#### make sure everything passed was peachy
check_parameters(\%options);

create_dir_struct(\%options);

#### open transcript file
open(my $transcript_fh, "<", $options{transcript})
	or die "Could not open transcript file $options{transcript}: $!\n";

TRANSCRIPT:
while (my $line = <$transcript_fh>) {
	#### skip lines starting with # assuming they are comments and header info
	next TRANSCRIPT if ($line =~ /^#/);

	#### remove new line chr
	chomp $line;

	#### transcript file is expected to be a tab delimited file of following format
	####
	#### 0: chr
	#### 1: start position
	#### 2: end position
	#### 3: transcript name
	my @info = split(/\t/, $line);
	my $transcript = $info[3];

	#### blank or truncated lines have no transcript name; searching with an
	#### empty pattern would be meaningless, so skip them.
	next TRANSCRIPT unless (defined $transcript && length $transcript);

	#### find every exon of this transcript in the fasta file.
	#### The list form of open runs grep without a shell, so the transcript
	#### name and the fasta path are passed verbatim (no quoting problems, no
	#### shell injection). -F matches the name as a literal string rather than
	#### a regular expression; -A 1 prints the one line following each
	#### matching line, i.e. the sequence line after each exon header.
	open(my $grep_fh, "-|", "grep", "-F", "-A", "1", "-e", $transcript, "--", $options{fasta})
		or die "Could not run grep on $options{fasta}: $!\n";

	#### concatenate the sequence lines of all exons, in file order.
	my $dna = "";
	while (my $hit = <$grep_fh>) {
		chomp $hit;

		#### drop header lines.
		next if ($hit =~ /^>/);

		#### grep prints "--" between non-adjacent groups of matches; it is
		#### not sequence data and must not end up in the cDNA.
		next if ($hit eq "--");

		$dna .= $hit;
	}

	#### grep exits with 1 when nothing matched (not an error) and with a
	#### value above 1 on a real failure such as an unreadable fasta file.
	close($grep_fh);
	if (($? >> 8) > 1) {
		die "grep failed while searching $options{fasta} for $transcript\n";
	}

	#### move on if there is no sequence for this transcript in the fasta file.
	next TRANSCRIPT unless (length $dna);

	#### print as fasta sequence with transcript name as sequence id.
	my $out_file = "$options{output_dir}/$transcript.fsa";
	open(my $out_fh, ">", $out_file) or die "Could not open file to write $out_file: $!";
	print {$out_fh} ">$transcript\n$dna\n";

	#### buffered write errors only surface at close, so check it.
	close($out_fh) or die "Could not finish writing $out_file: $!";
}
close($transcript_fh);

exit(0);

#############################################################################
#### Validate the parsed command-line options.
####
#### Args:    $options - reference to the option hash filled in by GetOptions.
#### Effects: when a required option (transcript, fasta, output_dir) is
####          missing, prints the reason plus usage to STDERR and exits with
####          status 2; sets $options->{debug} to 3 when no debug value was
####          supplied.
sub check_parameters {
	my ($options) = @_;

	my @required = qw(transcript fasta output_dir);

	foreach my $key (@required) {
		unless (defined $options->{$key}) {
			#### pod2usage exits with -exitval itself, so no explicit exit
			#### is needed after it.
			pod2usage({-exitval => 2, -message => "ARG: $key is required", -verbose => 1, -output => \*STDERR});
		}
	}

	$options->{'debug'} = 3 unless (defined $options->{'debug'});

	return;
}

#############################################################################
#### Make sure the output directory exists, creating any missing parent
#### directories along the way.
####
#### Args:    $options - reference to the option hash; only output_dir is read.
#### Dies:    File::Path::make_path croaks when the directory cannot be created.
sub create_dir_struct {
	my ($options) = @_;

	my $dir = $options->{output_dir};

	#### make_path creates the directory without going through a shell, so a
	#### path containing spaces or shell metacharacters is created verbatim,
	#### and a failure is reported instead of being silently discarded.
	make_path($dir) unless (-d $dir);

	return;
}
