#!/usr/local/biotools/perl/5.14.2/bin/perl

=head1 NAME
   uniquefy_cufflinks_transcripts.pl

=head1 SYNOPSIS

    USAGE: uniquefy_cufflinks_transcripts.pl -i=input.gtf -o=output.gtf

=head1 OPTIONS


B<--input, -i>
	Required. Input GTF file name

B<--output, -o>
	Required.  Output GTF file name

B<--help,-h>
	Optional. Print this documentation and exit.


=head1 DESCRIPTION
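	Append a running counter suffix (.T1, .T2, ...) to the transcript_id of every
	transcript record in a Cufflinks GTF file so that each transcript_id is unique.
	The exon records that follow a transcript record receive the same suffix.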

=head1 INPUT
	Cufflinks GTF file

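=head1 OUTPUT
	GTF file with the same records as the input, in the same order, carrying
	the suffixed transcript_id values.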

=head1 VERSION
	1.0

=head1  CONTACT
  bjaysheel@gmail.com


=head1 EXAMPLE
	./uniquefy_cufflinks_transcripts.pl -i=input.gtf -o=output.gtf

=cut

use strict;
use warnings;
use Data::Dumper;
use Cwd;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use POSIX;

#### collect the command line options into %options. if GetOptions reports
#### a failure, control is handed to pod2usage().
my %options = ();
my $results = GetOptions (\%options,
                          'input|i=s',
                          'output|o=s',
                          'log|l=s',
                          'debug=s',
                          'help|h') || pod2usage();

#### display documentation
if( $options{'help'} ){
    pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} );
}

#############################################################################
#### make sure everything passed was peachy
#### (plain call syntax; the legacy "&" sigil is not needed here)
check_parameters(\%options);

#check_input($options{input});

#### expect input file of following format
####
#### chr1    Cufflinks       transcript      69091   70008   1       +       .       gene_id "ENSG00000186092.4"; transcript_id "ENSG00000186092.4"; ....
#### chr1    Cufflinks       exon    69091   70008   1       +       .       gene_id "ENSG00000186092.4"; transcript_id "ENSG00000186092.4"; exon_number "1"; ....
#### chr1    Cufflinks       transcript      62948   63887   1       +       .       gene_id "ENSG00000240361.1"; transcript_id "ENSG00000240361.1"; ....
#### chr1    Cufflinks       exon    62948   63887   1       +       .       gene_id "ENSG00000240361.1"; transcript_id "ENSG00000240361.1"; exon_number "1"; ....
####
#### where all exons belonging to a transcript are always listed after the transcript.

open(my $in_fh, "<", $options{input})
	or die "Could not open GTF file $options{input}: $!";
open(my $out_fh, ">", $options{output})
	or die "Could not open GTF file $options{output}: $!";

#### number of transcript records seen so far; it becomes the ".T<n>" suffix.
my $count = 0;

while (my $line = <$in_fh>) {
	chomp $line;
	my @info = split(/\t/, $line);

	#### blank lines, "#" header lines and any other line without all nine
	#### GTF columns have no attribute column to edit: copy them unchanged
	#### instead of touching undefined fields.
	if (scalar(@info) < 9) {
		print {$out_fh} "$line\n";
		next;
	}

	#### a transcript record opens a new group, so move on to the next
	#### counter value. every other record keeps the current value: since
	#### transcripts are always followed by their exons, we assume a
	#### non-transcript record belongs to the transcript we just saw.
	if ($info[2] =~ /transcript/i) {
		$count += 1;
	}

	#### append ".T<count>" inside the quotes of the transcript_id value.
	#### the match is anchored on the attribute itself, so it does not
	#### depend on which attribute (FPKM, exon_number, ...) comes next.
	$info[8] =~ s/\b(transcript_id ")([^"]*)"/$1$2.T$count"/;

	print {$out_fh} join("\t", @info)."\n";
}

close($in_fh);

#### buffered write errors only surface when the handle is closed.
close($out_fh)
	or die "Could not finish writing GTF file $options{output}: $!";

exit();

#############################################################################
sub check_parameters {
    #### Verify that every mandatory command line option was supplied.
    ####
    #### $options - reference to the hash of parsed command line options.
    ####
    #### Returns nothing when 'input' and 'output' are both defined.
    #### Otherwise the first missing option is reported through pod2usage,
    #### which is asked to exit with status 2.
    my $options = shift;

    my @required = qw(input output);

    foreach my $key (@required) {
        #### look the key up through the reference that was passed in,
        #### not through a hash that happens to be in scope.
        unless (defined $options->{$key}) {
            pod2usage({-exitval => 2,
                       -message => "ARG: $key is required",
                       -verbose => 1,
                       -output  => \*STDERR});
        }
    }

    return;
}
