#!/usr/local/biotools/perl/5.14.2/bin/perl

=head1 NAME
   getLargestReads.pl

=head1 SYNOPSIS

    USAGE: getLargestReads.pl -i=input.fasta -o=output.fasta

=head1 OPTIONS


B<--input,-i>
	Required. Input fasta file.

B<--output, -o>
	Required.  Output fasta file.


B<--help,-h>
	Optional. Display this documentation and exit.


=head1 DESCRIPTION
	For each set of reads that share an id (once the trailing _N suffix
	added by emboss is removed), keep only the largest read.

=head1 INPUT
	Fasta file from emboss

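=head1 OUTPUT
	Fasta file containing one record per read id: the largest read of
	that set, with any '-' characters removed from the sequence.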

=head1 VERSION
	1.0

=head1  CONTACT
  bjaysheel@gmail.com


=head1 EXAMPLE
	./getLargestReads.pl -i=input.fasta -o=output.fasta

=cut

#use lib "/usr/local/biotools/perl/5.10.0/lib/site_perl/5.10.0";
use strict;
use warnings;
use Data::Dumper;
use Cwd;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use Bio::SeqIO;

#### Collect command-line switches into %options. Because Getopt::Long is
#### configured with pass_through, unrecognised arguments stay in @ARGV.
my %options = ();
my $results = GetOptions(
	\%options,
	'input|i=s',
	'output|o=s',
	'help|h',
);

#### Parsing failed: print the brief usage message and stop.
pod2usage() unless $results;

#### --help / -h: print the complete documentation to STDERR and stop
#### with a success status.
pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} )
	if $options{'help'};

#############################################################################
#### make sure everything passed was peachy
#### Validate the required arguments before touching any file.
check_parameters(\%options);

my $seq_in = Bio::SeqIO->new( -file   => $options{input},
						   -format => 'fasta' );

#### Largest record seen so far for each read id. Starts as an empty hash
#### so an input without any records is handled explicitly rather than
#### through autovivification.
my $reads_hash = {};

#### Read the fasta file and group records by id with the emboss suffix
#### (_1, _2, ...) removed. Only the largest record of each group is kept,
#### so memory grows with the number of ids, not the number of records.
#### NOTE(review): the length is taken before '-' characters are stripped,
#### so they count towards "largest" -- confirm this is intended.
while (my $seq = $seq_in->next_seq) {
	my $id = $seq->display_id;
	$id =~ s/_\d+$//;

	my $len  = $seq->length();
	my $best = $reads_hash->{$id};

	#### On equal length the first record encountered wins.
	next if defined $best && $best->{len} >= $len;

	#### Guard against a record that carries no sequence data.
	my $residues = $seq->seq;
	$residues = '' unless defined $residues;

	$reads_hash->{$id} = { len => $len, seq => $residues };
}

#### Lexical filehandle; include $! so the reason for a failure is shown.
open(my $out_fh, ">", $options{output})
	or die "Could not open file to write $options{output}: $!";

#### Print the largest sequence for each read id, in sorted id order.
#### Records are separated by a newline; none is written after the last.
my $records_written = 0;
foreach my $read (sort keys %{$reads_hash}) {
	(my $residues = $reads_hash->{$read}->{seq}) =~ s/-//g;

	print {$out_fh} "\n" if $records_written != 0;
	print {$out_fh} ">$read\n", $residues;

	$records_written++;
}

#### Buffered write errors only surface at close, so check it.
close($out_fh)
	or die "Could not close $options{output}: $!";

exit(0);

#############################################################################
#### check_parameters(\%options)
####
#### Verify that every required option was supplied and fill in defaults.
####
#### Parameters:
####   $options - reference to the hash populated by GetOptions.
####
#### Behaviour:
####   - If 'input' or 'output' is undefined, prints which argument is
####     missing plus the usage text to STDERR via pod2usage and exits
####     with status 2.
####   - Sets $options->{debug} to 3 when it is not already defined.
####
#### Returns a true value when all required options are present.
sub check_parameters {
	my $options = shift;

	my @required = qw(input output);

	foreach my $key (@required) {
		next if defined $options->{$key};

		#### pod2usage exits with -exitval, so nothing runs after it.
		pod2usage({
			-exitval => 2,
			-message => "ARG: $key is required",
			-verbose => 1,
			-output  => \*STDERR,
		});
	}

	#### Work on the hash that was passed in, not on a file-level variable.
	$options->{'debug'} = 3 unless (defined $options->{'debug'});

	return 1;
}
