package CircosUtil;

use Exporter 'import';
use IO::File;
use Carp;
use List::MoreUtils qw/ uniq /;
use HTML::TableExtract;
use Data::Dumper;

# Subs exported into the caller's namespace by default (via Exporter's import).
# attribs2hash, run_circos and create_chromosome_conf are not in this list.
@EXPORT = qw( fpkm_from_gtf mk_histogram_conf read_fusion_data mk_fusion_conf mk_fusion_conf2 extract_fusion_info gen_snp_circos_conf );
#@EXPORT_OK = qw( fpkm_from_gtf mk_histogram_conf read_fusion_data mk_fusion_conf mk_fusion_conf2 extract_fusion_info );

# Read a tab-separated GTF file and turn every 'transcript' record into a
# Circos histogram data row.
#
#   $file - path of the GTF file to read.
#
# For each transcript the FPKM attribute (taken from the last column) is
# recorded against the sequence name (column 1) and start position
# (column 4); a later transcript with the same name/start replaces an
# earlier one.  Each row is "name<TAB>start-1000<TAB>start+1000<TAB>FPKM",
# with the first "chr" in the name rewritten to "hs".  Rows are ordered by
# name (string sort), then by start (numeric sort).
#
# Every row is also printed to the currently selected output handle.
#
# Returns the rows as a list in list context, or as an array reference in
# scalar context.  Croaks if the file cannot be opened.
sub fpkm_from_gtf
{
    my ($file) = @_;

    # $! (not $?) carries the reason an open failed.
    my $fh = IO::File->new($file, 'r') or croak "can't open $file : $!";

    my $rec_ref = {};

    while( my $line = <$fh> )
    {
        chomp $line;

        my @cols = split(/\t/, $line);

        # comment/blank lines have no feature column at all
        next unless defined $cols[2] && $cols[2] eq 'transcript';

        my $attr_ref = attribs2hash($cols[$#cols]);

        # a transcript without an FPKM attribute would yield a data row
        # with an empty value column, so leave it out.
        next unless defined $attr_ref->{FPKM};

        $rec_ref->{$cols[0]}->{$cols[3]} = $attr_ref->{FPKM};
    }

    $fh->close();

    my @fpkm_data;
    foreach my $chr (sort keys %{$rec_ref})
    {
        my $chr_name = $chr;
        $chr_name =~ s/chr/hs/;

        foreach my $pos (sort { $a <=> $b } keys %{$rec_ref->{$chr}})
        {
            my $record = join("\t", $chr_name, $pos-1000, $pos+1000,
                                    $rec_ref->{$chr}->{$pos});
            print $record, "\n";
            push(@fpkm_data, $record);
        }
    }

    return wantarray ? @fpkm_data : \@fpkm_data;
}



# Helper: parse a GTF-style attribute string such as
#     gene_id "G1"; transcript_id "T1"; FPKM "12.5";
# into a hash reference mapping attribute name to value.
#
#   $string - the attribute column text; entries are separated by "; ".
#
# Surrounding double quotes are removed from each value, as is the
# semicolon that terminates the final entry.  Unquoted values
# (e.g. exon_number 2) are accepted too.  An entry consisting of a bare
# name maps to undef.  An undefined or empty $string gives an empty hash.
sub attribs2hash
{
    my ($string) = @_;
    my %attrib_hash;

    return \%attrib_hash unless defined $string;

    foreach my $ele (split(/; /, $string))
    {
        # drop stray leading blanks and the terminating ';' of the last entry
        $ele =~ s/^\s+//;
        $ele =~ s/[;\s]+$//;
        next if $ele eq '';

        my ($key, $val) = split(/\s+/, $ele, 2);
        if( defined $val )
        {
            $val =~ s/^"//;
            $val =~ s/"$//;
        }
        $attrib_hash{$key} = $val;
    }
    return \%attrib_hash;
}


# Invoke circos on a configuration file through system().
#
#   $conf_file - path handed to circos after its -conf switch.
#
# The executable path is read from the package variable $circos_pth, which
# this sub does not set.  Returns the raw value of $? after the call.
sub run_circos
{
    my $conf_file = shift;

    my $command = "$circos_pth -conf $conf_file";
    system($command);

    # check the return codes more here.
    my $status = $?;
    return $status;
}

# Write a Circos configuration file that draws a single histogram track.
#
#   $file      - path of the configuration file to create (overwritten).
#   $histogram - histogram data file name placed in the <plot> block.
#   $img       - output image file name placed in the <image> block.
#   $karyotype - karyotype file name.
#
# The three names are substituted for the @@HISTOGRAM_DATA@@, @@IMGFILE@@
# and @@KARYOTYPE@@ placeholders of the template below.
#
# Returns nothing.  Croaks if $file cannot be opened for writing or closed.
sub mk_histogram_conf
{
    my ($file, $histogram, $img, $karyotype) = @_;

    my $core_conf = <<EOS;
<colors>
<<include etc/colors.conf>>
<<include etc/brewer.conf>>
</colors>

<fonts>
<<include etc/fonts.conf>>
</fonts>

<ideogram>
<spacing>
default = 0.01r
break = 2u
</spacing>

radius           = 0.85r
thickness        = 50p
fill             = yes
fill_color       = black
stroke_thickness = 2
stroke_color     = black

show_label       = yes
label_font       = bold
label_radius     = dims(ideogram,radius) + 0.15r
label_size       = 36
label_parallel   = no

show_bands            = yes
fill_bands            = yes
band_stroke_thickness = 2
band_stroke_color     = white
band_transparency     = 0
</ideogram>

#<<include ticks.conf>>
show_ticks          = yes
show_tick_labels    = yes

show_grid          = no
grid_start         = 0.5r
grid_end           = 1.0r

<ticks>
skip_first_label     = no
skip_last_label      = no
radius               = dims(ideogram,radius_outer)
tick_separation      = 2p
min_label_distance_to_edge = 10p
label_separation = 5p
label_offset     = 5p
multiplier       = 1e-6
color            = black

<tick>
spacing        = 1u
size           = 8p
thickness      = 2p
show_label     = no
</tick>
<tick>
spacing        = 5u
size           = 12p
thickness      = 2p
show_label     = yes
label_size     = 20p
format         = \%d
grid           = yes
grid_color     = lgrey
grid_thickness = 2p
</tick>
<tick>
spacing        = 10u
size           = 14p
thickness      = 2p
show_label     = yes
label_size     = 24p
format         = \%d
grid           = yes
grid_color     = dgrey
grid_thickness = 2p
</tick>
</ticks>
<image>
#<<include etc/image.conf>>
#Background* = AppistryBackground.png
background = white

dir = .
file = \@\@IMGFILE\@\@
png = yes
svg = no
radius = 1500p

angle_offset = -80
24bit = yes
auto_alpha_colors = yes
auto_alpha_steps = 10
</image>

karyotype   = \@\@KARYOTYPE\@\@

chromosomes_units           = 1000000
chromosomes_display_default = yes
#chromosomes = \@\@CHROMOSOMES\@\@


<plots>

<plot>

show = yes

file = \@\@HISTOGRAM_DATA\@\@
#file = try0.out
type = histogram
#type = line

r0 = 0.41r
r1 = 0.71r

min = 0.0
max = 2000.0

color     = black
thickness = 2

extend_bin = yes

axis           = yes
axis_color     = lgrey
axis_thickness = 2
axis_spacing   = 250.0

<rules>
<rule>
importance = 110
condition = _VALUE_ < 0
show = no
</rule>

<rule>
importance = 100
condition = _VALUE_ > 0
show = yes
fill_under = yes
fill_color = dblue
flow = continue
</rule>

# we will probably have to fill this out somehow
<rule>
importance = 90
condition = _VALUE_ > 500.0
color = red
show = yes
fill_under = yes
fill_color = red
flow = continue
</rule>

</rules>

</plot>

</plots>

#anglestep       = 0.5
#minslicestep    = 10
#beziersamples   = 40
#debug           = no
#warnings        = no
#imagemap        = no

# don't touch!
#units_ok        = bupr
#units_nounit    = n
<<include etc/housekeeping.conf>>


EOS

    # Each placeholder occurs once in the template.
    $core_conf =~ s/\@\@IMGFILE\@\@/$img/;
    $core_conf =~ s/\@\@HISTOGRAM_DATA\@\@/$histogram/;
    $core_conf =~ s/\@\@KARYOTYPE\@\@/$karyotype/;

    # Explicit mode argument: the file name is never parsed for a mode
    # prefix, and a failed open is reported instead of surfacing later as
    # an "undefined value" error on print.
    my $fh = IO::File->new($file, 'w')
                or croak "can't open $file for writing : $!";
    print $fh $core_conf;
    $fh->close() or croak "can't close $file : $!";
    return;
}


# Write a Circos configuration file showing gene-fusion links with a ring
# of gene-name labels, for all chromosomes of the karyotype.
#
#   $file      - path of the configuration file to create (overwritten).
#   $fusion    - link data file name placed in the <link fusion> block.
#   $img       - output image file name placed in the <image> block.
#   $karyotype - karyotype file name.
#   $genes     - text-track data file name holding the gene labels.
#
# The names replace the @@FUSION_DATA@@, @@IMGFILE@@, @@KARYOTYPE@@ and
# @@GENES_FILE@@ placeholders of the template below.
#
# NOTE(review): label_font is 'nbold' in this template but 'bold' in the
# other templates of this file - confirm that is intentional.
#
# Returns nothing.  Croaks if $file cannot be opened for writing or closed.
sub mk_fusion_conf
{
    my ($file, $fusion, $img, $karyotype, $genes) = @_;

# build some of the config file, figure out what needs to be
# included and where, what colors to use for the arcs
# how to include the gene names...

    my $core_conf = <<EOS;

<colors>
<<include etc/colors.conf>>
<<include etc/brewer.conf>>
</colors>

<fonts>
<<include etc/fonts.conf>>
</fonts>

<ideogram>
<spacing>
default = 0.01r
break = 2u
</spacing>

radius           = 0.80r
thickness        = 50p
fill             = yes
fill_color       = black
stroke_thickness = 2
stroke_color     = black

show_label       = yes
label_font       = nbold
label_radius     = dims(ideogram,radius) + 0.15r
label_size       = 36
label_parallel   = no
label_color      = blue

show_bands            = yes
fill_bands            = yes
band_stroke_thickness = 2
band_stroke_color     = white
band_transparency     = 0
</ideogram>

#<<include ticks.conf>>
show_ticks          = yes
show_tick_labels    = yes

show_grid          = no
grid_start         = 0.5r
grid_end           = 1.0r

<ticks>
skip_first_label     = no
skip_last_label      = no
radius               = dims(ideogram,radius_outer)
tick_separation      = 2p
min_label_distance_to_edge = 10p
label_separation = 5p
label_offset     = 5p
multiplier       = 1e-6
color            = black

<tick>
spacing        = 1u
size           = 8p
thickness      = 2p
show_label     = no
</tick>
<tick>
spacing        = 5u
size           = 12p
thickness      = 2p
show_label     = yes
label_size     = 20p
format         = %d
grid           = yes
grid_color     = lgrey
grid_thickness = 2p
</tick>
<tick>
spacing        = 10u
size           = 14p
thickness      = 2p
show_label     = yes
label_size     = 24p
format         = %d
grid           = yes
grid_color     = dgrey
grid_thickness = 2p
</tick>
</ticks>

<image>
#<<include etc/image.conf>>
background = white

dir = .
file = \@\@IMGFILE\@\@
png = yes
svg = no
radius = 1500p

angle_offset = -90
24bit = yes
auto_alpha_colors = yes
auto_alpha_steps = 10
</image>

karyotype   = \@\@KARYOTYPE\@\@

chromosomes_units           = 1000000
chromosomes_display_default = yes
#chromosomes = hs2



<plots>
<plot>
type             = text
color            = dgrey
file             = \@\@GENES_FILE\@\@

# on tick scale
r0 = 1r
r1 = 1r+260p

show_links     = yes
link_dims      = 0p,0p,50p,0p,10p
#link_dims      = 4p,4p,8p,4p,4p
link_thickness = 2p
link_color     = red

label_size   = 24p
label_font   = condensed

# turn on snuggling
label_snuggle = yes
max_snuggle_distance = 1r
snuggle_sampling = 2
snuggle_tolerance = 0.25r
snuggle_link_overlap_test = yes
snuggle_link_overlap_tolerance = 2p
snuggle_refine = yes

#padding  = 2p
#rpadding = 0.1r
padding  = 0p
rpadding = 0p

</plot>
</plots>

<links>

z = 0
radius = 0.99r
bezier_radius = 0.1r

<link fusion>
show  = yes
color = red
thickness = 2
file = \@\@FUSION_DATA\@\@
record_limit = 2500
</link>

</links>

#anglestep       = 0.5
#minslicestep    = 10
#beziersamples   = 40
#debug           = no
#warnings        = no
#imagemap        = no

# don't touch!
#units_ok        = bupr
#units_nounit    = n
<<include etc/housekeeping.conf>>

EOS

    # Each placeholder occurs once in the template.
    $core_conf =~ s/\@\@IMGFILE\@\@/$img/;
    $core_conf =~ s/\@\@FUSION_DATA\@\@/$fusion/;
    $core_conf =~ s/\@\@KARYOTYPE\@\@/$karyotype/;
    $core_conf =~ s/\@\@GENES_FILE\@\@/$genes/;

    # Explicit mode argument: the file name is never parsed for a mode
    # prefix, and a failed open is reported instead of surfacing later as
    # an "undefined value" error on print.
    my $fh = IO::File->new($file, 'w')
                or croak "can't open $file for writing : $!";
    print $fh $core_conf;
    $fh->close() or croak "can't close $file : $!";
    return;
}

# Write a Circos configuration file showing gene-fusion links with a ring
# of gene-name labels, restricted to the chromosomes that actually occur
# in the fusion link file.
#
#   $file      - path of the configuration file to create (overwritten).
#   $fusion    - link data file; it is both named in the <link fusion>
#                block and read here: the second tab-separated column of
#                every line is collected as a chromosome name.
#   $img       - output image file name placed in the <image> block.
#   $karyotype - karyotype file name.
#   $genes     - text-track data file name holding the gene labels.
#
# The distinct chromosome names, in order of first appearance and joined
# with ';', replace @@CHROMOSOMES@@.  Lines without a second column (for
# example blank lines) are ignored.  The chromosome list is also dumped to
# the currently selected output handle with Data::Dumper.
#
# Returns nothing.  Croaks if $fusion cannot be read or $file cannot be
# opened for writing or closed.
sub mk_fusion_conf2
{
    my ($file, $fusion, $img, $karyotype,$genes) = @_;

# build some of the config file, figure out what needs to be
# included and where, what colors to use for the arcs
# how to include the gene names...

    my $core_conf = <<EOS;

<colors>
<<include etc/colors.conf>>
<<include etc/brewer.conf>>
</colors>

<fonts>
<<include etc/fonts.conf>>
</fonts>

<ideogram>
<spacing>
default = 0.01r
break = 2u
</spacing>

radius           = 0.80r
thickness        = 50p
fill             = yes
fill_color       = black
stroke_thickness = 2
stroke_color     = black

show_label       = yes
label_font       = bold
label_radius     = dims(ideogram,radius) + 0.15r
label_size       = 36
label_parallel   = no

show_bands            = yes
fill_bands            = yes
band_stroke_thickness = 2
band_stroke_color     = white
band_transparency     = 0
</ideogram>

#<<include ticks.conf>>
show_ticks          = yes
show_tick_labels    = yes

show_grid          = no
grid_start         = 0.5r
grid_end           = 1.0r

<ticks>
skip_first_label     = no
skip_last_label      = no
radius               = dims(ideogram,radius_outer)
tick_separation      = 2p
min_label_distance_to_edge = 10p
label_separation = 5p
label_offset     = 5p
multiplier       = 1e-6
color            = black

<tick>
spacing        = 1u
size           = 8p
thickness      = 2p
show_label     = no
</tick>
<tick>
spacing        = 5u
size           = 12p
thickness      = 2p
show_label     = yes
label_size     = 20p
format         = %d
grid           = yes
grid_color     = lgrey
grid_thickness = 2p
</tick>
<tick>
spacing        = 10u
size           = 14p
thickness      = 2p
show_label     = yes
label_size     = 24p
format         = %d
grid           = yes
grid_color     = dgrey
grid_thickness = 2p
</tick>
</ticks>

<image>
#<<include etc/image.conf>>
background = white

dir = .
file = \@\@IMGFILE\@\@
png = yes
svg = no
radius = 1500p

angle_offset = -90
24bit = yes
auto_alpha_colors = yes
auto_alpha_steps = 10
</image>

karyotype   = \@\@KARYOTYPE\@\@

chromosomes_units           = 1000000
chromosomes_display_default = no
chromosomes = \@\@CHROMOSOMES\@\@



<plots>
<plot>
type             = text
color            = black
file             = \@\@GENES_FILE\@\@

# on tick scale
r0 = 1r
r1 = 1r+260p

show_links     = yes
link_dims      = 0p,0p,50p,0p,10p
#link_dims      = 4p,4p,8p,4p,4p
link_thickness = 2p
link_color     = red

label_size   = 24p
label_font   = condensed

# turn on snuggling
label_snuggle = yes
max_snuggle_distance = 1r
snuggle_sampling = 2
snuggle_tolerance = 0.25r
snuggle_link_overlap_test = yes
snuggle_link_overlap_tolerance = 2p
snuggle_refine = yes

#padding  = 2p
#rpadding = 0.1r
padding  = 0p
rpadding = 0p

</plot>
</plots>

<links>

z = 0
radius = 0.99r
bezier_radius = 0.1r

<link fusion>
show  = yes
color = red
thickness = 2
file = \@\@FUSION_DATA\@\@
record_limit = 2500
</link>

</links>

#anglestep       = 0.5
#minslicestep    = 10
#beziersamples   = 40
#debug           = no
#warnings        = no
#imagemap        = no

# don't touch!
#units_ok        = bupr
#units_nounit    = n
<<include etc/housekeeping.conf>>

EOS

    # Each placeholder occurs once in the template.
    $core_conf =~ s/\@\@IMGFILE\@\@/$img/;
    $core_conf =~ s/\@\@FUSION_DATA\@\@/$fusion/;
    $core_conf =~ s/\@\@KARYOTYPE\@\@/$karyotype/;
    $core_conf =~ s/\@\@GENES_FILE\@\@/$genes/;

    my $ch = IO::File->new($fusion, 'r')
                or croak "can't open $fusion : $!";
    my @chromosomes;
    my %seen;
    while( my $line = <$ch> )
    {
        # without the chomp a two-column line would carry its newline
        # into the chromosome name.
        chomp $line;
        my @f = split(/\t/, $line);

        # blank or one-column lines have no chromosome to contribute
        next unless defined $f[1] && length $f[1];

        # keep the first occurrence only, preserving file order
        push(@chromosomes, $f[1]) unless $seen{$f[1]}++;
    }
    $ch->close();

    my $chr_string = join(";", @chromosomes);
    print Dumper(\@chromosomes),"\n";

    $core_conf =~ s/\@\@CHROMOSOMES\@\@/$chr_string/;

    my $fh = IO::File->new($file, 'w')
                or croak "can't open $file for writing : $!";
    print $fh $core_conf;
    $fh->close() or croak "can't close $file : $!";
    return;
}


# Placeholder: takes no notice of its arguments and returns nothing
# (empty list in list context, undef in scalar context).
sub read_fusion_data { return; }

# Pull fusion records out of an HTML report and write two Circos data files.
#
#   $result_file    - HTML file to parse; tables are selected by the
#                     attributes cellpadding=3, border=1, id='fusion_list'.
#   $fusion_links   - output file: two lines per fusion, both tagged with
#                     the same "fusionNNNNN" id, one per breakpoint
#                     (id, chromosome, position, position+1).
#   $gene_locations - output file: one line per gene name
#                     (chromosome, position, position+1, gene), sorted by
#                     gene name.  A gene seen more than once keeps the
#                     location from its last occurrence.
#
# Only rows with at least 10 cells are treated as fusion records; cells
# 1..6 are read as gene1, chr1, pos1, gene2, chr2, pos2.  The first "chr"
# in each chromosome name is rewritten to "hs".
#
# Returns 1.  Croaks if either output file cannot be opened.
sub extract_fusion_info
{
    my ($result_file, $fusion_links, $gene_locations) = @_;

    my $t = HTML::TableExtract->new( attribs => { cellpadding => 3,
                                                   border => 1,
                                                   id => 'fusion_list' } );
    my $oh = IO::File->new($fusion_links, 'w')
                or croak "can't open $fusion_links for writing : $!";
    $t->parse_file($result_file);
    my @tables = $t->tables();
    my $fusion_idx = 1;
    my %genes;

    foreach my $table (@tables)
    {
        my @rows = $table->rows();

        foreach my $row (0..$#rows)
        {
            my @row_list = $table->row($row);

            # The original test was written "$num_elements => 10": a fat
            # comma, not a comparison, so it was always true and every
            # row was treated as a fusion record.
            next unless scalar(@row_list) >= 10;

            my ($gene1, $chr1, $pos1, $gene2, $chr2, $pos2) = @row_list[1..6];
            $chr1 =~ s/chr/hs/;
            $chr2 =~ s/chr/hs/;

            my $junction_id = sprintf("%05d", $fusion_idx);
            print $oh "fusion".$junction_id,"\t", $chr1, "\t", $pos1, "\t",
                  $pos1 + 1, "\n";
            print $oh "fusion".$junction_id,"\t", $chr2, "\t", $pos2, "\t",
                  $pos2 + 1, "\n";
            $genes{$gene1} = [ $chr1, $pos1 ];
            $genes{$gene2} = [ $chr2, $pos2 ];

            $fusion_idx += 1;

            # I don't know how useful this would be in the future:
            # rows with exactly 4 cells could be handled here as well.
        }
    }
    $oh->close();

    $oh = IO::File->new($gene_locations, 'w')
                   or croak "can't open $gene_locations for writing: $!";
    foreach my $gene (sort keys %genes)
    {
        print $oh  $genes{$gene}->[0], "\t",
              $genes{$gene}->[1], "\t",
              $genes{$gene}->[1] + 1, "\t",
              $gene,"\n";
    }
    # flush the gene file before returning rather than at handle destruction
    $oh->close();

    return 1;
}

# Write a Circos configuration file with three highlight tracks.
#
#   $config_file  - path of the configuration file to create (overwritten).
#   $work_dir     - value of 'dir' in the <image> block.
#   $output_image - value of 'file' in the <image> block.
#   $chrom        - chromosome selection handed to create_chromosome_conf();
#                   undef means "display everything".
#   $karyotype    - karyotype file name.
#   $exonic       - highlight data file drawn at r0 = 0.8r (black stroke).
#   $novel        - highlight data file drawn at r0 = 0.6r (blue stroke).
#   $indel        - highlight data file drawn at r0 = 0.4r (red stroke).
#
# Returns 1.  Croaks if $config_file cannot be opened for writing or closed.
sub gen_snp_circos_conf
{
    my ( $config_file, $work_dir, $output_image, $chrom, $karyotype,
         $exonic, $novel, $indel
       ) = @_;

    # colors
    my $colors = "
<colors>
<<include etc/colors.conf>>
<<include etc/brewer.conf>>
</colors>
";

    # fonts
    my $fonts = "
<fonts>
<<include etc/fonts.conf>>
</fonts>
";

    my $misc = "
<ideogram>
<spacing>
#default = 0.01r
#default = 1000u
#break = 10u
default = 0.25r
</spacing>

radius           = 0.85r
thickness        = 50p
fill             = yes
fill_color       = black
stroke_thickness = 2
stroke_color     = black

show_label       = yes
label_font       = bold
label_radius     = dims(ideogram,radius) + 0.12r
label_size       = 72
label_parallel   = yes

show_bands            = yes
fill_bands            = yes
band_stroke_thickness = 2
band_stroke_color     = white
band_transparency     = 0
</ideogram>

show_ticks          = yes
show_tick_labels    = yes

show_grid          = no
grid_start         = 1.5r
grid_end           = 1.0r

<ticks>
skip_first_label     = no
skip_last_label      = no
radius               = dims(ideogram,radius_outer)
tick_separation      = 2p
min_label_distance_to_edge = 10p
label_separation = 5p
label_offset     = 5p
multiplier       = 1e-6
color            = black

<tick>
spacing        = 1u
size           = 8p
thickness      = 2p
show_label     = no
</tick>
<tick>
spacing        = 5u
size           = 12p
thickness      = 2p
show_label     = yes
label_size     = 20p
format         = %d
grid           = yes
grid_color     = lgrey
grid_thickness = 2p
</tick>
<tick>
spacing        = 10u
size           = 14p
thickness      = 2p
show_label     = yes
label_size     = 24p
format         = %d
grid           = yes
grid_color     = dgrey
grid_thickness = 2p
</tick>
</ticks>

";

    # image
    my $image = "
<image>
#<<include etc/image.conf>>
#Background* = AppistryBackground.png
background = white

dir = $work_dir
file = $output_image
png = yes
svg = no
radius = 1500p

angle_offset = -80
auto_alpha_colors = yes
auto_alpha_steps = 5
</image>
";

    # karyotype
    my $ktype = "
karyotype   = $karyotype
";

    # chromosome
    my $chromosomes = create_chromosome_conf( $chrom );

    # highlights - build this based on inputs
    my $highlights = "
<highlights>

z  = 0
<highlight>
file       = $exonic
r0         = 0.8r
r1         = 0.8r + 150p
stroke_thickness = 6
stroke_color = black
</highlight>

<highlight>
file       = $novel
r0         = 0.6r
r1         = 0.6r + 150p
stroke_thickness = 6
stroke_color     = blue
</highlight>

<highlight>
file       = $indel
r0         = 0.4r
r1         = 0.4r + 150p
stroke_thickness = 6
stroke_color     = red
</highlight>

</highlights>

";

    # housekeeping?
    my $hk = "
<<include etc/housekeeping.conf>>
";

    # Explicit mode argument: the file name is never parsed for a mode
    # prefix, and a failed open is reported instead of surfacing later as
    # an "undefined value" error on print.
    my $fh = IO::File->new($config_file, 'w')
                or croak "can't open $config_file for writing : $!";

    # squirt out the chunks of the config file.
    print $fh $colors. $fonts. $misc. $image. $ktype.
              $chromosomes. $highlights. $hk;
    $fh->close() or croak "can't close $config_file : $!";

    return 1;
}

# Build the chromosome-selection fragment of a Circos configuration.
#
#   $chromes2use - value for the 'chromosomes' setting, or undef to
#                  display every chromosome.
#
# Returns a string.  With undef it sets chromosomes_display_default = yes
# and ends with a newline; otherwise it sets chromosomes_display_default =
# no and appends "chromosomes = <value>" with no trailing newline.
#
# NOTE(review): earlier code computed a chromosomes_units value of 1500000
# for "hs1"/"hs2" but never emitted it; the output has always carried
# 1000000, and that output is kept here.  Confirm whether a larger unit
# was meant to be used for those chromosomes.
sub create_chromosome_conf {
    my ($chromes2use) = @_;

    my $restrict = defined($chromes2use);

    my $chr_str = "
chromosomes_units           = 1000000
chromosomes_display_default = " . ($restrict ? "no" : "yes") . "
";

    $chr_str .= "chromosomes = ". $chromes2use if $restrict;

    return $chr_str;
}

1;
