#!/usr/bin/perl
# This program is intended to create all the report HTML files.
use Getopt::Std;
use Data::Dumper;
use Cwd 'abs_path';
use File::Basename;

# Directory that holds this script; helper scripts such as peak_stat.R are
# looked up next to it.
my $SCRIPTNEST = dirname(abs_path($0));

##	INFO
##	Builds the HiChIP main report (MainDocument.html) for one run.
##	The only command-line option is -r, the path to the run info file.
our ($opt_r);

getopt('r');
die ("Usage: $0 \n\t -r [ run info file ]\n") unless defined $opt_r;

######## Parse runinfo file #########
# Every usable line has the form KEY=VALUE. Lines starting with '#' or '>'
# and lines without any '=' are skipped. Only the FIRST '=' separates the key
# from the value, so a value may be empty, may be "1", and may itself contain
# '='. (The previous split/grep/map one-liner silently dropped such values,
# which shifted every later key/value pair in the hash.)
print "Runinfo = " . $opt_r . "\n";
my %run_info_vars;
open(my $run_fh, "<", $opt_r) or die "Cannot open run info file '$opt_r': $!\n";
while (my $entry = <$run_fh>) {
	$entry =~ s/\r?\n\z//;          # values must not retain the line ending
	next if $entry =~ /^(#|>)/;     # comment / header line
	next unless $entry =~ /=/;      # blank or malformed line
	my ($key, $value) = split(/=/, $entry, 2);
	$run_info_vars{$key} = $value;
}
close($run_fh);

#print Dumper %run_info_vars;
# Copy the report-level settings out of the run info hash with a single hash
# slice; each variable is named after the run info key it is read from.
my ($PI, $GENOMEBUILD, $PROJECT_NAME, $RUNID, $PEAK_CALLER, $SEQ_TYPE,
	$END1_SEQ, $END2_SEQ, $IP_FILE, $INPUT_FILE, $TOOL_INFO)
	= @run_info_vars{qw(
		PI GENOMEBUILD PROJECT_NAME RUNID PEAK_CALLER SEQ_TYPE
		END1_SEQ END2_SEQ IP_FILE INPUT_FILE TOOL_INFO
	)};

# If "dedup" is specified, that means "deduplicate" or "remove duplicates," so REMOVE_DUP should be "YES"
# If "nodedup" is specified, that means "do not deduplicate" or "do not remove duplicates," so REMOVE_DUP should be "NO"
# Any other (or missing) value keeps the default of "YES".
# The test used to compare against "dedup", which reported the opposite of
# what the two rules above describe.
my $REMOVE_DUP="YES";
if (defined $run_info_vars{'REMOVE_DUP'} and $run_info_vars{'REMOVE_DUP'} eq "nodedup") { $REMOVE_DUP = "NO"; }

# Parse tool info file
# The file is read once, in-process, instead of launching a separate
# `perl -ne` through the shell for every key (that approach broke on paths
# containing spaces or shell metacharacters). For each key the value is the
# text after the last '=' on the line(s) that start with that key, without
# its trailing newline; a key that is not present yields an empty string.
my %tool_info_value = map { $_ => '' } qw(TOOL_VERSION ORGANISM LOCATION PLATFORM OPEN_SOURCE);
if (open(my $tool_fh, "<", $TOOL_INFO)) {
	my @tool_lines = <$tool_fh>;
	close($tool_fh);
	foreach my $key (keys %tool_info_value) {
		my @fields = split(/=/, join('', grep { /^\Q$key\E/ } @tool_lines));
		next unless @fields;
		my $value = $fields[-1];
		chomp $value;
		$tool_info_value{$key} = $value;
	}
} else {
	# Keep going with empty values, as the shell-based lookup did when the
	# file could not be read.
	warn "Unable to read tool info file '$TOOL_INFO': $!\n";
}

my $TOOL_VERSION = $tool_info_value{'TOOL_VERSION'};
my $ORGANISM     = $tool_info_value{'ORGANISM'};
my $LOCATION     = $tool_info_value{'LOCATION'};
my $PLATFORM     = $tool_info_value{'PLATFORM'};
my $OPEN_SOURCE  = $tool_info_value{'OPEN_SOURCE'};


######## Constants used within this file #######
# Directories this script reads from / writes to, all directly under WORK_DIR.
my ($MAP_OUT_DIR, $FASTQC_DIR, $DELIVERY_DIR) =
	map { $run_info_vars{'WORK_DIR'} . "/" . $_ } qw(mapout fastqc delivery);

## Clean variables that contain quotes
#$END1_SEQ =~ s/^[\"\'](.*)[\"\']$/$1/;
#$END2_SEQ =~ s/^[\"\'](.*)[\"\']$/$1/;
#$IP_FILE =~ s/^[\"\'](.*)[\"\']$/$1/;
#$INPUT_FILE =~ s/^[\"\'](.*)[\"\']$/$1/;

# Today's date for the "Run Date" row. localtime reports the month as 0-11
# and the year as an offset from 1900, so both are corrected for readability.
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime;
$mon  += 1;
$year += 1900;

# Look the current user up with finger; the report later prints the third
# ':'/';'-separated piece of that output as "Results Compiled By".
my $username = getpwuid( $< );
my @user = split(/[:;]/, `finger $username`);

my $TODAY = join("/", $mon, $mday, $year) . "\n";

# Break the whitespace-separated file lists from the run info file into
# arrays. split ' ' ignores leading whitespace and treats any run of
# whitespace as a single separator; the previous split(/\s/) produced empty
# entries for doubled or leading spaces, which inflated the reported file
# counts and added blank rows to the report tables.
my @end1seqfiles = split(' ', $END1_SEQ);
my @end2seqfiles = split(' ', $END2_SEQ);
my @ipfiles      = split(' ', $IP_FILE);
my @inputfiles   = split(' ', $INPUT_FILE);

# "macs2idr" and "macs2noidr" both select the macs2 peak caller; the suffix
# only records whether IDR analysis was requested. From here on the peak
# caller is known simply as "macs2".
my $IDR_ANALYSIS = ($PEAK_CALLER eq "macs2idr") ? "Yes" : "No";
$PEAK_CALLER = "macs2" if $PEAK_CALLER =~ /\Amacs2(?:no)?idr\z/;

# File counts shown in the Project Details table.
my $num_ipfiles    = @ipfiles;
my $num_inputfiles = @inputfiles;

######## Get filenames for map output. ########
# Library complexity summaries match *mappingQC.summary*, mapping summaries
# match *mapping.summary*.
my @library_complexity_files = glob("$MAP_OUT_DIR/*mappingQC.summary*");
my @mapping_summary_files    = glob("$MAP_OUT_DIR/*mapping.summary*");

######## Get FASTQC-related files. ########
# One glob call with three space-separated patterns, one per FastQC output
# file looked for in every sub-directory of the fastqc directory.
my @fastqc_files = glob(join(" ",
	map { "$FASTQC_DIR/*/$_" } qw(fastqc_report.html fastqc_data.txt summary.txt)));

######## Get other mapout files that need to be delivered ########
my @wig_files = glob("$MAP_OUT_DIR/*wig");
my @bdg_files = glob("$MAP_OUT_DIR/*bdg");



# Parse mapout/*summary* files for IV. Results Summary
# foreach $file (@library_complexity_files) {

	# open(IN, "$file") or die "Unable to open" . $file; 
	# while(<IN>){
		# if(/\#/ || /^\s/){ next; }
		# # TODO MTK: this needs to be done more understandably
		# ($co1[$i], $co2[$i], $co3[$i], $co4[$i], $co5[$i], $co6[$i]) = split("\t"); 
		# $i++;
	# }
	# close(IN);
# }

# Read every mapping summary file into the parallel column arrays
# @col1 .. @col8: one entry per data line, in file order, one array per
# tab-separated column. Lines containing '#' and lines starting with
# whitespace are not data and are skipped.
my (@col1, @col2, @col3, @col4, @col5, @col6, @col7, @col8);
my $summary_row = 0;
foreach my $summary_file (@mapping_summary_files) {
	open(my $summary_fh, "<", $summary_file) or die "Unable to open $summary_file: $!\n";
	while (my $record = <$summary_fh>) {
		next if $record =~ /\#/ || $record =~ /^\s/;
		chomp $record;   # keep the line ending out of the last column
		# TODO MTK: this needs to be done more understandably
		($col1[$summary_row], $col2[$summary_row], $col3[$summary_row], $col4[$summary_row],
		 $col5[$summary_row], $col6[$summary_row], $col7[$summary_row], $col8[$summary_row]) = split("\t", $record);
		$summary_row++;
	}
	close($summary_fh);
}

# Create the report file. OUT stays open until the end of the script and every
# later section is written through it. Three-argument open keeps characters in
# the path from being read as part of the open mode, and the error message now
# names the file that could not be written.
print ">$DELIVERY_DIR/MainDocument.html\n";
open(OUT, ">", "$DELIVERY_DIR/MainDocument.html") or die "Unable to write $DELIVERY_DIR/MainDocument.html: $!\n";
print OUT get_css();

# Page header, the two page scripts (imgSwap switches the visible plot,
# toggle2 shows/hides a "More Detail" section), the navigation sidebar,
# section I (Project Details) and the heading of section II (Analysis Plan).
print OUT "
<title>HiChIP Main Report</title>
<div id='top_header'>	
<div class='innerHeader'>Mayo BIC PI Support: HiChIP Main Report</div>
</div>

<script>
	var shownImg = \"pic1\";
	var lastLi = \"itemLi_1\";
    function imgSwap(imgId, myId){
		var previousImg = document.getElementById(shownImg);
		previousImg.style.display = \"none\";
		var newImg = document.getElementById(imgId);
		newImg.style.display = \"block\";
		shownImg=imgId;
		// now update list items
		document.getElementById(lastLi).className = \"\";
		document.getElementById(myId).className = \"selected\";
		lastLi=myId;
	}

	function toggle2(showHideDiv, switchTextDiv){
	var ele = document.getElementById(showHideDiv);
	var text = document.getElementById(switchTextDiv);
	if(ele.style.display == 'none'){
			ele.style.display = 'block';
			text.innerHTML = 'Less Detail';
	}else{
			ele.style.display = 'none';
			text.innerHTML = 'More Detail';
	}};
</script>

<style>
#lableContainer { width:320px; font-family:Georgia, Times, serif; font-size:15px; }
#lableContainer ul { list-style: none; }
#lableContainer ul li { }
#lableContainer ul li.selected { background:#E6E6E6 }
</style>

<div id='right-sidebar'><div id='nav-header'>Page Navigation</div>
<ol><li><a href='#Project Details'>Project Details</a></li>
<li><a href='#Analysis Plan'>Analysis Plan</a></li>
<li><a href='#Received Data'>Received Data</a></li>
<li><a href='#Results Summary'>Results Summary</a></li>
<li><a href='#Resources'>Resources</a></li>
</ol>
</div>

<a name='Project Details' id='Project Details'></a><h3>I. Project Details</h3>

<table cellspacing='0' class='sofT'>
<tr class='helpHed'><td>Criterion</td><td>Descriptor</td></tr>
<tr><td>Workflow</td><td>$TOOL_VERSION</td></tr>
<tr><td>Organism</td><td>$ORGANISM</td></tr>
<tr><td>Genome Build</td><td>$GENOMEBUILD</td></tr>
<tr><td>Location</td><td>$LOCATION</td></tr>
<tr><td>Platform</td><td>$PLATFORM</td></tr>
<tr><td>Peak Caller</td><td>$PEAK_CALLER</td></tr>
<tr><td>IDR Analysis performed?</td><td>$IDR_ANALYSIS</td></tr>
<tr><td>Sequence Type</td><td>$SEQ_TYPE</td></tr>
<tr><td>Number of INPUT_FILES</td><td>$num_inputfiles</td></tr>
<tr><td>Number of IP_FILES</td><td>$num_ipfiles</td></tr>
<tr><td>Referenced PI</td><td>$PI</td></tr>
<tr><td>Duplicates Removed?</td><td>$REMOVE_DUP</td></tr>
<tr><td>Run Date</td><td>$TODAY</td></tr>
<tr><td>Results Compiled By</td><td>$user[2]</td></tr>
</table>

<a name='Analysis Plan' id='Analysis Plan'></a><h3>II. Analysis Plan</h3>

<!-- TODO: MTK - Is this image always going to be the same? Will we overlay it with javascript or use a different image 
to show the actual path run? -->";

# Workflow picture for section II: when OPEN_SOURCE is "true" the image file
# sitting next to the report is referenced, for every other value the copy
# on bsiweb.mayo.edu is linked instead.
my $workflow_img = ($OPEN_SOURCE eq "true")
	? "\n<img src=\"chipseq-pic.jpg\" width=720 height=540>"
	: "\n<img src=http://bsiweb.mayo.edu/sites/default/files/u414/chipseq_pic.jpg width=720 height=540>";
print OUT $workflow_img;

# Section III (Received Data): run name, run id and one single-column table
# of file names per file list. The markup of the first table (END1_SEQS) is
# opened at the end of this leading print.
print OUT "
<p>
<b>Workflow description:</b> <a href=\"ChIP-Seq_workflow_summary.doc\">ChIP-Seq_workflow_summary.doc</a>
<p>

<a name='Received Data' id='Received Data'></a><h3>III. Received Data</h3>

<ol><li>Run Name: $PROJECT_NAME</li>

<br>
<table cellspacing='0' class='sofT'>
<tr class='helpHed'><td>Run #</td></tr>
<tr><td>$RUNID</td></tr>
</table>
<p>
END1_SEQS:<p>
<table cellspacing='0' class='sofT'>
<tr class='helpHed'><td>File Name</td></tr>";

# Renders one "<tr><td>name</td></tr>" row per file name, concatenated.
my $rows_for = sub { return join('', map { "<tr><td>" . $_ . "</td></tr>" } @_); };

print OUT $rows_for->(@end1seqfiles), "</table>";

# End2 sequence files will only be available for PE
if ($SEQ_TYPE eq "PE") {
	print OUT "<p>\n\tEND2_SEQS:<p>\n\t<table cellspacing='0' class='sofT'>\n\t<tr class='helpHed'><td>File Name</td></tr>",
		$rows_for->(@end2seqfiles),
		"</table>";
}

# The IP and INPUT tables share the same markup and differ only in the label.
foreach my $section ([ "IP_FILES", \@ipfiles ], [ "INPUT_FILES", \@inputfiles ]) {
	my ($label, $names) = @$section;
	print OUT "<p>\n${label}:<p>\n<table cellspacing='0' class='sofT'>\n<tr class='helpHed'><td>File Name</td></tr>",
		$rows_for->(@$names),
		"</table>";
}

# Parse input/IP files for III.2 Sample Summary
# One table row per file: INPUT_FILES are listed first as "Control", then
# IP_FILES as "Treatment".
#
# Changes from the previous two copy-pasted loops:
#  * the lane is determined per file and left blank when the name has no
#    "_L<digits>" part (it used to inherit the lane of the previous file);
#  * all lane digits are captured, so "_L001" gives "001" rather than "0";
#  * the ".fastq.gz" suffix is stripped from a copy, so @inputfiles and
#    @ipfiles are no longer modified as a side effect;
#  * rows are appended with push instead of a one-element array slice.
my @sample_summary_row;
foreach my $group ([ "Control", \@inputfiles ], [ "Treatment", \@ipfiles ]) {
	my ($aspect, $files) = @$group;
	foreach my $file (@$files) {
		# Greedy .* makes this the LAST "_L<digits>" in the name.
		my $lane = ($file =~ /.*_L([0-9]+)/) ? $1 : '';
		(my $sample = $file) =~ s/\.fastq\.gz//;
		push @sample_summary_row, "<tr><td>$lane</td><td>$sample</td><td>$aspect</td></tr>\n";
	}
}

print OUT "
<li>Sample Summary</li>
<p>
<table cellspacing='0' class='sofT'><tr class='helpHed'><td>Lane</td><td>Sample Name</td><td>Aspect</td></tr>";

print OUT join('', @sample_summary_row);

print OUT "</table>
</ol><br>";

print OUT 
"<a name='Results Summary' id='Results Summary'></a><h3>IV. Results Summary</h3>

<ol><li>Quality Control Checks</li>
<p>";


# Build up a list of the quality control check files and format them for the results file
# The result is a table with one row per FastQC directory:
#
# <PLAIN_TEXT_DIRECTORY_NAME> <LINKABLE_FASTQC_REPORT_HTML> <LINKABLE_FASTQC_DATA_TXT> <LINKABLE_SUMMARY_TXT>
#
# Files are grouped by the directory they live in. The previous version
# filled each column with its own running counter, so a directory missing one
# of the three files shifted every later link of that column onto the wrong
# row. A row is still only printed for directories that have a
# fastqc_report.html; a missing data/summary file leaves its cell empty.

# Column position of each known file name within a row (column 0 = directory).
my %fqc_column_for = (
	'fastqc_report.html' => 1,
	'fastqc_data.txt'    => 2,
	'summary.txt'        => 3,
);
my @fqc_dirs;      # directories that have a report, in the order they were found
my %fqc_row_for;   # directory => [ directory text, report link, data link, summary link ]

foreach my $path (@fastqc_files) {
	my $dir  = dirname($path);
	my $base = basename($path);
	next unless exists $fqc_column_for{$base};

	# Link text: whatever follows the last "fastqc/" in the path.
	(my $link_text = $path) =~ s/.*fastqc\///;

	# Link target: the path from its first "fastqc" onwards.
	my $href = ($path =~ /(fastqc.*)$/) ? $1 : $path;

	$fqc_row_for{$dir} ||= [ '', '', '', '' ];
	$fqc_row_for{$dir}[ $fqc_column_for{$base} ] = "<a href=$href>$link_text</a>";

	if ($base eq 'fastqc_report.html') {
		# Directory text: the report path without the file name, starting
		# at the fastqc directory.
		(my $dir_text = $path) =~ s/fastqc_report\.html//;
		$dir_text =~ s/.*\/(fastqc.*)$/$1/;
		$fqc_row_for{$dir}[0] = $dir_text;
		push @fqc_dirs, $dir;
	}
}

# Need better column headers here.
print OUT "<p>
<table cellspacing='0' class='sofT'>
<tr class='helpHed'><td>Directory</td><td>fastqc_report.html</td><td>fastqc_data.txt</td><td>summary.txt</td></tr>";

foreach my $dir (@fqc_dirs) {
	print OUT "<tr><td>" . join("</td><td>", @{ $fqc_row_for{$dir} }) . "</td></tr>\n";
}

print OUT "</table>"; 

# IV.2 Summary of Library Complexity: one row per *mappingQC.summary* file,
# taken from the LAST line of that file (tab-separated; fields 2-4 are
# printed with thousands separators, fields 5-6 as they are).
print OUT "<li>Summary of Library Complexity</li>\n<p>\n<table cellspacing='0' class='sofT'>";

# Column headers talk about "Pairs" unless the run is single-end.
my $switchWord = ($SEQ_TYPE eq "SE") ? "Reads" : "Pairs";
print OUT "<tr class='helpHed'><td>File Name</td>
	<td title=\"Number of Genomic Coordinates with Single Uniquely Mapped $switchWord\">Have 1 Uniq $switchWord</td>
	<td title=\"Genomic Coordinates with 1 or More Uniquely Mapped $switchWord\">Have more than 1 Uniq $switchWord</td>
	<td>Total # Uniquely Mapped $switchWord</td>
	<td>Ratio of Col 3/Col 4</td>
	<td>Ratio of Col 2/Col 3</td></tr>";

# Parse mapout/*summary* files for IV. Results Summary
foreach my $complexity_file (@library_complexity_files) {
	# Lexical handle + three-argument open; the error now names the file and
	# the reason instead of gluing the path onto "Unable to open".
	open(my $complexity_fh, "<", $complexity_file) or die "Unable to open $complexity_file: $!\n";
	my $last_line = "";
	while (my $record = <$complexity_fh>) { $last_line = $record; }
	close($complexity_fh);
	chomp $last_line;   # keep the line ending out of the last cell

	my $file_name   = (split(/\//, $complexity_file))[-1];   # last path component
	my $sample_name = (split(/\./, $file_name))[0];           # up to the first '.'
	my @stats       = split(/\t/, $last_line);

	print OUT "<tr><td title=\"$file_name\" style=\"white-space: nowrap;\"> $sample_name </td>";
	print OUT "<td>".commify($stats[1])."</td>";
	print OUT "<td>".commify($stats[2])."</td>";
	print OUT "<td>".commify($stats[3])."</td>";
	print OUT "<td>".$stats[4]."</td>";
	print OUT "<td>".$stats[5]."</td></tr>";
}

#if(/\#/ || /^\s/){ next; }
		# TODO MTK: this needs to be done more understandably
		#($co1[$i], $co2[$i], $co3[$i], $co4[$i], $co5[$i], $co6[$i]) = split("\t"); 
		#$i++;
# TODO: Choose a better index name than _i
# for $_i (0 .. $#library_complexity_files) {
	# @name=split(/\./, $co1[$_i]);
	# print "Library Complexity: $_i  $name[0]\n";
   # print OUT "<tr><td title=\"$co1[$_i]\" style=\"white-space: nowrap;\">". $name[0] . 
	# "</td><td>". commify($co2[$_i]) .
	# "</td><td>". commify($co3[$_i]) .
	# "</td><td>". commify($co4[$_i]) .
	# "</td><td>". $co5[$_i] .
	# "</td><td>". $co6[$_i] . "</td></tr>"; 
# }

print OUT "</table>
</p>

<li>Mapping Summary</li>
<p>
<table cellspacing='0' class='sofT'>";

# There are different table formats for SE vs. PE; any other SEQ_TYPE leaves
# the table empty.
# NOTE(review): both loops emit one row per mapping summary FILE, which
# assumes every file contributed exactly one data line to @col1..@col8 -
# confirm against the summary file format.
if($SEQ_TYPE eq "SE") {
	print OUT "<tr class='helpHed'><td>File Name</td>
		<td># Uniquely Mapped Reads</td>
		<td>Reads with multiple hits</td>
		<td># Unmapped reads</td>
		<td>Total # of Reads</td></tr>";
	
	# TODO MTK: this needs to be done more understandably
	for my $row (0 .. $#mapping_summary_files) {
		print OUT "<tr><td>" . $col1[$row] ."</td>
		<td>". $col2[$row] ."</td>
		<td>". $col3[$row] ."</td>
		<td>". $col4[$row] ."</td>
		<td>". $col5[$row] ."</td></tr>"; 
	}
} elsif ($SEQ_TYPE eq "PE") {

	print OUT "<tr class='helpHed'><td>File Name</td>
		<td># Uniquely Mapped Pairs</td>
		<td># Pairs w/ 1 End Uniquely Mapped</td>
		<td># Pairs w/ Both Ends Mapped to Multiple Locations</td>
		<td># Unmapped Pairs</td>
		<td># Pairs with Only 1 End Mapped (to 1 or Multiple Locations)</td>
		<td># of Unproperly Mapped Pairs (wrong orientation, wrong size, etc)</td>
		<td>Total # of Pairs</td>
		<td>% Unique Pairs</td>
		</tr>";
	
	# TODO MTK: this needs to be done more understandably
	for my $row (0 .. $#mapping_summary_files) {
		# Guard the division without touching the stored total: the old code
		# replaced a zero total with 0.0000000000001, which then showed up
		# in the "Total # of Pairs" cell as 1e-13.
		my $total_pairs = $col8[$row];
		my $unique_pct  = ($total_pairs != 0) ? ($col2[$row] / $total_pairs) * 100 : 0;
		my $uniq_cell   = fiftyFiftyColorPercentage($unique_pct);
		my ($short_name) = split(/\./, $col1[$row]);   # file name up to the first '.'
		print OUT "<tr><td title=\"$col1[$row]\" style=\"white-space: nowrap;\">" . $short_name ."</td>
		<td>". commify($col2[$row]) ."</td>
		<td>". commify($col3[$row]) ."</td>
		<td>". commify($col4[$row]) ."</td>
		<td>". commify($col5[$row]) ."</td>
		<td>". commify($col6[$row]) ."</td>
		<td>". commify($col7[$row]) ."</td>
		<td>". commify($col8[$row]) . "</td>
		<td>". $uniq_cell . "</td>
		</tr>"; 
	}
}
print OUT "</table></p>";


##########
## Raymond Added code: peak counts table & image?
# Runs peak_stat.R (located next to this script) once per peak file, passing
# the caller mode ("macs" or "sicer"), the peak file and WORK_DIR.
# NOTE(review): peak_stat.R presumably produces WORK_DIR/peak_counts.out and
# the plots under delivery/hist that the following sections read - confirm.
#
# Changes from the previous version:
#  * the hist directory is only created when missing and a failure is reported;
#  * Rscript is started with the list form of system(), so paths containing
#    spaces or shell metacharacters are passed through intact; as a
#    consequence anything Rscript prints now appears on this script's
#    stdout instead of being captured and discarded;
#  * a non-zero exit status is reported instead of ignored;
#  * the command line is logged for both peak callers, not just macs2.

my $HIST=$DELIVERY_DIR."/hist";
if (!-d $HIST) {
	mkdir($HIST) or warn "Unable to create $HIST: $!\n";
}

my ($peak_mode, @peak_files);
if( $PEAK_CALLER eq "macs2"){
	my $MACS_DIR=$run_info_vars{'WORK_DIR'}."/macs2out";
	# With IDR analysis only the "pr0" macs2 peak files are summarised.
	my $IDR_ANALYSIS_prefix = ($IDR_ANALYSIS eq "Yes") ? "pr0" : "";
	$peak_mode  = "macs";
	@peak_files = glob($MACS_DIR . "/*" . $IDR_ANALYSIS_prefix . "_macs2_peaks.xls");
}
else{
	## Assumes peak caller is sicer
	my $SICER_DIR=$run_info_vars{'WORK_DIR'}."/sicerout";
	$peak_mode  = "sicer";
	@peak_files = glob($SICER_DIR . "/*-islands-summary-FDR1E-2");
}

foreach my $peak_file (@peak_files) {
	my @command = ("Rscript", "$SCRIPTNEST/peak_stat.R", $peak_mode, $peak_file, $run_info_vars{'WORK_DIR'});
	print join(" ", @command) . "\n\n";
	system(@command) == 0
		or warn "peak_stat.R failed for $peak_file (wait status $?)\n";
}

# IV.4 Peak Calling Results: one table row per line of WORK_DIR/peak_counts.out
# (tab-separated). The third field is printed with thousands separators.
# The file is now read through a lexical handle that is closed afterwards,
# the error message names the file, line endings no longer end up inside the
# last cell, and blank lines no longer produce empty rows.
print OUT "<li>Peak Calling Results</li>";
my $peak_counts_file = $run_info_vars{'WORK_DIR'}."/peak_counts.out";
open(my $peakstats_fh, "<", $peak_counts_file) or die "Unable to open $peak_counts_file: $!\n";
my @peak_titles=("Sample Name","Algorithm","Total Peaks","Width Median","Width Mean","Width SD (+/-)","Fold Change Mean","Fold Change SD (+/-)");
print OUT "<table cellspacing='0' class='sofT'>";
print OUT "<tr class='helpHed'><td>".join("</td><td>", @peak_titles)."</td></tr>";
while (my $record = <$peakstats_fh>) {
	chomp $record;
	next if $record =~ /^\s*$/;
	my @fields = split(/\t/, $record, -1);   # -1 keeps trailing empty cells
	$fields[2] = commify($fields[2]);
	print OUT "<tr><td>".join("</td><td>", @fields)."</td></tr>";
}
close($peakstats_fh);
print OUT "</table></br>";



# IV.5 Supporting Read Plots: a clickable list of plot names next to the
# plots themselves; only the first plot is visible initially and clicking a
# name calls imgSwap() to show the matching image.
print OUT "<li>Supporting Read Plots</li>";
##pic1 must be first image id

# Collect the plot files up front: hidden entries ('.', '..', dot files) are
# dropped and the rest is sorted so the list order is stable between runs.
# The old "more than two directory entries" test treated a lone hidden file
# as a plot and then rendered an empty gallery.
my @plots;
if (opendir(my $hist_dh, $HIST)) {
	@plots = sort grep { !/^\./ } readdir($hist_dh);
	closedir($hist_dh);
} else {
	warn "Unable to read $HIST: $!\n";
}

if (!@plots) {
	print "No Plots Exist!\n";
	print OUT "<span style=\"font-size:1.3em;color:red;\">No Plots Were Generated!</span>";
}
else{
	# Build the name list and the image list in one pass so that list item N
	# and image N are guaranteed to refer to the same file.
	my ($label_items, $image_tags) = ('', '');
	my $id = 0;
	foreach my $plot (@plots) {
		$id++;
		my ($title)  = split(/\./, $plot);   # file name up to the first '.'
		my $selected = ($id == 1) ? " class=\"selected\"" : "";
		my $hidden   = ($id == 1) ? "" : " style=\"display:none;\"";
		$label_items .= "<li id=\"itemLi_".$id."\"".$selected." onclick=\"imgSwap('pic".$id."', 'itemLi_".$id."')\">".$title."</li>";
		$image_tags  .= "<img id=\"pic".$id."\" src=\"hist/".$plot."\" height=\"400px\" width=\"400px\"".$hidden."></img>";
	}

	print OUT "<div id=\"fullContainer\" style=\"width:700px\">
	<div id=\"lableContainer\" style=\"width:290px; display: inline-block; float: right\">\n</br><ul>\n";
	print OUT $label_items;
	print OUT "</ul>\n</div>\n<div id=\"imgContainer\" style=\"width:405px;\">";
	print OUT $image_tags;
	print OUT "</div>\n</div></br>";
}


########
### End of 4th Section tables.

# IV.6 Additional Result Files: links to the IGV session and setup guide that
# are expected next to the report.
print OUT "<li>Additional Result Files</li>
<p>IGV xml: <a href=igv_session.xml>igv_session.xml</a>
<br>IGV PDF: <a href=IGV_Setup.pdf>IGV_Setup.pdf</a>
</ol>";

# V. Resources: collapsible descriptions of the delivered reports/tools
# (toggled through toggle2()) followed by a list of external links.
# Fixed in the link list: the first link carried a stray quote
# ('target='_blank'), so its target attribute was not recognised, and the
# last list item was never closed.
# NOTE(review): the 'myContent_1' description (genes near peaks) has no
# header / "More Detail" link of its own, so it can never be expanded -
# its header block looks like it was lost; left as found.
print OUT "
<a name='Resources' id='Resources'></a><h3>V. Resources</h3>
<ul><p><div id='headerDiv'><div id='titleText'>Peaks & Nearby Transcripts</div>
<a id='myHeader_0' href=\"javascript:toggle2('myContent_0','myHeader_0');\" >More Detail</a><div style='clear:both;'><p>Contains all of the significant peaks along with transcripts found within 10,000 bps of the peak. (*_peak_vs_gene.xls)</p></div>
<div id='contentDiv'><div id='myContent_0' style='display: none;'>This report contains all of the significant peaks found by the peak-finding tool. The nearest transcript is reported plus all transcripts having a TSS or TTS within 10,000bps of the center of the peak. If there are no nearby transcripts for a peak then NAs are used for the transcript columns for that peak. Some peaks may be found on multiple rows if there are more than one nearby transcripts. For the USeq peaks, the \"Peak\" column is useful for finding the most significant peaks with peak 1 being the most significant and so on. There is also an IGV link for each peak that will allow the peak location to be viewed in the IGV browser.<br><br></div></div>
</p>
<div id='contentDiv'><div id='myContent_1' style='display: none;'>Contains all of the genes reported to be near peaks. It includes the number of peaks that reported the gene nearby in the *_Peaks_and_Nearby_Transcripts.xls report. Each gene contains an IGV link that will display the location of that gene in the IGV browser. In order for the link to work, IGV must currently be running on your computer with the provided IGV session loaded. There are also links to each gene's Entrez Gene page.<br><br></div></div>
</p>
<p><div id='headerDiv'><div id='titleText'>CEAS</div>
<a id='myHeader_2' href=\"javascript:toggle2('myContent_2','myHeader_2');\" >More Detail</a><div style='clear:both;'><p>Various plots and visualizations showing the distribution and location of the MACS peaks. (*_ceas.pdf)</p></div>
<div id='contentDiv'><div id='myContent_2' style='display: none;'>CEAS estimates the relative enrichment level of ChIP regions in each gene feature with respect to the whole genome. For this, it first calculates the percentages of the ChIP regions that reside in the following four categories: (a) promoters, (b) bidirectional promoters, (c) downstreams of a gene, and (d) gene bodies (3'UTRs, 5'UTRs, coding exons, and introns). 'Promoters' correspond to the upstream regions of the transcription start site (TSS) of genes. Three promoter sizes are displayed (1kb, 2kb, and 3kb by default) which shows the cumulative percentages of ChIP regions that fall in the region <= to that distance upstream of the TSS. `Bidirectional promoters' are promoter regions between divergently transcribed genes whose TSS are closer in proximity than user-defined distances (two options, 2.5kb and 5kb by default). `Downstreams' refer to the regions immediately downstream of genes, spanning up to the same search range as in `promoters' from the transcription termination site (TTS). `Gene bodies' are further categorized into UTR regions (3' and 5' UTRs), coding exons and introns. After the percentages of ChIP regions respective categories, P-values for the significance of the relative enrichment with respect to the background are calculated using one-sided binomial test. As a final summary of ChIP region annotation, CEAS draws a pie chart of how ChIP regions spread over the categories. If ChIP regions do not fall into any of the categories, they are considered to be `distal intergenic'.<br><br></div></div>
</p>
<p><div id='headerDiv'><div id='titleText'>MEME</div>
<a id='myHeader_3' href=\"javascript:toggle2('myContent_3','myHeader_3');\" >More Detail</a><div style='clear:both;'><p>MEME is a tool for discovering motifs in a group of related DNA or protein sequences.</p></div>
<div id='contentDiv'><div id='myContent_3' style='display: none;'>
A motif is a sequence pattern that occurs repeatedly in a group of related protein or DNA sequences. MEME represents motifs as position-dependent letter-probability matrices which describe the probability of each possible letter at each position in the pattern. Individual MEME motifs do not contain gaps. Patterns with variable-length gaps are split by MEME into two or more separate motifs.
MEME takes as input a group of DNA or protein sequences and outputs as many motifs as requested. MEME uses statistical modeling techniques to automatically choose the best width, number of occurrences, and description for each motif.<br><br></div></div></p>
<p><div id='headerDiv'><div id='titleText'>SICER</div>
<a id='myHeader_5' href=\"javascript:toggle2('myContent_5','myHeader_5');\" >More Detail</a><div style='clear:both;'><p>  SICER is a clustering approach for identification of enriched domains from histone modification ChIP-Seq data </p></div>
<div id='contentDiv'><div id='myContent_5' style='display: none;'>
The software package for SICER is written in Python. The current version is SICER_V1.1.  It requires the standard python compiler, as well as the numpy and scipy package. The shell script in the package is run under linux/unix.  Instruction is in the README file included inside the package. The installation of scipy causes problem to some users. Instruction to installation of scipy is also included in the README. 
<br><br>
Motivation: Chromatin states are the key to gene regulation and cell identity. Chromatin immunoprecipitation (ChIP) coupled with high-throughput sequencing (ChIP-Seq) is increasingly being used to map epigenetic states across genomes of diverse species. Chromatin modification profiles are frequently noisy and diffuse, spanning regions ranging from several nucleosomes to large domains of multiple genes. Much of the early work on the identification of ChIP-enriched regions for ChIP-Seq data has focused on identifying localized regions, such as transcription factor binding sites. Bioinformatic tools to identify diffuse domains of ChIP-enriched regions have been lacking.
<br><br>
Results: Based on the biological observation that histone modifications tend to cluster to form domains, we present a method that identifies spatial clusters of signals unlikely to appear by chance. This method pools together enrichment information from neighboring nucleosomes to increase sensitivity and specificity. By using genomic-scale analysis, as well as the examination of loci with validated epigenetic states, we demonstrate that this method outperforms existing methods in the identification of ChIP-enriched signals for histone modification profiles. We demonstrate the application of this unbiased method in important issues in ChIP-Seq data analysis, such as data normalization for quantitative comparison of levels of epigenetic modifications across cell types and growth conditions.<br><br></div></div></p>
<p><div id='headerDiv'><div id='titleText'>MACS</div>
<a id='myHeader_7' href=\"javascript:toggle2('myContent_7','myHeader_7');\" >More Detail</a><div style='clear:both;'><p>MACS is a poisson model-based approach to identify narrow ChIP-Seq peaks from some histone modifications and transcription factor binding.</p></div>
<div id='contentDiv'><div id='myContent_7' style='display: none;'>
Currently using Macs version 2.
<br><br></div></div></p>
</ul>
<br style='clear:both;'><ul><li><a href='http://www.broadinstitute.org/software/igv/' target='_blank'>Integrative Genomics Viewer</a></li><li><a href=http://www.genecards.org target='_blank'>GeneCards</a></li><li><a href=http://www.ncbi.nlm.nih.gov/geo target='_blank'>Gene Expression Omnibus</a></li><li><a href=http://genome.ucsc.edu target='_blank'>UCSC Genome Browser</a></li></ul>";

# The authorship request is only added when OPEN_SOURCE is exactly "false".
if($OPEN_SOURCE eq "false") {
	print OUT "
<p><b><u>Authorship Consideration</u></b>: Advancing scientific research is a primary motivation of all bioinformaticians and acknowledgment of contribution through authorship on manuscripts arising from this analysis is one way our work is assessed and attributed. We request to be considered for authorship on any manuscripts using the analysis results provided if you believe we have made substantive intellectual contributions to the study.
</p>";
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so a failed close means the report may be incomplete.
close(OUT) or die "Error while finishing $DELIVERY_DIR/MainDocument.html: $!\n";











# Returns its argument with a comma inserted between every group of three
# digits of the integer part (1234567.891 -> "1,234,567.891"). Digits after a
# decimal point and all non-digit characters are left untouched.
#
# The string is reversed so that groups can be counted from the right, the
# commas are inserted, and the string is reversed back. "scalar" is required
# on the final reverse: in list context a bare "reverse $input" reverses the
# one-element LIST and would hand back the still-reversed string.
sub commify {
   my $input = shift;
   $input = reverse $input;
   # A group of three digits gets a comma when more digits follow and no
   # decimal point lies ahead (i.e. we are not inside the fractional part).
   $input =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
   return scalar reverse $input;
}

# Formats a percentage as a coloured HTML <span>.
#   $_[0]  the percentage as a number (e.g. 73.25)
# Returns "<span style="color:...">NN.N %</span>" with the value rounded to
# one decimal place. The colour depends on the absolute value:
#   below 50 -> red, above 89 -> green, anything else -> black.
# All working variables are lexical now; the previous version wrote to the
# package globals $val, $color and $rounded.
sub fiftyFiftyColorPercentage{
	my $val = $_[0];
	my $color;
	if(abs($val)<50){$color="#A80000 "} ## less than 50% = red
	elsif(abs($val)>89){$color="#008A00"}	## more than 89% = green
	else{$color="#000000"}
	my $rounded = sprintf("%.1f", $val );
	return "<span style=\"color:$color\">$rounded %</span>";
}






# Returns the report's stylesheet as a single "<style type='text/css'>"
# element (no trailing newline), ready to be printed at the top of the page.
# Takes no arguments.
sub get_css{
	# Lexical variable (the previous version assigned to the package global
	# $css) and a non-interpolating heredoc, so the CSS is taken literally.
	my $css = <<'END_CSS';
<style type='text/css'>
div#top_header{
        background-color: #2419B2;
        color: #fff;
        height:60px;
        vertical-align: middle;
        -webkit-box-shadow:#33333c 5px 15px 44px;
        box-shadow:#33337B 5px 15px 44px;
        font-size: 2em;
}
div.innerHeader{
        position: relative;
        top: 16%;
        left: 5%;
}
div#right-sidebar{
	float:right; /* added to appease IE */
	position:fixed;
	top:75px;
	right:10px;
	width:155px;
	border:1px solid #086CA2;
	text-align: center;
	background: white;
}
div#nav-header{
        margin-left:auto;
        margin-right:auto;
        background-color: #2419B2;
        color: #fff;
}
table.sofT{
        text-align: center;
        font-family: Verdana;
        font-weight: normal;
        font-size: 11px;
        color: #404040;
        background-color: #fafafa;
        border: 1px #6699CC solid;
        border-collapse: collapse;
        border-spacing: 0px;
        margin-left: 1em; /* instead of <p> indentation */
}
table.sofT tr.helpHed td{
        border-bottom: 2px solid #6699CC;
        border-left: 1px solid #6699CC;
        background-color: #BEC8D1;
        text-align: left;
        text-indent: 5px;
        font-family: Verdana;
        font-weight: bold;
        font-size: 11px;
        color: #404040;
        padding-right:20px;
        padding-left:20px;
}
table.sofT tr.hardcoded td{
        border-bottom: 2px solid #6699CC;
        border-left: 1px solid #6699CC;
        background-color: #CC0000;
        text-align: left;
        text-indent: 10px;
        font-family: Verdana;
        font-size: 11px;
        color: #404040;
        padding-right:10px;
        padding-left:10px;
}
table.sofT tr td{
	border-bottom: 1px solid #9CF;
	border-top: 0px;
	border-left: 1px solid #9CF;
	border-right: 0px;
	text-align: left;
	text-indent: 10px;
	font-family: Verdana, sans-serif, Arial;
	font-weight: normal;
	font-size: 11px;
	color: #404040;
	background-color: #fafafa;
	padding-left:10px;
	padding-right:10px;
}
.reminder{
        margin-left: 1.5em;
        font-size: 0.8em;
}
.minor_warning{
        margin-left: 1.5em;
        font-size: 0.8em;
        color: red;
}
img#wk-pic{
        width:724px;
        height:567px;
}
div#headerDiv, #contentDiv {
        float: left;
}
#titleText{
        float: left;
        font-size: 1.0em;
        font-weight: bold;
        margin: 5px 0px;
}
#myHeader{
        font-size: 0.8em;
        font-weight: bold;
        margin: 5px;
}
#myContent{
        margin: 5px 10px;
}
#headerDiv a {
        float: left;
        margin: 5px 10px 5px 5px;
}
#headerDiv a:hover {
        color: #306EFF;
}
</style>
END_CSS
	chomp $css;   # the stylesheet is returned without a trailing newline
	return $css;
}


