#!/usr/bin/perl -w
# bedgraph_norm.pl
# Author: Huihuang Yan, 9/25/2012
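# Normalizes the signal column of a bedGraph file to reads per 1M reads:
# each value is divided by the read count given with "-c" and multiplied by 1,000,000.
# The count may be the total number of mapped reads or the number of reads in peak
# regions; "-m peak" or "-m total" (default) records which one was used and is
# only used to build the default output file name.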

use strict;
use File::Basename;

# Command-line handling. Options are read as flat "-flag value" pairs:
#   -d DirName     directory containing the input file; output is written there too
#   -i BdgInFile   raw bedGraph file name, relative to -d
#   -c ReadsCount  read count used as the normalization denominator
#   -m NormMethod  label used in the default output file name (default "total")
#   -o OutFile     output file name, relative to -d
#                  (default: <input basename>_<method>_based_norm.bdg)
my $usage = "$0 -d DirName -i BdgInFile -c ReadsCount [-m NormMethod] [-o OutFile]\n";

# An odd argument count means a flag is missing its value; building the
# option hash from it would silently mis-pair flags and values.
die $usage if @ARGV % 2;

my %args   = @ARGV;
my $infile = $args{'-i'};
my $count  = $args{'-c'};
my $method = $args{'-m'} || "total";
my $dir    = $args{'-d'};

# Stop before touching any file when a required option is missing.
die $usage
    unless defined $infile && length $infile
        && defined $dir
        && defined $count;

# The count is a divisor, so it must be a strictly positive number; checking
# here avoids a division-by-zero death after the output file was truncated.
die "ReadsCount (-c) must be a positive number, got '$count'\n"
    unless $count =~ /\A\d*\.?\d+(?:[eE][-+]?\d+)?\z/ && $count > 0;

my $filename = basename($infile, ".raw.bdg");
my $outfile  = $args{'-o'} || $filename . "_" . $method . "_based_norm.bdg";

# Three-argument open with lexical handles: the mode can no longer be
# influenced by characters in the file name, and $! reports why an open failed.
open(my $in, '<', "$dir/$infile")
    or die "Can't open raw bdg file $infile: $!\n";
open(my $out, '>', "$dir/$outfile")
    or die "Can't open output file $outfile: $!\n";

while (my $line = <$in>) {
    chomp $line;

    if ($line =~ /^chr/) {
        # Data line: chrom, start, end, value (whitespace separated).
        my @fields = split ' ', $line;

        if (@fields < 4) {
            warn "Skipping malformed line $. in $infile: expected 4 fields\n";
            next;
        }

        # Scale the raw value to "per 1M reads".
        printf {$out} "%s\t%d\t%d\t%.2f\n",
            $fields[0], $fields[1], $fields[2],
            ($fields[3] / $count) * 1000000;
    }
    else {
        # Every line not starting with "chr" (e.g. a track header) is copied
        # through with "raw" relabelled as "norm".
        # NOTE(review): data lines for sequences whose names do not start with
        # "chr" also take this path and are NOT normalized - confirm inputs
        # always use chr-prefixed names.
        $line =~ s/raw/norm/g;
        print {$out} $line, "\n";
    }
}

close($in);

# Buffered write errors (e.g. disk full) only surface at close time.
close($out) or die "Error writing output file $outfile: $!\n";

