/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.sting.gatk.datasources.reads.utilities;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.ParsingEngine;
import org.broadinstitute.sting.gatk.datasources.reads.FilePointer;
import org.broadinstitute.sting.gatk.datasources.reads.IntervalSharder;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.text.ListFileUtils;

/**
 * Command-line utility that iterates over the {@link FilePointer}s produced by
 * {@link IntervalSharder} for a set of SAM/BAM inputs and reports the ones that are
 * unusually large.
 *
 * <p>The work is done in two passes over the same interval set:
 * <ol>
 *   <li>accumulate the sum and the sum of squares of {@code FilePointer.size()} to derive
 *       the mean and standard deviation of the shard size;</li>
 *   <li>re-shard and write to {@link #out} every shard whose size is strictly greater than
 *       {@code mean + 5 * stddev}, as a tab-separated table.</li>
 * </ol>
 */
public class FindLargeShards
extends CommandLineProgram {
    /** A progress line is logged once per this many shards in each pass. */
    private static final long PROGRESS_INTERVAL = 1000L;
    /** A shard is reported when its size exceeds the mean by more than this many standard deviations. */
    private static final long OUTLIER_STDDEV_MULTIPLIER = 5L;
    /** Column header shared by the progress log lines and the output table. */
    private static final String TABLE_HEADER = "Contig\tRegion.Start\tRegion.Stop\tSize";

    private static final Logger logger = Logger.getLogger(FindLargeShards.class);

    @Input(fullName="input_file", shortName="I", doc="SAM or BAM file(s)", required=false)
    public List<String> samFiles = new ArrayList<String>();
    @Input(fullName="reference_sequence", shortName="R", doc="Reference sequence file", required=false)
    public File referenceFile = null;
    @Input(fullName="intervals", shortName="L", doc="A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required=false)
    public List<String> intervals = null;
    @Output(required=false)
    public PrintStream out = System.out;

    /** Running sum of squared shard sizes; BigInteger so the squares cannot overflow. */
    private BigInteger sumOfSquares = BigInteger.valueOf(0L);
    /** Running sum of shard sizes. */
    private BigInteger sum = BigInteger.valueOf(0L);
    /** Number of shards seen during the first (statistics) pass. */
    private long numberOfShards;

    /**
     * Runs both passes and writes the table of oversized shards to {@link #out}.
     *
     * <p>If the sharder yields no shards at all, the mean and variance are undefined; in that
     * case only the table header is written and the method returns normally instead of failing
     * with a divide-by-zero.
     *
     * @return always 0
     * @throws IOException declared for callers; NOTE(review): presumably raised while opening
     *         the reference or BAM inputs - confirm against the reader implementations
     */
    public int execute() throws IOException {
        // NOTE(review): referenceFile is declared required=false but is used unconditionally here.
        IndexedFastaSequenceFile refReader = new IndexedFastaSequenceFile(this.referenceFile);
        GenomeLocParser genomeLocParser = new GenomeLocParser((ReferenceSequenceFile)refReader);
        List bamReaders = ListFileUtils.unpackBAMFileList(this.samFiles, (ParsingEngine)this.parser);
        SAMDataSource dataSource = new SAMDataSource(bamReaders, new ThreadAllocation(), null, genomeLocParser);

        // Use the user-supplied intervals when given; otherwise cover every contig of the reference end to end.
        GenomeLocSortedSet intervalSortedSet;
        if (this.intervals != null) {
            intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, this.intervals), IntervalMergingRule.ALL);
        } else {
            intervalSortedSet = new GenomeLocSortedSet(genomeLocParser);
            for (SAMSequenceRecord entry : refReader.getSequenceDictionary().getSequences()) {
                intervalSortedSet.add(genomeLocParser.createGenomeLoc(entry.getSequenceName(), 1, entry.getSequenceLength()));
            }
        }

        // Pass 1: accumulate sum and sum of squares of the shard sizes.
        logger.info("PROGRESS: Calculating mean and variance: " + TABLE_HEADER);
        IntervalSharder sharder = IntervalSharder.shardOverIntervals(dataSource, intervalSortedSet);
        while (sharder.hasNext()) {
            FilePointer filePointer = sharder.next();
            long size = filePointer.size();
            BigInteger bigSize = BigInteger.valueOf(size);
            this.sumOfSquares = this.sumOfSquares.add(bigSize.pow(2));
            this.sum = this.sum.add(bigSize);
            ++this.numberOfShards;
            if (this.numberOfShards % PROGRESS_INTERVAL == 0L) {
                GenomeLoc boundingRegion = this.getBoundingRegion(filePointer, genomeLocParser);
                logger.info(String.format("PROGRESS: Calculating mean and variance: %s\t%d\t%d\t%d", boundingRegion.getContig(), boundingRegion.getStart(), boundingRegion.getStop(), size));
            }
        }

        // With zero shards the divisions below would throw ArithmeticException; emit an empty table instead.
        if (this.numberOfShards == 0L) {
            logger.warn("No shards were produced; mean and variance are undefined and there is nothing to report.");
            this.out.printf(TABLE_HEADER + "%n");
            return 0;
        }

        // variance = (sumOfSquares - sum^2 / n) / n, evaluated with integer (truncating) division.
        BigInteger shardCount = BigInteger.valueOf(this.numberOfShards);
        long mean = this.sum.divide(shardCount).longValue();
        BigInteger variance = this.sumOfSquares.subtract(this.sum.pow(2).divide(shardCount)).divide(shardCount);
        long stddev = (long)Math.sqrt(variance.doubleValue());
        logger.info(String.format("Number of shards: %d; mean uncompressed size = %d; stddev uncompressed size  = %d%n", this.numberOfShards, mean, stddev));

        // Pass 2: re-shard and report everything strictly above the threshold.
        long threshold = mean + stddev * OUTLIER_STDDEV_MULTIPLIER;
        logger.warn("PROGRESS: Searching for large shards: " + TABLE_HEADER);
        this.out.printf(TABLE_HEADER + "%n");
        sharder = IntervalSharder.shardOverIntervals(dataSource, intervalSortedSet);
        // Separate counter so the statistics field is not inflated and progress cadence restarts at zero.
        long shardsScanned = 0L;
        while (sharder.hasNext()) {
            FilePointer filePointer = sharder.next();
            long size = filePointer.size();
            ++shardsScanned;
            if (size > threshold) {
                GenomeLoc boundingRegion = this.getBoundingRegion(filePointer, genomeLocParser);
                this.out.printf("%s\t%d\t%d\t%d%n", boundingRegion.getContig(), boundingRegion.getStart(), boundingRegion.getStop(), size);
            } else if (shardsScanned % PROGRESS_INTERVAL == 0L) {
                GenomeLoc boundingRegion = this.getBoundingRegion(filePointer, genomeLocParser);
                logger.info(String.format("PROGRESS: Searching for large shards: %s\t%d\t%d\t%d", boundingRegion.getContig(), boundingRegion.getStart(), boundingRegion.getStop(), size));
            }
        }
        return 0;
    }

    /**
     * Builds a single location spanning a file pointer's locations: the contig and start of
     * the first location and the stop of the last one.
     *
     * @param filePointer     shard whose locations are summarized; NOTE(review): assumes at
     *                        least one location, all on the same contig - confirm with the sharder
     * @param genomeLocParser parser used to create the resulting location
     * @return location from the first region's start to the last region's stop
     */
    private GenomeLoc getBoundingRegion(FilePointer filePointer, GenomeLocParser genomeLocParser) {
        List<GenomeLoc> regions = filePointer.getLocations();
        GenomeLoc first = regions.get(0);
        GenomeLoc last = regions.get(regions.size() - 1);
        return genomeLocParser.createGenomeLoc(first.getContig(), first.getStart(), last.getStop());
    }

    /**
     * Command-line entry point. Exits the JVM with status 0 on success and 1 if an exception
     * was raised.
     *
     * @param argv command-line arguments, passed through to {@code start}
     * @throws Exception declared for the rethrow below; because the {@code finally} block calls
     *         {@link System#exit(int)}, the JVM terminates before the exception reaches a caller
     */
    public static void main(String[] argv) throws Exception {
        int returnCode = 0;
        try {
            FindLargeShards instance = new FindLargeShards();
            FindLargeShards.start((CommandLineProgram)instance, (String[])argv);
            returnCode = 0;
        }
        catch (Exception ex) {
            returnCode = 1;
            ex.printStackTrace();
            throw ex;
        }
        finally {
            System.exit(returnCode);
        }
    }
}

