/*
 * Decompiled with CFR 0.152.
 */
package net.sf.picard.sam;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import net.sf.picard.PicardException;
import net.sf.picard.cmdline.Option;
import net.sf.picard.cmdline.Usage;
import net.sf.picard.io.IoUtil;
import net.sf.picard.metrics.MetricsFile;
import net.sf.picard.sam.AbstractDuplicateFindingAlgorithm;
import net.sf.picard.sam.DuplicationMetrics;
import net.sf.picard.util.Histogram;
import net.sf.picard.util.Log;
import net.sf.picard.util.PeekableIterator;
import net.sf.picard.util.ProgressLogger;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.SequenceUtil;
import net.sf.samtools.util.SortingCollection;
import net.sf.samtools.util.StringUtil;

public class EstimateLibraryComplexity
extends AbstractDuplicateFindingAlgorithm {
    /** Usage text describing the algorithm; exposed to the command-line framework via {@code @Usage}. */
    @Usage
    public final String USAGE = "Attempts to estimate library complexity from sequence of read pairs alone. Does so by sorting all reads by the first N bases (5 by default) of each read and then comparing reads with the first N bases identical to each other for duplicates.  Reads are considered to be duplicates if they match each other with no gaps and an overall mismatch rate less than or equal to MAX_DIFF_RATE (0.03 by default).\n\nReads of poor quality are filtered out so as to provide a more accurate estimate. The filtering removes reads with any no-calls in the first N bases or with a mean base quality lower than MIN_MEAN_QUALITY across either the first or second read.\n\nUnpaired reads are ignored in this computation.\n\nThe algorithm attempts to detect optical duplicates separately from PCR duplicates and excludes these in the calculation of library size. Also, since there is no alignment to screen out technical reads one further filter is applied on the data.  After examining all reads a histogram is built of [#reads in duplicate set -> #of duplicate sets]; all bins that contain exactly one duplicate set are then removed from the histogram as outliers before library size is estimated.";
    /** Input files; each one is checked for readability and then read in full by {@code doWork()}. */
    @Option(shortName="I", doc="One or more files to combine and estimate library complexity from. Reads can be mapped or unmapped.")
    public List<File> INPUT;
    /** Destination of the metrics file written at the end of {@code doWork()}. */
    @Option(shortName="O", doc="Output file to writes per-library metrics to.")
    public File OUTPUT;
    /**
     * Length of the read-start prefix (applied to both ends of a pair) used to sort pairs and to
     * delimit the groups that are compared for duplicates; also the number of leading bases that
     * must be free of no-calls for a read to pass the quality check.
     */
    @Option(doc="The minimum number of bases at the starts of reads that must be identical for reads to be grouped together for duplicate detection.  In effect total_reads / 4^max_id_bases reads will be compared at a time, so lower numbers will produce more accurate results but consume exponentially more memory and CPU.")
    public int MIN_IDENTICAL_BASES = 5;
    /** Mismatch-rate ceiling handed to {@code matches()} when deciding whether two pairs are duplicates. */
    @Option(doc="The maximum rate of differences between two reads to call them identical.")
    public double MAX_DIFF_RATE = 0.03;
    /** Mean base-quality threshold handed to {@code passesQualityCheck()} for every read. */
    @Option(doc="The minimum mean quality of the bases in a read pair for the read to be analyzed. Reads with lower average quality are filtered out and not considered in any calculations.")
    public int MIN_MEAN_QUALITY = 20;
    /** Groups larger than this multiple of the expected mean group size are skipped (with a warning) by {@code doWork()}. */
    @Option(doc="Do not process self-similar groups that are this many times over the mean expected group size. I.e. if the input contains 10m read pairs and MIN_IDENTICAL_BASES is set to 5, then the mean expected group size would be approximately 10 reads.")
    public int MAX_GROUP_RATIO = 500;
    /** Logger used for progress, informational and warning messages. */
    private final Log log = Log.getInstance(EstimateLibraryComplexity.class);

    /**
     * Command-line entry point: builds the program object and hands the raw
     * arguments to {@code instanceMainWithExit}.
     *
     * @param args the command-line arguments, passed through unchanged
     */
    public static void main(String[] args) {
        final EstimateLibraryComplexity program = new EstimateLibraryComplexity();
        program.instanceMainWithExit(args);
    }

    /**
     * Runs the whole estimation: reads every input file, joins mates by read name, stores
     * quality-passing pairs in a disk-backed sorter, then walks the sorted pairs one prefix group
     * at a time to build per-library duplication and optical-duplicate histograms, and finally
     * writes one {@link DuplicationMetrics} row plus the duplication histogram per library to
     * {@code OUTPUT}.
     *
     * @return 0 once the metrics file has been written
     */
    @Override
    protected int doWork() {
        for (final File f : this.INPUT) {
            IoUtil.assertFileIsReadable(f);
        }

        // Do the division in long arithmetic and clamp before narrowing: maxMemory() may be
        // Long.MAX_VALUE when the JVM has no limit, which would otherwise truncate to garbage.
        final long pairsThatFit = Runtime.getRuntime().maxMemory() / (long)PairedReadSequence.size_in_bytes / 2L;
        final int maxInMemory = (int)Math.min((long)Integer.MAX_VALUE, pairsThatFit);
        this.log.info("Will store " + maxInMemory + " read pairs in memory before sorting.");

        final List<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>();
        long recordsRead = 0L;
        final SortingCollection<PairedReadSequence> sorter = SortingCollection.newInstance(PairedReadSequence.class, new PairedReadCodec(), new PairedReadComparator(), maxInMemory, this.TMP_DIR);
        final ProgressLogger progress = new ProgressLogger(this.log, 1000000, "Read");

        for (final File f : this.INPUT) {
            // Mates seen so far whose partner has not yet been encountered, keyed by read name.
            final Map<String, PairedReadSequence> pendingByName = new HashMap<String, PairedReadSequence>();
            final SAMFileReader in = new SAMFileReader(f);
            try {
                readGroups.addAll(in.getFileHeader().getReadGroups());
                for (final SAMRecord rec : in) {
                    // Only reads flagged as paired AND as first or second of the pair are usable.
                    if (!rec.getReadPairedFlag()) {
                        continue;
                    }
                    if (!rec.getFirstOfPairFlag() && !rec.getSecondOfPairFlag()) {
                        continue;
                    }

                    PairedReadSequence prs = pendingByName.remove(rec.getReadName());
                    if (prs == null) {
                        prs = new PairedReadSequence();
                        // The read group is only recorded when location parsing succeeded.
                        if (this.addLocationInformation(rec.getReadName(), prs)) {
                            final SAMReadGroupRecord rg = rec.getReadGroup();
                            if (rg != null) {
                                prs.setReadGroup((short)readGroups.indexOf(rg));
                            }
                        }
                        pendingByName.put(rec.getReadName(), prs);
                    }

                    // A pair is usable only if both of its reads pass the quality check.
                    final boolean passesQualityCheck = this.passesQualityCheck(rec.getReadBases(), rec.getBaseQualities(), this.MIN_IDENTICAL_BASES, this.MIN_MEAN_QUALITY);
                    prs.qualityOk = prs.qualityOk && passesQualityCheck;

                    // Put reverse-strand reads back into their as-sequenced orientation.
                    final byte[] bases = rec.getReadBases();
                    if (rec.getReadNegativeStrandFlag()) {
                        SequenceUtil.reverseComplement(bases);
                    }
                    if (rec.getFirstOfPairFlag()) {
                        prs.read1 = bases;
                    } else {
                        prs.read2 = bases;
                    }

                    if (prs.read1 != null && prs.read2 != null && prs.qualityOk) {
                        sorter.add(prs);
                    }
                    progress.record(rec);
                    // Count every examined read; the expected group size below depends on it.
                    ++recordsRead;
                }
            } finally {
                in.close();
            }
        }

        this.log.info("Finished reading - moving on to scanning for duplicates.");
        final PeekableIterator<PairedReadSequence> iterator = new PeekableIterator<PairedReadSequence>(sorter.iterator());
        final Map<String, Histogram<Integer>> duplicationHistosByLibrary = new HashMap<String, Histogram<Integer>>();
        final Map<String, Histogram<Integer>> opticalHistosByLibrary = new HashMap<String, Histogram<Integer>>();
        int groupsProcessed = 0;
        long lastLogTime = System.currentTimeMillis();

        // Expected pairs per prefix group: (reads / 2) spread over 4^(2 * prefix length) prefixes.
        // Kept in long so neither the count nor the ratio multiplication below can overflow.
        final long possiblePrefixes = Math.max(1L, (long)Math.pow(4.0, this.MIN_IDENTICAL_BASES * 2));
        final long meanGroupSize = Math.max(1L, recordsRead / 2L / possiblePrefixes);

        while (iterator.hasNext()) {
            final List<PairedReadSequence> group = this.getNextGroup(iterator);
            if ((long)group.size() > meanGroupSize * (long)this.MAX_GROUP_RATIO) {
                final PairedReadSequence prs = group.get(0);
                this.log.warn("Omitting group with over " + this.MAX_GROUP_RATIO + " times the expected mean number of read pairs. " + "Mean=" + meanGroupSize + ", Actual=" + group.size() + ". Prefixes: " + StringUtil.bytesToString(prs.read1, 0, this.MIN_IDENTICAL_BASES) + " / " + StringUtil.bytesToString(prs.read2, 0, this.MIN_IDENTICAL_BASES));
                continue;
            }

            final Map<String, List<PairedReadSequence>> sequencesByLibrary = this.splitByLibrary(group, readGroups);
            for (final Map.Entry<String, List<PairedReadSequence>> entry : sequencesByLibrary.entrySet()) {
                final String library = entry.getKey();
                final List<PairedReadSequence> seqs = entry.getValue();
                Histogram<Integer> duplicationHisto = duplicationHistosByLibrary.get(library);
                Histogram<Integer> opticalHisto = opticalHistosByLibrary.get(library);
                if (duplicationHisto == null) {
                    duplicationHisto = new Histogram<Integer>("duplication_group_count", library);
                    opticalHisto = new Histogram<Integer>("duplication_group_count", "optical_duplicates");
                    duplicationHistosByLibrary.put(library, duplicationHisto);
                    opticalHistosByLibrary.put(library, opticalHisto);
                }

                // All-against-all comparison within the library; entries already assigned to a
                // duplicate set are nulled out so they are not examined again.
                for (int i = 0; i < seqs.size(); ++i) {
                    final PairedReadSequence lhs = seqs.get(i);
                    if (lhs == null) {
                        continue;
                    }
                    final List<PairedReadSequence> dupes = new ArrayList<PairedReadSequence>();
                    for (int j = i + 1; j < seqs.size(); ++j) {
                        final PairedReadSequence rhs = seqs.get(j);
                        if (rhs != null && this.matches(lhs, rhs, this.MAX_DIFF_RATE)) {
                            dupes.add(rhs);
                            seqs.set(j, null);
                        }
                    }

                    if (dupes.isEmpty()) {
                        // No duplicate found: a set of size one.
                        duplicationHisto.increment(1);
                        continue;
                    }
                    dupes.add(lhs);
                    final int duplicateCount = dupes.size();
                    duplicationHisto.increment(duplicateCount);
                    final boolean[] flags = this.findOpticalDuplicates(dupes, this.OPTICAL_DUPLICATE_PIXEL_DISTANCE);
                    for (final boolean isOptical : flags) {
                        if (isOptical) {
                            opticalHisto.increment(duplicateCount);
                        }
                    }
                }
            }

            ++groupsProcessed;
            // Emit a progress line at most once per minute.
            if (lastLogTime < System.currentTimeMillis() - 60000L) {
                this.log.info("Processed " + groupsProcessed + " groups.");
                lastLogTime = System.currentTimeMillis();
            }
        }
        iterator.close();
        sorter.cleanup();

        final MetricsFile<DuplicationMetrics, Integer> file = this.getMetricsFile();
        for (final Map.Entry<String, Histogram<Integer>> entry : duplicationHistosByLibrary.entrySet()) {
            final String library = entry.getKey();
            final Histogram<Integer> duplicationHisto = entry.getValue();
            final Histogram<Integer> opticalHisto = opticalHistosByLibrary.get(library);
            final DuplicationMetrics metrics = new DuplicationMetrics();
            metrics.LIBRARY = library;
            for (final Integer bin : duplicationHisto.keySet()) {
                final double duplicateGroups = duplicationHisto.get(bin).getValue();
                final double opticalDuplicates = opticalHisto.get(bin) == null ? 0.0 : opticalHisto.get(bin).getValue();
                // Bins holding a single duplicate set are treated as outliers and left out.
                if (!(duplicateGroups > 1.0)) {
                    continue;
                }
                // Compound assignment narrows the double sum back to long, as the original casts did.
                metrics.READ_PAIRS_EXAMINED += (double)bin.intValue() * duplicateGroups;
                metrics.READ_PAIR_DUPLICATES += (double)(bin - 1) * duplicateGroups;
                metrics.READ_PAIR_OPTICAL_DUPLICATES += opticalDuplicates;
            }
            metrics.calculateDerivedMetrics();
            file.addMetric(metrics);
            file.addHistogram(duplicationHisto);
        }
        file.write(this.OUTPUT);
        return 0;
    }

    /**
     * Decides whether two read pairs are close enough to be called duplicates.
     * Each end is compared position by position over the length shared by both
     * pairs, starting after the first {@code MIN_IDENTICAL_BASES} bases, and the
     * mismatches of both ends are pooled against a single error budget.
     *
     * @param lhs         first read pair
     * @param rhs         second read pair
     * @param maxDiffRate fraction of the compared length that may mismatch
     * @return {@code false} as soon as the pooled mismatch count exceeds the budget, otherwise {@code true}
     */
    private boolean matches(PairedReadSequence lhs, PairedReadSequence rhs, double maxDiffRate) {
        final int overlap1 = Math.min(lhs.read1.length, rhs.read1.length);
        final int overlap2 = Math.min(lhs.read2.length, rhs.read2.length);
        // Error budget: the rate applied to the combined overlap of both ends, rounded down.
        final int allowedMismatches = (int)Math.floor((double)(overlap1 + overlap2) * maxDiffRate);

        int mismatches = 0;
        for (int pos = this.MIN_IDENTICAL_BASES; pos < overlap1; ++pos) {
            if (lhs.read1[pos] != rhs.read1[pos]) {
                ++mismatches;
                if (mismatches > allowedMismatches) {
                    return false;
                }
            }
        }
        for (int pos = this.MIN_IDENTICAL_BASES; pos < overlap2; ++pos) {
            if (lhs.read2[pos] != rhs.read2[pos]) {
                ++mismatches;
                if (mismatches > allowedMismatches) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Consumes and returns the next run of read pairs from the iterator whose
     * first {@code MIN_IDENTICAL_BASES} bases on both ends equal those of the
     * run's first element. The first pair that differs is left on the iterator.
     *
     * @param iterator source of read pairs; at least one element is taken from it
     * @return the pairs of the run, in iteration order
     */
    List<PairedReadSequence> getNextGroup(PeekableIterator<PairedReadSequence> iterator) {
        final List<PairedReadSequence> members = new ArrayList<PairedReadSequence>();
        final PairedReadSequence seed = iterator.next();
        members.add(seed);

        while (iterator.hasNext()) {
            final PairedReadSequence candidate = iterator.peek();
            boolean samePrefix = true;
            for (int pos = 0; samePrefix && pos < this.MIN_IDENTICAL_BASES; ++pos) {
                samePrefix = seed.read1[pos] == candidate.read1[pos] && seed.read2[pos] == candidate.read2[pos];
            }
            if (!samePrefix) {
                // The candidate starts a new group; leave it for the next call.
                break;
            }
            members.add(iterator.next());
        }
        return members;
    }

    /**
     * Buckets read pairs by library name. The name is the library of the read
     * group whose index is stored on the pair; pairs without a read group
     * (index -1), or whose read group declares no library, go under "Unknown".
     *
     * @param input the read pairs to distribute
     * @param rgs   read groups, indexed by the read-group value stored on each pair
     * @return library name mapped to the pairs belonging to it, in input order
     */
    Map<String, List<PairedReadSequence>> splitByLibrary(List<PairedReadSequence> input, List<SAMReadGroupRecord> rgs) {
        final Map<String, List<PairedReadSequence>> byLibrary = new HashMap<String, List<PairedReadSequence>>();
        for (final PairedReadSequence pair : input) {
            String library = "Unknown";
            final short rgIndex = pair.getReadGroup();
            if (rgIndex != -1) {
                final String declared = rgs.get(rgIndex).getLibrary();
                if (declared != null) {
                    library = declared;
                }
            }

            List<PairedReadSequence> bucket = byLibrary.get(library);
            if (bucket == null) {
                bucket = new ArrayList<PairedReadSequence>();
                byLibrary.put(library, bucket);
            }
            bucket.add(pair);
        }
        return byLibrary;
    }

    /**
     * Checks whether a read is good enough to take part in duplicate detection.
     * A read passes when it is at least {@code seedLength} bases long, none of
     * its first {@code seedLength} bases is a no-call, and the integer mean of
     * its base qualities is at least {@code minQuality}.
     *
     * @param bases      the read's bases
     * @param quals      the read's base qualities; may be empty
     * @param seedLength number of leading bases that must be present and called
     * @param minQuality minimum acceptable (truncated) mean base quality
     * @return {@code true} if the read passes every check
     */
    boolean passesQualityCheck(byte[] bases, byte[] quals, int seedLength, int minQuality) {
        if (bases.length < seedLength) {
            return false;
        }
        for (int i = 0; i < seedLength; ++i) {
            if (SequenceUtil.isNoCall(bases[i])) {
                return false;
            }
        }
        if (quals.length == 0) {
            // No qualities available: treat the mean as 0 rather than dividing by zero.
            return minQuality <= 0;
        }
        int total = 0;
        for (final byte q : quals) {
            total += q;
        }
        return total / quals.length >= minQuality;
    }

    /**
     * Orders read pairs by the first {@code BASES} bases of read 1, then by the
     * first {@code BASES} bases of read 2, so that pairs sharing both prefixes
     * end up adjacent. Pairs with equal prefixes are ordered by identity hash
     * code, which is an arbitrary tie-break.
     */
    class PairedReadComparator
    implements Comparator<PairedReadSequence> {
        /** Prefix length compared on each end; captured from the enclosing program at construction. */
        final int BASES;

        PairedReadComparator() {
            this.BASES = EstimateLibraryComplexity.this.MIN_IDENTICAL_BASES;
        }

        /**
         * @param lhs left-hand read pair
         * @param rhs right-hand read pair
         * @return negative, zero or positive as {@code lhs} sorts before, with, or after {@code rhs}
         */
        @Override
        public int compare(PairedReadSequence lhs, PairedReadSequence rhs) {
            int diff = this.comparePrefix(lhs.read1, rhs.read1);
            if (diff != 0) {
                return diff;
            }
            diff = this.comparePrefix(lhs.read2, rhs.read2);
            if (diff != 0) {
                return diff;
            }
            // Compare explicitly instead of subtracting: the difference of two arbitrary ints
            // can overflow and flip sign, which would break the comparator contract.
            final int lhsId = System.identityHashCode(lhs);
            final int rhsId = System.identityHashCode(rhs);
            return lhsId < rhsId ? -1 : (lhsId == rhsId ? 0 : 1);
        }

        /** Returns the difference at the first differing position within the first {@code BASES} bytes, or 0. */
        private int comparePrefix(byte[] left, byte[] right) {
            for (int i = 0; i < this.BASES; ++i) {
                // Byte operands widen to int, so this subtraction cannot overflow.
                final int diff = left[i] - right[i];
                if (diff != 0) {
                    return diff;
                }
            }
            return 0;
        }
    }

    /**
     * Binary codec used by the sorting collection to spill read pairs to
     * temporary files and read them back. Record layout, in order: read group
     * (short), tile (byte), x (short), y (short), read1 length (int), read1
     * bytes, read2 length (int), read2 bytes. The {@code qualityOk} flag is not
     * stored.
     */
    static class PairedReadCodec
    implements SortingCollection.Codec<PairedReadSequence> {
        private DataOutputStream out;
        private DataInputStream in;

        PairedReadCodec() {
        }

        /** Wraps the given stream as the target of subsequent {@link #encode} calls. */
        @Override
        public void setOutputStream(OutputStream out) {
            this.out = new DataOutputStream(out);
        }

        /** Wraps the given stream as the source of subsequent {@link #decode} calls. */
        @Override
        public void setInputStream(InputStream in) {
            this.in = new DataInputStream(in);
        }

        /**
         * Writes one read pair to the current output stream.
         *
         * @param val the read pair to write; both reads must be non-null
         * @throws PicardException if the underlying stream fails
         */
        @Override
        public void encode(PairedReadSequence val) {
            try {
                this.out.writeShort(val.readGroup);
                this.out.writeByte(val.tile);
                this.out.writeShort(val.x);
                this.out.writeShort(val.y);
                this.out.writeInt(val.read1.length);
                this.out.write(val.read1);
                this.out.writeInt(val.read2.length);
                this.out.write(val.read2);
            }
            catch (IOException ioe) {
                throw new PicardException("Error write out read pair.", ioe);
            }
        }

        /**
         * Reads the next read pair from the current input stream.
         *
         * @return the decoded pair, or {@code null} when the stream ends at a record boundary
         * @throws PicardException if a record is truncated or the underlying stream fails
         */
        @Override
        public PairedReadSequence decode() {
            try {
                final PairedReadSequence val = new PairedReadSequence();
                try {
                    val.readGroup = this.in.readShort();
                }
                catch (EOFException eof) {
                    // End of stream before the first field of a record: no more records.
                    return null;
                }
                val.tile = this.in.readByte();
                val.x = this.in.readShort();
                val.y = this.in.readShort();
                val.read1 = this.readBases();
                val.read2 = this.readBases();
                return val;
            }
            catch (IOException ioe) {
                throw new PicardException("Exception reading read pair.", ioe);
            }
        }

        /**
         * Reads a length-prefixed byte array. Uses {@code readFully} because a
         * plain {@code read(byte[])} may return fewer bytes than requested even
         * when more data is still to come.
         */
        private byte[] readBases() throws IOException {
            final int length = this.in.readInt();
            if (length < 0) {
                throw new PicardException("Invalid length " + length + " in temporary file.");
            }
            final byte[] bases = new byte[length];
            try {
                this.in.readFully(bases);
            }
            catch (EOFException eof) {
                throw new PicardException("Could not read " + length + " bytes from temporary file.", eof);
            }
            return bases;
        }

        /** Returns a fresh codec with no streams attached. */
        @Override
        public SortingCollection.Codec<PairedReadSequence> clone() {
            return new PairedReadCodec();
        }
    }

    /**
     * Holder for the two reads of a pair together with the read-group index
     * and the tile / x / y location fields required by
     * {@link AbstractDuplicateFindingAlgorithm.PhysicalLocation}. The
     * read-group and location fields start at -1.
     */
    static class PairedReadSequence
    implements AbstractDuplicateFindingAlgorithm.PhysicalLocation {
        /** Per-pair byte figure that doWork() divides the maximum heap by to size the sorter. */
        static int size_in_bytes = 308;

        // Read-group index and location; -1 is the initial value of each.
        short readGroup = -1;
        byte tile = -1;
        short x = -1;
        short y = -1;

        /** Starts out true; doWork() ANDs in the quality-check result of each read of the pair. */
        boolean qualityOk = true;

        // Bases of the two ends; null until the corresponding read has been assigned.
        byte[] read1;
        byte[] read2;

        PairedReadSequence() {
        }

        /** @return the stored read-group value */
        @Override
        public short getReadGroup() {
            return readGroup;
        }

        /** @param readGroup the read-group value to store */
        @Override
        public void setReadGroup(short readGroup) {
            this.readGroup = readGroup;
        }

        /** @return the stored tile value */
        @Override
        public byte getTile() {
            return tile;
        }

        /** @param tile the tile value to store */
        @Override
        public void setTile(byte tile) {
            this.tile = tile;
        }

        /** @return the stored x value */
        @Override
        public short getX() {
            return x;
        }

        /** @param x the x value to store */
        @Override
        public void setX(short x) {
            this.x = x;
        }

        /** @return the stored y value */
        @Override
        public short getY() {
            return y;
        }

        /** @param y the y value to store */
        @Override
        public void setY(short y) {
            this.y = y;
        }
    }
}

