/*
 * Decompiled with CFR 0.152.
 */
package net.sf.picard.illumina;

import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import net.sf.picard.PicardException;
import net.sf.picard.cmdline.CommandLineProgram;
import net.sf.picard.cmdline.Option;
import net.sf.picard.cmdline.Usage;
import net.sf.picard.illumina.parser.ClusterData;
import net.sf.picard.illumina.parser.IlluminaDataProvider;
import net.sf.picard.illumina.parser.IlluminaDataProviderFactory;
import net.sf.picard.illumina.parser.IlluminaDataType;
import net.sf.picard.illumina.parser.ReadDescriptor;
import net.sf.picard.illumina.parser.ReadStructure;
import net.sf.picard.illumina.parser.ReadType;
import net.sf.picard.io.IoUtil;
import net.sf.picard.metrics.MetricBase;
import net.sf.picard.metrics.MetricsFile;
import net.sf.picard.util.Log;
import net.sf.picard.util.TabbedTextFileWithHeaderParser;
import net.sf.samtools.util.SequenceUtil;
import net.sf.samtools.util.StringUtil;

public class ExtractIlluminaBarcodes
extends CommandLineProgram {
    // ---- Command-line options ------------------------------------------------------------------
    // Each public field below is declared as a program option through its @Usage/@Option
    // annotation; the annotation's doc text is the user-facing description of the option.
    // READ_STRUCTURE and BARCODE_CYCLE are declared mutually exclusive, as are BARCODE and
    // BARCODE_FILE (see the mutex attributes).
    @Usage
    public String USAGE = this.getStandardUsagePreamble() + "Determine the barcode for each read in an Illumina lane.\n" + "For each tile, a file is written to the basecalls directory of the form s_<lane>_<tile>_barcode.txt." + "An output file contains a line for each read in the tile, aligned with the regular basecall output\n" + "The output file contains the following tab-separated columns: \n" + "    * read subsequence at barcode position\n" + "    * Y or N indicating if there was a barcode match\n" + "    * matched barcode sequence\n" + "Note that the order of specification of barcodes can cause arbitrary differences in output for poorly matching barcodes.\n\n";
    @Option(doc="The Illumina basecalls output directory. ", shortName="B")
    public File BASECALLS_DIR;
    // When left null, doWork() falls back to BASECALLS_DIR.
    @Option(doc="Where to write _barcode.txt files.  By default, these are written to BASECALLS_DIR.", optional=true)
    public File OUTPUT_DIR;
    @Option(doc="Lane number. ", shortName="L")
    public Integer LANE;
    @Option(doc="A description of the logical structure of clusters in an Illumina Run, i.e. a description of the structure IlluminaBasecallsToSam assumes the  data to be in. It should consist of integer/character pairs describing the number of cycles and the type of those cycles (B for Barcode, T for Template, and S for skip).  E.g. If the input data consists of 80 base clusters and we provide a read structure of \"36T8B8S30T\" then, before being converted to SAM records those bases will be split into 4 reads where read one consists of 36 cycles of template, read two consists of 8 cycles of barcode, read three will be an 8 base read of skipped cycles and read four is another 30 cycle template read.  The read consisting of skipped cycles would NOT be included in output SAM/BAM file read groups.", shortName="RS", mutex={"BARCODE_CYCLE"})
    public String READ_STRUCTURE;
    @Option(doc="1-based cycle number of the start of the barcode.  This cannot be used with reads that have more than one barcode; use READ_STRUCTURE in that case", mutex={"READ_STRUCTURE"}, shortName="BARCODE_POSITION")
    public Integer BARCODE_CYCLE;
    @Option(doc="Barcode sequence.  These must be unique, and all the same length.  This cannot be used with reads that have more than one barcode; use BARCODE_FILE in that case. ", mutex={"BARCODE_FILE"})
    public List<String> BARCODE = new ArrayList<String>();
    @Option(doc="Tab-delimited file of barcode sequences, barcode name and and optionally library name.  Barcodes must be unique, and all the same length.  Column headers must be 'barcode_sequence', 'barcode_name', and 'library_name'.", mutex={"BARCODE"})
    public File BARCODE_FILE;
    @Option(doc="Per-barcode and per-lane metrics written to this file.", shortName="M")
    public File METRICS_FILE;
    // The three thresholds below are all applied together in findBestBarcode() when deciding
    // whether the best-scoring barcode counts as a match.
    @Option(doc="Maximum mismatches for a barcode to be considered a match.")
    public int MAX_MISMATCHES = 1;
    @Option(doc="Minimum difference between number of mismatches in the best and second best barcodes for a barcode to be considered a match.")
    public int MIN_MISMATCH_DELTA = 1;
    @Option(doc="Maximum allowable number of no-calls in a barcode read before it is considered unmatchable.")
    public int MAX_NO_CALLS = 2;
    // Only affects the generated file name here (a ".gz" suffix is appended in getBarcodeFile()).
    @Option(shortName="GZIP", doc="Compress output s_l_t_barcode.txt files using gzip and append a .gz extension to the filenames.")
    public boolean COMPRESS_OUTPUTS = false;
    // ---- Internal state ------------------------------------------------------------------------
    private final Log log = Log.getInstance(ExtractIlluminaBarcodes.class);
    // Read layout of a cluster; set during customCommandLineValidation().
    private ReadStructure readStructure;
    // Source of the cluster data provider used by doWork(); set during customCommandLineValidation().
    private IlluminaDataProviderFactory factory;
    // Number of the tile whose barcode file was opened most recently (0 until the first file is opened).
    private int tile = 0;
    // Output file for the current tile, and the writer open on it (null when nothing is open).
    private File barcodeFile = null;
    private BufferedWriter writer = null;
    // One metric per expected barcode, in the order the barcodes were specified.
    private final List<BarcodeMetric> barcodeMetrics = new ArrayList<BarcodeMetric>();
    // Accumulates counts for reads that matched none of the expected barcodes; created in doWork().
    private BarcodeMetric noMatchBarcodeMetric;
    // Formats tile numbers for output file names; configured in the constructor.
    private final NumberFormat tileNumberFormatter = NumberFormat.getNumberInstance();
    // Separator between the individual sequences of a multi-barcode display string.
    private final String barcodeDelimiter = "/";
    // Column headers recognised in BARCODE_FILE. "barcode_sequence_1" is accepted as an
    // alternative to "barcode_sequence" for the first barcode.
    private static final String BARCODE_SEQUENCE_COLUMN = "barcode_sequence";
    private static final String BARCODE_SEQUENCE_1_COLUMN = "barcode_sequence_1";
    private static final String BARCODE_NAME_COLUMN = "barcode_name";
    private static final String LIBRARY_NAME_COLUMN = "library_name";

    public ExtractIlluminaBarcodes() {
        this.tileNumberFormatter.setMinimumIntegerDigits(4);
        this.tileNumberFormatter.setGroupingUsed(false);
    }

    /**
     * Runs barcode extraction for the whole lane: reads every cluster from the data provider,
     * writes one line per cluster to the per-tile barcode file, then computes the summary
     * statistics for each barcode and writes them to {@code METRICS_FILE}.
     *
     * <p>Relies on {@link #customCommandLineValidation()} having already populated the read
     * structure, the data provider factory and the list of expected barcodes.
     *
     * @return 0 on completion
     * @throws PicardException if writing or closing a barcode file fails
     */
    @Override
    protected int doWork() {
        IoUtil.assertDirectoryIsWritable(this.BASECALLS_DIR);
        IoUtil.assertFileIsWritable(this.METRICS_FILE);
        if (this.OUTPUT_DIR == null) {
            this.OUTPUT_DIR = this.BASECALLS_DIR;
        }
        IoUtil.assertDirectoryIsWritable(this.OUTPUT_DIR);

        // The "no match" bucket is represented by an all-N sequence for every barcode read.
        String[] noMatchBarcode = new String[this.readStructure.barcodeIndices.length];
        int index = 0;
        for (ReadDescriptor d : this.readStructure.descriptors) {
            if (d.type != ReadType.Barcode) {
                continue;
            }
            StringBuilder bc = new StringBuilder();
            for (int i = 0; i < d.length; ++i) {
                bc.append('N');
            }
            noMatchBarcode[index++] = bc.toString();
        }
        this.noMatchBarcodeMetric = new BarcodeMetric(null, null, ExtractIlluminaBarcodes.barcodeSeqsToString(noMatchBarcode, this.barcodeDelimiter), noMatchBarcode);

        IlluminaDataProvider dataProvider = this.factory.makeDataProvider();
        boolean completed = false;
        try {
            while (dataProvider.hasNext()) {
                this.extractBarcode(dataProvider.next());
            }
            completed = true;
        }
        catch (IOException e) {
            throw new PicardException("IOException writing barcode file " + this.barcodeFile, e);
        }
        finally {
            // Always release the current tile's writer, including when extraction failed part-way,
            // so the file handle is not leaked.
            BufferedWriter openWriter = this.writer;
            this.writer = null;
            if (openWriter != null) {
                try {
                    openWriter.close();
                }
                catch (IOException e) {
                    if (completed) {
                        throw new PicardException("IOException writing barcode file " + this.barcodeFile, e);
                    }
                    // Otherwise an earlier failure is already propagating; do not mask it with
                    // the secondary close failure.
                }
            }
        }

        // Totals over every barcode plus the no-match bucket.
        int totalReads = this.noMatchBarcodeMetric.READS;
        int totalPfReads = this.noMatchBarcodeMetric.PF_READS;
        int totalPfReadsAssigned = 0;
        for (BarcodeMetric barcodeMetric : this.barcodeMetrics) {
            totalReads += barcodeMetric.READS;
            totalPfReads += barcodeMetric.PF_READS;
            totalPfReadsAssigned += barcodeMetric.PF_READS;
        }

        // Fraction of all reads per barcode, and each fraction relative to the best barcode.
        if (totalReads > 0) {
            this.noMatchBarcodeMetric.PCT_MATCHES = (double)this.noMatchBarcodeMetric.READS / (double)totalReads;
            double bestPctOfAllBarcodeMatches = 0.0;
            for (BarcodeMetric barcodeMetric : this.barcodeMetrics) {
                barcodeMetric.PCT_MATCHES = (double)barcodeMetric.READS / (double)totalReads;
                if (barcodeMetric.PCT_MATCHES > bestPctOfAllBarcodeMatches) {
                    bestPctOfAllBarcodeMatches = barcodeMetric.PCT_MATCHES;
                }
            }
            if (bestPctOfAllBarcodeMatches > 0.0) {
                this.noMatchBarcodeMetric.RATIO_THIS_BARCODE_TO_BEST_BARCODE_PCT = this.noMatchBarcodeMetric.PCT_MATCHES / bestPctOfAllBarcodeMatches;
                for (BarcodeMetric barcodeMetric : this.barcodeMetrics) {
                    barcodeMetric.RATIO_THIS_BARCODE_TO_BEST_BARCODE_PCT = barcodeMetric.PCT_MATCHES / bestPctOfAllBarcodeMatches;
                }
            }
        }

        // Same two statistics restricted to passing-filter reads.
        if (totalPfReads > 0) {
            this.noMatchBarcodeMetric.PF_PCT_MATCHES = (double)this.noMatchBarcodeMetric.PF_READS / (double)totalPfReads;
            double bestPctOfAllBarcodeMatches = 0.0;
            for (BarcodeMetric barcodeMetric : this.barcodeMetrics) {
                barcodeMetric.PF_PCT_MATCHES = (double)barcodeMetric.PF_READS / (double)totalPfReads;
                if (barcodeMetric.PF_PCT_MATCHES > bestPctOfAllBarcodeMatches) {
                    bestPctOfAllBarcodeMatches = barcodeMetric.PF_PCT_MATCHES;
                }
            }
            if (bestPctOfAllBarcodeMatches > 0.0) {
                this.noMatchBarcodeMetric.PF_RATIO_THIS_BARCODE_TO_BEST_BARCODE_PCT = this.noMatchBarcodeMetric.PF_PCT_MATCHES / bestPctOfAllBarcodeMatches;
                for (BarcodeMetric barcodeMetric : this.barcodeMetrics) {
                    barcodeMetric.PF_RATIO_THIS_BARCODE_TO_BEST_BARCODE_PCT = barcodeMetric.PF_PCT_MATCHES / bestPctOfAllBarcodeMatches;
                }
            }
        }

        // Passing-filter reads per barcode relative to the mean over the expected barcodes.
        // A positive total implies at least one barcode, so the division is safe.
        if (totalPfReadsAssigned > 0) {
            double mean = (double)totalPfReadsAssigned / (double)this.barcodeMetrics.size();
            for (BarcodeMetric m : this.barcodeMetrics) {
                m.PF_NORMALIZED_MATCHES = (double)m.PF_READS / mean;
            }
        }

        MetricsFile metrics = this.getMetricsFile();
        for (BarcodeMetric barcodeMetric : this.barcodeMetrics) {
            metrics.addMetric(barcodeMetric);
        }
        metrics.addMetric(this.noMatchBarcodeMetric);
        metrics.write(this.METRICS_FILE);
        return 0;
    }

    /**
     * Makes sure {@code writer} is open on the barcode file for the given tile. If a different
     * tile's file is currently open it is closed first.
     *
     * @param tile tile number of the cluster about to be written
     * @throws PicardException if closing the previous tile's file fails
     */
    private void ensureBarcodeFileOpen(int tile) {
        // Requiring an open writer matters for the very first cluster: this.tile starts at 0, so
        // comparing tile numbers alone would treat a first tile numbered 0 as already open and
        // leave the writer null.
        if (this.writer != null && tile == this.tile) {
            return;
        }
        try {
            if (this.writer != null) {
                this.writer.close();
                this.writer = null;
            }
            this.tile = tile;
            this.barcodeFile = this.getBarcodeFile(tile);
            this.writer = IoUtil.openFileForBufferedWriting(this.barcodeFile);
            this.log.info("Extracting barcodes for tile " + tile);
        }
        catch (IOException e) {
            throw new PicardException("IOException " + this.barcodeFile, e);
        }
    }

    /**
     * Matches one cluster's barcode read(s) against the expected barcodes and appends the result
     * as a tab-separated line to the barcode file of the cluster's tile.
     *
     * <p>The line holds: the concatenated barcode read bases, "Y" or "N" for match/no match, the
     * barcode reported by the matcher, the mismatch count against the best barcode, and the
     * mismatch count against the second-best barcode.
     *
     * @param cluster the cluster to process
     * @throws IOException if writing the line fails
     */
    private void extractBarcode(ClusterData cluster) throws IOException {
        final ReadStructure structure = this.readStructure;
        final int barcodeReadCount = structure.barcodeIndices.length;

        // Collect the bases of every barcode read of this cluster.
        final byte[][] barcodeReads = new byte[barcodeReadCount][];
        int slot = 0;
        while (slot < barcodeReadCount) {
            final int expectedLength = structure.descriptors.get(structure.barcodeIndices[slot]).length;
            barcodeReads[slot] = cluster.getRead(structure.barcodeIndices[slot]).getBases();
            assert (barcodeReads[slot].length == expectedLength);
            ++slot;
        }

        // Matching also updates the per-barcode counters, so it runs before any file is opened.
        final BarcodeMatch match = this.findBestBarcode(barcodeReads, cluster.isPf());
        this.ensureBarcodeFileOpen(cluster.getTile());

        final StringBuilder readBases = new StringBuilder();
        for (int r = 0; r < barcodeReads.length; ++r) {
            readBases.append(StringUtil.bytesToString(barcodeReads[r]));
        }

        final String[] columns = new String[]{
            readBases.toString(),
            match.matched ? "Y" : "N",
            match.barcode,
            String.valueOf(match.mismatches),
            String.valueOf(match.mismatchesToSecondBest)
        };
        this.writer.write(StringUtil.join("\t", columns));
        this.writer.newLine();
    }

    /**
     * Finds the expected barcode with the fewest mismatches against the given barcode read(s),
     * decides whether it qualifies as a match, and updates the read counters of either the
     * matched barcode or the no-match bucket.
     *
     * <p>A match requires all of: no more than {@code MAX_NO_CALLS} no-calls in the read, no more
     * than {@code MAX_MISMATCHES} mismatches against the best barcode, and at least
     * {@code MIN_MISMATCH_DELTA} more mismatches against the second-best barcode.
     *
     * @param readSubsequences bases of each barcode read of the cluster
     * @param passingFilter whether the cluster passed filter; controls the PF_* counters
     * @return the match result; for an unmatched read that still resembles a barcode, the
     *         barcode is reported in lower case, otherwise as an empty string
     */
    private BarcodeMatch findBestBarcode(byte[][] readSubsequences, boolean passingFilter) {
        // Size of the barcode read(s) and how many of those bases are no-calls.
        int readBaseCount = 0;
        int noCallCount = 0;
        for (int r = 0; r < readSubsequences.length; ++r) {
            final byte[] bases = readSubsequences[r];
            readBaseCount += bases.length;
            for (int p = 0; p < bases.length; ++p) {
                if (SequenceUtil.isNoCall(bases[p])) {
                    ++noCallCount;
                }
            }
        }

        // Both trackers start one above the largest possible mismatch count.
        int bestMismatches = readBaseCount + 1;
        int secondBestMismatches = readBaseCount + 1;
        BarcodeMetric best = null;
        for (final BarcodeMetric candidate : this.barcodeMetrics) {
            final int mismatches = this.countMismatches(candidate.barcodeBytes, readSubsequences);
            if (mismatches < bestMismatches) {
                // The previous leader, if there was one, becomes the runner-up.
                if (best != null) {
                    secondBestMismatches = bestMismatches;
                }
                bestMismatches = mismatches;
                best = candidate;
            } else if (mismatches < secondBestMismatches) {
                secondBestMismatches = mismatches;
            }
        }

        final boolean matched = best != null
                && noCallCount <= this.MAX_NO_CALLS
                && bestMismatches <= this.MAX_MISMATCHES
                && secondBestMismatches - bestMismatches >= this.MIN_MISMATCH_DELTA;

        final BarcodeMatch match = new BarcodeMatch();
        if (noCallCount + bestMismatches < readBaseCount) {
            match.mismatches = bestMismatches;
            match.mismatchesToSecondBest = secondBestMismatches;
            // Lower case marks a barcode that is only the closest candidate; it is replaced by
            // the upper-case form below if the read actually matched.
            match.barcode = best.BARCODE.toLowerCase().replaceAll("/", "");
        } else {
            match.mismatches = readBaseCount;
            match.barcode = "";
        }

        if (!matched) {
            ++this.noMatchBarcodeMetric.READS;
            if (passingFilter) {
                ++this.noMatchBarcodeMetric.PF_READS;
            }
            return match;
        }

        ++best.READS;
        if (passingFilter) {
            ++best.PF_READS;
        }
        switch (bestMismatches) {
            case 0:
                ++best.PERFECT_MATCHES;
                if (passingFilter) {
                    ++best.PF_PERFECT_MATCHES;
                }
                break;
            case 1:
                ++best.ONE_MISMATCH_MATCHES;
                if (passingFilter) {
                    ++best.PF_ONE_MISMATCH_MATCHES;
                }
                break;
            default:
                break;
        }
        match.matched = true;
        match.barcode = best.BARCODE.replaceAll("/", "");
        return match;
    }

    /**
     * Joins barcode sequences into a single display string.
     *
     * <p>The delimiter is inserted between every pair of adjacent elements, including after an
     * empty element, so the number of delimiters is always {@code barcodes.length - 1}.
     *
     * @param barcodes the sequences to join, in order
     * @param delim the text placed between adjacent sequences
     * @return the joined string; empty when {@code barcodes} is empty
     */
    public static String barcodeSeqsToString(String[] barcodes, String delim) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < barcodes.length; ++i) {
            // Decide by position rather than by what has been appended so far, otherwise an
            // empty leading sequence would swallow its delimiter.
            if (i > 0) {
                sb.append(delim);
            }
            sb.append(barcodes[i]);
        }
        return sb.toString();
    }

    /**
     * Counts the positions at which the read differs from an expected barcode.
     *
     * <p>Each barcode segment is compared with the read segment at the same index, over the
     * shorter of the two lengths. A no-call in the read is never counted as a mismatch.
     *
     * @param barcodeBytes expected barcode, one byte array per barcode segment
     * @param readSubsequence observed bases, one byte array per barcode read
     * @return the total number of mismatching positions over all segments
     */
    private int countMismatches(byte[][] barcodeBytes, byte[][] readSubsequence) {
        int mismatchTotal = 0;
        for (int segment = 0; segment < barcodeBytes.length; ++segment) {
            final int comparableBases = Math.min(barcodeBytes[segment].length, readSubsequence[segment].length);
            final byte[] expected = barcodeBytes[segment];
            final byte[] observed = readSubsequence[segment];
            int pos = 0;
            while (pos < comparableBases) {
                final boolean mismatch = !SequenceUtil.isNoCall(observed[pos])
                        && !SequenceUtil.basesEqual(expected[pos], observed[pos]);
                if (mismatch) {
                    ++mismatchTotal;
                }
                ++pos;
            }
        }
        return mismatchTotal;
    }

    /**
     * Builds the output file for a tile: {@code s_<LANE>_<tile>_barcode.txt} inside
     * {@code OUTPUT_DIR}, with {@code .gz} appended when {@code COMPRESS_OUTPUTS} is set.
     *
     * @param tile tile number, rendered with {@code tileNumberFormatter}
     * @return the barcode file for that tile
     */
    private File getBarcodeFile(int tile) {
        final StringBuilder fileName = new StringBuilder();
        fileName.append("s_").append(this.LANE);
        fileName.append("_").append(this.tileNumberFormatter.format(tile));
        fileName.append("_barcode.txt");
        if (this.COMPRESS_OUTPUTS) {
            fileName.append(".gz");
        }
        return new File(this.OUTPUT_DIR, fileName.toString());
    }

    /**
     * Validates the option combination and prepares the state used by {@link #doWork()}: the
     * read structure, the data provider factory and the list of expected barcodes.
     *
     * <p>When no READ_STRUCTURE is given, the barcode length and BARCODE_CYCLE are checked
     * before they are handed to the factory. If either is unusable the problems are returned
     * straight away instead of building a factory from values already known to be invalid.
     *
     * @return the validation error messages, or {@code null} when there are none
     */
    @Override
    protected String[] customCommandLineValidation() {
        ArrayList<String> messages = new ArrayList<String>();
        if (this.READ_STRUCTURE == null) {
            // Without an explicit read structure the barcode length comes from the first
            // barcode given, either on the command line or in the barcode file.
            int barcodeLength = 0;
            if (this.BARCODE != null && !this.BARCODE.isEmpty()) {
                barcodeLength = this.BARCODE.get(0).length();
            } else if (this.BARCODE_FILE != null) {
                barcodeLength = this.getBarcodeLengthFromFile();
            }
            if (barcodeLength == 0) {
                messages.add("Cannot determine barcode length");
            }
            this.log.debug("barcodeLenth is " + barcodeLength);
            if (this.BARCODE_CYCLE == null) {
                messages.add("BARCODE_CYCLE must be specified when READ_STRUCTURE is not.");
            } else if (this.BARCODE_CYCLE < 1) {
                messages.add("Invalid BARCODE_CYCLE=" + this.BARCODE_CYCLE + ".  Value must be positive.");
            }
            if (!messages.isEmpty()) {
                return messages.toArray(new String[messages.size()]);
            }
            this.factory = new IlluminaDataProviderFactory(this.BASECALLS_DIR, (int)this.LANE, this.BARCODE_CYCLE, barcodeLength, IlluminaDataType.BaseCalls, IlluminaDataType.PF);
            this.readStructure = this.factory.readStructure();
        } else {
            this.readStructure = new ReadStructure(this.READ_STRUCTURE);
            this.factory = new IlluminaDataProviderFactory(this.BASECALLS_DIR, (int)this.LANE, this.readStructure, IlluminaDataType.BaseCalls, IlluminaDataType.PF);
        }

        if (this.BARCODE_FILE != null) {
            this.parseBarcodeFile(messages);
        } else if (this.BARCODE != null) {
            // Set.add reports whether the value was new, which doubles as the duplicate check.
            HashSet<String> barcodes = new HashSet<String>();
            for (String barcode : this.BARCODE) {
                if (!barcodes.add(barcode)) {
                    messages.add("Barcode " + barcode + " specified more than once.");
                }
                BarcodeMetric metric = new BarcodeMetric(null, null, barcode, new String[]{barcode});
                this.barcodeMetrics.add(metric);
            }
        }
        if (this.barcodeMetrics.size() == 0) {
            messages.add("No barcodes have been specified.");
        }
        if (messages.size() == 0) {
            return null;
        }
        return messages.toArray(new String[messages.size()]);
    }

    /**
     * Command-line entry point: runs the program and exits the JVM with the status it returns.
     *
     * @param argv command-line arguments
     */
    public static void main(String[] argv) {
        final ExtractIlluminaBarcodes program = new ExtractIlluminaBarcodes();
        final int exitStatus = program.instanceMain(argv);
        System.exit(exitStatus);
    }

    /**
     * Determines the barcode length from the first data row of {@code BARCODE_FILE}.
     *
     * <p>The sequence is read from the {@code barcode_sequence} column, or from
     * {@code barcode_sequence_1} when the former is absent.
     *
     * @return the length of the first barcode sequence, or 0 when the file has neither column
     *         or contains no data rows
     */
    private int getBarcodeLengthFromFile() {
        TabbedTextFileWithHeaderParser barcodesParser = new TabbedTextFileWithHeaderParser(this.BARCODE_FILE);
        try {
            String sequenceColumn = barcodesParser.hasColumn(BARCODE_SEQUENCE_COLUMN)
                    ? BARCODE_SEQUENCE_COLUMN
                    : (barcodesParser.hasColumn(BARCODE_SEQUENCE_1_COLUMN) ? BARCODE_SEQUENCE_1_COLUMN : null);
            if (sequenceColumn == null) {
                return 0;
            }
            // Only the first row is needed; the loop form also covers a file with no data rows.
            for (TabbedTextFileWithHeaderParser.Row row : barcodesParser) {
                return row.getField(sequenceColumn).length();
            }
            return 0;
        }
        finally {
            // Close on every path, including the early returns above.
            barcodesParser.close();
        }
    }

    /**
     * Reads the expected barcodes from {@code BARCODE_FILE} and adds one {@link BarcodeMetric}
     * per row to {@code barcodeMetrics}.
     *
     * <p>The first barcode of a row comes from the {@code barcode_sequence} column (or
     * {@code barcode_sequence_1} when the former is absent); barcode number n &gt; 1 comes from
     * {@code barcode_sequence_n}. One sequence is read for every barcode read in the read
     * structure. Barcode name and library name are taken from their columns when present and
     * default to the empty string otherwise.
     *
     * @param messages receives a message when the sequence column is missing and one for every
     *        barcode that appears more than once
     */
    private void parseBarcodeFile(ArrayList<String> messages) {
        TabbedTextFileWithHeaderParser barcodesParser = new TabbedTextFileWithHeaderParser(this.BARCODE_FILE);
        try {
            String sequenceColumn = barcodesParser.hasColumn(BARCODE_SEQUENCE_COLUMN)
                    ? BARCODE_SEQUENCE_COLUMN
                    : (barcodesParser.hasColumn(BARCODE_SEQUENCE_1_COLUMN) ? BARCODE_SEQUENCE_1_COLUMN : null);
            if (sequenceColumn == null) {
                messages.add(this.BARCODE_FILE + " does not have " + BARCODE_SEQUENCE_COLUMN + " or " + BARCODE_SEQUENCE_1_COLUMN + " column header");
                return;
            }
            boolean hasBarcodeName = barcodesParser.hasColumn(BARCODE_NAME_COLUMN);
            boolean hasLibraryName = barcodesParser.hasColumn(LIBRARY_NAME_COLUMN);
            int numBarcodes = this.readStructure.barcodeIndices.length;
            HashSet<String> barcodes = new HashSet<String>();
            for (TabbedTextFileWithHeaderParser.Row row : barcodesParser) {
                String[] bcStrings = new String[numBarcodes];
                int barcodeNum = 1;
                for (ReadDescriptor rd : this.readStructure.descriptors) {
                    if (rd.type != ReadType.Barcode) {
                        continue;
                    }
                    String header = barcodeNum == 1 ? sequenceColumn : "barcode_sequence_" + String.valueOf(barcodeNum);
                    bcStrings[barcodeNum - 1] = row.getField(header);
                    ++barcodeNum;
                }
                String bcStr = ExtractIlluminaBarcodes.barcodeSeqsToString(bcStrings, this.barcodeDelimiter);
                // Set.add reports whether the value was new, which doubles as the duplicate check.
                if (!barcodes.add(bcStr)) {
                    messages.add("Barcode " + bcStr + " specified more than once in " + this.BARCODE_FILE);
                }
                String barcodeName = hasBarcodeName ? row.getField(BARCODE_NAME_COLUMN) : "";
                String libraryName = hasLibraryName ? row.getField(LIBRARY_NAME_COLUMN) : "";
                this.barcodeMetrics.add(new BarcodeMetric(barcodeName, libraryName, bcStr, bcStrings));
            }
        }
        finally {
            // Close on every path, including the missing-column return and any exception.
            barcodesParser.close();
        }
    }

    /**
     * Per-barcode counters and derived statistics. One instance exists for each expected
     * barcode, plus one for reads that matched no barcode. The public fields are the values
     * added to the metrics file.
     */
    public static class BarcodeMetric
    extends MetricBase {
        /** Display form of the barcode; multiple sequences are joined with a delimiter. */
        public String BARCODE;
        public String BARCODE_NAME = "";
        public String LIBRARY_NAME = "";
        /** Number of reads assigned to this barcode. */
        public int READS = 0;
        /** Number of passing-filter reads assigned to this barcode. */
        public int PF_READS = 0;
        /** Assigned reads with zero mismatches. */
        public int PERFECT_MATCHES = 0;
        /** Assigned passing-filter reads with zero mismatches. */
        public int PF_PERFECT_MATCHES = 0;
        /** Assigned reads with exactly one mismatch. */
        public int ONE_MISMATCH_MATCHES = 0;
        /** Assigned passing-filter reads with exactly one mismatch. */
        public int PF_ONE_MISMATCH_MATCHES = 0;
        /** READS as a fraction of all reads in the lane. */
        public double PCT_MATCHES = 0.0;
        /** PCT_MATCHES divided by the largest PCT_MATCHES of any expected barcode. */
        public double RATIO_THIS_BARCODE_TO_BEST_BARCODE_PCT = 0.0;
        /** PF_READS as a fraction of all passing-filter reads in the lane. */
        public double PF_PCT_MATCHES = 0.0;
        /** PF_PCT_MATCHES divided by the largest PF_PCT_MATCHES of any expected barcode. */
        public double PF_RATIO_THIS_BARCODE_TO_BEST_BARCODE_PCT = 0.0;
        /** PF_READS divided by the mean PF_READS over the expected barcodes. */
        public double PF_NORMALIZED_MATCHES;
        /** The barcode sequences as bytes, one array per sequence; used for mismatch counting. */
        protected final byte[][] barcodeBytes;

        /**
         * @param barcodeName name of the barcode (stored as given, may be null)
         * @param libraryName name of the library (stored as given, may be null)
         * @param barcodeDisplay display form of the barcode
         * @param barcodeSeqs the individual barcode sequences, converted to bytes for matching
         */
        public BarcodeMetric(String barcodeName, String libraryName, String barcodeDisplay, String[] barcodeSeqs) {
            final byte[][] encoded = new byte[barcodeSeqs.length][];
            int slot = 0;
            for (final String sequence : barcodeSeqs) {
                encoded[slot++] = StringUtil.stringToBytes(sequence);
            }
            this.barcodeBytes = encoded;
            this.BARCODE = barcodeDisplay;
            this.BARCODE_NAME = barcodeName;
            this.LIBRARY_NAME = libraryName;
        }

        /**
         * Creates an empty metric with no barcode bytes.
         * NOTE(review): presumably required so the metrics framework can instantiate the class
         * when reading a metrics file - confirm.
         */
        public BarcodeMetric() {
            this.barcodeBytes = null;
        }
    }

    /**
     * Result of matching one cluster's barcode read(s) against the expected barcodes.
     * A plain mutable holder; all fields start at their Java defaults.
     */
    static class BarcodeMatch {
        /** True when the read was assigned to a barcode. */
        boolean matched;

        /** Barcode reported for the read. */
        String barcode;

        /** Mismatch count against the best barcode. */
        int mismatches;

        /** Mismatch count against the second-best barcode. */
        int mismatchesToSecondBest;
    }
}

