/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.sting.gatk.walkers.recalibration;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
import java.util.regex.Pattern;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMProgramRecord;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMTag;
import net.sf.samtools.SAMUtils;
import net.sf.samtools.util.SequenceUtil;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.BAQMode;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import org.broadinstitute.sting.utils.text.XReadLines;

/**
 * Read walker that rewrites the base qualities of every read using a covariates table
 * loaded from the {@code recal_file} CSV, and emits the reads to the output BAM writer.
 *
 * <p>{@link #initialize()} parses the table and builds the collapsed lookup tables;
 * {@link #map} computes the new quality of each base by summing the per-read-group,
 * per-reported-quality and per-covariate shifts; {@link #reduce} writes each read.
 */
@BAQMode(QualityMode=BAQ.QualityMode.ADD_TAG, ApplicationTime=BAQ.ApplicationTime.ON_OUTPUT)
@WalkerName(value="TableRecalibration")
@Requires(value={DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})
public class TableRecalibrationWalker
extends ReadWalker<SAMRecord, SAMFileWriter> {
    /** Id of the program record added to the output header; older records whose id starts with it are replaced. */
    public static final String PROGRAM_RECORD_NAME = "GATK TableRecalibration";
    @ArgumentCollection
    private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
    @Input(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the input covariates table recalibration .csv file")
    public File RECAL_FILE = null;
    @Output(doc="The output recalibrated BAM file", required=true)
    private StingSAMFileWriter OUTPUT_BAM = null;
    @Argument(fullName="preserve_qscores_less_than", shortName="pQ", doc="Bases with quality scores less than this threshold won't be recalibrated. In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
    private int PRESERVE_QSCORES_LESS_THAN = 5;
    @Argument(fullName="smoothing", shortName="sm", required=false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
    private int SMOOTHING = 1;
    @Argument(fullName="max_quality_score", shortName="maxQ", required=false, doc="The integer value at which to cap the quality scores")
    private int MAX_QUALITY_SCORE = 50;
    @Argument(fullName="doNotWriteOriginalQuals", shortName="noOQs", required=false, doc="If true, we will not write the original quality (OQ) tag for each read")
    private boolean DO_NOT_WRITE_OQ = false;
    @Hidden
    @Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the recalibrated bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
    private boolean NO_PG_TAG = false;
    @Hidden
    @Argument(fullName="fail_with_no_eof_marker", shortName="requireEOF", required=false, doc="If no EOF marker is present in the covariates file, exit the program with an exception.")
    private boolean REQUIRE_EOF = false;
    @Hidden
    @Argument(fullName="skipUQUpdate", shortName="skipUQUpdate", required=false, doc="If true, we will skip the UQ updating step for each read, speeding up the calculations")
    private boolean skipUQUpdate = false;
    /** Holds the parsed table and its collapsed views; stays null until the first data line is read. */
    private RecalDataManager dataManager;
    /** Covariates named by the table header, in column order. */
    private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>();
    public static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
    public static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
    public static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
    public static final String EOF_MARKER = "EOF";
    /** Number of SOLiD reads for which the no-call color space check in {@link #map} fired. */
    private long numReadsWithMalformedColorSpace = 0L;
    /** Cache of already computed qualities, keyed by the full covariate key of a base. */
    private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap();

    /**
     * Loads the recalibration table, builds the empirical quality tables and, unless
     * {@code no_pg_tag} is set, writes an output header carrying a program record for this run.
     *
     * @throws UserException.MalformedFile if the table is malformed, contains no data, or
     *         lacks the EOF marker while {@code fail_with_no_eof_marker} is set
     * @throws UserException.CouldNotReadInputFile if the table file cannot be found
     */
    @Override
    public void initialize() {
        // A forced platform also replaces the default platform.
        if (RAC.FORCE_PLATFORM != null) {
            RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
        }
        final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();

        logger.info("Reading in the data from input csv file...");
        final boolean sawEOF = loadRecalibrationTable(covariateClasses);
        logger.info("...done!");

        if (!sawEOF) {
            final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
            if (REQUIRE_EOF) {
                throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
            }
            logger.warn(errorMessage);
        }

        logger.info("The covariates being used here: ");
        for (Covariate cov : requestedCovariates) {
            logger.info("\t" + cov.getClass().getSimpleName());
        }

        // The data manager is only created once a data line has been seen.
        if (dataManager == null) {
            throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. Perhaps the recal csv file contains no data?");
        }

        logger.info("Generating tables of empirical qualities for use in sequential calculation...");
        dataManager.generateEmpiricalQualities(SMOOTHING, MAX_QUALITY_SCORE);
        logger.info("...done!");

        final SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
        if (!NO_PG_TAG) {
            final SAMProgramRecord programRecord = createProgramRecord();
            final List<SAMProgramRecord> oldRecords = header.getProgramRecords();
            final List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size() + 1);
            // Drop records left by earlier runs of this tool so only the current one remains.
            for (SAMProgramRecord record : oldRecords) {
                if (!record.getId().startsWith(PROGRAM_RECORD_NAME)) {
                    newRecords.add(record);
                }
            }
            newRecords.add(programRecord);
            header.setProgramRecords(newRecords);
            OUTPUT_BAM.writeHeader(header);
        }
    }

    /**
     * Reads the recalibration CSV line by line, instantiating the covariates named in its
     * header and feeding every data line to {@link #addCSVData}.
     *
     * @param covariateClasses the covariate implementations available for instantiation
     * @return true if a line equal to {@link #EOF_MARKER} was seen
     */
    private boolean loadRecalibrationTable(List<Class<? extends Covariate>> covariateClasses) {
        int lineNumber = 0;
        boolean foundAllCovariates = false;
        boolean sawEOF = false;
        try {
            for (String line : new XReadLines(RECAL_FILE)) {
                ++lineNumber;
                if (EOF_MARKER.equals(line)) {
                    sawEOF = true;
                    continue;
                }
                if (COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches()) {
                    continue;
                }
                if (COVARIATE_PATTERN.matcher(line).matches()) {
                    if (foundAllCovariates) {
                        throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE);
                    }
                    instantiateCovariates(line, covariateClasses);
                    continue;
                }
                // Anything else is a data line; the first one closes the header section.
                if (!foundAllCovariates) {
                    foundAllCovariates = true;
                    if (requestedCovariates.size() < 2) {
                        throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE);
                    }
                    for (Covariate cov : requestedCovariates) {
                        cov.initialize(RAC);
                    }
                    dataManager = new RecalDataManager(true, requestedCovariates.size());
                }
                addCSVData(RECAL_FILE, line);
            }
        }
        catch (FileNotFoundException e) {
            throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
        }
        catch (NumberFormatException e) {
            throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
        }
        return sawEOF;
    }

    /**
     * Instantiates one covariate per covariate column of a header line and appends it to
     * {@link #requestedCovariates}. The last three columns of the line are not treated as
     * covariate names.
     *
     * @param headerLine       a line matching {@link #COVARIATE_PATTERN}
     * @param covariateClasses the covariate implementations available for instantiation
     */
    private void instantiateCovariates(String headerLine, List<Class<? extends Covariate>> covariateClasses) {
        final String[] vals = headerLine.split(",");
        for (int column = 0; column < vals.length - 3; ++column) {
            final String covariateClassName = vals[column] + "Covariate";
            boolean foundClass = false;
            for (Class<? extends Covariate> covClass : covariateClasses) {
                if (!covariateClassName.equalsIgnoreCase(covClass.getSimpleName())) {
                    continue;
                }
                foundClass = true;
                try {
                    requestedCovariates.add(covClass.newInstance());
                }
                catch (Exception e) {
                    throw new DynamicClassResolutionException(covClass, e);
                }
            }
            if (!foundClass) {
                throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + covariateClassName + ") isn't a valid covariate option.");
            }
        }
    }

    /**
     * Builds the program record describing this run: tool version (when available) and the
     * approximate command line followed by the list of covariates in use.
     */
    private SAMProgramRecord createProgramRecord() {
        final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);
        final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
        try {
            programRecord.setProgramVersion(headerInfo.getString("org.broadinstitute.sting.gatk.version"));
        }
        catch (MissingResourceException ignored) {
            // Deliberately ignored: the record is simply written without a version.
        }

        final StringBuilder commandLine = new StringBuilder();
        commandLine.append(getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this));
        commandLine.append(" Covariates=[");
        // Join with ", " and close explicitly so the text stays well formed for any list size.
        String separator = "";
        for (Covariate cov : requestedCovariates) {
            commandLine.append(separator).append(cov.getClass().getSimpleName());
            separator = ", ";
        }
        commandLine.append("] ");
        programRecord.setCommandLine(commandLine.toString());
        return programRecord;
    }

    /**
     * Parses one data line of the table and adds it to all tables of the data manager.
     *
     * @param file the table file, used only for error reporting
     * @param line a comma separated data line: one value per requested covariate followed
     *             by three trailing columns, the first two of which are parsed as counts
     * @throws UserException.MalformedFile if the line has an unexpected number of fields
     * @throws NumberFormatException if a count or the quality score column is not numeric
     */
    private void addCSVData(File file, String line) {
        final String[] vals = line.split(",");
        final int covariateCount = requestedCovariates.size();
        final int expectedFields = covariateCount + 3;
        if (vals.length != expectedFields) {
            throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with the wrong number of fields (expected " + expectedFields + ", found " + vals.length + "): " + line + " --Perhaps the read group string contains a comma and isn't being parsed correctly.");
        }
        final Object[] key = new Object[covariateCount];
        for (int column = 0; column < covariateCount; ++column) {
            key[column] = requestedCovariates.get(column).getValue(vals[column]);
        }
        // The two columns after the covariates are the counts; column 1 is passed as the reported quality.
        final RecalDatum datum = new RecalDatum(Long.parseLong(vals[covariateCount]), Long.parseLong(vals[covariateCount + 1]), Double.parseDouble(vals[1]), 0.0);
        dataManager.addToAllTables(key, datum, PRESERVE_QSCORES_LESS_THAN);
    }

    /**
     * Recalibrates the base qualities of a single read in place.
     *
     * @param refBases        reference context of the read; may be null, in which case the
     *                        UQ and NM tags are left untouched
     * @param read            the read to recalibrate
     * @param metaDataTracker unused
     * @return the same read instance, with updated qualities unless it was skipped
     */
    @Override
    public SAMRecord map(ReferenceContext refBases, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
        if (read.getReadLength() == 0) {
            return read;
        }
        RecalDataManager.parseSAMRecord(read, RAC);
        byte[] originalQuals = read.getBaseQualities();
        final byte[] recalQuals = originalQuals.clone();

        final String platform = read.getReadGroup().getPlatform();
        // Locale.ROOT keeps the comparison independent of the JVM default locale.
        if (platform.toUpperCase(Locale.ROOT).contains("SOLID") && RAC.SOLID_RECAL_MODE != RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING) {
            if (RAC.SOLID_NOCALL_STRATEGY != RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION && RecalDataManager.checkNoCallColorSpace(read)) {
                ++numReadsWithMalformedColorSpace;
                if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED) {
                    return read;
                }
                if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ) {
                    read.setReadFailsVendorQualityCheckFlag(true);
                    return read;
                }
            }
            originalQuals = RecalDataManager.calcColorSpace(read, originalQuals, RAC.SOLID_RECAL_MODE, refBases == null ? null : refBases.getBases());
        }

        final Comparable[][] covariateValuesByOffset = RecalDataManager.computeCovariates(read, requestedCovariates);
        for (int offset = 0; offset < read.getReadLength(); ++offset) {
            final Object[] fullCovariateKey = covariateValuesByOffset[offset];
            // Each distinct key is computed once and then served from the cache.
            Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
            if (qualityScore == null) {
                qualityScore = performSequentialQualityCalculation(fullCovariateKey);
                qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
            }
            recalQuals[offset] = qualityScore;
        }

        preserveQScores(originalQuals, recalQuals);
        read.setBaseQualities(recalQuals);
        // Never overwrite an OQ tag that is already present.
        if (!DO_NOT_WRITE_OQ && read.getAttribute("OQ") == null) {
            read.setAttribute("OQ", SAMUtils.phredToFastq(originalQuals));
        }
        // UQ is only refreshed when the read already carries it.
        if (!skipUQUpdate && refBases != null && read.getAttribute(SAMTag.UQ.name()) != null) {
            read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, refBases.getBases(), read.getAlignmentStart() - 1, false));
        }
        // NM is only refreshed in SET_Q_ZERO_BASE_N mode, and only when already present.
        if (RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N && refBases != null && read.getAttribute(SAMTag.NM.name()) != null) {
            read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, refBases.getBases(), read.getAlignmentStart() - 1, false));
        }
        return read;
    }

    /**
     * Computes the recalibrated quality for one base as the reported quality plus three
     * successive shifts: read group, reported quality, and the sum over the remaining covariates.
     * A shift is zero when its table has no entry for the key.
     *
     * @param key full covariate key: {@code key[0]} is the read group, {@code key[1]} the
     *            reported quality, and the remaining entries the other covariate values
     * @return the new quality, bounded by {@code QualityUtils.boundQual} with the configured maximum
     */
    private byte performSequentialQualityCalculation(Object ... key) {
        final byte qualFromRead = (byte) Integer.parseInt(key[1].toString());

        // Shift shared by the whole read group.
        final Object[] readGroupCollapsedKey = new Object[] { key[0] };
        final RecalDatum globalRecalDatum = (RecalDatum) dataManager.getCollapsedTable(0).get(readGroupCollapsedKey);
        double globalDeltaQ = 0.0;
        if (globalRecalDatum != null) {
            globalDeltaQ = globalRecalDatum.getEmpiricalQuality() - globalRecalDatum.getEstimatedQReported();
        }

        // Additional shift for this reported quality within the read group.
        final Object[] qualityScoreCollapsedKey = new Object[] { key[0], key[1] };
        final RecalDatum qReportedRecalDatum = (RecalDatum) dataManager.getCollapsedTable(1).get(qualityScoreCollapsedKey);
        double deltaQReported = 0.0;
        if (qReportedRecalDatum != null) {
            deltaQReported = qReportedRecalDatum.getEmpiricalQuality() - (double) qualFromRead - globalDeltaQ;
        }

        // Additional shift from every remaining covariate, each looked up in its own table.
        final Object[] covariateCollapsedKey = new Object[] { key[0], key[1], null };
        double deltaQCovariates = 0.0;
        for (int covariateIndex = 2; covariateIndex < key.length; ++covariateIndex) {
            covariateCollapsedKey[2] = key[covariateIndex];
            final RecalDatum covariateRecalDatum = (RecalDatum) dataManager.getCollapsedTable(covariateIndex).get(covariateCollapsedKey);
            if (covariateRecalDatum == null) {
                continue;
            }
            deltaQCovariates += covariateRecalDatum.getEmpiricalQuality() - (double) qualFromRead - (globalDeltaQ + deltaQReported);
        }

        final double newQuality = (double) qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
        return QualityUtils.boundQual((int) Math.round(newQuality), (byte) MAX_QUALITY_SCORE);
    }

    /**
     * Restores the original quality for every base whose original quality is below the
     * {@code preserve_qscores_less_than} threshold.
     *
     * @param originalQuals qualities before recalibration
     * @param recalQuals    recalibrated qualities, modified in place
     */
    private void preserveQScores(byte[] originalQuals, byte[] recalQuals) {
        for (int offset = 0; offset < recalQuals.length; ++offset) {
            if (originalQuals[offset] < PRESERVE_QSCORES_LESS_THAN) {
                recalQuals[offset] = originalQuals[offset];
            }
        }
    }

    /**
     * @return the output BAM writer, used as the reduce accumulator
     */
    @Override
    public SAMFileWriter reduceInit() {
        return OUTPUT_BAM;
    }

    /**
     * Writes the read to the output writer.
     *
     * @param read   the read returned by {@link #map}
     * @param output the writer; nothing is written when it is null
     * @return the same writer
     */
    @Override
    public SAMFileWriter reduce(SAMRecord read, SAMFileWriter output) {
        if (output != null) {
            output.addAlignment(read);
        }
        return output;
    }

    /**
     * Warns the user when SOLiD reads with no-calls in the color space were encountered and the
     * strategy was to leave them unrecalibrated or to purge them.
     *
     * @param output unused
     */
    @Override
    public void onTraversalDone(SAMFileWriter output) {
        if (numReadsWithMalformedColorSpace == 0L) {
            return;
        }
        // Both warnings share everything except their final sentence.
        final String explanation = "Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm "
                + "because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting "
                + "for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. ";
        if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED) {
            Utils.warnUser(explanation + "These reads remain in the output bam file but haven't been corrected for reference bias. !!! USE AT YOUR OWN RISK !!!");
        } else if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ) {
            Utils.warnUser(explanation + "These reads were completely removed from the output bam file.");
        }
    }
}

