/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.sting.gatk.walkers.variantutils;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;

/**
 * Walker that writes a selected subset of the input variant track to a VCF.
 *
 * <p>Records can be restricted by sample (names, files, regular expressions, exclusions),
 * by JEXL select expressions, by variant type, by allelicity, by concordance/discordance
 * with a comparison track, by mendelian-violation status, or by random sub-sampling
 * (fixed number or fraction). Each emitted record is re-subset to the chosen samples and
 * its chromosome-count and DP INFO attributes are recomputed.
 */
public class SelectVariants
extends RodWalker<Integer, Integer> {
    @ArgumentCollection
    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
    @Input(fullName="discordance", shortName="disc", doc="Output variants that were not called in this comparison track", required=false)
    private RodBinding<VariantContext> discordanceTrack;
    @Input(fullName="concordance", shortName="conc", doc="Output variants that were also called in this comparison track", required=false)
    private RodBinding<VariantContext> concordanceTrack;
    @Output(doc="File to which variants should be written", required=true)
    protected VCFWriter vcfWriter = null;
    @Argument(fullName="sample_name", shortName="sn", doc="Include genotypes from this sample. Can be specified multiple times", required=false)
    public Set<String> sampleNames = new HashSet<String>(0);
    @Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false)
    public Set<String> sampleExpressions;
    @Input(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false)
    public Set<File> sampleFiles;
    @Argument(fullName="exclude_sample_name", shortName="xl_sn", doc="Exclude genotypes from this sample. Can be specified multiple times", required=false)
    public Set<String> XLsampleNames = new HashSet<String>(0);
    @Input(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false)
    public Set<File> XLsampleFiles = new HashSet<File>(0);
    @Argument(shortName="select", doc="One or more criteria to use when selecting the data", required=false)
    public ArrayList<String> SELECT_EXPRESSIONS = new ArrayList<String>();
    @Argument(fullName="excludeNonVariants", shortName="env", doc="Don't include loci found to be non-variant after the subsetting procedure", required=false)
    private boolean EXCLUDE_NON_VARIANTS = false;
    @Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci in the analysis", required=false)
    private boolean EXCLUDE_FILTERED = false;
    @Argument(fullName="restrictAllelesTo", shortName="restrictAllelesTo", doc="Select only variants of a particular allelicity. Valid options are ALL (default), MULTIALLELIC or BIALLELIC", required=false)
    private NumberAlleleRestriction alleleRestriction = NumberAlleleRestriction.ALL;
    @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false)
    private boolean KEEP_ORIGINAL_CHR_COUNTS = false;
    // NOTE(review): the doc strings of keepAFSpectrum and afFile look copy-pasted from other
    // arguments; they are left untouched because they are user-visible runtime text.
    @Hidden
    @Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure", required=false)
    private boolean KEEP_AF_SPECTRUM = false;
    @Hidden
    @Argument(fullName="afFile", shortName="afFile", doc="The output recal file used by ApplyRecalibration", required=false)
    private File AF_FILE = new File("");
    @Hidden
    @Argument(fullName="family_structure_file", shortName="familyFile", doc="use -family unless you know what you're doing", required=false)
    private File FAMILY_STRUCTURE_FILE = null;
    @Argument(fullName="family_structure", shortName="family", doc="string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
    private String FAMILY_STRUCTURE = "";
    @Argument(fullName="mendelianViolation", shortName="mv", doc="output mendelian violation sites only", required=false)
    private Boolean MENDELIAN_VIOLATIONS = false;
    @Argument(fullName="mendelianViolationQualThreshold", shortName="mvq", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
    private double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 0.0;
    @Argument(fullName="select_random_number", shortName="number", doc="Selects a number of variants at random from the variant track", required=false)
    private int numRandom = 0;
    @Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false)
    private double fractionRandom = 0.0;
    @Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false)
    private List<VariantContext.Type> TYPES_TO_INCLUDE = new ArrayList<VariantContext.Type>();
    @Hidden
    @Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false)
    private String outMVFile = null;

    /** Variant types accepted by {@link #map}; every type when no -selectType was given. */
    private ArrayList<VariantContext.Type> selectedTypes = new ArrayList<VariantContext.Type>();
    /** Generated names ("select-0", "select-1", ...) paired with SELECT_EXPRESSIONS. */
    private ArrayList<String> selectNames = new ArrayList<String>();
    private List<VariantContextUtils.JexlVCMatchExp> jexls = null;
    /** Final, sorted set of samples to emit (includes minus excludes). */
    private TreeSet<String> samples = new TreeSet<String>();
    /** True when no include option was supplied and all samples of the input were taken. */
    private boolean NO_SAMPLES_SPECIFIED = false;
    private boolean DISCORDANCE_ONLY = false;
    private boolean CONCORDANCE_ONLY = false;
    private Set<MendelianViolation> mvSet = new HashSet<MendelianViolation>();
    private boolean SELECT_RANDOM_NUMBER = false;
    private boolean SELECT_RANDOM_FRACTION = false;
    /** Running rank of the records offered to the random-number sampler. */
    private int variantNumber = 0;
    /** Number of insertions (initial fills plus replacements) into variantArray. */
    private int nVariantsAdded = 0;
    /** Next slot of variantArray to overwrite once the array is full. */
    private int positionToAdd = 0;
    private RandomVariantStructure[] variantArray;
    private ArrayList<Double> afBreakpoints = null;
    private ArrayList<Double> afBoosts = null;
    double bkDelta = 0.0;
    /** Optional report of mendelian violations; null when -outMVFile was not requested. */
    private PrintStream outMVFileStream = null;

    /**
     * Resolves the sample set, writes the output VCF header and prepares every selection
     * mode (JEXL expressions, concordance/discordance, mendelian violations, random
     * sampling, AF boost table).
     *
     * @throws UserException if every included sample is also excluded, or if the family
     *         structure file, the AF file or the MV report file cannot be opened
     */
    @Override
    public void initialize() {
        List<String> rodNames = Arrays.asList(this.variantCollection.variants.getName());
        Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(this.getToolkit(), rodNames);
        TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));

        // Includes first: explicit names, sample files and regular expressions.
        Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(this.sampleFiles);
        Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, this.sampleExpressions);
        this.samples.addAll(samplesFromFile);
        this.samples.addAll(samplesFromExpressions);
        this.samples.addAll(this.sampleNames);
        if (this.samples.isEmpty()) {
            // Nothing requested explicitly: fall back to every sample in the input.
            this.samples.addAll(vcfSamples);
            this.NO_SAMPLES_SPECIFIED = true;
        }

        // Then excludes.
        Collection<String> XLsamplesFromFile = SampleUtils.getSamplesFromFiles(this.XLsampleFiles);
        this.samples.removeAll(XLsamplesFromFile);
        this.samples.removeAll(this.XLsampleNames);
        if (this.samples.isEmpty() && !this.NO_SAMPLES_SPECIFIED) {
            throw new UserException("All samples requested to be included were also requested to be excluded.");
        }
        for (String sample : this.samples) {
            logger.info("Including sample '" + sample + "'");
        }

        if (this.TYPES_TO_INCLUDE.isEmpty()) {
            this.selectedTypes.addAll(Arrays.asList(VariantContext.Type.values()));
        } else {
            this.selectedTypes.addAll(this.TYPES_TO_INCLUDE);
        }

        Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
        headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
        if (this.KEEP_ORIGINAL_CHR_COUNTS) {
            // NOTE(review): subsetRecord stores the *_Orig values as record-level (INFO)
            // attributes, yet they are declared here as FORMAT header lines — confirm
            // whether an INFO header line type should be used instead.
            headerLines.add(new VCFFormatHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC"));
            headerLines.add(new VCFFormatHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF"));
            headerLines.add(new VCFFormatHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
        }
        this.vcfWriter.writeHeader(new VCFHeader(headerLines, this.samples));

        for (int i = 0; i < this.SELECT_EXPRESSIONS.size(); ++i) {
            this.selectNames.add(String.format("select-%d", i));
        }
        this.jexls = VariantContextUtils.initializeMatchExps(this.selectNames, this.SELECT_EXPRESSIONS);

        this.DISCORDANCE_ONLY = this.discordanceTrack.isBound();
        if (this.DISCORDANCE_ONLY) {
            logger.info("Selecting only variants discordant with the track: " + this.discordanceTrack.getName());
        }
        this.CONCORDANCE_ONLY = this.concordanceTrack.isBound();
        if (this.CONCORDANCE_ONLY) {
            logger.info("Selecting only variants concordant with the track: " + this.concordanceTrack.getName());
        }

        this.initializeMendelianViolations();

        this.SELECT_RANDOM_NUMBER = this.numRandom > 0;
        if (this.SELECT_RANDOM_NUMBER) {
            logger.info("Selecting " + this.numRandom + " variants at random from the variant track");
            this.variantArray = new RandomVariantStructure[this.numRandom];
        }
        this.SELECT_RANDOM_FRACTION = this.fractionRandom > 0.0;
        if (this.SELECT_RANDOM_FRACTION) {
            logger.info("Selecting approximately " + 100.0 * this.fractionRandom + "% of the variants at random from the variant track");
        }

        if (this.KEEP_AF_SPECTRUM) {
            this.loadAfBoostTable();
        }
    }

    /**
     * Builds the set of trios to test for mendelian violations and, when requested,
     * opens the violation report stream.
     *
     * <p>With -mv the trios come from the family structure file (only trios whose three
     * members are all selected samples are kept) or, without a file, from the toolkit.
     * Without -mv, a non-empty -family string switches mendelian-violation mode on.
     */
    private void initializeMendelianViolations() {
        if (this.MENDELIAN_VIOLATIONS.booleanValue()) {
            if (this.FAMILY_STRUCTURE_FILE != null) {
                try {
                    for (String line : new XReadLines(this.FAMILY_STRUCTURE_FILE)) {
                        MendelianViolation mv = new MendelianViolation(line, this.MENDELIAN_VIOLATION_QUAL_THRESHOLD);
                        if (this.samples.contains(mv.getSampleChild()) && this.samples.contains(mv.getSampleDad()) && this.samples.contains(mv.getSampleMom())) {
                            this.mvSet.add(mv);
                        }
                    }
                }
                catch (FileNotFoundException e) {
                    // Report the file that actually failed (previously AF_FILE was named here).
                    throw new UserException.CouldNotReadInputFile(this.FAMILY_STRUCTURE_FILE, (Exception)e);
                }
            } else {
                this.mvSet.add(new MendelianViolation(this.getToolkit(), this.MENDELIAN_VIOLATION_QUAL_THRESHOLD));
            }
        } else if (!this.FAMILY_STRUCTURE.isEmpty()) {
            this.mvSet.add(new MendelianViolation(this.FAMILY_STRUCTURE, this.MENDELIAN_VIOLATION_QUAL_THRESHOLD));
            this.MENDELIAN_VIOLATIONS = true;
        }

        // Open the report for every mendelian-violation configuration, not only the
        // family-file one; otherwise map() would dereference a null stream.
        if (this.MENDELIAN_VIOLATIONS.booleanValue() && this.outMVFile != null) {
            try {
                this.outMVFileStream = new PrintStream(this.outMVFile);
            }
            catch (FileNotFoundException e) {
                throw new UserException.CouldNotCreateOutputFile(this.outMVFile, "Can't open output file", (Exception)e);
            }
        }
    }

    /**
     * Reads the AF boost table from AF_FILE: the first line is skipped as a header and
     * every following line supplies "breakpoint boost" separated by a single space.
     * bkDelta is set to the first breakpoint, or left untouched when the table is empty.
     *
     * @throws UserException if the file cannot be opened or a line has fewer than two fields
     */
    private void loadAfBoostTable() {
        try {
            this.afBreakpoints = new ArrayList<Double>();
            this.afBoosts = new ArrayList<Double>();
            logger.info("Reading in AF boost table...");
            boolean headerSkipped = false;
            for (String line : new XReadLines(this.AF_FILE)) {
                if (!headerSkipped) {
                    headerSkipped = true;
                    continue;
                }
                String[] vals = line.split(" ");
                if (vals.length < 2) {
                    throw new UserException("Malformed line in AF boost table " + this.AF_FILE + ": '" + line + "'");
                }
                this.afBreakpoints.add(Double.valueOf(vals[0]));
                this.afBoosts.add(Double.valueOf(vals[1]));
            }
            // A header-only table is tolerated: map() falls back to a boost of 1.0.
            if (!this.afBreakpoints.isEmpty()) {
                this.bkDelta = this.afBreakpoints.get(0);
            }
        }
        catch (FileNotFoundException e) {
            throw new UserException.CouldNotReadInputFile(this.AF_FILE, (Exception)e);
        }
    }

    /**
     * Applies every active selection criterion to the variant records at the current
     * locus and writes (or buffers, for random-number sampling) the ones that pass.
     *
     * @return 0 when there is no tracker or no record here, or when a record fails the
     *         discordance, concordance or JEXL tests; 1 otherwise
     */
    @Override
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if (tracker == null) {
            return 0;
        }
        List<VariantContext> vcs = tracker.getValues(this.variantCollection.variants, context.getLocation());
        if (vcs == null || vcs.isEmpty()) {
            return 0;
        }
        for (VariantContext vc : vcs) {
            // A record without any violation ends processing of this locus (still counted).
            if (this.MENDELIAN_VIOLATIONS.booleanValue() && !this.reportMendelianViolations(vc)) {
                break;
            }
            if (this.DISCORDANCE_ONLY && !this.isDiscordant(vc, tracker.getValues(this.discordanceTrack, context.getLocation()))) {
                return 0;
            }
            if (this.CONCORDANCE_ONLY && !this.isConcordant(vc, tracker.getValues(this.concordanceTrack, context.getLocation()))) {
                return 0;
            }
            if (this.alleleRestriction == NumberAlleleRestriction.BIALLELIC && !vc.isBiallelic()) {
                continue;
            }
            if (this.alleleRestriction == NumberAlleleRestriction.MULTIALLELIC && vc.isBiallelic()) {
                continue;
            }
            if (!this.selectedTypes.contains((Object)vc.getType())) {
                continue;
            }
            VariantContext sub = this.subsetRecord(vc, this.samples);
            if (!sub.isPolymorphic() && this.EXCLUDE_NON_VARIANTS) {
                continue;
            }
            if (sub.isFiltered() && this.EXCLUDE_FILTERED) {
                continue;
            }
            for (VariantContextUtils.JexlVCMatchExp jexl : this.jexls) {
                if (!VariantContextUtils.match(sub, jexl)) {
                    return 0;
                }
            }
            if (this.SELECT_RANDOM_NUMBER) {
                this.randomlyAddVariant(++this.variantNumber, sub, ref.getBase());
                continue;
            }
            if (!this.SELECT_RANDOM_FRACTION) {
                this.vcfWriter.add(sub);
                continue;
            }
            if (!this.KEEP_AF_SPECTRUM) {
                // Plain fraction sampling: keep each record with probability fractionRandom.
                if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < this.fractionRandom) {
                    this.vcfWriter.add(sub);
                }
                continue;
            }
            // AF-spectrum-preserving sampling only considers records that carry an AF value.
            if (!sub.hasAttribute("AF")) {
                continue;
            }
            double af = SelectVariants.parseMaxAlleleFrequency(sub.getAttributeAsString("AF"));
            double afBoost = this.lookupAfBoost(af);
            if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < this.fractionRandom * afBoost * afBoost) {
                this.vcfWriter.add(sub);
            }
        }
        return 1;
    }

    /**
     * Tests a record against every configured trio and appends one line per violating
     * trio to the violation report when that report is open.
     *
     * @return true if at least one trio reports a violation for this record
     */
    private boolean reportMendelianViolations(VariantContext vc) {
        boolean foundMV = false;
        for (MendelianViolation mv : this.mvSet) {
            if (!mv.isViolation(vc)) {
                continue;
            }
            foundMV = true;
            if (this.outMVFileStream == null) {
                continue;
            }
            this.outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, childG=%s childGL=%s\n",
                    vc.getChr(),
                    vc.getStart(),
                    vc.getReference().getDisplayString(),
                    vc.getAlternateAllele(0).getDisplayString(),
                    vc.getChromosomeCount(vc.getAlternateAllele(0)),
                    mv.getSampleMom(),
                    mv.getSampleDad(),
                    mv.getSampleChild(),
                    vc.getGenotype(mv.getSampleMom()).toBriefString(),
                    vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
                    vc.getGenotype(mv.getSampleDad()).toBriefString(),
                    // dadGL must come from the father's genotype (was the mother's).
                    vc.getGenotype(mv.getSampleDad()).getLikelihoods().getAsString(),
                    vc.getGenotype(mv.getSampleChild()).toBriefString(),
                    vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString());
        }
        return foundMV;
    }

    /**
     * Parses the string form of an AF attribute. A single value is parsed directly; a
     * comma-separated value has its first and last characters dropped (presumably the
     * surrounding list brackets — confirm against the attribute's string form) and the
     * largest element is returned.
     */
    private static double parseMaxAlleleFrequency(String afo) {
        if (!afo.contains(",")) {
            return Double.valueOf(afo);
        }
        String[] afs = afo.split(",");
        afs[0] = afs[0].substring(1);
        int last = afs.length - 1;
        afs[last] = afs[last].substring(0, afs[last].length() - 1);
        double[] afd = new double[afs.length];
        for (int k = 0; k < afd.length; ++k) {
            afd[k] = Double.valueOf(afs[k]);
        }
        return MathUtils.arrayMax(afd);
    }

    /**
     * Finds the boost of the first breakpoint bin with {@code af < breakpoint + bkDelta},
     * clamping to the last bin; returns 1.0 when the boost table is empty.
     */
    private double lookupAfBoost(double af) {
        if (this.afBreakpoints.isEmpty()) {
            return 1.0;
        }
        int bkidx = 0;
        for (Double bkpt : this.afBreakpoints) {
            if (af < bkpt + this.bkDelta) {
                break;
            }
            ++bkidx;
        }
        if (bkidx >= this.afBoosts.size()) {
            bkidx = this.afBoosts.size() - 1;
        }
        return this.afBoosts.get(bkidx);
    }

    /**
     * Decides whether a record is discordant with the comparison records: true when some
     * selected sample carries a variant here that no comparison record carries for the
     * same sample. When no samples were specified, an absent/empty comparison is enough.
     */
    private boolean isDiscordant(VariantContext vc, Collection<VariantContext> compVCs) {
        if (vc == null) {
            return false;
        }
        if (this.NO_SAMPLES_SPECIFIED && (compVCs == null || compVCs.isEmpty())) {
            return true;
        }
        Map<String, Genotype> genotypes = vc.getGenotypes(this.samples);
        for (Genotype g : genotypes.values()) {
            if (!this.sampleHasVariant(g)) {
                continue;
            }
            if (compVCs == null) {
                return true;
            }
            boolean foundVariant = false;
            for (VariantContext compVC : compVCs) {
                if (this.sampleHasVariant(compVC.getGenotype(g.getSampleName()))) {
                    foundVariant = true;
                    break;
                }
            }
            if (!foundVariant) {
                return true;
            }
        }
        return false;
    }

    /**
     * Decides whether a record is concordant with the comparison records: every selected
     * sample present in the record must have the same genotype in at least one comparison
     * record. When no samples were specified, any non-empty comparison suffices.
     */
    private boolean isConcordant(VariantContext vc, Collection<VariantContext> compVCs) {
        if (vc == null || compVCs == null || compVCs.isEmpty()) {
            return false;
        }
        if (this.NO_SAMPLES_SPECIFIED) {
            return true;
        }
        // Work on a copy: retainAll must not modify the set owned by the VariantContext.
        Set<String> variantSamples = new HashSet<String>(vc.getSampleNames());
        variantSamples.retainAll(this.samples);
        for (String sample : variantSamples) {
            Genotype varG = vc.getGenotype(sample);
            boolean foundSample = false;
            for (VariantContext compVC : compVCs) {
                if (this.haveSameGenotypes(varG, compVC.getGenotype(sample))) {
                    foundSample = true;
                    break;
                }
            }
            if (!foundSample) {
                return false;
            }
        }
        return true;
    }

    /**
     * A sample "has a variant" when its genotype exists, is not hom-ref, and is either
     * called or filtered while filtered loci are not being excluded.
     */
    private boolean sampleHasVariant(Genotype g) {
        return g != null && !g.isHomRef() && (g.isCalled() || g.isFiltered() && !this.EXCLUDE_FILTERED);
    }

    /**
     * Compares two genotypes by allele content, ignoring allele order. A missing genotype
     * never matches, nor does a called/filtered pair, nor two filtered genotypes while
     * filtered loci are being excluded.
     */
    private boolean haveSameGenotypes(Genotype g1, Genotype g2) {
        // The comparison record may simply not contain the sample.
        if (g1 == null || g2 == null) {
            return false;
        }
        if (g1.isCalled() && g2.isFiltered() || g2.isCalled() && g1.isFiltered() || g1.isFiltered() && g2.isFiltered() && this.EXCLUDE_FILTERED) {
            return false;
        }
        List<Allele> a1s = g1.getAlleles();
        List<Allele> a2s = g2.getAlleles();
        return a1s.containsAll(a2s) && a2s.containsAll(a1s);
    }

    @Override
    public Integer reduceInit() {
        return 0;
    }

    @Override
    public Integer reduce(Integer value, Integer sum) {
        return value + sum;
    }

    /**
     * Logs the reduce result, flushes the randomly selected records (random-number mode)
     * to the output writer and closes the mendelian-violation report if one was opened.
     */
    @Override
    public void onTraversalDone(Integer result) {
        logger.info(result + " records processed.");
        if (this.SELECT_RANDOM_NUMBER) {
            // Fewer records than requested may have been seen; unfilled slots are null.
            int nToPrint = Math.min(this.nVariantsAdded, this.numRandom);
            int positionToPrint = this.positionToAdd;
            for (int i = 0; i < nToPrint; ++i) {
                this.vcfWriter.add(this.variantArray[positionToPrint].vc);
                positionToPrint = this.nextCircularPosition(positionToPrint);
            }
        }
        if (this.outMVFileStream != null) {
            this.outMVFileStream.close();
        }
    }

    /**
     * Restricts a record to the given samples and refreshes its INFO attributes: the
     * chromosome counts are recalculated, DP becomes the summed DP of the called,
     * unfiltered genotypes, and the original AC/AF/AN are preserved as *_Orig when
     * requested.
     *
     * @param vc record to subset
     * @param samples samples to keep; null or empty returns {@code vc} unchanged
     * @return the subsetted record
     */
    private VariantContext subsetRecord(VariantContext vc, Set<String> samples) {
        if (samples == null || samples.isEmpty()) {
            return vc;
        }
        ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
        for (Map.Entry<String, Genotype> genotypePair : vc.getGenotypes().entrySet()) {
            if (samples.contains(genotypePair.getKey())) {
                genotypes.add(genotypePair.getValue());
            }
        }
        VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles());
        if (vc.getAlleles().size() != sub.getAlleles().size()) {
            // Strip PLs from the subsetted genotypes; using vc's genotypes here would
            // put the unselected samples back into the record.
            sub = VariantContext.modifyGenotypes(sub, VariantContextUtils.stripPLs(sub.getGenotypes()));
        }
        HashMap<String, Object> attributes = new HashMap<String, Object>(sub.getAttributes());
        int depth = 0;
        for (String sample : sub.getSampleNames()) {
            Genotype g = sub.getGenotype(sample);
            if (!g.isNotFiltered() || !g.isCalled()) {
                continue;
            }
            Object dpAttribute = g.getAttribute("DP");
            if (dpAttribute == null) {
                continue;
            }
            // toString() instead of a String cast: tolerates a non-String DP attribute value.
            String dp = dpAttribute.toString();
            if (dp.equals("-1") || dp.equals(".")) {
                continue;
            }
            depth += Integer.parseInt(dp);
        }
        if (this.KEEP_ORIGINAL_CHR_COUNTS) {
            if (attributes.containsKey("AC")) {
                attributes.put("AC_Orig", attributes.get("AC"));
            }
            if (attributes.containsKey("AF")) {
                attributes.put("AF_Orig", attributes.get("AF"));
            }
            if (attributes.containsKey("AN")) {
                attributes.put("AN_Orig", attributes.get("AN"));
            }
        }
        VariantContextUtils.calculateChromosomeCounts(sub, attributes, false);
        attributes.put("DP", depth);
        sub = VariantContext.modifyAttributes(sub, attributes);
        return sub;
    }

    /**
     * Offers a record to the fixed-size random sample. The first numRandom records fill
     * the buffer; afterwards a record replaces the slot at positionToAdd with probability
     * {@code 1 / (rank - numRandom + 1)}, and the slot pointer advances circularly.
     *
     * @param rank 1-based rank of this record among all records offered so far
     * @param vc record to offer
     * @param refBase unused; kept for signature compatibility
     */
    private void randomlyAddVariant(int rank, VariantContext vc, byte refBase) {
        if (this.nVariantsAdded < this.numRandom) {
            this.variantArray[this.nVariantsAdded++] = new RandomVariantStructure(vc);
            return;
        }
        double v = GenomeAnalysisEngine.getRandomGenerator().nextDouble();
        double t = 1.0 / (double)(rank - this.numRandom + 1);
        if (v < t) {
            this.variantArray[this.positionToAdd].set(vc);
            ++this.nVariantsAdded;
            this.positionToAdd = this.nextCircularPosition(this.positionToAdd);
        }
    }

    /** Returns the index after {@code cur} in variantArray, wrapping to 0 at the end. */
    private int nextCircularPosition(int cur) {
        if (cur + 1 == this.variantArray.length) {
            return 0;
        }
        return cur + 1;
    }

    /** Allelicity restriction selectable with -restrictAllelesTo. */
    public static enum NumberAlleleRestriction {
        ALL,
        BIALLELIC,
        MULTIALLELIC;

    }

    /** Mutable holder for one slot of the random-sample buffer. */
    private static class RandomVariantStructure {
        private VariantContext vc;

        RandomVariantStructure(VariantContext vcP) {
            this.vc = vcP;
        }

        public void set(VariantContext vcP) {
            this.vc = vcP;
        }
    }
}

