/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.sting.gatk.walkers.beagle;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.beagle.BeagleFeature;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;

/**
 * ROD walker that merges Beagle output back into an input VCF.
 *
 * <p>For every unfiltered input variant that has a matching record in all three Beagle
 * tracks ({@code beagleR2}, {@code beagleProbs}, {@code beaglePhased}), each sample's
 * genotype is replaced by the Beagle-phased genotype, its quality is recomputed from the
 * Beagle probabilities, and the original genotype is recorded in the {@code OG} format
 * field when it was changed. Sites at which no resulting genotype is het or hom-var are
 * marked with a {@code BGL_RM_WAS_*} filter unless
 * {@link #DONT_FILTER_MONOMORPHIC_SITES} is set. Filtered input variants, and variants
 * lacking any of the Beagle records, are written through unchanged.
 *
 * <p>The map/reduce result is the number of records written.
 */
public class BeagleOutputToVCFWalker
extends RodWalker<Integer, Integer> {
    @ArgumentCollection
    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
    @Input(fullName="comp", shortName="comp", doc="Comparison VCF file", required=false)
    public RodBinding<VariantContext> comp;
    @Input(fullName="beagleR2", shortName="beagleR2", doc="Beagle-produced .r2 file containing R^2 values for all markers", required=true)
    public RodBinding<BeagleFeature> beagleR2;
    @Input(fullName="beagleProbs", shortName="beagleProbs", doc="Beagle-produced .probs file containing posterior genotype probabilities", required=true)
    public RodBinding<BeagleFeature> beagleProbs;
    @Input(fullName="beaglePhased", shortName="beaglePhased", doc="Beagle-produced .phased file containing phased genotypes", required=true)
    public RodBinding<BeagleFeature> beaglePhased;
    @Output(doc="VCF File to which variants should be written", required=true)
    protected VCFWriter vcfWriter = null;
    @Argument(fullName="dont_mark_monomorphic_sites_as_filtered", shortName="keep_monomorphic", doc="If provided, we won't filter sites that beagle tags as monomorphic.  Useful for imputing a sample's genotypes from a reference panel", required=false)
    public boolean DONT_FILTER_MONOMORPHIC_SITES = false;
    @Argument(fullName="nocall_threshold", shortName="ncthr", doc="Threshold of confidence at which a genotype won't be called", required=false)
    private double noCallThreshold = 0.0;
    // Not referenced anywhere in this class; kept because it is protected and may be
    // used by subclasses or other code in the package.
    protected static String line = null;
    /** Probabilities of a wrong genotype are clamped to at most {@code 1 - MIN_PROB_ERROR}. */
    private static final double MIN_PROB_ERROR = 1.0E-6;
    /** Genotype quality assigned when the wrong-genotype probability is below {@link #MIN_PROB_ERROR}. */
    private static final double MAX_GENOTYPE_QUALITY = 6.0;

    /**
     * Builds the output VCF header (toolkit header fields plus the FORMAT/INFO/FILTER
     * lines this walker emits) and writes it to {@link #vcfWriter}.
     */
    @Override
    public void initialize() {
        Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
        hInfo.addAll(VCFUtils.getHeaderFields(this.getToolkit()));
        hInfo.add(new VCFFormatHeaderLine("OG", 1, VCFHeaderLineType.String, "Original Genotype input to Beagle"));
        hInfo.add(new VCFInfoHeaderLine("R2", 1, VCFHeaderLineType.Float, "r2 Value reported by Beagle on each site"));
        hInfo.add(new VCFInfoHeaderLine("NumGenotypesChanged", 1, VCFHeaderLineType.Integer, "The number of genotypes changed by Beagle"));
        hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_A", "This 'A' site was set to monomorphic by Beagle"));
        hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_C", "This 'C' site was set to monomorphic by Beagle"));
        hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_G", "This 'G' site was set to monomorphic by Beagle"));
        hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_T", "This 'T' site was set to monomorphic by Beagle"));
        if (this.comp.isBound()) {
            // ACH/ANH/AFH are only emitted by map() when a comparison record is present.
            // ANH carries the called-chromosome count (allele number) and AFH the ratio
            // ACH/ANH (allele frequency); the descriptions below match what map() writes.
            hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site"));
            hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Number from Comparison ROD at this site"));
            hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Frequency from Comparison ROD at this site"));
        }
        Set<String> samples = SampleUtils.getSampleListWithVCFHeader(this.getToolkit(), Arrays.asList(this.variantCollection.variants.getName()));
        this.vcfWriter.writeHeader(new VCFHeader(hInfo, samples));
    }

    /**
     * Processes one locus: writes either the untouched input variant or a variant whose
     * genotypes were replaced with Beagle's output.
     *
     * @param tracker reference-ordered data at this locus; {@code null} yields 0
     * @param ref     reference context (not used by this method)
     * @param context alignment context, used only for its location
     * @return 1 if a record was written to the output VCF, 0 otherwise
     */
    @Override
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if (tracker == null) {
            return 0;
        }
        GenomeLoc loc = context.getLocation();
        VariantContext vc_input = tracker.getFirstValue(this.variantCollection.variants, loc);
        VariantContext vc_comp = tracker.getFirstValue(this.comp, loc);
        if (vc_input == null) {
            return 0;
        }
        if (vc_input.isFiltered()) {
            // Already-filtered sites are passed through unchanged.
            this.vcfWriter.add(vc_input);
            return 1;
        }
        BeagleFeature beagleR2Feature = tracker.getFirstValue(this.beagleR2);
        BeagleFeature beagleProbsFeature = tracker.getFirstValue(this.beagleProbs);
        BeagleFeature beaglePhasedFeature = tracker.getFirstValue(this.beaglePhased);
        if (beagleR2Feature == null || beagleProbsFeature == null || beaglePhasedFeature == null) {
            // Without all three Beagle records there is nothing to merge; emit the input as-is.
            this.vcfWriter.add(vc_input);
            return 1;
        }

        Map<String, Genotype> inputGenotypes = vc_input.getGenotypes();
        Map<String, Genotype> genotypes = new HashMap<String, Genotype>(inputGenotypes.size());
        Map<String, Genotype> hapmapGenotypes = vc_comp != null ? vc_comp.getGenotypes() : null;

        // The reference allele string is the same for every sample, so compute it once.
        // An empty display string is mapped to "-" before being compared with Beagle's alleles.
        String refString = vc_input.getReference().getDisplayString();
        if (refString.length() == 0) {
            refString = "-";
        }

        int numGenotypesChangedByBeagle = 0;
        int alleleCountH = 0;
        int chrCountH = 0;
        int beagleVarCounts = 0;

        for (Map.Entry<String, Genotype> originalGenotypes : inputGenotypes.entrySet()) {
            Genotype g = originalGenotypes.getValue();
            Set<String> filters = new LinkedHashSet<String>(g.getFilters());
            boolean genotypeIsPhased = true;
            String sample = g.getSampleName();

            // Tally comparison-track allele count / allele number over called genotypes
            // of samples that are present in both the input and the comparison record.
            if (hapmapGenotypes != null && inputGenotypes.containsKey(sample)) {
                Genotype hapmapGenotype = hapmapGenotypes.get(sample);
                if (hapmapGenotype != null && hapmapGenotype.isCalled()) {
                    chrCountH += 2;
                    if (hapmapGenotype.isHet()) {
                        alleleCountH += 1;
                    } else if (hapmapGenotype.isHomVar()) {
                        alleleCountH += 2;
                    }
                }
            }

            // NOTE(review): both lookups return null if the sample is absent from the
            // Beagle records, which makes the get(...) calls below throw
            // NullPointerException - confirm every input sample is always present.
            List<String> beagleProbabilities = beagleProbsFeature.getProbLikelihoods().get(sample);
            List<String> beagleGenotypePairs = beaglePhasedFeature.getGenotypes().get(sample);

            Allele originalAlleleA = g.getAllele(0);
            // Genotypes that do not have exactly two alleles reuse the first allele for B.
            Allele originalAlleleB = g.getAlleles().size() == 2 ? g.getAllele(1) : g.getAllele(0);

            Allele bglAlleleA = BeagleOutputToVCFWalker.createBeagleAllele(beagleGenotypePairs.get(0), refString);
            Allele bglAlleleB = BeagleOutputToVCFWalker.createBeagleAllele(beagleGenotypePairs.get(1), refString);
            List<Allele> alleles = new ArrayList<Allele>();
            alleles.add(bglAlleleA);
            alleles.add(bglAlleleB);

            // Entries 0, 1, 2 are treated as the hom-ref, het and hom-var probabilities.
            double homRefProbability = Double.parseDouble(beagleProbabilities.get(0));
            double hetProbability = Double.parseDouble(beagleProbabilities.get(1));
            double homVarProbability = Double.parseDouble(beagleProbabilities.get(2));
            double probWrongGenotype = BeagleOutputToVCFWalker.probabilityOfWrongGenotype(bglAlleleA, bglAlleleB, homRefProbability, hetProbability, homVarProbability);
            if (probWrongGenotype > 1.0 - MIN_PROB_ERROR) {
                probWrongGenotype = 1.0 - MIN_PROB_ERROR;
            }
            if (1.0 - probWrongGenotype < this.noCallThreshold) {
                // Confidence is below the threshold: fall back to the original (unphased) alleles.
                alleles.clear();
                alleles.add(originalAlleleA);
                alleles.add(originalAlleleB);
                genotypeIsPhased = false;
            }
            // Quality is -log10(P(wrong)), capped at MAX_GENOTYPE_QUALITY for tiny probabilities.
            double genotypeQuality = probWrongGenotype < MIN_PROB_ERROR ? MAX_GENOTYPE_QUALITY : -Math.log10(probWrongGenotype);

            Map<String, Object> originalAttributes = new HashMap<String, Object>(g.getAttributes());
            String og = BeagleOutputToVCFWalker.encodeOriginalAllele(originalAlleleA) + "/" + BeagleOutputToVCFWalker.encodeOriginalAllele(originalAlleleB);
            // The comparison is order-insensitive: A/B and B/A count as the same genotype.
            boolean sameAsOriginal = bglAlleleA.equals(originalAlleleA) && bglAlleleB.equals(originalAlleleB)
                    || bglAlleleA.equals(originalAlleleB) && bglAlleleB.equals(originalAlleleA);
            if (sameAsOriginal) {
                originalAttributes.put("OG", ".");
            } else {
                originalAttributes.put("OG", og);
                ++numGenotypesChangedByBeagle;
            }

            Genotype imputedGenotype = new Genotype(originalGenotypes.getKey(), alleles, genotypeQuality, filters, originalAttributes, genotypeIsPhased);
            if (imputedGenotype.isHet() || imputedGenotype.isHomVar()) {
                ++beagleVarCounts;
            }
            genotypes.put(originalGenotypes.getKey(), imputedGenotype);
        }

        VariantContext filteredVC;
        if (beagleVarCounts > 0 || this.DONT_FILTER_MONOMORPHIC_SITES) {
            filteredVC = new VariantContext("outputvcf", vc_input.getChr(), (long)vc_input.getStart(), (long)vc_input.getEnd(), vc_input.getAlleles(), genotypes, vc_input.getNegLog10PError(), vc_input.filtersWereApplied() ? vc_input.getFilters() : null, vc_input.getAttributes());
        } else {
            // No variant genotype remains: keep only the reference allele and tag the
            // site with a filter named after the first alternate allele of the input.
            Set<String> removedFilters = vc_input.filtersWereApplied() ? new HashSet<String>(vc_input.getFilters()) : new HashSet<String>(1);
            removedFilters.add(String.format("BGL_RM_WAS_%s", vc_input.getAlternateAllele(0)));
            filteredVC = new VariantContext("outputvcf", vc_input.getChr(), (long)vc_input.getStart(), (long)vc_input.getEnd(), new HashSet<Allele>(Arrays.asList(vc_input.getReference())), genotypes, vc_input.getNegLog10PError(), removedFilters, vc_input.getAttributes());
        }

        Map<String, Object> attributes = new HashMap<String, Object>(filteredVC.getAttributes());
        VariantContextUtils.calculateChromosomeCounts(filteredVC, attributes, false);
        if (vc_comp != null) {
            attributes.put("ACH", Integer.toString(alleleCountH));
            attributes.put("ANH", Integer.toString(chrCountH));
            // NOTE(review): when no comparison genotype is called chrCountH is 0 and this
            // formats NaN (0/0 in double arithmetic) - confirm that is the intended output.
            attributes.put("AFH", String.format("%4.2f", (double)alleleCountH / (double)chrCountH));
        }
        attributes.put("NumGenotypesChanged", numGenotypesChangedByBeagle);
        if (!beagleR2Feature.getR2value().equals(Double.NaN)) {
            attributes.put("R2", beagleR2Feature.getR2value().toString());
        }
        this.vcfWriter.add(VariantContext.modifyAttributes(filteredVC, attributes));
        return 1;
    }

    /**
     * Creates an allele from a Beagle allele string, flagged as reference when the string
     * equals the reference string. Plain string equality is used so that the reference
     * string is never interpreted as a regular expression.
     */
    private static Allele createBeagleAllele(String bases, String refString) {
        return Allele.create(bases, bases.equals(refString));
    }

    /** Encodes an allele for the OG field: "." for no-call, "0" for reference, "1" otherwise. */
    private static String encodeOriginalAllele(Allele allele) {
        if (allele.isNoCall()) {
            return ".";
        }
        return allele.isReference() ? "0" : "1";
    }

    /**
     * Returns the summed probability of the two genotype classes other than the one implied
     * by the given allele pair (both reference, exactly one reference, or neither).
     */
    private static double probabilityOfWrongGenotype(Allele alleleA, Allele alleleB, double homRefProbability, double hetProbability, double homVarProbability) {
        if (alleleA.isReference() && alleleB.isReference()) {
            return hetProbability + homVarProbability;
        }
        boolean exactlyOneReference = alleleB.isReference() && alleleA.isNonReference()
                || alleleA.isReference() && alleleB.isNonReference();
        if (exactlyOneReference) {
            return homRefProbability + homVarProbability;
        }
        return hetProbability + homRefProbability;
    }

    /** Starts the running count of written records at zero. */
    @Override
    public Integer reduceInit() {
        return 0;
    }

    /** Adds the per-locus result of {@link #map} to the running total. */
    @Override
    public Integer reduce(Integer value, Integer sum) {
        return sum + value;
    }

    /** Prints the accumulated total to standard output. */
    @Override
    public void onTraversalDone(Integer result) {
        System.out.printf("Processed %d loci.\n", result);
    }
}

