/*
 * Decompiled with CFR 0.152.
 */
package ca.mcgill.mcb.pcingola.spliceSites;

import ca.mcgill.mcb.pcingola.fileIterator.FastaFileIterator;
import ca.mcgill.mcb.pcingola.interval.Chromosome;
import ca.mcgill.mcb.pcingola.interval.Exon;
import ca.mcgill.mcb.pcingola.interval.Gene;
import ca.mcgill.mcb.pcingola.interval.Intron;
import ca.mcgill.mcb.pcingola.interval.Marker;
import ca.mcgill.mcb.pcingola.interval.Markers;
import ca.mcgill.mcb.pcingola.interval.SpliceSiteBranch;
import ca.mcgill.mcb.pcingola.interval.SpliceSiteBranchU12;
import ca.mcgill.mcb.pcingola.interval.Transcript;
import ca.mcgill.mcb.pcingola.motif.MotifLogo;
import ca.mcgill.mcb.pcingola.motif.Pwm;
import ca.mcgill.mcb.pcingola.probablility.FisherExactTest;
import ca.mcgill.mcb.pcingola.snpEffect.Config;
import ca.mcgill.mcb.pcingola.snpEffect.commandLine.SnpEff;
import ca.mcgill.mcb.pcingola.spliceSites.AcgtTree;
import ca.mcgill.mcb.pcingola.spliceSites.SpliceTypes;
import ca.mcgill.mcb.pcingola.stats.CountByType;
import ca.mcgill.mcb.pcingola.stats.IntStats;
import ca.mcgill.mcb.pcingola.util.Gpr;
import ca.mcgill.mcb.pcingola.util.Timer;
import ca.mcgill.mcb.pcingola.util.Tuple;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Random;

/**
 * SnpEff sub-command that analyzes splice sites of a genome.
 *
 * <p>As visible in {@code run()}: it loads the genome database, builds a
 * {@link SpliceTypes} analysis, writes splice FASTA files, accumulates position
 * weight matrices (PWMs) per donor/acceptor pair and per exon-type pair, writes
 * an HTML report and a BED file, and finally saves the predictor database.
 */
public class SpliceAnalysis
extends SnpEff {
    /** p-values below this are reported by {@code PwmSet.pExonTypes(String)}. */
    public static double P_VALUE_THRESHOLD = 0.001;
    /** Number of bases taken as donor consensus in {@code updatePwm}. */
    public static int SIZE_CONSENSUS_DONOR = 2;
    /** Number of bases taken as acceptor consensus in {@code updatePwm}. */
    public static int SIZE_CONSENSUS_ACCEPTOR = 2;
    // NOTE(review): the literals 0.05, 100, 0.95 and 5.0 used throughout this class look like
    // these constants inlined by the compiler/decompiler — confirm against the original source.
    public static final double THRESHOLD_ENTROPY = 0.05;
    public static final int THRESHOLD_COUNT = 100;
    public static final double THRESHOLD_P = 0.95;
    public static final double THRESHOLD_BRANCH_U12_PERCENTILE = 0.95;
    public static final double THRESHOLD_U12_OBSERVED_EXPECTED = 5.0;
    /** Width passed to {@code MotifLogo.toStringHtml} when rendering motif logos. */
    public static int HTML_WIDTH = 20;
    /** Height passed to {@code MotifLogo.toStringHtml} when rendering motif logos. */
    public static int HTML_HEIGHT = 100;
    /** Directory for output files; replaced in {@code init()} by {@code <dataDir>/spliceSites}. */
    String outputDir = ".";
    /** Genome version, taken from the command line in {@code parseArgs}. */
    String genomeVer;
    /** Reference genome FASTA file name, resolved from the configuration in {@code init()}. */
    String genomeFasta;
    /** Accumulates the HTML report text; written to file at the end of {@code run()}. */
    StringBuilder out = new StringBuilder();
    Config config;
    SpliceTypes spliceTypes;
    // Not referenced by any method visible in this file.
    ArrayList<String> geneList = new ArrayList();
    /** PWM sets keyed by "donor_acceptor" consensus (plus the aggregate key " ALL"). */
    HashMap<String, PwmSet> pwmSetsByName = new HashMap();
    /** PWM sets keyed by "exonType-exonType" string. */
    HashMap<String, PwmSet> pwmSetsExonTypeByName = new HashMap();
    /** Introns found so far, keyed by {@code Intron.toString()}. */
    HashMap<String, Intron> intronsByStr = new HashMap();
    // The four thresholds below are not read or written by any method visible in this file.
    double thresholdPDonor;
    double thresholdEntropyDonor;
    double thresholdPAcc;
    double thresholdEntropyAcc;
    /** U12 branch score threshold, set in {@code run()} from {@code spliceTypes.branchU12Threshold(0.95)}. */
    double thresholdU12Score;
    /** Incremented in {@code updatePwm} for each intron long enough to be analyzed. */
    int countIntrons = 0;
    // Not referenced by any method visible in this file.
    Random random = new Random();

    /**
     * Counts the donor/acceptor pairs held by {@code spliceTypes} whose donor sequence
     * starts with {@code donor} and whose acceptor sequence ends with {@code acceptor}.
     *
     * @param donor    required prefix of the donor sequence
     * @param acceptor required suffix of the acceptor sequence
     * @return number of matching pairs
     */
    int countDonorAcc(String donor, String acceptor) {
        int matches = 0;
        int pairIdx = 0;
        while (pairIdx < spliceTypes.getDonorAccPairSize()) {
            String donorSeq = spliceTypes.getDonor(pairIdx);
            String acceptorSeq = spliceTypes.getAcceptor(pairIdx);
            if (donorSeq.startsWith(donor) && acceptorSeq.endsWith(acceptor)) {
                matches++;
            }
            pairIdx++;
        }
        return matches;
    }

    /**
     * Picks an entropy threshold from the tree: sorts the values returned by
     * {@code tree.entropyAll(100)} in ascending order and returns the one located at
     * 5% of the list length.
     *
     * @param tree tree to query
     * @return the entropy value at position {@code (int) (size * 0.05)} of the sorted list
     */
    double findEntropyThreshold(AcgtTree tree) {
        List<Double> entropies = tree.entropyAll(100);
        Collections.sort(entropies);
        double position = (double) entropies.size() * 0.05;
        return entropies.get((int) position);
    }

    /**
     * Picks a probability threshold from the tree: sorts the values returned by
     * {@code tree.pAll(100)} in ascending order and returns the one located at
     * 95% of the list length.
     *
     * @param tree tree to query
     * @return the value at position {@code (int) (size * 0.95)} of the sorted list
     */
    double findPthreshold(AcgtTree tree) {
        List<Double> probabilities = tree.pAll(100);
        Collections.sort(probabilities);
        double position = (double) probabilities.size() * 0.95;
        return probabilities.get((int) position);
    }

    /**
     * Returns the PWM set registered under {@code key} in {@code pwmSetsByName},
     * creating and registering a new one on first use.
     *
     * @param key name of the PWM set
     * @return the existing or newly created set (never {@code null})
     */
    PwmSet getPwmSet(String key) {
        PwmSet existing = pwmSetsByName.get(key);
        if (existing != null) {
            return existing;
        }
        PwmSet created = new PwmSet(key);
        pwmSetsByName.put(key, created);
        return created;
    }

    /**
     * Returns the PWM set registered under {@code key} in {@code pwmSetsExonTypeByName},
     * creating and registering a new one on first use.
     *
     * @param key exon-type pair name
     * @return the existing or newly created set (never {@code null})
     */
    PwmSet getPwmSetExonType(String key) {
        PwmSet existing = pwmSetsExonTypeByName.get(key);
        if (existing != null) {
            return existing;
        }
        PwmSet created = new PwmSet(key);
        pwmSetsExonTypeByName.put(key, created);
        return created;
    }

    /**
     * Builds the configuration for {@code genomeVer}, resolves the reference FASTA file
     * and the output directory, then loads the database via {@code load()}.
     *
     * @throws RuntimeException if the configuration yields no reference genome file
     */
    void init() {
        if (verbose) {
            Timer.showStdErr("Initializing");
        }

        config = new Config(genomeVer);

        String fastaFile = config.getFileNameGenomeFasta();
        genomeFasta = fastaFile;
        if (fastaFile == null) {
            throw new RuntimeException("Cannot find reference genome: " + config.getFileListGenomeFasta());
        }

        outputDir = config.getDirData() + "/spliceSites";
        load();
    }

    /**
     * Loads the SnpEff predictor through {@code config}, reporting progress on stderr
     * when running verbosely.
     */
    void load() {
        final boolean report = verbose;
        if (report) {
            Timer.showStdErr("Loading: " + genomeVer);
        }

        config.loadSnpEffectPredictor();

        if (report) {
            Timer.showStdErr("done");
        }
    }

    /**
     * Appends the string form of {@code o}, followed by a newline, to the report buffer.
     *
     * @param o object to print; must not be {@code null}
     */
    void out(Object o) {
        this.out.append(o.toString()).append("\n");
    }

    /**
     * Parses the command line: an optional leading {@code -v} (verbose) followed by
     * exactly one genome version. Calls {@code usage} on any other shape.
     *
     * @param args command line arguments
     */
    @Override
    public void parseArgs(String[] args) {
        if (args.length == 0) {
            usage(null);
        }

        int next = 0;
        boolean verboseRequested = args[next].equals("-v");
        if (verboseRequested) {
            verbose = true;
            next++;
        }

        // Exactly one argument (the genome) must remain
        int remaining = args.length - next;
        if (remaining != 1) {
            usage("Missing genome");
        }
        genomeVer = args[next];
    }

    /**
     * Runs the whole analysis: initialization, splice type analysis, splice FASTA
     * creation, PWM analysis, HTML report output and database save.
     *
     * @return always {@code true}
     */
    @Override
    public boolean run() {
        init();

        // Splice type analysis and U12 branch score threshold
        spliceTypes = new SpliceTypes(config);
        spliceTypes.setVerbose(verbose);
        spliceTypes.analyzeAndCreate();
        thresholdU12Score = spliceTypes.branchU12Threshold(0.95);
        spliceTypes.createSpliceFasta(outputDir);

        splicePwmAnalysis();

        // HTML report
        String reportName = getClass().getSimpleName() + "_" + genomeVer + ".html";
        String outputFile = outputDir + "/" + reportName;
        if (verbose) {
            Timer.showStdErr("Saving output to: " + outputFile);
        }
        Gpr.toFile(outputFile, this.out);

        // Database
        if (verbose) {
            Timer.showStdErr("Saving database to file: " + config.getFileSnpEffectPredictor());
        }
        config.getSnpEffectPredictor().save(config);

        if (verbose) {
            Timer.showStdErr("Done.");
            Timer.showStdErr("Finished!");
        }
        return true;
    }

    /**
     * Genome-wide PWM analysis.
     *
     * <ol>
     * <li>Reads the reference FASTA and analyzes every chromosome sequence.</li>
     * <li>For donor/acceptor pairs seen at least 100 times and with a U12
     *     observed/expected ratio above 5.0, adds their U12 branch sites to the parent
     *     transcripts.</li>
     * <li>Writes those branch sites together with all collected introns to a BED file.</li>
     * <li>Appends two HTML tables to the report: one by donor-acceptor type and one by
     *     exon-exon type.</li>
     * </ol>
     */
    void splicePwmAnalysis() {
        if (verbose) {
            Timer.showStdErr("Splice analysis (PWM). Reading fasta file: " + genomeFasta);
        }

        // Analyze each chromosome in the reference genome
        FastaFileIterator fasta = new FastaFileIterator(genomeFasta);
        out("<pre>\n");
        for (String sequence : fasta) {
            String chromosome = Chromosome.simpleName(fasta.getName());
            splicePwmAnalysis(chromosome, sequence);
        }
        out("</pre>\n");

        String bedFile = outputDir + "/" + getClass().getSimpleName() + "_" + genomeVer + "_introns_branchSitesU12.bed";
        if (verbose) {
            Timer.showStdErr("Writing Introns and SpliceSitesBranchU12 file to BED file: '" + bedFile + "'");
        }

        // Collect U12 branch sites of frequent, U12-enriched donor/acceptor pairs
        Markers bedMarkers = new Markers();
        for (String pairName : pwmSetsByName.keySet()) {
            PwmSet candidate = getPwmSet(pairName);
            if (candidate.updates >= 100 && candidate.countU12ObsExp() > 5.0) {
                List<SpliceSiteBranchU12> branchSites = spliceTypes.getBranchU12(pairName);
                for (SpliceSiteBranchU12 site : branchSites) {
                    Transcript owner = (Transcript) site.getParent();
                    owner.add(site);
                    bedMarkers.add(site);
                    Gpr.debug("Adding BranchU12 '" + site + "' to transcript " + owner.getId() + "\tDonor-acceptor pair: " + pairName + "\tObs/Expected: " + candidate.countU12ObsExp());
                }
            }
        }

        // Add introns, sort and write one line per marker
        bedMarkers.addAll(intronsByStr.values());
        bedMarkers.sort(false, false);
        StringBuilder bed = new StringBuilder();
        for (Marker marker : bedMarkers) {
            bed.append(marker.getChromosomeName()).append("\t")
                .append(marker.getStart() + 1).append("\t")
                .append(marker.getEnd() + 1).append("\t")
                .append(marker instanceof SpliceSiteBranch ? marker.getType().toString() : marker.getId())
                .append("\n");
        }
        Gpr.toFile(bedFile, bed);

        if (verbose) {
            Timer.showStdErr("Filter out low count splice sites. Exons: " + countIntrons + "\tThreshold: " + 100);
        }

        // Table 1: by donor-acceptor type
        List<PwmSet> byDonorAcceptor = new ArrayList<PwmSet>(pwmSetsByName.values());
        Collections.sort(byDonorAcceptor);
        out("<p><center><h3>Analysis by Donnor-Acceptor type</h3></center><p><table border=1>\n");
        out("<p><b>U12 PWM score threshold:</b> " + thresholdU12Score + "<p>\n");
        out("<tr> <th> Rank </th> <th> Donor-Acceptor </th>  <th> Count </th>  <th> Donor Motif </th> <th> U12 matches (Observed / Expected) </th> <th> Acceptor Motif </th> <th> Intron length </th> <th> Intron Type Count </th> <th> Intron Type p-values </th> </tr>\n");
        outRankedRows(byDonorAcceptor);
        out("</table>\n");

        // Table 2: by exon-exon type
        List<PwmSet> byExonType = new ArrayList<PwmSet>(pwmSetsExonTypeByName.values());
        Collections.sort(byExonType);
        out("<p><hr><p><center><h3>Analysis by Exon-Exon types</h3></center><p><table border=1>\n");
        out("<tr> <th> Rank </th> <th> Exon_Type --- Exon_Type</th>  <th> Count </th>  <th> Donor Motif </th> <th> U12 matches (Observed / Expected) </th> <th> Acceptor Motif </th> <th> Intron length </th> <th> Intron Type Count </th> <th> Intron Type p-values </th> </tr>\n");
        outRankedRows(byExonType);
        out("</table>\n");
    }

    /**
     * Appends one HTML table row per PWM set that has at least 100 updates, numbering
     * the emitted rows from zero in list order.
     */
    private void outRankedRows(List<PwmSet> sortedSets) {
        int rank = 0;
        for (PwmSet set : sortedSets) {
            if (set.updates >= 100) {
                out("<tr> <td> " + rank + " </td> " + set + "</tr>\n");
                rank++;
            }
        }
    }

    /**
     * Analyzes all introns of one chromosome.
     *
     * <p>For every transcript of every gene located on {@code chrName}, walks the exons
     * in strand order and treats the gap between each pair of consecutive exons as an
     * intron. Each distinct {@code start-end} interval is processed once: its PWMs are
     * updated and an {@link Intron} is recorded in {@code intronsByStr}.
     *
     * @param chrName chromosome name
     * @param chrSeq  chromosome sequence
     */
    void splicePwmAnalysis(String chrName, String chrSeq) {
        int exonCount = 0;
        HashSet<String> seenIntervals = new HashSet<String>();

        for (Gene gene : config.getGenome().getGenes()) {
            if (gene.getChromosomeName().equals(chrName)) {
                for (Transcript tr : gene) {
                    Exon previous = null;
                    for (Exon current : tr.sortedStrand()) {
                        exonCount++;
                        if (previous != null) {
                            // Interval limits depend on the strand
                            boolean plusStrand = tr.isStrandPlus();
                            int start = plusStrand ? previous.getEnd() : current.getEnd();
                            int end = plusStrand ? current.getStart() : previous.getStart();

                            String exonTypes = previous.getSpliceType().toString() + "-" + current.getSpliceType().toString();
                            String key = chrName + ":" + start + "-" + end;

                            // Process each interval only once
                            if (seenIntervals.add(key)) {
                                updatePwm(tr, chrSeq, start, end, exonTypes);
                                Intron intron = new Intron(tr, start, end, 1, exonTypes);
                                intronsByStr.put(intron.toString(), intron);
                            }
                        }
                        previous = current;
                    }
                }
            }
        }

        if (verbose) {
            Timer.showStdErr("\tChromosome: " + chrName + "\tGenes: " + config.getGenome().getGenes().size() + "\tExons: " + exonCount);
        }
    }

    /**
     * Updates the PWM statistics with one intron.
     *
     * <p>Introns shorter than {@code 2 * SpliceTypes.MAX_SPLICE_SIZE} are ignored, as are
     * introns whose donor or acceptor consensus bases contain an 'N'. Otherwise the
     * intron is assigned to the longest matching donor/acceptor pair known to
     * {@code spliceTypes} (falling back to the raw consensus bases), and three PWM sets
     * are updated: the one for that pair, the aggregate {@code " ALL"} set, and the one
     * for the exon-type pair.
     *
     * @param tr          transcript the intron belongs to
     * @param chrSeq      chromosome sequence
     * @param intronStart intron start coordinate
     * @param intronEnd   intron end coordinate
     * @param exonTypes   "type-type" string of the exons flanking the intron
     */
    void updatePwm(Transcript tr, String chrSeq, int intronStart, int intronEnd, String exonTypes) {
        int len = intronEnd - intronStart;
        if (len < 2 * SpliceTypes.MAX_SPLICE_SIZE) {
            return;
        }

        String donorStr = this.spliceTypes.seqDonor(tr, chrSeq, intronStart, intronEnd);
        String accStr = this.spliceTypes.seqAcceptor(tr, chrSeq, intronStart, intronEnd);
        String intronSeqDonor = donorStr.substring(SpliceTypes.MAX_SPLICE_SIZE + 1);
        String intronSeqAcc = accStr.substring(0, SpliceTypes.MAX_SPLICE_SIZE);
        ++this.countIntrons;

        // Donor consensus: ignore the intron if it contains an ambiguous base
        String donorConsensus = donorStr.substring(SpliceTypes.MAX_SPLICE_SIZE + 1, SpliceTypes.MAX_SPLICE_SIZE + 1 + SIZE_CONSENSUS_DONOR);
        if (donorConsensus.indexOf('N') >= 0) {
            return;
        }

        // Acceptor consensus: same rule. (Previously this re-tested the donor consensus,
        // so ambiguous acceptors were never filtered out.)
        String accConsensus = accStr.substring(SpliceTypes.MAX_SPLICE_SIZE - SIZE_CONSENSUS_ACCEPTOR, SpliceTypes.MAX_SPLICE_SIZE);
        if (accConsensus.indexOf('N') >= 0) {
            return;
        }

        // Prefer the longest known donor/acceptor pair that matches this intron
        int maxLenDa = 0;
        for (int i = 0; i < this.spliceTypes.getDonorAccPairSize(); ++i) {
            String don = this.spliceTypes.getDonor(i);
            String ac = this.spliceTypes.getAcceptor(i);
            if (!intronSeqDonor.startsWith(don) || !intronSeqAcc.endsWith(ac)) {
                continue;
            }
            int lenda = don.length() + ac.length();
            if (lenda > maxLenDa) {
                maxLenDa = lenda;
                donorConsensus = don;
                accConsensus = ac;
            }
        }

        String consensus = donorConsensus + "_" + accConsensus;
        Tuple<Double, Integer> bestU12 = this.spliceTypes.addBestU12Score(tr, chrSeq, consensus, intronStart, intronEnd);
        double bestU12score = (Double)bestU12.first;
        boolean isU12 = bestU12score >= this.thresholdU12Score;

        // Set for this donor/acceptor pair
        PwmSet pwmSet = this.getPwmSet(consensus);
        pwmSet.update(accStr, donorStr);
        pwmSet.len(len);
        pwmSet.incExonTypes(exonTypes);
        if (isU12) {
            pwmSet.incU12();
        }

        // Aggregate set (U12 matches are not counted here)
        pwmSet = this.getPwmSet(" ALL");
        pwmSet.update(accStr, donorStr);
        pwmSet.incExonTypes(exonTypes);
        pwmSet.len(len);

        // Set for this exon-type pair
        pwmSet = this.getPwmSetExonType(exonTypes);
        pwmSet.update(accStr, donorStr);
        pwmSet.len(len);
        if (isU12) {
            pwmSet.incU12();
        }
    }

    /**
     * Prints an optional error message and the usage line to stderr, then terminates
     * the JVM with exit code -1.
     *
     * @param message error to show before the usage line, or {@code null} for none
     */
    @Override
    public void usage(String message) {
        boolean hasMessage = message != null;
        if (hasMessage) {
            System.err.println("Error: " + message + "\n");
        }

        System.err.println("Usage: snpEff genome_version");
        System.exit(-1);
    }

    /**
     * Statistics accumulated for one group of introns (a donor/acceptor pair, an
     * exon-type pair, or the aggregate " ALL" group): donor and acceptor PWMs, intron
     * length statistics, exon-type counts and the number of U12 matches.
     *
     * <p>Natural ordering: most updates first, ties broken by name.
     */
    class PwmSet
    implements Comparable<PwmSet> {
        String name;
        Pwm pwmAcc;
        Pwm pwmDonor;
        CountByType countMotif;
        CountByType countExonTypes;
        IntStats lenStats;
        int motifMatchedBases = 0;
        int motifMatchedStr = 0;
        /** Number of calls to {@link #update(String, String)}. */
        int updates = 0;
        /** Number of calls to {@link #incU12()}. */
        int countU12 = 0;

        /**
         * Creates an empty set.
         *
         * @param name name shown in the report and used as tie-breaker when sorting
         */
        public PwmSet(String name) {
            this.name = name;
            this.pwmAcc = new Pwm(2 * SpliceTypes.MAX_SPLICE_SIZE + 1);
            this.pwmDonor = new Pwm(2 * SpliceTypes.MAX_SPLICE_SIZE + 1);
            this.lenStats = new IntStats();
            this.countMotif = new CountByType();
            this.countExonTypes = new CountByType();
        }

        /**
         * Orders sets by descending number of updates, then by ascending name.
         */
        @Override
        public int compareTo(PwmSet ps) {
            // Integer.compare instead of subtraction: cannot overflow
            int byUpdates = Integer.compare(ps.updates, this.updates);
            if (byUpdates != 0) {
                return byUpdates;
            }
            return this.name.compareTo(ps.name);
        }

        /**
         * Ratio of observed U12 matches to the expected number.
         *
         * @return {@code countU12 / (updates * (1 - 0.95))}; not finite when {@code updates} is zero
         */
        public double countU12ObsExp() {
            // NOTE(review): 0.95 presumably mirrors THRESHOLD_BRANCH_U12_PERCENTILE — confirm
            double expected = (double)this.updates * (1.0 - 0.95);
            return (double)this.countU12 / expected;
        }

        /** Counts one more intron flanked by the given exon-type pair. */
        void incExonTypes(String exonTypes) {
            this.countExonTypes.inc(exonTypes);
        }

        /** Counts one more U12 match. */
        void incU12() {
            ++this.countU12;
        }

        /** Adds an intron length to the length statistics. */
        void len(int len) {
            this.lenStats.sample(len);
        }

        /**
         * Concatenates the p-value lines of {@link #pExonTypes(String)} for every
         * exon-type pair counted in this set.
         */
        String pExonTypes() {
            StringBuilder lines = new StringBuilder();
            for (String type : this.countExonTypes.getTypeList()) {
                lines.append(this.pExonTypes(type));
            }
            return lines.toString();
        }

        /**
         * Tests whether {@code category} is under- or over-represented in this set
         * relative to the aggregate " ALL" set, using Fisher's exact test.
         *
         * @param category exon-type pair to test
         * @return zero, one or two text lines, one for each tail whose p-value is below
         *         {@code P_VALUE_THRESHOLD}
         */
        String pExonTypes(String category) {
            // Counts within this set: 'white' = category, 'black' = everything else
            int countBlackDrawn = 0;
            for (String type : this.countExonTypes.getTypeList()) {
                if (!type.equals(category)) {
                    countBlackDrawn += this.countExonTypes.get(type);
                }
            }
            int countWhiteDrawn = (int)this.countExonTypes.get(category);

            // Same counts within the aggregate set
            CountByType countExonTypesAll = SpliceAnalysis.this.getPwmSet(" ALL").countExonTypes;
            int countBlack = 0;
            for (String type : countExonTypesAll.getTypeList()) {
                if (!type.equals(category)) {
                    countBlack += countExonTypesAll.get(type);
                }
            }
            int countWhite = (int)countExonTypesAll.get(category);

            int total = countBlack + countWhite;
            int drawn = countBlackDrawn + countWhiteDrawn;

            StringBuilder report = new StringBuilder();
            double pDown = FisherExactTest.get().fisherExactTestDown(countWhiteDrawn, total, countWhite, drawn);
            if (pDown < P_VALUE_THRESHOLD) {
                report.append("p-value Down (").append(category).append(") : ").append(pDown).append("\n");
            }
            double pUp = FisherExactTest.get().fisherExactTestUp(countWhiteDrawn, total, countWhite, drawn);
            if (pUp < P_VALUE_THRESHOLD) {
                report.append("p-value Up   (").append(category).append(") : ").append(pUp).append("\n");
            }
            return report.toString();
        }

        /**
         * Renders this set as the cells of one HTML table row (without the enclosing
         * {@code <tr>}): name, count, donor logo, U12 matches, acceptor logo, intron
         * length statistics, exon-type counts and exon-type p-values.
         */
        @Override
        public String toString() {
            StringBuilder html = new StringBuilder();
            html.append("\t<td> <b>" + this.name + "</b> </td>\n");
            html.append("\t<td> " + this.updates + "</td>\n");

            // Donor motif
            MotifLogo mlDonor = new MotifLogo(this.pwmDonor);
            html.append("\t<td>\n");
            html.append(mlDonor.toStringHtml(HTML_WIDTH, HTML_HEIGHT));
            html.append("\t</td>\n");

            // U12 matches; the higher the observed/expected ratio, the redder the cell
            double oe = this.countU12ObsExp();
            String bg = "ffffff";
            if (oe > 5.0) {
                bg = "ff0000";
            } else if (oe > 2.0) {
                bg = "ff8888";
            } else if (oe > 1.2) {
                bg = "ffcccc";
            }
            html.append(String.format("\t<td bgcolor=%s> <center> %d (%1.2f) </center> </td>\n", bg, this.countU12, oe));

            // Acceptor motif
            MotifLogo mlAcc = new MotifLogo(this.pwmAcc);
            html.append("\t<td>\n");
            html.append(mlAcc.toStringHtml(HTML_WIDTH, HTML_HEIGHT));
            html.append("\t</td>\n");

            // Intron length statistics
            html.append("\t<td> <pre>\n");
            html.append(this.lenStats.toString());
            html.append("\t</pre></td>\n");

            // Exon-type counts
            html.append("\t<td> <pre>\n");
            html.append(this.countExonTypes);
            html.append("\t</pre></td>\n");

            // Exon-type p-values
            html.append("\t<td> <pre>\n");
            html.append(this.pExonTypes());
            html.append("\t</pre></td>\n");
            return html.toString();
        }

        /**
         * Counts one more intron and adds its sequences to the PWMs.
         *
         * @param accStr   acceptor sequence, or {@code null} to leave the acceptor PWM untouched
         * @param donorStr donor sequence, or {@code null} to leave the donor PWM untouched
         */
        public void update(String accStr, String donorStr) {
            ++this.updates;
            if (accStr != null) {
                this.pwmAcc.updateCounts(accStr);
            }
            if (donorStr != null) {
                this.pwmDonor.updateCounts(donorStr);
            }
        }
    }
}

