/*
 * Decompiled with CFR 0.152.
 */
package ca.mcgill.mcb.pcingola.spliceSites;

import ca.mcgill.mcb.pcingola.fileIterator.FastaFileIterator;
import ca.mcgill.mcb.pcingola.interval.Chromosome;
import ca.mcgill.mcb.pcingola.interval.Exon;
import ca.mcgill.mcb.pcingola.interval.Gene;
import ca.mcgill.mcb.pcingola.interval.Transcript;
import ca.mcgill.mcb.pcingola.motif.Pwm;
import ca.mcgill.mcb.pcingola.snpEffect.Config;
import ca.mcgill.mcb.pcingola.spliceSites.AcgtTree;
import ca.mcgill.mcb.pcingola.util.Gpr;
import ca.mcgill.mcb.pcingola.util.GprSeq;
import ca.mcgill.mcb.pcingola.util.Timer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Set;

/**
 * Collects the intronic sequences flanking every exon-exon junction of a genome,
 * looks for frequently co-occurring donor/acceptor sequence pairs using
 * {@link AcgtTree}s, and finally asks each exon to create donor/acceptor splice
 * sites whose size depends on the best matching pair.
 *
 * <p>The entry point is {@link #analyzeAndCreate()}. Introns are identified by a
 * key of the form {@code chromosome:start-end}, where {@code start} is the end
 * coordinate of one exon and {@code end} the start coordinate of the next one.
 */
public class SpliceTypes {
    /** Number of bases kept on the intronic side of a junction for donor and acceptor sequences. */
    public static int MAX_SPLICE_SIZE = 10;
    /** Size of the window, ending at the acceptor side of the intron, used as branch region. */
    public static int SIZE_BRANCH = 60;
    /** Fraction of the sorted entropy values used as index when picking the entropy threshold. */
    public static final double THRESHOLD_ENTROPY = 0.05;
    /** Minimum count passed to the {@link AcgtTree} queries and required for a donor/acceptor pair. */
    public static final int THRESHOLD_COUNT = 100;
    /** Fraction of the sorted probability values used as index when picking the probability threshold. */
    public static final double THRESHOLD_P = 0.95;
    boolean verbose = false;
    Config config;
    /** Intronic donor-side sequence, by intron key (empty string for introns that are too short). */
    HashMap<String, String> donorsByIntron = new HashMap<String, String>();
    /** Intronic acceptor-side sequence, by intron key (empty string for introns that are too short). */
    HashMap<String, String> acceptorsByIntron = new HashMap<String, String>();
    /** Branch region sequence, by intron key (empty string for introns that are too short). */
    HashMap<String, String> branchByIntron = new HashMap<String, String>();
    /** Donor part of the selected donor/acceptor pairs; parallel to {@link #donorAccPairAcc}. */
    ArrayList<String> donorAccPairDonor = new ArrayList<String>();
    /** Acceptor part of the selected donor/acceptor pairs; parallel to {@link #donorAccPairDonor}. */
    ArrayList<String> donorAccPairAcc = new ArrayList<String>();
    /** Tree of all donor sequences that contain no 'N'. */
    AcgtTree acgtTreeDonors = new AcgtTree();
    /** Tree of all acceptor sequences (stored reversed) that contain no 'N'. */
    AcgtTree acgtTreeAcc = new AcgtTree();
    /** U12 branch PWM, loaded by {@link #load()}. */
    Pwm pwmU12;
    /** Number of introns matching each candidate pair; the key is built in {@link #add(String, String)}. */
    HashMap<String, Integer> donorAcc = new HashMap<String, Integer>();
    double thresholdPDonor;
    double thresholdEntropyDonor;
    double thresholdPAcc;
    double thresholdEntropyAcc;
    /** Not assigned anywhere in this class. */
    double thresholdU12Score;

    public SpliceTypes(Config config) {
        this.config = config;
    }

    /**
     * For a given donor prefix, finds acceptor sequences that go with it and
     * registers each resulting pair through {@link #add(String, String)}.
     *
     * @param donorSeq donor prefix; only introns whose donor starts with it are considered
     */
    void acc4donor(String donorSeq) {
        AcgtTree tree = new AcgtTree();
        for (String key : this.donorsByIntron.keySet()) {
            String donor = this.donorsByIntron.get(key);
            if (!donor.startsWith(donorSeq)) {
                continue;
            }
            // Acceptors are added reversed, so a tree node name is a reversed acceptor suffix
            String accReversed = GprSeq.reverse(this.acceptorsByIntron.get(key));
            if (accReversed.indexOf('N') < 0) {
                tree.add(accReversed);
            }
        }
        for (String accSeq : tree.findNodeNames(this.thresholdEntropyAcc, this.thresholdPAcc, THRESHOLD_COUNT)) {
            if (accSeq.length() > 1) {
                // Back to forward orientation before registering the pair
                this.add(donorSeq, GprSeq.reverse(accSeq));
            }
        }
    }

    /**
     * Registers a donor/acceptor pair in {@link #donorAcc} if at least
     * {@link #THRESHOLD_COUNT} introns match it.
     *
     * @param donor    donor prefix
     * @param acceptor acceptor suffix (forward orientation)
     */
    void add(String donor, String acceptor) {
        int count = this.countDonorAcc(donor, acceptor);
        if (count >= THRESHOLD_COUNT) {
            // Key is split on whitespace again in spliceDonoAcceptorPairs()
            this.donorAcc.put(String.format("%-10s\t%10s", donor, acceptor), count);
        }
    }

    /**
     * Runs the whole analysis: load data, collect splice sequences, find
     * donor/acceptor pairs and create the splice sites.
     *
     * @return always {@code true}
     */
    public boolean analyzeAndCreate() {
        if (this.verbose) {
            Timer.showStdErr("Splice site sequence conservation analysis: Start");
        }
        this.load();
        this.spliceSequences();
        this.spliceDonoAcceptorPairs();
        this.createSpliceSites();
        if (this.verbose) {
            Timer.showStdErr("Splice site sequence conservation analysis: Done.");
        }
        return true;
    }

    /**
     * Finds the selected donor/acceptor pair that matches an intron and has the
     * largest combined length.
     *
     * @param intronSeqDonor donor-side intron sequence, may be {@code null}
     * @param intronSeqAcc   acceptor-side intron sequence, may be {@code null}
     * @return index into the pair lists, or -1 if either argument is {@code null}
     *         or no pair matches
     */
    int bestMatchIndex(String intronSeqDonor, String intronSeqAcc) {
        if (intronSeqDonor == null || intronSeqAcc == null) {
            return -1;
        }
        int bestLen = -1;
        int bestIdx = -1;
        for (int i = 0; i < this.donorAccPairDonor.size(); i++) {
            String don = this.donorAccPairDonor.get(i);
            String ac = this.donorAccPairAcc.get(i);
            if (!intronSeqDonor.startsWith(don) || !intronSeqAcc.endsWith(ac)) {
                continue;
            }
            int len = don.length() + ac.length();
            if (len > bestLen) {
                bestLen = len;
                bestIdx = i;
            }
        }
        return bestIdx;
    }

    /**
     * Slides the U12 PWM over a sequence and returns the best score found.
     * Windows containing 'N' are skipped.
     *
     * @param seq2 sequence to scan
     * @return highest window score, never below 0.0; 0.0 when the sequence is
     *         shorter than the PWM
     */
    double bestU12Score(String seq2) {
        int pwmLen = this.pwmU12.length();
        int lastStart = seq2.length() - pwmLen;
        double best = 0.0;
        // '<=' so that the window ending exactly at the end of the sequence is scored too
        for (int i = 0; i <= lastStart; i++) {
            String window = seq2.substring(i, i + pwmLen);
            if (window.indexOf('N') < 0) {
                best = Math.max(best, this.pwmU12.score(window));
            }
        }
        return best;
    }

    /**
     * Computes the best U12 score of every branch sequence and returns the score
     * found at the requested fraction of the sorted list.
     *
     * @param thresholdU12Score fraction (quantile) of the sorted scores to pick;
     *                          the resulting index is clamped to the valid range,
     *                          so 1.0 selects the highest score
     * @return score at that position, or 0.0 if there are no branch sequences
     */
    public double branchU12Threshold(double thresholdU12Score) {
        Timer.showStdErr("Finding U12 PWM score distribution and threshold.");
        List<Double> scores = new ArrayList<Double>(this.branchByIntron.size());
        for (String branch : this.branchByIntron.values()) {
            scores.add(this.bestU12Score(branch));
        }
        if (scores.isEmpty()) {
            return 0.0;
        }
        Collections.sort(scores);
        int index = (int)(thresholdU12Score * (double)scores.size());
        // Clamp: a fraction of 1.0 would otherwise index one past the end
        index = Math.max(0, Math.min(index, scores.size() - 1));
        return scores.get(index);
    }

    /**
     * Counts introns whose donor starts with {@code donor} and whose acceptor
     * ends with {@code acceptor}.
     */
    int countDonorAcc(String donor, String acceptor) {
        int matches = 0;
        for (String key : this.donorsByIntron.keySet()) {
            String d = this.donorsByIntron.get(key);
            String a = this.acceptorsByIntron.get(key);
            if (d.startsWith(donor) && a.endsWith(acceptor)) {
                matches++;
            }
        }
        return matches;
    }

    /**
     * Writes one FASTA file per selected donor/acceptor pair into a directory.
     * Files are named {@code <genomeId>.<donor>-<acceptor>.fa}.
     *
     * @param outputDir directory to write to
     */
    public void createSpliceFasta(String outputDir) {
        if (this.verbose) {
            Timer.showStdErr("Creating FASTA files for each dono-acceptor pair.");
        }
        for (int i = 0; i < this.getDonorAccPairSize(); i++) {
            String d = this.getDonor(i);
            String a = this.getAcceptor(i);
            String fastaFile = outputDir + "/" + this.config.getGenome().getId() + "." + d + "-" + a + ".fa";
            this.createSpliceFasta(fastaFile, d, a);
        }
    }

    /**
     * Writes the branch region (with the acceptor suffix removed) of every intron
     * matching a donor/acceptor pair to a FASTA file.
     *
     * <p>Introns whose branch sequence is shorter than the acceptor (e.g. empty,
     * because the intron was too short for a branch window) are skipped.
     *
     * @param fastaFile output file name
     * @param donor     donor prefix to match
     * @param acceptor  acceptor suffix to match
     */
    void createSpliceFasta(String fastaFile, String donor, String acceptor) {
        StringBuilder fasta = new StringBuilder();
        int fastaId = 0;
        for (String intronKey : this.getIntronKeySet()) {
            String d = this.getDonorByIntron(intronKey);
            String a = this.getAcceptorsByIntron(intronKey);
            if (!d.startsWith(donor) || !a.endsWith(acceptor)) {
                continue;
            }
            // Use the branch region, not the acceptor sequence again
            String branch = this.getBranchByIntron(intronKey);
            int keep = branch.length() - acceptor.length();
            if (keep < 0) {
                continue;
            }
            fasta.append(">id_").append(fastaId).append("\n").append(branch.subSequence(0, keep)).append("\n");
            fastaId++;
        }
        if (this.verbose) {
            Timer.showStdErr("\tWriting fasta sequences to file: " + fastaFile);
        }
        Gpr.toFile(fastaFile, fasta);
    }

    /**
     * Walks every pair of consecutive exons (in strand order) of every transcript
     * and creates splice sites for the intron between them.
     */
    void createSpliceSites() {
        if (this.verbose) {
            Timer.showStdErr("\tCreating splice sites.");
        }
        for (Gene gene : this.config.getGenome().getGenes()) {
            for (Transcript tr : gene) {
                Exon exPrev = null;
                for (Exon ex : tr.sortedStrand()) {
                    if (exPrev != null) {
                        int start;
                        int end;
                        if (tr.isStrandPlus()) {
                            start = exPrev.getEnd();
                            end = ex.getStart();
                        } else {
                            start = ex.getEnd();
                            end = exPrev.getStart();
                        }
                        this.createSpliceSites(ex, exPrev, start, end);
                    }
                    exPrev = ex;
                }
            }
        }
    }

    /**
     * Creates the donor site on the previous exon and the acceptor site on the
     * current exon. The size is 2 when no selected pair matches the intron,
     * otherwise the matching sequence length minus one; in both cases it is
     * capped by {@code end - start - 1}.
     *
     * @param ex     exon receiving the acceptor site
     * @param exPrev exon receiving the donor site
     * @param start  first coordinate of the intron key
     * @param end    second coordinate of the intron key
     */
    void createSpliceSites(Exon ex, Exon exPrev, int start, int end) {
        String key = ex.getChromosomeName() + ":" + start + "-" + end;
        String donor = this.donorsByIntron.get(key);
        String acc = this.acceptorsByIntron.get(key);
        if (donor == null) {
            Gpr.debug("Cannot find donor for key:" + key);
        }
        if (acc == null) {
            Gpr.debug("Cannot find acceptor for key:" + key);
        }
        // bestMatchIndex() returns -1 for null sequences, which selects the default size below
        int idx = this.bestMatchIndex(donor, acc);
        int dist = end - start - 1;
        int donorSize = 2;
        int accSize = 2;
        if (idx >= 0) {
            donorSize = this.donorAccPairDonor.get(idx).length() - 1;
            accSize = this.donorAccPairAcc.get(idx).length() - 1;
        }
        exPrev.createSpliceSiteDonor(Math.min(donorSize, dist));
        ex.createSpliceSiteAcceptor(Math.min(accSize, dist));
    }

    /**
     * For a given acceptor suffix, finds donor sequences that go with it and
     * registers each resulting pair through {@link #add(String, String)}.
     *
     * @param accSeq acceptor suffix in forward orientation; only introns whose
     *               acceptor ends with it are considered
     */
    void donor4acc(String accSeq) {
        AcgtTree tree = new AcgtTree();
        for (String key : this.acceptorsByIntron.keySet()) {
            // accSeq is in forward orientation (same convention as countDonorAcc),
            // so it must be compared against the forward acceptor sequence
            String acc = this.acceptorsByIntron.get(key);
            if (!acc.endsWith(accSeq)) {
                continue;
            }
            String donor = this.donorsByIntron.get(key);
            if (donor.indexOf('N') < 0) {
                tree.add(donor);
            }
        }
        for (String donorSeq : tree.findNodeNames(this.thresholdEntropyDonor, this.thresholdPDonor, THRESHOLD_COUNT)) {
            if (donorSeq.length() > 1) {
                this.add(donorSeq, accSeq);
            }
        }
    }

    /**
     * Returns the entropy value found at fraction {@link #THRESHOLD_ENTROPY} of
     * the tree's sorted entropy values.
     */
    double findEntropyThreshold(AcgtTree tree) {
        List<Double> entropies = tree.entropyAll(THRESHOLD_COUNT);
        Collections.sort(entropies);
        int index = (int)((double)entropies.size() * THRESHOLD_ENTROPY);
        return entropies.get(index);
    }

    /**
     * Returns the probability value found at fraction {@link #THRESHOLD_P} of
     * the tree's sorted probability values.
     */
    double findPthreshold(AcgtTree tree) {
        List<Double> probabilities = tree.pAll(THRESHOLD_COUNT);
        Collections.sort(probabilities);
        int index = (int)((double)probabilities.size() * THRESHOLD_P);
        return probabilities.get(index);
    }

    /** Acceptor sequence of the i-th selected donor/acceptor pair. */
    public String getAcceptor(int i) {
        return this.donorAccPairAcc.get(i);
    }

    /** Acceptor-side sequence stored for an intron key, or {@code null} if unknown. */
    public String getAcceptorsByIntron(String intronKey) {
        return this.acceptorsByIntron.get(intronKey);
    }

    /** Branch region sequence stored for an intron key, or {@code null} if unknown. */
    public String getBranchByIntron(String intronKey) {
        return this.branchByIntron.get(intronKey);
    }

    /** Donor sequence of the i-th selected donor/acceptor pair. */
    public String getDonor(int i) {
        return this.donorAccPairDonor.get(i);
    }

    /** Number of selected donor/acceptor pairs. */
    public int getDonorAccPairSize() {
        return this.donorAccPairDonor.size();
    }

    /** Donor-side sequence stored for an intron key, or {@code null} if unknown. */
    public String getDonorByIntron(String intronKey) {
        return this.donorsByIntron.get(intronKey);
    }

    /** Keys of all introns collected so far (view of the donor map's key set). */
    public Set<String> getIntronKeySet() {
        return this.donorsByIntron.keySet();
    }

    /**
     * Loads the U12 branch PWM from the data directory and, if the configuration
     * has no predictor yet, loads the SnpEffect predictor.
     */
    void load() {
        String u12file = this.config.getDirData() + "/spliceSites/u12_branch.pwm";
        if (this.verbose) {
            Timer.showStdErr("\tLoading U12 PWM form file '" + u12file + "'");
        }
        this.pwmU12 = new Pwm(u12file);
        if (this.config.getSnpEffectPredictor() != null) {
            return;
        }
        if (this.verbose) {
            Timer.showStdErr("\tLoading: " + this.config.getGenome().getGenomeName());
        }
        this.config.loadSnpEffectPredictor();
        if (this.verbose) {
            Timer.showStdErr("\tdone.");
        }
    }

    /**
     * Extracts the upper-cased window of {@code MAX_SPLICE_SIZE} bases on each
     * side of the acceptor junction; reverse-complemented for minus-strand
     * transcripts.
     *
     * @return the window, or "" when {@code intronEnd - intronStart < MAX_SPLICE_SIZE}
     */
    String seqAcceptor(Transcript tr, String chrSeq, int intronStart, int intronEnd) {
        if (intronEnd - intronStart < MAX_SPLICE_SIZE) {
            return "";
        }
        // The acceptor junction is at intronEnd on the plus strand, at intronStart on the minus strand
        boolean plus = tr.isStrandPlus();
        int center = plus ? intronEnd : intronStart;
        String window = chrSeq.substring(center - MAX_SPLICE_SIZE, center + MAX_SPLICE_SIZE + 1).toUpperCase();
        return plus ? window : GprSeq.reverseWc(window);
    }

    /**
     * Extracts the upper-cased branch region next to the acceptor side of the
     * intron; reverse-complemented for minus-strand transcripts.
     *
     * @return the region, or "" when {@code intronEnd - intronStart < SIZE_BRANCH}
     */
    String seqBranch(Transcript tr, String chrSeq, int intronStart, int intronEnd) {
        if (intronEnd - intronStart < SIZE_BRANCH) {
            return "";
        }
        if (tr.isStrandPlus()) {
            return chrSeq.substring(intronEnd - SIZE_BRANCH + 1, intronEnd).toUpperCase();
        }
        return GprSeq.reverseWc(chrSeq.substring(intronStart + 1, intronStart + SIZE_BRANCH).toUpperCase());
    }

    /**
     * Extracts the upper-cased window of {@code MAX_SPLICE_SIZE} bases on each
     * side of the donor junction; reverse-complemented for minus-strand
     * transcripts.
     *
     * @return the window, or "" when {@code intronEnd - intronStart < MAX_SPLICE_SIZE}
     */
    String seqDonor(Transcript tr, String chrSeq, int intronStart, int intronEnd) {
        if (intronEnd - intronStart < MAX_SPLICE_SIZE) {
            return "";
        }
        // The donor junction is at intronStart on the plus strand, at intronEnd on the minus strand
        boolean plus = tr.isStrandPlus();
        int center = plus ? intronStart : intronEnd;
        String window = chrSeq.substring(center - MAX_SPLICE_SIZE, center + MAX_SPLICE_SIZE + 1).toUpperCase();
        return plus ? window : GprSeq.reverseWc(window);
    }

    public void setVerbose(boolean verbose) {
        this.verbose = verbose;
    }

    /**
     * Builds the donor and acceptor trees, derives the entropy/probability
     * thresholds, collects candidate pairs in both directions and fills the
     * selected pair lists, ordered by decreasing intron count.
     */
    void spliceDonoAcceptorPairs() {
        if (this.verbose) {
            Timer.showStdErr("\tFinding donor-acceptor pairs: Creating quaternary trees");
        }
        for (String donor : this.donorsByIntron.values()) {
            if (donor.indexOf('N') < 0) {
                this.acgtTreeDonors.add(donor);
            }
        }
        for (String acc : this.acceptorsByIntron.values()) {
            if (acc.indexOf('N') < 0) {
                // Reversed, so the tree starts at the base closest to the junction
                this.acgtTreeAcc.add(GprSeq.reverse(acc));
            }
        }
        if (this.verbose) {
            Timer.showStdErr("\tCalculate thresholds");
        }
        this.thresholdPDonor = this.findPthreshold(this.acgtTreeDonors);
        this.thresholdEntropyDonor = this.findEntropyThreshold(this.acgtTreeDonors);
        this.thresholdPAcc = this.findPthreshold(this.acgtTreeAcc);
        this.thresholdEntropyAcc = this.findEntropyThreshold(this.acgtTreeAcc);
        if (this.verbose) {
            Timer.showStdErr("\tDonors Thresholds:\t\tEntropy: " + this.thresholdEntropyDonor + "\t\tProbability: " + this.thresholdPDonor);
        }
        for (String donorSeq : this.acgtTreeDonors.findNodeNames(this.thresholdEntropyDonor, this.thresholdPDonor, THRESHOLD_COUNT)) {
            if (donorSeq.length() > 1) {
                this.acc4donor(donorSeq);
            }
        }
        if (this.verbose) {
            Timer.showStdErr("\tFind acceptors");
            Timer.showStdErr("\tAcceptors Thresholds:\t\tEntropy: " + this.thresholdEntropyAcc + "\t\tProbability: " + this.thresholdPAcc);
        }
        for (String accSeqReversed : this.acgtTreeAcc.findNodeNames(this.thresholdEntropyAcc, this.thresholdPAcc, THRESHOLD_COUNT)) {
            if (accSeqReversed.length() > 1) {
                this.donor4acc(GprSeq.reverse(accSeqReversed));
            }
        }
        if (this.verbose) {
            Timer.showStdErr("\tAdd Donor - Acceptors pairs: ");
        }
        ArrayList<String> keys = new ArrayList<String>(this.donorAcc.keySet());
        // Highest count first
        Collections.sort(keys, new Comparator<String>(){

            @Override
            public int compare(String arg0, String arg1) {
                return SpliceTypes.this.donorAcc.get(arg1).compareTo(SpliceTypes.this.donorAcc.get(arg0));
            }
        });
        for (String key : keys) {
            int count = this.donorAcc.get(key);
            // Strictly greater: a pair seen exactly THRESHOLD_COUNT times is stored by add() but not selected here
            if (count <= THRESHOLD_COUNT) {
                continue;
            }
            String[] da = key.trim().split("\\s+");
            this.donorAccPairDonor.add(da[0]);
            this.donorAccPairAcc.add(da[1]);
            if (this.verbose) {
                Timer.showStdErr("\t\t\t" + count + "\t" + key);
            }
        }
    }

    /**
     * Reads the genome FASTA file and collects splice sequences for every
     * chromosome sequence in it.
     */
    void spliceSequences() {
        String genomeFasta = this.config.getFileNameGenomeFasta();
        if (this.verbose) {
            Timer.showStdErr("\tFinding splice sequences. Reading fasta file: " + genomeFasta);
        }
        FastaFileIterator ffi = new FastaFileIterator(genomeFasta);
        for (String chrSeq : ffi) {
            this.spliceSequences(Chromosome.simpleName(ffi.getName()), chrSeq);
        }
    }

    /**
     * Collects splice sequences for every intron of every transcript of the
     * genes located on one chromosome.
     *
     * @param chrName chromosome name, compared with each gene's chromosome name
     * @param chrSeq  chromosome sequence
     */
    void spliceSequences(String chrName, String chrSeq) {
        int countEx = 0;
        int countGenes = 0;
        for (Gene gene : this.config.getGenome().getGenes()) {
            if (!gene.getChromosomeName().equals(chrName)) {
                continue;
            }
            countGenes++;
            for (Transcript tr : gene) {
                Exon exPrev = null;
                for (Exon ex : tr.sortedStrand()) {
                    countEx++;
                    if (exPrev != null) {
                        int start;
                        int end;
                        if (tr.isStrandPlus()) {
                            start = exPrev.getEnd();
                            end = ex.getStart();
                        } else {
                            start = ex.getEnd();
                            end = exPrev.getStart();
                        }
                        this.spliceSequences(tr, chrName, chrSeq, start, end);
                    }
                    exPrev = ex;
                }
            }
        }
        if (this.verbose) {
            Timer.showStdErr("\t\tChromosome: " + chrName + "\tGenes: " + countGenes + "\tExons: " + countEx + "\t" + this.donorsByIntron.size());
        }
    }

    /**
     * Stores the donor, acceptor and branch sequences of one intron, unless the
     * intron key is already known. Only the intronic half of the donor and
     * acceptor windows is kept.
     */
    void spliceSequences(Transcript tr, String chrName, String chrSeq, int intronStart, int intronEnd) {
        String key = chrName + ":" + intronStart + "-" + intronEnd;
        if (this.donorsByIntron.containsKey(key)) {
            return;
        }
        String donorStr = this.seqDonor(tr, chrSeq, intronStart, intronEnd);
        String accStr = this.seqAcceptor(tr, chrSeq, intronStart, intronEnd);
        String branchStr = this.seqBranch(tr, chrSeq, intronStart, intronEnd);
        // Donor window: drop the exonic bases and the junction base, keep what follows
        String intronSeqDonor = donorStr.isEmpty() ? "" : donorStr.substring(MAX_SPLICE_SIZE + 1);
        // Acceptor window: keep only the bases before the junction base
        String intronSeqAcc = accStr.isEmpty() ? "" : accStr.substring(0, MAX_SPLICE_SIZE);
        this.donorsByIntron.put(key, intronSeqDonor);
        this.acceptorsByIntron.put(key, intronSeqAcc);
        this.branchByIntron.put(key, branchStr);
    }
}

