/*
 * Decompiled with CFR 0.152.
 */
package edu.mayo.bior.catalog.stats;

import com.jayway.jsonpath.InvalidPathException;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Predicate;
import edu.mayo.bior.catalog.CatalogFileUtils;
import edu.mayo.bior.catalog.CatalogFiles;
import edu.mayo.bior.catalog.CatalogFormatException;
import edu.mayo.bior.catalog.ChunkUtils;
import edu.mayo.bior.catalog.stats.AsciiCharacterStats;
import edu.mayo.bior.catalog.stats.CatalogColumnStats;
import edu.mayo.bior.catalog.stats.CatalogStats;
import edu.mayo.bior.catalog.stats.ValueSampling;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;

/**
 * Builds a {@link CatalogStats} summary for a catalog file by reading its data lines,
 * extracting the value of every configured column from each sampled line's JSON, and
 * accumulating per-column entry counts, value samplings and ASCII character tallies.
 *
 * <p>Several static "test hook" setters allow the counters to be pre-loaded with a value
 * larger than {@link Integer#MAX_VALUE}; NOTE(review): presumably this exists so that tests
 * can verify the counters are held as {@code long} - confirm against the test suite.
 *
 * <p>An instance keeps the stats being built in a field, so a single instance must not be
 * used for two concurrent {@code build(...)} calls.
 */
public class StatsBuilder {
    private static final Logger sLogger = Logger.getLogger(StatsBuilder.class);

    /** File length (5 * 2^30 bytes) above which reading may stop once the target chunk is passed. */
    private static final long FILE_SIZE_THRESHOLD = 0x140000000L;
    /** Default line sampling frequency: every line. */
    private static final long ONE_IN_X_SAMPLING_DEFAULT = 1L;
    /** Progress is logged whenever the data line count is a multiple of this value; 0 disables it. */
    private static long LOG_PROGRESS_EVERY_X_LINES = 100000L;
    /** Default first line of the chunk. */
    private static final long START_LINE_DEFAULT = 1L;
    /** Default chunk length. NOTE(review): 0 presumably means "no limit" - see ChunkUtils. */
    private static final long NUM_LINES_DEFAULT = 0L;
    /** Value (Integer.MAX_VALUE + 10) used to pre-load counters when testLargeRowCounts is on. */
    private static final long LARGE_VALUE = 0x80000009L;

    /** Test hook: when true, counters start at {@link #LARGE_VALUE} instead of 0. */
    private static boolean testLargeRowCounts = false;
    /** Test hook: column name -> value whose sampling frequency starts at {@link #LARGE_VALUE}. */
    private static Properties valuesToInitializeToIntMax;

    /** Wall-clock time (ms since epoch) at which the current catalog scan started. */
    private long startTime;
    /** Stats object being filled by the current build. */
    private CatalogStats catalogStats;

    /**
     * Test hook: enables or disables pre-loading of all counters with a large value.
     *
     * @param largeRowCounts true to start counters at {@code Integer.MAX_VALUE + 10}
     */
    public static void setTestLargeRowCounts(boolean largeRowCounts) {
        testLargeRowCounts = largeRowCounts;
    }

    /** Test hook: restores the default (disabled) large-row-count behavior. */
    public static void setTestLargeRowCountsToDefault() {
        StatsBuilder.setTestLargeRowCounts(false);
    }

    /**
     * Test hook: supplies, per column name, a value whose sampling frequency is pre-loaded
     * with a large count. Only consulted while large row counts are enabled.
     *
     * @param values column name to value mapping; may be null
     */
    public static void setValuesToInitializeToIntMax(Properties values) {
        valuesToInitializeToIntMax = values;
    }

    /** Test hook: clears the pre-loaded value mapping. */
    public static void setValuesToInitializeToIntMaxToDefault() {
        StatsBuilder.setValuesToInitializeToIntMax(null);
    }

    /**
     * Sets how often progress is logged while scanning.
     *
     * @param progressEveryXLines log whenever the data line count is a multiple of this
     *                            value; 0 disables progress logging
     */
    public static void setLogProgressEveryXLines(long progressEveryXLines) {
        LOG_PROGRESS_EVERY_X_LINES = progressEveryXLines;
    }

    /**
     * Builds stats using the default sampling frequency, start line and number of lines.
     *
     * @see #build(File, int, long, long, long)
     */
    public CatalogStats build(File catalogFile, int maxValueCount) throws IOException, CatalogFormatException {
        return this.build(catalogFile, maxValueCount, ONE_IN_X_SAMPLING_DEFAULT);
    }

    /**
     * Builds stats using the default start line and number of lines.
     *
     * @see #build(File, int, long, long, long)
     */
    public CatalogStats build(File catalogFile, int maxValueCount, long oneInXSampling) throws IOException, CatalogFormatException {
        return this.build(catalogFile, maxValueCount, oneInXSampling, START_LINE_DEFAULT, NUM_LINES_DEFAULT);
    }

    /**
     * Validates the arguments, loads the column names from the catalog's columns file
     * (skipping names that start with {@code _}) and scans the catalog.
     *
     * @param catalogFile    catalog to scan; must be non-null, existing and readable
     * @param maxValueCount  maximum number of distinct values tracked per column; must be &gt;= 1
     * @param oneInXSampling a line is a sampling candidate when
     *                       {@code (lineNumber - 1) % oneInXSampling == 0}; must be &gt;= 1
     * @param startLine      first line of the target chunk (passed to {@code ChunkUtils}); must be &gt;= 1
     * @param numLines       length of the target chunk (passed to {@code ChunkUtils}); must be &gt;= 0
     * @return the collected stats, or null when the columns file yields no column names
     * @throws IOException            if the catalog file is null, missing or unreadable, or reading fails
     * @throws CatalogFormatException declared for callers; NOTE(review): presumably raised by
     *                                {@code CatalogFiles} - confirm
     * @throws RuntimeException       if a numeric argument is below its minimum
     */
    public CatalogStats build(File catalogFile, int maxValueCount, long oneInXSampling, long startLine, long numLines) throws IOException, CatalogFormatException {
        this.checkFileParam(catalogFile);
        this.checkProcessCatalogIntParam(maxValueCount, 1L, "number values sampled per column");
        this.checkProcessCatalogIntParam(oneInXSampling, 1L, "line sampling frequency");
        this.checkProcessCatalogIntParam(startLine, 1L, "start line");
        this.checkProcessCatalogIntParam(numLines, 0L, "number of lines");
        CatalogFiles catalogFiles = new CatalogFiles(catalogFile);
        List<String> columnNames = this.getColumns(catalogFiles.getColumnsFile(), true);
        this.catalogStats = this.initializeStats(columnNames);
        return this.processCatalog(catalogFile, columnNames, maxValueCount, oneInXSampling, startLine, numLines);
    }

    /**
     * Creates a stats object holding one empty {@link CatalogColumnStats} per column.
     * All counters start at 0, or at {@link #LARGE_VALUE} when the large-row-count test
     * hook is enabled.
     */
    private CatalogStats initializeStats(List<String> columnNames) {
        long initialCount = testLargeRowCounts ? LARGE_VALUE : 0L;
        CatalogStats stats = new CatalogStats();
        stats.setTotalDataLineCount(initialCount);
        stats.setNumLinesSampled(initialCount);
        stats.setColumns(columnNames);
        for (String columnName : columnNames) {
            CatalogColumnStats colStats = new CatalogColumnStats();
            colStats.setValueSamplings(this.getInitValueSamplings(columnName));
            colStats.setNumEntries(initialCount);
            colStats.setNumCharacters(initialCount);
            colStats.setLineAsciiStats(this.getInitCharStats());
            colStats.setTotalAsciiStats(this.getInitCharStats());
            stats.setColumnStats(columnName, colStats);
        }
        return stats;
    }

    /**
     * Returns the initial value samplings for a column: empty, unless the large-row-count
     * test hook is enabled and a value was registered for this column, in which case that
     * value is pre-loaded with frequency {@link #LARGE_VALUE}.
     */
    private Collection<ValueSampling> getInitValueSamplings(String columnName) {
        List<ValueSampling> valueSamplingList = new ArrayList<ValueSampling>();
        // The properties default to null, so guard against enabling the hook without them.
        if (!testLargeRowCounts || valuesToInitializeToIntMax == null) {
            return valueSamplingList;
        }
        String valToSetToMax = valuesToInitializeToIntMax.getProperty(columnName);
        if (valToSetToMax != null) {
            ValueSampling valSample = new ValueSampling();
            valSample.setValue(valToSetToMax);
            valSample.setFrequency(LARGE_VALUE);
            valueSamplingList.add(valSample);
        }
        return valueSamplingList;
    }

    /**
     * Returns fresh character stats. When the large-row-count test hook is enabled, the
     * counter at index 50 (the ASCII code of the character '2') starts at {@link #LARGE_VALUE}.
     */
    private AsciiCharacterStats getInitCharStats() {
        AsciiCharacterStats charStats = new AsciiCharacterStats();
        if (testLargeRowCounts) {
            int asciiCodeOfDigitTwo = 50;
            charStats.counts[asciiCodeOfDigitTwo] = LARGE_VALUE;
        }
        return charStats;
    }

    /** @return the default line sampling frequency (1 = every line) */
    public static long getOneInXSamplingDefault() {
        return ONE_IN_X_SAMPLING_DEFAULT;
    }

    /** @return the default start line (1) */
    public static long getStartLineDefault() {
        return START_LINE_DEFAULT;
    }

    /** @return the default number of lines (0) */
    public static long getNumLinesDefault() {
        return NUM_LINES_DEFAULT;
    }

    /**
     * Scans the catalog line by line, updating {@link #catalogStats}.
     *
     * <p>Lines starting with {@code #} are ignored. Every other line increments the total
     * data line count; lines accepted by {@link #shouldSample} additionally have each
     * column's value pulled out of their JSON and folded into that column's stats. For
     * files longer than {@link #FILE_SIZE_THRESHOLD}, reading stops as soon as
     * {@code ChunkUtils.beyondTargetChunk} reports that the next line is past the chunk.
     *
     * @return the populated stats, or null when there are no columns to sample
     * @throws IOException if the catalog cannot be opened or read
     */
    private CatalogStats processCatalog(File catalogFile, List<String> columnNames, int maxNumValues, long oneInXSampling, long startLine, long numLines) throws IOException {
        sLogger.info("Start line for chunk: " + startLine);
        sLogger.info("Num lines in chunk: " + numLines);
        sLogger.info("Sampling every 1 in " + oneInXSampling + " lines");
        if (columnNames == null || columnNames.isEmpty()) {
            sLogger.info("Don't have any columns to sample so bailing");
            return null;
        }
        boolean fileBiggerThanThreshold = catalogFile.length() > FILE_SIZE_THRESHOLD;
        if (fileBiggerThanThreshold) {
            sLogger.info(String.format("Catalog file length %d > %d. Can stop reading early if past chunk.", catalogFile.length(), FILE_SIZE_THRESHOLD));
        }
        this.startTime = System.currentTimeMillis();
        // Resolved from the first line that is actually sampled. Keying this on "data line 1"
        // left the column at a hard-coded 3 whenever line 1 was outside the chunk, which
        // overran the array on single-column catalogs.
        int jsonCol = -1;
        BufferedReader catalogRdr = CatalogFileUtils.getBufferedReader(catalogFile.getAbsolutePath());
        try {
            String line;
            while ((line = catalogRdr.readLine()) != null) {
                if (line.startsWith("#")) {
                    continue;
                }
                if (fileBiggerThanThreshold && ChunkUtils.beyondTargetChunk(this.catalogStats.getTotalDataLineCount() + 1L, startLine, numLines)) {
                    this.catalogStats.setStoppedReadingAfterChunk(true);
                    sLogger.info(String.format("Stopped reading catalog after reading %d lines - past chunk", this.catalogStats.getTotalDataLineCount()));
                    break;
                }
                this.catalogStats.incrementTotalDataLineCount();
                this.logProgressEveryXLines(this.catalogStats.getTotalDataLineCount());
                if (!StatsBuilder.shouldSample(this.catalogStats.getTotalDataLineCount(), oneInXSampling, startLine, numLines)) {
                    continue;
                }
                this.catalogStats.incrementNumLinesSampled();
                String[] colArr = line.split("\t", -1);
                if (jsonCol < 0) {
                    jsonCol = this.getJsonColumn(colArr);
                }
                String json = colArr[jsonCol];
                for (String columnName : columnNames) {
                    String value = this.getJsonValue(json, columnName);
                    if (value != null) {
                        this.updateColumnStats(columnName, value, maxNumValues);
                    }
                }
            }
        } finally {
            // Release the underlying file handle on every exit path (normal, break, exception).
            catalogRdr.close();
        }
        return this.catalogStats;
    }

    /**
     * Evaluates {@code columnName} as a JsonPath against {@code json}.
     *
     * @return the string form of the result, or null when the path is rejected with an
     *         {@link InvalidPathException} or the read yields null
     */
    private String getJsonValue(String json, String columnName) {
        try {
            Object result = JsonPath.compile(columnName, new Predicate[0]).read(json);
            // A null result previously blew up on toString(); treat it like a missing value.
            return result == null ? null : result.toString();
        }
        catch (InvalidPathException invalidPathException) {
            // The column is not usable for this line; the caller skips null values.
            return null;
        }
    }

    /** Records one occurrence of {@code value} in the stats of {@code columnName}. */
    private void updateColumnStats(String columnName, String value, int maxNumValues) {
        CatalogColumnStats colStats = this.catalogStats.getColumnStats(columnName);
        colStats.setColumnName(columnName);
        colStats.incrementNumEntries();
        this.updateValueSampling(value, colStats, maxNumValues);
        this.updateCharStats(value, colStats);
    }

    /**
     * Increments the frequency of an already-tracked value, or starts tracking a new value
     * with frequency 1 while fewer than {@code maxNumValues} unique values are held. New
     * values seen after that limit is reached are not tracked.
     */
    private void updateValueSampling(String columnValue, CatalogColumnStats colStats, int maxNumValues) {
        ValueSampling existing = colStats.getValueSampling(columnValue);
        if (existing != null) {
            existing.incrementFrequency();
            return;
        }
        if (colStats.getNumUniqueValues() < (long)maxNumValues) {
            ValueSampling added = new ValueSampling();
            added.setValue(columnValue);
            added.setFrequency(1L);
            colStats.addValueSampling(added);
        }
    }

    /**
     * Folds the characters of {@code value} into the column's character stats: every
     * character bumps the column's character count; characters whose code fits the tally
     * array also bump the total tally once per occurrence and the per-line tally once per
     * value, however many times they occur in it.
     */
    private void updateCharStats(String value, CatalogColumnStats colStats) {
        AsciiCharacterStats lineStats = colStats.getLineAsciiStats();
        AsciiCharacterStats totalStats = colStats.getTotalAsciiStats();
        boolean[] seenInValue = new boolean[lineStats.counts.length];
        for (int pos = 0; pos < value.length(); ++pos) {
            colStats.incrementNumCharacters();
            char code = value.charAt(pos);
            if (code >= totalStats.counts.length) {
                // Outside the tallied range: counted as a character but not tallied per code.
                continue;
            }
            totalStats.counts[code] = totalStats.counts[code] + 1L;
            seenInValue[code] = true;
        }
        for (int code = 0; code < seenInValue.length; ++code) {
            if (seenInValue[code]) {
                lineStats.counts[code] = lineStats.counts[code] + 1L;
            }
        }
    }

    /**
     * Verifies that the catalog file is non-null, exists and is readable.
     *
     * @throws IOException describing the first check that fails
     */
    private void checkFileParam(File catalogFile) throws IOException {
        if (catalogFile == null) {
            throw new IOException("Null catalog file supplied to StatsBuilder.build()");
        }
        if (!catalogFile.exists()) {
            throw new IOException(String.format("File '%s' does not exist", catalogFile.getPath()));
        }
        if (!catalogFile.canRead()) {
            throw new IOException(String.format("File '%s' is not readable", catalogFile.getPath()));
        }
    }

    /**
     * Rejects a numeric argument that is below its minimum.
     *
     * @throws RuntimeException if {@code param < threshold}
     */
    private void checkProcessCatalogIntParam(long param, long threshold, String paramName) {
        if (param < threshold) {
            String msg = String.format("Programming Error: supplied value %d for %s must be >= %d in StatsBuilder.build()", param, paramName, threshold);
            throw new RuntimeException(msg);
        }
    }

    /**
     * Decides whether a data line is sampled: it must fall on the one-in-X grid anchored at
     * line 1 and be accepted by {@code ChunkUtils.inTargetChunk}.
     */
    private static boolean shouldSample(long lineNumber, long oneInXSampling, long startLine, long numLines) {
        boolean onSamplingGrid = (lineNumber - 1L) % oneInXSampling == 0L;
        return onSamplingGrid && ChunkUtils.inTargetChunk(lineNumber, startLine, numLines);
    }

    /**
     * Logs the number of lines processed and the elapsed seconds whenever the line count is
     * a multiple of the configured interval. An interval of 0 disables logging (and avoids
     * a division by zero).
     */
    private void logProgressEveryXLines(long totalDataLinesInFile) {
        if (LOG_PROGRESS_EVERY_X_LINES == 0L) {
            return;
        }
        if (totalDataLinesInFile % LOG_PROGRESS_EVERY_X_LINES == 0L) {
            double elapsedTimeSeconds = ((double)System.currentTimeMillis() - (double)this.startTime) / 1000.0;
            DecimalFormat decFormat = new DecimalFormat("#,###,###,##0");
            sLogger.info("Stats - # lines processed " + decFormat.format(totalDataLinesInFile) + "  (" + decFormat.format(elapsedTimeSeconds) + "s)");
        }
    }

    /**
     * Picks the index of the JSON column for a split catalog line: 3 when the line has more
     * than three columns and the fourth looks like a JSON object ({@code {...}}), else 0.
     */
    private int getJsonColumn(String[] colArr) {
        boolean fourthColumnIsJson = colArr.length > 3 && colArr[3].startsWith("{") && colArr[3].endsWith("}");
        return fourthColumnIsJson ? 3 : 0;
    }

    /**
     * Reads column names from the columns file: the first tab-separated field of every line
     * that is not a {@code #} comment, is not blank in that field, and - when
     * {@code skipGoldenAttributes} is set - does not start with {@code _}.
     *
     * @throws IOException if the columns file cannot be read
     */
    private List<String> getColumns(File columnsFile, boolean skipGoldenAttributes) throws IOException {
        List<String> colNames = new ArrayList<String>();
        for (String line : FileUtils.readLines(columnsFile)) {
            if (line.startsWith("#")) {
                continue;
            }
            if (skipGoldenAttributes && line.startsWith("_")) {
                continue;
            }
            String columnName = line.split("\t", -1)[0];
            if (columnName.trim().length() <= 0) {
                continue;
            }
            colNames.add(columnName);
        }
        return colNames;
    }

    static {
        // Make sure the test hooks start out disabled.
        StatsBuilder.setTestLargeRowCountsToDefault();
        StatsBuilder.setValuesToInitializeToIntMaxToDefault();
    }
}

