package edu.mayo.bior.catalog.latest;

import edu.mayo.bior.catalog.CatalogDataSource;
import edu.mayo.bior.catalog.CatalogFiles;
import edu.mayo.bior.catalog.CatalogFormatException;
import org.apache.log4j.Logger;

import java.io.File;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.*;

import static edu.mayo.bior.catalog.CatalogFileUtils.findAllTsvBgzFiles;

/**
 * Finds the latest version of a catalog, which is useful when your use case is to always annotate with the
 * most current annotation available.
 */
public class LatestCatalogFinder {

    private static final Logger LOGGER = Logger.getLogger(LatestCatalogFinder.class);

    /**
     * Compares {@link CatalogDataSource} objects by {@link CatalogDataSource#getDataSourceReleaseDateAsDateObject()}.
     * Sort order is newest to oldest.  If 2 dates are identical, the source field sorted alphabetically is the tiebreaker.
     * <p>
     * The comparator holds no state, so a single shared instance is used.  A {@link CatalogFormatException} raised
     * while reading a release date is rethrown wrapped in a {@link RuntimeException} because
     * {@link Comparator#compare(Object, Object)} cannot throw checked exceptions.
     */
    private static final Comparator<CatalogDataSource> RELEASE_DATE_NEWEST_FIRST = new Comparator<CatalogDataSource>() {
        @Override
        public int compare(CatalogDataSource dataSrc1, CatalogDataSource dataSrc2) {
            try {
                Date date1 = dataSrc1.getDataSourceReleaseDateAsDateObject();
                Date date2 = dataSrc2.getDataSourceReleaseDateAsDateObject();

                // operands are reversed on purpose: descending order, newest first
                int compareResult = date2.compareTo(date1);
                if (compareResult != 0) {
                    return compareResult;
                }
                return dataSrc1.getSource().compareTo(dataSrc2.getSource());
            } catch (CatalogFormatException e) {
                throw new RuntimeException(e);
            }
        }
    };

    /**
     * Finds the latest version of a catalog based on the following criteria:
     * <ul>
     *     <li>Active catalogs are searched while deprecated catalogs are ignored.</li>
     *     <li>Catalog's datasource.properties source must match the given value (case-insensitive)</li>
     *     <li>Catalog's datasource.properties build must start with the given value (case-insensitive)</li>
     *     <li>Catalogs with a missing or blank datasource.properties dataSourceReleaseDate value are ignored</li>
     *     <li>Catalog with the most recent dataSourceReleaseDate is considered the latest version</li>
     * </ul>
     * Equivalent to calling {@link #findLatestCatalog(File, String, String, String)} with a NULL path filter.
     *
     * @param rootDir Root directory to begin searching for catalogs
     * @param source value that must match (case-insensitive) to a catalog's datasource.properties source value
     * @param build  value that must start with (case-insensitive) to a catalog's datasource.properties build value
     * @return Object representing the latest catalog version
     * @throws CatalogFormatException thrown when a catalog doesn't follow the formal spec
     * @throws LatestCatalogNotFoundException thrown when 1.) no catalogs are found under the rootDir, 2.) no catalogs
     *                                        hits match build and/or source, 3.) more than 1 hit shares the most
     *                                        recent dataSourceReleaseDate
     */
    LatestCatalog findLatestCatalog(File rootDir, String source, String build)
            throws CatalogFormatException, LatestCatalogNotFoundException {
        return findLatestCatalog(rootDir, source, build, null);
    }

    /**
     * Finds the latest version of a catalog based on the following criteria:
     * <ul>
     *     <li>Active catalogs are searched while deprecated catalogs are ignored.</li>
     *     <li>Catalog's datasource.properties source must match the given value (case-insensitive)</li>
     *     <li>Catalog's datasource.properties build must start with the given value (case-insensitive)</li>
     *     <li>Catalogs with a missing or blank datasource.properties dataSourceReleaseDate value are ignored</li>
     *     <li>Catalog with the most recent dataSourceReleaseDate is considered the latest version</li>
     * </ul>
     * @param rootDir Root directory to begin searching for catalogs
     * @param source value that must match (case-insensitive) to a catalog's datasource.properties source value
     * @param build  value that must start with (case-insensitive) to a catalog's datasource.properties build value
     * @param contains optional (may be NULL) text that must be contained within the absolute path of the latest .tsv.bgz file.
     *                 This can be useful if there are multiple catalogs built from the same release but with different
     *                 absolute paths (e.g. omim has this behavior).
     * @return Object representing the latest catalog version
     * @throws CatalogFormatException thrown when a catalog doesn't follow the formal spec
     * @throws LatestCatalogNotFoundException thrown when 1.) no catalogs are found under the rootDir, 2.) no catalogs
     *                                        hits match build and/or source, 3.) more than 1 hit shares the most
     *                                        recent dataSourceReleaseDate
     */
    public LatestCatalog findLatestCatalog(File rootDir, String source, String build, String contains)
            throws CatalogFormatException, LatestCatalogNotFoundException {

        List<File> allTsvBgzFiles = findAllTsvBgzFiles(rootDir, false);

        // check if rootDir has 0 catalogs
        if (allTsvBgzFiles.isEmpty()) {
            throw new LatestCatalogNotFoundException(String.format("0 catalogs found under the root directory %s", rootDir.getAbsolutePath()));
        }
        LOGGER.info(String.format("%s catalogs found under the root directory %s", allTsvBgzFiles.size(), rootDir.getAbsolutePath()));

        List<File> filteredTsvBgzFiles = filter(allTsvBgzFiles, source, build, contains);

        return findLatest(filteredTsvBgzFiles);
    }

    /**
     * Filter the given catalogs by passing the following filters:
     * <ul>
     *     <li>Does the catalog have a datasource.properties?  (a warning is logged when it does not)</li>
     *     <li>Is the catalog not deprecated?</li>
     *     <li>Does the catalog's datasource.properties source value match (case-insensitive)?</li>
     *     <li>Does the catalog's datasource.properties build value start with the given value (case-insensitive)?</li>
     *     <li>Does the catalog's absolute path contain the optional <code>contains</code> text?</li>
     * </ul>
     * Deprecated catalogs are skipped before their datasource.properties is read, so their contents cannot
     * abort the search.
     *
     * @param tsvBgzFiles Catalog .tsv.bgz files to filter
     * @param source value to search against each catalog's datasource.properties (must not be NULL)
     * @param build value to search against each catalog's datasource.properties (must not be NULL)
     * @param contains optional (may be NULL) text that must be contained within the absolute path of the latest .tsv.bgz file.
     * @return List of filtered Catalog .tsv.bgz files, never empty
     * @throws CatalogFormatException thrown when a catalog doesn't follow the formal spec
     * @throws LatestCatalogNotFoundException thrown when no catalog passes every filter; the message names the
     *                                        criterion that failed
     */
    protected List<File> filter(List<File> tsvBgzFiles, String source, String build, String contains)
            throws CatalogFormatException, LatestCatalogNotFoundException {

        // distinct values seen on active catalogs; used as suggestions in the error message
        Set<String> sourceValues = new HashSet<String>();
        Set<String> buildValues  = new HashSet<String>();

        int sourceMatchCount = 0;
        int buildMatchCount  = 0;

        List<File> filtered = new ArrayList<File>();
        for (File tsvBgzFile: tsvBgzFiles) {
            CatalogFiles catalogFiles = new CatalogFiles(tsvBgzFile);
            if (catalogFiles.getDataSourceFile() == null) {
                LOGGER.warn(String.format("%s does not have a corresponding datasource.properties", tsvBgzFile.getAbsolutePath()));
                continue;
            }
            if (catalogFiles.isDeprecated()) {
                // deprecated catalogs are ignored entirely; don't even parse their properties
                continue;
            }

            CatalogDataSource dataSrc = new CatalogDataSource(catalogFiles.getDataSourceFile());
            String catalogSource = dataSrc.getSource();
            String catalogBuild  = dataSrc.getBuild();

            // guard against absent values so one incomplete catalog cannot fail the whole search with a NPE
            if (catalogSource != null) {
                sourceValues.add(catalogSource);
            }
            if (catalogBuild != null) {
                buildValues.add(catalogBuild);
            }

            boolean sourceMatch   = catalogSource != null && catalogSource.equalsIgnoreCase(source);
            // Locale.ROOT keeps the case folding independent of the JVM's default locale
            boolean buildMatch    = catalogBuild != null
                    && catalogBuild.toUpperCase(Locale.ROOT).startsWith(build.toUpperCase(Locale.ROOT));
            boolean containsMatch = contains == null || tsvBgzFile.getAbsolutePath().contains(contains);

            if (sourceMatch) {
                sourceMatchCount++;
            }
            if (buildMatch) {
                buildMatchCount++;
            }

            if (sourceMatch && buildMatch && containsMatch) {
                filtered.add(tsvBgzFile);
            }
        }

        if (filtered.isEmpty()) {
            StringWriter sWtr = new StringWriter();
            PrintWriter  pWtr = new PrintWriter(sWtr);
            if (sourceMatchCount == 0) {
                pWtr.println(String.format("Found 0 catalogs with source=%s.  Try one of the following available values:", source));
                printSorted(pWtr, sourceValues);
            } else if (buildMatchCount == 0) {
                pWtr.println(String.format("Found 0 catalogs with build=%s.  Try one of the following available values:", build));
                printSorted(pWtr, buildValues);
            } else if (contains == null) {
                // source and build each matched somewhere, but never on the same catalog
                pWtr.println(String.format("Found 0 catalogs matching both source=%s and build=%s.", source, build));
            } else {
                // either source/build never matched together, or the path filter excluded every hit
                pWtr.println(String.format("Found 0 catalogs matching source=%s and build=%s whose path contains %s.",
                        source, build, contains));
            }
            pWtr.flush();
            throw new LatestCatalogNotFoundException(sWtr.toString());
        }

        return filtered;
    }

    /**
     * Prints the given values in natural sort order, one per line.
     */
    private static void printSorted(PrintWriter pWtr, Set<String> values) {
        List<String> sortedValues = new ArrayList<String>(values);
        Collections.sort(sortedValues);
        for (String value: sortedValues) {
            pWtr.println(value);
        }
    }

    /**
     * Builds a multi-line message made of the header followed by the absolute path of each file.
     */
    private static String listPaths(String header, List<File> files) {
        StringWriter sWtr = new StringWriter();
        PrintWriter  pWtr = new PrintWriter(sWtr);
        pWtr.println(header);
        for (File file: files) {
            pWtr.println(file.getAbsolutePath());
        }
        pWtr.flush();
        return sWtr.toString();
    }

    /**
     * Finds the latest catalog version based on datasource.properties dataSourceReleaseDate value
     * @param tsvBgzFiles Catalog .tsv.bgz that passed build/source filtering (must not be NULL)
     * @return {@link LatestCatalog} that represents the latest catalog version
     * @throws CatalogFormatException thrown when 1.) no catalogs have a dataSourceReleaseDate or 2.) invalid dataSourceReleaseDate
     * @throws LatestCatalogNotFoundException thrown when 1.) the given list is empty, 2.) more than one catalog
     *                                        has the most recent dataSourceReleaseDate
     */
    LatestCatalog findLatest(List<File> tsvBgzFiles) throws CatalogFormatException, LatestCatalogNotFoundException {

        if (tsvBgzFiles.isEmpty()) {
            throw new LatestCatalogNotFoundException("Unable to find the latest catalog because no candidate catalogs were given");
        }

        List<File> missing = new ArrayList<File>();
        // Keyed by identity so every catalog keeps its own entry even if CatalogDataSource
        // were to define value-based equals (not visible from this class).
        Map<CatalogDataSource, File> dataSrcToTsvBgz = new IdentityHashMap<CatalogDataSource, File>();
        for (File tsvBgzFile: tsvBgzFiles) {
            CatalogFiles catalogFiles = new CatalogFiles(tsvBgzFile);
            CatalogDataSource dataSrc = new CatalogDataSource(catalogFiles.getDataSourceFile());

            String releaseDate = dataSrc.getDataSourceReleaseDate();
            if (releaseDate == null || releaseDate.trim().length() == 0) {
                missing.add(catalogFiles.getDataSourceFile());
            } else {
                dataSrcToTsvBgz.put(dataSrc, tsvBgzFile);
            }
        }

        if (!missing.isEmpty()) {
            // check error condition - all catalogs don't have a dataSourceReleaseDate
            if (missing.size() == tsvBgzFiles.size()) {
                throw new CatalogFormatException(listPaths(
                        "Unable to find the latest catalog because the following catalog datasource.properties are missing dataSourceReleaseDate:",
                        missing));
            }

            // always log warnings for catalogs without a dataSourceReleaseDate
            LOGGER.warn(listPaths(
                    String.format("%s catalog datasource.properties are missing dataSourceReleaseDate:", missing.size()),
                    missing));
        }

        // sort by DataSourceReleaseDate
        List<CatalogDataSource> sortedCatalogDataSources = new ArrayList<CatalogDataSource>(dataSrcToTsvBgz.keySet());
        // make sure dataSourceReleaseDate values are parsable up front, so a bad value surfaces as the
        // checked CatalogFormatException instead of the RuntimeException the comparator would raise
        for (CatalogDataSource cds: sortedCatalogDataSources) {
            cds.getDataSourceReleaseDateAsDateObject();
        }
        Collections.sort(sortedCatalogDataSources, RELEASE_DATE_NEWEST_FIRST);

        CatalogDataSource latestDataSrc = sortedCatalogDataSources.get(0);

        // check error condition - no other catalog has the same dataSourceReleaseDate.
        // The list is sorted newest first, so any catalog tied with the latest sits at index 1.
        // Parsed dates are compared (as the sort does) rather than the raw property strings.
        if (sortedCatalogDataSources.size() > 1) {
            CatalogDataSource runnerUp = sortedCatalogDataSources.get(1);
            Date latestDate   = latestDataSrc.getDataSourceReleaseDateAsDateObject();
            Date runnerUpDate = runnerUp.getDataSourceReleaseDateAsDateObject();
            if (latestDate.compareTo(runnerUpDate) == 0) {
                StringWriter sWtr = new StringWriter();
                PrintWriter  pWtr = new PrintWriter(sWtr);
                pWtr.println(String.format("Unable to find the latest catalog because multiple catalog datasource.properties had dataSourceReleaseDate=%s",
                        latestDataSrc.getDataSourceReleaseDate()));
                pWtr.println(latestDataSrc.getFile().getAbsolutePath());
                pWtr.println(runnerUp.getFile().getAbsolutePath());
                pWtr.flush();
                throw new LatestCatalogNotFoundException(sWtr.toString());
            }
        }

        LatestCatalog latest = new LatestCatalog();
        latest.setShortUniqueName(latestDataSrc.getShortUniqueName());
        latest.setDescription(latestDataSrc.getDescription());
        latest.setBuild(latestDataSrc.getBuild());
        latest.setSource(latestDataSrc.getSource());
        latest.setVersion(latestDataSrc.getVersion());
        latest.setDataset(latestDataSrc.getDataset());
        latest.setDataSourceReleaseDate(latestDataSrc.getDataSourceReleaseDate());
        latest.setCatalogPath(dataSrcToTsvBgz.get(latestDataSrc).getAbsolutePath());
        return latest;
    }
}
