package edu.mayo.bior.catalog.verification;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.*;

import edu.mayo.bior.buildcatalog.BuildCatalog;
import edu.mayo.bior.catalog.*;

import org.apache.commons.lang.StringUtils;

import edu.mayo.pipes.history.ColumnMetaData;
import edu.mayo.pipes.history.ColumnMetaDataOperations;
import edu.mayo.pipes.history.ColumnMetaDataVerificationMessages;
import edu.mayo.pipes.util.PropertiesFileUtil;

/**
 * Verifies a catalog file together with its supporting files (tabix index, columns.tsv,
 * datasource.properties, blacklist files and the optional index subdirectory).
 * <p>
 * Verification is split into three phases (see {@link Phase}); each phase logs its findings to a
 * {@link MessageLogger}. The {@code verify(...)} methods return 0 when no errors were logged and 1 otherwise.
 * <p>
 * An instance keeps per-catalog state in fields that are re-initialized on every {@code verify(...)} call;
 * the class does no synchronization of its own.
 */
public class CatalogVerifier
{
   // Catalog file plus the supporting files that sit next to it
   private CatalogFiles mCatalogFiles = null;

   // Reader/verifier for columns.tsv; stays null when columns.tsv is missing or could not be opened
   ColumnMetaDataOperations mColumnMetaOps = null;

   // Parsed datasource.properties; stays null when the file is missing or malformed
   private CatalogDataSource mCatalogDataSource = null;

   // Attributes for catalog entry content checking:
   private HashMap<String, ColumnMetaData> mCatalogColumnsMap = null;
   private TreeMap<String, String> mBIORIndexMapColNameToFilePathMap = null;

   private HumanReferenceInfo mReferenceInfo = null;


   /** The verification phases that can be selected when calling the 7-argument {@code verify}. */
   public enum Phase
   {
      METADATA,
      ORDER,
      JSON
   }

   /** Validation level; it is handed to the datasource.properties verification. */
   public enum VAL_TYPE
   {
      LENIENT,
      STRICT
   }

   private VAL_TYPE mValType = VAL_TYPE.STRICT;

   private MessageLogger mLogger = null;

   /**
    * Creates a verifier. All state is set up later by {@code verify(...)}.
    *
    * @throws VerifierExecutionException declared for callers; this constructor body does not throw it
    */
   public CatalogVerifier() throws VerifierExecutionException
   {
   }

   /**
    * Runs every phase, passing 1 as {@code oneInXLinesToVerify}.
    *
    * @param catalogFilePath path to catalog file (e.g. catalog.tsv.bgz)
    * @param valLevel        VAL_TYPE enum
    * @param logger          logger to report to; when null a log file named {@code <prefix>_verify.txt}
    *                        is created in the current working directory
    * @return 0 if no verify errors are logged and 1 if any verify errors are logged
    * @throws VerifierInputException
    * @throws VerifierExecutionException
    */
   public int verify(String catalogFilePath, VAL_TYPE valLevel, MessageLogger logger)
      throws VerifierInputException, VerifierExecutionException
   {
      return this.verify(catalogFilePath, valLevel, logger, 1);
   }


   /**
    * Runs every phase starting at line 1 with no limit on the number of lines.
    *
    * @param catalogFilePath     path to catalog file (e.g. catalog.tsv.bgz)
    * @param valLevel            VAL_TYPE enum
    * @param logger              logger to report to; when null a log file is created in the working directory
    * @param oneInXLinesToVerify passed through unchanged to the order and row verifiers
    *                            (presumably "check one line out of every X" - confirm against those classes)
    * @return 0 if no verify errors are logged and 1 if any verify errors are logged
    * @throws VerifierInputException
    * @throws VerifierExecutionException
    */
   // TODO - should all these variables and calls be in the constructor?
   public int verify(String catalogFilePath, VAL_TYPE valLevel, MessageLogger logger, long oneInXLinesToVerify)
      throws VerifierInputException, VerifierExecutionException
   {
      return verify(catalogFilePath, valLevel, logger, oneInXLinesToVerify, getAllPhases(), 1, Long.MAX_VALUE);
   }


   /**
    * @return a fixed-size list holding every {@link Phase} constant in declaration order
    */
   public static List<Phase> getAllPhases()
   {
      return Arrays.asList(Phase.values());
   }


   /**
    * Loads the catalog's supporting files, runs the requested phases and prints a summary line.
    *
    * @param catalogFilePath     path to catalog file (e.g. catalog.tsv.bgz)
    * @param valLevel            VAL_TYPE enum
    * @param logger              logger to report to; when null a log file is created in the working directory
    * @param oneInXLinesToVerify passed through unchanged to the order and row verifiers
    * @param phasesToExecute     phases to run; phases not contained in the list are skipped
    * @param startLine           passed through to the JSON/row verification
    * @param numLines            passed through to the JSON/row verification
    * @return 0 if no verify errors are logged and 1 if any verify errors are logged
    * @throws VerifierInputException
    * @throws VerifierExecutionException
    */
   // TODO - should all these variables and calls be in the constructor?
   public int verify(String catalogFilePath, VAL_TYPE valLevel, MessageLogger logger, long oneInXLinesToVerify, List<Phase> phasesToExecute, long startLine, long numLines)
      throws VerifierInputException, VerifierExecutionException
   {
      configureFiles(catalogFilePath, valLevel, logger);

      // METADATA verification phase ------------------------------------------------------------
      if (phasesToExecute.contains(Phase.METADATA))
      {
         verifyMetadataFiles();
      }

      // ORDER verification phase ------------------------------------------------------------
      CatalogOrderVerifier chromOrderVerifier = null;
      if (phasesToExecute.contains(Phase.ORDER))
      {
         chromOrderVerifier = verifyChromAndPosOrderAndTabixAndH2Indexes(oneInXLinesToVerify);
      }

      // JSON verification phase ------------------------------------------------------------
      if (phasesToExecute.contains(Phase.JSON))
      {
         verifyJsonFieldsAndRefAlleles(oneInXLinesToVerify, startLine, numLines);
      }


      logAndOutputMessages();

      return areAnyErrors(chromOrderVerifier, getLogger());
   }


   /**
    * Initializes all per-catalog state: catalog files, logger, columns.tsv info, datasource.properties info,
    * reference info and the map of index files.
    * Problems reading columns.tsv or datasource.properties are logged as errors and leave the corresponding
    * field null; they do not abort the run.
    */
   private void configureFiles(String catalogFilePath, VAL_TYPE valLevel, MessageLogger logger)
      throws VerifierInputException, VerifierExecutionException
   {
      // Start from a clean slate so that a second verify() on the same instance can never pick up
      // columns/datasource/index information that belongs to a previously verified catalog.
      mColumnMetaOps = null;
      mCatalogColumnsMap = null;
      mCatalogDataSource = null;
      mBIORIndexMapColNameToFilePathMap = null;
      mReferenceInfo = null;

      mValType = valLevel;
      mCatalogFiles = getCatalogFiles(new File(catalogFilePath));
      // Must come after mCatalogFiles is set: the default log file name uses the catalog prefix
      mLogger = getMessageLogger(logger);

      String statusMsg = String.format("Verifying catalog '%s' starting at %s", catalogFilePath, VerifyUtils.composeDateTime());
      System.out.println(statusMsg);
      logInfo(statusMsg);

      // Columns.tsv info
      if (mCatalogFiles.getColumnsFile() != null)
      {
         try
         {
            mColumnMetaOps = new ColumnMetaDataOperations(mCatalogFiles.getColumnsFile());
            mCatalogColumnsMap = mColumnMetaOps.load();
         }
         catch (IOException e)
         {
            logError("Problem reading columns.tsv. Details: " + e.getMessage(), VerifyErrorCodes.COLUMNS_TSV_READ_ERROR);
         }
      }

      // Datasource.properties info
      HumanBuildAssembly humanRefAssembly = null;
      if (mCatalogFiles.getDataSourceFile() != null)
      {
         try
         {
            mCatalogDataSource = new CatalogDataSource(mCatalogFiles.getDataSourceFile());
            humanRefAssembly = mCatalogDataSource.getHumanBuildAssembly();
         }
         catch (CatalogFormatException e)
         {
            logError("datasource.properties has issue: " + e.getMessage(), VerifyErrorCodes.BAD_CATALOG_FORMAT_GENERAL);
         }
      }
      // humanRefAssembly is null here when datasource.properties is missing or malformed
      mReferenceInfo = new HumanReferenceInfo(humanRefAssembly, getLogger());
      mBIORIndexMapColNameToFilePathMap = verifyBIORIndexFilenames(mCatalogFiles.getCatalogFile());
   }

   /**
    * Reads the build assembly directly from a datasource.properties file.
    * This is a best-effort lookup: any failure yields null instead of an exception.
    * Not called from within this class.
    *
    * @param dataSourceFile datasource.properties file, may be null
    * @return the assembly, or null when the file is null or the value could not be read/parsed
    */
   private HumanBuildAssembly getHumanBuildAssemblyFromDatasourceProperties(File dataSourceFile) throws IOException
   {
      HumanBuildAssembly assembly = null;
      try
      {
         if (dataSourceFile != null)
         {
            String assemblyStr = new PropertiesFileUtil(dataSourceFile.getAbsolutePath()).get(DataSourceKey.Build.name());
            assembly = HumanBuildAssembly.assemblyFromString(assemblyStr);
         }
      }
      catch (Exception e)
      {
         // Deliberately swallowed: callers only need to know that no assembly could be determined
         assembly = null;
      }
      return assembly;
   }

   /** METADATA phase: checks the supporting files of the catalog. */
   protected void verifyMetadataFiles() throws VerifierInputException
   {
      verifyCatalogCommonCharacteristics();
   }

   /**
    * ORDER phase.
    *
    * @param oneInXLinesToVerify passed through to the order verifier
    * @return the order verifier that was run, so its error state can be queried afterwards
    */
   protected CatalogOrderVerifier verifyChromAndPosOrderAndTabixAndH2Indexes(long oneInXLinesToVerify) throws VerifierInputException
   {
      return verifyChromosomeOrder(oneInXLinesToVerify);
   }

   /**
    * JSON phase: runs a {@code CatalogRowVerifier} over the catalog and then reports column usage.
    *
    * @param oneInXLinesToVerify passed through to the row verifier
    * @param startLine           passed through to the row verifier
    * @param numLines            passed through to the row verifier
    */
   protected void verifyJsonFieldsAndRefAlleles(long oneInXLinesToVerify, long startLine, long numLines) throws VerifierInputException
   {
      CatalogRowVerifier rowVerifier = new CatalogRowVerifier(
         mCatalogFiles.getCatalogFile(),
         mCatalogDataSource,
         mCatalogColumnsMap,
         getLogger(),
         oneInXLinesToVerify,
         startLine,
         numLines);
      rowVerifier.verify();
      rowVerifier.reportColumnUsage();
   }


   /**
    * Return 1 if there are any order-checking or other errors.  Return 0 if no errors
    *
    * @param chromOrderVerifier order verifier that was run, or null when the ORDER phase was skipped
    * @param messageLogger      logger holding all other logged errors
    */
   private int areAnyErrors(CatalogOrderVerifier chromOrderVerifier, MessageLogger messageLogger)
   {
      boolean areOrderCheckingErrors = chromOrderVerifier != null && chromOrderVerifier.areErrorsLogged();
      boolean areOtherErrors = messageLogger.hasErrors();
      return (areOrderCheckingErrors || areOtherErrors) ? 1 : 0;
   }

   /**
    * Logs the error/warning totals and prints them to stdout; the stdout line additionally names the
    * log file when the logger has one.
    */
   private void logAndOutputMessages()
   {
      String msg = String.format("Verify #ERROR: %d, #WARNING: %d.", mLogger.numErrors(), mLogger.numWarnings());
      mLogger.logInfo(msg);
      if (mLogger.getLogFile() != null)
      {
         msg += String.format(" See details in '%s'", mLogger.getLogFile().getPath());
      }
      System.out.println(msg);
   }

   /**
    * Returns the given logger, or creates a file-backed one named {@code <catalog prefix>_verify.txt} in the
    * current working directory when none is supplied. Requires {@code mCatalogFiles} to be set already.
    *
    * @throws VerifierExecutionException when the default log file cannot be created
    */
   private MessageLogger getMessageLogger(MessageLogger logger) throws VerifierExecutionException
   {
      if (logger != null)
      {
         return logger;
      }

      String logfilePath = System.getProperty("user.dir") + "/" + mCatalogFiles.getPrefix() + "_verify.txt";
      try
      {
         return new MessageLogger(logfilePath);
      }
      catch (IOException io)
      {
         String msg = String.format("Problem creating writer for log file '%s'. Msg: %s", logfilePath, io.getMessage());
         throw new VerifierExecutionException(msg);
      }
   }

   /**
    * Builds the {@code CatalogFiles} for the given catalog file, stores it in {@code mCatalogFiles}
    * and returns it.
    *
    * @throws VerifierInputException when the catalog files object cannot be created
    */
   private CatalogFiles getCatalogFiles(File catalogFile) throws VerifierInputException
   {
      try
      {
         mCatalogFiles = new CatalogFiles(catalogFile);
      }
      catch (CatalogFormatException e)
      {
         throw new VerifierInputException(e.getMessage());
      }
      return mCatalogFiles;
   }

   /**
    * Checks that the supporting files exist, then verifies the contents of columns.tsv,
    * datasource.properties and both blacklist files.
    */
   private void verifyCatalogCommonCharacteristics() throws VerifierInputException
   {
      verifySupportingCatalogFilesExist(mCatalogFiles);

      File columnsFile = mCatalogFiles.getColumnsFile();
      // mColumnMetaOps is null when columns.tsv could not be opened during configuration; that failure
      // has already been logged as an error there, so there is nothing more to verify here.
      if (columnsFile != null && mColumnMetaOps != null)
      {
         try
         {
            ColumnMetaDataVerificationMessages msgs = mColumnMetaOps.verifyContents();

            logAnyWarnings(msgs, columnsFile);
            logAnyErrors(msgs, columnsFile);
            logNoColumnsIfNecessary(mCatalogColumnsMap, columnsFile);
         }
         catch (IOException e)
         {
            logError("Exception raised reading in columns.tsv file name: " + columnsFile.getAbsolutePath(), VerifyErrorCodes.COLUMNS_TSV_READ_ERROR);
         }
      }


      if (mCatalogDataSource == null)
      {
         logError("Failed to instantiate and verify data source properties. Unable to set some catalog characteristics.", VerifyErrorCodes.DATASOURCE_PROPERTIES_READ_ERROR);
      }
      else
      {
         mCatalogDataSource.verify(mValType, mLogger);
      }


      // A null map means the catalog has no index subdirectory at all
      if (mBIORIndexMapColNameToFilePathMap == null)
      {
         logInfo(String.format("No H2 indexes configured for '%s'", mCatalogFiles.getCatalogFile()));
      }

      // Note that the blacklist and blacklist.biorweb files have the same structure so
      // we can use the same method to evaluate the contents of these files
      verifyBlackList(mCatalogFiles.getBlacklistFile(), mCatalogColumnsMap);
      verifyBlackList(mCatalogFiles.getBlacklistBiorwebFile(), mCatalogColumnsMap);
   }

   /**
    * Logs an error when no columns are available for the given columns.tsv file.
    *
    * @param catalogColumns columns read from columns.tsv; null (load failed) is treated like an empty map
    * @param columnsFile    the columns.tsv file, used for the message only
    */
   private void logNoColumnsIfNecessary(HashMap<String, ColumnMetaData> catalogColumns, File columnsFile)
   {
      if (catalogColumns == null || catalogColumns.isEmpty())
      {
         String msg = String.format("No columns were read from '%s'", columnsFile.getPath());
         logError(msg, VerifyErrorCodes.COLUMNS_TSV_NO_FIELDS);
      }
   }

   /**
    * Logs every error reported for columns.tsv, preceded by one introductory info line when there are any.
    */
   private void logAnyErrors(ColumnMetaDataVerificationMessages msgs, File columnsFile)
   {
      List<String> errors = msgs.getErrors();
      if (errors.isEmpty())
      {
         return;
      }

      String msg = String.format("Columns.tsv file '%s' has the following ERROR messages which could affect verification:",
         columnsFile.getPath());
      logInfo(msg);
      for (String error : errors)
      {
         logError(error, VerifyErrorCodes.COLUMNS_TSV_HAS_ERRORS);
      }
   }

   /**
    * Logs every warning reported for columns.tsv, preceded by one introductory info line when there are any.
    */
   private void logAnyWarnings(ColumnMetaDataVerificationMessages msgs, File columnsFile)
   {
      List<String> warnings = msgs.getWarnings();
      if (warnings.isEmpty())
      {
         return;
      }

      String msg = String.format("Columns.tsv file '%s' has the following WARNING messages:", columnsFile.getPath());
      logInfo(msg);
      for (String warning : warnings)
      {
         logWarning(warning);
      }
   }

   /**
    * Checks for the presence of the supporting files. A missing tabix index, columns file or datasource file
    * is an error; a missing blacklist file is only a warning. Also reports a tabix index that is older
    * than the catalog file.
    *
    * @param catalog the catalog whose supporting files are checked
    */
   private void verifySupportingCatalogFilesExist(CatalogFiles catalog)
   {
      // TODO - only check if the file is a 4 field catalog. Could check be made after?
      // Tabix index file is only needed when a catalog has positional info in first three columns (plus the three
      //  golden Json attributes: _landmark, _minBP, and _maxBP):
      File tabixIndexFile = catalog.getTabixIndexFile();
      verifySupportingFileAsError(tabixIndexFile, CatalogMetadataConstant.CATALOG_TABIX_INDEX_SUFFIX, VerifyErrorCodes.TABIX_INDEX_FILE_MISSING);

      // TODO - probably want to check a bunch of additional things about this tabix file or is it being checked in
      //        CatalogFiles already? If not, it probably should be
      if (tabixIndexFile == null || !tabixIndexFile.exists())
      {
         logError("Cannot check to ensure tabix file is newer than catalog file since tabix file does not exist.", VerifyErrorCodes.TABIX_INDEX_FILE_MISSING_SO_CANNOT_ENSURE_IS_NEWER_THAN_CATALOG);
      }
      else if (catalog.getCatalogFile().lastModified() > tabixIndexFile.lastModified())
      {
         logError("Tabix index file timestamp is older than the catalog file.", VerifyErrorCodes.TABIX_INDEX_FILE_TIMESTAMP_OLDER_THAN_CATALOG);
      }

      // NOTE(review): 107 and 108 are raw error codes; they presumably have named counterparts in
      //               VerifyErrorCodes - confirm and replace with the constants.
      verifySupportingFileAsError(catalog.getColumnsFile(), catalog.getPrefix() + CatalogMetadataConstant.COLUMN_INFO_SUFFIX, 107);
      verifySupportingFileAsError(catalog.getDataSourceFile(), catalog.getPrefix() + CatalogMetadataConstant.DATASRC_PROPS_SUFFIX, 108);
      verifySupportingFileAsWarning(catalog.getBlacklistFile(), catalog.getPrefix() + CatalogMetadataConstant.BLACKLIST_SUFFIX);
      verifySupportingFileAsWarning(catalog.getBlacklistBiorwebFile(), catalog.getPrefix() + CatalogMetadataConstant.BLACKLIST_BIORWEB_SUFFIX);
   }

   /** Logs a warning when the file is null or does not exist. */
   private void verifySupportingFileAsWarning(File file, String fileDescription)
   {
      if (file == null || !file.exists())
      {
         logWarning(fileDescription + " file does not exist");
      }
   }

   /** Logs an error with the given code when the file is null or does not exist. */
   private void verifySupportingFileAsError(File file, String fileDescription, int code)
   {
      if (file == null || !file.exists())
      {
         logError(fileDescription + " file does not exist", code);
      }
   }

   /**
    * Scans the catalog's index subdirectory and collects the index files that are usable.
    * <p>
    * Files without the index suffix are ignored. An index whose key is not listed in columns.tsv only
    * produces a warning. An index that is older than the catalog produces an error and is left out of
    * the result.
    *
    * @param catalogFile the catalog file whose timestamp the index files are compared against
    * @return map of indexed column name to absolute index file path (possibly empty),
    *         or null when the catalog has no index subdirectory
    */
   private TreeMap<String, String> verifyBIORIndexFilenames(File catalogFile)
   {
      File indexDir = new File(mCatalogFiles.getDir() + File.separator + CatalogMetadataConstant.INDEX_DIR);
      if (!indexDir.isDirectory())
      {
         return null;  // Not all catalogs will have ID indexes. Done if no index subdir exists.
      }

      TreeMap<String, String> biorIndexMap = new TreeMap<String, String>();
      String catalogPrefix = mCatalogFiles.getPrefix();

      // listFiles() returns null on an I/O error; treat that like an empty directory
      File[] fList = indexDir.listFiles();
      if (fList != null)
      {
         for (File indexFile : fList)
         {
            // Skip the file if it doesn't have the index suffix
            if (!indexFile.getName().endsWith(CatalogMetadataConstant.INDEX_SUFFIX))
            {
               continue;
            }

            String colNameIndexed = getColNameFromIndex(indexFile, catalogPrefix);

            if (mCatalogColumnsMap == null)
            {
               logWarning("Don't have columns.tsv to check index key '" + colNameIndexed + "'");
            }
            else if (mCatalogColumnsMap.get(colNameIndexed) == null)
            {
               logWarning("Index key '" + colNameIndexed + "' not found in columns.tsv.");
            }

            // Make sure the index file is newer than the catalog file itself.
            // In past, BIOR index got copied over to a newer catalog directory and never was regenerated by accident.
            if (catalogFile.lastModified() > indexFile.lastModified())
            {
               String msg = String.format("Regenerate index for key '%s'. Catalog '%s' is newer than the index file '%s'",
                  colNameIndexed, catalogFile.getPath(), indexFile.getPath());
               logError(msg, VerifyErrorCodes.BIOR_KEY_INDEX_FILE_OLDER_THAN_CATALOG);
            }
            else
            {
               biorIndexMap.put(colNameIndexed, indexFile.getAbsolutePath());
            }
         }
      }

      if (biorIndexMap.isEmpty())
      {
         logWarning("BIOR Index checking: No valid H2 database file names found in index subdirectory for catalog name: " +
            catalogPrefix);
      }
      return biorIndexMap;
   }

   /**
    * Get the column name from the index file (subtracting off the catalogPrefix and indexSuffix)
    * Ex:  catalog.my.vcf.idx.h2.db   (catalogPrefix:  "catalog.my.vcf";  indexSuffix: ".idx.h2.db")
    * Special case for backwards compatibility: dbSNP catalog: "00-All.vcf.tsv.bgz"
    * Ex:  00-All.ID.idx.h2.db        (catalogPrefix:  "00-All.vcf";      indexSuffix: ".idx.h2.db")
    * <p>
    * The prefix is only stripped when it is followed by a '.'; when neither the full prefix nor the prefix
    * without a trailing ".vcf" matches, the name (minus the index suffix) is returned unchanged.
    *
    * @param indexFile     index file whose name is parsed
    * @param catalogPrefix prefix of the catalog the index belongs to
    * @return the column name encoded in the index file name
    */
   private String getColNameFromIndex(File indexFile, String catalogPrefix)
   {
      String suffix = CatalogMetadataConstant.INDEX_SUFFIX;
      String vcfExtension = ".vcf";
      String catalogPrefixWithoutVcf = catalogPrefix.endsWith(vcfExtension)
         ? catalogPrefix.substring(0, catalogPrefix.length() - vcfExtension.length())
         : catalogPrefix;

      String colName = indexFile.getName();
      if (colName.endsWith(suffix))
      {
         colName = colName.substring(0, colName.length() - suffix.length());
      }

      // Try the full prefix first, then fall back to the prefix without ".vcf" (older dbSNP-style names)
      if (colName.startsWith(catalogPrefix + "."))
      {
         colName = colName.substring(catalogPrefix.length() + 1);
      }
      else if (colName.startsWith(catalogPrefixWithoutVcf + "."))
      {
         colName = colName.substring(catalogPrefixWithoutVcf.length() + 1);
      }

      return colName;
   }

   /**
    * Warns about entries in a blacklist file that are not columns of the catalog.
    * Each line is trimmed; lines starting with '#' are skipped; every other line is looked up as a column name.
    *
    * @param blacklistFile     blacklist file to check; nothing is done when null
    * @param catalogColumnInfo columns read from columns.tsv; when null or empty the content check is
    *                          skipped with a warning
    */
   private void verifyBlackList(File blacklistFile, HashMap<String, ColumnMetaData> catalogColumnInfo)
   {
      if (blacklistFile == null)
      {
         return;
      }

      if (catalogColumnInfo == null || catalogColumnInfo.isEmpty())
      {
         String msg = String.format("Cannot verify content of '%s' as we have no columns.tsv info", blacklistFile.getPath());
         logWarning(msg);
         return;
      }
      BufferedReader rdr = CatalogFileUtils.getBufferedReader(blacklistFile.getAbsolutePath());
      if (rdr == null)
      {
         String msg = String.format("Problem getting object to read blacklist file '%s'", blacklistFile.getPath());
         logError(msg, VerifyErrorCodes.COLUMNS_TSV_BLACKLIST_FILE_MISSING_OR_UNREADABLE);
         return;
      }

      try
      {
         String line;
         Set<String> unknownBlacklistKeys = new HashSet<String>();
         while ((line = rdr.readLine()) != null)
         {
            String key = line.trim();
            if (key.startsWith("#"))
            {
               continue;
            }
            if (catalogColumnInfo.get(key) == null)
            {
               unknownBlacklistKeys.add(key);
            }
         }
         if (!unknownBlacklistKeys.isEmpty())
         {
            String description = unknownBlacklistKeys.size() == 1 ? "a column" : "columns";
            // Sort so the message is stable regardless of hash order
            List<String> sortedBlacklistKeys = new ArrayList<String>(unknownBlacklistKeys);
            Collections.sort(sortedBlacklistKeys);
            String msg = String.format("'%s' contains %s not found in the columns.tsv file: %s",
               blacklistFile.getPath(), description, StringUtils.join(sortedBlacklistKeys, ","));
            logWarning(msg);
         }
      }
      catch (IOException io)
      {
         String msg = String.format("Issue reading blacklist file '%s'. Msg: %s",
            blacklistFile.getPath(), io.getMessage());
         logError(msg, VerifyErrorCodes.COLUMNS_TSV_BLACKLIST_FILE_READ_ERROR);
      }
      finally
      {
         // Always release the file handle, also when reading failed
         try
         {
            rdr.close();
         }
         catch (IOException ignored)
         {
            // Nothing useful can be done when closing a read-only stream fails
         }
      }
   }

   /** @return the logger currently in use; null before the first {@code verify(...)} or {@code setLogger} call */
   protected MessageLogger getLogger()
   {
      return mLogger;
   }

   /** Replaces the logger in use. Note that every {@code verify(...)} call assigns the logger again. */
   protected void setLogger(MessageLogger l)
   {
      mLogger = l;
   }

   /** Logs an informational message to the current logger. */
   protected void logInfo(String msg)
   {
      mLogger.logInfo(msg);
   }

   /** Logs a warning to the current logger. */
   protected void logWarning(String msg)
   {
      mLogger.logWarning(msg);
   }

   /** Logs an error with the given error code to the current logger. */
   protected void logError(String msg, int code)
   {
      mLogger.logError(msg, code);
   }

   /**
    * Verify order of chromosomes on a separate, already started thread.
    * Not called from within this class; see {@link #verifyChromosomeOrder(long)} for the variant in use.
    *
    * @return the started thread
    */
   private CatalogOrderVerifierThread startCatalogOrderCheckingThread() throws VerifierInputException
   {
      // TODO - need to make a different logger here and then join it back into the main logger if we really want this
      //        to work as intended
      CatalogOrderVerifier validator =
         new CatalogOrderVerifier(mReferenceInfo.getExpectedChrOrderList(), mCatalogFiles.getCatalogFile().getAbsolutePath(),
            mBIORIndexMapColNameToFilePathMap, mLogger);
      CatalogOrderVerifierThread thread = new CatalogOrderVerifierThread(validator);
      thread.start();
      return thread;
   }

   /**
    * Verify order of chromosomes.  This is the single-threaded equivalent of startCatalogOrderCheckingThread()
    *
    * @param oneInXLinesToVerify passed through to the order verifier
    * @return the order verifier after it has run
    * @throws VerifierInputException
    */
   private CatalogOrderVerifier verifyChromosomeOrder(long oneInXLinesToVerify) throws VerifierInputException
   {
      CatalogOrderVerifier orderVerifier = new CatalogOrderVerifier(
         mReferenceInfo.getExpectedChrOrderList(),
         mCatalogFiles.getCatalogFile().getAbsolutePath(),
         mBIORIndexMapColNameToFilePathMap,
         mLogger,
         oneInXLinesToVerify);
      orderVerifier.verify();
      return orderVerifier;
   }


   /**
    * Command line entry point: strictly verifies the catalog named by the first argument using all phases.
    * Exits with status 1 when no argument is given or when verification raises an exception.
    *
    * @param args args[0] is the path to the catalog file
    */
   public static void main(String[] args) throws Exception
   {
      if (args == null || args.length < 1)
      {
         // Give a readable hint instead of failing with an ArrayIndexOutOfBoundsException
         System.err.println("Usage: CatalogVerifier <catalogFilePath>");
         System.exit(1);
         return;
      }

      File file = new File(args[0]);
      String catalogFilePath = file.getPath();

      CatalogVerifier catalogValidator = new CatalogVerifier();
      try
      {
         catalogValidator.verify(catalogFilePath, VAL_TYPE.STRICT, null, 1);
      }
      catch (Exception e)
      {
         System.err.println("Exception raised in bior_verify_catalog: " + e.getMessage());
         e.printStackTrace();
         System.exit(1);
      }
   }
}
