package edu.mayo.bior.catalog.verification;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.Map.Entry;

import com.google.gson.*;
import edu.mayo.bior.catalog.CatalogFiles;
import edu.mayo.bior.catalog.CatalogFormatException;
import edu.mayo.bior.catalog.CatalogTabixEntry;
import edu.mayo.bior.catalog.CatalogFileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.log4j.Logger;

import com.tinkerpop.pipes.util.Pipeline;

import edu.mayo.pipes.JSON.lookup.LookupPipe;
import htsjdk.tribble.readers.TabixReader;
import edu.mayo.pipes.history.HistoryInPipe;
import edu.mayo.pipes.history.HistoryOutPipe;
import edu.mayo.pipes.util.metadata.Metadata;

/** Verify:
 * 		- The order of the chromosomes in the catalog matches an expected set.
 *  	- The position order within each chromosome is non-decreasing.
 *  	- Rows retrieved from tabix lookup (chrom-pos lookup) match the current row.
 *  	- Rows retrieved from H2 indexes (based on the value for a key that is indexed) match the current row. */ 
public class CatalogOrderVerifier
{
   /** log4j logger used for developer-facing diagnostics such as exception stack traces. */
   private static final Logger sLogger = Logger.getLogger(CatalogOrderVerifier.class);
   /** Chromosome name that gets an extra informational message when it is missing from the expected list. */
   public static final String UNKNOWN_CHR = "UNKNOWN";

   /**
    * Returns the map of BIOR index name to index file path that was given to the constructor.
    *
    * @return the index map; may be null when no map was supplied
    */
   private Map<String, String> getBiorIndexMap()
   {
      return this.mIndexNamePathMap;
   }

   // Path of the catalog file being verified
   private String mCatalogFilePath = null;
   // Chromosome names in the order they are expected to appear; null or empty disables the order check
   private List<String> mExpectChrOrder = null;
   // BIOR index name -> index file path; may be null
   private Map<String, String> mIndexNamePathMap = null;
   private JsonParser mJsonParser = new JsonParser();
   // Index keys for which the "not verifying indexes on arrays" warning was already logged,
   // so that the warning is emitted only once per key
   private Set<String> arrayKeysReported = new HashSet<String>();
   // Sampling interval: one out of every this-many data lines is verified (1 = every line)
   private long mOneInXLinesToVerify = 1;
   
   // Collects the info/warning/error messages produced by the verification
   private MessageLogger logger = null;

   private static final String DBL_QUOTE = "\"";
   private static final String NL = VerifyUtils.NL;

   /**
    * Creates a verifier that checks every data line of the catalog.
    *
    * @param expectedChrOrderList chromosome names in their expected order
    * @param catalogFilePath      path of the catalog file to verify
    * @param indexNameToPathMap   BIOR index name to index file path
    * @param messageLogger        receives the messages produced during verification
    * @throws VerifierInputException if the catalog path is rejected by {@code CatalogFiles}
    */
   public CatalogOrderVerifier(List<String> expectedChrOrderList, String catalogFilePath,
                               Map<String, String> indexNameToPathMap, MessageLogger messageLogger)
      throws VerifierInputException
   {
      // A sampling interval of 1 means that no row is skipped
      this(expectedChrOrderList, catalogFilePath, indexNameToPathMap, messageLogger, 1L);
   }

   /**
    * Creates a verifier that checks one out of every {@code oneInXLinesToVerify} data lines.
    *
    * @param expectedChrOrderList chromosome names in their expected order
    * @param catalogFilePath      path of the catalog file to verify
    * @param indexNameToPathMap   BIOR index name to index file path
    * @param messageLogger        receives the messages produced during verification
    * @param oneInXLinesToVerify  sampling interval; 1 verifies every data line
    * @throws VerifierInputException if constructing {@code CatalogFiles} for the path fails
    *                                with a {@code CatalogFormatException}
    */
   public CatalogOrderVerifier(List<String> expectedChrOrderList, String catalogFilePath,
                               Map<String, String> indexNameToPathMap, MessageLogger messageLogger,
                               long oneInXLinesToVerify)
      throws VerifierInputException
   {
      // The CatalogFiles instance is created only for its validation of the path
      try
      {
         new CatalogFiles(catalogFilePath);
      }
      catch (CatalogFormatException e)
      {
         throw new VerifierInputException(e.getMessage());
      }

      mOneInXLinesToVerify = oneInXLinesToVerify;
      setExpectedChrOrderList(expectedChrOrderList);
      setCatalogFileName(catalogFilePath);
      setBiorIndexMap(indexNameToPathMap);
      logger = messageLogger;
   }

   /**
    * Walks the catalog file line by line and logs any problems found with chromosome order,
    * position order within a chromosome, tabix retrieval and BIOR index retrieval.
    * <p>
    * Comment lines (starting with '#') are ignored. Of the remaining lines only those selected by
    * the sampling interval are parsed and checked. Checking stops as soon as both 4-column and
    * 1-column rows have been seen. {@link CatalogFormatException} and {@link IOException} are
    * caught and reported through the message logger. The catalog reader and the tabix reader are
    * closed before this method returns, whether it returns normally or not.
    */
   public void verify()
   {
      String statusMsg = String.format("Verifying chromosomal order and indexes for '%s' starting at %s",
                                       mCatalogFilePath, VerifyUtils.composeDateTime());
      System.out.println(statusMsg);
      logger.logInfo(statusMsg);

      // Without an index map there are no BIOR indexes to iterate over
      Set<Map.Entry<String, String>> biorIndexEntrySet = Collections.<Map.Entry<String, String>>emptySet();
      TreeMap<String, CatalogTabixEntry> lastRowWithThisBiorIndexValue = new TreeMap<String, CatalogTabixEntry>();
      if (getBiorIndexMap() != null)
      {
         biorIndexEntrySet = getBiorIndexMap().entrySet();
         // initialize the keys in this tracking map
         for (String key : getBiorIndexMap().keySet())
         {
            // If this is a multi level key (like i.j) then we will put out a message that we are not
            // supporting verification of keys like this currently
            if (key.contains("."))
            {
               String msg = String.format("Currently we are not verifying indexes on hierarchichal keys (key '%s')",
                                          key);
               logger.logWarning(msg);
            }
            else
            {
               lastRowWithThisBiorIndexValue.put(key, null);
            }
         }
      }

      Set<String> chrAlreadySeen = new HashSet<String>();
      List<String> chrSeenInOrder = new ArrayList<String>();

      // Declared outside the try so that the finally block can close them
      BufferedReader catalogRdr = null;
      TabixReader tabixReader = null;
      CatalogTabixEntry previousRow = null;
      boolean seen4TabixColumns = false;
      boolean seen1TabixColumn = false;
      try
      {
         catalogRdr = CatalogFileUtils.getBufferedReader(mCatalogFilePath);
         if (catalogRdr == null)
         {
            throw new IOException(String.format("Couldn't create an object to read catalog '%s'", mCatalogFilePath));
         }
         try
         {
            tabixReader = new TabixReader(mCatalogFilePath);
         }
         catch (IOException io)
         {
            // Keep going without tabix checks; verifyTabixLine() ignores a null reader
            String msg = "Couldn't create a Tabix reader for catalog: '" + mCatalogFilePath + "'. Msg: " + io.getMessage();
            logger.logError(msg, VerifyErrorCodes.TABIX_INDEX_FILE_MISSING_OR_CORRUPT);
         }

         String line;
         long lineReadCount = 0;
         long lineVerifiedCount = 0;
         while ((line = catalogRdr.readLine()) != null)
         {
            if (line.trim().startsWith("#")) {
               continue;
            }

            lineReadCount++;

            if( shouldSkipRow(lineReadCount) ) {
               continue;
            }

            CatalogTabixEntry currentRow = VerifyUtils.readCatalogRow(line, logger);
            if (currentRow == null)
            {
               // don't need to log a message. It's already done in VerifyUtils.readCatalogRow
               continue;
            }

            if (currentRow.is4Field()) {
               seen4TabixColumns = true;
            } else if (currentRow.is1Field()) {
               seen1TabixColumn = true;
            }
            if (seen4TabixColumns && seen1TabixColumn) {
               logger.logError("Saw a mix of 4 columns and 1 columns in catalog. Stopping checking", VerifyErrorCodes.CATALOG_COLUMN_COUNT_INCONSISTENT);
               break;
            }

            // Remember the most recent row that carries each indexed key
            for (String biorIndexKey : lastRowWithThisBiorIndexValue.keySet())
            {
               // This includes the double quote and the : in the search
               if (currentRow.getJsonString().contains(composeJsonIndexStr(biorIndexKey)))
               {
                  lastRowWithThisBiorIndexValue.put(biorIndexKey, currentRow);
               }
            }

            //================================================================
            // Check expected row order compared to previous row:
            //================================================================
            if (currentRow.is4Field())
            {
               if (previousRow != null && currentRow.getChromosome().equals(previousRow.getChromosome()))
               {
                  verifySameChromosome(currentRow, previousRow, line);
               }
               else
               {
                  verifyNewChromosome(currentRow, previousRow, biorIndexEntrySet, lastRowWithThisBiorIndexValue,
                                      chrAlreadySeen, tabixReader, line);
                  chrSeenInOrder.add(currentRow.getChromosome());
               }

               lineVerifiedCount++;
               chrAlreadySeen.add(currentRow.getChromosome());
            }
            previousRow = currentRow;

         } // end while looping through catalog

         // For the last line in the catalog file, still stored in previousRow, make sure that we
         // get valid results from both tabix and bior indexes. If the index creation fails before the end, we wouldn't
         // get a result for this, and we want to report that problem.
         if (previousRow != null && previousRow.isPositional())
         {
            if (!previousRow.isUnknownPosition())
            {
               verifyLastTabixLine(previousRow, tabixReader);
            }
            verifyIndexes(null, previousRow, biorIndexEntrySet, lastRowWithThisBiorIndexValue);
         }

         logger.logInfo("Order of chromosomes: " + StringUtils.join(chrSeenInOrder, ","));

         statusMsg = String.format("Chrom and position order: Number of lines read (%d) and verified (%d)", lineReadCount, lineVerifiedCount);
         logger.logInfo(statusMsg);

         statusMsg = String.format("Completed verifying order and indexes at %s", VerifyUtils.composeDateTime());
         logger.logInfo(statusMsg);

      }
      catch (CatalogFormatException e)
      {
         String msg = String.format("Problem with Catalog format. Msg: %s", e.getMessage());
         sLogger.error(ExceptionUtils.getStackTrace(e));
         logger.logError(msg, VerifyErrorCodes.BAD_CATALOG_FORMAT_GENERAL);
      }
      catch (IOException e)
      {
         sLogger.error(ExceptionUtils.getStackTrace(e));
         logger.logError(e.getMessage(), VerifyErrorCodes.CATALOG_ORDER_EXCEPTION);
      }
      finally
      {
         closeReaders(catalogRdr, tabixReader);
      }
   }

   /**
    * Closes the readers opened by {@link #verify()}. A failure to close is only written to the
    * log4j logger because the verification results have already been reported by then.
    *
    * @param catalogRdr  catalog reader to close; may be null
    * @param tabixReader tabix reader to close; may be null
    */
   private static void closeReaders(BufferedReader catalogRdr, TabixReader tabixReader)
   {
      if (tabixReader != null)
      {
         try
         {
            tabixReader.close();
         }
         catch (Exception e)
         {
            // Broad catch on purpose: compiles whether or not close() declares a checked exception
            sLogger.warn("Couldn't close the Tabix reader. Msg: " + e.getMessage());
         }
      }
      if (catalogRdr != null)
      {
         try
         {
            catalogRdr.close();
         }
         catch (IOException e)
         {
            sLogger.warn("Couldn't close the catalog reader. Msg: " + e.getMessage());
         }
      }
   }
   
   /**
    * Decides whether a data row is left out of verification by the sampling interval.
    * With an interval of X, rows 1, X+1, 2X+1, ... are verified and all others are skipped.
    * An interval of 0 is treated as "verify every row" instead of failing with a
    * division-by-zero {@link ArithmeticException}.
    *
    * @param rowsInCatalog 1-based number of the data row just read
    * @return true when the row should be skipped
    */
   private boolean shouldSkipRow(long rowsInCatalog) {
      if (mOneInXLinesToVerify == 0) {
         return false;
      }
      long zeroBasedRowInCatalog = (rowsInCatalog - 1);
      return zeroBasedRowInCatalog % mOneInXLinesToVerify != 0;
   }


   /**
    * Runs the checks that apply when a row starts a different chromosome than the previous row
    * (or is the first row checked): repeated chromosome block, tabix retrieval for the rows on
    * both sides of the boundary, BIOR index retrieval, and expected chromosome order.
    *
    * @param currentRow                    first row of the new chromosome
    * @param previousRow                   last row checked before it; null for the first row
    * @param biorIndexEntrySet             BIOR index name/path pairs to check
    * @param lastRowWithThisBiorIndexValue most recent row seen for each indexed key
    * @param chrAlreadySeen                chromosomes encountered so far
    * @param tabixReader                   tabix reader for the catalog; may be null
    * @param line                          raw catalog line of {@code currentRow}, used in messages
    */
   private void verifyNewChromosome(CatalogTabixEntry currentRow, CatalogTabixEntry previousRow,
                                    Set<Entry<String, String>> biorIndexEntrySet,
                                    TreeMap<String, CatalogTabixEntry> lastRowWithThisBiorIndexValue,
                                    Set<String> chrAlreadySeen, TabixReader tabixReader, String line)
      throws IOException, CatalogFormatException
   {
      // Step 1: a chromosome must not come back after another chromosome's block
      boolean repeatedChromosome = chrAlreadySeen.contains(currentRow.getChromosome());
      if (repeatedChromosome)
      {
         logger.logError(String.format("Already saw this chr '%s' in previous block of catalog. Line: %s",
                                       currentRow.getChromosome(), line),
                         VerifyErrorCodes.CHROMOSOME_REPEATED_IN_NON_CONTIGUOUS_CHUNK);
      }

      // Step 2: tabix lookups at the boundary - last row of the old chromosome first,
      //         then first row of the new one; rows with an unknown position are left out
      if (previousRow != null)
      {
         if (!previousRow.isUnknownPosition())
         {
            verifyLastTabixLine(previousRow, tabixReader);
         }
      }
      if (!currentRow.isUnknownPosition())
      {
         verifyFirstTabixLine(currentRow, tabixReader);
      }

      // Step 3: BIOR index lookups for the rows on both sides of the boundary
      verifyIndexes(currentRow, previousRow, biorIndexEntrySet, lastRowWithThisBiorIndexValue);

      // Step 4: the new chromosome has to follow the expected chromosome order
      verifyChromosomeInExpectedOrder(currentRow, previousRow, line);
   }

   /**
    * Checks tabix retrieval for a row that is the first row of its chromosome.
    *
    * @param row         row to look up; may be null
    * @param tabixReader tabix reader for the catalog; may be null
    */
   private void verifyFirstTabixLine(CatalogTabixEntry row, TabixReader tabixReader)
      throws IOException, CatalogFormatException
   {
      final boolean expectFirstResult = true;
      verifyTabixLine(row, tabixReader, expectFirstResult, "first");
   }

   /**
    * Checks tabix retrieval for a row that is the last row of its chromosome.
    *
    * @param row         row to look up; may be null
    * @param tabixReader tabix reader for the catalog; may be null
    */
   private void verifyLastTabixLine(CatalogTabixEntry row, TabixReader tabixReader)
      throws IOException, CatalogFormatException
   {
      final boolean expectFirstResult = false;
      verifyTabixLine(row, tabixReader, expectFirstResult, "last");
   }

   /**
    * Logs an error when the given row cannot be retrieved through the tabix reader.
    * Nothing is checked when either the row or the reader is null.
    *
    * @param row         row to look up
    * @param tabixReader tabix reader for the catalog
    * @param firstResult passed through to {@code VerifyUtils.isTabixRetrievalSuccessful}
    * @param description word describing the row ("first" or "last"), used in the error message
    */
   private void verifyTabixLine(CatalogTabixEntry row, TabixReader tabixReader, boolean firstResult, String description)
      throws IOException, CatalogFormatException
   {
      if (row == null || tabixReader == null)
      {
         return;
      }

      boolean retrieved = VerifyUtils.isTabixRetrievalSuccessful(tabixReader, row, firstResult);
      if (retrieved)
      {
         return;
      }

      logger.logError(String.format("Failed to retrieve %s tabix entry for chromosome %s from line '%s'",
                                    description, row.getChromosome(), row.getLine()),
                      VerifyErrorCodes.TABIX_ENTRY_MISSING);
   }


   /**
    * Returns the chromosome of a row for use in messages.
    *
    * @param row the row; may be null
    * @return the row's chromosome, or the text "null" when there is no row
    */
   private String composeRowChromosome(CatalogTabixEntry row)
   {
      return (row == null) ? "null" : row.getChromosome();
   }

   /**
    * Checks a row against the previous row of the same chromosome.
    * Logs an error when the previous row's minimum position is greater than the current row's
    * (equal positions are accepted), and logs a warning when the current row is a variant whose
    * golden JSON equals that of the previous row.
    *
    * @param currentRow  row being checked
    * @param previousRow row checked just before it, on the same chromosome; must not be null
    * @param line        raw catalog line of {@code currentRow}, used in messages
    */
   private void verifySameChromosome(CatalogTabixEntry currentRow, CatalogTabixEntry previousRow, String line)
   {
      //================================================================
      // Processing SAME chromosome as previous line
      //================================================================

      // 1.) Make sure current line's position is not less than previous row's position within the chromosome:
      if (previousRow.getMinPosition() > currentRow.getMinPosition())
      {
         String msg = String.format("Previous position [%s] within chromosome is greater than " +
                                    "current row position [%s]. Line: '%s'",
                                    previousRow.getMinPosition(), currentRow.getMinPosition(), line);
         logger.logError(msg, VerifyErrorCodes.CHROMOSOME_POSITIONS_OUT_OF_ORDER);
      }

      // 2.) Warn about a variant that repeats the previous row's golden attributes
      // TODO - only do this check if it's a variant we don't want duplicates
      try
      {
         CatalogEntryGoldenJson currentRowGolden = new CatalogEntryGoldenJson(VerifyUtils.getJsonObject(currentRow.getJsonString()));
         CatalogEntryGoldenJson previousRowGolden = new CatalogEntryGoldenJson(VerifyUtils.getJsonObject(previousRow.getJsonString()));
         if (currentRowGolden.isVariant() && currentRowGolden.equals(previousRowGolden))
         {
            String msg = String.format("Duplicate variant information at position %s:%d-%d",
               previousRowGolden.getChr(), previousRowGolden.getMinBP(), previousRowGolden.getMaxBP());
            logger.logWarning(msg);
         }
      }
      catch (CatalogFormatException e)
      {
         // TODO?
         // Deliberately not reported through the message logger since I think it's covered in the
         // CatalogVariantVerifier. Leave a debug trace so the skipped check is not completely silent.
         sLogger.debug("Skipped duplicate variant check. Msg: " + e.getMessage());
      }
   }

   /**
    * Checks the chromosome of a row that starts a new chromosome block against the expected
    * chromosome list. Does nothing when no expected list was supplied.
    * <p>
    * Logs an error when the chromosome is not in the list; this also applies to the very first
    * chromosome of the catalog. Logs a warning when the chromosome appears in the list before
    * the previous row's chromosome.
    *
    * @param currentRow  first row of the new chromosome
    * @param previousRow last row checked before it; null for the first row of the catalog
    * @param line        raw catalog line of {@code currentRow}, used in messages
    * @throws RuntimeException if both rows map to the same index of the expected list
    */
   private void verifyChromosomeInExpectedOrder(CatalogTabixEntry currentRow, CatalogTabixEntry previousRow, String line)
   {
      if (!hasExpectedChrOrderList())
      {
         return;
      }

      // Membership is checked before the first-row shortcut below, so that the first chromosome
      // of the catalog is validated against the list as well
      int currentLnChrIdx = mExpectChrOrder.indexOf(currentRow.getChromosome());
      if (currentLnChrIdx < 0)
      {
         if (currentRow.getChromosome().equals(UNKNOWN_CHR))
         {
            logger.logInfo(String.format("Saw %s chromosome", UNKNOWN_CHR));
         }
         String msg = String.format("Chromosome '%s' is not in expected chromosome list.", currentRow.getChromosome());
         logger.logError(msg, VerifyErrorCodes.CHROMOSOME_NOT_IN_LIST);
         return;
      }

      // first time through there is no previous chromosome to compare the order against
      if (previousRow == null)
      {
         return;
      }

      // -1 when the previous chromosome is not in the list; neither branch below is taken then
      int previousLnChrIdx = mExpectChrOrder.indexOf(previousRow.getChromosome());
      // The current chr idx should be greater than previous row's chr idx.
      if (previousLnChrIdx > currentLnChrIdx)
      {
         String msg = String.format("Catalog entry for chromosome '%s' " +
                                    "not in expected chromosome order. Previous chromosome catalog entries " +
                                    "were for '%s', but the current catalog entry is for " +
                                    "'%s'. Full Catalog Entry is: '%s'",
                                    currentRow.getChromosome(), mExpectChrOrder.get(previousLnChrIdx),
                                    mExpectChrOrder.get(currentLnChrIdx), line);
         logger.logWarning(msg);
      }
      else if (currentLnChrIdx == previousLnChrIdx)
      {
         String msg = String.format("Unexpected code-path reached: Previous " +
                                    "chr index in list '%s' equal to current chr index in list '%s'. Should NOT " +
                                    "find this condition. Line: '%s'",
                                    previousLnChrIdx, currentLnChrIdx, line);
         throw new RuntimeException(msg);
      }
   }

   /**
    * Tells whether the message logger has recorded any errors.
    *
    * @return the result of {@code hasErrors()} on the message logger
    */
   public boolean areErrorsLogged()
   {
      return this.logger.hasErrors();
   }

   /**
    * Stores the map of BIOR index name to index file path.
    *
    * @param indexNamePathMap the index map to use; may be null
    */
   private void setBiorIndexMap(Map<String, String> indexNamePathMap)
   {
      this.mIndexNamePathMap = indexNamePathMap;
   }

   /**
    * Returns the catalog path that was given to the constructor.
    *
    * @return path of the catalog file being verified
    */
   public String getCatalogFileName()
   {
      return this.mCatalogFilePath;
   }

   /**
    * Stores the path of the catalog file to verify.
    *
    * @param catalogFileName path of the catalog file
    */
   private void setCatalogFileName(String catalogFileName)
   {
      this.mCatalogFilePath = catalogFileName;
   }

   /**
    * Stores the list of chromosome names in their expected order.
    *
    * @param expectedChrOrder expected chromosome order; may be null
    */
   private void setExpectedChrOrderList(List<String> expectedChrOrder)
   {
      this.mExpectChrOrder = expectedChrOrder;
   }

   /**
    * Tells whether there is an expected chromosome order to check against.
    *
    * @return true when the expected chromosome list is neither null nor empty
    */
   private boolean hasExpectedChrOrderList()
   {
      if (mExpectChrOrder == null)
      {
         return false;
      }
      return !mExpectChrOrder.isEmpty();
   }

   /**
    * Returns the message logger that was given to the constructor.
    *
    * @return the logger that collects the verification messages
    */
   public MessageLogger getLogger()
   {
      return this.logger;
   }


   // TODO - I think all this index checking stuff could be stored in an object to do the checking and be
   //        better self contained
   /**
    * Checks every BIOR index for the row that ends a chromosome and the row that starts the next.
    * An exception raised while checking one index is logged (stack trace to log4j, error with the
    * exception message to the message logger) and does not stop the remaining checks.
    *
    * @param currentRow               first row of the new chromosome; null when there is none
    * @param previousRow              last row of the previous chromosome; null when there is none
    * @param biorIndexEntrySet        BIOR index name/path pairs to check
    * @param lastRowWithThisBiorIndex most recent row seen for each indexed key
    */
   private void verifyIndexes(CatalogTabixEntry currentRow, CatalogTabixEntry previousRow,
                              Set<Entry<String, String>> biorIndexEntrySet,
                              TreeMap<String, CatalogTabixEntry> lastRowWithThisBiorIndex)
   {
      for (Entry<String, String> entry : biorIndexEntrySet)
      {
         String biorIndexName = entry.getKey();
         String biorIndexFileName = entry.getValue();
         try
         {
            verifyIndex(previousRow, biorIndexName, biorIndexFileName, lastRowWithThisBiorIndex, "last");
         }
         catch (Exception e)
         {
            // Broad catch on purpose: one failing index lookup must not abort the other checks
            sLogger.error(ExceptionUtils.getStackTrace(e));
            String msg = String.format("Problem trying to check BIOR index: '%s' for last row of chromosome '%s'. Msg: %s",
               biorIndexName, composeRowChromosome(previousRow), e.getMessage());
            logger.logError(msg, VerifyErrorCodes.BIOR_KEY_INDEX_VALUE_MISSING_ON_LAST_ROW_OF_CHROM);
         }
         
         try
         {
            verifyIndex(currentRow, biorIndexName, biorIndexFileName, lastRowWithThisBiorIndex, "first");
         }
         catch (Exception e)
         {
            sLogger.error(ExceptionUtils.getStackTrace(e));
            String msg = String.format("Problem trying to check BIOR index: '%s' for first row of chromosome '%s'. Msg: %s",
               biorIndexName, composeRowChromosome(currentRow), e.getMessage());
            logger.logError(msg, VerifyErrorCodes.BIOR_KEY_INDEX_VALUE_MISSING_ON_FIRST_ROW_OF_CHROM);
         }
      }
   }

   /**
    * verifyIndex():
    * The complexity with this method is that not every row in the catalog may have every BIOR index
    * key-value. We want to check each BIOR index for the last row of the previous chromosome, and
    * the first row of the new/next chromosome. If the given row doesn't have the BIOR index key,
    * we've stored the last row that did have that BIOR index, so check that row instead.
    * As long as we check one close to the chromosome boundary, we are ok.
    * <p>
    * On failure the logged message names the chromosome of {@code row} but quotes the line that
    * was actually looked up, which is the fallback row when {@code row} lacks the key.
    *
    * @param row                      row at the chromosome boundary; nothing is checked when null
    * @param biorIndexName            name of the indexed JSON key
    * @param biorIndexFileName        path of the index file for that key
    * @param lastRowWithThisBiorIndex most recent row seen for each indexed key
    * @param description              word describing the row ("first" or "last"), used in messages
    */
   private void verifyIndex(CatalogTabixEntry row, String biorIndexName, String biorIndexFileName,
                            TreeMap<String, CatalogTabixEntry> lastRowWithThisBiorIndex, String description)
   {
      if (row == null)
      {
         return;
      }

      CatalogTabixEntry catEntryToUse;
      if (row.getJsonString().contains(composeJsonIndexStr(biorIndexName)))
      {
         catEntryToUse = row;
      }
      else
      {
         catEntryToUse = lastRowWithThisBiorIndex.get(biorIndexName);
      }
      // if catEntryToUse is null, the key hasn't been seen so don't check the index
      if (catEntryToUse == null)
      {
         return;
      }
      if (!isBiorIndexRetrievalSuccessful(new File(getCatalogFileName()), new File(biorIndexFileName),
                                          biorIndexName, catEntryToUse.getJsonString()))
      {
         // Quote the line whose JSON was looked up, not the boundary row that may lack the key
         String msg = String.format("Failed to retrieve BIOR Index entry for %s line of " +
                                    "chromosome %s for key '%s'. Line: '%s'",
                                    description, row.getChromosome(), biorIndexName, catEntryToUse.getLine());
         logger.logError(msg, VerifyErrorCodes.BIOR_KEY_INDEX_FILE_MISSING_OR_CORRUPT);
      }
   }

   /**
    * Builds the text searched for in a row's JSON to detect an indexed key: the key name in
    * double quotes, immediately followed by a colon.
    *
    * @param biorIndexName name of the indexed JSON key
    * @return the quoted key followed by ':'
    */
   private String composeJsonIndexStr(String biorIndexName)
   {
      StringBuilder quotedKey = new StringBuilder();
      quotedKey.append(DBL_QUOTE).append(biorIndexName).append(DBL_QUOTE);
      quotedKey.append(":");
      return quotedKey.toString();
   }

   /**
    * Looks up the row's value for an indexed key through the BIOR index and reports whether the
    * row's JSON comes back.
    *
    * @param catalogFile       catalog file
    * @param biorIndexFile     index file for the key
    * @param indexedColumnName name of the indexed JSON key
    * @param json              JSON of the catalog row to find
    * @return false when the JSON cannot be parsed (an error is logged) or when the lookup yields
    *         no matching row; true when the value is not checked (no usable value, or "."),
    *         when exactly one row matches, or when several match (a warning is logged)
    */
   private boolean isBiorIndexRetrievalSuccessful(File catalogFile, File biorIndexFile,
                                                  String indexedColumnName, String json)
   {
      JsonObject rowJson;
      try
      {
         rowJson = mJsonParser.parse(json).getAsJsonObject();
      }
      catch (Exception e)
      {
         logger.logError(String.format("Couldn't parse json '%s'. Msg: '%s'", json, e.getMessage()),
                         VerifyErrorCodes.JSON_UNPARSEABLE);
         return false;
      }

      String indexedValue = getValueForIndexedKey(rowJson, indexedColumnName);
      // Nothing to check without a value, and '.' doesn't get indexed
      if (indexedValue == null || ".".equals(indexedValue))
      {
         return true;
      }

      int matchCount = findMatches(catalogFile, biorIndexFile, json, indexedValue).size();
      if (matchCount == 0)
      {
         return false;
      }
      if (matchCount != 1)
      {
         logger.logWarning(String.format("BIOR index lookup on key '%s' with value " +
                                         "'%s' found more than 1 row (%d) with matching json",
                                         indexedColumnName, indexedValue, matchCount));
      }
      return true;
   }

   /**
    * Runs a BIOR index lookup for one value and collects the result lines whose indexed value and
    * JSON both equal the ones given.
    *
    * @param catalogFile        catalog file
    * @param biorIndexFile      index file to look the value up in
    * @param json               JSON of the catalog row to find
    * @param valueForIndexedKey value to look up
    * @return the matching result lines; empty when there are none
    */
   private List<String> findMatches(File catalogFile, File biorIndexFile, String json, String valueForIndexedKey)
   {
      String catalogPath = catalogFile.getPath();
      String indexPath = biorIndexFile.getPath();
      Metadata lookupMetadata = new Metadata(catalogPath, "bior_lookup");
      Pipeline lookup = new Pipeline(new HistoryInPipe(lookupMetadata),
                                     new LookupPipe(catalogPath, indexPath),
                                     new HistoryOutPipe());
      lookup.setStarts(Collections.singletonList(valueForIndexedKey));

      List<String> found = new ArrayList<String>();
      while (lookup.hasNext())
      {
         String outputLine = (String) lookup.next();
         // Header lines are not lookup results
         if (outputLine.startsWith("#"))
         {
            continue;
         }
         // A result line is expected to be "<value>\t<json>"
         // TODO Anything else would be a bizarre result and you might want to throw some RuntimeException but we'll skip for now
         String[] columns = outputLine.split("\t");
         if (columns.length != 2)
         {
            continue;
         }
         // TODO I think the values always match and if they don't, that indicates some issue with the index retrieval?
         boolean sameValue = valueForIndexedKey.equals(columns[0]);
         if (sameValue && json.equals(columns[1]))
         {
            found.add(outputLine);
         }
      }
      return found;
   }

   /**
    * Extracts the value of a top-level key of a JSON object as the text used for index lookup.
    *
    * @param jsonObj              JSON object of a catalog row
    * @param keyOfValueToRetrieve name of the key to read
    * @return null when the key is absent, its value is JSON null, or its value is an array (a
    *         warning is logged the first time an array is seen for a key); the unquoted text for
    *         a string; otherwise the {@code toString()} form of the value
    */
   private String getValueForIndexedKey(JsonObject jsonObj, String keyOfValueToRetrieve)
   {
      // Direct member lookup; returns null when the object has no such key
      JsonElement jsonElem = jsonObj.get(keyOfValueToRetrieve);
      if (jsonElem == null)
      {
         return null;
      }
      if (jsonElem.isJsonNull())
      {
         // we don't check on null values for a field
         return null;
      }
      if (jsonElem.isJsonArray())
      {
         if (!arrayKeysReported.contains(keyOfValueToRetrieve))
         {
            String msg = String.format("Currently we are not verifying indexes on arrays (key '%s')",
                                       keyOfValueToRetrieve);
            logger.logWarning(msg);
            arrayKeysReported.add(keyOfValueToRetrieve);
         }
         return null;
      }
      if (jsonElem.isJsonPrimitive())
      {
         JsonPrimitive primitive = jsonElem.getAsJsonPrimitive();
         // getAsString() drops the surrounding quotes of a JSON string
         return primitive.isString() ? primitive.getAsString() : primitive.toString();
      }
      // Remaining case is a nested JSON object
      return jsonElem.toString();
   }
}
