package edu.mayo.bior.catalog.verification;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.rules.TemporaryFolder;

import edu.mayo.bior.buildcatalog.BuildCatalogStepExecuteException;
import edu.mayo.bior.buildcatalog.BuildCatalogStepInputException;
import edu.mayo.bior.buildcatalog.BuildInfo;
import edu.mayo.bior.buildcatalog.BuildInfoKey;
import edu.mayo.bior.buildcatalog.BuildStepKey;
import edu.mayo.bior.buildcatalog.CreatePropFilesStep;
import edu.mayo.bior.buildcatalog.CreateStep;
import edu.mayo.bior.buildcatalog.StepLogger;
import edu.mayo.bior.catalog.CatalogDataSource;
import edu.mayo.bior.catalog.CatalogFormatException;
import edu.mayo.bior.catalog.CatalogMetadataConstant;
import edu.mayo.bior.catalog.HumanBuildAssembly;
import edu.mayo.bior.catalog.verification.CatalogVerifier.VAL_TYPE;
import edu.mayo.bior.cli.cmd.VerifyCatalogCommand;
import edu.mayo.bior.cli.func.BaseFunctionalTest;
import edu.mayo.bior.cli.func.CommandOutput;
import edu.mayo.pipes.history.ColumnMetaData;
import edu.mayo.pipes.history.ColumnMetaData.Type;
import edu.mayo.pipes.util.BiorProperties;

public class CatalogVerifierTest  extends BaseFunctionalTest
{
   // JUnit rule supplying a temporary folder for each test; tests create their scratch directories inside it.
   @Rule
	public TemporaryFolder mTempFolder = new TemporaryFolder();

   // NOTE(review): not referenced in the visible part of this class - confirm it is still needed.
   @Rule
   public ExpectedException expectedException = ExpectedException.none();

	// Scratch directory inside mTempFolder; (re)assigned in beforeEach().
	private File mTempDir = null;
	// NOTE(review): not referenced in the visible part of this class - confirm it is still needed.
	private final String EOL = "\n";
	
	/** Points {@link BiorProperties} at the test properties file once, before any test in this class runs. */
	@BeforeClass
	public static void beforeAll() {
		final String testPropertiesPath = "src/test/resources/bior.properties.test";
		BiorProperties.setFile(testPropertiesPath);
	}

	/**
	 * Per-test setup: allocates a fresh scratch directory in {@link #mTempDir} and points
	 * {@link BiorProperties} back at the default test properties file (some tests in this class
	 * redirect it to a temporary, modified copy).
	 *
	 * @throws IllegalStateException if the scratch directory cannot be created; failing here gives a
	 *         clear setup error instead of a later NullPointerException on {@code mTempDir}
	 */
	@Before
	public void beforeEach() {
		try {
			// The @Rule has already created the root temp folder by the time @Before methods run.
			// Calling mTempFolder.create() again would replace it and orphan the first folder on disk.
			mTempDir = mTempFolder.newFolder();
		} catch (IOException e) {
			throw new IllegalStateException("Could not create the temporary directory for the test", e);
		}

		// Restore the default test properties in case a previous test pointed BiorProperties elsewhere
		BiorProperties.setFile("src/test/resources/bior.properties.test");
	}

   /**
    * Builds a catalog from the tabToTjsonOutput.tsv fixture and runs a STRICT verification on it
    * with no MessageLogger supplied.  The test passes when neither step throws.
    * <p>
    * Exceptions are deliberately allowed to propagate: JUnit then reports the original exception
    * together with its full stack trace, instead of a bare message from {@code fail(...)}.
    *
    * @throws Exception if building or verifying the catalog fails
    */
   @Test
   public void testExitStatusZero() throws Exception
   {
      String tjsonCatalogInput = "src/test/resources/createCatalog/tabToTjsonOutput.tsv";
      File outputDir = mTempFolder.newFolder();
      String catalogPrefix = "verifyCatalog_TestCatalog";

      File catalogFile = createTheCatalog(tjsonCatalogInput, outputDir.getAbsolutePath(), catalogPrefix);
      assertTrue("The catalog should exist now so that we can run verify on it.", catalogFile.exists());

      CatalogVerifier verifyCat = new CatalogVerifier();
      verifyCat.verify(catalogFile.getAbsolutePath(), VAL_TYPE.STRICT, null);
   }

   // Regression test for bug 378197: the verifier used to throw an exception because it couldn't figure out
   // what the entries looked like.  With the fix in place verify() no longer throws, so simply reaching the
   // end of this method means the test passed.
   @Test
   public void testIsPositionalBug378197() throws Exception
   {
      String jsonInput = "src/test/resources/testData/verification/json378197";
      String outputDirPath = mTempFolder.newFolder().getAbsolutePath();

      File builtCatalog = createTheCatalog(jsonInput, outputDirPath, "catalog378197");
      assertTrue(builtCatalog.exists());

      new CatalogVerifier().verify(builtCatalog.getPath(), VAL_TYPE.STRICT, null);
   }

   /**
    * Builds a catalog from the json-UNKNOWN fixture and runs a STRICT verification on it without a logger.
    * The test passes as long as the catalog gets created and verify() does not throw.
    */
   @Test
   public void testIsPositionalUNKNOWN() throws Exception
   {
      String jsonInput = "src/test/resources/testData/verification/json-UNKNOWN";
      String outputDirPath = mTempFolder.newFolder().getAbsolutePath();

      File builtCatalog = createTheCatalog(jsonInput, outputDirPath, "catalogUnknown");
      assertTrue(builtCatalog.exists());

      new CatalogVerifier().verify(builtCatalog.getPath(), VAL_TYPE.STRICT, null);
   }

   /**
    * Verifies the no_extra_files_catalog fixture and expects the verifier to log errors and warnings,
    * including the messages about missing H2 indexes, a missing genome assembly and usage stats.
    */
   @Test
   public void testCatalogWithNoFiles() throws Exception
   {
      String catalogPath = new File("src/test/resources/testData/verification/no_extra_files_catalog/catalog.tsv.bgz").getPath();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);

      new CatalogVerifier().verify(catalogPath, VAL_TYPE.STRICT, log);

      // Uncomment to dump the verifier log to the console:
      // System.out.println(logBuffer);
      assertTrue(log.hasErrors());
      assertTrue(log.numWarnings() > 4);
      assertTrue(log.numErrors() > 5);

      String report = logBuffer.toString();
      assertContains(report, "No H2 indexes");
      assertContains(report, "Don't have genome assembly");
      assertContains(report, "Cannot report usage stats");
   }

   /** Test sampling the catalog so that only 1 in every 5 lines is verified. */
   @Test
   public void testSampleSize() throws Exception
   {
      String catalogPath = new File("src/test/resources/testData/verification/no_extra_files_catalog/catalog.tsv.bgz").getPath();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);

      // The last argument (5) is the sampling interval
      new CatalogVerifier().verify(catalogPath, VAL_TYPE.STRICT, log, 5);

      // Echo the verifier log to the console
      System.out.println(logBuffer);

      assertTrue(log.hasErrors());
      assertEquals(5, log.numWarnings());
      assertEquals(7, log.numErrors());

      // The log holds many more warnings and errors; only the sampling-related lines are checked here
      String report = logBuffer.toString();
      assertContains(report, "INFO: Verifying 1 in 5 rows for");
      assertContains(report, "INFO: Verified 4 of total 20 rows of catalog, finished at");
      assertFalse(report.contains("INFO: Rows counted: 20.  Rows verified: 4"));
   }

   /**
    * Verifies the catalog_with_1_4_fields fixture and expects an error about rows mixing
    * 4 columns and 1 column.
    */
   @Test
   public void testCatalogWith1And4Fields() throws Exception
   {
      String catalogPath = new File("src/test/resources/testData/verification/catalog_with_1_4_fields/catalog.tsv.bgz").getPath();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);

      new CatalogVerifier().verify(catalogPath, VAL_TYPE.STRICT, log);

      // Uncomment to dump the verifier log to the console:
      // System.out.println(logBuffer);
      assertTrue(log.hasErrors());

      String report = logBuffer.toString();
      assertContains(report, "Saw a mix of 4 columns and 1 columns");
      assertContains(report, "Cannot report usage stats");
   }

   /**
    * Verifies the catalog_with_1_variant_1_nonvariant fixture and expects an error reporting that the
    * _refAllele length differs from the calculated length.
    */
   @Test
   public void testCatalogWith1VariantAnd1Nonvariant() throws Exception
   {
      String catalogPath = new File("src/test/resources/testData/verification/catalog_with_1_variant_1_nonvariant/catalog.tsv.bgz").getPath();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);

      new CatalogVerifier().verify(catalogPath, VAL_TYPE.STRICT, log);

      // Uncomment to dump the verifier log to the console:
      // System.out.println(logBuffer);
      assertTrue(log.hasErrors());

      String report = logBuffer.toString();
      assertContains(report, "_refAllele length [2] is not equal to calculated length [1]");
      assertContains(report, "Cannot report usage stats");
   }

   /** Verifies the catalog_with_2_fields fixture and expects an error stating that it has 2 fields. */
   @Test
   public void test2ColumnCatalog() throws Exception
   {
      String catalogPath = new File("src/test/resources/testData/verification/catalog_with_2_fields/catalog.tsv.bgz").getPath();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);

      new CatalogVerifier().verify(catalogPath, VAL_TYPE.STRICT, log);

      assertTrue(log.hasErrors());

      String report = logBuffer.toString();
      assertContains(report, "It has 2 fields");
      assertContains(report, "Cannot report usage stats");
   }

   /**
    * Verifies the catalog_with_variant_issues fixture and expects errors for an alt allele equal to the
    * ref allele, a _refAllele/REF mismatch and an alt allele containing an invalid nucleotide.  Also checks
    * that the logger exposed by the verifier itself reports errors.
    */
   @Test
   public void testVariantIssues() throws Exception
   {
      String catalogPath = new File("src/test/resources/testData/verification/catalog_with_variant_issues/catalog.tsv.bgz").getPath();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);
      CatalogVerifier catalogVerifier = new CatalogVerifier();

      catalogVerifier.verify(catalogPath, VAL_TYPE.STRICT, log);

      // Uncomment to dump the verifier log to the console:
      // System.out.println(logBuffer);
      assertTrue(log.hasErrors());

      String report = logBuffer.toString();
      assertContains(report, "One of the _altAlleles [G] is equal to the _refAllele");
      assertContains(report, "Cannot report usage stats");
      assertContains(report, "_refAllele [T] is not equal to the REF value [G]");
      assertContains(report, "One of the _altAlleles [PA] contains an invalid nucleotide ");

      assertTrue(catalogVerifier.getLogger().hasErrors());
   }

   /**
    * Verifies the catalog_with_duplicate_variants fixture and expects at least one warning, with
    * duplicate-variant messages for positions on chromosomes 21 and X.
    */
   @Test
   public void testDuplicateVariants() throws Exception
   {
      String catalogPath = new File("src/test/resources/testData/verification/catalog_with_duplicate_variants/catalog.tsv.bgz").getPath();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);

      new CatalogVerifier().verify(catalogPath, VAL_TYPE.STRICT, log);

      assertTrue(log.numWarnings() > 0);

      String report = logBuffer.toString();
      assertContains(report, "Duplicate variant information at position 21:26960070-26960070");
      assertContains(report, "Duplicate variant information at position X:26965151-26965151");
      assertContains(report, "Cannot report usage stats");
   }

   /**
    * Verifies the catalog_with_noalt_variant fixture and checks that the log does NOT contain a complaint
    * about a JSON ALT value of [null].
    */
   @Test
   public void testVariantWithNoAlt() throws Exception
   {
      String catalogPath = new File("src/test/resources/testData/verification/catalog_with_noalt_variant/catalog.tsv.bgz").getPath();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);

      new CatalogVerifier().verify(catalogPath, VAL_TYPE.STRICT, log);

      List<String> forbiddenMessages = Arrays.asList("not equal to the JSON ALT value [null]");
      assertContainsNone(logBuffer.toString(), forbiddenMessages);
   }

   /**
    * Verifies the catalog_with_bad_blacklist fixture and expects an error naming the columns (key8,key9)
    * that are not found in the columns.tsv file.
    */
   @Test
   public void testBlacklistFile() throws Exception
   {
      String catalogPath = new File("src/test/resources/testData/verification/catalog_with_bad_blacklist/catalog.tsv.bgz").getPath();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);

      new CatalogVerifier().verify(catalogPath, VAL_TYPE.STRICT, log);

      // Uncomment to dump the verifier log to the console:
      // System.out.println(logBuffer);
      assertTrue(log.hasErrors());

      String report = logBuffer.toString();
      assertContains(report, "number of occurrences in catalog");
      assertContains(report, "contains columns not found in the columns.tsv file: key8,key9");
   }

   /**
    * Verifies the catalog_with_column_issues fixture and expects an error listing the JSON keys
    * (key3,key4) that are missing from columns.tsv.
    */
   @Test
   public void testJsonColumnIssues() throws Exception
   {
      String catalogPath = new File("src/test/resources/testData/verification/catalog_with_column_issues/catalog.tsv.bgz").getPath();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);

      new CatalogVerifier().verify(catalogPath, VAL_TYPE.STRICT, log);

      assertTrue(log.hasErrors());

      String report = logBuffer.toString();
      assertContains(report, "number of occurrences in catalog");
      assertContains(report, "Keys in json not found in columns.tsv: 'key3,key4'");
   }

   // TODO - make this a human catalog if you can by loading smaller chromosomes
   /**
    * Verifies the catalog_with_positional_issues fixture and expects errors where the tabix columns
    * disagree with the _landmark/_minBP/_maxBP JSON values, plus an out-of-order position error.
    */
   @Test
   public void testJsonChromosomeIssues() throws Exception
   {
      String catalogPath = new File("src/test/resources/testData/verification/catalog_with_positional_issues/catalog.tsv.bgz").getPath();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);

      new CatalogVerifier().verify(catalogPath, VAL_TYPE.STRICT, log);

      assertTrue(log.hasErrors());

      String report = logBuffer.toString();
      assertContains(report, "Tabix chr [2] not same as _landmark value [1]");
      assertContains(report, "Tabix start position [3] not same as _minBP value [4]");
      assertContains(report, "Tabix stop position [3] not same as _maxBP value [4]");
      assertContains(report, "Previous position [3] within chromosome is greater than current row position [1]");
      assertContains(report, "Cannot report usage stats");
   }

   /**
    * Verifies the catalog_with_bad_columns_tsv fixture and expects exactly 4 errors, all of them carrying
    * the COLUMNS_TSV_HAS_ERRORS code, including one about column 'key7' not being found in the catalog.
    */
   @Test
   public void testBadColumnsTsv() throws Exception
   {
      String catalogPath = new File("src/test/resources/testData/verification/catalog_with_bad_columns_tsv/catalog.tsv.bgz").getPath();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);

      new CatalogVerifier().verify(catalogPath, VAL_TYPE.STRICT, log);

      assertTrue(log.hasErrors());

      String report = logBuffer.toString();
      assertContains(report, "number of occurrences in catalog");
      assertContains(report, "Column 'key7' not found in the catalog");

      assertEquals(4, log.numErrors());
      assertEquals(4, log.numErrorsForCode(VerifyErrorCodes.COLUMNS_TSV_HAS_ERRORS));
   }

   /**
    * Verifies the catalog_with_empty_columns_tsv fixture and expects an error saying that no columns
    * were read.
    */
   @Test
   public void testEmptyColumnsTsv() throws Exception
   {
      String catalogPath = new File("src/test/resources/testData/verification/catalog_with_empty_columns_tsv/catalog.tsv.bgz").getPath();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);

      new CatalogVerifier().verify(catalogPath, VAL_TYPE.STRICT, log);

      assertTrue(log.hasErrors());
      assertContains(logBuffer.toString(), "No columns were read from");
   }
	
   /**
    * Regression test for a NullPointerException that occurred when the catalog had only two entries
    * and they were on different chromosomes.
    * This also caught a bug where a MessageLog object was passed a LogLevel.WARN instead of
    * LogLevel.WARNING, which caused a RuntimeException.
    */
   @Test
   public void testNullPointerExceptionOnSmallCatalog() throws Exception
   {
      String jsonInput = "src/test/resources/testData/verification/mini_catalog.json";
      String outputDirPath = mTempFolder.newFolder().getAbsolutePath();

      File builtCatalog = createTheCatalog(jsonInput, outputDirPath, "miniCatalog2LinesDiffChroms");
      assertTrue(builtCatalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      MessageLogger log = new MessageLogger(new StringWriter());
      catalogVerifier.verify(builtCatalog.getPath(), VAL_TYPE.STRICT, log);
      // Reaching this point without an exception is the success condition.
   }

   /**
    * Builds a catalog from mini_catalog.json and checks the per-key occurrence counts that the
    * verifier logs ('key' twice, 'key2' once).
    */
   @Test
   public void testNumberOfOccurrencesCount() throws Exception
   {
      String jsonInput = "src/test/resources/testData/verification/mini_catalog.json";
      String outputDirPath = mTempFolder.newFolder().getAbsolutePath();

      File builtCatalog = createTheCatalog(jsonInput, outputDirPath, "miniCatalog2LinesDiffChroms");
      assertTrue(builtCatalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      StringWriter logBuffer = new StringWriter();
      catalogVerifier.verify(builtCatalog.getPath(), VAL_TYPE.STRICT, new MessageLogger(logBuffer));

      String report = logBuffer.toString();
      assertContains(report, "'key': number of occurrences in catalog: 2");
      assertContains(report, "'key2': number of occurrences in catalog: 1");
   }

   /**
    * Verifies the catalog_with_UNKNOWN fixture and expects the log to state that reference alleles are not
    * verified for chromosome UNKNOWN and that UNKNOWN appears in the chromosome order.
    */
   @Test
   public void testUNKNOWNChromosome() throws Exception
   {
      File fixtureCatalog = new File("src/test/resources/testData/verification/catalog_with_UNKNOWN/catalog.tsv.bgz");
      assertTrue(fixtureCatalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      StringWriter logBuffer = new StringWriter();
      catalogVerifier.verify(fixtureCatalog.getPath(), VAL_TYPE.STRICT, new MessageLogger(logBuffer));

      String report = logBuffer.toString();
      assertContains(report, "Will not verify reference allele values for chromosome UNKNOWN");
      assertContains(report, "Order of chromosomes: UNKNOWN");
   }

   /**
    * Verifies the omim_catalog_with_grch37p13 fixture and expects exactly 3 warnings, none of which is
    * the "Don't have genome assembly" warning.
    */
   @Test
   public void testOmimGRCh37p13NoGenomeAssemblyWarning() throws Exception
   {
      File fixtureCatalog = new File("src/test/resources/testData/verification/omim_catalog_with_grch37p13/omim_genes.tsv.bgz");
      assertTrue(fixtureCatalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);
      catalogVerifier.verify(fixtureCatalog.getPath(), VAL_TYPE.STRICT, log);

      assertEquals(3, log.numWarnings());

      List<String> forbiddenMessages = Arrays.asList("Don't have genome assembly");
      assertContainsNone(logBuffer.toString(), forbiddenMessages);
   }

   /** Verifies the catalog_with_format_1.1.2 fixture and expects exactly 3 warnings. */
   @Test
   public void testCatalogWithFormat112() throws Exception
   {
      File fixtureCatalog = new File("src/test/resources/testData/verification/catalog_with_format_1.1.2/omim_genes.tsv.bgz");
      assertTrue(fixtureCatalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      MessageLogger log = new MessageLogger(new StringWriter());
      catalogVerifier.verify(fixtureCatalog.getPath(), VAL_TYPE.STRICT, log);

      assertEquals(3, log.numWarnings());
   }

   /**
    * Verifies the catalog_with_bogus_format fixture and expects 2 errors in total, exactly one of them
    * with the BAD_CATALOG_FORMAT_GENERAL code.
    */
   @Test
   public void testCatalogWithBogusFormat() throws Exception
   {
      File fixtureCatalog = new File("src/test/resources/testData/verification/catalog_with_bogus_format/omim_genes.tsv.bgz");
      assertTrue(fixtureCatalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      MessageLogger log = new MessageLogger(new StringWriter());
      catalogVerifier.verify(fixtureCatalog.getPath(), VAL_TYPE.STRICT, log);

      assertEquals(2, log.numErrors());
      assertEquals(1, log.numErrorsForCode(VerifyErrorCodes.BAD_CATALOG_FORMAT_GENERAL));
   }

   /**
    * Verifies the catalog_with_format_1.1.2_missing_release_date fixture and expects 2 errors, one of them
    * BAD_CATALOG_FORMAT_GENERAL, with the log mentioning a required datasource property.
    */
   @Test
   public void testCatalogWithFormat112MissingDataSourceReleaseDate() throws Exception
   {
      File fixtureCatalog = new File("src/test/resources/testData/verification/catalog_with_format_1.1.2_missing_release_date/omim_genes.tsv.bgz");
      assertTrue(fixtureCatalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);
      catalogVerifier.verify(fixtureCatalog.getPath(), VAL_TYPE.STRICT, log);

      assertEquals(2, log.numErrors());
      assertEquals(1, log.numErrorsForCode(VerifyErrorCodes.BAD_CATALOG_FORMAT_GENERAL));
      assertContains(logBuffer.toString(), "datasource.properties has issue: Required datasource property");
   }

   /**
    * Verifies the catalog_with_bad_release_date_format fixture and expects a single error, coded
    * DATASOURCE_PROPERTIES_BAD_RELEASE_DATE, whose log text mentions an unparseable date.
    */
   @Test
   public void testCatalogWithBadDataSourceReleaseDateFormat() throws Exception
   {
      File fixtureCatalog = new File("src/test/resources/testData/verification/catalog_with_bad_release_date_format/omim_genes.tsv.bgz");
      assertTrue(fixtureCatalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);
      catalogVerifier.verify(fixtureCatalog.getPath(), VAL_TYPE.STRICT, log);

      // Echo the verifier log to the console
      System.out.println(logBuffer);

      assertEquals(1, log.numErrors());
      assertEquals(1, log.numErrorsForCode(VerifyErrorCodes.DATASOURCE_PROPERTIES_BAD_RELEASE_DATE));

      String report = logBuffer.toString();
      assertTrue(report.contains("Unparseable date"));
   }

   /**
    * Verifies the catalog_with_impossible_release_date fixture and expects a single error, coded
    * DATASOURCE_PROPERTIES_BAD_RELEASE_DATE, whose log text mentions an unparseable date.
    */
   @Test
   public void testCatalogWithImpossibleDataSourceReleaseDate() throws Exception
   {
      File fixtureCatalog = new File("src/test/resources/testData/verification/catalog_with_impossible_release_date/omim_genes.tsv.bgz");
      assertTrue(fixtureCatalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);
      catalogVerifier.verify(fixtureCatalog.getPath(), VAL_TYPE.STRICT, log);

      // Echo the verifier log to the console
      System.out.println(logBuffer);

      assertEquals(1, log.numErrors());
      assertEquals(1, log.numErrorsForCode(VerifyErrorCodes.DATASOURCE_PROPERTIES_BAD_RELEASE_DATE));

      String report = logBuffer.toString();
      assertTrue(report.contains("Unparseable date"));
   }

   /** Verifies the catalog_with_empty_release_date fixture and expects the verifier to log no errors. */
   @Test
   public void testCatalogWithEmptyDataSourceReleaseDate() throws Exception
   {
      File fixtureCatalog = new File("src/test/resources/testData/verification/catalog_with_empty_release_date/omim_genes.tsv.bgz");
      assertTrue(fixtureCatalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);
      catalogVerifier.verify(fixtureCatalog.getPath(), VAL_TYPE.STRICT, log);

      // Echo the verifier log to the console
      System.out.println(logBuffer);

      assertEquals(0, log.numErrors());
   }

   /**
    * When an "N" base-pair in the catalog lines up with a usual nucleotide ("A","C","G","T") in the
    * reference assembly, or vice-versa, the verifier should only log a warning and not an error,
    * as this is still treated as a match.
    * <p>
    * The test installs a custom 20bp reference for chromosome 1, builds a small catalog against it and
    * checks (via assertGoodCatalog) that there are no errors and that each expected 'N' warning is logged.
    *
    * @throws Exception if the reference assembly or the catalog cannot be built or verified
    */
   @Test
   public void testRefAssembly_Ns() throws Exception
   {
      // Reference bases for chr 1, positions 1-20 (written in groups of five for readability)
      changeBiorPropertiesForCustomRefAssembly(
            Arrays.asList(
                  concat("1", "1", "20", "NNNNN"+"NNNNN"+"AANNN"+"NAANT")
            )
      );

      final String WARNING_PREFIX = "WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  ";
      // One expected warning per catalog line that involves an 'N' on either side.
      // (The 1:2-2 entry used to be listed twice; the copy/paste duplicate added nothing and was removed.)
      List<String> warningMsgs = Arrays.asList(
            WARNING_PREFIX + "(Ref='A',  refAssemblySequence='N')  1:2-2",
            WARNING_PREFIX + "(Ref='N',  refAssemblySequence='N')  1:10-10",
            WARNING_PREFIX + "(Ref='N',  refAssemblySequence='A')  1:11-11",
            WARNING_PREFIX + "(Ref='AAAA',  refAssemblySequence='AANN')  1:11-14",
            WARNING_PREFIX + "(Ref='AAAA',  refAssemblySequence='NNAA')  1:15-18",
            WARNING_PREFIX + "(Ref='AAAA',  refAssemblySequence='NAAN')  1:16-19"
      );

      assertGoodCatalog( Arrays.asList(
            // Match catalog 'A' vs 'N' in refAssembly
            swapQuotes(concat("1", "2",  "2",  "{'_landmark':'1','_minBP':2,'_maxBP':2,'_refAllele':'A','_altAlleles':['C','G']}")),

            // Match catalog 'N' versus 'N' in refAssembly
            swapQuotes(concat("1", "10", "10", "{'_landmark':'1','_minBP':10,'_maxBP':10,'_refAllele':'N','_altAlleles':['A','C']}")),

            // Match catalog 'N' vs 'A' in refAssembly
            swapQuotes(concat("1", "11", "11", "{'_landmark':'1','_minBP':11,'_maxBP':11,'_refAllele':'N','_altAlleles':['A','C']}")),

            // Match catalog 'A' vs 'A' in refAssembly (no 'N' involved, so no warning is expected for this line)
            swapQuotes(concat("1", "11", "11", "{'_landmark':'1','_minBP':11,'_maxBP':11,'_refAllele':'A','_altAlleles':['C','G']}")),

            // Match catalog 'AAAA' vs 'AANN' in refAssembly  (to test that all base-pairs are matched and not just the first)
            swapQuotes(concat("1", "11", "14", "{'_landmark':'1','_minBP':11,'_maxBP':14,'_refAllele':'AAAA','_altAlleles':['C','G']}")),

            // Match catalog 'AAAA' vs 'NNAA' in refAssembly
            swapQuotes(concat("1", "15", "18", "{'_landmark':'1','_minBP':15,'_maxBP':18,'_refAllele':'AAAA','_altAlleles':['C','G']}")),

            // Match catalog 'AAAA' vs 'NAAN' in refAssembly  (is this one possible???)
            swapQuotes(concat("1", "16", "19", "{'_landmark':'1','_minBP':16,'_maxBP':19,'_refAllele':'AAAA','_altAlleles':['C','G']}"))
         ), warningMsgs  );
   }

   /**
    * When reference alleles don't match the reference assembly (and the differing bases are NOT N's),
    * the verifier should report errors.
    * @throws Exception
    */
   @Test
   public void testRefAssembly_BadRefs_ShouldThrowErrors()  throws Exception
   {
      // Install a custom 20bp reference for chromosome 1
      List<String> refAssemblyLines = Arrays.asList(
            concat("1", "1", "20", "NNNNN"+"NNNNN"+"AANNN"+"NAANT")
      );
      changeBiorPropertiesForCustomRefAssembly(refAssemblyLines);

      List<String> catalogLines = Arrays.asList(
            // Should NOT match the ref "GCATT" with the refAssembly's "NAANT" - one base-pair difference
            swapQuotes(concat("1", "16", "20", "{'_landmark':'1','_minBP':16,'_maxBP':20,'_refAllele':'GCATT','_altAlleles':['T']}")),

            // Should NOT match the ref "CC" with the refAssembly's "AA"
            swapQuotes(concat("1", "17", "18", "{'_landmark':'1','_minBP':17,'_maxBP':18,'_refAllele':'CC','_altAlleles':['T']}")),

            // Should NOT match the ref "A" with the refAssembly's "T"
            swapQuotes(concat("1", "20", "20", "{'_landmark':'1','_minBP':20,'_maxBP':20,'_refAllele':'A','_altAlleles':['C','G']}"))
      );
      String verifyLog = getVerifyCatalogOutput(catalogLines);

      assertContains(verifyLog, "WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  (Ref='GCATT',  refAssemblySequence='NAANT')  1:16-20");
      assertContains(verifyLog, "ERROR 501: _refAllele [GCATT] does not match reference sequence value [NAANT] for:  chr=1 position=16");
      assertContains(verifyLog, "ERROR 501: _refAllele [CC] does not match reference sequence value [AA] for:  chr=1 position=17");
      assertContains(verifyLog, "ERROR 501: _refAllele [A] does not match reference sequence value [T] for:  chr=1 position=20");
      assertContains(verifyLog, "ERROR 504: Chromosome 1: _refAllele values that DO NOT match the reference sequence is [3].");
      // WARNING from missing properties in datasource.properties
      assertContains(verifyLog, "INFO: Verify #ERROR: 4, #WARNING: 2.");
   }
   
   /**
    * Copies the contents of the bior.properties.test file, redirects its GRCh37 reference-sequence entry
    * to a small reference assembly created in {@link #mTempDir}, and points {@link BiorProperties} at the
    * modified copy.  This lets a test match its own reference alleles for a given range.
    *
    * @param refAssemblyLines lines passed to {@code createCatalog} to build the mini reference assembly
    * @throws IOException if the properties file cannot be read or the temp files cannot be written
    * @throws InterruptedException declared because the catalog-creation helper called at the end requires it
    */
   private void changeBiorPropertiesForCustomRefAssembly(List<String> refAssemblyLines) throws IOException, InterruptedException {
      String biorPropsTest = FileUtils.readFileToString(new File("src/test/resources/bior.properties.test"));
      File miniRefAssemblyCtg = new File(mTempDir, "miniRefAssem.tsv.bgz");

      // Change values in the bior.properties file (for temp).
      // The path is used as a regex *replacement*, where '\' and '$' are special, so it has to be quoted;
      // otherwise Windows-style paths (or paths containing '$') are mangled or make replaceAll throw.
      String key = BiorProperties.Key.humanRefSeqGrch37File.name();
      String replacement = java.util.regex.Matcher.quoteReplacement(key + "=" + miniRefAssemblyCtg.getCanonicalPath());
      biorPropsTest = biorPropsTest.replaceAll(key + "=.*", replacement);

      // Now save our modified bior.properties contents to temp bior.properties file
      File modifiedBiorPropsTest = new File(mTempDir, "bior.properties.test.miniRefAssem");
      FileUtils.writeStringToFile(modifiedBiorPropsTest, biorPropsTest);

      // Change BiorProperties to point to that new file
      BiorProperties.setFile(modifiedBiorPropsTest.getCanonicalPath());

      // Now create the ref assembly and tabix file
      createCatalog(refAssemblyLines, miniRefAssemblyCtg);
   }

   
   /**
    * Builds and verifies a catalog from the given lines and asserts that it verifies without errors.
    * Warnings are allowed; every message in {@code warningMsgs} must appear in the verifier output.
    *
    * @param inputCatalogAllLines lines of the catalog to build and verify
    * @param warningMsgs          warning messages that must be present in the verifier output
    */
   private void assertGoodCatalog(List<String> inputCatalogAllLines, List<String> warningMsgs) throws BuildCatalogStepInputException, IOException, VerifierExecutionException, VerifierInputException {
      final String verifyLog = getVerifyCatalogOutput(inputCatalogAllLines);

      // The temp ref assembly we created should have been found, so neither of these may show up
      final List<String> noAssemblyWarning = Arrays.asList("WARNING: Don't have genome assembly. Will not be verifying chromosomal positions or reference allele values.");
      final List<String> lookupInitWarning = Arrays.asList("WARNING: Unable to successfully initialize reference sequence lookup utility for chromosome");
      assertContainsNone(verifyLog, noAssemblyWarning);
      assertContainsNone(verifyLog, lookupInitWarning);

      // Zero errors expected
      assertContains(verifyLog, "Verify #ERROR: 0");

      // No warnings or errors about a reference allele not matching
      final List<String> refMismatchFragment = Arrays.asList("] does not match reference sequence value [");
      assertContainsNone(verifyLog, refMismatchFragment);

      // Every expected warning has to be present
      for (String expectedWarning : warningMsgs) {
         boolean found = verifyLog.contains(expectedWarning);
         assertTrue("Output should have contained warning: " + expectedWarning, found);
      }
   }

   /**
    * Writes the given lines to "myInput.tsv" in {@link #mTempDir}, builds a catalog with prefix "myCtg"
    * from it, switches the catalog's datasource.properties Build entry to GRCh37, runs a STRICT
    * verification and returns everything the verifier logged.
    * The log is also echoed to stdout between two separator lines.
    *
    * @param inputCatalogAllLines lines of the catalog input file, joined with "\n"
    * @return the complete verifier log
    */
   private String getVerifyCatalogOutput(List<String> inputCatalogAllLines)
         throws IOException, BuildCatalogStepInputException, VerifierExecutionException, VerifierInputException
   {
      File rawInput = new File(mTempDir, "myInput.tsv");
      String joinedLines = StringUtils.join(inputCatalogAllLines, "\n");
      FileUtils.write(rawInput, joinedLines);
      createTheCatalog(rawInput.getCanonicalPath(), mTempDir.getCanonicalPath(), "myCtg");

      File builtCatalog = new File(mTempDir, "myCtg.tsv.bgz");
      setDataSourcePropertiesBuildToGrch37(builtCatalog);

      assertTrue(builtCatalog.exists());
      CatalogVerifier catalogVerifier = new CatalogVerifier();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);
      catalogVerifier.verify(builtCatalog.getPath(), VAL_TYPE.STRICT, log);

      final String separator = "==============================================================================";
      System.out.println(separator);
      System.out.println(logBuffer);
      System.out.println(separator);

      return logBuffer.toString();
   }

   /**
    * Rewrites the "Build=" entry of the datasource.properties file that sits next to the given catalog
    * (same name with ".tsv.bgz" replaced by ".datasource.properties") so that it reads "Build=GRCh37".
    *
    * @param catalogFile the catalog whose sibling datasource.properties file is modified
    * @throws IOException if the properties file cannot be read or written
    */
   private void setDataSourcePropertiesBuildToGrch37(File catalogFile) throws IOException {
      File propsFile = new File(catalogFile.getParentFile(),
                                catalogFile.getName().replace(".tsv.bgz", ".datasource.properties"));
      String currentContents = FileUtils.readFileToString(propsFile);
      String patchedContents = currentContents.replaceAll("Build=.*", "Build=GRCh37");
      FileUtils.write(propsFile, patchedContents);
   }

   /** Indexing on the key 'a', but it is not present in the first line of the catalog; no errors are expected. */
   @Test
   public void testIndexIssueWhereKeyIsMissingFromFirstLine() throws Exception
   {
      File fixtureCatalog = new File("src/test/resources/testData/verification/catalog_with_indexes/catalog.tsv.bgz");
      assertTrue(fixtureCatalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      MessageLogger log = new MessageLogger(new StringWriter());
      catalogVerifier.verify(fixtureCatalog.getPath(), VAL_TYPE.STRICT, log);

      assertEquals(0, log.numErrors());
   }

   /**
    * Verifies the catalog_with_indexes fixture and expects no errors, with the log noting that indexes
    * on the array keys 'a' and 'a2' are currently not verified.
    */
   @Test
   public void testDifferentTypesOfIndexes() throws Exception
   {
      File fixtureCatalog = new File("src/test/resources/testData/verification/catalog_with_indexes/catalog.tsv.bgz");
      assertTrue(fixtureCatalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      StringWriter logBuffer = new StringWriter();
      MessageLogger log = new MessageLogger(logBuffer);
      catalogVerifier.verify(fixtureCatalog.getPath(), VAL_TYPE.STRICT, log);

      assertEquals(0, log.numErrors());

      String report = logBuffer.toString();
      assertContains(report, "Currently we are not verifying indexes on arrays (key 'a')");
      assertContains(report, "Currently we are not verifying indexes on arrays (key 'a2')");
   }

   /**
    * Verifies the catalog_with_multi_level_index catalog in STRICT mode. No errors are expected,
    * but the log must mention that index key 'i' is missing from columns.tsv and that the
    * hierarchical key 'i.j' is not verified.
    */
   @Test
   public void testMultiLevelIndex() throws Exception
   {
      final String catalogPath = "src/test/resources/testData/verification/catalog_with_multi_level_index/catalog.tsv.bgz";
      File catalog = new File(catalogPath);
      assertTrue(catalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      StringWriter logOutput = new StringWriter();
      MessageLogger messageLogger = new MessageLogger(logOutput);

      catalogVerifier.verify(catalog.getPath(), VAL_TYPE.STRICT, messageLogger);

      assertEquals(0, messageLogger.numErrors());

      String log = logOutput.toString();
      assertContains(log, "Index key 'i' not found in columns.tsv");
      // The spelling below mirrors the message text being matched - do not "correct" it here
      assertContains(log, "Currently we are not verifying indexes on hierarchichal keys (key 'i.j')");
   }
   
   /**
    * The colName for index "catalog.my.vcf.x.idx.h2.db"
    *      with catalog     "catalog.my.vcf.tsv.bgz"
    *      with prefix      "catalog.my.vcf"
    * should be "x", and not "my.vcf.x".
    *
    * The test therefore expects no errors and neither a "not found in columns.tsv" message
    * for 'my.vcf.x' nor any hierarchical-key notice in the log.
    */
   @Test
   public void testIndexCatalogWithExtraDotsInPrefix() throws Exception
   {
      final String catalogPath = "src/test/resources/testData/verification/catalog_with_extra_dots_in_prefix/catalog.my.vcf.tsv.bgz";
      File catalog = new File(catalogPath);
      assertTrue(catalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      StringWriter logOutput = new StringWriter();
      MessageLogger messageLogger = new MessageLogger(logOutput);

      catalogVerifier.verify(catalog.getPath(), VAL_TYPE.STRICT, messageLogger);

      assertEquals(0, messageLogger.numErrors());

      String logText = logOutput.toString();
      boolean mentionsWrongKey = logText.contains("Index key 'my.vcf.x' not found in columns.tsv");
      assertFalse(mentionsWrongKey);
      boolean mentionsHierarchicalKey = logText.contains("Currently we are not verifying indexes on hierarchichal keys (");
      assertFalse(mentionsHierarchicalKey);
   }

   /**
    * The colName for index "00-All.ID.idx.h2.db"
    *      with catalog     "00-All.vcf.tsv.bgz"
    *      with prefix      "00-All.vcf"
    * should be "ID", and not "vcf.ID".
    *
    * The test expects no errors, no "Index key '...' not found in columns.tsv" message of any
    * kind, and no hierarchical-key notice in the log.
    */
   @Test
   public void testIndexCatalogWithExtraDotsInPrefixDbsnpException() throws Exception
   {
      final String catalogPath = "src/test/resources/testData/verification/catalog_with_extra_dots_dbsnp/00-All.vcf.tsv.bgz";
      File catalog = new File(catalogPath);
      assertTrue(catalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      StringWriter logOutput = new StringWriter();
      MessageLogger messageLogger = new MessageLogger(logOutput);

      catalogVerifier.verify(catalog.getPath(), VAL_TYPE.STRICT, messageLogger);

      assertEquals(0, messageLogger.numErrors());

      String logText = logOutput.toString();

      // For the dbSNP-style catalog name, no index-key complaint at all should be logged:
      // neither the opening nor the closing part of the "not found" message may appear
      assertFalse(logText.contains("Index key '"));
      assertFalse(logText.contains("' not found in columns.tsv"));

      // ...and in particular not for the wrongly derived key 'vcf.ID'
      assertFalse(logText.contains("Index key 'vcf.ID' not found in columns.tsv"));

      assertFalse(logText.contains("Currently we are not verifying indexes on hierarchichal keys ("));
   }

   
   /**
    * Verifies the catalog_M_mismatch catalog in STRICT mode and pins the exact error counts
    * reported by the logger: 25 errors in total, 24 of them with code 502 and 1 with code 505.
    */
   @Test
   public void testCatalogWithChrMMessages() throws Exception
   {
      final String catalogPath = "src/test/resources/testData/verification/catalog_M_mismatch/CADD_MT13301_10.tsv.bgz";
      File catalog = new File(catalogPath);
      assertTrue(catalog.exists());

      CatalogVerifier catalogVerifier = new CatalogVerifier();
      StringWriter logOutput = new StringWriter();
      MessageLogger messageLogger = new MessageLogger(logOutput);

      catalogVerifier.verify(catalog.getPath(), VAL_TYPE.STRICT, messageLogger);

      // Total first, then the per-code breakdown (24 + 1 = 25)
      assertEquals(25, messageLogger.numErrors());
      assertEquals(24, messageLogger.numErrorsForCode(502));
      assertEquals(1, messageLogger.numErrorsForCode(505));
   }
   
   /**
    * Seeds the row verifier and JSON verifier counters with a value beyond Integer.MAX_VALUE,
    * runs the row verifier over the small sample catalog, and checks that the counts written
    * to the log are reported as values larger than an int can hold.
    */
   @Test
   public void testCatalogRowsOver2Billion_intToLong_rowVerifier() throws IOException, BuildCatalogStepInputException, VerifierExecutionException, VerifierInputException, CatalogFormatException {
	   File catalog = createSampleCatalog();
	   assertTrue(catalog.exists());

	   StringWriter logOutput = new StringWriter();
	   MessageLogger messageLogger = new MessageLogger(logOutput);

	   // The sample catalog is created with the prefix "bigCounts", hence the properties file name
	   File dataSourcePropsFile = new File(catalog.getParentFile(), "bigCounts.datasource.properties");
	   CatalogDataSource dataSource = new CatalogDataSource(dataSourcePropsFile);

	   final String afKey = "AF";
	   ColumnMetaData afColumn = new ColumnMetaData(afKey, Type.Float, ".", "Allele Frequency");
	   Map<String, ColumnMetaData> columnsByKey = new HashMap<String, ColumnMetaData>();
	   columnsByKey.put(afKey, afColumn);

	   CatalogRowVerifier rowVerifier = new CatalogRowVerifier(catalog, dataSource, columnsByKey, messageLogger, /*oneInXLinesToVerify=*/1L);
	   CatalogJsonVerifier jsonVerifier = new CatalogJsonVerifier(columnsByKey, messageLogger);

	   // Start every counter 1000 past the int limit (Integer.MAX_VALUE == 2,147,483,647)
	   final long seed = (long)(Integer.MAX_VALUE) + 1000L;
	   final long chunkEnd = seed + 100L;
	   rowVerifier._test_setCatalogJsonVerifier(jsonVerifier);
	   rowVerifier._test_setChunk(seed, chunkEnd);
	   rowVerifier._test_setRowCounts(seed, seed);
	   jsonVerifier._test_setInitial_maxValuesForKey(afKey, seed);
	   jsonVerifier._test_setInitial_numberOfTimesKeySeenInCatalog(afKey, seed);

	   rowVerifier.verify();
	   rowVerifier.reportColumnUsage();

	   // Messages logged by the row verifier
	   String log = logOutput.toString();
	   assertContains(log, "Chunk (lines): 2147484647 - 2147484747");
	   assertContains(log, "Verified 2147484650 of total 2147484650 rows of catalog,");
	   assertContains(log, "INFO: Column 'AF': number of occurrences in catalog: 2147484650. Largest number of elements found for array column: 2147484647.");
    }
    
 
/**
 * Writes a three-row TJSON input file ("input.json") into the test's temp directory and builds
 * a catalog with the prefix "bigCounts" from it, in that same directory.
 *
 * @return the catalog's .tsv.bgz file
 * @throws IOException if the input file cannot be written or a path cannot be resolved
 * @throws BuildCatalogStepInputException declared by the catalog-building helper
 */
   private File createSampleCatalog() throws BuildCatalogStepInputException, IOException {
	   // One variant per row: landmark, minBP, maxBP, then the JSON column
	   StringBuilder tjsonRows = new StringBuilder();
	   tjsonRows.append(concat("1",  "100",  "100",  swapQuotes("{'_landmark':'1','_minBP':100,'_maxBP':100,'_refAllele':'A','_altAlleles':['C','G'],'AF':[0.1]}") )).append(EOL);
	   tjsonRows.append(concat("1",  "101",  "101",  swapQuotes("{'_landmark':'1','_minBP':101,'_maxBP':101,'_refAllele':'A','_altAlleles':['C','G'],'AF':[0.22]}") )).append(EOL);
	   tjsonRows.append(concat("1",  "102",  "102",  swapQuotes("{'_landmark':'1','_minBP':102,'_maxBP':102,'_refAllele':'A','_altAlleles':['C','G'],'AF':[0.3]}") )).append(EOL);

	   File tjsonFile = new File(this.mTempDir, "input.json");
	   FileUtils.write(tjsonFile, tjsonRows.toString());

	   String tjsonPath = tjsonFile.getCanonicalPath();
	   String targetDirPath = mTempDir.getCanonicalPath();
	   return createTheCatalog(tjsonPath, targetDirPath, "bigCounts");
   }
   
   
   /**
    * NOTE: When not specifying the default verify output file, it should put it in the current
    * working directory, NOT throw an error.
    *
    * The expected "<prefix>_verify.txt" file is worked out before the command runs so that
    * (a) a stale copy left by an earlier run cannot make the existence check pass by accident, and
    * (b) cleanup is registered even when one of the assertions below fails.
    *
    * @throws IOException if the temp folder or catalog paths cannot be created/resolved
    * @throws BuildCatalogStepInputException declared by the catalog-building helper
    */
   @Test
   public void testDefaultOutputVerifyFile() throws UnsupportedEncodingException, IOException, BuildCatalogStepInputException {
	   File catalogDir = mTempFolder.newFolder();
	   String catalogPrefix = "my";
	   File catalogFile = createTheCatalog("src/test/resources/testData/verification/mini_catalog.json", catalogDir.getAbsolutePath(), catalogPrefix);
	   assertTrue(catalogFile.exists());

	   // The verify log is expected in the working directory, named after the catalog
	   File verifyOutFile = new File(System.getProperty("user.dir"), catalogFile.getName().replace(".tsv.bgz", "_verify.txt"));

	   // Remove any leftover from a previous run, and make sure whatever this run writes is
	   // deleted on exit regardless of how the assertions below turn out
	   FileUtils.deleteQuietly(verifyOutFile);
	   verifyOutFile.deleteOnExit();

	   CommandOutput out = runCmdApp(new VerifyCatalogCommand(), "bior_verify_catalog", "-d", catalogFile.getCanonicalPath());

	   // Check for the specific failure first so it is reported by name, then require a clean stderr
	   assertFalse("Verify command could not create its default log file: " + out.stderr,
	               out.stderr.contains("Problem creating writer for log file"));
	   assertEquals("", out.stderr);

	   assertTrue(verifyOutFile.exists());
   }
   
   //=======================================================================

   /**
    * Builds a catalog from a TJSON input file by running the create-catalog step followed by
    * the create-property-files step, then checks that the catalog and its tabix index exist,
    * are non-empty, and that the index is not older than the catalog.
    *
    * Any exception thrown by either build step fails the test via {@code fail(...)}.
    *
    * @param inputTjsonForCatalog  path to the TJSON file used as the catalog input
    * @param outDir                directory used as both the target and the temp directory
    * @param catalogPrefixToUse    catalog prefix, used for the output file name
    * @return the catalog file that was built
    * @throws BuildCatalogStepInputException declared for the build-info setup
    * @throws IOException if the make-json script path cannot be resolved
    */
   private File createTheCatalog(String inputTjsonForCatalog, String outDir, String catalogPrefixToUse)
      throws BuildCatalogStepInputException, IOException
   {
      File makeJsonScript = new File("src/test/resources/buildCatalog/makeJsonStep.sh");

      Map<String, String> settings = new HashMap<String, String>();
      settings.put(BuildInfoKey.MAKE_JSON_SCRIPT_PATH.name(), makeJsonScript.getCanonicalPath());
      settings.put(BuildInfoKey.MAKE_JSON_ARGS.name(), "");
      // The TJSON input file doubles as the "make json" output
      settings.put(BuildInfoKey.MAKE_JSON_OUTPUT_FILE_PATH.name(), inputTjsonForCatalog);
      settings.put(BuildInfoKey.CATALOG_PREFIX.name(), catalogPrefixToUse);
      settings.put(BuildInfoKey.TARGET_DIR.name(), outDir);
      settings.put(BuildInfoKey.TEMP_DIR.name(), outDir);
      settings.put(BuildInfoKey.DATA_SOURCE_BUILD.name(), HumanBuildAssembly.GRCh37.name());
      settings.put(BuildInfoKey.DATA_SOURCE.name(), "x");
      settings.put(BuildInfoKey.DATA_SOURCE_VERSION.name(), "2");
      BuildInfo buildInfo = new BuildInfo(settings);

      try
      {
         // Step 1: build the catalog itself
         File targetDir = new File(buildInfo.getTargetDirectory());
         StepLogger catalogStepLogger = new StepLogger(BuildStepKey.MAKE_CATALOG.toString(), targetDir);
         CreateStep catalogStep = new CreateStep(buildInfo, catalogStepLogger);
         catalogStep.execute();

         // Step 2: generate the accompanying property files
         StepLogger propFilesStepLogger = new StepLogger(catalogPrefixToUse, new File(outDir));
         CreatePropFilesStep propFilesStep = new CreatePropFilesStep(buildInfo, propFilesStepLogger);
         propFilesStep.execute();
      }
      catch (IOException ioEx)
      {
         ioEx.printStackTrace();
         fail("Unexpected io exception occurred: " + ioEx.getMessage());
      }
      catch (BuildCatalogStepInputException inputEx)
      {
         fail("Unexpected exception occurred: " + inputEx.getMessage());
      }
      catch (BuildCatalogStepExecuteException executeEx)
      {
         fail("Unexpected exception occurred: " + executeEx.getMessage());
      }

      String catalogPath = outDir + "/" + buildInfo.getCatalogPrefix() + CatalogMetadataConstant.CATALOG_FILE_SUFFIX;
      File catalog = new File(catalogPath);
      File tabixIndex = new File(catalogPath + ".tbi");

      // Both outputs must exist and have content
      assertTrue(catalog.exists());
      assertTrue(catalog.length() > 0);
      assertTrue(tabixIndex.exists());
      assertTrue(tabixIndex.length() > 0);

      // The index must be at least as recent as the catalog it indexes
      assertTrue(tabixIndex.lastModified() >= catalog.lastModified());
      return catalog;
   }
}
