package edu.mayo.bior.cli.cmd;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.junit.Assert.assertFalse;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import com.google.common.io.Files;

import edu.mayo.bior.catalog.verification.CatalogVerifier;
import edu.mayo.bior.cli.func.BaseFunctionalTest;
import edu.mayo.bior.cli.func.CommandOutput;
import edu.mayo.pipes.util.BiorProperties;


public class VerifyCommandTest extends BaseFunctionalTest {

	// JUnit rule supplying the temporary folder that tests create their files under.
	@Rule
	public TemporaryFolder mTempFolder = new TemporaryFolder();
	
	// Working directory for the current test; assigned in beforeEach() via mTempFolder.newFolder().
	private File  mTempDir;
	// Line-feed string constant.
	private final String EOL = "\n";
	//private File  catalogBgzipFile;
	//private final String CATALOG_PREFIX = "test";

	
	/**
	 * Prepares each test: allocates a fresh working directory inside the rule-managed
	 * temporary folder and points {@code BiorProperties} at the test properties file.
	 *
	 * @throws IOException if the working directory cannot be created
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Before
	public void beforeEach() throws IOException, InterruptedException {
		// The @Rule has already created the root temporary folder by the time @Before
		// methods run. Calling mTempFolder.create() here would allocate a second root
		// and leave the first one behind, because the rule only deletes the latest one.
		mTempDir = mTempFolder.newFolder();

		BiorProperties.setFile("src/test/resources/bior.properties.test");
	}

	//=============================================================================================================
	/**
	 * Runs the verifier restricted to the METADATA phase and expects exit code 0, an
	 * empty stderr, and a report holding the metadata messages but none of the order
	 * or JSON messages.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testSinglePhase_Metadata() throws IOException, InterruptedException {
		final File catalog = new File(mTempDir, "test.tsv.bgz");
		final File report = new File(mTempDir, "verify_output.txt");
		createCatalogWithMetadataFiles(catalog);

		final String[] cmdArgs = {
				"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE, catalog.getAbsolutePath(),
				"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,  report.getAbsolutePath(),
				"-" + VerifyCatalogCommand.OPTION_PHASE,        CatalogVerifier.Phase.METADATA.name()
		};
		final CommandOutput result = runCmdApp(new VerifyCatalogCommand(), "bior_verify", cmdArgs);
		final String reportText = FileUtils.readFileToString(report);

		// Process-level expectations
		assertEquals(0, result.exit);
		assertEquals("", result.stderr);
		assertContains(result.stdout, "#ERROR: 0, #WARNING: 2.");

		// Only the metadata phase may have contributed to the report
		assertContainsAll(reportText,  getMetadataOutput());
		assertContainsNone(reportText, getOrderOutput());
		assertContainsNone(reportText, getJsonOutput_allLines());
	}

    


	//=============================================================================================================
	/**
	 * Runs the verifier restricted to the ORDER phase and expects the report to hold
	 * the order messages but none of the metadata or JSON messages.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testSinglePhase_Order() throws IOException, InterruptedException {
		File verifyOutFile = new File(mTempDir, "verify_output.txt");
		File ctgBgz = new File(mTempDir, "test.tsv.bgz");
		createCatalogWithMetadataFiles(ctgBgz);
		CommandOutput output = runCmdApp(new VerifyCatalogCommand(), "bior_verify",
				new String[] {
						"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE,  ctgBgz.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,   verifyOutFile.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_PHASE,         CatalogVerifier.Phase.ORDER.name()
				}
		);

		String verifyTxtOut = FileUtils.readFileToString(verifyOutFile);

		printOutputsForDebugging(output, verifyTxtOut);

		// Expected value first, so a failure reports "expected"/"actual" the right way round.
		assertEquals("", output.stderr);
		assertContains(output.stdout, "#ERROR: 1, #WARNING: 1.");

		assertContainsNone(verifyTxtOut, getMetadataOutput());
		assertContainsAll(verifyTxtOut,  getOrderOutput());
		assertContainsNone(verifyTxtOut, getJsonOutput_allLines());
	}

	//=============================================================================================================
	/**
	 * Runs the verifier restricted to the JSON phase and expects the report to hold
	 * every JSON message but none of the metadata or order messages.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testSinglePhase_Json() throws IOException, InterruptedException {
		File verifyOutFile = new File(mTempDir, "verify_output.txt");
		File ctgBgz = new File(mTempDir, "test.tsv.bgz");
		createCatalogWithMetadataFiles(ctgBgz);
		CommandOutput output = runCmdApp(new VerifyCatalogCommand(), "bior_verify",
				new String[] {
						"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE,  ctgBgz.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,   verifyOutFile.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_PHASE,         CatalogVerifier.Phase.JSON.name()
				}
		);

		String verifyTxtOut = FileUtils.readFileToString(verifyOutFile);

		printOutputsForDebugging(output, verifyTxtOut);

		// Expected value first, so a failure reports "expected"/"actual" the right way round.
		assertEquals("", output.stderr);
		assertContains(output.stdout, "Verify #ERROR: 12, #WARNING: 2.");

		// NOTE: Contains NONE
		assertContainsNone(verifyTxtOut, getMetadataOutput());
		assertContainsNone(verifyTxtOut, getOrderOutput());
		// NOTE: Contains ALL
		assertContainsAll(verifyTxtOut,  getJsonOutput_allLines());
	}

	//=============================================================================================================
	/**
	 * Supplies the METADATA, ORDER and JSON phases as one comma-delimited value of the
	 * phase flag and expects the report to hold the messages of all three phases.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testAllPhases_asCsv() throws IOException, InterruptedException {
		File verifyOutFile = new File(mTempDir, "verify_output.txt");
		File ctgBgz = new File(mTempDir, "test.tsv.bgz");
		createCatalogWithMetadataFiles(ctgBgz);
		String all3PhasesAsCsv = CatalogVerifier.Phase.METADATA.name() + "," + CatalogVerifier.Phase.ORDER.name() + "," + CatalogVerifier.Phase.JSON.name();
		CommandOutput output = runCmdApp(new VerifyCatalogCommand(), "bior_verify",
				new String[] {
						"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE,  ctgBgz.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,   verifyOutFile.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_PHASE,         all3PhasesAsCsv
				}
		);

		String verifyTxtOut = FileUtils.readFileToString(verifyOutFile);

		printOutputsForDebugging(output, verifyTxtOut);

		// Expected value first, so a failure reports "expected"/"actual" the right way round.
		assertEquals("", output.stderr);
		assertContains(output.stdout, "Verify #ERROR: 13, #WARNING: 5.");

		assertContainsAll(verifyTxtOut, getMetadataOutput());
		assertContainsAll(verifyTxtOut, getOrderOutput());
		assertContainsAll(verifyTxtOut, getJsonOutput_allLines());
	}
    
	//=============================================================================================================
	/**
	 * Omits the phase flag entirely and expects the report to hold the messages of the
	 * metadata, order and JSON phases.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testAllPhases_noFlag() throws IOException, InterruptedException {
		File verifyOutFile = new File(mTempDir, "verify_output.txt");
		File ctgBgz = new File(mTempDir, "test.tsv.bgz");
		createCatalogWithMetadataFiles(ctgBgz);
		CommandOutput output = runCmdApp(new VerifyCatalogCommand(), "bior_verify",
				new String[] {
						"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE,  ctgBgz.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,   verifyOutFile.getAbsolutePath()
				}
		);

		String verifyTxtOut = FileUtils.readFileToString(verifyOutFile);

		printOutputsForDebugging(output, verifyTxtOut);

		// Expected value first, so a failure reports "expected"/"actual" the right way round.
		assertEquals("", output.stderr);
		assertContains(output.stdout, "Verify #ERROR: 13, #WARNING: 5.");

		assertContainsAll(verifyTxtOut, getMetadataOutput());
		assertContainsAll(verifyTxtOut, getOrderOutput());
		assertContainsAll(verifyTxtOut, getJsonOutput_allLines());
	}

	//=============================================================================================================
	/**
	 * Deletes the blacklist and blacklist.biorweb files of an otherwise good catalog and
	 * expects exactly the two "file does not exist" warnings in the report.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testWarningOnMissingBlacklistFiles() throws IOException, InterruptedException {
		File verifyOutFile = new File(mTempDir, "verify_output.txt");
		File ctgBgz = new File(mTempDir, "test.tsv.bgz");
		createCatalogWithMetadataFiles_good(ctgBgz);
		deleteBlacklistFiles(ctgBgz);
		CommandOutput output = runCmdApp(new VerifyCatalogCommand(), "bior_verify",
				new String[] {
						"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE,  ctgBgz.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,   verifyOutFile.getAbsolutePath()
				}
		);

		String verifyTxtOut = FileUtils.readFileToString(verifyOutFile);

		printOutputsForDebugging(output, verifyTxtOut);

		// Expected value first, so a failure reports "expected"/"actual" the right way round.
		assertEquals("", output.stderr);
		assertContains(output.stdout, "Verify #ERROR: 0, #WARNING: 2.");

		assertContainsAll(verifyTxtOut, Arrays.asList(
				"WARNING: test.columns.tsv.blacklist file does not exist",
				"WARNING: test.columns.tsv.blacklist.biorweb file does not exist"
				) );
	}

	/**
	 * Removes the two blacklist files that sit next to the given catalog
	 * ({@code <prefix>.columns.tsv.blacklist} and {@code <prefix>.columns.tsv.blacklist.biorweb}),
	 * where the prefix is the catalog file name with ".tsv.bgz" removed.
	 *
	 * @param ctgBgz catalog file whose sibling blacklist files are removed
	 * @throws IllegalStateException if either file still exists after the delete attempt
	 */
	private void deleteBlacklistFiles(File ctgBgz) {
		String prefix = ctgBgz.getName().replace(".tsv.bgz", "");
		File[] blacklistFiles = {
				new File(ctgBgz.getParentFile(), prefix + ".columns.tsv.blacklist"),
				new File(ctgBgz.getParentFile(), prefix + ".columns.tsv.blacklist.biorweb")
		};
		for (File blacklistFile : blacklistFiles) {
			// File.delete() signals failure only through its return value. A file that was
			// never there is fine, but one that survives the delete would silently
			// invalidate the caller's "missing file" scenario, so check explicitly.
			blacklistFile.delete();
			if (blacklistFile.exists()) {
				throw new IllegalStateException("Could not delete blacklist file: " + blacklistFile.getAbsolutePath());
			}
		}
	}

	//=============================================================================================================
	/**
	 * Passes an unrecognized phase name and expects an application error on stderr,
	 * nothing on stdout, and an empty report file.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testBadPhaseFlag() throws IOException, InterruptedException {
		File verifyOutFile = new File(mTempDir, "verify_output.txt");
		File ctgBgz = new File(mTempDir, "test.tsv.bgz");
		createCatalogWithMetadataFiles(ctgBgz);
		CommandOutput output = runCmdApp(new VerifyCatalogCommand(), "bior_verify",
				new String[] {
						"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE,  ctgBgz.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,   verifyOutFile.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_PHASE,         "BAD_PHASE"
				}
		);

		String verifyTxtOut = FileUtils.readFileToString(verifyOutFile);

		//printOutputsForDebugging(output, verifyTxtOut);

		assertContains(output.stderr, "Application error bior_verify:\n: No valid arguments were supplied for the -phase flag: BAD_PHASE");
		// Expected value first, so a failure reports "expected"/"actual" the right way round.
		assertEquals("", output.stdout);

		assertEquals("", verifyTxtOut);
	}
	//=============================================================================================================
	/**
	 * Passes start line 1 without a numLines flag and expects the same totals and
	 * report content as a run over all lines.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testStartNumLines_startLine1_noNumLines() throws IOException, InterruptedException {
		File verifyOutFile = new File(mTempDir, "verify_output.txt");
		File ctgBgz = new File(mTempDir, "test.tsv.bgz");
		createCatalogWithMetadataFiles(ctgBgz);
		CommandOutput output = runCmdApp(new VerifyCatalogCommand(), "bior_verify",
				new String[] {
						"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE,  ctgBgz.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,   verifyOutFile.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_START_LINE,    "1"
				}
		);

		String verifyTxtOut = FileUtils.readFileToString(verifyOutFile);

		printOutputsForDebugging(output, verifyTxtOut);

		// Expected value first, so a failure reports "expected"/"actual" the right way round.
		assertEquals("", output.stderr);
		assertContains(output.stdout, "Verify #ERROR: 13, #WARNING: 5.");

		assertContainsAll(verifyTxtOut, getMetadataOutput());
		assertContainsAll(verifyTxtOut, getOrderOutput());
		assertContainsAll(verifyTxtOut,  getJsonOutput_allLines());
	}
    
	/**
	 * Returns a fixed list of expected verify-report messages (warnings, errors and info lines).
	 * <p>
	 * Several entries are written with single quotes and converted with
	 * {@code .replace("'", "\"")} so the JSON fragments do not need escaped double quotes.
	 * Some entries contain literal tab characters, which are part of the expected text.
	 * <p>
	 * NOTE(review): no caller of this method is visible in this part of the file; confirm it is still used.
	 *
	 * @return fixed-size list of expected message fragments
	 */
	private List<String> getJsonOutput_lines1to3() {
		return Arrays.asList(
			"WARNING: Description not found for these columns: [A]",
			"WARNING: HumanReadableName is set to the same value as the ColumnName for these columns: [A, B]",
			// Here replace() applies only to the JSON literal; the surrounding single quotes are kept.
			"WARNING: Catalog entry for chromosome '5' not in expected chromosome order. Previous chromosome catalog entries were for '17', but the current catalog entry is for '5'. Full Catalog Entry is: '5	11640	11640	" + "{'A':5,'_landmark':'5','_minBP':11640,'_maxBP':11640,'_refAllele':'A','_altAlleles':['C']}".replace("'", "\"") + "'",
			"ERROR 302: Chromosome '23' is not in expected chromosome list.",
			"INFO: Order of chromosomes: 1,17,5,23,X",
			"INFO: Found the following issues parsing this json '{\"A\":\"17\",\"_landmark\":\"17\",\"_minBP\":305,\"_maxBP\":305,\"_refAllele\":\"C\",\"_altAlleles\":[\"C\"]}'",
			"ERROR 406: Data type does not match between json [String] and columns.tsv [Integer] for key: A",
			// In the next three entries replace() applies to the whole literal.
			"ERROR 210: Tabix start position [309] not same as _minBP value [305].  Row: 17	309	309	{'A':'17','_landmark':'17','_minBP':305,'_maxBP':305,'_refAllele':'C','_altAlleles':['C']...".replace("'", "\""),
			"ERROR 211: Tabix stop position [309] not same as _maxBP value [305].  Row: 17	309	309	{'A':'17','_landmark':'17','_minBP':305,'_maxBP':305,'_refAllele':'C','_altAlleles':['C']...".replace("'", "\""),
			"ERROR 605: One of the _altAlleles [C] is equal to the _refAllele. Json: {'A':'17','_landmark':'17','_minBP':305,'_maxBP':305,'_refAllele':'C','_altAlleles':['C']}".replace("'", "\""),
			"ERROR 501: _refAllele [A] does not match reference sequence value [T] for:  chr=5 position=11640",
			"ERROR 504: Chromosome 5: _refAllele values that DO NOT match the reference sequence is [1].",
			"ERROR 306: Chromosome value not found in chromosome size map: 23 Cannot check position [13327] for chromosome value.",
			"ERROR 215: Tabix start position not in valid chromosome size range.  Chr: 23 Chr Max Position: null Tabix entry min position: 13327",
			"ERROR 306: Chromosome value not found in chromosome size map: 23 Cannot check position [13327] for chromosome value.",
			"ERROR 216: Tabix stop position not in valid chromosome size range.  Chr: 23 Chr Max Position: null Tabix entry min position: 13327",
			"ERROR 209: Tabix chr [23] not same as _landmark value [X]",
			"WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  (Ref='A',  refAssemblySequence='N')  X:13327-13327",
			"WARNING: Column 'B' not found in the catalog. Consider removing it from the columns.tsv file.",
			"WARNING: Column 'C.Nested' not found in the catalog. Consider removing it from the columns.tsv file.",
			"WARNING: Column 'D' not found in the catalog. Consider removing it from the columns.tsv file.",
			"INFO: Verified 3 of total 6 rows of catalog, finished at",
			"INFO: Column '_landmark': number of occurrences in catalog: 3.",
			"INFO: Column '_minBP': number of occurrences in catalog: 3.",
			"INFO: Column '_maxBP': number of occurrences in catalog: 3.",
			"INFO: Column '_altAlleles': number of occurrences in catalog: 3. Largest number of elements found for array column: 1.",
			"INFO: Column '_refAllele': number of occurrences in catalog: 3.",
			"INFO: Column 'A': number of occurrences in catalog: 3.",
			"INFO: Verify #ERROR: 12, #WARNING: 7."
			);
	}
	//=============================================================================================================
	/**
	 * Passes numLines=2 without a start line on the "good" catalog and expects two of
	 * five rows to be verified with no errors or warnings.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testStartNumLines_noStartLine_numLines2() throws IOException, InterruptedException {
		File verifyOutFile = new File(mTempDir, "verify_output.txt");
		File ctgBgz = new File(mTempDir, "test.tsv.bgz");
		createCatalogWithMetadataFiles_good(ctgBgz);
		CommandOutput output = runCmdApp(new VerifyCatalogCommand(), "bior_verify",
				new String[] {
						"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE,  ctgBgz.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,   verifyOutFile.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_NUM_LINES,     "2"
				}
		);

		String verifyTxtOut = FileUtils.readFileToString(verifyOutFile);

		printOutputsForDebugging(output, verifyTxtOut);

		// Expected value first, so a failure reports "expected"/"actual" the right way round.
		assertEquals("", output.stderr);

		assertContains(output.stdout, "Verifying each row");
		assertContains(output.stdout, "Verified 2 of total 5 rows of catalog,");
		assertContains(output.stdout, "Verify #ERROR: 0, #WARNING: 0.");

		assertContains(verifyTxtOut, "Verify #ERROR: 0, #WARNING: 0.");
	}
    
    
    
	//=============================================================================================================
	/**
	 * Passes numLines=0 and expects the same totals and report content as a run over
	 * all lines.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testNumLines_0() throws IOException, InterruptedException {
		File verifyOutFile = new File(mTempDir, "verify_output.txt");
		File ctgBgz = new File(mTempDir, "test.tsv.bgz");
		createCatalogWithMetadataFiles(ctgBgz);
		CommandOutput output = runCmdApp(new VerifyCatalogCommand(), "bior_verify",
				new String[] {
						"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE,  ctgBgz.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,   verifyOutFile.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_NUM_LINES,     "0"
				}
		);

		String verifyTxtOut = FileUtils.readFileToString(verifyOutFile);

		printOutputsForDebugging(output, verifyTxtOut);

		// Expected value first, so a failure reports "expected"/"actual" the right way round.
		assertEquals("", output.stderr);
		assertContains(output.stdout, "Verify #ERROR: 13, #WARNING: 5.");

		assertContainsAll(verifyTxtOut, getMetadataOutput());
		assertContainsAll(verifyTxtOut, getOrderOutput());
		assertContainsAll(verifyTxtOut,  getJsonOutput_allLines());
	}

	//=============================================================================================================
	/**
	 * Passes a negative numLines value and expects the command to reject it with an
	 * error on stderr and exit code 1.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testNumLines_lessThan0() throws IOException, InterruptedException {
		final File catalog = new File(mTempDir, "test.tsv.bgz");
		final File report = new File(mTempDir, "verify_output.txt");
		createCatalogWithMetadataFiles(catalog);

		final String[] cmdArgs = {
				"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE, catalog.getAbsolutePath(),
				"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,  report.getAbsolutePath(),
				"-" + VerifyCatalogCommand.OPTION_NUM_LINES,    "-1"
		};
		final CommandOutput result = runCmdApp(new VerifyCatalogCommand(), "bior_verify", cmdArgs);

		final String reportText = FileUtils.readFileToString(report);
		printOutputsForDebugging(result, reportText);

		// NOTE(review): the expected message names --startLine although the flag under
		// test is numLines; this is the text the test has always expected.
		assertContains(result.stderr, "bior_verify_catalog argument for --startLine must be 0 (for all lines), or a positive number.  Was: -1");
		assertEquals(1, result.exit);
	}

    
	//=============================================================================================================
	/**
	 * Passes start line 1 with numLines=2 and expects two of six rows to be verified,
	 * with the report holding the metadata, order and lines-1-to-2 JSON messages.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testStartNumLines_startLine1_twoLines() throws IOException, InterruptedException {
		File verifyOutFile = new File(mTempDir, "verify_output.txt");
		File ctgBgz = new File(mTempDir, "test.tsv.bgz");
		createCatalogWithMetadataFiles(ctgBgz);
		CommandOutput output = runCmdApp(new VerifyCatalogCommand(), "bior_verify",
				new String[] {
						"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE,  ctgBgz.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,   verifyOutFile.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_START_LINE,    "1",
						"-" + VerifyCatalogCommand.OPTION_NUM_LINES,     "2"
				}
		);

		String verifyTxtOut = FileUtils.readFileToString(verifyOutFile);

		//printOutputsForDebugging(output, verifyTxtOut);

		// Nothing may be written to stderr, but the run itself still reports
		// 1 error and 3 warnings in its summary line.
		// Expected value first, so a failure reports "expected"/"actual" the right way round.
		assertEquals("", output.stderr);
		assertContains(output.stdout, "Verify #ERROR: 1, #WARNING: 3.");

		assertContains(verifyTxtOut,    "Verified 2 of total 6 rows of catalog,");
		assertContainsAll(verifyTxtOut, getMetadataOutput());
		assertContainsAll(verifyTxtOut, getOrderOutput());
		assertContainsAll(verifyTxtOut, getJsonOutput_lines1To2());
	}
    
	//=============================================================================================================
	/**
	 * Passes start line 3 with numLines=3 and expects three of six rows to be verified,
	 * with the report holding the metadata, order and lines-3-to-5 JSON messages.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testStartNumLines_startLine3_threeLines() throws IOException, InterruptedException {
		File verifyOutFile = new File(mTempDir, "verify_output.txt");
		File ctgBgz = new File(mTempDir, "test.tsv.bgz");
		createCatalogWithMetadataFiles(ctgBgz);
		CommandOutput output = runCmdApp(new VerifyCatalogCommand(), "bior_verify",
				new String[] {
						"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE,  ctgBgz.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,   verifyOutFile.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_START_LINE,    "3",
						"-" + VerifyCatalogCommand.OPTION_NUM_LINES,     "3"
				}
		);

		String verifyTxtOut = FileUtils.readFileToString(verifyOutFile);

		printOutputsForDebugging(output, verifyTxtOut);

		// Expected value first, so a failure reports "expected"/"actual" the right way round.
		assertEquals("", output.stderr);
		assertContains(output.stdout, "Verify #ERROR: 12, #WARNING: 7.");

		assertContains(verifyTxtOut,    "Verified 3 of total 6 rows of catalog,");
		assertContainsAll(verifyTxtOut, getMetadataOutput());
		assertContainsAll(verifyTxtOut, getOrderOutput());
		assertContainsAll(verifyTxtOut, getJsonOutput_lines3To5());
	}
    
    
	//=============================================================================================================
	/**
	 * Passes start line 4 with numLines=10 (more than the rows that remain) and expects
	 * no stderr output, with three of six rows verified.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testStartNumLines_startLine4_tenLines() throws IOException, InterruptedException {
		File verifyOutFile = new File(mTempDir, "verify_output.txt");
		File ctgBgz = new File(mTempDir, "test.tsv.bgz");
		createCatalogWithMetadataFiles(ctgBgz);
		CommandOutput output = runCmdApp(new VerifyCatalogCommand(), "bior_verify",
				new String[] {
						"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE,  ctgBgz.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,   verifyOutFile.getAbsolutePath(),
						"-" + VerifyCatalogCommand.OPTION_START_LINE,    "4",
						"-" + VerifyCatalogCommand.OPTION_NUM_LINES,     "10"
				}
		);

		String verifyTxtOut = FileUtils.readFileToString(verifyOutFile);

		//printOutputsForDebugging(output, verifyTxtOut);

		// Expected value first, so a failure reports "expected"/"actual" the right way round.
		assertEquals("", output.stderr);
		assertContains(output.stdout, "Verify #ERROR: 9, #WARNING: 8.");

		assertContains(verifyTxtOut,    "Verified 3 of total 6 rows of catalog,");
		assertContainsAll(verifyTxtOut, getMetadataOutput());
		assertContainsAll(verifyTxtOut, getOrderOutput());
		assertContainsAll(verifyTxtOut, getJsonOutput_lines4To6());
	}

    
	//=============================================================================================================
	/**
	 * Passes start line 0 and expects the command to reject it with an error on stderr
	 * and exit code 1.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testStartNumLines_startLine0_error() throws IOException, InterruptedException {
		final File catalog = new File(mTempDir, "test.tsv.bgz");
		final File report = new File(mTempDir, "verify_output.txt");
		createCatalogWithMetadataFiles(catalog);

		final String[] cmdArgs = {
				"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE, catalog.getAbsolutePath(),
				"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,  report.getAbsolutePath(),
				"-" + VerifyCatalogCommand.OPTION_START_LINE,   "0"
		};
		final CommandOutput result = runCmdApp(new VerifyCatalogCommand(), "bior_verify", cmdArgs);

		final String reportText = FileUtils.readFileToString(report);
		printOutputsForDebugging(result, reportText);

		assertContains(result.stderr, ": bior_verify_catalog argument for --startLine must be a positive number.  Was: 0");
		assertEquals(1, result.exit);
	}

	//=============================================================================================================
	/**
	 * Passes a negative start line and expects the command to reject it with an error
	 * on stderr and exit code 1.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testStartNumLines_startLineNegative_error() throws IOException, InterruptedException {
		final File catalog = new File(mTempDir, "test.tsv.bgz");
		final File report = new File(mTempDir, "verify_output.txt");
		createCatalogWithMetadataFiles(catalog);

		final String[] cmdArgs = {
				"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE, catalog.getAbsolutePath(),
				"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,  report.getAbsolutePath(),
				"-" + VerifyCatalogCommand.OPTION_START_LINE,   "-1"
		};
		final CommandOutput result = runCmdApp(new VerifyCatalogCommand(), "bior_verify", cmdArgs);

		final String reportText = FileUtils.readFileToString(report);
		printOutputsForDebugging(result, reportText);

		assertContains(result.stderr, ": bior_verify_catalog argument for --startLine must be a positive number.  Was: -1");
		assertEquals(1, result.exit);
	}
    
	//=============================================================================================================
	/**
	 * Verifies a catalog whose single and array values contain the pipe "|" character.
	 * A pipe inside an array value should be reported as an error, since it makes
	 * bior_drill fail:
	 * <pre>
	 *   $ echo "{'A':'1|2','B':['1|2','3|4|5','6']}" | bior_drill -p B
	 *   Application error bior_drill:
	 *   : Error: the delimiter '|' was found within one of the array values that was drilled: '1|2'
	 * </pre>
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testPipeInValues() throws IOException, InterruptedException {
		final File catalog = new File(mTempDir, "pipeTest.tsv.bgz");
		final File report = new File(mTempDir, "verify_output.txt");
		createCatalogWithMetadataFiles_pipeDelimiter(catalog);

		final String[] cmdArgs = {
				"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE, catalog.getAbsolutePath(),
				"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,  report.getAbsolutePath()
		};
		final CommandOutput result = runCmdApp(new VerifyCatalogCommand(), "bior_verify", cmdArgs);
		final String reportText = FileUtils.readFileToString(report);

		assertEquals("", result.stderr);
		assertContains(result.stdout, "Verify #ERROR: 2, #WARNING: 2.");

		assertContains(reportText, "Verified 1 of total 1 rows of catalog,");
		assertContainsAll(reportText, getJsonOutput_pipes());
	}
    
	//=============================================================================================================
	/**
	 * Runs the verifier against the checked-in "catalog_with_bogus_format" catalog and
	 * expects the process to finish with exit code 0 rather than abort.
	 *
	 * @throws IOException declared by the signature; not raised directly in this method
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testBadDatasourcePropertiesNotKillProcess() throws IOException, InterruptedException {
		final File catalog = new File("src/test/resources/testData/verification/catalog_with_bogus_format/omim_genes.tsv.bgz");
		final File report = new File(mTempDir, "verify_output.txt");

		final String[] cmdArgs = {
				"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE, catalog.getAbsolutePath(),
				"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,  report.getAbsolutePath()
		};
		final CommandOutput result = runCmdApp(new VerifyCatalogCommand(), "bior_verify", cmdArgs);

		assertEquals(0, result.exit);
	}


	//=============================================================================================================
	/**
	 * Verifies a catalog mixing 'N' alleles with A, C, G, T alleles and checks the
	 * summary counts and the expected N-allele report messages.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testNAlleles() throws IOException, InterruptedException {
		final File catalog = new File(mTempDir, "bad_datasource.tsv.bgz");
		final File report = new File(mTempDir, "verify_output.txt");
		createCatalogWithMetadataFiles_nAlleles(catalog);

		final String[] cmdArgs = {
				"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE, catalog.getAbsolutePath(),
				"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,  report.getAbsolutePath()
		};
		final CommandOutput result = runCmdApp(new VerifyCatalogCommand(), "bior_verify", cmdArgs);

		final String reportText = FileUtils.readFileToString(report);
		printOutputsForDebugging(result, reportText);

		assertEquals("", result.stderr);
		assertContains(result.stdout, "Verify #ERROR: 4, #WARNING: 8.");

		assertContains(reportText, "Verified 6 of total 6 rows of catalog,");
		assertContainsAll(reportText, getJsonOutput_nAlleles());
	}

	//=============================================================================================================
	/**
	 * Verifies a structural-variants catalog and expects exactly one type-mismatch
	 * error, for INFO.HOMSEQ, with all nine rows verified. SLF4J lines are stripped
	 * from stderr before it is compared with the empty string.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testStructuralVariants() throws IOException, InterruptedException {
		final File catalog = new File(mTempDir, "structuralVariants.tsv.bgz");
		final File report = new File(mTempDir, "verify_output.txt");
		createCatalog_structuralVariants(catalog);

		final String[] cmdArgs = {
				"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE, catalog.getAbsolutePath(),
				"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,  report.getAbsolutePath()
		};
		final CommandOutput result = runCmdApp(new VerifyCatalogCommand(), "bior_verify_catalog", cmdArgs);

		final String reportText = FileUtils.readFileToString(report);
		printOutputsForDebugging(result, reportText);

		final String filteredStderr = removeAllLinesStartingWithSlf4j(result.stderr);

		// NOTE: INFO.HOMSEQ is declared as an Integer in columns.tsv but is a string
		//       array ["G"] in the data. The mismatched type should be reported as
		//       "String", and not "JsonArray".
		assertEquals("", filteredStderr);
		assertContains(result.stdout, "Verify #ERROR: 1, #WARNING: 0");
		assertContains(reportText, "ERROR 406: Data type does not match between json [String] and columns.tsv [Integer] for key: INFO.HOMSEQ");
		assertContains(reportText, "Verified 9 of total 9 rows of catalog");
	}

	
	//=============================================================================================================
	/**
	 * Verifies a catalog built to exercise the single-value versus array distinction
	 * (which was previously not made) and expects a count-mismatch error for each of
	 * the six keys.
	 *
	 * @throws IOException if an I/O step of the test (such as reading the report file) fails
	 * @throws InterruptedException declared by the signature; not raised directly in this method
	 */
	@Test
	public void testArrayVsSingle() throws IOException, InterruptedException {
		final File catalog = new File(mTempDir, "singleVsArray.tsv.bgz");
		final File report = new File(mTempDir, "verify_output.txt");
		createCatalog_singleVsArray(catalog);

		final String[] cmdArgs = {
				"-" + VerifyCatalogCommand.OPTION_CATALOG_FILE, catalog.getAbsolutePath(),
				"-" + VerifyCatalogCommand.OPTION_OUTPUT_FILE,  report.getAbsolutePath()
		};
		final CommandOutput result = runCmdApp(new VerifyCatalogCommand(), "bior_verify", cmdArgs);

		final String reportText = FileUtils.readFileToString(report);
		printOutputsForDebugging(result, reportText);

		assertEquals("", result.stderr);
		assertContains(result.stdout, "Verify #ERROR: 6, #WARNING: 0");

		// One count-mismatch error is expected per key, checked in this order.
		final String[] mismatchedKeys = {
				"StrSingle", "StrArray", "IntSingle", "IntArray", "FloatSingle", "FloatArray"
		};
		for (String key : mismatchedKeys) {
			assertContains(reportText, "ERROR 415: Count does not match between json and columns.tsv for key: " + key);
		}

		assertContains(reportText, "Verified 1 of total 1 rows of catalog");
	}

	

	//=============================================================================================================

	/**
	 * Expected message fragments for the metadata checks.
	 *
	 * @return a fixed-size list of WARNING/INFO message strings
	 */
	private List<String> getMetadataOutput() {
		final String[] expectedMessages = {
			"WARNING: Description not found for these columns: [A]",
			"WARNING: HumanReadableName is set to the same value as the ColumnName for these columns: [A, B]",
			"INFO: Catalog is based on Human Genome Build 'GRCh37'.",
			"INFO: No H2 indexes configured for"
		};
		return Arrays.asList(expectedMessages);
	}

	/**
	 * Expected message fragments for the chromosome order / index checks.
	 *
	 * @return a fixed-size list of INFO/WARNING/ERROR message strings
	 */
	private List<String> getOrderOutput() {
		final String[] expectedMessages = {
			"INFO: Verifying chromosomal order and indexes for",
			"WARNING: Catalog entry for chromosome '5' not in expected chromosome order. Previous chromosome catalog entries were for '17', but the current catalog entry is for '5'",
			"ERROR 302: Chromosome '23' is not in expected chromosome list.",
			"INFO: Order of chromosomes: 1,17,5,23",
			"INFO: Chrom and position order: Number of lines read (6) and verified (6)",
			"INFO: Completed verifying order and indexes at "
		};
		return Arrays.asList(expectedMessages);
	}
	

	
	/**
	 * Expected JSON-phase messages when all six catalog rows are verified
	 * ("Verified 6 of total 6 rows of catalog").
	 * <p>
	 * Entries written with single quotes are converted to double quotes via
	 * {@code replace("'", "\"")} so the embedded JSON stays readable.
	 * The final "Verify #ERROR / #WARNING" summary line is deliberately not included.
	 *
	 * @return a fixed-size list of expected message strings
	 */
	private List<String> getJsonOutput_allLines() {
		return Arrays.asList(
				// NOTE: the verify.txt file may ALSO contain warnings and errors from getMetadataOutput() and getOrderOutput()
				"INFO: Chunk (lines): 1 - END",
				"INFO: Found the following issues parsing this json '{\"A\":\"17\",\"_landmark\":\"17\",\"_minBP\":305,\"_maxBP\":305,\"_refAllele\":\"C\",\"_altAlleles\":[\"C\"]}'",
				"ERROR 406: Data type does not match between json [String] and columns.tsv [Integer] for key: A",
				"ERROR 210: Tabix start position [309] not same as _minBP value [305].  Row: 17	309	309	{'A':'17','_landmark':'17','_minBP':305,'_maxBP':305,'_refAllele':'C','_altAlleles':['C']...".replace("'", "\""),
				"ERROR 211: Tabix stop position [309] not same as _maxBP value [305].  Row: 17	309	309	{'A':'17','_landmark':'17','_minBP':305,'_maxBP':305,'_refAllele':'C','_altAlleles':['C']...".replace("'", "\""),
				"ERROR 605: One of the _altAlleles [C] is equal to the _refAllele. Json: {\"A\":\"17\",\"_landmark\":\"17\",\"_minBP\":305,\"_maxBP\":305,\"_refAllele\":\"C\",\"_altAlleles\":[\"C\"]}",
				"ERROR 501: _refAllele [A] does not match reference sequence value [T] for:  chr=5 position=11640",
				"ERROR 504: Chromosome 5: _refAllele values that DO NOT match the reference sequence is [1].",
				"ERROR 306: Chromosome value not found in chromosome size map: 23 Cannot check position [13327] for chromosome value.",
				"ERROR 215: Tabix start position not in valid chromosome size range.  Chr: 23 Chr Max Position: null Tabix entry min position: 13327",
				"ERROR 306: Chromosome value not found in chromosome size map: 23 Cannot check position [13327] for chromosome value.",
				"ERROR 216: Tabix stop position not in valid chromosome size range.  Chr: 23 Chr Max Position: null Tabix entry min position: 13327",
				"ERROR 209: Tabix chr [23] not same as _landmark value [X]",
				"WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  (Ref='A',  refAssemblySequence='N')  X:13327-13327",
				"ERROR 605: One of the _altAlleles [A] is equal to the _refAllele. Json: {\"A\":23,\"_landmark\":\"X\",\"_minBP\":13328,\"_maxBP\":13328,\"_refAllele\":\"A\",\"_altAlleles\":[\"A\"]}",
				"WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  (Ref='A',  refAssemblySequence='N')  X:13328-13328",
				"INFO: Verified 6 of total 6 rows of catalog, finished at",
				"INFO: Column '_landmark': number of occurrences in catalog: 6.",
				"INFO: Column '_minBP': number of occurrences in catalog: 6.",
				"INFO: Column '_maxBP': number of occurrences in catalog: 6.",
				"INFO: Column '_altAlleles': number of occurrences in catalog: 6. Largest number of elements found for array column: 1.",
				"INFO: Column '_refAllele': number of occurrences in catalog: 6.",
				"INFO: Column 'A': number of occurrences in catalog: 6.",
				"INFO: Column 'B': number of occurrences in catalog: 2.",
				"INFO: Column 'C.Nested': number of occurrences in catalog: 2.",
				"INFO: Column 'D': number of occurrences in catalog: 2."
				// Don't include the verify # errors and # warnings as this may change based on whether the METADATA and ORDER phases were called
				);
		
	}
	
	/**
	 * Expected messages for a run that verifies three of the six catalog rows
	 * ("Verified 3 of total 6 rows of catalog"), including the metadata and order
	 * warnings/errors and the final "Verify #ERROR: 12, #WARNING: 7." summary.
	 * <p>
	 * NOTE(review): the messages reference chromosomes 17, 5 and 23 and report columns
	 * B, C.Nested and D as "not found in the catalog". If this is used with the catalog built by
	 * createCatalogBgzipAndTabixIndex(), that corresponds to rows 3-5 rather than rows 1-3
	 * (rows 1-2 there contain B, C.Nested and D) -- confirm the method name/content still match.
	 *
	 * @return a fixed-size list of expected message strings
	 */
	private List<String> getJsonOutput_lines1To3() {
		return Arrays.asList(
			// Have warnings and errors from getMetadataOutput()
			// Have warnings and errors from getOrderOutput()
			"INFO: Chunk (lines): 1 - END",
			"WARNING: Description not found for these columns: [A]",
			"WARNING: HumanReadableName is set to the same value as the ColumnName for these columns: [A, B]",
			"WARNING: Catalog entry for chromosome '5' not in expected chromosome order. Previous chromosome catalog entries were for '17', but the current catalog entry is for '5'. Full Catalog Entry is: '5	11640	11640	" + "{'A':5,'_landmark':'5','_minBP':11640,'_maxBP':11640,'_refAllele':'A','_altAlleles':['C']}".replace("'", "\"") + "'",
			"ERROR 302: Chromosome '23' is not in expected chromosome list.",
			"INFO: Order of chromosomes: 1,17,5,23,X",
			"INFO: Found the following issues parsing this json '{\"A\":\"17\",\"_landmark\":\"17\",\"_minBP\":305,\"_maxBP\":305,\"_refAllele\":\"C\",\"_altAlleles\":[\"C\"]}'",
			"ERROR 406: Data type does not match between json [String] and columns.tsv [Integer] for key: A",
			"ERROR 210: Tabix start position [309] not same as _minBP value [305].  Row: 17	309	309	{'A':'17','_landmark':'17','_minBP':305,'_maxBP':305,'_refAllele':'C','_altAlleles':['C']...".replace("'", "\""),
			"ERROR 211: Tabix stop position [309] not same as _maxBP value [305].  Row: 17	309	309	{'A':'17','_landmark':'17','_minBP':305,'_maxBP':305,'_refAllele':'C','_altAlleles':['C']...".replace("'", "\""),
			"ERROR 605: One of the _altAlleles [C] is equal to the _refAllele. Json: {'A':'17','_landmark':'17','_minBP':305,'_maxBP':305,'_refAllele':'C','_altAlleles':['C']}".replace("'", "\""),
			"ERROR 501: _refAllele [A] does not match reference sequence value [T] for:  chr=5 position=11640",
			"ERROR 504: Chromosome 5: _refAllele values that DO NOT match the reference sequence is [1].",
			"ERROR 306: Chromosome value not found in chromosome size map: 23 Cannot check position [13327] for chromosome value.",
			"ERROR 215: Tabix start position not in valid chromosome size range.  Chr: 23 Chr Max Position: null Tabix entry min position: 13327",
			"ERROR 306: Chromosome value not found in chromosome size map: 23 Cannot check position [13327] for chromosome value.",
			"ERROR 216: Tabix stop position not in valid chromosome size range.  Chr: 23 Chr Max Position: null Tabix entry min position: 13327",
			"ERROR 209: Tabix chr [23] not same as _landmark value [X]",
			"WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  (Ref='A',  refAssemblySequence='N')  X:13327-13327",
			"WARNING: Column 'B' not found in the catalog. Consider removing it from the columns.tsv file.",
			"WARNING: Column 'C.Nested' not found in the catalog. Consider removing it from the columns.tsv file.",
			"WARNING: Column 'D' not found in the catalog. Consider removing it from the columns.tsv file.",
			"INFO: Verified 3 of total 6 rows of catalog, finished at",
			"INFO: Column '_landmark': number of occurrences in catalog: 3.",
			"INFO: Column '_minBP': number of occurrences in catalog: 3.",
			"INFO: Column '_maxBP': number of occurrences in catalog: 3.",
			"INFO: Column '_altAlleles': number of occurrences in catalog: 3. Largest number of elements found for array column: 1.",
			"INFO: Column '_refAllele': number of occurrences in catalog: 3.",
			"INFO: Column 'A': number of occurrences in catalog: 3.",
			"INFO: Verify #ERROR: 12, #WARNING: 7."
			);
	}
	
	
	/**
	 * Expected INFO messages for a run that verifies two of the six catalog rows;
	 * every listed column is reported with two occurrences.
	 *
	 * @return a fixed-size list of expected message strings
	 */
	private List<String> getJsonOutput_lines1To2() {
		final String[] expectedMessages = {
			"INFO: Verified 2 of total 6 rows of catalog, finished at",
			"INFO: Column '_landmark': number of occurrences in catalog: 2.",
			"INFO: Column '_minBP': number of occurrences in catalog: 2.",
			"INFO: Column '_maxBP': number of occurrences in catalog: 2.",
			"INFO: Column '_altAlleles': number of occurrences in catalog: 2. Largest number of elements found for array column: 1.",
			"INFO: Column '_refAllele': number of occurrences in catalog: 2.",
			"INFO: Column 'A': number of occurrences in catalog: 2.",
			"INFO: Column 'B': number of occurrences in catalog: 2.",
			"INFO: Column 'C.Nested': number of occurrences in catalog: 2.",
			"INFO: Column 'D': number of occurrences in catalog: 2."
		};
		return Arrays.asList(expectedMessages);
	}

	/**
	 * Expected messages for a run that verifies three of the six catalog rows
	 * ("Verified 3 of total 6 rows of catalog"): the rows for chromosomes 17, 5 and 23.
	 * Includes the metadata and order warnings/errors and the final
	 * "Verify #ERROR: 12, #WARNING: 7." summary.
	 * <p>
	 * Entries written with single quotes are converted to double quotes via
	 * {@code replace("'", "\"")} so the embedded JSON stays readable.
	 *
	 * @return a fixed-size list of expected message strings
	 */
	private List<String> getJsonOutput_lines3To5() {
		return Arrays.asList(
				"WARNING: Description not found for these columns: [A]",
				"WARNING: HumanReadableName is set to the same value as the ColumnName for these columns: [A, B]",
				"WARNING: Catalog entry for chromosome '5' not in expected chromosome order. Previous chromosome catalog entries were for '17', but the current catalog entry is for '5'. Full Catalog Entry is: '5	11640	11640	" + "{'A':5,'_landmark':'5','_minBP':11640,'_maxBP':11640,'_refAllele':'A','_altAlleles':['C']}".replace("'", "\"") + "'",
				"ERROR 302: Chromosome '23' is not in expected chromosome list.",
				"INFO: Order of chromosomes: 1,17,5,23,X",
				"INFO: Found the following issues parsing this json '{\"A\":\"17\",\"_landmark\":\"17\",\"_minBP\":305,\"_maxBP\":305,\"_refAllele\":\"C\",\"_altAlleles\":[\"C\"]}'",
				"ERROR 406: Data type does not match between json [String] and columns.tsv [Integer] for key: A",
				"ERROR 210: Tabix start position [309] not same as _minBP value [305].  Row: 17	309	309	{'A':'17','_landmark':'17','_minBP':305,'_maxBP':305,'_refAllele':'C','_altAlleles':['C']...".replace("'", "\""),
				"ERROR 211: Tabix stop position [309] not same as _maxBP value [305].  Row: 17	309	309	{'A':'17','_landmark':'17','_minBP':305,'_maxBP':305,'_refAllele':'C','_altAlleles':['C']...".replace("'", "\""),
				"ERROR 605: One of the _altAlleles [C] is equal to the _refAllele. Json: {'A':'17','_landmark':'17','_minBP':305,'_maxBP':305,'_refAllele':'C','_altAlleles':['C']}".replace("'", "\""),
				"ERROR 501: _refAllele [A] does not match reference sequence value [T] for:  chr=5 position=11640",
				"ERROR 504: Chromosome 5: _refAllele values that DO NOT match the reference sequence is [1].",
				"ERROR 306: Chromosome value not found in chromosome size map: 23 Cannot check position [13327] for chromosome value.",
				"ERROR 215: Tabix start position not in valid chromosome size range.  Chr: 23 Chr Max Position: null Tabix entry min position: 13327",
				"ERROR 306: Chromosome value not found in chromosome size map: 23 Cannot check position [13327] for chromosome value.",
				"ERROR 216: Tabix stop position not in valid chromosome size range.  Chr: 23 Chr Max Position: null Tabix entry min position: 13327",
				"ERROR 209: Tabix chr [23] not same as _landmark value [X]",
				"WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  (Ref='A',  refAssemblySequence='N')  X:13327-13327",
				"INFO: Verified 3 of total 6 rows of catalog, finished at",
				"INFO: Column '_landmark': number of occurrences in catalog: 3.",
				"INFO: Column '_minBP': number of occurrences in catalog: 3.",
				"INFO: Column '_maxBP': number of occurrences in catalog: 3.",
				"INFO: Column '_altAlleles': number of occurrences in catalog: 3. Largest number of elements found for array column: 1.",
				"INFO: Column '_refAllele': number of occurrences in catalog: 3.",
				"INFO: Column 'A': number of occurrences in catalog: 3.",
				"WARNING: Column 'B' not found in the catalog. Consider removing it from the columns.tsv file.",
				"WARNING: Column 'C.Nested' not found in the catalog. Consider removing it from the columns.tsv file.",
				"WARNING: Column 'D' not found in the catalog. Consider removing it from the columns.tsv file.",
				"INFO: Verify #ERROR: 12, #WARNING: 7."
				);
	}
	
	/**
	 * Expected JSON-phase messages for a run that verifies three of the six catalog rows
	 * ("Verified 3 of total 6 rows of catalog"): the rows for chromosome 5, tabix chromosome 23
	 * and X. Columns B, C.Nested and D are expected to be reported as not found.
	 * No "Verify #ERROR / #WARNING" summary line is included.
	 *
	 * @return a fixed-size list of expected message strings
	 */
	private List<String> getJsonOutput_lines4To6() {
		return Arrays.asList(
				"ERROR 501: _refAllele [A] does not match reference sequence value [T] for:  chr=5 position=11640",
				"ERROR 504: Chromosome 5: _refAllele values that DO NOT match the reference sequence is [1].",
				"ERROR 306: Chromosome value not found in chromosome size map: 23 Cannot check position [13327] for chromosome value.",
				"ERROR 215: Tabix start position not in valid chromosome size range.  Chr: 23 Chr Max Position: null Tabix entry min position: 13327",
				"ERROR 306: Chromosome value not found in chromosome size map: 23 Cannot check position [13327] for chromosome value.",
				"ERROR 216: Tabix stop position not in valid chromosome size range.  Chr: 23 Chr Max Position: null Tabix entry min position: 13327",
				"ERROR 209: Tabix chr [23] not same as _landmark value [X]",
				"WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  (Ref='A',  refAssemblySequence='N')  X:13327-13327",
				"ERROR 605: One of the _altAlleles [A] is equal to the _refAllele. Json: {\"A\":23,\"_landmark\":\"X\",\"_minBP\":13328,\"_maxBP\":13328,\"_refAllele\":\"A\",\"_altAlleles\":[\"A\"]}",
				"WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  (Ref='A',  refAssemblySequence='N')  X:13328-13328",
				"INFO: Verified 3 of total 6 rows of catalog, finished at",
				"INFO: Column '_landmark': number of occurrences in catalog: 3.",
				"INFO: Column '_minBP': number of occurrences in catalog: 3.",
				"INFO: Column '_maxBP': number of occurrences in catalog: 3.",
				"INFO: Column '_altAlleles': number of occurrences in catalog: 3. Largest number of elements found for array column: 1.",
				"INFO: Column '_refAllele': number of occurrences in catalog: 3.",
				"INFO: Column 'A': number of occurrences in catalog: 3.",
				"WARNING: Column 'B' not found in the catalog. Consider removing it from the columns.tsv file.",
				"WARNING: Column 'C.Nested' not found in the catalog. Consider removing it from the columns.tsv file.",
				"WARNING: Column 'D' not found in the catalog. Consider removing it from the columns.tsv file."
				);
	}

    /**
     * Expected messages for the catalog whose values contain the pipe '|' character:
     * an ERROR 420 for the pipe inside a JSON array value, an ERROR 103 for duplicate
     * HumanReadableNames, two metadata warnings and the final summary line.
     *
     * @return a fixed-size list of expected message strings
     */
    private List<String> getJsonOutput_pipes() {
    	final String[] expectedMessages = {
    		"ERROR 420: Detected a pipe '|' character in a JSON Array value.  This is dangerous since pipe characters are used for the delimiter when drilling out array values.  This will cause an error when drilling out this key.  pipeInArrayValues:[A|B, C, D|E|F]",
    		"ERROR 103: These ColumnNames have duplicate HumanReadableNames: [pipeInArrayValues, intArray]",
    		"WARNING: Description not found for these columns: [A]",
    		"WARNING: HumanReadableName is set to the same value as the ColumnName for these columns: [A, B]",
    		"INFO: Column 'pipeInValue': number of occurrences in catalog: 1.",
    		"INFO: Column 'pipeInArrayValues': number of occurrences in catalog: 1. Largest number of elements found for array column: 3.",
    		"INFO: Verify #ERROR: 2, #WARNING: 2."
    	};
    	return Arrays.asList(expectedMessages);
    }

    /**
     * Expected messages for the catalog that exercises 'N' bases in the reference allele and/or
     * the reference assembly sequence: 'N' warnings, the _refAllele mismatch errors (501/504),
     * the _refAllele length error (601), two metadata warnings and the final
     * "Verify #ERROR: 4, #WARNING: 8." summary line.
     *
     * @return a fixed-size list of expected message strings
     */
    private List<String> getJsonOutput_nAlleles() {
    	return Arrays.asList(
    			"WARNING: Description not found for these columns: [A]",
    			"WARNING: HumanReadableName is set to the same value as the ColumnName for these columns: [A, B]",
    			"WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  (Ref='TCNN',  refAssemblySequence='TCNN')  1:177416-177419",
    			"WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  (Ref='NNAA',  refAssemblySequence='TCNN')  1:177416-177419",
    			"WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  (Ref='TCAA',  refAssemblySequence='TCNN')  1:177416-177419",
    			"WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  (Ref='NN',  refAssemblySequence='TC')  1:177416-177417",
    			"WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  (Ref='TTNN',  refAssemblySequence='TCNN')  1:177416-177419",
    			"ERROR 501: _refAllele [TTNN] does not match reference sequence value [TCNN] for:  chr=1 position=177416",
    			"ERROR 601: _refAllele length [4] is not equal to calculated length [2]. Json: '{\"_landmark\":\"1\",\"_minBP\":177416,\"_maxBP\":177417,\"_refAllele\":\"TCNN\",\"_altAlleles\":[\"GGGG\"],\"A\":1,\"B\":\"b\",\"C\":{\"Nested\":0.34},\"D\":true}'",
    			"WARNING: An 'N' was detected in one of the reference sequences, which may signify a misalignment of the ref allele to the reference assembly.  (Ref='TCNN',  refAssemblySequence='TC')  1:177416-177417",
    			"ERROR 501: _refAllele [TCNN] does not match reference sequence value [TC] for:  chr=1 position=177416",
    			"ERROR 504: Chromosome 1: _refAllele values that DO NOT match the reference sequence is [2].",
    			"INFO: Verify #ERROR: 4, #WARNING: 8."
    			);
    }

    //=============================================================================================================

	/**
	 * Creates the "good" catalog at {@code ctgBgz} together with its companion files, then
	 * points BiorProperties at test-local reference data.
	 * <p>
	 * Steps, in order: bgzip catalog + tabix index, columns.tsv ("good" variant),
	 * datasource properties, column blacklists, and finally the temporary bior.properties.
	 *
	 * @param ctgBgz the catalog file to create; companion files are derived from it by the helpers
	 * @throws IOException           if any of the files cannot be written
	 * @throws InterruptedException  propagated from the helpers called here
	 */
	private void createCatalogWithMetadataFiles_good(File ctgBgz) throws IOException, InterruptedException {
		createCatalogBgzipAndTabixIndex_good(ctgBgz);
		createColumnsTsv_good(ctgBgz);
		createDatasourceProperties(ctgBgz);
		createBlacklists(ctgBgz);
		
		// Must point BiorProperties at the test reference assembly / chromosome size / order files
		setBiorPropertiesChromSizeOrderRef();
	}

    
	/**
	 * Creates the default test catalog (which contains deliberate problems) at {@code ctgBgz}
	 * together with its companion files, then points BiorProperties at test-local reference data.
	 * <p>
	 * Steps, in order: bgzip catalog + tabix index, columns.tsv, datasource properties,
	 * column blacklists, and finally the temporary bior.properties.
	 *
	 * @param ctgBgz the catalog file to create; companion files are derived from it by the helpers
	 * @throws IOException           if any of the files cannot be written
	 * @throws InterruptedException  propagated from the helpers called here
	 */
	private void createCatalogWithMetadataFiles(File ctgBgz) throws IOException, InterruptedException {
		createCatalogBgzipAndTabixIndex(ctgBgz);
		createColumnsTsv(ctgBgz);
		createDatasourceProperties(ctgBgz);
		createBlacklists(ctgBgz);
		
		// Must point BiorProperties at the test reference assembly / chromosome size / order files
		setBiorPropertiesChromSizeOrderRef();
	}

	/**
	 * Creates the catalog whose values contain the pipe '|' character at {@code ctgBgz}
	 * together with its companion files, then points BiorProperties at test-local reference data.
	 * <p>
	 * Steps, in order: bgzip catalog + tabix index, columns.tsv, datasource properties,
	 * column blacklists, and finally the temporary bior.properties.
	 *
	 * @param ctgBgz the catalog file to create; companion files are derived from it by the helpers
	 * @throws IOException           if any of the files cannot be written
	 * @throws InterruptedException  propagated from the helpers called here
	 */
	private void createCatalogWithMetadataFiles_pipeDelimiter(File ctgBgz) throws IOException, InterruptedException {
		createCatalogBgzipAndTabixIndex_forPipeInValue(ctgBgz);
		createColumnsTsv(ctgBgz);
		createDatasourceProperties(ctgBgz);
		createBlacklists(ctgBgz);
		
		// Must point BiorProperties at the test reference assembly / chromosome size / order files
		setBiorPropertiesChromSizeOrderRef();
	}

	/**
	 * Creates the catalog that exercises 'N' alleles at {@code ctgBgz} together with its
	 * companion files, then points BiorProperties at test-local reference data.
	 * <p>
	 * Steps, in order: bgzip catalog + tabix index, columns.tsv, datasource properties,
	 * column blacklists, and finally the temporary bior.properties.
	 *
	 * @param ctgBgz the catalog file to create; companion files are derived from it by the helpers
	 * @throws IOException           if any of the files cannot be written
	 * @throws InterruptedException  propagated from the helpers called here
	 */
	private void createCatalogWithMetadataFiles_nAlleles(File ctgBgz) throws IOException, InterruptedException {
		createCatalogBgzipAndTabixIndex_nAlleles(ctgBgz);
		createColumnsTsv(ctgBgz);
		createDatasourceProperties(ctgBgz);
		createBlacklists(ctgBgz);
		
		// Must point BiorProperties at the test reference assembly / chromosome size / order files
		setBiorPropertiesChromSizeOrderRef();
	}

	/**
	 * Writes the two column blacklist files next to the catalog:
	 * {@code <prefix>.columns.tsv.blacklist} and an identical copy named
	 * {@code <prefix>.columns.tsv.blacklist.biorweb}.
	 * <p>
	 * The blacklist consists of three header comment lines followed by the five golden
	 * attributes (_landmark, _maxBP, _minBP, _altAlleles, _refAllele), one per line.
	 *
	 * @param ctgBgz the catalog file; its parent directory and prefix determine the output paths
	 * @throws IOException if either blacklist file cannot be written
	 */
	private void createBlacklists(File ctgBgz) throws IOException {
		// Resolve the directory and prefix once; both output files share them
		File ctgDir = ctgBgz.getParentFile();
		String ctgPrefix = getCtgPrefix(ctgBgz);
		File columnsTsvBlacklist = new File(ctgDir, ctgPrefix + ".columns.tsv.blacklist");
		File columnsTsvBlacklistBiorweb = new File(ctgDir, ctgPrefix + ".columns.tsv.blacklist.biorweb");
		
		FileUtils.write(columnsTsvBlacklist, 
				"### These columns are to generally be ignored within UIs as they are duplicates or are normalized versions of the original columns." + EOL +
				"### If this file is NOT present, then assume that all fields, except for the golden attributes (those beginning with _) should be shown" + EOL +
				"### If this file is present, but contains no columns, then assume that all fields should be shown" + EOL +
				"_landmark" + EOL +
				"_maxBP" + EOL +
				"_minBP" + EOL +
				"_altAlleles" + EOL +
				"_refAllele" + EOL
				);
		
		// The biorweb blacklist is a byte-for-byte copy of the general blacklist
		FileUtils.copyFile(columnsTsvBlacklist, columnsTsvBlacklistBiorweb);
	}

	/**
	 * Makes BiorProperties read from a temporary copy of {@code src/main/resources/bior.properties}
	 * in which the GRCh37 reference-sequence, chromosome-size and chromosome-order entries are
	 * rewritten to the absolute paths of the files in this source tree.
	 * <p>
	 * The temporary file is written to {@code mTempDir} as {@code bior.properties}.
	 *
	 * @throws IOException if the original properties file cannot be read or the copy cannot be written
	 */
	private void setBiorPropertiesChromSizeOrderRef() throws IOException {
		File refAssemblyFile = new File("src/test/resources/ref_assembly/GRCh37/genome_GRCh37.tsv.bgz");
		File chromSizesFile  = new File("src/main/resources/humanChromosomeSizes.GRCh37.txt");
		File chromOrderFile  = new File("src/main/resources/humanChromosomesSortedByName.GRCh37.txt");
		File biorPropsFile   = new File("src/main/resources/bior.properties");
		File biorPropsTempFile=new File(mTempDir, "bior.properties");
		
		// First load the bior.properties file to string
		String contents = FileUtils.readFileToString(biorPropsFile);
		
		// Replacement lines carrying the absolute paths above
		String refSeqLine     = "humanRefSeqGrch37File="      + refAssemblyFile.getAbsolutePath();
		String chromSizesLine = "humanRefChrSizesGrch37File=" + chromSizesFile.getAbsolutePath();
		String chromOrderLine = "humanRefChrOrderGrch37File=" + chromOrderFile.getAbsolutePath();
		
		// replaceAll() interprets '\' and '$' in the replacement string, so absolute paths
		// (e.g. Windows paths) must be quoted to be inserted literally.
		contents = 	contents
				.replaceAll("humanRefSeqGrch37File=.*", 		java.util.regex.Matcher.quoteReplacement(refSeqLine))
				.replaceAll("humanRefChrSizesGrch37File=.*", 	java.util.regex.Matcher.quoteReplacement(chromSizesLine))
				.replaceAll("humanRefChrOrderGrch37File=.*",	java.util.regex.Matcher.quoteReplacement(chromOrderLine));
		
		// then save it to a new bior.properties file
		FileUtils.write(biorPropsTempFile, contents);
		
		// and set it in the BiorProperties.setFile() method
		BiorProperties.setFile(biorPropsTempFile.getAbsolutePath());
	}


	/**
	 * Builds the "good" five-row catalog (chromosomes 1, 1, 5, 17, Y) at {@code ctgBgz}
	 * and creates its tabix index.
	 * <p>
	 * Rows are written with single quotes for readability and converted to double quotes
	 * before the catalog is created.
	 * NOTE: The positions here MUST match the refAssemblyFile.
	 *
	 * @param ctgBgz the bgzip catalog file to create
	 * @throws IOException           if the catalog cannot be written
	 * @throws InterruptedException  propagated from the catalog creation
	 */
	private void createCatalogBgzipAndTabixIndex_good(File ctgBgz) throws IOException, InterruptedException {
		final String[] rows = {
			//------ Chunk 1 ------------------------------------
			concat("1", "10018", "10018",  "{'A':0,'B':'zzz','C':{'Nested':0.34},'D':false,'_landmark':'1','_minBP':10018,'_maxBP':10018,'_refAllele':'C','_altAlleles':['A']}"),
			concat("1", "10019", "10019",  "{'A':0,'B':'zzz','C':{'Nested':0.34},'D':false,'_landmark':'1','_minBP':10019,'_maxBP':10019,'_refAllele':'T','_altAlleles':['C']}"),
			//------ Chunk 2 ------------------------------------
			concat("5",  "11640", "11640", "{'A':5,'_landmark':'5','_minBP':11640,'_maxBP':11640,'_refAllele':'T','_altAlleles':['C']}"),
			concat("17", "305", "305",     "{'A':17,'_landmark':'17','_minBP':305,'_maxBP':305,'_refAllele':'C','_altAlleles':['G']}"),
			//------ Chunk 3 ------------------------------------
			concat("Y", "13301", "13301", "{'A':23,'_landmark':'Y','_minBP':13301,'_maxBP':13301,'_refAllele':'C','_altAlleles':['T']}")
		};
		List<String> catalogRows = Arrays.asList(rows);

		convertSingleQuotesToDouble(catalogRows);

		final boolean isCreateTabixIndex = true;
		super.createCatalog(catalogRows, ctgBgz, isCreateTabixIndex);
	}

	
	/**
	 * Builds the default six-row test catalog at {@code ctgBgz} and creates its tabix index.
	 * <p>
	 * The rows contain deliberate problems (type mismatch, tabix/JSON position mismatch,
	 * chromosome out of order, unexpected chromosome '23', ref allele problems) that the
	 * verifier is expected to report; see the per-row comments below.
	 * Rows are written with single quotes for readability and converted to double quotes
	 * before the catalog is created.
	 *
	 * @param ctgBgz the bgzip catalog file to create
	 * @throws IOException           if the catalog cannot be written
	 * @throws InterruptedException  propagated from the catalog creation
	 */
	private void createCatalogBgzipAndTabixIndex(File ctgBgz) throws IOException, InterruptedException {
		// NOTE: The positions here MUST match the refAssemblyFile 
    	List<String> catalogContents = Arrays.asList( 
    			//------ Chunk 1 ------------------------------------
    			concat("1", "10018", "10018",  "{'A':0,'B':'zzz','C':{'Nested':0.34},'D':false,'_landmark':'1','_minBP':10018,'_maxBP':10018,'_refAllele':'C','_altAlleles':['A']}"),
    			concat("1", "10019", "10019",  "{'A':0,'B':'zzz','C':{'Nested':0.34},'D':false,'_landmark':'1','_minBP':10019,'_maxBP':10019,'_refAllele':'T','_altAlleles':['C']}"),
    			//------ Chunk 2 ------------------------------------
    			// Type mismatch for 'A'.  Start and stop tabix positions not same as _minBP and _maxBP
    	    	concat("17", "309", "309",     "{'A':'17','_landmark':'17','_minBP':305,'_maxBP':305,'_refAllele':'C','_altAlleles':['C']}"),
    			// Chrom is out of order (should come before 17 in last row)
    			// _refAllele does not match the allele in the ref assembly file  (should be "T" at this point)
    			concat("5",  "11640", "11640", "{'A':5,'_landmark':'5','_minBP':11640,'_maxBP':11640,'_refAllele':'A','_altAlleles':['C']}"),
    			//------ Chunk 3 ------------------------------------
    			// ERROR:  Chrom '23' is not expected (should be 'X').  Should note difference between tabix chrom and _landmark (23 vs X)
    	    	concat("23", "13327", "13327", "{'A':23,'_landmark':'X','_minBP':13327,'_maxBP':13327,'_refAllele':'A','_altAlleles':['C']}"),
    			// ERROR: _refAllele is the same as one of the alts
    			// WARN:  NOTE: X at this point is an 'N' in the ref assembly
      	    	concat("X",  "13328", "13328", "{'A':23,'_landmark':'X','_minBP':13328,'_maxBP':13328,'_refAllele':'A','_altAlleles':['A']}")
    			);
    	
    	// Rows above use single quotes for readability; turn them into valid JSON double quotes
    	convertSingleQuotesToDouble(catalogContents);
    	
     	super.createCatalog(catalogContents, ctgBgz, /*isCreateTabixIndex=*/true);
	}

	/**
	 * Writes a one-row catalog to {@code ctgBgz} in which a String value and a String array both
	 * contain the pipe "|" character, then has the base class create it with the tabix-index flag on.
	 *
	 * @param ctgBgz  the catalog file to create
	 * @throws IOException           propagated from the base-class catalog creation
	 * @throws InterruptedException  propagated from the base-class catalog creation
	 */
	private void createCatalogBgzipAndTabixIndex_forPipeInValue(File ctgBgz) throws IOException, InterruptedException {
		// NOTE: The positions here MUST match the refAssemblyFile
		// A String value and a String array contain the pipe "|" character - this should throw a warning
		String rowWithPipes = concat("1", "10018", "10018",
				"{'pipeInValue':'A|B','pipeInArrayValues':['A|B','C','D|E|F'],'intArray':[1,2,3],'_landmark':'1','_minBP':10018,'_maxBP':10018,'_refAllele':'C','_altAlleles':['A'],'A':1,'B':'b','C':{'Nested':0.34},'D':true}");

		// Arrays.asList gives a fixed-size list whose elements can still be replaced in place
		List<String> catalogLines = Arrays.asList(rowWithPipes);
		convertSingleQuotesToDouble(catalogLines);

		super.createCatalog(catalogLines, ctgBgz, /*isCreateTabixIndex=*/true);
	}

	/**
	 * Writes a catalog to {@code ctgBgz} whose rows exercise 'N' handling when a row's _refAllele is
	 * compared with the reference assembly; the scenario each row targets is described in the comment
	 * above it.  The base class then creates the catalog with the tabix-index flag on.
	 *
	 * @param ctgBgz  the catalog file to create
	 * @throws IOException           propagated from the base-class catalog creation
	 * @throws InterruptedException  propagated from the base-class catalog creation
	 */
	private void createCatalogBgzipAndTabixIndex_nAlleles(File ctgBgz) throws IOException, InterruptedException {
		// NOTE: The positions here MUST match the refAssemblyFile 
		// In refAssemblyFile:  "1	177381	177450	GGTTTTTGAAGGTTAGAACTGGTGGTCTAGAGAATTCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
		//                                                                             ^--^ 177416-177419
		String[] rows = {
				// Match input of "TCNN" against the refAssembly of "TCNN"  (exact match)
				concat("1", "177416", "177419", "{'_landmark':'1','_minBP':177416,'_maxBP':177419,'_refAllele':'TCNN','_altAlleles':['GGGG'],'A':1,'B':'b','C':{'Nested':0.34},'D':true}"),
				// Match input of "NNAA" against the refAssembly of "TCNN"  (swap - matching N's on both sides)
				concat("1", "177416", "177419", "{'_landmark':'1','_minBP':177416,'_maxBP':177419,'_refAllele':'NNAA','_altAlleles':['GGGG'],'A':1,'B':'b','C':{'Nested':0.34},'D':true}"),
				// Match input of "TCAA" against the refAssembly of "TCNN"  (match first two letters exactly and last two against N's in refAssembly)
				concat("1", "177416", "177419", "{'_landmark':'1','_minBP':177416,'_maxBP':177419,'_refAllele':'TCAA','_altAlleles':['GGGG'],'A':1,'B':'b','C':{'Nested':0.34},'D':true}"),
				// Match input of "NN"   against the refAssembly of "TC"    (input N's match real base-pairs in refAssembly)
				concat("1", "177416", "177417", "{'_landmark':'1','_minBP':177416,'_maxBP':177417,'_refAllele':'NN','_altAlleles':['GGGG'],'A':1,'B':'b','C':{'Nested':0.34},'D':true}"),
				// Error:         "TTNN" against the refAssembly of "TCNN"  (non-N alleles do not match)
				concat("1", "177416", "177419", "{'_landmark':'1','_minBP':177416,'_maxBP':177419,'_refAllele':'TTNN','_altAlleles':['GGGG'],'A':1,'B':'b','C':{'Nested':0.34},'D':true}"),
				// Error:         "TCNN" (but len=2) against ref of "TCNN"  (max position is only 2 away from min, but ref is 4 base-pairs long)
				concat("1", "177416", "177417", "{'_landmark':'1','_minBP':177416,'_maxBP':177417,'_refAllele':'TCNN','_altAlleles':['GGGG'],'A':1,'B':'b','C':{'Nested':0.34},'D':true}")
		};

		// Fixed-size view over the array; the quote conversion rewrites the elements in place
		List<String> catalogLines = Arrays.asList(rows);
		convertSingleQuotesToDouble(catalogLines);

		super.createCatalog(catalogLines, ctgBgz, /*isCreateTabixIndex=*/true);
	}

	
	/**
	 * Creates the structural-variants catalog at {@code ctgBgz} from the checked-in
	 * {@code structuralVariants.tjson} fixture, then copies the matching columns.tsv and
	 * datasource.properties fixtures next to it (named after the catalog prefix) and finally
	 * calls {@code createBlacklists} for the same catalog.
	 *
	 * @param ctgBgz  the catalog file to create; its parent directory receives the metadata files
	 * @throws IOException           if a fixture cannot be read or a file cannot be written
	 * @throws InterruptedException  propagated from the base-class catalog creation
	 */
	private void createCatalog_structuralVariants(File ctgBgz) throws IOException, InterruptedException {
		// Catalog rows come straight from the fixture file (read with the platform default charset)
		final File tjsonFixture = new File("src/test/resources/vcfToTjson/structuralVariants.tjson");
		super.createCatalog(Files.readLines(tjsonFixture, Charset.defaultCharset()), ctgBgz, /*isCreateTabixIndex=*/true);

		final File ctgDir = ctgBgz.getParentFile();
		final String ctgPrefix = getCtgPrefix(ctgBgz);

		// <prefix>.columns.tsv  <-- copy of the columns fixture
		final File colsTsv = new File(ctgDir, ctgPrefix + ".columns.tsv");
		FileUtils.write(colsTsv,
				FileUtils.readFileToString(new File("src/test/resources/vcfToTjson/structuralVariants.columns.tsv")));

		// <prefix>.datasource.properties  <-- copy of the datasource fixture
		final File datasourceProps = new File(ctgDir, ctgPrefix + ".datasource.properties");
		FileUtils.write(datasourceProps,
				FileUtils.readFileToString(new File("src/test/resources/vcfToTjson/structuralVariants.datasource.properties")));

		createBlacklists(ctgBgz);
	}

	/**
	 * Creates the "single vs array" catalog at {@code ctgBgz} from the checked-in
	 * {@code singleVsArray.tjson} fixture, then copies the matching columns.tsv and
	 * datasource.properties fixtures next to it (named after the catalog prefix) and finally
	 * calls {@code createBlacklists} for the same catalog.
	 *
	 * @param ctgBgz  the catalog file to create; its parent directory receives the metadata files
	 * @throws IOException           if a fixture cannot be read or a file cannot be written
	 * @throws InterruptedException  propagated from the base-class catalog creation
	 */
	private void createCatalog_singleVsArray(File ctgBgz) throws IOException, InterruptedException {
		// Catalog rows come straight from the fixture file (read with the platform default charset)
		final File tjsonFixture = new File("src/test/resources/singleVsArray.tjson");
		super.createCatalog(Files.readLines(tjsonFixture, Charset.defaultCharset()), ctgBgz, /*isCreateTabixIndex=*/true);

		final File ctgDir = ctgBgz.getParentFile();
		final String ctgPrefix = getCtgPrefix(ctgBgz);

		// <prefix>.columns.tsv  <-- copy of the columns fixture
		final File colsTsv = new File(ctgDir, ctgPrefix + ".columns.tsv");
		FileUtils.write(colsTsv,
				FileUtils.readFileToString(new File("src/test/resources/singleVsArray.columns.tsv")));

		// <prefix>.datasource.properties  <-- copy of the datasource fixture
		final File datasourceProps = new File(ctgDir, ctgPrefix + ".datasource.properties");
		FileUtils.write(datasourceProps,
				FileUtils.readFileToString(new File("src/test/resources/singleVsArray.datasource.properties")));

		createBlacklists(ctgBgz);
	}

	
	
	/**
	 * Replaces every single quote (') with a double quote (") in each element of the list.
	 * The list is updated in place via {@code set}, so it must support element replacement
	 * (the fixed-size lists from {@code Arrays.asList} do).
	 *
	 * @param lines  lines to convert; modified in place
	 */
	private void convertSingleQuotesToDouble(List<String> lines) {
		final int lineCount = lines.size();
		for (int idx = 0; idx < lineCount; idx++) {
			String singleQuoted = lines.get(idx);
			String doubleQuoted = singleQuoted.replace('\'', '"');
			lines.set(idx, doubleQuoted);
		}
	}

	
	/**
	 * Writes a fully populated {@code <prefix>.columns.tsv} for the given catalog.  Unlike
	 * {@code createColumnsTsv}, every column here - including 'A' - has a non-empty description
	 * and a descriptive human-readable name.
	 * <p>
	 * The file is written next to the catalog (same directory as {@code ctgBgz}), matching
	 * {@code createCatalog_structuralVariants} and {@code createCatalog_singleVsArray}.  If the
	 * catalog path has no parent directory, the shared temp directory is used, as before.
	 *
	 * @param ctgBgz  catalog file; its name supplies the prefix and its directory receives the file
	 * @throws IOException  if the file cannot be written
	 */
	private void createColumnsTsv_good(File ctgBgz) throws IOException {
		File ctgDir = ctgBgz.getParentFile();
		if (ctgDir == null) {
			ctgDir = mTempDir;
		}
		File colsTsv = new File(ctgDir, getCtgPrefix(ctgBgz) + ".columns.tsv");

		String contents =
				concat("#ColumnName", "Type",    "Count", "Description",   "HumanReadableName") + EOL +
				// BioR "golden" attributes
				concat("_landmark",   "String",  "1",     "Provides a context for the genomic coordinates _minBP and _maxBP.  Most often this is the chromosome where the feature appears, but could be a known genetic marker, gene, or other item (BioR field)", "Chromosome or landmark (BioR)") + EOL +
				concat("_minBP",      "Integer", "1",     "The minimum (starting) 1-based base pair position within the chromosome (BioR field)",   "Min base-pair position (BioR)") + EOL +
				concat("_maxBP",      "Integer", "1",     "The maximum (ending) 1-based base pair position within the chromosome (BioR field)",   "Max base-pair position (BioR)") + EOL +
				concat("_altAlleles", "String",  ".",     "One or more alternate alleles (non-reference) in a JSON array (basically a comma-separated list) (BioR field)",   "Alternate alleles (BioR)") + EOL +
				concat("_refAllele",  "String",  "1",     "The reference allele (BioR field)",   "Reference allele (BioR)") + EOL +
				// Catalog-specific fields - all of them carry a description in this "good" variant
				concat("A",           "Integer", "1",     "Just a field",  "A Field") + EOL +
				concat("B",           "String",  "1",     "Field B",       "B Field") + EOL +
				concat("C.Nested",    "Float",   "1",     "A nested field","Nested JSON Value") + EOL +
				concat("D",           "Boolean", "0",     "D - Boolean field", "Boolean Value") + EOL;

		FileUtils.write(colsTsv, contents);
	}

	
	/**
	 * Writes {@code <prefix>.columns.tsv} for the given catalog.  Field 'A' is intentionally given
	 * an empty description.  When the catalog prefix is "pipeTest", three extra columns
	 * (pipeInValue, pipeInArrayValues, intArray) are appended.
	 * <p>
	 * The file is written next to the catalog (same directory as {@code ctgBgz}), matching
	 * {@code createCatalog_structuralVariants} and {@code createCatalog_singleVsArray}.  If the
	 * catalog path has no parent directory, the shared temp directory is used, as before.
	 *
	 * @param ctgBgz  catalog file; its name supplies the prefix and its directory receives the file
	 * @throws IOException  if the file cannot be written
	 */
	private void createColumnsTsv(File ctgBgz) throws IOException {
		String ctgPrefix = getCtgPrefix(ctgBgz);
		File ctgDir = ctgBgz.getParentFile();
		if (ctgDir == null) {
			ctgDir = mTempDir;
		}
		File colsTsv = new File(ctgDir, ctgPrefix + ".columns.tsv");

		StringBuilder contents = new StringBuilder();
		contents.append(concat("#ColumnName", "Type",    "Count", "Description",   "HumanReadableName")).append(EOL);
		contents.append(concat("_landmark",   "String",  "1",     "Provides a context for the genomic coordinates _minBP and _maxBP.  Most often this is the chromosome where the feature appears, but could be a known genetic marker, gene, or other item (BioR field)", "Chromosome or landmark (BioR)")).append(EOL);
		contents.append(concat("_minBP",      "Integer", "1",     "The minimum (starting) 1-based base pair position within the chromosome (BioR field)",   "Min base-pair position (BioR)")).append(EOL);
		contents.append(concat("_maxBP",      "Integer", "1",     "The maximum (ending) 1-based base pair position within the chromosome (BioR field)",   "Max base-pair position (BioR)")).append(EOL);
		contents.append(concat("_altAlleles", "String",  ".",     "One or more alternate alleles (non-reference) in a JSON array (basically a comma-separated list) (BioR field)",   "Alternate alleles (BioR)")).append(EOL);
		contents.append(concat("_refAllele",  "String",  "1",     "The reference allele (BioR field)",   "Reference allele (BioR)")).append(EOL);
		// NOTE: NO Description for field 'A', which should flag a warning
		contents.append(concat("A",           "Integer", "1",     "",       "A")).append(EOL);
		contents.append(concat("B",           "String",  "1",     "Field B",       "B")).append(EOL);
		contents.append(concat("C.Nested",    "Float",   "1",     "A nested field","C_Nested")).append(EOL);
		contents.append(concat("D",           "Boolean", "0",     "D - Boolean field", "D_Bool")).append(EOL);

		// Add more columns if we are testing the pipe character in values and arrays
		if ("pipeTest".equals(ctgPrefix)) {
			contents.append(concat("pipeInValue",       "String",  "1",  "Test a pipe in a single value", "Pipe in single value")).append(EOL);
			contents.append(concat("pipeInArrayValues", "String",  ".",  "Test a pipe in a String array", "Pipe in array")).append(EOL);
			// NOTE(review): description and readable name look copy-pasted from pipeInArrayValues;
			// left unchanged because this is fixture text - confirm before editing
			contents.append(concat("intArray",          "Integer", ".",  "Test a pipe in a String array", "Pipe in array")).append(EOL);
		}

		FileUtils.write(colsTsv, contents.toString());
	}


	/**
	 * Derives the catalog prefix from a catalog file: the file's name (directories ignored) with a
	 * trailing ".tsv.bgz" extension removed.  Ex: "/tmp/test.tsv.bgz" gives "test".
	 * <p>
	 * Only the trailing extension is stripped; a ".tsv.bgz" sequence elsewhere in the name is kept,
	 * and a name that does not end with the extension is returned unchanged.
	 *
	 * @param ctgBgz  the catalog file
	 * @return the catalog file name without its ".tsv.bgz" extension
	 */
	private String getCtgPrefix(File ctgBgz) {
		final String ctgExtension = ".tsv.bgz";
		String fileName = ctgBgz.getName();
		if (fileName.endsWith(ctgExtension)) {
			return fileName.substring(0, fileName.length() - ctgExtension.length());
		}
		return fileName;
	}

	/**
	 * Writes {@code <prefix>.datasource.properties} for the given catalog, with every key
	 * (ShortUniqueName, Description, Source, Dataset, Version, Build, Format, DataSourceReleaseDate)
	 * filled in.
	 * <p>
	 * The file is written next to the catalog (same directory as {@code ctgBgz}), matching
	 * {@code createCatalog_structuralVariants} and {@code createCatalog_singleVsArray}.  If the
	 * catalog path has no parent directory, the shared temp directory is used, as before.
	 *
	 * @param ctgBgz  catalog file; its name supplies the prefix and its directory receives the file
	 * @throws IOException  if the file cannot be written
	 */
	private void createDatasourceProperties(File ctgBgz) throws IOException {
		File ctgDir = ctgBgz.getParentFile();
		if (ctgDir == null) {
			ctgDir = mTempDir;
		}
		File datasourceProps = new File(ctgDir, getCtgPrefix(ctgBgz) + ".datasource.properties");

		String[] propertyLines = {
				"### Datasource properties file for Catalog - ALL.wgs.phase1_release_v3.20101123.snps_indels_sv.sites_GRCh37.  Please fill in the descriptions to the keys below.",
				"## Short name that should be unique (or mostly unique except for fixes to existing catalog) across all catalogs. Ex: dbSNP_142_GRCh37p13",
				"ShortUniqueName=test_1_GRCh37",
				"## Description of catalog.  Ex: NCBI's dbSNP Variant Database",
				"Description=Test catalog",
				"## Source of data, without point release, etc.  Ex: dbSNP",
				"Source=Test",
				"## Type of data.  Ex: Variants",
				"Dataset=Variants",
				"## Version of the data source.  Ex: 142",
				"Version=1",
				"## The Genome build/assembly.  Ex: GRCh37.p13",
				"Build=GRCh37",
				"## The BioR catalog compatibility format.  Ex: 1.1.1",
				"Format=1.1.1",
				"## The release date of the data source from the provider (not the BioR build date).  Ex: 2018-07-10",
				"DataSourceReleaseDate=2018-06-28"
		};

		// Every line, including the last, is terminated with EOL
		StringBuilder contents = new StringBuilder();
		for (String propertyLine : propertyLines) {
			contents.append(propertyLine).append(EOL);
		}

		FileUtils.write(datasourceProps, contents.toString());
	}


}

