package edu.mayo.bior.cli.cmd;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import net.sf.samtools.util.BlockCompressedInputStream;
import net.sf.samtools.util.BlockCompressedOutputStream;

import org.apache.commons.io.FileUtils;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import com.google.common.io.Files;
import com.google.gson.JsonParser;

import edu.mayo.bior.cli.func.BaseFunctionalTest;
import edu.mayo.bior.cli.func.CommandOutput;
import edu.mayo.bior.pipeline.createcatalog.TjsonToCatalog;
import edu.mayo.cli.CommandLineApp;
import edu.mayo.cli.CommandPlugin;
import edu.mayo.pipes.util.test.PipeTestUtils;

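/**
 * Functional tests for the {@code bior_create_catalog} command: sorting of catalog rows,
 * landmark (chromosome) normalization, JSON column selection, build-specific chromosome
 * ordering, log-file output and error reporting.
 *
 * User: m102417
 * Date: 7/25/13
 * Time: 11:35 AM
 */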
public class CreateCatalogCommandTest  extends BaseFunctionalTest {

    /**
     * Sample gene record as a JSON string. It is written with single quotes for readability;
     * the trailing replaceAll turns every single quote into a double quote.
     */
    public static String json = "{'_type':'gene','_landmark':'1','_strand':'+','_minBP':10954,'_maxBP':11507,'gene':'LOC100506145','note':'Derived by automated computational analysis using gene prediction method: GNOMON. Supporting evidence includes similarity to: 1 Protein','pseudo':'','GeneID':'100506145'}".replaceAll("'", "\"");

    /** Line separator used when assembling multi-line catalog input, expected output and expected log lines. */
    private final String EOL = "\n";

    /*
    class Output {
    	int exitCode = -1;
    	public String stderr;
    	public String stdout;
    }
    */
    
    
    /**
     * JUnit-managed scratch area: each test asks it for a fresh folder (newFolder()) to hold
     * the catalog it generates. The TemporaryFolder rule removes everything after the test.
     */
    @Rule
    public TemporaryFolder mTempFolder = new TemporaryFolder();


    
    /**
     * Runs the command with default options on an unsorted catalog and verifies that the
     * result matches the pre-sorted reference catalog, with nothing written to stdout/stderr.
     *
     * @throws IOException if the temp folder or a catalog file cannot be accessed
     * @throws InterruptedException retained in the signature for compatibility
     */
    @Test
    public void cmdLine_sort() throws IOException, InterruptedException {
        String testDir    = "src/test/resources/createCatalog/";
        String catalogIn  = testDir + "catalog.unsorted.tsv";
        String outputPath = mTempFolder.newFolder().getAbsolutePath() + "/catalog.sorted.tsv.bgz";

        CommandOutput out = runCmdApp("-i", catalogIn, "-o", outputPath);

        // The captured stream doubles as the failure message so a broken run is diagnosable
        assertEquals(out.stderr, "", out.stderr);
        assertEquals(out.stdout, "", out.stdout);
        // assertEquals (instead of assertTrue(0 == exit)) reports the actual exit code on failure
        assertEquals(out.stderr, 0, out.exit);

        verifyCatalogsSame(testDir + "catalog.sorted.tsv.bgz", outputPath);
    }
    
    /**
     * The command must reject an output path that does not end in ".tsv.bgz": it should
     * report the problem on stderr, print nothing to stdout and exit with code 1.
     *
     * @throws IOException if the temp folder cannot be created
     * @throws InterruptedException retained in the signature for compatibility
     */
    @Test
    public void error_catalogOutputPathMustHaveTsvBgzExtension() throws IOException, InterruptedException {
        String testDir    = "src/test/resources/createCatalog/";
        String catalogIn  = testDir + "catalog.unsorted.tsv";
        // Deliberately missing the required ".tsv.bgz" extension
        String outputPath = mTempFolder.newFolder().getAbsolutePath() + "/catalog.sorted";

        CommandOutput out = runCmdApp("-i", catalogIn, "-o", outputPath);

        // Verify that it failed because the output catalog name must end with ".tsv.bgz"
        assertTrue(out.stderr, out.stderr.contains("Application error bior_create_catalog:\n: Error: Catalog file should end with extension .tsv.bgz"));
        assertEquals(out.stdout, "", out.stdout);
        // assertEquals (instead of assertTrue(1 == exit)) reports the actual exit code on failure
        assertEquals(out.stderr, 1, out.exit);
    }
    
    /**
     * Runs the command with the -j and -k flags and expects the output to hold only the
     * JSON column, with rows left in input order and landmarks left exactly as given.
     *
     * @throws IOException if a temp file or folder cannot be created or read
     */
    @Test
    public void jsonOnly() throws IOException {
        // First-column text of each input row, paired by index with the JSON records below
        final String[] labels = { "SomeTxt", "SomeTxt", "SomeTxt", "SomeTxt2" };
        final String[] records = {
            "{'_landmark':'41','_minBP':300,'_maxBP':300}",    // default - numeric only
            "{'_landmark':'MT','_minBP':300,'_maxBP':300}",    // Human chrom
            "{'_landmark':'1','_minBP':300,'_maxBP':300}",     // Human chrom
            "{'_landmark':'chry','_minBP':200,'_maxBP':200}"   // User-specified (1)
        };

        // Input rows are "label<TAB>json"; expected rows are the bare json, in the same order.
        // Rows are separated by EOL with no trailing EOL after the last one.
        StringBuilder tsvRows = new StringBuilder();
        StringBuilder jsonRows = new StringBuilder();
        for (int i = 0; i < records.length; i++) {
            if (i > 0) {
                tsvRows.append(EOL);
                jsonRows.append(EOL);
            }
            tsvRows.append(labels[i]).append('\t').append(records[i]);
            jsonRows.append(records[i]);
        }
        File inputFile = writeStringToTempFile(tsvRows.toString().replaceAll("'", "\""));

        String catalogOut = mTempFolder.newFolder().getAbsolutePath() + "/catalog.sorted.tsv.bgz";
        CommandOutput result = runCmdApp("-i", inputFile.getCanonicalPath(), "-o", catalogOut, "-j", "-k");

        assertNoErrorsOrNonZeroExitCode(result);

        // Use this to debug:
        //printFilesInParentDir(catalogOut);

        File expectedFile = writeStringToTempFile(jsonRows.toString().replaceAll("'", "\""));

        verifyCatalogsSame(expectedFile.getCanonicalPath(), catalogOut);
    }
    
    
    /**
     * Runs the command with an explicit temp directory (-t) and log file (--logfile).
     * Verifies that the landmark-correction warnings are written to the log file rather than
     * stderr, and that the resulting catalog is sorted with the corrected landmarks.
     *
     * @throws IOException if a temp file or folder cannot be created or read
     */
    @Test
    public void specifyTempDir() throws IOException {
        String inputStr = ( // Neither -j nor -k is passed, so rows are sorted and landmarks corrected
                "SomeTxt\t{'_landmark':'41','_minBP':300,'_maxBP':300}" + EOL +     // default - numeric only
                "SomeTxt\t{'_landmark':'MT','_minBP':300,'_maxBP':300}" + EOL +     // Human chrom - expected to become "M"
                "SomeTxt\t{'_landmark':'1','_minBP':300,'_maxBP':300}" + EOL +      // Human chrom
                "SomeTxt2\t{'_landmark':'chry','_minBP':200,'_maxBP':200}"          // expected to become "Y"
                ).replaceAll("'", "\"");
        File catalogIn = writeStringToTempFile(inputStr);

        // Created under the JUnit-managed folder so nothing is left behind on disk after the test
        File tempDirParent = mTempFolder.newFolder();
        File tempDir = new File(tempDirParent, "myTempDir");
        tempDir.mkdirs();

        String outputPath = mTempFolder.newFolder().getAbsolutePath() + "/catalog.sorted.tsv.bgz";
        // Built with File(parent, child): appending "bior.log" straight onto a directory path
        // drops the separator.  The log sits beside the temp dir rather than inside it, in case
        // the command cleans that directory up.
        String logFilePath = new File(tempDirParent, "bior.log").getCanonicalPath();
        CommandOutput output = runCmdApp("-i", catalogIn.getCanonicalPath(), "-o", outputPath, "-t", tempDir.getCanonicalPath(), "--logfile", logFilePath);

        // Warnings are now going to the logfile instead of stderr
        assertEquals(output.stderr, "", output.stderr);
        String logContents = FileUtils.readFileToString(new File(logFilePath));
        String expectedLine1 = ("Warning: _landmark was changed from MT to M.  Correcting offending JSON (only first original shown): {'_landmark':'MT','_minBP':300,'_maxBP':300}" + EOL).replaceAll("'", "\"");
        String expectedLine2 = ("Warning: _landmark was changed from chry to Y.  Correcting offending JSON (only first original shown): {'_landmark':'chry','_minBP':200,'_maxBP':200}" + EOL).replaceAll("'", "\"");
        assertTrue(logContents, logContents.contains(expectedLine1));
        assertTrue(logContents, logContents.contains(expectedLine2));
        // Expected value first, so a failure reads "expected 0 but was <exit code>"
        assertEquals(output.stderr, 0, output.exit);

        // Use this to debug
        //printFilesInParentDir(outputPath);

        String expectedStr = (
                "1\t300\t300\t{'_landmark':'1','_minBP':300,'_maxBP':300}" + EOL +      // Human chrom
                "Y\t200\t200\t{'_landmark':'Y','_minBP':200,'_maxBP':200}" + EOL +      // was "chry"
                "M\t300\t300\t{'_landmark':'M','_minBP':300,'_maxBP':300}" + EOL +      // was "MT"
                "41\t300\t300\t{'_landmark':'41','_minBP':300,'_maxBP':300}"            // default - numeric only
                ).replaceAll("'", "\"");
        File expected = writeStringToTempFile(expectedStr);

        verifyCatalogsSame(expected.getCanonicalPath(), outputPath);
    }
    

    
    /**
     * By default the command should sort.  Also checks that the "not an integer" warnings for
     * quoted _minBP/_maxBP values are written to the log file and not to stderr/stdout.
     *
     * @throws IOException if a temp folder or file cannot be created or read
     * @throws InterruptedException retained in the signature for compatibility
     */
    @Test
    public void cmdApp_sort() throws IOException, InterruptedException {
        String catalogIn = "src/test/resources/createCatalog/catalog2.unsorted.tsv";
        File tempFolder = mTempFolder.newFolder();
        String outputPath = tempFolder.getAbsolutePath() + "/catalog.sorted.tsv.bgz";
        // File(parent, child) inserts the path separator that plain concatenation onto the folder path omits
        String logFilePath = new File(tempFolder, "bior.log").getCanonicalPath();
        CommandOutput output = runCmdApp("-i", catalogIn, "-o", outputPath, "--logfile", logFilePath);

        // Warnings are now going to the logfile instead of stderr
        String logContents = FileUtils.readFileToString(new File(logFilePath));
        String expectedLine1 = ("Warning: _minBP was not an integer.  Correcting offending JSON (only first original shown): {'_landmark':'1','_minBP':'120','_maxBP':'120'}" + EOL).replaceAll("'", "\"");
        String expectedLine2 = ("Warning: _maxBP was not an integer.  Correcting offending JSON (only first original shown): {'_landmark':'1','_minBP':'120','_maxBP':'120'}" + EOL).replaceAll("'", "\"");
        assertTrue(logContents, logContents.contains(expectedLine1));
        assertTrue(logContents, logContents.contains(expectedLine2));

        // No warnings to stderr or stdout
        assertEquals(output.stderr, "", output.stderr);
        assertEquals(output.stdout, "", output.stdout);
        // Expected value first, so a failure reads "expected 0 but was <exit code>"
        assertEquals(output.stderr, 0, output.exit);

        // Use this to debug:
        //printFilesInParentDir(outputPath);

        String expected = "src/test/resources/createCatalog/catalog2.sorted.tsv.bgz";
        verifyCatalogsSame(expected, outputPath);
    }

    /**
     * Supplies a user-specified chromosome order file but runs with -k and -n, and expects the
     * rows to come back in their input order with every landmark left untouched.
     *
     * @throws IOException if a temp file or folder cannot be created or read
     */
    @Test
    public void cmdLine_noSort_noChromChg() throws IOException {
        // Parallel arrays: first-column text, expected "landmark<TAB>min<TAB>max" prefix, JSON record
        final String[] labels = { "SomeTxt", "SomeTxt", "SomeTxt", "SomeTxt2" };
        final String[] keys = { "41\t300\t300", "MT\t300\t300", "1\t300\t300", "chry\t200\t200" };
        final String[] records = {
            "{'_landmark':'41','_minBP':300,'_maxBP':300}",    // default - numeric only
            "{'_landmark':'MT','_minBP':300,'_maxBP':300}",    // Human chrom
            "{'_landmark':'1','_minBP':300,'_maxBP':300}",     // Human chrom
            "{'_landmark':'chry','_minBP':200,'_maxBP':200}"   // User-specified (1)
        };

        // Rows are separated by EOL with no trailing EOL after the last one
        StringBuilder inputRows = new StringBuilder();
        StringBuilder expectedRows = new StringBuilder();
        for (int i = 0; i < records.length; i++) {
            if (i > 0) {
                inputRows.append(EOL);
                expectedRows.append(EOL);
            }
            inputRows.append(labels[i]).append('\t').append(records[i]);
            expectedRows.append(keys[i]).append('\t').append(records[i]);
        }
        File inputFile = writeStringToTempFile(inputRows.toString().replaceAll("'", "\""));

        // User-specified chromosome sort order file
        File sortOrderFile = writeStringToTempFile("chrY\nCHRZ");

        String catalogOut = mTempFolder.newFolder().getAbsolutePath() + "/catalog.sorted.tsv.bgz";

        CommandOutput result = runCmdApp("-i", inputFile.getCanonicalPath(), "-o", catalogOut, "-f", sortOrderFile.getCanonicalPath(), "-k", "-n");

        assertNoErrorsOrNonZeroExitCode(result);

        // Use this to debug:
        //printFilesInParentDir(catalogOut);

        File expectedFile = writeStringToTempFile(expectedRows.toString().replaceAll("'", "\""));

        verifyCatalogsSame(expectedFile.getCanonicalPath(), catalogOut);
    }

    /**
     * JSON column is not the last column, so the -c flag has to point to it.
     * Covers three cases: a positive column index, a negative index that is expected to yield
     * the same catalog, and an out-of-range index that must fail with exit code 1.
     *
     * @throws IOException if a temp file or folder cannot be created or read
     */
    @Test
    public void jsonNotLastCol() throws IOException {
        String inputStr = ( // Rows are deliberately not in sorted order
                "SomeTxt\t{'_landmark':'41','_minBP':300,'_maxBP':300}\tcolx\tcoly\tcolz" + EOL +      // default - numeric only
                "SomeTxt\t{'_landmark':'MT','_minBP':300,'_maxBP':300}\tcolx\tcoly\tcolz" + EOL +      // Human chrom
                "SomeTxt\t{'_landmark':'1','_minBP':300,'_maxBP':300}\tcolx\tcoly\tcolz" + EOL +       // Human chrom
                "SomeTxt2\t{'_landmark':'chry','_minBP':200,'_maxBP':200}\tcolx\tcoly\tcolz"           // User-specified (1)
                ).replaceAll("'", "\"");
        File catalogIn = writeStringToTempFile(inputStr);

        String outputPath = mTempFolder.newFolder().getAbsolutePath() + "/catalog.sorted.tsv.bgz";
        CommandOutput output = runCmdApp("-i", catalogIn.getCanonicalPath(), "-o", outputPath, "-k", "-c", "2");

        assertNoErrorsOrNonZeroExitCode(output);

        // Use this to debug:
        //printFilesInParentDir(outputPath);

        String expectedStr = (
                "1\t300\t300\t{'_landmark':'1','_minBP':300,'_maxBP':300}" + EOL +           // Human chrom
                "chry\t200\t200\t{'_landmark':'chry','_minBP':200,'_maxBP':200}" + EOL +     // User-specified (1)
                "MT\t300\t300\t{'_landmark':'MT','_minBP':300,'_maxBP':300}" + EOL +         // Human chrom
                "41\t300\t300\t{'_landmark':'41','_minBP':300,'_maxBP':300}"                 // default - numeric only
                ).replaceAll("'", "\"");
        File expected = writeStringToTempFile(expectedStr);

        verifyCatalogsSame(expected.getCanonicalPath(), outputPath);


        //---------------------
        // Now try it with a negative column (presumably counted back from the last column)
        outputPath = mTempFolder.newFolder().getAbsolutePath() + "/catalog.2.sorted.tsv.bgz";
        output = runCmdApp("-i", catalogIn.getCanonicalPath(), "-o", outputPath, "-k", "-c", "-4");
        assertNoErrorsOrNonZeroExitCode(output);
        // Use this to debug (kept disabled like in the other tests to avoid noisy test output):
        //printFilesInParentDir(outputPath);
        verifyCatalogsSame(expected.getCanonicalPath(), outputPath);

        //---------------------
        // Now try it with a column that is out of range - the command should report an
        // application error on stderr and exit with code 1
        outputPath = mTempFolder.newFolder().getAbsolutePath() + "/catalog.3.sorted.tsv.bgz";
        output = runCmdApp("-i", catalogIn.getCanonicalPath(), "-o", outputPath, "-k", "-c", "10");
        assertTrue(output.stderr, output.stderr.contains("Application error bior_create_catalog")  &&  output.stderr.contains("JSON column (9) is out of range on row:"));
        assertEquals(output.stderr, 1, output.exit);
    }

	
	/**
	 * Landmarks given as "chr1", "chr2", etc. should have the "chr" prefix stripped off, both
	 * in the first (landmark) column and in the _landmark field of the JSON.  Each correction
	 * is expected to be reported as a warning in the log file.
	 *
	 * @throws IOException if a temp file or folder cannot be created or read
	 */
	@Test
	public void chrPrefixStrippedOff() throws IOException {
		String inputStr = (
			"SomeText\t{'_landmark':'chr1','_minBP':100,'_maxBP':100,'_altAlleles':['A'],'INFO':{'key':'val'}}\n" +
			"SomeText2\t{'_landmark':'chr2','_minBP':200,'_maxBP':200,'_altAlleles':['A','C','G']}"
			).replaceAll("'", "\"");
		File catalogIn = writeStringToTempFile(inputStr);

		File tempFolder = mTempFolder.newFolder();
		String outputPath = tempFolder.getAbsolutePath() + "/catalog.sorted.tsv.bgz";
		// File(parent, child) inserts the path separator that plain concatenation onto the folder path omits
		String logFilePath = new File(tempFolder, "bior.log").getCanonicalPath();
		CommandOutput output = runCmdApp("-i", catalogIn.getCanonicalPath(), "-o", outputPath, "--logfile", logFilePath);

		// Warnings are now going to the logfile instead of stderr
		String logContents = FileUtils.readFileToString(new File(logFilePath));
		String expectedLine1 = ("Warning: _landmark was changed from chr1 to 1.  Correcting offending JSON (only first original shown): {'_landmark':'chr1','_minBP':100,'_maxBP':100,'_altAlleles':['A'],'INFO':{'key':'val'}}\n").replaceAll("'", "\"");
		String expectedLine2 = ("Warning: _landmark was changed from chr2 to 2.  Correcting offending JSON (only first original shown): {'_landmark':'chr2','_minBP':200,'_maxBP':200,'_altAlleles':['A','C','G']}\n").replaceAll("'", "\"");
		assertTrue(logContents, logContents.contains(expectedLine1));
		assertTrue(logContents, logContents.contains(expectedLine2));

		// No warnings to stderr or stdout
		assertEquals(output.stderr, "", output.stderr);
		assertEquals(output.stdout, "", output.stdout);
		// Expected value first, so a failure reads "expected 0 but was <exit code>"
		assertEquals(output.stderr, 0, output.exit);

		// Use this to debug:
		//printFilesInParentDir(outputPath);

		String expectedStr = (
				"1\t100\t100\t{'_landmark':'1','_minBP':100,'_maxBP':100,'_altAlleles':['A'],'INFO':{'key':'val'}}\n" +
				"2\t200\t200\t{'_landmark':'2','_minBP':200,'_maxBP':200,'_altAlleles':['A','C','G']}"
				).replaceAll("'", "\"");
		File expected = writeStringToTempFile(expectedStr);

		verifyCatalogsSame(expected.getCanonicalPath(), outputPath);
	}
	

	
	/**
	 * Test sort of chromosomes (and stripping of the "chr" prefix) when the user specifies a
	 * chromosome sort order file.
	 * <ul>
	 *   <li>Chromosome matching should NOT be case sensitive</li>
	 *   <li>Known human chromosomes should get their standard capitalization ("x" -> "X")</li>
	 *   <li>Order of preference: user-specified, then human list, then default sort order</li>
	 *   <li>Header, metadata and blank lines in the input should not appear in the catalog</li>
	 * </ul>
	 *
	 * @throws IOException if a temp file or folder cannot be created or read
	 */
	@Test
	public void userSpecifiedSortOrder_chrPrefixStrippedOff() throws IOException {
		String inputStr = ( // Specified in the exact opposite order that they SHOULD appear in:
				"someTxt\t{'_landmark':'UNKNOWN','_minBP':0,'_maxBP':0}" + EOL +			// UNKNOWN - this should be last
				"SomeTxt\t{'_landmark':'DD_alpha','_minBP':300,'_maxBP':300}" + EOL +		// default - alpha only
				"SomeTxt\t{'_landmark':'41_misc','_minBP':300,'_maxBP':300}" + EOL +		// default - alphanumeric with integer out front
				"SomeTxt\t{'_landmark':'Un_gl01','_minBP':300,'_maxBP':300}" + EOL +		// default - Unknown
				"SomeTxt\t{'_landmark':'41_unknown','_minBP':300,'_maxBP':300}" + EOL +		// default - integer and "unknown"
				"SomeTxt\t{'_landmark':'41_random','_minBP':300,'_maxBP':300}" + EOL +		// default - integer and "random"
				"SomeTxt\t{'_landmark':'KT','_minBP':300,'_maxBP':300}" + EOL +				// default - alpha only
				"SomeTxt\t{'_landmark':'401','_minBP':300,'_maxBP':300}" + EOL +			// default - numeric only, but larger number
				"SomeTxt\t{'_landmark':'41','_minBP':300,'_maxBP':300}" + EOL +				// default - numeric only
				"SomeTxt\t{'_landmark':'M','_minBP':300,'_maxBP':300}" + EOL +				// Human chrom
				"SomeTxt\t{'_landmark':'XY','_minBP':300,'_maxBP':300}" + EOL +				// Human chrom
				"SomeTxt\t{'_landmark':'x','_minBP':300,'_maxBP':300}" + EOL +				// Human chrom - should be capitalized to "X"
				"SomeTxt\t{'_landmark':'23','_minBP':200,'_maxBP':200}" + EOL +				// Human chrom - "23" should be converted to "X"
				"SomeTxt\t{'_landmark':'19','_minBP':301,'_maxBP':302}" + EOL +				// Human chrom
				"SomeTxt\t{'_landmark':'19','_minBP':301,'_maxBP':302,'b':2}" + EOL +		// Human chrom - same position as above, extra 'b' field
				"SomeTxt\t{'_landmark':'19','_minBP':301,'_maxBP':302,'a':1}" + EOL +		// Human chrom - same position as above, extra 'a' field
				"SomeTxt\t{'_landmark':'19','_minBP':'300','_maxBP':'305'}" + EOL +			// Human chrom -- NOTE: Both quoted _minBP and _maxBP
				"SomeTxt\t{'_landmark':'19','_minBP':300,'_maxBP':'300'}" + EOL +			// Human chrom -- NOTE: Quoted _maxBP, but previous one already noted, so this should not generate a warning
				"SomeTxt\t{'_landmark':'2','_minBP':300,'_maxBP':300}" + EOL +				// Human chrom
				"SomeTxt\t{'_landmark':1,'_minBP':300,'_maxBP':300}" + EOL +				// Human chrom -- NOTE: Landmark is an integer, but should be a string
				"SomeText\t{'_landmark':'ChrZ','_minBP':100,'_maxBP':100}" + EOL +			// User-specified (2)
				"SomeTxt2\t{'_landmark':'chry','_minBP':200,'_maxBP':200}" + EOL +			// User-specified (1)
				"#Txt\tJson" + EOL +														// Column header row - removed
				"##Some metadata 2" + EOL +													// Metadata - removed
				"##Some metadata 1" + EOL +													// Metadata - removed
				" \n\n\n"																	// NOTE: Blank lines at the end should be ignored!
				).replaceAll("'", "\"");
		File catalogIn = writeStringToTempFile(inputStr);

		// User-specified chromosome sort order file
		File userSpecifiedSortOrder = writeStringToTempFile("chrY\nCHRZ");

		File tempFolder = mTempFolder.newFolder();
		String outputPath = tempFolder.getAbsolutePath() + "/catalog.sorted.tsv.bgz";
		// File(parent, child) inserts the path separator that plain concatenation onto the folder path omits
		String logFilePath = new File(tempFolder, "bior.log").getCanonicalPath();
		CommandOutput output = runCmdApp("-i", catalogIn.getCanonicalPath(), "-o", outputPath, "-f", userSpecifiedSortOrder.getCanonicalPath(), "--logfile", logFilePath);

		// Warnings are now going to the logfile instead of stderr
		String logContents = FileUtils.readFileToString(new File(logFilePath));
		String expectedLine1 = ("Warning: _landmark was changed from x to X.  Correcting offending JSON (only first original shown): {'_landmark':'x','_minBP':300,'_maxBP':300}" + EOL).replaceAll("'", "\"");
		String expectedLine2 = ("Warning: _landmark was changed from 23 to X.  Correcting offending JSON (only first original shown): {'_landmark':'23','_minBP':200,'_maxBP':200}" + EOL).replaceAll("'", "\"");
		String expectedLine3 = ("Warning: _minBP was not an integer.  Correcting offending JSON (only first original shown): {'_landmark':'19','_minBP':'300','_maxBP':'305'}" + EOL).replaceAll("'", "\"");
		String expectedLine4 = ("Warning: _maxBP was not an integer.  Correcting offending JSON (only first original shown): {'_landmark':'19','_minBP':'300','_maxBP':'305'}" + EOL).replaceAll("'", "\"");
		String expectedLine5 = ("Warning: _landmark was not a String.  Correcting offending JSON (only first original shown): {'_landmark':1,'_minBP':300,'_maxBP':300}" + EOL).replaceAll("'", "\"");
		String expectedLine6 = ("Warning: _landmark was changed from chry to Y.  Correcting offending JSON (only first original shown): {'_landmark':'chry','_minBP':200,'_maxBP':200}" + EOL).replaceAll("'", "\"");
		assertTrue(logContents, logContents.contains(expectedLine1));
		assertTrue(logContents, logContents.contains(expectedLine2));
		assertTrue(logContents, logContents.contains(expectedLine3));
		assertTrue(logContents, logContents.contains(expectedLine4));
		assertTrue(logContents, logContents.contains(expectedLine5));
		assertTrue(logContents, logContents.contains(expectedLine6));

		// No warnings to stderr or stdout
		assertEquals(output.stderr, "", output.stderr);
		assertEquals(output.stdout, "", output.stdout);
		// Expected value first, so a failure reads "expected 0 but was <exit code>"
		assertEquals(output.stderr, 0, output.exit);

		// Use this to debug:
		//printFilesInParentDir(outputPath);

		String expectedStr = (
				"Y\t200\t200\t{'_landmark':'Y','_minBP':200,'_maxBP':200}" + EOL +					// User-specified (1)
				"ChrZ\t100\t100\t{'_landmark':'ChrZ','_minBP':100,'_maxBP':100}" + EOL +			// User-specified (2)
				"1\t300\t300\t{'_landmark':'1','_minBP':300,'_maxBP':300}" + EOL +					// Human chrom
				"2\t300\t300\t{'_landmark':'2','_minBP':300,'_maxBP':300}" + EOL +					// Human chrom
				"19\t300\t300\t{'_landmark':'19','_minBP':300,'_maxBP':300}" + EOL +				// Human chrom
				"19\t300\t305\t{'_landmark':'19','_minBP':300,'_maxBP':305}" + EOL +				// Human chrom
				"19\t301\t302\t{'_landmark':'19','_minBP':301,'_maxBP':302,'a':1}" + EOL +			// Human chrom - same position, extra 'a' field
				"19\t301\t302\t{'_landmark':'19','_minBP':301,'_maxBP':302,'b':2}" + EOL +			// Human chrom - same position, extra 'b' field
				"19\t301\t302\t{'_landmark':'19','_minBP':301,'_maxBP':302}" + EOL +				// Human chrom
				"X\t200\t200\t{'_landmark':'X','_minBP':200,'_maxBP':200}" + EOL +					// Human chrom - 23 converted to "X"
				"X\t300\t300\t{'_landmark':'X','_minBP':300,'_maxBP':300}" + EOL +					// Human chrom - "x" capitalized to "X"
				"XY\t300\t300\t{'_landmark':'XY','_minBP':300,'_maxBP':300}" + EOL +				// Human chrom
				"M\t300\t300\t{'_landmark':'M','_minBP':300,'_maxBP':300}" + EOL +					// Human chrom
				"41\t300\t300\t{'_landmark':'41','_minBP':300,'_maxBP':300}" + EOL +				// default - numeric only
				"401\t300\t300\t{'_landmark':'401','_minBP':300,'_maxBP':300}" + EOL +				// default - numeric only, but larger number
				"KT\t300\t300\t{'_landmark':'KT','_minBP':300,'_maxBP':300}" + EOL +				// default - alpha only
				"41_random\t300\t300\t{'_landmark':'41_random','_minBP':300,'_maxBP':300}" + EOL +	// default - integer and "random"
				"41_unknown\t300\t300\t{'_landmark':'41_unknown','_minBP':300,'_maxBP':300}" + EOL +	// default - integer and "unknown"
				"Un_gl01\t300\t300\t{'_landmark':'Un_gl01','_minBP':300,'_maxBP':300}" + EOL +		// default - Unknown
				"41_misc\t300\t300\t{'_landmark':'41_misc','_minBP':300,'_maxBP':300}" + EOL +		// default - alphanumeric with integer out front
				"DD_alpha\t300\t300\t{'_landmark':'DD_alpha','_minBP':300,'_maxBP':300}" + EOL +	// default - alpha only
				"UNKNOWN\t0\t0\t{'_landmark':'UNKNOWN','_minBP':0,'_maxBP':0}"						// UNKNOWN - this should be last
				).replaceAll("'", "\"");
		File expected = writeStringToTempFile(expectedStr);

		verifyCatalogsSame(expected.getCanonicalPath(), outputPath);
	}
	
	
	/**
	 * Positions on one chromosome that share a leading-digit prefix: they would be ordered
	 * wrongly if compared as strings, so the catalog must come back in numeric _minBP order.
	 * May want to try splitting across multiple files to see if it sorts correctly (one file per two lines).
	 *
	 * @throws IOException if a temp file or folder cannot be created or read
	 */
	@Test
	public void similarMinbps() throws IOException {
		// _minBP (== _maxBP) of each row, in the order the rows are fed to the command
		final int[] unsortedPositions = {
			10084329, 100, 1008, 1008432, 10084, 100843, 10, 1008431, 100843291
		};
		// The same positions in the order the catalog rows are expected to come back
		final int[] sortedPositions = {
			10, 100, 1008, 10084, 100843, 1008431, 1008432, 10084329, 100843291
		};

		// Every input row is just the JSON record followed by a newline
		StringBuilder inputRows = new StringBuilder();
		for (int pos : unsortedPositions) {
			inputRows.append("{'_landmark':'1','_minBP':").append(pos)
					.append(",'_maxBP':").append(pos).append("}\n");
		}
		File inputFile = writeStringToTempFile(inputRows.toString().replaceAll("'", "\""));

		String catalogOut = mTempFolder.newFolder().getAbsolutePath() + "/catalog.sorted.tsv.bgz";
		CommandOutput result = runCmdApp("-i", inputFile.getCanonicalPath(), "-o", catalogOut);

		assertNoErrorsOrNonZeroExitCode(result);

		// Use this to debug:
		//printFilesInParentDir(catalogOut);

		// Every expected row is "1<TAB>min<TAB>max<TAB>json" followed by a newline
		StringBuilder expectedRows = new StringBuilder();
		for (int pos : sortedPositions) {
			expectedRows.append("1\t").append(pos).append('\t').append(pos).append('\t')
					.append("{'_landmark':'1','_minBP':").append(pos)
					.append(",'_maxBP':").append(pos).append("}\n");
		}
		File expectedFile = writeStringToTempFile(expectedRows.toString().replaceAll("'", "\""));

		verifyCatalogsSame(expectedFile.getCanonicalPath(), catalogOut);
	}


    /**
     * Should get an error if _maxBP != (_minBP + _refAllele.length() - 1).
     * The error is expected both on stderr (with the application error banner) and in the
     * log file, and the command should exit with code 1.
     *
     * @throws IOException if a temp file or folder cannot be created or read
     * @throws InterruptedException retained in the signature for compatibility
     */
    @Test
    public void error_badRangeVsRefAllele() throws IOException, InterruptedException {
        // NOTE: since _refAllele is 3 chrs, _maxBP should be 102
        String inputStr = "{'_landmark':'1','_minBP':100,'_maxBP':101,'_refAllele':'ACG'}".replaceAll("'", "\"");
        File catalogIn = writeStringToTempFile(inputStr);
        File tempFolder = mTempFolder.newFolder();
        String outputPath = tempFolder.getAbsolutePath() + "/catalog.sorted.tsv.bgz";
        // File(parent, child) inserts the path separator that plain concatenation onto the folder path omits
        String logFilePath = new File(tempFolder, "bior.log").getCanonicalPath();
        CommandOutput output = runCmdApp("-i", catalogIn.getCanonicalPath(), "-o", outputPath, "--logfile", logFilePath);

        String logContents = FileUtils.readFileToString(new File(logFilePath));
        String expectedLine1 = "Application error bior_create_catalog:";
        String expectedLine2 = (": Error:  length of the refAllele does not equal (max-min+1): {'_landmark':'1','_minBP':100,'_maxBP':101,'_refAllele':'ACG'}").replaceAll("'", "\"");

        // Warnings are now going to the logfile instead of stderr, but since this particular one causes an exception it shows up in stderr as well
        assertTrue(output.stderr, output.stderr.contains(expectedLine1));
        assertTrue(output.stderr, output.stderr.contains(expectedLine2));
        assertTrue(logContents, logContents.contains(expectedLine2));
        assertEquals(output.stdout, "", output.stdout);
        // Expected value first, so a failure reads "expected 1 but was <exit code>"
        assertEquals(output.stderr, 1, output.exit);
    }

    /**
     * Chromosome "23" should be converted to "X", both in the first (landmark) column and in
     * the _landmark JSON field, and the converted row should sort together with the rows that
     * were already on "X".  The JSON is read from column 7 of the input (-c 7).
     *
     * @throws IOException if a temp folder or file cannot be created or read
     * @throws InterruptedException retained in the signature for compatibility
     */
    @Test
    public void cmdLine_createCatalog_chrX_handling() throws IOException, InterruptedException {
        String tjsonTSV = "src/test/resources/createCatalog/tjson_chrX_test.tsv";
        File tempFolder = mTempFolder.newFolder();
        // File(parent, child) inserts the path separator that plain concatenation onto the folder path omits
        String logFilePath = new File(tempFolder, "bior.log").getCanonicalPath();
        String outputPath = tempFolder.getAbsolutePath() + "/chrXTest.sorted.tsv.bgz";
        CommandOutput output = runCmdApp("-i", tjsonTSV, "-o", outputPath, "-c", "7", "--logfile", logFilePath);

        // Warnings are now going to the logfile instead of stderr
        String logContents = FileUtils.readFileToString(new File(logFilePath));
        String expectedLine1 = "_landmark was changed from 23 to X";
        assertTrue(logContents, logContents.contains(expectedLine1));

        Assert.assertEquals("", output.stderr);
        Assert.assertEquals("", output.stdout);
        Assert.assertEquals(0,  output.exit);


        List<String> actual = readFile(outputPath);
        List<String> expected = Arrays.asList(
                // chrom 15 should be first.  NOTE: _maxBP is calculated
                concat("15", "33333", "33333", "{'chr':'15','pos':33333,'gene':'gene333','ref':'C','alts':['G'],'rsid':'rs3333','_landmark':'15','_minBP':33333,'_refAllele':'C','_id':'rs3333','_maxBP':33333}".replaceAll("'", "\"")),
                // chrom '23' converted to 'X' in both the tabix first column and the _landmark field.  Chrom correctly converted and then added before the standard 'X' chrom at position 22222
                concat("X",  "11111", "11112", "{'chr':'23','pos':11111,'gene':'gene111','ref':'TT','alts':['A','C'],'rsid':'rs1111','_landmark':'X','_minBP':11111,'_refAllele':'TT','_id':'rs1111','_maxBP':11112}".replaceAll("'", "\"")),
                concat("X",  "22222", "22222", "{'chr':'X','pos':22222,'gene':'gene222','ref':'G','alts':['A','C'],'rsid':'rs2222','_landmark':'X','_minBP':22222,'_refAllele':'G','_id':'rs2222','_maxBP':22222}".replaceAll("'", "\""))
                );
        PipeTestUtils.assertListsEqual(expected, actual);
    }
    
    
	/**
	 * Specify a build (-b) from which to take the chromosome sort order.
	 * Covers GRCh37 (also in lower case), GRCh38, and an unrecognized build that must be
	 * rejected with exit code 1 and a message listing the valid assemblies.
	 *
	 * @throws IOException if a temp file or folder cannot be created or read
	 */
	@Test
	public void validBuild() throws IOException {
		String inputStr = (
			"{'_minBP':100,'_maxBP':100,'_landmark':'Un_gl000211'}\n" +
			"{'_minBP':100,'_maxBP':100,'_landmark':'11_gl000202_random'}\n" +
			"{'_minBP':100,'_maxBP':100,'_landmark':'M'}\n" +
			"{'_minBP':100,'_maxBP':100,'_landmark':'1'}\n" +
			"{'_minBP':100,'_maxBP':100,'_landmark':'1_KI270706v1_random'}"  // expected just after M under GRCh38
			).replaceAll("'", "\"");
		File catalogIn = writeStringToTempFile(inputStr);


		// GRCh37 - ok
		String outputPath37 = mTempFolder.newFolder().getAbsolutePath() + "/catalog.37.sorted.tsv.bgz";
		CommandOutput output37 = runCmdApp("-i", catalogIn.getCanonicalPath(), "-o", outputPath37, "-b", "GRCh37");
		assertNoErrorsOrNonZeroExitCode(output37);
		// Use this to debug (kept disabled like in the other tests to avoid noisy test output):
		//printFilesInParentDir(outputPath37);
		String expectedStrGrch37 = (
				"1\t100\t100\t{'_minBP':100,'_maxBP':100,'_landmark':'1'}\n" +
				"M\t100\t100\t{'_minBP':100,'_maxBP':100,'_landmark':'M'}\n" +
				"11_gl000202_random\t100\t100\t{'_minBP':100,'_maxBP':100,'_landmark':'11_gl000202_random'}\n" +
				"Un_gl000211\t100\t100\t{'_minBP':100,'_maxBP':100,'_landmark':'Un_gl000211'}\n" +
				"1_KI270706v1_random\t100\t100\t{'_minBP':100,'_maxBP':100,'_landmark':'1_KI270706v1_random'}"  // sorts last here; presumably not a GRCh37 contig
				).replaceAll("'", "\"");
		File expected37 = writeStringToTempFile(expectedStrGrch37);
		verifyCatalogsSame(expected37.getCanonicalPath(), outputPath37);


		// grch37 - ok (the build name in lower case must give the same result)
		String outputPath37Lower = mTempFolder.newFolder().getAbsolutePath() + "/catalog.37lower.sorted.tsv.bgz";
		CommandOutput output37Lower = runCmdApp("-i", catalogIn.getCanonicalPath(), "-o", outputPath37Lower, "-b", "grch37");
		assertNoErrorsOrNonZeroExitCode(output37Lower);
		// Use this to debug:
		//printFilesInParentDir(outputPath37Lower);
		verifyCatalogsSame(expected37.getCanonicalPath(), outputPath37Lower);


		// GRCh38 - ok
		String outputPath38 = mTempFolder.newFolder().getAbsolutePath() + "/catalog.38.sorted.tsv.bgz";
		CommandOutput output38 = runCmdApp("-i", catalogIn.getCanonicalPath(), "-o", outputPath38, "-b", "GRCh38");
		assertNoErrorsOrNonZeroExitCode(output38);
		// Use this to debug:
		//printFilesInParentDir(outputPath38);
		String expectedStrGrch38 = (
				"1\t100\t100\t{'_minBP':100,'_maxBP':100,'_landmark':'1'}\n" +
				"M\t100\t100\t{'_minBP':100,'_maxBP':100,'_landmark':'M'}\n" +
				"1_KI270706v1_random\t100\t100\t{'_minBP':100,'_maxBP':100,'_landmark':'1_KI270706v1_random'}\n" + // GRCH38, just after M
				"11_gl000202_random\t100\t100\t{'_minBP':100,'_maxBP':100,'_landmark':'11_gl000202_random'}\n" +
				"Un_gl000211\t100\t100\t{'_minBP':100,'_maxBP':100,'_landmark':'Un_gl000211'}"
				).replaceAll("'", "\"");
		File expected38 = writeStringToTempFile(expectedStrGrch38);
		verifyCatalogsSame(expected38.getCanonicalPath(), outputPath38);


		// GRCh39 - invalid option (as of right now)
		String outputPath39 = mTempFolder.newFolder().getAbsolutePath() + "/catalog.39.sorted.tsv.bgz";
		CommandOutput outputInvalid = runCmdApp("-i", catalogIn.getCanonicalPath(), "-o", outputPath39, "-b", "GRCh39");
		assertEquals(outputInvalid.stderr, 1, outputInvalid.exit);
		String expectedErrorMsg = "Error in bior_create_catalog\n"
				+ "Build Assembly not recognized: GRCh39\n"
				+ "Valid build assemblies:\n"
				+ "  GRCh37\n"
				+ "  GRCh38\n\n\n";
		assertEquals(outputInvalid.stderr, expectedErrorMsg, outputInvalid.stderr);
		// stderr is used as the failure message; the value under test here is stdout
		assertEquals(outputInvalid.stderr, "", outputInvalid.stdout);

	}
	
	
    /**
     * Verifies that line counts above {@code Integer.MAX_VALUE} are reported correctly in the log file.
     * The starting line counter is moved to just below {@code Integer.MAX_VALUE} and progress is
     * reported for every line, so the logged counts must cross the int boundary without going negative.
     *
     * NOTE(review): the TjsonToCatalog static fields changed here are not restored afterwards,
     * so the new values may leak into tests that run later in the same JVM - confirm whether that matters.
     */
    @Test
    public void testLargeRowCount() throws IOException, InterruptedException {
    	String catalogIn = "src/test/resources/createCatalog/catalog2.unsorted.tsv";
    	File tempFolder = mTempFolder.newFolder();
    	String outputPath = new File(tempFolder, "catalog.sorted.tsv.bgz").getAbsolutePath();
		// Build the path from parent + child so the log file ends up INSIDE the temp folder
		// (plain string concatenation without a separator produced "<folder>bior.log" next to it)
		String logFilePath = new File(tempFolder, "bior.log").getCanonicalPath();
		
		// Set the line count, and reporting count so we can check that large row counts are handled
		TjsonToCatalog.LINE_TO_START_AT = Integer.MAX_VALUE - 3;
		TjsonToCatalog.REPORT_EVERY_X_LINES = 1;
		
    	CommandOutput output = runCmdApp("-i", catalogIn, "-o", outputPath, "--logfile", logFilePath);

		// Warnings are now going to the logfile instead of stderr
		String logContents = FileUtils.readFileToString(new File(logFilePath));
		// Integer.MAX_VALUE = 2,147,483,647  -- these next values should NOT be negative!!!
		final String EXPECTED1 = "Num lines written to catalog format: 2147483645";
		final String EXPECTED2 = "Num lines written to catalog format: 2147483660";
		assertTrue("Log file is missing: [" + EXPECTED1 + "]\nLog contents:\n" + logContents, logContents.contains(EXPECTED1));
		assertTrue("Log file is missing: [" + EXPECTED2 + "]\nLog contents:\n" + logContents, logContents.contains(EXPECTED2));

		// No warnings to stderr or stdout
    	assertEquals(output.stderr, "", output.stderr);
        assertEquals(output.stdout, "", output.stdout);
        // Expected value goes first so a failure reads "expected 0 but was <exit>"
        assertEquals(output.stderr, 0, output.exit);
		
        // Use this to debug:
		//printFilesInParentDir(outputPath);
    }

	

	// TODO:  To test:
	//   catalog with JUST the JSON
	//   normal chroms, but min, max at same coords:  "1 100 100" vs "1 100 105" vs "1 101 103" vs "1 105 105" vs "1 105 110" - put input out of order, but have them finish in this order
	//   human - default sort (use a file within the project)
	//   human - specify a sort file (not the same as default sort)
	//   non-human chrom - default sort
	//   non-human chrom - specify a sort file
	//   For the following, throw an error, and say that the user should check all other values:
	//   	Error if _maxBP != (_minBP + _refAllele.length - 1)  (but only if _minBP and _refAllele are given)
	//   	Error if _minBP or _maxBP are NOT integers
	//   	Error if _landmark is not a string
	//   	Error if _altAlleles is not a valid JSON array
	//   Alt alleles contains a forward slash '/' - which should NOT get transformed into '\/'
	//	 Input file with both ## and # headers - ok with NO sort
	//	 Input file with both ## and # headers - ok WITH sort (should stay at the top, and order should NOT change)
	//   Input file with blanks at end (these should NOT appear in the final catalog as Tabix cannot index these lines!)
	//   Input file that is a bgzip file  (with .bgz extension)
	//   Input file that is gzip file (with .gz extension)
	//   Input file that is a plain-text file
	//   JSON column is last column
	//   JSON column is NOT the last column (and specify positive column)
	//   JSON column is NOT the last column (and specify negative column)
	//   JSON column specified is out of range
	//   JSON-only catalog
	//==============================================================

	
	
	//=======================================================================================================
	
	
	/**
	 * Fails the current test unless the command exited with code 0 and wrote nothing to
	 * stderr or stdout.
	 *
	 * @param output captured exit code, stdout and stderr of a command run
	 */
	private void assertNoErrorsOrNonZeroExitCode(CommandOutput output) {
		if (output.exit != 0 || output.stderr.length() > 0) {
			// Always report the exit code: stderr may be empty even when the exit code is non-zero,
			// which would otherwise produce a failure with no message at all
			fail("Command failed: exit code = " + output.exit + ", stderr = [" + output.stderr + "]");
		}

		// stderr is known to be empty at this point, so only stdout is left to verify
		assertEquals("Unexpected output on stdout", "", output.stdout);
	}
	
	/**
	 * Writes the given text to a newly created temporary file and returns that file.
	 * The file is created under the {@code mTempFolder} rule's root directory so it is removed
	 * together with the rest of the test's temporary files instead of being left behind in the
	 * system temp directory.
	 *
	 * @param str text to write to the file
	 * @return the temporary file containing {@code str}
	 * @throws IOException if the file cannot be created or written
	 */
	private File writeStringToTempFile(String str) throws IOException {
		File tempFile = File.createTempFile("myInput", ".tmp", mTempFolder.getRoot());
		writeFile(str, tempFile.getCanonicalPath());
		return tempFile;
	}
	
    /**
     * Runs {@link CreateCatalogCommand} in-process through {@link CommandLineApp} with
     * System.out and System.err captured to strings.
     *
     * @param cmdArgs command line arguments passed to the command
     * @return the exit code returned by the application plus the captured stdout and stderr text
     * @throws UnsupportedEncodingException declared by the capture helpers of CommandLineApp
     */
    private CommandOutput runCmdApp(String... cmdArgs) throws UnsupportedEncodingException {
        String MOCK_SCRIPT_NAME = "bior_create_catalog";
        CommandLineApp app = new CommandLineApp();
        CommandPlugin mockPlugin = new CreateCatalogCommand();
        CommandOutput output = new CommandOutput();

        app.captureSystemOutAndErrorToStrings();
        try {
            output.exit = app.runApplication(mockPlugin.getClass().getName(), MOCK_SCRIPT_NAME, cmdArgs);
        } finally {
            // Set SYSOUT and SYSERR back to their original output streams, even when the
            // application throws, so later tests do not keep writing into the capture buffers
            app.resetSystemOutAndError();
        }

        output.stdout = app.getSystemOutMessages();
        output.stderr = app.getSystemErrorMessages();

        return output;
    }
    
    /**
     * Debugging aid: prints the names of all files located in the directory that contains
     * the given file.
     *
     * @param outputFile path to a file whose parent directory should be listed
     * @throws IOException if the canonical path of the parent directory cannot be resolved
     */
    private void printFilesInParentDir(String outputFile) throws IOException {
		File parentDir = new File(outputFile).getParentFile();
		// getParentFile() returns null when the path has no parent component (ex: "catalog.tsv")
		if( parentDir == null ) {
			System.out.println("No parent dir for: " + outputFile);
			return;
		}

		System.out.println("Files in dir: " + parentDir.getCanonicalPath());
		File[] filesInDir = parentDir.listFiles();
		// listFiles() returns null when the path is not a directory or an I/O error occurs
		if( filesInDir == null ) {
			System.out.println("(unable to list files)");
			return;
		}

		for(File f : filesInDir ) {
			System.out.println(f.getName());
		}
    }


    /**
     * Compares two catalog files line by line and fails the test if they differ.
     * Each differing line is written to System.err, and on failure both files are dumped to
     * System.out and the differences are included in the failure message.
     *
     * @param catalogBgzipExpected path to the file holding the expected lines
     * @param catalogBgzipActual   path to the file holding the actual lines
     * @throws IOException if either file cannot be read
     */
    private void verifyCatalogsSame(String catalogBgzipExpected, String catalogBgzipActual) throws IOException {
    	List<String> linesExpected = readFile(catalogBgzipExpected);
    	List<String> linesActual   = readFile(catalogBgzipActual);

    	StringBuilder differences = new StringBuilder();
    	int numLines = Math.max(linesExpected.size(), linesActual.size());
    	for(int i=0; i < numLines; i++) {
    		// A missing line is tracked as null (not as a placeholder string) so that a real line
    		// whose text happens to be "(null)" can never be mistaken for a missing one
    		String lineExpected = (i < linesExpected.size())  ?  linesExpected.get(i)  :  null;
    		String lineActual   = (i < linesActual.size())    ?  linesActual.get(i)    :  null;

    		// At most one side can be null here since i is below the larger of the two sizes
    		boolean isSame = lineExpected != null  &&  lineExpected.equals(lineActual);
    		if( ! isSame ) {
        		String errMsg = "Line " + (i+1) + " is different:\n  Expected: " + (lineExpected == null ? "(null)" : lineExpected)
        				+ "\n  Actual:   " + (lineActual == null ? "(null)" : lineActual);
    			System.err.println(errMsg);
    			differences.append("\n").append(errMsg);
    		}
    	}

    	if( differences.length() > 0 ) {
        	printLines(linesExpected, "Expected:");
        	printLines(linesActual,   "Actual:");
        	fail("Catalogs are NOT the same!" + differences);
    	}
	}

    
    /**
     * Prints a heading followed by every line prefixed with its 1-based position,
     * then a separator row, all to System.out.
     *
     * @param lines lines to print
     * @param msg   heading printed (after a blank line) before the lines
     */
    private void printLines(List<String> lines, String msg) {
    	System.out.println("\n" + msg);
    	int lineNum = 0;
    	for (String text : lines) {
    		lineNum++;
    		System.out.println(lineNum + ")  " + text);
    	}
    	System.out.println("---------------");
	}


	/**
	 * Reads all lines of a file into a list.
	 * Paths ending in ".gz" or ".bgz" are opened through a BlockCompressedInputStream;
	 * any other path is read as a plain text file.
	 *
	 * @param filePath path of the file to read
	 * @return the lines of the file, in order
	 * @throws IOException if the file cannot be opened or read
	 */
	private List<String> readFile(String filePath) throws IOException {
		final boolean isCompressed = filePath.endsWith(".gz") || filePath.endsWith(".bgz");
		final BufferedReader reader = isCompressed
				? new BufferedReader(new InputStreamReader(new BlockCompressedInputStream(new File(filePath))))
				: new BufferedReader(new FileReader(filePath));

		final List<String> contents = new ArrayList<String>();
		try {
			for (String row = reader.readLine(); row != null; row = reader.readLine()) {
				contents.add(row);
			}
		} finally {
			reader.close();
		}
		return contents;
    }

    
    /**
     * Writes a string to a file.
     * Paths ending in ".gz" or ".bgz" are written through a BlockCompressedOutputStream;
     * any other path is written as a plain text file.
     *
     * @param str            text to write
     * @param outputFilePath path of the file to create
     * @throws IOException if the file cannot be opened or written
     */
    private void writeFile(String str, String outputFilePath) throws IOException {
    	final boolean isCompressed = outputFilePath.endsWith(".gz") || outputFilePath.endsWith(".bgz");
    	final BufferedWriter writer = isCompressed
    			? new BufferedWriter(new OutputStreamWriter(new BlockCompressedOutputStream(new File(outputFilePath))))
    			: new BufferedWriter(new FileWriter(outputFilePath));

    	try {
    		writer.write(str);
    	} finally {
    		writer.close();
    	}
    }

	/**
	 * Parses a JSON string and returns it as a JSON object.
	 *
	 * @param jsonAsString JSON text to parse
	 * @return the parsed element viewed as a JsonObject
	 */
	private com.google.gson.JsonObject getJsonObject(String jsonAsString) {
		com.google.gson.JsonElement parsed = new JsonParser().parse(jsonAsString);
		return parsed.getAsJsonObject();
	}
}
