package edu.mayo.bior.cli.func;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import edu.mayo.bior.cli.cmd.DrillCommand;
import edu.mayo.bior.cli.cmd.OverlapPipelineCommand;
import edu.mayo.bior.cli.cmd.ReplaceLinesCommand;
import edu.mayo.bior.cli.cmd.TjsonToVcfCommand;
import edu.mayo.bior.pipeline.VcfInfoColumnBuilder.CharacterEncodingMethod;
import edu.mayo.cli.CommandLineApp;
import edu.mayo.cli.CommandPlugin;
import edu.mayo.pipes.util.test.PipeTestUtils;

public class TjsonToVcfITCase extends BaseFunctionalTest {

	/** Line terminator used by the tests when building command input and expected output. */
	public static String EOL = "\n";
	
	/**
	 * Default header lines (fileformat, fileDate, source) that the tests prepend to, or pick
	 * individual lines from (via split(EOL)), when building their expected output.
	 * NOTE: The fileDate value is formatted from the current date when this field is initialized,
	 *       so it is fixed for the lifetime of the class rather than recomputed per test.
	 * NOTE: Must be declared after EOL, since the initializer reads it.
	 */
	public static String TOP3_HEADERS =
			"##fileformat=VCFv4.1" + EOL +
			"##fileDate=" + new SimpleDateFormat("yyyyMMdd").format(new Date()) + EOL +
			"##source=bior_tjson_to_vcf" + EOL;

    /** Name of the command under test; the same literal is passed to executeScript by the cmd_* tests. */
    protected static final String MOCK_SCRIPT_NAME = "bior_tjson_to_vcf";
    // NOTE(review): mApp and mMockPlugin are not referenced in the visible part of this class -
    //               presumably they are used by executeMock further down; confirm.
    protected CommandLineApp mApp = new CommandLineApp();
    protected CommandPlugin mMockPlugin;

    /** JUnit rule providing a temporary folder for each test. */
    @Rule
    public TemporaryFolder mTempFolder = new TemporaryFolder();
			
	
    /**
     * Per-test setup hook.
     * The temporary folder is deliberately NOT created here: mTempFolder is registered as a JUnit
     * {@code @Rule}, so JUnit creates it before each test and deletes it afterwards.  Calling
     * {@code create()} again would swap in a second folder and leave the first one behind on disk,
     * because the rule only deletes the folder it currently points at.
     *
     * @throws IOException kept in the signature for compatibility; nothing in this method throws it
     */
    @Before
    public void beforeEach() throws IOException {
        // Intentionally empty - the @Rule on mTempFolder handles creation and cleanup
    }
    
	/** Runs once before the tests in this class: tells the base class that the BioR toolkit commands are required. */
	@BeforeClass
	public static void beforeAll() throws FileNotFoundException {
		final boolean isToolkitRequired = true;
		BaseFunctionalTest.setBiorToolkitCmdsRequired(isToolkitRequired);
	}

    
	/**
	 * Non-VCF input (a single JSON column) with the target column given explicitly as "-c 1".
	 * The expected output consists of the default top headers, one ##INFO header per key in the JSON's
	 * INFO string, and the eight VCF columns built from the JSON.
	 */
	@Test
	public void nonVcfIn_colSpecified() throws IOException, InterruptedException {
		System.out.println("TjsonToVcfITCase.nonVcfIn_colSpecified");

		final String variantJson = (
				  "{'CHROM':'20','POS':'14370','ID':'rs6054257','REF':'G','ALT':'A','QUAL':'29','FILTER':'PASS',"
				+ "'INFO':'NS=3;DP=14;AF=0.5;DB=true;H2=true','_id':'rs6054257','_type':'variant','_landmark':'20',"
				+ "'_refAllele':'G','_altAlleles':['A'],'_minBP':14370,'_maxBP':14370}"
				).replaceAll("'", "\"");
		final String input = "#variantAsJson" + EOL + variantJson;

		// NOTE: No column has a ##BIOR header and none begins with "bior.", so no extra columns get folded into INFO
		final String infoHeaders =
				  "##INFO=<ID=AF,Number=.,Type=Float,Description=\"\">"   + EOL
				+ "##INFO=<ID=DB,Number=.,Type=String,Description=\"\">"  + EOL
				+ "##INFO=<ID=DP,Number=.,Type=Integer,Description=\"\">" + EOL
				+ "##INFO=<ID=H2,Number=.,Type=String,Description=\"\">"  + EOL
				+ "##INFO=<ID=NS,Number=.,Type=Integer,Description=\"\">" + EOL;
		final String columnHeader = concat("#CHROM",  "POS",   "ID",        "REF",  "ALT",  "QUAL",  "FILTER",  "INFO") + EOL;
		final String dataRow      = concat("20",      "14370", "rs6054257", "G",    "A",    "29",    "PASS",    "NS=3;DP=14;AF=0.5;DB=true;H2=true") + EOL;
		final String expectedOutput = TOP3_HEADERS + infoHeaders + columnHeader + dataRow;

		CommandOutput result = executeMock(input, "-c", "1");
		assertNoErrors(expectedOutput, result);
	}
	
	
	/**
	 * The input is not VCF yet; the VCF columns are built from the single "bior.variantAsJson" column,
	 * and no flags are passed to the command.
	 * The INFO value inside the target JSON is a plain string, and the expected INFO column is that
	 * same string, including the value-less "isInDbSnp" key, which is expected to get a Flag header.
	 * NOTE (from the original author): if the target column contains "INFO" and the user has NOT asked
	 *       to unroll JSON, its fields are not added separately; an empty INFO should be "." and not "".
	 */
	@Test
	public void shouldNotUnrollInfoFieldWithJson() throws IOException, InterruptedException {
		final String headerAndJson =
				  "#bior.variantAsJson" + EOL
				+ "{'CHROM':'20','POS':'14370','ID':'rs6054257','REF':'G','ALT':'A','QUAL':'29','FILTER':'PASS',"
				+ "'INFO':'NS=3;DP=14;AF=0.5;DB=true;H2=true;isInDbSnp','_id':'rs6054257','_type':'variant','_landmark':'20',"
				+ "'_refAllele':'G','_altAlleles':['A'],'_minBP':14370,'_maxBP':14370}";
		final String input = headerAndJson.replaceAll("'", "\"");

		final String infoHeaders =
				  "##INFO=<ID=AF,Number=.,Type=Float,Description=\"\">" + EOL
				+ "##INFO=<ID=DB,Number=.,Type=String,Description=\"\">" + EOL
				+ "##INFO=<ID=DP,Number=.,Type=Integer,Description=\"\">" + EOL
				+ "##INFO=<ID=H2,Number=.,Type=String,Description=\"\">" + EOL
				+ "##INFO=<ID=isInDbSnp,Number=0,Type=Flag,Description=\"\">" + EOL
				+ "##INFO=<ID=NS,Number=.,Type=Integer,Description=\"\">" + EOL;
		final String columnHeader = concat("#CHROM",  "POS",   "ID",        "REF",  "ALT",  "QUAL",  "FILTER",  "INFO") + EOL;
		final String dataRow      = concat("20",      "14370", "rs6054257", "G",    "A",    "29",    "PASS",    "NS=3;DP=14;AF=0.5;DB=true;H2=true;isInDbSnp") + EOL;
		final String expectedOutput = TOP3_HEADERS + infoHeaders + columnHeader + dataRow;

		CommandOutput result = executeMock(input);
		assertNoErrors(expectedOutput, result);
	}
	
	

	/**
	 * Regression test for the position of the "##fileDate=" header.
	 * Per the original author, the fileDate line used to be inserted right above a "##fileformat" line
	 * that appeared further down in the header instead of near the top.  Here the input already has its
	 * own fileformat and source lines; the expected output keeps both and places fileDate on line 2.
	 */
	@Test
	public void fileDateHeaderShouldBeNearTop() throws IOException, InterruptedException {
		// Header lines that appear both in the input and (unchanged) in the expected output
		final String fileFormatLine = "##fileformat=VCFv4.0" + EOL;
		final String filterLine     = "##FILTER=<ID=LowQual,Description=\"Low quality\">" + EOL;
		final String formatLine     = "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">" + EOL;
		final String setInfoLine    = "##INFO=<ID=set,Number=1,Type=String,Description=\"Source VCF for the merged record in CombineVariants\">" + EOL;
		final String contigLine     = "##contig=<ID=chr1,length=249250621>" + EOL;
		final String referenceLine  = "##reference=file:///data2/bsi/reference/sequence/human/ncbi/hg19/allchr.fa" + EOL;
		final String sourceLine     = "##source=PhaseByTransmission" + EOL;

		final String inputColumns = "#bior.variantAsJson" + "\t" + "rsId" + "\t" + "Zed" + "\t" + "bior.emtpyJson" + "\t" + "bior.jsonOneKey" + EOL;
		final String inputRow     = "{'_landmark':'20','_refAllele':'G','_altAlleles':['A'],'_minBP':14370,'_maxBP':14370,'INFO':{'NS':1}}" + "\t" + "rs123" + "\t" + "ded" + "\t" + "{}" + "\t" + "{'key':'val'}";
		final String input = (
				  fileFormatLine + filterLine + formatLine + setInfoLine
				+ contigLine + referenceLine + sourceLine
				+ inputColumns + inputRow
				).replaceAll("'", "\"");

		final String fileDateLine = TOP3_HEADERS.split(EOL)[1] + EOL;   // fileDate=
		final String expectedOutput =
				  fileFormatLine   // NOTE: the original fileformat header, not the dynamically added one
				+ fileDateLine
				+ filterLine
				+ formatLine
				+ "##INFO=<ID=NS,Number=1,Type=Integer,Description=\"\">" + EOL
				+ "##INFO=<ID=rsId,Number=.,Type=String,Description=\"\">" + EOL
				+ setInfoLine
				+ "##INFO=<ID=Zed,Number=.,Type=String,Description=\"\">" + EOL
				+ contigLine
				+ referenceLine
				+ sourceLine       // NOTE: source stays down here instead of moving to the top of the file
				+ concat("#CHROM",  "POS",   "ID",  "REF",  "ALT",  "QUAL",  "FILTER",  "INFO") + EOL
				+ concat("20",      "14370", ".",   "G",    "A",    ".",     ".",       "NS=1;rsId=rs123;Zed=ded") + EOL;

		// "-c 1":   the first column is the one used to construct the VCF columns
		// "-r 2,3": only the rsId and Zed columns are added to the INFO field
		// None of the input columns are expected to remain in the output
		CommandOutput result = executeMock(input, "-c", "1", "-r", "2,3");
		assertNoErrors(expectedOutput, result);
	}

	
	//==================================================================================
	
	/**
	 * Character encoding of values when going from TJSON to VCF.
	 * The "bior.url" value contains '=', ';' and ',' characters, plus an already-encoded "%3A".
	 * NOTE: CR, LF and TAB are hard to test since they would split the input into separate lines or columns.
	 * Encodings listed by the original author:
	 *    :  =  %3A
	 *    ;  =  %3B
	 *    =  =  %3D
	 *    %  =  %25
	 *    ,  =  %2C
	 *    CR =  %0D
	 *    LF =  %0A
	 *    TAB=  %09
	 */
	@Test
	public void testEncoding() throws IOException, InterruptedException {
		final String variantJson = swapQuotes("{'_landmark':'1','_minBP':14370,'_maxBP':14370,'_refAllele':'G','_altAlleles':['A'],'INFO':{'anInfoField':0}}");
		final String input =
				  "##fileformat=VCFv4.3" + EOL
				+ "#bior.variantAsJson	bior.url" + EOL
				+ variantJson  +  "\t"  +  "<a href='https://google.com?q=123;resp=x%3Ay'>A,B,C</a>" + EOL;

		// The expected INFO value depends on the encoding method; this test is pinned to URL_BASIC
		final CharacterEncodingMethod encoding = CharacterEncodingMethod.URL_BASIC;
		final String expectedInfo = CharacterEncodingMethod.SIMPLE.equals(encoding)
				? "anInfoField=0;bior.url=<a_href:'https://google.com?q:123|resp:x%3Ay'>A|B|C</a>"
				// URL_BASIC or URL_EXTENDED
				: "anInfoField=0;bior.url=<a href%3D'https://google.com?q%3D123%3Bresp%3Dx%3Ay'>A%2CB%2CC</a>";
		final String expectedRow = concat("1",       "14370", ".",   "G",    "A",    ".",     ".",       expectedInfo) + EOL;

		final String fileDateLine = TOP3_HEADERS.split(EOL)[1] + EOL;   // fileDate=
		final String expectedOutput =
				  "##fileformat=VCFv4.3" + EOL
				+ fileDateLine
				+ "##source=bior_tjson_to_vcf" + EOL
				+ "##INFO=<ID=anInfoField,Number=1,Type=Integer,Description=\"\">" + EOL
				+ "##INFO=<ID=bior.url,Number=.,Type=String,Description=\"\">" + EOL
				+ concat("#CHROM",  "POS",   "ID",  "REF",  "ALT",  "QUAL",  "FILTER",  "INFO") + EOL
				+ expectedRow;

		CommandOutput result = executeMock(input, "-c", "1");
		assertNoErrors(expectedOutput, result);
	}

	
	//==================================================================================
	
	/**
	 * VCF-formatted input with two trailing BioR columns (a JSON column and a single-value column),
	 * run with "-j" so the JSON is unrolled into the INFO column.
	 * Points covered, per the original author's notes:
	 *  - Duplicate keys: "gene" appears both inside the JSON and as its own column, and is expected twice in INFO
	 *  - The FORMAT and SAMPLE1 columns are preserved
	 *  - String vs Integer types are derived from the JSON values
	 *  - Columns that begin with "bior." or have BIOR metadata associated with them are added to INFO
	 *  - Characters that are not allowed in an INFO value are encoded
	 */
	@Test
	public void unrollJson() throws IOException, InterruptedException {
		// Contents of the bior.gene37p13 column
		final String geneJson = (
				  "{'_type':'gene','_landmark':'1','_strand':'+','_minBP':11874,'_maxBP':14409,'gene':'DDX11L1',"
				+ "'note':'DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1; Derived by automated computational analysis using gene prediction method: BestRefSeq.',"
				+ "'pseudo':'','GeneID':'100287102','HGNC':'37102'}"
				).replaceAll("'", "\"");

		final String inputHeader = concat("#CHROM",  "POS",  "ID",  "REF",  "ALT",  "QUAL",  "FILTER",  "INFO",  "FORMAT",  "SAMPLE1",  "bior.gene37p13",  "bior.gene37p13.gene")  + "\n";
		// The last value (the bior.gene37p13.gene column) carries the line terminator itself
		final String inputRow    = concat("1",       "14400","rs11","G",    "A",    "100",   "PASS",    "AMR_AF=0.17;EUR_AF=0.21",  "GT:GP",  "0/1",  geneJson,  "DDX11L1\n");
		final String inputVcf    = inputHeader + inputRow;

		// The expected "note" value depends on the encoding method; this test is pinned to URL_BASIC.
		// In both variants the value keeps its trailing "."
		final CharacterEncodingMethod encoding = CharacterEncodingMethod.URL_BASIC;
		final String expectedNote = CharacterEncodingMethod.SIMPLE.equals(encoding)
				? "bior.gene37p13.note=DEAD/H_(Asp-Glu-Ala-Asp/His)_box_helicase_11_like_1|_Derived_by_automated_computational_analysis_using_gene_prediction_method:_BestRefSeq."
				// URL_BASIC or URL_EXTENDED
				: "bior.gene37p13.note=DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1%3B Derived by automated computational analysis using gene prediction method: BestRefSeq.";

		final String infoHeaders =
				  "##INFO=<ID=bior.gene37p13._landmark,Number=1,Type=String,Description=\"\">" + EOL
				+ "##INFO=<ID=bior.gene37p13._maxBP,Number=1,Type=Integer,Description=\"\">"   + EOL
				+ "##INFO=<ID=bior.gene37p13._minBP,Number=1,Type=Integer,Description=\"\">"   + EOL
				+ "##INFO=<ID=bior.gene37p13._strand,Number=1,Type=String,Description=\"\">"   + EOL
				+ "##INFO=<ID=bior.gene37p13._type,Number=1,Type=String,Description=\"\">"     + EOL
				// NOTE (original author): bior.gene37p13.gene comes from the last column; columns are handled
				//       last-to-first, so it is added before the JSON object holding the same key, which is
				//       why its header has Number=. rather than the Number=1 of the JSON-derived keys
				+ "##INFO=<ID=bior.gene37p13.gene,Number=.,Type=String,Description=\"\">"      + EOL
				+ "##INFO=<ID=bior.gene37p13.GeneID,Number=1,Type=String,Description=\"\">"    + EOL
				+ "##INFO=<ID=bior.gene37p13.HGNC,Number=1,Type=String,Description=\"\">"      + EOL
				+ "##INFO=<ID=bior.gene37p13.note,Number=1,Type=String,Description=\"\">"      + EOL;

		final String expectedInfo =
				  "AMR_AF=0.17;EUR_AF=0.21;"
				+ "bior.gene37p13._landmark=1;"
				+ "bior.gene37p13._maxBP=14409;"
				+ "bior.gene37p13._minBP=11874;"
				+ "bior.gene37p13._strand=+;"
				+ "bior.gene37p13._type=gene;"
				// gene appears twice: once from the JSON and once from the stand-alone (non-JSON) last column
				+ "bior.gene37p13.gene=DDX11L1;"
				+ "bior.gene37p13.gene=DDX11L1;"
				+ "bior.gene37p13.GeneID=100287102;"
				+ "bior.gene37p13.HGNC=37102;"
				+ expectedNote;

		final String expectedOut =
				  TOP3_HEADERS
				+ infoHeaders
				+ concat("#CHROM",  "POS",  "ID",  "REF",  "ALT",  "QUAL",  "FILTER",  "INFO",  "FORMAT",  "SAMPLE1") + EOL
				+ concat("1",       "14400","rs11","G",    "A",    "100",   "PASS",    expectedInfo,  "GT:GP",  "0/1") + EOL;

		CommandOutput result = executeMock(inputVcf, "-j");
		assertNoErrors(expectedOut, result);
	}

	
	/**
	 * Placeholder (ignored: very similar to other tests).
	 * Intended scenario: when a range of columns to collapse is specified, collapse only those columns
	 * instead of the ones that begin with "bior." or have BIOR metadata.
	 */
	@Test
	@Ignore
	public void vcfIn_colRange() {
		fail("not implemented yet");
	}
	
	/**
	 * VCF-formatted input whose last three columns are expected to be collapsed into INFO and removed.
	 * A column is added to INFO and removed if it has a ##BIOR metadata line associated with it,
	 * or if its name begins with "bior.".  Scenarios listed by the original author:
	 *  1) 3rd-last column begins with "bior." (but has no ##BIOR header metadata)
	 *  2) 2nd-last column has a ##BIOR header and is a JSON column (from bior_overlap with a catalog),
	 *     so all of its values are added to INFO (its name does not begin with "bior.")
	 *  3) Last column has a ##BIOR header but is an individual Boolean/Flag field (from bior_drill on a
	 *     key within a catalog JSON column).  A stand-alone Flag matters here because its header must
	 *     be looked up from columns.tsv
	 *  4) For columns with a ##BIOR header that carries the catalog PATH, the value's type, count and
	 *     description come from the catalog's columns.tsv file
	 *  5) The FORMAT column that follows INFO is preserved (NOT collapsed into INFO)
	 *  6) The last column does not need to be JSON since the input is already VCF
	 *  7) A dot in the JSON column signifies empty JSON / no results
	 *  8) Multiple data lines
	 *  9) An empty line at the end of the input is tolerated
	 */
	@Test
	public void catalogCols_unrollJson() throws IOException, InterruptedException {
		// The two ##BIOR metadata lines are expected to pass through from input to output unchanged
		final String overlapMetaLine =
			"##BIOR=<ID='dbSNP137aaa',Operation='bior_overlap',DataType='JSON',ShortUniqueName='dbSNP137',Source='dbSNP',Description='dbSNP version 137, Patch 10, Human',Version='137',Build='GRCh37.p10',Path='src/test/resources/metadata/00-All_GRCh37.tsv.bgz'>".replaceAll("'",  "\"") + EOL;
		final String drillMetaLine =
			"##BIOR=<ID='dbSNP137.INFO.ASP',Operation='bior_drill',Field='INFO.ASP',DataType='Boolean',ShortUniqueName='dbSNP137',Source='dbSNP',Description='dbSNP version 137, Patch 10, Human',Version='137',Build='GRCh37.p10',Path='src/test/resources/metadata/00-All_GRCh37.tsv.bgz'>".replaceAll("'", "\"") + EOL;

		final String inputVcf =
			  overlapMetaLine
			+ drillMetaLine
			+ concat("#CHROM",  "POS",  "ID",  "REF",  "ALT",  "QUAL",  "FILTER",  "INFO",  "FORMAT",  "bior.gene37p13",  "dbSNP137aaa",  "dbSNP137.INFO.ASP")  + EOL
			+ concat("1",       "14400","rs11","G",    "A",    "100",   "PASS",    "AF=0.17", "0/1",  "{'_type':'gene','_minBP':11874,'HGNC':'37102'}",  "{'INFO':{'SSR':0}}",  "false") + EOL
			+ concat("2",       "20000","rs22","A",    "T",    "100",   "PASS",    "AF=0.19", "1/1",  ".",  "{'INFO':{'RSPOS':101}}",  "true") + EOL
			+ EOL;

		CommandOutput result = executeMock(inputVcf, "-j");

		final String infoHeaders =
			  "##INFO=<ID=bior.gene37p13._minBP,Number=1,Type=Integer,Description=\"\">" + EOL
			+ "##INFO=<ID=bior.gene37p13._type,Number=1,Type=String,Description=\"\">" + EOL
			+ "##INFO=<ID=bior.gene37p13.HGNC,Number=1,Type=String,Description=\"\">" + EOL
			+ "##INFO=<ID=dbSNP137.INFO.ASP,Number=1,Type=Flag,Description=\"Is Assembly specific. This is set if the variant only maps to one assembly\">" + EOL
			+ "##INFO=<ID=dbSNP137aaa.INFO.RSPOS,Number=1,Type=Integer,Description=\"Chromosome position reported in dbSNP\">" + EOL
			+ "##INFO=<ID=dbSNP137aaa.INFO.SSR,Number=1,Type=String,Description=\"Variant suspect reason code (0 - unspecified, 1 - paralog, 2 - byEST, 3 - Para_EST, 4 - oldAlign, 5 - other)\">" + EOL;

		final String expectedOut =
			  TOP3_HEADERS
			+ overlapMetaLine
			+ drillMetaLine
			+ infoHeaders
			+ "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT" + EOL
			+ "1	14400	rs11	G	A	100	PASS	AF=0.17;bior.gene37p13._minBP=11874;bior.gene37p13._type=gene;bior.gene37p13.HGNC=37102;dbSNP137aaa.INFO.SSR=0	0/1" + EOL
			+ "2	20000	rs22	A	T	100	PASS	AF=0.19;dbSNP137.INFO.ASP;dbSNP137aaa.INFO.RSPOS=101	1/1" + EOL;
		assertNoErrors(expectedOut, result);
	}

	/**
	 * Input is already VCF-formatted and the only flag is "-j".
	 * There is no catalog column holding the fields to build the VCF from; the "bior.dbSNP137" JSON
	 * column is still expected to be unrolled, adding "bior.dbSNP137.key" to INFO.
	 * No ##BIOR headers or catalog columns.tsv are available, so the header description is empty.
	 */
	@Test
	public void vcfIn_jsonFlag_noCatalogCols() throws IOException, InterruptedException {
		final String inputHeader = "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	bior.dbSNP137" + EOL;
		final String inputRow    = "1	100	rs1	A	C	.	.	.	{'key':0.1}" + EOL;
		final String inputVcf    = inputHeader + inputRow;

		CommandOutput result = executeMock(inputVcf, "-j");

		final String expectedOut =
				  TOP3_HEADERS
				+ "##INFO=<ID=bior.dbSNP137.key,Number=1,Type=Float,Description=\"\">\n"
				+ "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO\n"
				+ "1	100	rs1	A	C	.	.	bior.dbSNP137.key=0.1\n";
		assertNoErrors(expectedOut, result);
	}

	
	/**
	 * Placeholder (ignored: very similar to other tests).
	 * Intended scenario: same as not providing the -c flag since the input is already VCF-formatted;
	 * warn that the -c flag is ignored for that reason.  FORMAT and SAMPLE1 columns are preserved.
	 */
	@Test
	@Ignore
	public void vcfIn_colFlag() {
		fail("not implemented yet");
	}
	
	/**
	 * Placeholder (ignored: very similar to other tests).
	 * Intended scenario: add all key-value pairs from the last two columns to the INFO column,
	 * but do NOT remove those columns.  FORMAT and SAMPLE1 columns are preserved.
	 */
	@Test
	@Ignore
	public void vcfIn_keepBiorCols() {
		fail("not implemented yet");
	}
	
	/**
	 * Runs the real script with "-k" and "-n" on VCF-formatted input.
	 * No columns are removed and nothing is added to INFO, so the expected output is the input lines
	 * preceded by the default top headers.  (Per the original author the command should warn that it will do nothing.)
	 */
	@Test
	public void cmd_vcfIn_keepBiorCols_noInfoAdds() throws IOException, InterruptedException {
		// The same two lines are used as input and as the tail of the expected output
		final String headerRow = "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	bior.dbSNP137	gene" + EOL;
		final String dataRow   = "1	100	rs1	A	C	.	.	.	{'key':0.1}	BRCA1" + EOL;
		final String inputVcf  = headerRow + dataRow;

		CommandOutput result = executeScript("bior_tjson_to_vcf", inputVcf, "-k",  "-n");

		final String expectedOut = TOP3_HEADERS + headerRow + dataRow;
		assertNoErrors(expectedOut, result);
	}
	
	/**
	 * Placeholder (ignored: very similar to other tests).
	 * Intended scenario: warn that the command will do nothing since no columns are removed and nothing
	 * is added to the INFO column; warn that the -c flag is ignored because the input is already
	 * VCF-formatted.  FORMAT and SAMPLE1 columns are preserved and the output equals the input.
	 */
	@Test
	@Ignore
	public void vcfIn_keepBiorCols_noInfoAdds_colSpecified() {
		// Output would be the same as the input, with a warning saying nothing changed
		fail("not implemented yet");
	}
	
	//---------------------------------------------
	
	/**
	 * Placeholder (ignored: very similar to other tests).
	 * Intended scenario: assume the -c column is the last one; create the required first 8 VCF columns;
	 * add all key-value pairs from BioR columns into the new INFO column; remove the BioR columns;
	 * the rsId column is shifted to the right-most position.
	 */
	@Test
	@Ignore
	public void nonVcfIn_noParms() {
		fail("not implemented yet");
	}
	
	/**
	 * Runs the real script on non-VCF input with "-c -1 -n -k".
	 * The first 8 VCF columns are built from the JSON in the last column; the expected INFO column is "."
	 * and the original "variantAsJson" column is still present after the VCF columns.
	 */
	@Test
	public void cmd_nonVcfIn_noInfoAdds_keepCols() throws IOException, InterruptedException {
		final String variantJson = (
				  "{'CHROM':'20','POS':'14370','ID':'rs6054257','REF':'G','ALT':'A','QUAL':'29',"
				+ "'FILTER':'PASS','INFO':'NS=3;DP=14;AF=0.5;DB=true;H2=true','_id':'rs6054257','_type':'variant',"
				+ "'_landmark':'20','_refAllele':'G','_altAlleles':['A'],'_minBP':14370,'_maxBP':14370}"
				).replaceAll("'", "\"");
		final String input = "#variantAsJson" + EOL + variantJson;

		// No ##INFO headers are expected; the kept JSON column follows INFO in both header and data row
		final String headerRow = concat("#CHROM",  "POS",   "ID",        "REF",  "ALT",  "QUAL",  "FILTER",  "INFO",  "variantAsJson") + EOL;
		final String dataRow   = concat("20",      "14370", "rs6054257", "G",    "A",    "29",    "PASS",    ".",  variantJson) + EOL;
		final String expectedOutput = TOP3_HEADERS + headerRow + dataRow;

		CommandOutput result = executeScript("bior_tjson_to_vcf", input, "-c", "-1", "-n", "-k");

		// The command must exit cleanly and write nothing to stderr
		final String diagnostics = "STDERR:"+result.stderr+"\n"+"STDOUT:"+result.stdout;
		assertEquals(diagnostics, 0, result.exit);
		assertEquals("", result.stderr);

		// The output must match exactly
		assertEquals(expectedOutput, result.stdout);
	}

	/**
	 * Placeholder (ignored: very similar to other tests).
	 * Intended scenario: assume the -c column is the last one; create the required first 8 VCF columns;
	 * add all key-value pairs from BioR columns into the new INFO column; KEEP all BioR columns;
	 * the rsId column is shifted to the right-most position.
	 */
	@Test
	@Ignore
	public void nonVcfIn_keepBiorCols() {
		fail("not implemented yet");
	}

	/**
	 * Placeholder (ignored: this is tested in nonVcfIn_Over1000Lines()).
	 * Intended scenario: create the required first 8 VCF columns; add all key-value pairs from BioR
	 * columns into the new INFO column; remove the BioR columns; the rsId column is shifted to the
	 * right-most position.
	 */
	@Test
	@Ignore
	public void nonVcfIn_colGiven() {
		fail("not implemented yet");
	}
	

	/**
	 * Placeholder (ignored: very similar to other tests).
	 * NOTE: an extra column needs to be added at the end of the input before running this one.
	 * Intended scenario: create the required first 8 VCF columns; do NOT add key-value pairs from BioR
	 * columns to the new INFO column; KEEP all BioR columns; the rsId column is shifted to the
	 * right-most position.
	 */
	@Test
	@Ignore
	public void nonVcfIn_colGivenButNotLast_noInfoAdds_keepBiorCols() {
		fail("not implemented yet");
	}
	
	/**
	 * Non-VCF input whose target JSON column does not contain the required ("golden") attributes.
	 * The JSON here only has VCF-style keys (CHROM, POS, ID, REF, ALT), so the command is expected to
	 * exit with code 1 and report on stderr that the first required field (_landmark) is missing.
	 */
	@Test
	public void nonVcfIn_missingGoldenAttributes() throws IOException, InterruptedException {
		String stdin = "#variantAsJson" + EOL
			+ "{'CHROM':'20','POS':'14370','ID':'rs6054257','REF':'G','ALT':'A'}";

		CommandOutput out = executeMock(stdin, "-c", "-1", "-n", "-k");

		// Should be an error saying golden attribute missing
		assertEquals("STDERR:"+out.stderr+"\n"+"STDOUT:"+out.stdout, 1, out.exit);
		final String EXPECTED_ERROR = "Error: Required JSON field [_landmark] missing on data line 1: Target column must contain these JSON fields: _landmark, _minBP, _refAllele, _altAlleles.  Was:";
		// Report the actual stderr on failure; a bare assertTrue would give no hint about what was printed
		assertTrue("Expected STDERR to contain [" + EXPECTED_ERROR + "] but STDERR was: " + out.stderr,
				out.stderr.contains(EXPECTED_ERROR));
	}

	/**
	 * Placeholder (ignored: very similar to other tests).
	 * Intended scenario: input that contains the required golden attributes, but not the optional one (id).
	 */
	@Test
	@Ignore
	public void nonVcfIn_goldenButNoOptionalAttributes() {
		fail("not implemented yet");
	}
	
	/**
	 * Placeholder (ignored: the original author was not sure this scenario is correct).
	 * Intended scenario: input that lacks the required golden attributes, but gets them from the
	 * VCF standard names instead.
	 */
	@Test
	@Ignore
	public void nonVcfIn_useVcfAttrsForReqdOptionalAndOthers() {
		fail("not implemented yet");
	}

	/**
	 * Placeholder (ignored: very similar to other tests).
	 * Intended scenario: keys with values of "." should not be added to INFO.
	 */
	@Test
	@Ignore
	public void nonVcfIn_dotValuesInJsonShouldNotBeAdded() {
		fail("not implemented yet");
	}

	/**
	 * Placeholder (ignored).
	 * TODO: Test this later - conflicts between same-named keys in multiple columns.
	 * Open question from the original author: what should happen when two values conflict?
	 */
	@Test
	@Ignore
	public void nonVcfIn_valuesConflict() {
		fail("not implemented yet");
	}

	
	/**
	 * Feeds 10000 generated data lines through the command, to make sure input larger than 1000 lines
	 * (a full line queue, per the original author) is handled.
	 * Each input line has a plain "species" column, a JSON column and a single-value gene column.
	 * The VCF columns are extracted from column -2 ("-c -2"), the columns -1..-3 are collapsed into
	 * INFO ("-r -1..-3"), and JSON is unrolled ("-j").
	 */
	@Test
	public void nonVcfIn_Over1000Lines() throws IOException, InterruptedException {
		
		final int NUM_LINES = 10000;
		final String[] GENES = new String[] { 
				"MTHFR",  "BRCA1",  "BRCA2",  "CCR5",  "NDA12",
				"JSF1",   "ALZ9",   "ZRR3",   "DDR17", "CRG01"
		};
		
		// Split the default header block once rather than once per header line
		final String[] top3Headers = TOP3_HEADERS.split(EOL);
		List<String> expected = new ArrayList<String>( Arrays.asList(
				top3Headers[0],
				top3Headers[1],
				top3Headers[2],
				"##INFO=<ID=AF,Number=1,Type=Float,Description=\"\">",
				"##INFO=<ID=bior.gene,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=bior.json._altAlleles,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=bior.json._landmark,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=bior.json._minBP,Number=1,Type=Integer,Description=\"\">",
				"##INFO=<ID=bior.json._refAllele,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=bior.json.AlleleFreq,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=bior.json.INFO.AF,Number=1,Type=Float,Description=\"\">",
				"##INFO=<ID=species,Number=.,Type=String,Description=\"\">",
				"#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO"
				) );

		// The encoding-dependent part of the expected INFO value is the same for every line,
		// so it is computed once up front instead of on each loop iteration
		final CharacterEncodingMethod charEncodingMethod = CharacterEncodingMethod.URL_BASIC;
		final String encodedInfoPart;
		if( CharacterEncodingMethod.SIMPLE.equals(charEncodingMethod) )
			encodedInfoPart = ";bior.json._refAllele=A;bior.json.AlleleFreq=AF:0.1;bior.json.INFO.AF=0.2";
		else // URL_BASIC or URL_EXTENDED
			encodedInfoPart = ";bior.json._refAllele=A;bior.json.AlleleFreq=AF%3D0.1;bior.json.INFO.AF=0.2";

		// Generate x number of lines with a JSON column and a single value column
		StringBuilder input = new StringBuilder("#species	bior.json	bior.gene\n");

		// Build both the input lines as well as the expected lines
		for(int i=0; i < NUM_LINES; i++) {
			// Generate a chrom in the range from 1 to 10
			int chrom = 1 + ((i*10)/NUM_LINES);
			int pos   = i + (chrom * 100);
			String gene = GENES[chrom-1];
			String json = "{'_landmark':'" + chrom + "','_minBP':" + pos + ",'_refAllele':'A','_altAlleles':['C','T'],'AlleleFreq':'AF=0.1','INFO':{'AF':0.2}}";
			
			// Chained appends avoid building a throw-away String for every line
			input.append("human").append("\t").append(json).append("\t").append(gene).append(EOL);
			
			//                   CHROM      POS      ID    REF   ALT     QUAL  FILTER  INFO
			expected.add( concat(chrom+"",  pos+"",  ".",  "A",  "C,T",  ".",  ".",    "AF=0.2;bior.gene=" + gene
					+ ";bior.json._altAlleles=C,T;bior.json._landmark=" + chrom + ";bior.json._minBP=" + pos
					+ encodedInfoPart
					+ ";species=human") );
		}
		
		// Call the cmd - extract the VCF columns from column -2, and specify the range to collapse as -1..-3
		CommandOutput out = executeMock(input.toString(), "-c", "-2", "-r", "-1..-3",  "-j");
		assertEquals("STDERR:" + out.stderr + "\n"+"STDOUT:" + out.stdout, 0, out.exit);
		assertEquals("", out.stderr);

		List<String> actual   = Arrays.asList(out.stdout.split("\n"));
		PipeTestUtils.assertListsEqual(expected, actual);
	}

	
	
	/**
	 * VCF-formatted input followed by several empty lines, which are not expected in the output.
	 * Also covers adding a non-bior column ("AddMe") to INFO and having that column removed afterwards.
	 * The command is run twice with column ranges that select the same columns (9, 10, 12 and 14 of 14),
	 * once with positive positions and once with mostly negative ones, and both runs must give the same output.
	 */
	@Test
	public void examplefromHelpText1_emptyLinesRemoved() throws UnsupportedEncodingException {
		final String headerRow = concat("#CHROM", "POS", "ID",   "REF", "ALT", "QUAL", "FILTER", "INFO", "bior.Gene", "bior.HGNC", "bior.remove", "AddMe", "bior.rsId", "bior.desc") + "\n";
		final String dataRow   = concat("3",      "100", "rs123", "A",  "C",   ".",    ".",      ".",    "MTXR",      "31341",     "someJunk",    "xyz",   "rs123",     "my val = 1, 2; three") + "\n";
		final String blankLinesAtEnd = "\n\n\n";
		final String input = headerRow + dataRow + blankLinesAtEnd;

		final String infoHeaders =
				  "##INFO=<ID=AddMe,Number=.,Type=String,Description=\"\">"  + "\n"
				+ "##INFO=<ID=bior.desc,Number=.,Type=String,Description=\"\">"  + "\n"
				+ "##INFO=<ID=bior.Gene,Number=.,Type=String,Description=\"\">"  + "\n"
				+ "##INFO=<ID=bior.HGNC,Number=.,Type=String,Description=\"\">"  + "\n";
		// The bior.desc value below is the URL_BASIC encoding of "my val = 1, 2; three".
		// (With SIMPLE character replacement it would be "my_val_:_1|_2|_three" instead.)
		final String expected =
				  TOP3_HEADERS
				+ infoHeaders
				+ concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO") + "\n"
				+ concat("3", "100", "rs123", "A", "C", ".", ".", "AddMe=xyz;bior.desc=my val %3D 1%2C 2%3B three;bior.Gene=MTXR;bior.HGNC=31341") + "\n";

		// ----------------------------------
		// Range given with positive column numbers
		// ----------------------------------
		CommandOutput positiveRangeOutput = executeMock(input, "-r", "9..10,12,14");
		assertNoErrors(expected, positiveRangeOutput);

		// ----------------------------------
		// Same columns, but mostly negative numbers (9 and 12 repeat columns already selected by -6 and -3)
		// ----------------------------------
		CommandOutput negativeRangeOutput = executeMock(input, "-r", "-6..-5,-3,-1,9,12");
		assertNoErrors(expected, negativeRangeOutput);
	}
	

	
	/**
	 * Non-VCF input where column 5 ("-c 5") holds the JSON used to build the VCF columns.
	 * The five rows differ in how INFO is supplied: absent, a nested JSON object (rows 2 and 3),
	 * a plain "key=value;flag" string (row 4), and "." (row 5).
	 * The expected output has one ##INFO header per distinct key, the JSON column removed, and the
	 * remaining original columns kept after the eight VCF columns.
	 */
	@Test
	public void infoJsonShouldBeFlattened_mock() throws UnsupportedEncodingException {
		final String noInfoJson     = "{'_landmark':'1','_minBP':100,'_refAllele':'A','_altAlleles':['C']}".replaceAll("'", "\"");
		final String nestedInfoJson = "{'_landmark':'2','_minBP':200,'_refAllele':'C','_altAlleles':['G'],'_id':'rs222','QUAL':0.3,'FILTER':1.3,'INFO':{'MAF':0.05,'isInDbsnp':true}}".replaceAll("'", "\"");
		final String nestedGeneJson = "{'_landmark':'3','_minBP':300,'_refAllele':'C','_altAlleles':['G'],'_id':'rs333','QUAL':0.3,'FILTER':1.3,'INFO':{'MAF':0.03,'Gene':'XJT1','isInDbsnp':false}}".replaceAll("'", "\"");
		final String stringInfoJson = "{'_landmark':'4','_minBP':401,'_refAllele':'G','_altAlleles':['T'],'ID':'rs444','QUAL':0.45,'FILTER':1.2,'INFO':'MAF=0.04;isInDbsnp'}".replaceAll("'", "\"");
		final String dotInfoJson    = "{'_landmark':'5','_minBP':501,'_refAllele':'G','_altAlleles':['T'],'ID':'rs555','QUAL':0.55,'FILTER':1.2,'INFO':'.'}".replaceAll("'", "\"");

		final String input =
			  concat("#Chrom", "Pos", "Ref", "Alt", "dbSnpJson") + "\n"
			+ concat("1",      "100", "A",   "C",   noInfoJson) + "\n"
			+ concat("2",      "200", "C",   "G",   nestedInfoJson) + "\n"
			+ concat("3",      "300", "C",   "G",   nestedGeneJson) + "\n"
			+ concat("4",      "401", "G",   "T",   stringInfoJson) + "\n"
			+ concat("5",      "501", "G",   "T",   dotInfoJson) + "\n";

		CommandOutput result = executeMock(input, "-c", "5");

		// NOTE (original author): there are extra headers here because of the inconsistency in the data formats
		final String infoHeaders =
				  "##INFO=<ID=Gene,Number=1,Type=String,Description=\"\">" + "\n"
				+ "##INFO=<ID=isInDbsnp,Number=0,Type=Flag,Description=\"\">" + "\n"
				+ "##INFO=<ID=MAF,Number=1,Type=Float,Description=\"\">" + "\n";

		// The dbSnpJson column is gone because it was the JSON column used to build the VCF columns
		final String expectedRows =
				  concat("#CHROM", "POS", "ID",    "REF", "ALT", "QUAL", "FILTER", "INFO", "Chrom", "Pos", "Ref", "Alt") + "\n"
				+ concat("1",      "100", ".",     "A",   "C",   ".",    ".",      ".",    "1",     "100", "A",   "C") + "\n"
				+ concat("2",      "200", "rs222", "C",   "G",   "0.3",  "1.3",    "MAF=0.05;isInDbsnp", "2", "200", "C", "G") + "\n"
				+ concat("3",      "300", "rs333", "C",   "G",   "0.3",  "1.3",    "MAF=0.03;Gene=XJT1", "3", "300", "C", "G") + "\n"
				+ concat("4",      "401", "rs444", "G",   "T",   "0.45", "1.2",    "MAF=0.04;isInDbsnp", "4", "401", "G", "T") + "\n"
				+ concat("5",      "501", "rs555", "G",   "T",   "0.55", "1.2",    ".", "5", "501", "G", "T") + "\n";

		final String expected = TOP3_HEADERS + infoHeaders + expectedRows;
		assertNoErrors(expected, result);
	}
	
	
	/**
	 * Checks that blank lines trailing the input do not show up in the output of the mocked
	 * bior_tjson_to_vcf command: the input ends with three extra newlines, while the expected
	 * output ends right after the last data row.
	 */
	@Test
	public void blankLinesAtEndOfInput_mock() throws UnsupportedEncodingException {
		// JSON is written with single quotes for readability and converted to double quotes here
		final String bareVariant = "{'_landmark':'1','_minBP':100,'_refAllele':'A','_altAlleles':['C']}".replace('\'', '"');
		final String fullVariant = "{'_landmark':'2','_minBP':200,'_refAllele':'C','_altAlleles':['G'],'_id':'rs222','QUAL':0.3,'FILTER':1.3,'INFO':{'MAF':0.05,'isInDbsnp':true}}".replace('\'', '"');

		final StringBuilder stdinText = new StringBuilder();
		stdinText.append(concat("#Chrom", "Pos", "Ref", "Alt", "dbSnpJson")).append("\n");
		stdinText.append(concat("1",      "100", "A",   "C",   bareVariant)).append("\n");
		stdinText.append(concat("2",      "200", "C",   "G",   fullVariant)).append("\n");
		// The trailing blank lines under test: these are expected to be dropped by the command
		stdinText.append("\n\n\n");

		final CommandOutput actual = executeMock(stdinText.toString(), "-c", "5");

		final StringBuilder expectedVcf = new StringBuilder();
		expectedVcf.append(TOP3_HEADERS);
		// One ##INFO header for each INFO key present in the input rows (isInDbsnp, MAF)
		expectedVcf.append("##INFO=<ID=isInDbsnp,Number=0,Type=Flag,Description=\"\">").append("\n");
		expectedVcf.append("##INFO=<ID=MAF,Number=1,Type=Float,Description=\"\">").append("\n");
		// The dbSnpJson column is absent: it was the JSON column, and JSON columns are removed
		expectedVcf.append(concat("#CHROM", "POS", "ID",    "REF", "ALT", "QUAL", "FILTER", "INFO", "Chrom", "Pos", "Ref", "Alt")).append("\n");
		expectedVcf.append(concat("1",      "100", ".",     "A",   "C",   ".",    ".",      ".",    "1",     "100", "A",   "C")).append("\n");
		expectedVcf.append(concat("2",      "200", "rs222", "C",   "G",   "0.3",  "1.3",    "MAF=0.05;isInDbsnp", "2", "200", "C", "G")).append("\n");
		// Nothing follows the last data row: no blank lines are expected in the output

		assertNoErrors(expectedVcf.toString(), actual);
	}
	
	/**
	 * Skeleton test for the handling of double-quotes across several chained commands.
	 * A copy of an existing catalog is made in a temp folder, double-quotes are injected into its
	 * datasource.properties description and into the INFO.SAO row of columns.tsv, and then the
	 * commands below are meant to run in sequence, each reading the previous command's stdout:
	 * <ul>
	 *   <li>bior_overlap - verify ##BIOR header line</li>
	 *   <li>bior_drill - verify ##BIOR header line</li>
	 *   <li>bior_tjson_to_vcf - verify ##INFO line</li>
	 * </ul>
	 * The verifications are still TODO placeholders written as {@code fail(...)} calls.  Because
	 * {@code fail} always throws, execution stops at the first placeholder: until the placeholders
	 * are replaced with real assertions, the drill and tjson_to_vcf stages are never reached.
	 */
	@Ignore("Wait on this case until BioR v4.4.0.  v4.3.0 handled the verification of double-quotes to make sure they don't go into new catalogs")
	@Test
	public void testDoubleQuotesCascadingThruCmds() throws IOException {
		final File catalogDir = mTempFolder.newFolder();

		// Copy the sample catalog and its companion files to the temp dir
		final String[] catalogResources = {
				"src/test/resources/metadata/00-All_GRCh37.tsv.bgz",
				"src/test/resources/metadata/00-All_GRCh37.tsv.bgz.tbi",
				"src/test/resources/metadata/00-All_GRCh37.columns.tsv",
				"src/test/resources/metadata/00-All_GRCh37.datasource.properties"
		};
		for (String resourcePath : catalogResources) {
			FileUtils.copyFileToDirectory(new File(resourcePath), catalogDir);
		}

		// Inject double-quotes into the description held in datasource.properties
		final File propsFile = new File(catalogDir, "00-All_GRCh37.datasource.properties");
		final String plainProps = FileUtils.readFileToString(propsFile);
		final String quotedProps = plainProps.replace(
				"Description=dbSNP version 137, Patch 10, Human",
				"Description=dbSNP version 137, \"Patch 10\", Human");
		FileUtils.write(propsFile, quotedProps);

		// Inject double-quotes into the description of the INFO.SAO field in columns.tsv
		final File columnsFile = new File(catalogDir, "00-All_GRCh37.columns.tsv");
		final String plainColumns = FileUtils.readFileToString(columnsFile);
		final String quotedColumns = plainColumns.replace(
				"INFO.SAO	String	1	Variant allele origin (0 - unspecified, 1 - germmline, 2 - somatic, 3 - both)",
				"INFO.SAO	String	1	Variant allele origin (0 - \"unspecified\", 1 - \"germmline\", 2 - \"somatic\", 3 - \"both\")");
		FileUtils.write(columnsFile, quotedColumns);

		// Stage 1: overlap.  Sample catalog line at this position:
		// 1	10180	10180	{"CHROM":"1","POS":"10180","ID":"rs201694901","REF":"T","ALT":"C","QUAL":".","FILTER":".","INFO":{"RSPOS":10180,"dbSNPBuildID":137,"SSR":0,"SAO":0,"VP":"050000000005000002000100","WGT":1,"VC":"SNV","ASP":true,"OTHERKG":true},"_id":"rs201694901","_type":"variant","_landmark":"1","_refAllele":"T","_altAlleles":["C"],"_minBP":10180,"_maxBP":10180}
		// NOTE(review): this JSON keeps its single quotes, unlike the other tests in this class that
		// convert them to double quotes - confirm the command accepts it when the TODOs are filled in.
		final String overlapStdin = "{'_landmark':'1','_minBP':10180,'_maxBP':10180}";
		// For cmd call, see ReplaceLinesTest.testMultipleLine()
		final File catalogFile = new File(catalogDir, "00-All_GRCh37.tsv.bgz");
		setStdin(overlapStdin);
		final CommandOutput overlapResult = runCmdApp(new OverlapPipelineCommand(), "bior_overlap", "-d", catalogFile.getCanonicalPath());

		// Placeholders: the first fail() aborts the test, so nothing below it runs yet
		fail("TODO: Assert the ##BIOR header line does NOT have double-quotes");
		fail("TODO: Assert that error msg occurred as necessary when seeing the double-quotes");
		fail("TODO: Could also add a bior_same_variant in here...");

		// Stage 2: the overlap output becomes the input to drill
		setStdin(overlapResult.stdout);
		final CommandOutput drillResult = runCmdApp(new DrillCommand(), "bior_drill", "-p", "INFO.SAO", "-k");

		fail("TODO: Assert the ##BIOR header line does NOT have double-quotes");
		fail("TODO: Assert that error msg occurred as necessary when seeing the double-quotes");

		// Stage 3: the drill output becomes the input to bior_tjson_to_vcf
		setStdin(drillResult.stdout);
		final CommandOutput vcfResult = runCmdApp(new TjsonToVcfCommand(), "bior_tjson_to_vcf");

		fail("TODO: Assert the ##BIOR header line does NOT have double-quotes");
		fail("TODO: Assert that error msg occurred as necessary when seeing the double-quotes");
	}
	
	
	//=================================================================================================================================
	
	/**
	 * Run the bior_tjson_to_vcf command as a mock (call the Java main method instead of the linux command).
	 * System.out and System.err are captured for the duration of the run and are always restored
	 * afterwards, even when the command throws, so a failing run cannot leave the streams
	 * redirected for the tests that follow.
	 *
	 * @param stdin    String to pass to STDIN (or null to leave STDIN as it is)
	 * @param cmdArgs  Arguments to pass to bior_tjson_to_vcf
	 * @return the exit code returned by the application together with the captured stdout and stderr text
	 * @throws UnsupportedEncodingException if raised by the underlying CommandLineApp calls
	 */
	private CommandOutput  executeMock(String stdin, String... cmdArgs) throws UnsupportedEncodingException {
		if( stdin != null ) {
			setStdin(stdin);
		}

		final CommandOutput out = new CommandOutput();
		mApp.captureSystemOutAndErrorToStrings();
		try {
			// The class literal gives the same name as instantiating the command, without creating a throwaway object
			out.exit = mApp.runApplication(TjsonToVcfCommand.class.getName(), MOCK_SCRIPT_NAME, cmdArgs);
		} finally {
			// Restore the real streams no matter how the run ended
			mApp.resetSystemOutAndError();
		}
		out.stdout = mApp.getSystemOutMessages();
		out.stderr = mApp.getSystemErrorMessages();
		return out;
	}
	
	
	/**
	 * Asserts that a command run was clean and produced exactly the expected text.
	 * Checked in order: the exit code is 0, stderr is empty, and stdout equals the expected output.
	 * The captured stderr is used as the failure message for the first two checks, and the
	 * captured stdout for the last one.
	 *
	 * @param expectedOutput  the exact text expected on stdout
	 * @param actualOutput    the exit code, stdout and stderr captured from the command run
	 */
	private void assertNoErrors(String expectedOutput, CommandOutput actualOutput) {
		final String capturedErr = actualOutput.stderr;
		final String capturedOut = actualOutput.stdout;

		assertEquals(capturedErr, 0, actualOutput.exit);
		assertEquals(capturedErr, "", capturedErr);
		assertEquals(capturedOut, expectedOutput, capturedOut);
	}
	
}
