package edu.mayo.bior.pipeline;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.TreeSet;

import org.apache.commons.io.FileUtils;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.rules.TemporaryFolder;

import com.google.gson.JsonElement;
import com.google.gson.JsonParseException;
import com.google.gson.JsonParser;
import com.tinkerpop.pipes.Pipe;
import com.tinkerpop.pipes.util.Pipeline;

import edu.mayo.bior.cli.func.BaseFunctionalTest;
import edu.mayo.bior.cli.func.TjsonToVcfITCase;
import edu.mayo.bior.util.ColumnResolver;
import edu.mayo.cli.InvalidDataException;
import edu.mayo.pipes.PrintPipe;
import edu.mayo.pipes.JSON.DrillPipe;
import edu.mayo.pipes.JSON.tabix.SameVariantPipe;
import edu.mayo.pipes.UNIX.CatPipe;
import edu.mayo.pipes.bioinformatics.VCF2VariantPipe;
import edu.mayo.pipes.history.ColumnMetaData;
import edu.mayo.pipes.history.HCutPipe;
import edu.mayo.pipes.history.History;
import edu.mayo.pipes.history.HistoryInPipe;
import edu.mayo.pipes.history.HistoryMetaData;
import edu.mayo.pipes.history.HistoryOutPipe;
import edu.mayo.pipes.history.ColumnMetaData.Type;
import edu.mayo.pipes.util.metadata.Metadata;
import edu.mayo.pipes.util.test.FileCompareUtils;
import edu.mayo.pipes.util.test.PipeTestUtils;

public class TjsonToVcfPipeTest   extends  BaseFunctionalTest  {

	// Tab character, named as the column delimiter for tab-separated lines
	private static String COL_DELIMITER = "\t";
	
	// JUnit rule for declaring expected exceptions; initialized to expect none
	@Rule
	public ExpectedException exception = ExpectedException.none();
	 
	// End-of-line and leading header text are taken from TjsonToVcfITCase so both test classes agree
	private static String EOL = TjsonToVcfITCase.EOL;
	private static String TOP_HEADER = TjsonToVcfITCase.TOP3_HEADERS;
	// TOP_HEADER split on "\n" so tests can reference individual header lines by index (tests below use [0]..[2])
	private static String[] TOP_HEADER_ARRAY = TOP_HEADER.split("\n");
	
	/** Checks getJsonVal() for fields that are present, a missing optional field,
	 *  and a missing required field (which must raise an IllegalStateException). */
	@Test
	public void getJsonVal() {
		TjsonToVcfPipe vcfPipe = new TjsonToVcfPipe();
		String jsonText = "{'_landmark':'1','_minBP':100,'_altAlleles':['A','C']}".replaceAll("'", "\"");
		JsonElement root = new JsonParser().parse(jsonText);

		// Present fields: { key, expected text } - string, number and array values
		String[][] presentFields = {
			{ "_landmark",   "1" },
			{ "_minBP",      "100" },
			{ "_altAlleles", "[\"A\",\"C\"]" }
		};
		for (String[] field : presentFields) {
			assertEquals(field[1], vcfPipe.getJsonVal(root, field[0], true, 1));
		}

		// Missing but not required: the placeholder "." is expected
		assertEquals(".", vcfPipe.getJsonVal(root, "_maxBP", false, 1));

		// Missing and required: an exception with the full diagnostic message is expected
		Exception caught = null;
		try {
			vcfPipe.getJsonVal(root, "_refAllele", true, 1);
		} catch (Exception e) {
			caught = e;
		}
		if (caught == null) {
			fail("Should throw exception before this!");
		}
		assertTrue(caught instanceof IllegalStateException);
		assertEquals("Error: Required JSON field [_refAllele] missing on data line 1: Target column must contain these JSON fields: _landmark, _minBP, _refAllele, _altAlleles.  Was: {\"_landmark\":\"1\",\"_minBP\":100,\"_altAlleles\":[\"A\",\"C\"]}", caught.getMessage());
	}
	
	/** Checks that splitAlts() converts a JSON array string into a comma-separated list
	 *  (one element, two elements, and an empty array). */
	@Test
	public void splitAlts() {
		TjsonToVcfPipe vcfPipe = new TjsonToVcfPipe();
		// { JSON array text, expected result }
		String[][] cases = {
			{ "[\"A\"]",        "A" },
			{ "[\"A\",\"C\"]",  "A,C" },
			{ "[]",             "" }
		};
		for (String[] oneCase : cases) {
			assertEquals(oneCase[1], vcfPipe.splitAlts(oneCase[0]));
		}
	}
	
 
	
	/** A row whose header is exactly the 8 columns CHROM..INFO must be reported as already a VCF. */
	@Test
	public void isVcf() {
		History row = new History( concat("1", "100", "rs123", "A", "C", ".", ".", ".") );
		row.setMetaData(new HistoryMetaData(Arrays.asList("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO")));

		// Constructor arguments for each column's metadata, in column order
		// (presumably name, count, description - confirm against ColumnMetaData); type is always String
		String[][] columnSpecs = {
			{ "CHROM",  "1", "Chromosome" },
			{ "POS",    "1", "Position" },
			{ "ID",     "1", "Id or rsId" },
			{ "REF",    "1", "Reference Allele" },
			{ "ALT",    ".", "Alternate Alleles" },
			{ "QUAL",   "1", "Quality" },
			{ "FILTER", "1", "Filter" },
			{ "INFO",   ".", "Info" }
		};
		List<ColumnMetaData> columns = row.getMetaData().getColumns();
		for (int i = 0; i < columnSpecs.length; i++) {
			String[] spec = columnSpecs[i];
			columns.add(i, new ColumnMetaData(spec[0], Type.String, spec[1], spec[2]));
		}

		assertTrue( new TjsonToVcfPipe().isAlreadyAVcf(row) );
	}

	/** A row with the 8 VCF columns plus FORMAT and one sample column must also be reported as already a VCF. */
	@Test
	public void isVcf2() {
		History row = new History( concat("1", "100", "rs123", "A", "C", "29", "PASS", "NS=3;DP=14", "GT:GQ:DP:HQ", "0|0:48:1:51,51") );
		row.setMetaData(new HistoryMetaData(Arrays.asList( concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT", "NA00001") )));

		// Constructor arguments for each column's metadata, in column order
		// (presumably name, count, description - confirm against ColumnMetaData); type is always String
		String[][] columnSpecs = {
			{ "CHROM",   "1", "Chromosome" },
			{ "POS",     "1", "Position" },
			{ "ID",      "1", "Id or rsId" },
			{ "REF",     "1", "Reference Allele" },
			{ "ALT",     ".", "Alternate Alleles" },
			{ "QUAL",    "1", "Quality" },
			{ "FILTER",  "1", "Filter" },
			{ "INFO",    ".", "Info" },
			{ "FORMAT",  ".", "Format of sample columns" },
			{ "NA00001", ".", "First sample column" }
		};
		List<ColumnMetaData> columns = row.getMetaData().getColumns();
		for (int i = 0; i < columnSpecs.length; i++) {
			String[] spec = columnSpecs[i];
			columns.add(i, new ColumnMetaData(spec[0], Type.String, spec[1], spec[2]));
		}

		assertTrue( new TjsonToVcfPipe().isAlreadyAVcf(row) );
	}


	/** A row with only CHROM, POS and a JSON column must NOT be reported as already a VCF. */
	@Test
	public void isNotVcf() {
		History row = new History( concat("1", "100", "{\"_id\":\"rs123\"}") );
		row.setMetaData(new HistoryMetaData(Arrays.asList( concat("#CHROM", "POS", "bior.json") )));

		// Constructor arguments for each column's metadata, in column order
		// (presumably name, count, description - confirm against ColumnMetaData); type is always String
		String[][] columnSpecs = {
			{ "CHROM",     "1", "Chromosome" },
			{ "POS",       "1", "Position" },
			{ "bior.json", "1", "BioR JSON field" }
		};
		List<ColumnMetaData> columns = row.getMetaData().getColumns();
		for (int i = 0; i < columnSpecs.length; i++) {
			String[] spec = columnSpecs[i];
			columns.add(i, new ColumnMetaData(spec[0], Type.String, spec[1], spec[2]));
		}

		assertFalse( new TjsonToVcfPipe().isAlreadyAVcf(row) );
	}


	//==============================================================================================================================
	
	/** Builds a VCF-compatible line from the target JSON column (col = -1) by calling lineToVcf() directly.
	 *  Constructor settings:
	 *    - isKeepAllColumns = false
	 *    - isAddDataToInfo  = false  (INFO is expected to stay ".")
	 *    - isAddJsonToInfo  = true
	 *    - range            = null
	 *  The JSON holds only the required fields; the expected row has the 8 VCF columns built from
	 *  the JSON, followed by the original non-JSON columns. */
	@Test
	public void lineToVcf_requiredOnly() {
		// NOTE: Can't use -1 as last column because that is set by the processNextStart() method,
		//       and here we are skipping that to call lineToVcf() directly
		TjsonToVcfPipe vcfPipe = new TjsonToVcfPipe(/*targetJsonColOneBased=*/ -1,  /*isKeepAllColumns=*/ false,  /*isAddDataToInfo=*/ false, /*isAddJsonToInfo=*/true,  /*range=*/ null);

		// Input: four plain columns plus a JSON column containing only the required keys
		String requiredOnlyJson = swapQuotes("{'_landmark':'2','_minBP':200,'_refAllele':'G','_altAlleles':['T']}");
		String headerLine = concat("#chromosome", "position", "referenceAllele", "alternateAlleles", "bior.json");
		String dataLine   = concat("1", "100", "A", "C", requiredOnlyJson);
		History input = createHistoryWithHeader(headerLine, dataLine);

		History actual = vcfPipe.lineToVcf(input);

		verifyHeaders(
				Arrays.asList("CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO",    "chromosome", "position", "referenceAllele", "alternateAlleles"),
				actual);

		// NOTE: The first 8 columns are being built from the JSON, so will not match the original columns in the input
		String[] expectedCols = { "2", "200", ".", "G", "T", ".", ".", ".", "1", "100", "A", "C" };
		PipeTestUtils.assertListsEqual(new History(expectedCols), actual);
	}
	
	
	
	/** Test large floats to make sure their precision is preserved.
	 *  Covers values inside the JSON column (float3..float5) as well as values from plain columns (float1, float2).
	 *  (see pipes VCF2VariantPipeTest.testParseDouble() for more explanation on handling large floats)
	 *  NOTE(review): the ##BIOR descriptions say float1 is handled as double and float2 as BigDecimal,
	 *  while the original inline comments in this test said the reverse - confirm which is intended.
	 *  Either way, the expected output requires every digit to come through unchanged. */
	@Test
	public void testLargeFloats() {
		String float1Header = swapQuotes("##BIOR=<ID=bior.float1,Type='Float','Number'=1,Description='Test large float in its own column.  This should be treated as double, not BigDecimal.  Precision maintained'>");
		String float2Header = swapQuotes("##BIOR=<ID=bior.float2,Type='Float','Number'=1,Description='Test large float in its own column.  This should be treated as BigDecimal instead of double.  Precision maintained'>");
		String jsonColumn   = swapQuotes("{'_landmark':'2','_minBP':200,'_refAllele':'G','_altAlleles':['T'],'float3':0.1234567890123456,'float4':1.1234567890123456,'float5':12345678901234567890.12345678901234567899}");

		Pipeline pipeline = new Pipeline(
				new HistoryInPipe(),
				new TjsonToVcfPipe(/*targetJsonColOneBased=*/ -1,  /*isKeepAllColumns=*/ false,  /*isAddDataToInfo=*/ true, /*isAddJsonToInfo=*/true,  /*range=*/ null),
				new HistoryOutPipe()
				);
		pipeline.setStarts( Arrays.asList(
				float1Header,
				float2Header,
				concat("#bior.float1",      "bior.float2",   "bior.json"),
				concat("123456.1234567890", "1234567.12345", jsonColumn)
				) );
		List<String> actualLines = PipeTestUtils.getResults(pipeline);

		// The fileDate header carries today's date, so it is computed rather than hard-coded
		String today = new SimpleDateFormat("yyyyMMdd").format(new Date());
		String expectedInfo =
				  "bior.float1=123456.1234567890;"
				+ "bior.float2=1234567.12345;"
				+ "bior.json._altAlleles=T;bior.json._landmark=2;bior.json._minBP=200;bior.json._refAllele=G;"
				+ "bior.json.float3=0.1234567890123456;"
				+ "bior.json.float4=1.1234567890123456;"
				+ "bior.json.float5=12345678901234567890.12345678901234567899";
		List<String> expectedLines = Arrays.asList(
				"##fileformat=VCFv4.1",
				"##fileDate=" + today,
				"##source=bior_tjson_to_vcf",
				float1Header,
				float2Header,
				swapQuotes("##INFO=<ID=bior.float1,Number=.,Type=String,Description=''>"),  // TODO: Why is the type and number wrong????
				swapQuotes("##INFO=<ID=bior.float2,Number=.,Type=String,Description=''>"),  // TODO: Why is the type and number wrong????
				swapQuotes("##INFO=<ID=bior.json._altAlleles,Number=.,Type=String,Description=''>"),
				swapQuotes("##INFO=<ID=bior.json._landmark,Number=1,Type=String,Description=''>"),
				swapQuotes("##INFO=<ID=bior.json._minBP,Number=1,Type=Integer,Description=''>"),
				swapQuotes("##INFO=<ID=bior.json._refAllele,Number=1,Type=String,Description=''>"),
				swapQuotes("##INFO=<ID=bior.json.float3,Number=1,Type=Float,Description=''>"),
				swapQuotes("##INFO=<ID=bior.json.float4,Number=1,Type=Float,Description=''>"),
				swapQuotes("##INFO=<ID=bior.json.float5,Number=1,Type=Float,Description=''>"),
				concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"),
				concat("2", "200", ".", "G", "T", ".", ".", expectedInfo)
				);
		PipeTestUtils.assertListsEqualExactStringMatch(expectedLines, actualLines);
	}

	
	/** Builds a VCF-compatible line from the target JSON column (col = -1) by calling lineToVcf() directly.
	 *  Constructor settings:
	 *    - isKeepAllCols      = true
	 *    - isAddDataToInfoCol = true
	 *    - isAddJsonToInfo    = true
	 *    - ColsToAddToInfo    = "1..8" (all input columns)
	 *  The input has a second JSON column (bior.myKeys); the expected INFO contains its key-value pairs,
	 *  with the true boolean shown as a flag and the false boolean absent.
	 */
	@Test
	public void lineToVcf_optionalFields() {
		TjsonToVcfPipe vcfPipe = new TjsonToVcfPipe( /*JsonCol=*/ -1, /*isKeepAllCols=*/ true, /*isAddDataToInfoCol=*/ true, /*isAddJsonToInfo=*/true, /*ColsToAddToInfo=*/ "1..8");

		String myKeysJson = swapQuotes("{'myKey':'myValue','year':2015,'temperature':72.349,'isHiddenBoolean':false,'isShownBoolean':true}");
		String tjsonJson  = swapQuotes("{'_landmark':'2','_minBP':200,'_maxBP':200,'_id':'rs11111','QUAL':0.01,'FILTER':'PASS','INFO':'AC=1;MAF=0.04','_refAllele':'C','_altAlleles':['G','T']}");
		// NOTE: The comma in the alternateAlleles column is expected to show up as "%2C" in INFO
		String headerLine = concat("#chromosome", "position", "reference", "alternateAlleles", "altsPipeDelimited", "bior.description", "bior.myKeys", "bior.tjson");
		String dataLine   = concat("1", "100", "A", "C,G", "C|G|T", "someDescription", myKeysJson, tjsonJson);
		History input = createHistoryWithHeader(headerLine, dataLine);

		History actual = vcfPipe.lineToVcf(input);

		verifyHeaders(
				Arrays.asList("CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO",
						"chromosome", "position", "reference", "alternateAlleles", "altsPipeDelimited", "bior.description", "bior.myKeys", "bior.tjson"),
				actual);

		// INFO starts with AC and MAF because the JSON's own 'INFO' field is used to seed the INFO column.
		// Inside values, "=" ";" and "," are expected percent-escaped (%3D, %3B, %2C).
		String expectedInfo =
				  "AC=1;MAF=0.04;"
				+ "alternateAlleles=C%2CG;"
				+ "altsPipeDelimited=C|G|T;"
				+ "bior.description=someDescription;"
				+ "bior.myKeys.isShownBoolean;"
				+ "bior.myKeys.myKey=myValue;"
				+ "bior.myKeys.temperature=72.349;"
				+ "bior.myKeys.year=2015;"
				+ "bior.tjson._altAlleles=G,T;"
				+ "bior.tjson._id=rs11111;"
				+ "bior.tjson._landmark=2;"
				+ "bior.tjson._maxBP=200;"
				+ "bior.tjson._minBP=200;"
				+ "bior.tjson._refAllele=C;"
				+ "bior.tjson.FILTER=PASS;"
				+ "bior.tjson.INFO=AC%3D1%3BMAF%3D0.04;"
				+ "bior.tjson.QUAL=0.01;"
				+ "chromosome=1;"
				+ "position=100;"
				+ "reference=A";
		String[] expectedCols = {
				"2", "200", "rs11111", "C", "G,T", "0.01", "PASS",
				expectedInfo,
				"1", "100", "A", "C,G",  "C|G|T",  "someDescription", myKeysJson, tjsonJson };
		PipeTestUtils.assertListsEqual(new History(expectedCols), actual);
	}
	
	/** A plain column containing spaces, '=', ',', ';', ':' and '|' is collapsed into INFO.
	 *  Expected escaping:  '=' to %3D     ',' to %2C    ';' to %3B   (spaces, ':' and '|' are left as-is). */
	@Test
	public void handleSpacesAndOtherOddCharsGoingIntoInfoCol() {
		TjsonToVcfPipe vcfPipe = new TjsonToVcfPipe( /*JsonCol=*/ -1, /*isKeepAllCols=*/ false, /*isAddDataToInfoCol=*/ true, /*isAddJsonToInfo=*/false, /*rangeToCollapse=*/"1");

		String oddValue   = "A col : with = diff |, and chrs; too";
		String targetJson = swapQuotes("{'_landmark':'2','_minBP':200,'_refAllele':'C','_altAlleles':['G','T']}");
		History input = createHistoryWithHeader(
				concat("#oddCol", "bior.tjson"),
				concat(oddValue, targetJson)
		);

		History actual = vcfPipe.lineToVcf(input);

		verifyHeaders(Arrays.asList("CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"), actual);

		String[] expectedCols = {
				"2", "200", ".", "C", "G,T", ".", ".", "oddCol=A col : with %3D diff |%2C and chrs%3B too" };
		PipeTestUtils.assertListsEqual(new History(expectedCols), actual);
	}


	/** lineToVcf() on a History built from a bare JSON string (no header set up by this test)
	 *  must raise an IllegalStateException. */
	@Test
	public void lineToVcf_headerMissing() {
		History headerlessLine = new History(swapQuotes("{'_landmark':'1','_minBP':100,'_maxBP':100,'_id':'rs11111','QUAL':0.0,'FILTER':9.3,'INFO':'AC=1;MAF=0.04','_refAllele':'A','_altAlleles':['C','G']}"));
		TjsonToVcfPipe vcfPipe = new TjsonToVcfPipe();

		Exception caught = null;
		try {
			vcfPipe.lineToVcf(headerlessLine);
		} catch (Exception e) {
			caught = e;
		}
		if (caught == null) {
			fail("Should throw exception before this!");
		}
		assertTrue(caught instanceof IllegalStateException);
		assertEquals("Error: Header information is required to construct the VCF", caught.getMessage());
	}
	
	/** Removes each required JSON key in turn (_landmark, _minBP, _refAllele, _altAlleles) and checks that
	 *  lineToVcf() raises an IllegalStateException naming the missing key.
	 *  The same pipe instance is reused, and the expected message's data line number goes up by one per attempt. */
	@Test
	public void lineToVcf_missingFields() {
		TjsonToVcfPipe vcfPipe = new TjsonToVcfPipe(5);

		String completeJson = swapQuotes("{'_landmark':'1','_minBP':100,'_maxBP':100,'_id':'rs11111','QUAL':0.0,'FILTER':9.3,'INFO':'AC=1;MAF=0.04','_refAllele':'A','_altAlleles':['C','G']}");

		// Parallel arrays: requiredKeys[k] is the key stripped out of jsonMissingKey[k]
		String[] requiredKeys = { "_landmark",  "_minBP",  "_refAllele",  "_altAlleles" };
		String[] jsonMissingKey = {
			completeJson.replace("\"_landmark\":\"1\",", ""),
			completeJson.replace("\"_minBP\":100,", ""),
			completeJson.replace("\"_refAllele\":\"A\",", ""),
			completeJson.replace(",\"_altAlleles\":[\"C\",\"G\"]", "")
		};

		for (int idx = 0; idx < jsonMissingKey.length; idx++) {
			String brokenJson = jsonMissingKey[idx];
			Exception caught = null;
			try {
				History line = createHistoryWithHeader(
					concat("#CHROM", "POS", "REF", "ALT", "Json"),
					concat("1",      "100", "A",   "C",   brokenJson)
					);
				vcfPipe.lineToVcf(line);
			} catch (Exception e) {
				caught = e;
			}
			if (caught == null) {
				fail("Should throw exception before this!");
			}
			assertTrue(caught instanceof IllegalStateException);
			String expectedMsg = "Error: Required JSON field [" + requiredKeys[idx] + "] missing on data line " + (idx+1) + ": Target column must contain these JSON fields: _landmark, _minBP, _refAllele, _altAlleles.  Was: " + brokenJson;
			assertEquals(expectedMsg, caught.getMessage());
		}

		System.out.println("lineToVcf() - DONE");
	}

	/** When the input already has the 8 VCF columns, a JSON column holding just "." (no required fields)
	 *  is accepted: the expected output is the 8 VCF columns with the bior.json column gone. */
	@Test
	public void lineToVcf_missingFieldsButAlreadyAVcfSoOk() {
		String headerLine = concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "bior.json");
		String dataLine   = concat("1",      "100", ".",  "A",   "C",   ".",    ".",      ".",    ".");
		History input = createHistoryWithHeader(headerLine, dataLine);

		History actual = new TjsonToVcfPipe().lineToVcf(input);

		String expectedRow = concat("1", "100", ".", "A", "C", ".", ".", ".");
		assertEquals(expectedRow, actual.getMergedData("\t"));
	}

	/** Placeholder: asserts nothing and only prints a pointer to lineToVcf_optionalFields(). */
	@Test
	public void lineToVcf_rangesToAddToInfo() {
		final String pointer = "See lineToVcf_optionalFields() as this specifies ranges";
		System.out.println(pointer);
	}
	
	//==============================================================================================================================


	/** Input already has the 8 VCF columns, plus a Gene column and a JSON column.
	 *  With isKeepAllColumns = true, isAddDataToInfo = true, isAddJsonToInfo = true and range "9..10",
	 *  the first 7 columns are expected unchanged and the Gene value and JSON keys are expected
	 *  appended to the existing INFO value. */
	@Test
	public void lineToVcf_alreadyAVcf() {
		// NOTE: Can't use -1 as last column because that is set by the processNextStart() method,
		//       and here we are skipping that to call lineToVcf() directly
		//  	 Should not need jsonCol as we are not building the VCF.
		//		 However, the data from the json column should be merged with the data in the original INFO col
		TjsonToVcfPipe vcfPipe = new TjsonToVcfPipe(/*targetJsonColOneBased=*/ -1,  /*isKeepAllColumns=*/ true,  /*isAddDataToInfo=*/ true, /*isAddJsonToInfo=*/true,  /*range=*/ "9..10");

		// NOTE(review): this JSON is left single-quoted (not passed through swapQuotes), unlike the other tests - confirm that is intentional
		String jsonColumn = "{'_minBP':200,'_refAllele':'G','_altAlleles':['T'],'QUAL':0.33}";
		History input = createHistoryWithHeader(
				concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "Gene", "bior.json"),
				concat("1", "100", "rs123", "C", "A,T", "0.11", "PASS", "AAA=123;BBB=234", "MTHFR", jsonColumn)
				);

		History actual = vcfPipe.lineToVcf(input);

		verifyHeaders(Arrays.asList("CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "Gene", "bior.json"), actual);

		String expectedInfo = "AAA=123;BBB=234;bior.json._altAlleles=T;bior.json._minBP=200;bior.json._refAllele=G;bior.json.QUAL=0.33;Gene=MTHFR";
		String[] expectedCols = { "1", "100", "rs123", "C", "A,T", "0.11", "PASS",
				expectedInfo,
				"MTHFR",   jsonColumn
				};
		PipeTestUtils.assertListsEqual(new History(expectedCols), actual);
	}


	/** Runs the pipeline on input whose only column is the JSON column; the JSON's INFO field is a flat string.
	 *  The same expected output is checked with the JSON column given as -1 and as 1. */
	@Test
	public void pipeJustJson() {
		// Note: the JSON's CHROM/POS/ID/REF/ALT keys deliberately differ from the underscore-prefixed keys;
		// the expected VCF columns below match the underscore-prefixed ones (_landmark, _minBP, _id, ...)
		String variantJson =
				( "{'CHROM':'1','POS':'100','ID':'rs1','REF':'C','ALT':'T','QUAL':'29','FILTER':'PASS',"
				+ "'INFO':'NS=3;DP=14;AF=0.5;isInDb;isH2','_id':'rs6054257','_type':'variant','_landmark':'20',"
				+ "'_refAllele':'G','_altAlleles':['A'],'_minBP':14370,'_maxBP':14370}" ).replaceAll("'", "\"");
		List<String> tjsonIn = Arrays.asList("#variantAsJson", variantJson);

		// Flattened JSON keys expected at the tail of the INFO column.
		// NOTE: The equals and semicolons from the JSON's INFO value are expected escaped:  '=' to %3D    ';' to %3B
		String jsonKeysInInfo =
				  "variantAsJson._altAlleles=A;variantAsJson._id=rs6054257;variantAsJson._landmark=20;"
				+ "variantAsJson._maxBP=14370;variantAsJson._minBP=14370;variantAsJson._refAllele=G;"
				+ "variantAsJson._type=variant;variantAsJson.ALT=T;variantAsJson.CHROM=1;variantAsJson.FILTER=PASS;"
				+ "variantAsJson.ID=rs1;variantAsJson.INFO=NS%3D3%3BDP%3D14%3BAF%3D0.5%3BisInDb%3BisH2;"
				+ "variantAsJson.POS=100;variantAsJson.QUAL=29;variantAsJson.REF=C";

		List<String> expected = Arrays.asList(
				TOP_HEADER_ARRAY[0],
				TOP_HEADER_ARRAY[1],
				TOP_HEADER_ARRAY[2],
				"##INFO=<ID=AF,Number=.,Type=Float,Description=\"\">",
				"##INFO=<ID=DP,Number=.,Type=Integer,Description=\"\">",
				"##INFO=<ID=isH2,Number=0,Type=Flag,Description=\"\">",
				"##INFO=<ID=isInDb,Number=0,Type=Flag,Description=\"\">",
				"##INFO=<ID=NS,Number=.,Type=Integer,Description=\"\">",
				"##INFO=<ID=variantAsJson._altAlleles,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=variantAsJson._id,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=variantAsJson._landmark,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=variantAsJson._maxBP,Number=1,Type=Integer,Description=\"\">",
				"##INFO=<ID=variantAsJson._minBP,Number=1,Type=Integer,Description=\"\">",
				"##INFO=<ID=variantAsJson._refAllele,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=variantAsJson._type,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=variantAsJson.ALT,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=variantAsJson.CHROM,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=variantAsJson.FILTER,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=variantAsJson.ID,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=variantAsJson.INFO,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=variantAsJson.POS,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=variantAsJson.QUAL,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=variantAsJson.REF,Number=1,Type=String,Description=\"\">",
				concat("#CHROM", "POS",   "ID",        "REF", "ALT", "QUAL", "FILTER", "INFO"),
				concat("20",     "14370", "rs6054257", "G",   "A",   "29",   "PASS",   "NS=3;DP=14;AF=0.5;isInDb;isH2;" + jsonKeysInInfo)
				);

		// JSON column addressed as the last column (-1)
		verifyPipelineResults(tjsonIn, expected, /*jsonColToBuildVcfFrom=*/-1, /*isKeepAllCols=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/true, /*colRangeToCollapse=*/"1");
		// JSON column addressed by its positive index
		verifyPipelineResults(tjsonIn, expected, /*jsonColToBuildVcfFrom=*/1,  /*isKeepAllCols=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/true, /*colRangeToCollapse=*/"1");
	}

	/** Specified column range ("1..10") is outside the number of columns (the input has 1 column).
	 *  An IllegalArgumentException with a message naming the bad range is required;
	 *  the test fails explicitly if the call returns without throwing. */
	@Test
	public void colRangeOutsideNumColumns() {
		List<String> tjsonIn = Arrays.asList(
				"#variantAsJson",
				"{'CHROM':'20','POS':'14370','ID':'rs6054257','REF':'G','ALT':'A','QUAL':'29','FILTER':'PASS','INFO':'NS=3;DP=14;AF=0.5;DB=true;H2=true','_id':'rs6054257','_type':'variant','_landmark':'20','_refAllele':'G','_altAlleles':['A'],'_minBP':14370,'_maxBP':14370}".replaceAll("'", "\"")
				);
		try {
			// The expected-output list is a dummy: the call should throw before any comparison matters
			verifyPipelineResults(tjsonIn, Arrays.asList(""), /*jsonColToBuildVcfFrom=*/-1, /*isKeepAllCols=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/false, /*colRangeToCollapse=*/"1..10");
			// fail() raises an AssertionError, which the catch(Exception) below does not intercept
			fail("Should throw exception before this!");
		}catch(Exception e) {
			assertTrue( e instanceof IllegalArgumentException);
			assertEquals("Error: Invalid column or range [1..10] - header only contains 1 columns", e.getMessage());
		}
	}

	
	/** Test different ways of spelling the same column range (columns 9-16) on input that is
	 *  already VCF-formatted and has only tab-delimited values (no JSON).
	 *  Every spelling must give the same output: x1..x8 appended to INFO and dropped as columns. */
	@Test
	public void ranges() {
		List<String> tjsonIn = Arrays.asList(
				concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8"),
				concat("1",		 "100",	"rs1","A",   "C",   ".",    ".",      "AN=3", "1",  "2",  "3",  "4",  "5",  "6",  "7",  "8")
				);
		List<String> expected = Arrays.asList(
				TOP_HEADER_ARRAY[0],
				TOP_HEADER_ARRAY[1],
				TOP_HEADER_ARRAY[2],
				"##INFO=<ID=x1,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=x2,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=x3,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=x4,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=x5,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=x6,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=x7,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=x8,Number=.,Type=String,Description=\"\">",
				concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"),
				concat("1",		 "100",	"rs1","A",   "C",   ".",    ".",      "AN=3;x1=1;x2=2;x3=3;x4=4;x5=5;x6=6;x7=7;x8=8")
				);

		// Equivalent spellings: explicit list, ascending/descending, negative indexes, and mixed forms
		String[] equivalentRanges = {
				"9,10,11,12,13,14,15,16",
				"9..16",
				"-1..-8",
				"-8..-1",
				"16..9",
				"9,10,15..16,11,12..14",
				"-1..-3,-4,-8..-6,-5"
		};
		for (String colRange : equivalentRanges) {
			verifyPipelineResults(tjsonIn, expected, /*jsonToVcf=*/-1, /*isKeepAllCols=*/false, /*isAddToInfo=*/true, /*isAddJsonToInfo=*/false, colRange);
		}
	}
	
	
	/** Output rows must equal the input rows (only the three leading header lines are added) because:
	 * 		- Already VCF
	 * 		- isAddDataToInfo = false and isAddJsonToInfo = false
	 * 		- isKeepAllCols = true, so the trailing bior.json column stays
	 * 		- colRange = "1..9" (all columns) is supplied, but nothing is added to INFO
	 *  NOTE: If we are NOT adding anything to the INFO column, then we don't want to add the ##INFO headers either
	 */
	@Test
	public void noChange() {
		String headerLine = concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO",            "bior.json");
		String dataLine   = concat("1",      "100", "rs1","A",   "C",   ".",    ".",      "AAA=RoadService", "{'this':'shouldNotBeAddedOrRemoved'}".replaceAll("'", "\""));

		// INFO field as a flat text field (not JSON)
		List<String> tjsonIn = Arrays.asList(headerLine, dataLine);

		// Same two lines come back, preceded by the three top header lines and no ##INFO lines
		List<String> expected = Arrays.asList(
				TOP_HEADER_ARRAY[0],
				TOP_HEADER_ARRAY[1],
				TOP_HEADER_ARRAY[2],
				headerLine,
				dataLine
				);
		verifyPipelineResults(tjsonIn, expected, /*jsonColToBuildVcfFrom=*/-1, /*isKeepAllCols=*/true, /*isAddDataToInfo=*/false, /*isAddJsonToInfo=*/false, /*colRangeToCollapse=*/"1..9");
	}
	
	/** Output rows must equal the input rows (only the three leading header lines are added) because:
	 * 		- Already VCF (exactly the 8 VCF columns)
	 * 		- isAddDataToInfo = false and isAddJsonToInfo = false
	 * 		- isKeepAllCols = false must NOT strip any of the 8 VCF columns
	 * 		- colRange = "1..8" (all columns) is supplied, but nothing is added to INFO
	 */
	@Test
	public void noChange_dontRemoveVcfColumns() {
		String headerLine = concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO");
		String dataLine   = concat("1",      "100", "rs1","A",   "C",   ".",    ".",      "AAA=RoadService");

		// INFO field as a flat text field (not JSON)
		List<String> tjsonIn = Arrays.asList(headerLine, dataLine);

		// Same two lines come back, preceded by the three top header lines
		List<String> expected = Arrays.asList(
				TOP_HEADER_ARRAY[0],
				TOP_HEADER_ARRAY[1],
				TOP_HEADER_ARRAY[2],
				headerLine,
				dataLine
				);
		verifyPipelineResults(tjsonIn, expected, /*jsonColToBuildVcfFrom=*/-1, /*isKeepAllCols=*/false, /*isAddDataToInfo=*/false,  /*isAddJsonToInfo=*/false, /*colRangeToCollapse=*/"1..8");
	}
	
	
	/** Runs the pipeline on input that has only a column-header line (no ## lines), a flat INFO string inside
	 *  the JSON, and the JSON column given as a positive index (5). All other options are passed as null. */
	@Test
	public void pipeNoHeader() {
		// INFO field as a flat text field (not JSON)
		String variantJson = "{'CHROM':'20','POS':'14370','ID':'rs6054257','REF':'G','ALT':'A','QUAL':'29','FILTER':'PASS','INFO':'NS=3;DP=14;AF=0.5;DB=true;H2=true','_id':'rs6054257','_type':'variant','_landmark':'20','_refAllele':'G','_altAlleles':['A'],'_minBP':14370,'_maxBP':14370}".replaceAll("'", "\"");
		List<String> tjsonIn = Arrays.asList(
				concat("#chr", "min",   "ref", "alt", "variantAsJson" ),
				concat("20",   "14370", "G",   "A",   variantJson )
				);

		// Expected: 8 VCF columns in front, the four original plain columns kept, JSON column gone
		List<String> expected = Arrays.asList(
				TOP_HEADER_ARRAY[0],
				TOP_HEADER_ARRAY[1],
				TOP_HEADER_ARRAY[2],
				"##INFO=<ID=AF,Number=.,Type=Float,Description=\"\">",
				"##INFO=<ID=DB,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=DP,Number=.,Type=Integer,Description=\"\">",
				"##INFO=<ID=H2,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=NS,Number=.,Type=Integer,Description=\"\">",
				concat("#CHROM", "POS",   "ID",        "REF", "ALT", "QUAL", "FILTER", "INFO",                              "chr", "min",   "ref", "alt"),
				concat("20",     "14370", "rs6054257", "G",   "A",   "29",   "PASS",   "NS=3;DP=14;AF=0.5;DB=true;H2=true", "20",  "14370", "G",   "A")
				);

		// Default:  Build vcf from column 5, then add all bior columns to INFO (there are none), then remove all BioR columns
		// (the null arguments presumably select those defaults - confirm in verifyPipelineResults)
		verifyPipelineResults(tjsonIn, expected, 5, null, null, null, null);
	}
	
	/** Runs the pipeline on input that is already a VCF with a multi-line ## header, plus a trailing JSON
	 *  column whose INFO value is a nested JSON object. Uses the default column index (-1).
	 *  Expected: the original header lines are kept (fileformat line included), new ##INFO lines for the
	 *  flattened JSON keys are added, and the JSON values are appended to the existing INFO value.
	 *  The false flag (H2) gets an ##INFO line but is absent from the data row. */
	@Test
	public void pipeWithHeader_alreadyVcf() {
		// Example output from bior_vcf_to_tjson that we want to try to convert back to VCF
		String jsonColumn = "{'ID':'rs1','INFO':{'NS':3,'DB':true,'H2':false}}".replaceAll("'", "\"");
		List<String> tjsonIn = Arrays.asList(
				"##fileformat=VCFv4.0",
				"##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">",
				"##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
				"##INFO=<ID=AF,Number=.,Type=Float,Description=\"Allele Frequency\">",
				"##INFO=<ID=AA,Number=1,Type=String,Description=\"Ancestral Allele\">",
				"##INFO=<ID=DB,Number=0,Type=Flag,Description=\"dbSNP membership, build 129\">",
				"##INFO=<ID=H2,Number=0,Type=Flag,Description=\"HapMap2 membership\">",
				"##BIOR=<ID=\"bior.ToTJson\",Operation=\"bior_vcf_to_tjson\",DataType=\"JSON\",ShortUniqueName=\"ToTJson\">",
				concat("#CHROM", "POS",   "ID",        "REF", "ALT", "QUAL", "FILTER", "INFO",    "bior.ToTJson" ),
				concat("20",     "14370", "rs6054257", "G",   "A",   "29",   "PASS",   "NS=3;DP=14;AF=0.5;DB;H2", jsonColumn )
				);

		// Most of the header info will be the same in the output, so it is pulled from the input by index
		String expectedInfo = "NS=3;DP=14;AF=0.5;DB;H2;bior.ToTJson.ID=rs1;bior.ToTJson.INFO.DB;bior.ToTJson.INFO.NS=3";
		List<String> expected = Arrays.asList(
				tjsonIn.get(0), // Fileformat
				TOP_HEADER_ARRAY[1],
				TOP_HEADER_ARRAY[2],
				tjsonIn.get(1), // NS
				tjsonIn.get(2), // DP
				tjsonIn.get(3), // AF
				tjsonIn.get(4), // AA
				"##INFO=<ID=bior.ToTJson.ID,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=bior.ToTJson.INFO.DB,Number=0,Type=Flag,Description=\"\">",
				"##INFO=<ID=bior.ToTJson.INFO.H2,Number=0,Type=Flag,Description=\"\">",
				"##INFO=<ID=bior.ToTJson.INFO.NS,Number=1,Type=Integer,Description=\"\">",
				tjsonIn.get(5), // DB
				tjsonIn.get(6), // H2
				tjsonIn.get(7), // ##BIOR=bior.ToTJson
				concat("#CHROM", "POS",   "ID",        "REF", "ALT", "QUAL", "FILTER", "INFO"),
				concat("20",     "14370", "rs6054257", "G",   "A",   "29",   "PASS",   expectedInfo)
				);
		verifyPipelineResults(tjsonIn, expected, -1, /*isKeepAllCols=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/true, /*rangeToCollapse=*/"");
	}
	
	/**
	 * With isAddDataToInfo off, nothing is merged into the INFO column (even though
	 * isAddJsonToInfo is on), and isKeepAllCols leaves every column in place.  The expected
	 * output is therefore the input rows preceded by the three top header lines.
	 */
	@Test
	public void noAddToInfo_yesAddJson_keepCols() {
		// The column-header row and the data row are expected to pass through unchanged
		final String columnRow = concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "bior.ToTJson");
		final String dataRow   = concat("20", "14370", "rs6054257", "G", "A", "29", "PASS", ".",
				"{'ID':'rs1'}".replaceAll("'", "\""));

		List<String> input = Arrays.asList(columnRow, dataRow);

		List<String> output = Arrays.asList(
				TOP_HEADER_ARRAY[0],
				TOP_HEADER_ARRAY[1],
				TOP_HEADER_ARRAY[2],
				columnRow,
				dataRow
				);

		verifyPipelineResults(input, output, -1,
				/*isKeepAllCols=*/true, /*isAddDataToInfo=*/false, /*isAddJsonToInfo=*/true, /*rangeToCollapse=*/"");
	}
	
	/**
	 * Starts from VCF-formatted input with extra BioR columns and collapses those columns into
	 * the existing INFO column.  Points covered by the fixture:
	 * <ul>
	 *   <li>The line already has VCF columns, so they must NOT be repopulated from the JSON column.</li>
	 *   <li>Required VCF header lines are added, and the JSON column is NOT the last column (index -3).</li>
	 *   <li>Nested JSON objects and JSON arrays are flattened.</li>
	 *   <li>Empty fields such as "pseudo" are skipped.</li>
	 *   <li>FORMAT and SAMPLE1 columns are kept and left alone.</li>
	 *   <li>The "Gene" column is collapsed into INFO because a ##BIOR header names it.</li>
	 * </ul>
	 */
	@Test
	public void collapseBiorColsBackIntoExistingInfoCol() {
		// JSON held in the "bior.gene37p13" column.
		// The semicolon in 'note' should come out as "%3B"; spaces should stay spaces (not underscores).
		final String geneJson =
				("{'_type':'variant','_landmark':'1','_strand':'+','_minBP':11874,'_maxBP':11875,"
				+ "'_refAllele':'AA','_altAlleles':['AC','AG'],'gene':'DDX11L1',"
				+ "'note':'DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1; Derived by automated computational analysis using gene prediction method: BestRefSeq.',"
				+ "'pseudo':'','GeneID':'100287102','HGNC':'37102',"
				+ "'Test':{'Key1':'A','Key2':['B','C']}}").replaceAll("'", "\"");

		List<String> inputVcf = Arrays.asList(
				// The "Gene" column gets added to INFO because this ##BIOR header is associated with it
				"##BIOR=<ID=Gene>",
				concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT", "SAMPLE1",
						"bior.gene37p13", "bior.gene", "Gene"),
				concat("1", "14400", "rs11", "G", "A", "100", "PASS", "AMR_AF=0.17;EUR_AF=0.21", "GT:GP", "0/1",
						geneJson, "DDX11L1", "SomeGene"),
				""  // NOTE: Blank line at the end
				);

		// Original INFO content followed by every collapsed BioR value.
		// bior.gene37p13.pseudo is deliberately absent because its only value is empty.
		final String collapsedInfo =
				"AMR_AF=0.17;EUR_AF=0.21;"
				+ "bior.gene=DDX11L1;"
				+ "bior.gene37p13._altAlleles=AC,AG;"
				+ "bior.gene37p13._landmark=1;"
				+ "bior.gene37p13._maxBP=11875;"
				+ "bior.gene37p13._minBP=11874;"
				+ "bior.gene37p13._refAllele=AA;"
				+ "bior.gene37p13._strand=+;"
				+ "bior.gene37p13._type=variant;"
				+ "bior.gene37p13.gene=DDX11L1;"
				+ "bior.gene37p13.GeneID=100287102;"
				+ "bior.gene37p13.HGNC=37102;"
				+ "bior.gene37p13.note=DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1%3B Derived by automated computational analysis using gene prediction method: BestRefSeq.;"
				+ "bior.gene37p13.Test.Key1=A;"
				+ "bior.gene37p13.Test.Key2=B,C;"
				+ "Gene=SomeGene";

		List<String> expectedOut = Arrays.asList(
				TOP_HEADER_ARRAY[0],
				TOP_HEADER_ARRAY[1],
				TOP_HEADER_ARRAY[2],
				"##BIOR=<ID=Gene>",
				"##INFO=<ID=bior.gene,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=bior.gene37p13._altAlleles,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=bior.gene37p13._landmark,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=bior.gene37p13._maxBP,Number=1,Type=Integer,Description=\"\">",
				"##INFO=<ID=bior.gene37p13._minBP,Number=1,Type=Integer,Description=\"\">",
				"##INFO=<ID=bior.gene37p13._refAllele,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=bior.gene37p13._strand,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=bior.gene37p13._type,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=bior.gene37p13.gene,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=bior.gene37p13.GeneID,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=bior.gene37p13.HGNC,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=bior.gene37p13.note,Number=1,Type=String,Description=\"\">",
				// No ##INFO line for bior.gene37p13.pseudo: the one value that was provided was blank
				"##INFO=<ID=bior.gene37p13.Test.Key1,Number=1,Type=String,Description=\"\">",
				"##INFO=<ID=bior.gene37p13.Test.Key2,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=Gene,Number=.,Type=String,Description=\"\">",
				concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT", "SAMPLE1"),
				concat("1", "14400", "rs11", "G", "A", "100", "PASS",
						collapsedInfo,
						// Format and sample columns:
						"GT:GP", "0/1")
				);

		verifyPipelineResults(inputVcf, expectedOut, -3,
				/*isKeepAllCols=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/true, /*colRange=*/"");
	}
	
	/** The column selected as the source of the VCF data holds plain text instead of JSON: an IllegalStateException is expected. */
	@Test
	public void testError_noJsonInCol() {
		final String expectedMessage = "Error: Data line 1:  Target column not JSON.  Was: DDX11L1";

		List<String> inputVcf = Arrays.asList(
				concat("#CHROM", "POS", "Gene"),
				concat("1", "14400", "DDX11L1")
				);
		// Placeholder only - the call below is expected to throw
		List<String> placeholderOutput = Arrays.asList("SHOULD THROW AN EXCEPTION");

		try {
			verifyPipelineResults(inputVcf, placeholderOutput, -1,
					/*isKeepAllCols=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/false, /*colRanges=*/"");
			fail("Should throw an exception before this");
		} catch(Exception thrown) {
			assertTrue(thrown instanceof IllegalStateException);
			assertEquals(expectedMessage, thrown.getMessage());
		}
	}
	
	/** The column selected as the source of the VCF data is JSON, but lacks the required fields: an IllegalStateException is expected. */
	@Test
	public void testError_missingFieldsInJsonInCol() {
		final String expectedMessage = "Error: Required JSON field [_landmark] missing on data line 1: Target column must contain these JSON fields: _landmark, _minBP, _refAllele, _altAlleles.  Was: {\"chrom\":\"1\",\"minBP\":12345}";

		// Rows are written with literal tab separators
		List<String> inputVcf = Arrays.asList(
				"#CHROM	POS	Gene",
				"1	14400	{\"chrom\":\"1\",\"minBP\":12345}"
				);
		// Placeholder only - the call below is expected to throw
		List<String> placeholderOutput = Arrays.asList("SHOULD THROW AN EXCEPTION");

		try {
			verifyPipelineResults(inputVcf, placeholderOutput, -1,
					/*isKeepAllCols=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/false, /*colRanges=*/"");
			fail("Should throw an exception before this");
		} catch(Exception thrown) {
			assertTrue(thrown instanceof IllegalStateException);
			assertEquals(expectedMessage, thrown.getMessage());
		}
	}
	/**
	 * Feeds several data lines whose JSON column exposes a different key on each line.
	 * The pipeline is expected to look ahead over the data lines and emit a ##INFO header for
	 * every key it meets, and not to duplicate VCF header lines that already exist
	 * (for example: ##fileformat, ##filedate).
	 */
	@Test
	public void addInfoHeaderLines_keysAppearingAfterFirstLine() {
		List<String> input = Arrays.asList(
				concat("#CHROM", "POS", "ID",  "REF", "ALT", "QUAL", "FILTER", "INFO", "bior.add"),
				concat("1",      "100", "rs1", "G",   "A",   ".",    ".",      ".",    "{\"x1\":1}"),
				concat("2",      "200", "rs2", "G",   "A",   ".",    ".",      ".",    "{\"x2\":2}"),
				concat("3",      "300", "rs3", "G",   "A",   ".",    ".",      ".",    "{\"x3\":3}")
				);

		// One ##INFO line per JSON key (x1, x2, x3); each data row gains only its own key in INFO
		List<String> output = Arrays.asList(
				TOP_HEADER_ARRAY[0],
				TOP_HEADER_ARRAY[1],
				TOP_HEADER_ARRAY[2],
				"##INFO=<ID=bior.add.x1,Number=1,Type=Integer,Description=\"\">",
				"##INFO=<ID=bior.add.x2,Number=1,Type=Integer,Description=\"\">",
				"##INFO=<ID=bior.add.x3,Number=1,Type=Integer,Description=\"\">",
				concat("#CHROM", "POS", "ID",  "REF", "ALT", "QUAL", "FILTER", "INFO"),
				concat("1",      "100", "rs1", "G",   "A",   ".",    ".",      "bior.add.x1=1"),
				concat("2",      "200", "rs2", "G",   "A",   ".",    ".",      "bior.add.x2=2"),
				concat("3",      "300", "rs3", "G",   "A",   ".",    ".",      "bior.add.x3=3")
				);

		verifyPipelineResults(input, output, /*jsonCol=*/-1,
				/*isKeepAllCols=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/true, /*colRanges=*/"");
	}
	




    /**
     * Checks TjsonToVcfPipe.mergeOldInfoValuesWithNew() against a table of
     * (old INFO, new INFO, expected merged INFO) triples.  Empty strings and "." on either side
     * are expected to contribute nothing, and "." is expected when both sides are empty.
     */
    @Test
    public void testMergeInfo() {
        // Each row: { old INFO value, new INFO value, expected merge result }
        final String[][] cases = {
            { "",                       "",                 "." },
            { ".",                      "",                 "." },
            { "",                       ".",                "." },
            { ".",                      ".",                "." },
            { "",                       "isFlag",           "isFlag" },
            { "isFlag",                 "",                 "isFlag" },
            { ".",                      "isFlag",           "isFlag" },
            { "isFlag",                 ".",                "isFlag" },
            { "key=val",                "isFlag",           "key=val;isFlag" },
            { "isFlag",                 "key=val",          "isFlag;key=val" },
            { "key=val1,val2",          "isFlag;this=that", "key=val1,val2;isFlag;this=that" },
            { "key=val1,val2;key2=xyz", "this=that",        "key=val1,val2;key2=xyz;this=that" }
        };

        TjsonToVcfPipe merger = new TjsonToVcfPipe();
        for (String[] row : cases) {
            assertEquals(row[2], merger.mergeOldInfoValuesWithNew(row[0], row[1]));
        }
    }

    //================================================================================================================

    

    //head -n 250 /data/VCFExamples/BATCH4.vcf | bior_vcf_to_tjson | bior_same_variant -d /data/catalogs/dbSNP/137/00-All.tsv.bgz | bior_drill -p INFO.RSPOS -p INFO.RV -p INFO.GMAF
    /**
     * Runs the first ten columns of the BATCH4 VCF through
     * VCF2VariantPipe -> SameVariantPipe -> DrillPipe -> TjsonToVcfPipe and checks that a handful
     * of expected header lines and one expected data line each occur somewhere in the output.
     *
     * @throws IOException declared by the original test signature
     */
    @Test
    public void testRealData() throws IOException {
        String catalog = "src/test/resources/metadata/BATCH4/dbSNP.tsv.bgz";
        String[] paths = new String[]{"INFO.RSPOS", "INFO.GMAF", "INFO.dbSNPBuildID" };

        // Metadata describing the three commands that the pipes below stand in for
        ArrayList<Metadata> metalist = new ArrayList<Metadata>();
        metalist.add(new Metadata("bior_vcf_to_json"));
        metalist.add(new Metadata(catalog, "bior_same_variant"));
        metalist.add(new Metadata(/*colNum=*/-1, "bior_drill", /*isKeepJson=*/false, DrillPipe.ARRAY_DELIMITER_DEFAULT, paths));

        // There are 105 columns and only the first 10 are wanted, so cut columns 11..105
        int[] colsToCut = new int[95];
        for(int slot=0; slot < colsToCut.length; slot++) {
            colsToCut[slot] = slot + 11;
        }

        // Three drill paths must all end up in INFO; the bior_same_variant JSON column gets dropped.
        //
        // Sample row recorded at each stage.  Shorthand used below:
        //   VCF8     = chr1, 46726433, ., G, A, 0, LowQual, AC=0;AF=0.00;AN=576;BaseQRankSum=-1.660;DP=18023;Dels=0.00;FS=5.392;HaplotypeScore=3.1234;MLEAC=0;MLEAF=0.00;MQ=69.97;MQ0=3;MQRankSum=0.550;ReadPosRankSum=-0.512
        //   SAMPLE   = GT:AD:DP:GQ:MLPSAC:MLPSAF:PL, 0/0/0/0/0/0:139,1:140:99:0:0.00:0,108,32767,32767,32767,32767,32767
        //   VARJSON  = {"CHROM":"chr1","POS":"46726433","ID":".","REF":"G","ALT":"A","QUAL":"0","FILTER":"LowQual","INFO":{"AC":[0],"AF":[0.0],"AN":576,"BaseQRankSum":-1.66,"DP":18023,"Dels":0.0,"FS":5.392,"HaplotypeScore":3.1234,"MLEAC":[0],"MLEAF":[0.0],"MQ":69.97,"MQ0":3,"MQRankSum":0.55,"ReadPosRankSum":-0.512},"_id":".","_type":"variant","_landmark":"1","_refAllele":"G","_altAlleles":["A"],"_minBP":46726433,"_maxBP":46726433}
        //   SNPJSON  = {"CHROM":"1","POS":"46726433","ID":"rs145441107","REF":"G","ALT":"A","QUAL":".","FILTER":".","INFO":{"RSPOS":46726433,"dbSNPBuildID":134,"SSR":0,"SAO":0,"VP":"050200000a05000002000100","GENEINFO":"RAD54L:8438","WGT":1,"VC":"SNV","S3D":true,"NSM":true,"REF":true,"ASP":true,"OTHERKG":true},"_id":"rs145441107","_type":"variant","_landmark":"1","_refAllele":"G","_altAlleles":["A"],"_minBP":46726433,"_maxBP":46726433}
        //
        //   After HCutPipe:        [VCF8, SAMPLE]
        //   After VCF2VariantPipe: [VCF8, SAMPLE, VARJSON]
        //   After SameVariantPipe: [VCF8, SAMPLE, VARJSON, SNPJSON]
        //   After DrillPipe:       [VCF8, SAMPLE, VARJSON, 46726433, RAD54L:8438, ["A"]]
        //                          NOTE(review): these drilled values look like they were recorded with different drill paths than the ones used now - confirm
        //   After TjsonToVcfPipe:  [VCF8 with this appended to INFO:
        //                             ;bior.dbSNP137.INFO.RSPOS=46726433;bior.ToTJson._altAlleles=A;bior.ToTJson._id=.;bior.ToTJson._landmark=1;bior.ToTJson._maxBP=46726433;bior.ToTJson._minBP=46726433;bior.ToTJson._refAllele=G;bior.ToTJson._type=variant;bior.ToTJson.ALT=A;bior.ToTJson.CHROM=chr1;bior.ToTJson.FILTER=LowQual;bior.ToTJson.ID=.;bior.ToTJson.INFO.AC=0;bior.ToTJson.INFO.AF=0.0;bior.ToTJson.INFO.AN=576;bior.ToTJson.INFO.BaseQRankSum=-1.66;bior.ToTJson.INFO.Dels=0;bior.ToTJson.INFO.DP=18023;bior.ToTJson.INFO.FS=5.392;bior.ToTJson.INFO.HaplotypeScore=3.1234;bior.ToTJson.INFO.MLEAC=0;bior.ToTJson.INFO.MLEAF=0.0;bior.ToTJson.INFO.MQ=69.97;bior.ToTJson.INFO.MQ0=3;bior.ToTJson.INFO.MQRankSum=0.55;bior.ToTJson.INFO.ReadPosRankSum=-0.512;bior.ToTJson.POS=46726433;bior.ToTJson.QUAL=0;bior.ToTJson.REF=G
        //                           , SAMPLE]
        //
        // WARNING:  WHY ARE THE VCF2VariantPipe and SameVariantPipe and DrillPipe not adding ##BIOR headers????
        Pipeline p = new Pipeline(
                new CatPipe(),                   // reads VCF lines with 105 columns (96 samples)
                new HistoryInPipe(metalist),
                new HCutPipe(true, colsToCut),   // keep only the first 10 VCF columns (FORMAT plus one sample)
                new VCF2VariantPipe(),
                new SameVariantPipe(catalog),
                new DrillPipe(false, paths),
                new TjsonToVcfPipe(),
                new HistoryOutPipe()             // swap for new PrintPipe() when debugging
        );
        p.setStarts(Arrays.asList("src/test/resources/metadata/BATCH4/BATCH4.vcf"));
        List<String> actual = PipeTestUtils.getResults(p);

        String infoOriginal = "AC=39;AF=0.342;AN=114;BaseQRankSum=-2.185;DP=22;Dels=0.00;FS=4.193;HaplotypeScore=0.0000;MLEAC=37;MLEAF=0.325;MQ=70.00;MQ0=0;MQRankSum=-0.282;QD=20.22;ReadPosRankSum=-1.128;";
        String infoDrilled  = "bior.dbSNP137.INFO.dbSNPBuildID=52;bior.dbSNP137.INFO.GMAF=0.4734;bior.dbSNP137.INFO.RSPOS=28218100";
        String infoSameVar  = "";  // The JSON will not be added by default
        String samples      = concat("GT:AD:DP:GQ:MLPSAC:MLPSAF:PL", "./././././.");

        // Written with single quotes for readability; converted to double quotes below
        List<String> singleQuoted = Arrays.asList(
                // Drilled columns added to INFO  (these have fields in the corresponding columns.tsv file, so use the type, number, description)
                "##INFO=<ID=bior.dbSNP137.INFO.dbSNPBuildID,Number=1,Type=Integer,Description='First dbSNP build for RS'>",
                "##INFO=<ID=bior.dbSNP137.INFO.GMAF,Number=1,Type=Float,Description='GMAF Desc'>",
                "##INFO=<ID=bior.dbSNP137.INFO.RSPOS,Number=1,Type=Integer,Description='Chromosome position reported in dbSNP'>",
                // BIOR headers
                "##BIOR=<ID='bior.ToTJson',Operation='bior_vcf_to_json',DataType='JSON',ShortUniqueName='ToTJson'>",
                "##BIOR=<ID='bior.dbSNP137',Operation='bior_same_variant',DataType='JSON',ShortUniqueName='dbSNP137',Source='dbSNP',Description='dbSNP version 137, Patch 10, Human',Version='137',Build='GRCh37.p10',Path='src/test/resources/metadata/BATCH4/dbSNP.tsv.bgz'>",
                "##BIOR=<ID='bior.dbSNP137.INFO.RSPOS',Operation='bior_drill',Field='INFO.RSPOS',DataType='Integer',Number='1',FieldDescription='Chromosome position reported in dbSNP',ShortUniqueName='dbSNP137',Source='dbSNP',Description='dbSNP version 137, Patch 10, Human',Version='137',Build='GRCh37.p10',Path='src/test/resources/metadata/BATCH4/dbSNP.tsv.bgz'>",
                "##BIOR=<ID='bior.dbSNP137.INFO.GMAF',Operation='bior_drill',Field='INFO.GMAF',DataType='Float',Number='1',FieldDescription='GMAF Desc',ShortUniqueName='dbSNP137',Source='dbSNP',Description='dbSNP version 137, Patch 10, Human',Version='137',Build='GRCh37.p10',Path='src/test/resources/metadata/BATCH4/dbSNP.tsv.bgz'>",
                "##BIOR=<ID='bior.dbSNP137.INFO.dbSNPBuildID',Operation='bior_drill',Field='INFO.dbSNPBuildID',DataType='Integer',Number='1',FieldDescription='First dbSNP build for RS',ShortUniqueName='dbSNP137',Source='dbSNP',Description='dbSNP version 137, Patch 10, Human',Version='137',Build='GRCh37.p10',Path='src/test/resources/metadata/BATCH4/dbSNP.tsv.bgz'>",
                // First line
                concat("chr1", "28218100", ".", "T", "C", "161.75", ".",  (infoOriginal + infoDrilled + infoSameVar), samples)
                );

        // Replace all single quotes with double quotes
        List<String> expectedSubset = new ArrayList<String>(singleQuoted.size());
        for(String raw : singleQuoted) {
            expectedSubset.add(raw.replaceAll("'", "\""));
        }

        // The actual output contains hundreds of lines; only require that each expected
        // (partial) line is contained somewhere within it.
        for(String expected : expectedSubset) {
            boolean isFound = false;
            for(String actualLine : actual) {
                if( actualLine.contains(expected) ) {
                    isFound = true;
                    break;
                }
            }
            assertTrue("Could not find this line in the actual output: \n" + expected, isFound);
        }
    }



    /**
     * Exercises a column whose value is a bare JSON array (NOT embedded within a JSON object):
     * the elements of an array of strings, of integers and of decimals are each expected to be
     * written to INFO as a comma-separated list.
     */
    @Test
    public void testJsonArray(){
        final String fileFormat = "##fileformat=VCFv4.0";
        final String biorHeader = "##BIOR=<ID=\"bior.JsonArray\",Operation=\"bior_foo\",Number=.,ShortUniqueName=JsonArray>";

        List<String> vcfWithArrayCol = Arrays.asList(
                fileFormat,
                biorHeader,
                concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "bior.JsonArray"),
                concat("chr1", "10000", "rs00020", "A", "C", "0", ".", "InfoData", "[\"A\",\"B\",\"C\"]"),
                concat("chr1", "10000", "rs00020", "A", "C", "0", ".", "InfoData", "[1,2,3]"),
                concat("chr1", "10000", "rs00020", "A", "C", "0", ".", "InfoData", "[1.1,2.2,3.3]")
        );

        List<String> vcfCollapsed = Arrays.asList(
                fileFormat,
                TOP_HEADER_ARRAY[1],
                TOP_HEADER_ARRAY[2],
                biorHeader,
                "##INFO=<ID=bior.JsonArray,Number=.,Type=String,Description=\"\">",
                concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"),
                concat("chr1", "10000", "rs00020", "A", "C", "0", ".", "InfoData;bior.JsonArray=A,B,C"),
                concat("chr1", "10000", "rs00020", "A", "C", "0", ".", "InfoData;bior.JsonArray=1,2,3"),
                concat("chr1", "10000", "rs00020", "A", "C", "0", ".", "InfoData;bior.JsonArray=1.1,2.2,3.3")
        );

        // All optional settings are passed as null
        verifyPipelineResults(vcfWithArrayCol, vcfCollapsed, null, null, null, null, null);
    }

    
    
    /**
     * Adds data to the INFO column twice over the same input: first skipping the JSON columns,
     * then including them.
     * <p>
     * The static TjsonToVcfPipe.QUEUE_LIMIT is lowered to 2 for the duration of the test so that
     * the third data line falls beyond the look-ahead used to build the header.  The original
     * value is restored in a finally block so that a failing assertion cannot leak the lowered
     * limit into other tests.
     */
    @Test
    public void testAddToInfo_bothAddAndNotAddJsonCols(){
        // Set the queue limit to 2 to see what happens if a JSON object is discovered on a third line
        final int originalQueueLimit = TjsonToVcfPipe.QUEUE_LIMIT;
        TjsonToVcfPipe.QUEUE_LIMIT = 2;
        try {
            List<String> input = Arrays.asList(
                    "##fileformat=VCFv4.0",
                    "##BIOR=<ID=\"bior.json\",DataType=\"JSON\",Operation=\"bior_foo\",Number=.,ShortUniqueName=JsonArray>",
                    "##BIOR=<ID=\"bior.jsonDot\",DataType=\"JSON\",Operation=\"bior_foo\",Number=.,ShortUniqueName=JsonArray>",
                    // Last column ("bior.jsonUnknown") - cannot tell if it is JSON or not
                    concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "bior.json", "bior.AF", "bior.ref", "bior.jsonDot", "bior.jsonUnknown"),
                    concat("chr1", "10000", "rs00020", "A", "C", "0", ".", "InfoData", "{\"key\":\"val\"}", "0.03", "A", ".", "."),
                    concat("chr2", "10000", "rs00020", "A", "C", "0", ".", "InfoData", "{\"key2\":31.5}", "0.04", "C", ".", "."),
                    // This will be beyond the queue range:
                    //   - the second-last col should NOT be added because it occurs as a JSON field in the header.
                    //   - the last col WILL be added because it starts with "bior.", but has no ##BIOR header to tell it whether it should be ignored
                    concat("chr3", "10000", "rs00020", "A", "C", "0", ".", "InfoData", "{\"key3\":\"val3\"}", "0.05", "G", "{\"n\":1}", "{\"n\":2}"),
                    ""  // NOTE: Blank line at end - should handle - this should be removed
            );

            String data1 = concat("chr1", "10000", "rs00020", "A", "C", "0", ".", "InfoData;bior.AF=0.03;bior.ref=A");
            String data2 = concat("chr2", "10000", "rs00020", "A", "C", "0", ".", "InfoData;bior.AF=0.04;bior.ref=C");
            String data3 = concat("chr3", "10000", "rs00020", "A", "C", "0", ".", "InfoData;bior.AF=0.05;bior.ref=G");
            List<String> expectedNoJsonInInfo = Arrays.asList(
                    "##fileformat=VCFv4.0",
                    TOP_HEADER_ARRAY[1],
                    TOP_HEADER_ARRAY[2],
                    "##BIOR=<ID=\"bior.json\",DataType=\"JSON\",Operation=\"bior_foo\",Number=.,ShortUniqueName=JsonArray>",
                    "##BIOR=<ID=\"bior.jsonDot\",DataType=\"JSON\",Operation=\"bior_foo\",Number=.,ShortUniqueName=JsonArray>",
                    "##INFO=<ID=bior.AF,Number=.,Type=String,Description=\"\">",
                    "##INFO=<ID=bior.jsonUnknown,Number=.,Type=String,Description=\"\">",
                    "##INFO=<ID=bior.ref,Number=.,Type=String,Description=\"\">",
                    concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"),
                    data1,
                    data2,
                    data3
            );

            // Add strings to INFO col, but NOT JSON
            verifyPipelineResults(input, expectedNoJsonInInfo, /*jsonColToBuildVcfFrom=*/-1, /*isKeepAllCols=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/false, /*colRangeToCollapse=*/"");

            //============================

            // Mutable copy, since the Arrays.asList() view above cannot grow
            List<String> expectedWithJsonInInfo = new ArrayList<String>(expectedNoJsonInInfo);
            // Insert the ##INFOs for keys "bior.json.key", "bior.json.key2",
            //   (but not "bior.json.key3" since it was beyond the queue limit for building the header)
            // Likewise, we don't have to have a ##INFO for "bior.jsonDot.n" because it is beyond the queue limit
            // (but "bior.jsonUnknown" will be added to ##INFO since it starts with "bior." and doesn't know if it is JSON or not)
            // Insert before the ##INFO=<ID=bior.jsonUnknown line:
            int idxJsonUnknownHeader = getRowStartingWith(expectedWithJsonInInfo, "##INFO=<ID=bior.jsonUnknown");
            assertTrue("Fixture is missing the ##INFO=<ID=bior.jsonUnknown header line", idxJsonUnknownHeader >= 0);
            expectedWithJsonInInfo.add(idxJsonUnknownHeader,   "##INFO=<ID=bior.json.key,Number=1,Type=String,Description=\"\">");
            expectedWithJsonInInfo.add(idxJsonUnknownHeader+1, "##INFO=<ID=bior.json.key2,Number=1,Type=Float,Description=\"\">");

            // The three data rows directly follow the column header row
            int idxColHeaderRow = getRowStartingWith(expectedWithJsonInInfo, "#CHROM");
            assertTrue("Fixture is missing the #CHROM column header row", idxColHeaderRow >= 0);
            expectedWithJsonInInfo.set(idxColHeaderRow+1, data1.substring(0, data1.indexOf("InfoData")) + "InfoData;bior.AF=0.03;bior.json.key=val;bior.ref=A");
            expectedWithJsonInInfo.set(idxColHeaderRow+2, data2.substring(0, data2.indexOf("InfoData")) + "InfoData;bior.AF=0.04;bior.json.key2=31.5;bior.ref=C");
            expectedWithJsonInInfo.set(idxColHeaderRow+3, data3.substring(0, data3.indexOf("InfoData")) + "InfoData;bior.AF=0.05;bior.json.key3=val3;bior.jsonDot.n=1;bior.jsonUnknown.n=2;bior.ref=G");

            // Add strings and JSON columns to INFO
            verifyPipelineResults(input, expectedWithJsonInInfo, /*jsonColToBuildVcfFrom=*/-1, /*isKeepAllCols=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/true, /*colRangeToCollapse=*/"");
        } finally {
            // Reset the queue limit even when an assertion above fails
            TjsonToVcfPipe.QUEUE_LIMIT = originalQueueLimit;
        }
    }

    
    /**
     * Locates the first line that begins with the given prefix.
     *
     * @param lines         lines to scan, in list order
     * @param prefixToFind  prefix each line is tested against via String.startsWith()
     * @return zero-based index of the first matching line, or -1 when no line matches
     */
    private int getRowStartingWith(List<String> lines, String prefixToFind) {
        int row = 0;
        while( row < lines.size() ) {
            if( lines.get(row).startsWith(prefixToFind) ) {
                return row;
            }
            row++;
        }
        // Scanned every line without a match
        return -1;
    }

	/**
	 * Runs a bior_annotate-style VCF (src/test/resources/vcfizer/annotate.vcf) through
	 * HistoryInPipe -> TjsonToVcfPipe -> HistoryOutPipe and compares the result line by line
	 * with src/test/resources/vcfizer/annotateVcfized.vcf.
	 *
	 * @throws IOException if a fixture file cannot be read or the temp catalog cannot be created
	 */
	@Test
	public void testVCFizeAnnotate() throws IOException{
		File thousandGenomesTempCatalog = createTemp1000GenomesCatalog();
		// Resolve the canonical path once: it is the same for every line, and each call consults the file system
		final String tempCatalogPath = thousandGenomesTempCatalog.getCanonicalPath();

		Pipeline p = new Pipeline(
			new HistoryInPipe(),
			new TjsonToVcfPipe(),
			new HistoryOutPipe()
			//new PrintPipe()
		);

		// Load the input file and rewrite two kinds of path in it:
		//   - "1000_GENOMES_PATH" becomes the temp bgzip created above (and thus its columns.tsv is found)
		//   - the real "/research/..." catalog root becomes the dummy "BIOR_CATALOG", so the outcome is the same
		//     on a laptop (where the catalog is NOT available) and on a server (where it is available)
		List<String> linesIn = FileUtils.readLines(new File("src/test/resources/vcfizer/annotate.vcf"));
		for(int i=0; i < linesIn.size(); i++) {
			String line = linesIn.get(i)
					.replace("1000_GENOMES_PATH", tempCatalogPath)
					.replace("/research/bsi/data/catalogs/bior/v1", "BIOR_CATALOG");
			linesIn.set(i, line);
		}
		p.setStarts(linesIn);

		// NOTE: IF DEBUGGING, look at these methods for how it assigns the description and other details from the columns.tsv files
		//   (assuming the columns.tsv file exists for that field)
		//   VcfInfoColumnBuilder.getInfoMetaFromBiorHeaderLine()
		//   VcfInfoColumnBuilder.updateInfoMetaObjFromColumnsTsvContents()
		List<String> expected = FileCompareUtils.loadFile("src/test/resources/vcfizer/annotateVcfized.vcf");
		// Change "/data5/..." to "/data5bbb/..." in the expected output
		// NOTE(review): the input lines above are no longer rewritten this way - confirm this replacement is still needed
		for(int i=0; i < expected.size(); i++) {
			expected.set(i, expected.get(i).replace("/data5/", "/data5bbb/"));
		}

		List<String> actual = PipeTestUtils.getResults(p);
		// Convert the 1000Genomes temp filepath back to "1000_GENOMES_PATH" so it matches the expected file
		// NOTE: The ##BIOR lines are NOT modified.  Only the values for the ##INFO lines that are created are modified
		//       This is the reason the ##BIOR line fields are the same as the input even though the columns.tsv says otherwise
		for(int i=0; i < actual.size(); i++) {
			actual.set(i, actual.get(i).replace(tempCatalogPath, "1000_GENOMES_PATH"));
		}

		// Dump both sides to make a mismatch easier to diagnose
		System.out.println("Expected:===============================================================");
		PipeTestUtils.printLines(expected);
		System.out.println("\n\n\n");
		System.out.println("Actual:=================================================================");
		PipeTestUtils.printLines(actual);

		PipeTestUtils.assertListsEqual(expected, actual);
	}

	/**
	 * Creates a temp directory holding a "1000genomes.columns.tsv" file and returns the path of
	 * the sibling catalog file "1000genomes.tsv.bgz".
	 * <p>
	 * Only the columns.tsv is written; the returned .tsv.bgz path is never created on disk by
	 * this method.  The TemporaryFolder is created by hand (it is not a JUnit @Rule), so JUnit
	 * will not clean it up; the folder is therefore scheduled for deletion at JVM exit.
	 *
	 * @return path of the (non-existent) temp catalog file that sits next to the columns.tsv
	 * @throws IOException if the temp folder or the columns.tsv cannot be created
	 */
	private File createTemp1000GenomesCatalog() throws IOException {
		// Create a temp catalog bgz and columns.tsv file so that we can have one of the sets of fields derived from the catalog's columns.tsv
		// NOTE: The AMR_AF, AFR_AF, and EUR_AF fields will load from this temp columns.tsv.  ASN_AF is not specified, so will load from the VCF input header
		//   ##BIOR Fields:  (make sure these are different enough from the actual catalog fields so we can differentiate each)
		//      Field				VCF-Type	VCF-Number	VCF-Desc								||	Ctg-Type	Ctg-Number	Ctg-Desc	||	Output
		//		1000Genomes.ASN_AF	Integer		.			1000 Genomes - Asian Allele Frequency	||  (missing in columns.tsv ---------)	||  (chooses VCF since ctg column not found)
		//	    1000Genomes.AMR_AF	Integer		.			1000 Genomes - American Allele Frequency||  Float		1			(see below) ||  (chooses catalog columns.tsv entry)
		//  	1000Genomes.AFR_AF	Integer		.			1000 Genomes - African Allele Frequency	||  Float		1			(see below) ||  (chooses catalog columns.tsv entry)
		//		1000Genomes.EUR_AF	Integer		.			1000 Genomes - European Allele Frequency||  Float		1			(see below) ||  (chooses catalog columns.tsv entry)
		// Verify that these fields match what is in the columns.tsv
		// The others should extract the Type, Number, Description from the ##BIOR headers in the VCF
		TemporaryFolder tempFolder = new TemporaryFolder();
		tempFolder.create();
		File tempDir = tempFolder.newFolder();
		File ctg     = new File(tempDir, "1000genomes.tsv.bgz");
		File colsTsv = new File(tempDir, "1000genomes.columns.tsv");
		FileUtils.write(colsTsv, 
				"#ColumnName	Type	Count	Description	HumanReadableName\n" +
				"INFO.AFR_AF	Float	1	Allele frequency in the AFR populations calculated from AC and AN, in the range (0,1)	African Allele Frequency\n" +
				"INFO.AMR_AF	Float	1	Allele frequency in the AMR populations calculated from AC and AN, in the range (0,1)	American Allele Frequency\n" +
				"INFO.EUR_AF	Float	1	Allele frequency in the EUR populations calculated from AC and AN, in the range (0,1)	European Allele Frequency\n"
				);
		// Schedule cleanup after the files exist, so the folder's contents are registered for deletion as well
		FileUtils.forceDeleteOnExit(tempFolder.getRoot());
		return ctg;
	}

	/**
	 * Test that the delimiter given in the ##BIOR headers is used to separate values in the INFO field.
	 * The input has one column split on "|" and one on ","; both are expected comma-separated in INFO.
	 */
	@Test
	public void testDelimiter() {
		// Each ##BIOR header appears identically in the input and in the expected output, so build it once
		final String fileFormatLine     = "##fileformat=VCFv4.0";
		final String biorPipeDelimited  = "##BIOR=<ID='bior.JsonArray',Operation='bior_compress',DataType='String',Field='JsonArray',FieldDescription='List of Strings',ShortUniqueName='JsonArray',Delimiter='|',Path='REPLACEMEgenes.tsv.bgz'>".replaceAll("'", "\"");
		final String biorCommaDelimited = "##BIOR=<ID='bior.JsonArray2',Operation='bior_compress',DataType='String',Field='JsonArray2',FieldDescription='List of Numbers',ShortUniqueName='JsonArray2',Delimiter=',',Path='REPLACEMEgenes.tsv.bgz'>".replaceAll("'", "\"");

		List<String> tjsonLines = Arrays.asList(
				fileFormatLine,
				biorPipeDelimited,
				biorCommaDelimited,
				concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "bior.JsonArray", "bior.JsonArray2"),
				concat("chr1", "10000", "rs00020", "A", "C", "0", ".", "InfoData", "A|B|C", "1,2,3")
		);

		List<String> expectedVcf = Arrays.asList(
				fileFormatLine,
				TOP_HEADER_ARRAY[1],
				TOP_HEADER_ARRAY[2],
				biorPipeDelimited,
				biorCommaDelimited,
				"##INFO=<ID=bior.JsonArray,Number=.,Type=String,Description=\"List of Strings\">",
				"##INFO=<ID=bior.JsonArray2,Number=.,Type=String,Description=\"List of Numbers\">",
				concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"),
				concat("chr1", "10000", "rs00020", "A", "C", "0", ".", "InfoData;bior.JsonArray=A,B,C;bior.JsonArray2=1,2,3")
		);

		// All optional settings left null so the helper's defaults are used
		verifyPipelineResults(tjsonLines, expectedVcf, null, null, null, null, null);
	}
    
	/** Test that columns that have no values still have ##INFO lines added, regardless of whether there is an accompanying ##BIOR line.
	 *  This is important because a value for a column may not appear until after the last row stored by the queue (which is used to determine which ##INFO fields to add).
	 *  <p>
	 *  The test temporarily lowers the static TjsonToVcfPipe.QUEUE_LIMIT; the original value is restored in a
	 *  finally block so a failing assertion cannot leak the lowered limit into other tests. */
	@Test
	public void testBiorColumnsWithNoValuesStillGetAnInfoHeader(){
		// Set the queue limit to 2 to see what happens if a value is discovered on a third line
		final int originalQueueLimit = TjsonToVcfPipe.QUEUE_LIMIT;
		TjsonToVcfPipe.QUEUE_LIMIT = 2;

		try {
			List<String> input = Arrays.asList(
					"##fileformat=VCFv4.0",
					"##BIOR=<ID='bior.flag',Operation='bior_drill',DataType='Boolean',Number='0',Field='flag',FieldDescription='Boolean value',ShortUniqueName='flag',Path='REPLACEMEgenes.tsv.bgz'>".replaceAll("'", "\""),
					"##BIOR=<ID='bior.num',Operation='bior_drill',DataType='Integer',Number='1',Field='num',FieldDescription='some generic numeric key',ShortUniqueName='num',Path='REPLACEMEgenes.tsv.bgz'>".replaceAll("'", "\""),
					"##INFO=<ID=InfoData,Number=1,Type=String,Description=\"Just a generic value\">",
					concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "bior.num", 	"bior.key2", 	"bior.flagAsStr", 	"bior.flag", 	"bior.float"),
					concat("chr1", "10000", "rs00010", "A", "C", "0", ".", "InfoData=val", ".", 		".", 			".", 				".",			"1.34"),
					concat("chr2", "20000", "rs00020", "C", "G", "0", ".", "InfoData=val", ".", 		".", 			"true",				"true",			"."),
					concat("chr3", "30000", "rs00030", "G", "T", "0", ".", "InfoData=val", "1", 		"BBB", 			".",				".",			".")
			);

			List<String> expected = Arrays.asList(
					input.get(0),  // ##fileformat
					TOP_HEADER_ARRAY[1],
					TOP_HEADER_ARRAY[2],
					input.get(1), // ##BIOR=<ID="bior.flag"
					input.get(2), // ##BIOR=<ID="bior.num"
					"##INFO=<ID=bior.flag,Number=0,Type=Flag,Description=\"Boolean value\">",
					// It doesn't know how to handle the flag since it has no associated ##BIOR header, so treated as String
					"##INFO=<ID=bior.flagAsStr,Number=.,Type=String,Description=\"\">",
					// It doesn't know how to handle the float since it has no associated ##BIOR header, so treated as String
					"##INFO=<ID=bior.float,Number=.,Type=String,Description=\"\">",
					"##INFO=<ID=bior.key2,Number=.,Type=String,Description=\"\">",
					"##INFO=<ID=bior.num,Number=1,Type=Integer,Description=\"some generic numeric key\">",
					input.get(3), // ##INFO=<ID=InfoData
					concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"),
					concat("chr1", "10000", "rs00010", "A", "C", "0", ".", "InfoData=val;bior.float=1.34"),
					concat("chr2", "20000", "rs00020", "C", "G", "0", ".", "InfoData=val;bior.flag;bior.flagAsStr=true"),
					concat("chr3", "30000", "rs00030", "G", "T", "0", ".", "InfoData=val;bior.key2=BBB;bior.num=1")
			);

			// Verify defaults
			verifyPipelineResults(input, expected, null, null, null, null, null);
			// Verify range
			verifyPipelineResults(input, expected, /*JsonToBuildVcf=*/-1, /*isKeepAllCols=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/false, /*colRange=*/"9..13");
		} finally {
			// Reset the queue limit even when an assertion above fails
			TjsonToVcfPipe.QUEUE_LIMIT = originalQueueLimit;
		}
	}
    
	/** Test TJSON arrays and delimited string columns (columns that had been drilled and are now pipe-separated strings)
	    Need to account for these cases:
	      1) Unrolling of arrays in JSON
	      2) Addition of comma-separated string columns (and default pipe-separated) - delimiter should be specified in the ##BIOR headers
	      3) String where ##BIOR header is not specified (should just add the whole string)

	    Need to test multiple values, one value, no values (dots or empty array)
	      #GeneInfo                             bior.CommaSeparatedValues  bior.PipeSeparated  PipeSeparatedWithNoBiorHeader
	      {"Genes":["BRCA1","BRCA2","MTHFR"]}   BRCA1,BRCA2,MTHFR          1|2|3               1|2|3
	      {"Genes":["ADR"]}                     BRCA1                      1                   1
	      {"Genes":[ ]}                         .                          .                   .
	      { }                                   .                          .                   .
	*/
	@Test
	public void testTjsonArraysAndDelimitedStrings() {
		// Header lines that appear unchanged in both the input and the expected output
		final String fileFormatLine = "##fileformat=VCFv4.0";
		final String geneInfoBior   = "##BIOR=<ID='GeneInfo',Operation='bior_overlap',DataType='String',Number='1',Field='flag',FieldDescription='Gene info',ShortUniqueName='flag',Path='REPLACEMEgenes.tsv.bgz'>".replaceAll("'", "\"");
		final String commaSepBior   = "##BIOR=<ID='bior.CommaSeparatedValues',Operation='bior_drill',DataType='Integer',Number='.',Delimiter=',',Field='csv',FieldDescription='comma-separated values',ShortUniqueName='genes1',Path='REPLACEMEgenes.tsv.bgz'>".replaceAll("'", "\"");
		final String pipeSepBior    = "##BIOR=<ID='bior.PipeSeparated',Operation='bior_drill',DataType='Integer',Number='.',Delimiter='|',Field='psv',FieldDescription='pipe-separated values',ShortUniqueName='genes1',Path='REPLACEMEgenes.tsv.bgz'>".replaceAll("'", "\"");

		List<String> tjsonLines = Arrays.asList(
				fileFormatLine,
				geneInfoBior,
				commaSepBior,
				pipeSepBior,
				concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "GeneInfo", "bior.CommaSeparatedValues", "bior.PipeSeparated", "PipeSeparatedWithNoBiorHeader"),
				// Multiple values
				concat("1", "100", "rs1", "A", "C", ".", ".", ".", "{\"Genes\":[\"BRCA1\",\"BRCA2\",\"MTHFR\"]}", "BRCA1,BRCA2,MTHFR", "1|2|3", "1|2|3"),
				// Single value
				concat("2", "200", "rs2", "C", "G", ".", ".", ".", "{\"Genes\":[\"ADR\"]}", "BRCA1", "1", "1"),
				// Empty array
				concat("3", "300", "rs3", "G", "T", ".", ".", ".", "{\"Genes\":[]}", ".", ".", "."),
				// Empty JSON object
				concat("4", "400", "rs4", "T", "A", ".", ".", ".", "{}", ".", ".", ".")
		);

		List<String> expectedVcf = Arrays.asList(
				fileFormatLine,
				TOP_HEADER_ARRAY[1], // ##fileDate
				TOP_HEADER_ARRAY[2], // ##source
				geneInfoBior,
				commaSepBior,
				pipeSepBior,
				"##INFO=<ID=bior.CommaSeparatedValues,Number=.,Type=Integer,Description=\"comma-separated values\">",
				"##INFO=<ID=bior.PipeSeparated,Number=.,Type=Integer,Description=\"pipe-separated values\">",
				"##INFO=<ID=GeneInfo,Number=1,Type=String,Description=\"Gene info\">",
				"##INFO=<ID=GeneInfo.Genes,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=PipeSeparatedWithNoBiorHeader,Number=.,Type=String,Description=\"\">",
				concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"),
				// The column without a ##BIOR header keeps its raw "1|2|3" value
				concat("1", "100", "rs1", "A", "C", ".", ".", "bior.CommaSeparatedValues=BRCA1,BRCA2,MTHFR;bior.PipeSeparated=1,2,3;GeneInfo.Genes=BRCA1,BRCA2,MTHFR;PipeSeparatedWithNoBiorHeader=1|2|3"),
				concat("2", "200", "rs2", "C", "G", ".", ".", "bior.CommaSeparatedValues=BRCA1;bior.PipeSeparated=1;GeneInfo.Genes=ADR;PipeSeparatedWithNoBiorHeader=1"),
				concat("3", "300", "rs3", "G", "T", ".", ".", "."),
				concat("4", "400", "rs4", "T", "A", ".", ".", ".")
		);

		// Verify range (columns 9..12 are collapsed into INFO, with JSON added to INFO as well)
		verifyPipelineResults(tjsonLines, expectedVcf, /*JsonToBuildVcf=*/-1, /*isKeepAllCols=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/true, /*colRange=*/"9..12");
	}
    
	/**
	 * Test arrays by passing thru bior_drill first to make sure it generates the header "Delimiter='|'" field.
	 * The drilled columns are then collapsed into INFO; a null array element is expected to show up as ".".
	 */
	@Test
	public void testTjsonArraysAndDelimitedStrings_thruDrillFirst() {
		// What the drill + tjson-to-vcf chain is expected to emit
		List<String> expectedVcf = Arrays.asList(
				TOP_HEADER_ARRAY[0],  // fileformat
				TOP_HEADER_ARRAY[1],  // fileDate
				TOP_HEADER_ARRAY[2],  // source
				"##BIOR=<ID=\"bior.testArray.chr\",Operation=\"bior_drill\",DataType=\"String\",ShortUniqueName=\"\",Path=\"\",Delimiter=\"|\",Number=\".\">",
				"##BIOR=<ID=\"bior.testArray.num\",Operation=\"bior_drill\",DataType=\"String\",ShortUniqueName=\"\",Path=\"\",Delimiter=\"|\",Number=\".\">",
				"##INFO=<ID=bior.testArray.chr,Number=.,Type=String,Description=\"\">",
				"##INFO=<ID=bior.testArray.num,Number=.,Type=String,Description=\"\">",
				concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"),
				concat("1", "100", "rs1", "A", "C", ".", ".", "bior.testArray.chr=A,B,C,.;bior.testArray.num=1,2,3")
		);

		// One header row and one data row whose last column holds two JSON arrays
		List<String> tjsonLines = Arrays.asList(
				concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "testArray"),
				concat("1", "100", "rs1", "A", "C", ".", ".", ".", "{'chr':['A','B','C',null],'num':[1,2,3]}")
		);

		final String drillDelimiter = "|";
		DrillPipe drill = new DrillPipe(/*isKeepJson=*/false, new String[] {"chr","num"}, -1, drillDelimiter, /*isSkipNulls=*/false);
		TjsonToVcfPipe toVcf = new TjsonToVcfPipe(-1, /*isKeepAllCols=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/false, "9..10");
		List<Metadata> drillMetadata = Arrays.asList( new Metadata(-1, "bior_drill", /*isKeepJson=*/false, drillDelimiter, "chr", "num") );

		Pipeline<String,String> chain = new Pipeline(new HistoryInPipe(drillMetadata), drill, toVcf, new HistoryOutPipe());
		chain.setStarts(tjsonLines);
		List<String> actualVcf = PipeTestUtils.getResults(chain);

		PipeTestUtils.assertListsEqual(expectedVcf, actualVcf);
	}

	
	
	
	/**
	 * Documents the Integer boxing pitfall behind the "more than 128 columns" bug:
	 * comparing Integer objects with == compares references, not values.
	 * <ul>
	 *   <li>Autoboxed values in the range -128..127 come from a shared cache, so == happens to be true.</li>
	 *   <li>Autoboxed values outside that range are normally separate Integer objects, so == is false.</li>
	 *   <li>Objects created with "new Integer(..)" are always separate, whatever the value.</li>
	 * </ul>
	 * compareTo() and equals() compare by value in every case and are what should be used.
	 * See: http://stackoverflow.com/questions/18850930/java-integer-auto-auto-boxing
	 * 		http://grepcode.com/file/repository.grepcode.com/java/root/jdk/openjdk/6-b14/java/lang/Integer.java#Integer.IntegerCache.0cache
	 */
	@Test
	public void testIntegerComparison() {

		// Value inside the cache range, but explicitly constructed: references differ, values are equal.
		// ("new Integer" is deliberate here - Integer.valueOf would hand back the cached instance.)
		Integer smallNewA = new Integer(1);
		Integer smallNewB = new Integer(1);
		assertFalse(smallNewA == smallNewB);
		assertEquals(0, smallNewA.compareTo(smallNewB));
		assertTrue(smallNewA.equals(smallNewB));

		// Value just outside the cache range (128), explicitly constructed: same outcome as above.
		Integer largeNewA = new Integer(128);
		Integer largeNewB = new Integer(128);
		assertFalse(largeNewA == largeNewB);
		assertEquals(0, largeNewA.compareTo(largeNewB));
		assertTrue(largeNewA.equals(largeNewB));

		// ===========================================

		// WARNING:  Autoboxing a value <= 127 returns the cached instance, so the references ARE the same (!!!)
		Integer smallBoxedA = 127;
		Integer smallBoxedB = 127;
		assertTrue(smallBoxedA == smallBoxedB);  // !!! same cached object - this is what hides the bug for small values !!!
		assertEquals(0, smallBoxedA.compareTo(smallBoxedB));
		assertTrue(smallBoxedA.equals(smallBoxedB));

		// Autoboxing 128: outside the default cache range, so two distinct Integer objects are created.
		// NOTE(review): the cache's upper bound can be raised with JVM options (e.g. -XX:AutoBoxCacheMax);
		//               under such a setting the reference check below would no longer hold.
		Integer largeBoxedA = 128;
		Integer largeBoxedB = 128;
		assertFalse(largeBoxedA == largeBoxedB);   // ! different objects, even though the values match !
		assertEquals(0, largeBoxedA.compareTo(largeBoxedB));
		assertTrue(largeBoxedA.equals(largeBoxedB));
	}
	
	/** Verify that the TreeSet removes duplicates (and, with the reverse-order comparator, lists the highest value first) */
	@Test
	public void treeSetTest() {
		TreeSet<Integer> descending = new TreeSet<Integer>(Collections.reverseOrder());
		// Every value is supplied twice; values on both sides of the Integer cache boundary are included
		descending.addAll(Arrays.asList(127, 127, 128, 128, 1000, 1000));

		assertEquals("[1000, 128, 127]", descending.toString());
	}
	
	/** Regression test: with enough extra columns (the failure appeared once the column count went past 128),
	 *  bior_tjson_to_vcf previously failed because one column was added in twice in the list of
	 *  columns to remove, which caused an IndexOutOfBoundsException.
	 *  Uses 200 extra "bior.NNN" columns, all of which are expected to be collapsed into INFO.
	 */
	@Test
	public void test128Columns_IndexOutOfBoundsException() {
		// Comfortably more than the ~121 extra columns needed to cross the 128-column mark
		final int extraColumnCount = 200;

		StringBuilder headerRow = new StringBuilder(concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"));
		StringBuilder dataRow   = new StringBuilder(concat("1", "100", "rs1", "A", "C", "0.0", "0.0", "key=val"));

		// Build the input rows and the pieces of the expected output in a single pass
		List<String> infoHeaders = new ArrayList<String>();
		List<String> infoPairs   = new ArrayList<String>();
		for (int col = 1; col <= extraColumnCount; col++) {
			String columnId = "bior." + pad0(col, 3);
			headerRow.append('\t').append(columnId);
			dataRow.append('\t').append(col);
			infoHeaders.add("##INFO=<ID=" + columnId + ",Number=.,Type=String,Description=\"\">");
			infoPairs.add(columnId + "=" + col);
		}
		// Sort string-wise instead of numerically
		Collections.sort(infoHeaders);
		Collections.sort(infoPairs);

		// INFO column: the existing "key=val" followed by every new key=value pair
		StringBuilder infoColumn = new StringBuilder("key=val");
		for (String pair : infoPairs) {
			infoColumn.append(';').append(pair);
		}

		// Expected output: top headers, then all the ##INFOs, then the column header row and the data row
		List<String> expected = new ArrayList<String>();
		expected.add(TOP_HEADER_ARRAY[0]);
		expected.add(TOP_HEADER_ARRAY[1]);
		expected.add(TOP_HEADER_ARRAY[2]);
		expected.addAll(infoHeaders);
		expected.add(concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"));
		expected.add(concat("1", "100", "rs1", "A", "C", "0.0", "0.0", infoColumn.toString()));

		List<String> input = Arrays.asList(headerRow.toString(), dataRow.toString());
		verifyPipelineResults(input, expected, null, null, null, null, null);
	}
	
	
	
    //========================================================================
    
    
	/**
	 * Left-pads the decimal form of an integer with '0' characters until it is at least {@code len} long.
	 * Values that are already {@code len} characters or longer are returned unchanged.
	 * (Ex: pad0(1,5) returns "00001")
	 *
	 * @param i   Number to format
	 * @param len Minimum length of the returned string
	 * @return The zero-prefixed number
	 */
	private String pad0(int i, int len) {
		String digits = String.valueOf(i);
		StringBuilder padded = new StringBuilder();
		// Emit the missing zeros first, then the digits
		for (int missing = len - digits.length(); missing > 0; missing--) {
			padded.append('0');
		}
		return padded.append(digits).toString();
	}
	
	/**
	 * Builds a History from a data line and attaches header metadata to it via {@link #addHeaders}.
	 *
	 * @param columnHeader Tab-separated column header row used to build the metadata
	 * @param dataLine     Data row passed to the History constructor
	 * @return The new History with its metadata set
	 */
	private History createHistoryWithHeader(String columnHeader, String dataLine) {
		final History row = new History(dataLine);
		addHeaders(row, columnHeader);
		return row;
	}
	
	/**
	 * Replaces the history's metadata with one built from the given column header row, then adds one
	 * ColumnMetaData entry (type String, count "1") per tab-separated column name, with any "#" removed.
	 *
	 * @param history         History whose metadata is replaced
	 * @param columnHeaderRow Tab-separated column header row (ex: "#CHROM\tPOS\t...")
	 */
	private void addHeaders(History history, String columnHeaderRow) {
		// The column header row (#) becomes the only header line of the new metadata
		history.setMetaData(new HistoryMetaData(new ArrayList<String>(Arrays.asList(columnHeaderRow))));

		// Describe each column
		List<ColumnMetaData> columns = history.getMetaData().getColumns();
		for (String rawName : columnHeaderRow.split("\t")) {
			String cleanName = rawName.replace("#", "");
			columns.add(new ColumnMetaData(cleanName, Type.String, "1", "Description: Column header name: " + cleanName));
		}
	}
	
	/**
	 * Asserts that the history's column metadata has exactly the expected column names, in order.
	 *
	 * @param expectedColumnNames Column names expected, in column order
	 * @param history             History whose column metadata is checked
	 */
	private void verifyHeaders(List<String> expectedColumnNames,  History history) {
		List<ColumnMetaData> columns = history.getMetaData().getColumns();
		assertEquals("Number of column headers is not the same", expectedColumnNames.size(), columns.size());

		int position = 0;
		for (String expectedName : expectedColumnNames) {
			assertEquals(expectedName, columns.get(position).getColumnName());
			position++;
		}
	}

	
	/**
	 * Runs the given lines through HistoryInPipe -> TjsonToVcfPipe -> HistoryOutPipe, prints the expected
	 * and actual lines, and asserts that the two lists are equal.
	 * Any of the last five arguments may be null, in which case these defaults are used:
	 * jsonColToBuildVcfFrom = -1, isKeepAllCols = false, isAddDataToInfo = true,
	 * isAddJsonToInfo = false, colRangeToCollapseToInfo = "".
	 *
	 * @param tjsonIn                  Input lines fed to the pipeline
	 * @param expected                 Lines the pipeline is expected to produce
	 * @param jsonColToBuildVcfFrom    Passed to the TjsonToVcfPipe constructor (null: -1)
	 * @param isKeepAllCols            Passed to the TjsonToVcfPipe constructor (null: false)
	 * @param isAddDataToInfo          Passed to the TjsonToVcfPipe constructor (null: true)
	 * @param isAddJsonToInfo          Passed to the TjsonToVcfPipe constructor (null: false)
	 * @param colRangeToCollapseToInfo Passed to the TjsonToVcfPipe constructor (null: "")
	 */
	private void verifyPipelineResults(List<String> tjsonIn, List<String> expected, Integer jsonColToBuildVcfFrom, Boolean isKeepAllCols, Boolean isAddDataToInfo, Boolean isAddJsonToInfo, String colRangeToCollapseToInfo) {
		// Substitute the default for every setting the caller left null
		final Integer jsonCol   = (jsonColToBuildVcfFrom    != null) ? jsonColToBuildVcfFrom    : Integer.valueOf(-1);
		final Boolean keepAll   = (isKeepAllCols            != null) ? isKeepAllCols            : Boolean.FALSE;
		final Boolean addData   = (isAddDataToInfo          != null) ? isAddDataToInfo          : Boolean.TRUE;
		final Boolean addJson   = (isAddJsonToInfo          != null) ? isAddJsonToInfo          : Boolean.FALSE;
		final String  colRange  = (colRangeToCollapseToInfo != null) ? colRangeToCollapseToInfo : "";

		TjsonToVcfPipe toVcf = new TjsonToVcfPipe(jsonCol, keepAll, addData, addJson, colRange);
		Pipeline<String,String> chain = new Pipeline(new HistoryInPipe(), toVcf, new HistoryOutPipe());
		chain.setStarts(tjsonIn);
		List<String> actual = PipeTestUtils.getResults(chain);

		// Dump both sides so a failure can be inspected from the console output
		System.out.println("Expected----------------------------");
		PipeTestUtils.printLines(expected);
		System.out.println("Actual----------------------------");
		PipeTestUtils.printLines(actual);

		PipeTestUtils.assertListsEqual(expected, actual);
	}
	
	
	//======================================================================================================================
	// MIKE: Used this to sort the INFO column in the annotateVcfized.vcf file 
	//       (had to capture only the INFO field after the "set=variant2;" portion so it wouldn't mix with the existing fields)
	/**
	 * Utility "test" that makes no assertions: it prints each sample INFO value below with its
	 * ";"-separated entries sorted by sortInfoCol, prefixed with a 1-based row number.
	 */
	@Test
	public void sortInfo() {
		final String[] infoValues = {
				"dbSNP.build=131;dbSNP.DiseaseVariant=0;dbSNP.SNP_Allele_Origin=unspecified;dbSNP.SuspectRegion=unspecified;rsID=rs75025155;SNPEFF.Effect=INTERGENIC;SNPEFF.Effect_impact=MODIFIER;SNPEFF.Functional_class=NONE;UCSC.Repeat_Region=693",
				"dbSNP.build=131;dbSNP.DiseaseVariant=0;rsID=rs78200054;SNPEFF.Functional_class=NONE;dbSNP.SNP_Allele_Origin=unspecified;dbSNP.SuspectRegion=unspecified;SNPEFF.Effect=INTERGENIC;SNPEFF.Effect_impact=MODIFIER;UCSC.Repeat_Region=693",
				"SNPEFF.Functional_class=NONE;rsID=rs71235073;dbSNP.build=130;dbSNP.SuspectRegion=unspecified;dbSNP.SNP_Allele_Origin=unspecified;dbSNP.DiseaseVariant=0;UCSC.Repeat_Region=693;SNPEFF.Effect_impact=MODIFIER;SNPEFF.Effect=INTERGENIC",
				"SNPEFF.Functional_class=NONE;dbSNP.DiseaseVariant=0;UCSC.Repeat_Region=693;SNPEFF.Effect_impact=MODIFIER;SNPEFF.Effect=INTERGENIC",
				"SNPEFF.Functional_class=NONE;rsID=rs76676778;dbSNP.build=131;dbSNP.SuspectRegion=unspecified;dbSNP.SNP_Allele_Origin=unspecified;dbSNP.DiseaseVariant=0;UCSC.Repeat_Region=693;SNPEFF.Effect_impact=MODIFIER;SNPEFF.Effect=INTERGENIC",
				"SNPEFF.Functional_class=NONE;dbSNP.DiseaseVariant=0;UCSC.Repeat_Region=693;SNPEFF.Effect_impact=MODIFIER;SNPEFF.Effect=INTERGENIC",
				"SNPEFF.Functional_class=NONE;rsID=rs71235074;dbSNP.build=130;dbSNP.SuspectRegion=unspecified;dbSNP.SNP_Allele_Origin=unspecified;dbSNP.DiseaseVariant=0;UCSC.Repeat_Region=693;SNPEFF.Effect_impact=MODIFIER;SNPEFF.Effect=INTERGENIC",
				"SNPEFF.Functional_class=NONE;rsID=rs71235075;dbSNP.build=130;dbSNP.SuspectRegion=unspecified;dbSNP.SNP_Allele_Origin=unspecified;dbSNP.DiseaseVariant=0;UCSC.Repeat_Region=693;SNPEFF.Effect_impact=MODIFIER;SNPEFF.Effect=INTERGENIC",
				"SNPEFF.Functional_class=NONE;dbSNP.DiseaseVariant=0;UCSC.Repeat_Region=693;SNPEFF.Effect_impact=MODIFIER;SNPEFF.Effect=INTERGENIC",
				"SNPEFF.Functional_class=NONE;dbSNP.DiseaseVariant=0;UCSC.Repeat_Region=693;SNPEFF.Effect_impact=MODIFIER;SNPEFF.Effect=INTERGENIC",
				"SNPEFF.Functional_class=NONE;dbSNP.DiseaseVariant=0;UCSC.Repeat_Region=693;SNPEFF.Effect_impact=MODIFIER;SNPEFF.Effect=INTERGENIC",
				"SNPEFF.Functional_class=NONE;dbSNP.DiseaseVariant=0;UCSC.Repeat_Region=693;SNPEFF.Effect_impact=MODIFIER;SNPEFF.Effect=INTERGENIC",
		};

		int rowNumber = 1;
		for (String infoValue : infoValues) {
			System.out.println( rowNumber + ")  " + sortInfoCol(infoValue));
			rowNumber++;
		}
	}
	
//
//    
//    /** Modifies the list of strings, sorting each ones items in the INFO column */
//    public static List<String> sortAllInfoCols(List<String> rows) {
//    	for(int i=0; i < rows.size(); i++) {
//    		rows.set(i, sortInfoCol(rows.get(i)));
//    	}
//    	return rows;
//    }
//    
    /** Split the value of an INFO column on ";", sort the pieces case-insensitively, then join them back together with ";".
     *  This is to get around the HashMap problem where keys are not re-inserted in the same order.
     *  Trailing empty pieces (ex: from a trailing ";") are dropped by the split and so do not appear in the result.
     * @param infoCol  The INFO column value only (not the whole line)
     * @return The INFO value with its ";"-separated entries sorted, ignoring case
     */
    public static  String sortInfoCol(String infoCol) {
        String[] entries = infoCol.split(";");
        Arrays.sort(entries, String.CASE_INSENSITIVE_ORDER);

        // Rebuild the INFO value from the sorted entries
        StringBuilder sorted = new StringBuilder();
        String separator = "";
        for (String entry : entries) {
            sorted.append(separator).append(entry);
            separator = ";";
        }
        return sorted.toString();
    }
//
//	
}
