package edu.mayo.bior.pipeline;


import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.TreeSet;

import org.apache.commons.lang.NotImplementedException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.tinkerpop.pipes.AbstractPipe;

import edu.mayo.bior.util.ColumnResolver;
import edu.mayo.cli.InvalidDataException;
import edu.mayo.pipes.history.ColumnMetaData;
import edu.mayo.pipes.history.ColumnMetaData.Type;
import edu.mayo.pipes.history.History;
import edu.mayo.pipes.history.HistoryMetaData;

/** =======================================================================================================================
 *  Attempts to output a correctly-formatted VCF file from tab-delimited strings/JSON.
 * 
 *  The input may be a file that already has all 8 VCF standard columns in which case these will be unaffected.
 *  Or, if the input does NOT contain the required 8 VCF standard columns, it will attempt to build them from a specified JSON column.
 *  
 *  If building the VCF columns from JSON, these golden attributes must be specified:
 *  	_landmark, _minBP, _refAllele, _altAlleles
 *  The ID column will be filled in if the "_id" golden attribute is present in the JSON
 *  These fields will be filled in if fields match the names:
 *  	QUAL, FILTER, INFO
 * 
 *  VCF ## headers will be created as necessary (##fileformat, ##fileDate, ##source, ##INFO, #CHROM...., etc)
 *  If there is no available data about the field, it will receive defaults of Type=String, Number=.
 *  JSON types will be used to determine the type if possible
 * 
 *  There are several parameters:
 *  	-c (--columnJson) = Column from which to pull JSON to create the first 8 columns of the VCF.  
 *  						These VCF columns will NOT be created if they already exist 
 *                          (this option will not even be checked or verified for JSON validity in that case)
 *  						OPTIONAL  (default=-1)
 *  						NOTE: This column will NOT be added to the INFO column unless included in
 *                          the range option as it will have many duplicates with the first 8 columns of the VCF
 * 		-k (--keepAll)	  = Keep all BioR columns - do NOT remove them.  By default, any columns beginning with "bior"
 * 							or that have a ##BIOR header associated with them will be removed.
 * 							OPTIONAL  (default is to NOT keep all BioR columns)
 * 		-n (--noInfoAdd)  = Do NOT add the extra BioR columns to the INFO column.  
 * 							This is useful if you just want to create the standard first 8 columns 
 * 							of the VCF file but do not want to insert anything into the INFO column.
 * 							The resulting file can then be run against bior_annotate
 * 							OPTIONAL  (default is to add the column data to the INFO column)
 * 		-r (--range) 	  = Add only the specified columns to the INFO column.
 * 							By default without the parameter, all columns beginning with "bior" and all columns
 * 							that have an associated ##BIOR line will be added to INFO.
 * 							This overrides that and forces only certain columns to be added to the INFO column.
 * 							Ranges must be specified as [start]..[end] using double-dot separator.
 * 							[start] and [end] can be positive or negative (with -1 being the last column, -2 the second-last, etc).
 *        	  				Any columns NOT specified here will just be retained AFTER the INFO column.
 *   						Redundant columns are eliminated.  Invalid columns will throw an error. 
 *  						Columns are 1-based with -1 representing the last column and -3 the third-to-last column. 
 *  						OPTIONAL  (default is to add all BioR columns)
 *  						Examples include:<br/>
 *  							1 				<br/>
 *  							1..8 			<br/>
 *  							9,10,12..14 	<br/>
 *  							..4,-3..-1,6,8..  (add columns 1,2,3,4,-3,-2,-1,6,8..end.  Redundant columns are eliminated) <br/>
 *  							8..14	(OK if # columns >= 14.  ERROR if < 14) <br/>
 *  							8-14	(ERROR - invalid specification.  Use 8..14) <br/>
 *  
 *  Example:
 *    INPUT:
 *        #CHR MIN    MAX    MEM_USED            MYJSON
 *        chr1 100    200    {"mem":"1024KB"}    {"_landmark":"1","_id":"rs111222333","_minBP":100,"_maxBP":200,"_refAllele":"A","_altAlleles":["C","G"],"QUAL":9.3,"MyExtraField":"ABCDE"}
 *    
 *    COMMAND:
 *    	  cat input.txt | bior_tjson_to_vcf
 *    
 *    OUTPUT:
 *        #CHROM  POS  ID            REF   ALT   QUAL  FILTER   INFO  CHR   MIN   MAX   MEM_USED          MYJSON
 *        1       100  rs111222333   A     C,G   9.3   .        .     chr1  100   200   {"mem":"1024KB"}  {"_landmark":"1","_id":"rs111222333","_minBP":100,"_maxBP":200,"_refAllele":"A","_altAlleles":["C","G"],"QUAL":9.3,"MyExtraField":"ABCDE"}
 * =======================================================================================================================
 */
public class TjsonToVcfPipe extends AbstractPipe<History, History> {
    // Fallback attribute values for ##INFO header lines.
    // NOTE(review): none of these three are referenced in the part of the class visible here;
    // presumably they are used when no metadata describes a field - confirm against the rest of the file.
    public final String DEFAULT_DESCRIPTION = "BioR property file missing description";
    public final String DEFAULT_TYPE = "String";
    public final String DEFAULT_NUMBER = ".";
    // Zero-based index of the INFO column within a VCF data line (the 8th column)
    private final int   INFO_COL = 7;
    // Number of leading standard VCF columns (#CHROM through INFO)
    private final int   NUM_REQUIRED_VCF_COLS = INFO_COL + 1;

    
    // Keep a queue of up to QUEUE_LIMIT lines so we can check data for fields that should have corresponding ##INFO lines.
    // QUEUE_LIMIT is a non-final static, so it is shared by (and changeable for) every instance.
    protected static int QUEUE_LIMIT = 1000;
    // Look-ahead buffer of input lines that have been read but not yet returned by processNextStart()
    private List<History> mLineQueue = new ArrayList<History>();
    
    private static final Logger sLogger = LoggerFactory.getLogger(TjsonToVcfPipe.class);

    // NOTE(review): package-private and not referenced in the visible part of the class - confirm whether still needed
    Map<Integer, String> biorindexes = new LinkedHashMap<Integer, String>();

    // Has the header been modified yet?  If so, don't process it again
    private boolean mIsHeaderModified = false;
    
    // Store all the header INFO metadata into a hashmap so we can reference it quickly
    // This will be used when choosing what columns to add/remove
    private Map<String, ColumnMetaData> mInfoMetadata = new HashMap<String,ColumnMetaData>();
    
    // Values from the flags.
    // NOTE: every constructor overwrites these initial values, so the effective defaults are the ones
    //       passed by the convenience constructors (isAddDataToInfo=true, isAddJsonToInfo=false).
    // mJsonColumn holds the user-supplied column number until the header is processed,
    // at which point it is replaced by the resolved zero-based index.
    private int 	mJsonColumn = -1;
    private boolean mIsKeepAllColumns = false;
    private boolean mIsAddDataToInfo  = false;
    private boolean mIsAddJsonToInfo  = false;
    // The raw column-range string from the user ("" when no range was given; never null after construction)
    private String  mColumnsToCollapseUserInputString = null;
    
    /** zero-based list of columns to add to INFO column (kept in descending order). */
    private TreeSet<Integer> mColumnsToAddToInfo = null;
    
    // The columns to remove will be: 
    //   - Nothing if "-k" flag is used
    // Otherwise will be:
    //   - All column headers starting with "bior"  (case-insensitive)
    //   - All column headers matching ##BIOR Ids
    //   - All range columns
    //   - The target JSON column
    // (kept in descending order so columns can be removed from right to left)
    private TreeSet<Integer> mColumnsToRemove = null;
    
    // The JSON columns that we may have to skip if the user doesn't want them rolled into the INFO field.
    // (NOTE: This is now the default option)
    // NOTE(review): never assigned or read in the visible part of the class - confirm whether still needed
	private TreeSet<Integer> mJsonColumns;

    // Count of data lines processed so far (incremented once per line in modifyDataLine); used in error messages
    private long mLineNum = 0;
    
    // NOTE(review): not referenced in the visible part of the class
    private Gson gson = new Gson();

    // Parser used to turn the target column (and a nested INFO object) into JSON elements
    private JsonParser jsonParser = new JsonParser();

    // Helper that builds ##INFO header lines and INFO column values, and tests whether a value is a JSON object
    private VcfInfoColumnBuilder mVcfInfoColumnBuilder = new VcfInfoColumnBuilder();


    /** Is the incoming data already in VCF format?  NOTE: This must be checked BEFORE the header is changed */
    private boolean mIsAlreadyAVcf;
	
	/** Want to keep track of whether the previous pipe has more data since we use this in conjunction with other checks.
	 *  Once this becomes false, hasNext() is not called on the previous pipe again. */
	private boolean mIsPrevPipeHasNext = true;
	
	
    
    /** Construct the TJSON to VCF pipe with every option given explicitly.
     *  @param jsonCol  The JSON column from which to extract the fields necessary to build the VCF file.
     *  				Pass in a positive column number (starting at 1) or a negative number (where -1 is the last column on the right)
     *  @param isKeepAllColumns  If true, no columns are removed from the output lines
     *  @param isAddDataToInfo  Add data columns to the INFO column (the convenience constructors pass TRUE)
     *  @param isAddJsonToInfo  Add data within JSON columns to the INFO column (the convenience constructors pass FALSE)
     *  @param colRangeToCollapseToInfoCol  The user's specification of which columns to remove and add to the INFO column as key-value pairs.
     * 					"" or null = No range specified.  Use default: all columns starting with "bior" or having ##BIOR header associated  <br/>
     */
    public TjsonToVcfPipe(int jsonCol, boolean isKeepAllColumns, boolean isAddDataToInfo, boolean isAddJsonToInfo, String colRangeToCollapseToInfoCol) {
        this.mJsonColumn = jsonCol;
        this.mIsKeepAllColumns = isKeepAllColumns;
        this.mIsAddDataToInfo = isAddDataToInfo;
        this.mIsAddJsonToInfo = isAddJsonToInfo;
        // Normalize null to "" so later code can test the length without a null check
        this.mColumnsToCollapseUserInputString = (colRangeToCollapseToInfoCol == null) ? "" : colRangeToCollapseToInfoCol;
    }
    
    /** Construct the TJSON to VCF pipe using the default options:
     *  BioR columns are removed, data columns are added to INFO, JSON columns are not added to INFO,
     *  and no explicit column range is given.
     *  @param jsonCol  The JSON column from which to extract the fields necessary to build the VCF file 
     *  				Pass in a positive column number (starting at 1) or a negative number (where -1 is the last column on the right) */
    public TjsonToVcfPipe(int jsonCol) {
    	this(jsonCol, /*isKeepAllColumns=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/false, /*colRangeToCollapseToInfoCol=*/"");
    }
    
    /** Construct the TJSON to VCF pipe using the default options.
     *  The JSON column is given as -1, i.e. the last column is used to extract the fields necessary to build the VCF file.
     *  The remaining options match the single-argument constructor. */
    public TjsonToVcfPipe() {
    	this(/*jsonCol=*/-1, /*isKeepAllColumns=*/false, /*isAddDataToInfo=*/true, /*isAddJsonToInfo=*/false, /*colRangeToCollapseToInfoCol=*/"");
    }
    
    
    //==============================================================================================================
    /**
     * Return the next input line converted to VCF format.
     * <p>
     * Lines are first buffered in a look-ahead queue (up to QUEUE_LIMIT of them) so that the header
     * processing, which happens on the first line, can inspect many data lines when deciding which
     * ##INFO header lines to create.  Blank lines are dropped while filling the queue.
     *
     * @return the oldest queued line after conversion by {@link #lineToVcf(History)}
     * @throws NoSuchElementException when there are no (more) data lines.  If no data line was ever
     *         processed, a warning is also printed to stderr because the headers could not be modified.
     */
    @Override
    protected History processNextStart() throws NoSuchElementException {
        // Fill up the queue (for instance with the first 1000 lines) so we can figure out
        // which ##INFO lines to add to the header.  Some fields may not appear in the first few lines.
        // NOTE: since this.starts.hasNext() can only be called once when the end of the input stream is reached,
        //       the result is saved in mIsPrevPipeHasNext and short-circuits all later calls.
        while( mIsPrevPipeHasNext  &&  (mIsPrevPipeHasNext = this.starts.hasNext())   &&   mLineQueue.size() < QUEUE_LIMIT ) {
            History queuedLine = this.starts.next();
            if( ! isBlankLine(queuedLine) )
                mLineQueue.add(queuedLine);
        }

        // Nothing left to hand out: signal the end of the data.
        // Previously the "no data at all" case only printed the warning and then fell through to
        // mLineQueue.get(0), which failed with an IndexOutOfBoundsException instead of ending cleanly.
        if( mLineQueue.isEmpty() ) {
            if( mLineNum == 0 )
                System.err.println("Warning: No data lines available, so headers could not be modified.");
            throw new NoSuchElementException("No more data to process");
        }

        // Convert the head of the queue while it is still IN the queue:
        // the one-time header processing reads the queued lines, including this one.
        History historyAsVcf = lineToVcf(mLineQueue.get(0));

        // Only now drop the line from the queue
        mLineQueue.remove(0);

        return historyAsVcf;
    }
    //==============================================================================================================

    
    /** Work out which zero-based columns have their data added to the INFO column.
     * <ul>
     *   <li>Not adding anything to INFO: the result is empty.</li>
     *   <li>A column range was given by the user: exactly the columns of that range (JSON or not).</li>
     *   <li>Otherwise: every BioR column (header starts with "bior" or has a ##BIOR line), and then
     *       either the target JSON column is added (when JSON is to be added to INFO), or
     *       every column detected as JSON in the queued lines is taken out again.</li>
     * </ul>
     * PRE:  mIsAddDataToInfo, mColumnsToCollapseUserInputString and mIsAddJsonToInfo are set.
     * POST: the returned TreeSet iterates in reverse order (largest index first).
     * @param history  a line whose metadata supplies the column headers
     * @param targetJsonToBuildVcf8Cols  zero-based index of the target JSON column
     * @param lineQueue  the queued lines inspected when detecting JSON columns
     * @return the zero-based column indexes, largest first
     * @throws Exception if resolving the range or the columns fails
     */
    private TreeSet<Integer>  getColumnsToAddToInfoColumn(History history, int targetJsonToBuildVcf8Cols, List<History> lineQueue) throws Exception {
        // Largest-first ordering: columns are later removed right-to-left so indexes stay valid
        TreeSet<Integer> infoColumns = new TreeSet<Integer>(Collections.reverseOrder());

        if( mIsAddDataToInfo ) {
            boolean isRangeGiven = mColumnsToCollapseUserInputString.length() > 0;
            if( isRangeGiven ) {
                // An explicit range overrides the default column selection
                infoColumns.addAll(ColumnResolver.rangesToZeroBasedIndexes(history, mColumnsToCollapseUserInputString));
            } else {
                infoColumns.addAll(getAllBiorColumns(history));
                if( mIsAddJsonToInfo ) {
                    // JSON is wanted in INFO, so the target JSON column goes in as well
                    infoColumns.add(targetJsonToBuildVcf8Cols);
                } else {
                    // JSON is not wanted in INFO: drop every column that looks like JSON
                    infoColumns.removeAll(getJsonColumns(lineQueue));
                }
            }
        }

        return infoColumns;
    }
    
    
	/** Work out which zero-based columns are removed from every data line.
	 *  When all columns are kept (-k flag) the result is empty.  Otherwise it contains:
	 *  	- all BioR columns
	 *  	- the target JSON column, but only if its value on this line is a JSON object
	 *  	- all columns of the user-specified range
	 *  and, if the input is already a VCF, the first 8 columns are taken out of the result again
	 *  so that the standard VCF columns are never deleted.
	 *  NOTE:  The columns must be removed before others are inserted (for creating the first 8 columns of the VCF) to avoid deleting the wrong columns
	 *  POST: the returned TreeSet iterates in reverse order (largest index first).
	 * @param history  a line supplying the column headers and the value of the target column
	 * @param isKeepAllColumns  true to remove nothing
	 * @param isAlreadyAVcf  true if the first 8 columns are already the standard VCF columns
	 * @param targetJsonToBuildVcf8Cols  zero-based index of the target JSON column
	 * @return the zero-based column indexes to remove, largest first
	 * @throws Exception if resolving the range or the columns fails
	 */
	private TreeSet<Integer>  getColumnsToRemove(History history, boolean isKeepAllColumns, boolean isAlreadyAVcf, int targetJsonToBuildVcf8Cols) throws Exception {
		// Largest-first ordering: removing right-to-left keeps the remaining indexes valid
		TreeSet<Integer> removable = new TreeSet<Integer>(Collections.reverseOrder());

		if( ! isKeepAllColumns ) {
			removable.addAll(getAllBiorColumns(history));

			// The target column is only dropped when it really holds a JSON object
			String targetValue = history.get(targetJsonToBuildVcf8Cols);
			if( mVcfInfoColumnBuilder.isJsonObject(targetValue) )
				removable.add(targetJsonToBuildVcf8Cols);

			removable.addAll(ColumnResolver.rangesToZeroBasedIndexes(history, mColumnsToCollapseUserInputString));

			// Protect the 8 standard VCF columns of an existing VCF
			if( isAlreadyAVcf ) {
				final int numVcfColumns = 8;
				for( int vcfCol = 0; vcfCol < numVcfColumns; vcfCol++ )
					removable.remove(vcfCol);
			}
		}

		return removable;
	}
    
    /** Remove duplicate values from the list in place, keeping the first occurrence of each value
     *  and the relative order of the survivors.
     *  Values are compared with equals() semantics (via a HashSet): comparing boxed Integers with
     *  "==" only works for small cached values and silently misses larger duplicates.
     *  @param list  the list to de-duplicate; it must support element removal
     */
    private void removeDuplicates(List<Integer> list) {
        Set<Integer> seen = new HashSet<Integer>();
        int i = 0;
        while( i < list.size() ) {
            if( seen.add(list.get(i)) ) {
                // First time this value is seen: keep it
                i++;
            } else {
                // Duplicate: remove by INDEX (i is an int) and re-check the element that slides into slot i
                list.remove(i);
            }
        }
    }


    
    

    /** Find the BioR columns of the header row: those whose name begins with "bior" (case-insensitive)
     *  and those whose name equals an ID found in the ##BIOR metadata lines.
     * @param history  a line whose metadata holds the column headers and the original header lines
     * @return the zero-based indexes of the BioR columns, in ascending order
     */
    private List<Integer> getAllBiorColumns(History history) {
        Set<String> idsFromBiorLines = getBiorColumnIdsFromMetadata(history.getMetaData().getOriginalHeader());
        List<Integer> biorColumns = new ArrayList<Integer>();
        List<ColumnMetaData> headerColumns = history.getMetaData().getColumns();

        int columnIndex = 0;
        for( ColumnMetaData headerColumn : headerColumns ) {
            String name = headerColumn.getColumnName();
            boolean hasBiorPrefix = name.toLowerCase().startsWith("bior");
            // The metadata IDs are matched case-sensitively, unlike the prefix
            if( hasBiorPrefix  ||  idsFromBiorLines.contains(name) )
                biorColumns.add(columnIndex);
            columnIndex++;
        }
        return biorColumns;
    }
	
    /** Collect the IDs of all ##BIOR metadata lines.
     *  Only lines starting with "##BIOR=&lt;ID" are considered; every other line is ignored.
     *  @param metadataLines  the original header lines including ##BIOR lines
     *  @return the set of IDs extracted from the ##BIOR lines (no duplicates)  */
    public Set<String> getBiorColumnIdsFromMetadata(List<String> metadataLines) {
        final String biorLinePrefix = "##BIOR=<ID";
        Set<String> ids = new HashSet<String>();
        for (String line : metadataLines) {
            if (! line.startsWith(biorLinePrefix))
                continue;
            ids.add(getIdFromBiorMetadata(line));
        }
        return ids;
    }
    
    
    /** Pull the ID out of a ##BIOR metadata line.  For example, from
     *    ##BIOR=<ID=BIOR.VCF2VariantPipe,Operation="bior_vcf_to_variant",DataType="JSON">
     *  the result is "BIOR.VCF2VariantPipe".  Double quotes around the ID are dropped.
     *  @param biorMetadataLine  the header line to examine
     *  @return the ID, or "" if the line has no "##BIOR=&lt;ID=" marker or no "," / "&gt;" after it
     */
    private String getIdFromBiorMetadata(String biorMetadataLine) {
        final String idMarker = "##BIOR=<ID=";
        int markerPos = biorMetadataLine.indexOf(idMarker);
        if( markerPos == -1 )
            return "";

        // The ID ends at the first comma, or failing that at the closing bracket
        int terminatorPos = biorMetadataLine.indexOf(',', markerPos);
        if( terminatorPos == -1 )
            terminatorPos = biorMetadataLine.indexOf('>', markerPos);
        if( terminatorPos == -1 )
            return "";

        String rawId = biorMetadataLine.substring(markerPos + idMarker.length(), terminatorPos);
        return rawId.replace("\"", "");
    }


    


	/**
	 * Create a correct VCF line:<br>
	 *  1) If the file is not already a VCF, use the "golden attributes" in the target JSON column to create the first 8 data columns of a VCF:
	 *   	#CHROM  POS  ID  REF   ALT   QUAL  FILTER   INFO<br>
	 *  2) If the user wants to add key-value pairs to the INFO column, then add them from the target JSON column as well as the data columns<br>
	 *  3) If the user wants to keep all columns, then do not remove the target and data columns<br>
	 * All non-target and range columns should follow after the INFO column.
	 * <p>
	 * The header metadata is modified the first time this is called with a non-blank line.
	 *
	 * @param history  the input line
	 * @return the same line modified into VCF form, or a new empty History if the line is blank
	 * @throws IllegalArgumentException if an InvalidDataException is raised while processing (the original exception is kept as the cause)
	 * @throws RuntimeException any runtime exception from processing is passed through unchanged;
	 *         any other checked exception is wrapped in a RuntimeException
	 */
	protected History lineToVcf(History history)  {
		try {
			// If blank line, then return empty history
			if( isBlankLine(history) )
				return new History();

			// Modify Metadata only once - create the necessary ## metadata lines, column headers, and column metadata
			modifyHeaderMetadataOnce(history);

			// Now, modify the actual data line
			return modifyDataLine(history);
		} catch(RuntimeException e) {  // Various IllegalStateExceptions from within this class
			throw e;
		} catch(InvalidDataException e) {  // Exceptions thrown from ColumnResolver class when checking column ranges
			// Keep the original exception as the cause so its stack trace is not lost
			throw new IllegalArgumentException(e.getMessage(), e);
		} catch(Exception e) {
			throw new RuntimeException(e);
		}
	}

	/** A line is blank when it is null, has no columns, or has a single column holding only whitespace. */
	private boolean isBlankLine(History history) {
		if( history == null  ||  history.size() == 0 )
			return true;
		// A lone column that trims down to nothing also counts as blank
		return history.size() == 1  &&  history.get(0).trim().length() == 0;
	}

	/** One-time setup performed on the first data line: decide whether the input is already a VCF,
	 *  resolve the JSON column and the column sets, then rewrite the header metadata.
	 *  Later calls return immediately.
	 *  @param history  the first data line (its metadata carries the header)
	 *  @throws Exception if the column number or ranges cannot be resolved
	 */
	private void modifyHeaderMetadataOnce(History history) throws Exception {
		if( mIsHeaderModified )
			return;

		// When this is called independently the queue may be empty; make sure it holds
		// at least the current line so the queue lookups below cannot fail on an empty list
		if( mLineQueue.isEmpty() )
			mLineQueue.add(history);

		// NOTE: the VCF check relies on the ORIGINAL column header names, so it must come before any header change
		mIsAlreadyAVcf      = isAlreadyAVcf(history);
		mJsonColumn         = ColumnResolver.numberToZeroBasedIndex(history, mJsonColumn, mLineNum);
		mColumnsToAddToInfo = getColumnsToAddToInfoColumn(history, mJsonColumn, mLineQueue);
		mColumnsToRemove    = getColumnsToRemove(history, mIsKeepAllColumns, mIsAlreadyAVcf, mJsonColumn);

		// Header rewriting, in this order: ##INFO lines, top-most headers, column header line
		addInfoHeaderLines(history);
		addTopMostHeaders(history);
		modifyHeaderLineToVcfFormat(history);

		mIsHeaderModified = true;
	}


	/** Add the ##INFO header lines for the columns whose data goes into the INFO column.
	 *  The set of columns to add is recomputed here and stored in mColumnsToAddToInfo.
	 *  No header line is added when nothing is to be added to the INFO column.
	 *  @param history  the line whose header receives the ##INFO lines
	 *  @throws Exception if the columns to add cannot be determined
	 */
	private void addInfoHeaderLines(History history) throws Exception {
		mColumnsToAddToInfo = getColumnsToAddToInfoColumn(history, mJsonColumn, mLineQueue);

		// ##INFO metadata for all target columns, based on the queued lines
		List<String> infoHeaderLines = mVcfInfoColumnBuilder.createInfoMetadata(mLineQueue, mColumnsToAddToInfo);

		// Nothing goes into the INFO column, so no ##INFO lines are needed
		if( ! mIsAddDataToInfo )
			return;

		// When the VCF columns are being built, an INFO field inside the target JSON column
		// contributes ##INFO lines as well
		if( ! mIsAlreadyAVcf )
			infoHeaderLines.addAll(mVcfInfoColumnBuilder.createInfoMetadataFromInfoInTargetJsonCol(mLineQueue, mJsonColumn));

		for(String infoHeaderLine : infoHeaderLines)
			mVcfInfoColumnBuilder.addInfoLineToHeader(history, infoHeaderLine);
	}

	/** Modify each data line (in place):
	 *   1) form the 8 required VCF columns if necessary;
	 *   2) Add column keys and values into INFO column;
	 *   3) Remove the columns that were added to INFO col
	 *  The order of the steps matters: all values are read from the line BEFORE columns are removed,
	 *  and columns are removed BEFORE the VCF columns are inserted at the front, because the stored
	 *  column indexes refer to the original layout of the line.
	 *  @param history  the data line to modify; also increments the data line counter
	 *  @return the same History instance after modification */
	private History modifyDataLine(History history) {
		mLineNum++;
		
		// Get the values to insert before we insert VCF columns or remove JSON and bior columns
    	String newInfoValues = "";
    	// Only values to insert into INFO column if user specifies to insert into INFO column
    	if( mIsAddDataToInfo ) {
    		// In case there were any JSON objects further down ***beyond the mLineQueue limit***, with all dots above (in which case the column was NOT flagged as JSON)
    		// then check the value to see if it is a JSON object or not, and if so, remove it from the list of columns to collapse, for this pass:
    		// NOTE(review): the column is removed from the member set mColumnsToAddToInfo itself, so it stays
    		//               excluded for all following lines as well, not only "for this pass" - confirm this is intended.
    		if( ! mIsAddJsonToInfo )
    			removeColsFromListIfJson(mColumnsToAddToInfo, history);

    		newInfoValues = mVcfInfoColumnBuilder.createInfoData(history, mColumnsToAddToInfo);
    	}

    	// Build the 8 standard VCF columns from the target JSON column (only when the input is not already a VCF).
    	// This reads the target column, so it must happen before that column is removed below.
    	List<String> vcfRequiredColsToInsert = new ArrayList<String>();
    	if( ! mIsAlreadyAVcf )
    		vcfRequiredColsToInsert = getRequiredVcfColumnsToInsertIntoDataLine(history);
    	
    	// Remove columns as necessary.
    	// If the -k flag was specified, the list should be empty and no columns removed
    	removeTargetAndSpecifiedColumns(history, mColumnsToRemove);

    	// Insert required VCF data columns (CHROM, POS, ... INFO)
    	// NOTE: Nothing will be inserted if the list is empty
    	history.addAll(0, vcfRequiredColsToInsert);
    	
    	// Merge any old data in the INFO column with the new ones gathered from the target columns
    	// NOTE(review): this assumes the line now has at least INFO_COL+1 columns - confirm for short/malformed lines.
    	String infoCol = mergeOldInfoValuesWithNew(history.get(INFO_COL), newInfoValues);
    	history.set(INFO_COL, infoCol);
    		
    	return history;
	}

	
	/** Drop from the given set every column whose value on this line is a JSON object.
	 *  This catches JSON columns that were not flagged as JSON earlier, e.g. because all of the
	 *  queued lines only had dots in that column and the first JSON object appears further down.
	 *  The set passed in is modified directly.
	 *  ASSUMPTION: The colsToCollapseSet TreeSet is provided in reverse order (largest to smallest)
	 *  @param colsToCollapseSet  zero-based columns to collapse into INFO; JSON columns are removed from it
	 *  @param history  the current data line
	 */
	private void removeColsFromListIfJson(TreeSet<Integer> colsToCollapseSet,	History history) {
		// Iterate over a snapshot so the live set can be changed while looping
		List<Integer> snapshot = new ArrayList<Integer>(colsToCollapseSet);
		for(Integer column : snapshot) {
			String value = history.get(column);
			if( mVcfInfoColumnBuilder.isJsonObject(value) )
				colsToCollapseSet.remove(column);
		}
	}

	/** Decide, for every column of the queued lines, whether it is a JSON column.
	 *  A column counts as JSON when at least one value is recognized as a JSON object (or its
	 *  ##BIOR header declares DataType="JSON") and every other value examined is a dot (".").
	 *  Any value that is neither a JSON object nor a dot marks the column as non-JSON.
	 *  The column count is taken from the first queued line.
	 * @param lineQueue  the queued lines to inspect; must contain at least one line
	 * @return zero-based indexes of the JSON columns, in ascending order
	 */
	private List<Integer> getJsonColumns(List<History> lineQueue) {
		History firstLine = lineQueue.get(0);
		int columnCount = firstLine.size();

		// Tri-state per column: TRUE = JSON, FALSE = not JSON, null = not decided yet.
		// Columns whose header declares DataType=JSON start out as TRUE.
		Boolean[] jsonState = new Boolean[columnCount];
		for(int col=0; col < columnCount; col++)
			jsonState[col] = isDataTypeJson(col, firstLine) ? Boolean.TRUE : null;

		// Scan each column top to bottom, stopping at the first value that rules JSON out
		for(int col=0; col < columnCount; col++) {
			for(History line : lineQueue) {
				String value = line.get(col);
				if( mVcfInfoColumnBuilder.isJsonObject(value) ) {
					if( jsonState[col] == null )
						jsonState[col] = Boolean.TRUE;
				} else if( ! ".".equals(value) ) {
					// Neither JSON nor a dot: an ordinary value, so this is not a JSON column
					jsonState[col] = Boolean.FALSE;
					break;
				}
				// A dot tells us nothing either way, so the state is left as it is
			}
		}

		// Keep only the columns that ended up as TRUE
		List<Integer> jsonColumns = new ArrayList<Integer>();
		for(int col=0; col < columnCount; col++) {
			if( Boolean.TRUE.equals(jsonState[col]) )
				jsonColumns.add(col);
		}
		return jsonColumns;
	}



	/** Return the columns of <code>columnsToCollapseList</code> that do NOT appear in <code>jsonCols</code>.
	 *  The order (and any repetition) of <code>columnsToCollapseList</code> is preserved.
	 *  @param columnsToCollapseList  candidate columns
	 *  @param jsonCols  columns known to be JSON
	 *  @return a new list holding the non-JSON candidates
	 */
	private List<Integer> getNonJsonCols(List<Integer> columnsToCollapseList, List<Integer> jsonCols) {
		List<Integer> nonJsonCols = new ArrayList<Integer>();
		for(int candidate : columnsToCollapseList) {
			// Count how often the candidate shows up among the JSON columns
			int matches = 0;
			for(int jsonCol : jsonCols) {
				if( jsonCol == candidate )
					matches++;
			}
			if( matches == 0 )
				nonJsonCols.add(candidate);
		}
		return nonJsonCols;
	}
	
	
	/** Check whether the ##BIOR metadata line for a column contains DataType="JSON", as in:
	 *  ##BIOR=<ID="bior.ToTJson",Operation="bior_vcf_to_tjson",DataType="JSON",ShortUniqueName="ToTJson">
	 * @param col  zero-based column whose header name is looked up
	 * @param line  a line whose metadata holds the column names and the original header lines
	 * @return true if a ##BIOR line exists for the column's name and it declares DataType="JSON"
	 */
	private boolean isDataTypeJson(int col, History line) {
		String columnName = line.getMetaData().getColumns().get(col).getColumnName();
		String biorHeader = getBiorHeaderByKey(columnName, line);
		// No ##BIOR line for this column means no declared data type
		return biorHeader != null  &&  biorHeader.contains("DataType=\"JSON\"");
	}
	
	/** Find the first original header line that starts with ##BIOR=&lt;ID="key" (the ID in double quotes).
	 *  NOTE: Key is case-sensitive!
	 *  @param key  the ID to look for
	 *  @param line  a line whose metadata holds the original header lines
	 *  @return the matching header line, or null if there is none  */
	private String getBiorHeaderByKey(String key, History line) {
		String wantedPrefix = "##BIOR=<ID=\"" + key + "\"";
		String match = null;
		for(String headerLine : line.getMetaData().getOriginalHeader()) {
			if( headerLine.startsWith(wantedPrefix) ) {
				match = headerLine;
				break;
			}
		}
		return match;
	}


	/** Merge new INFO values into the existing INFO values (column 8 of the VCF line).
	 *  A value that is null, empty or "." counts as "no value".
	 *  @param oldInfoValues  the INFO values already on the line (may be null, empty or ".")
	 *  @param newInfoValues  the INFO values to append (may be null, empty or ".")
	 *  @return both values joined by ";" when both are present, the one that is present when only
	 *          one is, or the "." placeholder when neither is.  Never null or empty.
	 */
	protected String mergeOldInfoValuesWithNew(String oldInfoValues, String newInfoValues) {
		boolean hasOld = ! isNullBlankOrDot(oldInfoValues);
		boolean hasNew = ! isNullBlankOrDot(newInfoValues);

		if( hasOld  &&  hasNew )
			return oldInfoValues + ";" + newInfoValues;
		if( hasOld )
			return oldInfoValues;
		if( hasNew )
			return newInfoValues;

		// Neither side has anything (this includes null on either side, which used to fail
		// with a NullPointerException), so fall back to the placeholder
		return ".";
	}

	/** True when the value carries no information: it is null, the empty string, or a single dot. */
	private boolean isNullBlankOrDot(String val) {
		if( val == null )
			return true;
		return "".equals(val)  ||  ".".equals(val);
	}

	/** Check whether the column header row already starts with the 8 standard VCF columns
	 *  (#CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO), compared case-insensitively.
	 *  @param history  a line whose metadata holds the column header row
	 *  @return true if the first 8 column headers are the standard VCF ones
	 *  @throws IllegalStateException if the line has no metadata
	 */
	protected boolean isAlreadyAVcf(History history) {
		HistoryMetaData metadata = history.getMetaData();
		if( metadata == null )
			throw new IllegalStateException("Error: Header information is required to construct the VCF");

		final String[] expectedHeaders = { "#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO" };
		String[] actualHeaders = metadata.getColumnHeaderRow(history, "\t").split("\t");

		// Too few columns can never be a VCF
		if( actualHeaders.length < expectedHeaders.length )
			return false;

		for(int i=0; i < expectedHeaders.length; i++) {
			if( ! actualHeaders[i].equalsIgnoreCase(expectedHeaders[i]) )
				return false;
		}
		return true;
	}

    /** Build the values of the 8 standard VCF columns (CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO)
     *  from the target JSON column of the current data line.
     *  The result is meant to be inserted at position 0 of the History object.
     *  Required JSON fields: _landmark, _minBP, _refAllele, _altAlleles.  Optional fields default to ".".
     *  The ID is taken from "_id", falling back to "ID".  INFO stays "." unless data is being added
     *  to the INFO column, in which case the JSON "INFO" field is used (flattened if it is a JSON object).
     *  @param history  the current data line
     *  @return a fixed-size list holding the 8 column values in VCF order
     *  @throws IllegalStateException if the target column is not JSON or a required field is missing
     */
    private List<String> getRequiredVcfColumnsToInsertIntoDataLine(History history) {
        JsonElement targetJson = getJsonElementFromColumn(history.get(mJsonColumn));

        // Required fields throw if missing; optional ones come back as "."
        String chromValue  = getJsonVal(targetJson, "_landmark",  true,  mLineNum);
        String posValue    = getJsonVal(targetJson, "_minBP",     true,  mLineNum);
        String idValue     = getJsonVal(targetJson, "_id",        false, mLineNum);
        String refValue    = getJsonVal(targetJson, "_refAllele", true,  mLineNum);
        String altValue    = splitAlts(getJsonVal(targetJson, "_altAlleles", true, mLineNum));
        String qualValue   = getJsonVal(targetJson, "QUAL",       false, mLineNum);
        String filterValue = getJsonVal(targetJson, "FILTER",     false, mLineNum);

        // No "_id"?  Then fall back to a plain "ID" field
        if( idValue.equals(".") )
            idValue = getJsonVal(targetJson, "ID", false, mLineNum);

        String infoValue = ".";
        if( mIsAddDataToInfo ) {
            String rawInfo = getJsonVal(targetJson, "INFO", false, mLineNum);
            // An INFO field that is itself a JSON object gets flattened into an INFO data string
            infoValue = mVcfInfoColumnBuilder.isJsonObject(rawInfo)
                      ? mVcfInfoColumnBuilder.jsonObjectToInfoDataString("", this.jsonParser.parse(rawInfo).getAsJsonObject())
                      : rawInfo;
        }

        return Arrays.asList(chromValue, posValue, idValue, refValue, altValue, qualValue, filterValue, infoValue);
    }
		
	/** Parse the value of the target column and make sure it is a JSON object.
	 *  @param json  the text of the target column
	 *  @return the parsed JSON element
	 *  @throws IllegalStateException if the text cannot be parsed or is not a JSON object;
	 *          the underlying parser exception is kept as the cause
	 */
	private JsonElement getJsonElementFromColumn(String json) {
		try {
			JsonElement jsonElem = this.jsonParser.parse(json);
			// Called only as a check: this throws unless the element is a JSON object
			jsonElem.getAsJsonObject();
			return jsonElem;
		} catch(Exception e) {
			throw new IllegalStateException("Error: Data line " + mLineNum + ":  Target column not JSON.  Was: " + json, e);
		}
	}

	

	/** Remove the given columns from the data line.
	 *  The set normally holds the target JSON column plus the BioR and range columns.
	 *  NOTE: No header or column metadata is affected by this method
	 *  ASSUMPTION: The columnsToRemoveSet is provided in reverse order (largest to smallest),
	 *              so removing one column never shifts the position of a column still to be removed.
	 * @param history  Current line to remove columns from.
	 * @param columnsToRemoveSet  zero-based indexes of the columns to remove
	 */
	private void removeTargetAndSpecifiedColumns(History history, TreeSet<Integer> columnsToRemoveSet) {
		// The loop variable is a primitive int on purpose: remove(int) deletes by INDEX,
		// whereas passing an Integer would select the remove-by-object overload
		for(int columnIndex : columnsToRemoveSet) {
			history.remove(columnIndex);
		}
	}




	/** Look up <code>key</code> in the JSON object and return its value as a string.
	 *  <ul>
	 *    <li>Key present and a JSON primitive: the primitive's string value</li>
	 *    <li>Key present but not a primitive: the element's <code>toString()</code> text</li>
	 *    <li>Key absent and not required: "."</li>
	 *    <li>Key absent and required: an exception is thrown</li>
	 *  </ul>
	 *  @param jsonElem    Element that is read as a JSON object
	 *  @param key         Name of the member to fetch
	 *  @param isRequired  Whether a missing member is an error
	 *  @param lineNum     Data line number, used only in the error message
	 *  @return The value as described above
	 *  @throws IllegalStateException if the member is required but missing
	 */
	protected String getJsonVal(JsonElement jsonElem, String key, boolean isRequired, long lineNum) {
		final JsonElement member = jsonElem.getAsJsonObject().get(key);

		// Found: primitives give their plain string value, anything else gives its JSON text
		if( member != null ) {
			return member.isJsonPrimitive()  ?  member.getAsString()  :  member.toString();
		}

		// Not found from here on
		if( isRequired ) {
			throw new IllegalStateException("Error: Required JSON field [" + key + "] missing on data line "
					+ lineNum + ": Target column must contain these JSON fields: _landmark, _minBP, _refAllele, _altAlleles.  "
					+ "Was: " + jsonElem.toString());
		}
		return ".";
	}
    
    /** Convert the JSON alts string (ex: ["A","C"]) to the comma-separated VCF ALT value (ex: A,C).
     *  <p>
     *  Only the one pair of square brackets that encloses the whole value is dropped.  Brackets inside
     *  an allele are kept, because VCF breakend alleles are written with them
     *  (ex: ["G]17:198982]"] becomes G]17:198982]).  All double quotes are removed.
     *  If nothing is left (ex: [] or an empty string), the VCF missing value "." is returned
     *  so that the ALT column is never empty.
     *  @param jsonAltArray  Text of the alts value; must not be null
     *  @return The ALT column text, or "." when there are no alts
     */
    protected String splitAlts(String jsonAltArray) {
        String alts = jsonAltArray;
        // Strip the array delimiters only when they actually wrap the entire value
        if( alts.startsWith("[")  &&  alts.endsWith("]") )
            alts = alts.substring(1, alts.length() - 1);
        // Literal replace - no regex needed for a plain quote character
        alts = alts.replace("\"", "");
        return alts.isEmpty()  ?  "."  :  alts;
    }
    
    /** Bring the header-side column information in line with the VCF output:
     *  first drop the column metadata for the columns listed in <code>mColumnsToRemove</code>,
     *  then - unless the input was already flagged as a VCF - insert the 8 required VCF columns into the header.
     *  @param history  Line whose metadata/header is updated
     */
    private void modifyHeaderLineToVcfFormat(History history) {
        removeColumnMetaData(history, mColumnsToRemove);

        // An input that is already a VCF has its required columns, so there is nothing to insert
        if( mIsAlreadyAVcf ) {
            return;
        }
        insertVcfRequiredColumnsIntoHeader(history);
    }
    
    
    /** Remove the ColumnMetaData entries that are associated with the columns that are to be removed.
     *  Only the list returned by <code>history.getMetaData().getColumns()</code> is changed; the header lines are not touched here.
     *  The indexes are removed from largest to smallest so that removing one entry never shifts
     *  the position of an entry that is still waiting to be removed.  The ordering is enforced here,
     *  so it no longer matters which comparator the TreeSet was built with.
     *  @param history  Line whose column metadata list is trimmed
     *  @param columnsToRemoveSet  Indexes (into the column metadata list) to remove.  The set itself is not modified.
     */
    private void removeColumnMetaData(History history, TreeSet<Integer> columnsToRemoveSet) {
        // Work on a copy so the caller's set is left untouched, and force descending order
        List<Integer> colsToRemove = new ArrayList<Integer>(columnsToRemoveSet);
        Collections.sort(colsToRemove, Collections.<Integer>reverseOrder());

        List<ColumnMetaData> colMetaData = history.getMetaData().getColumns();
        for(Integer biorCol : colsToRemove) {
            // (NOTE: must convert from Integer to int or the .remove() method
            //   will attempt to remove the Integer OBJECT instead of the index)
            colMetaData.remove(biorCol.intValue());
        }
    }

 
	/** Put the 8 required VCF column names at the front of the column header row, and put
	 *  matching ColumnMetaData entries at the front of the column metadata list.
	 *  <p>
	 *  The LAST line of the original header is taken to be the column header row.  Any "#" in its
	 *  first column name is removed, the 8 VCF names (starting with "#CHROM") are inserted in front,
	 *  and the tab-joined result overwrites that last line.  When there are no header lines at all,
	 *  a row holding just the 8 VCF names is appended instead.
	 *  @param history  Line whose original header and column metadata are updated
	 */
	private void insertVcfRequiredColumnsIntoHeader(History history) {
		final List<String> originalHeader = history.getMetaData().getOriginalHeader();
		final boolean hasHeaderRows = ! originalHeader.isEmpty();
		final int lastRow = originalHeader.size() - 1;

		// Start from the existing column header row when there is one, otherwise from an empty row
		History columnRow;
		if( hasHeaderRows ) {
			columnRow = new History(originalHeader.get(lastRow));
		} else {
			columnRow = new History();
		}

		// The "#" moves to the new first column (#CHROM), so strip it from the old first column
		if( columnRow.size() > 0 ) {
			columnRow.set(0, columnRow.get(0).replace("#", ""));
		}
		columnRow.addAll(0, Arrays.asList("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"));

		// Replace the old column header row, or add the new one if the header was empty
		final String mergedRow = columnRow.getMergedData("\t");
		if( hasHeaderRows ) {
			originalHeader.set(lastRow, mergedRow);
		} else {
			originalHeader.add(mergedRow);
		}

		// Column metadata must line up one-to-one with the inserted columns, in the same order
		ColumnMetaData[] vcfColumns = {
				new ColumnMetaData("CHROM", Type.String, "1", "Chromosome"),
				new ColumnMetaData("POS", 	Type.String, "1", "Position"),
				new ColumnMetaData("ID", 	Type.String, "1", "Id or rsId"),
				new ColumnMetaData("REF", 	Type.String, "1", "Reference Allele"),
				new ColumnMetaData("ALT", 	Type.String, ".", "Alternate Alleles"),
				new ColumnMetaData("QUAL", 	Type.String, "1", "Quality"),
				new ColumnMetaData("FILTER",Type.String, "1", "Filter"),
				new ColumnMetaData("INFO", 	Type.String, ".", "Info")
		};
		history.getMetaData().getColumns().addAll(0, Arrays.asList(vcfColumns));
	}

    
	/** Make sure the three leading VCF header lines are present, adding each one only if a header
	 *  with that key is not already there:
	 *  <ol>
	 *    <li>##fileformat - added as the very first header line</li>
	 *    <li>##fileDate   - today's date as yyyyMMdd, placed right after ##fileformat</li>
	 *    <li>##source     - placed right after ##fileDate</li>
	 *  </ol>
	 *  NOTE: Once we are ready to move to VCFv4.2, we should add "Source" and "Version" fields to the ##INFO rows
	 *  @param history  Line whose original header is updated
	 */
	private void addTopMostHeaders(History history) {
		List<String> headerRows = history.getMetaData().getOriginalHeader();

		// 1) ##fileformat goes at the top of the header
		if( ! isHeaderExists("##fileformat", history) ) {
			headerRows.add(0, "##fileformat=VCFv4.1");
		}

		// 2) ##fileDate follows ##fileformat
		if( ! isHeaderExists("##fileDate", history) ) {
			String today = new SimpleDateFormat("yyyyMMdd").format(new Date());
			insertAfter("##fileDate=" + today,  "##fileformat",  history);
		}

		// 3) ##source follows ##fileDate
		if( ! isHeaderExists("##source", history) ) {
			insertAfter("##source=bior_tjson_to_vcf",  "##fileDate",  history);
		}
	}

	/** Insert <code>headerLine</code> into the original header directly below the row whose key is <code>keyToInsertAfter</code>.
	 *  If no row has that key (row lookup gives -1), the line ends up at index 0, i.e. at the top of the header.
	 *  @param headerLine        Complete header line to insert
	 *  @param keyToInsertAfter  Key (text before the "=") of the row to insert after
	 *  @param history           Line whose original header is updated
	 */
	private void insertAfter(String headerLine, String keyToInsertAfter, History history) {
		final int insertAt = getHeaderRowIndex(keyToInsertAfter, history) + 1;
		List<String> headerRows = history.getMetaData().getOriginalHeader();
		headerRows.add(insertAt, headerLine);
	}
	
	/** Tell whether the original header already has a row for the given key.
	 *  The key is matched against the text before the "=" of each row, ignoring case
	 *  (ex: "##fileformat" or "##source").
	 *  @param headerKey  Key to look for
	 *  @param history    Line whose original header is searched
	 *  @return true if a matching row was found
	 */
	protected boolean isHeaderExists(String headerKey, History history) {
		int rowIdx = getHeaderRowIndex(headerKey, history);
		// -1 is the "not found" marker
		return rowIdx != -1;
	}
	
	
	/** Find the first header row whose key equals <code>headerKey</code>, ignoring case.
	 *  A row's key is everything before its first "=".  Rows that have no "=" are skipped.
	 *  @param headerKey  Key to look for
	 *  @param history    Line whose original header is searched
	 *  @return 0-based index of the matching row, or -1 if there is none
	 */
	private int getHeaderRowIndex(String headerKey, History history) {
		final List<String> headerRows = history.getMetaData().getOriginalHeader();

		int rowIdx = -1;
		for(String row : headerRows) {
			rowIdx++;
			int eqPos = row.indexOf('=');
			// Only rows of the form key=value can match
			if( eqPos >= 0  &&  headerKey.equalsIgnoreCase(row.substring(0, eqPos)) ) {
				return rowIdx;
			}
		}
		return -1;
	}


    /**
     * Remove the column metadata entries found at the given indexes
     * (for a header line e.g. #CHROM, this drops the bior/annotation columns).
     * <p>
     * Entries are removed strictly by position, from the largest index down to the smallest,
     * so that removing one entry never shifts the position of an entry that is still to be removed.
     * Removing by position (rather than by object) guarantees the entry at the requested index is
     * the one that goes, even if the list contains another entry that is equal to it.
     *
     * @param metaData      metadata whose column list is modified in place
     * @param biorindexes2  map whose KEYS are the column indexes to remove; the values are not used here
     * @return the same <code>metaData</code> instance that was passed in
     */
    public HistoryMetaData removeColumnHeader(HistoryMetaData metaData, Map<Integer, String> biorindexes2) {
        List<ColumnMetaData> columns = metaData.getColumns();
        List<Integer> indexes = new ArrayList<Integer>(biorindexes2.keySet());
        Collections.sort(indexes, Collections.<Integer>reverseOrder()); // 10 9 8 ...
        for (int idx : indexes) {
            // idx is a primitive int, so this is List.remove(int index), not remove(Object)
            columns.remove(idx);
        }
        return metaData;
    }

}
