/**
 * bior_pipeline
 *
 * <p>@author Gregory Dougherty</p>
 * Copyright Mayo Clinic, 2014
 *
 */
package edu.mayo.bior.util;

import static edu.mayo.bior.pipeline.Variant2JSONPipe.kLookupFile;
import static edu.mayo.bior.pipeline.Variant2JSONPipe.kLookupIndexFile;
import static edu.mayo.bior.util.CatalogUtils.kOnlyUnique;
import static edu.mayo.bior.util.CatalogUtils.kReturnAllNames;
import static edu.mayo.bior.util.CatalogUtils.kHumanNameCol;
import static edu.mayo.bior.util.CatalogUtils.kMachineNameCol;
import static edu.mayo.bior.util.FieldProcessor.kBlank;
import static edu.mayo.bior.pipeline.Treat.SplitFile.kReturnAll;
import com.tinkerpop.pipes.Pipe;
import com.tinkerpop.pipes.util.Pipeline;
import edu.mayo.bior.pipeline.Variant2JSONPipe;
import edu.mayo.bior.pipeline.Treat.SplitFile;
import edu.mayo.bior.pipes.history.MakeFirstLineHeaderPipe;
import edu.mayo.pipes.JSON.DrillPipe;
import edu.mayo.pipes.JSON.lookup.LookupPipe;
import edu.mayo.pipes.JSON.tabix.OverlapPipe;
import edu.mayo.pipes.JSON.tabix.SameVariantPipe;
import edu.mayo.pipes.history.History;
import edu.mayo.pipes.history.HistoryInPipe;
import edu.mayo.pipes.iterators.Compressor;
import edu.mayo.pipes.util.metadata.Metadata;
import java.io.*;
import java.util.*;


/**
 * Class to allow you to pass in an int that can be modified and returned by the recipient.<br/>
 * Used as an "out parameter": the routine being called stores a result in {@code value}, and 
 * the caller reads it once the call returns
 *
 * <p>@author Gregory Dougherty</p>
 */
class ModInt
{
	/** The wrapped int, read and written directly by both caller and recipient */
	int	value;
}


/**
 * Class to implement the BioR annotation for R
 *
 * <p>@author Gregory Dougherty</p>
 */
public class BioR
{
	/** Extra error text.  When not empty, {@code returnFailure} adds it as a third column.  Emptied by {@code init} */
	private static List<String>				errorStrings = new ArrayList<String> ();
	/** Map from output header name to the drill path used to get its value.  Rebuilt by {@code init}, 
	 * added to by {@code addCatalogEntries} */
	private static Map<String, String>		gHeadersToDrills = new HashMap<String, String> ();
	/** Processor used for every column that has no entry in {@code gFieldProcessorMap}.  Created by {@code init} */
	private static DefaultProcessor			gDefaultProcessor = null;
	/** Paths of the catalogs in use whose {@code Catalog.isOverlap ()} is true */
	private static List<String>				gOverlapPaths = new ArrayList<String> ();
	/** Map from catalog path to lookup field */
	private static Map<String, String[]>	gLookupDrill = new HashMap<String, String[]> ();
	/** Map from lookup catalog path to associated overlap catalog path */
	private static Map<String, String>		gLookupCatalogs = new HashMap<String, String> ();
	/** Map from lookup catalog path to associated map of index paths to index files */
	private static Map<String, Map<String, String>>	gLookupIndexes = new HashMap<String, Map<String, String>> ();
	/** Map from output header name to the {@code FieldProcessor} applied to that column's values */
	private static Map<String, FieldProcessor>		gFieldProcessorMap = new HashMap<String, FieldProcessor> ();
	
//	private static final String		kNCBIFile = "/dbSNP/142/00-All_GRCh37.tsv.bgz";
//	private static final String		kNCBIFileIndex = "/dbSNP/142/index/00-All_GRCh37.ID.idx.h2.db";
	/*
	 * Built in header / drill tables.  Each kXxxHeader array is parallel to its kXxxDrill array: 
	 * element i of the header array is the output column name, and element i of the drill array 
	 * is the drill path it is mapped to when init loads them into gHeadersToDrills
	 */
	private static final String[]	kBaseDrill = {"_landmark", "_minBP", "_refAllele", "_altAlleles"};
	private static final String[]	kBaseHeader = {"bior.chrom", "bior.startPos", "Ref", "Alt"};
	private static final String[]	kCosmicDrill = {"Mutation_ID", "Mutation_AA", "Mutation_CDS", "Mutation_GRCh37_strand"};
	private static final String[]	kCosmicHeader = {"Cosmic.ID", "Cosmic.Amino.Acid.Change", "Cosmic.Change", "Cosmic.Strand"};
	private static final String[]	kNCBIDrill = {"INFO.RS", "INFO.dbSNPBuildID", "INFO.SAO", "INFO.SSR"};
	private static final String[]	kNCBIHeader = {"dbSNP.rsID", "dbSNP.build", "dbSNP.SNP.Allele.Origin", "dbSNP.Suspect.Region"};
	private static final String[]	kGenomeDrill = {"INFO.AFR_AF", "INFO.AMR_AF", "INFO.EAS_AF", "INFO.EUR_AF", "INFO.SAS_AF"};
	private static final String[]	kGenomeHeader = {"kGenomes.AFR.MAF", "kGenomes.AMR.MAF", "kGenomes.EAS.MAF", "kGenomes.EUR.MAF", "kGenomes.SAS.MAF"};
	private static final String[]	kHapMapDrill = {"ASW.otherallele_freq", "CEU.otherallele_freq", "CHB.otherallele_freq", "CHD.otherallele_freq", 
	                             	                "GIH.otherallele_freq", "JPT.otherallele_freq", "LWK.otherallele_freq", "MEX.otherallele_freq", 
	                             	                "MKK.otherallele_freq", "TSI.otherallele_freq", "YRI.otherallele_freq"};
	private static final String[]	kHapMapHeader = {"HapMap.ASW.MAF", "HapMap.CEU.MAF", "HapMap.CHB.MAF", "HapMap.CHD.MAF", "HapMap.GIH.MAF", 
	                             	                 "HapMap.JPT.MAF", "HapMap.LWK.MAF", "HapMap.MEX.MAF","HapMap.MKK.MAF", "HapMap.TSI.MAF", 
	                             	                 "HapMap.YRI.MAF"};
	private static final String[]	kHGNCDrill = {"name", "symbol", "ensembl_gene_id", "entrez_id", "hgnc_id"};
	private static final String[]	kHGNCHeader = {"HGNC.Approved.Gene.Name", "HGNC.Approved.Gene.Symbol", "HGNC.Ensembl.Gene.ID", 
	                             	               "HGNC.Entrez.Gene.ID", "HGNC.ID"};
	private static final String[]	kBGIDrill = {"estimated_minor_allele_freq"};
	private static final String[]	kBGIHeader = {"BGI200.Danish.MAF"};
	private static final String[]	kESPDrill = {"AA._maf", "EA._maf"};
	private static final String[]	kESPHeader = {"ESP6500.AFR.MAF", "ESP6500.EUR.MAF"};
	private static final String[]	kMirBaseDrill = {"ID"};
	private static final String[]	kMirBaseHeader = {"mirBase.ID"};
	private static final String[]	kOMIMDrill = {"name"};
	private static final String[]	kOMIMHeader = {"OMIM.Disease"};
	private static final String[]	kUCSCBRDrill = {"name", "score"};
	private static final String[]	kUCSCBRHeader = {"UCSCBR.Name", "UCSCBR.Score"};
	
	/** Position of the header array within each entry of {@code kHeaderDrillPairs} */
	private static final int			kHeaders = 0;
	/** Position of the drill array within each entry of {@code kHeaderDrillPairs} */
	private static final int			kDrills = kHeaders + 1;
	/** Every built in {header array, drill array} pair, walked by {@code init} */
	private static final String[][][]	kHeaderDrillPairs = {{kBaseHeader, kBaseDrill}, {kCosmicHeader, kCosmicDrill}, {kNCBIHeader, kNCBIDrill}, 
	                                 	                     {kGenomeHeader, kGenomeDrill}, {kHapMapHeader, kHapMapDrill}, {kHGNCHeader, kHGNCDrill}, 
	                                 	                     {kBGIHeader, kBGIDrill}, {kESPHeader, kESPDrill}, {kMirBaseHeader, kMirBaseDrill}, 
	                                 	                     {kOMIMHeader, kOMIMDrill}, {kUCSCBRHeader, kUCSCBRDrill}};
	
	/** Text values given to the {@code LookupProcessor} that {@code init} installs for the dbSNP suspect region column */
	private static final String[]	kDbSnpSuspectLookup = {"unspecified", "Paralog", "byEST", "Para_EST", "oldAlign", "other"};
	/** Text values for dbSNP clinical significance.  NOTE(review): in the visible part of this file it is 
	 * only referenced from commented out code in {@code init} */
	protected static final String[]	kDbSnpClinicalLookup = {"unknown", "untested", "non-pathogenic", "probable-non-pathogenic", 
	                             	                        "probable-pathogenic", "pathogenic", "drug-response", 
	                             	                        "histocompatibility", "other"};
	/** Text values given to the {@code LookupProcessor} that {@code init} installs for the dbSNP allele origin column */
	private static final String[]	kDbSnpAlleleLookup = {"unspecified", "Germline", "Somatic", "Both", "not-tested", 
	                             	                      "tested-inconclusive", "other"};
	
	// Positions within kNCBIHeader (and so also within kNCBIDrill)
	private static final int	kNCBIRsID = 0;
	private static final int	kNCBIBuildID = kNCBIRsID + 1;
	private static final int	kNCBIAlleleOrigin = kNCBIBuildID + 1;
	private static final int	kNCBISuspect = kNCBIAlleleOrigin + 1;
	
	// Positions within kBaseHeader (and so also within kBaseDrill)
	private static final int	kChrom = 0;
	private static final int	kStartPos = kChrom + 1;
	private static final int	kRef = kStartPos + 1;
	private static final int	kAlt = kRef + 1;
	
	private static final int	kMinFieldSize = 2;	// must have name and at least one field, or no point
	
	// Positions of the command line arguments read by main, and how many there must be
	private static final int	kInFile = 0;
	private static final int	kBuildID = kInFile + 1;
	private static final int	kCatalogFile = kBuildID + 1;
	private static final int	kFieldsFile = kCatalogFile + 1;
	private static final int	kNumParams = kFieldsFile + 1;
	
	// NOTE(review): not referenced in the visible part of this file; presumably the column 
	// positions within the catalog list file read by loadCatalogs - confirm
	private static final int	kName = 0;
	private static final int	kPath = kName + 1;
	private static final int	kIsOverlap = kPath + 1;
	private static final int	kNumCatCols = kIsOverlap + 1;
	/** Value for the {@code keepOld} argument of {@code addEntries}: an existing mapping wins */
	private static final boolean	kKeepOld = true;
	/** Value for the {@code keepOld} argument of {@code addEntries}: the new mapping wins */
	private static final boolean	kUseNewest = false;
	/** Strings that {@code addCatalogEntries} replaces in human readable names; parallel to {@code kReplaceStrs} */
	private static final String[]	kFindStrs = {" ", "_"};
	/** Replacement for the matching element of {@code kFindStrs} */
	private static final String[]	kReplaceStrs = {".", "."};
	// NOTE(review): not referenced in the visible part of this file
	private static final int		kNoHeader = 0;
	/** Root directory that the category files below are found under */
	private static final String		kFileBase = "/data2/bsi/app_data/GAIA/";
	protected static final String	kCategoryPathFile = kFileBase + "BioR/Categories.txt";
	/** File that {@code getCatalogInfo} loads as a String to String map of catalog names */
	private static final String		kCategoryNameFile = kFileBase + "BioR/CategoryNames.txt";
	
	/**
	 * Put all the static state back to its starting condition: reload the built in header to 
	 * drill mappings, install the per-column {@code FieldProcessor}s, make a fresh default 
	 * processor, and empty every collection that is filled in during a run
	 */
	private static void init ()
	{
		// Built in header -> drill mappings, loaded into an emptied map
		gHeadersToDrills.clear ();
		for (int which = 0; which < kHeaderDrillPairs.length; ++which)
		{
			String[][]	headersAndDrills = kHeaderDrillPairs[which];
			addEntries (gHeadersToDrills, headersAndDrills[kHeaders], headersAndDrills[kDrills], kUseNewest);
		}
		
		gFieldProcessorMap.clear ();
		
		// Every kGenomes column shares a single cleaner
		FieldProcessor	genomeCleaner = new TextCleaner ("[", "]");
		for (int col = 0; col < kGenomeHeader.length; ++col)
			gFieldProcessorMap.put (kGenomeHeader[col], genomeCleaner);
		
		// Clean up Alt, making it normal, possibly delimited, strings
		gFieldProcessorMap.put (kBaseHeader[kAlt], new TextCleaner ("[\"", "\",\"", ":", "\"]"));
		
		// dbSNP columns that need wrapping, or translation through a lookup table
		gFieldProcessorMap.put (kNCBIHeader[kNCBIRsID], new TextWrapper ("rs", null));
		gFieldProcessorMap.put (kNCBIHeader[kNCBIAlleleOrigin], new LookupProcessor (kDbSnpAlleleLookup));
//		gFieldProcessorMap.put (kNCBIHeader[kNCBIClincal], new LookupProcessor (kDbSnpClinicalLookup));
		gFieldProcessorMap.put (kNCBIHeader[kNCBISuspect], new LookupProcessor (kDbSnpSuspectLookup));
		
		// Return empty string as NA
		gDefaultProcessor = new DefaultProcessor ();
		gDefaultProcessor.addLocalBlank ("");
		
		// Nothing from a previous run may leak into this one
		gLookupIndexes.clear ();
		gLookupCatalogs.clear ();
		gLookupDrill.clear ();
		gOverlapPaths.clear ();
		errorStrings.clear ();
	}
	
	
	/**
	 * Add entries to {@code headerMap}, where {@code headers} provides the key and {@code drills} 
	 * provides the value.  Only as many entries as the shorter of the two arrays are added.<br/>
	 * When a header already has a mapping, {@code keepOld} decides who wins.  If the new drill 
	 * wins, any {@code FieldProcessor} that {@code gFieldProcessorMap} holds under the old drill 
	 * is moved so that it is held under the new drill
	 * 
	 * @param headerMap	Map to add to.  Must be valid
	 * @param headers	Array of key values
	 * @param drills	Array of value values.
	 * @param keepOld	If true an existing mapping is left alone, if false it is replaced
	 */
	private static void addEntries (Map<String, String> headerMap, String[] headers, String[] drills, boolean keepOld)
	{
		int	count = Math.min (headers.length, drills.length);
		
		for (int which = 0; which < count; ++which)
		{
			String	header = headers[which];
			String	drill = drills[which];
			String	existing = headerMap.get (header);
			
			if (existing == null)	// Nothing to fight over, just add it
			{
				headerMap.put (header, drill);
				continue;
			}
			
			if (keepOld)
				continue;
			
			headerMap.put (header, drill);
			
			FieldProcessor	moved = gFieldProcessorMap.remove (existing);
			if (moved != null)
				gFieldProcessorMap.put (drill, moved);
		}
	}
	
	
	/**
	 * Command line interface to {@code annotate}.  Loads the data, the desired fields, and the 
	 * catalogs from the files named on the command line, annotates the data, and writes the 
	 * results to stdout as tab delimited text, one line per result row.<br/>
	 * Exits with status 1 if there are too few arguments, or if there are no results at all.<br/>
	 * A failure report from {@code annotate} has columns of differing lengths; every row of the 
	 * longest column is written, with nothing written for cells that a shorter column lacks
	 * 
	 * @param args	The arguments: inputFile, build, catalogFile, fieldsFile
	 */
	public static void main (String[] args)
	{
		if (args.length < kNumParams)
		{
			System.err.println ("Must have inFile, build, catalogFile and fieldsFile");
			System.exit (1);
		}
		
		File	inFile = new File (args[kInFile]);
		String	build = args[kBuildID];
		File	catFile = new File (args[kCatalogFile]);
		File	fieldsFile = new File (args[kFieldsFile]);
		
		testFile (inFile, "inFile");
		testFile (catFile, "catalogFile");
		testFile (fieldsFile, "fieldsFile");
		
		String[][]	data = loadFile (inFile);
		String[][]	fields = loadLines (fieldsFile);
		Catalog[]	catalogs = loadCatalogs (catFile);
		String[][]	results = annotate (data, build, catalogs, fields);
		int			numCols = results.length;
		
		if (numCols == 0)
		{
			System.err.println ("No annotation results");
			System.exit (1);
		}
		
		// Size the output by the longest column, not the first one, so that a failure report 
		// (a one element title column followed by longer columns) is written out in full
		int	numLines = 0;
		for (String[] column : results)
		{
			if (column != null)
				numLines = Math.max (numLines, column.length);
		}
		
		for (int i = 0; i < numLines; ++i)
		{
			for (int j = 0; j < numCols; ++j)
			{
				String[]	column = results[j];
				
				if (j > 0)
					System.out.print ("\t");
				
				if ((column != null) && (i < column.length))	// Short columns just leave the cell empty
					System.out.print (column[i]);
			}
			
			System.out.println ();
		}
	}
	
	
	/**
	 * Annotate the passed in data with the passed in fields from the passed in catalogs.<br/>
	 * Nothing is thrown to the caller: any {@linkplain Throwable} raised along the way is caught 
	 * and handed back as a failure report built by {@code returnFailure}
	 * 
	 * @param data		Data to be annotated.  First element of each array gives name, rest gives the data.
	 * Each array must be same length as others, or will annotate only up to shortest array passed in
	 * @param build		Genome build.  Will verify that all catalogs are from that build, or else fail
	 * @param catalogs	Catalog names, locations, and how to use them
	 * @param fields	List of annotations desired.  First element of each array gives catalog name, 
	 * rest of elements give the desired annotations from that catalog.  Each array can have own length
	 * @return	2D array of results, one column per array.  First element of each array is name of column, 
	 * rest is results.  All columns will be same length.  On failure, a report whose first column 
	 * holds just the kind of failure
	 */
	public static String[][] annotate (String[][] data, String build, Catalog[] catalogs, String[][] fields)
	{
		try
		{
			init ();
			
			String			cleanBuild = CatalogUtils.getBuild (build);	// Clean any extraneous data from build
			List<String>	catalogPaths = new ArrayList<String> ();
			List<String[]>	drillsWanted = new ArrayList<String[]> ();
			List<String[]>	headersWanted = new ArrayList<String[]> ();
			String[][]		problem = getDrillAndHeaders (cleanBuild, catalogs, fields, catalogPaths, drillsWanted, headersWanted);
			
			if (problem != null)
				return problem;
			
			// Pieces that get text lines in, as History objects
			Metadata				runInfo = new Metadata ("bior.annotate");
			Pipe<String, String>	headerStage = new MakeFirstLineHeaderPipe ();
			Pipe<String, History>	historyStage = new HistoryInPipe (runInfo);
			
			// Lookup files are chosen by build
			String[]			lookupFiles = Variant2JSONPipe.getLookupFiles (cleanBuild);
			LookupPipe			idLookup = new LookupPipe (lookupFiles[kLookupFile], lookupFiles[kLookupIndexFile]);
			Variant2JSONPipe	toJSON = new Variant2JSONPipe (idLookup, null);
			
			// Wire it all together: input, then the catalog finds, then the drills
			Pipeline<String, History>	front = new Pipeline<String, History> (headerStage, historyStage);
			Pipeline<History, History>	finders = makeFindPipes (catalogPaths, toJSON, true);
			Pipeline<History, History>	drillers = makeDrillPipes (drillsWanted);
			Pipeline<String, History>	whole = new Pipeline<String, History> (front, finders, drillers);
			
			whole.setStarts (getAsList (data));
			
			return dumpHistory (getResults (whole), headersWanted, kBaseHeader);
		}
		catch (Throwable oops)
		{
			return returnFailure ("Exception", stackTraceToStrArray (oops));
		}
	}
	
	
	/**
	 * Gather up everything known about one catalog file: its names, build, source, description, 
	 * version, whether it must be used as an overlap catalog, and its keywords
	 * 
	 * @param path	Path to the catalog file of interest
	 * @return	{@linkplain CatalogInfo} describing the catalog.  Null if the catalog's properties 
	 * or keywords could not be had, or if there was an {@linkplain IOException} (whose stack trace 
	 * is printed)
	 */
	public static CatalogInfo getCatalogInfo (String path)
	{
		Properties	props = CatalogUtils.getCatalogProperties (path);
		if (props == null)
			return null;
		
		CatalogInfo	info = null;	// Stays null unless everything works
		
		try
		{
			Map<String, String>	knownNames = CatalogUtils.getStringToStringMap (kCategoryNameFile);
			String				uniqueName = CatalogUtils.getUniqueName (props);
			String				humanName = CatalogUtils.getHumanName (props, knownNames, uniqueName);
			String				catBuild = CatalogUtils.getBuild (props);
			String				catSource = CatalogUtils.getSource (props);
			String				catDescription = CatalogUtils.getDescription (props);
			String				catVersion = CatalogUtils.getVersion (props);
			boolean				overlapOnly = CatalogUtils.kOverlap.equals (CatalogUtils.getBioRType (path));
			String[][]			keywords = CatalogUtils.getCatalogKeywords (path, kReturnAllNames);
			
			if (keywords != null)
			{
				info = new CatalogInfo (uniqueName, humanName, catBuild, catSource, catDescription, catVersion, path, 
										overlapOnly, keywords[kMachineNameCol], keywords[kHumanNameCol]);
			}
		}
		catch (IOException oops)
		{
			oops.printStackTrace ();
		}
		
		return info;
	}
	
	
	/**
	 * Given {@code build}, {@code catalogs}, and {@code fields}, compute the {@code paths}, 
	 * {@code drillPaths}, and {@code drillHeaders}.<br/>
	 * Only catalogs that {@code fields} asks for are used.  {@code kBaseHeader} goes first in 
	 * {@code drillHeaders}, while {@code kBaseDrill} goes last in {@code drillPaths}
	 * 
	 * @param build			Genome build, either GRCh37 or GRCh38.  Will verify that all catalogs 
	 * are from that build, or else fail
	 * @param catalogs		Catalog names, locations, and how to use them
	 * @param fields		List of annotations desired.  First element of each array gives catalog name, 
	 * rest of elements give the desired annotations from that catalog.  Each array can have own length
	 * @param paths			List to which we will add paths to all the catalogs being used
	 * @param drillPaths	List to which we will add all the JSON drill paths to be used
	 * @param drillHeaders	List to which we will add the headers to be dumped to output for each field
	 * @return	Null, or an error message if failed
	 * @throws IOException	In particular a {@linkplain FileNotFoundException} if a needed catalog 
	 * does not exist
	 */
	private static String[][] getDrillAndHeaders (String build, Catalog[] catalogs, String[][] fields, List<String> paths, 
												  List<String[]> drillPaths, List<String[]> drillHeaders)
			throws IOException
	{
		Set<String>		wanted = getNthRow (fields, 0, kMinFieldSize);
		List<String>	inUse = new ArrayList<String> ();
		
		drillHeaders.add (kBaseHeader);
		for (int which = 0; which < catalogs.length; ++which)
		{
			Catalog	current = catalogs[which];
			String	catName = current.getName ();
			
			if (!wanted.contains (catName))	// Nobody asked for anything from this one
				continue;
			
			String	catPath = current.getPath ();
			File	onDisk = new File (catPath);
			
			if (!onDisk.exists ())
				throw new FileNotFoundException ("Catalog could not be found: " + catPath);
			
			paths.add (catPath);
			if (current.isOverlap ())
				gOverlapPaths.add (catPath);
			
			String	catBuild = addCatalogEntries (catName, catPath);
			if (!build.equalsIgnoreCase (catBuild))
				return returnFailure ("Builds don't match", makeBuildError (build, catBuild, current));
			
			inUse.add (catName);
		}
		
		// Built after the loop, since the loop adds to the header -> drill mappings
		Map<String, String[]>	headersByCatalog = getHeaderMap (fields);
		Map<String, String[]>	drillsByCatalog = getDrillMap (fields);
		
		for (String catName : inUse)
		{
			drillPaths.add (drillsByCatalog.get (catName));
			drillHeaders.add (headersByCatalog.get (catName));
		}
		
		drillPaths.add (kBaseDrill);
		
		return null;
	}
	
	
	/**
	 * Package up an error into a 2d array of strings: first the title, alone in its own array, 
	 * then the description, then (only if there is any) whatever is in {@code errorStrings}
	 * 
	 * @param errorTitle		Type of error we're returning
	 * @param errorDescription	Description of the error.  Used as is, not copied
	 * @return	2D array of strings holding the passed in information, and any information that 
	 * was in {@code errorStrings}
	 */
	private static String[][] returnFailure (String errorTitle, String[] errorDescription)
	{
		boolean		haveExtras = !errorStrings.isEmpty ();
		String[][]	report = new String[haveExtras ? 3 : 2][];
		
		report[0] = new String[] {errorTitle};
		report[1] = errorDescription;
		
		if (haveExtras)
			report[2] = errorStrings.toArray (new String[errorStrings.size ()]);
		
		return report;
	}
	
	
	/**
	 * Build an array of strings describing a build match failure.  The elements alternate 
	 * between fixed text and the value that text is talking about
	 * 
	 * @param build		Build we're supposed to have
	 * @param catBuild	Build of the current catalog
	 * @param catalog	Information about that catalog
	 * @return	Array of eight Strings
	 */
	private static String[] makeBuildError (String build, String catBuild, Catalog catalog)
	{
		return new String[] {
			"Catalog", catalog.getName (), 
			"at location", catalog.getPath (), 
			"is supposed to be build", build, 
			"but is actually build", catBuild
		};
	}
	
	
	/**
	 * Take a {@linkplain Throwable} and return a description of it as a String[].<br/>
	 * The first element is the Throwable's own {@code toString ()}, which is its class and 
	 * message, followed by one element per stack frame.  Each cause in its cause chain is then 
	 * added the same way, with "Caused by: " in front of the cause's {@code toString ()}.<br/>
	 * Without the class and message lines a reader would see where things went wrong, but 
	 * not what went wrong
	 * 
	 * @param oops	{@linkplain Throwable} to describe.  If null, get an empty array
	 * @return	String[], never null
	 */
	private static String[] stackTraceToStrArray (Throwable oops)
	{
		List<String>	lines = new ArrayList<String> ();
		// Identity based, so a cause chain that loops back on itself can't trap us
		Set<Throwable>	seen = Collections.newSetFromMap (new IdentityHashMap<Throwable, Boolean> ());
		String			lead = "";
		
		for (Throwable current = oops; (current != null) && seen.add (current); current = current.getCause ())
		{
			lines.add (lead + current.toString ());
			
			for (StackTraceElement frame : current.getStackTrace ())
				lines.add (frame.toString ());
			
			lead = "Caused by: ";	// Everything after the first is a cause
		}
		
		return lines.toArray (new String[lines.size ()]);
	}
	
	
	/**
	 * Add any human name : machine readable name pairs to {@code gHeadersToDrills}.<br/>
	 * Just calls the four argument version with {@code gHeadersToDrills} as the map, and 
	 * {@code kOnlyUnique} as the kind of keywords to get
	 *  
	 * @param name	Unique catalog name.  The four argument version puts it, followed by a '.', 
	 * in front of both the human and machine names
	 * @param path	Path to the catalog file.  Nothing is done unless it has a .columns.tsv file
	 * @return	String specifying the Genome Build of the catalog
	 */
	private static String addCatalogEntries (String name, String path)
	{
		return addCatalogEntries (name, path, gHeadersToDrills, kOnlyUnique);
	}
	
	
	/**
	 * Add any human name : machine readable name pairs to {@code headerMap}.<br/>
	 * Both names get {@code name} and a '.' put in front of them, and the human name also has 
	 * every {@code kFindStrs} string in it replaced with the matching {@code kReplaceStrs} string.  
	 * The keyword arrays gotten for the catalog are updated in place with the new names.  
	 * A mapping that {@code headerMap} already has is never replaced
	 *  
	 * @param name				Unique catalog name, goes in front of both the human and machine names
	 * @param path				Path to the catalog file.  Nothing is done unless it has a .columns.tsv file
	 * @param headerMap			Map to add to.  Must be valid
	 * @param onlyUniqueHuman	Only return those keywords that have human names different from the 
	 * machine names
	 * @return	String specifying the Genome Build of the catalog, null if it has none
	 */
	private static String addCatalogEntries (String name, String path, Map<String, String> headerMap, boolean onlyUniqueHuman)
	{
		String	catalogBuild = CatalogUtils.getCatalogBuild (path);
		if (catalogBuild == null)
			return null;
		
		String[][]	keywords = CatalogUtils.getCatalogKeywords (path, onlyUniqueHuman);
		
		if (keywords != null)
		{
			String[]	humanNames = keywords[kHumanNameCol];
			String[]	machineNames = keywords[kMachineNameCol];
			String		prefix = name.concat (".");
			int			count = Math.min (humanNames.length, machineNames.length);
			
			for (int which = 0; which < count; ++which)
			{
				StringBuffer	qualified = new StringBuffer (prefix);
				
				appendWithChanges (humanNames[which], kFindStrs, kReplaceStrs, qualified);
				humanNames[which] = qualified.toString ();
				machineNames[which] = prefix + machineNames[which];
			}
			
			addEntries (headerMap, humanNames, machineNames, kKeepOld);
		}
		
		return catalogBuild;
	}
	
	
	/**
	 * Append a {@linkplain String} to a {@linkplain StringBuffer}, with every occurrence of one of 
	 * the {@code find} strings swapped for the {@code replace} string at the same array position.  
	 * Works left to right, always taking the nearest match next
	 * 
	 * @param theStr	String to append.  Must not be null
	 * @param find		Array of strings to find.  Must not be null
	 * @param replace	Array of strings to replace the matching {@code find} string with.  Must not be null
	 * @param theBuffer	{@linkplain StringBuffer} in which to add things
	 */
	private static void appendWithChanges (String theStr, String find[], String replace[], StringBuffer theBuffer)
	{
		ModInt	matchLen = new ModInt ();
		ModInt	matchWhich = new ModInt ();
		int		copyFrom = 0;
		
		for (int hit = getNextMatch (theStr, copyFrom, find, matchLen, matchWhich); hit >= 0; 
			 hit = getNextMatch (theStr, copyFrom, find, matchLen, matchWhich))
		{
			theBuffer.append (theStr, copyFrom, hit);	// The untouched text before the match
			theBuffer.append (replace[matchWhich.value]);
			copyFrom = hit + matchLen.value;
		}
		
		// Whatever follows the final match
		int	total = theStr.length ();
		if (copyFrom < total)
			theBuffer.append (theStr, copyFrom, total);
	}


	/**
	 * Find the next occurrence of any of the strings in {@code find} and return where that is, 
	 * its length, and which string it was.  If two strings match at the same position, the one 
	 * earlier in {@code find} wins.<br/>
	 * Null and empty strings in {@code find} are ignored.  An empty string would match at 
	 * {@code start} with a length of 0, so that a caller who moves forward by the length of the 
	 * match, as {@code appendWithChanges} does, would loop forever
	 * 
	 * @param theStr	String to look through for matches, must not be null
	 * @param start		Where in {@code theStr} to start looking for matches
	 * @param find		Array of strings that might match.  Must not be null
	 * @param findLen	{@linkplain ModInt} to fill in with the length of the found string. 
	 * Must not be null.  Not changed if nothing is found
	 * @param whichFind	{@linkplain ModInt} to fill in with the position in {@code find} of the 
	 * found string.  Must not be null.  Not changed if nothing is found
	 * @return	-1 if no match was found, else the position of the first match
	 */
	private static int getNextMatch (String theStr, int start, String[] find, ModInt findLen, ModInt whichFind)
	{
		int	found = -1;
		
		for (int which = 0; which < find.length; ++which)
		{
			String	findStr = find[which];
			
			if ((findStr == null) || (findStr.length () == 0))	// Can never be a real match
				continue;
			
			int	pos = theStr.indexOf (findStr, start);
			
			if ((pos >= 0) && ((found < 0) || (pos < found)))
			{
				found = pos;
				findLen.value = findStr.length ();
				whichFind.value = which;
			}
		}
		
		return found;
	}


	/**
	 * Make a Map holding the first entry of each array as the key, and the rest of the array as 
	 * the value, with every value element having the key and a '.' put in front of it.<br/>
	 * Arrays that are null, or shorter than {@code kMinFieldSize}, are skipped
	 * 
	 * @param fields	2D array of strings, first element is key for the rest
	 * @return	Map, possibly empty, never null.  All elements will have non-empty arrays
	 */
	private static Map<String, String[]> getHeaderMap (String[][] fields)
	{
		Map<String, String[]>	byCatalog = new HashMap<String, String[]> ();
		
		if (fields != null)
		{
			for (String[] row : fields)
			{
				if ((row == null) || (row.length < kMinFieldSize))
					continue;
				
				String		catalogName = row[0];
				String		prefix = catalogName.concat (".");
				String[]	qualified = new String[row.length - 1];
				
				for (int which = 1; which < row.length; ++which)
					qualified[which - 1] = prefix + row[which];
				
				byCatalog.put (catalogName, qualified);
			}
		}
		
		return byCatalog;
	}
	
	
	/**
	 * Make a Map holding the first entry of each array as the key, and with the rest of the 
	 * array turned into drill paths as the value.  Each element has the key and a '.' put in 
	 * front of it and is then looked up in {@code gHeadersToDrills}.  An element with no mapping 
	 * is used as is, and an empty result becomes {@code kBlank}.<br/>
	 * Arrays that are null, or shorter than {@code kMinFieldSize}, are skipped
	 * 
	 * @param fields	2D array of strings, first element is key for the rest
	 * @return	Map, possibly empty, never null.  All elements will have non-empty arrays
	 */
	private static Map<String, String[]> getDrillMap (String[][] fields)
	{
		Map<String, String[]>	byCatalog = new HashMap<String, String[]> ();
		
		if (fields != null)
		{
			for (String[] row : fields)
			{
				if ((row == null) || (row.length < kMinFieldSize))
					continue;
				
				String		catalogName = row[0];
				String		prefix = catalogName.concat (".");
				String[]	drills = new String[row.length - 1];
				
				for (int which = 1; which < row.length; ++which)
				{
					String	requested = row[which];
					String	drill = gHeadersToDrills.get (prefix + requested);
					
					if (drill == null)
						drill = requested;	// If unknown to us, just pass it through
					
					drills[which - 1] = isEmpty (drill) ? kBlank : drill;
				}
				
				byCatalog.put (catalogName, drills);
			}
		}
		
		return byCatalog;
	}
	
	
	/**
	 * Make a set of Strings from element {@code which} of each array in {@code fields} whose 
	 * length is at least {@code minLen}.<br/>
	 * Null arrays are skipped, the same as {@code getHeaderMap} and {@code getDrillMap} do for 
	 * the same data, rather than being a {@linkplain NullPointerException}
	 * 
	 * @param fields	2D array of Strings.  If null, get an empty Set
	 * @param which		Position in each array of the element wanted.  If negative, get an empty Set
	 * @param minLen	Shortest array that will be looked at.  Arrays too short to have an element 
	 * {@code which} are never looked at, whatever this is
	 * @return	Set of Strings, none will be null or empty
	 */
	private static Set<String> getNthRow (String[][] fields, int which, int minLen)
	{
		Set<String>	results = new HashSet<String> ();
		
		if ((fields == null) || (which < 0))
			return results;
		
		minLen = Math.max (minLen, which + 1);
		for (String[] row : fields)
		{
			if ((row == null) || (row.length < minLen))
				continue;
			
			String	cell = row[which];
			
			if (!isEmpty (cell))
				results.add (cell);
		}
		
		return results;
	}


	/**
	 * Turn a 2D array of Strings into a List of tab delimited Strings, where the nth List 
	 * element is built from the nth element of each array.<br/>
	 * Will have as many List elements as the smallest array in data.<br/>
	 * Each line stops at its last non-empty cell, so empty cells at the end of a line are left 
	 * off, and a line with no non-empty cells is an empty String
	 * 
	 * @param data	2D array of Strings.  None can be null.  Any null or empty strings that come 
	 * before a non-empty one will be replaced with {@code kBlank}
	 * @return	List of Strings
	 */
	private static List<String> getAsList (String[][] data)
	{
		int				numRows = getMinNumRows (data);
		List<String>	lines = new ArrayList<String> (numRows);
		
		for (int row = 0; row < numRows; ++row)
		{
			// Where does the real content of this line end?
			int	lastFilled = -1;
			for (int col = 0; col < data.length; ++col)
			{
				if (!isEmpty (data[col][row]))
					lastFilled = col;
			}
			
			StringBuilder	line = new StringBuilder (200);
			
			for (int col = 0; col <= lastFilled; ++col)
			{
				String	cell = data[col][row];
				
				if (col > 0)
					line.append ("\t");
				
				line.append (isEmpty (cell) ? kBlank : cell);	// Hold the column's place if empty
			}
			
			lines.add (line.toString ());
		}
		
		return lines;
	}
	
	
	/**
	 * Find out how long the shortest array in data is
	 * 
	 * @param data	2D array of Strings.  None can be null
	 * @return	Length of shortest array, 0 if there are no arrays
	 */
	private static int getMinNumRows (String[][] data)
	{
		int	shortest = (data.length == 0) ? 0 : data[0].length;
		
		for (int which = 1; which < data.length; ++which)
			shortest = Math.min (shortest, data[which].length);
		
		return shortest;
	}
	
	
	/**
	 * Dump the contents of a String[][] to stdout, tab delimited.<br/>
	 * Just calls the three argument version with "\t" and {@code System.out}
	 * 
	 * @param results	Array to dump
	 */
	public static void dump (String[][] results)
	{
		dump (results, "\t", System.out);
	}
	
	
	/**
	 * Dump the contents of a String[][] to {@code out}.  Each array is a column, so output 
	 * line n holds element n of every array, with {@code delimiter} between them.<br/>
	 * The arrays need not be the same length.  There are as many lines as the longest array 
	 * has elements, and a column that is null, or too short to have an element for a line, 
	 * gets nothing written for it on that line.  This matters because the failure reports 
	 * made by {@code returnFailure} have a one element first column followed by longer ones
	 * 
	 * @param results	Array to dump.  If null will write "null", if it has no columns will write "empty"
	 * @param delimiter	String to use as a delimiter, if null will use "\t"
	 * @param out		Where to send output, if null will use System.err
	 */
	public static void dump (String[][] results, String delimiter, PrintStream out)
	{
		if (out == null)
			out = System.err;
		
		if (results == null)
		{
			out.println ("null");
			return;
		}
		
		int	numCols = results.length;
		if (numCols == 0)
		{
			out.println ("empty");
			return;
		}
		
		if (delimiter == null)
			delimiter = "\t";
		
		// The longest column sets the number of lines, so nothing gets left out
		int	numRows = 0;
		for (String[] column : results)
		{
			if (column != null)
				numRows = Math.max (numRows, column.length);
		}
		
		for (int i = 0; i < numRows; ++i)
		{
			for (int j = 0; j < numCols; ++j)
			{
				String[]	column = results[j];
				
				if (j > 0)
					out.print (delimiter);
				
				if ((column != null) && (i < column.length))	// Short columns just leave the cell empty
					out.print (column[i]);
			}
			
			out.println ();
		}
	}
	
	
	/**
	 * Parse a List of {@linkplain History} into String[][], using the passed in headers as well as the 
	 * History's "Original Headers".<br/>
	 * Within each History the {@code lastToFirst} values come at the very end, but in the output 
	 * they are placed right after the original columns, ahead of the other added columns.  Every 
	 * value goes through the {@code FieldProcessor} for its column on the way
	 * 
	 * @param theHistory	List of History to parse.  Must not be null, if empty will return 
	 * empty String[][]
	 * @param addedHeaders	Headers added for the annotations, in the order of the annotations
	 * @param lastToFirst	Headers for items appearing at the end, which need to get moved to 
	 * start at the first added column.  Must be first in addedHeaders
	 * @return	2D array of results, one column per array.  First element of each array is name of column, 
	 * rest is results.  All columns will be same length.
	 */
	private static String[][] dumpHistory (List<History> theHistory, List<String[]> addedHeaders, String[] lastToFirst)
	{
		int	numResults = theHistory.size ();
		if (numResults == 0)
			return new String[0][];
		
		List<String[]>		columns = new ArrayList<String[]> ();
		int					numAdded = addHeader (theHistory.get (0), addedHeaders, columns, numResults);
		int					totalCols = columns.size ();
		int					firstAdded = totalCols - numAdded;
		int					numMoved = (lastToFirst == null) ? 0 : lastToFirst.length;
		int					movedStart = totalCols - numMoved;	// Where the moved items sit in a History
		String[][]			results = columns.toArray (new String[totalCols][]);
		FieldProcessor[]	processors = getProcessors (firstAdded, totalCols, addedHeaders, lastToFirst);
		
		// Work out, once, which History position feeds each output column
		List<Integer>	sourceOrder = new ArrayList<Integer> ();
		
		for (int i = 0; i < firstAdded; ++i)			// The original columns stay where they are
			sourceOrder.add (i);
		for (int i = movedStart; i < totalCols; ++i)	// Then the items from the end
			sourceOrder.add (i);
		for (int i = firstAdded; i < movedStart; ++i)	// Then the rest of the added columns
			sourceOrder.add (i);
		
		int	row = 1;	// Row 0 has the header
		for (History history : theHistory)
		{
			int	outCol = 0;
			
			for (int source : sourceOrder)
			{
				results[outCol][row] = processors[source].process (history.get (source));
				++outCol;
			}
			
			++row;
		}
		
		return results;
	}
	
	
	/**
	 * Determine the {@linkplain FieldProcessor} for each field
	 * 
	 * @param firstCol		First added column
	 * @param numCols		Size of returned array
	 * @param addedHeaders	Where to get the headers from to do the {@linkplain FieldProcessor} lookup
	 * @param lastToFirst	Headers for items appearing at the end, whose output starts at 
	 * {@code firstCol}, but which is actually at the end.  Must be first in addedHeaders
	 * @return	Array of length {@code numCols}, each position having a processor
	 */
	private static FieldProcessor[] getProcessors (int firstCol, int numCols, List<String[]> addedHeaders, String[] lastToFirst)
	{
		FieldProcessor[]	result = new FieldProcessor[numCols];
		int					next = 0;
		
		// Everything before the first added column gets the default processor
		while (next < firstCol)
			result[next++] = gDefaultProcessor;
		
		// Line the header sets up in the order their processors are assigned: every set other 
		// than lastToFirst in list order, then lastToFirst itself at the very end
		List<String[]>	ordered = new ArrayList<String[]> (addedHeaders.size () + 1);
		
		for (String[] headers : addedHeaders)
		{
			if (headers != lastToFirst)	// Identity test: it has to be the very same array to be held back
				ordered.add (headers);
		}
		
		if (lastToFirst != null)
			ordered.add (lastToFirst);
		
		for (String[] headers : ordered)
		{
			for (String header : headers)
			{
				FieldProcessor	found = gFieldProcessorMap.get (header);
				
				// Headers with no registered processor fall back to the default one
				result[next++] = (found != null) ? found : gDefaultProcessor;
			}
		}
		
		return result;
	}
	
	
	/**
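	 * Add one column per header to {@code results}: first the columns named in the last line of the 
	 * history's original header, then one column for every header in {@code addedHeaders}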
	 * 
	 * @param history		{@linkplain History} object holding the headers to start with
	 * @param addedHeaders	{@linkplain List} of String[] holding all the other headers to add
	 * @param results		On entry: Empty {@linkplain List} of String[].  
	 * On return: One array per column, arrays set to output length, 0th element with header
	 * @param numResults	Size of output data.  Will make every array in {@code results} length 
	 * {@code numResults} + 1
	 * @return	Number of columns added from addedHeaders
	 */
	private static int addHeader (History history, List<String[]> addedHeaders, List<String[]> results, int numResults)
	{
		int				columnLength = numResults + 1;	// Slot 0 holds the header, the data follows it
		List<String>	headerLines = history.getMetaData ().getOriginalHeader ();
		int				lineCount = headerLines.size ();
		
		// Only the final header line is used, anything before it is ignored
		if (lineCount > 0)
		{
			String[]	names = SplitFile.mySplit (headerLines.get (lineCount - 1), "\t", kReturnAll);
			
			for (int i = 0; i < names.length; ++i)
			{
				String[]	column = new String[columnLength];
				
				column[0] = names[i];
				results.add (column);
			}
		}
		
		// Now one column for each added header, keeping count of how many that makes
		int	addedCount = 0;
		
		for (String[] headerSet : addedHeaders)
		{
			for (int i = 0; i < headerSet.length; ++i)
			{
				String[]	column = new String[columnLength];
				
				column[0] = headerSet[i];
				results.add (column);
			}
			
			addedCount += headerSet.length;
		}
		
		return addedCount;
	}
	
	
	/**
	 * Create a {@linkplain History} to {@linkplain History} {@linkplain Pipeline} from a List of paths as Strings
	 * 
	 * @param paths			{@linkplain List} of paths to database files
	 * @param start			{@linkplain Pipe} from History to History to go at beginning of Pipeline, or null
	 * @param reverseList	If true, add files as pipes in reverse order, so parse order matches path order
	 * @return	{@linkplain History} to {@linkplain History} {@linkplain Pipeline}, possibly empty, never null
	 * @throws IOException 
	 */
	private static Pipeline<History, History> makeFindPipes (List<String> paths, Pipe<History, History> start, boolean reverseList) throws IOException
	{
		Pipeline<History, History>	result = new Pipeline<History, History> ();
		
		if (start != null)
			result.addPipe (start);
		
		if (paths != null)
		{
			List<String>	ordered = paths;
			
			if (reverseList)
			{
				// Reverse a copy, so the caller's list keeps its order
				ordered = new ArrayList<String> (paths);
				Collections.reverse (ordered);
			}
			
			int	added = 0;
			for (String path : ordered)
			{
				// Each pipe adds to the end of the history, so the position handed to the 
				// next pipe has to be one further back for every pipe already added
				int	target = -1 - added;
				
				if (gOverlapPaths.contains (path))
					result.addPipe (new OverlapPipe (path, target));
				else
					result.addPipe (new SameVariantPipe (path, false, target));
				
				++added;
			}
		}
		
		return result;
	}
	
	
	/**
	 * Create a {@linkplain History} to {@linkplain History} {@linkplain Pipeline} from a List of 
	 * JSON "Paths" to drill for 
	 * 
	 * @param paths	{@linkplain List} of sets of JSON paths of fields we're interested in, in reverse 
	 * order that their databases were added to the annotation logic.  A null entry adds no pipe, it 
	 * just moves the drill column back by one.  If the List itself is null, an empty Pipeline is returned
	 * @return	{@linkplain History} to {@linkplain History} {@linkplain Pipeline}, possibly empty, never null
	 * @throws IOException 
	 */
	private static Pipeline<History, History> makeDrillPipes (List<String[]> paths) throws IOException
	{
		Pipeline<History, History>	result = new Pipeline<History, History> ();
		
		if (paths == null)
			return result;
		
		int	column = -1;	// Moves further negative as entries are consumed
		
		for (String[] drillSet : paths)
		{
			if (drillSet == null)
			{
				// A null entry adds no pipe, it only steps the column back by one
				--column;
				continue;
			}
			
			result.addPipe (new DrillPipe (false, drillSet, column));
			column -= drillSet.length;	// Step back by the number of paths in this set
		}
		
		return result;
	}
	
	
	/**
	 * Make sure a file exists, exiting with an error message if it doesn't
	 * 
	 * @param inFile	File to test
	 * @param title		Name of the file, use in the error message
	 */
	private static void testFile (File inFile, String title)
	{
		if (inFile.exists ())
			return;
		
		// A missing file is fatal: say which one, then stop the JVM with a failure status
		String	message = title + " doesn't exist: " + inFile.getAbsolutePath ();
		
		System.err.println (message);
		System.exit (1);
	}
	
	
	/**
	 * Load a tab delimited file from disk, returning the columns of the file as arrays in a String[][]
	 * Assumes every line has same number of columns as the first line
	 * 
	 * @param inFile	File to process
	 * @return	2D array of Strings, the columns of the file in order
	 */
	public static String[][] loadFile (File inFile)
	{
		final String	tab = "\t";	// This overload always splits on tabs
		
		// kNoHeader is what gets passed as the number of lines to skip
		return loadFile (inFile, tab, kNoHeader);
	}
	
	
	/**
	 * Load a tab delimited file from disk, returning the columns of the file as arrays in a String[][]
	 * Assumes every line has same number of columns as the first line
	 * 
	 * @param inFile	File to process
	 * @param skipLines	Number of lines to skip before start parsing
	 * @return	2D array of Strings, the columns of the file in order
	 */
	public static String[][] loadFile (File inFile, long skipLines)
	{
		final String	tab = "\t";	// This overload always splits on tabs
		
		return loadFile (inFile, tab, skipLines);
	}
	
	
	/**
	 * Load a delimited file from disk, returning the columns of the file as arrays in a String[][]
	 * Assumes every line has same number of columns as the first line
	 * 
	 * @param inFile	File to process
	 * @param delimiter	Delimiter to use when processing the file
	 * @return	2D array of Strings, the columns of the file in order
	 */
	public static String[][] loadFile (File inFile, String delimiter)
	{
		// kNoHeader is what gets passed as the number of lines to skip
		String[][]	columns = loadFile (inFile, delimiter, kNoHeader);
		
		return columns;
	}
	
	
	/**
	 * Load a delimited file from disk, returning the columns of the file as arrays in a String[][]
	 * Assumes every line has same number of columns as the first line
	 * 
	 * @param inFile	File to process
	 * @param delimiter	Delimiter to use when processing the file
	 * @param skipLines	Number of lines to skip before start parsing
	 * @return	2D array of Strings, the columns of the file in order
	 */
	public static String[][] loadFile (File inFile, String delimiter, long skipLines)
	{
		BufferedReader	dataReader = null;
		
		try
		{
			dataReader = new Compressor (inFile, null).getReader ();
			
			String	line = dataReader.readLine ();
			
			// Throw away the requested number of leading lines, or as many as the file has
			while ((line != null) && (skipLines > 0))
			{
				--skipLines;
				line = dataReader.readLine ();
			}
			
			// File ran out before any data line was reached
			if (line == null)
				return new String[0][];
			
			// The first line kept decides how many columns every later line is given
			String[]			cols = SplitFile.mySplit (line, delimiter, kReturnAll);
			int					numCols = cols.length;
			List<List<String>>	columns = new ArrayList<List<String>> (numCols);
			
			for (String column : cols)
			{
				List<String>	theColumn = new ArrayList<String> ();
				
				theColumn.add (column);
				columns.add (theColumn);
			}
			
			while ((line = dataReader.readLine ()) != null)
			{
				cols = SplitFile.mySplit (line, delimiter, kReturnAll);
				
				// Lines with too few fields are padded out with null, fields beyond 
				// numCols are dropped, so every column ends up the same length
				int	have = cols.length;
				for (int i = 0; i < numCols; ++i)
					columns.get (i).add ((i < have) ? cols[i] : null);
			}
			
			String[][]	results = new String[numCols][];
			
			// Size each array from its own column, so a first line that split into 
			// no columns at all gives an empty result rather than an exception
			for (int i = 0; i < numCols; ++i)
			{
				List<String>	theColumn = columns.get (i);
				
				results[i] = theColumn.toArray (new String[theColumn.size ()]);
			}
			
			return results;
		}
		catch (IOException oops)
		{
			// Callers get null when the file can't be read
			oops.printStackTrace ();
		}
		finally
		{
			// Always release the file, whether we finished, returned early, or failed
			if (dataReader != null)
			{
				try
				{
					dataReader.close ();
				}
				catch (IOException ignored)
				{
					// Reading is already over, a failed close changes nothing for the caller
				}
			}
		}
		
		return null;
	}
	
	
	/**
	 * Load {@linkplain Catalog}s from a file
	 * 
	 * @param catFile	File that must exist
	 * @return	Array of {@linkplain Catalog}, one per line of the file
	 */
	public static Catalog[] loadCatalogs (File catFile)
	{
		String	noBaseDir = null;	// Null means the catalog paths are used exactly as written
		
		return loadCatalogs (catFile, noBaseDir);
	}
	
	
	/**
	 * Load {@linkplain Catalog}s from a file
	 * 
	 * @param catFile	File that must exist
	 * @param baseDir	Path to prepend to each catalog file path, must end with "/".  If null ignore
	 * @return	Array of {@linkplain Catalog}, one per line of the file
	 */
	public static Catalog[] loadCatalogs (File catFile, String baseDir)
	{
		BufferedReader	dataReader = null;
		
		try
		{
			List<Catalog>	results = new ArrayList<Catalog> ();
			String			line;
			
			dataReader = new Compressor (catFile, null).getReader ();
			
			while ((line = dataReader.readLine ()) != null)
			{
				// If it is a comment line, then skip
				if (line.startsWith ("#"))
					continue;
				
				String[]	cols = SplitFile.mySplit (line, "\t", kReturnAll);
				
				// Lines without every catalog column are silently left out
				if (cols.length < kNumCatCols)
					continue;
				
				String	name = cols[kName];
				String	path = cols[kPath];
				boolean	overlap = Boolean.parseBoolean (cols[kIsOverlap]);
				
				// Plain concatenation, which is why baseDir has to end with "/"
				if (baseDir != null)
					path = baseDir + path;
				
				results.add (new Catalog (name, path, overlap));
			}
			
			return results.toArray (new Catalog[results.size ()]);
		}
		catch (IOException oops)
		{
			// Callers get null when the file can't be read
			oops.printStackTrace ();
		}
		finally
		{
			// Always release the file, whether we finished or failed
			if (dataReader != null)
			{
				try
				{
					dataReader.close ();
				}
				catch (IOException ignored)
				{
					// Reading is already over, a failed close changes nothing for the caller
				}
			}
		}
		
		return null;
	}
	
	
	/**
	 * Load lines from a file, split on \t
	 * 
	 * @param theFile	File that must exist
	 * @return	Array of Arrays of Strings, one per line of the file
	 */
	public static String[][] loadLines (File theFile)
	{
		final String	tab = "\t";	// This overload always splits on tabs
		
		return loadLines (theFile, tab);
	}
	
	
	/**
	 * Load lines from a file, split on {@code delimiter}
	 * 
	 * @param theFile	File that must exist
	 * @param delimiter	Delimiter to use when processing the file
	 * @return	Array of Arrays of Strings, one per line of the file
	 */
	public static String[][] loadLines (File theFile, String delimiter)
	{
		BufferedReader	dataReader = null;
		
		try
		{
			List<String[]>	results = new ArrayList<String[]> ();
			String			line;
			
			dataReader = new Compressor (theFile, null).getReader ();
			
			// One array per line, however many fields the line happens to have
			while ((line = dataReader.readLine ()) != null)
				results.add (SplitFile.mySplit (line, delimiter, kReturnAll));
			
			return results.toArray (new String[results.size ()][]);
		}
		catch (IOException oops)
		{
			// Callers get null when the file can't be read
			oops.printStackTrace ();
		}
		finally
		{
			// Always release the file, whether we finished or failed
			if (dataReader != null)
			{
				try
				{
					dataReader.close ();
				}
				catch (IOException ignored)
				{
					// Reading is already over, a failed close changes nothing for the caller
				}
			}
		}
		
		return null;
	}
	
	
	/**
	 * Take a {@linkplain Pipe} that emits {@linkplain History}, and run it to completion, 
	 * saving all its results to a List
	 * 
	 * @param pipe	Pipe to use.  If null will return an empty list
	 * @return	List of History, possibly empty, never null
	 */
	public static <T> List<History> getResults (Pipe<T, History> pipe)
	{
		List<History>	collected = new ArrayList<History> ();
		
		// No pipe means nothing to run, hand back the empty list
		if (pipe == null)
			return collected;
		
		while (pipe.hasNext ())
		{
			// Taken as a plain Object, so anything that isn't a History can be wrapped instead
			Object	item = pipe.next ();
			History	entry = (item instanceof History) ? (History) item : new History (item.toString ());
			
			collected.add (entry);
		}
		
		return collected;
	}
	
	
	/**
	 * Determine if a String is null or empty
	 * 
	 * @param test	String to test
	 * @return	True if {@code test} is null or empty, else false
	 */
	public static final boolean isEmpty (String test)
	{
		// Null counts as empty, so callers never need their own null check
		if (test == null)
			return true;
		
		return test.length () == 0;
	}
	
	
	/**
	 * Add a string to report if we have an error
	 * 
	 * @param reportStr	String to add, will be ignored if null or empty
	 */
	public static final void report (String reportStr)
	{
		// Null and empty strings are dropped, everything else is kept for the error report
		if (!isEmpty (reportStr))
			errorStrings.add (reportStr);
	}
	
}
