package edu.mayo.bior.pipeline.createCatalogProps;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import com.google.gson.JsonElement;
import com.google.gson.JsonNull;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import edu.mayo.cli.InvalidDataException;
import com.tinkerpop.pipes.util.Pipeline;

import edu.mayo.bior.util.progress.NumLineProgressHandler;
import edu.mayo.pipes.UNIX.CatGZPipe;
import edu.mayo.pipes.UNIX.CatPipe;
import edu.mayo.pipes.history.ColumnMetaData;
import edu.mayo.pipes.history.ColumnMetaData.Type;

/** Crawls an existing catalog, grabbing all column names, and attempts to guess the type and number from all data in the file
 * @author Michael Meiners (m054457)
 * Date created: Aug 16, 2013
 */
public class ColumnMetaFromCatalogCrawling {

	private JsonParser mJsonParser = new JsonParser();
	
	
	/** NOTE: ONLY CHANGE THIS FOR TESTING LARGE ROW COUNTS (> Integer.MAX_VALUE)!!!!!!!!!!  */
	public static long NUM_LINES_START = 0;
	
	long numLines = NUM_LINES_START;
	
	public static void main(String[] args) {
		try {
			String a = "1.0";
			String[] strs = a.split(";");
			List<ColumnMetaData> colMeta = new ColumnMetaFromCatalogCrawling().getColumnMetadata("src/test/resources/example_dbSNP_catalog.tsv.bgz", null);
			System.out.println("Done.");
		}catch(Exception e) {
			e.printStackTrace();
		}
	}
	

	/** Process each line in the catalog bgzip (or tsv) file, noting the JSON path, field type, and count (will need to split by delimiters)
	 * @param catalogFilePath path to tsv or bgzip file
	 * @param progressHandler callback to handle progress callbacks, usually to display information to the user
	 * @return
	 * @throws IOException 
	 * @throws InvalidDataException 
	 */
	public List<ColumnMetaData> getColumnMetadata(String catalogFilePath, NumLineProgressHandler progressHandler)
		throws IOException, InvalidDataException {
		Map<String,ColumnTypeCounter> colMap = new LinkedHashMap<String, ColumnTypeCounter>();

		boolean isGzip = catalogFilePath.endsWith(".bgz") || catalogFilePath.endsWith(".gz");
		Pipeline pipe = new Pipeline( isGzip ? new CatGZPipe("gzip") : new CatPipe() );
		pipe.setStarts(Arrays.asList(new File(catalogFilePath).getCanonicalPath()));
		
		// Read each line from the catalog
		long numLinesToCallbackOn = -1;
		if (progressHandler != null)
		{
			progressHandler.startingRead();
			numLinesToCallbackOn = progressHandler.getNumLinesToCallbackOn();
		}
		while(pipe.hasNext()) {
			String row = (String)(pipe.next());
			
			// If this is a blank line or a header line, then skip
			if( row == null || row.length() == 0 || row.startsWith("#") )
				continue;
			
			// Split by tab - JSON will be the last column
			String[] cols = row.split("\t");
			JsonObject jsonObj = mJsonParser.parse(cols[cols.length-1]).getAsJsonObject();
			addJsonKeysToMap(jsonObj.getAsJsonObject(), null, colMap);
			
			// Print an indicator of # of lines read
			this.numLines++;
			if (numLinesToCallbackOn > 0)
			{
				if (this.numLines % numLinesToCallbackOn == 0)
				{
					progressHandler.readNumLines(this.numLines, -1);
				}
			}
		}
		if (progressHandler != null)
		{
			progressHandler.readAllLines(this.numLines);
		}

		// Pull out the ColumnMetaData objects and add them to a list
		List<ColumnMetaData> colMetaList = new ArrayList<ColumnMetaData>();
		for(String key : colMap.keySet()) {
			ColumnTypeCounter colType = colMap.get(key);
			if( colType != null ) {
				setTypeToStringIfNull(colType);
				setHumanReadableNameToColumnNameIfMissing(colType);
				colMetaList.add(colType.getColumnMetaData());
			}
		}
		
		return colMetaList;
	}
	

	
	private void setHumanReadableNameToColumnNameIfMissing(ColumnTypeCounter colType) {
		if( colType == null  ||  colType.getColumnMetaData() == null ) {
			return;
		}
		
		String humanReadableName = colType.getColumnMetaData().getHumanReadableName();
		if( humanReadableName == null  ||  humanReadableName.trim().length() == 0 ) {
			colType.getColumnMetaData().setHumanReadableName(colType.getColumnMetaData().getColumnName());
		}
			
	}


	// If the type is still null, that probably means it was always an empty array, so set it to type "String"
	private void setTypeToStringIfNull(ColumnTypeCounter colType) {
		if( null == colType.getColumnMetaData().getType() ) {
			colType.getColumnMetaData().setType(Type.String);
		}
	}


	/** Given a JSON string, pull out all keys.  Ex: {"CHR":1,"POS":111,"INFO":{"SAO":1.0,"SSR":2.0}}<br>
	 *  and values, and add them to the HashMap that maps columnName to ColumnTypeCounter object
	 * @param jsonStr The full JSON string to search
	 * @param parentJsonPath - The parent of the current JSON key (since nested, for "INFO.SSR.KEY1" for KEY1 it would be "INFO.SSR"
	 * @param colMap - The mapping from columnName to ColumnTypeCounter that contains the tally info for that column.  This HashMap will be modified as the method recursively calls itself.
	 * NOTE: modifies the HashMap 
	 * @throws InvalidDataException 
	 */
	private void  addJsonKeysToMap(JsonObject jsonObj, String parentJsonPath, Map<String,ColumnTypeCounter> colMap) throws InvalidDataException {
	    for (Map.Entry<String,JsonElement> entry : jsonObj.entrySet()) {
	    	// Key may be several layers deep (such as INFO.OBJ.SOMEKEY), so build the full JSON path
	    	String key = (parentJsonPath == null || parentJsonPath.length() == 0  ?  ""  :  parentJsonPath + ".") + entry.getKey();
            JsonElement value = entry.getValue();
	    	
            // If value is null, then just add the key with a value of null since we can't determine the type.
            // BUT, only add it if there is not already an entry in the map for that, as we don't want to replace a valid value (say "AN.a"=INTEGER, with "An.a"=null)
            // Adding it to the map will at least preserve the order in which the keys are encountered
            if( value instanceof JsonNull ) {
            	if( colMap.get(key) == null ) {
            		colMap.put(key, null);
            	}
            // If it is a complex object, then break it down further
            } else if( value instanceof JsonObject )
            	addJsonKeysToMap(value.getAsJsonObject(), key, colMap);
            else // should be a primitive or an array
            	addKeyValue(key, value, colMap);
	    }
	}


	/** Add a key and value to the HashMap that maps a columnName to a ColumnTypeCounter object,
	 *  incrementing the appropriate counters depending on what the value is 
	 * @param key - The JSON key
	 * @param value - The JSON value (a primitive)
	 * @param colMap - The mapping between columnName and ColumnTypeCounter
	 * @throws InvalidDataException 
	 */
	private void addKeyValue(String key, JsonElement value,	Map<String, ColumnTypeCounter> colMap) throws InvalidDataException {
		// Get the current ColumnTypeCounter matching the key.  If none, create one and add to hashmap
		ColumnTypeCounter colTypeCount = colMap.get(key);

		if( colTypeCount == null ) {
			colTypeCount = new ColumnTypeCounter(key, "");
			colMap.put(key, colTypeCount);
			
			setTypeAndCount(value, colTypeCount);
			
			// Return, since this is the first time we've seen this key
			return;
		}

		Type currentType = colTypeCount.getColumnMetaData().getType();
		String currentCount = colTypeCount.getColumnMetaData().getCount();

		Type newType 	= colTypeCount.determineType(value);
		String newCount = colTypeCount.determineCount(value);
		
		// If the current type is null (which could happen if we previously encountered an array with no values) OR
		// if the type is an integer and the new value type is a Float, then set the type and count again OR
		// if the type was a non-string and the new value type is String, then set type and count again
		// NOTE: If the type had been an integer previously, we want to set it again in case the type now registers as a Float 
		//       since both Integer and Float are treated as numbers and can toggle between values like: 0.314 and 0, we don't want the last value to determine the type
		// NOTE: Types could both be null
		boolean isIntToFloat  = Type.Integer.equals(currentType)  &&  Type.Float.equals(newType);
		if( currentType == null  ||  isIntToFloat ) {
			setTypeAndCount(value, colTypeCount);
		}
		
		throwExceptionIfIncompatibleTypeChange(key, currentType, newType, isIntToFloat);
		
		// Throw an exception if it goes from a single value to an array, or an array to single value
		throwExceptionIfCountChanged(key, currentCount, newCount);
	}


	private void throwExceptionIfIncompatibleTypeChange(String key, Type currentType, Type newType, boolean isIntToFloat) throws InvalidDataException {
		// If the type changes from anything other than int-to-float or float-to-int, 
		// then this could be a problem, so throw InvalidDataException
		boolean isFloatToInt = Type.Float.equals(currentType)  &&  Type.Integer.equals(newType);
		boolean isTypeChange = currentType != null  &&  newType != null  &&  ! currentType.equals(newType);
		if( isTypeChange  &&  ! (isIntToFloat || isFloatToInt) ) {
			throw new InvalidDataException("ERROR: Incompatible type change for key [" + key + "] from [" + currentType + "] to [" + newType + "] on data line " + (this.numLines + 1));
		}
	}

	/** Throw exception if the count changed */ 
	private void throwExceptionIfCountChanged(String key, String currentCount, String newCount) throws InvalidDataException {
		if( currentCount != null  &&  ! currentCount.equals(newCount) ) {
			throw new InvalidDataException("ERROR: Incompatible count change for key [" + key + "] from [" + currentCount + "] to [" + newCount + "] on data line " + (this.numLines + 1));			
		}
	}



	private void setTypeAndCount(JsonElement value, ColumnTypeCounter colTypeCount) {
		// Set the type
		Type valType = colTypeCount.determineType(value);
		colTypeCount.getColumnMetaData().setType(valType);
		
		// Set the count
		String count = colTypeCount.determineCount(value);
		colTypeCount.getColumnMetaData().setCount(count);
	}
	
}
