package edu.mayo.bior.catalogremoveduplicates;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

/*
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.JsonProcessingException;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.node.ArrayNode;
import org.codehaus.jettison.json.JSONArray;
*/

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;

import com.google.gson.JsonArray;
import com.tinkerpop.pipes.AbstractPipe;
import com.tinkerpop.pipes.Pipe;

import edu.mayo.pipes.history.History;

/**
 * Scans a catalog and removes duplicate rows, where two rows are duplicates when their
 * chrom:min-max coordinates AND the values of the configured JSON keys match.
 * <p>
 * Rows are expected to arrive grouped by coordinate: only rows that are adjacent in the
 * incoming stream and share the same (chrom, min, max) are compared with each other.
 * The surviving rows are emitted downstream in their original order; this pipe itself does
 * not write any file (presumably the caller writes them to the new bgzipped catalog).
 * <p>
 * Any failure while processing a row (bad number, unparseable JSON, ...) is reported on
 * stderr and surfaced as a {@link NoSuchElementException}, which ends the stream.
 */
public class CatalogRemoveDuplicatesPipe extends AbstractPipe<History, History>  {

	/** Shared JSON parser; reused across rows instead of being re-created for every comparison. */
	private static final ObjectMapper JSON_MAPPER = new ObjectMapper();

	/** Zero-based index of the column holding the JSON used for key comparison. */
	private static final int JSON_COL_INDEX = 3;

	// Parameters passed in via cmd-line args
	// Keys are looked up as top-level field names of the row's JSON object
	List<String> mJsonPathsToCompareForDuplicates = new ArrayList<String>();
	// Whether to match on any array elements, or match exactly on the entire array
	// Match on any:  [a,b,c] vs [a,c] = MATCH
	boolean mIsMatchOnAnyArrayElements = false;
	// Chrom,min,max regions to skip over - these will be output, but will not be scanned when removing duplicates
	// (all lines in these regions will be output)
	// Format:  chrom:min-max   (min and max are optional; a missing bound is treated as open-ended)
	List<String> mRegionsToSkip = new ArrayList<String>();



	// Keep a line buffer of rows on the same position (chrom, min, max), and compare all lines at that position
	private List<History> mLinesWithSameCoordsBuffer = new ArrayList<History>();
	// The most recently read line that could not go into the current buffer (different coords or skip region);
	// it is carried over to the next call
	private History mLastLine = null;
	// Number of lines read from upstream so far (used in warning/error messages)
	private long mLineNum = 0;


	/**
	 * @param jsonPathsToCompare    JSON keys whose values must all match for two rows at the same
	 *                              coordinates to be considered duplicates; null is treated as empty
	 * @param isCompareAnyInSubset  if true, two JSON arrays match when they share at least one element;
	 *                              if false, values must be exactly equal
	 * @param regionsToSkip         regions ("chrom", "chrom:min" or "chrom:min-max") whose rows are always
	 *                              emitted without duplicate checking; null is treated as empty
	 */
	public CatalogRemoveDuplicatesPipe(List<String> jsonPathsToCompare, boolean isCompareAnyInSubset, List<String> regionsToSkip) {
		// Fall back to empty lists so a null argument does not blow up later on the first row
		mJsonPathsToCompareForDuplicates = (jsonPathsToCompare != null) ? jsonPathsToCompare : new ArrayList<String>();
		mIsMatchOnAnyArrayElements = isCompareAnyInSubset;
		mRegionsToSkip = (regionsToSkip != null) ? regionsToSkip : new ArrayList<String>();
	}


	//--------------------------------------------------------------------------------------------------
	// Example rows:
	// 	 1	100	100	{"_refAllele":"A"}		<-- Add to buffer (buffer empty), no return line
	// 	 1	100	100	{"_refAllele":"A"}		<-- Duplicate of a buffered row (same coord and ref), dropped, no return line
	// 	 1	100	100	{"_refAllele":"T"}		<-- Add to buffer (same coord but different key), no return line
	// 	 1	200	200	{"_refAllele":"A"}		<-- Different coord, so hold this row back and start returning the buffered rows (x2)
	//   M  10  10  {"_refAllele":"C"}      <-- A region to skip, so it is never compared; it is held back and emitted as-is
	// 	 (end)								<-- End (no line), so return whatever is still buffered

	/**
	 * Returns the next non-duplicate row.
	 * <p>
	 * Buffered rows are drained first.  Once the buffer is empty, the row held back from the previous
	 * call (if any) starts a new buffer, and upstream rows are read until one falls in a skip region
	 * or at different coordinates than the buffer.
	 *
	 * @throws NoSuchElementException when the input is exhausted, or when a row could not be processed
	 *         (the message is also printed to stderr and the original exception is attached as the cause)
	 */
	@Override
	protected History processNextStart() throws NoSuchElementException {
		try {
			if( ! this.starts.hasNext()  &&  mLinesWithSameCoordsBuffer.isEmpty()  &&  mLastLine == null )
				throw new NoSuchElementException();

			// Keep emptying the buffer until there are no lines left
			if( ! mLinesWithSameCoordsBuffer.isEmpty() )
				return mLinesWithSameCoordsBuffer.remove(0);

			// Buffer is empty so start a new one with the line held back by the previous call.
			// Lines in skip regions are passed through untouched; all others get the same
			// validation as every other buffered line.
			if( mLastLine != null ) {
				if( isRegionToSkip(mLastLine) )
					mLinesWithSameCoordsBuffer.add(mLastLine);
				else
					addLineToBuffer(mLastLine);
				mLastLine = null;
			}

			// Now, fill the buffer as long as there are items to add and the line is not a duplicate
			while( this.starts.hasNext() ) {
				mLineNum++;
				mLastLine = this.starts.next();
				// If this is region to skip OR it is NOT in the same chrom and position as the lines in the buffer, then
				// break out of the loop (keeping mLastLine for the next call) so we can return a value from the buffer
				if( isRegionToSkip(mLastLine)  ||  ( ! mLinesWithSameCoordsBuffer.isEmpty() && ! isSameRegionAsBuffer(mLastLine)) )
					break;

				// Else, only add the line if it is not the same as another line in the buffer
				if( ! isAMatchAlreadyInBuffer(mLastLine) )  {
					addLineToBuffer(mLastLine);
				}
				// The line was either added to the buffer OR was not added and should NOT be added to the catalog, so set mLastLine to null
				mLastLine = null;
			}

			if( ! mLinesWithSameCoordsBuffer.isEmpty() )
				return mLinesWithSameCoordsBuffer.remove(0);
			else  // Loop around again to check boundary conditions
				return processNextStart();
		} catch( NoSuchElementException e ) {
			throw e;
		} catch(NumberFormatException e) {
			String errMsg = "Bad number format: " + e.getMessage() + "   Verify that the catalog has four columns, and that the first three are chromosome, startPosition, endPosition";
			System.err.println(errMsg);
			throw asNoSuchElementException(errMsg, e);
		} catch(Exception e) {
			System.err.println(e.getMessage());
			throw asNoSuchElementException(e.getMessage(), e);
		}
	}


	/** Builds the NoSuchElementException reported to the caller while keeping the original failure as its cause. */
	private static NoSuchElementException asNoSuchElementException(String message, Exception cause) {
		NoSuchElementException wrapped = new NoSuchElementException(message);
		wrapped.initCause(cause);
		return wrapped;
	}


	/** @return true if the buffer is non-empty and the line has the same coordinates as the first buffered line */
	private boolean isSameRegionAsBuffer(History line) {
		if( mLinesWithSameCoordsBuffer.isEmpty() )
			return false;
		return isLinesAtSameCoords(line, mLinesWithSameCoordsBuffer.get(0));
	}


	/**
	 * Checks whether the line overlaps any of the configured skip regions.
	 *
	 * @return true if the line's chromosome matches a region's chromosome (ignoring case) and
	 *         its [start,end] range overlaps that region's [min,max] range
	 * @throws NumberFormatException if the line's start/end columns, or a region's bounds, are not integers
	 */
	private boolean isRegionToSkip(History line) {
		String chrom = line.get(0);
		long start = Long.parseLong(line.get(1));
		long end   = Long.parseLong(line.get(2));

		for(String region : mRegionsToSkip) {
			// "chrom", "chrom:min" or "chrom:min-max"; missing bounds leave the range open-ended
			String[] parts = region.split(":|-");
			String regionChrom = parts[0];
			long regionStart = 0;
			long regionEnd   = Long.MAX_VALUE;

			if( parts.length > 1 )
				regionStart = Long.parseLong(parts[1]);

			if( parts.length > 2 )
				regionEnd = Long.parseLong(parts[2]);

			boolean isBeforeRegion = end < regionStart;
			boolean isAfterRegion  = start > regionEnd;
			boolean isWithinRegion = chrom.equalsIgnoreCase(regionChrom)
					&& ! isBeforeRegion
					&& ! isAfterRegion;
			if( isWithinRegion )
				return true;
		}
		// Not in any of the skip regions
		return false;
	}


	/** @return true if both lines have the same chrom (ignoring case) and textually identical min and max columns */
	private boolean isLinesAtSameCoords(History line1, History line2) {
		// Chrom = col1,   minBp = col2,   maxBp = col3
		boolean isSameChrom = line1.get(0).equalsIgnoreCase(line2.get(0));
		boolean isSameMinBp = line1.get(1).equals(line2.get(1));
		boolean isSameMaxBp = line1.get(2).equals(line2.get(2));
		return isSameChrom && isSameMinBp && isSameMaxBp;
	}

	/**
	 * Adds the line to the buffer unless it is empty or its last column is not a JSON object.
	 * Rejected lines are reported on stderr and are not emitted.
	 */
	private void addLineToBuffer(History line) {
		// Don't add the line if it is null or contains only one empty column
		if( line == null  ||  (line.size() == 1 && line.get(0).trim().length() == 0) ) {
			System.err.println("Warning: empty line encountered at line " + mLineNum);
			return;
		}
		// Don't add the line if it has no columns at all, or its last column does not look like "{...}".
		// (The size check must short-circuit before the last column is read.)
		if( line.size() < 1  ||  ! isJsonObjectText(line.get(line.size()-1)) ) {
			System.err.println("ERROR:  Catalog line should contain JSON as the last column.  Line #" + mLineNum);
			return;
		}
		mLinesWithSameCoordsBuffer.add(line);
	}


	/** @return true if the text, ignoring surrounding whitespace, starts with '{' and ends with '}' */
	private static boolean isJsonObjectText(String text) {
		if( text == null )
			return false;
		String trimmed = text.trim();
		return trimmed.startsWith("{") && trimmed.endsWith("}");
	}


	/** Compare current line to all other lines in buffer.  If it matches any by coords AND JSON paths, then return true */
	private boolean isAMatchAlreadyInBuffer(History lineToChk) throws Exception {
		for(History lineInBuffer : mLinesWithSameCoordsBuffer) {
			if( isLinesAtSameCoords(lineToChk, lineInBuffer)  &&  isMatchOnJsonKeys(lineToChk, lineInBuffer) )
				return true;
		}
		// No matches found, so return false
		return false;
	}


	/**
	 * Parses the JSON column of both lines and compares the value of every configured key.
	 *
	 * @return true only if all configured keys match (trivially true when no keys are configured)
	 * @throws JsonProcessingException if either JSON column cannot be parsed
	 * @throws IOException             declared by the JSON parser
	 */
	private boolean isMatchOnJsonKeys(History line1, History line2) throws JsonProcessingException, IOException {
		JsonNode tree1 = JSON_MAPPER.readTree(line1.get(JSON_COL_INDEX));
		JsonNode tree2 = JSON_MAPPER.readTree(line2.get(JSON_COL_INDEX));
		for(String jsonPath : mJsonPathsToCompareForDuplicates) {
			JsonNode val1 = tree1.get(jsonPath);
			JsonNode val2 = tree2.get(jsonPath);
			if( ! isJsonNodesEqual(val1, val2) )
				return false;
		}
		// All match, so return true
		return true;
	}


	/**
	 * Compares two JSON values.
	 * <p>
	 * A missing value (null) only equals another missing value.  When matching on any array element
	 * is enabled and both values are arrays, they are equal if they share at least one element;
	 * in every other case the values must be exactly equal.
	 */
	private boolean isJsonNodesEqual(JsonNode val1, JsonNode val2) {
		// A key missing from either row can only match a key missing from the other row.
		// Checking both sides here keeps the array branch below from dereferencing a null.
		if( val1 == null  ||  val2 == null )
			return val1 == val2;

		if( mIsMatchOnAnyArrayElements && val1.isArray() && val2.isArray() ) {
			ArrayNode array1 = (ArrayNode)val1;
			ArrayNode array2 = (ArrayNode)val2;
			for(int i=0; i < array1.size(); i++) {
				for(int j=0; j < array2.size(); j++) {
					if( array1.get(i).equals(array2.get(j)) )
						return true;
				}
			}
			// No sub-elements matched, so return false
			return false;
		}
		// Just match exactly
		return val1.equals(val2);
	}


}
