package edu.mayo.bior.pipeline.createcatalog;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;

import org.apache.log4j.Logger;
import org.json.JSONObject;

import com.tinkerpop.pipes.AbstractPipe;

import edu.mayo.pipes.bioinformatics.vocab.CoreAttributes;
import edu.mayo.pipes.history.History;
import edu.mayo.pipes.util.GenomicObjectUtils;

/**
 * Convert a tab-delimited line that contains JSON
 *   Ex:  ABC  typeVariant  1  100  101  {"_landmark":"1","_minBP":100,"_maxBP":101,....}
 * Into a Catalog compatible line
 *   Ex:  1  100  101  {"_landmark":"1"....
 * Blank lines and header lines (those starting with "#") are skipped and never emitted.
 *
 * NOTE:  This class is in contrast to TjsonToCatalogPipe in that it avoids the JSON parsing routines,
 *        relying instead on String.indexOf() operations to double the speed at which the catalog can be processed.
 *        As a consequence, values are located by searching for the next comma after the key, so this is only
 *        reliable for simple values (such as _landmark, _minBP, _maxBP, _refAllele) that do not themselves
 *        contain commas or quotes.
 *
 * @author Michael Meiners (m054457), 2018-06-12
 */
public class TjsonToCatalogNoJsonParsingPipe extends AbstractPipe<String, String>
{

   /** Mutable holder for the pieces of a single catalog line while it is being assembled. */
   class CatalogLine {
      /** The JSON column exactly as it was read from the input line */
      public String jsonOriginal;
      /** The JSON column after any corrections (chrom standardized, numbers unquoted, _maxBP added, ...) */
      public String jsonModified;
      public String chrom;
      public long   minBp;
      public long   maxBp;

      /**
       * @param isJsonColumnOnly  If true, return only the (modified) JSON; otherwise return chrom, minBp, maxBp, JSON separated by tabs
       */
      public String toString(boolean isJsonColumnOnly) {
         if( isJsonColumnOnly )
            return jsonModified;
         return chrom + "\t" + minBp + "\t" + maxBp + "\t" + jsonModified;
      }
   }

   protected static Logger sLogger = Logger.getLogger(TjsonToCatalogNoJsonParsingPipe.class);

   private static final String UNKNOWN = "UNKNOWN";

   private static final String LANDMARK = CoreAttributes._landmark.toString();
   private static final String MINBP    = CoreAttributes._minBP.toString();
   private static final String MAXBP    = CoreAttributes._maxBP.toString();
   private static final String REFALLELE= CoreAttributes._refAllele.toString();

   // As given to the constructor until the first data row is seen; after that it is a 0-based column index
   private int mJsonCol = -1;
   private boolean mIsJsonOnly = false;

   private boolean mIsFirstRow = true;
   private boolean mIsModifyChrom = true;

   // Show warnings only once for each kind of field that is changed automatically
   private boolean mIsWarnedOfLandmarkNotFound = false;
   private boolean mIsWarnedOfLandmarkNonString = false;
   private boolean mIsWarnedOfMinbpNonLong = false;
   private boolean mIsWarnedOfMaxbpNonLong = false;
   private boolean mIsWarnedOfMaxbpCalculated = false;
   // Original landmark values for which a "landmark was changed" warning has already been logged
   private Set<String> mLandmarksWarned = new HashSet<String>();


   /**
    * Constructor
    *
    * @param jsonCol    The positive (1-based) or negative index of the JSON column within the data rows,
    *                   with -1 representing the last column
    * @param isJsonOnly Should the catalog consist of only a JSON column (and not the chrom,minBP,maxBP as well)?
    * @param isModifyChromosome  Should the _landmark value be standardized (via GenomicObjectUtils.computechr())
    *                   and the JSON updated to match?
    */
   public TjsonToCatalogNoJsonParsingPipe(int jsonCol, boolean isJsonOnly, boolean isModifyChromosome) {
      mJsonCol = jsonCol;
      mIsJsonOnly = isJsonOnly;
      mIsModifyChrom = isModifyChromosome;
   }


   /** Pull the next data line from the upstream pipe (skipping blank and header lines) and convert it to a catalog line. */
   @Override
   protected String processNextStart() throws NoSuchElementException {
      String lineIn = getNextNonBlankNonHeaderLine();

      fixJsonColumnIfFirstDataRow(lineIn);

      return processLine(lineIn);
   }


   /** Keep reading from the upstream pipe until a line is found that is neither blank nor a header line. */
   private String getNextNonBlankNonHeaderLine() {
      String lineIn = null;
      do {
         lineIn = this.starts.next();
      } while( isBlankOrHeaderLine(lineIn) );
      return lineIn;
   }

   /** A line is skipped if it is null, contains only whitespace, or starts with "#" */
   private boolean isBlankOrHeaderLine(String line) {
      return line == null  ||  line.trim().length() == 0  ||  line.startsWith("#");
   }


   /**
    * On the first data row only, convert the user-supplied JSON column into a 0-based index.
    * The number of columns in that first row is used to resolve negative indexes.
    */
   private void fixJsonColumnIfFirstDataRow(String lineIn) {
      if (!mIsFirstRow)
         return;

      mIsFirstRow = false;

      // If the json column is 0 or higher, then subtract 1 to make the index 0-based
      if (mJsonCol > -1) {
         mJsonCol = mJsonCol - 1;
      }

      // If the json column is negative, then make it positive, with -1 representing the last column
      // NOTE: An original value of 0 was turned into -1 above, so it also ends up selecting the last column
      if (mJsonCol <= -1) {
         int colCount = getColumnCount(lineIn);
         mJsonCol = colCount + mJsonCol;
      }
   }

   /** Count the tab-delimited columns in the line (0 for a null or empty line) */
   private int getColumnCount(String lineIn) {
      if( lineIn == null  ||  lineIn.length() == 0 )
         return 0;

      int numCols = 1;
      int idxTab = lineIn.indexOf('\t');
      while( idxTab != -1 ) {
         numCols++;
         idxTab = lineIn.indexOf('\t', idxTab+1);
      }
      return numCols;
   }



   /**
    * Process the next line and turn it into a catalog line.
    * NOTE: This uses the JSON column index as currently stored; it is only converted to a 0-based index
    *       when the first data row passes through processNextStart().
    *
    * @param lineIn The next tab-delimited line to convert into a catalog line
    * @return String containing either one new JSON column (if requested), or 3 tabix columns plus JSON (standard)
    * @throws RuntimeException if the JSON column index is out of range for the line
    * @throws IllegalArgumentException if the JSON column does not start with "{" and end with "}",
    *         or if the _refAllele length does not agree with _minBP and _maxBP
    */
   public String processLine(String lineIn) {
      throwExceptionIfJsonColumnIndexOutOfRange(lineIn, mJsonCol);

      CatalogLine ctgLine = new CatalogLine();
      ctgLine.jsonModified = ctgLine.jsonOriginal = getCol(lineIn, mJsonCol);

      throwExceptionIfBadJson(ctgLine.jsonModified, mJsonCol, lineIn);

      // Order matters: each step reads the JSON as corrected by the previous step
      setChrom(ctgLine);
      setMinBp(ctgLine);
      setMaxBp(ctgLine);

      return ctgLine.toString(mIsJsonOnly);
   }



   /**
    * Extract the chromosome from _landmark, correcting the JSON where necessary:
    *   - missing or empty:  chrom becomes UNKNOWN (the JSON is only updated if the key was present but empty)
    *   - not quoted:        the value is rewritten as a quoted string
    *   - non-standard name: the value is standardized (only if chromosome modification was requested)
    */
   private void setChrom(CatalogLine ctgLine) {
      String chrom = ctgLine.chrom = getJsonVal(ctgLine.jsonModified, LANDMARK);

      if( ! isChromFound(ctgLine.chrom) ) {
         boolean isFoundButEmpty = ctgLine.chrom != null  &&  ctgLine.chrom.trim().length() == 0;
         ctgLine.chrom = UNKNOWN;
         warnChromNotFound(ctgLine.jsonOriginal);
         if( isFoundButEmpty )
            ctgLine.jsonModified = replaceValueInJson(ctgLine.jsonModified, LANDMARK, ctgLine.chrom, /*isValueQuoted=*/true);
      } else {
         boolean isChromQuoted = isValueQuoted(ctgLine.jsonModified, LANDMARK);
         if( ! isChromQuoted )
            warnChromNotQuoted(ctgLine.jsonOriginal);
         if( mIsModifyChrom )
            ctgLine.chrom = getStandardizedChrom(ctgLine.chrom, ctgLine.jsonOriginal);
         if( (mIsModifyChrom  &&  ! ctgLine.chrom.equals(chrom)) || ! isChromQuoted ) {
            ctgLine.jsonModified = replaceValueInJson(ctgLine.jsonModified, LANDMARK, ctgLine.chrom, /*isValueQuoted=*/true);
         }
      }
   }

   /** Extract _minBP (0 if missing or not a number).  If it was quoted in the JSON, rewrite it as a bare number. */
   private void setMinBp(CatalogLine ctgLine) {
      ctgLine.minBp = getStrAsLong(getJsonVal(ctgLine.jsonModified, MINBP));
      if( isValueQuoted(ctgLine.jsonModified, MINBP) ) {
         warnMinBpQuoted(ctgLine.jsonOriginal);
         ctgLine.jsonModified = replaceValueInJson(ctgLine.jsonModified, MINBP, ctgLine.minBp + "", /*isValueQuoted=*/false);
      }
   }


   /**
    * Extract _maxBP (0 if missing or not a number).  If it was quoted in the JSON, rewrite it as a bare number.
    * If _maxBP is still 0, try to calculate it from _minBP and the length of _refAllele and add it to the JSON.
    *
    * @throws IllegalArgumentException if _minBP, _maxBP and _refAllele are all present but the
    *         _refAllele length does not equal (max-min+1)
    */
   private void setMaxBp(CatalogLine ctgLine) {
      ctgLine.maxBp = getStrAsLong(getJsonVal(ctgLine.jsonModified, MAXBP));
      if( isValueQuoted(ctgLine.jsonModified, MAXBP) ) {
         warnMaxBpQuoted(ctgLine.jsonOriginal);
         ctgLine.jsonModified = replaceValueInJson(ctgLine.jsonModified, MAXBP, ctgLine.maxBp + "", /*isValueQuoted=*/false);
      }

      String ref = getJsonVal(ctgLine.jsonModified, REFALLELE);
      if( ctgLine.maxBp == 0L ) {
         // If either minBP or ref is missing, then we can't compute maxBP, so just return
         if( ctgLine.minBp == 0L  ||  ref == null )
            return;

         warnMaxBpCalculated(ctgLine.jsonOriginal);

         // They are present, so try to calculate:  _maxBP = (_minBP + refAllele.length() - 1)
         // NOTE(review): an empty (but present) _refAllele yields _maxBP = _minBP - 1 -- confirm this is intended
         ctgLine.maxBp = ctgLine.minBp + ref.length() - 1;

         // Update the JSON
         ctgLine.jsonModified = replaceValueInJson(ctgLine.jsonModified, MAXBP, ctgLine.maxBp + "", /*isValueQuoted=*/false);
      }

      if( isMismatchRefAlleleLengthVsPositions(ctgLine, ref) )
         throw new IllegalArgumentException("Error:  length of the refAllele does not equal (max-min+1): " + ctgLine.jsonModified);
   }


   /** If _minBP, _maxBP, and a non-empty _refAllele are all specified, return true if they do not add up */
   private boolean isMismatchRefAlleleLengthVsPositions(CatalogLine ctgLine, String ref) {
      boolean isAllSpecified = ctgLine.minBp != 0L  &&  ctgLine.maxBp != 0L  &&  ref != null  &&  ref.length() > 0;
      return isAllSpecified  &&  (ctgLine.maxBp - ctgLine.minBp) != (ref.length() - 1);
   }


   /**
    * Is the value quoted (with either double or single quotes)?  Ex:  "_landmark":"1"
    * Whitespace between the colon and the value is skipped so that this agrees with getJsonVal(),
    * which trims the value before checking it for quotes.  Returns false if the key is not found.
    */
   private boolean isValueQuoted(String json, String key) {
      int idxKey = getIdxKey(json, key);
      if( idxKey == -1 )
         return false;

      // getIdxKey() matched the key followed by a colon, so the colon is guaranteed to be found here
      int idxVal = json.indexOf(":",  idxKey + key.length()) + 1;

      // Skip the same characters that String.trim() would remove
      while( idxVal < json.length()  &&  json.charAt(idxVal) <= ' ' )
         idxVal++;

      if( idxVal >= json.length() )
         return false;

      char firstChar = json.charAt(idxVal);
      return firstChar == '\"'  ||  firstChar == '\'';
   }


   /** The chromosome is considered found if it is neither null nor blank */
   private boolean isChromFound(String chrom) {
      return chrom != null  &&  chrom.trim().length() > 0;
   }


   private void warnChromNotFound(String json) {
      if (!mIsWarnedOfLandmarkNotFound) {
         sLogger.warn("Warning: " + LANDMARK + " was not found (or was empty) on at least one line.  This was replaced with " + UNKNOWN + ".  Only first offence shown : " + json);
         mIsWarnedOfLandmarkNotFound = true;
      }
   }

   private void warnChromNotQuoted(String json) {
      if (!mIsWarnedOfLandmarkNonString) {
         sLogger.warn("Warning: " + LANDMARK
               + " was not a String.  Correcting offending JSON (only first original shown): " + json);
         mIsWarnedOfLandmarkNonString = true;
      }
   }

   private void warnMinBpQuoted(String json) {
      if( ! mIsWarnedOfMinbpNonLong) {
         sLogger.warn("Warning: " + MINBP + " was not an integer.  Correcting offending JSON (only first original shown): " + json);
         mIsWarnedOfMinbpNonLong = true;
      }
   }

   private void warnMaxBpQuoted(String json) {
      if ( ! mIsWarnedOfMaxbpNonLong) {
         sLogger.warn("Warning: " + MAXBP + " was not an integer.  Correcting offending JSON (only first original shown): " + json);
         mIsWarnedOfMaxbpNonLong = true;
      }
   }

   private void warnMaxBpCalculated(String json) {
      if ( ! mIsWarnedOfMaxbpCalculated) {
         sLogger.warn("Warning: " + MAXBP + " was not specified, so it will be calculated from " + MINBP + " and " + REFALLELE + " length.");
         sLogger.warn("First offending JSON shown: " + json);
         mIsWarnedOfMaxbpCalculated = true;
      }
   }




   /**
    * Standardize the chromosome name using GenomicObjectUtils.computechr().
    * A warning is logged the first time each distinct original landmark is changed.
    */
   private String getStandardizedChrom(String chrom, String jsonOriginal) {
      // Per the original author's note, computechr() is expected to:
      //   - cut off a leading "chr" if the name is longer than 3 chars
      //   - convert 23 to X,  24 to Y,  25 to XY,  26 to M,  MT to M, etc
      String chromModified = GenomicObjectUtils.computechr(chrom);

      // Set.add() returns true only the first time this landmark is seen, so each one is warned about once
      if( ! chrom.equals(chromModified)  &&  mLandmarksWarned.add(chrom) ) {
         sLogger.warn("Warning: _landmark was changed from " + chrom + " to " + chromModified
               + ".  Correcting offending JSON (only first original shown): " + jsonOriginal);
      }
      return chromModified;
   }


   /** Return the String as a long, ignoring surrounding whitespace.  If the String was null, blank, or could not be parsed, then return 0 */
   private long getStrAsLong(String s) {
      if( s == null )
         return 0L;

      String trimmed = s.trim();
      if( trimmed.length() == 0 )
         return 0L;

      try {
         return Long.parseLong(trimmed);
      } catch(NumberFormatException e) {
         // Not a whole number (ex: "abc" or "1.5"), which callers treat the same as "not specified"
         return 0L;
      }
   }


   /** Replace string or number or boolean values in the JSON.
    *    Ex:  {"chrom":"12"} ==> {"chrom":"13"}
    *    Ex:  {"minBP":100,"maxBP":101} ==> {"minBP":101,"maxBP":101}
    *    Ex:  {} ==> {"minBP":100}
    *    Ex:  {"chrom":"12"} ==> {"chrom":"12","minBP":100}
    *  NOTE: We won't worry about this one for now since we are just pulling out _landmark, _minBP, and _maxBP so shouldn't have quotes and commas in the values
    *    Ex:  {"minBP":101,"chrom":"MTTTT, \":; , < > |#$!@#$^","maxBP":101}"  ==>  {"minBP":101,"chrom":"X","maxBP":101}
    *
    *  @param json  The full JSON object (expected to start with "{" and end with "}")
    *  @param key   The key whose value should be replaced (or appended if the key is not present)
    *  @param val   The new value, without any surrounding quotes
    *  @param isValueQuoted  Should the new value be surrounded by quotes?
    *  @return The JSON with the value replaced in place, or with the key and value appended at the end
    */
   protected String replaceValueInJson(String json, String key, String val, boolean isValueQuoted) {
      int idxKey = getIdxKey(json, key);

      // Use the same style of quote (double or single) that the existing key uses; default to double quotes
      String QT = "\"";
      if( idxKey != -1 )
         QT = json.charAt(idxKey) + "";

      String quoteIfAny = isValueQuoted ? QT : "";
      String keyColon   = QT + key + QT + ":";
      String keyAndVal  = keyColon + quoteIfAny + val + quoteIfAny;

      // If the key was not found in the json, then add it at the end (with a comma preceding it if the JSON was not empty)
      if( idxKey == -1 ) {
         // An object containing only whitespace (ex: "{ }") is empty too, so it must not get a comma
         boolean isJsonEmpty = json.length() <= 2  ||  json.substring(1, json.length()-1).trim().length() == 0;
         String commaIfNecessary = isJsonEmpty  ?  ""  :  ",";
         return json.substring(0, json.length()-1) + commaIfNecessary + keyAndVal + "}";
      }

      // Insert it where it was:  the old value runs up to the next comma, or to the closing brace if it was the last key
      int idxEnd = json.indexOf(",", idxKey + keyColon.length());
      if( idxEnd == -1 )
         idxEnd = json.length() - 1;
      return json.substring(0, idxKey) + keyAndVal + json.substring(idxEnd);
   }


   /**
    * Find the index of the opening quote of the key, where the key is immediately followed by a colon.
    * Double-quoted keys are tried first, then single-quoted keys.  Returns -1 if neither is found.
    */
   private int getIdxKey(String json, String key) {
      int idxKey = json.indexOf("\"" + key + "\":");

      // Check if we should be using single-quotes instead
      if( idxKey == -1  )
         idxKey = json.indexOf("'" + key + "':");

      return idxKey;
   }


   /**
    * Get the value for the key, trimmed and with any surrounding quotes removed.
    * Returns null if the key is not found.
    */
   private String getJsonVal(String fullJson, String key) {
      int idxKey = getIdxKey(fullJson, key);

      if( idxKey == -1 )
         return null;

      // WARNING: This can be dangerous for general text fields that contain commas and quotes within the string value!!!
      int idxComma = fullJson.indexOf(",", idxKey);
      if( idxComma == -1 )
         idxComma = fullJson.length() - 1;

      int idxStart = fullJson.indexOf(":", idxKey + key.length()) + 1;

      String val = fullJson.substring(idxStart, idxComma).trim();

      // Require at least 2 chars so a lone quote char is not treated as both the opening and closing quote
      boolean isQuoted = val.length() >= 2
            &&  ( (val.startsWith("\"") && val.endsWith("\""))  ||  (val.startsWith("'") && val.endsWith("'")) );

      if( isQuoted )
         val = val.substring(1, val.length()-1);

      return val;
   }


   /** Throw a RuntimeException if the line has no columns or the 0-based JSON column is not within the line */
   private void throwExceptionIfJsonColumnIndexOutOfRange(String line, int jsonCol) {
      int numCols = getColumnCount(line);
      if (numCols == 0  ||  jsonCol < 0  ||  jsonCol > (numCols - 1))
         throw new RuntimeException("JSON column (" + jsonCol + ") is out of range on row: " + line);
   }

   /** Throw an IllegalArgumentException if the JSON does not start with "{" and end with "}" */
   private void throwExceptionIfBadJson(String json, int jsonCol, String line) {
      if( ! json.startsWith("{")  ||  ! json.endsWith("}") )
         throw new IllegalArgumentException("Bad JSON column data in column " + (jsonCol + 1) + ": " + line);
   }


   //------------------------------------------------------
   // Return the 0-based column.  If the column is not found, return null
   //   Ex:  "1 2 3 4", col=3 will return "4"
   //   Ex:  "1 2 3 4", col=5 will return null
   private String getCol(String s, int col) {
      int currentCol = 0;
      int idxStart = 0;
      int idxEnd   = getNextTabIdxOrEnd(s, 0);
      while( currentCol < col  &&  idxEnd != s.length() ) {
         currentCol++;
         idxStart = idxEnd + 1;
         idxEnd = getNextTabIdxOrEnd(s, idxStart);
      }

      // If the correct column was found, then return it
      if( currentCol == col )
         return s.substring(idxStart, idxEnd);

      // Not found, so return null
      return null;
   }

   /** Return the index of the next tab at or after start, or the length of the string if there are no more tabs */
   private int getNextTabIdxOrEnd(String s, int start) {
      int idx = s.indexOf('\t', start);
      if( idx == -1 )
         return s.length();
      return idx;
   }
}
