package edu.mayo.bior.pipeline.VEP;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.TimeoutException;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

import com.google.gson.JsonArray;
import com.tinkerpop.pipes.AbstractPipe;

import edu.mayo.exec.AbnormalExitException;
import edu.mayo.pipes.history.History;

/**
 * Pipe that appends VEP (Variant Effect Predictor) output, as a JSON column, to every
 * incoming {@link History} row.
 *
 * <p>Rows are not sent to VEP one at a time.  Instead they are queued in a buffer and sent
 * all at once.  This:
 * <ul>
 *   <li>speeds up processing</li>
 *   <li>avoids an error where sending only a single line to VEP causes some data to get dropped</li>
 * </ul>
 *
 * <p>Each row sent to VEP has its ID column replaced by {@code "line<N>"}, where {@code N} is the
 * row's 0-based position in the buffer, so that VEP output can be matched back to the row that
 * produced it even if VEP returns rows in a different order.  When the input runs out before the
 * buffer is full, the buffer is padded with dummy rows (ID = {@code "DUMMY_LINE"}), which are
 * never emitted downstream.
 *
 * <p>NOTE: Make sure to call {@link #terminate()} when the pipe is finished.
 *
 * @author Michael Meiners (m054457)
 * Date created: Jan 29, 2016  (offshoot of original VEPPipeline)
 */
public class VEPPipeline extends AbstractPipe<History,History> {

	private static final Logger sLogger = Logger.getLogger(VEPPipeline.class);

	private static final String EOL = "\n";
	private static final String TAB = "\t";

	/** Marker placed in the ID column of padding rows */
	private static final String DUMMY_LINE = "DUMMY_LINE";

	/** Prefix of the synthetic ID sent to VEP (ex: "line0") */
	private static final String LINE_ID_PREFIX = "line";

	/** 0-based index of the ID column */
	private static final int COL_ID = 2;

	/** 0-based index of the INFO column in the VEP output (holds the CSQ data) */
	private static final int COL_INFO = 7;

	/** Number of leading input columns (CHROM..FILTER) that are forwarded to VEP and shown in warnings */
	private static final int NUM_VEP_INPUT_COLS = 7;

	private final VEPEXE mVepExe;
	private final VepFunctions mVepFunctions;
	private final boolean mIsWorstCaseOnly;

	/** Number of lines VEP expects per batch (taken from the VEPEXE instance) */
	private final int mMaxBufferSize;

	/** Rows of the current batch: first used as the VEP input, then drained one by one as output */
	private final List<History> mBuffer = new ArrayList<History>();

	/** Set to false the first time the end of the upstream input is seen */
	private boolean mHasMoreInput = true;

	/** Keep track of the number of columns on the input. We should add 1 more to this.
	 *  If there are less than (1 more plus # of input), then we'll add a VEP error JSON to end of line  */
	private int mNumColsOnInput = 0;


	/**
	 * Start the VEP process and prepare the pipe.
	 *
	 * @param userSuppliedCommandOptions  options passed through to {@link VEPEXE}
	 * @param pickworst  if true, only the worst-case CSQ entry is emitted for each row;
	 *                   otherwise all entries are emitted as {@code {"CSQ":[...]}}
	 */
	public VEPPipeline(String[] userSuppliedCommandOptions, boolean pickworst) throws IOException, InterruptedException, BrokenBarrierException, TimeoutException, AbnormalExitException{
		mIsWorstCaseOnly = pickworst;

		mVepExe = new VEPEXE(userSuppliedCommandOptions);
		mMaxBufferSize = mVepExe.getVepBufferSize();
		mVepFunctions = new VepFunctions(mVepExe.getVepColHeaders());
	}

	/**
	 * Report whether another row can be produced.
	 *
	 * <p>The internal state alone cannot answer this: the buffer is empty before the first batch
	 * is read (yet input may be available), and while the last batch is being drained
	 * {@code mHasMoreInput} is already false (yet real rows may remain in the buffer).  This
	 * therefore delegates to the {@link AbstractPipe} implementation, which (per the TinkerPop
	 * Pipes API) looks ahead by calling {@link #processNextStart()} and holds the result for the
	 * following {@code next()} call.
	 *
	 * @return true if a subsequent {@code next()} will return a row
	 */
	@Override
	public boolean hasNext() {
		return super.hasNext();
	}

	/**
	 * Return the next annotated row, running a new batch through VEP if the buffer is empty.
	 *
	 * @return the next input row with the VEP JSON appended as the last column
	 * @throws NoSuchElementException when the input is exhausted (including when the next
	 *         buffered row is a padding row)
	 */
	@Override
	public History processNextStart() throws NoSuchElementException {
		// If there is still data in the buffer, then pull the next item
		if( mBuffer.size() > 0 )
			return getNextOutputBufferItem();

		// Else if we have no more input, then throw a NoSuchElementException
		if( ! mHasMoreInput )
			throw new NoSuchElementException("End of input");

		fillBufferFromInput();

		sendToVep(mBuffer);

		getOutputFromVep();

		addCsqErrorForAnyLinesWithNoVepOutput();

		// Get the next item from the output queue
		return getNextOutputBufferItem();
	}

	/**
	 * Pull in lines from the previous pipe until the buffer holds a full VEP batch.
	 * Bypass lines get the default CSQ error appended right away and are not counted towards
	 * the batch size; once the input is exhausted the remainder is padded with dummy lines.
	 */
	private void fillBufferFromInput() {
		// NOTE: We may temporarily go over the buffer size if there are any bypass lines
		//       because even though those won't be sent to VEP, we have to match the same
		//       number of lines as what VEP expects in the buffer or VEP will hang.
		int numBypasses = 0;
		while( mBuffer.size() < (mMaxBufferSize + numBypasses) ) {
			// NOTE: this.starts.hasNext() cannot be called multiple times
			//       On a second call it throws a NoSuchElementException, and thus would abort sending a partial buffer to VEP
			//       To get around this, check mHasMoreInput before checking this.starts.hasNext(),
			//       so it is marked the first time we encounter an end of input.
			mHasMoreInput = mHasMoreInput  &&  this.starts.hasNext();

			// We have reached the end, so pad the buffer with dummy data
			if( ! mHasMoreInput ) {
				mBuffer.add(getDummyDataLine());
				continue;
			}

			History nextLine = trimColumns(this.starts.next());
			mNumColsOnInput = nextLine.size();

			// If the line is one that would cause VEP to skip it, then put a fake VEP CSQ response
			// on it now.  It still goes into the buffer (to keep the output order), but
			// convertInputBufferToString() will not send it along to VEP.
			if( mVepExe.isBypass(nextLine) ) {
				numBypasses++;
				nextLine.add(getVepAsJson(mVepExe.getDefaultErrorResponse()));
			}
			mBuffer.add(nextLine);
		}
	}

	/** Trim leading/trailing whitespace from every column of the line (in place) and return the line */
	private History trimColumns(History line) {
		for(int i=0; i < line.size(); i++) {
			line.set(i, line.get(i).trim());
		}
		return line;
	}

	/** Send the input buffer to VEP as a single newline-separated string  */
	private void sendToVep(List<History> inputBuffer) {
		// Convert all the lines in the buffer to a single string
		String inputLinesToSend = convertInputBufferToString(inputBuffer);
		mVepExe.sendLines(inputLinesToSend);
	}


	/**
	 * Read one full batch of lines back from VEP and append each line's CSQ JSON to the
	 * buffered row it belongs to (matched via the "line<N>" ID).
	 */
	private void getOutputFromVep() {
		for( int i=0; i < mMaxBufferSize; i++ ) {
			History out = mVepExe.getNextLine();

			// If it is a dummy line, then skip it, but mark that we have received the last batch
			// NOTE: Don't end on a dummy line as some of the lines may have switched order,
			//       so we still want to process all of them from the last batch
			if( isDummyLine(out) ) {
				mHasMoreInput = false;
				continue;
			}

			// Get the line index from the ID column (ex: "line0")
			// and add the CSQ JSON to the end of the matching history object
			String id = out.get(COL_ID);
			int inputBufferLineNum = Integer.parseInt(id.replace(LINE_ID_PREFIX, ""));
			History fullLine = mBuffer.get(inputBufferLineNum);
			fullLine.add(getVepAsJson(out.get(COL_INFO)));
		}
	}

	/** An error may still have occurred when calling VEP.
	 *  If so, then we are missing one column at the end, so insert CSQ Error data at the end of these lines, and send msg to stderr */
	private void addCsqErrorForAnyLinesWithNoVepOutput() {
		for( History line : mBuffer ) {
			if( isDummyLine(line) )
				continue;

			// Append the missing column FIRST, so that the warning check below inspects the
			// VEP column rather than the last input column of a line VEP returned nothing for
			if( line.size() < (mNumColsOnInput + 1) )
				line.add(getVepAsJson(mVepExe.getDefaultErrorResponse()));

			if( line.get(line.size()-1).contains(VepFunctions.VEP_ERROR_MSG) ) {
				// Show only the leading input columns; clamp so a short line cannot
				// cause an IndexOutOfBoundsException (the last column is the VEP JSON)
				int numColsToShow = Math.min(NUM_VEP_INPUT_COLS, line.size() - 1);
				String errorMsg = "Warning: VEP could not process line:\n    " + StringUtils.join(line.subList(0, numColsToShow), "\t");
				sLogger.error(errorMsg);
				System.err.println(errorMsg);
			}
		}
	}



	/** Return the next item in the output buffer.
	    However, if the next item is a dummy line, then throw NoSuchElementException */
	private History getNextOutputBufferItem() {
		History next = mBuffer.remove(0);
		if( isDummyLine(next) )
			throw new NoSuchElementException("Reached end of input");
		return next;
	}


	/** Look at the ID (3rd) column - if it says "DUMMY_LINE", return true.
	    A null line, or one with fewer than 3 columns, is not a dummy line. */
	private boolean isDummyLine(History next) {
		return next != null   &&   next.size() > COL_ID   &&   DUMMY_LINE.equals(next.get(COL_ID));
	}


	/** Build a padding row: a syntactically valid variant whose ID column is "DUMMY_LINE" */
	private History getDummyDataLine() {
		return new History(new String[] { "1", "1", DUMMY_LINE, "A", "C", ".", "." } );
	}


	/** Loop thru the buffer, and for each line pulled in, get the first 7 columns,
	    with the ID replaced by the current line number (0-based) so we know which to match with on the output.
	    Bypass lines are left out, but still count towards the line numbering. */
	private String convertInputBufferToString(List<History> buffer) {
		StringBuilder vepInputStr = new StringBuilder();
		for(int i=0; i < buffer.size(); i++) {
			History line = buffer.get(i);
			// Don't add the line if it is a bypass line (which would cause VEP to error and skip it)
			if( mVepExe.isBypass(line) )
				continue;
			vepInputStr.append(getHistoryAsVepInputLine(line, i)).append(EOL);
		}
		return vepInputStr.toString();
	}

	/**
	 * Convert a raw CSQ string into the JSON that is appended to the row.
	 *
	 * @param csqString  CSQ data as found in the INFO column of the VEP output
	 * @return the worst-case entry only if this pipe was built with pickworst=true,
	 *         otherwise all entries wrapped as {"CSQ":[...]}
	 */
	private String getVepAsJson(String csqString) {
		// Pull out the CSQ data from the INFO column and process it into JSON
		JsonArray csqAsJsonArray = mVepFunctions.vepCsqToJsonList(csqString);
		if( mIsWorstCaseOnly )
			return mVepFunctions.getWorstCase(csqAsJsonArray).toString();
		return "{\"CSQ\":" + csqAsJsonArray.toString() + "}";
	}


	// Get the History object as an input string for VEP
	// (basically put the line number in the ID field 3, and only get the first 7 cols)
	private String getHistoryAsVepInputLine(History history, long lineNum) {
		String id = isDummyLine(history)  ?  DUMMY_LINE  :  (LINE_ID_PREFIX + lineNum);

		return history.get(0) + TAB  // CHROM
			+  history.get(1) + TAB  // POS
			+  id             + TAB  // ID - just replace the one they have for now since we will add it back later
			+  history.get(3) + TAB  // REF
			+  history.get(4) + TAB  // ALT
			+  history.get(5) + TAB  // QUAL
			+  history.get(6) + TAB  // FILTER
			+  ".";                  // INFO
	}

	/** NOTE: Make sure to call this terminate() method when the pipe is finished!!!!! **/
	public void terminate() throws InterruptedException{
		mVepExe.terminate();
	}

}
