/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package edu.mayo.bior.pipeline.VEP;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;

import com.tinkerpop.pipes.PipeFunction;

import edu.mayo.pipes.util.BiorProperties;
import edu.mayo.pipes.util.BiorProperties.Key;
import edu.mayo.exec.AbnormalExitException;
import edu.mayo.exec.UnixStreamCommand;
import edu.mayo.pipes.history.History;

/**
 * Runs the Variant Effect Predictor (VEP) as a child process and streams VCF lines through it.
 * <p>
 * The constructor builds the command line from bior.properties, launches the process, and
 * "primes" it by sending a VCF column header plus one full buffer of fake data lines.  The
 * ##INFO CSQ header that VEP returns during priming is parsed into the list exposed by
 * {@link #getVepColHeaders()}.  After construction, callers push full buffers of lines with
 * {@link #sendLines(String)} and pull results one line at a time with {@link #getNextLine()}.
 * <p>
 * NOTE: the buffer size is held in a static field, so it is shared by all instances.
 *
 * @author Michael Meiners (m054457)
 * Based on previous VEPEXE.java from Daniel Quest (m102417)
 */
public class VEPEXE {

	private static final Logger sLogger = Logger.getLogger(VEPEXE.class);

	/** Buffer size used when neither {@link #setBufferSize(int)} nor bior.properties supplies another value. */
	public static final int VEP_BUFFER_SIZE_DEFAULT = 100;

	// Historical note from an earlier revision: a buffer size of "1" appeared to be required when
	// streaming thru Java classes, else the call would hang (though when used separately on just the
	// command line, 20-50 was most efficient).  The current approach instead always sends exactly
	// one full buffer of lines at a time (see the constructor and sendLines()).
	private static int mVepBufferSize = VEP_BUFFER_SIZE_DEFAULT;

	/** Zero-based index of the REF column in a VCF data line. */
	private static final int VCF_REF_COL = 3;
	/** Zero-based index of the ALT column in a VCF data line. */
	private static final int VCF_ALT_COL = 4;

	// 10 second timeout for VEP writing response to STDOUT
	private static final long RECEIVE_TIMEOUT = 10;

	/** Prefix that the ##INFO header returned by VEP must start with. */
	private static final String CSQ_INFO_PREFIX = "##INFO=<ID=CSQ";
	/** Marker that precedes the pipe-separated CSQ field names inside the ##INFO header. */
	private static final String CSQ_FORMAT_MARKER = "Format: ";

	/** Handle on the VEP child process; stays null if building the command fails. */
	private UnixStreamCommand mVep;

	/** CSQ field names parsed from the ##INFO header that VEP returned during priming. */
	private List<String> mVepFormatHeaders = new ArrayList<String>();

	/**
	 * Pre-emptively set the buffer size.  For testing!
	 * <p>
	 * Setting the value to {@link #VEP_BUFFER_SIZE_DEFAULT} lets the bior.properties value
	 * take effect again (see {@link #loadVepBufferSizeProperty(BiorProperties)}).
	 *
	 * @param bufferSize number of lines per buffer
	 */
	public static void setBufferSize(int bufferSize) {
		mVepBufferSize = bufferSize;
	}

	/** Launches and primes VEP with no additional user-supplied options. */
	public VEPEXE() throws IOException, InterruptedException, BrokenBarrierException, TimeoutException, AbnormalExitException{
		this(new String[] { });
	}


	/**
	 * Launches VEP and primes it with a header line and one full buffer of fake data lines.
	 * If anything goes wrong after the process object was created, the process is terminated
	 * before the exception propagates so that it is not left running.
	 *
	 * @param userSuppliedOptions extra command-line options appended after the configured flags (may be null)
	 * @throws AbnormalExitException if VEP failed to run (logged with a configuration hint, then rethrown)
	 * @throws IllegalStateException if required VEP properties are missing
	 * @throws IllegalArgumentException if the ##INFO header returned by VEP is not the expected CSQ header
	 */
	public VEPEXE(String[] userSuppliedOptions) throws IOException, InterruptedException, BrokenBarrierException, TimeoutException, AbnormalExitException {
		boolean primed = false;
		try {
			final Map<String, String> NO_CUSTOM_ENV = Collections.emptyMap();
			String[] allCmdFlags = getVEPCommand(userSuppliedOptions);
			//-------------------------------------------------------------------------------
			// NOTE: VEP will only work on 64-bit systems, when the "forcelinebuffering"
			// flag (the last constructor argument below) is set to "true"
			// To get it to work on 32-bit systems, change this flag to "false"
			// HOWEVER, this should be tested thoroughly before putting it into production!
			//-------------------------------------------------------------------------------
			mVep = new UnixStreamCommand(allCmdFlags, NO_CUSTOM_ENV, true, true);
			mVep.launch();
			mVep.send(concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"));

			// Send some fake data to get the ball rolling...
			// Need to have as many lines as the buffer is set to or it will hang
			int bufferSize = getVepBufferSize();
			mVep.send(getFakeInput(bufferSize));

			// Now receive the headers and fake lines
			String infoHeader = mVep.receive();//##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|HGNC|DISTANCE|SIFT|PolyPhen|CELL_TYPE">
			String colHeader  = mVep.receive();//#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
			sLogger.info("VEP Priming ---------------------------------------------");
			sLogger.info("Header, ##INFO CSQ: " + infoHeader);
			sLogger.info("Header, columns:    " + colHeader);
			for(int i=0; i < bufferSize; i++) {
				String dataLineOut  = mVep.receive();//chr1  1588717  rs009  G  A  0.0  .  CSQ=A|ENSG00000248333|ENST00000317673|Transcript|intron_variant|......
				sLogger.info("Data line out [" + i + "]: " + dataLineOut);
			}
			sLogger.info("---------------------------------------------------------");

			// Pull the CSQ headers from the returned ##INFO line and store them in our local CSQ headers variable
			setVepFormatHeaders(infoHeader);
			primed = true;
		}catch(AbnormalExitException e) {
			sLogger.error("VEP failed to run.  Please verify that the VEP properties are configured correctly, and that the system VEP is running on has the correct Perl path, variant_effect_predictor.pl, and VEP cache paths.");
			throw e;
		} finally {
			// Don't leave the child process behind when priming did not complete
			if( ! primed )
				terminateQuietly();
		}
	}

	/**
	 * Builds the fake input used to prime VEP: the same data line repeated
	 * {@code bufferSize} times, newline-separated, with no trailing newline.
	 */
	private String getFakeInput(int bufferSize) {
		// This line has multiple CSQ results
		String fakeLine = concat("chr1", "1588717", "rs009",  "G", "A", "0.0", ".", ".");
		StringBuilder fakeLinesIn = new StringBuilder();
		for(int i=0; i < bufferSize; i++) {
			if( i > 0 )
				fakeLinesIn.append("\n");
			fakeLinesIn.append(fakeLine);
		}
		return fakeLinesIn.toString();
	}


	/** Concatenate a bunch of strings together, tab-separated */
	private String concat(String... s) {
		StringBuilder str = new StringBuilder();
		for(int i=0; i < s.length; i++) {
			if( i > 0 )
				str.append("\t");
			str.append(s[i]);
		}
		return str.toString();
	}


	/** Turn initial VEP INFO header:
	 *    ##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|HGNC|DISTANCE|SIFT|PolyPhen|CELL_TYPE">
	 *  into list of CSQ format headers:
	 *    [Allele, Gene, Feature, Feature_type, Consequence, cDNA_position, CDS_position, Protein_position, Amino_acids, Codons, Existing_variation, HGNC, DISTANCE, SIFT, PolyPhen, CELL_TYPE]
	 * @param vepInfoHeader the ##INFO line received from VEP
	 * @throws IllegalArgumentException if the line is null, does not start with the CSQ ##INFO prefix,
	 *         or has no "Format: " section terminated by a double quote
	 */
	private void setVepFormatHeaders(String vepInfoHeader) {
		// A null header (nothing came back from VEP) is reported the same way as a malformed one
		int idxFormat = (vepInfoHeader == null)  ?  -1  :  vepInfoHeader.indexOf(CSQ_FORMAT_MARKER);
		int idxQuoteAfter = (idxFormat == -1)  ?  -1  :  vepInfoHeader.indexOf("\"", idxFormat);
		if( vepInfoHeader == null  ||  ! vepInfoHeader.startsWith(CSQ_INFO_PREFIX) || idxFormat == -1  ||  idxQuoteAfter == -1 )
			throw new IllegalArgumentException("##INFO header coming back from VEP command was not correct: " + vepInfoHeader);

		String format = vepInfoHeader.substring(idxFormat + CSQ_FORMAT_MARKER.length(), idxQuoteAfter);
		mVepFormatHeaders = Arrays.asList(format.split("\\|"));
	}

	/**
	 * @return the CSQ field names parsed from VEP's ##INFO header, in the order VEP reported them.
	 *         After construction this is the fixed-size list produced by {@link Arrays#asList(Object...)}.
	 */
	public List<String> getVepColHeaders() {
		return mVepFormatHeaders;
	}

	/**
	 * Builds the full VEP command line: perl path, VEP script path, the configured flags (with
	 * {@code $BiorVepCache} and {@code $BiorVepBufferSize} substituted), Mac-specific flags when
	 * running on Mac OS X, and finally any user-supplied options.
	 *
	 * @param userOptions extra options appended to the end of the command (may be null)
	 * @return the command and its arguments, one array element per argument
	 * @throws IOException declared for loading the properties
	 * @throws IllegalStateException if the perl path, VEP path, VEP cache dir or command flags
	 *         property is missing (each missing property is logged first)
	 */
	public static String[] getVEPCommand(String[] userOptions) throws IOException {
		// See src/main/resources/bior.properties for example file to put into your user home directory
		BiorProperties biorProps = new BiorProperties();

		//VEP_COMMAND="$BIOR_VEP_PERL_HOME/perl $BIOR_VEP_HOME/variant_effect_predictor.pl -i /dev/stdin -o STDOUT -dir $BIOR_VEP_HOME/cache/ -vcf --hgnc -polyphen b -sift b --offline --buffer_size $VEP_BUFFER_SIZE"
		// On Dan's Mac, first part of cmd: "/usr/bin/perl"
		String vepPerlPath = biorProps.get(Key.BiorVepPerl);
		String vepPath = biorProps.get(Key.BiorVep);
		String vepCacheDir = biorProps.get(Key.BiorVepCache);
		String cmdBeforeVariableSubstitutions = biorProps.get(Key.BiorVepCmdFlags);
		String vepBufferSizeStr = biorProps.get(Key.BiorVepBufferSize);

		// Properties without which no usable command can be built
		List<String> missingRequired = new ArrayList<String>();
		if( vepPerlPath == null )
			missingRequired.add("" + Key.BiorVepPerl);
		if( vepPath == null )
			missingRequired.add("" + Key.BiorVep);
		if( vepCacheDir == null )
			missingRequired.add("" + Key.BiorVepCache);
		if( cmdBeforeVariableSubstitutions == null )
			missingRequired.add("" + Key.BiorVepCmdFlags);

		// Show an error if any VEP fields are missing
		if( ! missingRequired.isEmpty()  ||  vepBufferSizeStr == null )
			logAndSysError("Error: Properties missing in bior.properties.  Location of properties file: " + BiorProperties.getFile());

		// Show specific errors for any missing fields
		for( String missingKey : missingRequired )
			logAndSysError("    " + missingKey);
		if( vepBufferSizeStr == null )
			logAndSysError("    " + Key.BiorVepBufferSize);

		// Fail with a clear message instead of a NullPointerException further down.
		// A missing buffer size alone is not fatal: the default is used instead.
		if( ! missingRequired.isEmpty() )
			throw new IllegalStateException("Required VEP properties missing in bior.properties ("
					+ BiorProperties.getFile() + "): " + missingRequired);

		// Now, take into account the default and whether the buffer size is supplied
		int vepBufferSize = loadVepBufferSizeProperty(biorProps);

		// Substitute variables in for buffer size and VEP cache directory
		String cmdFlagWithVarSubstitutions = cmdBeforeVariableSubstitutions
				.replace("$BiorVepCache",   vepCacheDir)
				.replace("$BiorVepBufferSize", vepBufferSize + "")
				.trim();	// leading whitespace would otherwise become an empty argument

		List<String> command = new ArrayList<String>();
		command.add(vepPerlPath);
		command.add(vepPath);

		// Split the command line by spaces (since it is all one string in the properties file)
		if( cmdFlagWithVarSubstitutions.length() > 0 )
			command.addAll(Arrays.asList(cmdFlagWithVarSubstitutions.split(" +")));

		command.addAll(getMacFlagsIfNecessary());

		if( userOptions != null )
			command.addAll(Arrays.asList(userOptions));

		return command.toArray(new String[command.size()]);
	}

	/** Writes the message both to STDERR and to the error log. */
	private static void logAndSysError(String msg) {
		System.err.println(msg);
		sLogger.error(msg);
	}

	/**
	 * Resolves the buffer size to use.  The property value is only consulted while the static
	 * buffer size still equals {@link #VEP_BUFFER_SIZE_DEFAULT}; a missing or non-integer property
	 * leaves the current value untouched.
	 *
	 * @param biorProps properties to read the buffer size from
	 * @return the buffer size now in effect
	 */
	public static int loadVepBufferSizeProperty(BiorProperties biorProps) {

		// Don't change the buffer size if it has already been changed thru the static method
		if( mVepBufferSize == VEP_BUFFER_SIZE_DEFAULT ) {
			String vepBufferSize = biorProps.get(Key.BiorVepBufferSize);
			if( vepBufferSize != null ) {
				try {
					// Trim first: .properties values keep trailing whitespace, which would fail the parse
					mVepBufferSize = Integer.parseInt(vepBufferSize.trim());
				} catch(NumberFormatException e) {
					sLogger.warn("Ignoring non-integer VEP buffer size property value: '" + vepBufferSize + "'");
				}
			}
		}

		return mVepBufferSize;
	}

	/** @return the buffer size (number of lines per buffer) currently in effect */
	public int getVepBufferSize() {
		return mVepBufferSize;
	}


	/** @return extra VEP flags needed when the "os.name" system property is "Mac OS X"; an empty list otherwise */
	private static List<String> getMacFlagsIfNecessary() {
		String os = System.getProperty("os.name");
		List<String> macFlags = new ArrayList<String>();
		if ("Mac OS X".equals(os))
		{
			// MAC ONLY
			// @see https://github.com/arq5x/gemini/blob/master/docs/content/functional_annotation.rst
			//
			// To use the cache, the gzip and zcat utilities are required. VEP uses zcat to
			// decompress cached files. For systems where zcat may not be installed or may
			// not work, the following option needs to be added along with the --cache option:
			//
			// "--compress gunzip -c"
			//
			macFlags.add("--compress");
			macFlags.add("gunzip -c");

			sLogger.info(String.format("Running on %s.  Adding Mac-specific options.", os));
		}
		return macFlags;
	}


	/** Pass the next set of newline-separated lines to VEP.
 	 *  The number of lines sent MUST equal the bufferSize that is used to construct the command.
	 *  WARNING:  This assumes that you are closely watching that the input buffer is filled
	 *            (equals the same number of lines as the bufferSize setting), and that all bad lines
	 *            that should be bypassed have been removed from the full buffer that is sent!
	 *  On any failure the VEP process is terminated before the exception is thrown.
	 * @param vcfLines  VCF lines to send to VEP (Newline-separated).
	 *                  The number of lines must equal the bufferSize and must not contain any bypass lines
	 * @throws RuntimeException if the number of lines does not equal the buffer size
	 *                          (or any other runtime exception occurs, which is rethrown as-is)
	 * @throws NoSuchElementException if sending fails with a checked exception (attached as the cause) */
	public void sendLines(String vcfLines) {
		try
		{
			// Verify that the number of lines input equals the buffer size
			String[] lines = vcfLines.split("\n");
			if( lines.length != mVepBufferSize ) {
				String msg = "Lines being passed to VEP (" + lines.length + ") do not equal the buffer size (" + mVepBufferSize + ")";
				sLogger.error(msg);
				throw new RuntimeException(msg);
			}

			sLogger.info("VEP - sending lines to VEP (" + mVepBufferSize + " lines in buffer) -------------");
			sLogger.info(vcfLines);
			mVep.send(vcfLines);
			sLogger.info("VEP - Lines sent. -------------------------------------------------------------------");
		}
		catch( RuntimeException runtimeExc)
		{
			terminate();
			// Rethrow any runtime exceptions
			throw runtimeExc;
		}
		catch (Exception ex)
		{
			restoreInterruptIfNeeded(ex);
			terminate();
			sLogger.error("VEP - failed to send lines", ex);
			// Same exception type as before, but now carrying the message and the underlying cause
			NoSuchElementException sendFailure = new NoSuchElementException("Failed to send lines to VEP: " + ex.getMessage());
			sendFailure.initCause(ex);
			throw sendFailure;
		}
	}

	/** Get the next line from VEP, waiting at most {@code RECEIVE_TIMEOUT} seconds.
	 *  WARNING:  This assumes that you are closely watching that the input buffer is filled
	 *            (equals the same number of lines as the bufferSize setting), and that all bad lines
	 *            that should be bypassed have been removed from the full buffer that is sent!
	 *  On any failure the VEP process is terminated before the exception is thrown.
	 * @return the received line wrapped in a {@link History}
	 * @throws RuntimeException if VEP timed out or receiving failed; the original exception is the cause */
	public History getNextLine() {
		try
		{
			sLogger.info("VEP - Receiving line...");
			String result =  mVep.receive(RECEIVE_TIMEOUT, TimeUnit.SECONDS);
			sLogger.info("VEP - received line: " + result);
			return new History(result);
		}
		catch (TimeoutException te)
		{
			terminate();
			sLogger.error("VEP - timed out while receiving line", te);
			throw new RuntimeException("VEP timed out while retrieving next line: " + te.getMessage(), te);
		}
		catch( Exception e)
		{
			restoreInterruptIfNeeded(e);
			terminate();
			sLogger.error("VEP - failed to receive line", e);
			// Wrap so that callers only have to deal with runtime exceptions
			throw new RuntimeException(e);
		}
	}

	/** Re-asserts the thread's interrupt flag when the caught exception was an InterruptedException. */
	private static void restoreInterruptIfNeeded(Exception e) {
		if( e instanceof InterruptedException )
			Thread.currentThread().interrupt();
	}


	/**
	 * Gets a "fake" error response from VEP that is useful for when VEP is
	 * either bypassed entirely or VEP failed to send a response.
	 *
	 * @return the placeholder INFO entry {@code CSQ=VEPERR}
	 */
	protected String getDefaultErrorResponse()	{
		return "CSQ=VEPERR";
	}

	/** Terminates the VEP process.  Failures are logged, never thrown. */
	public void terminate() {
		terminateQuietly();
	}

	/** Terminates the VEP process if one was created, logging (not throwing) any failure. */
	private void terminateQuietly() {
		if( mVep == null )
			return;
		try {
			mVep.terminate();
		} catch(Exception e) {
			sLogger.error("Error terminating VEPEXE pipe", e);
		}
	}

	/**
	 * Checks for cases where VEP will <b>not</b> send a response to STDOUT.
	 * These cases should be avoided as the {@link UnixStreamCommand#receive()}
	 * will hang indefinitely.
	 * <p/>
	 * The following cases cause VEP to not return a response and are checked:
	 * <ul>
	 * <li>absent (e.g. NULL) value for ALT column</li>
	 * <li>1 or more whitespace characters for ALT column</li>
	 * <li>"." character for ALT column</li>
	 * <li>ALT and REF columns match</li>
	 * </ul>
	 *
	 * @param line
	 * 		A single data line from a VCF file.
	 * @return
	 * 		True if VEP should be bypassed.  False otherwise.
	 */
	protected boolean isBypass(String line) {
		return isBypass(new History(line));
	}

	/**
	 * Same check as {@link #isBypass(String)}, for a line that is already split into columns.
	 * A line with too few columns to contain ALT is also bypassed.
	 *
	 * @param history
	 * 		Columns of a single VCF data line.
	 * @return
	 * 		True if VEP should be bypassed.  False otherwise.
	 */
	protected boolean isBypass(History history) {
		// make sure we can access the ALT column
		if (history.size() < (VCF_ALT_COL + 1))	{
			return true;
		}

		// A null ALT is one of the documented bypass cases; don't fail on it
		final String rawAlt = history.get(VCF_ALT_COL);
		if (rawAlt == null) {
			return true;
		}

		final String rawRef = history.get(VCF_REF_COL);
		final String ref = (rawRef == null)  ?  ""  :  rawRef.trim();
		final String alt = rawAlt.trim();

		// Return true if alt is whitespace, dot, or if alt == ref
		return  (alt.length() == 0)
			||  alt.equals(".")
			||  alt.equals(ref);
	}

}
