package edu.mayo.bior.cli.cmd;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.util.regex.Pattern;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;

import com.tinkerpop.pipes.Pipe;
import com.tinkerpop.pipes.IdentityPipe;
import com.tinkerpop.pipes.util.Pipeline;

import edu.mayo.bior.catalogremoveduplicates.CatalogRemoveDuplicatesPipe;
import edu.mayo.bior.pipeline.UnixStreamPipeline;
import edu.mayo.bior.pipeline.createcatalog.TabixCmd;
import edu.mayo.bior.pipeline.createcatalog.TjsonToCatalog;
import edu.mayo.cli.CommandPlugin;
import edu.mayo.pipes.InputStreamPipe;
import edu.mayo.pipes.PrintPipe;
import edu.mayo.pipes.JSON.DrillPipe;
import edu.mayo.pipes.JSON.tabix.BgzipWriter;
import edu.mayo.pipes.UNIX.CatAnythingPipe;
import edu.mayo.pipes.UNIX.GrepEPipe;
import edu.mayo.pipes.UNIX.GrepPipe;
import edu.mayo.pipes.history.History;
import edu.mayo.pipes.history.HistoryInPipe;
import edu.mayo.pipes.history.HistoryOutPipe;
import edu.mayo.pipes.util.metadata.Metadata;
import edu.mayo.pipes.util.metadata.Metadata.CmdType;

/**
 * Command plugin that streams a catalog through {@link CatalogRemoveDuplicatesPipe}.
 *
 * <p>Input comes from the file given with {@code -i}, or from STDIN when {@code -i} is absent.
 * Output goes to the file given with {@code -o} (written via {@link BgzipWriter} and then
 * tabix-indexed), or to STDOUT when {@code -o} is absent.
 */
public class CatalogRemoveDuplicatesCommand implements CommandPlugin {

	private static final char OPTION_INPUT_CATALOG  = 'i';
	private static final char OPTION_OUTPUT_CATALOG = 'o';
	private static final char OPTION_JSON_PATH 		= 'p';
	private static final char OPTION_ANY_ELEMENT_IN_ARRAY = 'a';
	private static final char OPTION_SKIP_REGION    = 's';

	/**
	 * Accepted region forms: CHROM, CHROM:MIN, or CHROM:MIN-MAX.
	 * Compiled once so that validating many regions does not recompile the expression.
	 */
	private static final Pattern REGION_PATTERN = Pattern.compile("[0-9A-Za-z]+(?::[0-9]+(?:-[0-9]+)?)?");

	private String operation;

	/**
	 * Stores the value of the {@code command.name} property.
	 *
	 * @param props properties supplied to the plugin
	 */
	public void init(Properties props) throws Exception {
		operation = props.getProperty("command.name");
	}

	/**
	 * Reads the command-line options, builds the pipeline, drains it, and (when writing to a
	 * file) creates the tabix index for the output.
	 *
	 * @param line parsed command line
	 * @param opts option definitions (not used by this method)
	 * @throws IllegalArgumentException if any {@code -s} region is malformed
	 */
	public void execute(CommandLine line, Options opts) throws Exception {
		// This command is scheduled for removal in BioR 5.0.0, so warn on every run.
		System.err.println("WARNING:  THIS COMMAND WILL BE REMOVED IN BIOR v5.0.0!");
		System.err.println("You can continue to use older versions of BioR to run this command");

		// An empty path means "option not given": fall back to STDIN / STDOUT respectively.
		String inputCatalogPath  = line.hasOption(OPTION_INPUT_CATALOG)
				?  line.getOptionValue(OPTION_INPUT_CATALOG)
				:  "";
		String outputCatalogPath = line.hasOption(OPTION_OUTPUT_CATALOG)
				?  line.getOptionValue(OPTION_OUTPUT_CATALOG)
				:  "";

		List<String> jsonPathsToCompare = optionValues(line, OPTION_JSON_PATH);
		boolean isCompareAnyInSubset = line.hasOption(OPTION_ANY_ELEMENT_IN_ARRAY);

		// Validate regions up front so a bad argument fails before any output is produced.
		List<String> regionsToSkip = optionValues(line, OPTION_SKIP_REGION);
		verifyRegions(regionsToSkip);

		boolean isUseInputFile  = inputCatalogPath.length() > 0;
		boolean isUseOutputFile = outputCatalogPath.length() > 0;
		// Treat the input as gzip if the extension says so, or if TjsonToCatalog.isFileGzip reports it.
		boolean isGzip = isUseInputFile  &&  (inputCatalogPath.endsWith(".gz")  ||  inputCatalogPath.endsWith(".bgz")  ||  TjsonToCatalog.isFileGzip(new File(inputCatalogPath)));
		String inputCatalogType = isGzip  ?  "gzip"  :  "text";

		Pipeline pipeline = new Pipeline(
				// If the input catalog path was specified, then use CatAnythingPipe, else use InputStreamPipe
				isUseInputFile ?  new CatAnythingPipe<String, String>(inputCatalogType)  :  new InputStreamPipe(),
				new HistoryInPipe(),
				new CatalogRemoveDuplicatesPipe(jsonPathsToCompare, isCompareAnyInSubset, regionsToSkip),
				// If the output catalog path was specified, then use BgzipWriter pipe, else use HistoryOutPipe + PrintPipe to dump to STDOUT
				isUseOutputFile ?  new BgzipWriter(outputCatalogPath)  :  new Pipeline(new HistoryOutPipe(), new GrepEPipe("^#"), new PrintPipe())
				);

		// The pipeline starts from the input file path when one was given, otherwise from STDIN.
		pipeline.setStarts( isUseInputFile  ?  Arrays.asList(inputCatalogPath)  :  Arrays.asList(System.in));

		// Drain the pipeline; the work happens as a side effect of pulling each element.
		while( pipeline.hasNext() ) {
			pipeline.next();
		}

		// Create the tabix index (but only if the output was to a file)
		if( isUseOutputFile ) {
			TabixCmd.createTabixIndex(outputCatalogPath);
		}
	}

	/**
	 * Collects all values supplied for a repeatable option.
	 *
	 * @param line   parsed command line
	 * @param option single-character option name
	 * @return a new mutable list of the option's values; empty if the option was not given
	 */
	private static List<String> optionValues(CommandLine line, char option) {
		List<String> values = new ArrayList<String>();
		if( line.hasOption(option) )
			values.addAll(Arrays.asList(line.getOptionValues(option)));
		return values;
	}

	/** Each region must be of format CHROM, CHROM:MIN, or CHROM:MIN-MAX
	 *  (CHROM is letters and digits only; MIN and MAX are digits only).
	 *  Examples:
	 *  	1
	 *  	UNKNOWN
	 *  	X:1
	 *  	Y:1-100
	 *
	 *  @param regionsToSkip regions to validate
	 *  @throws IllegalArgumentException on the first region that does not match an accepted format
	 */
	private void verifyRegions(List<String> regionsToSkip) {
		for(String region : regionsToSkip) {
			if( ! REGION_PATTERN.matcher(region).matches() )
				throw new IllegalArgumentException("Region format incorrect: " + region + ".   Format:  CHROM  or  CHROM:MIN  or  CHROM:MIN-MAX");
		}
	}

}
