package edu.mayo.bior.cli.cmd;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PushbackInputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.log4j.Logger;

import edu.mayo.bior.util.FilenameComparatorByNumericPart;
import edu.mayo.cli.CommandLineApp;
import edu.mayo.cli.CommandPlugin;
import edu.mayo.pipes.util.test.PipeTestUtils;

/** Concatenates multiple sequential vcf files together into one output file.
 *  <p>
 *  The header lines (lines starting with "#") are taken from the first input file only;
 *  from every following input file only the non-"#" lines are copied.
 *  Each input file may be plain text or gzip'd - this is detected per file from the
 *  gzip magic number (0x1f 0x8b) at the start of the file, not from the file extension.
 *  <p>
 *  File patterns such as my.*.vcf, matching
 *    my.0001.vcf
 *    my.0002.vcf
 *    ...
 *    my.0023.vcf
 *  are NOT expanded by this class: every positional argument is treated as a literal
 *  path that must exist, so wildcards have to be expanded before they reach this
 *  command (presumably by the shell).
 *  <p>
 *  Options, by the short names this class looks up (long aliases, if any, are
 *  defined outside this file):
 *    -o  path of the output file
 *    -n  write plain-text vcf rather than gzip'd vcf
 *    -s  sort the input file list with FilenameComparatorByNumericPart before concatenating
 *  <p>
 *  Usage:  ConcatCommand  [-n]  [-s]  -o <OutputFile>  <InputVcfFile>...
 *  <p>
 *  Example:
 *  	ConcatCommand  -o my.vcf.out.gz  my.vcf  your.vcf.*.out.gz
 *  	ConcatCommand  -n  -o my.vcf.out  my.vcf.out.1.gz  my.vcf.out.2.gz  my.vcf.out.3.gz
 *  <p>
 *  Text is read and written with the platform default charset.
 *
 * @author Michael Meiners (m054457)
 * 2014-04-04
 *
 */
public class ConcatCommand implements CommandPlugin
{
	private static final Logger sLogger = Logger.getLogger(ConcatCommand.class);

	/** Flag: write the output as plain text instead of gzip */
	private static final char OPTION_NO_ZIP = 'n';
	/** Option carrying the output file path */
	private static final char OPTION_OUTPUT_FILE = 'o';
	/** Flag: sort the input file list before concatenating */
	private static final char OPTION_SORT_INPUT_FILES = 's';

	/** First two bytes of every gzip stream */
	private static final byte GZIP_MAGIC_0 = (byte)0x1f;
	private static final byte GZIP_MAGIC_1 = (byte)0x8b;

	/** No initialization is needed for this command. */
	public void init(Properties arg0) throws Exception {
	}


	/** Reads the options and positional arguments, verifies them and concatenates the input files.
	 *  @param cmdLine  Parsed command line; positional arguments are the input vcf paths
	 *  @param options  Not used by this command
	 *  @throws IllegalArgumentException if no output file or no input file was given,
	 *          or if an input file does not exist
	 *  @throws IOException if reading an input file or writing the output file fails
	 */
	public void execute(CommandLine cmdLine, Options options) throws Exception {
		String outfile = cmdLine.getOptionValue(OPTION_OUTPUT_FILE);
		boolean isZipOutput = ! cmdLine.hasOption(OPTION_NO_ZIP);
		boolean isSortInputFileList = cmdLine.hasOption(OPTION_SORT_INPUT_FILES);

		sLogger.info("Output file: " + outfile);
		sLogger.info("Should output be zipped?: " + isZipOutput);
		sLogger.info("Sort the input files?: " + isSortInputFileList);

		// Fail with a clear message instead of a NullPointerException from FileOutputStream
		if( outfile == null || outfile.length() == 0 ) {
			throw new IllegalArgumentException("No output file was specified");
		}

		// Work on a typed copy so that sorting does not reorder the list held by the CommandLine
		List<String> inputVcfs = new ArrayList<String>(Arrays.asList(cmdLine.getArgs()));
		logList("Input files (unsorted): ", inputVcfs);

		if( isSortInputFileList ) {
			Collections.sort(inputVcfs, new FilenameComparatorByNumericPart());
			logList("Sorted input file list: ", inputVcfs);
		}

		verifyInputFiles(inputVcfs);
		sLogger.info("Input files have been resolved");

		concatVcfs(inputVcfs, outfile, isZipOutput);
		sLogger.info("Done.");
	}

	/** Logs the message followed by one indented line per list entry. */
	private void logList(String msg, List<String> inputVcfs) {
		sLogger.info(msg);
		for(String input : inputVcfs)
			sLogger.info("  " + input);
	}


	/** Checks that at least one input file was given and that every one of them exists.
	 *  @throws IllegalArgumentException if the list is null or empty, or a file is missing
	 */
	private static void verifyInputFiles(List<String> inputVcfs) {
		if( inputVcfs == null || inputVcfs.size() == 0 ) {
			throw new IllegalArgumentException("No input vcf files were specified");
		}

		for(String inputVcf : inputVcfs) {
			if( ! new File(inputVcf).exists() ) {
				throw new IllegalArgumentException("Input vcf file does not exist: " + inputVcf);
			}
		}
	}

	/** Writes the contents of all input files, in list order, to the output file.
	 *  All lines of the first file are copied; from the remaining files the lines
	 *  starting with "#" are skipped.
	 *  @param inputVcfPaths  Paths of the input files (plain text or gzip'd)
	 *  @param outputPath     Path of the file to create (an existing file is overwritten)
	 *  @param isZipOutput    true to gzip the output, false to write plain text
	 *  @throws IllegalArgumentException if the output path refers to one of the input files
	 *  @throws FileNotFoundException if an input file or the output file cannot be opened
	 *  @throws IOException if reading or writing fails
	 */
	public void concatVcfs(List<String> inputVcfPaths, String outputPath, boolean isZipOutput) throws FileNotFoundException, IOException {
		// Must happen before the output is opened: opening it truncates the file
		verifyOutputIsNotAnInput(inputVcfPaths, outputPath);

		BufferedWriter fout = openWriter(outputPath, isZipOutput);
		try {
			for(int i = 0; i < inputVcfPaths.size(); i++) {
				String vcfInPath = inputVcfPaths.get(i);
				boolean isIncludeHeader = (i == 0);
				writeFileContents(vcfInPath, fout, isIncludeHeader);
			}
		} finally {
			fout.close();
		}
	}

	/** Rejects an output path that resolves to the same file as one of the inputs,
	 *  because creating the output would wipe that input before it is read.
	 */
	private static void verifyOutputIsNotAnInput(List<String> inputVcfPaths, String outputPath) throws IOException {
		File outputFile = new File(outputPath).getCanonicalFile();
		for(String inputVcfPath : inputVcfPaths) {
			if( outputFile.equals(new File(inputVcfPath).getCanonicalFile()) ) {
				throw new IllegalArgumentException("Output file must not be one of the input vcf files: " + inputVcfPath);
			}
		}
	}

	/** Opens the output file for writing, gzip'd or plain.
	 *  The file stream is closed again if wrapping it fails.
	 */
	private static BufferedWriter openWriter(String outputPath, boolean isZipOutput) throws IOException {
		FileOutputStream fileOut = new FileOutputStream(outputPath);
		boolean isOpened = false;
		try {
			BufferedWriter writer = isZipOutput
					?  new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(fileOut)))
					:  new BufferedWriter(new OutputStreamWriter(fileOut));
			isOpened = true;
			return writer;
		} finally {
			if( ! isOpened )
				fileOut.close();
		}
	}


	/** Copies the lines of one input file to the output.
	 *  @param vcfInPath        Path of the input file (plain text or gzip'd)
	 *  @param fout             Writer to append the lines to; it is not closed here
	 *  @param isIncludeHeader  true to copy every line, false to skip lines starting with "#"
	 */
	private void writeFileContents(String vcfInPath, BufferedWriter fout, boolean isIncludeHeader) throws FileNotFoundException, IOException {
		BufferedReader fin = openReader(vcfInPath);
		try {
			String line = null;
			while( (line = fin.readLine()) != null ) {
				// If we are including the header for this file,
				// OR the line is a data line, then include it
				if( isIncludeHeader || ! line.startsWith("#") ) {
					fout.write(line);
					fout.newLine();
				}
			}
		} finally {
			fin.close();
		}
	}

	/** Opens an input file for reading, transparently unzipping it if it starts with
	 *  the gzip magic number. The file is opened only once; the bytes inspected for the
	 *  magic number are pushed back so the reader still sees the complete content.
	 *  The file stream is closed again if wrapping it fails.
	 */
	private static BufferedReader openReader(String vcfInPath) throws IOException {
		// Pushback capacity of 2 = size of the gzip signature we look ahead for
		PushbackInputStream fileIn = new PushbackInputStream(new FileInputStream(vcfInPath), 2);
		boolean isOpened = false;
		try {
			InputStream content = isZippedInput(fileIn)
					? new GZIPInputStream(fileIn)
					: fileIn;
			BufferedReader reader = new BufferedReader(new InputStreamReader(content));
			isOpened = true;
			return reader;
		} finally {
			if( ! isOpened )
				fileIn.close();
		}
	}

	/** Tells whether the stream starts with the standard gzip magic number.
	 *  The stream position is unchanged afterwards: all bytes read are pushed back.
	 *  Streams shorter than two bytes are reported as not zipped.
	 */
	private static boolean isZippedInput(PushbackInputStream in) throws IOException {
		byte[] signature = new byte[2];
		int numRead = 0;
		// A single read() may return fewer bytes than asked for, so loop until full or end of stream
		while( numRead < signature.length ) {
			int n = in.read(signature, numRead, signature.length - numRead);
			if( n < 0 )
				break;
			numRead += n;
		}
		if( numRead > 0 )
			in.unread(signature, 0, numRead);

		return numRead == signature.length
			&& signature[0] == GZIP_MAGIC_0
			&& signature[1] == GZIP_MAGIC_1;
	}

}
