import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.BlockCompressedOutputStream;
import java.util.zip.GZIPInputStream;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;

//runTestSuite()


// Script entry point.  Exactly three positional arguments are required (see printUsage()).
if( args.length != 3 ) {
  printUsage()
  System.exit(1)
}

// args[0] = original VCF (all columns), args[1] = annotated VCF, args[2] = merged output path
String hugeVcfPath     = args[0]
String annotVcfPath    = args[1]
String combinedVcfPath = args[2]

// Open both inputs first, then the output, in the same order as the arguments
BufferedReader hugeVcfIn  = getBufferedReader(hugeVcfPath)
BufferedReader annotVcfIn = getBufferedReader(annotVcfPath)
BlockCompressedOutputStream combinedOut = getBgzipWriter(combinedVcfPath)

// Stream through the annotated file, pairing each line with its original counterpart
mergeAllLinesOriginalWithAnnotated(hugeVcfIn, annotVcfIn, combinedOut)

// Release the inputs, then close the output stream
hugeVcfIn.close()
annotVcfIn.close()
combinedOut.close()

//------------------------------------------------------
// Print the command-line help text to stdout, one line per entry below.
private void printUsage() {
  List<String> usageLines = [
    "USAGE:",
    "  merge <originalHugeVcf>  <annotatedVcf>  <outputCombinedVcf>",
    "  WHERE:",
    "    - originalHugeVcf  is the original VCF file that may contain a huge number of sample columns",
    "    - annotatedVcf is the VCF that was cut to 8 columns, then annotated with BioR and has the annotations put into the INFO column",
    "    - outputCombinedVcf is the output file to write to that will combine the BioR annotations",
    "      from annotatedVcf with the sample columns from originalHugeVcf (along with any other columns after column 8, such as FORMAT)"
  ]
  for( String usageLine : usageLines ) {
    println(usageLine)
  }
}


//------------------------------------------------------
// Streams through annotatedVcfReader and writes one output line per annotated line:
//   - "##" metadata lines from the annotated file are copied to outWriter unchanged
//   - every other annotated line is paired with the current-or-later line of
//     originalHugeVcfReader for which isSameLine() is true, and mergeLine() of the pair is written
// "##" metadata lines at the top of the original file are skipped and never written.
// The original reader only moves forward, and is NOT advanced after a match, so several
// consecutive annotated lines may be merged against the same original line.
// Debug previews (first 100 chars) of the lines being compared are printed to stdout.
//
// Throws IllegalStateException if the original file runs out of lines before a match
// for an annotated line is found (previously this surfaced as a NullPointerException).
// Neither the readers nor the writer are closed here; that is left to the caller.
private void mergeAllLinesOriginalWithAnnotated(BufferedReader originalHugeVcfReader, BufferedReader annotatedVcfReader, BlockCompressedOutputStream outWriter) {

  // NOTE: In the final version, we should add all header metadata lines ("##...") from both the original and annotation files
  //       into a HashSet and sort them by type.  
  // NOTE: The final splitter code should remove these lines as they will not be relevant until merging: ##SAMPLE, ##FORMAT, ##contig (??)
  // .............. 
  // NOTE: The final version of the splitter should add a "BiorLineId=1", etc to each line which we can use in the merge
  //       Instead of trying to have the first 7 columns match

  // Null-safe preview of a line for the debug output (null means the reader hit end-of-file)
  def preview = { String s -> s == null ? "<EOF>" : s.substring(0, Math.min(100, s.length())) }

  // Go through each line in annotatedVcf and match it with the line in the originalHugeVcf
  String lineAnnot = null
  String lineOrig   = getFirstNonMetadataLine(originalHugeVcfReader)
  while( (lineAnnot = annotatedVcfReader.readLine()) != null ) {
    println("-------")
    println("annot: " + preview(lineAnnot))
    println("orig:  " + preview(lineOrig))

    // Write out any header lines immediately and skip them
    if( lineAnnot.startsWith("##") ) {
      outWriter.write( (lineAnnot + "\n").getBytes() )
      continue
    }

    // If the current original file line is NOT the same as the annotated one, then grab the next original line.
    // Stop as soon as the original is exhausted instead of dereferencing a null line.
    while( lineOrig != null  &&  ! isSameLine(lineAnnot, lineOrig) ) {
      lineOrig = originalHugeVcfReader.readLine()
      println(" origN:" + preview(lineOrig))
    }

    if( lineOrig == null ) {
      throw new IllegalStateException(
        "Reached the end of the original VCF without finding a line matching annotated line: " + preview(lineAnnot))
    }

    String mergedLine = mergeLine(lineAnnot, lineOrig)
    outWriter.write( (mergedLine + "\n").getBytes() )
  }
} 

//------------------------------------------------------
// Decide whether annotLine (from the annotated VCF) describes the same variant as
// origLine (from the original VCF).
//   - A leading "chr" (any case) is removed from BOTH lines before comparing, so the two
//     files may use either chromosome naming style.  (Previously only origLine was
//     normalized, so two files that both used "chr" names could never match.)
//   - Columns 1-4 must then be identical.
//   - Column 5 (ALT) must either be identical, or the annotated ALT must be one of the
//     comma-separated ALTs of the original line.
// Returns true when all of the above hold, false otherwise.
private boolean isSameLine(String annotLine, String origLine) {
  annotLine = removeChrPrefix(annotLine)
  origLine  = removeChrPrefix(origLine)
  boolean isFirst4ColsSame = getFirst4Cols(annotLine).equals(getFirst4Cols(origLine))
  if( ! isFirst4ColsSame )
    return false
  
  // Else, if the first 4 were the same, then check the 5th col (ALTS),
  // which could be a comma-separated list in origLine,
  // but will be separate alts in annotLine
  String annotCol5 = getCol(annotLine, 5)
  String origCol5  = getCol(origLine, 5)
  if( origCol5.equals(annotCol5) )
    return true
    
  // Else check if the annotAlt is a subset of the origAlts
  return isAnnotAltASubsetOfOriginal(annotCol5, origCol5)
}

//------------------------------------------------------
// True when annotAlt is exactly equal to one of the comma-separated entries of origAlts.
private boolean isAnnotAltASubsetOfOriginal(String annotAlt, String origAlts) {
  for( String candidateAlt : origAlts.split(",") ) {
    if( candidateAlt.equals(annotAlt) )
      return true
  }
  return false
}

//------------------------------------------------------
// Return s without its first three characters when it begins with "chr" (compared
// case-insensitively); otherwise return s unchanged.
private String removeChrPrefix(String s) {
  boolean hasChrPrefix = s.toLowerCase().startsWith("chr")
  return hasChrPrefix ? s.substring(3) : s
}

//------------------------------------------------------
// Read lines from fin until one is found that does not start with "##" and return it.
// Returns null if the reader is exhausted first.  The reader is left positioned just
// after the returned line.
private String getFirstNonMetadataLine(BufferedReader fin) {
  String current = fin.readLine()
  while( current != null  &&  current.startsWith("##") ) {
    current = fin.readLine()
  }
  return current
}
    
//------------------------------------------------------
// Convenience wrapper: the first four tab-separated columns of s as one string.
private String getFirst4Cols(String s) {
  final int wantedCols = 4
  return getFirstXCols(s, wantedCols)
}


//------------------------------------------------------
// Convenience wrapper: the first seven tab-separated columns of s as one string.
private String getFirst7Cols(String s) {
  final int wantedCols = 7
  return getFirstXCols(s, wantedCols)
}

//------------------------------------------------------
// Return the leading numCols tab-separated columns of s as a single string (tabs between
// the columns are kept, the tab after the last requested column is not).
// If s contains fewer than numCols tabs, the whole string is returned.
private String getFirstXCols(String s, int numCols) {
  int tabsSeen = 0
  for( int tabPos = s.indexOf("\t"); tabPos != -1; tabPos = s.indexOf("\t", tabPos + 1) ) {
    tabsSeen++
    // The numCols-th tab marks the end of the requested prefix
    if( tabsSeen == numCols )
      return s.substring(0, tabPos)
  }
  // Ran out of tabs before reaching numCols, so the whole string is the answer
  return s
}

//------------------------------------------------------
// Return the tab-separated column at the given 1-based position, or "" when s has no
// such column (this includes col < 1).
//   Ex:  "1 2 3 4", col=3 will return "3"
//   Ex:  "1 2 3 4", col=5 will return ""
private String getCol(String s, int col) {
  int colNum   = 1
  int colStart = 0
  int colEnd   = getNextTabIdxOrEnd(s, 0)

  // Step one column to the right until the wanted column is reached or the string ends
  while( colNum < col  &&  colEnd != s.length() ) {
    colNum++
    colStart = colEnd + 1
    colEnd   = getNextTabIdxOrEnd(s, colStart)
  }

  // colNum only equals col when the walk actually reached the requested column
  return (colNum == col) ? s.substring(colStart, colEnd) : ""
}

// Index of the first tab in s at or after position start, or s.length() when there is none.
private int getNextTabIdxOrEnd(String s, int start) {
  int tabPos = s.indexOf("\t", start)
  return (tabPos == -1) ? s.length() : tabPos
}

//------------------------------------------------------
// Take the 8th col (INFO) from lineAnnot, and use it in place of the 8th col in lineOrig.
//   - If lineAnnot has no 8th column (or it is empty), lineOrig is returned unchanged.
//   - Otherwise the result is: lineOrig columns 1-7, the annotated INFO column, and then
//     lineOrig columns 9-end if the original has any.
// Columns 1-7 and 9+ of lineAnnot are ignored.
private String mergeLine(String lineAnnot, String lineOrig) {
  String annotInfoCol = getCol(lineAnnot, 8)
  
  // If there was no annotated INFO column, then just return the full original line
  if( annotInfoCol.equals("") )
    return lineOrig
  
  String origFirst7Cols = getFirst7Cols(lineOrig)
  // Position of the tab that ends column 7 (equals lineOrig.length() if there is no such tab)
  int tabOrig7 = origFirst7Cols.length()
  // Position of the tab that ends column 8.  The search must begin at the first character
  // of column 8 (tabOrig7 + 1): starting one character later skips the tab of an EMPTY
  // original INFO column and would silently drop column 9 from the merged line.
  int tabOrig8 = lineOrig.indexOf("\t", tabOrig7 + 1)
  
  // Only include columns 9-end in the original if they are there, 
  // Else we only want the original 1-7 and the annotation INFO col
  String merged = tabOrig8 != -1  ?
      concat(origFirst7Cols, annotInfoCol, lineOrig.substring(tabOrig8 + 1))  :
      concat(origFirst7Cols, annotInfoCol)
      
  return merged
}



//------------------------------------------------------
// Determine if file is plain-text, gzip, or bgzip (judged from the extension only), then
// return an appropriate BufferedReader:
//   - "*.bgz" : read through htsjdk's BlockCompressedInputStream
//   - "*.gz"  : read through java.util.zip.GZIPInputStream
//   - others  : read as an uncompressed file
// Text is decoded with the platform default charset.  The caller must close the reader.
private BufferedReader getBufferedReader(String filePath) {
  if( filePath.endsWith(".bgz") ) {
    // BlockCompressedInputStream has no constructor that takes a path String, so wrap it in a File
    return new BufferedReader(new InputStreamReader(new BlockCompressedInputStream(new File(filePath))))
  } else if( filePath.endsWith(".gz") ) {
    return new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(filePath))))
  } else {
    return new BufferedReader(new InputStreamReader(new FileInputStream(filePath)))
  }
}


//------------------------------------------------------
// Open a BlockCompressedOutputStream that writes to the file at fileOutPath.
// The caller is responsible for closing the returned stream.
private BlockCompressedOutputStream getBgzipWriter(String fileOutPath) {
  return new BlockCompressedOutputStream(fileOutPath)
}


//=============================================================================================


// Run every self-test in order, print a success banner, then terminate the JVM with exit code 0.
// A failing assertion throws out of this method before the banner is printed.
private void runTestSuite() {
  testConcat()
  testCol()
  testFirst7Cols()
  testMergeLine()
  testSameLine()

  String bannerRule = "-------------------------------------"
  println(bannerRule)
  println("SUCCESS!  ALL TESTS PASSED!")
  println(bannerRule)

  System.exit(0)
}

// Self-test for concat(): one, two and five arguments are joined with single tabs.
// Always returns null; the String return type is kept only for signature compatibility.
private String testConcat() {
  String single = concat("1")
  String pair   = concat("1", "2")
  String five   = concat("1", "2", "3", "4", "55555")

  assertEquals("1", single)
  assertEquals("1\t2", pair)
  assertEquals("1\t2\t3\t4\t55555", five)
  return null
}

// Self-test for getCol(): in-range columns return their text, out-of-range columns return "".
// Always returns null; the String return type is kept only for signature compatibility.
private String testCol() {
  // Each entry is [expected, input line, 1-based column]
  def cases = [
    ["a", "a",            1],
    ["",  "a",            0],
    ["",  "a",            2],
    ["A", "A\tB",         1],
    ["B", "A\tB",         2],
    ["",  "A\tB",         3],
    ["A", "A\tB\tC\tD",   1],
    ["B", "A\tB\tC\tD",   2],
    ["C", "A\tB\tC\tD",   3],
    ["D", "A\tB\tC\tD",   4],
    ["",  "A\tB\tC\tD",   5]
  ]
  for( def testCase : cases ) {
    assertEquals(testCase[0], getCol(testCase[1], testCase[2]))
  }
  return null
}

// Self-test for getFirst7Cols(): short lines come back whole, longer lines are cut after column 7.
private void testFirst7Cols() {
  String sevenCols = "1\t2\t3\t4\t5\t6\t7"

  // Fewer than 7 columns: the input is returned untouched
  assertEquals("1", getFirst7Cols("1"))
  assertEquals("##12345678", getFirst7Cols("##12345678"))

  // 7 or more columns: only the first 7 are kept
  assertEquals(sevenCols, getFirst7Cols(sevenCols))
  assertEquals(sevenCols, getFirst7Cols(sevenCols + "\t8"))
  assertEquals(sevenCols, getFirst7Cols(sevenCols + "\t8\t9\t10\t11\t12"))
}

// Self-test for mergeLine().
private void testMergeLine() {
  // Annotated line without an 8th column: the original line is returned unchanged.
  // (The fixture previously ended in "\5", a control-character escape, instead of a tab plus "5".)
  assertEquals("1\t2\t3\t4\t5", mergeLine("##1", "1\t2\t3\t4\t5"))

  // Column header line: INFO comes from the annotated line, everything else from the original
  assertEquals(concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT", "SAMPLE1"),
     mergeLine(concat("1",      "2",   "3",  "4",   "5",   "6",    "7",      "INFO", "9",      "10"),
               concat("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "XXXX", "FORMAT", "SAMPLE1")))

  // Original has exactly 8 columns: result is original 1-7 plus the annotated INFO
  assertEquals(concat("1", "100", "rs1", "A", "C", ".", ".", "AF=0.24;AC=3"),
     mergeLine(concat("a", "b",   "c",   "d", "e", "f", "g", "AF=0.24;AC=3"),
               concat("1", "100", "rs1", "A", "C", ".", ".", ".")))

  // Original has FORMAT and sample columns: they are kept, annotated columns 9+ are ignored
  assertEquals(concat("1", "100", "rs1", "A", "C", ".", ".", "AF=0.24;AC=3", "format", "sample1", "sample2", "sample3", "sample4", "sample5"),
     mergeLine(concat("a", "b",   "c",   "d", "e", "f", "g", "AF=0.24;AC=3", "5",      ".|."),
               concat("1", "100", "rs1", "A", "C", ".", ".", ".",            "format", "sample1", "sample2", "sample3", "sample4", "sample5")))
}

// Self-test for isSameLine(): exact match, "chr"-prefixed match, ALT-subset match, and an ALT mismatch.
private void testSameLine() {
  // Annotated line used by all three positive cases
  String annotated = concat("1", "100", "rs1", "A", "C", "0.0", "vx=0", "AC=3")

  // Exact match
  String identicalOrig = concat("1", "100", "rs1", "A", "C", "0.0", "vx=0", "AC=3")
  assertTrue(isSameLine(annotated, identicalOrig))

  // Match, but with "chr" prefix, and differences in non-essential cols
  String chrOrig = concat("chr1", "100", "rs1", "A", "C", "0.00", "vx=0.0", "AC=3.0")
  assertTrue(isSameLine(annotated, chrOrig))

  // Match, but with "chr" prefix, and ALTs subset
  String multiAltOrig = concat("chr1", "100", "rs1", "A", "C,G", "0.00", "vx=0.0", "AC=3.0")
  assertTrue(isSameLine(annotated, multiAltOrig))

  // NO Match, because ALT is not in the original ALTs set
  String annotatedOtherAlt = concat("1", "100", "rs1", "A", "A", "0.0", "vx=0", "AC=3")
  assertFalse(isSameLine(annotatedOtherAlt, multiAltOrig))
}

// Join the given strings into one string with a single tab between consecutive items.
// No arguments yields "".
private String concat(String... s) {
  return s.join("\t")
}
  