package edu.mayo.bior.pipeline.createcatalog;


import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertFalse;
import static junit.framework.Assert.assertTrue;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.AppenderSkeleton;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.WriterAppender;
import org.apache.log4j.spi.LoggingEvent;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import edu.mayo.bior.catalog.CatalogMetadataConstant;
import edu.mayo.bior.catalog.HumanBuildAssembly;
import edu.mayo.bior.cli.func.BaseFunctionalTest;
import edu.mayo.bior.pipeline.createcatalog.TjsonToCatalog.FileType;
import htsjdk.tribble.readers.TabixReader;

public class TjsonToCatalogTest  extends BaseFunctionalTest
{
   /** Platform line separator; used to join several JSON records into one multi-line input file. */
   private static final String NL = System.getProperty("line.separator");

   /** Scratch folder assigned before each test by the {@code @Before} method and passed to createCatalog as its temp directory. */
   private File tempDir;
   
   /** JUnit rule that supplies the temporary files and folders used by the tests in this class. */
   @Rule
   public TemporaryFolder temporaryFolder = new TemporaryFolder();
   
   /**
    * Runs before every test: asks the temporary-folder rule for a new sub-folder and
    * remembers it as the scratch directory for the test.
    *
    * @throws IOException if the folder cannot be created
    */
   @Before
   public void beforeEach() throws IOException {
      File freshFolder = this.temporaryFolder.newFolder();
      this.tempDir = freshFolder;
   }

   /**
    * Regression test for bug 490565 (bad tabix indexes).
    * Builds two small catalogs from JSON-only input and queries the generated tabix index.
    * Per the original note, an index built only from the chromosome and min coordinate would
    * make the query that touches just the max coordinate fail.
    *
    * Each TabixReader is closed in a finally block so no file handle on the temporary
    * catalog files is left open when the test ends.
    *
    * @throws IOException if a temp file cannot be written or the catalog cannot be read
    * @throws InterruptedException declared because createCatalog is called
    */
   @Test
   public void testBadTabixIndexesBug490565() throws IOException, InterruptedException
   {
      TjsonToCatalog catalogMaker = new TjsonToCatalog();

      // ---- Catalog 1: a single record spanning positions 100-101 ----
      String json = "{'_landmark':'1','_minBP':100,'_maxBP':101,'key':1,'key2':'val'}";
      File f = temporaryFolder.newFile();
      FileUtils.write(f, swapQuotes(json));
      File catalogFile = new File(temporaryFolder.getRoot(), "catalog" + CatalogMetadataConstant.CATALOG_FILE_SUFFIX);
      File catalogTabixIndexFile = new File(catalogFile.getPath() + ".tbi");
      catalogMaker.createCatalog(f.getPath(), catalogFile.getPath(), /*isSort=*/false, /*chromSortOrder=*/null,
            /*jsonCol=*/1, /*isJsonOnly=*/false, /*tempDir=*/null, /*buildAssembly=*/null, /*isModifyChrom=*/false, /*isInFinalFormat=*/false);
      assertTrue(catalogFile.exists());
      assertTrue(catalogTabixIndexFile.exists());

      TabixReader reader = new TabixReader(catalogFile.getPath());
      try
      {
         // If the bug is there from just making an index on the chr and min coordinate, this should fail
         assertEquals(1, recordCountFromQuery(reader, "1:101-101"));

         // test a few other queries
         assertEquals(1, recordCountFromQuery(reader, "1:100-100"));
         assertEquals(1, recordCountFromQuery(reader, "1:100-101"));
         assertEquals(1, recordCountFromQuery(reader, "1:99-105"));
         assertEquals(0, recordCountFromQuery(reader, "1:98-98"));
         assertEquals(0, recordCountFromQuery(reader, "2:99-105"));
      }
      finally
      {
         // Release the handle on the first catalog before building the second one
         reader.close();
      }

      // ---- Catalog 2: three single-base records at positions 1, 2 and 3 ----
      json = "{'_landmark':'1','_minBP':1,'_maxBP':1,'key1':'str','key2':1}" + NL +
             "{'_landmark':'1','_minBP':2,'_maxBP':2,'key3':'str','key4':1}" + NL +
             "{'_landmark':'1','_minBP':3,'_maxBP':3,'key5':'str','key6':1}";

      f = temporaryFolder.newFile();
      FileUtils.write(f, swapQuotes(json));
      catalogFile = new File(temporaryFolder.getRoot(), "catalog2" + CatalogMetadataConstant.CATALOG_FILE_SUFFIX);
      catalogTabixIndexFile = new File(catalogFile.getPath() + ".tbi");
      catalogMaker.createCatalog(f.getPath(), catalogFile.getPath(), false, null, 1, false, null, null, false, /*isInFinalFormat=*/false);
      assertTrue(catalogFile.exists());
      assertTrue(catalogTabixIndexFile.exists());

      reader = new TabixReader(catalogFile.getPath());
      try
      {
         assertEquals(1, recordCountFromQuery(reader, "1:1-1"));
         assertEquals(1, recordCountFromQuery(reader, "1:2-2"));
         assertEquals(1, recordCountFromQuery(reader, "1:3-3"));
         assertEquals(2, recordCountFromQuery(reader, "1:1-2"));
         assertEquals(3, recordCountFromQuery(reader, "1:1-3"));
         assertEquals(2, recordCountFromQuery(reader, "1:2-3"));
      }
      finally
      {
         reader.close();
      }
   }
   
   /**
    * Writes five catalog rows to a bgzip input file in shuffled order, runs createCatalog with
    * sorting enabled, then reads the bgzip output back and checks the rows come out in the
    * expected order.
    *
    * @throws IOException if a temp file cannot be created, written or read
    * @throws InterruptedException declared because createCatalog is called
    */
   @Test
   public void createCatalogUsingGzipTempFiles() throws IOException, InterruptedException {
      File bgzipInput  = temporaryFolder.newFile("input.bgz");
      File bgzipOutput = temporaryFolder.newFile("output.bgz");

      String rowChr1     = concat("1", "100", "100", swapQuotes("{'_landmark':'1','_minBP':100,'_maxBP':100,'_refAllele':'A','_altAlleles':['C','G']}"));
      String rowChr2     = concat("2", "200", "200", swapQuotes("{'_landmark':'2','_minBP':200,'_maxBP':200,'_refAllele':'A','_altAlleles':['C','G']}"));
      String rowChr3RefA = concat("3", "300", "300", swapQuotes("{'_landmark':'3','_minBP':300,'_maxBP':300,'_refAllele':'A','_altAlleles':['C','G']}"));
      String rowChr3RefC = concat("3", "300", "300", swapQuotes("{'_landmark':'3','_minBP':300,'_maxBP':300,'_refAllele':'C','_altAlleles':['A','G']}"));
      String rowChr5     = concat("5", "500", "500", swapQuotes("{'_landmark':'5','_minBP':500,'_maxBP':500,'_refAllele':'A','_altAlleles':['C','G']}"));

      // The input is deliberately out of order so the sort has something to do
      List<String> shuffledRows = Arrays.asList(rowChr1, rowChr3RefA, rowChr5, rowChr3RefC, rowChr2);
      List<String> expectedRows = Arrays.asList(rowChr1, rowChr2, rowChr3RefA, rowChr3RefC, rowChr5);
      writeBgzip(shuffledRows, bgzipInput);

      // Build the sorted catalog from the shuffled bgzip input
      new TjsonToCatalog().createCatalog(
            bgzipInput.getCanonicalPath(),
            bgzipOutput.getCanonicalPath(),
            /*isSort=*/true,
            /*chromSortOrderFilePath=*/"src/main/resources/humanChromosomesSortedByName.GRCh37.txt",
            /*jsonCol=*/4,
            /*isJsonOnly=*/false,
            this.tempDir.getCanonicalPath(),
            HumanBuildAssembly.GRCh37,
            /*isModifyChrom=*/true,
            /*isInFinalFormat=*/false
            );

      List<String> actualRows = readBgzip(bgzipOutput);
      assertEquals(expectedRows.size(), actualRows.size());
      for (int i = 0; i < expectedRows.size(); i++) {
         assertEquals(expectedRows.get(i), actualRows.get(i));
      }
   }
   
   
   /**
    * Test a make_json script that would bgzip the output file and have it in the final catalog format so we
    * can skip the step that extracts the tabix columns, and skip bgzipping of the file.
    * Log messages emitted during createCatalog are captured through a test appender and checked,
    * and the output file is verified to contain the input rows unchanged.
    *
    * The capturing appender is detached from the root logger in a finally block so that it does not
    * keep collecting events from tests that run afterwards.
    *
    * @throws IOException if a temp file cannot be created, written or read
    * @throws InterruptedException declared because createCatalog is called
    */
   @Test
   public void createCatalog_InFinalFormat() throws IOException, InterruptedException {
      File tempInputFileBgzip = temporaryFolder.newFile("input.bgz");
      File tempOutFileBgzip   = temporaryFolder.newFile("output.bgz");

      String s1 = concat("1", "100", "100", swapQuotes("{'_landmark':'1','_minBP':100,'_maxBP':100,'_refAllele':'A','_altAlleles':['C','G']}"));
      String s2 = concat("2", "200", "200", swapQuotes("{'_landmark':'2','_minBP':200,'_maxBP':200,'_refAllele':'A','_altAlleles':['C','G']}"));
      String s3 = concat("3", "300", "300", swapQuotes("{'_landmark':'3','_minBP':300,'_maxBP':300,'_refAllele':'A','_altAlleles':['C','G']}"));
      String s4 = concat("3", "300", "300", swapQuotes("{'_landmark':'3','_minBP':300,'_maxBP':300,'_refAllele':'C','_altAlleles':['A','G']}"));
      String s5 = concat("5", "500", "500", swapQuotes("{'_landmark':'5','_minBP':500,'_maxBP':500,'_refAllele':'A','_altAlleles':['C','G']}"));
      // Rows are written already in their final order, since no sort will be requested
      writeBgzip(Arrays.asList(s1,s2, s3, s4, s5), tempInputFileBgzip);

      TjsonToCatalog catalogMaker = new TjsonToCatalog();

      // NOTE: isSort must be FALSE to use isInFinalFormat=true
      boolean isSort = false;
      boolean isInFinalFormat = true;

      // Capture log messages
      // See: https://stackoverflow.com/questions/1827677/how-to-do-a-junit-assert-on-a-message-in-a-logger
      TestAppender appender = setupTestLogAppender();

      List<String> logMsgs;
      try {
         // Produce the catalog from input that is already in final format (no sort requested)
         catalogMaker.createCatalog(
               tempInputFileBgzip.getCanonicalPath(),
               tempOutFileBgzip.getCanonicalPath(),
               isSort,
               /*chromSortOrderFilePath=*/"src/main/resources/humanChromosomesSortedByName.GRCh37.txt",
               /*jsonCol=*/4,
               /*isJsonOnly=*/false,
               this.tempDir.getCanonicalPath(),
               HumanBuildAssembly.GRCh37,
               /*isModifyChrom=*/true,
               isInFinalFormat
               );
         logMsgs = getLogMessages(appender);
      }
      finally {
         // The appender was attached to the root logger; remove it so global logging state is restored
         Logger.getRootLogger().removeAppender(appender);
      }

      String logMsgsAll = StringUtils.join(logMsgs, "\n");
      assertTrue(logMsgsAll.contains("The output from make_json is already BGZipped and in the correct format, so just copy the file to the final output location"));
      assertTrue(logMsgsAll.contains("No sort will be performed on the catalog."));
      assertFalse(logMsgsAll.contains("Extract the first 3 tabix columns (if needed), and format the catalog lines, then write to bgzip output"));

      List<String> strList = readBgzip(tempOutFileBgzip);
      assertEquals(5,  strList.size());
      assertEquals(s1, strList.get(0));
      assertEquals(s2, strList.get(1));
      assertEquals(s3, strList.get(2));
      assertEquals(s4, strList.get(3));
      assertEquals(s5, strList.get(4));
   }
   
   
   /**
    * Renders every event captured by the given appender as {@code "<level>: <message>"}.
    *
    * @param appender the test appender whose captured events are read
    * @return one formatted string per captured event, in capture order
    */
   private List<String> getLogMessages(TestAppender appender) {
      List<String> formatted = new ArrayList<String>();
      for (LoggingEvent event : appender.getLog()) {
         formatted.add(event.getLevel() + ": " + event.getMessage());
      }
      return formatted;
   }

/**
 * Creates a capturing appender, attaches it to the log4j root logger, and then logs a single
 * INFO message ("Test") through the TjsonToCatalog logger.
 * The appender is left attached when this method returns; nothing here removes it.
 *
 * @return the appender that was attached to the root logger
 */
private TestAppender setupTestLogAppender() {
   TestAppender capturingAppender = new TestAppender();
   Logger.getRootLogger().addAppender(capturingAppender);
   Logger.getLogger(TjsonToCatalog.class).info("Test");
   return capturingAppender;
}


/** Minimal log4j appender that keeps every logging event it receives in an in-memory list. */
class TestAppender extends AppenderSkeleton {
    /** Events received so far, in arrival order. */
    private final List<LoggingEvent> capturedEvents = new ArrayList<LoggingEvent>();

    /** Stores the incoming event. */
    @Override
    protected void append(final LoggingEvent event) {
        capturedEvents.add(event);
    }

    /** @return a copy of the captured events, so callers cannot modify the internal list */
    public List<LoggingEvent> getLog() {
        List<LoggingEvent> snapshot = new ArrayList<LoggingEvent>(capturedEvents);
        return snapshot;
    }

    /** No layout is needed because events are stored as-is rather than formatted. */
    @Override
    public boolean requiresLayout() {
        return false;
    }

    /** Nothing to release. */
    @Override
    public void close() {
    }
}
   
   /**
    * Checks the sign of compareLines for a series of catalog lines, each compared against one
    * baseline line. Each candidate differs from the baseline in one aspect (alt alleles,
    * ref allele, max position, min position, chromosome, or only the rest of the line).
    */
   @Test
   public void testCompareLines() {
      TjsonToCatalog comparer = new TjsonToCatalog();

      // The line every other line is compared against
      String baseline = swapQuotes(concat("1", "10055", "10055", "{'CHROM':'1','POS':10055,'ID':'rs768019142','REF':'T','ALT':'C,G','_landmark':'1','_minBP':10055,'_maxBP':10055,'_refAllele':'T','_altAlleles':['C','G']}"));

      // BEFORE baseline: alt alleles A,G come before C,G
      String altsAG = swapQuotes(concat("1", "10055", "10055", "{'CHROM':'1','POS':10055,'ID':'rs768019142','REF':'T','ALT':'A,G','_landmark':'1','_minBP':10055,'_maxBP':10055,'_refAllele':'T','_altAlleles':['A','G']}"));
      assertTrue(comparer.compareLines(altsAG, baseline) < 0);

      // BEFORE baseline: alt alleles AAA,GGG come before C,G
      String altsAaaGgg = swapQuotes(concat("1", "10055", "10055", "{'CHROM':'1','POS':10055,'ID':'rs768019142','REF':'T','ALT':'AAA,GGG','_landmark':'1','_minBP':10055,'_maxBP':10055,'_refAllele':'T','_altAlleles':['AAA','GGG']}"));
      assertTrue(comparer.compareLines(altsAaaGgg, baseline) < 0);

      // AFTER baseline: alt alleles CCC,GGG come after C,G
      String altsCccGgg = swapQuotes(concat("1", "10055", "10055", "{'CHROM':'1','POS':10055,'ID':'rs768019142','REF':'T','ALT':'CCC,GGG','_landmark':'1','_minBP':10055,'_maxBP':10055,'_refAllele':'T','_altAlleles':['CCC','GGG']}"));
      assertTrue(comparer.compareLines(altsCccGgg, baseline) > 0);

      // AFTER baseline: alt alleles TTT,GGG come after C,G
      String altsTttGgg = swapQuotes(concat("1", "10055", "10055", "{'CHROM':'1','POS':10055,'ID':'rs768019142','REF':'T','ALT':'TTT,GGG','_landmark':'1','_minBP':10055,'_maxBP':10055,'_refAllele':'T','_altAlleles':['TTT','GGG']}"));
      assertTrue(comparer.compareLines(altsTttGgg, baseline) > 0);

      // BEFORE baseline: ref allele A comes before T
      String refA = swapQuotes(concat("1", "10055", "10055", "{'CHROM':'1','POS':10055,'ID':'rs768019142','REF':'A','ALT':'C,G','_landmark':'1','_minBP':10055,'_maxBP':10055,'_refAllele':'A','_altAlleles':['C','G']}"));
      assertTrue(comparer.compareLines(refA, baseline) < 0);

      // AFTER baseline: maxBP is larger, even though the ref allele would sort earlier
      String largerMax = swapQuotes(concat("1", "10055", "10057", "{'CHROM':'1','POS':10055,'ID':'rs768019142','REF':'AAA','ALT':'C,G','_landmark':'1','_minBP':10055,'_maxBP':10057,'_refAllele':'AAA','_altAlleles':['C','G']}"));
      assertTrue(comparer.compareLines(largerMax, baseline) > 0);

      // BEFORE baseline: minBP is smaller
      String smallerMin = swapQuotes(concat("1", "10054", "10054", "{'CHROM':'1','POS':10054,'ID':'rs768019142','REF':'T','ALT':'C,G','_landmark':'1','_minBP':10054,'_maxBP':10054,'_refAllele':'T','_altAlleles':['C','G']}"));
      assertTrue(comparer.compareLines(smallerMin, baseline) < 0);

      // AFTER baseline: chromosome is later, even though everything else would sort earlier
      String laterChrom = swapQuotes(concat("2", "100", "100", "{'CHROM':'1','POS':100,'ID':'rs768019142','REF':'C','ALT':'A,G','_landmark':'2','_minBP':100,'_maxBP':100,'_refAllele':'C','_altAlleles':['A','G']}"));
      assertTrue(comparer.compareLines(laterChrom, baseline) > 0);

      // BEFORE baseline: chrom,min,max,ref,alt are the same, so the whole line is compared and the rsId sorts earlier
      String earlierRsId = swapQuotes(concat("1", "10055", "10055", "{'CHROM':'1','POS':10055,'ID':'rs111111111','REF':'T','ALT':'C,G','_landmark':'1','_minBP':10055,'_maxBP':10055,'_refAllele':'T','_altAlleles':['C','G']}"));
      assertTrue(comparer.compareLines(earlierRsId, baseline) < 0);

      // SAME as baseline: falls through to the whole-line comparison, which is also equal
      String identical = swapQuotes(concat("1", "10055", "10055", "{'CHROM':'1','POS':10055,'ID':'rs768019142','REF':'T','ALT':'C,G','_landmark':'1','_minBP':10055,'_maxBP':10055,'_refAllele':'T','_altAlleles':['C','G']}"));
      assertTrue(comparer.compareLines(identical, baseline) == 0);
   }

   
   /**
    * Checks that getFileType reports the real type of a file regardless of its extension:
    * first with extensions that match the content, then with copies of the same files
    * saved under misleading extensions.
    *
    * The copies go into a folder obtained from the temporary-folder rule. The rule has already
    * created its root before the test starts, so create() is not called again here; calling it
    * a second time would replace the rule's root and leave the first one behind on disk.
    *
    * @throws IOException if a folder cannot be created or a file cannot be copied
    */
   @Test
   public void testFileTypes() throws IOException {
      // Try with files with extension matching actual file type
      File tsvFile = new File("src/test/resources/ref_assembly/GRCh37/genome_GRCh37.tsv");
      File gzFile  = new File("src/test/resources/dbsnp20k.vcf.gz");
      File bgzFile = new File("src/test/resources/genes.tsv.bgz");
      TjsonToCatalog tjsonToCtg = new TjsonToCatalog();
      assertEquals(FileType.TEXT, tjsonToCtg.getFileType(tsvFile));
      assertEquals(FileType.GZIP,      tjsonToCtg.getFileType(gzFile));
      assertEquals(FileType.BGZIP,     tjsonToCtg.getFileType(bgzFile));

      // Separate folder for the mis-named copies (named so it does not shadow the tempDir field)
      File mismatchDir = temporaryFolder.newFolder();

      // Now try with mismatches
      // TSV as GZ and BGZ ---------------------------------
      assertEquals(FileType.TEXT, tjsonToCtg.getFileType(copyFileAndAddExt(tsvFile, mismatchDir, "gz")));
      assertEquals(FileType.TEXT, tjsonToCtg.getFileType(copyFileAndAddExt(tsvFile, mismatchDir, "bgz")));

      // GZ as TSV and BGZ ---------------------------------
      assertEquals(FileType.GZIP,      tjsonToCtg.getFileType(copyFileAndAddExt(gzFile,  mismatchDir, "tsv")));
      assertEquals(FileType.GZIP,      tjsonToCtg.getFileType(copyFileAndAddExt(gzFile,  mismatchDir, "bgz")));

      // BGZ as TSV and GZ ---------------------------------
      assertEquals(FileType.BGZIP,     tjsonToCtg.getFileType(copyFileAndAddExt(bgzFile,  mismatchDir, "tsv")));
      assertEquals(FileType.BGZIP,     tjsonToCtg.getFileType(copyFileAndAddExt(bgzFile,  mismatchDir, "gz")));
   }
   
   /**
    * File-type detection against very large files, since attempting to read a large file
    * can sometimes cause an OutOfMemoryError.
    * NOTE: Disabled by default; it needs access to the /data5 folder on RCF systems.
    */
   @Ignore
   @Test
   public void testFileTypesLarge() {
      TjsonToCatalog detector = new TjsonToCatalog();

      // 21GB bgzip file. NOTE: A Bgzip file is also a Gzip file!
      File largeBgzip = new File("/data5/bsi/BIOR/Temp/dbNSFP_build_2016Dec/DbnsfpCtgOut/TestPerformanceImprovementsFull/dbNSFPv3.0a.zip.tsv.bgz");
      // 1.6GB gzip file
      File largeGzip = new File("/data5/bsi/refdata-new/dbnsfp/human/variant/latest/downloaded/2016_12_22/dbNSFP3.3a_variant.chr1.gz");
      // 2.8GB plain-text file
      File largeText = new File("/data5/bsi/BIOR/Temp/dbSNP_142_GRCh37_p13_recreateCatalogInProdSpace/this.5col.txt");

      assertTrue(detector.isFileBgzip(largeBgzip));
      assertTrue(detector.isFileGzip(largeBgzip));
      assertEquals(FileType.BGZIP, detector.getFileType(largeBgzip));

      assertFalse(detector.isFileBgzip(largeGzip));
      assertTrue(detector.isFileGzip(largeGzip));
      assertEquals(FileType.GZIP, detector.getFileType(largeGzip));

      assertFalse(detector.isFileBgzip(largeText));
      assertFalse(detector.isFileGzip(largeText));
      assertEquals(FileType.TEXT, detector.getFileType(largeText));
   }
   
   /**
    * Copies a file into the given directory under the fixed base name "file" plus the given extension.
    * The source file's own name is not used for the copy.
    *
    * @param file the file to copy
    * @param tempDir the directory that receives the copy
    * @param extensionToAppend the extension (without the dot) given to the copy
    * @return the copied file
    * @throws IOException if the copy fails
    */
   private File copyFileAndAddExt(File file, File tempDir, String extensionToAppend) throws IOException {
      String copyName = "file." + extensionToAppend;
      File copy = new File(tempDir, copyName);
      FileUtils.copyFile(file, copy);
      return copy;
   }
   
   //---------------------------------------------------------------------------------------
   
   /**
    * Runs a region query against a tabix reader and counts the records it returns.
    *
    * @param reader the tabix reader to query
    * @param query the region string handed to the reader, e.g. "1:100-101"
    * @return the number of records the query iterator yields before returning null
    * @throws IOException if reading a record fails
    */
   public int recordCountFromQuery(TabixReader reader, String query) throws IOException
   {
      TabixReader.Iterator matches = reader.query(query);
      int matched = 0;
      for (String row = matches.next(); row != null; row = matches.next())
      {
         matched++;
      }
      return matched;
   }

}
