package edu.mayo.bior.catalog.verification;

import java.io.IOException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import htsjdk.tribble.readers.TabixReader;
import htsjdk.tribble.readers.TabixReader.Iterator;

/**
 * Use TabixReader to look up a sequence in the ref assembly catalog.
 * The sequence from the last search is kept so that the next position may
 * not have to query the ref assembly catalog again if it is close in proximity.
 * Use the ref assembly catalog that has large lines (ex: 100k), to reduce queries to the catalog.
 * <p>
 * Catalog lines are expected to be tab-delimited: {@code chrom, min, max, sequence}
 * (ex: {@code "1  101  110  ACGTACGTAC"}, where min and max are both inclusive).
 * When a query returns several lines, their sequences are concatenated in the order
 * returned; this assumes the lines are contiguous and non-overlapping (not verified here).
 * <p>
 * Instances hold an open {@link TabixReader}; call {@link #close()} (or use
 * try-with-resources) when finished. The cache is plain mutable state with no
 * synchronization, so an instance should not be shared between threads.
 *
 * @author Michael Meiners (m054457), Nov 2016
 */
public class SeqLookup implements AutoCloseable
{
   private static final Logger sLogger = LoggerFactory.getLogger(SeqLookup.class);

   /** Number of tab-delimited columns a catalog line must have: chrom, min, max, sequence. */
   private static final int REQUIRED_COLUMN_COUNT = 4;

   private final String refAssemblyCatalogPath;
   private final TabixReader tabixReader;

   // Cached result of the last successful catalog query.
   // chromCached == null (with min/max == -1) means nothing has been cached yet.
   private String chromCached;
   private long   minCached = -1;
   private long   maxCached = -1;
   private StringBuilder seqCached = new StringBuilder();

   /**
    * Opens the given ref assembly catalog for lookups.
    *
    * @param refAssemblyCatalog path to the tabix-indexed ref assembly catalog
    * @throws IOException if the catalog (or its index) cannot be opened
    */
   public SeqLookup(String refAssemblyCatalog) throws IOException {
      this.refAssemblyCatalogPath = refAssemblyCatalog;
      this.tabixReader = new TabixReader(refAssemblyCatalog);
      sLogger.info("SeqLookup: refAssemblyCatalogPath = {}", this.refAssemblyCatalogPath);
   }

   /**
    * Returns the reference sequence for {@code chrom:min-max} (both ends inclusive),
    * served from the cache when possible and from a new catalog query otherwise.
    *
    * @param chrom chromosome name as it appears in the catalog; must not be null
    * @param min   first position of the requested range
    * @param max   last position of the requested range; must be &gt;= {@code min}
    * @return the bases covering {@code min..max}
    * @throws IllegalArgumentException if {@code chrom} is null, if {@code min > max}, if the
    *         catalog returns nothing for the range, or if the returned lines do not cover it
    * @throws NumberFormatException if a catalog line has a non-numeric min or max column
    * @throws IOException if the catalog cannot be read or a returned line is malformed
    */
   public String getRefSeq(String chrom, long min, long max) throws NumberFormatException, IOException {
      // Fail fast with a clear message instead of a NullPointerException or a
      // negative-length substring further down.
      if( chrom == null ) {
         throw new IllegalArgumentException("Reference sequence lookup: Chromosome must not be null");
      }
      if( min > max ) {
         throw new IllegalArgumentException("Reference sequence lookup: Range min (" + min
               + ") is greater than max (" + max + ")");
      }

      updateCacheAsNeeded(chrom, min, max);

      // The catalog may have returned lines that only partially cover the request
      // (for example a range running past the end of a chromosome).
      verifyWithinRange(chrom, min, max);

      return getSubSeqFromCache(min, max);
   }

   /**
    * Closes the underlying {@link TabixReader}. The instance must not be used afterwards.
    *
    * @throws IOException declared so callers are prepared for a failure while releasing the file
    */
   @Override
   public void close() throws IOException {
      tabixReader.close();
   }

   /** Throws if the requested range is not fully covered by the currently cached sequence. */
   private void verifyWithinRange(String chrom, long min, long max) {
      if( ! isSameChrom(chrom) ) {
         throw new IllegalArgumentException("Reference sequence lookup: Chromosome does not match the cached chromosome");
      }
      if( ! isWithinRange(min, max) ) {
         throw new IllegalArgumentException("Reference sequence lookup: Range is not within the cached range");
      }
   }

   private boolean isSameChrom(String chrom) {
      return chrom != null  &&  chrom.equals(this.chromCached);
   }

   private boolean isWithinRange(long min, long max) {
      return min >= this.minCached  &&  max <= this.maxCached;
   }

   /**
    * Makes sure the cache covers {@code chrom:min-max}, querying the catalog when it does not.
    * The new result is assembled in local variables and only committed once every returned line
    * has been parsed, so a failure part-way through never leaves the cache half-updated
    * (the previously cached sequence stays intact and usable).
    */
   private void updateCacheAsNeeded(String chrom, long min, long max) throws NumberFormatException, IOException {
      // If it is within the cache, just return
      if( isSameChrom(chrom)  &&  isWithinRange(min, max) ) {
         return;
      }

      // Else, we need to submit another tabix query and merge potentially multiple result lines into one sequence
      String tabixQuery = chrom + ":" + min + "-" + max;
      Iterator tabixIterator = tabixReader.query(tabixQuery);

      String newChrom = "";
      long   newMin   = -1;
      long   newMax   = -1;
      StringBuilder newSeq = new StringBuilder();

      // Get back the ref assembly catalog line (ex:  "1  101 200 ACTGA...")
      // Could be multiples.
      // NOTE(review): the null guard is defensive in case a TabixReader version returns
      // null for an unknown chromosome; it is then treated the same as "no lines".
      String line;
      while( tabixIterator != null  &&  (line = tabixIterator.next()) != null ) {
         String[] cols = line.split("\t");
         if( cols.length < REQUIRED_COLUMN_COUNT ) {
            // Do not echo the line itself: catalog lines can be very long.
            throw new IOException("Malformed ref assembly catalog line for range [" + tabixQuery + "]: expected "
                  + REQUIRED_COLUMN_COUNT + " tab-delimited columns but found " + cols.length);
         }
         newChrom = cols[0];
         if( newMin == -1 ) {
            // Only the first line defines where the merged sequence starts
            newMin = Long.parseLong(cols[1]);
         }
         newMax = Long.parseLong(cols[2]);
         newSeq.append(cols[3]);
      }

      if( newChrom.length() == 0  ||  newMin == -1  ||  newMax == -1  ||  newSeq.length() == 0 )
         throw new IllegalArgumentException("Range [" + tabixQuery + "] could not be retrieved from ref assembly catalog.  "
               + "This may mean you have the wrong ref catalog assembly for the data you are verifying.");

      // Everything parsed cleanly: commit the new cache in one step
      this.chromCached = newChrom;
      this.minCached   = newMin;
      this.maxCached   = newMax;
      this.seqCached   = newSeq;
   }

   /** Get the subseq within the cached seq.  Ex:
    *     CachedSeq:  1  101  110  ACGTACGTAC
    *     Range:      1  102  104   ^^^
    *     (start = 102-101 = 1)
    *     (end   = start + (max-min) + 1 = 1 + (104-102) + 1 = 1 + 2 + 1 = 4)
    * Callers must already have verified that min..max lies within the cached range.
    * @param min first position of the requested range (inclusive)
    * @param max last position of the requested range (inclusive)
    * @return the cached bases from offset start (inclusive) to end (exclusive)
    */
   private String getSubSeqFromCache(long min, long max) {
      int start = (int)(min - minCached);
      int end   = (int)(start + (max - min) + 1);
      return seqCached.substring(start, end);
   }

}

