package edu.mayo.bior.catalog.fasta2ctg;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;

import edu.mayo.bior.pipeline.createcatalog.TabixCmd;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.BlockCompressedOutputStream;

/** Converts a fasta reference assembly file into a BioR catalog with a tabix index.
 *  The input is expected in the format:
 *    >chr1
 *    NNNNNNACGATACGA...
 *  Each catalog line is tab-separated: chromosome, 1-based start, inclusive end, bases (upper-cased).
 * @author Michael Meiners (m054457)
 */
public class FastaToCatalog {

	/** Print the command-line usage to standard out. */
	public static void printUsage() {
		System.out.println("Converts a ref assembly fasta file into a BioR catalog");
		System.out.println("USAGE:");
		System.out.println("  _bior_fasta_to_catalog  <RefFastaInput>  <NumBasePairsPerLine>  <CatalogOutFile>");
		System.out.println("  WHERE:");
		System.out.println("    RefFastaInput is a plain-text fasta file in the format:");
		System.out.println("      >chr1");
		System.out.println("      NNNNNNNNNACTGCTAAACCTTTTAGGAAGAGAGAGAAAACCCCCGGGGG");
		System.out.println("      CTGAAAGGTACTGCTAAACCTTTTAGGAAGAGAGAGAAAACCCCCGGGGG");
		System.out.println("      >chr2");
		System.out.println("      ...");
		System.out.println("    NumBasePairsPerLine is the number of base-pairs to add to each line in the catalog.  For example, if you want 10 base-pairs per line:");
		System.out.println("      1  1  10  NNNNNNNNNA");
		System.out.println("      1  11 20  CTGCTAAACC");
		System.out.println("      ...");
		System.out.println("    CatalogOutFile is the new catalog to create, with extension .tsv.bgz");
		System.out.println("      A tabix index will be created for this catalog");
	}
	
	/** Command-line entry point.  Expects exactly three arguments (see {@link #printUsage()});
	 *  otherwise prints the usage and exits.  Any failure during conversion is reported
	 *  as a stack trace on standard error.
	 * @param args  RefFastaInput, NumBasePairsPerLine, CatalogOutFile
	 */
	public static void main(String[] args) {
		if ( args.length != 3 ) {
			printUsage();
			System.exit(0);
		}

		try {
			new FastaToCatalog().fastaToCatalog(args[0], Integer.parseInt(args[1]), args[2]);
		} catch(Exception e) {
			e.printStackTrace();
		}
	}

	/** Convert a fasta file to a block-compressed catalog and build a tabix index for it.
	 *  Lines starting with ";" are skipped, lines starting with ">" start a new chromosome
	 *  (position resets to 1), and all other lines are treated as sequence.
	 * @param fastaRefAssemblyPath  Path to the fasta input (plain text, or block-compressed if it ends in .gz or .bgz)
	 * @param numBasePairsPerLine   Number of base-pairs to put on each catalog line; must be greater than 0
	 * @param ctgOutPath            Path of the catalog to create; must end in .tsv.bgz
	 * @throws Exception  if ctgOutPath does not end in .tsv.bgz, if numBasePairsPerLine is not positive
	 *                    (IllegalArgumentException), or if reading, writing or indexing fails
	 */
	public void fastaToCatalog(String fastaRefAssemblyPath, int numBasePairsPerLine, String ctgOutPath) throws Exception {
		if( ! ctgOutPath.endsWith(".tsv.bgz") ) {
			throw new Exception("Catalog output file should end in .tsv.bgz");
		}
		// A non-positive chunk size would make writeOut() loop forever without consuming any bases
		if( numBasePairsPerLine <= 0 ) {
			throw new IllegalArgumentException("NumBasePairsPerLine must be greater than 0, but was: " + numBasePairsPerLine);
		}
		
		BufferedReader fin = openFasta(fastaRefAssemblyPath);
		try {
			BlockCompressedOutputStream fout = new BlockCompressedOutputStream(new File(ctgOutPath));
			try {
				String line = null;
				String chrom = "";
				long pos = 1;
				StringBuilder str = new StringBuilder();
				while( (line = fin.readLine()) != null ) {
					if( line.startsWith(";") ) {
						continue;
					} else if( line.startsWith(">") ) {
						// if there is a chromosome already defined, then we need to write out the previous chromosome's remaining bases
						if( chrom.length() > 0 ) {
							writeOut(fout, chrom, pos, str, numBasePairsPerLine, /*isForce=*/true);
						}
						pos = 1;
						// Strips every ">" and every "chr" occurrence; any text after the name on the header line is kept
						chrom = line.replace(">","").replace("chr","");
						str = new StringBuilder();
					} else {
						str.append(line);
						// Only full-size lines are written here; the remainder stays buffered in str
						pos += writeOut(fout, chrom, pos, str, numBasePairsPerLine, /*isForce=*/false);
					}
				}
				// Flush whatever is left over for the last chromosome
				if( str.length() > 0 ) {
					writeOut(fout, chrom, pos, str, numBasePairsPerLine, /*isForce=*/true);
				}
			} finally {
				fout.close();
			}
		} finally {
			fin.close();
		}
		// The index is only built once the catalog has been completely written and closed
		TabixCmd.createTabixIndex(ctgOutPath);
	}
	
	/** Open the fasta input for reading, going through a block-compressed stream when the path ends in .gz or .bgz.
	 *  NOTE(review): BlockCompressedInputStream presumably expects BGZF data, so a plain-gzip ".gz" file may not be readable - confirm.
	 * @param fastaPath  Path to the fasta input file
	 * @return  A reader over the (decompressed) text of the fasta file
	 * @throws IOException  if the file cannot be opened
	 */
	private BufferedReader openFasta(String fastaPath) throws IOException {
		File fastaFile = new File(fastaPath);
		if( fastaPath.endsWith(".gz")  ||  fastaPath.endsWith(".bgz") ) {
			return new BufferedReader(new InputStreamReader(new BlockCompressedInputStream(fastaFile)));
		}
		return new BufferedReader(new FileReader(fastaFile));
	}
	
	/** Write out lines to the catalog with the correct size, removing the written bases from the buffer.
	 * @param fout           Catalog output stream
	 * @param chrom          Chromosome name for the first column
	 * @param pos            1-based start position of the first base in str
	 * @param str            Buffered bases; everything written is deleted from the front
	 * @param numBPsPerLine  Maximum number of bases per catalog line
	 * @param isForce        If true, write everything in the buffer (the last line may be short);
	 *                       if false, write only full lines and leave the remainder buffered
	 * @return  The number of bases written (and removed from str)
	 * @throws IOException  if writing to the catalog fails
	 */
	private int writeOut(BlockCompressedOutputStream fout, String chrom, long pos, StringBuilder str, int numBPsPerLine, boolean isForce) throws IOException {
		int numBPsWritten = 0;
		while( (isForce ? str.length() > 0  :  str.length() >= numBPsPerLine) ) {
			// Write out up to numBPsPerLine, delete that amount from the StringBuilder
			int numBPsToWrite = Math.min(str.length(), numBPsPerLine);
			long end = (pos - 1) + numBPsToWrite;
			String line = chrom + "\t" + pos + "\t" + end + "\t" + str.substring(0, numBPsToWrite).toUpperCase() + "\n";
			fout.write(line.getBytes());
			str.delete(0, numBPsToWrite);
			numBPsWritten += numBPsToWrite;
			pos = end + 1;
		}
		return numBPsWritten;
	}

}
