package edu.mayo.bior.catalog.list;

import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Set;

import edu.mayo.bior.catalog.list.CatalogInfo.CatalogStatus;


/**
Crawls the given catalog parent directory (use $BIOR_CATALOG in most cases)
and builds tab-delimited output to STDOUT that contains key information from each of the catalogs.

See the usage() method for more info

 @author Michael Meiners (m054457) - July, 2016

 */
public class CatalogListGenerator {

	/** File-name suffix that identifies a catalog file while crawling. */
	private static final String CATALOG_SUFFIX = ".tsv.bgz";

	/** File-name suffix of the index files found in a catalog's "index" directory. */
	private static final String INDEX_SUFFIX = ".idx.h2.db";

	/** Prints the command-line usage text to STDOUT. */
	public static void usage() {
		System.out.println(
				"\n" +
				"_bior_catalog_list  <CATALOG_DIR>\n" +
				"\n" +
				"This class crawls the given catalog parent directory (use $BIOR_CATALOG in most cases)\n" +
						"and builds tab-delimited output to STDOUT that contains key information from each of the catalogs, such as:\n" +
						"    - Status       (ex: Active  or  Deprecated)\n" +
						"    - Path         (ex: /research/bsi/data/catalogs/bior/v1/dbSNP/142_GRCh37.p13/variants_nodups.v2/00-All.vcf.tsv.bgz)\n" +
						"    - LastUpdated  (ex: 2015-11-16 09:39:54)\n" +
						"    - ShortName    (ex: dbSNP_142_GRCh37p13)\n" +
						"    - Indexes      (ex: GENE, ID, RSID)\n" +
						"    - DataSource:  (ex: dbSNP)\n" +
						"    - Version      (ex: 142)\n" +
						"    - Build        (ex: GRCh37.p13)\n" +
						"    - BuildBase    (ex: GRCh37)\n" +
						"    - Dataset      (ex: Variants)\n" +
						"\n" +
						"This output is used to build the $BIOR_CATALOG/CATALOG_LIST.txt file.\n"
				);
	}


	/**
	 * Command-line entry point.  Expects exactly one argument: the catalog directory to crawl.
	 * Prints the usage text when the argument count is wrong; otherwise prints the catalog list to STDOUT.
	 * Any exception raised while crawling is reported as a stack trace on STDERR.
	 */
	public static void main(String[] args) {
		if(args.length != 1) {
			usage();
			return;
		}

		String catalogDirPathToCrawl = args[0];

		try {
			String output = new CatalogListGenerator().createCatalogList(catalogDirPathToCrawl);
			System.out.println(output);
		} catch(Exception e) {
			e.printStackTrace();
		}
	}


	/**
	 * Crawls the given directory recursively and builds the tab-delimited catalog list:
	 * one header line followed by one line per catalog, sorted case-insensitively by canonical path.
	 *
	 * @param catalogDirPathToCrawl  path of the directory to crawl
	 * @return the tab-delimited listing, or an empty string if the path does not exist or is not a directory
	 *         (a message is written to STDERR in those cases)
	 * @throws IOException if a canonical path cannot be resolved or a datasource properties file cannot be read
	 */
	public String createCatalogList(String catalogDirPathToCrawl) throws IOException {
		File catalogDir = new File(catalogDirPathToCrawl);
		if( ! catalogDir.exists() ) {
			System.err.println("Directory does not exist: " + catalogDirPathToCrawl);
			return "";
		}
		if( ! catalogDir.isDirectory() ) {
			System.err.println("Path is not a directory: " + catalogDirPathToCrawl);
			return "";
		}

		String canonicalPath = catalogDir.getCanonicalPath();

		// Diagnostics go to STDERR so they do NOT end up in the generated listing
		System.err.println("Catalog path: " + catalogDirPathToCrawl);
		System.err.println("(canonical):  " + canonicalPath);

		List<File> allCatalogs = getCatalogs(new File(canonicalPath));
		List<File> catalogs = removeNonCatalogDirs(removeDuplicates(allCatalogs));
		sortCatalogs(catalogs);

		StringBuilder out = new StringBuilder();
		out.append("#Status"     + "\t"
				+  "Path"        + "\t"
				+  "LastUpdated" + "\t"
				+  "ShortName"   + "\t"
				+  "Indexes"     + "\t"
				+  "DataSource"  + "\t"
				+  "Version"     + "\t"
				+  "Build"       + "\t"
				+  "BuildBase"   + "\t"
				+  "Dataset"     + "\n");
		for(File catalog : catalogs) {
			// Each row is whatever CatalogInfo.toString() renders for the catalog
			out.append(getCatalogInfo(catalog)).append("\n");
		}
		return out.toString();
	}


	//=================================================================


	/**
	 * Collects the listing fields for one catalog file from the file itself, its sibling
	 * datasource.properties file (if any) and its sibling "index" directory (if any).
	 */
	private CatalogInfo  getCatalogInfo(File catalogFile) throws IOException {
		CatalogInfo catalogInfo = new CatalogInfo();

		catalogInfo.status = getStatus(catalogFile);
		catalogInfo.path   = catalogFile.getCanonicalPath();
		// SimpleDateFormat is not thread-safe, so a fresh instance is created per call
		SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
		catalogInfo.lastUpdated = dateFormat.format(catalogFile.lastModified());

		Properties props = getCatalogDatasourceProps(catalogFile);
		catalogInfo.shortName = emptyIfNull(props.getProperty("ShortUniqueName"));
		catalogInfo.indexes   = getIndexes(catalogFile);
		catalogInfo.dataSource= emptyIfNull(props.getProperty("Source"));
		catalogInfo.version   = emptyIfNull(props.getProperty("Version"));
		catalogInfo.build     = truncateAfterFirstWord(emptyIfNull(props.getProperty("Build")));
		catalogInfo.buildBase = getBuildBase(catalogInfo.build);
		catalogInfo.dataset   = emptyIfNull(props.getProperty("Dataset"));

		return catalogInfo;
	}

	/** Returns the given string, or "" if it is null. */
	private String emptyIfNull(String s) {
		return (s == null) ? "" : s;
	}


	/** Truncate a string after the first word (the first run of non-whitespace characters).
	 *  Ex: "GRCh37 (lifted over from hg18)" ==> "GRCh37"
	 *  Surrounding whitespace is ignored and any whitespace character (not only a space) ends the
	 *  word, so a tab inside the value cannot leak into the tab-delimited output.
	 */
	private String truncateAfterFirstWord(String str) {
		String trimmed = str.trim();
		for(int i=0; i < trimmed.length(); i++) {
			if( Character.isWhitespace(trimmed.charAt(i)) )
				return trimmed.substring(0, i);
		}
		return trimmed;
	}


	/**
	 * Loads the datasource.properties file that sits next to the catalog
	 * (same name with ".tsv.bgz" / ".tsv.gz" replaced by ".datasource.properties").
	 *
	 * @return the loaded properties, or empty properties if the file does not exist
	 * @throws IOException if the properties file exists but cannot be read
	 */
	private Properties getCatalogDatasourceProps(File catalogFile) throws IOException {
		String datasourcePropsName = catalogFile.getName()
				.replace(CATALOG_SUFFIX, ".datasource.properties")
				.replace(".tsv.gz", ".datasource.properties");
		File datasourcePropsFile = new File(catalogFile.getParentFile(), datasourcePropsName);

		Properties props = new Properties();

		// If the datasource.properties file does NOT exist, then return empty properties
		if( ! datasourcePropsFile.exists() )
			return props;

		InputStream instream = new FileInputStream(datasourcePropsFile);
		try {
			props.load(instream);
		} finally {
			// Always release the file handle, even when load() fails
			instream.close();
		}

		return props;
	}

	/** Strip off point releases, etc.  Basically stop at the first non-alphanumeric char.  "GRCh37.p13" to "GRCh37" */
	private String getBuildBase(String buildFull) {
		if( buildFull == null  || buildFull.trim().length() == 0 )
			return "";

		int end = 0;
		while( end < buildFull.length()  &&  Character.isLetterOrDigit(buildFull.charAt(end)) )
			end++;
		return buildFull.substring(0, end);
	}


	/** Get the list of indexes for a catalog, sorted case-insensitively.  Ex: GENE, HGNC, ID
	 *  Returns an empty list if the catalog has no sibling "index" directory.
	 */
	private List<String> getIndexes(File catalogFile) {
		List<String> indexes = new ArrayList<String>();

		// Look at each index file within the sibling "index" directory
		File[] indexFiles = new File(catalogFile.getParentFile(), "index").listFiles(new FilenameFilter() {
			@Override
			public boolean accept(File dir, String name) {
				return name.endsWith(INDEX_SUFFIX);
			}
		} );

		// If the "index" directory doesn't exist (indexFiles == null), then return empty list
		if( indexFiles == null )
			return indexes;

		String catalogName = catalogFile.getName();
		for(File idxFile : indexFiles) {
			// WARNING:  dbSNP has:
			//   ctg:  00-All.vcf.tsv.bgz
			//   idx:  00-All.ID.idx.h2.db
			// So pull off matching characters from the beginning; what remains is the index name ("ID")
			String idxName = idxFile.getName().replace(INDEX_SUFFIX, "");

			// Bound the scan by the shorter name so that an index name that is a prefix of the
			// catalog name (or vice versa) cannot run off the end of either string
			int maxPrefix = Math.min(idxName.length(), catalogName.length());
			int numCharsMatching = 0;
			while( numCharsMatching < maxPrefix
					&&  idxName.charAt(numCharsMatching) == catalogName.charAt(numCharsMatching) ) {
				numCharsMatching++;
			}
			String indexKey = idxName.substring(numCharsMatching);

			// Only add if there were at least 3 matching characters
			// (otherwise, there may have been two catalogs in the same directory and the index only matches one)
			// and if something is left over to serve as the index name
			if( numCharsMatching >= 3  &&  indexKey.length() > 0 )
				indexes.add(indexKey);
		}

		Collections.sort(indexes, String.CASE_INSENSITIVE_ORDER);

		return indexes;
	}

	/** Sorts the list in place, case-insensitively by canonical path, and returns the same list. */
	private List<File> sortCatalogs(List<File> catalogs) {
		Comparator<File> fileCompare = new Comparator<File>() {
			@Override
			public int compare(File f1, File f2) {
				try {
					return f1.getCanonicalPath().compareToIgnoreCase(f2.getCanonicalPath());
				} catch (IOException e) {
					e.printStackTrace();
					// Fall back to absolute paths rather than reporting unrelated files as equal,
					// which would leave the sort order undefined
					return f1.getAbsolutePath().compareToIgnoreCase(f2.getAbsolutePath());
				}
			}
		};
		Collections.sort(catalogs, fileCompare);
		return catalogs;
	}

	/**
	 * Recursively collects every file ending in ".tsv.bgz" under the given directory,
	 * each resolved to its canonical form.  Directories that cannot be listed are
	 * reported on STDERR and skipped.
	 */
	private List<File> getCatalogs(File startingDir) throws IOException {
		List<File> catalogs = new ArrayList<File>();
		File[] files = startingDir.listFiles();
		// listFiles() returns null when the directory cannot be read or an I/O error occurs
		if( files == null ) {
			System.err.println("Warning: unable to list directory: " + startingDir);
			return catalogs;
		}
		for(File f : files) {
			if( f.isDirectory() )
				catalogs.addAll(getCatalogs(f));
			else if( f.getName().endsWith(CATALOG_SUFFIX) )
				catalogs.add(f.getCanonicalFile());
		}
		return catalogs;
	}

	/**
	 * Removes, in place, every file whose absolute path contains "latest" and every file whose
	 * canonical path was already seen earlier in the list (the first occurrence is kept).
	 * NOTE(review): the "latest" entries are presumably aliases of versioned catalogs - confirm.
	 *
	 * @return the same (modified) list
	 */
	private List<File> removeDuplicates(List<File> files) throws IOException {
		Set<String> seenCanonicalPaths = new HashSet<String>();
		for(Iterator<File> it = files.iterator(); it.hasNext(); ) {
			File file = it.next();
			if( file.getAbsolutePath().contains("latest") ) {
				it.remove();
				continue;
			}
			// add() returns false when the canonical path is already present ==> duplicate
			if( ! seenCanonicalPaths.add(file.getCanonicalPath()) )
				it.remove();
		}
		return files;
	}

	/**
	 * Removes, in place, every file whose canonical path contains "_BuildCatalogs", "/build/" or ".svn".
	 *
	 * @return the same (modified) list
	 */
	private List<File> removeNonCatalogDirs(List<File> files) throws IOException {
		for(Iterator<File> it = files.iterator(); it.hasNext(); ) {
			String path = it.next().getCanonicalPath();
			if( path.contains("_BuildCatalogs") || path.contains("/build/")  ||  path.contains(".svn") )
				it.remove();
		}
		return files;
	}

	/** A catalog is deprecated when a sibling file named like the catalog, with ".tsv.bgz" replaced by ".DEPRECATED.txt", exists. */
	private boolean isDeprecated(File catalog) {
		File deprecatedFile = new File(catalog.getParentFile(), catalog.getName().replace(CATALOG_SUFFIX, ".DEPRECATED.txt"));
		return deprecatedFile.exists();
	}


	/** Returns Deprecated if the catalog has a DEPRECATED marker file, otherwise Active. */
	private CatalogStatus getStatus(File catalog) {
		return isDeprecated(catalog) ? CatalogStatus.Deprecated : CatalogStatus.Active;
	}

}
