package edu.mayo.bior.cli.cmd;

import edu.mayo.bior.catalog.CatalogFiles;
import edu.mayo.bior.catalog.CatalogFormatException;
import edu.mayo.bior.catalog.stats.CatalogStats;
import edu.mayo.bior.catalog.stats.StatsBuilder;
import edu.mayo.bior.catalog.stats.StatsPrinter;
import edu.mayo.bior.cli.cmd.CommandUtil.FileAttributes;
import edu.mayo.cli.CommandPlugin;
import edu.mayo.pipes.history.ColumnMetaData;
import edu.mayo.pipes.history.ColumnMetaDataOperations;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.*;

import static edu.mayo.bior.cli.cmd.CommandUtil.handleFile;
import static edu.mayo.bior.cli.cmd.CommandUtil.handleInteger;
import static edu.mayo.bior.cli.cmd.CommandUtil.handleLong;

/**
 * Command that collects statistics on catalog values (number of occurrences of each character,
 * and % of lines those characters occur on) and dumps out a set of values so the user can see
 * examples.
 * <p>
 * One text file named <code>&lt;column&gt;_stats.txt</code> is written to the output directory
 * for every column reported by the computed {@link CatalogStats}.
 */
public class CatalogStatsCommand implements CommandPlugin
{

   private static final String OPTION_CATALOG_BGZ_FILE = "d";
   private static final String OPTION_OUTPUT_DIR = "o";
   private static final String OPTION_VALUE_COUNT = "n";
   private static final String OPTION_SAMPLE_1_IN_X = "s";
   private static final String OPTION_START_LINE = "startLine";
   private static final String OPTION_NUM_LINES = "numLines";

   /** Name of the property that supplies the default for the {@code -n} option. */
   private static final String PROPERTY_DEFAULT_VALUE_COUNT = "DEFAULT_VALUE_COUNT";

   private int defaultValueCount;
   // Set the default output directory to be the current working directory
   private final File defaultOutputDir = new File(System.getProperty("user.dir"));

   /**
    * Reads the default value count from the command's properties.
    *
    * @param properties command properties; must contain an integer {@code DEFAULT_VALUE_COUNT}
    * @throws NumberFormatException if the property is missing or is not a valid integer
    */
   @Override
   public void init(Properties properties) throws Exception
   {
      String rawValueCount = properties.getProperty(PROPERTY_DEFAULT_VALUE_COUNT);
      if (rawValueCount == null)
      {
         // Same exception type Integer.parseInt(null) would raise, but naming the culprit.
         throw new NumberFormatException("Missing required property: " + PROPERTY_DEFAULT_VALUE_COUNT);
      }
      // Values read from .properties files keep trailing whitespace, which parseInt rejects.
      defaultValueCount = Integer.parseInt(rawValueCount.trim());
   }

   /**
    * Resolves the command-line options (falling back to defaults for those not supplied),
    * makes sure the output directory is usable, and runs the statistics collection.
    *
    * @param cl      parsed command line
    * @param options option definitions (not used by this command)
    * @throws IOException if the output directory cannot be created or is not a directory,
    *                     or if a stats file cannot be written
    */
   @Override
   public void execute(CommandLine cl, Options options) throws Exception
   {

      List<FileAttributes> catalogFileAttrs = Arrays.asList(FileAttributes.EXISTS, FileAttributes.READABLE);
      File catalogFile = handleFile(cl, OPTION_CATALOG_BGZ_FILE, catalogFileAttrs);

      int maxValueCount = cl.hasOption(OPTION_VALUE_COUNT)
         ? handleInteger(cl, OPTION_VALUE_COUNT, 1, null)
         : defaultValueCount;

      // Validate the output location up front so a bad path fails before the stats are built.
      File outputDir = resolveOutputDir(cl);

      // Default is to sample every line  (1 in 1)
      // If user specifies 100, then sample only 1 in every 100 lines
      final long DEFAULT_ONE_IN_X_SAMPLING = StatsBuilder.getOneInXSamplingDefault();
      long oneInXSampling = cl.hasOption(OPTION_SAMPLE_1_IN_X)
         ? handleLong(cl, OPTION_SAMPLE_1_IN_X, 1L, null)
         : DEFAULT_ONE_IN_X_SAMPLING;

      final long DEFAULT_START_LINE = StatsBuilder.getStartLineDefault();
      long startLine = cl.hasOption(OPTION_START_LINE)
         ? handleLong(cl, OPTION_START_LINE, 1L, null)
         : DEFAULT_START_LINE;

      final long DEFAULT_NUM_LINES = StatsBuilder.getNumLinesDefault();
      long numLines = cl.hasOption(OPTION_NUM_LINES)
         ? handleLong(cl, OPTION_NUM_LINES, 0L, null)
         : DEFAULT_NUM_LINES;

      runStats(catalogFile, maxValueCount, oneInXSampling, outputDir, startLine, numLines);
   }

   /**
    * Determines the output directory (the {@code -o} option, or the current working directory
    * when the option is absent) and creates it if it does not exist yet.
    *
    * @param cl parsed command line
    * @return an existing directory to write the stats files into
    * @throws IOException if the path exists but is not a directory, or cannot be created
    */
   private File resolveOutputDir(CommandLine cl) throws IOException
   {
      File outputDir = cl.hasOption(OPTION_OUTPUT_DIR)
         ? new File(cl.getOptionValue(OPTION_OUTPUT_DIR))
         : defaultOutputDir;

      if (outputDir.exists())
      {
         if (!outputDir.isDirectory())
         {
            throw new IOException("Output path exists but is not a directory: " + outputDir.getAbsolutePath());
         }
      }
      // mkdirs() also returns false when something else created the directory in the meantime,
      // so only treat it as a failure if the directory still is not there.
      else if (!outputDir.mkdirs() && !outputDir.isDirectory())
      {
         throw new IOException("Unable to create output directory: " + outputDir.getAbsolutePath());
      }
      return outputDir;
   }

   /**
    * Builds the statistics for the catalog and writes one {@code <column>_stats.txt} file per
    * column into {@code outputDir}.
    *
    * @param catalogFile    catalog file to analyze
    * @param maxValues      value count passed through to {@link StatsBuilder}
    * @param oneInXSampling sampling rate passed through to {@link StatsBuilder} (1 = every line)
    * @param outputDir      existing directory that receives the stats files
    * @param startLine      start line passed through to {@link StatsBuilder}
    * @param numLines       number of lines passed through to {@link StatsBuilder}
    * @throws IOException            if the catalog or its column metadata cannot be read, or a
    *                                stats file cannot be written
    * @throws CatalogFormatException declared for the catalog processing calls made here
    */
   private void runStats(File catalogFile, int maxValues, long oneInXSampling, File outputDir, long startLine, long numLines)
      throws IOException, CatalogFormatException
   {
      StatsBuilder s = new StatsBuilder();
      CatalogStats stats = s.build(catalogFile, maxValues, oneInXSampling, startLine, numLines);

      CatalogFiles files = new CatalogFiles(catalogFile);
      ColumnMetaDataOperations metaDataOperations = new ColumnMetaDataOperations(files.getColumnsFile());
      Map<String, ColumnMetaData> metaDataMap = metaDataOperations.load();

      for (String column : stats.getColumns())
      {
         // NOTE(review): metaData is null when the column has no entry in the metadata map;
         // it is handed to StatsPrinter as-is - confirm StatsPrinter tolerates null.
         ColumnMetaData metaData = metaDataMap.get(column);

         File statsFile = new File(outputDir, column + "_stats.txt");
         FileWriter fileWtr = new FileWriter(statsFile);
         try
         {
            PrintWriter printWtr = new PrintWriter(fileWtr);
            StatsPrinter.printStats(printWtr, stats, column, metaData);
            // PrintWriter never throws on write failures; checkError() flushes and reports them.
            if (printWtr.checkError())
            {
               throw new IOException("Error writing stats file: " + statsFile.getAbsolutePath());
            }
         }
         finally
         {
            // Always release the file handle, even when printing fails part-way through.
            fileWtr.close();
         }
      }
   }
}
