############################################
####   CONFIGURATION FILE for ChIP-Seq  ####
############################################
# 02/27/13, 07/21/13
# This is the configuration file for ChipSeq v2.1 Workflow

## Paths to the workflow's auxiliary configuration files.
## The tool_info file corresponds to GENOMEBUILD, so switch it when the genome build changes.
## NOTE(review): MEMORY_INFO presumably holds per-step memory settings -- confirm against the workflow.
TOOL_INFO=/home/mayo/chipseq/config/tool_info.txt
MEMORY_INFO=/home/mayo/chipseq/config/memory_info.txt

## GENOMEBUILD: genome build; use 'mm10' for mouse or 'hg19' for human
## IRC=6436
## The remaining keys identify the project, sample and sequencing run.
## NOTE(review): the PI and USEREMAIL values look like placeholders -- confirm they are replaced before a real run.
GENOMEBUILD=hg19
PROJECT_NAME=Chr17_VM_QuickTest
PI=LastName_FirstName
SAMPLENAME=EncodeChr17
RUNID=121016_SN700_BC1657ACXX
USEREMAIL=user@domain.com
PORTAL_PROJECT_NAME=NA

##################################
####   Input sequence files   ####
##################################
## For PE (paired-end) data: set "SEQ_TYPE" to "PE", set "FILTER_TYPE" to "U02", "U12" or "U22", and provide files for both "END1_SEQ=" and "END2_SEQ="
## For SE (single-end) data: set "SEQ_TYPE" to "SE", set "FILTER_TYPE" to "U0" or "U1", and leave "END2_SEQ=" blank
## SEQ_DIR is the directory containing the input fastq files [list the sequence files for each end, separating file names with a single space]
## SEQ_TYPE is SE or PE; if SE, leave END2_SEQ blank
# - - - - - - - - - - - - - - - #
SEQ_DIR=/home/mayo/chipseq/reducedReferences/encode_samples
SEQ_TYPE=SE

## Set SEQ_SUFFIX to fastq or fastq.gz
## MAP_OUTDIR is the directory containing mapping output files, bedgraph/wig files and some summary files
## NOTE(review): MAP_OUTDIR is not set anywhere in this file -- confirm whether it is derived (e.g. from WORK_DIR) or this comment is stale.
## NOTE(review): the comments above say to leave END2_SEQ blank for SE data, but it is set to NA here -- confirm which form the workflow expects.
# - - - - - - - - - - - - - - - #
SEQ_SUFFIX=fastq.gz
END1_SEQ=GSM798423_MCF7_1_IP_chr17.fastq.gz GSM798425_MCF7_3_IP_chr17.fastq.gz GSM798424_MCF7_2_IP_chr17.fastq.gz GSM798440_MCF7_Input_chr17.fastq.gz
END2_SEQ=NA

## For PE data: U22 (both ends unique), U12 (one or both ends unique), U02 (primary alignments)
## For SE data: U1 (unique match), U0 (primary alignment)
## A "primary alignment" is the unique match, or a randomly selected match if a read is mapped to multiple locations
# - - - - - - - - - - - - - - - #
FILTER_TYPE=U1

###############################
####  Peak Calling Config  ####
###############################
# Valid peak callers are 'sicer', 'macs2noidr', 'macs2idr'
# NOTE(review): PKGENE_NEIGHDIST is presumably a peak-to-gene neighbourhood distance in bp -- confirm units and meaning.
# - - - - - - - - - - - - - - - #
PEAK_CALLER=macs2noidr
PKGENE_NEIGHDIST=10000
WORK_DIR=/home/mayo/Desktop/Encode_Sample

# IP_FILE lists the IP (treatment) fastq files, separated by single spaces.
# NOTE(review): entries here are a subset of END1_SEQ; presumably they must match names listed there -- confirm.
IP_FILE=GSM798423_MCF7_1_IP_chr17.fastq.gz GSM798425_MCF7_3_IP_chr17.fastq.gz GSM798424_MCF7_2_IP_chr17.fastq.gz

# INPUT_FILE lists the control (input) fastq files, separated by single spaces.
# NOTE(review): presumably the i-th INPUT_FILE entry is the control for the i-th IP_FILE entry,
# which is why the same input file is repeated three times here -- confirm.
INPUT_FILE=GSM798440_MCF7_Input_chr17.fastq.gz GSM798440_MCF7_Input_chr17.fastq.gz GSM798440_MCF7_Input_chr17.fastq.gz

### If multiple controls are tied to a single IP file, labels must be provided (note the key is spelled INPUT_LABLES)
#INPUT_LABLES=INPUT IgG INPUT IgG

###############################
####    Peak Annotation    ####
###############################
# Yes/No toggles for the optional peak-annotation steps.
# NOTE(review): presumably RUN_CEAS, RUN_MEME and RUN_GOM enable the CEAS, MEME and Gene Ontology Module
# steps configured further down in this file -- confirm the accepted values and their case-sensitivity.
RUN_CEAS=Yes
RUN_MEME=Yes
RUN_GOM=Yes


## CEAS_PEAK_CUTOFF: threshold expressed as -log10(pvalue) or -log10(qvalue)
# Leave CEAS_GENE_DIR= and CEAS_GENE_LIST= blank if there are no user-provided genes
# Gene list files are single-column files containing gene accessions like NM_000215
# A leading '>' marks the special case where a single line contains multiple '=' symbols.
# NOTE(review): CEAS_GENE_LIST file names are presumably resolved relative to CEAS_GENE_DIR -- confirm.
# - - - - - - - - - - - - - - - #
>CEAS_ARGS=--bg --sizes=1000,2000,3000 --bisizes=2000,4000 --span=2000 --pf-res=50 --rel-dist=2000
CEAS_PEAK_CUTOFF=10
CEAS_GENE_DIR=/home/mayo/chipseq/reducedReferences/annotation/
CEAS_GENE_LIST=gene.list1.txt gene.list2.txt


## MEME motif finding
#MEME_PEAK_SIZE: for each selected peak, a 200-bp sequence (left side: 100 bp; right side: 99 bp) will be extracted for motif finding
#MEME_PEAK_CUTOFF: if an integer > 1, this is the number of best peaks to select based on the p-value;
#if between 0.001 and 1, this is the fraction of peaks to select based on the p-value (0.5 means 50% of the total peaks).
#NOTE(review): MEME_ARGS is presumably passed through to the meme command line unchanged -- confirm.
# - - - - - - - - - - - - - - - #
MEME_ARGS=-dna -mod zoops -nmotifs 5 -minw 10 -maxw 20 -maxsize 999999999 -revcomp
MEME_PEAK_SIZE=200
MEME_PEAK_CUTOFF=0.05


## Gene Ontology Module
# NOTE(review): the meaning and order of the four space-separated REG_ARGS values are not documented in this file -- confirm against the GOM step.
# NOTE(review): ANNO_METHOD presumably selects the statistic used for annotation (set to qvalue here); the other accepted values are not listed -- confirm.
# - - - - - - - - - - - - - - - #
REG_ARGS=5000 1000 100000 5000
ANNO_METHOD=qvalue
