Skip to content
Snippets Groups Projects
Commit 242be1ed authored by TRON Kelly's avatar TRON Kelly
Browse files

tp

parent b2d7fe09
Branches
No related tags found
No related merge requests found
#The list of subcommands
help:
@echo "Available subcommands"
@echo "\t- run"
@echo "\t- dry"
@echo "\t- clean"
@echo "\t- graph"
@echo "\t- rulegraph"
@echo "\t- queue"
# To start the workflow
run:
@bash -c "module unload snakemake; \
module load snakemake/7.25.0 ; \
snakemake --cluster 'sbatch -c {params.cpu} --mem {params.mem} --partition=fast --account=2427_data_master' -c 3000 -j 500 --rerun-incomplete --rerun-trigger mtime"
dry:
@bash -c "module unload snakemake; \
module load snakemake/7.25.0 ; \
snakemake --cluster 'sbatch -c {params.cpu} --mem {params.mem} --partition=fast --account=2427_data_master' -c 3000 -j 500 --rerun-incomplete --rerun-trigger mtime -n -p"
# Clean unnecessary files
clean:
@rm -f slurm*.out
graph:
@bash -c "module unload snakemake; \
module load snakemake/7.25.0; \
module load graphviz/2.40.1; \
snakemake --dag | dot -Tpng > graph.png"
rulegraph:
@bash -c "module unload snakemake; \
module load snakemake/7.25.0; \
module load graphviz/2.40.1; \
snakemake --rulegraph | dot -Tpng > rulegraph.png"
queue:
@squeue -u $$USER
#singularity: "docker://continuumio/miniconda3:4.4.10"
import os
import re
import sys
workdir:os.getcwd()
DIR="/shared/projects/2427_data_master/datasets/E-MTAB-8560/"
STARINDEX="/shared/bank/mus_musculus/mm10/star-2.7.5a"
GTF ="/shared/bank/mus_musculus/mm10/gff/Mus_musculus.GRCm38.97.gtf"
SZ="/shared/bank/mus_musculus/mm10/star-2.7.5a/chrNameLength.txt"
SAMPLE=os.listdir(DIR)
SAMPLE=[x for x in SAMPLE if ".fq.gz" in x]
SAMPLE=[re.sub("_R[12]\\.fq\\.gz","",x)for x in SAMPLE]
SAMPLE=list(set(SAMPLE))
SAMPLE=sorted(SAMPLE)
#SAMPLE=SAMPLE[:10]
rule all:
input: expand("fastqc/{smp}_R1_fastqc.zip" , smp=SAMPLE), \
expand("fastqctrim/{smp}_R1_fastqc.zip" , smp=SAMPLE) , \
expand("star/{smp}.bam.bai" , smp=SAMPLE) , \
expand("coverage/{smp}.bw" , smp=SAMPLE) , \
expand("featurecounts/{smp}.txt" , smp=SAMPLE) , \
expand("star/{smp}.bam" , smp=SAMPLE)
rule fastqc:
input: r1 = DIR + "{smp}_R1.fq.gz", \
r2 = DIR + "{smp}_R2.fq.gz"
output: r1 ="fastqc/{smp}_R1_fastqc.zip", \
r2 ="fastqc/{smp}_R2_fastqc.zip"
params : cpu="1" , mem="4G"
shell:"""
module load fastqc/0.12.1
fastqc --outdir fastqc {input.r1} {input.r2}
"""
rule trimmomatic:
input: r1 = DIR + "{smp}_R1.fq.gz", \
r2 = DIR + "{smp}_R2.fq.gz"
output: r1 ="trimmomatic/{smp}_R1.fq.gz", \
r2 ="trimmomatic/{smp}_R2.fq.gz", \
r1u ="trimmomatic/{smp}_R1U.fq.gz", \
r2u ="trimmomatic/{smp}_R2U.fq.gz"
params : cpu="4" , mem="4G"
shell:"""
module load trimmomatic/0.39
trimmomatic PE -threads 1 -phred33 \
{input.r1} {input.r2} \
{output.r1} {output.r1u} \
{output.r2} {output.r2u} \
SLIDINGWINDOW:4:20 MINLEN:20
"""
rule fastqc_trim:
input: r1 = "trimmomatic/{smp}_R1.fq.gz", \
r2 = "trimmomatic/{smp}_R2.fq.gz"
output: r1 ="fastqctrim/{smp}_R1_fastqc.zip", \
r2 ="fastqctrim/{smp}_R2_fastqc.zip"
params : cpu="1" , mem="4G"
shell:"""
module load fastqc/0.12.1
fastqc --outdir fastqctrim {input.r1} {input.r2}
"""
rule star:
input: r1="trimmomatic/{smp}_R1.fq.gz", \
r2="trimmomatic/{smp}_R2.fq.gz"
output: "star/{smp}.bam"
params: cpu="20", mem="50G", index=STARINDEX
shell: """
module unload star
module load star/2.7.5a
STAR --genomeDir {params.index} \
--runThreadN {params.cpu} \
--readFilesIn {input.r1} {input.r2} \
--outFileNamePrefix star/{wildcards.smp} \
--outSAMtype BAM SortedByCoordinate \
--readFilesCommand zcat \
--outFilterMultimapNmax 1
mv star/{wildcards.smp}Aligned.sortedByCoord.out.bam {output}
"""
rule samtools_index:
input: "star/{smp}.bam"
output: "star/{smp}.bam.bai"
params: cpu="1" , mem="4G"
shell:"""
module unload samtools
module load samtools/1.18
samtools index {input}
"""
rule big_wig:
input: "star/{smp}.bam"
output: "coverage/{smp}.bw"
params: cpu="20" , mem="50G" , sz=SZ
shell:"""
module unload rseqc
module unload ucsc-wigtobigwig
module load ucsc-wigtobigwig/377
module load rseqc/2.6.4
mkdir -p coverage
bam2wig.py -s {params.sz} \
-i {input} \
-o coverage/{wildcards.smp}
"""
rule feature_counts:
input: "star/{smp}.bam"
output: "featurecounts/{smp}.txt"
params: cpu="4" , mem="16G" , gtf=GTF
shell:"""
module unload subread
module load subread/2.0.6
featureCounts -p --countReadPairs -t exon -g gene_id -a {params.gtf} \
-o featurecounts/{wildcards.smp}.txt {input}
"""
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment