diff --git a/dputhier/Makefile b/dputhier/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..83727ad9257d92abbf12b2ae7908b03e3d70e2e3 --- /dev/null +++ b/dputhier/Makefile @@ -0,0 +1,38 @@ + +#The list of subcommands +help: + @echo "Avaiable subcommands" + @echo "\t- run" + @echo "\t- clean" + +run: + @bash -c "module unload snakemake; \ + module load snakemake/7.25.0 ; \ + snakemake --cluster 'sbatch -c {params.cpu} --mem {params.mem} --partition=fast --account=2427_data_master' -c 3000 -j 500 --rerun-incomplete --rerun-trigger mtime" + +dry: + @bash -c "module unload snakemake; \ + module load snakemake/7.25.0 ; \ + snakemake --cluster 'sbatch -c {params.cpu} --mem {params.mem} --partition=fast --account=2427_data_master' -c 3000 -j 500 --rerun-incomplete --rerun-trigger mtime -n -p" + +#Clean unnecessary files +clean: + @rm -f slurm*.out + + +graph: + @bash -c "module unload snakemake; \ + module load snakemake/7.25.0 ; \ + module load graphviz/2.40.1 ; \ + snakemake --dag | dot -Tpng > graph.png" + +rulegraph: + @bash -c "module unload snakemake; \ + module load snakemake/7.25.0 ; \ + module load graphviz/2.40.1 ; \ + snakemake --rulegraph | dot -Tpng > rulegraph.png" + +queue: + @squeue -u $$USER + + diff --git a/dputhier/Snakefile b/dputhier/Snakefile new file mode 100644 index 0000000000000000000000000000000000000000..4541c148c3add00b19f28ddd190cec89eaa3731f --- /dev/null +++ b/dputhier/Snakefile @@ -0,0 +1,129 @@ + +import os +import re +import sys + +WDIR = os.getcwd() +#print(WDIR) + +workdir: WDIR + +#print ("Starting workflow") + +DATADIR = "/shared/projects/2427_data_master/datasets/E-MTAB-8560/" +STARINDEX = "/shared/bank/mus_musculus/mm10/star-2.7.5a" + +GTF="/shared/bank/mus_musculus/mm10/gff/Mus_musculus.GRCm38.97.gtf" +SZ="/shared/bank/mus_musculus/mm10/star-2.7.5a/chrNameLength.txt" + +SAMPLE = os.listdir(DATADIR) +SAMPLE = [x for x in SAMPLE if ".fq.gz" in x] +SAMPLE = [re.sub("_R[12]\\.fq\\.gz$", "", x) for x in SAMPLE] +SAMPLE = list(set(SAMPLE)) +SAMPLE = sorted(SAMPLE) + +#SAMPLE = SAMPLE[:10] + +#toto = expand("trimmomatic/{smp}_R1.fq.gz", smp=SAMPLE) +#print(toto) +#sys.exit() + +rule final: + input: expand("fastqc/{smp}_R1_fastqc.zip", smp=SAMPLE) , \ + expand("trimmomatic/{smp}_R1.fq.gz", smp=SAMPLE) , \ + expand("fastqc_trim/{smp}_R1_fastqc.zip" , smp=SAMPLE) , \ + expand("star/{smp}.bam", smp=SAMPLE) , \ + expand("star/{smp}.bam.bai", smp=SAMPLE) , \ + expand("coverage/{smp}.bw", smp=SAMPLE) , \ + expand("featurecounts/{smp}.txt" , smp=SAMPLE) + +rule fastqc: + input: r1 = DATADIR + "{smp}_R1.fq.gz", \ + r2 = DATADIR + "{smp}_R2.fq.gz" + output: r1="fastqc/{smp}_R1_fastqc.zip", \ + r2="fastqc/{smp}_R2_fastqc.zip" + params: cpu="1", mem="4G" + shell: """ + module load fastqc/0.12.1 + fastqc --outdir fastqc {input.r1} {input.r2} + """ + +rule trimmomatic: + input: r1 = DATADIR + "{smp}_R1.fq.gz", \ + r2 = DATADIR + "{smp}_R2.fq.gz" + output: r1="trimmomatic/{smp}_R1.fq.gz", \ + r2="trimmomatic/{smp}_R2.fq.gz", \ + r1u="trimmomatic/{smp}_R1u.fq.gz", \ + r2u="trimmomatic/{smp}_R2u.fq.gz" + params: cpu="4", mem="4G" + shell: """ + module load trimmomatic/0.39 + trimmomatic PE -threads 1 -phred33 \ + {input.r1} {input.r2} \ + {output.r1} {output.r1u} \ + {output.r2} {output.r2u} \ + SLIDINGWINDOW:4:20 MINLEN:20 + """ + +rule fastqc_trim: + input: r1="trimmomatic/{smp}_R1.fq.gz", \ + r2="trimmomatic/{smp}_R2.fq.gz" + output: r1="fastqc_trim/{smp}_R1_fastqc.zip", \ + r2="fastqc_trim/{smp}_R2_fastqc.zip" + params: cpu="1", mem="4G" + shell: """ + module load fastqc/0.12.1 + fastqc --outdir fastqc_trim {input.r1} {input.r2} + """ + +rule star: + input: r1="trimmomatic/{smp}_R1.fq.gz", \ + r2="trimmomatic/{smp}_R2.fq.gz" + output: r1="star/{smp}.bam" + params: cpu="20", mem="50G", index=STARINDEX + shell: """ + module unload star + module load star/2.7.5a + STAR --genomeDir {params.index} \ + --runThreadN {params.cpu} \ + --readFilesIn {input.r1} {input.r2} \ + --outFileNamePrefix star/{wildcards.smp} \ + --outSAMtype BAM SortedByCoordinate \ + --readFilesCommand zcat \ + --outFilterMultimapNmax 1 + mv star/{wildcards.smp}Aligned.sortedByCoord.out.bam {output} + """ + +rule samtools_index: + input: "star/{smp}.bam" + output: "star/{smp}.bam.bai" + params: cpu="1", mem="4G" + shell: """ + module unload samtools + module load samtools/1.18 + samtools index {input} + """ + +rule big_wig: + input: bam="star/{smp}.bam", bai="star/{smp}.bam.bai" + output: "coverage/{smp}.bw" + params: cpu="20", mem="50G", sz=SZ + shell: """ + module unload rseqc + module unload ucsc-wigtobigwig + module load ucsc-wigtobigwig/377 + module load rseqc/2.6.4 + mkdir -p coverage + bam2wig.py -s {params.sz} -i {input.bam} -o coverage/{wildcards.smp} + """ + +rule featurecounts: + input: bam="star/{smp}.bam" + output: "featurecounts/{smp}.txt" + params: cpu="4", mem="16G", gtf=GTF + shell: """ + module unload subread + module load subread/2.0.6 + mkdir -p featurecounts + featureCounts -p --countReadPairs -t exon -g gene_id -a {params.gtf} -o {output} {input.bam} + """