diff --git a/D.Puthier/Makefile b/D.Puthier/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..1b2be31ed990bcd4cb4531f8d0f8995cc0870da9
--- /dev/null
+++ b/D.Puthier/Makefile
@@ -0,0 +1,59 @@
+# Convenience wrapper around the Snakemake workflow (see Snakefile).
+# Run "make help" to list the subcommands.
+
+# These targets are commands, not files: without .PHONY a stray file or
+# directory named e.g. "graph" or "clean" would make the target a no-op.
+.PHONY: help run dry clean graph rulegraph queue
+
+# Snakemake environment module loaded by every workflow target.
+SNAKEMAKE_MODULE = snakemake/7.25.0
+
+# Options shared by "run" and "dry" so that the dry run always previews
+# exactly what the real run submits. {params.cpu} and {params.mem} are
+# placeholders resolved by Snakemake from each rule's params.
+SNAKEMAKE_OPTS = --cluster 'sbatch -c {params.cpu} --mem {params.mem} --partition=fast --account=2427_data_master' -c 3000 -j 500 --rerun-incomplete --rerun-trigger mtime
+
+#The list of subcommands
+# (printf instead of echo: not every /bin/sh echo expands "\t")
+help:
+	@printf "Available subcommands\n"
+	@printf "\t- run\n"
+	@printf "\t- dry\n"
+	@printf "\t- clean\n"
+	@printf "\t- graph\n"
+	@printf "\t- rulegraph\n"
+	@printf "\t- queue\n"
+
+# To start the workflow
+run:
+	@bash -c "module unload snakemake; \
+	module load $(SNAKEMAKE_MODULE); \
+	snakemake $(SNAKEMAKE_OPTS)"
+
+# Dry run: list the jobs (-n) and print their shell commands (-p) without submitting
+dry:
+	@bash -c "module unload snakemake; \
+	module load $(SNAKEMAKE_MODULE); \
+	snakemake $(SNAKEMAKE_OPTS) -n -p"
+
+# Clean unnecessary files
+clean:
+	@rm -f slurm*.out
+
+# Render the job DAG to graph.png
+graph:
+	@bash -c "module unload snakemake; \
+	module load $(SNAKEMAKE_MODULE); \
+	module load graphviz/2.40.1; \
+	snakemake --dag | dot -Tpng > graph.png"
+
+# Render the rule graph to rulegraph.png
+rulegraph:
+	@bash -c "module unload snakemake; \
+	module load $(SNAKEMAKE_MODULE); \
+	module load graphviz/2.40.1; \
+	snakemake --rulegraph | dot -Tpng > rulegraph.png"
+
+# List the current user's jobs in the queue
+queue:
+	@squeue -u $$USER
diff --git a/D.Puthier/Snakefile b/D.Puthier/Snakefile
new file mode 100644
index 0000000000000000000000000000000000000000..f32a0ad63ffb78bc7a30e5fca43ff895a801c140
--- /dev/null
+++ b/D.Puthier/Snakefile
@@ -0,0 +1,177 @@
+#singularity: "docker://continuumio/miniconda3:4.4.10"
+import os
+import re
+import sys
+
+workdir:os.getcwd()
+
+# Directory holding the raw paired-end reads ({smp}_R1.fq.gz / {smp}_R2.fq.gz)
+DIR="/shared/projects/2427_data_master/datasets/E-MTAB-8560/"
+# STAR genome index directory
+STARINDEX="/shared/bank/mus_musculus/mm10/star-2.7.5a"
+# Gene annotation given to featureCounts
+GTF ="/shared/bank/mus_musculus/mm10/gff/Mus_musculus.GRCm38.97.gtf"
+# Chromosome sizes file given to bam2wig.py
+SZ="/shared/bank/mus_musculus/mm10/star-2.7.5a/chrNameLength.txt"
+
+# Sample names are derived from the read files found in DIR. Only file names
+# ending in _R1.fq.gz or _R2.fq.gz are considered, so that other files that
+# merely contain ".fq.gz" (checksums, unpaired reads, ...) cannot turn into
+# bogus sample names for which no input exists.
+FASTQ_RE = re.compile(r"(.+)_R[12]\.fq\.gz$")
+SAMPLE = sorted({m.group(1) for m in map(FASTQ_RE.match, os.listdir(DIR)) if m})
+
+#SAMPLE=SAMPLE[:10]
+
+# Default target: request every final output for every sample.
+rule all:
+    input:
+        expand("fastqc/{smp}_R1_fastqc.zip", smp=SAMPLE),
+        expand("fastqctrim/{smp}_R1_fastqc.zip", smp=SAMPLE),
+        expand("star/{smp}.bam.bai", smp=SAMPLE),
+        expand("coverage/{smp}.bw", smp=SAMPLE),
+        expand("featurecounts/{smp}.txt", smp=SAMPLE),
+        expand("star/{smp}.bam", smp=SAMPLE)
+
+# Quality control of the raw reads.
+rule fastqc:
+    input:
+        r1 = DIR + "{smp}_R1.fq.gz",
+        r2 = DIR + "{smp}_R2.fq.gz"
+    output:
+        r1 = "fastqc/{smp}_R1_fastqc.zip",
+        r2 = "fastqc/{smp}_R2_fastqc.zip"
+    params:
+        cpu = "1",
+        mem = "4G"
+    shell:
+        """
+        module load fastqc/0.12.1
+        fastqc --outdir fastqc {input.r1} {input.r2}
+        """
+
+# Trim the read pairs; r1u/r2u collect the reads left unpaired by trimming.
+# -threads follows params.cpu so the CPUs reserved via sbatch are actually used.
+rule trimmomatic:
+    input:
+        r1 = DIR + "{smp}_R1.fq.gz",
+        r2 = DIR + "{smp}_R2.fq.gz"
+    output:
+        r1 = "trimmomatic/{smp}_R1.fq.gz",
+        r2 = "trimmomatic/{smp}_R2.fq.gz",
+        r1u = "trimmomatic/{smp}_R1U.fq.gz",
+        r2u = "trimmomatic/{smp}_R2U.fq.gz"
+    params:
+        cpu = "4",
+        mem = "4G"
+    shell:
+        """
+        module load trimmomatic/0.39
+        trimmomatic PE -threads {params.cpu} -phred33 \
+            {input.r1} {input.r2} \
+            {output.r1} {output.r1u} \
+            {output.r2} {output.r2u} \
+            SLIDINGWINDOW:4:20 MINLEN:20
+        """
+
+# Quality control of the trimmed reads.
+rule fastqc_trim:
+    input:
+        r1 = "trimmomatic/{smp}_R1.fq.gz",
+        r2 = "trimmomatic/{smp}_R2.fq.gz"
+    output:
+        r1 = "fastqctrim/{smp}_R1_fastqc.zip",
+        r2 = "fastqctrim/{smp}_R2_fastqc.zip"
+    params:
+        cpu = "1",
+        mem = "4G"
+    shell:
+        """
+        module load fastqc/0.12.1
+        fastqc --outdir fastqctrim {input.r1} {input.r2}
+        """
+
+# Align the trimmed pairs with STAR (coordinate-sorted BAM, --outFilterMultimapNmax 1),
+# then rename STAR's fixed output file name to the declared output.
+rule star:
+    input:
+        r1 = "trimmomatic/{smp}_R1.fq.gz",
+        r2 = "trimmomatic/{smp}_R2.fq.gz"
+    output:
+        "star/{smp}.bam"
+    params:
+        cpu = "20",
+        mem = "50G",
+        index = STARINDEX
+    shell:
+        """
+        module unload star
+        module load star/2.7.5a
+        STAR --genomeDir {params.index} \
+            --runThreadN {params.cpu} \
+            --readFilesIn {input.r1} {input.r2} \
+            --outFileNamePrefix star/{wildcards.smp} \
+            --outSAMtype BAM SortedByCoordinate \
+            --readFilesCommand zcat \
+            --outFilterMultimapNmax 1
+        mv star/{wildcards.smp}Aligned.sortedByCoord.out.bam {output}
+        """
+
+# Index the sorted BAM with samtools.
+rule samtools_index:
+    input:
+        "star/{smp}.bam"
+    output:
+        "star/{smp}.bam.bai"
+    params:
+        cpu = "1",
+        mem = "4G"
+    shell:
+        """
+        module unload samtools
+        module load samtools/1.18
+        samtools index {input}
+        """
+
+# Build a coverage track from the BAM with RSeQC's bam2wig.py.
+# NOTE(review): no thread option is passed to bam2wig.py, so the 20 CPUs requested
+# here look oversized -- confirm before lowering.
+rule big_wig:
+    input:
+        "star/{smp}.bam"
+    output:
+        "coverage/{smp}.bw"
+    params:
+        cpu = "20",
+        mem = "50G",
+        sz = SZ
+    shell:
+        """
+        module unload rseqc
+        module unload ucsc-wigtobigwig
+        module load ucsc-wigtobigwig/377
+        module load rseqc/2.6.4
+        mkdir -p coverage
+        bam2wig.py -s {params.sz} \
+            -i {input} \
+            -o coverage/{wildcards.smp}
+        """
+
+# Count read pairs per gene (exon features grouped by gene_id) with featureCounts.
+# -T follows params.cpu so the CPUs reserved via sbatch are actually used.
+rule feature_counts:
+    input:
+        "star/{smp}.bam"
+    output:
+        "featurecounts/{smp}.txt"
+    params:
+        cpu = "4",
+        mem = "16G",
+        gtf = GTF
+    shell:
+        """
+        module unload subread
+        module load subread/2.0.6
+        featureCounts -p --countReadPairs -T {params.cpu} -t exon -g gene_id -a {params.gtf} \
+            -o featurecounts/{wildcards.smp}.txt {input}
+        """
\ No newline at end of file