diff --git a/D.Puthier/Makefile b/D.Puthier/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..de8310a344ca988cefb4754df8a2d1ee388481a7
--- /dev/null
+++ b/D.Puthier/Makefile
@@ -0,0 +1,49 @@
+
+# Convenience targets for running the Snakemake workflow through SLURM.
+# Every target is a command rather than a file, so all of them are phony.
+.PHONY: help run dry clean graph rulegraph queue
+
+# The list of subcommands.
+# printf is used for the entries because echo only expands "\t" in some shells.
+help:
+	@echo "Available subcommands"
+	@printf '\t- run\n'
+	@printf '\t- clean\n'
+	@printf '\t- graph\n'
+	@printf '\t- rulegraph\n'
+	@printf '\t- dry\n'
+	@printf '\t- queue\n'
+
+# Run the workflow; each job is submitted with sbatch using the rule's cpu/mem params.
+run:
+	@bash -c "module unload snakemake; \
+	module load snakemake/7.25.0 ; \
+	snakemake --cluster 'sbatch -c {params.cpu} --mem {params.mem} --partition=fast --account=2427_data_master' -c 3000 -j 500 --rerun-incomplete --rerun-trigger mtime"
+
+# Same command as `run` with -n -p: dry run that prints the shell commands.
+dry:
+	@bash -c "module unload snakemake; \
+	module load snakemake/7.25.0 ; \
+	snakemake --cluster 'sbatch -c {params.cpu} --mem {params.mem} --partition=fast --account=2427_data_master' -c 3000 -j 500 --rerun-incomplete --rerun-trigger mtime -n -p"
+
+# Clean unnecessary files
+clean:
+	@rm -f slurm*.out graph.png rulegraph.png
+
+# Render the job DAG to graph.png.
+graph:
+	@bash -c "module unload snakemake; \
+	module load snakemake/7.25.0; \
+	module load graphviz/2.40.1; \
+	snakemake --dag | fdp -Tpng > graph.png"
+
+# Render the rule graph to rulegraph.png (its own file, so graph.png is not overwritten).
+rulegraph:
+	@bash -c "module unload snakemake; \
+	module load snakemake/7.25.0; \
+	module load graphviz/2.40.1; \
+	snakemake --rulegraph | dot -Tpng > rulegraph.png"
+
+# Show the current user's jobs in the SLURM queue.
+queue:
+	@squeue -u $$USER
\ No newline at end of file
diff --git a/D.Puthier/Snakefile b/D.Puthier/Snakefile
new file mode 100644
index 0000000000000000000000000000000000000000..872e43156a16cf8664978b757f81b9c5f4b76f40
--- /dev/null
+++ b/D.Puthier/Snakefile
@@ -0,0 +1,130 @@
+# Libraries
+import os
+import re
+import sys
+
+# Set the working directory
+workdir:os.getcwd()
+
+# File and directory paths
+DATADIR="/shared/projects/2427_data_master/datasets/E-MTAB-8560/"
+STARINDEX="/shared/bank/mus_musculus/mm10/star-2.7.5a"
+
+GTF ="/shared/bank/mus_musculus/mm10/gff/Mus_musculus.GRCm38.97.gtf"
+SZ="/shared/bank/mus_musculus/mm10/star-2.7.5a/chrNameLength.txt"
+
+# Sample names are derived from the paired FASTQ files (<sample>_R1.fq.gz / <sample>_R2.fq.gz).
+# The pattern is anchored so that other files merely containing ".fq.gz" are ignored.
+FASTQ_RE = re.compile(r"(.+)_R[12]\.fq\.gz")
+SAMPLE = sorted({m.group(1) for m in map(FASTQ_RE.fullmatch, os.listdir(DATADIR)) if m})
+
+SAMPLE = SAMPLE[:10]  # only the first 10 samples (in sorted order) are processed
+
+# Main rule: lists the final outputs requested for every sample
+rule final:
+    input: expand("fastqc/{smp}_R1_fastqc.zip" , smp=SAMPLE), \
+           expand("fastqctrim/{smp}_R1_fastqc.zip" , smp=SAMPLE) , \
+           expand("star/{smp}.bam.bai" , smp=SAMPLE) , \
+           expand("coverage/{smp}.bw" , smp=SAMPLE) , \
+           expand("featurecounts/{smp}.txt" , smp=SAMPLE) , \
+           expand("star/{smp}.bam" , smp=SAMPLE)
+
+# FastQC quality report on the raw read pairs
+rule fastqc:
+    input: r1 = DATADIR + "{smp}_R1.fq.gz", \
+           r2 = DATADIR + "{smp}_R2.fq.gz"
+    output: r1 ="fastqc/{smp}_R1_fastqc.zip", \
+            r2 ="fastqc/{smp}_R2_fastqc.zip"
+    params : cpu="1" , mem="4G"
+    shell:"""
+        module load fastqc/0.12.1
+        fastqc --outdir fastqc {input.r1} {input.r2}
+    """
+
+# Paired-end trimming with Trimmomatic; the thread count follows the CPUs requested from SLURM
+rule trimmomatic:
+    input: r1 = DATADIR + "{smp}_R1.fq.gz", \
+           r2 = DATADIR + "{smp}_R2.fq.gz"
+    output: r1 ="trimmomatic/{smp}_R1.fq.gz", \
+            r2 ="trimmomatic/{smp}_R2.fq.gz", \
+            r1u ="trimmomatic/{smp}_R1U.fq.gz", \
+            r2u ="trimmomatic/{smp}_R2U.fq.gz"
+    params : cpu="4" , mem="4G"
+    shell:"""
+        module load trimmomatic/0.39
+        trimmomatic PE -threads {params.cpu} -phred33 \
+            {input.r1} {input.r2} \
+            {output.r1} {output.r1u} \
+            {output.r2} {output.r2u} \
+            SLIDINGWINDOW:4:20 MINLEN:20
+    """
+
+# FastQC quality report on the trimmed read pairs
+rule fastqc_trim:
+    input: r1 = "trimmomatic/{smp}_R1.fq.gz", \
+           r2 = "trimmomatic/{smp}_R2.fq.gz"
+    output: r1 ="fastqctrim/{smp}_R1_fastqc.zip", \
+            r2 ="fastqctrim/{smp}_R2_fastqc.zip"
+    params : cpu="1" , mem="4G"
+    shell:"""
+        module load fastqc/0.12.1
+        fastqc --outdir fastqctrim {input.r1} {input.r2}
+    """
+
+# Align the trimmed pairs with STAR, then rename STAR's sorted BAM to star/{smp}.bam
+rule star:
+    input: r1="trimmomatic/{smp}_R1.fq.gz", \
+           r2="trimmomatic/{smp}_R2.fq.gz"
+    output: "star/{smp}.bam"
+    params: cpu="20", mem="50G", index=STARINDEX
+    shell: """
+        module unload star
+        module load star/2.7.5a
+        STAR --genomeDir {params.index} \
+            --runThreadN {params.cpu} \
+            --readFilesIn {input.r1} {input.r2} \
+            --outFileNamePrefix star/{wildcards.smp} \
+            --outSAMtype BAM SortedByCoordinate \
+            --readFilesCommand zcat \
+            --outFilterMultimapNmax 1
+        mv star/{wildcards.smp}Aligned.sortedByCoord.out.bam {output}
+    """
+
+# Index the BAM file with samtools
+rule samtools_index:
+    input: "star/{smp}.bam"
+    output: "star/{smp}.bam.bai"
+    params: cpu="1" , mem="4G"
+    shell:"""
+        module unload samtools
+        module load samtools/1.18
+        samtools index {input}
+    """
+
+# Build the coverage track with bam2wig.py, using the STAR chrNameLength.txt as chromosome sizes
+rule big_wig:
+    input: "star/{smp}.bam"
+    output: "coverage/{smp}.bw"
+    params: cpu="20" , mem="50G" , sz=SZ
+    shell:"""
+        module unload rseqc
+        module unload ucsc-wigtobigwig
+        module load ucsc-wigtobigwig/377
+        module load rseqc/2.6.4
+        mkdir -p coverage
+        bam2wig.py -s {params.sz} \
+            -i {input} \
+            -o coverage/{wildcards.smp}
+    """
+
+# Count read pairs per gene with featureCounts; -T uses the CPUs requested from SLURM
+rule feature:
+    input: "star/{smp}.bam"
+    output: "featurecounts/{smp}.txt"
+    params: cpu="4" , mem="16G" , gtf=GTF
+    shell:"""
+        module unload subread
+        module load subread/2.0.6
+        featureCounts -T {params.cpu} -p --countReadPairs -t exon -g gene_id -a {params.gtf} \
+            -o featurecounts/{wildcards.smp}.txt {input}
+    """
\ No newline at end of file
diff --git a/README.md b/README.md
index fbb9d6cdd2f596b5f3ec20d7b3fa07abb41dabb3..2086ca013a3cfcba6a84e620f6b1371adf8213a0 100644
--- a/README.md
+++ b/README.md
@@ -1,22 +1,13 @@
-# Download Clone
+# Description:
+This project automates the execution of four scripts for the analysis of GWAS (Genome-Wide Association Study) data.
 
-mkdir m2bsgreprod
+# Steps to Run the Project
 
-git clone git@etulab.univ-amu.fr:o22025448/tp1ara.git > m2bsgreprod
+## Step 1:
+Follow the instructions in doc/01install.md to install the necessary dependencies and set up the environment.
 
-# Install the micromamba or equiv.
-
-# Create a m2bsgreprod micromamba environment and install apptainer 1.3.2
-
-# Build the apptainer image
-
-mkdir -p results/containers
-
-sudo /home/gonzalez/Software/micromamba/envs/m2bsgreprod/bin/apptainer build results/containers/m2bsgreprod.sif containers/m2bsgreprod.def
-
-# Execute the Rscripts
-
-micromamba activate m2bsgreprod
-
-apptainer exec results/containers/m2bsgreprod3.sif make -f workflows/makefile
+## Step 2:
+After setup, proceed with doc/02run.md to execute the analysis scripts and complete the workflow.
 
+## Version
+To obtain version information, please consult the commit and branch indicated in the Release.md file.
diff --git a/Release.md b/Release.md
index 1c5a717c55b4994b57c4e71181f8034f19ae2634..962825049d574f5b4e978506304614448eebb365 100644
--- a/Release.md
+++ b/Release.md
@@ -1 +1,2 @@
-Version for evaluation
+# Version for evaluation
+
diff --git a/doc/01install.md b/doc/01install.md
new file mode 100644
index 0000000000000000000000000000000000000000..c1d5488d05a56f5b1719ce6ca769a6d2da116fe4
--- /dev/null
+++ b/doc/01install.md
@@ -0,0 +1,16 @@
+# Installation Guide
+
+1. Clone the Repository
+
+mkdir m2bsgreprod
+git clone git@etulab.univ-amu.fr:o22025448/tp1ara.git m2bsgreprod
+
+2. Install micromamba or an equivalent.
+
+3. Create a m2bsgreprod micromamba environment and install apptainer 1.3.2
+
+4. Build the apptainer image
+
+mkdir -p results/containers
+sudo apptainer build results/containers/m2bsgreprod.sif containers/m2bsgreprod.def
+
diff --git a/doc/02run.md b/doc/02run.md
new file mode 100644
index 0000000000000000000000000000000000000000..ee32ae61ee2ad9c83e204c02020b6d4610ec0805
--- /dev/null
+++ b/doc/02run.md
@@ -0,0 +1,10 @@
+# Execute the Rscripts
+
+1. Activate the Environment
+
+micromamba activate m2bsgreprod
+
+2. Run the Workflow
+
+apptainer exec results/containers/m2bsgreprod.sif make -f workflows/makefile
+