From 522ff6e1cbebf30006bc9aff23401c0aa161d04c Mon Sep 17 00:00:00 2001 From: OTT Oceane <o22025448@V-PP-47-L-054.salsa.univ-amu.fr> Date: Thu, 24 Oct 2024 09:11:06 +0200 Subject: [PATCH] mod --- src/tp1.R | 44 ++++++++++++++++------------- workflows/Makefile | 21 -------------- workflows/{makefile.v2 => makefile} | 1 + 3 files changed, 26 insertions(+), 40 deletions(-) delete mode 100644 workflows/Makefile rename workflows/{makefile.v2 => makefile} (99%) diff --git a/src/tp1.R b/src/tp1.R index 6d6f7d5..d3c8fa1 100644 --- a/src/tp1.R +++ b/src/tp1.R @@ -1,26 +1,32 @@ -wdir="." -dir.create(wdir, showWarnings = F, recursive = T) -setwd(wdir) - -#library(devtools) -#library(plyr) - -if (!require("BiocManager", quietly = TRUE)) - install.packages("BiocManager") - -if (!require("snpStats", quietly = TRUE)) - BiocManager::install("snpStats") - -if (!require("SNPRelate", quietly = TRUE)) -BiocManager::install("SNPRelate") - -# Charger les bibliothèques -library(snpStats) -library(SNPRelate) +options(repos = c(CRAN = "https://cloud.r-project.org")) +output_dir = "results/tp1" +dir.create(output_dir, showWarnings = F, recursive = T) # Les données analysées nécessitant beaucoup de RAM, nous allons sélectionner aléatoirement 250000 SNPs et réecrire des fichiers bed, bim, fam penncath_bed_path = "results/data/penncath.bed" penncath_bim_path = "results/data/penncath.bim" penncath_fam_path = "results/data/penncath.fam" + geno <- snpStats::read.plink(penncath_bed_path, penncath_bim_path, penncath_fam_path, select.snps=sample(1:861473, 25000, replace = FALSE ), na.strings = ("-9")) +plink_base=file.path(output_dir, "plink_base") +snpStats::write.plink(plink_base, snps=geno$genotypes, pedigree=geno$fam[,1], id=geno$fam[,1], mother=geno$fam[,4], sex=geno$fam[,5], phenotype=geno$fam[,6], chromosome = geno$map[,1], genetic.distance = geno$map[,3], position = geno$map[,4], allele.1 = geno$map[,5], allele.2 = geno$map[,6], na.code = ("-9")) + +genoBim<-geno$map +colnames(genoBim)<-c("chr", "SNP", "gen.dist", "position", "A1", "A2") +#head(genoBim) + +genotype<-geno$genotype +#print(genotype) + +genoFam<-geno$fam +#head(genoFam) + +# On commence par libérer de l'espace +rm(geno) + +rdata_path = file.path(output_dir, "TP1_asbvg.RData") +save.image(rdata_path) + + + diff --git a/workflows/Makefile b/workflows/Makefile deleted file mode 100644 index 987930c..0000000 --- a/workflows/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -# Makefile pour enchaîner deux scripts - -# Cible par défaut (si 'make' est appelé sans argument) -all: tp1_output.txt - -# Première étape : exécuter download_data.R -download_data_output.txt: ~/tp1ara/src/download_data.R - @echo "Running download_data.R..." - @Rscript ~/tp1ara/src/download_data.R > download_data_output.txt - @echo "download_data.R completed and output stored in download_data_output.txt." - -# Deuxième étape : exécuter tp1.R en utilisant le résultat du premier script -tp1_output.txt: download_data_output.txt ~/tp1ara/src/tp1.R - @echo "Running tp1.R..." - @Rscript ~/tp1ara/src/tp1.R > tp1_output.txt - @echo "tp1.R completed and output stored in tp1_output.txt." - -# Nettoyer les fichiers générés -clean: - @rm -f download_data_output.txt tp1_output.txt - @echo "Cleaned up output files." diff --git a/workflows/makefile.v2 b/workflows/makefile similarity index 99% rename from workflows/makefile.v2 rename to workflows/makefile index c863e10..71616f4 100644 --- a/workflows/makefile.v2 +++ b/workflows/makefile @@ -18,3 +18,4 @@ results/data/penncath.bed results/data/penncath.bim results/data/penncath.fam: clean: rm -rf results/data/penncath.bed results/data/penncath.bim results/data/penncath.fam results/data/penncath.csv results/penncath.tar.gz results/data + -- GitLab