diff --git a/src/tp1.R b/src/tp1.R index 6d6f7d55f9d82fcb7cc6a66254124b34b9f6189e..d3c8fa175b5ec5559058a69bb4b7600b46975eaf 100644 --- a/src/tp1.R +++ b/src/tp1.R @@ -1,26 +1,32 @@ -wdir="." -dir.create(wdir, showWarnings = F, recursive = T) -setwd(wdir) - -#library(devtools) -#library(plyr) - -if (!require("BiocManager", quietly = TRUE)) - install.packages("BiocManager") - -if (!require("snpStats", quietly = TRUE)) - BiocManager::install("snpStats") - -if (!require("SNPRelate", quietly = TRUE)) -BiocManager::install("SNPRelate") - -# Charger les bibliothèques -library(snpStats) -library(SNPRelate) +options(repos = c(CRAN = "https://cloud.r-project.org")) +output_dir = "results/tp1" +dir.create(output_dir, showWarnings = F, recursive = T) # Les données analysées nécessitant beaucoup de RAM, nous allons sélectionner aléatoirement 250000 SNPs et réecrire des fichiers bed, bim, fam penncath_bed_path = "results/data/penncath.bed" penncath_bim_path = "results/data/penncath.bim" penncath_fam_path = "results/data/penncath.fam" + geno <- snpStats::read.plink(penncath_bed_path, penncath_bim_path, penncath_fam_path, select.snps=sample(1:861473, 25000, replace = FALSE ), na.strings = ("-9")) +plink_base=file.path(output_dir, "plink_base") +snpStats::write.plink(plink_base, snps=geno$genotypes, pedigree=geno$fam[,1], id=geno$fam[,1], mother=geno$fam[,4], sex=geno$fam[,5], phenotype=geno$fam[,6], chromosome = geno$map[,1], genetic.distance = geno$map[,3], position = geno$map[,4], allele.1 = geno$map[,5], allele.2 = geno$map[,6], na.code = ("-9")) + +genoBim<-geno$map +colnames(genoBim)<-c("chr", "SNP", "gen.dist", "position", "A1", "A2") +#head(genoBim) + +genotype<-geno$genotype +#print(genotype) + +genoFam<-geno$fam +#head(genoFam) + +# On commence par libérer de l'espace +rm(geno) + +rdata_path = file.path(output_dir, "TP1_asbvg.RData") +save.image(rdata_path) + + + diff --git a/workflows/Makefile b/workflows/Makefile deleted file mode 100644 index 987930c194e11a491a4dd3a846bb9561e5476a87..0000000000000000000000000000000000000000 --- a/workflows/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -# Makefile pour enchaîner deux scripts - -# Cible par défaut (si 'make' est appelé sans argument) -all: tp1_output.txt - -# Première étape : exécuter download_data.R -download_data_output.txt: ~/tp1ara/src/download_data.R - @echo "Running download_data.R..." - @Rscript ~/tp1ara/src/download_data.R > download_data_output.txt - @echo "download_data.R completed and output stored in download_data_output.txt." - -# Deuxième étape : exécuter tp1.R en utilisant le résultat du premier script -tp1_output.txt: download_data_output.txt ~/tp1ara/src/tp1.R - @echo "Running tp1.R..." - @Rscript ~/tp1ara/src/tp1.R > tp1_output.txt - @echo "tp1.R completed and output stored in tp1_output.txt." - -# Nettoyer les fichiers générés -clean: - @rm -f download_data_output.txt tp1_output.txt - @echo "Cleaned up output files." diff --git a/workflows/makefile.v2 b/workflows/makefile similarity index 99% rename from workflows/makefile.v2 rename to workflows/makefile index c863e10dc5a53f5d688f1bd12ec07ba1ea1e948c..71616f41a7bb8be84c2fb5509d22455783e048a3 100644 --- a/workflows/makefile.v2 +++ b/workflows/makefile @@ -18,3 +18,4 @@ results/data/penncath.bed results/data/penncath.bim results/data/penncath.fam: clean: rm -rf results/data/penncath.bed results/data/penncath.bim results/data/penncath.fam results/data/penncath.csv results/penncath.tar.gz results/data +