diff --git a/sources/.Rhistory b/sources/.Rhistory
new file mode 100644
index 0000000000000000000000000000000000000000..7de21e43a55a0df46876f9bc7280594f5ac384e4
--- /dev/null
+++ b/sources/.Rhistory
@@ -0,0 +1,2 @@
+if (!require("SNPRelate", quietly = TRUE))
+BiocManager::install("SNPRelate")
diff --git a/sources/download_data.R b/sources/download_data.R
new file mode 100644
index 0000000000000000000000000000000000000000..61e5291ad5d4922b8943bd354266bfbdebf5fbf0
--- /dev/null
+++ b/sources/download_data.R
@@ -0,0 +1,71 @@
+# Download the PennCATH archive, verify its MD5 checksum and extract it.
+#
+# Side effects: may install the 'remotes' and 'digest' packages, creates
+# `results_dir`, downloads `url` to `dest_file` and extracts it there.
+
+options(repos = c(CRAN = "https://cloud.r-project.org"))
+
+# 'remotes' is only called through `remotes::`, so it has to be installed
+# but does not need to be attached.
+if (!requireNamespace("remotes", quietly = TRUE)) {
+  install.packages("remotes", dependencies = TRUE)
+}
+
+# Install a pinned version of 'digest' when the package is missing.
+if (!requireNamespace("digest", quietly = TRUE)) {
+  remotes::install_version(
+    "digest",
+    version = "0.6.25",
+    repos = "https://cloud.r-project.org"
+  )
+}
+# Fall back to the current CRAN release if 'digest' is still unavailable.
+if (!requireNamespace("digest", quietly = TRUE)) {
+  install.packages("digest", dependencies = TRUE)
+}
+library(digest)
+
+# Define variables
+# wdir <- "/shared/projects/2427_data_master/user/agonzalez/m2bsgreprod/src"
+wdir <- "."
+results_dir <- file.path(wdir, "results")
+url <- "https://d1ypx1ckp5bo16.cloudfront.net/penncath/penncath.tar.gz"
+dest_file <- file.path(results_dir, "penncath.tar.gz")
+expected_md5 <- "5d5f422aeafdd2d725ad93f447d9af4b"
+
+# Create results directory if it doesn't exist
+if (!dir.exists(results_dir)) {
+  dir.create(results_dir, recursive = TRUE)
+}
+
+# Download the archive unless a copy is already present
+if (!file.exists(dest_file)) {
+  message("File does not exist. Downloading...")
+  # mode = "wb" writes the archive as binary on every platform
+  download.file(url, dest_file, method = "auto", mode = "wb")
+} else {
+  message("File already exists.")
+}
+
+# Verify the MD5 checksum
+actual_md5 <- digest(dest_file, algo = "md5", file = TRUE)
+
+if (!isTRUE(actual_md5 == expected_md5)) {
+  # Remove the bad archive so that the next run downloads it again instead
+  # of failing on the same cached file.
+  unlink(dest_file)
+  stop(
+    "MD5 checksum does not match! Expected ", expected_md5,
+    " but got ", actual_md5, ". The archive was removed; run again.",
+    call. = FALSE
+  )
+}
+
+message("MD5 checksum matches! Proceeding to extract the file...")
+
+# Uncompress the file; untar() returns its status code invisibly and
+# anything other than 0 is treated as a failure.
+untar_status <- untar(dest_file, exdir = results_dir)
+if (!identical(as.integer(untar_status), 0L)) {
+  stop("Extraction of ", dest_file, " failed.", call. = FALSE)
+}
+message("File uncompressed successfully!")
diff --git a/sources/tp2.R b/sources/tp2.R
new file mode 100644
index 0000000000000000000000000000000000000000..edc229b0126b17831f0937b67153b549387c1779
--- /dev/null
+++ b/sources/tp2.R
@@ -0,0 +1,52 @@
+# Load a random subset of the PennCATH SNPs with read.plink().
+
+# The project root defaults to the original location; set the
+# M2BSGREPROD_DIR environment variable to run from another checkout.
+project_dir <- Sys.getenv("M2BSGREPROD_DIR", unset = "")
+if (!nzchar(project_dir)) {
+  project_dir <- "/amuhome/s23014817/m2bsgreprod"
+}
+wdir <- file.path(project_dir, "sources")
+dir.create(wdir, showWarnings = FALSE, recursive = TRUE)
+setwd(wdir)
+
+library(devtools)
+
+# Install each dependency only when it is missing.
+if (!requireNamespace("BiocManager", quietly = TRUE)) {
+  install.packages("BiocManager")
+}
+
+if (!requireNamespace("snpStats", quietly = TRUE)) {
+  BiocManager::install("snpStats")
+}
+
+if (!requireNamespace("SNPRelate", quietly = TRUE)) {
+  BiocManager::install("SNPRelate")
+}
+
+# Attach after the install step so that read.plink() is also available on
+# the run that installed the packages.
+library(snpStats)
+library(SNPRelate)
+
+# The analysed data need a lot of RAM, so only a random subset of SNPs is
+# read.
+# NOTE(review): the original comment announced 250000 SNPs and rewriting
+# the bed/bim/fam files, but the code samples 25000 and writes nothing;
+# confirm which is intended. No seed is set, so the subset differs between
+# runs.
+data_dir <- file.path(project_dir, "results", "data")
+penncath_bed_path <- file.path(data_dir, "penncath.bed")
+penncath_bim_path <- file.path(data_dir, "penncath.bim")
+penncath_fam_path <- file.path(data_dir, "penncath.fam")
+# presumably the number of SNPs in penncath.bim; verify against the file
+n_snps_total <- 861473L
+n_snps_sampled <- 25000L
+geno <- read.plink(
+  penncath_bed_path,
+  penncath_bim_path,
+  penncath_fam_path,
+  select.snps = sample(seq_len(n_snps_total), n_snps_sampled, replace = FALSE),
+  na.strings = "-9"
+)