From 8482dc2aaad8c3b56753995fd6dd8ba14ff58903 Mon Sep 17 00:00:00 2001
From: THIBERT-RIPOSO Anais <t20006223@V-PP-47-018.salsa.univ-amu.fr>
Date: Mon, 21 Oct 2024 17:03:31 +0200
Subject: [PATCH] scripts tp1 r

---
 src/download_data.R | 52 +++++++++++++++++++++++++++++++++++++++++++++
 src/tp2.R           | 24 +++++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 src/download_data.R
 create mode 100644 src/tp2.R

diff --git a/src/download_data.R b/src/download_data.R
new file mode 100644
index 0000000..2f0d18b
--- /dev/null
+++ b/src/download_data.R
@@ -0,0 +1,52 @@
+# Download the PennCATH archive into <wdir>/results, verify its MD5 checksum
+# and extract it in place. Stops with an error when the checksum is wrong.
+options(repos = c(CRAN = "https://cloud.r-project.org"))
+# download.file() obeys the "timeout" option (60 s by default), which the R
+# documentation calls often insufficient for large files, so raise it.
+options(timeout = max(600, getOption("timeout")))
+
+# 'remotes' is only used through remotes::install_version(); no need to attach it.
+if (!requireNamespace("remotes", quietly = TRUE)) {
+  install.packages("remotes", dependencies = TRUE)
+}
+
+# Install the pinned 'digest' release only when no version is available yet.
+if (!requireNamespace("digest", quietly = TRUE)) {
+  remotes::install_version("digest", version = "0.6.25", repos = "https://cloud.r-project.org")
+}
+library(digest)
+
+# Define variables
+# wdir="/shared/projects/2427_data_master/user/agonzalez/m2bsgreprod/src"
+wdir <- "."
+results_dir <- file.path(wdir, "results")
+url <- "https://d1ypx1ckp5bo16.cloudfront.net/penncath/penncath.tar.gz"
+dest_file <- file.path(results_dir, "penncath.tar.gz")
+expected_md5 <- "5d5f422aeafdd2d725ad93f447d9af4b"
+
+# Create results directory if it doesn't exist
+if (!dir.exists(results_dir)) {
+  dir.create(results_dir, recursive = TRUE)
+}
+
+# Download only when no cached copy of the archive is present.
+if (!file.exists(dest_file)) {
+  message("File does not exist. Downloading...")
+  # mode = "wb": the archive is binary; never let it go through a text-mode transfer.
+  download.file(url, dest_file, method = "auto", mode = "wb")
+} else {
+  message("File already exists.")
+}
+
+# Verify the MD5 checksum of the file on disk (file = TRUE hashes its contents).
+actual_md5 <- digest(dest_file, algo = "md5", file = TRUE)
+
+if (!identical(actual_md5, expected_md5)) {
+  # Drop the bad file; otherwise every re-run would reuse it and fail again.
+  unlink(dest_file)
+  stop("MD5 checksum does not match! Removed ", dest_file, "; re-run to download it again.")
+}
+message("MD5 checksum matches! Proceeding to extract the file...")
+untar(dest_file, exdir = results_dir)
+message("File uncompressed successfully!")
+
diff --git a/src/tp2.R b/src/tp2.R
new file mode 100644
index 0000000..4977de1
--- /dev/null
+++ b/src/tp2.R
@@ -0,0 +1,24 @@
+# TP2: read a random subset of the PennCATH PLINK genotypes with snpStats.
+wdir <- "/amuhome/t20006223/m2bsgreprod/src"
+# recursive: also create missing parents; no warning when wdir already exists.
+dir.create(wdir, recursive = TRUE, showWarnings = FALSE)
+setwd(wdir)
+
+library(devtools)
+
+# Install BiocManager and the Bioconductor packages only when they are missing.
+if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
+for (pkg in c("snpStats", "SNPRelate")) {
+  if (!requireNamespace(pkg, quietly = TRUE)) BiocManager::install(pkg)
+}
+library(snpStats)
+library(SNPRelate)
+
+# The data need a lot of RAM, so only a random subset of SNPs is read (rewriting bed/bim/fam
+# files is not done here). NOTE(review): the original comment said 250000 SNPs, the code draws 25000.
+data_dir <- "/amuhome/t20006223/m2bsgreprod/results/data"
+penncath_bed_path <- file.path(data_dir, "penncath.bed")
+penncath_bim_path <- file.path(data_dir, "penncath.bim")
+penncath_fam_path <- file.path(data_dir, "penncath.fam")
+set.seed(20241021)  # fixed seed so the same SNP subset is drawn on every run
+geno <- snpStats::read.plink(penncath_bed_path, penncath_bim_path, penncath_fam_path, select.snps = sample.int(861473, 25000), na.strings = "-9")
-- 
GitLab