diff --git a/src/download_data.R b/src/download_data.R
index 61e5291ad5d4922b8943bd354266bfbdebf5fbf0..2f0d18b6cef2a78dedfe86445df06f870b23c3d9 100644
--- a/src/download_data.R
+++ b/src/download_data.R
@@ -50,5 +50,3 @@ if (actual_md5 == expected_md5) {
   stop("MD5 checksum does not match!")
 }
 
-
-
diff --git a/src/tp1.R b/src/tp1.R
index 1230310402f23cd574cf9be80fc7e2f330ac4689..c58bd34051aa8098b6807910ce6ca6bd42cb5df1 100644
--- a/src/tp1.R
+++ b/src/tp1.R
@@ -1,4 +1,10 @@
-options(repos = c(CRAN = "https://cloud.r-project.org"))
+# NOTE(review): this change drops the explicit CRAN mirror option; the
+# install.packages() fallback below needs a mirror configured elsewhere
+# when the script runs non-interactively -- TODO confirm.
+
+# Directory that receives every file written by this script.
+output_dir <- "results/tp1"
+dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
 
 if (!require("BiocManager", quietly = TRUE))
     install.packages("BiocManager")
@@ -9,9 +15,61 @@ if (!require("snpStats", quietly = TRUE))
 if (!require("SNPRelate", quietly = TRUE))
     BiocManager::install("SNPRelate")
 
+# Load the libraries
+library(snpStats)
+library(SNPRelate)
+#library(devtools)
+#library(plyr)
+
 # Les données analysées nécessitant beaucoup de RAM, nous allons sélectionner aléatoirement 250000 SNPs et réecrire des fichiers bed, bim, fam
 penncath_bed_path = "results/data/penncath.bed"
 penncath_bim_path = "results/data/penncath.bim"
 penncath_fam_path = "results/data/penncath.fam"
+
 
 geno <- snpStats::read.plink(penncath_bed_path, penncath_bim_path, penncath_fam_path, select.snps=sample(1:861473, 25000, replace = FALSE ), na.strings = ("-9"))
+
+# NOTE(review): the comment above announces 250000 SNPs while sample() draws
+# 25000 -- TODO confirm which figure is intended.
+
+# Write the sampled subset back out as PLINK bed/bim/fam files. The .fam
+# columns are family ID, individual ID, father, mother, sex and phenotype, so
+# the individual ID is column 2 and the father is column 3.
+plink_base <- file.path(output_dir, "plink_base")
+write.plink(
+  plink_base,
+  snps = geno$genotypes,
+  pedigree = geno$fam[, 1],
+  id = geno$fam[, 2],
+  father = geno$fam[, 3],
+  mother = geno$fam[, 4],
+  sex = geno$fam[, 5],
+  phenotype = geno$fam[, 6],
+  chromosome = geno$map[, 1],
+  genetic.distance = geno$map[, 3],
+  position = geno$map[, 4],
+  allele.1 = geno$map[, 5],
+  allele.2 = geno$map[, 6],
+  na.code = "-9"
+)
+
+# SNP annotation table (the .bim content) with short column names.
+genoBim <- geno$map
+colnames(genoBim) <- c("chr", "SNP", "gen.dist", "position", "A1", "A2")
+head(genoBim)
+
+# Genotype matrix; the element is named `genotypes`, spelled out in full
+# rather than relying on partial matching of `$`.
+genotype <- geno$genotypes
+print(genotype)
+
+# Sample information table (the .fam content).
+genoFam <- geno$fam
+head(genoFam)
+
+# Start by freeing memory: the needed pieces were copied out above.
+rm(geno)
+
+# Save every object left in the workspace to an .RData file.
+rdata_path <- file.path(output_dir, "TP1_asbvg.RData")
+save.image(rdata_path)