script

db8d7506 · OTT Oceane · 444177d3 · db8d7506 · db8d7506
Commit db8d7506 authored 7 months ago by OTT Oceane
--- a/src/TP1.v2.R
+++ b/src/TP1.v2.R
@@ -8,20 +8,26 @@ output_dir <- file.path(m2bsgreprod, "results", "TP1")
 # Créer les répertoires si nécessaire
 dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
+# Les données analysées nécessitant beaucoup de RAM, nous allons sélectionner aléatoirement 25000 SNPs et réécrire des fichiers bed, bim, fam
+penncath_bed_path = file.path(data.dir, "penncath.bed")
+penncath_bim_path = file.path(data.dir, "penncath.bim")
+penncath_fam_path = file.path(data.dir, "penncath.fam")
+clinical_csv_path = "data/GWAStutorial_clinical.csv"
 # Fichiers d'entrées
 gwas.fn<-lapply(c(bed='bed', fam='fam', bim ='bim', gds='gds'), function(n) sprintf("%s/GWAStutorial.%s", data.dir, n))
 gwas.fn.2<-lapply(c(bed='bed', fam='fam', bim ='bim', gds='gds'), function(n) sprintf("%s/data.%s", data.dir, n))
-clinical.fn<-sprintf("%s/GWAStutorial_clinical.csv", data.dir)
+# clinical.fn<-sprintf("%s/GWAStutorial_clinical.csv", data.dir)
 onethou.fn<-lapply(c(info='info', ped='ped'), function(n) sprintf("%s/chr16_1000g_CEU.%s", data.dir, n))
 protein.coding.coords.fname<-sprintf("%s/ProCodgene_coords.csv", data.dir)
 # Sauvegarde des "modules"
 working.data.fname <- function(num) { sprintf("%s/working_%s.Rdata", output_dir, num) }
-# Les données analysées nécessitant beaucoup de RAM, nous allons sélectionner aléatoirement 250000 SNPs et réecrire des fichiers bed, bim, fam
-penncath_bed_path = file.path(data.dir, "penncath.bed")
-penncath_bim_path = file.path(data.dir, "penncath.bim")
-penncath_fam_path = file.path(data.dir, "penncath.fam")
 geno <- snpStats::read.plink(penncath_bed_path, penncath_bim_path, penncath_fam_path, select.snps=sample(1:861473, 25000, replace = FALSE ), na.strings = ("-9"))
@@ -39,7 +45,7 @@ genoFam<-geno$fam
 rm(geno)
 # On charge le fichier clinique
-clinical<- read.csv(clinical.fn, colClasses = c("character", "factor", "factor", rep("numeric", 4)))
+clinical<- read.csv(clinical_csv_path, colClasses = c("character", "factor", "factor", rep("numeric", 4)))
 rownames(clinical)<-clinical$FamID
 print(head(clinical))

--- a/src/TP1.v3.R
+++ b/src/TP1.v3.R
@@ -9,7 +9,6 @@ penncath_fam_path = "results/data/penncath.fam"
 geno <- snpStats::read.plink(penncath_bed_path, penncath_bim_path, penncath_fam_path, select.snps=sample(1:861473, 25000, replace = FALSE ), na.strings = ("-9"))
 plink_base=file.path(output_dir, "plink_base")
 snpStats::write.plink(plink_base, snps=geno$genotypes, pedigree=geno$fam[,1], id=geno$fam[,1], mother=geno$fam[,4], sex=geno$fam[,5], phenotype=geno$fam[,6], chromosome = geno$map[,1], genetic.distance = geno$map[,3], position = geno$map[,4], allele.1 = geno$map[,5], allele.2 = geno$map[,6], na.code = ("-9"))
@@ -26,6 +25,15 @@ genoFam<-geno$fam
 # On commence par libérer de l'espace
 rm(geno)
+# On charge le fichier clinique
+clinical_csv_path = "data/GWAStutorial_clinical.csv"
+clinical<- read.csv(clinical_csv_path, colClasses = c("character", "factor", "factor", rep("numeric", 4)))
+rownames(clinical)<-clinical$FamID
+#print(head(clinical))
+protein.coding.coords.fname<-"data/ProCodgene_coords.csv"
 rdata_path = file.path(output_dir, "TP1_asbvg.RData")
 save.image(rdata_path)