Skip to content
Snippets Groups Projects
Commit db8d7506 authored by OTT Oceane
Browse files

script

parent 444177d3
No related branches found
No related tags found
No related merge requests found
...@@ -8,20 +8,26 @@ output_dir <- file.path(m2bsgreprod, "results", "TP1") ...@@ -8,20 +8,26 @@ output_dir <- file.path(m2bsgreprod, "results", "TP1")
# Créer les répertoires si nécessaire # Créer les répertoires si nécessaire
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE) dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
# Les données analysées nécessitant beaucoup de RAM, nous allons sélectionner aléatoirement 25000 SNPs et réécrire des fichiers bed, bim, fam
penncath_bed_path = file.path(data.dir, "penncath.bed")
penncath_bim_path = file.path(data.dir, "penncath.bim")
penncath_fam_path = file.path(data.dir, "penncath.fam")
clinical_csv_path = "data/GWAStutorial_clinical.csv"
# Fichiers d'entrées # Fichiers d'entrées
gwas.fn<-lapply(c(bed='bed', fam='fam', bim ='bim', gds='gds'), function(n) sprintf("%s/GWAStutorial.%s", data.dir, n)) gwas.fn<-lapply(c(bed='bed', fam='fam', bim ='bim', gds='gds'), function(n) sprintf("%s/GWAStutorial.%s", data.dir, n))
gwas.fn.2<-lapply(c(bed='bed', fam='fam', bim ='bim', gds='gds'), function(n) sprintf("%s/data.%s", data.dir, n)) gwas.fn.2<-lapply(c(bed='bed', fam='fam', bim ='bim', gds='gds'), function(n) sprintf("%s/data.%s", data.dir, n))
clinical.fn<-sprintf("%s/GWAStutorial_clinical.csv", data.dir)
# clinical.fn<-sprintf("%s/GWAStutorial_clinical.csv", data.dir)
onethou.fn<-lapply(c(info='info', ped='ped'), function(n) sprintf("%s/chr16_1000g_CEU.%s", data.dir, n)) onethou.fn<-lapply(c(info='info', ped='ped'), function(n) sprintf("%s/chr16_1000g_CEU.%s", data.dir, n))
protein.coding.coords.fname<-sprintf("%s/ProCodgene_coords.csv", data.dir) protein.coding.coords.fname<-sprintf("%s/ProCodgene_coords.csv", data.dir)
# Sauvegarde des "modules" # Sauvegarde des "modules"
working.data.fname <- function(num) { sprintf("%s/working_%s.Rdata", output_dir, num) } working.data.fname <- function(num) { sprintf("%s/working_%s.Rdata", output_dir, num) }
# Les données analysées nécessitant beaucoup de RAM, nous allons sélectionner aléatoirement 25000 SNPs et réécrire des fichiers bed, bim, fam
penncath_bed_path = file.path(data.dir, "penncath.bed")
penncath_bim_path = file.path(data.dir, "penncath.bim")
penncath_fam_path = file.path(data.dir, "penncath.fam")
geno <- snpStats::read.plink(penncath_bed_path, penncath_bim_path, penncath_fam_path, select.snps=sample(1:861473, 25000, replace = FALSE ), na.strings = ("-9")) geno <- snpStats::read.plink(penncath_bed_path, penncath_bim_path, penncath_fam_path, select.snps=sample(1:861473, 25000, replace = FALSE ), na.strings = ("-9"))
...@@ -39,7 +45,7 @@ genoFam<-geno$fam ...@@ -39,7 +45,7 @@ genoFam<-geno$fam
rm(geno) rm(geno)
# On charge le fichier clinique # On charge le fichier clinique
clinical<- read.csv(clinical.fn, colClasses = c("character", "factor", "factor", rep("numeric", 4))) clinical<- read.csv(clinical_csv_path, colClasses = c("character", "factor", "factor", rep("numeric", 4)))
rownames(clinical)<-clinical$FamID rownames(clinical)<-clinical$FamID
print(head(clinical)) print(head(clinical))
......
...@@ -9,7 +9,6 @@ penncath_fam_path = "results/data/penncath.fam" ...@@ -9,7 +9,6 @@ penncath_fam_path = "results/data/penncath.fam"
geno <- snpStats::read.plink(penncath_bed_path, penncath_bim_path, penncath_fam_path, select.snps=sample(1:861473, 25000, replace = FALSE ), na.strings = ("-9")) geno <- snpStats::read.plink(penncath_bed_path, penncath_bim_path, penncath_fam_path, select.snps=sample(1:861473, 25000, replace = FALSE ), na.strings = ("-9"))
plink_base=file.path(output_dir, "plink_base") plink_base=file.path(output_dir, "plink_base")
snpStats::write.plink(plink_base, snps=geno$genotypes, pedigree=geno$fam[,1], id=geno$fam[,1], mother=geno$fam[,4], sex=geno$fam[,5], phenotype=geno$fam[,6], chromosome = geno$map[,1], genetic.distance = geno$map[,3], position = geno$map[,4], allele.1 = geno$map[,5], allele.2 = geno$map[,6], na.code = ("-9")) snpStats::write.plink(plink_base, snps=geno$genotypes, pedigree=geno$fam[,1], id=geno$fam[,1], mother=geno$fam[,4], sex=geno$fam[,5], phenotype=geno$fam[,6], chromosome = geno$map[,1], genetic.distance = geno$map[,3], position = geno$map[,4], allele.1 = geno$map[,5], allele.2 = geno$map[,6], na.code = ("-9"))
...@@ -26,6 +25,15 @@ genoFam<-geno$fam ...@@ -26,6 +25,15 @@ genoFam<-geno$fam
# On commence par libérer de l'espace # On commence par libérer de l'espace
rm(geno) rm(geno)
# On charge le fichier clinique
clinical_csv_path = "data/GWAStutorial_clinical.csv"
clinical<- read.csv(clinical_csv_path, colClasses = c("character", "factor", "factor", rep("numeric", 4)))
rownames(clinical)<-clinical$FamID
#print(head(clinical))
protein.coding.coords.fname<-"data/ProCodgene_coords.csv"
rdata_path = file.path(output_dir, "TP1_asbvg.RData") rdata_path = file.path(output_dir, "TP1_asbvg.RData")
save.image(rdata_path) save.image(rdata_path)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment