Skip to content
Snippets Groups Projects
Commit db8d7506 authored by OTT Oceane's avatar OTT Oceane
Browse files

script

parent 444177d3
Branches
Tags
No related merge requests found
......@@ -8,20 +8,26 @@ output_dir <- file.path(m2bsgreprod, "results", "TP1")
# Create the output directory (and any missing parents) if needed
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
# The analysed data need a lot of RAM, so we randomly select a subset of SNPs and rewrite bed, bim and fam files
# NOTE(review): the original comment said 250000 SNPs, but the read.plink() call below samples 25000 — confirm the intended count
penncath_bed_path = file.path(data.dir, "penncath.bed")
penncath_bim_path = file.path(data.dir, "penncath.bim")
penncath_fam_path = file.path(data.dir, "penncath.fam")
# Relative path (resolved against the working directory), unlike the data.dir-based paths above
clinical_csv_path = "data/GWAStutorial_clinical.csv"
# Input files
# Named lists of paths (one entry per extension) built under data.dir
gwas.fn<-lapply(c(bed='bed', fam='fam', bim ='bim', gds='gds'), function(n) sprintf("%s/GWAStutorial.%s", data.dir, n))
gwas.fn.2<-lapply(c(bed='bed', fam='fam', bim ='bim', gds='gds'), function(n) sprintf("%s/data.%s", data.dir, n))
clinical.fn<-sprintf("%s/GWAStutorial_clinical.csv", data.dir)
# clinical.fn<-sprintf("%s/GWAStutorial_clinical.csv", data.dir)
onethou.fn<-lapply(c(info='info', ped='ped'), function(n) sprintf("%s/chr16_1000g_CEU.%s", data.dir, n))
protein.coding.coords.fname<-sprintf("%s/ProCodgene_coords.csv", data.dir)
# Saving of the "modules"
# Builds the path "<output_dir>/working_<num>.Rdata" for a given module number/label
working.data.fname <- function(num) { sprintf("%s/working_%s.Rdata", output_dir, num) }
# The analysed data need a lot of RAM, so we randomly select a subset of SNPs and rewrite bed, bim and fam files
# NOTE(review): the original comment said 250000 SNPs, but the read.plink() call below samples 25000 — confirm the intended count
penncath_bed_path = file.path(data.dir, "penncath.bed")
penncath_bim_path = file.path(data.dir, "penncath.bim")
penncath_fam_path = file.path(data.dir, "penncath.fam")
# Read the PLINK files, keeping 25000 SNP indices drawn without replacement from 1..861473; "-9" is treated as missing
# NOTE(review): no set.seed() is visible in this hunk, so the selected SNPs may differ between runs — confirm
geno <- snpStats::read.plink(penncath_bed_path, penncath_bim_path, penncath_fam_path, select.snps=sample(1:861473, 25000, replace = FALSE ), na.strings = ("-9"))
......@@ -39,7 +45,7 @@ genoFam<-geno$fam
# Drop the geno object from the workspace
rm(geno)
# Load the clinical file
# Column classes: first column character, next two factor, last four numeric
# NOTE(review): the two consecutive read.csv() assignments look like the before/after lines of the diff; if both run, the second overwrites the first — confirm which one is live
clinical<- read.csv(clinical.fn, colClasses = c("character", "factor", "factor", rep("numeric", 4)))
clinical<- read.csv(clinical_csv_path, colClasses = c("character", "factor", "factor", rep("numeric", 4)))
# Index the rows by the FamID column
rownames(clinical)<-clinical$FamID
print(head(clinical))
......
......@@ -9,7 +9,6 @@ penncath_fam_path = "results/data/penncath.fam"
# Read the PLINK files, keeping 25000 SNP indices drawn without replacement from 1..861473; "-9" is treated as missing
# NOTE(review): no set.seed() is visible in this hunk, so the selected SNPs may differ between runs — confirm
geno <- snpStats::read.plink(penncath_bed_path, penncath_bim_path, penncath_fam_path, select.snps=sample(1:861473, 25000, replace = FALSE ), na.strings = ("-9"))
# Base path (without extension) for the rewritten PLINK files, placed in output_dir
plink_base=file.path(output_dir, "plink_base")
# Write the subset back out as PLINK files, taking sample fields from geno$fam and SNP fields from geno$map
# NOTE(review): pedigree and id both use geno$fam[,1], and no father argument is passed — confirm this is intended
snpStats::write.plink(plink_base, snps=geno$genotypes, pedigree=geno$fam[,1], id=geno$fam[,1], mother=geno$fam[,4], sex=geno$fam[,5], phenotype=geno$fam[,6], chromosome = geno$map[,1], genetic.distance = geno$map[,3], position = geno$map[,4], allele.1 = geno$map[,5], allele.2 = geno$map[,6], na.code = ("-9"))
......@@ -26,6 +25,15 @@ genoFam<-geno$fam
# Start by freeing some memory
rm(geno)
# Load the clinical file
# Relative path, resolved against the current working directory
clinical_csv_path = "data/GWAStutorial_clinical.csv"
# Column classes: first column character, next two factor, last four numeric
clinical<- read.csv(clinical_csv_path, colClasses = c("character", "factor", "factor", rep("numeric", 4)))
# Index the rows by the FamID column
rownames(clinical)<-clinical$FamID
#print(head(clinical))
protein.coding.coords.fname<-"data/ProCodgene_coords.csv"
# Save the whole current workspace to an .RData file in output_dir
rdata_path = file.path(output_dir, "TP1_asbvg.RData")
save.image(rdata_path)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment