# Rbayz function to build and run bayz models from R # version 4: added gf1g and gf2g feature models # version 5: added mmG1, mmG2, mtmmG models # version 6: added mmG3 and memmp models # version 7 (Apr 2019): added mmcw model cat("This is RBayz version 7 for bayz version 26.1+.\n") cat("If bayz is not in the R working directory, set BAYZHOME to the directory with bayz, for instance:\n") cat(" BAYZHOME = \"~/bin/\"\n") # If bayz can't be found from R, user can manually set BAYZHOME variable to point to location of bayz, # BAYZHOME should be set including a last slash / BAYZHOME <- "./" # ------ Generic functions # Clean up possible old files from a previous run # Note: cleanup() does not yet remove data files written by the R scripts, they differ for each model, so # it's not clear how to write a generic function for that (except when all data sets start with the mode name ...). cleanup <- function(bayzmod) { filenames <- paste("bayz.",bayzmod,c(".out",".mod",".last",".lab",".plog"),sep="") for(f in filenames) { if (file.exists(f)) file.remove(f) } } # Write the chain section with settings 'chainsettings' (vector of 3 numbers) to file 'outfile' wrchain <- function(chainsettings, outfile) { chainsettings <- c(chainsettings,10*chainsettings[3]) # add a 4th value for the 'show' parameter chaintext <- format(chainsettings,scientific=FALSE,trim=TRUE) cat("chain\n", file=outfile) cat(paste("total=",chaintext[1]," burn=",chaintext[2]," skip=",chaintext[3]," show=",chaintext[4],"\n",sep=""), file=outfile) return } # Run bayz in the background and collect pbayz summary # This assumes bayz and pbayz can be found in the location set in BAYZHOME # The argument is the bayz model-name without bayz. prefix and .bayz postfix, for instance # for script "bayz.memmp.bayz", give "memmp". runbayz <- function(bayzmod, extscript=FALSE) { if (!extscript) { bayzmod = paste("bayz.",bayzmod,sep="") } system(paste(BAYZHOME,"bayz ",bayzmod,sep="")) if(!file.exists(paste(bayzmod,".out",sep=""))) { cat("Bayz seems to have encountered errors - no output available\n") return() } cat("Bayz finished, collecting parameter estimates ...\n") system(paste(BAYZHOME,"pbayz ",bayzmod," maxlev=0 table",sep="")) pbayzsumm <- read.table(paste(bayzmod,".plog",sep=""),skip=1,header=T) return(pbayzsumm) } # make short summary version from large pbayzsumm bayz.summ <- function(pbayzsumm) { parcounts <- table(pbayzsumm$parameter) smallsumm <- match(pbayzsumm$parameter,names(parcounts[parcounts<=20])) print(pbayzsumm[!is.na(smallsumm),]) toobig <- as.data.frame(parcounts[parcounts>20]) colnames(toobig) <- c("Parameter","Size") cat("\nAlso in the output:\n") print(toobig) } # extract one set of parameter estimates from pbayzsumm bayz.extract <- function(pbayzsumm, parname) { extract <- match(pbayzsumm$parameter,parname) if (sum(!is.na(extract))==0) { cat(paste("Parameter",parname,"not found in this output - spelled wrong?\n")) return() } return(pbayzsumm[!is.na(extract),]) } # Bayesian mixed model bayz.mm <- function(data=NULL, resp=NULL, fixmod="", ranmod="", chain=NULL) { # check needed arguments: data, resp and chain are needed. # fixmod and ranmod are not needed, when all is missing it will run a model with intercept only. if(is.null(data)) stop("data (R data table with response and explanatory variables is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") cleanup("mm") # prepare data file for bayz write.table(data,file="bayz_mm_data.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mm.bayz", "w") # data statement cat(paste("data ","\n"), file=out) cat("file=bayz_mm_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # model line 1: mean + fixed effects + random effects cat("model\n", file=out) cat(paste(resp,"= mean",gsub("\\+"," ",fixmod),gsub("\\+"," ",ranmod),"\n"), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp,"~uni\n",sep=""), file=out) } # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mm") return(pbayzsumm) } # Bayesian mixed model with table of genotypes for random regression (rrBLUP/SNP-BLUP) fit bayz.mmg <- function(data=NULL, geno=NULL, resp=NULL, fixmod="", ranmod="", chain=NULL) { # check needed arguments: data, geno, resp and chain are needed. # fixmod and ranmod are not needed, when all is missing it will run a model with intercept only. if(is.null(data)) stop("data (R data table with response and explanatory variables is not given") if(is.null(geno)) stop("geno (R data table with genotypes) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") cleanup("mmg") # prepare data files for bayz write.table(data,file="bayz_mmg_data.txt",quote=F,row.names=F,col.names=F) write.table(geno,file="bayz_mmg_geno.txt",quote=F,row.names=F,col.names=F) genonames = colnames(geno)[2:ncol(geno)] write.table(genonames,file="bayz_mmg_genonames.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mmg.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmg_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with genotypes cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmg_geno.txt\n", file=out) cat(paste(colnames(data)[1]," geno[] !genot012\n"), file=out) # third data with genotype names cat("data geno map\n", file=out) cat("file=bayz_mmg_genonames.txt\n", file=out) cat("geno\n", file=out) # model line 1: mean + fixed effects + random effects + genotypes cat("model\n", file=out) cat(paste(resp,"= mean",gsub("\\+"," ",fixmod),gsub("\\+"," ",ranmod)," add.geno\n"), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp,"~uni\n",sep=""), file=out) } # model with iden distribution for add.geno effects cat(paste("add.geno.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.add.geno.",resp,"~uni\n",sep=""), file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mmg") return(pbayzsumm) } # Bayesian mixed model with two tables of genotypes for random regression (rrBLUP/SNP-BLUP) fit bayz.mmg2 <- function(data=NULL, geno1=NULL, geno2=NULL, resp=NULL, fixmod="", ranmod="", chain=NULL) { # check needed arguments: data, geno1, geno2, resp and chain are needed. # fixmod and ranmod are not needed, when all is missing it will run a model with intercept only. if(is.null(data)) stop("data (R data table with response and explanatory variables is not given") if(is.null(geno1)) stop("geno1 (first R data table with genotypes) is not given") if(is.null(geno2)) stop("geno2 (second R data table with genotypes) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") cleanup("mmg2") # prepare data files for bayz write.table(data,file="bayz_mmg2_data.txt",quote=F,row.names=F,col.names=F) # first genotype data write.table(geno1,file="bayz_mmg2_geno1.txt",quote=F,row.names=F,col.names=F) geno1names = colnames(geno1)[2:ncol(geno1)] write.table(geno1names,file="bayz_mmg2_geno1names.txt",quote=F,row.names=F,col.names=F) # second genotype data write.table(geno2,file="bayz_mmg2_geno2.txt",quote=F,row.names=F,col.names=F) geno2names = colnames(geno2)[2:ncol(geno2)] write.table(geno2names,file="bayz_mmg2_geno2names.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mmg2.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmg2_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # first data with genotypes and their map cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmg2_geno1.txt\n", file=out) cat(paste(colnames(data)[1]," geno1[] !genot012\n"), file=out) cat("data geno1 map\n", file=out) cat("file=bayz_mmg2_geno1names.txt\n", file=out) cat("geno1\n", file=out) # second data with genotypes and their map cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmg2_geno2.txt\n", file=out) cat(paste(colnames(data)[1]," geno2[] !genot012\n"), file=out) cat("data geno2 map\n", file=out) cat("file=bayz_mmg2_geno2names.txt\n", file=out) cat("geno2\n", file=out) # model line 1: mean + fixed effects + random effects + two times genotypes cat("model\n", file=out) cat(paste(resp,"= mean",gsub("\\+"," ",fixmod),gsub("\\+"," ",ranmod)," add.geno1 add.geno2\n"), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp,"~uni\n",sep=""), file=out) } # model with iden distribution for add.geno1 and add.geno2 effects cat(paste("add.geno1.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.add.geno1.",resp,"~uni\n",sep=""), file=out) cat(paste("add.geno2.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.add.geno2.",resp,"~uni\n",sep=""), file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mmg2") return(pbayzsumm) } # Bayesian mixed model with table of covariates for standard random regression fit bayz.mmc <- function(data=NULL, covar=NULL, resp=NULL, fixmod="", ranmod="", chain=NULL) { # check needed arguments: data, covar, resp and chain are needed. # fixmod and ranmod are not needed, when all is missing it will run a model with intercept only. if(is.null(data)) stop("data (R data table with response and explanatory variables is not given") if(is.null(covar)) stop("covar (R data table with covariates for random regression fit) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") cleanup("mmc") # As a new feature, covar table get NA replaced by the mean; this is combined with # centering (adjusting each column to mean 0) so that missings values can be replaced by 0. covarIDs <- covar[,1] covar <- scale(covar[,-1],scale=FALSE) covar[is.na(covar)] <- 0 covar <- cbind(covarIDs,covar) # prepare data files for bayz write.table(data,file="bayz_mmc_data.txt",quote=F,row.names=F,col.names=F) write.table(covar,file="bayz_mmc_covar.txt",quote=F,row.names=F,col.names=F) covarnames = colnames(covar)[2:ncol(covar)] write.table(covarnames,file="bayz_mmc_covarnames.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mmc.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmc_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmc_covar.txt\n", file=out) cat(paste(colnames(data)[1]," covar[]\n"), file=out) # third data with covariate names cat("data covar\n", file=out) cat("file=bayz_mmc_covarnames.txt\n", file=out) cat("covar\n", file=out) # model line 1: mean + fixed effects + random effects + genotypes cat("model\n", file=out) cat(paste(resp,"= mean",gsub("\\+"," ",fixmod),gsub("\\+"," ",ranmod)," reg.covar !nocenter\n"), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp,"~uni\n",sep=""), file=out) } # model with iden distribution for add.geno effects cat(paste("reg.covar.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.reg.covar.",resp,"~uni\n",sep=""), file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mmc") return(pbayzsumm) } # mmc version with weights on the covariates proportional to variance, model b_i ~ N(0,w_i sigma_b^2) bayz.mmcw <- function(data=NULL, covar=NULL, resp=NULL, covw=NULL, fixmod="", ranmod="", chain=NULL) { # check needed arguments: data, covar, resp and chain are needed. # fixmod and ranmod are not needed, when all is missing it will run a model with intercept only. if(is.null(data)) stop("data (R data table with response and explanatory variables is not given") if(is.null(covar)) stop("covar (R data table with covariates for random regression fit) is not given") if(is.null(covw)) stop("covw (R vector with weights for covariates) is not given - use mmc version") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") if (length(covw) != (ncol(covar)-1)) stop("weight vector does not match (size-1) of covar table") cleanup("mmcw") # As a new feature, covar table get NA replaced by the mean; this is combined with # centering (adjusting each column to mean 0) so that missings values can be replaced by 0. covarIDs <- covar[,1] covar <- scale(covar[,-1],scale=FALSE) covar[is.na(covar)] <- 0 covar <- cbind(covarIDs,covar) # prepare data files for bayz write.table(data,file="bayz_mmcw_data.txt",quote=F,row.names=F,col.names=F) write.table(covar,file="bayz_mmcw_covar.txt",quote=F,row.names=F,col.names=F) covarnames = colnames(covar)[2:ncol(covar)] write.table(cbind(covarnames,covw),file="bayz_mmcw_covarnames.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mmcw.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmcw_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmcw_covar.txt\n", file=out) cat(paste(colnames(data)[1]," covar[]\n"), file=out) # third data with covariate names and weight info cat("data covar\n", file=out) cat("file=bayz_mmcw_covarnames.txt\n", file=out) cat("covar covw\n", file=out) # model line 1: mean + fixed effects + random effects + covariates cat("model\n", file=out) cat(paste(resp,"= mean",gsub("\\+"," ",fixmod),gsub("\\+"," ",ranmod)," reg.covar !nocenter\n"), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp,"~uni\n",sep=""), file=out) } # model with diag.covw distribution for add.geno effects cat(paste("reg.covar.",resp,"~norm diag.covw *\n",sep=""), file=out) cat(paste("var.covw.reg.covar.",resp,"~uni\n",sep=""), file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mmcw") return(pbayzsumm) } # Bayesian mixed model with ID random effects with a correlation/covariance structure (e.g., GBLUP using GRM). # The model is fitted by regression on the scaled eigenvectors as in Janss, Sigsgaard, Sorensen paper. # Note: the G-matrix needs a first column with IDs! There is a check that ncol = nrow + 1. bayz.mmG1 <- function(data=NULL, resp=NULL, fixmod="", ranmod="", G=NULL, mineval=0.001, chain=NULL) { # check needed arguments: data, resp, G and chain are needed. # fixmod and ranmod are not needed, when both are missing it will run a model with intercept random ID. if(is.null(data)) stop("data (R data frame with response and explanatory variables) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(G)) stop("G (cor/covar/relationship/kinship matrix) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") if (ncol(G) != (nrow(G)+1) ) stop("Looks like the G-matrix does not have an ID column, G should have 1 column more than rows") cleanup("mmG1") # Compute eigenvectors and scaled eigenvectors for use in this model Gids <- as.character(G[,1]) Geig <- eigen(G[,-1]) poseval <- Geig$values > mineval Nposeval <- sum(poseval) # first compute scaling for all eigenvectors using the absolute of eigen-values, # but in the end leave out the eigenvectors that are not above the mineval. evecscaling <- 1/sqrt(abs(Geig$values)) Geig$vectors <- scale(Geig$vectors, center=FALSE, scale=evecscaling) cat(paste(sum(!poseval)," column(s) will be removed from eigenvectors because the eigenvalue was below mineval (", mineval,")\n"),sep="") GeigData <- cbind(Gids,Geig$vectors[,poseval]) # The rest is same as mmc model, but some names are changed. # prepare data files for bayz write.table(data,file="bayz_mmG1_data.txt",quote=F,row.names=F,col.names=F) write.table(GeigData,file="bayz_mmG1_covar.txt",quote=F,row.names=F,col.names=F) covarnames = paste("ev",1:Nposeval,sep="") write.table(covarnames,file="bayz_mmG1_covarnames.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mmG1.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG1_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG1_covar.txt\n", file=out) cat(paste(colnames(data)[1]," Gevec[]\n"), file=out) # third data with covariate names cat("data Gevec\n", file=out) cat("file=bayz_mmG1_covarnames.txt\n", file=out) cat("Gevec\n", file=out) # model line 1: mean + fixed effects + random effects + genotypes cat("model\n", file=out) cat(paste(resp,"= mean",gsub("\\+"," ",fixmod),gsub("\\+"," ",ranmod)," reg.Gevec\n"), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp,"~uni\n",sep=""), file=out) } # model with iden distribution for reg.Gevec effects cat(paste("reg.Gevec.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.reg.Gevec.",resp,"~uni\n",sep=""), file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mmG1") return(pbayzsumm) } # mmG1 repeated observations version bayz.mmG1r <- function(data=NULL, resp=NULL, fixmod="", ranmod="", G=NULL, mineval=0.001, chain=NULL) { # check needed arguments: data, resp, G and chain are needed. # fixmod and ranmod are not needed, when both are missing it will run a model with intercept and random ID. if(is.null(data)) stop("data (R data frame with response and explanatory variables) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(G)) stop("G (cor/covar/relationship/kinship matrix) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") if (ncol(G) != (nrow(G)+1) ) stop("Looks like the G-matrix does not have an ID column, G should have 1 column more than rows") cleanup("mmG1r") # Compute eigenvectors and scaled eigenvectors for use in this model Gids <- as.character(G[,1]) Geig <- eigen(G[,-1]) poseval <- Geig$values > mineval Nposeval <- sum(poseval) # first compute scaling for all eigenvectors using the absolute of eigen-values, # but in the end leave out the eigenvectors that are not above the mineval. evecscaling <- 1/sqrt(abs(Geig$values)) Geig$vectors <- scale(Geig$vectors, center=FALSE, scale=evecscaling) cat(paste(sum(!poseval)," column(s) will be removed from eigenvectors because the eigenvalue was below mineval (", mineval,")\n"),sep="") GeigData <- cbind(Gids,Geig$vectors[,poseval]) # The rest is same as mmc model, but some names are changed. # prepare data files for bayz write.table(data,file="bayz_mmG1r_data.txt",quote=F,row.names=F,col.names=F) write.table(GeigData,file="bayz_mmG1r_covar.txt",quote=F,row.names=F,col.names=F) covarnames = paste("ev",1:Nposeval,sep="") write.table(covarnames,file="bayz_mmG1r_covarnames.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mmG1r.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data\n"), file=out) # reps: no idfield here as key cat("file=bayz_mmG1r_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG1r_covar.txt\n", file=out) cat(paste(colnames(data)[1]," Gevec[]\n"), file=out) # third data with covariate names cat("data Gevec\n", file=out) cat("file=bayz_mmG1r_covarnames.txt\n", file=out) cat("Gevec\n", file=out) # model line 1: mean + fixed effects + random effects from ranmod + id-effect cat("model\n", file=out) cat(paste(resp," = mean ",gsub("\\+"," ",fixmod)," ", gsub("\\+"," ",ranmod), " fac.",idfield,"\n",sep=""), file=out) #! # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp," ~ uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp," ~ uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects from ranmod for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp," ~ norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp," ~ uni\n",sep=""), file=out) } # reps: here reg.Gevec on fac.idfield.response + resid.fac.idfield.response specifications cat(paste("fac.",idfield,".",resp," = reg.Gevec\n",sep=""), file=out) cat(paste("reg.Gevec.fac.",idfield,".",resp," ~ norm iden *\n",sep=""), file=out) cat(paste("var.reg.Gevec.fac.",idfield,".",resp," ~ uni\n",sep=""), file=out) cat(paste("resid.fac.",idfield,".",resp," ~ norm iden *\n",sep=""), file=out) #! cat(paste("var.resid.fac.",idfield,".",resp," ~ uni\n",sep=""), file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mmG1r") return(pbayzsumm) } bayz.mmG2 <- function(data=NULL, resp=NULL, fixmod="", ranmod="", G1=NULL, G2=NULL, mineval=0.001, chain=NULL) { # check needed arguments: data, resp, G1, G2 and chain are needed. # fixmod and ranmod are not needed, when both are missing it will run a model with intercept random ID. if(is.null(data)) stop("data (R data frame with response and explanatory variables) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(G1)) stop("G1 (cor/covar/relationship/kinship matrix) is not given") if(is.null(G2)) stop("G2 (cor/covar/relationship/kinship matrix) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") if (ncol(G1) != (nrow(G1)+1) ) stop("Looks like the G1-matrix does not have an ID column, there should be 1 column more than rows") if (ncol(G2) != (nrow(G2)+1) ) stop("Looks like the G2-matrix does not have an ID column, there should be 1 column more than rows") cleanup("mmG2") # Compute eigenvectors and scaled eigenvectors for use in this model (two times) G1ids <- as.character(G1[,1]) G1eig <- eigen(G1[,-1]) poseval1 <- G1eig$values > mineval Nposeval1 <- sum(poseval1) evecscaling <- 1/sqrt(abs(G1eig$values)) G1eig$vectors <- scale(G1eig$vectors, center=FALSE, scale=evecscaling) cat(paste(sum(!poseval1)," column(s) will be removed from eigenvectors of G1 because the eigenvalue was below mineval (", mineval,")\n"),sep="") G1eigData <- cbind(G1ids,G1eig$vectors[,poseval1]) # repeat for G2 G2ids <- as.character(G2[,1]) G2eig <- eigen(G2[,-1]) poseval2 <- G2eig$values > mineval Nposeval2 <- sum(poseval2) evecscaling <- 1/sqrt(abs(G2eig$values)) G2eig$vectors <- scale(G2eig$vectors, center=FALSE, scale=evecscaling) cat(paste(sum(!poseval2)," column(s) will be removed from eigenvectors of G2 because the eigenvalue was below mineval (", mineval,")\n"),sep="") G2eigData <- cbind(G2ids,G2eig$vectors[,poseval2]) # prepare data files for bayz write.table(data,file="bayz_mmG2_data.txt",quote=F,row.names=F,col.names=F) write.table(G1eigData,file="bayz_mmG2_covar1.txt",quote=F,row.names=F,col.names=F) covarnames = paste("ev",1:Nposeval1,sep="") write.table(covarnames,file="bayz_mmG2_covar1names.txt",quote=F,row.names=F,col.names=F) write.table(G2eigData,file="bayz_mmG2_covar2.txt",quote=F,row.names=F,col.names=F) covarnames = paste("ev",1:Nposeval2,sep="") write.table(covarnames,file="bayz_mmG2_covar2names.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mmG2.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG2_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG2_covar1.txt\n", file=out) cat(paste(colnames(data)[1]," Gevec1[]\n"), file=out) # third data with covariate names cat("data Gevec1\n", file=out) cat("file=bayz_mmG2_covar1names.txt\n", file=out) cat("Gevec1\n", file=out) # fourth data is second set with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG2_covar2.txt\n", file=out) cat(paste(colnames(data)[1]," Gevec2[]\n"), file=out) # firth data is second set of covariate names cat("data Gevec2\n", file=out) cat("file=bayz_mmG2_covar2names.txt\n", file=out) cat("Gevec2\n", file=out) # model line 1: mean + fixed effects + random effects + genotypes cat("model\n", file=out) cat(paste(resp," = mean ",gsub("\\+"," ",fixmod),gsub("\\+"," ",ranmod)," reg.Gevec1 reg.Gevec2\n",sep=""), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp,"~uni\n",sep=""), file=out) } # model with iden distribution for reg.Gevec effects cat(paste("reg.Gevec1.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.reg.Gevec1.",resp,"~uni\n",sep=""), file=out) cat(paste("reg.Gevec2.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.reg.Gevec2.",resp,"~uni\n",sep=""), file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mmG2") return(pbayzsumm) } # mmG2 repeated observations version bayz.mmG2r <- function(data=NULL, resp=NULL, fixmod="", ranmod="", G1=NULL, G2=NULL, mineval=0.001, chain=NULL) { # check needed arguments: data, resp, G and chain are needed. # fixmod and ranmod are not needed, when both are missing it will run a model with intercept and random ID. if(is.null(data)) stop("data (R data frame with response and explanatory variables) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(G1)) stop("G1 (cor/covar/relationship/kinship matrix) is not given") if(is.null(G2)) stop("G2 (cor/covar/relationship/kinship matrix) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") if (ncol(G1) != (nrow(G1)+1) ) stop("Looks like the G-matrix does not have an ID column, G should have 1 column more than rows") if (ncol(G2) != (nrow(G2)+1) ) stop("Looks like the G-matrix does not have an ID column, G should have 1 column more than rows") cleanup("mmG2r") # Compute eigenvectors and scaled eigenvectors for use in this model (two times) G1ids <- as.character(G1[,1]) G1eig <- eigen(G1[,-1]) poseval1 <- G1eig$values > mineval Nposeval1 <- sum(poseval1) evecscaling <- 1/sqrt(abs(G1eig$values)) G1eig$vectors <- scale(G1eig$vectors, center=FALSE, scale=evecscaling) cat(paste(sum(!poseval1)," column(s) will be removed from eigenvectors of G1 because the eigenvalue was below mineval (", mineval,")\n"),sep="") G1eigData <- cbind(G1ids,G1eig$vectors[,poseval1]) # repeat for G2 G2ids <- as.character(G2[,1]) G2eig <- eigen(G2[,-1]) poseval2 <- G2eig$values > mineval Nposeval2 <- sum(poseval2) evecscaling <- 1/sqrt(abs(G2eig$values)) G2eig$vectors <- scale(G2eig$vectors, center=FALSE, scale=evecscaling) cat(paste(sum(!poseval2)," column(s) will be removed from eigenvectors of G2 because the eigenvalue was below mineval (", mineval,")\n"),sep="") G2eigData <- cbind(G2ids,G2eig$vectors[,poseval2]) # prepare data files for bayz write.table(data,file="bayz_mmG2r_data.txt",quote=F,row.names=F,col.names=F) write.table(G1eigData,file="bayz_mmG2r_covar1.txt",quote=F,row.names=F,col.names=F) covarnames = paste("ev",1:Nposeval1,sep="") write.table(covarnames,file="bayz_mmG2r_covar1names.txt",quote=F,row.names=F,col.names=F) write.table(G2eigData,file="bayz_mmG2r_covar2.txt",quote=F,row.names=F,col.names=F) covarnames = paste("ev",1:Nposeval2,sep="") write.table(covarnames,file="bayz_mmG2r_covar2names.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mmG2r.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data\n"), file=out) # reps: no idfield here as key cat("file=bayz_mmG2r_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG2r_covar1.txt\n", file=out) cat(paste(colnames(data)[1]," Gevec1[]\n"), file=out) # third data with covariate names cat("data Gevec1\n", file=out) cat("file=bayz_mmG2r_covar1names.txt\n", file=out) cat("Gevec1\n", file=out) # fourth data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG2r_covar2.txt\n", file=out) cat(paste(colnames(data)[1]," Gevec2[]\n"), file=out) # fifth data with covariate names cat("data Gevec2\n", file=out) cat("file=bayz_mmG2r_covar2names.txt\n", file=out) cat("Gevec2\n", file=out) # model line 1: mean + fixed effects + random effects from ranmod + id-effect cat("model\n", file=out) cat(paste(resp," = mean ",gsub("\\+"," ",fixmod)," ", gsub("\\+"," ",ranmod), " fac.",idfield,"\n",sep=""), file=out) #! # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp," ~ uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp," ~ uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects from ranmod for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp," ~ norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp," ~ uni\n",sep=""), file=out) } # reps: here reg.Gevec1 and reg.Gevec2 on fac.idfield.response + resid.fac.idfield.response specifications cat(paste("fac.",idfield,".",resp," = reg.Gevec1 reg.Gevec2\n",sep=""), file=out) cat(paste("reg.Gevec1.fac.",idfield,".",resp," ~ norm iden *\n",sep=""), file=out) cat(paste("var.reg.Gevec1.fac.",idfield,".",resp," ~ uni\n",sep=""), file=out) cat(paste("resid.fac.",idfield,".",resp," ~ norm iden *\n",sep=""), file=out) #! cat(paste("var.resid.fac.",idfield,".",resp," ~ uni\n",sep=""), file=out) cat(paste("reg.Gevec2.fac.",idfield,".",resp," ~ norm iden *\n",sep=""), file=out) cat(paste("var.reg.Gevec2.fac.",idfield,".",resp," ~ uni\n",sep=""), file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mmG2r") return(pbayzsumm) } # with correlation modelled between the two G-matrices bayz.mmG3 <- function(data=NULL, resp=NULL, fixmod="", ranmod="", G1=NULL, G2=NULL, mineval=0.001, chain=NULL) { # check needed arguments: data, resp, G1, G2 and chain are needed. # fixmod and ranmod are not needed, when both are missing it will run a model with intercept random ID. if(is.null(data)) stop("data (R data frame with response and explanatory variables) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(G1)) stop("G1 (cor/covar/relationship/kinship matrix) is not given") if(is.null(G2)) stop("G2 (cor/covar/relationship/kinship matrix) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") if (ncol(G1) != (nrow(G1)+1) ) stop("Looks like the G1-matrix does not have an ID column, there should be 1 column more than rows") if (ncol(G2) != (nrow(G2)+1) ) stop("Looks like the G2-matrix does not have an ID column, there should be 1 column more than rows") cleanup("mmG3") # Compute eigenvectors and scaled eigenvectors for use in this model (two times) G1ids <- as.character(G1[,1]) G1eig <- eigen(G1[,-1]) poseval1 <- G1eig$values > mineval Nposeval1 <- sum(poseval1) evecscaling <- 1/sqrt(abs(G1eig$values)) G1eig$vectors <- scale(G1eig$vectors, center=FALSE, scale=evecscaling) # repeat for G2 G2ids <- as.character(G2[,1]) G2eig <- eigen(G2[,-1]) poseval2 <- G2eig$values > mineval Nposeval2 <- sum(poseval2) evecscaling <- 1/sqrt(abs(G2eig$values)) G2eig$vectors <- scale(G2eig$vectors, center=FALSE, scale=evecscaling) # decide how many eigenvectors to use: smallest of Nposeval1 and Nposeval2 NposevalBoth <- min(Nposeval1,Nposeval2) cat(paste("G1 has ",Nposeval1," and G2 has ",Nposeval2," eigenvalues above mineval of ",mineval,"\n",sep="")) cat(paste("Retaining ",NposevalBoth," eigenvectors for both G1 and G2\n",sep="")) G1eigData <- cbind(G1ids,G1eig$vectors[,1:NposevalBoth]) G2eigData <- cbind(G2ids,G2eig$vectors[,1:NposevalBoth]) # Compute the Covariance CrossProduct Matrix mean Diagonal CovMatMeanDiag <- mean(diag(tcrossprod(G1eig$vectors[,1:NposevalBoth],G2eig$vectors[,1:NposevalBoth]))) cat(paste("The G1xG2 covariance multiplication factor (mean diagonal) ",CovMatMeanDiag,"\n")) # prepare data files for bayz write.table(data,file="bayz_mmG3_data.txt",quote=F,row.names=F,col.names=F) write.table(G1eigData,file="bayz_mmG3_covar1.txt",quote=F,row.names=F,col.names=F) covarnames = paste("ev",1:NposevalBoth,sep="") write.table(covarnames,file="bayz_mmG3_covar1names.txt",quote=F,row.names=F,col.names=F) write.table(G2eigData,file="bayz_mmG3_covar2.txt",quote=F,row.names=F,col.names=F) covarnames = paste("ev",1:NposevalBoth,sep="") write.table(covarnames,file="bayz_mmG3_covar2names.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mmG3.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG3_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG3_covar1.txt\n", file=out) cat(paste(colnames(data)[1]," Gevec1[]\n"), file=out) # third data with covariate names cat("data Gevec1\n", file=out) cat("file=bayz_mmG3_covar1names.txt\n", file=out) cat("Gevec1\n", file=out) # fourth data is second set with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG3_covar2.txt\n", file=out) cat(paste(colnames(data)[1]," Gevec2[]\n"), file=out) # firth data is second set of covariate names cat("data Gevec2\n", file=out) cat("file=bayz_mmG3_covar2names.txt\n", file=out) cat("Gevec2\n", file=out) # model line 1: mean + fixed effects + random effects + genotypes cat("model\n", file=out) cat(paste(resp," = mean ",gsub("\\+"," ",fixmod),gsub("\\+"," ",ranmod)," reg.Gevec1 reg.Gevec2\n",sep=""), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp,"~uni\n",sep=""), file=out) } # model with norm lvec distribution for reg.Gevec effects cat(paste("reg.Gevec* ~ norm lvec ** !ldim=1\n",sep=""), file=out) cat(paste("var.reg.Gevec* ~ uni\n",sep=""), file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mmG3") extra <- data.frame("var.reg.Gevec*","cov.scaling",CovMatMeanDiag,0) colnames(extra) <- colnames(pbayzsumm) pbayzsumm <- rbind(pbayzsumm,extra) return(pbayzsumm) } bayz.mmG3r <- function(data=NULL, resp=NULL, fixmod="", ranmod="", G1=NULL, G2=NULL, mineval=0.001, chain=NULL) { # check needed arguments: data, resp, G and chain are needed. # fixmod and ranmod are not needed, when both are missing it will run a model with intercept and random ID. if(is.null(data)) stop("data (R data frame with response and explanatory variables) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(G1)) stop("G1 (cor/covar/relationship/kinship matrix) is not given") if(is.null(G2)) stop("G2 (cor/covar/relationship/kinship matrix) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") if (ncol(G1) != (nrow(G1)+1) ) stop("Looks like the G-matrix does not have an ID column, G should have 1 column more than rows") if (ncol(G2) != (nrow(G2)+1) ) stop("Looks like the G-matrix does not have an ID column, G should have 1 column more than rows") cleanup("mmG3r") # Compute eigenvectors and scaled eigenvectors for use in this model (two times) G1ids <- as.character(G1[,1]) G1eig <- eigen(G1[,-1]) poseval1 <- G1eig$values > mineval Nposeval1 <- sum(poseval1) evecscaling <- 1/sqrt(abs(G1eig$values)) G1eig$vectors <- scale(G1eig$vectors, center=FALSE, scale=evecscaling) # repeat for G2 G2ids <- as.character(G2[,1]) G2eig <- eigen(G2[,-1]) poseval2 <- G2eig$values > mineval Nposeval2 <- sum(poseval2) evecscaling <- 1/sqrt(abs(G2eig$values)) G2eig$vectors <- scale(G2eig$vectors, center=FALSE, scale=evecscaling) # decide how many eigenvectors to use: smallest of Nposeval1 and Nposeval2 NposevalBoth <- min(Nposeval1,Nposeval2) cat(paste("G1 has ",Nposeval1," and G2 has ",Nposeval2," eigenvalues above mineval of ",mineval,"\n",sep="")) cat(paste("Retaining ",NposevalBoth," eigenvectors for both G1 and G2\n",sep="")) G1eigData <- cbind(G1ids,G1eig$vectors[,1:NposevalBoth]) G2eigData <- cbind(G2ids,G2eig$vectors[,1:NposevalBoth]) CovMatMeanDiag <- mean(diag(tcrossprod(G1eig$vectors[,1:NposevalBoth],G2eig$vectors[,1:NposevalBoth]))) cat(paste("The G1xG2 covariance multiplication factor (mean diagonal) ",CovMatMeanDiag,"\n")) # prepare data files for bayz write.table(data,file="bayz_mmG3r_data.txt",quote=F,row.names=F,col.names=F) write.table(G1eigData,file="bayz_mmG3r_covar1.txt",quote=F,row.names=F,col.names=F) covarnames = paste("ev",1:NposevalBoth,sep="") write.table(covarnames,file="bayz_mmG3r_covar1names.txt",quote=F,row.names=F,col.names=F) write.table(G2eigData,file="bayz_mmG3r_covar2.txt",quote=F,row.names=F,col.names=F) covarnames = paste("ev",1:NposevalBoth,sep="") write.table(covarnames,file="bayz_mmG3r_covar2names.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mmG3r.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data\n"), file=out) # reps: no idfield here as key cat("file=bayz_mmG3r_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG3r_covar1.txt\n", file=out) cat(paste(colnames(data)[1]," Gevec1[]\n"), file=out) # third data with covariate names cat("data Gevec1\n", file=out) cat("file=bayz_mmG3r_covar1names.txt\n", file=out) cat("Gevec1\n", file=out) # fourth data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG3r_covar2.txt\n", file=out) cat(paste(colnames(data)[1]," Gevec2[]\n"), file=out) # fifth data with covariate names cat("data Gevec2\n", file=out) cat("file=bayz_mmG3r_covar2names.txt\n", file=out) cat("Gevec2\n", file=out) # model line 1: mean + fixed effects + random effects from ranmod + id-effect cat("model\n", file=out) cat(paste(resp," = mean ",gsub("\\+"," ",fixmod)," ", gsub("\\+"," ",ranmod), " fac.",idfield,"\n",sep=""), file=out) #! # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp," ~ uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp," ~ uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects from ranmod for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp," ~ norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp," ~ uni\n",sep=""), file=out) } # reps: here reg.Gevec1 and reg.Gevec2 on fac.idfield.response + resid.fac.idfield.response specifications cat(paste("fac.",idfield,".",resp," = reg.Gevec1 reg.Gevec2\n",sep=""), file=out) cat(paste("resid.fac.",idfield,".",resp," ~ norm iden *\n",sep=""), file=out) #! cat(paste("var.resid.fac.",idfield,".",resp," ~ uni\n",sep=""), file=out) cat(paste("reg.Gevec* ~ norm lvec ** !ldim=1\n",sep=""), file=out) cat(paste("var.reg.Gevec* ~ uni\n",sep=""), file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mmG3r") extra <- data.frame("var.reg.Gevec*","cov.scaling",CovMatMeanDiag,0) colnames(extra) <- colnames(pbayzsumm) pbayzsumm <- rbind(pbayzsumm,extra) return(pbayzsumm) } # interaction model, need to link to 2 IDs (assumed to be in first two columns of data) bayz.mmG4r <- function(data=NULL, resp=NULL, fixmod="", ranmod="", G1=NULL, G2=NULL, mineval=0.001, chain=NULL) { # check needed arguments: data, resp, G1, G2 and chain are needed. # fixmod and ranmod are not needed, when both are missing it will run a model with intercept random ID. if(is.null(data)) stop("data (R data frame with response and explanatory variables) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(G1)) stop("G1 (cor/covar/relationship/kinship matrix) is not given") if(is.null(G2)) stop("G2 (cor/covar/relationship/kinship matrix) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") if (ncol(G1) != (nrow(G1)+1) ) stop("Looks like the G1-matrix does not have an ID column, there should be 1 column more than rows") if (ncol(G2) != (nrow(G2)+1) ) stop("Looks like the G2-matrix does not have an ID column, there should be 1 column more than rows") cleanup("mmG4r") id1 <- as.factor(data[,1]) id1levels <- levels(id1) id2 <- as.factor(data[,2]) id2levels <- levels(id2) idnew <- paste("row",seq(1,nrow(data)),sep="") # a new ID for every row to be used later Z1 <- model.matrix(~0+id1) # design matrices for the two IDs needed Z2 <- model.matrix(~0+id2) # to 'expand' the G-matrix to match the data G1ids <- as.character(G1[,1]) G2ids <- as.character(G2[,1]) # G-matrices need to be reordered to match data-levels ordering matchG1toId <- match(G1ids,id1levels) matchG2toId <- match(G2ids,id2levels) G1 <- G1[,-1] G1 <- G1[matchG1toId,matchG1toId] G2 <- G2[,-1] G2 <- G2[matchG2toId,matchG2toId] # Compute eigenvectors and scaled eigenvectors for use in this model (two times) G1eig <- eigen(G1) poseval1 <- G1eig$values > mineval Nposeval1 <- sum(poseval1) evecscaling <- 1/sqrt(abs(G1eig$values)) G1eig$vectors <- scale(G1eig$vectors, center=FALSE, scale=evecscaling) # repeat for G2 G2eig <- eigen(G2) poseval2 <- G2eig$values > mineval Nposeval2 <- sum(poseval2) evecscaling <- 1/sqrt(abs(G2eig$values)) G2eig$vectors <- scale(G2eig$vectors, center=FALSE, scale=evecscaling) cat(paste("G1 has ",Nposeval1," and G2 has ",Nposeval2," eigenvalues above mineval of ",mineval,"\n",sep="")) # make the eigenvector-covariates to fit on the data, also select only the ones with positives eigenvalues G1eigData <- cbind(idnew,Z1 %*% G1eig$vectors[,1:Nposeval1]) G2eigData <- cbind(idnew,Z2 %*% G2eig$vectors[,1:Nposeval2]) # for interaction part: first expand then eigenvectors is correct? Ginteract <- ( Z1 %*% G1 %*% t(Z1) ) * ( Z2 %*% G2 %*% t(Z2) ) # note: * for Hadamart product in R Ginteract_eig <- eigen(Ginteract) poseval3 <- Ginteract_eig$values > mineval Nposeval3 <- sum(poseval3) evecscaling <- 1/sqrt(abs(Ginteract_eig$values)) Ginteract_eig$vectors <- scale(Ginteract$vectors, center=FALSE, scale=evecscaling) GinteractData <- cbind(idnew,Ginteract_eig$vectors[,1:Nposeval3]) # prepare data files for bayz data <- cbind(idnew,data) write.table(data,file="bayz_mmG4r_data.txt",quote=F,row.names=F,col.names=F) write.table(G1eigData,file="bayz_mmG4r_covar1.txt",quote=F,row.names=F,col.names=F) covarnames = paste("ev",1:Nposeval1,sep="") write.table(covarnames,file="bayz_mmG4r_covar1names.txt",quote=F,row.names=F,col.names=F) write.table(G2eigData,file="bayz_mmG4r_covar2.txt",quote=F,row.names=F,col.names=F) covarnames = paste("ev",1:Nposeval2,sep="") write.table(covarnames,file="bayz_mmG4r_covar2names.txt",quote=F,row.names=F,col.names=F) write.table(GinteractData,file="bayz_mmG4r_covar3.txt",quote=F,row.names=F,col.names=F) covarnames = paste("ev",1:Nposeval3,sep="") write.table(covarnames,file="bayz_mmG4r_covar3names.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mmG4r.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG4r_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG4r_covar1.txt\n", file=out) cat(paste(colnames(data)[1]," Gevec1[]\n"), file=out) # third data with covariate names cat("data Gevec1\n", file=out) cat("file=bayz_mmG4r_covar1names.txt\n", file=out) cat("Gevec1\n", file=out) # fourth data is second set with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmG4r_covar2.txt\n", file=out) cat(paste(colnames(data)[1]," Gevec2[]\n"), file=out) # firth data is second set of covariate names cat("data Gevec2\n", file=out) cat("file=bayz_mmG4r_covar2names.txt\n", file=out) cat("Gevec2\n", file=out) # fifth data is third set of covariate names cat("data Gevec3\n", file=out) cat("file=bayz_mmG4r_covar3names.txt\n", file=out) cat("Gevec3\n", file=out) # model line 1: mean + fixed effects + random effects + genotypes cat("model\n", file=out) cat(paste(resp," = mean ",gsub("\\+"," ",fixmod),gsub("\\+"," ",ranmod)," reg.Gevec1 reg.Gevec2 reg.Gevec3\n",sep=""), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp,"~uni\n",sep=""), file=out) } # model with norm lvec distribution for reg.Gevec effects cat(paste("reg.Gevec1.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.reg.Gevec1.",resp,"~uni\n",sep=""), file=out) cat(paste("reg.Gevec2.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.reg.Gevec2.",resp,"~uni\n",sep=""), file=out) cat(paste("reg.Gevec3.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.reg.Gevec3.",resp,"~uni\n",sep=""), file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mmG4r") return(pbayzsumm) } bayz.mtmmG1 <- function(data=NULL, resp=NULL, fixmod="", ranmod="", G=NULL, mineval=0.001, ldim=1, ecor=TRUE, chain=NULL) { # check needed arguments: data, resp, G and chain are needed. # fixmod and ranmod are not needed, when both are missing it will run a model with intercept random ID. if(is.null(data)) stop("data (R data frame with response and explanatory variables) is not given") if(is.null(resp)) stop("resp (list of field name for response variables) is not given") if(is.null(G)) stop("G (cor/covar/relationship/kinship matrix) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") if (ncol(G) != (nrow(G)+1) ) stop("Looks like the G-matrix does not have an ID column, G should have 1 column more than rows") if(length(resp) < 2) stop("There is only one response variable - change to bayz.mmG1 for single trait analysis") cleanup("mtmmG1") # Compute eigenvectors and scaled eigenvectors for use in this model Gids <- as.character(G[,1]) Geig <- eigen(G[,-1]) poseval <- Geig$values > mineval Nposeval <- sum(poseval) # first compute scaling for all eigenvectors using the absolute of eigen-values, # but in the end leave out the eigenvectors that are not above the mineval. evecscaling <- 1/sqrt(abs(Geig$values)) Geig$vectors <- scale(Geig$vectors, center=FALSE, scale=evecscaling) cat(paste(sum(!poseval)," column(s) will be removed from eigenvectors because the eigenvalue was below mineval (", mineval,")\n"),sep="") GeigData <- cbind(Gids,Geig$vectors[,poseval]) # prepare data files for bayz write.table(data,file="bayz_mtmmG1_data.txt",quote=F,row.names=F,col.names=F) write.table(GeigData,file="bayz_mtmmG1_covar.txt",quote=F,row.names=F,col.names=F) covarnames = paste("ev",1:Nposeval,sep="") write.table(covarnames,file="bayz_mtmmG1_covarnames.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mtmmG1.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mtmmG1_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mtmmG1_covar.txt\n", file=out) cat(paste(colnames(data)[1]," Gevec[]\n"), file=out) # third data with covariate names cat("data Gevec\n", file=out) cat("file=bayz_mtmmG1_covarnames.txt\n", file=out) cat("Gevec\n", file=out) cat("model\n", file=out) for(r in 1:length(resp)) { # all LINP, fixed and IID random effects within trait # model lines: mean + fixed effects + random effects + genotypes cat(paste(resp[r],"= mean",gsub("\\+"," ",fixmod),gsub("\\+"," ",ranmod)," reg.Gevec\n"), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp[r],"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp[r],"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp[r],"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp[r],"~uni\n",sep=""), file=out) } } # model with mvnorm distribution for reg.Gevec effects cat(paste("reg.Gevec.* ~ norm lvec ** !ldim=", ldim,"\n",sep=""), file=out) cat(paste("var.reg.Gevec.* ~ uni\n",sep=""), file=out) # model lines for residual specification if (ecor) { cat(paste("resid.* ~ norm lvec ** !ldim=", ldim, "\n",sep=""), file=out) cat(paste("var.resid.* ~ uni\n",sep=""), file=out) } else { for(r in 1:length(resp)) { cat(paste("resid.",resp[r]," ~ norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp[r]," ~ uni\n",sep=""), file=out) } } # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result. # Also add gebv per trait in the pbayzsumm table (psd not available in this way and is set to 0). pbayzsumm <- runbayz("mtmmG1") for(r in 1:length(resp)) { regevec <- bayz.extract(pbayzsumm,paste("reg.Gevec.",resp[r],sep="")) # sometimes regevec$post.mean becomes non-numeric gebv <- Geig$vectors[,poseval] %*% as.numeric(regevec$post.mean) gebv_table <- as.data.frame(cbind(paste("gebv.",resp[r],sep=""),Gids,gebv,0)) colnames(gebv_table) <- colnames(pbayzsumm) pbayzsumm <- rbind(pbayzsumm,gebv_table) } return(pbayzsumm) } # Mixed model with pedigree (no repeats). bayz.mmp <- function(data=NULL, ped=NULL, resp=NULL, fixmod="", ranmod="", chain=NULL) { # check needed arguments: data, ped, resp and chain are needed. # fixmod and ranmod are not needed, when all is missing it will run a model with intercept and # ped effects. if(is.null(data)) stop("data (R data frame with response and explanatory variables) is not given") if(is.null(ped)) stop("ped (R data frame with pedigree) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") cleanup("mmp") write.table(data,file="bayz_mmp_data.txt",quote=F,row.names=F,col.names=F) write.table(ped,file="bayz_mmp_ped.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mmp.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmp_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) cat(paste("data ",idfield," ped\n"), file=out) cat("file=bayz_mmp_ped.txt\n", file=out) cat(paste(idfield," parent1 parent2\n"), file=out) # model line 1: mean + fixed effects + random effects from 'ranmod' + id-effect for ped cat("model\n", file=out) cat(paste(resp,"= mean ",gsub("\\+"," ",fixmod)," ",gsub("\\+"," ",ranmod)," polyg.",idfield,"\n",sep=""), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+"))) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp,"~uni\n",sep=""), file=out) } # model lines for mvnorm polyg.id effect cat(paste("polyg.",idfield,".",resp," ~ norm iden *\n",sep=""),file=out) cat(paste("var.polyg.,",idfield,".",resp," ~ uni\n",sep=""),file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mmp") return(pbayzsumm) } # Multi-trait pedigree (no repeats). bayz.mtmmp <- function(data=NULL, ped=NULL, resp=NULL, fixmod="", ranmod="", ldim=1, ecor=TRUE, chain=NULL) { # check needed arguments: data, ped, resp and chain are needed. # fixmod and ranmod are not needed, when all is missing it will run a model with intercept and ped effects. if(is.null(data)) stop("data (R data frame with response and explanatory variables) is not given") if(is.null(ped)) stop("ped (R data frame with pedigree) is not given") if(is.null(resp)) stop("resp (vector of field names to use as response variables) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") if(length(resp) < 2) stop("There is only one response variable - change to bayz.mmp for single trait analysis") cleanup("mtmmp") write.table(data,file="bayz_mtmmp_data.txt",quote=F,row.names=F,col.names=F) write.table(ped,file="bayz_mtmmp_ped.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mtmmp.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mtmmp_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) cat(paste("data ",idfield," ped\n"), file=out) cat("file=bayz_mtmmp_ped.txt\n", file=out) cat(paste(idfield," parent1 parent2\n"), file=out) # model line 1: mean + fixed effects + random effects from 'ranmod' + id-effect for ped cat("model\n", file=out) for(r in 1:length(resp)) { # all LINP, fixed and IID random effects within trait cat(paste(resp[r],"= mean ",gsub("\\+"," ",fixmod)," ",gsub("\\+"," ",ranmod)," polyg.",idfield,"\n",sep=""), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp[r],"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+"))) { cat(paste(i,".",resp[r],"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp[r],"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp[r],"~uni\n",sep=""), file=out) } } # model lines for polyg.id effect cat(paste("polyg.",idfield,".* ~ norm lvec ** !ldim=", ldim, "\n",sep=""),file=out) cat(paste("var.polyg.",idfield,".* ~ uni\n",sep=""),file=out) # model lines for residual specification if (ecor) { cat(paste("resid.* ~ norm lvec ** !ldim=", ldim, "\n",sep=""), file=out) cat(paste("var.resid.* ~ uni\n",sep=""), file=out) } else { for(r in 1:length(resp)) { cat(paste("resid.",resp[r]," ~ norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp[r]," ~ uni\n",sep=""), file=out) } } # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mtmmp") return(pbayzsumm) } # Mixed model with pedigree and repeats, extra random ID effect is automatically included, # it should not be added in the ranmod! bayz.mmpr <- function(data=NULL, ped=NULL, resp=NULL, fixmod="", ranmod="", chain=NULL) { # check needed arguments: data, ped, resp and chain are needed. # fixmod and ranmod are not needed, when all is missing it will run a model with intercept and # ped + resid.ID effects. if(is.null(data)) stop("data (R data frame with response and explanatory variables) is not given") if(is.null(ped)) stop("ped (R data frame with pedigree) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") cat("NOTE: do not include a factor with ID in the ranmod part, it is already included to handle pedigree\n") cleanup("mmpr") # prepare data files for bayz, this step also checks duplicates dupids <- duplicated(data[,1]) if (sum(dupids) == 0) { cat(paste("There are no repeats for ",colnames(data)[1]," in this data set\n")) stop("") } uniqids <- data[!dupids,1] write.table(data,file="bayz_mmpr_data.txt",quote=F,row.names=F,col.names=F) write.table(uniqids,file="bayz_mmpr_dataids.txt",quote=F,row.names=F,col.names=F) write.table(ped,file="bayz_mmpr_ped.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mmpr.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ","\n"), file=out) cat("file=bayz_mmpr_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mmpr_dataids.txt\n", file=out) cat(paste(idfield,"\n"), file=out) cat(paste("data ",idfield," ped\n"), file=out) cat("file=bayz_mmpr_ped.txt\n", file=out) cat(paste(idfield," parent1 parent2\n"), file=out) # model line 1: mean + fixed effects + random effects from 'ranmod' + id-effect for ped cat("model\n", file=out) cat(paste(resp,"= mean ",gsub("\\+"," ",fixmod)," ",gsub("\\+"," ",ranmod)," fac.",idfield,"\n",sep=""), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+"))) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp,"~uni\n",sep=""), file=out) } # model lines for fac.id effect and hierarchy with ped cat(paste("fac.",idfield,".",resp," = polyg.",idfield," !name=polyg.",idfield,".",resp,"\n",sep=""), file=out) cat(paste("polyg.",idfield,".",resp," ~ norm iden *\n",sep=""), file=out) cat(paste("var.polyg.",idfield,".",resp," ~ uni\n",sep=""), file=out) cat(paste("resid.fac.",idfield,".",resp," ~ norm iden *\n",sep=""), file=out) cat(paste("var.resid.fac.",idfield,".",resp," ~ uni\n",sep=""), file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mmpr") return(pbayzsumm) } # Multi-trait pedigree with repeats, extra random ID effect is automatically included, # it should not be added in the ranmod! bayz.mtmmpr <- function(data=NULL, ped=NULL, resp=NULL, fixmod="", ranmod="", ldim=1, ecor=TRUE, chain=NULL) { # check needed arguments: data, ped, resp and chain are needed. # fixmod and ranmod are not needed, when all is missing it will run a model with intercept and # ped + resid.ID effects. if(is.null(data)) stop("data (R data frame with response and explanatory variables) is not given") if(is.null(ped)) stop("ped (R data frame with pedigree) is not given") if(is.null(resp)) stop("resp (vector of field names to use as response variables) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") if(length(resp) < 2) stop("There is only one response variable - change to bayz.mmp for single trait analysis") cat("NOTE: do not include a factor with ID in the ranmod part, it is already included to handle pedigree\n") cleanup("mtmmpr") # prepare data files for bayz, this step also checks duplicates dupids <- duplicated(data[,1]) if (sum(dupids) == 0) { cat(paste("There are no repeats for ",colnames(data)[1]," in this data set\n")) stop("") } uniqids <- data[!dupids,1] write.table(data,file="bayz_mtmmpr_data.txt",quote=F,row.names=F,col.names=F) write.table(uniqids,file="bayz_mtmmpr_dataids.txt",quote=F,row.names=F,col.names=F) write.table(ped,file="bayz_mtmmpr_ped.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.mtmmpr.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ","\n"), file=out) cat("file=bayz_mtmmpr_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_mtmmpr_dataids.txt\n", file=out) cat(paste(idfield,"\n"), file=out) cat(paste("data ",idfield," ped\n"), file=out) cat("file=bayz_mtmmpr_ped.txt\n", file=out) cat(paste(idfield," parent1 parent2\n"), file=out) # model line 1: mean + fixed effects + random effects from 'ranmod' + id-effect for ped cat("model\n", file=out) for(r in 1:length(resp)) { # all LINP, fixed and IID random effects within trait cat(paste(resp[r],"= mean ",gsub("\\+"," ",fixmod)," ",gsub("\\+"," ",ranmod)," fac.",idfield,"\n",sep=""), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp[r],"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+"))) { cat(paste(i,".",resp[r],"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp[r]," ~ norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp[r]," ~ uni\n",sep=""), file=out) } # model line for fac.id effect with hierarchy with ped cat(paste("fac.",idfield,".",resp[r]," = polyg.",idfield," !name=polyg.",idfield,".",resp[r],"\n",sep=""), file=out) # resid.ID effects now uncorrelated cat(paste("resid.fac.",idfield,".",resp[r]," ~ norm iden *\n",sep=""), file=out) cat(paste("var.resid.fac.",idfield,".",resp[r]," ~ uni\n",sep=""), file=out) } # Correlated effects with norm lvec cat(paste("polyg.",idfield,".* ~ norm lvec ** !ldim=", ldim, "\n",sep=""), file=out) cat(paste("var.polyg.",idfield,".* ~ uni\n",sep=""), file=out) # model lines for residual specification if (ecor) { # note: cannot use resid.* because it will also collect the resid.fac.... terms residlist <- paste(paste("resid",resp,sep="."),collapse=",") cat(paste(residlist, " ~ norm lvec ** !name=varResid !ldim=", ldim, "\n",sep=""), file=out) cat(paste("varResid ~ uni\n",sep=""), file=out) } else { for(r in 1:length(resp)) { cat(paste("resid.",resp[r]," ~ norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp[r]," ~ uni\n",sep=""), file=out) } } # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("mtmmpr") return(pbayzsumm) } # Multi-environment pedigree with repeats (repeats within environments). # Extra random ID effect is automatically included, it should not be added in the ranmod! # This is how the working bayz file looks: # ----------------------------- # data [nokey] Envir1 # file=polyg1600_1.txt # ID_E1 Envir Rep Loc T1_E1 T2_E1 T3_E1 # data ID_E1 # file=polyg1600_dataids.txt # ID_E1 # data ID_E1 ped # file=polyg1600.ped # ID_E1 P1 P2 # ... repeated for every environment # model # T1_E1 = mean fac.Loc fac.ID_E1 # mean.T1_E1 ~ uni # fac.Loc.T1_E1 ~ uni # fac.ID_E1.T1_E1 = polyg.ID_E1 !name=polyg.ID.T1_E1 # resid.fac.ID_E1.T1_E1 ~ norm iden * # var.resid.fac.ID_E1.T1_E1 ~ uni # resid.T1_E1 ~ norm iden * # var.resid.T1_E1 ~ uni # ... repeated for every environment # polyg.ID.* ~ norm lvec ** !ldim=1 # var.polyg.ID.* ~ uni # ----------------------------- bayz.memmp <- function(data=NULL, ped=NULL, resp=NULL, fixmod="", ranmod="", by=NULL, ldim=1, chain=NULL) { # check needed arguments: data, ped, resp, by and chain are needed. # When fixmod and ranmod remain "" a model with intercept and ped + resid.ID effects is fitted. if(is.null(data)) stop("data (R data frame with response and explanatory variables) is not given") if(is.null(ped)) stop("ped (R data frame with pedigree) is not given") if(is.null(resp)) stop("resp (field name to use as response variable) is not given") if(! resp %in% colnames(data)) stop(paste("The field",resp,"to use as response is not in this data set")) if(is.null(by)) stop("by (field name to use for environment groups) is not given") if(! by %in% colnames(data)) stop(paste("The field",by,"to use for environment groups is not in this data set")) if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") if(length(resp) > 1) stop("There are multiple response variables but memmp needs one, use by= to set environment groups\n") cat("NOTE: do not include a factor with ID in the ranmod part to allow for replicates, it is already added when needed\n") cleanup("memmp") # Make a list of unique IDs over all environments to use as ID-list dupids <- duplicated(data[,1]) if (sum(dupids) == 0) { cat(paste("There are no repeats for ",colnames(data)[1]," in this data set\n")) stop("") } uniqids <- data[!dupids,1] write.table(uniqids,file="bayz_memmp_dataids.txt",quote=F,row.names=F,col.names=F) # Make data sets by environment and make list if environments have repeats or not envirtext <- as.character(data[,by]) # environment column forced as vector of 'text' (strings) envir_level <- sort(unique(envirtext)) # list of unique environment levels as vector of 'text' if(envir_level[1]=="") stop(paste("The field",by,"to use for environment groups has empty cells")) if (length(envir_level) <= 1) stop(paste("The field",by,"to use for environment groups has only one level")) envHasDup <- rep(FALSE,length(envir_level)) names(envHasDup) <- envir_level for (e in envir_level) { subdat <- data[(envirtext==e),] RepCounts <- table(subdat[,1]) NnoReps <- sum(RepCounts==1) NmultReps <- sum(RepCounts > 1) cat(paste("Environment",e,"has",NnoReps,"IDs without reps, and",NmultReps,"IDs with reps\n")) if (NmultReps > 0) envHasDup[e]=TRUE write.table(subdat,file=paste("bayz_memmp_data_",e,".txt",sep=""),quote=F,row.names=F,col.names=F) } # Determine modelling of covariance for residual ID effects if (sum(envHasDup) > 1) CovResidID = TRUE else CovResidID = FALSE write.table(ped,file="bayz_memmp_ped.txt",quote=F,row.names=F,col.names=F) cat(paste("Your data is grouped by",by,"in the following sub-sets: ")) cat(paste(envir_level,collapse=" ")) cat("\nand effects and variances are nested within these groups with genetic effects correlated across groups\n") if (ldim < (length(envir_level)/3)) { cat(paste("ldim=",ldim," and there are ",length(envir_level), " groups, you could consider increasing ldim to better describe the covariance between environments\n",sep="")) } # write bayz script file out <- file("bayz.memmp.bayz", "w") # data statements for every environment idfield <- colnames(data)[1] respcol <- match(resp,colnames(data)) # the column number for the response for (e in envir_level) { if (envHasDup[e]) { cat(paste("data [nokey] ",e,"\n",sep=""), file=out) cat(paste("file=bayz_memmp_data_",e,".txt\n",sep=""), file=out) # ID and response column also needed appended environment.... fieldnames <- colnames(data) fieldnames[c(1,respcol)] <- paste(fieldnames[c(1,respcol)],"_",e,sep="") cat(paste(fieldnames,"\n"), file=out) cat(paste("data ",idfield,"_",e,"\n",sep=""), file=out) cat("file=bayz_memmp_dataids.txt\n", file=out) cat(paste(idfield,"_",e,"\n",sep=""), file=out) cat(paste("data ",idfield,"_",e," ped\n",sep=""), file=out) cat("file=bayz_memmp_ped.txt\n", file=out) cat(paste(idfield,"_",e," parent1 parent2\n",sep=""), file=out) } else { cat(paste("data ",idfield,"_",e," ",e,"\n",sep=""), file=out) cat(paste("file=bayz_memmp_data_",e,".txt\n",sep=""), file=out) # ID and response column also needed appended environment.... fieldnames <- colnames(data) fieldnames[c(1,respcol)] <- paste(fieldnames[c(1,respcol)],"_",e,sep="") cat(paste(fieldnames,"\n"), file=out) cat(paste("data ",idfield,"_",e,"\n",sep=""), file=out) cat("file=bayz_memmp_dataids.txt\n", file=out) cat(paste(idfield,"_",e,"\n",sep=""), file=out) cat(paste("data ",idfield,"_",e," ped\n",sep=""), file=out) cat("file=bayz_memmp_ped.txt\n", file=out) cat(paste(idfield,"_",e," parent1 parent2\n",sep=""), file=out) } } # model line 1: mean + fixed effects + random effects from 'ranmod' + id-effect for ped cat("model\n", file=out) for(e in envir_level) { # all LINP, fixed and IID random effects, ID hierarchy and resid within environment if (envHasDup[e]) { cat(paste(resp,"_",e," = mean ",gsub("\\+"," ",fixmod)," ",gsub("\\+"," ",ranmod)," fac.",idfield,"_",e,"\n",sep=""), file=out) # model line for polygenic effects using hierarchy in case of replicates cat(paste("fac.",idfield,"_",e,".",resp,"_",e," = polyg.",idfield,"_",e," !name=polyg.",idfield,".",resp,"_",e,"\n",sep=""), file=out) # note: CovResidID can be FALSE when there is exactly one environment with reps, then insert the uncorrelated resid.ID distribution here if (!CovResidID) { cat(paste("resid.fac.",idfield,"_",e,".",resp,"_",e," ~ norm iden *\n",sep=""), file=out) cat(paste("var.resid.fac.",idfield,"_",e,".",resp,"_",e," ~ uni\n",sep=""), file=out) } } else { # for no reps directly use polyg effect in the reponse model cat(paste(resp,"_",e," = mean ",gsub("\\+"," ",fixmod)," ",gsub("\\+"," ",ranmod), " polyg.",idfield,"_",e," !name=polyg.",idfield,".",resp,"_",e,"\n",sep=""), file=out) } # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp,"_",e," ~ uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+"))) { cat(paste(i,".",resp,"_",e," ~ uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp,"_",e," ~ norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp,"_",e," ~ uni\n",sep=""), file=out) } # residuals are uncorrelated in memm models cat(paste("resid.",resp,"_",e," ~ norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"_",e," ~ uni\n",sep=""), file=out) } # Correlated effects: polygenic effects, and if CovResidID also (2 or more of) the resid.ID effects cat(paste("polyg.",idfield,".* ~ norm lvec ** !ldim=", ldim, "\n",sep=""), file=out) cat(paste("var.polyg.",idfield,".* ~ uni\n",sep=""), file=out) if (CovResidID) { residIDlist <- paste(paste("resid.fac.",idfield,"_",envir_level[which(envHasDup)],".",resp,"_",envir_level[which(envHasDup)],sep=""),collapse=",") cat(paste(residIDlist," ~ norm lvec ** !ldim=", ldim, " !name=var.resid.",idfield,"\n",sep=""), file=out) cat(paste("var.resid.",idfield," ~ uni\n",sep=""), file=out) } # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("memmp") return(pbayzsumm) } # Bayesian power lasso for non-repeated observations and genotypes bayz.bplg = function(data=NULL, geno=NULL, resp=NULL, fixmod=NULL, pow=0.5, chain=NULL) { # check if needed input is set: data, covar, resp and chain should be given and cannot be NULL; # fixmod may be NULL (in that case fixed part is a mean only), and the pow parameter has a default 0.5. if(is.null(data)) stop("data (R data table with response and explanatory variables is not given") if(is.null(geno)) stop("geno (R data table with genotypes) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") cleanup("bplg") # prepare data files for bayz write.table(data,file="bayz_bplg_data.txt",quote=F,row.names=F,col.names=F) write.table(geno,file="bayz_bplg_geno.txt",quote=F,row.names=F,col.names=F) genonames = colnames(geno)[2:ncol(geno)] write.table(genonames,file="bayz_bplg_genonames.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.bplg.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_bplg_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with genotypes cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_bplg_geno.txt\n", file=out) cat(paste(colnames(data)[1]," geno[] !genot012\n"), file=out) # third data with geno names cat("data geno map\n", file=out) cat("file=bayz_bplg_genonames.txt\n", file=out) cat("geno\n", file=out) # model line 1: mean + fixed effects + covar regressions cat("model\n", file=out) cat(paste(resp,"= mean "), file=out) if(!is.null(fixmod)) cat(gsub("\\+"," ",fixmod), file=out) cat(" add.geno\n", file=out) # model lines with uni distributions for mean and fixed effects and residual specification cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+"))) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # last model lines are for covar regression distribution, it differs for repeat and non-repeat version cat(paste("add.geno.",resp,"~ epow * ",pow,"\n",sep=""), file=out) cat(paste("epow.add.geno.",resp,"~uni\n",sep=""), file=out) # Chain section lines cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("bplg") return(pbayzsumm) } # Bayesian power lasso for non-repeated observations and continuous covariates. bayz.bplc = function(data=NULL, covar=NULL, resp=NULL, fixmod=NULL, pow=0.5, chain=NULL) { # check if needed input is set: data, covar, resp and chain should be given and cannot be NULL; # fixmod may be NULL (in that case fixed part is a mean only), and the pow parameter has a default 0.5. if(is.null(data)) stop("data (R data table with response and explanatory variables is not given") if(is.null(covar)) stop("covar (R data table with covariate predictors) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") cleanup("bplc") # check for missing values in covar input data if (sum(is.na(covar))>0) { stop("The covariate matrix contains missing values; this can not be handled in bplc models") } # prepare data files for bayz write.table(data,file="bayz_bplc_data.txt",quote=F,row.names=F,col.names=F) write.table(covar,file="bayz_bplc_covar.txt",quote=F,row.names=F,col.names=F) covnames = colnames(covar)[2:ncol(covar)] write.table(covnames,file="bayz_bplc_covnames.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.bplc.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_bplc_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_bplc_covar.txt\n", file=out) cat(paste(colnames(data)[1]," covar[]\n"), file=out) # third data with covariate names cat("data covar\n", file=out) cat("file=bayz_bplc_covnames.txt\n", file=out) cat("covar\n", file=out) # model line 1: mean + fixed effects + covar regressions cat("model\n", file=out) cat(paste(resp,"= mean "), file=out) if(!is.null(fixmod)) cat(gsub("\\+"," ",fixmod), file=out) cat(" reg.covar\n", file=out) # model lines with uni distributions for mean and fixed effects and residual specification cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+"))) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # last model lines are for covar regression distribution, it differs for repeat and non-repeat version cat(paste("reg.covar.",resp,"~ epow * ",pow,"\n",sep=""), file=out) cat(paste("epow.reg.covar.",resp,"~uni\n",sep=""), file=out) # Chain section lines cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("bplc") return(pbayzsumm) } # Bayesian power lasso for repeated observations and continuous covariates. bayz.bplcr = function(data=NULL, covar=NULL, resp=NULL, fixmod=NULL, pow=0.5, chain=NULL) { # check if needed input is set: data, covar, resp and chain should be given and cannot be NULL; # fixmod may be NULL (in that case fixed part is a mean only), and the pow parameter has a default 0.5. if(is.null(data)) stop("data (R data table with response and explanatory variables except genotypes) is not given") if(is.null(covar)) stop("covar (R data table with covariate predictors) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") cleanup("bplcr") # check for missing values in covar input data if (sum(is.na(covar))>0) { stop("The covariate matrix contains missing values; this can not be handled in bplc models") } # prepare data files for bayz write.table(data,file="bayz_bplcr_data.txt",quote=F,row.names=F,col.names=F) write.table(covar,file="bayz_bplcr_covar.txt",quote=F,row.names=F,col.names=F) covnames = colnames(covar)[2:ncol(covar)] write.table(covnames,file="bayz_bplcr_covnames.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.bplcr.bayz", "w") # data statements, for repeated obs the first (phenotype) data does not have ID mergefield. idfield=colnames(data)[1] cat(paste("data\n"), file=out) # no ID field here! cat("file=bayz_bplcr_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_bplcr_covar.txt\n", file=out) cat(paste(colnames(data)[1]," covar[]\n"), file=out) # third data with covariate names cat("data covar\n", file=out) cat("file=bayz_bplcr_covnames.txt\n", file=out) cat("covar\n", file=out) # model line 1: mean + fixed effects + random id for repeated obs. cat("model\n", file=out) cat(paste(resp,"= mean "), file=out) if(!is.null(fixmod)) cat(gsub("\\+"," ",fixmod), file=out) cat(paste(" fac.",colnames(data)[1],"\n",sep=""), file=out) # model lines with uni distributions for mean and fixed effects and residual specification cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+"))) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Insert hierarchical model for the fac. with the regression on the covars. cat(paste("fac.",colnames(data)[1],".",resp," = reg.covar\n",sep=""), file=out) cat(paste("resid.fac.",colnames(data)[1],".",resp," ~ norm iden *\n",sep=""), file=out) cat(paste("var.resid.fac.",colnames(data)[1],".",resp," ~ uni\n",sep=""), file=out) cat(paste("reg.covar.fac.",colnames(data)[1],".",resp," ~ epow * ",pow,"\n",sep=""), file=out) cat(paste("epow.reg.covar.fac.",colnames(data)[1],".",resp," ~ uni\n",sep=""), file=out) # Chain section lines cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("bplcr") return(pbayzsumm) } # BVS1 is 2-class mixture with estimated pi's, like BayesCpi. bayz.bvs1g = function(data=NULL, geno=NULL, resp=NULL, fixmod=NULL, piprior=c(100,1), varratio=100, chain=NULL) { # check if needed input is set: data, covar, resp and chain should be given and cannot be NULL; # fixmod may be NULL (in that case fixed part is a mean only), other parameters have defaults. if(is.null(data)) stop("data (R data table with response and explanatory variables except genotypes) is not given") if(is.null(geno)) stop("geno (R data table with genotypes) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") cleanup("bvs1g") # prepare data files for bayz write.table(data,file="bayz_bvs1g_data.txt",quote=F,row.names=F,col.names=F) write.table(geno,file="bayz_bvs1g_geno.txt",quote=F,row.names=F,col.names=F) genonames = colnames(geno)[2:ncol(geno)] write.table(genonames,file="bayz_bvs1g_genonames.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.bvs1g.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_bvs1g_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with genotypes cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_bvs1g_geno.txt\n", file=out) cat(paste(colnames(data)[1]," geno[] !genot012\n"), file=out) # third data with geno names cat("data geno map\n", file=out) cat("file=bayz_bvs1g_genonames.txt\n", file=out) cat("geno\n", file=out) cat("model\n", file=out) # Model lines for indicator variable pi0 <- piprior[1]/sum(piprior) pi1 <- piprior[2]/sum(piprior) cat(paste("z <- bern *",pi0," *",pi1,"\n",sep=""), file=out) cat(paste("frq.z ~ beta ",piprior[1],piprior[2],"\n"), file=out) # model line 1: mean + fixed effects + covar regressions cat("model\n", file=out) cat(paste(resp,"= mean "), file=out) if(!is.null(fixmod)) cat(gsub("\\+"," ",fixmod), file=out) cat(" add.geno\n", file=out) # model lines with uni distributions for mean and fixed effects and residual specification cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+"))) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # last model lines are for geno effects cat(paste("add.geno.",resp," ~ norm fac.z *",(1/varratio)," *",1," !ratios !step=0.1\n",sep=""), file=out) cat(paste("var.z.add.geno.",resp,"~uni\n",sep=""), file=out) # Chain section lines cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("bvs1g") return(pbayzsumm) } bayz.bvs1cr = function(data=NULL, covar=NULL, resp=NULL, fixmod=NULL, piprior=c(100,1), varratio=100, chain=NULL) { # check if needed input is set: data, covar, resp and chain should be given and cannot be NULL; # fixmod may be NULL (in that case fixed part is a mean only), other parameters have defaults. if(is.null(data)) stop("data (R data table with response and explanatory variables except genotypes) is not given") if(is.null(covar)) stop("covar (R data table with covariate predictors) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") cleanup("bvs1cr") # check for missing values in covar input data if (sum(is.na(covar))>0) { stop("The covariate matrix contains missing values; this can not be handled in bvs1c models") } # prepare data files for bayz write.table(data,file="bayz_bvs1cr_data.txt",quote=F,row.names=F,col.names=F) write.table(covar,file="bayz_bvs1cr_covar.txt",quote=F,row.names=F,col.names=F) covnames = colnames(covar)[2:ncol(covar)] write.table(covnames,file="bayz_bvs1cr_covnames.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.bvs1cr.bayz", "w") # data statements, for repeated obs the first (phenotype) data does not have ID mergefield. idfield=colnames(data)[1] cat(paste("data\n"), file=out) # no ID field here! cat("file=bayz_bvs1cr_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with covariates cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_bvs1cr_covar.txt\n", file=out) cat(paste(colnames(data)[1]," covar[]\n"), file=out) # third data with covariate names cat("data covar\n", file=out) cat("file=bayz_bvs1cr_covnames.txt\n", file=out) cat("covar\n", file=out) cat("model\n", file=out) # Model lines for indicator variable pi0 <- piprior[1]/sum(piprior) pi1 <- piprior[2]/sum(piprior) cat(paste("z <- bern *",pi0," *",pi1,"\n",sep=""), file=out) cat(paste("frq.z ~ beta ",piprior[1],piprior[2],"\n"), file=out) # model line for response with mean + fixed effects + random id for repeated obs. cat(paste(resp,"= mean "), file=out) if(!is.null(fixmod)) cat(gsub("\\+"," ",fixmod), file=out) cat(paste(" fac.",colnames(data)[1],"\n",sep=""), file=out) # model lines with uni distributions for mean and fixed effects and residual specification cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+"))) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Insert hierarchical model for the fac. with the regression on the covars and bvs model. cat(paste("fac.",colnames(data)[1],".",resp," = reg.covar\n",sep=""), file=out) cat(paste("resid.fac.",colnames(data)[1],".",resp," ~ norm iden *\n",sep=""), file=out) cat(paste("var.resid.fac.",colnames(data)[1],".",resp," ~ uni\n",sep=""), file=out) cat(paste("reg.covar.fac.",colnames(data)[1],".",resp," ~ norm fac.z *",(1/varratio)," *",1," !ratios !step=0.1\n",sep=""), file=out) cat(paste("var.z.reg.covar.fac.",colnames(data)[1],".",resp," ~ uni\n",sep=""), file=out) # Chain section lines cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("bvs1cr") return(pbayzsumm) } # Genomic Feature model, versions 1 are with uni prior on variances, making also version 2... bayz.gf1g <- function(data=NULL, geno=NULL, resp=NULL, fixmod="", ranmod="", feature=NULL, chain=NULL) { # check needed arguments: data, geno, resp, feature, chain are needed. # fixmod and ranmod are not needed, when all is missing it will run a model with intercept only. if(is.null(data)) stop("data (R data table with response and explanatory variables is not given") if(is.null(geno)) stop("geno (R data table with genotypes) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(feature)) stop("feature (R vector with feature information) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") cleanup("gf1g") # prepare data files for bayz genonames = colnames(geno)[2:ncol(geno)] if (length(feature) != length(genonames)) { stop("The length of the feature vector does not match number of markers in geno") } if (length(table(feature)) > 10) { cat("Warning: there are more then 10 groups in the feature, it can be more appropriate\n") cat("to use bayz.gf2 versions that pull variances towards the common mean variance\n") } write.table(data,file="bayz_gf1g_data.txt",quote=F,row.names=F,col.names=F) write.table(geno,file="bayz_gf1g_geno.txt",quote=F,row.names=F,col.names=F) write.table(cbind(genonames,feature),file="bayz_gf1g_genonames.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.gf1g.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_gf1g_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with genotypes cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_gf1g_geno.txt\n", file=out) cat(paste(colnames(data)[1]," geno[] !genot012\n"), file=out) # third data with genotype names cat("data geno map\n", file=out) cat("file=bayz_gf1g_genonames.txt\n", file=out) cat("geno feat\n", file=out) # model line 1: mean + fixed effects + random effects + genotypes cat("model\n", file=out) cat(paste(resp,"= mean",gsub("\\+"," ",fixmod),gsub("\\+"," ",ranmod)," add.geno\n"), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp,"~uni\n",sep=""), file=out) } # model with heterogeneous variances for add.geno effects by feature cat(paste("add.geno.",resp,"~norm fac.feat **\n",sep=""), file=out) cat(paste("var.feat.add.geno.",resp,"~uni\n",sep=""), file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("gf1g") return(pbayzsumm) } bayz.gf2g <- function(data=NULL, geno=NULL, resp=NULL, fixmod="", ranmod="", feature=NULL, df=5, chain=NULL) { # check needed arguments: data, geno, resp, feature, chain are needed. # fixmod and ranmod are not needed, when all is missing it will run a model with intercept only. if(is.null(data)) stop("data (R data table with response and explanatory variables is not given") if(is.null(geno)) stop("geno (R data table with genotypes) is not given") if(is.null(resp)) stop("resp (field name for response variable in data) is not given") if(is.null(feature)) stop("feature (R vector with feature information) is not given") if(is.null(chain)) stop("chain (vector with total, burn and skip values) is not given") cleanup("gf2g") # prepare data files for bayz genonames = colnames(geno)[2:ncol(geno)] if (length(feature) != length(genonames)) { stop("The length of the feature vector does not match number of markers in geno") } write.table(data,file="bayz_gf2g_data.txt",quote=F,row.names=F,col.names=F) write.table(geno,file="bayz_gf2g_geno.txt",quote=F,row.names=F,col.names=F) write.table(cbind(genonames,feature),file="bayz_gf2g_genonames.txt",quote=F,row.names=F,col.names=F) # write bayz script file out <- file("bayz.gf2g.bayz", "w") # data statements idfield=colnames(data)[1] cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_gf2g_data.txt\n", file=out) cat(paste(colnames(data),"\n"), file=out) # second data with genotypes cat(paste("data ",idfield,"\n"), file=out) cat("file=bayz_gf2g_geno.txt\n", file=out) cat(paste(colnames(data)[1]," geno[] !genot012\n"), file=out) # third data with genotype names cat("data geno map\n", file=out) cat("file=bayz_gf2g_genonames.txt\n", file=out) cat("geno feat\n", file=out) # model line 1: mean + fixed effects + random effects + genotypes cat("model\n", file=out) cat(paste(resp,"= mean",gsub("\\+"," ",fixmod),gsub("\\+"," ",ranmod)," add.geno\n"), file=out) # model lines with uni distributions for mean and fixed effects cat(paste("mean.",resp,"~uni\n",sep=""), file=out) for(i in unlist(strsplit(fixmod, "\\+")) ) { cat(paste(i,".",resp,"~uni\n",sep=""), file=out) } # model lines with norm iden distributions for random effects for(i in unlist(strsplit(ranmod, "\\+"))) { cat(paste(i,".",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.",i,".",resp,"~uni\n",sep=""), file=out) } # model with heterogeneous variances for add.geno effects by feature cat(paste("add.geno.",resp,"~norm fac.feat **\n",sep=""), file=out) cat(paste("var.feat.add.geno.",resp,"~ ichi * ",df,"\n",sep=""), file=out) # model lines for residual specification cat(paste("resid.",resp,"~norm iden *\n",sep=""), file=out) cat(paste("var.resid.",resp,"~uni\n",sep=""), file=out) # Chain section lines and finish writing bayz file cat("chain\n", file=out) cat(paste("total=",chain[1]," burn=",chain[2]," skip=",chain[3]," show=",10*chain[3],"\n",sep=""), file=out) close(out) # Run this bayz model and return result pbayzsumm <- runbayz("gf2g") return(pbayzsumm) } # reform a pediree with 2 columns ('slash format') to bayz 3 col # splitped: function only used internally # ped2to3col: function to be called by user, input a 2-column matrix or data frame with # ID and parent code in form a//b/c (and deeper nestings), output 3-column matrix/data-frame with ID, parent1, parent2. splitped <- function(ped, splitstring){ splitgroup <- grep(splitstring,ped[,2]) # the parent records that need to be split Nsplit <- length(splitgroup) cat(paste(Nsplit," pedigrees found for splitting on ",splitstring,"\n",sep="")) if (Nsplit==0) { cat("Ready without any changes...\n") return(ped) } splitlist <- strsplit(ped[splitgroup,2],splitstring) wrongsplit <- which(lengths(splitlist)>2) if (length(wrongsplit) > 0) { cat("Error(s) in pedigree format for the following record(s):\n") pedwrong <- ped[splitgroup[wrongsplit],] print(pedwrong) return(NULL) } splitparents <- unlist(splitlist) # P1, P2 for ID1; P1, P2 for ID2, etc. dummyparentIDs <- paste(rep(ped[splitgroup,1],each=2),rep(c("P1","P2"),Nsplit),sep="-") # make dummy parent IDs for all parwithslashes <- grep("/",splitparents) # parents which still have slashes need to be replaced by a dummy parent newPedID <- dummyparentIDs[parwithslashes] # new IDs and parents to be added in list newPedPar <- splitparents[parwithslashes] splitparents[parwithslashes] <- dummyparentIDs[parwithslashes] # replace with dummy parents in existing records updatePedPar <- paste(splitparents[seq(1,2*Nsplit,by=2)],splitparents[seq(2,2*Nsplit,by=2)],sep="/") ped[splitgroup,2] <- updatePedPar addPedRecords <- cbind(newPedID,newPedPar) cat(paste(length(newPedID)," dummy parents inserted and added\n")) return(rbind(ped,addPedRecords)) } ped2to3col <- function(ped2col) { for (split in c("/////","////","///","//")) { ped2col <- splitped(ped2col,split) if (is.null(ped2col)) return(NULL) } # now all pedigrees are split to single split splitlist <- strsplit(ped2col[,2],"/") wrongsplit <- which(lengths(splitlist)!=2) if (length(wrongsplit) > 0) { cat("Error(s) in pedigree format for the following record(s):\n") pedwrong <- ped2col[wrongsplit,] print(pedwrong) return(NULL) } parents <- matrix(unlist(splitlist),dim(ped2col)[1],2,byrow=TRUE) return(cbind(ped2col[,1],parents)) }