### Coefficient of coancestry # load the packages. library(kinship2) library(ggplot2) library(reshape2) # load the pedigree. ped <- read.csv("https://raw.githubusercontent.com/cjyang-work/lecture/main/2024_genetic_relationship/PERM/PERM_ped.csv", as.is=TRUE) # manually plot the pedigree. ped2 <- ped ped2$x <- c(1.5,10.5,19.5,28.5, 1.5,7,12.5,18,23.5,29, 1:30) ped2$y <- c(rep(3,4), rep(2,6), rep(1,30)) ped3 <- vector() for(i in 5:40){ ped3 <- rbind(ped3, data.frame(x1=ped2$x[i], x2=ped2$x[ped2$P1[i]], y1=ped2$y[i], y2=ped2$y[ped2$P1[i]], highlight=ifelse(i%in%c(5,6,11), TRUE, FALSE)), data.frame(x1=ped2$x[i], x2=ped2$x[ped2$P2[i]], y1=ped2$y[i], y2=ped2$y[ped2$P2[i]], highlight=ifelse(i%in%c(5,6,11), TRUE, FALSE))) } ggplot() + geom_segment(data=ped3, aes(x=x1, xend=x2, y=y1, yend=y2, color=highlight), linewidth=1) + geom_label(data=ped2, aes(x=x, y=y, label=ID), size=4) + theme_void() + theme(legend.position="none") + scale_color_manual(values=c("#999999", "#FF9999")) + coord_fixed(ratio=4) # compute the coefficients of coancestry using R/kinship2. coan <- kinship(id=ped$ID, dadid=ped$P1, momid=ped$P2) # convert the matrix of coefficients for plotting. coan4plot <- melt(data=coan) coan4plot <- coan4plot[order(coan4plot$Var1, coan4plot$Var2), ] colnames(coan4plot) <- c("ID1", "ID2", "coef") # plot the coefficients of coancestry. ggplot() + geom_tile(data=coan4plot, aes(x=ID1, y=ID2, fill=coef), color="#999999") + theme_bw() + scale_y_reverse(expand=c(0,0)) + scale_x_continuous(position="top", expand=c(0,0)) + scale_fill_gradient(low="#FFFFFF", high="#0000FF") + coord_fixed() # calculate the coefficients of coancestry manually. # create a table of all pairwise relationships n <- nrow(ped) df <- data.frame(ID1=sort(rep(1:n, n)), ID2=rep(1:n, n), manual=NA) df <- df[!(df$ID2 > df$ID1), ] rownames(df) <- NULL # identify the individuals in each generation. g0 <- c(1:4) g1 <- c(5:10) g2 <- c(11:40) # compute the coefficients for g0 to itself. for(i in g0) df$manual[df$ID1==i & df$ID2==i] <- 0.5 # compute the coefficients for g0 to g0. for(i in g0){ for(j in g0[g0 < i]){ df$manual[df$ID1==i & df$ID2==j] <- 0 } } # compute the coefficients for g1 to g0/g1. for(i in g1){ for(j in c(g0, g1)[c(g0, g1) < i]){ a <- sort(c(ped$P1[ped$ID==i], j)) b <- sort(c(ped$P2[ped$ID==i], j)) df$manual[df$ID1==i & df$ID2==j] <- 0.5*(df$manual[df$ID1==a[2] & df$ID2==a[1]] + df$manual[df$ID1==b[2] & df$ID2==b[1]]) } } # compute the coefficients for g1 to itself. for(i in g1){ a <- sort(c(ped$P1[ped$ID==i], ped$P2[ped$ID==i])) df$manual[df$ID1==i & df$ID2==i] <- 0.5*(1 + df$manual[df$ID1==a[2] & df$ID2==a[1]]) } # compute the coefficients for g2 to g0/g1/g2. for(i in g2){ for(j in c(g0, g1, g2)[c(g0, g1, g2) < i]){ a <- sort(c(ped$P1[ped$ID==i], j)) b <- sort(c(ped$P2[ped$ID==i], j)) df$manual[df$ID1==i & df$ID2==j] <- 0.5*(df$manual[df$ID1==a[2] & df$ID2==a[1]] + df$manual[df$ID1==b[2] & df$ID2==b[1]]) } } # compute the coefficients for g2 to itself. for(i in g2){ a <- sort(c(ped$P1[ped$ID==i], ped$P2[ped$ID==i])) df$manual[df$ID1==i & df$ID2==i] <- 0.5*(1 + df$manual[df$ID1==a[2] & df$ID2==a[1]]) } # compare the coefficients from R/kinship2 and manual calculation. df$kinship2 <- coan4plot$coef[!(coan4plot$ID2 > coan4plot$ID1)] ggplot() + geom_point(data=df, aes(x=manual, y=kinship2)) + theme_bw() + coord_fixed() ### Coefficient of fraternity # load the packages. library(AGHmatrix) # compute the coefficients of fraternity using R/AGHmatrix. frat <- Amatrix(data=ped, dominance=TRUE) # convert the matrix of coefficients for plotting. frat4plot <- melt(data=frat) frat4plot <- frat4plot[order(frat4plot$Var1, frat4plot$Var2), ] colnames(frat4plot) <- c("ID1", "ID2", "coef") # plot the coefficients of coancestry. ggplot() + geom_tile(data=frat4plot, aes(x=ID1, y=ID2, fill=coef), color="#999999") + theme_bw() + scale_y_reverse(expand=c(0,0)) + scale_x_continuous(position="top", expand=c(0,0)) + scale_fill_gradient(low="#FFFFFF", high="#0000FF") + coord_fixed() # calculate the coefficients of fraternity manually. # create a table of all pairwise relationships df2 <- df[, c(1,2)] df2$manual <- NA # compute the coefficients for an individual to itself. for(i in 1:n) df2$manual[df2$ID1==i & df2$ID2==i] <- 1 # compute the coefficients for g0 to g0/g1/g2. for(i in 1:n){ for(j in g0[g0 < i]){ df2$manual[df2$ID1==i & df2$ID2==j] <- 0 } } # compute the remaining coefficients. for(i in c(g1,g2)){ for(j in c(g1,g2)[c(g1,g2) < i]){ a <- sort(c(ped$P1[i], ped$P1[j])) b <- sort(c(ped$P2[i], ped$P2[j])) d <- sort(c(ped$P1[i], ped$P2[j])) e <- sort(c(ped$P2[i], ped$P1[j])) if(is.na(df2$manual[df2$ID1==i & df2$ID2==j])){ P1P1 <- df$manual[df$ID1==a[2] & df$ID2==a[1]] P2P2 <- df$manual[df$ID1==b[2] & df$ID2==b[1]] P1P2 <- df$manual[df$ID1==d[2] & df$ID2==d[1]] P2P1 <- df$manual[df$ID1==e[2] & df$ID2==e[1]] df2$manual[df2$ID1==i & df2$ID2==j] <- P1P1*P2P2 + P1P2*P2P1 } } } # compare the coefficients from R/AGHmatrix and manual calculation. df2$AGHmatrix <- frat4plot$coef[!(frat4plot$ID2 > frat4plot$ID1)] ggplot() + geom_point(data=df2, aes(x=manual, y=AGHmatrix)) + theme_bw() + coord_fixed() ### Parentage inference # load the package library(qtl2) # load the data magic <- read_cross2("https://raw.github.com/cjyang-work/lecture/main/2024_genetic_relationship/MAGIC.zip") # compute P(IBD) gp <- calc_genoprob(magic) # manually plot the P(IBD) gp4plot <- vector() for(i in 1:length(gp)){ gp4plot <- rbind(gp4plot, cbind(melt(gp[[i]]), Chr=i, Pos=sort(rep(magic$gmap[[i]], 4*10)))) } colnames(gp4plot) <- c("RIL", "Founder", "SNP", "PIBD", "Chr", "Pos") levels(gp4plot$Founder) <- c("B73", "Mo17", "W22", "FV2") ggplot() + geom_line(data=gp4plot, aes(x=Pos, y=PIBD, color=Founder), linewidth=1) + facet_grid(rows=vars(RIL), cols=vars(Chr), scales="free_x", space="free_x") + theme_minimal() + theme(legend.position="bottom") + scale_color_manual(values=c("#CA0020", "#F4A582", "#0571B0", "#92C5DE")) + scale_y_continuous(breaks=c(0,0.5,1)) # threshold of 0.5 ip <- maxmarg(gp, minprob=0.5) # manually plot the inferred parentage. ip4plot <- data.frame(t(do.call(cbind, ip))) temp <- sapply(1:length(magic$gmap), FUN=function(i) length(magic$gmap[[i]])) ip4plot$Chr <- unlist(lapply(1:length(temp), FUN=function(i) rep(i, temp[i]))) ip4plot$Pos <- unlist(magic$gmap) ip4plot <- melt(ip4plot, id.vars=c("Chr", "Pos")) colnames(ip4plot)[3:4] <- c("RIL", "Founder") ip4plot$Founder <- factor(x=ip4plot$Founder, labels=c("B73", "Mo17", "W22", "FV2")) ggplot() + geom_point(data=ip4plot, aes(x=Pos, y=RIL, color=Founder), size=2) + facet_grid(cols=vars(Chr), scales="free_x", space="free_x") + theme_minimal() + theme(legend.position="bottom") + scale_color_manual(values=c("#CA0020", "#F4A582", "#0571B0", "#92C5DE")) + scale_y_discrete(limits=rev) ### Additive GRM # load the packages library(sommer) # load the genomic marker data for PERM. geno <- read.csv("https://raw.githubusercontent.com/cjyang-work/lecture/main/2024_genetic_relationship/PERM/PERM_geno.csv", as.is=TRUE, row.names=1) geno <- as.matrix(geno) # compute the A-GRM using sommer. KA <- A.mat(geno-1) # compute the allele frequency. p <- colSums(geno)/(2*colSums(!is.na(geno))) # center the marker data. W <- geno - matrix(rep(2*p, nrow(geno)), nrow=nrow(geno), byrow=TRUE) # obtain the denominator. d <- sum(2*p*(1-p)) # compute the A-GRM. KA2 <- W%*%t(W)/d # check if KA and KA2 are identical. KA4plot <- rbind(data.frame(sommer=KA[lower.tri(KA)], manual=KA2[lower.tri(KA2)], type="off-diagonal"), data.frame(sommer=diag(KA), manual=diag(KA2), type="diagonal")) ggplot() + geom_point(data=KA4plot, aes(x=sommer, y=manual, color=type)) + theme_bw() + coord_fixed() ### Dominance GRM # compute the D-GRM using sommer. KD <- D.mat(X=geno-1, nishio=FALSE) # convert to dominance marker data. genod <- geno genod[genod==2] <- 0 # center the marker data. Wd <- genod - matrix(rep(2*p*(1-p), nrow(genod)), nrow=nrow(genod), byrow=TRUE) # obtain the denominator. dd <- sum(2*p*(1-p)*( 1-2*p*(1-p) )) # compute the D-GRM. KD2 <- Wd%*%t(Wd)/dd # check if KD and KD2 are identical. KD4plot <- rbind(data.frame(sommer=KD[lower.tri(KD)], manual=KD2[lower.tri(KD2)], type="off-diagonal"), data.frame(sommer=diag(KD), manual=diag(KD2), type="diagonal")) ggplot() + geom_point(data=KD4plot, aes(x=sommer, y=manual, color=type)) + theme_bw() + coord_fixed() ### Compare A/D-GRM to coancestry/fraternity. # load the packages. library(kinship2) library(AGHmatrix) # load the pedigree for PERM. ped <- read.csv("https://raw.githubusercontent.com/cjyang-work/lecture/main/2024_genetic_relationship/PERM/PERM_ped.csv", as.is=TRUE) # compute the pedigree-based A-GRM. HA <- kinship(id=ped$ID, dadid=ped$P1, momid=ped$P2) HD <- Amatrix(data=ped, dominance=TRUE) # compare HA to KA, HD to KD. compareAD <- rbind(data.frame(pedigree=HA[lower.tri(HA)], marker=KA[lower.tri(KA)]/2, type="off-diagonal", coef="Additive/coancestry"), data.frame(pedigree=diag(HA), marker=diag(KA)/2, type="diagonal", coef="Additive/coancestry"), data.frame(pedigree=HD[lower.tri(HD)], marker=KD[lower.tri(KD)], type="off-diagonal", coef="Dominance/fraternity"), data.frame(pedigree=diag(HD), marker=diag(KD), type="diagonal", coef="Dominance/fraternity")) temp <- data.frame(x1=c(0,0), x2=c(0.55,1), y1=c(0,0), y2=c(0.55,1), coef=c("Additive/coancestry", "Dominance/fraternity")) ggplot() + geom_point(data=compareAD, aes(x=pedigree, y=marker, color=type)) + geom_segment(data=temp, aes(x=x1, xend=x2, y=y1, yend=y2), color="#999999", linewidth=1) + geom_smooth(data=compareAD, aes(x=pedigree, y=marker), method="lm", color="#000000") + facet_wrap(vars(coef), nrow=1) + theme_bw() + coord_fixed(xlim=c(-1,1.5)) ### Computing AxA epistasis GRM # subset the additive markers to use as an example. X <- geno[, 1:50] # center and scale the additive markers first - different approach from the previous way. W <- scale(X, center=TRUE, scale=TRUE)/sqrt(ncol(X)) # compute the additive x additive epistasis markers. WW <- lapply(1:(ncol(W)-1), FUN=function(i) W[,i]*W[,(i+1):ncol(W), drop=FALSE]) WW <- do.call(cbind, WW) # compute the additive GRM. MX <- W%*%t(W) # method 1 for computing the additive x additive epistasis GRM. MXX1 <- WW%*%t(WW) # method 2 for computing the additive x additive epistasis GRM using trick. MXX2 <- 0.5*(MX*MX - (W*W)%*%t(W*W)) # check if method 1 and 2 are identical. MXX4plot <- rbind(data.frame(method.1=MXX1[lower.tri(MXX1)], method.2=MXX2[lower.tri(MXX2)], type="off-diagonal"), data.frame(method.1=diag(MXX1), method.2=diag(MXX2), type="diagonal")) ggplot() + geom_point(data=MXX4plot, aes(x=method.1, y=method.2, color=type)) + theme_bw() + coord_fixed() # compute KAA for the full data. W <- scale(geno, center=TRUE, scale=TRUE)/sqrt(ncol(geno)) KAA <- 0.5*((W%*%t(W))*(W%*%t(W)) - (W*W)%*%t(W*W)) # compare KA to KD and KA to KAA. K4plot <- rbind(data.frame(KA=KA[lower.tri(KA)], KD=KD[lower.tri(KD)], KAA=KAA[lower.tri(KAA)], type="off-diagonal"), data.frame(KA=diag(KA), KD=diag(KD), KAA=diag(KAA), type="diagonal")) K4plot <- melt(K4plot, id.vars=c("KA", "type")) ggplot() + geom_point(data=K4plot, aes(x=KA, y=value, color=type)) + facet_wrap(vars(variable), nrow=1) + theme_bw() + coord_fixed() ### Splitting and combining GRMs # get the marker information. marker <- read.csv("https://raw.githubusercontent.com/cjyang-work/lecture/main/2024_genetic_relationship/PERM/PERM_marker.csv", as.is=TRUE) # loop through all 10 chromosomes. d.list <- list() KA.list <- list() for(i in 1:10){ # extract the marker for chr-i. temp.geno <- geno[, marker$Chr==i] # compute the allele frequencies. temp.p <- colSums(temp.geno)/(2*colSums(!is.na(temp.geno))) # center the marker data. temp.W <- temp.geno - matrix(2*temp.p, nrow=nrow(temp.geno), ncol=ncol(temp.geno), byrow=TRUE) # obtain the denominator. temp.d <- sum(2*temp.p*(1-temp.p)) # compute the A-GRM. temp.KA <- temp.W%*%t(temp.W)/temp.d # collect the results. d.list[[i]] <- temp.d KA.list[[i]] <- temp.KA } # combine the GRMs. KA.comb <- d.list[[1]]*KA.list[[1]] for(i in 2:10) KA.comb <- KA.comb + d.list[[i]]*KA.list[[i]] KA.comb <- KA.comb/sum(unlist(d.list)) # combine all but one chromosome. KA.loco <- d.list[[2]]*KA.list[[2]] for(i in 3:10) KA.loco <- KA.loco + d.list[[i]]*KA.list[[i]] KA.loco <- KA.loco/sum(unlist(d.list)[-1]) # compare Original KA to Combined and LOCO. KAcomb4plot <- rbind(data.frame(Original=KA[lower.tri(KA)], Combined=KA.comb[lower.tri(KA.comb)], LOCO=KA.loco[lower.tri(KA.loco)], type="off-diagonal"), data.frame(Original=diag(KA), Combined=diag(KA.comb), LOCO=diag(KA.loco), type="diagonal")) KAcomb4plot <- melt(KAcomb4plot, id.vars=c("Original", "type")) ggplot() + geom_point(data=KAcomb4plot, aes(x=Original, y=value, color=type)) + facet_wrap(vars(variable), nrow=1) + theme_bw() + coord_fixed() ### Genetic distance ### Fst # load packages. library(hierfstat) # load the marker data. geno <- read.csv("https://raw.githubusercontent.com/cjyang-work/lecture/main/2024_genetic_relationship/DIV/DIV_geno.csv", as.is=TRUE, row.names=1) geno <- as.matrix(geno) # load the grouping data. group <- read.csv("https://raw.githubusercontent.com/cjyang-work/lecture/main/2024_genetic_relationship/DIV/DIV_group.csv", as.is=TRUE) table(group$Group) # compute the Fst for each group. fs.dosage(dos=geno, pop=group$Group) # CIMMYT EU_Inbred NA_Inbred PVP Teosinte All #Fis 1.0000 1.0000 1.0000 1.000 1.0000 1.0000 #Fst 0.1304 0.1421 -0.0399 -0.043 0.2586 0.0896 # manually compute Fst. # order geno by group. group2 <- group[order(group$Group),] geno2 <- geno[group2$ID,] # identify the indices for each group. idx <- lapply(unique(group2$Group), FUN=function(i) which(group2$Group==i)) # compute the matches as 1 - average Manhattan distance between individuals. # this works for inbred data. match.all <- 1 - as.matrix(dist(x=geno2/2, method="manhattan"))/ncol(geno2) # set anything other than the lower triangle to 0 for convenience. match.all[upper.tri(match.all, diag=TRUE)] <- 0 # compute match-within. match.w <- vector() for(i in 1:length(idx)){ # extract the matrix containing all the within-i match. temp <- match.all[idx[[i]], idx[[i]]] # compute the mean of the match. match.w <- c(match.w, mean(temp[lower.tri(temp)])) } # compute match-between. match.b <- vector() for(i in 1:(length(idx)-1)){ for(j in (i+1):length(idx)){ # extract the matrix containing all the between-i match. match.b <- c(match.b, mean(c(match.all[idx[[j]], idx[[i]]]))) } } # compute the mean of the match-between. match.b <- mean(match.b) # compute the Fst manually. c((match.w - match.b)/(1 - match.b), (mean(match.w) - match.b)/(1 - match.b)) #0.13037383 0.14205002 -0.03989604 -0.04297350 0.25857899 0.08962666 ### genome-wide Fst # load package. library(ggplot2) # load the marker information. marker <- read.csv("https://raw.githubusercontent.com/cjyang-work/lecture/main/2024_genetic_relationship/DIV/DIV_marker.csv", as.is=TRUE) # compare the Fst between NA_Inbred and EU_Inbred. # extract only the data for these two populations. geno3 <- geno[group$Group%in%c("NA_Inbred", "EU_Inbred"), ] group3 <- group[group$Group%in%c("NA_Inbred", "EU_Inbred"), ] # compute genome-wide Fst. gw.fst <- varcomp.glob(levels=group3[, "Group", drop=FALSE], loci=data.frame(geno3)) gw.fst <- gw.fst$loc[,1]/rowSums(gw.fst$loc) # prepare the data for plotting. gw4plot <- cbind(marker, Fst=gw.fst) # plot the data. ggplot() + geom_point(data=gw4plot, aes(x=Pos_bp, y=Fst), color="#555555") + facet_grid(cols=vars(Chr), switch="x", scales="free_x", space="free_x") + theme_minimal() + theme(axis.text.x=element_blank(), axis.title.x=element_blank()) # null distribution for running Qst-Fst test. ggplot() + geom_density(data=gw4plot, aes(x=Fst), fill="#DDDDDD") + theme_bw() ### NJ with Roger's and Nei's distance. # load package. library(adegenet) library(poppr) library(ape) # convert the marker data into `adegenet::genind` format. geno4 <- geno geno4[geno4==0] <- "A/A" geno4[geno4==1] <- "A/B" # we don't actually have heterozygotes. geno4[geno4==2] <- "B/B" geno4 <- df2genind(X=geno4, sep="/") # compute Roger's distance. dist.rog <- rogers.dist(x=geno4) # compute Nei's distance. dist.nei <- nei.dist(x=geno4) # plot the NJ tree for Roger's distance. plot(nj(dist.rog), cex=0.5) # plot the NJ tree for Nei's distance. plot(nj(dist.nei), cex=0.5) ### Population structure ### PCA # load the packages library(ggplot2) # load the marker data. geno <- read.csv("https://raw.githubusercontent.com/cjyang-work/lecture/main/2024_genetic_relationship/DIV/DIV_geno.csv", as.is=TRUE, row.names=1) geno <- as.matrix(geno) # load the grouping data. group <- read.csv("https://raw.githubusercontent.com/cjyang-work/lecture/main/2024_genetic_relationship/DIV/DIV_group.csv", as.is=TRUE) # compute the genetic relationship matrix. geno.scaled <- scale(x=geno, center=TRUE, scale=TRUE) temp <- geno.scaled%*%t(geno.scaled)/ncol(geno.scaled) # compute the principal components. PC <- eigen(temp) # prepare the data to plot the percent variance explained by the PC. pve <- data.frame(PC=1:nrow(geno), PVE=cumsum(100*PC$values/sum(PC$values))) # plot the PVE. ggplot() + geom_point(data=pve, aes(x=PC, y=PVE)) + theme_bw() # prepare the data to create PCA plot. pca <- data.frame(PC1=PC$vectors[,1], PC2=PC$vectors[,2], PC3=PC$vectors[,3], PC4=PC$vectors[,4], ID=group$ID, Group=group$Group) # plot the PCA (PC1 vs PC2, text label). ggplot() + geom_text(data=pca, aes(x=PC1, y=PC2, label=ID), size=1) + theme_bw() + coord_fixed() # plot the PCA (PC1 vs PC2, group-colored). ggplot() + geom_point(data=pca, aes(x=PC1, y=PC2, color=Group), size=1) + theme_bw() + coord_fixed() # plot the PCA (PC3 vs PC4, group-colored). ggplot() + geom_point(data=pca, aes(x=PC3, y=PC4, color=Group), size=1) + theme_bw() + coord_fixed() ### Population admixture # load the package. library(LEA) # set working directory setwd("...") # run the admixture analysis with K=2/3/4. out <- snmf("BCF.geno", K=2:4, entropy=TRUE, repetitions=10, project="new") # use to entropy to help us decide on the K. plot(out, pch=16, col="#FF9999") # K=2. barchart(out, K=2, run=which.min(cross.entropy(out, K=2)), sort.by.Q=F, border=NA, space=0, col=c("#E69F00", "#56B4E9"), ylab="Ancestry proportions") # K=3. barchart(out, K=3, run=which.min(cross.entropy(out, K=3)), sort.by.Q=F, border=NA, space=0, col=c("#E69F00", "#56B4E9", "#009E73"), ylab="Ancestry proportions") # K=4. barchart(out, K=4, run=which.min(cross.entropy(out, K=4)), sort.by.Q=F, border=NA, space=0, col=c("#E69F00", "#56B4E9", "#009E73", "#F0E442"), ylab="Ancestry proportions") ### GWAS with/without population structure control. # load packages. library(sommer) # load the marker information. marker <- read.csv("https://raw.githubusercontent.com/cjyang-work/lecture/main/2024_genetic_relationship/DIV/DIV_marker.csv", as.is=TRUE) # load the phenotype data. pheno <- read.csv("https://raw.githubusercontent.com/cjyang-work/lecture/main/2024_genetic_relationship/DIV/DIV_pheno.csv", as.is=TRUE) # calculate A-GRM. KA <- A.mat(geno-1) # GWAS with mixed model. gw1 <- GWAS(fixed=trait~1, random=~vsr(ID, Gu=KA), rcov=~units, data=pheno, M=geno-1, gTerm="u:ID", dateWarning=FALSE) # GWAS with fixed model. gw2 <- sapply(1:ncol(geno), FUN=function(i) summary(glm(trait~geno[,i], data=pheno))$coefficients[2,4]) # prepare the data for plotting the mixed model results. gw1plot <- cbind(marker[,c(2,4)], gw1$scores[,1]) colnames(gw1plot) <- c("Chrom", "Position", "p.val") gw1plot$p.val[gw1plot$p.val==Inf] <- NA # prepare the data for plotting the fixed model results. gw2plot <- cbind(marker[,c(2,4)], -log10(gw2)) colnames(gw2plot) <- c("Chrom", "Position", "p.val") # plot the mixed model results. manhattan(gw1plot, pch=16, fdr.level=FALSE, show.fdr=FALSE) # plot the fixed model results. manhattan(gw2plot, pch=16, fdr.level=FALSE, show.fdr=FALSE) # compare both. ggplot() + annotate("segment", x=-Inf, xend=Inf, y=4, yend=4, color="#9999FF") + annotate("segment", x=4, xend=4, y=-Inf, yend=Inf, color="#9999FF") + geom_point(aes(x=gw1plot$p.val, y=gw2plot$p.val), na.rm=TRUE) + geom_abline(slope=1, intercept=0, color="#FF0000") + theme_bw() + xlab("With PS control") + ylab("Without PS control") + coord_fixed() ### eigenGWAS. # compute the first principal component. PC1 <- eigen(KA)$vectors[,1] # add that to the phenotype data. pheno <- cbind(pheno, PC1=PC1) # eigenGWAS with mixed model. gw3 <- GWAS(fixed=PC1~1, random=~vsr(ID, Gu=KA), rcov=~units, data=pheno, M=geno-1, gTerm="u:ID", dateWarning=FALSE) # prepare the data for plotting the mixed model results. gw3plot <- cbind(marker[,c(2,4)], gw3$scores[,1]) colnames(gw3plot) <- c("Chrom", "Position", "p.val") gw3plot$p.val[gw3plot$p.val==Inf] <- NA # plot the mixed model results. manhattan(gw3plot, pch=16, fdr.level=FALSE, show.fdr=FALSE) ### GBLUP # randomly sample 35 lines from EU_Inbred/NA_Inbred/PVP to predict 18 lines in Teosinte. # identify the lines. set.seed(252) train1 <- sort(sample(group$ID[!(group$Group=="Teosinte")], 96)) train2 <- group$ID[group$Group=="PVP"] test <- group$ID[group$Group=="Teosinte"] # prepare the pheno data frame. pheno <- pheno[, 1:2] rownames(pheno) <- pheno$ID # random 96 + Teosinte. KA1 <- A.mat(geno[c(train1, test), ]-1) pheno1 <- pheno[c(train1, test), ] pheno1$trait[pheno1$ID%in%test] <- NA # PVP + Teosinte. KA2 <- A.mat(geno[c(train2, test), ]-1) pheno2 <- pheno[c(train2, test), ] pheno2$trait[pheno2$ID%in%test] <- NA # GBLUP 1, get the predicted values. mm1 <- mmer(fixed=trait~1, random=~vsr(ID, Gu=KA1), rcov=~units, data=pheno1, dateWarning=FALSE) pred1 <- c(mm1$Beta$Estimate + mm1$U$`u:ID`$trait)[test] # GBLUP 2, get the predicted values. mm2 <- mmer(fixed=trait~1, random=~vsr(ID, Gu=KA2), rcov=~units, data=pheno2, dateWarning=FALSE) pred2 <- c(mm2$Beta$Estimate + mm2$U$`u:ID`$trait)[test] # compare the prediction accuracies. cor(cbind(obs=pheno[test, "trait"], pred1, pred2))