# Power calculator for mol QTLs

how to
Author

Haky Im

Published

February 21, 2023

## PORS power calculation

The success of the prediction training depends mostly on whether the corresponding QTL study will be powered. Thus, we provide here the power to detect molecular QTLs for a range of sample sizes, effect sizes, and minor allele frequencies.

## function: R2 from beta assuming variance of y = 1

$y = \delta \cdot x + \epsilon$

$r^2 = \delta^2 \cdot \text{var}(x) = \delta^2 \cdot 2 \cdot \text{maf} \cdot (1-\text{maf})$

Code
suppressMessages(library(tidyverse))
suppressMessages(library(knitr))
## install.packages("pwr")
#if (!("pwr" %in% installed.packages()[, 1])) {
#  install.packages("pwr")
#}

## define ranges of maf, eff, sample sizes

Code
mafvec = c(0.05, 0.10, 0.30)
effvec = c(0.40, 0.60, 0.80)
nvec = c(200,350,500)
nsnps = 1000
alpha = 0.05/nsnps

## create data frame with all combinations

Code
# mat = matrix(NA,length(mafvec)*length(effvec)*length(nvec),4)
# colnames(mat) = c("maf","eff","nsam","power")
# cont = 1
# for(maf in mafvec)
# {
#   for(nn in nvec)
#   {
#     for(eff in effvec)
#     {
#       r2 = eff^2 * 2 * maf * (1-maf)
#       rr = sqrt(r2)
#       pp = pwr::pwr.r.test(n = nn, r= rr , sig.level = alpha)
#       mat[cont,] = c(maf, eff, nn, pppower) # cont = cont + 1 # } # } # } # # mat %>% data.frame() %>% # pivot_wider(names_from = nsam, values_from = power) %>% # mutate(across(3:ncol(.), ~sprintf("%.1f%%", . * 100))) %>% # kable(format = "markdown", align = c("l", "l", "c", "c", "c"), # caption = "Power by sample size")  ## calculate detectable effect sizes The following table shows the detectable effect sizes at 80% power with a significance level of 0.05/1000. Variance of Y is standardized to 1. Code calc_mateff = function(mafvec,nvec,outmat=FALSE) { mateff = matrix(NA,length(mafvec)*length(nvec),3) colnames(mateff) = c("maf","nsam","power") cont = 1 for(maf in mafvec) { for(nn in nvec) { # r2 = eff^2 * 2 * maf * (1-maf) # rr = sqrt(r2) pp = pwr::pwr.r.test(n = nn, power=0.80 , sig.level = alpha) eff = ppr/sqrt(2*maf*(1-maf))
mateff[cont,] = c(maf,  nn, eff)
cont = cont + 1
}
}
mateff = mateff %>% data.frame() %>%
pivot_wider(names_from = nsam, values_from = power) %>%
mutate_at(vars(-c(1)), ~round(., 2))
print(mateff)
if(outmat) mateff
}

mafvec = c(0.05, 0.10, 0.30)
nvec = c(100,500,1000,7000)
nsnps = 1000
alpha = 0.05/nsnps
## ---
mat = calc_mateff(mafvec,nvec,outmat=TRUE)
# A tibble: 3 × 5
maf 100 500 1000 7000
<dbl> <dbl> <dbl>  <dbl>  <dbl>
1  0.05  1.5   0.7    0.5    0.19
2  0.1   1.09  0.51   0.36   0.14
3  0.3   0.71  0.33   0.24   0.09
Code
mat %>%  kable(format = "markdown", align = c("l", "c", "c", "c", "c"), caption = "Detectable effects w/1000 SNPs",label=NA) 
Detectable effects w/1000 SNPs
maf 100 500 1000 7000
0.05 1.50 0.70 0.50 0.19
0.10 1.09 0.51 0.36 0.14
0.30 0.71 0.33 0.24 0.09
Code
mafvec = c(0.05, 0.10, 0.30)
nvec = c(1000,6000,10000,100000)
nsnps = 1e6
alpha = 0.05/nsnps
## ---
mat = calc_mateff(mafvec,nvec,outmat=TRUE)
# A tibble: 3 × 5
maf 1000 6000 10000 1e+05
<dbl>  <dbl>  <dbl>   <dbl>   <dbl>
1  0.05   0.64   0.26    0.2     0.06
2  0.1    0.46   0.19    0.15    0.05
3  0.3    0.3    0.13    0.1     0.03
Code
mat %>%  kable(format = "markdown", align = c("l", "c", "c", "c", "c"), caption = "Detectable effects w/1e6 SNPs",label=NA) 
Detectable effects w/1e6 SNPs
maf 1000 6000 10000 1e+05
0.05 0.64 0.26 0.20 0.06
0.10 0.46 0.19 0.15 0.05
0.30 0.30 0.13 0.10 0.03

## power for Xcan association (continuous X)

Code
nvec = c(10000,100000,500000)
ntests = 10000
alpha = 0.05/ntests

calc_matr = function(nvec,outmat=FALSE)
{
mateff = matrix(NA,length(nvec),2)
colnames(mateff) = c("nsam","r")
cont = 1
for(nn in nvec)
{
pp = pwr::pwr.r.test(n = nn, power=0.80 , sig.level = alpha)
mateff[cont,] = c(nn, pp\$r)
cont = cont + 1
}
mateff = mateff %>% data.frame() %>%  mutate(r2 = r^2) %>% mutate_at(vars(-c(1)), ~signif(., 2))
print(mateff)
if(outmat) mateff
}

calc_matr(nvec,outmat=TRUE) %>% knitr::kable()
   nsam      r      r2
1 1e+04 0.0540 2.9e-03
2 1e+05 0.0170 2.9e-04
3 5e+05 0.0077 5.9e-05
nsam r r2
1e+04 0.0540 2.9e-03
1e+05 0.0170 2.9e-04
5e+05 0.0077 5.9e-05

For the *-Xcan analysis, sample sizes ranging from 10,000 to 500,000 will detect omic traits explaining 0.29% to 0.0059% of the total variation of the phenotype with a power of 80%.