Power calculator for mol QTLs

how to

Author

Haky Im

Published

February 21, 2023

PORS power calculation

The success of the prediction training depends mostly on whether the corresponding QTL study will be powered. Thus, we provide here the power to detect molecular QTLs for a range of sample sizes, effect sizes, and minor allele frequencies.

function: R2 from beta assuming variance of y = 1

\[y = \delta \cdot x + \epsilon\]

\[r^2 = \delta^2 \cdot \text{var}(x) = \delta^2 \cdot 2 \cdot \text{maf} \cdot (1-\text{maf})\]

Code

suppressMessages(library(tidyverse))
suppressMessages(library(knitr))
## install.packages("pwr")
#if (!("pwr" %in% installed.packages()[, 1])) {
#  install.packages("pwr")
#}

define ranges of maf, eff, sample sizes

Code

mafvec = c(0.05, 0.10, 0.30) 
effvec = c(0.40, 0.60, 0.80) 
nvec = c(200,350,500) 
nsnps = 1000
alpha = 0.05/nsnps

create data frame with all combinations

Code

# mat = matrix(NA,length(mafvec)*length(effvec)*length(nvec),4)
# colnames(mat) = c("maf","eff","nsam","power")
# cont = 1
# for(maf in mafvec)
# {
#   for(nn in nvec)
#   {
#     for(eff in effvec)
#     {
#       r2 = eff^2 * 2 * maf * (1-maf)
#       rr = sqrt(r2)
#       pp = pwr::pwr.r.test(n = nn, r= rr , sig.level = alpha)
#       mat[cont,] = c(maf, eff, nn, pp$power)
#       cont = cont + 1
#     }
#   }
# }
# 
# mat %>% data.frame() %>% 
#   pivot_wider(names_from = nsam, values_from = power) %>% 
#   mutate(across(3:ncol(.), ~sprintf("%.1f%%", . * 100))) %>% 
#   kable(format = "markdown", align = c("l", "l", "c", "c", "c"), 
#         caption = "Power by sample size")

calculate detectable effect sizes

The following table shows the detectable effect sizes at 80% power with a significance level of 0.05/1000. Variance of Y is standardized to 1.

Code

calc_mateff = function(mafvec,nvec,outmat=FALSE)
{
  mateff = matrix(NA,length(mafvec)*length(nvec),3)
colnames(mateff) = c("maf","nsam","power")
cont = 1
for(maf in mafvec)
{
  for(nn in nvec)
  {
      # r2 = eff^2 * 2 * maf * (1-maf)
      # rr = sqrt(r2)
      pp = pwr::pwr.r.test(n = nn, power=0.80 , sig.level = alpha)
      eff = pp$r/sqrt(2*maf*(1-maf))
      mateff[cont,] = c(maf,  nn, eff)
      cont = cont + 1
  }
}
  mateff = mateff %>% data.frame() %>% 
  pivot_wider(names_from = nsam, values_from = power) %>% 
  mutate_at(vars(-c(1)), ~round(., 2)) 
  print(mateff)
  if(outmat) mateff
} 

mafvec = c(0.05, 0.10, 0.30) 
nvec = c(100,500,1000,7000) 
nsnps = 1000
alpha = 0.05/nsnps
## ---
mat = calc_mateff(mafvec,nvec,outmat=TRUE)

# A tibble: 3 × 5
    maf `100` `500` `1000` `7000`
  <dbl> <dbl> <dbl>  <dbl>  <dbl>
1  0.05  1.5   0.7    0.5    0.19
2  0.1   1.09  0.51   0.36   0.14
3  0.3   0.71  0.33   0.24   0.09

Code

mat %>%  kable(format = "markdown", align = c("l", "c", "c", "c", "c"), caption = "Detectable effects w/1000 SNPs",label=NA)

Detectable effects w/1000 SNPs
maf	100	500	1000	7000
0.05	1.50	0.70	0.50	0.19
0.10	1.09	0.51	0.36	0.14
0.30	0.71	0.33	0.24	0.09

Code

mafvec = c(0.05, 0.10, 0.30) 
nvec = c(1000,6000,10000,100000) 
nsnps = 1e6
alpha = 0.05/nsnps
## ---
mat = calc_mateff(mafvec,nvec,outmat=TRUE)

# A tibble: 3 × 5
    maf `1000` `6000` `10000` `1e+05`
  <dbl>  <dbl>  <dbl>   <dbl>   <dbl>
1  0.05   0.64   0.26    0.2     0.06
2  0.1    0.46   0.19    0.15    0.05
3  0.3    0.3    0.13    0.1     0.03

Code

mat %>%  kable(format = "markdown", align = c("l", "c", "c", "c", "c"), caption = "Detectable effects w/1e6 SNPs",label=NA)

Detectable effects w/1e6 SNPs
maf	1000	6000	10000	1e+05
0.05	0.64	0.26	0.20	0.06
0.10	0.46	0.19	0.15	0.05
0.30	0.30	0.13	0.10	0.03

power for Xcan association (continuous X)

Code

nvec = c(10000,100000,500000) 
ntests = 10000
alpha = 0.05/ntests

calc_matr = function(nvec,outmat=FALSE)
{
  mateff = matrix(NA,length(nvec),2)
  colnames(mateff) = c("nsam","r")
  cont = 1
  for(nn in nvec)
  {
    pp = pwr::pwr.r.test(n = nn, power=0.80 , sig.level = alpha)
    mateff[cont,] = c(nn, pp$r)
    cont = cont + 1
  }
  mateff = mateff %>% data.frame() %>%  mutate(r2 = r^2) %>% mutate_at(vars(-c(1)), ~signif(., 2)) 
  print(mateff)
  if(outmat) mateff
} 

calc_matr(nvec,outmat=TRUE) %>% knitr::kable()

   nsam      r      r2
1 1e+04 0.0540 2.9e-03
2 1e+05 0.0170 2.9e-04
3 5e+05 0.0077 5.9e-05

nsam	r	r2
1e+04	0.0540	2.9e-03
1e+05	0.0170	2.9e-04
5e+05	0.0077	5.9e-05

For the *-Xcan analysis, sample sizes ranging from 10,000 to 500,000 will detect omic traits explaining 0.29% to 0.0059% of the total variation of the phenotype with a power of 80%.

PORS power calculation

function: R2 from beta assuming variance of y = 1

define ranges of maf, eff, sample sizes

create data frame with all combinations

calculate detectable effect sizes

power for Xcan association (continuous X)

Reuse