Code
suppressMessages(library(tidyverse))
suppressMessages(library(knitr))
## install.packages("pwr")
#if (!("pwr" %in% installed.packages()[, 1])) {
# install.packages("pwr")
#}
Haky Im
February 21, 2023
The success of the prediction training depends mostly on whether the corresponding QTL study will be powered. Thus, we provide here the power to detect molecular QTLs for a range of sample sizes, effect sizes, and minor allele frequencies.
\[y = \delta \cdot x + \epsilon\]
\[r^2 = \delta^2 \cdot \text{var}(x) = \delta^2 \cdot 2 \cdot \text{maf} \cdot (1-\text{maf})\]
# mat = matrix(NA,length(mafvec)*length(effvec)*length(nvec),4)
# colnames(mat) = c("maf","eff","nsam","power")
# cont = 1
# for(maf in mafvec)
# {
# for(nn in nvec)
# {
# for(eff in effvec)
# {
# r2 = eff^2 * 2 * maf * (1-maf)
# rr = sqrt(r2)
# pp = pwr::pwr.r.test(n = nn, r= rr , sig.level = alpha)
# mat[cont,] = c(maf, eff, nn, pp$power)
# cont = cont + 1
# }
# }
# }
#
# mat %>% data.frame() %>%
# pivot_wider(names_from = nsam, values_from = power) %>%
# mutate(across(3:ncol(.), ~sprintf("%.1f%%", . * 100))) %>%
# kable(format = "markdown", align = c("l", "l", "c", "c", "c"),
# caption = "Power by sample size")
The following table shows the detectable effect sizes at 80% power with a significance level of 0.05/1000. Variance of Y is standardized to 1.
calc_mateff = function(mafvec,nvec,outmat=FALSE)
{
mateff = matrix(NA,length(mafvec)*length(nvec),3)
colnames(mateff) = c("maf","nsam","power")
cont = 1
for(maf in mafvec)
{
for(nn in nvec)
{
# r2 = eff^2 * 2 * maf * (1-maf)
# rr = sqrt(r2)
pp = pwr::pwr.r.test(n = nn, power=0.80 , sig.level = alpha)
eff = pp$r/sqrt(2*maf*(1-maf))
mateff[cont,] = c(maf, nn, eff)
cont = cont + 1
}
}
mateff = mateff %>% data.frame() %>%
pivot_wider(names_from = nsam, values_from = power) %>%
mutate_at(vars(-c(1)), ~round(., 2))
print(mateff)
if(outmat) mateff
}
mafvec = c(0.05, 0.10, 0.30)
nvec = c(100,500,1000,7000)
nsnps = 1000
alpha = 0.05/nsnps
## ---
mat = calc_mateff(mafvec,nvec,outmat=TRUE)
# A tibble: 3 × 5
maf `100` `500` `1000` `7000`
<dbl> <dbl> <dbl> <dbl> <dbl>
1 0.05 1.5 0.7 0.5 0.19
2 0.1 1.09 0.51 0.36 0.14
3 0.3 0.71 0.33 0.24 0.09
maf | 100 | 500 | 1000 | 7000 |
---|---|---|---|---|
0.05 | 1.50 | 0.70 | 0.50 | 0.19 |
0.10 | 1.09 | 0.51 | 0.36 | 0.14 |
0.30 | 0.71 | 0.33 | 0.24 | 0.09 |
# A tibble: 3 × 5
maf `1000` `6000` `10000` `1e+05`
<dbl> <dbl> <dbl> <dbl> <dbl>
1 0.05 0.64 0.26 0.2 0.06
2 0.1 0.46 0.19 0.15 0.05
3 0.3 0.3 0.13 0.1 0.03
maf | 1000 | 6000 | 10000 | 1e+05 |
---|---|---|---|---|
0.05 | 0.64 | 0.26 | 0.20 | 0.06 |
0.10 | 0.46 | 0.19 | 0.15 | 0.05 |
0.30 | 0.30 | 0.13 | 0.10 | 0.03 |
nvec = c(10000,100000,500000)
ntests = 10000
alpha = 0.05/ntests
calc_matr = function(nvec,outmat=FALSE)
{
mateff = matrix(NA,length(nvec),2)
colnames(mateff) = c("nsam","r")
cont = 1
for(nn in nvec)
{
pp = pwr::pwr.r.test(n = nn, power=0.80 , sig.level = alpha)
mateff[cont,] = c(nn, pp$r)
cont = cont + 1
}
mateff = mateff %>% data.frame() %>% mutate(r2 = r^2) %>% mutate_at(vars(-c(1)), ~signif(., 2))
print(mateff)
if(outmat) mateff
}
calc_matr(nvec,outmat=TRUE) %>% knitr::kable()
nsam r r2
1 1e+04 0.0540 2.9e-03
2 1e+05 0.0170 2.9e-04
3 5e+05 0.0077 5.9e-05
nsam | r | r2 |
---|---|---|
1e+04 | 0.0540 | 2.9e-03 |
1e+05 | 0.0170 | 2.9e-04 |
5e+05 | 0.0077 | 5.9e-05 |
For the *-Xcan analysis, sample sizes ranging from 10,000 to 500,000 will detect omic traits explaining 0.29% to 0.0059% of the total variation of the phenotype with a power of 80%.
---
title: Power calculator for mol QTLs
author: Haky Im
date: '2023-02-21'
slug: power-calculator-for-mol-qtls
categories:
- how to
tags: []
editor_options:
chunk_output_type: console
---
## PORS power calculation
The success of the prediction training depends mostly on whether the corresponding QTL study will be powered. Thus, we provide here the power to detect molecular QTLs for a range of sample sizes, effect sizes, and minor allele frequencies.
## function: R2 from beta assuming variance of y = 1
$$y = \delta \cdot x + \epsilon$$
$$r^2 = \delta^2 \cdot \text{var}(x) = \delta^2 \cdot 2 \cdot \text{maf} \cdot (1-\text{maf})$$
```{r}
suppressMessages(library(tidyverse))
suppressMessages(library(knitr))
## install.packages("pwr")
#if (!("pwr" %in% installed.packages()[, 1])) {
# install.packages("pwr")
#}
```
## define ranges of maf, eff, sample sizes
```{r}
mafvec = c(0.05, 0.10, 0.30)
effvec = c(0.40, 0.60, 0.80)
nvec = c(200,350,500)
nsnps = 1000
alpha = 0.05/nsnps
```
## create data frame with all combinations
```{r}
# mat = matrix(NA,length(mafvec)*length(effvec)*length(nvec),4)
# colnames(mat) = c("maf","eff","nsam","power")
# cont = 1
# for(maf in mafvec)
# {
# for(nn in nvec)
# {
# for(eff in effvec)
# {
# r2 = eff^2 * 2 * maf * (1-maf)
# rr = sqrt(r2)
# pp = pwr::pwr.r.test(n = nn, r= rr , sig.level = alpha)
# mat[cont,] = c(maf, eff, nn, pp$power)
# cont = cont + 1
# }
# }
# }
#
# mat %>% data.frame() %>%
# pivot_wider(names_from = nsam, values_from = power) %>%
# mutate(across(3:ncol(.), ~sprintf("%.1f%%", . * 100))) %>%
# kable(format = "markdown", align = c("l", "l", "c", "c", "c"),
# caption = "Power by sample size")
```
## calculate detectable effect sizes
The following table shows the detectable effect sizes at 80% power with a significance level of 0.05/`r nsnps`. Variance of Y is standardized to 1.
```{r}
calc_mateff = function(mafvec,nvec,outmat=FALSE)
{
mateff = matrix(NA,length(mafvec)*length(nvec),3)
colnames(mateff) = c("maf","nsam","power")
cont = 1
for(maf in mafvec)
{
for(nn in nvec)
{
# r2 = eff^2 * 2 * maf * (1-maf)
# rr = sqrt(r2)
pp = pwr::pwr.r.test(n = nn, power=0.80 , sig.level = alpha)
eff = pp$r/sqrt(2*maf*(1-maf))
mateff[cont,] = c(maf, nn, eff)
cont = cont + 1
}
}
mateff = mateff %>% data.frame() %>%
pivot_wider(names_from = nsam, values_from = power) %>%
mutate_at(vars(-c(1)), ~round(., 2))
print(mateff)
if(outmat) mateff
}
mafvec = c(0.05, 0.10, 0.30)
nvec = c(100,500,1000,7000)
nsnps = 1000
alpha = 0.05/nsnps
## ---
mat = calc_mateff(mafvec,nvec,outmat=TRUE)
mat %>% kable(format = "markdown", align = c("l", "c", "c", "c", "c"), caption = "Detectable effects w/1000 SNPs",label=NA)
mafvec = c(0.05, 0.10, 0.30)
nvec = c(1000,6000,10000,100000)
nsnps = 1e6
alpha = 0.05/nsnps
## ---
mat = calc_mateff(mafvec,nvec,outmat=TRUE)
mat %>% kable(format = "markdown", align = c("l", "c", "c", "c", "c"), caption = "Detectable effects w/1e6 SNPs",label=NA)
```
## power for Xcan association (continuous X)
```{r}
nvec = c(10000,100000,500000)
ntests = 10000
alpha = 0.05/ntests
calc_matr = function(nvec,outmat=FALSE)
{
mateff = matrix(NA,length(nvec),2)
colnames(mateff) = c("nsam","r")
cont = 1
for(nn in nvec)
{
pp = pwr::pwr.r.test(n = nn, power=0.80 , sig.level = alpha)
mateff[cont,] = c(nn, pp$r)
cont = cont + 1
}
mateff = mateff %>% data.frame() %>% mutate(r2 = r^2) %>% mutate_at(vars(-c(1)), ~signif(., 2))
print(mateff)
if(outmat) mateff
}
calc_matr(nvec,outmat=TRUE) %>% knitr::kable()
```
For the \*-Xcan analysis, sample sizes ranging from 10,000 to 500,000 will detect omic traits explaining 0.29% to 0.0059% of the total variation of the phenotype with a power of 80%.