suppressMessages(library(tidyverse))suppressMessages(library(glue))PRE ="/Users/haekyungim/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/data-Github/web-data"##PRE="/Users/margaretperry/Library/CloudStorage/Box-Box/imlab-data/data-Github/web-data "##PRE="/Users/temi/Library/CloudStorage/Box-Box/imlab-data/data-Github/web-data"## COPY THE DATE AND SLUG fields FROM THE HEADERSLUG="predictdb-weight-distribution"## copy the slug from the headerbDATE='2023-02-28'## copy the date from the blog's header hereDATA =glue("{PRE}/{bDATE}-{SLUG}")if(!file.exists(DATA)) system(glue::glue("mkdir {DATA}"))WORK=DATAUSERNAME="haekyungim"## move data to DATA#tempodata=("~/Downloads/tempo/gwas_catalog_v1.0.2-associations_e105_r2022-04-07.tsv")#system(glue::glue("cp {tempodata} {DATA}/"))## system(glue("open {DATA}")) ## this will open the folder
download mashr gtex v8 prediction models for various
dbname <-glue("{DATA}/eqtl/mashr_Adipose_Subcutaneous.db") ## add full path if db file not in current directorymyshowdist(dbname,titulo="Adipose expr n=491")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
read weights brain substantia nigra with 101 samples
Code
dbname <-glue("{DATA}/eqtl/mashr_Brain_Substantia_nigra.db") ## add full path if db file not in current directorymyshowdist(dbname,titulo="Brain SN expr - n=101")
db =dbConnect(sqlite,dbname)##weights = dbGetQuery(db, "select * from weights")extra =dbGetQuery(db,"select * from extra")nrow(extra)
[1] 8650
Code
dbDisconnect(db)dbname <-glue("{DATA}/en_Brain_Substantia_nigra.db") ## add full path if db file not in current directoryprint("Brain Substantia nigra models")
---title: "PredictDB weight distribution"author: "Haky Im"date: "2023-02-28"categories: [analysis]format: html: code-fold: true code-summary: "Show the code"editor_options: chunk_output_type: console---Goal: get effect size distribution of omic traits```{r preliminary definitions}suppressMessages(library(tidyverse))suppressMessages(library(glue))PRE ="/Users/haekyungim/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/data-Github/web-data"##PRE="/Users/margaretperry/Library/CloudStorage/Box-Box/imlab-data/data-Github/web-data "##PRE="/Users/temi/Library/CloudStorage/Box-Box/imlab-data/data-Github/web-data"## COPY THE DATE AND SLUG fields FROM THE HEADERSLUG="predictdb-weight-distribution"## copy the slug from the headerbDATE='2023-02-28'## copy the date from the blog's header hereDATA =glue("{PRE}/{bDATE}-{SLUG}")if(!file.exists(DATA)) system(glue::glue("mkdir {DATA}"))WORK=DATAUSERNAME="haekyungim"## move data to DATA#tempodata=("~/Downloads/tempo/gwas_catalog_v1.0.2-associations_e105_r2022-04-07.tsv")#system(glue::glue("cp {tempodata} {DATA}/"))## system(glue("open {DATA}")) ## this will open the folder ```- [ ] download mashr gtex v8 prediction models for various```{r, eval=FALSE}if(F){setwd(DATA)system(glue("wget https://zenodo.org/record/3518299/files/mashr_eqtl.tar?download=1"))system(glue("tar xvf mashr_eqtl.tar"))system(glue("cd eqtl"))system("rm *.txt.gz")}```- [ ] plot distribution, show summary```{r}library("RSQLite")sqlite <-dbDriver("SQLite")myshowdist =function(dbname,titulo="n="){print(dbname)db =dbConnect(sqlite,dbname)weights =dbGetQuery(db, "select * from weights")absweivec =abs(weights$weight)qlines =quantile(absweivec,c(0.2,0.5)) %>%round(2)quantile(absweivec,(0:10)/10) %>%round(2) %>%print()pp <- weights %>%mutate(abswei=abs(weight)) %>%ggplot(aes(abswei))+geom_histogram() +geom_vline(xintercept=qlines) +ggtitle(glue("{titulo}; 20%={qlines[1]}, median={qlines[2]}"))print(pp)dbDisconnect(db)}```- [ ] read weights adipose with 491 samples```{r}dbname <-glue("{DATA}/eqtl/mashr_Adipose_Subcutaneous.db") ## add full path if db file not in current directorymyshowdist(dbname,titulo="Adipose expr n=491")```- [ ] read weights brain substantia nigra with 101 samples```{r}dbname <-glue("{DATA}/eqtl/mashr_Brain_Substantia_nigra.db") ## add full path if db file not in current directorymyshowdist(dbname,titulo="Brain SN expr - n=101")```- [ ] read protein prediction weights```{r}## can download from https://uchicago.box.com/shared/static/m3zsxy3oy8kn5gkktkuo8lvui269hxi5.dbprint("AA")dbname <- glue::glue("/Users/{USERNAME}/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/Within-Lab-Sharing/Sabrina-Data/ARIC/ARIC_AA_hg38.db")myshowdist(dbname,titulo="protein ARIC AA n=1,871")``````{r}## can download from https://uchicago.box.com/shared/static/m3zsxy3oy8kn5gkktkuo8lvui269hxi5.dbprint("EUR")dbname <- glue::glue("/Users/{USERNAME}/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/Within-Lab-Sharing/Sabrina-Data/ARIC/ARIC_EA_hg38.db")myshowdist(dbname,titulo="protein ARIC EA n=7,213")```- [ ] read brainxcan weights (use elastic net to be closer to QTL effect size)```{r}##install.packages("arrow")tempo = arrow::read_parquet(glue::glue("{DATA}/brainxcan-gw_lasso_beta.parquet"))tempo =as.matrix(tempo %>%select(starts_with("IDP")))kk =abs(tempo[tempo!=0])print(summary(kk))print(quantile(kk,(0:10)/10))print(quantile(kk,c(0.2,0.5)))```## get protein predictors with FDR < 0.05just need to count the number of prediction models```{r}print("Protein ARIC models AA")dbname <- glue::glue("/Users/{USERNAME}/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/Within-Lab-Sharing/Sabrina-Data/ARIC/ARIC_AA_hg38.db")print(dbname)db =dbConnect(sqlite,dbname)##weights = dbGetQuery(db, "select * from weights")extra =dbGetQuery(db,"select * from extra")nrow(extra)dbDisconnect(db)print("Protein ARIC models EUR")dbname <- glue::glue("/Users/{USERNAME}/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/Within-Lab-Sharing/Sabrina-Data/ARIC/ARIC_EA_hg38.db")print(dbname)db =dbConnect(sqlite,dbname)##weights = dbGetQuery(db, "select * from weights")extra =dbGetQuery(db,"select * from extra")nrow(extra)dbDisconnect(db)``````{r}dbname <-glue("{DATA}/en_Adipose_Subcutaneous.db") ## add full path if db file not in current directoryprint("Adipose models")print(dbname)db =dbConnect(sqlite,dbname)##weights = dbGetQuery(db, "select * from weights")extra =dbGetQuery(db,"select * from extra")nrow(extra)dbDisconnect(db)dbname <-glue("{DATA}/en_Brain_Substantia_nigra.db") ## add full path if db file not in current directoryprint("Brain Substantia nigra models")print(dbname)db =dbConnect(sqlite,dbname)##weights = dbGetQuery(db, "select * from weights")extra =dbGetQuery(db,"select * from extra")nrow(extra)dbDisconnect(db)```