# Activate the conda environment named "imlabtools" before running the commands below.
conda activate imlabtools





# Load the table returned by load_gencode_df() (presumably GENCODE gene
# annotation — confirm); it is passed to load_spredixcan_association() below.
gencode_df <- load_gencode_df()

Run S-PrediXcan

Run S-PrediXcan with the ARIC model.

# Run S-PrediXcan on the imputed CARDIoGRAM C4D CAD GWAS using the ARIC EA hg38
# model database and covariance file; results go to $RESULTS/CAD_ARIC_hg38.csv.
# NOTE(review): the script name was missing after "$METAXCAN/" (a directory
# cannot be run by python). SPrediXcan.py is MetaXcan's S-PrediXcan entry
# point — confirm that $METAXCAN points at the MetaXcan "software" directory.
python $METAXCAN/SPrediXcan.py \
  --gwas_file $DATA/imputed_CARDIoGRAM_C4D_CAD_ADDITIVE.txt.gz \
  --snp_column panel_variant_id \
  --effect_allele_column effect_allele \
  --non_effect_allele_column non_effect_allele \
  --zscore_column zscore \
  --model_db_path $MODEL/ARIC_EA_hg38.db \
  --covariance $MODEL/ARIC_EA_hg38.txt.gz \
  --keep_non_rsid --additional_output --model_db_snp_key varID \
  --throw \
  --output_file $RESULTS/CAD_ARIC_hg38.csv

Then run S-PrediXcan with the mashr Whole Blood model.

# Run S-PrediXcan on the same imputed CARDIoGRAM C4D CAD GWAS using the mashr
# Whole Blood model database and covariance file; results go to
# $RESULTS/CAD_mashr_Whole_Blood.csv.
# NOTE(review): the script name was missing after "$METAXCAN/" (a directory
# cannot be run by python). SPrediXcan.py is MetaXcan's S-PrediXcan entry
# point — confirm that $METAXCAN points at the MetaXcan "software" directory.
python $METAXCAN/SPrediXcan.py \
  --gwas_file $DATA/imputed_CARDIoGRAM_C4D_CAD_ADDITIVE.txt.gz \
  --snp_column panel_variant_id \
  --effect_allele_column effect_allele \
  --non_effect_allele_column non_effect_allele \
  --zscore_column zscore \
  --model_db_path $MODEL/mashr_Whole_Blood.db \
  --covariance $MODEL/mashr_Whole_Blood.txt.gz \
  --keep_non_rsid --additional_output --model_db_snp_key varID \
  --throw \
  --output_file $RESULTS/CAD_mashr_Whole_Blood.csv

Compare Association Results

# Load the S-PrediXcan results written for the ARIC model; gencode_df is
# passed as the second argument to the loader.
spredixcan_association_ARIC <- load_spredixcan_association(
  glue::glue("{RESULTS}/CAD_ARIC_hg38.csv"),
  gencode_df
)
[1] 1318   16
# Keep the ARIC associations that pass a Bonferroni threshold
# (0.05 divided by the number of rows in the table), smallest p-value first.
significant_genes_ARIC <- spredixcan_association_ARIC %>%
  filter(pvalue < 0.05 / nrow(spredixcan_association_ARIC)) %>%
  arrange(pvalue)
# Load the S-PrediXcan results written for the mashr Whole Blood model;
# gencode_df is passed as the second argument to the loader.
spredixcan_association_Whole_Blood <- load_spredixcan_association(
  glue::glue("{RESULTS}/CAD_mashr_Whole_Blood.csv"),
  gencode_df
)
[1] 12587    16
# Keep the mashr Whole Blood associations that pass a Bonferroni threshold
# (0.05 divided by the number of rows in the table), smallest p-value first.
significant_genes_Whole_Blood <- spredixcan_association_Whole_Blood %>%
  filter(pvalue < 0.05 / nrow(spredixcan_association_Whole_Blood)) %>%
  arrange(pvalue)

Next, compare the z-scores from the ARIC and mashr Whole Blood models for the genes present in both result sets.

# Pair the two result sets on gene ID, keeping only genes present in both.
# Columns that exist in both inputs get the ".x" (mashr Whole Blood) and
# ".y" (ARIC) suffixes.
# NOTE(review): the ARIC significant-gene table printed further down lists
# ENSG00000158710 twice, so `gene` may not be unique in the ARIC results and
# this join could duplicate rows — confirm.
zscores <- inner_join(
  spredixcan_association_Whole_Blood,
  spredixcan_association_ARIC,
  by = "gene"
)
[1] 815  31
# Scatter plot of the mashr Whole Blood z-score (x) against the ARIC z-score (y)
# for each joined gene, with a reference line of intercept 0 and slope 1 (y = x).
zscores %>%
  ggplot(aes(x = zscore.x, y = zscore.y)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1) +
  labs(title = "S-PrediXcan z-score", x = "mashr Whole Blood", y = "ARIC")
Warning: Removed 1 rows containing missing values (geom_point).

We can compare the significant genes found with the ARIC and mashr Whole Blood models.

# Show the gene ID and z-score of the significant ARIC genes. Columns are
# selected by name rather than by position so the output does not depend on
# column order.
significant_genes_ARIC[, c("gene", "zscore")]
             gene    zscore
1 ENSG00000186063 -6.914873
2 ENSG00000160712 -5.813218
3 ENSG00000169174  5.326004
4 ENSG00000133789 -5.085480
5 ENSG00000107562  4.976651
6 ENSG00000158710  4.567579
7 ENSG00000158710  4.567579
8 ENSG00000175573  4.125269
# Show the gene ID and z-score of the significant mashr Whole Blood genes.
# Columns are selected by name rather than by position so the output does not
# depend on column order.
significant_genes_Whole_Blood[, c("gene", "zscore")]
              gene    zscore
1  ENSG00000134222 -9.263287
2  ENSG00000107798  7.525837
3  ENSG00000163596  7.210367
4  ENSG00000138380 -5.982736
5  ENSG00000160712  5.744854
6  ENSG00000127616  5.703439
7  ENSG00000183431 -5.382606
8  ENSG00000182511 -5.373519
9  ENSG00000115486  5.258467
10 ENSG00000084093 -5.204898
11 ENSG00000143498  4.920139
12 ENSG00000031698 -4.747462
13 ENSG00000168906 -4.698991
14 ENSG00000130475 -4.683521
15 ENSG00000119718  4.681344
intersect(significant_genes_ARIC$gene, significant_genes_Whole_Blood$gene)
[1] "ENSG00000160712"

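There is only one gene found significant in both, ENSG00000160712. Note that its z-score has opposite signs in the two analyses (-5.81 with the ARIC model versus 5.74 with the mashr Whole Blood model).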

