# Setup: load packages and define where this post's data lives.
suppressMessages(library(tidyverse))
suppressMessages(library(glue))

# Root of the shared web-data folder. Each user keeps their own path here and
# comments out the others.
PRE <- "/Users/haekyungim/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/data-Github/web-data"
## PRE="/Users/margaretperry/Library/CloudStorage/Box-Box/imlab-data/data-Github/web-data "
## PRE="/Users/temi/Library/CloudStorage/Box-Box/imlab-data/data-Github/web-data"

## COPY THE DATE AND SLUG fields FROM THE HEADER
SLUG <- "cistromedb-data" # copy the slug from the header
bDATE <- "2023-03-28" # copy the date from the blog's header here

# Data folder for this post: <PRE>/<date>-<slug>
DATA <- glue("{PRE}/{bDATE}-{SLUG}")

# Create the folder from R instead of shelling out to `mkdir`: an unquoted
# path interpolated into a shell command breaks on spaces and is not portable.
# Non-recursive on purpose, so a wrong PRE produces a warning rather than a
# freshly created bogus directory tree.
if (!dir.exists(DATA)) {
  dir.create(DATA)
}
WORK <- DATA

## move data to DATA
# tempodata=("~/Downloads/tempo/gwas_catalog_v1.0.2-associations_e105_r2022-04-07.tsv")
# system(glue::glue("cp {tempodata} {DATA}/"))
## system(glue("open {DATA}")) ## this will open the folder
# Read the tab-separated QC table stored in the post's data folder.
data <- glue("{DATA}/human_factor_full_QC.txt") %>%
  read_tsv()
Rows: 11348 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (6): Species, GSMID, Factor, Cell_line, Cell_type, Tissue_type
dbl (7): DCid, FastQC, UniquelyMappedRatio, PBC, PeaksFoldChangeAbove10, FRi...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Open question: are the rows with Cell_line == "None" non-tumor samples?
## As a first look, report the dimensions of that subset.
dim(filter(data, Cell_line == "None"))
[1] 1817 13
## Number of distinct cell lines: first element of the dimensions of the
## per-value count table (one row per Cell_line value).
dim(count(data, Cell_line))
[1] 520 2
## Number of distinct cell types: first element of the dimensions of the
## per-value count table (one row per Cell_type value).
dim(count(data, Cell_type))
[1] 153 2
## Number of distinct tissue types: first element of the dimensions of the
## per-value count table (one row per Tissue_type value).
dim(count(data, Tissue_type))
[1] 84 2
Source Code
---
title: "Cistrome DB data"
author: "Haky Im"
date: "2023-03-28"
format:
  html:
    code-fold: false
    code-summary: "Show the code"
editor_options:
  chunk_output_type: console
---

```{r}
# Setup: load packages and define where this post's data lives.
suppressMessages(library(tidyverse))
suppressMessages(library(glue))

# Root of the shared web-data folder. Each user keeps their own path here and
# comments out the others.
PRE <- "/Users/haekyungim/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/data-Github/web-data"
## PRE="/Users/margaretperry/Library/CloudStorage/Box-Box/imlab-data/data-Github/web-data "
## PRE="/Users/temi/Library/CloudStorage/Box-Box/imlab-data/data-Github/web-data"

## COPY THE DATE AND SLUG fields FROM THE HEADER
SLUG <- "cistromedb-data" # copy the slug from the header
bDATE <- "2023-03-28" # copy the date from the blog's header here

# Data folder for this post: <PRE>/<date>-<slug>
DATA <- glue("{PRE}/{bDATE}-{SLUG}")

# Create the folder from R instead of shelling out to `mkdir`: an unquoted
# path interpolated into a shell command breaks on spaces and is not portable.
# Non-recursive on purpose, so a wrong PRE produces a warning rather than a
# freshly created bogus directory tree.
if (!dir.exists(DATA)) {
  dir.create(DATA)
}
WORK <- DATA

## move data to DATA
# tempodata=("~/Downloads/tempo/gwas_catalog_v1.0.2-associations_e105_r2022-04-07.tsv")
# system(glue::glue("cp {tempodata} {DATA}/"))
## system(glue("open {DATA}")) ## this will open the folder
```

```{r}
# Read the tab-separated QC table stored in the post's data folder.
data <- read_tsv(glue("{DATA}/human_factor_full_QC.txt"))

names(data)

# Number of distinct (Factor, Cell_line, Cell_type, Tissue_type) combinations.
data %>%
  select(Factor, Cell_line, Cell_type, Tissue_type) %>%
  unique() %>%
  dim()

# Rows per combination, most frequent first.
data %>%
  count(Factor, Cell_line, Cell_type, Tissue_type) %>%
  arrange(desc(n))

# Distribution of the per-combination row counts.
data %>%
  count(Factor, Cell_line, Cell_type, Tissue_type) %>%
  .[["n"]] %>%
  table()

## are cell line==None non tumor?
data %>%
  filter(Cell_line == "None") %>%
  dim()

## how many unique cell lines?
data %>%
  count(Cell_line) %>%
  dim()

## how many unique cell types?
data %>%
  count(Cell_type) %>%
  dim()

## how many unique tissue types?
data %>%
  count(Tissue_type) %>%
  dim()
```