recount quick access

Author

Haky Im

Published

May 31, 2024

Code
## Install the recount3 R/Bioconductor package, but only when it is missing.
## BiocManager is the installer used for Bioconductor packages, so make sure
## it is available first.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
}
## Calling BiocManager::install() unconditionally re-checks the package on
## every run and emits a "package(s) not installed when version(s) same as or
## greater than current" warning once recount3 is already present, so guard
## it the same way as BiocManager above.
if (!requireNamespace("recount3", quietly = TRUE)) {
    BiocManager::install("recount3")
}
Bioconductor version 3.18 (BiocManager 1.30.23), R 4.3.2 (2023-10-31)
Warning: package(s) not installed when version(s) same as or greater than current; use
  `force = TRUE` to re-install: 'recount3'
Old packages: 'knitr'
Code
## Attach recount3; suppressMessages() hides the startup messages printed
## while the package and its dependencies load (warnings are still shown)
suppressMessages(
    library(recount3)
)
Warning: package 'GenomeInfoDb' was built under R version 4.3.3
Code
## List every human project that recount3 can serve.
## "human" is the default organism; it is spelled out here for clarity.
human_projects <- available_projects(organism = "human")
2024-06-02 10:17:07.210418 caching file sra.recount_project.MD.gz.
2024-06-02 10:17:07.553967 caching file gtex.recount_project.MD.gz.
2024-06-02 10:17:07.926997 caching file tcga.recount_project.MD.gz.
Code
## Pick the project of interest from the table of available projects.
## This document uses the GTEx ADIPOSE_TISSUE project (rather than the
## SRP009615 example study).
## which() discards NA comparisons, matching how subset() treats them.
proj_info <- human_projects[
    which(
        human_projects$project == "ADIPOSE_TISSUE" &
            human_projects$project_type == "data_sources"
    ),
]

## Build the gene-level RangedSummarizedExperiment (RSE) for that project;
## create_rse() downloads and caches the metadata, annotation and counts
rse_gene_adipose_tissue <- create_rse(proj_info)
2024-06-02 10:17:10.105473 downloading and reading the metadata.
2024-06-02 10:17:10.410978 caching file gtex.gtex.ADIPOSE_TISSUE.MD.gz.
2024-06-02 10:17:10.746632 caching file gtex.recount_project.ADIPOSE_TISSUE.MD.gz.
2024-06-02 10:17:11.134876 caching file gtex.recount_qc.ADIPOSE_TISSUE.MD.gz.
2024-06-02 10:17:11.489014 caching file gtex.recount_seq_qc.ADIPOSE_TISSUE.MD.gz.
2024-06-02 10:17:11.880811 downloading and reading the feature information.
2024-06-02 10:17:12.196454 caching file human.gene_sums.G026.gtf.gz.
2024-06-02 10:17:12.785954 downloading and reading the counts: 1293 samples across 63856 features.
2024-06-02 10:17:13.091725 caching file gtex.gene_sums.ADIPOSE_TISSUE.G026.gz.
2024-06-02 10:17:18.285009 constructing the RangedSummarizedExperiment (rse) object.
Code
## Print a summary of the RSE object (dimensions, assays, row/column data)
print(rse_gene_adipose_tissue)
class: RangedSummarizedExperiment 
dim: 63856 1293 
metadata(8): time_created recount3_version ... annotation recount3_url
assays(1): raw_counts
rownames(63856): ENSG00000278704.1 ENSG00000277400.1 ...
  ENSG00000182484.15_PAR_Y ENSG00000227159.8_PAR_Y
rowData names(10): source type ... havana_gene tag
colnames(1293): GTEX-S32W-2226-SM-2XCAY.1 GTEX-WL46-0326-SM-3LK6Y.1 ...
  GTEX-1MA7X-1026-SM-DKPQ8.1 GTEX-1MA7X-2126-SM-E6CIX.1
colData names(198): rail_id external_id ... recount_seq_qc.errq
  BigWigURL
Code
# The accessors used below are already available after attaching recount3
# (presumably via its SummarizedExperiment dependency), so the explicit
# library call stays disabled.
# library(SummarizedExperiment)

# Object-level metadata (creation time, package version, project, ...).
# Note: this variable deliberately keeps the name `metadata`, which shadows
# the accessor function of the same name; later chunks print it.
metadata <- metadata(rse_gene_adipose_tissue)

# Sample information: one row per sample
col_data <- colData(rse_gene_adipose_tissue)

# Gene information: one row per feature
row_data <- rowData(rse_gene_adipose_tissue)

# Count matrix (features x samples) stored in the "raw_counts" assay.
# NOTE(review): the recount3 documentation describes raw_counts as summed
# base-pair coverage rather than read counts -- confirm, and see
# transform_counts() before using these values for downstream analysis.
raw_counts <- assay(rse_gene_adipose_tissue, "raw_counts")

# Preview the first rows of the first four samples
head(raw_counts[, 1:4])
                  GTEX-S32W-2226-SM-2XCAY.1 GTEX-WL46-0326-SM-3LK6Y.1
ENSG00000278704.1                         0                         0
ENSG00000277400.1                         0                         0
ENSG00000274847.1                         0                         0
ENSG00000277428.1                         0                         0
ENSG00000276256.1                         0                         0
ENSG00000278198.1                         0                         0
                  GTEX-144GM-0626-SM-79OKY.1 GTEX-14AS3-2026-SM-5TDD9.1
ENSG00000278704.1                          0                          0
ENSG00000277400.1                          0                          0
ENSG00000274847.1                          0                          0
ENSG00000277428.1                          0                          0
ENSG00000276256.1                          0                          0
ENSG00000278198.1                          0                          0
Code
# Size of the gene information table: number of features x number of
# annotation columns
dim(row_data)
[1] 63856    10
Code
# Column names of the gene information table
colnames(row_data)
 [1] "source"      "type"        "bp_length"   "phase"       "gene_id"    
 [6] "gene_type"   "gene_name"   "level"       "havana_gene" "tag"        
Code
# Size of the sample information table: number of samples x number of
# metadata columns
dim(col_data)
[1] 1293  198
Code
# Column names of the sample information table (GTEx sample attributes
# followed by the recount project / QC fields)
colnames(col_data)
  [1] "rail_id"                                                           
  [2] "external_id"                                                       
  [3] "study"                                                             
  [4] "gtex.run_acc"                                                      
  [5] "gtex.subjid"                                                       
  [6] "gtex.sex"                                                          
  [7] "gtex.age"                                                          
  [8] "gtex.dthhrdy"                                                      
  [9] "gtex.sampid"                                                       
 [10] "gtex.smatsscr"                                                     
 [11] "gtex.smcenter"                                                     
 [12] "gtex.smpthnts"                                                     
 [13] "gtex.smrin"                                                        
 [14] "gtex.smts"                                                         
 [15] "gtex.smtsd"                                                        
 [16] "gtex.smubrid"                                                      
 [17] "gtex.smtsisch"                                                     
 [18] "gtex.smtspax"                                                      
 [19] "gtex.smnabtch"                                                     
 [20] "gtex.smnabtcht"                                                    
 [21] "gtex.smnabtchd"                                                    
 [22] "gtex.smgebtch"                                                     
 [23] "gtex.smgebtchd"                                                    
 [24] "gtex.smgebtcht"                                                    
 [25] "gtex.smafrze"                                                      
 [26] "gtex.smgtc"                                                        
 [27] "gtex.sme2mprt"                                                     
 [28] "gtex.smchmprs"                                                     
 [29] "gtex.smntrart"                                                     
 [30] "gtex.smnumgps"                                                     
 [31] "gtex.smmaprt"                                                      
 [32] "gtex.smexncrt"                                                     
 [33] "gtex.sm550nrm"                                                     
 [34] "gtex.smgnsdtc"                                                     
 [35] "gtex.smunmprt"                                                     
 [36] "gtex.sm350nrm"                                                     
 [37] "gtex.smrdlgth"                                                     
 [38] "gtex.smmncpb"                                                      
 [39] "gtex.sme1mmrt"                                                     
 [40] "gtex.smsflgth"                                                     
 [41] "gtex.smestlbs"                                                     
 [42] "gtex.smmppd"                                                       
 [43] "gtex.smnterrt"                                                     
 [44] "gtex.smrrnanm"                                                     
 [45] "gtex.smrdttl"                                                      
 [46] "gtex.smvqcfl"                                                      
 [47] "gtex.smmncv"                                                       
 [48] "gtex.smtrscpt"                                                     
 [49] "gtex.smmppdpr"                                                     
 [50] "gtex.smcglgth"                                                     
 [51] "gtex.smgappct"                                                     
 [52] "gtex.smunpdrd"                                                     
 [53] "gtex.smntrnrt"                                                     
 [54] "gtex.smmpunrt"                                                     
 [55] "gtex.smexpeff"                                                     
 [56] "gtex.smmppdun"                                                     
 [57] "gtex.sme2mmrt"                                                     
 [58] "gtex.sme2anti"                                                     
 [59] "gtex.smaltalg"                                                     
 [60] "gtex.sme2snse"                                                     
 [61] "gtex.smmflgth"                                                     
 [62] "gtex.sme1anti"                                                     
 [63] "gtex.smspltrd"                                                     
 [64] "gtex.smbsmmrt"                                                     
 [65] "gtex.sme1snse"                                                     
 [66] "gtex.sme1pcts"                                                     
 [67] "gtex.smrrnart"                                                     
 [68] "gtex.sme1mprt"                                                     
 [69] "gtex.smnum5cd"                                                     
 [70] "gtex.smdpmprt"                                                     
 [71] "gtex.sme2pcts"                                                     
 [72] "recount_project.project"                                           
 [73] "recount_project.organism"                                          
 [74] "recount_project.file_source"                                       
 [75] "recount_project.metadata_source"                                   
 [76] "recount_project.date_processed"                                    
 [77] "recount_qc.aligned_reads%.chrm"                                    
 [78] "recount_qc.aligned_reads%.chrx"                                    
 [79] "recount_qc.aligned_reads%.chry"                                    
 [80] "recount_qc.bc_auc.all_reads_all_bases"                             
 [81] "recount_qc.bc_auc.all_reads_annotated_bases"                       
 [82] "recount_qc.bc_auc.unique_reads_all_bases"                          
 [83] "recount_qc.bc_auc.unique_reads_annotated_bases"                    
 [84] "recount_qc.bc_auc.all_%"                                           
 [85] "recount_qc.bc_auc.unique_%"                                        
 [86] "recount_qc.bc_frag.count"                                          
 [87] "recount_qc.bc_frag.kallisto_count"                                 
 [88] "recount_qc.bc_frag.kallisto_mean_length"                           
 [89] "recount_qc.bc_frag.mean_length"                                    
 [90] "recount_qc.bc_frag.mode_length"                                    
 [91] "recount_qc.bc_frag.mode_length_count"                              
 [92] "recount_qc.exon_fc.all_%"                                          
 [93] "recount_qc.exon_fc.unique_%"                                       
 [94] "recount_qc.exon_fc_count_all.total"                                
 [95] "recount_qc.exon_fc_count_all.assigned"                             
 [96] "recount_qc.exon_fc_count_unique.total"                             
 [97] "recount_qc.exon_fc_count_unique.assigned"                          
 [98] "recount_qc.gene_fc.all_%"                                          
 [99] "recount_qc.gene_fc.unique_%"                                       
[100] "recount_qc.gene_fc_count_all.total"                                
[101] "recount_qc.gene_fc_count_all.assigned"                             
[102] "recount_qc.gene_fc_count_unique.total"                             
[103] "recount_qc.gene_fc_count_unique.assigned"                          
[104] "recount_qc.intron_sum"                                             
[105] "recount_qc.intron_sum_%"                                           
[106] "recount_qc.star.%_of_chimeric_reads"                               
[107] "recount_qc.star.%_of_chimeric_reads2"                              
[108] "recount_qc.star.%_of_reads_mapped_to_multiple_loci"                
[109] "recount_qc.star.%_of_reads_mapped_to_multiple_loci2"               
[110] "recount_qc.star.%_of_reads_mapped_to_too_many_loci"                
[111] "recount_qc.star.%_of_reads_mapped_to_too_many_loci2"               
[112] "recount_qc.star.%_of_reads_unmapped:_other"                        
[113] "recount_qc.star.%_of_reads_unmapped:_other2"                       
[114] "recount_qc.star.%_of_reads_unmapped:_too_many_mismatches"          
[115] "recount_qc.star.%_of_reads_unmapped:_too_many_mismatches2"         
[116] "recount_qc.star.%_of_reads_unmapped:_too_short"                    
[117] "recount_qc.star.%_of_reads_unmapped:_too_short2"                   
[118] "recount_qc.star.all_mapped_reads"                                  
[119] "recount_qc.star.all_mapped_reads2"                                 
[120] "recount_qc.star.average_input_read_length"                         
[121] "recount_qc.star.average_input_read_length2"                        
[122] "recount_qc.star.average_mapped_length"                             
[123] "recount_qc.star.average_mapped_length2"                            
[124] "recount_qc.star.deletion_average_length"                           
[125] "recount_qc.star.deletion_average_length2"                          
[126] "recount_qc.star.deletion_rate_per_base"                            
[127] "recount_qc.star.deletion_rate_per_base2"                           
[128] "recount_qc.star.insertion_average_length"                          
[129] "recount_qc.star.insertion_average_length2"                         
[130] "recount_qc.star.insertion_rate_per_base"                           
[131] "recount_qc.star.insertion_rate_per_base2"                          
[132] "recount_qc.star.mapping_speed,_million_of_reads_per_hour"          
[133] "recount_qc.star.mapping_speed,_million_of_reads_per_hour2"         
[134] "recount_qc.star.mismatch_rate_per_base,_%"                         
[135] "recount_qc.star.mismatch_rate_per_base,_%2"                        
[136] "recount_qc.star.number_of_chimeric_reads"                          
[137] "recount_qc.star.number_of_chimeric_reads2"                         
[138] "recount_qc.star.number_of_input_reads"                             
[139] "recount_qc.star.number_of_input_reads2"                            
[140] "recount_qc.star.number_of_reads_mapped_to_multiple_loci"           
[141] "recount_qc.star.number_of_reads_mapped_to_multiple_loci2"          
[142] "recount_qc.star.number_of_reads_mapped_to_too_many_loci"           
[143] "recount_qc.star.number_of_reads_mapped_to_too_many_loci2"          
[144] "recount_qc.star.number_of_reads_unmapped:_other"                   
[145] "recount_qc.star.number_of_reads_unmapped:_other2"                  
[146] "recount_qc.star.number_of_reads_unmapped:_too_many_mismatches"     
[147] "recount_qc.star.number_of_reads_unmapped:_too_many_mismatches2"    
[148] "recount_qc.star.number_of_reads_unmapped:_too_short"               
[149] "recount_qc.star.number_of_reads_unmapped:_too_short2"              
[150] "recount_qc.star.number_of_splices:_at/ac"                          
[151] "recount_qc.star.number_of_splices:_at/ac2"                         
[152] "recount_qc.star.number_of_splices:_annotated_(sjdb)"               
[153] "recount_qc.star.number_of_splices:_annotated_(sjdb)2"              
[154] "recount_qc.star.number_of_splices:_gc/ag"                          
[155] "recount_qc.star.number_of_splices:_gc/ag2"                         
[156] "recount_qc.star.number_of_splices:_gt/ag"                          
[157] "recount_qc.star.number_of_splices:_gt/ag2"                         
[158] "recount_qc.star.number_of_splices:_non-canonical"                  
[159] "recount_qc.star.number_of_splices:_non-canonical2"                 
[160] "recount_qc.star.number_of_splices:_total"                          
[161] "recount_qc.star.number_of_splices:_total2"                         
[162] "recount_qc.star.uniquely_mapped_reads_%"                           
[163] "recount_qc.star.uniquely_mapped_reads_%2"                          
[164] "recount_qc.star.uniquely_mapped_reads_number"                      
[165] "recount_qc.star.uniquely_mapped_reads_number2"                     
[166] "recount_qc.junction_count"                                         
[167] "recount_qc.junction_coverage"                                      
[168] "recount_qc.junction_avg_coverage"                                  
[169] "recount_qc.star.number_of_input_reads_both"                        
[170] "recount_qc.star.all_mapped_reads_both"                             
[171] "recount_qc.star.number_of_chimeric_reads_both"                     
[172] "recount_qc.star.number_of_reads_mapped_to_multiple_loci_both"      
[173] "recount_qc.star.number_of_reads_mapped_to_too_many_loci_both"      
[174] "recount_qc.star.number_of_reads_unmapped:_other_both"              
[175] "recount_qc.star.number_of_reads_unmapped:_too_many_mismatches_both"
[176] "recount_qc.star.number_of_reads_unmapped:_too_short_both"          
[177] "recount_qc.star.uniquely_mapped_reads_number_both"                 
[178] "recount_qc.star.%_mapped_reads_both"                               
[179] "recount_qc.star.%_chimeric_reads_both"                             
[180] "recount_qc.star.%_reads_mapped_to_multiple_loci_both"              
[181] "recount_qc.star.%_reads_mapped_to_too_many_loci_both"              
[182] "recount_qc.star.%_reads_unmapped:_other_both"                      
[183] "recount_qc.star.%_reads_unmapped:_too_many_mismatches_both"        
[184] "recount_qc.star.%_reads_unmapped:_too_short_both"                  
[185] "recount_qc.star.uniquely_mapped_reads_%_both"                      
[186] "recount_seq_qc.min_len"                                            
[187] "recount_seq_qc.max_len"                                            
[188] "recount_seq_qc.avg_len"                                            
[189] "recount_seq_qc.#distinct_quality_values"                           
[190] "recount_seq_qc.#bases"                                             
[191] "recount_seq_qc.%a"                                                 
[192] "recount_seq_qc.%c"                                                 
[193] "recount_seq_qc.%g"                                                 
[194] "recount_seq_qc.%t"                                                 
[195] "recount_seq_qc.%n"                                                 
[196] "recount_seq_qc.avgq"                                               
[197] "recount_seq_qc.errq"                                               
[198] "BigWigURL"                                                         
Code
# Print the object-level metadata list extracted earlier. `metadata` here is
# the variable assigned from metadata(rse_gene_adipose_tissue), which shadows
# the accessor function of the same name.
print(metadata)
$time_created
[1] "2024-06-02 10:17:18 CDT"

$recount3_version
          package ondiskversion loadedversion
recount3 recount3        1.12.0        1.12.0
                                                                                  path
recount3 /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/recount3
                                                                            loadedpath
recount3 /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/recount3
         attached is_base       date       source md5ok
recount3     TRUE   FALSE 2023-10-26 Bioconductor    NA
                                                                      library
recount3 /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library

$project
[1] "ADIPOSE_TISSUE"

$project_home
[1] "data_sources/gtex"

$type
[1] "gene"

$organism
[1] "human"

$annotation
[1] "gencode_v26"

$recount3_url
[1] "http://duffel.rail.bio/recount3"