Code
## Install the recount3 R/Bioconductor package.
## BiocManager is installed first, but only when it is not already available.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("recount3")
Bioconductor version 3.18 (BiocManager 1.30.23), R 4.3.2 (2023-10-31)
Warning: package(s) not installed when version(s) same as or greater than current; use
`force = TRUE` to re-install: 'recount3'
Old packages: 'cli', 'crayon', 'digest', 'downlit', 'evaluate', 'xfun'
Code
## Attach the recount3 package, silencing the messages emitted while it loads
## (warnings are not suppressed and will still be shown)
suppressMessages(library(recount3))
Warning: package 'GenomeInfoDb' was built under R version 4.3.3
Code
## List every human project available through recount3
## ("human" is the default organism; it is spelled out here for clarity)
human_projects <- available_projects(organism = "human")
2024-06-24 12:09:28.916354 caching file sra.recount_project.MD.gz.
2024-06-24 12:09:29.224369 caching file gtex.recount_project.MD.gz.
2024-06-24 12:09:29.518069 caching file tcga.recount_project.MD.gz.
Code
## Select the project of interest.
## The original example used SRP009615; this version uses the GTEx
## ADIPOSE_TISSUE project instead.
proj_info <- subset(
  human_projects,
  project == "ADIPOSE_TISSUE" & project_type == "data_sources"
)

## Build a RangedSummarizedExperiment (RSE) object at the gene level
## ("gene" is the default type; it is spelled out here for clarity)
rse_gene_adipose_tissue <- create_rse(proj_info, type = "gene")
2024-06-24 12:09:31.654101 downloading and reading the metadata.
2024-06-24 12:09:31.998993 caching file gtex.gtex.ADIPOSE_TISSUE.MD.gz.
2024-06-24 12:09:32.318296 caching file gtex.recount_project.ADIPOSE_TISSUE.MD.gz.
2024-06-24 12:09:32.60812 caching file gtex.recount_qc.ADIPOSE_TISSUE.MD.gz.
2024-06-24 12:09:33.334494 caching file gtex.recount_seq_qc.ADIPOSE_TISSUE.MD.gz.
2024-06-24 12:09:33.664105 downloading and reading the feature information.
2024-06-24 12:09:33.914365 caching file human.gene_sums.G026.gtf.gz.
2024-06-24 12:09:34.480166 downloading and reading the counts: 1293 samples across 63856 features.
2024-06-24 12:09:34.759355 caching file gtex.gene_sums.ADIPOSE_TISSUE.G026.gz.
2024-06-24 12:09:39.663544 constructing the RangedSummarizedExperiment (rse) object.
Code
## Explore that RSE object: evaluating the bare name at top level
## auto-prints its summary (class, dimensions, metadata fields, assays,
## row names / rowData columns, and column names / colData columns)
rse_gene_adipose_tissue
class: RangedSummarizedExperiment
dim: 63856 1293
metadata(8): time_created recount3_version ... annotation recount3_url
assays(1): raw_counts
rownames(63856): ENSG00000278704.1 ENSG00000277400.1 ...
ENSG00000182484.15_PAR_Y ENSG00000227159.8_PAR_Y
rowData names(10): source type ... havana_gene tag
colnames(1293): GTEX-S32W-2226-SM-2XCAY.1 GTEX-WL46-0326-SM-3LK6Y.1 ...
GTEX-1MA7X-1026-SM-DKPQ8.1 GTEX-1MA7X-2126-SM-E6CIX.1
colData names(198): rail_id external_id ... recount_seq_qc.errq
BigWigURL
Code
# The accessors below come from SummarizedExperiment. It is not attached
# explicitly here (presumably it is already available after loading
# recount3 -- confirm before uncommenting or removing the next line).
# library(SummarizedExperiment)

# Raw count matrix stored in the "raw_counts" assay
raw_counts <- assay(rse_gene_adipose_tissue, "raw_counts")

# Row data: per-gene annotation
row_data <- rowData(rse_gene_adipose_tissue)

# Column data: per-sample annotation
col_data <- colData(rse_gene_adipose_tissue)

# Object-level metadata.
# NOTE(review): this variable shares its name with the metadata() accessor.
# Calls such as metadata(x) keep working because R skips non-function
# objects when resolving a call, but a distinct name would be clearer.
metadata <- metadata(rse_gene_adipose_tissue)

# Preview the first rows of the count matrix for the first four samples
head(raw_counts)[, seq_len(4)]
GTEX-S32W-2226-SM-2XCAY.1 GTEX-WL46-0326-SM-3LK6Y.1
ENSG00000278704.1 0 0
ENSG00000277400.1 0 0
ENSG00000274847.1 0 0
ENSG00000277428.1 0 0
ENSG00000276256.1 0 0
ENSG00000278198.1 0 0
GTEX-144GM-0626-SM-79OKY.1 GTEX-14AS3-2026-SM-5TDD9.1
ENSG00000278704.1 0 0
ENSG00000277400.1 0 0
ENSG00000274847.1 0 0
ENSG00000277428.1 0 0
ENSG00000276256.1 0 0
ENSG00000278198.1 0 0
Code
Code
[1] "source" "type" "bp_length" "phase" "gene_id"
[6] "gene_type" "gene_name" "level" "havana_gene" "tag"
Code
Code
[1] "rail_id"
[2] "external_id"
[3] "study"
[4] "gtex.run_acc"
[5] "gtex.subjid"
[6] "gtex.sex"
[7] "gtex.age"
[8] "gtex.dthhrdy"
[9] "gtex.sampid"
[10] "gtex.smatsscr"
[11] "gtex.smcenter"
[12] "gtex.smpthnts"
[13] "gtex.smrin"
[14] "gtex.smts"
[15] "gtex.smtsd"
[16] "gtex.smubrid"
[17] "gtex.smtsisch"
[18] "gtex.smtspax"
[19] "gtex.smnabtch"
[20] "gtex.smnabtcht"
[21] "gtex.smnabtchd"
[22] "gtex.smgebtch"
[23] "gtex.smgebtchd"
[24] "gtex.smgebtcht"
[25] "gtex.smafrze"
[26] "gtex.smgtc"
[27] "gtex.sme2mprt"
[28] "gtex.smchmprs"
[29] "gtex.smntrart"
[30] "gtex.smnumgps"
[31] "gtex.smmaprt"
[32] "gtex.smexncrt"
[33] "gtex.sm550nrm"
[34] "gtex.smgnsdtc"
[35] "gtex.smunmprt"
[36] "gtex.sm350nrm"
[37] "gtex.smrdlgth"
[38] "gtex.smmncpb"
[39] "gtex.sme1mmrt"
[40] "gtex.smsflgth"
[41] "gtex.smestlbs"
[42] "gtex.smmppd"
[43] "gtex.smnterrt"
[44] "gtex.smrrnanm"
[45] "gtex.smrdttl"
[46] "gtex.smvqcfl"
[47] "gtex.smmncv"
[48] "gtex.smtrscpt"
[49] "gtex.smmppdpr"
[50] "gtex.smcglgth"
[51] "gtex.smgappct"
[52] "gtex.smunpdrd"
[53] "gtex.smntrnrt"
[54] "gtex.smmpunrt"
[55] "gtex.smexpeff"
[56] "gtex.smmppdun"
[57] "gtex.sme2mmrt"
[58] "gtex.sme2anti"
[59] "gtex.smaltalg"
[60] "gtex.sme2snse"
[61] "gtex.smmflgth"
[62] "gtex.sme1anti"
[63] "gtex.smspltrd"
[64] "gtex.smbsmmrt"
[65] "gtex.sme1snse"
[66] "gtex.sme1pcts"
[67] "gtex.smrrnart"
[68] "gtex.sme1mprt"
[69] "gtex.smnum5cd"
[70] "gtex.smdpmprt"
[71] "gtex.sme2pcts"
[72] "recount_project.project"
[73] "recount_project.organism"
[74] "recount_project.file_source"
[75] "recount_project.metadata_source"
[76] "recount_project.date_processed"
[77] "recount_qc.aligned_reads%.chrm"
[78] "recount_qc.aligned_reads%.chrx"
[79] "recount_qc.aligned_reads%.chry"
[80] "recount_qc.bc_auc.all_reads_all_bases"
[81] "recount_qc.bc_auc.all_reads_annotated_bases"
[82] "recount_qc.bc_auc.unique_reads_all_bases"
[83] "recount_qc.bc_auc.unique_reads_annotated_bases"
[84] "recount_qc.bc_auc.all_%"
[85] "recount_qc.bc_auc.unique_%"
[86] "recount_qc.bc_frag.count"
[87] "recount_qc.bc_frag.kallisto_count"
[88] "recount_qc.bc_frag.kallisto_mean_length"
[89] "recount_qc.bc_frag.mean_length"
[90] "recount_qc.bc_frag.mode_length"
[91] "recount_qc.bc_frag.mode_length_count"
[92] "recount_qc.exon_fc.all_%"
[93] "recount_qc.exon_fc.unique_%"
[94] "recount_qc.exon_fc_count_all.total"
[95] "recount_qc.exon_fc_count_all.assigned"
[96] "recount_qc.exon_fc_count_unique.total"
[97] "recount_qc.exon_fc_count_unique.assigned"
[98] "recount_qc.gene_fc.all_%"
[99] "recount_qc.gene_fc.unique_%"
[100] "recount_qc.gene_fc_count_all.total"
[101] "recount_qc.gene_fc_count_all.assigned"
[102] "recount_qc.gene_fc_count_unique.total"
[103] "recount_qc.gene_fc_count_unique.assigned"
[104] "recount_qc.intron_sum"
[105] "recount_qc.intron_sum_%"
[106] "recount_qc.star.%_of_chimeric_reads"
[107] "recount_qc.star.%_of_chimeric_reads2"
[108] "recount_qc.star.%_of_reads_mapped_to_multiple_loci"
[109] "recount_qc.star.%_of_reads_mapped_to_multiple_loci2"
[110] "recount_qc.star.%_of_reads_mapped_to_too_many_loci"
[111] "recount_qc.star.%_of_reads_mapped_to_too_many_loci2"
[112] "recount_qc.star.%_of_reads_unmapped:_other"
[113] "recount_qc.star.%_of_reads_unmapped:_other2"
[114] "recount_qc.star.%_of_reads_unmapped:_too_many_mismatches"
[115] "recount_qc.star.%_of_reads_unmapped:_too_many_mismatches2"
[116] "recount_qc.star.%_of_reads_unmapped:_too_short"
[117] "recount_qc.star.%_of_reads_unmapped:_too_short2"
[118] "recount_qc.star.all_mapped_reads"
[119] "recount_qc.star.all_mapped_reads2"
[120] "recount_qc.star.average_input_read_length"
[121] "recount_qc.star.average_input_read_length2"
[122] "recount_qc.star.average_mapped_length"
[123] "recount_qc.star.average_mapped_length2"
[124] "recount_qc.star.deletion_average_length"
[125] "recount_qc.star.deletion_average_length2"
[126] "recount_qc.star.deletion_rate_per_base"
[127] "recount_qc.star.deletion_rate_per_base2"
[128] "recount_qc.star.insertion_average_length"
[129] "recount_qc.star.insertion_average_length2"
[130] "recount_qc.star.insertion_rate_per_base"
[131] "recount_qc.star.insertion_rate_per_base2"
[132] "recount_qc.star.mapping_speed,_million_of_reads_per_hour"
[133] "recount_qc.star.mapping_speed,_million_of_reads_per_hour2"
[134] "recount_qc.star.mismatch_rate_per_base,_%"
[135] "recount_qc.star.mismatch_rate_per_base,_%2"
[136] "recount_qc.star.number_of_chimeric_reads"
[137] "recount_qc.star.number_of_chimeric_reads2"
[138] "recount_qc.star.number_of_input_reads"
[139] "recount_qc.star.number_of_input_reads2"
[140] "recount_qc.star.number_of_reads_mapped_to_multiple_loci"
[141] "recount_qc.star.number_of_reads_mapped_to_multiple_loci2"
[142] "recount_qc.star.number_of_reads_mapped_to_too_many_loci"
[143] "recount_qc.star.number_of_reads_mapped_to_too_many_loci2"
[144] "recount_qc.star.number_of_reads_unmapped:_other"
[145] "recount_qc.star.number_of_reads_unmapped:_other2"
[146] "recount_qc.star.number_of_reads_unmapped:_too_many_mismatches"
[147] "recount_qc.star.number_of_reads_unmapped:_too_many_mismatches2"
[148] "recount_qc.star.number_of_reads_unmapped:_too_short"
[149] "recount_qc.star.number_of_reads_unmapped:_too_short2"
[150] "recount_qc.star.number_of_splices:_at/ac"
[151] "recount_qc.star.number_of_splices:_at/ac2"
[152] "recount_qc.star.number_of_splices:_annotated_(sjdb)"
[153] "recount_qc.star.number_of_splices:_annotated_(sjdb)2"
[154] "recount_qc.star.number_of_splices:_gc/ag"
[155] "recount_qc.star.number_of_splices:_gc/ag2"
[156] "recount_qc.star.number_of_splices:_gt/ag"
[157] "recount_qc.star.number_of_splices:_gt/ag2"
[158] "recount_qc.star.number_of_splices:_non-canonical"
[159] "recount_qc.star.number_of_splices:_non-canonical2"
[160] "recount_qc.star.number_of_splices:_total"
[161] "recount_qc.star.number_of_splices:_total2"
[162] "recount_qc.star.uniquely_mapped_reads_%"
[163] "recount_qc.star.uniquely_mapped_reads_%2"
[164] "recount_qc.star.uniquely_mapped_reads_number"
[165] "recount_qc.star.uniquely_mapped_reads_number2"
[166] "recount_qc.junction_count"
[167] "recount_qc.junction_coverage"
[168] "recount_qc.junction_avg_coverage"
[169] "recount_qc.star.number_of_input_reads_both"
[170] "recount_qc.star.all_mapped_reads_both"
[171] "recount_qc.star.number_of_chimeric_reads_both"
[172] "recount_qc.star.number_of_reads_mapped_to_multiple_loci_both"
[173] "recount_qc.star.number_of_reads_mapped_to_too_many_loci_both"
[174] "recount_qc.star.number_of_reads_unmapped:_other_both"
[175] "recount_qc.star.number_of_reads_unmapped:_too_many_mismatches_both"
[176] "recount_qc.star.number_of_reads_unmapped:_too_short_both"
[177] "recount_qc.star.uniquely_mapped_reads_number_both"
[178] "recount_qc.star.%_mapped_reads_both"
[179] "recount_qc.star.%_chimeric_reads_both"
[180] "recount_qc.star.%_reads_mapped_to_multiple_loci_both"
[181] "recount_qc.star.%_reads_mapped_to_too_many_loci_both"
[182] "recount_qc.star.%_reads_unmapped:_other_both"
[183] "recount_qc.star.%_reads_unmapped:_too_many_mismatches_both"
[184] "recount_qc.star.%_reads_unmapped:_too_short_both"
[185] "recount_qc.star.uniquely_mapped_reads_%_both"
[186] "recount_seq_qc.min_len"
[187] "recount_seq_qc.max_len"
[188] "recount_seq_qc.avg_len"
[189] "recount_seq_qc.#distinct_quality_values"
[190] "recount_seq_qc.#bases"
[191] "recount_seq_qc.%a"
[192] "recount_seq_qc.%c"
[193] "recount_seq_qc.%g"
[194] "recount_seq_qc.%t"
[195] "recount_seq_qc.%n"
[196] "recount_seq_qc.avgq"
[197] "recount_seq_qc.errq"
[198] "BigWigURL"
Code
$time_created
[1] "2024-06-24 12:09:39 CDT"
$recount3_version
package ondiskversion loadedversion
recount3 recount3 1.12.0 1.12.0
path
recount3 /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/recount3
loadedpath
recount3 /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/recount3
attached is_base date source md5ok
recount3 TRUE FALSE 2023-10-26 Bioconductor NA
library
recount3 /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
$project
[1] "ADIPOSE_TISSUE"
$project_home
[1] "data_sources/gtex"
$type
[1] "gene"
$organism
[1] "human"
$annotation
[1] "gencode_v26"
$recount3_url
[1] "http://duffel.rail.bio/recount3"