Code
suppressMessages(library(tidyverse))
suppressMessages(library(glue))
suppressMessages(library(RSQLite))
Haky Im
July 28, 2021
Adding the cytogenetic band to genes is convenient because it provides a somewhat memorable name for the genomic region where the gene is located. The biomaRt package in Bioconductor provides the database and the functions for the annotation.
add cytogenetic band to genes
# One-time setup: install biomaRt through Bioconductor if it is missing.
# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# BiocManager::install("biomaRt")
## You may want to restart R/RStudio after installing BiocManager.
library(biomaRt)

# Connect to the human gene dataset of the Ensembl BioMart, pinned to the
# "useast" mirror. Alternative without an explicit mirror:
# ensembl <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = "hsapiens_gene_ensembl")
ensembl <- useEnsembl(
  biomart = "ensembl",
  dataset = "hsapiens_gene_ensembl",
  mirror = "useast"
)
mirror = "useast" was necessary to make quarto render work; interactive runs seem to be redirected to the useast mirror automatically, but quarto render is not.
## Query BioMart for the per-gene annotation, including the cytogenetic band
anno_gene <- getBM(
  attributes = c(
    "ensembl_gene_id",
    "external_gene_name",
    "chromosome_name",
    "start_position",
    "end_position",
    "band",
    "gene_biotype"
  ),
  mart = ensembl
)
#' Annotate genes with their cytogenetic band
#'
#' Left-joins `df` to the biomaRt gene annotation table `anno_gene` and adds a
#' `cytoband` column made of the chromosome name followed by the band.
#'
#' @param df Data frame containing a column of Ensembl gene ids.
#' @param geneid Name (a single string) of the column of `df` that holds the
#'   Ensembl gene ids. Defaults to "ensembl_gene_id".
#' @return `df` with the columns of `anno_gene` appended and a `cytoband`
#'   column. Rows whose gene id has no match in `anno_gene` get `NA` in the
#'   annotation columns and in `cytoband`.
addband2gene <- function(df, geneid = "ensembl_gene_id") {
  stopifnot(
    is.data.frame(df),
    is.character(geneid),
    length(geneid) == 1,
    geneid %in% names(df)
  )

  # Download the annotation only when no `anno_gene` object can be found, and
  # cache it with `<<-` so that later calls reuse it instead of querying again.
  if (!exists("anno_gene")) {
    ensembl <- useMart(
      biomart = "ENSEMBL_MART_ENSEMBL",
      dataset = "hsapiens_gene_ensembl"
    )
    anno_gene <<- getBM(
      attributes = c(
        "ensembl_gene_id",
        "external_gene_name",
        "chromosome_name",
        "start_position",
        "end_position",
        "band",
        "gene_biotype"
      ),
      mart = ensembl
    )
    message("defined anno_gene")
  }

  # A named `by` matches df[[geneid]] against anno_gene$ensembl_gene_id
  # directly. This replaces renaming the id column back and forth, which
  # clobbered any other column of `df` already called "ensembl_gene_id".
  join_key <- stats::setNames("ensembl_gene_id", geneid)
  annotated <- left_join(df, anno_gene, by = join_key)

  # paste0() would turn missing values into the literal string "NANA", so
  # genes without annotation get an explicit NA instead.
  mutate(
    annotated,
    cytoband = if_else(
      is.na(chromosome_name) | is.na(band),
      NA_character_,
      paste0(chromosome_name, band)
    )
  )
}
# Add the cytoband label (chromosome name followed by band) to the annotation
anno_gene <- mutate(anno_gene, cytoband = paste0(chromosome_name, band))
To get the start and end of the cytogenetic bands
## How to download the cytoband table from http://genome.ucsc.edu/cgi-bin/hgTables
##   1. Go to the UCSC Genome Browser: https://genome.ucsc.edu/index.html
##   2. Mouse over "Tools" and select "Table Browser"
##   3. In the table browser window, set the following parameters:
##      clade = mammal, genome = human, assembly = hg38 (or other),
##      group = Mapping and Sequencing, track = Chromosome Band,
##      table = cytoBand, position = chr1 (or whatever chromosome you are
##      interested in), output format = all fields from selected table,
##      file type returned = plain text
##   4. Click [get output]
## The result has 5 columns: chromosome number, cytoband start position,
## end position, cytoband name, and staining result.
## Source: Haynes, Karmella. (2018). Re: How can i gen the length in mb from a
## cytoband?. Retrieved from:
## https://www.researchgate.net/post/How-can-i-gen-the-length-in-mb-from-a-cytoband/5b2147a0565fba5e2820b3de/citation/download.

# Local folder that holds the downloaded tables
WEBDATA <- "/Users/haekyungim/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/data-Github/web-data"

# hg38 cytoband table; lines starting with "#" are treated as comments
cytoband_hg38 <- read_table(
  glue("{WEBDATA}/2021-07-28-how-to-get-the-cytogenetic-band-of-a-gene/hgTables-cytoband-positions-hg38.txt"),
  comment = "#",
  guess_max = 10000
)
── Column specification ────────────────────────────────────────────────────────
cols(
chrom = col_character(),
chromStart = col_double(),
chromEnd = col_double(),
name = col_character(),
gieStain = col_character()
)
Warning: 571 parsing failures.
row col expected actual file
455 -- 5 columns 4 columns '/Users/haekyungim/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/data-Github/web-data/2021-07-28-how-to-get-the-cytogenetic-band-of-a-gene/hgTables-cytoband-positions-hg38.txt'
864 -- 5 columns 4 columns '/Users/haekyungim/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/data-Github/web-data/2021-07-28-how-to-get-the-cytogenetic-band-of-a-gene/hgTables-cytoband-positions-hg38.txt'
865 -- 5 columns 4 columns '/Users/haekyungim/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/data-Github/web-data/2021-07-28-how-to-get-the-cytogenetic-band-of-a-gene/hgTables-cytoband-positions-hg38.txt'
866 -- 5 columns 4 columns '/Users/haekyungim/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/data-Github/web-data/2021-07-28-how-to-get-the-cytogenetic-band-of-a-gene/hgTables-cytoband-positions-hg38.txt'
867 -- 5 columns 4 columns '/Users/haekyungim/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/data-Github/web-data/2021-07-28-how-to-get-the-cytogenetic-band-of-a-gene/hgTables-cytoband-positions-hg38.txt'
... ... ......... ......... .....................................................................................................................................................................................
See problems(...) for more details.
── Column specification ────────────────────────────────────────────────────────
cols(
chrom = col_character(),
chromStart = col_double(),
chromEnd = col_double(),
name = col_character(),
gieStain = col_character()
)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.100 2.000 3.200 3.545 4.600 18.100
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.50 2.40 3.60 3.94 4.90 15.20
[1] 1433 5
[1] 862 5
cytoband n
1 16p11.2 457
2 14q11.2 434
3 16p13.3 432
4 19p13.3 389
5 11p15.4 386
6 17p11.2 369
7 CHR_HSCHR6_MHC_COX_CTG1 340
8 19p13.2 326
9 CHR_HSCHR6_MHC_QBL_CTG1 322
10 CHR_HSCHR6_MHC_DBB_CTG1 316
11 17q11.2 311
12 CHR_HSCHR6_MHC_SSTO_CTG1 303
13 CHR_HSCHR6_MHC_MANN_CTG1 299
14 14q32.33 292
15 CHR_HSCHR6_MHC_MCF_CTG1 292
16 8p23.1 284
17 15q11.2 274
18 19q13.2 271
19 19q13.42 268
20 17q25.3 267
21 1q21.3 267
22 17q12 262
23 3p21.31 262
24 2p11.2 260
25 1q32.1 252
26 Xq28 249
27 11q12.1 247
28 12p13.31 247
29 6p22.1 247
30 22q11.21 245
31 7q22.1 237
32 19q13.33 231
33 17p13.1 228
34 8q24.3 227
35 16q22.1 223
36 21q22.3 218
37 7q34 217
38 19p13.11 216
39 7q11.21 213
40 17q21.31 208
41 CHR_HG76_PATCH 207
42 12q24.31 199
43 3q29 198
44 19p12 197
45 12q13.13 196
46 7q11.23 196
47 19q13.43 193
48 11q13.4 191
49 11q13.1 190
50 17q21.2 188
51 5q31.3 188
52 6q21 188
53 CHR_HSCHR14_3_CTG1 188
54 11q23.3 187
55 5q35.3 187
56 1q44 186
57 9q34.3 184
58 1p31.1 183
59 CHR_HSCHR6_MHC_APD_CTG1 183
60 2q35 181
61 14q24.3 179
62 17q25.1 177
63 6p21.1 174
64 15q26.1 172
65 1p32.3 172
66 6p21.33 171
67 19q13.12 168
68 5q31.1 168
69 20q13.33 167
70 9p13.3 167
71 2q31.1 165
72 Yp11.2 165
73 11p11.2 162
74 14q12 162
75 19q13.41 162
76 1p36.33 162
77 7p11.2 162
78 11p15.5 161
79 12q24.33 161
80 20p13 161
81 1p36.11 159
82 20q13.12 159
83 7q36.1 156
84 18p11.21 155
85 2q37.3 155
86 19q13.32 154
87 4p16.3 153
88 1q21.1 151
89 22q11.22 151
90 2q37.1 150
91 4p16.1 149
92 6p22.3 148
93 6q27 148
94 1p34.2 147
95 2q11.2 146
96 1p31.3 145
97 2q33.1 145
98 17q22 144
99 1p34.3 144
100 Xp11.23 143
101 22q12.3 142
102 22q13.1 142
103 2q21.1 142
104 11q13.2 141
105 1q21.2 141
106 17p13.3 139
107 1q23.3 139
108 14q32.31 138
109 5q11.2 138
110 19p13.12 137
111 16q21 136
112 9q34.11 135
113 1q41 133
114 1p36.13 132
115 21q22.11 132
116 11q24.2 131
117 16q12.1 131
118 4q12 131
119 1p13.3 130
120 2p23.3 130
121 1q42.13 129
122 17q21.32 128
123 22q11.23 127
124 Xp11.22 127
125 16q23.1 126
126 15q15.1 125
127 17p13.2 125
128 2p21 125
129 18q11.2 124
130 22q12.2 123
131 22q13.2 123
132 15q26.3 122
133 8q21.13 122
134 15q21.1 121
135 13q34 120
136 11p15.1 119
137 14q32.2 119
138 16p12.3 119
139 1q25.3 119
140 8p12 119
141 Xq13.1 119
142 15q25.2 118
143 2p25.1 118
144 Xp22.2 118
145 3q21.3 117
146 Xq26.3 117
147 12q12 116
148 1p36.12 116
149 5q13.2 116
150 Xq22.1 116
151 11q14.1 115
152 1p36.22 115
153 12q13.12 114
154 4q25 114
155 7p14.3 114
156 18q21.1 113
157 1q43 113
158 CHR_HSCHR7_2_CTG6 113
159 10q26.3 112
160 1p13.2 112
161 Xq24 112
162 Yq11.223 112
163 10q22.1 111
164 15q14 111
165 15q25.1 111
166 17p12 111
167 5q14.3 111
168 5q32 111
169 8q22.3 111
170 Xq23 111
171 Yq11.23 111
172 11q22.3 110
173 15q22.31 110
174 4q13.3 108
175 11p13 107
176 15q21.3 107
177 8q22.1 107
178 10q11.21 106
179 15q23 106
180 17q21.33 106
181 19q13.11 106
182 6p21.32 106
183 CHR_HSCHR17_7_CTG4 106
184 13q12.11 105
185 18q23 105
186 7p14.1 105
187 20q11.21 104
188 2q14.1 104
189 6p22.2 104
190 1p34.1 103
191 13q14.11 102
192 1p36.21 102
193 5p15.33 102
194 8p21.3 102
195 12q23.3 101
196 15q22.2 101
197 20q11.23 101
198 6q25.3 101
199 11q12.3 100
200 11q21 100
201 16p12.2 100
202 6q14.1 100
203 10q22.2 99
204 20p11.21 99
205 3p22.1 99
206 4p14 99
207 10p12.1 98
208 22q11.1 98
209 5q15 98
210 1q23.1 97
211 22q13.31 97
212 4q32.1 97
213 7p22.1 97
214 9q13 97
215 10q11.22 96
216 12p13.2 96
217 12q13.2 96
218 16q24.1 96
219 2q13 96
220 4q35.1 96
221 11q14.3 95
222 12q13.11 95
223 12q23.1 95
224 21p11.2 95
225 21q21.1 95
226 6p21.31 95
227 Yq11.222 95
228 2q14.3 94
229 7q21.3 94
230 8q24.13 94
231 19q13.31 93
232 20q11.22 93
233 10p13 92
234 20q13.13 92
235 2p14 92
236 10q22.3 91
237 3p25.3 91
238 3q22.1 91
239 4q13.2 91
240 5q14.1 91
241 5q35.2 91
242 18q12.1 90
243 1q22 90
244 22q12.1 90
245 5q23.2 90
246 6q25.1 90
247 2p12 89
248 8p22 89
249 9q21.11 89
250 Xp11.3 89
251 Xp11.4 89
252 2q33.3 88
253 6q13 88
254 9p21.3 88
255 12p13.33 87
256 12q13.3 87
257 15q21.2 87
258 3q23 87
259 5q35.1 87
260 6q23.2 87
261 2p13.3 86
262 3q25.1 86
263 7q33 86
264 9p24.1 86
265 12q15 85
266 13q14.3 85
267 16p12.1 85
268 18p11.31 85
269 1p35.3 85
270 1q24.2 85
271 22q13.33 85
272 5p13.2 85
273 5q31.2 85
274 5q33.3 85
275 8q12.1 85
276 15q24.1 84
277 16p13.11 84
278 1p33 84
279 1q42.2 84
280 2p16.1 84
281 7p15.2 84
282 9q33.3 84
283 12p11.21 83
284 12p12.1 83
285 17q24.2 83
286 4q22.1 83
287 4q31.3 83
288 7q35 83
289 10q26.13 82
290 12q22 82
291 4q24 82
292 10p14 81
293 2q11.1 81
294 4q35.2 80
295 5q13.3 80
296 6q15 80
297 7p22.3 80
298 9q22.33 80
299 9q31.1 80
300 17q23.2 79
301 1q32.2 79
302 3q26.33 79
303 4q26 79
304 CHR_HSCHR15_4_CTG8 79
305 10q21.3 78
306 12q24.11 78
307 14q22.1 78
308 14q23.1 78
309 16q24.3 78
310 3p22.2 78
311 5p15.31 78
312 7p15.3 78
313 7q32.1 78
314 8p11.21 78
315 Xq21.1 78
316 10p15.1 77
317 13q14.2 77
318 16q12.2 77
319 1q25.2 77
320 3p25.1 77
321 10q23.31 76
322 20p11.23 76
323 2p25.3 76
324 11q12.2 75
325 12p12.3 75
326 4q28.3 75
327 5p13.3 75
328 6p21.2 75
329 CHR_HSCHR19_4_CTG3_1 75
330 11q23.1 74
331 12q14.1 74
332 1p22.1 74
333 1p35.2 74
334 2p13.1 74
335 4q31.21 74
336 6p12.3 74
337 6q22.31 74
338 9p11.2 74
339 14q24.2 73
340 16p13.13 73
341 21q21.3 73
342 3q13.33 73
343 4p15.2 73
344 5p15.1 73
345 5p15.2 73
346 7q36.3 73
347 8q21.3 73
348 9q22.31 73
349 9q22.32 73
350 1p36.32 72
351 1q32.3 72
352 2p15 72
353 4q32.3 72
354 5q23.1 72
355 7p13 72
356 9q32 72
357 Xq27.3 71
358 15q24.2 70
359 15q26.2 70
360 3q27.1 70
361 4q21.1 70
362 5q34 70
363 7q31.1 70
364 19p13.13 69
365 19q12 69
366 9q33.2 69
367 10q25.3 68
368 12q21.2 68
369 12q24.32 68
370 1q23.2 68
371 2p24.1 68
372 3p14.1 68
373 3q24 68
374 6p12.1 68
375 6q23.3 68
376 7p21.3 68
377 8q24.22 68
378 14q24.1 67
379 17q23.3 67
380 18q21.2 67
381 1p22.2 67
382 20p12.1 67
383 3q26.2 67
384 7q21.11 67
385 10q24.32 66
386 14q23.3 66
387 18p11.32 66
388 3p14.3 66
389 8p21.2 66
390 10p11.21 65
391 18p11.22 65
392 1p12 65
393 21q11.2 65
394 3p24.3 65
395 7p21.1 65
396 Xq25 65
397 10q23.33 64
398 11q24.1 64
399 13q31.1 64
400 14q21.3 64
401 16q13 64
402 3p13 64
403 10q11.23 63
404 11p11.12 63
405 13q12.12 63
406 1p21.3 63
407 2q36.3 63
408 3p21.1 63
409 3q27.3 63
410 5p13.1 63
411 6p25.2 63
412 9q21.13 63
413 15q13.1 62
414 15q13.2 62
415 1p22.3 62
416 1q42.3 62
417 2q24.2 62
418 3q11.2 62
419 Xq13.2 62
420 10p12.31 61
421 11q14.2 61
422 13q12.3 61
423 15q25.3 61
424 20p12.3 61
425 2q32.1 61
426 2q34 61
427 3p22.3 61
428 12q21.31 60
429 13q13.3 60
430 14q32.12 60
431 16q22.2 60
432 20q13.2 60
433 2q24.1 60
434 CHR_HG2365_PATCH 60
435 Xp11.21 60
436 Xp22.33 60
437 1q25.1 59
438 21q22.2 59
439 2p22.3 59
440 3p21.2 59
441 5p12 59
442 8q24.21 59
443 CHR_HG1_PATCH 59
444 CHR_HSCHR16_1_CTG1 59
445 10q23.1 58
446 12q24.23 58
447 15q13.3 58
448 16q24.2 58
449 1p21.2 58
450 3p12.3 58
451 5q33.1 58
452 7q22.3 58
453 7q31.2 58
454 8p11.23 58
455 8q22.2 58
456 10q21.1 57
457 11q25 57
458 14q22.3 57
459 16p11.1 57
460 3q21.2 57
461 11q24.3 56
462 12p13.32 56
463 14q21.1 56
464 18q12.2 56
465 1q42.12 56
466 2p16.3 56
467 2q12.1 56
468 2q31.2 56
469 8q21.11 56
470 11q13.3 55
471 12q21.33 55
472 13q32.3 55
473 18q21.32 55
474 3q26.1 55
475 5q21.1 55
476 8q11.23 55
477 9q31.3 55
478 Xp21.1 55
479 Xq22.3 55
480 Xq26.2 55
481 1p35.1 54
482 3q26.31 54
483 4q13.1 54
484 4q34.1 54
485 6q22.1 54
486 8p23.3 54
487 9p21.1 54
488 11q13.5 53
489 12q14.2 53
490 2q32.3 53
491 3q13.31 53
492 6p24.3 53
493 8q11.21 53
494 CHR_HSCHR5_2_CTG1_1 53
495 10p11.22 52
496 10q23.2 52
497 12q14.3 52
498 16q23.2 52
499 18q12.3 52
500 18q22.3 52
501 1p36.23 52
502 20q13.32 52
503 6p25.1 52
504 7p12.3 52
505 7p14.2 52
506 9q21.33 52
507 CHR_HSCHR19LRC_PGF1_CTG3_1 52
508 10q24.1 51
509 10q25.2 51
510 12q24.13 51
511 13q12.13 51
512 2q21.2 51
513 2q23.3 51
514 3q13.2 51
515 3q22.3 51
516 10q26.11 50
517 12p13.1 50
518 14q31.3 50
519 18q22.1 50
520 5q12.1 50
521 6q14.3 50
522 6q16.1 50
523 8q12.3 50
524 9q21.32 50
525 9q34.13 50
526 11p14.1 49
527 12q23.2 49
528 14q32.13 49
529 2q24.3 49
530 3q25.31 49
531 4p15.1 49
532 4q28.1 49
533 6q12 49
534 6q25.2 49
535 12p11.22 48
536 12q21.1 48
537 13q31.3 48
538 13q32.1 48
539 1q24.3 48
540 2q32.2 48
541 4p13 48
542 4q23 48
543 4q31.1 48
544 10q25.1 47
545 11q11 47
546 11q23.2 47
547 15q15.3 47
548 1q24.1 47
549 2p24.3 47
550 2q14.2 47
551 2q36.1 47
552 3q13.13 47
553 6q24.1 47
554 CHR_HSCHR19LRC_COX1_CTG3_1 47
555 10p11.23 46
556 10q24.2 46
557 16p13.2 46
558 1q31.1 46
559 2p22.1 46
560 2q22.1 46
561 5q12.3 46
562 6q24.2 46
563 8p21.1 46
564 9q34.2 46
565 Xp21.3 46
566 11p12 45
567 14q23.2 45
568 17q24.3 45
569 18q21.33 45
570 21q22.13 45
571 2q22.3 45
572 9q31.2 45
573 10p15.3 44
574 10q24.31 44
575 12q24.21 44
576 13q14.13 44
577 14q32.11 44
578 1p13.1 44
579 20p12.2 44
580 2p22.2 44
581 8q23.1 44
582 CHR_HSCHR17_2_CTG5 44
583 11p15.3 43
584 1p36.31 43
585 1q31.3 43
586 21q22.12 43
587 3p24.1 43
588 4p15.32 43
589 7q31.32 43
590 7q32.2 43
591 Xp22.11 43
592 14q31.1 42
593 16q23.3 42
594 20q13.31 42
595 3q25.2 42
596 4q34.3 42
597 9p13.2 42
598 15q24.3 41
599 17q24.1 41
600 20q12 41
601 2q33.2 41
602 3q26.32 41
603 3q28 41
604 4p12 41
605 4q27 41
606 5p14.1 41
607 6q24.3 41
608 8q13.1 41
609 8q13.3 41
610 9q22.1 41
611 CHR_HG2290_PATCH 41
612 Xp22.31 41
613 14q13.2 40
614 14q21.2 40
615 1p11.2 40
616 6q22.33 40
617 8q21.2 40
618 CHR_HSCHR17_1_CTG5 40
619 Xq21.31 40
620 10q26.2 39
621 21p12 39
622 4p15.33 39
623 5q21.3 39
624 6p25.3 39
625 CHR_HSCHR15_6_CTG8 39
626 Xq22.2 39
627 13q21.33 38
628 18q21.31 38
629 1q31.2 38
630 3p14.2 38
631 4q21.21 38
632 4q21.22 38
633 5q33.2 38
634 7p12.1 38
635 8q24.12 38
636 9p23 38
637 9p24.3 38
638 13q22.3 37
639 16p13.12 37
640 2p23.2 37
641 2q12.3 37
642 3p25.2 37
643 5p14.3 37
644 6q26 37
645 9q33.1 37
646 CHR_HSCHR14_7_CTG1 37
647 CHR_HSCHR19LRC_PGF2_CTG3_1 37
648 MT 37
649 11p14.3 36
650 11p15.2 36
651 13q33.3 36
652 9p22.3 36
653 Xq26.1 36
654 Yq11.221 36
655 13q33.1 35
656 17q23.1 35
657 3p26.1 35
658 3q21.1 35
659 6p12.2 35
660 7q21.2 35
661 Xq12 35
662 Xq21.33 35
663 15q12 34
664 1p21.1 34
665 21q21.2 34
666 3p24.2 34
667 7q21.13 34
668 CHR_HG109_PATCH 34
669 CHR_HSCHR22_1_CTG7 34
670 10q21.2 33
671 13q12.2 33
672 7q11.22 33
673 7q31.33 33
674 9q22.2 33
675 4p11 32
676 4q21.23 32
677 4q31.23 32
678 5q22.3 32
679 8p23.2 32
680 CHR_HG1342_HG2282_PATCH 32
681 10p12.2 31
682 10q24.33 31
683 11q22.1 31
684 13q11 31
685 13q13.1 31
686 14q32.32 31
687 1p32.1 31
688 2q12.2 31
689 2q31.3 31
690 3q12.3 31
691 3q13.12 31
692 7q32.3 31
693 9p21.2 31
694 CHR_HSCHR12_3_CTG2 31
695 CHR_HSCHR19LRC_COX2_CTG3_1 31
696 Yq11.21 31
697 10p11.1 30
698 12p11.23 30
699 13q21.1 30
700 3q25.32 30
701 6q16.3 30
702 8q11.1 30
703 CHR_HSCHR17_3_CTG1 30
704 Xq27.1 30
705 11q22.2 29
706 15q15.2 29
707 2p16.2 29
708 5q22.1 29
709 6p23 29
710 6p24.2 29
711 CHR_HG30_PATCH 29
712 Xp22.12 29
713 13q21.31 28
714 14q13.3 28
715 2p23.1 28
716 3q25.33 28
717 4q33 28
718 5q23.3 28
719 7q21.12 28
720 9p11.1 28
721 1p32.2 27
722 20q11.1 27
723 5p15.32 27
724 7q31.31 27
725 8q23.3 27
726 9p12 27
727 9p13.1 27
728 9p24.2 27
729 12q24.22 26
730 2p25.2 26
731 2q21.3 26
732 3p26.3 26
733 3q12.1 26
734 5q13.1 26
735 8q13.2 26
736 9p22.1 26
737 9q21.2 26
738 CHR_HG26_PATCH 26
739 CHR_HSCHR12_2_CTG2 26
740 CHR_HSCHR17_4_CTG4 26
741 Xp22.13 26
742 10q23.32 25
743 13q31.2 25
744 15q11.1 25
745 2p13.2 25
746 2p24.2 25
747 4q28.2 25
748 7q36.2 25
749 8p11.22 25
750 9q21.12 25
751 CHR_HG1815_PATCH 25
752 CHR_HG2023_PATCH 25
753 CHR_HSCHR15_1_CTG8 25
754 Xq13.3 25
755 12q21.32 24
756 13q21.2 24
757 13q22.1 24
758 20p11.22 24
759 4q31.22 24
760 CHR_HSCHR15_5_CTG8 24
761 CHR_HSCHR22_1_CTG3 24
762 CHR_HSCHR2_6_CTG7_2 24
763 CHR_HSCHR5_1_CTG1_1 24
764 Xp21.2 24
765 10p15.2 23
766 17q21.1 23
767 3q22.2 23
768 4q22.3 23
769 5q22.2 23
770 CHR_HG2030_PATCH 23
771 CHR_HG708_PATCH 23
772 CHR_HSCHR19LRC_LRC_I_CTG3_1 23
773 CHR_HSCHR19LRC_LRC_T_CTG3_1 23
774 CHR_HSCHR4_11_CTG12 23
775 CHR_HSCHR8_3_CTG7 23
776 13q21.32 22
777 14q13.1 22
778 18q22.2 22
779 3p12.2 22
780 3q27.2 22
781 5q14.2 22
782 8p11.1 22
783 CHR_HSCHR15_1_CTG1 22
784 CHR_HSCHR4_1_CTG9 22
785 CHR_HSCHR7_3_CTG4_4 22
786 16q11.2 21
787 1q42.11 21
788 3p11.1 21
789 3p12.1 21
790 6p24.1 21
791 7p21.2 21
792 9q21.31 21
793 CHR_HG2002_PATCH 21
794 CHR_HG2066_PATCH 21
795 CHR_HG2246_HG2248_HG2276_PATCH 21
796 CHR_HSCHR19LRC_LRC_J_CTG3_1 21
797 CHR_HSCHR8_3_CTG1 21
798 Xq11.2 21
799 12q24.12 20
800 13q32.2 20
801 4p15.31 20
802 CHR_HG2198_PATCH 20
803 CHR_HG926_PATCH 20
804 CHR_HSCHR17_10_CTG4 20
805 CHR_HSCHR19LRC_LRC_S_CTG3_1 20
806 13q22.2 19
807 14q22.2 19
808 20p11.1 19
809 2q37.2 19
810 4q32.2 19
811 CHR_HG2046_PATCH 19
812 CHR_HG2513_PATCH 19
813 CHR_HSCHR19KIR_FH15_B_HAP_CTG3_1 19
814 CHR_HSCHR1_5_CTG3 19
815 12p11.1 18
816 16q22.3 18
817 4p16.2 18
818 5q11.1 18
819 6p11.2 18
820 CHR_HG2419_PATCH 18
821 CHR_HSCHR19KIR_FH05_B_HAP_CTG3_1 18
822 2q22.2 17
823 2q23.1 17
824 4q34.2 17
825 6q16.2 17
826 8q23.2 17
827 8q24.11 17
828 CHR_HG2087_PATCH 17
829 CHR_HG2217_PATCH 17
830 CHR_HG545_PATCH 17
831 CHR_HSCHR19KIR_CA01-TB04_CTG3_1 17
832 CHR_HSCHR19KIR_FH13_BA2_HAP_CTG3_1 17
833 CHR_HSCHR1_2_CTG3 17
834 10p12.33 16
835 10q26.12 16
836 13q13.2 16
837 18p11.23 16
838 19q11 16
839 5p14.2 16
840 7p12.2 16
841 CHR_HG1362_PATCH 16
842 CHR_HG142_HG150_NOVEL_TEST 16
843 CHR_HG28_PATCH 16
844 CHR_HSCHR11_1_CTG8 16
845 CHR_HSCHR19KIR_CA01-TB01_CTG3_1 16
846 CHR_HSCHR19_3_CTG3_1 16
847 CHR_HSCHR8_8_CTG1 16
848 Xq21.2 16
849 Xq27.2 16
850 12p12.2 15
851 17q11.1 15
852 3q13.32 15
853 8q24.23 15
854 9p22.2 15
855 CHR_HG2114_PATCH 15
856 CHR_HG2499_PATCH 15
857 CHR_HSCHR11_1_CTG7 15
858 CHR_HSCHR19KIR_0019-4656-B_CTG3_1 15
859 CHR_HSCHR19KIR_FH08_BAX_HAP_CTG3_1 15
860 CHR_HSCHR1_4_CTG3 15
861 13q33.2 14
862 4q21.3 14
863 9q12 14
864 CHR_HG2021_PATCH 14
865 CHR_HG439_PATCH 14
866 CHR_HSCHR15_3_CTG3 14
867 CHR_HSCHR16_3_CTG1 14
868 CHR_HSCHR19KIR_7191059-2_CTG3_1 14
869 CHR_HSCHR19KIR_FH15_A_HAP_CTG3_1 14
870 CHR_HSCHR19KIR_G248_A_HAP_CTG3_1 14
871 CHR_HSCHR19KIR_GRC212_AB_HAP_CTG3_1 14
872 CHR_HSCHR19KIR_LUCE_BDEL_HAP_CTG3_1 14
873 CHR_HSCHR19KIR_T7526_BDEL_HAP_CTG3_1 14
874 CHR_HSCHR1_1_CTG3 14
875 CHR_HSCHR1_1_CTG31 14
876 CHR_HSCHR1_5_CTG32_1 14
877 17q25.2 13
878 20q13.11 13
879 22q13.32 13
880 3q13.11 13
881 6q11.1 13
882 6q22.32 13
883 7p22.2 13
884 9q34.12 13
885 CHR_HSCHR11_2_CTG1_1 13
886 CHR_HSCHR19KIR_502960008-1_CTG3_1 13
887 CHR_HSCHR19KIR_502960008-2_CTG3_1 13
888 CHR_HSCHR19KIR_7191059-1_CTG3_1 13
889 CHR_HSCHR19KIR_ABC08_AB_HAP_T_P_CTG3_1 13
890 CHR_HSCHR19KIR_CA01-TA01_1_CTG3_1 13
891 CHR_HSCHR19KIR_CA01-TA01_2_CTG3_1 13
892 CHR_HSCHR19KIR_FH05_A_HAP_CTG3_1 13
893 CHR_HSCHR19KIR_FH06_A_HAP_CTG3_1 13
894 CHR_HSCHR19KIR_FH06_BA1_HAP_CTG3_1 13
895 CHR_HSCHR19KIR_FH13_A_HAP_CTG3_1 13
896 CHR_HSCHR19KIR_G085_A_HAP_CTG3_1 13
897 CHR_HSCHR19KIR_G085_BA1_HAP_CTG3_1 13
898 CHR_HSCHR19KIR_HG2394_CTG3_1 13
899 CHR_HSCHR19KIR_RSH_A_HAP_CTG3_1 13
900 CHR_HSCHR19KIR_RSH_BA2_HAP_CTG3_1 13
901 CHR_HSCHR19KIR_T7526_A_HAP_CTG3_1 13
902 CHR_HSCHR4_6_CTG12 13
903 Xq11.1 13
904 3q12.2 12
905 5q21.2 12
906 8q12.2 12
907 CHR_HG151_NOVEL_TEST 12
908 CHR_HSCHR11_1_CTG6 12
909 CHR_HSCHR15_2_CTG3 12
910 CHR_HSCHR17_1_CTG4 12
911 CHR_HSCHR19KIR_0019-4656-A_CTG3_1 12
912 CHR_HSCHR19KIR_ABC08_A1_HAP_CTG3_1 12
913 CHR_HSCHR19KIR_FH08_A_HAP_CTG3_1 12
914 CHR_HSCHR19KIR_RP5_B_HAP_CTG3_1 12
915 CHR_HSCHR3_5_CTG1 12
916 CHR_HSCHR3_9_CTG3 12
917 13q14.12 11
918 19q13.13 11
919 3p23 11
920 6q14.2 11
921 CHR_HG1277_PATCH 11
922 CHR_HG1309_PATCH 11
923 CHR_HG1398_PATCH 11
924 CHR_HG2263_PATCH 11
925 CHR_HSCHR11_1_CTG1_2 11
926 CHR_HSCHR15_1_CTG3 11
927 CHR_HSCHR17_1_CTG1 11
928 CHR_HSCHR19KIR_G248_BA2_HAP_CTG3_1 11
929 CHR_HSCHR19_1_CTG2 11
930 CHR_HSCHR19_3_CTG2 11
931 CHR_HSCHR22_3_CTG1 11
932 Xq21.32 11
933 2p11.1 10
934 3p26.2 10
935 CHR_HG2057_PATCH 10
936 CHR_HG2266_PATCH 10
937 CHR_HG2334_PATCH 10
938 CHR_HSCHR10_1_CTG2 10
939 CHR_HSCHR17_1_CTG2 10
940 CHR_HSCHR19KIR_ABC08_AB_HAP_C_P_CTG3_1 10
941 CHR_HSCHR19KIR_GRC212_BA1_HAP_CTG3_1 10
942 CHR_HSCHR19KIR_HG2393_CTG3_1 10
943 CHR_HSCHR19KIR_LUCE_A_HAP_CTG3_1 10
944 CHR_HSCHR1_2_CTG31 10
945 CHR_HSCHR5_8_CTG1 10
946 CHR_HSCHR7_2_CTG1 10
947 18q11.1 9
948 22p11.2 9
949 3p11.2 9
950 CHR_HG1832_PATCH 9
951 CHR_HG2058_PATCH 9
952 CHR_HG2095_PATCH 9
953 CHR_HG2115_PATCH 9
954 CHR_HG2232_PATCH 9
955 CHR_HG2285_HG106_HG2252_PATCH 9
956 CHR_HG2291_PATCH 9
957 CHR_HG2525_PATCH 9
958 CHR_HSCHR11_1_CTG5 9
959 CHR_HSCHR11_2_CTG8 9
960 CHR_HSCHR12_1_CTG1 9
961 CHR_HSCHR13_1_CTG3 9
962 CHR_HSCHR14_1_CTG1 9
963 CHR_HSCHR15_3_CTG8 9
964 CHR_HSCHR17_6_CTG4 9
965 CHR_HSCHR19KIR_0010-5217-AB_CTG3_1 9
966 CHR_HSCHR19KIR_HG2396_CTG3_1 9
967 CHR_HSCHR19_2_CTG2 9
968 CHR_HSCHR1_ALT2_1_CTG32_1 9
969 CHR_HSCHR22_1_CTG6 9
970 CHR_HSCHR22_2_CTG1 9
971 CHR_HSCHR2_3_CTG15 9
972 CHR_HSCHR2_8_CTG7_2 9
973 CHR_HSCHR6_1_CTG8 9
974 CHR_HSCHR7_2_CTG4_4 9
975 CHR_HSCHR8_9_CTG1 9
976 CHR_HSCHR9_1_CTG5 9
977 15q22.1 8
978 2q23.2 8
979 3q11.1 8
980 CHR_HG1311_PATCH 8
981 CHR_HG1395_PATCH 8
982 CHR_HG1535_PATCH 8
983 CHR_HG2111_PATCH 8
984 CHR_HG2121_PATCH 8
985 CHR_HSCHR17_2_CTG2 8
986 CHR_HSCHR17_5_CTG4 8
987 CHR_HSCHR19KIR_CA04_CTG3_1 8
988 CHR_HSCHR1_3_CTG32_1 8
989 CHR_HSCHR1_4_CTG31 8
990 CHR_HSCHR1_6_CTG3 8
991 CHR_HSCHR22_1_CTG1 8
992 CHR_HSCHR22_4_CTG1 8
993 CHR_HSCHR22_7_CTG1 8
994 CHR_HSCHR3_1_CTG1 8
995 CHR_HSCHR5_2_CTG5 8
996 CHR_HSCHR5_3_CTG5 8
997 CHR_HSCHR6_1_CTG4 8
998 CHR_HSCHR7_1_CTG1 8
999 CHR_HSCHR7_1_CTG4_4 8
1000 CHR_HSCHR8_5_CTG7 8
1001 11p14.2 7
1002 14q31.2 7
1003 2q36.2 7
1004 4q22.2 7
1005 6q23.1 7
1006 CHR_HG1485_PATCH 7
1007 CHR_HG986_PATCH 7
1008 CHR_HSCHR10_1_CTG1 7
1009 CHR_HSCHR10_1_CTG4 7
1010 CHR_HSCHR11_1_CTG3_1 7
1011 CHR_HSCHR12_4_CTG2 7
1012 CHR_HSCHR16_5_CTG1 7
1013 CHR_HSCHR16_5_CTG3_1 7
1014 CHR_HSCHR16_CTG2 7
1015 CHR_HSCHR1_2_CTG32_1 7
1016 CHR_HSCHR22_8_CTG1 7
1017 CHR_HSCHR2_2_CTG7 7
1018 CHR_HSCHR3_1_CTG3 7
1019 CHR_HSCHR3_4_CTG2_1 7
1020 CHR_HSCHR3_5_CTG3 7
1021 CHR_HSCHR6_1_CTG5 7
1022 CHR_HSCHR7_1_CTG6 7
1023 CHR_HSCHR8_4_CTG7 7
1024 CHR_HSCHRX_1_CTG3 7
1025 KI270727.1 7
1026 Xp22.32 7
1027 1p31.2 6
1028 3p21.33 6
1029 7p15.1 6
1030 CHR_HG2072_PATCH 6
1031 CHR_HG2191_PATCH 6
1032 CHR_HG2213_PATCH 6
1033 CHR_HG2412_PATCH 6
1034 CHR_HSCHR12_1_CTG2_1 6
1035 CHR_HSCHR14_8_CTG1 6
1036 CHR_HSCHR18_1_CTG1_1 6
1037 CHR_HSCHR18_3_CTG2_1 6
1038 CHR_HSCHR19_5_CTG2 6
1039 CHR_HSCHR1_8_CTG3 6
1040 CHR_HSCHR20_1_CTG3 6
1041 CHR_HSCHR21_4_CTG1_1 6
1042 CHR_HSCHR22_5_CTG1 6
1043 CHR_HSCHR22_6_CTG1 6
1044 CHR_HSCHR3_3_CTG3 6
1045 CHR_HSCHR3_4_CTG3 6
1046 CHR_HSCHR3_6_CTG3 6
1047 CHR_HSCHR3_7_CTG3 6
1048 CHR_HSCHR3_8_CTG3 6
1049 CHR_HSCHR4_7_CTG12 6
1050 CHR_HSCHR4_9_CTG12 6
1051 CHR_HSCHR5_3_CTG1 6
1052 CHR_HSCHR8_1_CTG1 6
1053 CHR_HSCHR8_7_CTG1 6
1054 KI270728.1 6
1055 7q22.2 5
1056 9q11 5
1057 CHR_HG126_PATCH 5
1058 CHR_HG1445_PATCH 5
1059 CHR_HG2236_PATCH 5
1060 CHR_HSCHR10_1_CTG3 5
1061 CHR_HSCHR11_3_CTG1 5
1062 CHR_HSCHR13_1_CTG1 5
1063 CHR_HSCHR16_4_CTG3_1 5
1064 CHR_HSCHR17_2_CTG1 5
1065 CHR_HSCHR18_2_CTG2 5
1066 CHR_HSCHR1_3_CTG31 5
1067 CHR_HSCHR21_2_CTG1_1 5
1068 CHR_HSCHR21_5_CTG2 5
1069 CHR_HSCHR21_6_CTG1_1 5
1070 CHR_HSCHR2_1_CTG15 5
1071 CHR_HSCHR2_2_CTG15 5
1072 CHR_HSCHR3_6_CTG2_1 5
1073 CHR_HSCHR4_12_CTG12 5
1074 CHR_HSCHR6_1_CTG2 5
1075 CHR_HSCHR7_3_CTG6 5
1076 CHR_HSCHR8_1_CTG6 5
1077 CHR_HSCHR8_2_CTG7 5
1078 CHR_HSCHRX_2_CTG3 5
1079 15q22.33 4
1080 CHR_HG107_PATCH 4
1081 CHR_HG2060_PATCH 4
1082 CHR_HG2104_PATCH 4
1083 CHR_HG2235_PATCH 4
1084 CHR_HG2471_PATCH 4
1085 CHR_HSCHR12_3_CTG2_1 4
1086 CHR_HSCHR12_9_CTG2_1 4
1087 CHR_HSCHR16_2_CTG3_1 4
1088 CHR_HSCHR16_4_CTG1 4
1089 CHR_HSCHR17_1_CTG9 4
1090 CHR_HSCHR17_3_CTG2 4
1091 CHR_HSCHR18_ALT2_CTG2_1 4
1092 CHR_HSCHR21_3_CTG1_1 4
1093 CHR_HSCHR22_1_CTG2 4
1094 CHR_HSCHR22_1_CTG4 4
1095 CHR_HSCHR22_1_CTG5 4
1096 CHR_HSCHR2_1_CTG1 4
1097 CHR_HSCHR2_1_CTG7 4
1098 CHR_HSCHR2_1_CTG7_2 4
1099 CHR_HSCHR2_3_CTG7_2 4
1100 CHR_HSCHR3_2_CTG3 4
1101 CHR_HSCHR3_9_CTG2_1 4
1102 CHR_HSCHR4_1_CTG4 4
1103 CHR_HSCHR4_3_CTG12 4
1104 CHR_HSCHR5_2_CTG1 4
1105 CHR_HSCHR5_4_CTG1 4
1106 CHR_HSCHR5_5_CTG1 4
1107 CHR_HSCHR5_6_CTG1 4
1108 CHR_HSCHR6_1_CTG9 4
1109 CHR_HSCHR8_4_CTG1 4
1110 CHR_HSCHR9_1_CTG3 4
1111 GL000220.1 4
1112 KI270713.1 4
1113 KI270721.1 4
1114 KI270733.1 4
1115 KI270734.1 4
1116 3p21.32 3
1117 CHR_HG1298_PATCH 3
1118 CHR_HG1320_PATCH 3
1119 CHR_HG1524_PATCH 3
1120 CHR_HG1708_PATCH 3
1121 CHR_HG2063_PATCH 3
1122 CHR_HG2133_PATCH 3
1123 CHR_HG2247_PATCH 3
1124 CHR_HG2288_HG2289_PATCH 3
1125 CHR_HG2442_PATCH 3
1126 CHR_HG2509_PATCH 3
1127 CHR_HG2510_PATCH 3
1128 CHR_HG2511_PATCH 3
1129 CHR_HG699_PATCH 3
1130 CHR_HG705_PATCH 3
1131 CHR_HG721_PATCH 3
1132 CHR_HSCHR11_2_CTG1 3
1133 CHR_HSCHR12_2_CTG2_1 3
1134 CHR_HSCHR12_4_CTG2_1 3
1135 CHR_HSCHR12_5_CTG2_1 3
1136 CHR_HSCHR12_6_CTG2_1 3
1137 CHR_HSCHR13_1_CTG5 3
1138 CHR_HSCHR14_2_CTG1 3
1139 CHR_HSCHR15_2_CTG8 3
1140 CHR_HSCHR16_1_CTG3_1 3
1141 CHR_HSCHR17_3_CTG4 3
1142 CHR_HSCHR18_2_CTG2_1 3
1143 CHR_HSCHR19_1_CTG3_1 3
1144 CHR_HSCHR19_2_CTG3_1 3
1145 CHR_HSCHR1_1_CTG11 3
1146 CHR_HSCHR20_1_CTG1 3
1147 CHR_HSCHR20_1_CTG4 3
1148 CHR_HSCHR2_1_CTG5 3
1149 CHR_HSCHR2_2_CTG1 3
1150 CHR_HSCHR2_4_CTG1 3
1151 CHR_HSCHR3_1_CTG2_1 3
1152 CHR_HSCHR3_2_CTG2_1 3
1153 CHR_HSCHR3_4_CTG1 3
1154 CHR_HSCHR3_8_CTG2_1 3
1155 CHR_HSCHR4_1_CTG12 3
1156 CHR_HSCHR4_5_CTG12 3
1157 CHR_HSCHR5_4_CTG1_1 3
1158 CHR_HSCHR7_3_CTG1 3
1159 CHR_HSCHR8_2_CTG1 3
1160 CHR_HSCHR9_1_CTG2 3
1161 Yq12 3
1162 15q22.32 2
1163 6q22.2 2
1164 8q21.12 2
1165 CHR_HG1384_PATCH 2
1166 CHR_HG2022_PATCH 2
1167 CHR_HG2047_PATCH 2
1168 CHR_HG2067_PATCH 2
1169 CHR_HG2088_PATCH 2
1170 CHR_HG2116_PATCH 2
1171 CHR_HG2128_PATCH 2
1172 CHR_HG2233_PATCH 2
1173 CHR_HG2249_PATCH 2
1174 CHR_HG2512_PATCH 2
1175 CHR_HSCHR17_8_CTG4 2
1176 CHR_HSCHR17_9_CTG4 2
1177 CHR_HSCHR18_1_CTG1 2
1178 CHR_HSCHR18_1_CTG2_1 2
1179 CHR_HSCHR18_5_CTG1_1 2
1180 CHR_HSCHR18_ALT21_CTG2_1 2
1181 CHR_HSCHR1_9_CTG3 2
1182 CHR_HSCHR20_1_CTG2 2
1183 CHR_HSCHR21_8_CTG1_1 2
1184 CHR_HSCHR2_7_CTG7_2 2
1185 CHR_HSCHR3_5_CTG2_1 2
1186 CHR_HSCHR4_8_CTG12 2
1187 CHR_HSCHR5_1_CTG5 2
1188 CHR_HSCHR6_1_CTG7 2
1189 CHR_HSCHR6_8_CTG1 2
1190 CHR_HSCHR8_1_CTG7 2
1191 CHR_HSCHR8_5_CTG1 2
1192 CHR_HSCHR8_6_CTG1 2
1193 CHR_HSCHR8_7_CTG7 2
1194 CHR_HSCHR9_1_CTG4 2
1195 CHR_HSCHR9_1_CTG6 2
1196 GL000194.1 2
1197 GL000195.1 2
1198 GL000213.1 2
1199 KI270442.1 2
1200 KI270726.1 2
1201 KI270731.1 2
1202 12q11 1
1203 14p11.2 1
1204 6q11.2 1
1205 8q11.22 1
1206 CHR_HG1531_PATCH 1
1207 CHR_HG1651_PATCH 1
1208 CHR_HG2062_PATCH 1
1209 CHR_HG2239_PATCH 1
1210 CHR_HSCHR10_1_CTG6 1
1211 CHR_HSCHR12_5_CTG2 1
1212 CHR_HSCHR12_8_CTG2_1 1
1213 CHR_HSCHR14_9_CTG1 1
1214 CHR_HSCHR17_11_CTG4 1
1215 CHR_HSCHR17_12_CTG4 1
1216 CHR_HSCHR17_2_CTG4 1
1217 CHR_HSCHR18_2_CTG1_1 1
1218 CHR_HSCHR19_4_CTG2 1
1219 CHR_HSCHR1_1_CTG32_1 1
1220 CHR_HSCHR1_3_CTG3 1
1221 CHR_HSCHR2_2_CTG7_2 1
1222 CHR_HSCHR2_3_CTG1 1
1223 CHR_HSCHR3_3_CTG1 1
1224 CHR_HSCHR4_1_CTG6 1
1225 CHR_HSCHR4_2_CTG12 1
1226 CHR_HSCHR4_2_CTG4 1
1227 CHR_HSCHR4_4_CTG12 1
1228 CHR_HSCHR5_1_CTG1 1
1229 CHR_HSCHR5_7_CTG1 1
1230 CHR_HSCHR6_1_CTG3 1
1231 CHR_HSCHR6_1_CTG6 1
1232 CHR_HSCHR7_1_CTG7 1
1233 CHR_HSCHR7_2_CTG7 1
1234 CHR_HSCHR9_1_CTG1 1
1235 CHR_HSCHRX_2_CTG12 1
1236 GL000009.2 1
1237 GL000205.2 1
1238 GL000216.2 1
1239 GL000218.1 1
1240 GL000219.1 1
1241 GL000225.1 1
1242 KI270711.1 1
1243 KI270744.1 1
1244 KI270750.1 1
cytoband n
Length:1244 Min. : 1.00
Class :character 1st Qu.: 10.00
Mode :character Median : 39.00
Mean : 55.71
3rd Qu.: 78.00
Max. :457.00
---
title: How to annotate a gene with the cytogenetic band
author: Haky Im
date: '2021-07-28'
slug: how-to-get-the-cytogenetic-band-of-a-gene
categories:
- how to
tags: []
---
Adding the cytogenetic band to genes is convenient because it provides a somewhat memorable name for the genomic region where the gene is located. The biomaRt package in Bioconductor provides the database and the functions for the annotation.
```{r}
suppressMessages(library(tidyverse))
suppressMessages(library(glue))
suppressMessages(library(RSQLite))
```
add cytogenetic band to genes
```{r}
# One-time setup: install biomaRt through Bioconductor if it is missing.
# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# BiocManager::install("biomaRt")
## You may want to restart R/RStudio after installing BiocManager.
library(biomaRt)

# Connect to the human gene dataset of the Ensembl BioMart, pinned to the
# "useast" mirror. Alternative without an explicit mirror:
# ensembl <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = "hsapiens_gene_ensembl")
ensembl <- useEnsembl(
  biomart = "ensembl",
  dataset = "hsapiens_gene_ensembl",
  mirror = "useast"
)
```
::: {.callout-note}
mirror = "useast" was necessary to make quarto render work; interactive runs seem to be redirected to the useast mirror automatically, but quarto render is not.
:::
```{r}
## Query BioMart for the per-gene annotation, including the cytogenetic band
anno_gene <- getBM(
  attributes = c(
    "ensembl_gene_id",
    "external_gene_name",
    "chromosome_name",
    "start_position",
    "end_position",
    "band",
    "gene_biotype"
  ),
  mart = ensembl
)
#' Annotate genes with their cytogenetic band
#'
#' Left-joins `df` to the biomaRt gene annotation table `anno_gene` and adds a
#' `cytoband` column made of the chromosome name followed by the band.
#'
#' @param df Data frame containing a column of Ensembl gene ids.
#' @param geneid Name (a single string) of the column of `df` that holds the
#'   Ensembl gene ids. Defaults to "ensembl_gene_id".
#' @return `df` with the columns of `anno_gene` appended and a `cytoband`
#'   column. Rows whose gene id has no match in `anno_gene` get `NA` in the
#'   annotation columns and in `cytoband`.
addband2gene <- function(df, geneid = "ensembl_gene_id") {
  stopifnot(
    is.data.frame(df),
    is.character(geneid),
    length(geneid) == 1,
    geneid %in% names(df)
  )

  # Download the annotation only when no `anno_gene` object can be found, and
  # cache it with `<<-` so that later calls reuse it instead of querying again.
  if (!exists("anno_gene")) {
    ensembl <- useMart(
      biomart = "ENSEMBL_MART_ENSEMBL",
      dataset = "hsapiens_gene_ensembl"
    )
    anno_gene <<- getBM(
      attributes = c(
        "ensembl_gene_id",
        "external_gene_name",
        "chromosome_name",
        "start_position",
        "end_position",
        "band",
        "gene_biotype"
      ),
      mart = ensembl
    )
    message("defined anno_gene")
  }

  # A named `by` matches df[[geneid]] against anno_gene$ensembl_gene_id
  # directly. This replaces renaming the id column back and forth, which
  # clobbered any other column of `df` already called "ensembl_gene_id".
  join_key <- stats::setNames("ensembl_gene_id", geneid)
  annotated <- left_join(df, anno_gene, by = join_key)

  # paste0() would turn missing values into the literal string "NANA", so
  # genes without annotation get an explicit NA instead.
  mutate(
    annotated,
    cytoband = if_else(
      is.na(chromosome_name) | is.na(band),
      NA_character_,
      paste0(chromosome_name, band)
    )
  )
}
# Add the cytoband label (chromosome name followed by band) to the annotation
anno_gene <- mutate(anno_gene, cytoband = paste0(chromosome_name, band))
```
To get the start and end of the cytogenetic bands
```{r}
## How to download the cytoband table from http://genome.ucsc.edu/cgi-bin/hgTables
##   1. Go to the UCSC Genome Browser: https://genome.ucsc.edu/index.html
##   2. Mouse over "Tools" and select "Table Browser"
##   3. In the table browser window, set the following parameters:
##      clade = mammal, genome = human, assembly = hg38 (or other),
##      group = Mapping and Sequencing, track = Chromosome Band,
##      table = cytoBand, position = chr1 (or whatever chromosome you are
##      interested in), output format = all fields from selected table,
##      file type returned = plain text
##   4. Click [get output]
## The result has 5 columns: chromosome number, cytoband start position,
## end position, cytoband name, and staining result.
## Source: Haynes, Karmella. (2018). Re: How can i gen the length in mb from a
## cytoband?. Retrieved from:
## https://www.researchgate.net/post/How-can-i-gen-the-length-in-mb-from-a-cytoband/5b2147a0565fba5e2820b3de/citation/download.

# Local folder that holds the downloaded tables
WEBDATA <- "/Users/haekyungim/Library/CloudStorage/Box-Box/LargeFiles/imlab-data/data-Github/web-data"

# NOTE(review): rendering this page reported parsing failures for the hg38
# table (rows with 4 instead of 5 columns). Presumably those rows have an
# empty band name, which whitespace-based read_table() cannot represent;
# consider read_tsv() after checking how the header line of the file is
# written - TODO confirm against the downloaded file.
cytoband_hg38 <- read_table(
  glue("{WEBDATA}/2021-07-28-how-to-get-the-cytogenetic-band-of-a-gene/hgTables-cytoband-positions-hg38.txt"),
  comment = "#",
  guess_max = 10000
)
cytoband_hg37 <- read_table(
  glue("{WEBDATA}/2021-07-28-how-to-get-the-cytogenetic-band-of-a-gene/hgTables-cytoband-positions-hg37.txt"),
  comment = "#",
  guess_max = 10000
)

# Autosomes only (no sex chromosomes, mitochondria, patches or alt contigs).
# The same set is used for both assemblies so the summaries are comparable;
# the hg37 summary previously looked at chr1 and chr2 only.
autosomes <- paste0("chr", 1:22)

# Summary of the band widths, in Mb, for the bands located on `chroms`
band_width_summary <- function(bands, chroms) {
  bands %>%
    filter(chrom %in% chroms) %>%
    mutate(bandw = chromEnd - chromStart) %>%
    pull(bandw) %>%
    summary() / 1e6
}

# What's the distribution of the band widths (in Mb)?
band_width_summary(cytoband_hg38, autosomes)
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 0.100   2.000   3.200   3.545   4.600  18.100
band_width_summary(cytoband_hg37, autosomes)
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 0.100   2.100   3.300   3.552   4.600  15.200

# Density of the band widths per chromosome (hg37, all rows of the table)
cytoband_hg37 %>%
  mutate(bandw = chromEnd - chromStart) %>%
  ggplot(aes(bandw, fill = chrom)) +
  geom_density(alpha = 0.5)

# How many rows (bands) does each table have? These counts cover every row of
# the table, not only the autosomes.
dim(cytoband_hg38)
dim(cytoband_hg37)
## there are many more cytobands in hg38 because there are many patches added after the official release of hg19
```
```{r}
## Genes per cytoband, largest first, followed by a summary of the counts
arrange(count(anno_gene, cytoband), desc(n))
summary(arrange(count(anno_gene, cytoband), desc(n)))
```