# Keep only the rows of the annotation whose type is "exon", and only the
# columns used below: both coordinates and the gene name of each exon.
exon <- gtf[gtf$type == "exon", c("start", "end", "gene_name")]

# Group the exon rows by gene name: one table of exons per gene.
exon_by_gene_name <- split(x = exon, f = exon$gene_name)
# Start a worker cluster sized to half of the detected cores.
# detectCores() may return NA, and half of a single core truncates to zero
# workers; makeCluster() rejects both, so fall back to at least one worker.
# NOTE(review): no parallel::stopCluster(cl) is visible in this part of the
# script -- confirm the workers are shut down after the last parallel step.
cl <- parallel::makeCluster(
  max(1L, floor(0.5 * parallel::detectCores()), na.rm = TRUE)
)
# Per-gene length: the number of distinct positions covered by at least one
# exon of the gene, i.e. the size of the union of its inclusive
# [start, end] intervals. Genes are processed one at a time on the cluster.
#
# The union is computed by merging intervals rather than enumerating every
# position with apply(): when all exons of a gene had the same length, apply()
# returned a matrix and unique() then de-duplicated its rows instead of its
# values, so duplicated or overlapping exons were counted more than once.
# Reading the columns directly also avoids apply() coercing the data frame
# (numbers plus gene names) to a character matrix.
gene_length <-
  parallel::parLapply(
    cl = cl,
    X = exon_by_gene_name,
    fun = function(x) {
      # Assumes start <= end for every exon (GTF convention) -- TODO confirm.
      starts <- as.numeric(x[["start"]])
      ends <- as.numeric(x[["end"]])
      n_exon <- length(starts)
      if (n_exon == 0L) {
        return(0)
      }
      by_start <- order(starts)
      starts <- starts[by_start]
      ends <- ends[by_start]
      # Last position already covered by the exons sorted before each one.
      covered_to <- c(starts[1L] - 1, cummax(ends)[-n_exon])
      # Each exon contributes only the positions beyond that covered range.
      sum(pmax(0, ends - pmax(starts - 1, covered_to)))
    }
  )
# Flatten the per-gene results into a two-column table, one row per gene:
# the gene name comes from the list names, the length is a plain number.
gene_length <- data.frame(
  gene_name = names(gene_length),
  length = vapply(gene_length, as.numeric, numeric(1), USE.NAMES = FALSE)
)
# Save the table as tab-separated text with a header line; row names and
# quoting are switched off so the file holds just the two columns.
write.table(
  gene_length,
  "data/gencode.vM25.basic.gene_length.tsv",
  quote = FALSE,
  row.names = FALSE,
  sep = "\t"
)
# Reload the gene-length table from disk, using the first column as row names
# so that only the remaining column(s) stay as data.
# The parsing options mirror how this script writes the file (tab-separated,
# unquoted): splitting on tabs only and disabling quote and comment handling
# keeps names containing spaces, quote characters or '#' intact.
gene_length <-
  read.table(
    file = "data/gencode.vM25.basic.gene_length.tsv",
    header = TRUE,
    sep = "\t",
    quote = "",
    comment.char = "",
    row.names = 1
  )