How to extract promoters positions?

gene_x 0 like s 398 view s

Tags: R

#https://charlesjb.github.io/How_to_extract_promoters_positions/  
setwd("/media/jhuang/Elements1/Data_Denise_LT_DNA_Bindung")

# Load required libraries
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(GenomicFeatures)
library(rtracklayer)
library(GenomicRanges)
library(org.Hs.eg.db)

# Load TxDb object
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene

# Get promoters and genes
promoters_txdb <- promoters(genes(txdb))
genes_txdb <- genes(txdb)

# Find overlaps between promoters and genes
overlaps <- findOverlaps(promoters_txdb, genes_txdb)

# Add gene names, scores, and strands to promoters
promoters_with_gene_names <- promoters_txdb[queryHits(overlaps)]
gene_ids <- mcols(genes_txdb[subjectHits(overlaps)])$gene_id
gene_symbols <- mapIds(org.Hs.eg.db, keys = gene_ids, column = "SYMBOL", keytype = "ENTREZID", multiVals = "first")
mcols(promoters_with_gene_names)$gene_name <- gene_symbols
mcols(promoters_with_gene_names)$score <- 0

# Remove duplicate entries
promoters_with_gene_names <- unique(promoters_with_gene_names)

# Custom function to export the data in the desired BED format
export_bed_with_extra_columns <- function(gr, file) {
  bed_data <- data.frame(chrom = as.character(seqnames(gr)),
                         start = pmax(start(gr) - 1, 0), # Ensure start is never negative
                         end = end(gr),
                         gene_name = mcols(gr)$gene_name,
                         score = mcols(gr)$score,
                         strand = as.character(strand(gr)),
                         stringsAsFactors = FALSE)
  write.table(bed_data, file, sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
}

# Save promoters with gene names, scores, and strands as a BED file
export_bed_with_extra_columns(promoters_with_gene_names, "promoters_with_gene_names.bed")

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum