gene_x 0 like s 398 view s
Tags: R
#https://charlesjb.github.io/How_to_extract_promoters_positions/
setwd("/media/jhuang/Elements1/Data_Denise_LT_DNA_Bindung")
# Load required libraries
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(GenomicFeatures)
library(rtracklayer)
library(GenomicRanges)
library(org.Hs.eg.db)
# Load TxDb object
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
# Get promoters and genes
promoters_txdb <- promoters(genes(txdb))
genes_txdb <- genes(txdb)
# Find overlaps between promoters and genes
overlaps <- findOverlaps(promoters_txdb, genes_txdb)
# Add gene names, scores, and strands to promoters
promoters_with_gene_names <- promoters_txdb[queryHits(overlaps)]
gene_ids <- mcols(genes_txdb[subjectHits(overlaps)])$gene_id
gene_symbols <- mapIds(org.Hs.eg.db, keys = gene_ids, column = "SYMBOL", keytype = "ENTREZID", multiVals = "first")
mcols(promoters_with_gene_names)$gene_name <- gene_symbols
mcols(promoters_with_gene_names)$score <- 0
# Remove duplicate entries
promoters_with_gene_names <- unique(promoters_with_gene_names)
# Custom function to export the data in the desired BED format
export_bed_with_extra_columns <- function(gr, file) {
bed_data <- data.frame(chrom = as.character(seqnames(gr)),
start = pmax(start(gr) - 1, 0), # Ensure start is never negative
end = end(gr),
gene_name = mcols(gr)$gene_name,
score = mcols(gr)$score,
strand = as.character(strand(gr)),
stringsAsFactors = FALSE)
write.table(bed_data, file, sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
}
# Save promoters with gene names, scores, and strands as a BED file
export_bed_with_extra_columns(promoters_with_gene_names, "promoters_with_gene_names.bed")
点赞本文的读者
还没有人对此文章表态
没有评论
Phyloseq for GPA vs RA vs control
QIIME + Phyloseq + MicrobiotaProcess (v2)
© 2023 XGenes.com Impressum