gene_x 0 likes 469 views
Tags: R
#-- merge the hg38 counts of 38 samples and the hg38 counts of LT_K331A --
# dplyr supplies %>%, select(), group_by(), summarise() and left_join()
library(dplyr)

# hg38 counts of the existing samples
first_table <- read.csv("merged_gene_counts.csv", stringsAsFactors = FALSE)

# hg38 counts of the LT_K331A run, keyed by gene_name
lt_k331a_counts <- read.csv(
  "~/DATA/Data_Denise_LT_K331A_RNASeq/results_GRCh38/star_salmon/gene_name_gene_counts.csv",
  sep = ",",
  stringsAsFactors = FALSE
)

# Keep gene_name plus the two LT_K331A columns, then collapse repeated gene
# names into one row each by summing their counts (NA counts are ignored)
lt_k331a_by_gene <- lt_k331a_counts %>%
  select(gene_name, LT_K331A_DI, LT_K331A_DII) %>%
  group_by(gene_name) %>%
  summarise(
    LT_K331A_DI = sum(LT_K331A_DI, na.rm = TRUE),
    LT_K331A_DII = sum(LT_K331A_DII, na.rm = TRUE)
  )

# Attach the LT_K331A columns to the first table; every row of the first
# table is kept, and genes without a match get NA in the two new columns
merged_table <- first_table %>%
  left_join(lt_k331a_by_gene, by = "gene_name")

# Turn every NA in the merged table into 0
merged_table[is.na(merged_table)] <- 0

# Quick look at the first rows
print(head(merged_table))

# Save the result; row.names = TRUE adds the row index as the first CSV column
write.csv(merged_table, "gene_counts_hg38_30samples.csv", row.names = TRUE)
#-- combine the hg38 and virus counts (virus genes appended as extra rows) --
# Load necessary libraries
library(dplyr)

# Read the hg38 table. The first CSV column is consumed as row names
# (the file is written with row.names = TRUE by the merge step).
gene_counts_hg38_30samples <- read.csv(
  "gene_counts_hg38_30samples.csv",
  stringsAsFactors = FALSE,
  row.names = 1
)

# Read the virus (JN707599) table
salmon_merged_gene_counts <- read.csv(
  "~/DATA/Data_Denise_LT_K331A_RNASeq/results_JN707599/star_salmon/salmon.merged.gene_counts.csv",
  sep = ",",
  stringsAsFactors = FALSE
)

# Rename the two LT_K331A columns so they match the names used in the hg38 table
names(salmon_merged_gene_counts)[names(salmon_merged_gene_counts) == "LT_K331A_d8_DonorI"] <- "LT_K331A_DI"
names(salmon_merged_gene_counts)[names(salmon_merged_gene_counts) == "LT.K331A.d8.DII_re"] <- "LT_K331A_DII"

hg38_columns <- names(gene_counts_hg38_30samples)
virus_columns <- names(salmon_merged_gene_counts)

# Rows can only be identified after binding if both tables carry gene_name
if (!("gene_name" %in% hg38_columns) || !("gene_name" %in% virus_columns)) {
  stop("Both count tables must contain a 'gene_name' column.", call. = FALSE)
}

# Columns present in both tables, in the order of the hg38 table. Selecting
# only these avoids an "undefined columns selected" error when the hg38 table
# has a column the virus table lacks, and drops the extra virus-only columns.
common_columns <- hg38_columns[hg38_columns %in% virus_columns]
missing_in_virus <- setdiff(hg38_columns, virus_columns)
if (length(missing_in_virus) > 0) {
  warning(
    "Columns absent from the virus table (numeric ones are filled with 0): ",
    paste(missing_in_virus, collapse = ", "),
    call. = FALSE
  )
}
salmon_merged_gene_counts <- salmon_merged_gene_counts[, common_columns, drop = FALSE]

# Append the virus genes as additional rows. The two tables describe different
# genes for the same samples, so they must be stacked, not pasted side by side:
# cbind() would fail on differing row counts (or recycle the virus rows) and
# would duplicate every sample column. bind_rows() matches columns by name and
# fills columns missing from the virus table with NA.
merged_table <- bind_rows(gene_counts_hg38_30samples, salmon_merged_gene_counts)

# Replace NA with 0 in the numeric columns only, so text columns such as
# gene_name are never overwritten with the string "0"
numeric_columns <- vapply(merged_table, is.numeric, logical(1))
merged_table[numeric_columns][is.na(merged_table[numeric_columns])] <- 0

# View the result (first hg38 rows and the appended virus rows)
print(head(merged_table))
print(tail(merged_table, nrow(salmon_merged_gene_counts)))

# Write the combined table to a new file
write.csv(merged_table, "updated_gene_counts_hg38_30samples.csv", row.names = FALSE)
点赞本文的读者
还没有人对此文章表态
没有评论
Phyloseq for GPA vs RA vs control
QIIME + Phyloseq + MicrobiotaProcess (v2)
© 2023 XGenes.com Impressum