#plot the counts of red, green, and leaf. normalized library size across red gall, green gall, and leaf samples. library(edgeR) library(tximeta) library(Glimma) library(broman) library(seqinr) library(dplyr) library(tidyr) library(EnhancedVolcano) library(statmod) library(scales) files <- dir(path="RedVsGreen/counts",pattern="*") y <- readDGE(files,path="RedVsGreen/counts", columns=c(1,2),head=FALSE) y_color <- y[-grep("_",row.names(y$counts)),] files <- dir(path="GallVsLeaf/counts",pattern="*") y <- readDGE(files,path="GallVsLeaf/counts", columns=c(1,2),head=FALSE) y_leaf <- y[-grep("_",row.names(y$counts)),] y=cbind(y_color,y_leaf) #remove empty libraries y=y[,colnames(y)!="Sample_20JRC114G"] y$samples$group <- factor(y$samples$group,levels = c("Gall","Leaf","Red gall","Green gall")) y$samples$group[grep(pattern = "R.counts",x = y$samples$files)] = "Red gall" y$samples$group[grep(pattern = "G.counts",x = y$samples$files)] = "Green gall" y$samples$group[grep(pattern = "gall",x = row.names(y$samples))] <- "Gall" y$samples$group[grep(pattern = "leafaround",x = row.names(y$samples))] <- "Leaf" #remove gall samples y=y[,y$samples$group!="Gall"] #Reduce transcripts to genes (not necessary to run because htseq counts already in genes) tx2gene <- data.frame(transcript = row.names(y),gene = sub("\\..*","",row.names(y))) y_temp <- y y_temp$abundance <- y$counts y_temp$length <- y$counts y_temp<-summarizeToGene(y_temp,tx2gene) y$counts <- y_temp$counts rm(y_temp) #QC - eliminate low count genes cpm <- cpm(y) lcpm <- cpm(y,log=TRUE) numsamples <-length(row.names(y$samples)) table(rowSums(y$counts==0) == numsamples) #Only interested in genes that are expressed #Therefore, exclude any genes that are not expressed at cpm > 0.5 #in at least 15 samples keep.exprs <- rowSums(cpm>0.5)>=15 y <- y[keep.exprs,, keep.lib.sizes=FALSE] length(keep.exprs[keep.exprs==TRUE]) #25586 #normalize samples y <- calcNormFactors(y) #remove outlier libraries y=y[,colnames(y)!="Sample_20JRC1084G"] y <- calcNormFactors(y) ysub <- y de = c("g23591", "g109682", "g134919","g14194","g7147", "g22774", "g97071") #plot the counts for the DE genes counts=t(subset(cpm(ysub),rownames(ysub$counts) %in% de)) type=rep("Green gall",dim(counts)[1]) type[grep(pattern = "R$",x = row.names(counts))]="Red gall" type[grep(pattern = "leafaroundgall",x = row.names(counts))]="Leaf" counts=cbind(counts,type) counts = as.data.frame(counts) down_selected = c() counts_for_plot = counts %>% gather(Gene, Counts, g109682:g97071) counts_for_plot$type <- factor(counts_for_plot$type , levels=c("Leaf","Green gall","Red gall")) counts_for_plot$Gene = gsub("g109682", "GSTF12", counts_for_plot$Gene) counts_for_plot$Gene = gsub("g23591", "FAOMT-1", counts_for_plot$Gene) counts_for_plot$Gene = gsub("g134919", "GSTF11", counts_for_plot$Gene) counts_for_plot$Gene = gsub("g14194", "UGT75C1", counts_for_plot$Gene) counts_for_plot$Gene = gsub("g7147", "FAOMT-2", counts_for_plot$Gene) counts_for_plot$Gene = gsub("g22774", "UFGT", counts_for_plot$Gene) counts_for_plot$Gene = gsub("g97071", "ACCA", counts_for_plot$Gene) #counts_for_plot$Gene <- factor(counts_for_plot$Gene , levels=c("FAOMT-1","GSTF11","UGT75C1","FAOMT-2","UFGT")) #v1 counts_for_plot$Gene <- factor(counts_for_plot$Gene , levels=c("FAOMT-1","FAOMT-2","GSTF11","GSTF12","UGT75C1","UFGT","ACCA")) #v2 pdf("Hvir_Galls_0.01DE_UpInRed_Counts_Sep2020_wUTR_v2.pdf") ggplot(counts_for_plot, aes(x = Gene, y = as.numeric(as.character(Counts))+1, fill = type)) + geom_boxplot(outlier.size = 0, outlier.shape=NA) + geom_point(pch = 21, size=1.5, color="black", position = position_jitterdodge(jitter.width = 0.4), aes(colour=type)) + scale_y_log10(breaks = c(1,11,101,1001), labels = c(0,10,100,1000), limits = c(1,1.1e3), name="Counts Per Million Reads") + theme_classic() + theme(axis.text.x=element_text(size=18, angle = 45, hjust = 1), axis.text.y=element_text(size=18, vjust = 0.5), axis.title.x=element_blank(), axis.title.y=element_text(size=22), legend.position = "top", legend.title=element_blank(), legend.text = element_text(size = 20)) + scale_colour_manual(values=c("#56B4E9","#009E73", "#D55E00")) + scale_fill_manual(values=c("#56B4E9","#009E73", "#D55E00")) dev.off()