library(edgeR) library(tximeta) library(Glimma) library(broman) library(seqinr) library(dplyr) library(tidyr) library(EnhancedVolcano) library(statmod) library(scales) setwd("/Users/hanc/Documents/Clair/aphid_paper1/Hvir_plant_RNAseq/red_vs_green_2020Sep/") files <- dir(path="RedVsGreen/counts",pattern="*") y <- readDGE(files,path="RedVsGreen/counts", columns=c(1,2),head=FALSE) y <- y[-grep("_",row.names(y$counts)),] #remove empty libraries y=y[,colnames(y)!="Sample_20JRC114G"] y$samples$group = factor(y$samples$group,levels = c("Red","Green")) y$samples$group[grep(pattern = "R.counts",x = y$samples$files)] = "Red" y$samples$group[grep(pattern = "G.counts",x = y$samples$files)] = "Green" #Reduce transcripts to genes (no need to run this because htseq output already in genes, doesn't make a difference ran or not) tx2gene <- data.frame(transcript = row.names(y),gene = sub("\\..*","",row.names(y))) y_temp <- y y_temp$abundance <- y$counts y_temp$length <- y$counts y_temp<-summarizeToGene(y_temp,tx2gene) y$counts <- y_temp$counts rm(y_temp) #QC - eliminate low count genes cpm <- cpm(y) lcpm <- cpm(y,log=TRUE) numsamples <-length(row.names(y$samples)) table(rowSums(y$counts==0) == numsamples) #Only interested in genes that are expressed #Therefore, exclude any genes that are not expressed at cpm > 0.5 #in at least 15 samples keep.exprs <- rowSums(cpm>0.5)>=15 y <- y[keep.exprs,, keep.lib.sizes=FALSE] length(keep.exprs[keep.exprs==TRUE]) #23145 #normalize samples y <- calcNormFactors(y) glMDSPlot(y,groups=y$samples$group,labels=row.names(y$samples)) #remove outlier libraries y=y[,colnames(y)!="Sample_20JRC1084G"] y <- calcNormFactors(y) #DE of red vs green ysub <- y ygall <- ysub[,c(which(ysub$samples$group=='Red'),which(ysub$samples$group=='Green'))] group <- factor(ygall$samples$group) leaf_list = read.table("RedGreenGallID.May2020.txt", stringsAsFactors = FALSE, header = TRUE) leaf = vector(,dim(ygall$samples)[1]) for (i in 1:length(leaf)){ #every row of RNAseq for (j in 1:dim(leaf_list)[1]){#every row of lookup table if (sub("Sample_","",row.names(ygall$samples)[i]) == leaf_list$Sample_ID[j]){ leaf[i]=leaf_list$Leaf[j] } } } leaf <- factor(leaf) ygall$samples = cbind(ygall$samples,leaf) #average library size mean(ygall$samples$lib.size) #4711102 #MDS plot glMDSPlot(ygall,groups=ygall$samples$group,labels=row.names(ygall$samples)) glMDSPlot(ygall,groups=ygall$samples$leaf,labels=row.names(ygall$samples)) design <- model.matrix(~ leaf + group) ygall <- estimateDisp(ygall,design) fit <- glmQLFit(ygall, design) qlf_gall <- glmQLFTest(fit) qlf_gall$table$logFC = -qlf_gall$table$logFC #want red - green rather than green - red qlf_gall_adj = qlf_gall$table qlf_gall_adj[,4]=p.adjust(qlf_gall$table[,4], method = 'fdr') #p=0.01 dt_gall <-decideTests(qlf_gall, p.value = 0.01) summary(dt_gall) #groupGreen #Down 0 #NotSig 23139 #Up 6 #p=0.05 dt_gall <-decideTests(qlf_gall, p.value = 0.05) summary(dt_gall) #groupGreen #Down 0 #NotSig 23137 #Up 8 DEUP_red <- row.names(dt_gall)[which(dt_gall ==1)] DEDOWN_red <- row.names(dt_gall)[which(dt_gall ==-1)] plotMD(qlf_gall) ###volcano### DEredgrSG <- qlf_gall$table DEredgrSG$qValue <- p.adjust(DEredgrSG$PValue, method = "fdr") orderedDEredgrSG<-DEredgrSG[order(DEredgrSG$qValue),] P05cutoff<-orderedDEredgrSG[orderedDEredgrSG$qValue<0.05,][dim(orderedDEredgrSG[orderedDEredgrSG$qValue<0.05,])[1],] P01cutoff<-orderedDEredgrSG[orderedDEredgrSG$qValue<0.01,][dim(orderedDEredgrSG[orderedDEredgrSG$qValue<0.01,])[1],] p <- P05cutoff$PValue keyvals <- rep('grey75', nrow(DEredgrSG)) names(keyvals) <- rep('NS', nrow(DEredgrSG)) keyvals[which(DEredgrSG$PValue <= p & DEredgrSG$logFC > 0)] <- 'blue' names(keyvals)[which(DEredgrSG$PValue <= p & DEredgrSG$logFC > 0)] <- 'Signif. down-regulated' keyvals[which(DEredgrSG$PValue <= p & DEredgrSG$logFC < 0)] <- 'red' names(keyvals)[which(DEredgrSG$PValue <= p & DEredgrSG$logFC < 0)] <- 'Signif. up-regulated' unique(keyvals) DEredgrSG[rownames(DEredgrSG) %in% c("g134919","g14194","g22774","g23591","g7147","g109682", "g97071"),] pdf("VP_RedvsGr_Galls_p0.05_Sep2020_wUTR.pdf") p1 <- EnhancedVolcano(toptable = DEredgrSG, lab = NA, x = "logFC", y = "PValue", title = NULL, subtitle = NULL, caption = NULL, colAlpha = 0.65, colCustom = keyvals, pCutoff = p, FCcutoff = FALSE, cutoffLineType = "blank", hline = c(p,P01cutoff$PValue), hlineCol = "black", hlineType = "longdash", hlineWidth = 0.5, pointSize = c(ifelse(DEredgrSG$qValue < 0.05, 3, 1)), ylim = c(0, 20), xlim = c(-10,10), xlab = expression('Gall (Red - Green) Fold Change (log2)'), col = c("black", "black","royalblue", "red2") ) p1 + theme(legend.position = 'none', axis.text.x = element_text(angle = 0, size = 20, vjust= 1), axis.text.y = element_text(angle = 0, size = 20, vjust= 0.5), axis.title.x = element_text(angle = 0,size = 20,vjust = 1), axis.title.y = element_text(angle = 90,size = 20,vjust = 0) ) + geom_text(data=DEredgrSG %>% filter(row.names(DEredgrSG) %in% c("g23591", "g109682", "g134919","g14194","g7147", "g22774", "g97071")), aes(x = logFC, y = -log10(PValue), label = c("GSTF12", "GSTF11", "UGT75C1", "UFGT", "FAOMT-1","FAOMT-2", "ACCA")), nudge_x = c(2.2, 2.2, 2.5, -1.6, 2.4, 2.4, 1.7), nudge_y=c(0,0,0,0.6,0.1,0,0), size = 5.5, colour = "darkblue", fontface = "plain") + annotate("text", label =c("FDR = 0.05", "FDR = 0.01"), x = c(-8.5,-8.5), y = c(4.2,7.3), size = 5, colour = "black") dev.off() ############################################################################## #plot PCA counts = cpm(ysub) PCA = prcomp(t(log(counts+1)), retx = TRUE, center = TRUE) rownames(PCA$x)==colnames(ygall$counts) #false, so need to re-order later tab <- data.frame(sample.id = rownames(PCA$x), pop = factor(ygall$samples$group)[match(rownames(PCA$x), colnames(ygall$counts))], EV1 = PCA$x[,1], # the first eigenvector EV2 = PCA$x[,2], # the second eigenvector stringsAsFactors = FALSE) pdf("PCA_RedvsGr_Galls_p0.05_Sep2020_wUTR.pdf", width=5, height=5) ggplot(tab, aes(x=EV1, y=EV2, col=pop)) + geom_point(size=2.2) + theme_classic() + ylab("PC2") + xlab("PC1") + scale_color_manual(values=c('#D55E00','#009E73')) + theme(legend.position = "none", axis.text=element_blank(), axis.title=element_text(size=18)) dev.off() #plot percent variance explained pc.percent <- PCA$sdev^2/sum(PCA$sdev^2,na.rm = TRUE)*100 #need to divide by the total because the raw numbers don't add to 1. fixed on 8.25.20 pc.var.plot = head(round(pc.percent, 2),10) pc.var.plot = cbind(as.character(1:10),pc.var.plot) pc.var.plot = as.data.frame(pc.var.plot) colnames(pc.var.plot) = c("PC","perc") pc.var.plot = transform(pc.var.plot, perc= as.numeric(as.character(perc))) pdf("PerVarExplained_RedvsGr_Galls_p0.05_Sep2020_wUTR.pdf", width=5, height=5) ggplot(data = pc.var.plot, aes(x=reorder(PC, -perc), y=perc)) + geom_bar(stat="identity", fill="gray80",color="black") + labs(x = "PC", y = "Percent variance explained") + theme_classic() + ylim(0,30) + theme(axis.text.x = element_text(size = 16), axis.text.y = element_text(size = 16), axis.title = element_text(size = 18)) dev.off()