library(ggplot2) library(dplyr) #plot percentage of discordant reads in red1, the percentage of discordant reads in green, #and the difference between the percentage of discordant reads in red1 - the percentage of discordant reads in green #percentage of discordant reads is defined as bam -F 1286 / all bam reads red_disc=read.table("unmapped_red1-merged-rmdup-1Mbp_position.txt", header=FALSE, stringsAsFactors = FALSE) green_disc=read.table("unmapped_green-merged-rmdup-1Mbp_position.txt", header=FALSE, stringsAsFactors = FALSE) red_total=read.table("red1-merged-rmdup-1Mbp_position.txt", header=FALSE, stringsAsFactors = FALSE) green_total=read.table("green-merged-rmdup-1Mbp_position.txt", header=FALSE, stringsAsFactors = FALSE) windows=5000 red_disc_hist=hist(red_disc$V2, breaks=seq(40000000-windows/2,41000000+windows/2,windows)) green_disc_hist=hist(green_disc$V2, breaks=seq(40000000-windows/2,41000000+windows/2,windows)) red_total_hist=hist(red_total$V2, breaks=seq(40000000-windows/2,41000000+windows/2,windows)) green_total_hist=hist(green_total$V2, breaks=seq(40000000-windows/2,41000000+windows/2,windows)) plot_hist = cbind(red_disc_hist$mids, red_disc_hist$count/red_total_hist$count) colnames(plot_hist) = c("position","diff") plot_hist = as_tibble(plot_hist) which(green_disc_hist$count==0) plot_hist$diff[196]=0 plot_hist$diff[201]=0 plot_hist$color=ifelse(plot_hist$diff>0, "red", "green") start = -0.3 end = -0.6 pdf("Hcor_resequencing_discordant_alignment_red1_July2020.pdf", width=10, height=2) ggplot(plot_hist, aes(y=diff*100, x=position/1000000, fill=color)) + geom_bar(stat="identity") + ylim(-0.6,4) + scale_fill_manual(values = c("#009E73","#D55E00")) + xlim(40092625/1000000,40892915/1000000) + theme_classic() + theme(legend.position = "none", axis.text = element_text(size = 14), axis.title = element_text(size = 14)) + geom_segment(aes(x = 40480212/1000000, y = start, xend = 40480212/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40484391/1000000, y = start, xend = 40484391/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40487389/1000000, y = start, xend = 40487389/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492286/1000000, y = start, xend = 40492286/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492727/1000000, y = start, xend = 40492727/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492749/1000000, y = start, xend = 40492749/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492772/1000000, y = start, xend = 40492772/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492786/1000000, y = start, xend = 40492786/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492787/1000000, y = start, xend = 40492787/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40508398/1000000, y = start, xend = 40508398/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40508399/1000000, y = start, xend = 40508399/1000000, yend = end, colour = "red"), data=plot_hist) + labs(x ="Chromosome 1 (Mbp)", y = "Discordant Reads \nin Red (%)") + geom_vline(xintercept = 40450000/1000000, linetype="dashed", color = "#0072B2", size=1) + geom_vline(xintercept = 40550000/1000000, linetype="dashed", color = "#0072B2", size=1) dev.off() plot_hist = cbind(green_disc_hist$mids, green_disc_hist$count/green_total_hist$count) colnames(plot_hist) = c("position","diff") plot_hist = as_tibble(plot_hist) which(green_disc_hist$count==0) plot_hist$diff[196]=0 plot_hist$diff[201]=0 plot_hist$color=ifelse(plot_hist$diff>0, "green", "red") start = -0.3 end = -0.6 pdf("Hcor_resequencing_discordant_alignment_green_July2020.pdf", width=10, height=2) ggplot(plot_hist, aes(y=diff*100, x=position/1000000, fill=color)) + geom_bar(stat="identity") + ylim(-0.6,4) + scale_fill_manual(values = c("#009E73","#D55E00")) + xlim(40092625/1000000,40892915/1000000) + theme_classic() + theme(legend.position = "none", axis.text = element_text(size = 14), axis.title = element_text(size = 14)) + geom_segment(aes(x = 40480212/1000000, y = start, xend = 40480212/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40484391/1000000, y = start, xend = 40484391/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40487389/1000000, y = start, xend = 40487389/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492286/1000000, y = start, xend = 40492286/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492727/1000000, y = start, xend = 40492727/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492749/1000000, y = start, xend = 40492749/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492772/1000000, y = start, xend = 40492772/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492786/1000000, y = start, xend = 40492786/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492787/1000000, y = start, xend = 40492787/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40508398/1000000, y = start, xend = 40508398/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40508399/1000000, y = start, xend = 40508399/1000000, yend = end, colour = "red"), data=plot_hist) + labs(x ="Chromosome 1 (Mbp)", y = "Discordant Reads \nin Green (%) ") + geom_vline(xintercept = 40450000/1000000, linetype="dashed", color = "#0072B2", size=1) + geom_vline(xintercept = 40550000/1000000, linetype="dashed", color = "#0072B2", size=1) dev.off() ###################### diff = red_disc_hist$count/red_total_hist$count - green_disc_hist$count/green_total_hist$count plot_hist = cbind(red_disc_hist$mids, diff) colnames(plot_hist) = c("position","diff") plot_hist = as_tibble(plot_hist) which(green_disc_hist$count==0) plot_hist$diff[196]=0 plot_hist$diff[201]=0 plot_hist$color=ifelse(plot_hist$diff>0, "red", "green") start = -0.5 end = -0.6 pdf("Hcor_resequencing_discordant_alignment_diff_July2020.pdf", width=10, height=2.5) ggplot(plot_hist, aes(y=diff*100, x=position/1000000, fill=color)) + geom_bar(stat="identity") + ylim(-0.6,0.6) + scale_fill_manual(values = c("#009E73","#D55E00")) + xlim(40092625/1000000,40892915/1000000) + theme_classic() + theme(legend.position = "none", axis.text = element_text(size = 14), axis.title = element_text(size = 14)) + geom_segment(aes(x = 40480212/1000000, y = start, xend = 40480212/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40484391/1000000, y = start, xend = 40484391/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40487389/1000000, y = start, xend = 40487389/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492286/1000000, y = start, xend = 40492286/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492727/1000000, y = start, xend = 40492727/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492749/1000000, y = start, xend = 40492749/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492772/1000000, y = start, xend = 40492772/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492786/1000000, y = start, xend = 40492786/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492787/1000000, y = start, xend = 40492787/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40508398/1000000, y = start, xend = 40508398/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40508399/1000000, y = start, xend = 40508399/1000000, yend = end, colour = "red"), data=plot_hist) + labs(x ="Chromosome 1 (Mbp)", y = "Discordant Reads (%) \n(Red - Green)") + geom_vline(xintercept = 40450000/1000000, linetype="dashed", color = "#0072B2", size=1) + geom_vline(xintercept = 40550000/1000000, linetype="dashed", color = "#0072B2", size=1) dev.off() ###################################################################################################################### #same as above, but zoomed into just the 100kbp region around the key SNPs red_disc=read.table("unmapped_red1-merged-rmdup-1Mbp_position.txt", header=FALSE, stringsAsFactors = FALSE) green_disc=read.table("unmapped_green-merged-rmdup-1Mbp_position.txt", header=FALSE, stringsAsFactors = FALSE) red_total=read.table("red1-merged-rmdup-1Mbp_position.txt", header=FALSE, stringsAsFactors = FALSE) green_total=read.table("green-merged-rmdup-1Mbp_position.txt", header=FALSE, stringsAsFactors = FALSE) #the region is the same as used for looking at where the discordant reads mapped in the next section region_s = 40450000 region_e = 40550000 red_disc = red_disc %>% filter(V2region_s) green_disc = green_disc %>% filter(V2region_s) red_total = red_total %>% filter(V2region_s) green_total = green_total %>% filter(V2region_s) windows=500 red_disc_hist=hist(red_disc$V2, breaks=seq(region_s-windows/2,region_e+windows/2,windows)) green_disc_hist=hist(green_disc$V2, breaks=seq(region_s-windows/2,region_e+windows/2,windows)) red_total_hist=hist(red_total$V2, breaks=seq(region_s-windows/2,region_e+windows/2,windows)) green_total_hist=hist(green_total$V2, breaks=seq(region_s-windows/2,region_e+windows/2,windows)) diff = red_disc_hist$count/red_total_hist$count - green_disc_hist$count/green_total_hist$count plot_hist = cbind(red_disc_hist$mids, diff) colnames(plot_hist) = c("position","diff") plot_hist = as_tibble(plot_hist) which(green_disc_hist$count==0) plot_hist$color=ifelse(plot_hist$diff>0, "red", "green") start = -2.5 end = -3 pdf("Hcor_resequencing_discordant_alignment_diff_SNPsRegion_July2020.pdf", width=10, height=2.5) ggplot(plot_hist, aes(y=diff*100, x=position/1000000, fill=color)) + geom_bar(stat="identity") + ylim(-3,3) + scale_fill_manual(values = c("#009E73","#D55E00")) + theme_classic() + theme(legend.position = "none", axis.text = element_text(size = 14), axis.title = element_text(size = 14)) + geom_segment(aes(x = 40480212/1000000, y = start, xend = 40480212/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40484391/1000000, y = start, xend = 40484391/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40487389/1000000, y = start, xend = 40487389/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492286/1000000, y = start, xend = 40492286/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492727/1000000, y = start, xend = 40492727/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492749/1000000, y = start, xend = 40492749/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492772/1000000, y = start, xend = 40492772/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492786/1000000, y = start, xend = 40492786/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40492787/1000000, y = start, xend = 40492787/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40508398/1000000, y = start, xend = 40508398/1000000, yend = end, colour = "red"), data=plot_hist) + geom_segment(aes(x = 40508399/1000000, y = start, xend = 40508399/1000000, yend = end, colour = "red"), data=plot_hist) + labs(x ="Chromosome 1 (Mbp)", y = "Discordant Reads (%) \n(Red - Green)") dev.off() ################################################################################################################### #this maps the location of all the discordant reads when they are mapped as SE to the entire genome + A. pisum TE library(scales) library(gridExtra) #percent of unmapped reads when all reads from discordant read pairs in the 11SNPs region are mapped as SE #(input based on the number of unmapped reads in "discordantReads_red1-merged-11SNPsRegion+TE_SE_position.txt" on cluster) (24616-24282)/24616 #0.01356841 red1= read.table("discordantReads_red1-merged-11SNPsRegion+TE_SE_position.txt", stringsAsFactors = FALSE, header=FALSE) Hcor_genome=c(60222264, 39701450, 38924103, 36083769, 32569495, 24543776, 23553545, 20814484, 17934322) windows=100000 ymax=1000 #chr1 i=1 chr = red1 %>% filter(V1=="ScPoQx8_4268;HRSCAF=5273") chr_hist = hist(chr$V2, breaks=seq(1,Hcor_genome[i]+windows-1,windows)) chr_hist$count = ifelse(chr_hist$count==0, NA, chr_hist$count) plot_hist = cbind(chr_hist$mids, chr_hist$count) colnames(plot_hist) = c("position","count") plot_hist = as_tibble(plot_hist) plot_hist$mask=ifelse(plot_hist$position>=40350000 & plot_hist$position<=40650000, "1", "2") p1 = ggplot(plot_hist, aes(y=count, x=position/1000000)) + scale_y_log10(limits = c(1,ymax)) + #scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x) (c(0,ymax))) + geom_point(color="black", cex=0.7) + theme_classic() + theme(legend.position = "none", axis.text = element_text(size = 12), axis.title = element_text(size = 12), axis.text.x = element_blank(), axis.ticks.x = element_blank(), ) + labs(x =i, y = "Discordant Reads \nMapping Count") #does not plot the two windows containing the 40,450,000-40,550,000 regions, because of the y-axis limit #chr2 i=2 chr = red1 %>% filter(V1=="ScPoQx8_1269;HRSCAF=1516") chr_hist = hist(chr$V2, breaks=seq(1,Hcor_genome[i]+windows-1,windows)) chr_hist$count = ifelse(chr_hist$count==0, NA, chr_hist$count) plot_hist = cbind(chr_hist$mids, chr_hist$count) colnames(plot_hist) = c("position","count") plot_hist = as_tibble(plot_hist) p2 = ggplot(plot_hist, aes(y=count, x=position/1000000)) + scale_y_log10(limits = c(1,ymax)) + #scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x) (c(0,ymax))) + geom_point(color="blue4", cex=0.7) + theme_classic() + theme(legend.position = "none", axis.title = element_text(size = 14), axis.title.y=element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), axis.line.y = element_blank()) + labs(x =i) #chr3 i=3 chr = red1 %>% filter(V1=="ScPoQx8_9746;HRSCAF=11751") chr_hist = hist(chr$V2, breaks=seq(1,Hcor_genome[i]+windows-1,windows)) chr_hist$count = ifelse(chr_hist$count==0, NA, chr_hist$count) plot_hist = cbind(chr_hist$mids, chr_hist$count) colnames(plot_hist) = c("position","count") plot_hist = as_tibble(plot_hist) p3 = ggplot(plot_hist, aes(y=count, x=position/1000000)) + scale_y_log10(limits = c(1,ymax)) + #scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x) (c(0,ymax))) + geom_point(color="dodgerblue3", cex=0.7) + theme_classic() + theme(legend.position = "none", axis.title = element_text(size = 14), axis.title.y=element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), axis.line.y = element_blank()) + labs(x =i) #chr4 i=4 chr = red1 %>% filter(V1=="ScPoQx8_7909;HRSCAF=9379") chr_hist = hist(chr$V2, breaks=seq(1,Hcor_genome[i]+windows-1,windows)) chr_hist$count = ifelse(chr_hist$count==0, NA, chr_hist$count) plot_hist = cbind(chr_hist$mids, chr_hist$count) colnames(plot_hist) = c("position","count") plot_hist = as_tibble(plot_hist) p4 = ggplot(plot_hist, aes(y=count, x=position/1000000)) + scale_y_log10(limits = c(1,ymax)) + #scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x) (c(0,ymax))) + geom_point(color="blue2", cex=0.7) + theme_classic() + theme(legend.position = "none", axis.title = element_text(size = 14), axis.title.y=element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), axis.line.y = element_blank()) + labs(x =i) #chr5 i=5 chr = red1 %>% filter(V1=="ScPoQx8_3632;HRSCAF=4405") chr_hist = hist(chr$V2, breaks=seq(1,Hcor_genome[i]+windows-1,windows)) chr_hist$count = ifelse(chr_hist$count==0, NA, chr_hist$count) plot_hist = cbind(chr_hist$mids, chr_hist$count) colnames(plot_hist) = c("position","count") plot_hist = as_tibble(plot_hist) p5 = ggplot(plot_hist, aes(y=count, x=position/1000000)) + scale_y_log10(limits = c(1,ymax)) + #scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x) (c(0,ymax))) + geom_point(color="blueviolet", cex=0.7) + theme_classic() + theme(legend.position = "none", axis.title = element_text(size = 14), axis.title.y=element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), axis.line.y = element_blank()) + labs(x =i) #chr6 i=6 chr = red1 %>% filter(V1=="ScPoQx8_9972;HRSCAF=12060") chr_hist = hist(chr$V2, breaks=seq(1,Hcor_genome[i]+windows-1,windows)) chr_hist$count = ifelse(chr_hist$count==0, NA, chr_hist$count) plot_hist = cbind(chr_hist$mids, chr_hist$count) colnames(plot_hist) = c("position","count") plot_hist = as_tibble(plot_hist) p6 = ggplot(plot_hist, aes(y=count, x=position/1000000)) + scale_y_log10(limits = c(1,ymax)) + #scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x) (c(0,ymax))) + geom_point(color="violetred4", cex=0.7) + theme_classic() + theme(legend.position = "none", axis.title = element_text(size = 14), axis.title.y=element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), axis.line.y = element_blank()) + labs(x =i) #chr7 i=7 chr = red1 %>% filter(V1=="ScPoQx8_8456;HRSCAF=10181") chr_hist = hist(chr$V2, breaks=seq(1,Hcor_genome[i]+windows-1,windows)) chr_hist$count = ifelse(chr_hist$count==0, NA, chr_hist$count) plot_hist = cbind(chr_hist$mids, chr_hist$count) colnames(plot_hist) = c("position","count") plot_hist = as_tibble(plot_hist) p7 = ggplot(plot_hist, aes(y=count, x=position/1000000)) + scale_y_log10(limits = c(1,ymax)) + #scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x) (c(0,ymax))) + geom_point(color="red", cex=0.7) + theme_classic() + theme(legend.position = "none", axis.title = element_text(size = 14), axis.title.y=element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), axis.line.y = element_blank()) + labs(x =i) #chr8 i=8 chr = red1 %>% filter(V1=="ScPoQx8_7882;HRSCAF=9334") chr_hist = hist(chr$V2, breaks=seq(1,Hcor_genome[i]+windows-1,windows)) chr_hist$count = ifelse(chr_hist$count==0, NA, chr_hist$count) plot_hist = cbind(chr_hist$mids, chr_hist$count) colnames(plot_hist) = c("position","count") plot_hist = as_tibble(plot_hist) p8 = ggplot(plot_hist, aes(y=count, x=position/1000000)) + scale_y_log10(limits = c(1,ymax)) + #scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x) (c(0,ymax))) + geom_point(color="orangered", cex=0.7) + theme_classic() + theme(legend.position = "none", axis.title = element_text(size = 14), axis.title.y=element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), axis.line.y = element_blank()) + labs(x =i) #chr9 i=9 chr = red1 %>% filter(V1=="ScPoQx8_9720;HRSCAF=11714") chr_hist = hist(chr$V2, breaks=seq(1,Hcor_genome[i]+windows-1,windows)) chr_hist$count = ifelse(chr_hist$count==0, NA, chr_hist$count) plot_hist = cbind(chr_hist$mids, chr_hist$count) colnames(plot_hist) = c("position","count") plot_hist = as_tibble(plot_hist) p9 = ggplot(plot_hist, aes(y=count, x=position/1000000)) + scale_y_log10(limits = c(1,ymax)) + #scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x) (c(0,ymax))) + geom_point( color="orange", cex=0.7) + theme_classic() + theme(legend.position = "none", axis.title = element_text(size = 14), axis.title.y=element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), axis.line.y = element_blank()) + labs(x =i) #A. pisum TE plot_hist = matrix(0, 1,2) colnames(plot_hist) = c("position","count") plot_hist[1,2] = NA plot_hist = as_tibble(plot_hist) p10 = ggplot(plot_hist, aes(y=count, x=position/1000000)) + scale_y_log10(limits = c(1,ymax)) + #scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x) (c(0,ymax))) + geom_point(color="black", cex=0.7) + theme_classic() + theme(legend.position = "none", axis.title = element_text(size = 14), axis.title.y=element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), axis.line.y = element_blank()) + labs(x = "TE") Hcor_genome_v2=c(60222264+20000000, #to account for the axis space 39701450, 38924103, 36083769, 32569495, 24543776, 23553545, 20814484, 17934322, 10000000) widths=Hcor_genome_v2/sum(Hcor_genome_v2) pdf("Hcor_resequencing_discordant_alignment_SE_mapped_wholeGenome_July2020.pdf", width=10, height=1.7) grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, widths=widths, nrow = 1) dev.off()