#raw data mkdir rawdata cd rawdata ln -s *.fastq.gz . cd .. #adapter trimming mkdir trimmed_reads cd trimmed_reads #paired end for i in ../rawdata/*_1.fastq.gz do file=`basename $i` numbertwo=${i//_1.fastq.gz/_2.fastq.gz} numbertwofile=${file//_1.fastq.gz/_2.fastq.gz} bsub -n 2 -o trim_reads_2.log -We 10 "cutadapt -j 2 --trim-n \ -a revcompTruSeq_Univ_Adap=AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT \ -A revcompTruSeq_Univ_Adap=AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT \ -a TruSeq_Idx_Adap=GATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG \ -A TruSeq_Idx_Adap=GATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG \ -a pMENTS=CTGTCTCTTATACACATCT \ -A pMENTS=CTGTCTCTTATACACATCT \ -a common_Tn5_end=AGATGTGTATAAGAGACAG \ -A common_Tn5_end=AGATGTGTATAAGAGACAG \ -m 36 -q 10,10 --nextseq-trim=10 -o catrim.${file} -p catrim.${numbertwofile} \ ${i} ${numbertwo}" done #single end for i in ../rawdata/*pooled.fastq.gz do file=`basename $i` bsub -n 2 -o trim_reads_2.log -We 10 "cutadapt -j 2 --trim-n \ -a revcompTruSeq_Univ_Adap=AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT \ -a TruSeq_Idx_Adap=GATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG \ -a pMENTS=CTGTCTCTTATACACATCT \ -a common_Tn5_end=AGATGTGTATAAGAGACAG \ -m 36 -q 10,10 --nextseq-trim=10 -o catrim.${file} ${i}" done conda activate RNAseq cd .. #map with hisat2 ln -s hormaphis_cornu_26Sep2017_PoQx8.fasta bsub -n 8 -o hisat2_idx "hisat2-build hormaphis_cornu_26Sep2017_PoQx8.fasta \ hormaphis_cornu_26Sep2017_hisat2 -p 8" cd trimmed_reads #move all the files that are not standard PE mkdir special_fastq mv catrim.*_08.pooled.fastq.gz special_fastq/ for i in catrim.*_run1_1.fastq.gz do root_name=`basename ${i} _1.fastq.gz` second_name=${root_name}_2.fastq.gz settwo_II=${second_name//_run1/} settwo_I=${settwo_II//_2/_1} mv ${i} special_fastq/ mv ${settwo_I} special_fastq/ mv ${second_name} special_fastq/ mv ${settwo_II} special_fastq/ done #map SE cd special_fastq for i in catrim.*_08.pooled.fastq.gz do root_name=`basename ${i} .fastq.gz` prefix=${root_name//catrim./} bsub -n 4 -o hisat2_aln_${prefix} \ "hisat2 -x ../../hormaphis_cornu_26Sep2017_hisat2 \ -p 4 \ -U ${i} | samtools sort -O BAM -@ 4 - > ../${prefix}.bam" done #PE samples with 2 library runs for i in catrim.*_run1_1.fastq.gz do root_name=`basename ${i} _1.fastq.gz` second_name=${root_name}_2.fastq.gz prefix=${root_name//catrim./} prefix=${prefix//_run1/} settwo_II=${second_name//_run1/} settwo_I=${settwo_II//_2/_1} bsub -n 4 -o hisat2_aln_${prefix} \ "hisat2 -x ../../hormaphis_cornu_26Sep2017_hisat2 \ -p 4 \ -1 ${i},${settwo_I} -2 ${second_name},${settwo_II} | samtools sort -O BAM -@ 4 - > ../${prefix}.bam" done cd .. #the rest of the PE samples for i in catrim*1.fastq.gz do root_name=`basename ${i} _1.fastq.gz` second_name=${root_name}_2.fastq.gz6ix21 prefix=${root_name//catrim.SG./} bsub -n 4 -o hisat2_aln_${prefix} \ "hisat2 -x ../hormaphis_cornu_26Sep2017_hisat2 \ -p 4 \ -1 ${i} -2 ${second_name} | samtools sort -O BAM -@ 4 - > ${prefix}.bam" done #reformat gff grep exon Augustus.updated_w_annots.3ix21_final.gff3 | sed 's|Parent|transcript_id|' > exons_3ix21.gff3 sed 's|.*transcript_id=\(g.*\).t.*|; gene_id=\1|' exons_3ix21.gff3 > genes_3ix21.gff3 paste -d \0 exons_3ix21.gff3 genes_3ix21.gff3 > Augustus.updated_w_annots.3ix21_final.exons.gff3 cd .. #htseq to generate counts mkdir counts_3ix21 for i in trimmed_reads/*.bam do root_name=`basename ${i} .bam` bsub -o counts_3ix21/count.${root_name}.txt \ "htseq-count -i "gene_id" --stranded=no -f bam -t exon \ ${i} trimmed_reads/Augustus.updated_w_annots.3ix21_final.exons.gff3 > counts_3ix21/${root_name}.counts" done conda deactivate