# Short read aligner - SRA accession, reference GenBank accession # # Usage example: # # bash align.sh CP050134 SRR653447 # # Stop on error set -uex # Reference genome accession number. ACC=$1 # SRA accession number. SRR=$2 # Reference FASTA. REF=ref/$ACC.fa # Create the directory. mkdir -p ref reads # Get the reference accession. bio fetch $ACC -format fasta > $REF # Get FASTQ data from your query. fastq-dump -X 10000 --split-files --outdir reads $SRR 1>> log.txt # Assign variable names to your FASTQ data. R1=reads/${SRR}_1.fastq R2=reads/${SRR}_2.fastq # The trimmed reads. TP1=reads/${SRR}_trimmed_1.fastq TU1=reads/${SRR}_unpaired_1.fastq TP2=reads/${SRR}_trimmed_2.fastq TU2=reads/${SRR}_unpaired_2.fastq # Index the two genomes bwa index $REF > log.txt 2> log.txt bowtie2-build $REF $REF > log.txt 2>>log.txt # Align before doing any quality control on your FASTQ data. bwa mem $REF $R1 $R2 > bwa.sam 2>> log.txt bowtie2 -x $REF -1 $R1 -2 $R2 > bowtie.sam 2>> log.txt # Define your adaptors. echo ">illumina" > adapter.fa echo "AGATCGGAAGAG" >> adapter.fa # Trim off adaptors and low quality reads. trimmomatic PE ${R1} ${R2} $TP1 $TU1 $TP2 $TU2 ILLUMINACLIP:adapter.fa:2:30:5 SLIDINGWINDOW:4:30 >> log.txt 2>> log.txt # Align after doing any quality control on your FASTQ data. bwa mem $REF $TP1 $TP2 > bwa_qc.sam 2>> log.txt bowtie2 -x $REF -1 $TP1 -2 $TP2 > bowtie_qc.sam 2>> log.txt # Display certain lines from flagstat output ls -1 *.sam | parallel -n 1 '(echo {} && samtools flagstat {} | grep mapped)'