# Stop script on any error. set -ue # # This tool requires minimap2 to run. # If you don't already have it install it with. # # conda install minimap2 -y # # Two sequence accession numbers SEQ1=AF086833 SEQ2=KR105328 # # The default values are comparing # the Ebola strain from 1972 to one strain observed in 2014 # # Ebola Virus: Mayinga 1972: AF086833 #SEQ1=AF086833 # Ebola Virus: Makona 2014: KR105328 # Download both sequences from NCBI and store them locally. # You may want to name them $SEQ1.fa and $SEQ2.fa # We will keep the names short here for visiblity. efetch -db nuccore -format fasta -id $SEQ1 > 1.fa efetch -db nuccore -format fasta -id $SEQ2 > 2.fa # # Typically you want to know what the scoring matrix is. # # Get the scoring matrix # wget -q -nc ftp://ftp.ncbi.nlm.nih.gov/blast/matrices/NUC.4.4 # Generate a global alignment in default (markx0) format. stretcher -filter -asequence 1.fa -bsequence 2.fa -data NUC.4.4 > alignment.1.stretcher.markx0.txt # # The stretcher has the NUC4.4 matrix built into it under the special # name of EDNAFULL. From now on we will use that. # # Generate alignment in the so called markx2 format. stretcher -filter -asequence 1.fa -bsequence 2.fa -data EDNAFULL -aformat markx2 > alignment.2.stretcher.markx2.txt # Generate alignment in the SAM format. stretcher -filter -asequence 1.fa -bsequence 2.fa -data EDNAFULL -aformat sam > alignment.3.stretcher.sam.txt # Generate a pariwise local alignment with blastn. # It is surprisingly difficult to tell what scoring matrix # blastn used. See blastn --help for information. # # The default output format of blasn. blastn -subject 1.fa -query 2.fa > alignment.4.blast.default.txt # Format alignments in tabular form. blastn -subject 1.fa -query 2.fa -outfmt 7 > alignment.5.blast.outfmt7.txt # # The minimap aligner, this perform a SEMI-GLOBAL alignment. # # Run minimap2 configured to produces MD tags. minimap2 -c --MD 1.fa 2.fa > alignment.6.minimap2.MD.txt # Run minimap2 configured to produces CS tags. minimap2 --cs 1.fa 2.fa > alignment.7.minimap2.cs.txt # # How to interpret the alignments in one glance. # See lecture video. # # The 23rd column shows the CIGAR string. echo "*** The alignment as a CIGAR string:" cat alignment.6.minimap2.MD.txt | cut -f 22 # The 23rd column shows the MD tag. echo "*** The alignment as an MD tag:" cat alignment.6.minimap2.MD.txt | cut -f 23 # The 22nd column of the CS tag. echo "*** The alignment as a CS tag:" cat alignment.7.minimap2.cs.txt | cut -f 22