# Stop script on any error. set -ue # If you set the so called BLASTDB variable # blast will find the files there automatically. # This is handy as it makes for shorter commands. # This will be the directory we store BLAST databases in. export BLASTDB=~/refs/blastdb # Make the BLAST database directory. mkdir -p $BLASTDB # Temporarily switch to the BLASTDB directory and run the blast database updater. # It will only update the data if it is incomplete or has changed. (cd $BLASTDB && update_blastdb.pl --quiet --decompress 16SMicrobial) # Unix trick: when one or more commands are enclosed with parenthesis as above then a new bash shell # is opened for the commands inside the parenthesis. This is why we don't have to change # the directory back once we are done with the commands. Only the newly opened "child" bash # shell changes the directory the "parent" bash stays where it started. # The && is treated as a logical AND. Use it to list multiple commands on a single line. # Get the "mystery sequence" from the book site. wget -q -nc http://data.biostarhandbook.com/fasta/mystery3.fa # We can list the short database name because BLASTDB is set properly blastn -db 16SMicrobial -query mystery3.fa > blast-1-results.txt # If you don't set the BLASTB the same command would be: # blastn -db ~/refs/blastdb/16SMicrobial -query mystery3.fa > blast-1-default-results.txt # We can format the output differently. # Look at the blast -help for information on what each field means. blastn -db 16SMicrobial -query mystery3.fa -outfmt 6 > blast-2-results.txt # We can also selectively output only certain columns: # subject accession, query accession and percent identity. # Look at blast --help for information on how to encode the format string. blastn -db 16SMicrobial -query mystery3.fa -outfmt "6 sacc qacc pident" > blast-3-results.txt # Generate a report of top hits. echo "Top ten hits with highest percent identity": cat blast-3-results.txt | sort -k3,3rn | head -10