# Stop script on any error.
# If you set the so called BLASTDB variable
# blast will find the files there automatically.
# This is handy as it makes for shorter commands.
# This will be the directory we store BLAST databases in.
# Make the BLAST database directory.
mkdir -p $BLASTDB
# Temporarily switch to the BLASTDB directory and run the blast database updater.
# It will only update the data if it is incomplete or has changed.
(cd $BLASTDB && update_blastdb.pl --quiet --decompress 16SMicrobial)
# Unix trick: when one or more commands are enclosed with parenthesis as above then a new bash shell
# is opened for the commands inside the parenthesis. This is why we don't have to change
# the directory back once we are done with the commands. Only the newly opened "child" bash
# shell changes the directory the "parent" bash stays where it started.
# The && is treated as a logical AND. Use it to list multiple commands on a single line.
# Get the "mystery sequence" from the book site.
wget -q -nc http://data.biostarhandbook.com/fasta/mystery3.fa
# We can list the short database name because BLASTDB is set properly
blastn -db 16SMicrobial -query mystery3.fa > blast-1-results.txt
# If you don't set the BLASTB the same command would be:
# blastn -db ~/refs/blastdb/16SMicrobial -query mystery3.fa > blast-1-default-results.txt
# We can format the output differently.
# Look at the blast -help for information on what each field means.
blastn -db 16SMicrobial -query mystery3.fa -outfmt 6 > blast-2-results.txt
# We can also selectively output only certain columns:
# subject accession, query accession and percent identity.
# Look at blast --help for information on how to encode the format string.
blastn -db 16SMicrobial -query mystery3.fa -outfmt "6 sacc qacc pident" > blast-3-results.txt
# Generate a report of top hits.
echo "Top ten hits with highest percent identity":
cat blast-3-results.txt | sort -k3,3rn | head -10