# Stop script on any error. set -ux # The name of the BLAST database DBNAME=16S_ribosomal_RNA # This will be the directory we store BLAST databases in. export BLASTDB=~/blastdb # Make the BLAST database directory. mkdir -p $BLASTDB # Temporarily switch to the BLASTDB directory and run the blast database updater. # It will only update the data if it is incomplete or has changed. (cd $BLASTDB && update_blastdb.pl --quiet --decompress 16SMicrobial) # What is inside of this database? blastdbcmd -db $DBNAME -info > database.info.txt # Get all the sequences out of the database blastdbcmd -db $DBNAME -entry all > database.all.fa # Get just one sequence ouf the the database. blastdbcmd -db $DBNAME -entry NR_026093 > database.sequence.NR_026093.fa # Get just the first 20 bases of all sequences ouf the the database. blastdbcmd -db $DBNAME -entry all -range 1-20 > database.all.range.fa # Get just the first 20 bases of a given sequence ouf the the database. blastdbcmd -db $DBNAME -entry NR_026093 -range 1-20 > database.sequence.NR_026093.range.fa # We can also format the output using a format string. # # See blastdbcmd -help for the -outfmt parameter # # List the lenght of each sequence in the database blastdbcmd -db $DBNAME -entry all -outfmt "%a %l" > database.table.txt # # Since these are 16SMicrobial genes that are highly conserved sequence starts? # # What are the top ten most common gene starts and how many of each? blastdbcmd -db $DBNAME -entry all -range 1-20 -outfmt "%s" | sort | uniq -c | sort -rn > database.starts.txt # Count how many times does each 16S gene starts with a sequence. cat database.starts.txt | sort | uniq -c | sort -rn > database.sequence.starts.common.txt # # How to we build a new blast database from sequences. # # Use the all.fa file to create a new blast database. # # Create a directory that will store the blast database. mkdir -p newdb # Create a new blast database. makeblastdb -dbtype nucl -in database.all.fa -parse_seqids -out newdb/my16S # How many sequences in the new database? blastdbcmd -db newdb/my16S -info > newdb.info.txt