# # This recipe simulates sequencing data from an accession number. # set -uex # Accession number for the reference. ACC=AF086833 # How many reads to generate. N=10000 # Make the genome directory mkdir -p refs # This will be the reference genome. REF=refs/$ACC.fa # The resulting alignment file. BAM=align.bam # Obtain the reference genome. efetch -db nuccore -format fasta -id $ACC > $REF # Index reference genome for BWA. bwa index $REF 2>> runlog.txt # Index the reference genome for IGV. samtools faidx $REF # Generate the simulated reads. # The mutations will stored in the mutations.txt file. wgsim -N $N $REF r1.fq r2.fq > mutations.txt 2>> runlog.txt # Read pair names that are the result of the simulation R1=r1.fq R2=r2.fq # Align the reads and generate the BAM file. bwa mem $REF $R1 $R2 2>> runlog.txt | samtools sort > $BAM # Index the alignment file. samtools index $BAM # Generate a flagstat. samtools flagstat $BAM > flagstat.txt