# # Download and unpack gatk in a folder # # https://github.com/broadinstitute/gatk/releases # # mine is located in: ~/Downloads/gatk-4.2.3.0/gatk # set -uex # Or use fastq dump SRR=SRR15595110 fastq-dump -X 10000 --split-files $SRR # Accesion number ACC=NC_045512 # Output file names. REF=refs/$ACC.fa DICT=refs/$ACC.dict BAM=$SRR.bam mkdir -p refs # Install bio if needed. # pip install bio --upgrade # Get the reference bio fetch NC_045512 -format fasta > $REF # Read groups are a way to TAG data with a label (RG) a sample (SM) and a library (LB) tag. TAG="@RG\tID:$SRR\tSM:$SRR\tLB:$SRR" # Shortcuts to files. R1=${SRR}_1.fastq R2=${SRR}_2.fastq # Index the reference bwa index $REF # Run the alignment bwa mem -R $TAG $REF $R1 $R2 | samtools sort > $BAM # Index the resulting BAM file samtools index $BAM # Index the reference samtools faidx $REF # Picard won't create the dictionary file if exists. rm -f $DICT # Create the dictionary file. picard CreateSequenceDictionary -REFERENCE $REF -OUTPUT $DICT # Run GATK ~/Downloads/gatk-4.2.3.0/gatk HaplotypeCaller -R $REF -I $BAM -O variants.vcf