#
# Download and unpack gatk in a folder
#
# https://github.com/broadinstitute/gatk/releases
#
# mine is located in: ~/Downloads/gatk-4.2.3.0/gatk
#
set -uex

# Or use fastq dump
SRR=SRR15595110

fastq-dump -X 10000 --split-files $SRR

# Accesion number
ACC=NC_045512 

# Output file names.
REF=refs/$ACC.fa
DICT=refs/$ACC.dict
BAM=$SRR.bam

mkdir -p refs

# Install bio if needed.
# pip install bio --upgrade

# Get the reference
bio fetch NC_045512 -format fasta  > $REF

# Read groups are a way to TAG data with a label (RG) a sample (SM) and a library (LB) tag.
TAG="@RG\tID:$SRR\tSM:$SRR\tLB:$SRR"

# Shortcuts to files.
R1=${SRR}_1.fastq
R2=${SRR}_2.fastq

# Index the reference
bwa index $REF

# Run the alignment
bwa mem -R $TAG $REF $R1 $R2 | samtools sort > $BAM

# Index the resulting BAM file
samtools index $BAM

# Index the reference
samtools faidx $REF

# Picard won't create the dictionary file if exists.
rm -f $DICT

# Create the dictionary file.
picard CreateSequenceDictionary -REFERENCE $REF -OUTPUT $DICT

# Run GATK
~/Downloads/gatk-4.2.3.0/gatk HaplotypeCaller -R $REF -I $BAM -O variants.vcf