# Print the commands as are executed. # Stop on errors. set -uex # The URL the data is located at. URL=http://data.biostarhandbook.com/rnaseq/projects/griffith/griffith-data.tar.gz # Obtain and download the data wget -q -nc $URL # Unpack the dataset. tar zxvf griffith-data.tar.gz # This is the reference genome. REF=refs/ERCC92.fa # Represents the annotations. GTF=refs/ERCC92.gtf # Build a hisat2 index for the genome. hisat2-build $REF $REF 2>> log.txt 1>> log.txt # Generate the root names for the files. parallel echo {1}_{2} ::: UHR HBR ::: 1 2 3 > names.txt # Make a directory for BAM files. mkdir -p bam # Run the alignment per each root name. cat names.txt | parallel "hisat2 $REF -1 reads/{}_R1.fq -2 reads/{}_R2.fq | samtools sort > bam/{}.bam" 2>> stats.txt # Count the features that overlap annotations. featureCounts -a $GTF -g gene_name -o counts.txt bam/*.bam 2>> log.txt # Get the script that runs the deseq 1 method. wget -q -nc http://data.biostarhandbook.com/rnaseq/code/deseq1.r # Simplify the count matrix to only contain gene names and read counts. cat counts.txt | cut -f 1,7-12 > simple_counts.txt # Perform the differential expression study. cat simple_counts.txt | Rscript deseq1.r 3x3 > results.txt 2>> log.txt # Download the script to draw the heatmaps (requires the glplots package). wget -q -nc http://data.biostarhandbook.com/rnaseq/code/draw-heatmap.r # Generate heatmap from the deseq1 normalized matrix. cat norm-matrix-deseq1.txt | Rscript draw-heatmap.r > heatmap-norm-matrix-deseq1.pdf 2>> log.txt