Commit b631c3b5 authored by Rebecca E Batorsky
Browse files

edited README, removed course_dir variable

parent b9946caa
......@@ -2,11 +2,10 @@
Tufts workshop December 2019
This repository contains instructional material for the workshop.
The raw_data directory contains reads from NA12878 gene cyp on chromosome 10
The raw_data directory contains reads from NA12878 gene cyp on chromosome 10, from the 1000 Genomes Project phase 3 release
The ref_data directory contains a reference sequence from hg19 chromosome 10
The ref_data directory contains a reference sequence from GRCh38 chromosome 10
The all_commands.sh file contains all the commands we will use in the workshop.
#!/bin/bash
# NOTE(review): in this listing several commands appear twice -- once with a
# `$course_dir/` prefix and once with a path relative to the working
# directory. The commit message says the course_dir variable was removed, so
# the prefixed lines are presumably the pre-commit versions; confirm against
# the repository before running this text as-is.
# Set the course directory, change this if you did not clone into your home dir
course_dir=~/intro-to-ngs
# Step 1
# Print a banner, load the bwa/0.7.17 module, and run `bwa index` on the
# chr10.fa reference in ref_data. The relative-path form resolves against the
# current working directory.
printf "\n----\nUse BWA to Index Reference sequence\n---\n"
module load bwa/0.7.17
bwa index $course_dir/ref_data/chr10.fa
bwa index ref_data/chr10.fa
# Step 2
# Print a banner, create the results directory if it does not exist
# (`mkdir -p`), then run `bwa mem` with the chr10.fa reference and the two
# FASTQ files na12878_1.fq / na12878_2.fq, redirecting standard output to
# na12878.sam in the results directory.
# The -R string carries an @RG line with ID:reads and SM:NA12878.
# NOTE(review): the reference/reads/redirect lines appear twice (with and
# without `$course_dir/`). The first redirect line has no trailing backslash,
# so it ends the `bwa mem` command; taken literally, the relative-path lines
# after it are not arguments to it. Presumably this is a diff view and only
# one set belongs in the script -- confirm.
printf "\n----\nUse BWA to Align Reads\n---\n"
mkdir -p $course_dir/results
mkdir -p results
bwa mem \
-M \
-t 2 \
-R "@RG\tID:reads\tSM:NA12878" \
$course_dir/ref_data/chr10.fa \
$course_dir/raw_data/na12878_1.fq $course_dir/raw_data/na12878_2.fq \
> $course_dir/results/na12878.sam
ref_data/chr10.fa \
raw_data/na12878_1.fq raw_data/na12878_2.fq \
> results/na12878.sam
# Step 3
# Print the banner announcing the SAM sorting step to standard output.
printf "\n----\nUse Picard to Sort SAM file\n----\n"
......@@ -29,46 +25,44 @@ printf "\n----\nUse Picard to Sort SAM file\n----\n"
# Load the picard/2.8.0 module and run SortSam: read na12878.sam from the
# results directory and write na12878.srt.bam with SORT_ORDER=coordinate.
# NOTE(review): INPUT= and OUTPUT= each appear twice (with and without
# `$course_dir/`) inside one backslash-continued command; presumably only the
# relative-path pair is meant to remain after the commit -- confirm.
module load picard/2.8.0
picard SortSam \
INPUT=$course_dir/results/na12878.sam \
OUTPUT=$course_dir/results/na12878.srt.bam \
INPUT=results/na12878.sam \
OUTPUT=results/na12878.srt.bam \
SORT_ORDER=coordinate
# Step 4
# Print a banner and run Picard MarkDuplicates on the sorted BAM
# (na12878.srt.bam), writing na12878.srt.markdup.bam and the metrics file
# na12878.markdup.txt in the results directory.
# NOTE(review): the INPUT/OUTPUT/METRICS_FILE arguments appear twice. The
# `$course_dir/` METRICS_FILE line has no trailing backslash, so it ends the
# command; taken literally, the relative-path lines after it are not passed to
# picard. Presumably only one set belongs in the script -- confirm.
printf "\n----\nUse Picard to Mark BAM duplicates\n----\n"
picard MarkDuplicates \
INPUT=$course_dir/results/na12878.srt.bam \
OUTPUT=$course_dir/results/na12878.srt.markdup.bam \
METRICS_FILE=$course_dir/results/na12878.markdup.txt
INPUT=results/na12878.srt.bam \
OUTPUT=results/na12878.srt.markdup.bam \
METRICS_FILE=results/na12878.markdup.txt
# Step 5
# Print a banner and run Picard BuildBamIndex on the duplicate-marked BAM
# (na12878.srt.markdup.bam) in the results directory.
# NOTE(review): INPUT= appears twice; the first occurrence has no trailing
# backslash, so the second line is not an argument to picard as written.
# Presumably only the relative-path form is meant to remain -- confirm.
printf "\n----\nUse Picard to Build a BAM Index\n----\n"
picard BuildBamIndex \
INPUT=$course_dir/results/na12878.srt.markdup.bam
INPUT=results/na12878.srt.markdup.bam
# Step 6
# Print a banner, load the samtools/1.9 module, and run `samtools faidx` on
# the chr10.fa reference in ref_data.
# NOTE(review): the command is listed with and without the `$course_dir/`
# prefix; presumably only the relative-path form remains after the commit.
printf "\n----\nUse Samtools to Build a Reference Sequence Index\n----\n"
module load samtools/1.9
samtools faidx $course_dir/ref_data/chr10.fa
samtools faidx ref_data/chr10.fa
# Step 8
# Print a banner and run Picard CreateSequenceDictionary with chr10.fa as
# REFERENCE, writing chr10.dict in the ref_data directory.
# NOTE(review): REFERENCE/OUTPUT appear twice. The `$course_dir/` OUTPUT line
# has no trailing backslash, so it ends the command; the relative-path lines
# after it are not passed to picard as written. Presumably only one set
# belongs in the script -- confirm.
printf "\n----\nUse Picard to Build A Reference Sequence Dictionary\n----\n"
picard CreateSequenceDictionary \
REFERENCE=$course_dir/ref_data/chr10.fa \
OUTPUT=$course_dir/ref_data/chr10.dict
REFERENCE=ref_data/chr10.fa \
OUTPUT=ref_data/chr10.dict
# Step 7: Use GATK to call variants
# Step 9
# Print a banner, load the GATK/3.7 module, and run `gatk -T HaplotypeCaller`
# with -R chr10.fa (reference), -I na12878.srt.markdup.bam (input BAM) and
# -o na12878.vcf (output) in the results directory.
# NOTE(review): two step-number comments and two -R/-I/-o argument sets are
# listed. The `$course_dir/` -o line has no trailing backslash, so it ends the
# command; the relative-path lines after the "Bonus" comment are not passed to
# gatk as written. Presumably only one heading and one argument set belong in
# the script -- confirm.
printf "\n----\nUse GATK to Call Variants on the BAM\n----\n"
module load GATK/3.7
gatk -T HaplotypeCaller \
-R $course_dir/ref_data/chr10.fa \
-I $course_dir/results/na12878.srt.markdup.bam \
-o $course_dir/results/na12878.vcf
# Bonus - run VEP!
-R ref_data/chr10.fa \
-I results/na12878.srt.markdup.bam \
-o results/na12878.vcf
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment