| 78 | | |
| 79 | | == Workflow 1: genome reference file creation == |
| 80 | | |
| 81 | | This workflow creates reference files per chromosome including: |
| 82 | | * genome, dbsnp and indel vcfs per chromosome |
| 83 | | * realignment target intervals, precomputed to speed up realignment |
| 84 | | * index files for samtools and bwa |
| 85 | | |
| 86 | | Workflow inputs: |
| 87 | | * genome.chr.fa - downloaded from genome supplier (now hg19) |
| 88 | | * dbsnpXYZ.rod - downloaded reference SNPs from dbsnp (now 129) |
| 89 | | * indelsXYZ.vcf - downloaded reference indels from 1KG |
| 90 | | |
| 91 | | Workflow outputs: |
| 92 | | * genome.chr.fa - cleaned headers |
| 93 | | * genome.chr.fa.fai - index for samtools |
| 94 | | * genome.chr.fa.<format> - multiple index files for bwa |
| 95 | | * dbsnpXYZ.chr.rod - split per chromosome |
| 96 | | * indelsXYZ.chr.vcf - split per chromosome |
| 97 | | * genome.chr.realign.intervals - targets for realignment |
| 98 | | |
| 99 | | === clean-fasta-headers === |
| 100 | | Clean the FASTA headers so each contains only the chromosome name (e.g. '1' instead of 'Chr1') |
| 101 | | |
| 102 | | ||tool: || || |
| 103 | | ||inputs: ||genome.chr.fa || |
| 104 | | ||outputs: ||genome.chr.fa || |
| 105 | | ||doc: ||internally developed || |
| 106 | | |
| 107 | | === split-vcf-chr for dbsnp and indels === |
| 108 | | Split vcf per chromosome |
| 109 | | ||tool: || || |
| 110 | | ||inputs: ||dbsnpXYZ.rod, indelsXYZ.vcf || |
| 111 | | ||outputs: ||dbsnpXYZ.chr.rod, indelsXYZ.chr.vcf || |
| 112 | | ||doc: || || |
| 113 | | |
| 114 | | Discussion: |
| 115 | | > Can we use http://vcftools.sourceforge.net/options.html ? |
| 116 | | >> vcftools --vcf indelsXYZ.vcf --chr <i> --recode --out indelsXYZ.chr |
| 117 | | |
| 118 | | === index-chromosomes === |
| 119 | | Index reference sequence for each chromosome in the FASTA format |
| 120 | | |
| 121 | | ||tool: ||samtools faidx || |
| 122 | | ||input: ||genome.chr.fa || |
| 123 | | ||output: ||genome.chr.fa.fai || |
| 124 | | ||doc: ||http://samtools.sourceforge.net/samtools.shtml#3 || |
| 125 | | |
| 126 | | === bwa-index-chromosomes === |
| 127 | | Index reference sequence for each chromosome for bwa alignment |
| 128 | | |
| 129 | | ||tool: ||bwa index -a IS || |
| 130 | | ||input: ||genome.chr.fa || |
| 131 | | ||output: ||genome.chr.fa.xyz || |
| 132 | | ||doc: ||http://bio-bwa.sourceforge.net/bwa.shtml#3 || |
| 133 | | |
| 134 | | === !RealignerTargetCreator === |
| 135 | | Generate realignment targets for known sites for each chromosome |
| 136 | | |
| 137 | | ||tool: ||GenomeAnalysisTK.jar -T RealignerTargetCreator || |
| 138 | | ||input: ||genome.chr.fa, dbsnpXYZ.chr.rod, indelsXYZ.chr.vcf || |
| 139 | | ||output: ||genome.chr.realign.intervals || |
| 140 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Local_realignment_around_indels#Running_the_Indel_Realigner_only_at_known_sites || |
| 141 | | |
| 142 | | |