261 | | == Workflow 3: sample level variant calling == |
262 | | This workflow will call variants for the samples including: |
263 | | * sample level recalibration |
264 | | * sample level realignment |
265 | | N.B. no sample level MarkDuplicates is needed as lanes == libraries. |
266 | | |
267 | | Workflow inputs: |
268 | | * lane.chr.recal.sorted.bam - for all sample lanes: dedupped, recalibrated, realigned, sorted and indexed bams (3) |
269 | | * sample.chip.vcf - genotypes called from genotype chip |
270 | | Reference: |
271 | | * genome.chr.fasta - reference genome split on chromosome |
272 | | * genome.chr.realign.intervals - targets for realignment per chromosome |
273 | | * genome.chr.dbsnpXYZ.rod - known SNP variants, here from dbSNP |
274 | | * genome.chr.indelsXYZ.vcf - known indels, here from 1KG |
275 | | |
276 | | Workflow outputs: |
277 | | * sample.chr.bam - merged bam files per sample |
278 | | * sample.chr.realign.intervals - realignment target intervals |
279 | | * sample.chr.realigned.bam - realigned |
280 | | * sample.chr.matesfixed.bam - fixed pairs in realignment |
281 | | * sample.chr.indels.vcf - raw indels called |
282 | | * sample.chr.indels.bed - raw indels annotations |
283 | | * sample.chr.indels.txt - output from the indel calling |
284 | | * sample.chr.indels.filtered.bed - indels filtered |
285 | | * sample.chr.snps.vcf - raw snps called |
286 | | * sample.chr.snps.filtered.vcf - snps filtered |
287 | | |
288 | | === merge-lanes === |
289 | | Merge lanes into one sample bam |
290 | | |
291 | | ||tool: ||samtools merge || |
292 | | ||inputs: ||lane.chr.recal.sorted.bam || |
293 | | ||outputs: ||sample.chr.bam || |
294 | | ||doc: ||http://samtools.sourceforge.net/samtools.shtml || |
295 | | |
296 | | === !RealignerTargetCreator === |
297 | | Create realignment targets based on the data (so not only knowns) |
298 | | |
299 | | ||tool: ||GenomeAnalysisTK.jar -T RealignerTargetCreator || |
300 | | ||inputs: ||sample.chr.bam [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod [[BR]]indelsXYZ.vcf || |
301 | | ||outputs: ||sample.chr.realign.intervals || |
302 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Local_realignment_around_indels#Creating_Intervals || |
303 | | |
304 | | === !IndelRealigner === |
305 | | Realign based on realignment targets in previous step |
306 | | |
307 | | ||tool: ||GenomeAnalysisTK.jar -T IndelRealigner || |
308 | | ||inputs: ||sample.chr.bam [[BR]]genome.chr.realign.intervals [[BR]] genome.chr.dbsnpXYZ.rod [[BR]] genome.chr.indelsXYZ.vcf || |
309 | | ||outputs: ||sample.chr.realigned.bam || |
310 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Local_realignment_around_indels#Realigning || |
311 | | |
312 | | === !FixMateInformation === |
313 | | See description in workflow2, now applied to sample |
314 | | |
315 | | ||inputs: ||sample.chr.realigned.bam || |
316 | | ||outputs: ||sample.chr.matesfixed.bam || |
317 | | === IndelGenotyperV2 === |
318 | | Call indels |
319 | | |
320 | | ||tool: ||GenomeAnalysisTK.jar -T IndelGenotyperV2 || |
321 | | ||inputs: ||sample.chr.matesfixed.bam [[BR]]genome.chr.fa || |
322 | | ||outputs: ||sample.chr.indels.vcf [[BR]]sample.chr.indels.bed [[BR]]sample.chr.indels.txt || |
323 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Indel_Genotyper_V2.0 [[BR]]http://www.broadinstitute.org/gsa/wiki/index.php/Firehose_Parameters#SampleIndelGenotyper || |
324 | | |
326 | | === filterSingleSampleCalls === |
327 | | Filter indels |
328 | | |
329 | | ||tool: ||filterSingleSampleCalls.pl || |
330 | | ||inputs: ||sample.chr.indels.bed || |
331 | | ||outputs: ||sample.chr.indels.filtered.bed || |
332 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Firehose_Parameters#SampleIndelGenotyper || |
333 | | |
334 | | === !UnifiedGenotyper === |
335 | | Call SNPs |
336 | | |
337 | | ||tool: ||GenomeAnalysisTK.jar -T UnifiedGenotyper || |
338 | | ||inputs: ||sample.chr.matesfixed.bam [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod || |
339 | | ||outputs: ||sample.chr.snps.vcf || |
340 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Firehose_Parameters#SetUnifiedGenotypertoEval [[BR]]http://www.broadinstitute.org/gsa/wiki/index.php/Unified_genotyper || |
341 | | |
343 | | === makeIndelMask === |
344 | | Make indel mask |
345 | | |
346 | | ||tool: ||makeIndelMask.py || |
347 | | ||inputs: ||sample.chr.indels.bed || |
348 | | ||outputs: ||sample.chr.indels.mask.bed || |
349 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Indel_Genotyper_V2.0#Creating_a_indel_mask_file || |
350 | | |
351 | | === !VariantFiltration === |
352 | | Filter variants to get the best calls possible |
353 | | |
354 | | ||tool: ||GenomeAnalysisTK.jar -T VariantFiltration || |
355 | | ||inputs: ||sample.chr.snps.vcf [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod || |
356 | | ||outputs: ||sample.chr.snps.filtered.vcf || |
357 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v2#Integrating_analyses:_getting_the_best_call_set_possible || |
358 | | |
360 | | |
361 | | === !MergeVcfs === |
362 | | === !ChipVcf === |
363 | | Produce vcf for the chips |
364 | | |
365 | | === !VariantEval === |
366 | | Create summary information on the variations called for evaluation. |
367 | | Run per sample.snps.filtered.vcf against chip. |
368 | | |
369 | | ||tool: ||GenomeAnalysisTK.jar -T VariantEval || |
370 | | ||inputs: ||sample.snps.filtered.vcf [[BR]]sample.chip.vcf [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod || |
371 | | ||outputs: ||sample.snps.eval || |
372 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/VariantEval || |
373 | | |
374 | | |
375 | | Discussion: |
376 | | > Do we call SNPs based on the filtered indels or the raw indels? |
377 | | > Should we realign AGAIN after merge of lanes? |
378 | | > BAQ? |
379 | | > MINDEL/PINDEL? |