| 261 | | == Workflow 3: sample level variant calling == |
| 262 | | This workflow will call variants for the samples including: |
| 263 | | * sample level recalibration |
| 264 | | * sample level realignment |
| 265 | | N.B. no sample level MarkDuplicates is needed as lanes == libraries. |
| 266 | | |
| 267 | | Workflow inputs: |
| 268 | | * lane.chr.recal.sorted.bam - for all sample lanes: dedupped, recalibrated, realigned, sorted and indexed bams (3) |
| 269 | | * sample.chip.vcf - genotypes called from genotype chip |
| 270 | | Reference: |
| 271 | | * genome.chr.fasta - reference genome split on chromosome |
| 272 | | * genome.chr.realign.intervals - targets for realignment per chromosome |
| 273 | | * genome.chr.dbsnpXYZ.rod - known snp variants, here from dbSNP |
| 274 | | * genome.chr.indelsXYZ.vcf - known indels, here from 1KG |
| 275 | | |
| 276 | | Workflow outputs: |
| 277 | | * sample.chr.bam - merged bam files per sample |
| 278 | | * sample.chr.realign.intervals - realignment target intervals |
| 279 | | * sample.chr.realigned.bam - realigned |
| 280 | | * sample.chr.matesfixed.bam - fixed pairs in realignment |
| 281 | | * sample.chr.indels.vcf - raw indels called |
| 282 | | * sample.chr.indels.bed - raw indels annotations |
| 283 | | * sample.chr.indels.txt - output from the indel calling |
| 284 | | * sample.chr.indels.filtered.bed - indels filtered |
| 285 | | * sample.chr.snps.vcf - raw snps called |
| 286 | | * sample.chr.snps.filtered.vcf - snps filtered |
| 287 | | |
| 288 | | === merge-lanes === |
| 289 | | Merge lanes into one sample bam |
| 290 | | |
| 291 | | ||tool: ||samtools merge || |
| 292 | | ||inputs: ||lane.chr.recal.sorted.bam || |
| 293 | | ||outputs: ||sample.chr.bam || |
| 294 | | ||docs: ||http://samtools.sourceforge.net/samtools.shtml || |
| 295 | | |
| 296 | | === !RealignerTargetCreator === |
| 297 | | Create realignment targets based on the data (so not only knowns) |
| 298 | | |
| 299 | | ||tool: ||GenomeAnalysisTK.jar -T RealignerTargetCreator || |
| 300 | | ||inputs: ||sample.chr.bam [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod [[BR]]indelsXYZ.vcf || |
| 301 | | ||outputs: ||sample.chr.realign.intervals || |
| 302 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Local_realignment_around_indels#Creating_Intervals || |
| 303 | | |
| 304 | | === !IndelRealigner === |
| 305 | | Realign based on realignment targets in previous step |
| 306 | | |
| 307 | | ||tool: ||GenomeAnalysisTK.jar -T IndelRealigner || |
| 308 | | ||inputs: ||sample.chr.bam [[BR]]genome.chr.realign.intervals [[BR]] genome.chr.dbsnpXYZ.rod [[BR]] genome.chr.indelsXYZ.vcf || |
| 309 | | ||outputs: ||sample.chr.realigned.bam || |
| 310 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Local_realignment_around_indels#Realigning || |
| 311 | | |
| 312 | | === !FixMateInformation === |
| 313 | | See description in workflow2, now applied to sample |
| 314 | | |
| 315 | | ||inputs: ||sample.chr.realigned.bam || |
| 316 | | ||outputs: ||sample.chr.matesfixed.bam || |
| 317 | | === IndelGenotyperV2 === |
| 318 | | Call indels |
| 319 | | |
| 320 | | ||tool: ||GenomeAnalysisTK.jar -T IndelGenotyperV2 || |
| 321 | | ||inputs: ||sample.chr.matesfixed.bam [[BR]]genome.chr.fa || |
| 322 | | ||outputs: ||sample.chr.indels.vcf [[BR]]sample.chr.indels.bed [[BR]]sample.chr.indels.txt || |
| 323 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Indel_Genotyper_V2.0 [[BR]]http://www.broadinstitute.org/gsa/wiki/index.php/Firehose_Parameters#SampleIndelGenotyper || |
| 324 | | |
| 325 | | |
| 326 | | === filterSingleSampleCalls === |
| 327 | | Filter indels |
| 328 | | |
| 329 | | ||tool: ||filterSingleSampleCalls.pl || |
| 330 | | ||inputs: ||sample.chr.indels.bed || |
| 331 | | ||outputs: ||sample.chr.indels.filtered.bed || |
| 332 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Firehose_Parameters#SampleIndelGenotyper || |
| 333 | | |
| 334 | | === !UnifiedGenotyper === |
| 335 | | Call SNPs |
| 336 | | |
| 337 | | ||tool: ||GenomeAnalysisTK.jar -T UnifiedGenotyper || |
| 338 | | ||inputs: ||sample.chr.matesfixed.bam [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod || |
| 339 | | ||outputs: ||sample.chr.snps.vcf || |
| 340 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Firehose_Parameters#SetUnifiedGenotypertoEval [[BR]]http://www.broadinstitute.org/gsa/wiki/index.php/Unified_genotyper || |
| 341 | | |
| 342 | | |
| 343 | | === makeIndelMask === |
| 344 | | Make indel mask |
| 345 | | |
| 346 | | ||tool: ||makeIndelMask.py || |
| 347 | | ||inputs: ||sample.chr.indels.bed || |
| 348 | | ||outputs: ||sample.chr.indels.mask.bed || |
| 349 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Indel_Genotyper_V2.0#Creating_a_indel_mask_file || |
| 350 | | |
| 351 | | === !VariantFiltration === |
| 352 | | Filter variants to get the best calls possible |
| 353 | | |
| 354 | | ||tool: ||GenomeAnalysisTK.jar -T VariantFiltration || |
| 355 | | ||inputs: ||sample.chr.snps.vcf [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod || |
| 356 | | ||outputs: ||sample.chr.snps.filtered.vcf || |
| 357 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v2#Integrating_analyses:_getting_the_best_call_set_possible || |
| 358 | | |
| 359 | | |
| 360 | | |
| 361 | | === !MergeVcfs === |
| 362 | | === !ChipVcf === |
| 363 | | Produce vcf for the chips |
| 364 | | |
| 365 | | === !VariantEval === |
| 366 | | Create summary information on the variations called for evaluation. |
| 367 | | Run per sample.snps.filtered.vcf against chip. |
| 368 | | |
| 369 | | ||tool: ||GenomeAnalysisTK.jar -T VariantEval || |
| 370 | | ||inputs: ||sample.snps.filtered.vcf [[BR]]sample.chip.vcf [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod || |
| 371 | | ||outputs: ||sample.snps.eval || |
| 372 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/VariantEval || |
| 373 | | |
| 374 | | |
| 375 | | Discussion: |
| 376 | | > Do we call SNPs based on the filtered indels or the raw indels? |
| 377 | | > Should we realign AGAIN after merge of lanes? |
| 378 | | > BAQ? |
| 379 | | > MINDEL/PINDEL? |