Changes between Version 58 and Version 59 of SnpCallingPipeline
- Timestamp:
- Dec 16, 2010 3:51:22 PM (14 years ago)
Legend:
- Unmodified
- Added
- Removed
- Modified
-
SnpCallingPipeline
v58 v59 8 8 * Workflow 3: SnpCallingPipeline/VariantCalling 9 9 10 ''To install and test this pipeline, you could follow this instruction:''''' '''[wiki:SNPPipelineTestManual] 11 10 12 == Schematic Overview == 11 13 This simplified schematic overview hides intermediate sorting and indexing steps and only shows data inputs/outputs the first time they occur. … … 15 17 digraph g { 16 18 17 size="10,10" node [shape=box,style=filled,color=white] "dbsnp" "reference.fasta" "realign.intervals" "indelcalls.vcf" "chr[1-24] .fasta" "flowcell_lane.1.fq.gz" "flowcell_lane.2.fq.gz" "flowcell_lane.aligned.bam" "flowcell_lane2.aligned.bam" "flowcell_lane3.aligned.bam" "sample.aligned.bam" "sample QC reports" "sample_chr[1-24].vcf" 19 size="10,10" node [shape=box,style=filled,color=white] "dbsnp" "reference.fasta" "realign.intervals" "indelcalls.vcf" "chr[1-24] .fasta" "flowcell_lane.1.fq.gz" "flowcell_lane.2.fq.gz" "flowcell_lane.aligned.bam" "flowcell_lane2.aligned.bam" "flowcell_lane3.aligned.bam" "sample.aligned.bam" "sample QC reports" "sample_chr[1-24] .vcf" 18 20 19 21 node [shape=ellipse,color=yellow] … … 22 24 style=filled; color=lightgrey; 23 25 24 "reference.fasta" -> RealignerTargetCreator -> "realign.intervals" "indelcalls.vcf"-> RealignerTargetCreator "reference.fasta"->Split->"chr[1-24] .fasta" dbsnp -> RealignerTargetCreatorlabel = "Per genome (1)"; 26 "reference.fasta" -> RealignerTargetCreator -> "realign.intervals" "indelcalls.vcf"-> RealignerTargetCreator "reference.fasta"->Split->"chr[1-24] .fasta" dbsnp -> RealignerTargetCreator label = "Per genome (1)"; 25 27 26 28 } 27 29 28 30 subgraph cluster_1 { 29 style=filled; color=lightgrey; "flowcell_lane.1.fq.gz" -> align1 -> alignPE "chr[1-24] .fasta" -> align1 "chr[1-24] .fasta" -> align2 "chr[1-24] .fasta" -> alignPE "flowcell_lane.2.fq.gz" -> align2 -> alignPE -> MarkDuplicates -> "IndelRealigner & \n FixMateInformation (knownsOnly)" ->"Quality Recalibration"->"flowcell_lane.aligned.bam" "realign.intervals" -> 
"IndelRealigner & \n FixMateInformation(knownsOnly)" label = "Per Lane (750*3=2250) "; 31 style=filled; color=lightgrey; "flowcell_lane.1.fq.gz" -> align1 -> alignPE "chr[1-24] .fasta" -> align1 "chr[1-24] .fasta" -> align2 "chr[1-24] .fasta" -> alignPE "flowcell_lane.2.fq.gz" -> align2 -> alignPE -> MarkDuplicates -> "IndelRealigner & \n FixMateInformation (knownsOnly)" ->"Quality Recalibration"->"flowcell_lane.aligned.bam" "realign.intervals" -> "IndelRealigner & \n FixMateInformation (knownsOnly)" label = "Per Lane (750*3=2250) "; 30 32 } 31 33 32 34 subgraph cluster_2 { 33 style=filled; color=lightgrey; "flowcell_lane.aligned.bam" -> Merge -> "sample.aligned.bam" -> "IndelRealigner & FixMateInformation " "flowcell_lane2.aligned.bam" -> Merge "flowcell_lane3.aligned.bam" -> Merge "IndelRealigner & FixMateInformation " -> IndelGenotyperV2 -> FilterSingleCalls -> UnifiedGenotyper -> Filtration -> VariantEval-> "sample QC reports" 35 style=filled; color=lightgrey; "flowcell_lane.aligned.bam" -> Merge -> "sample.aligned.bam" -> "IndelRealigner & FixMateInformation " "flowcell_lane2.aligned.bam" -> Merge "flowcell_lane3.aligned.bam" -> Merge "IndelRealigner & FixMateInformation " -> IndelGenotyperV2 -> FilterSingleCalls -> UnifiedGenotyper -> Filtration -> VariantEval -> "sample QC reports" 34 36 35 37 Filtration -> "sample_chr[1-24].vcf" … … 42 44 style=filled; color=lightgrey; 43 45 44 "sample.aligned.bam" -> "UnifiedGenotype (without realign)"->"QC against arrays and BGI" 46 "sample.aligned.bam" -> "UnifiedGenotype (without realign)"->"QC against arrays and BGI" 45 47 46 48 label = "QC per sample"; … … 87 89 == Optimization? == 88 90 ==== Current ==== 89 Step Cores Memory (gb) Time (hh.mm)[[BR]]BWA alignment 1 ± 6 10.05[[BR]]BWA spe 1 3.35[[BR]]Sam-Bam 1 12.3[[BR]]Sam sort 1 5.05[[BR]]Mark Duplicates 1 4 1.55[[BR]]Realignment (knowns only) 1 8 (*can be lowered) 5.2[[BR]]Fix mates 1 6 (*) 3.05[[BR]]Covariates bef. 
1 2 12.35[[BR]]Recalibrate 1 4 7.3[[BR]]Sam sort 1 4.5[[BR]]Covariates aft. 1 2 11.2[[BR]]Analyze Covar. 1 4< 00.01 91 Step Cores Memory (gb) Time (hh.mm)[[BR]]BWA alignment 1 ± 6 10.05[[BR]]BWA spe 1 3.35[[BR]]Sam-Bam 1 12.3[[BR]]Sam sort 1 5.05[[BR]]Mark Duplicates 1 4 1.55[[BR]]Realignment (knowns only) 1 8 (*can be lowered) 5.2[[BR]]Fix mates 1 6 (*) 3.05[[BR]]Covariates bef. 1 2 12.35[[BR]]Recalibrate 1 4 7.3[[BR]]Sam sort 1 4.5[[BR]]Covariates aft. 1 2 11.2[[BR]]Analyze Covar. 1 4 < 00.01 90 92 91 93 ==== Disk ====