Changes between Version 58 and Version 59 of SnpCallingPipeline


Ignore:
Timestamp:
Dec 16, 2010 3:51:22 PM (14 years ago)
Author:
Leon Mei
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • SnpCallingPipeline

    v58 v59  
    88 * Workflow 3: SnpCallingPipeline/VariantCalling
    99
     10''To install and test this pipeline, follow these instructions:'' [wiki:SNPPipelineTestManual]
     11
    1012== Schematic Overview ==
    1113This simplified schema hides intermediate sort and indexing steps and only shows data inputs/outputs the first time they occur.
     
    1517digraph g {
    1618
    17   size="10,10" node [shape=box,style=filled,color=white] "dbsnp" "reference.fasta" "realign.intervals" "indelcalls.vcf" "chr[1-24]  .fasta" "flowcell_lane.1.fq.gz" "flowcell_lane.2.fq.gz" "flowcell_lane.aligned.bam" "flowcell_lane2.aligned.bam" "flowcell_lane3.aligned.bam" "sample.aligned.bam" "sample QC reports" "sample_chr[1-24]  .vcf"
     19  size="10,10" node [shape=box,style=filled,color=white] "dbsnp" "reference.fasta" "realign.intervals" "indelcalls.vcf" "chr[1-24]   .fasta" "flowcell_lane.1.fq.gz" "flowcell_lane.2.fq.gz" "flowcell_lane.aligned.bam" "flowcell_lane2.aligned.bam" "flowcell_lane3.aligned.bam" "sample.aligned.bam" "sample QC reports" "sample_chr[1-24]   .vcf"
    1820
    1921  node [shape=ellipse,color=yellow]
     
    2224    style=filled; color=lightgrey;
    2325
    24   "reference.fasta" -> RealignerTargetCreator   -> "realign.intervals" "indelcalls.vcf"-> RealignerTargetCreator    "reference.fasta"->Split->"chr[1-24]  .fasta"  dbsnp -> RealignerTargetCreator    label = "Per genome (1)";
     26  "reference.fasta" -> RealignerTargetCreator    -> "realign.intervals" "indelcalls.vcf"-> RealignerTargetCreator     "reference.fasta"->Split->"chr[1-24]   .fasta"  dbsnp -> RealignerTargetCreator     label = "Per genome (1)";
    2527
    2628}
    2729
    2830  subgraph cluster_1 {
    29     style=filled; color=lightgrey; "flowcell_lane.1.fq.gz" -> align1 -> alignPE "chr[1-24]  .fasta" -> align1 "chr[1-24]  .fasta" -> align2 "chr[1-24]  .fasta" -> alignPE "flowcell_lane.2.fq.gz" -> align2 -> alignPE -> MarkDuplicates   -> "IndelRealigner   & \n FixMateInformation   (knownsOnly)" ->"Quality Recalibration"->"flowcell_lane.aligned.bam" "realign.intervals" -> "IndelRealigner   & \n FixMateInformation   (knownsOnly)"    label = "Per Lane (750*3=2250) ";
     31    style=filled; color=lightgrey; "flowcell_lane.1.fq.gz" -> align1 -> alignPE "chr[1-24]   .fasta" -> align1 "chr[1-24]   .fasta" -> align2 "chr[1-24]   .fasta" -> alignPE "flowcell_lane.2.fq.gz" -> align2 -> alignPE -> MarkDuplicates    -> "IndelRealigner    & \n FixMateInformation    (knownsOnly)" ->"Quality Recalibration"->"flowcell_lane.aligned.bam" "realign.intervals" -> "IndelRealigner    & \n FixMateInformation    (knownsOnly)"    label = "Per Lane (750*3=2250) ";
    3032  }
    3133
    3234  subgraph cluster_2 {
    33     style=filled; color=lightgrey; "flowcell_lane.aligned.bam" -> Merge -> "sample.aligned.bam" -> "IndelRealigner   & FixMateInformation  " "flowcell_lane2.aligned.bam" -> Merge "flowcell_lane3.aligned.bam" -> Merge "IndelRealigner   & FixMateInformation  " -> IndelGenotyperV2 -> FilterSingleCalls   -> UnifiedGenotyper   -> Filtration -> VariantEval   -> "sample QC reports"
     35    style=filled; color=lightgrey; "flowcell_lane.aligned.bam" -> Merge -> "sample.aligned.bam" -> "IndelRealigner    & FixMateInformation   " "flowcell_lane2.aligned.bam" -> Merge "flowcell_lane3.aligned.bam" -> Merge "IndelRealigner    & FixMateInformation   " -> IndelGenotyperV2 -> FilterSingleCalls    -> UnifiedGenotyper    -> Filtration -> VariantEval    -> "sample QC reports"
    3436
    3537Filtration -> "sample_chr[1-24].vcf"
     
    4244    style=filled; color=lightgrey;
    4345
    44   "sample.aligned.bam" -> "UnifiedGenotype   (without realign)"->"QC against arrays and BGI"
     46  "sample.aligned.bam" -> "UnifiedGenotype    (without realign)"->"QC against arrays and BGI"
    4547
    4648  label = "QC per sample";
     
    8789== Optimization? ==
    8890==== Current ====
    89 Step    Cores    Memory (gb)    Time (hh.mm)[[BR]]BWA alignment    1    ± 6    10.05[[BR]]BWA spe    1        3.35[[BR]]Sam-Bam    1        12.3[[BR]]Sam sort    1        5.05[[BR]]Mark Duplicates    1    4    1.55[[BR]]Realignment (knowns only)    1    8 (*can be lowered)    5.2[[BR]]Fix mates    1    6 (*)    3.05[[BR]]Covariates bef.    1    2    12.35[[BR]]Recalibrate    1    4    7.3[[BR]]Sam sort    1        4.5[[BR]]Covariates aft.    1    2    11.2[[BR]]Analyze Covar.    1    4    < 00.01
     91Step    Cores    Memory (gb)    Time (hh.mm)[[BR]]BWA alignment    1    ± 6    10.05[[BR]]BWA spe    1        3.35[[BR]]Sam-Bam    1        12.3[[BR]]Sam sort    1        5.05[[BR]]Mark Duplicates    1    4    1.55[[BR]]Realignment (knowns only)    1    8 (*can be lowered)    5.2[[BR]]Fix mates    1    6 (*)    3.05[[BR]]Covariates bef.    1    2    12.35[[BR]]Recalibrate    1    4    7.3[[BR]]Sam sort    1        4.5[[BR]]Covariates aft.    1    2    11.2[[BR]]Analyze Covar.    1    4    < 00.01
    9092
    9193==== Disk ====