diff --git a/config/cluster/slurm.json b/config/cluster/slurm.json index 0627f77..ead9ed5 100644 --- a/config/cluster/slurm.json +++ b/config/cluster/slurm.json @@ -290,6 +290,11 @@ "time" : "1-00:00:00", "threads": "24" }, + "clairs_tumor_only": { + "mem": "64G", + "time" : "1-00:00:00", + "threads": "24" + }, "somatic_purple_maf": { "threads": "4", "mem": "32G", diff --git a/config/cluster/uge.json b/config/cluster/uge.json index e8e4a6e..6d0937e 100644 --- a/config/cluster/uge.json +++ b/config/cluster/uge.json @@ -281,6 +281,11 @@ "partition": "", "threads": "8" }, + "clairs_tumor_only": { + "mem": "4G", + "partition": "", + "threads": "8" + }, "inbreeding": { "mem": "4G", "partition": "", diff --git a/config/containers.json b/config/containers.json index e57119f..19c7fae 100644 --- a/config/containers.json +++ b/config/containers.json @@ -10,6 +10,7 @@ "base": "docker://skchronicles/ccbr_wes_base:v0.1.0", "deepvariant_gpu": "docker://google/deepvariant:1.5.0-gpu", "deepvariant": "docker://google/deepvariant:1.5.0", + "clairs-to": "docker://hkubal/clairs-to:v0.2.0", "glnexus": "docker://ghcr.io/dnanexus-rnd/glnexus:v1.4.1", "open_cravat": "docker://skchronicles/ncbr_opencravat:v0.1.0", "octopus": "docker://skchronicles/ncbr_octopus:v0.2.0", @@ -17,4 +18,4 @@ "sequenza": "docker://sequenza/sequenza:3.0.0", "vcf2maf": "docker://skchronicles/ncbr_vcf2maf:v0.1.0" } -} +} \ No newline at end of file diff --git a/workflow/rules/somatic.smk b/workflow/rules/somatic.smk index a18c153..1436334 100644 --- a/workflow/rules/somatic.smk +++ b/workflow/rules/somatic.smk @@ -690,6 +690,62 @@ rule hmftools_sage: """ +rule clairs_tumor_only: + """Data-processing step to call somatic variants in tumor-only samples using + ClairS-TO. ClairS-TO is a deep-learning based variant caller that uses an ensemble + of two neural networks to call somatic variants. ClairS-TO is unique in that + it can call somatic variants without a matched normal. 
More information about + ClairS-TO can be found here: https://github.com/HKU-BAL/ClairS-TO + @Input: + Realigned, recalibrated BAM file (scatter-per-tumor-sample) + @Output: + Per sample somatic variants in VCF format + """ + input: + tumor = join(workpath, "BAM", "{name}.recal.bam"), + output: + snps = join(workpath, "clairs", "somatic", "{name}", "snv.vcf.gz"), + indels = join(workpath, "clairs", "somatic", "{name}", "indel.vcf.gz"), + tmp = join(workpath, "clairs", "somatic", "{name}", "clairs_snps_indels.vcf"), + vcf = join(workpath, "clairs", "somatic", "{name}.clairs.vcf"), + params: + rname = 'clairs_to', + tumor = '{name}', + genome = config['references']['GENOME'], + outdir = join(workpath, "clairs", "somatic", "{name}"), + threads: + int(allocated("threads", "clairs_tumor_only", cluster)), + container: config['images']['clairs-to'] + envmodules: config['tools']['rlang'] + shell: """ + # Call somatic variants with ClairS-TO, + # run in isolated sample directory to + # avoid collisions in file names + /opt/bin/run_clairs_to \\ + --tumor_bam_fn {input.tumor} \\ + --ref_fn {params.genome} \\ + --threads {threads} \\ + --platform ilmn \\ + --output_dir {params.outdir} \\ + --conda_prefix /opt/micromamba/envs/clairs-to + + # Concatenate SNPs and Indels + bcftools concat \\ + -a \\ + -O v \\ + -o {output.tmp} \\ + {output.snps} \\ + {output.indels} + + # Filter for PASS variants + bcftools view \\ + -f 'PASS' \\ + -O v \\ + -o {output.vcf} \\ + {output.tmp} + """ + + rule muse: """Data-processing step to call somatic mutations with MuSE. This tool is unique in accounting for tumor heterogeneity using a sample-specific error