Skip to content

Commit

Permalink
fix: improve accuracy of preliminary score estimation
Browse files Browse the repository at this point in the history
- Remove fragment_min/max_mz parameters
- Adjust ppm tolerance during preliminary search to account for charge
  • Loading branch information
lazear committed Jul 25, 2024
1 parent 5379198 commit d1280a7
Show file tree
Hide file tree
Showing 10 changed files with 33 additions and 54 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [v0.15.0-alpha] (unreleased)
### Added
- Initial support for searching diaPASEF data
### Changed
- Don't deisotope reporter ion regions if MS2-based TMT/iTRAQ is used
- Removed `fragment_min_mz` and `fragment_max_mz` parameters. These were decreasing the accuracy of preliminary scoring estimation when attempting to annotate multiply-charged, high-m/z ions.

## [v0.14.7]
### Added
- Added columns missing from parquet output: `semi_enzymatic` and `missed_cleavages`
Expand Down
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 4 additions & 11 deletions crates/sage-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,21 +184,16 @@ impl Runner {
.sn
.then_some(self.parameters.quant.tmt_settings.level);

let (min_fragment_mz, min_deisotope_mz) = match &self.parameters.quant.tmt {
let min_deisotope_mz = match &self.parameters.quant.tmt {
Some(i) => match self.parameters.quant.tmt_settings.level {
2 => (
i.reporter_masses().first().map(|x| x * (1.0 - 20E-6)),
i.reporter_masses().last().map(|x| x * (1.0 + 20E-6)),
),
_ => (None, None),
2 => i.reporter_masses().last().map(|x| x * (1.0 + 20E-6)),
_ => None,
},
None => (None, None),
None => None,
};

let sp = SpectrumProcessor::new(
self.parameters.max_peaks,
min_fragment_mz.unwrap_or(self.parameters.database.fragment_min_mz),
self.parameters.database.fragment_max_mz,
self.parameters.deisotope,
min_deisotope_mz.unwrap_or(0.0),
);
Expand Down Expand Up @@ -268,8 +263,6 @@ impl Runner {
min_precursor_charge: self.parameters.precursor_charge.0,
max_precursor_charge: self.parameters.precursor_charge.1,
max_fragment_charge: self.parameters.max_fragment_charge,
min_fragment_mass: self.parameters.database.fragment_min_mz,
max_fragment_mass: self.parameters.database.fragment_max_mz,
chimera: self.parameters.chimera,
report_psms: self.parameters.report_psms,
wide_window: self.parameters.wide_window,
Expand Down
4 changes: 1 addition & 3 deletions crates/sage-cli/tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ fn integration() -> anyhow::Result<()> {
let spectra = sage_cloudpath::util::read_mzml("../../tests/LQSRPAAPPAPGPGQLTLR.mzML", 0, None)?;
assert_eq!(spectra.len(), 1);

let sp = SpectrumProcessor::new(100, 0.0, 1500.0, true, 0.0);
let sp = SpectrumProcessor::new(100, true, 0.0);
let processed = sp.process(spectra[0].clone());
assert!(processed.peaks.len() <= 300);

Expand All @@ -27,8 +27,6 @@ fn integration() -> anyhow::Result<()> {
min_precursor_charge: 2,
max_precursor_charge: 4,
max_fragment_charge: Some(1),
min_fragment_mass: 0.0,
max_fragment_mass: 1500.0,
chimera: false,
report_psms: 1,
wide_window: false,
Expand Down
2 changes: 1 addition & 1 deletion crates/sage-cloudpath/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "sage-cloudpath"
version = "0.14.7"
version = "0.15.0-alpha"
authors = ["Michael Lazear <[email protected]"]
edition = "2021"
rust-version = "1.62"
Expand Down
2 changes: 1 addition & 1 deletion crates/sage/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "sage-core"
version = "0.14.7"
version = "0.15.0-alpha"
authors = ["Michael Lazear <[email protected]"]
edition = "2021"
rust-version = "1.62"
Expand Down
25 changes: 11 additions & 14 deletions crates/sage/src/database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,6 @@ pub struct Builder {
pub bucket_size: Option<usize>,

pub enzyme: Option<EnzymeBuilder>,
/// Minimum fragment m/z that will be stored in the database
pub fragment_min_mz: Option<f32>,
/// Maximum fragment m/z that will be stored in the database
pub fragment_max_mz: Option<f32>,
/// Minimum peptide monoisotopic mass that will be fragmented
pub peptide_min_mass: Option<f32>,
/// Maximum peptide monoisotopic mass that will be fragmented
Expand Down Expand Up @@ -95,8 +91,6 @@ impl Builder {
let bucket_size = self.bucket_size.unwrap_or(8192).next_power_of_two();
Parameters {
bucket_size,
fragment_min_mz: self.fragment_min_mz.unwrap_or(150.0),
fragment_max_mz: self.fragment_max_mz.unwrap_or(2000.0),
peptide_min_mass: self.peptide_min_mass.unwrap_or(500.0),
peptide_max_mass: self.peptide_max_mass.unwrap_or(5000.0),
ion_kinds: self.ion_kinds.unwrap_or(vec![Kind::B, Kind::Y]),
Expand All @@ -120,8 +114,6 @@ impl Builder {
pub struct Parameters {
pub bucket_size: usize,
pub enzyme: EnzymeBuilder,
pub fragment_min_mz: f32,
pub fragment_max_mz: f32,
pub peptide_min_mass: f32,
pub peptide_max_mass: f32,
pub ion_kinds: Vec<Kind>,
Expand Down Expand Up @@ -236,8 +228,6 @@ impl Parameters {
}
};
ion_idx_filter
&& ion.monoisotopic_mass >= self.fragment_min_mz
&& ion.monoisotopic_mass <= self.fragment_max_mz
})
.map(move |(_, ion)| Theoretical {
peptide_index: PeptideIx(idx as u32),
Expand Down Expand Up @@ -426,8 +416,17 @@ pub struct IndexedQuery<'d> {

impl<'d> IndexedQuery<'d> {
/// Search for a specified `fragment_mz` within the database
pub fn page_search(&self, fragment_mz: f32) -> impl Iterator<Item = &Theoretical> {
let (fragment_lo, fragment_hi) = self.fragment_tol.bounds(fragment_mz);
pub fn page_search(&self, fragment_mz: f32, charge: u8) -> impl Iterator<Item = &Theoretical> {
let mass = fragment_mz * charge as f32;

// Account for multiplication of observed decharged mass
// - relative tolerance needs to be proportionally decreased
let tol = match self.fragment_tol {
Tolerance::Ppm(lo, hi) => Tolerance::Ppm(lo / charge as f32, hi / charge as f32),
Tolerance::Da(_, _) => self.fragment_tol,
};

let (fragment_lo, fragment_hi) = tol.bounds(mass);
let (precursor_lo, precursor_hi) = self.precursor_tol.bounds(self.precursor_mass);

// Locate the left and right page indices that contain matching fragments
Expand Down Expand Up @@ -587,8 +586,6 @@ mod test {
max_len: Some(10),
..Default::default()
},
fragment_min_mz: 100.0,
fragment_max_mz: 1000.0,
peptide_min_mass: 150.0,
peptide_max_mass: 5000.0,
ion_kinds: vec![Kind::B, Kind::Y],
Expand Down
7 changes: 3 additions & 4 deletions crates/sage/src/scoring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,6 @@ pub struct Scorer<'db> {
pub min_precursor_charge: u8,
pub max_precursor_charge: u8,
pub max_fragment_charge: Option<u8>,
pub min_fragment_mass: f32,
pub max_fragment_mass: f32,
pub chimera: bool,
pub report_psms: usize,

Expand Down Expand Up @@ -270,8 +268,7 @@ impl<'db> Scorer<'db> {

for peak in query.peaks.iter() {
for charge in 1..max_fragment_charge {
let mass = peak.mass * charge as f32;
for frag in candidates.page_search(mass) {
for frag in candidates.page_search(peak.mass, charge) {
let idx = frag.peptide_index.0 as usize - candidates.pre_idx_lo;
let sc = &mut hits.preliminary[idx];
if sc.matched == 0 {
Expand All @@ -280,6 +277,7 @@ impl<'db> Scorer<'db> {
sc.peptide = frag.peptide_index;
sc.isotope_error = isotope_error;
}

sc.matched += 1;
hits.matched_peaks += 1;
}
Expand Down Expand Up @@ -598,6 +596,7 @@ impl<'db> Scorer<'db> {
for charge in 1..max_fragment_charge {
// Experimental peaks are multipled by charge, therefore theoretical are divided
let mz = frag.monoisotopic_mass / charge as f32;

if let Some(peak) = crate::spectrum::select_most_intense_peak(
&query.peaks,
mz,
Expand Down
19 changes: 2 additions & 17 deletions crates/sage/src/spectrum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ pub struct Deisotoped {
#[derive(Debug, Clone)]
pub struct SpectrumProcessor {
pub take_top_n: usize,
pub max_fragment_mz: f32,
pub min_fragment_mz: f32,
pub min_deisotope_mz: f32,
pub deisotope: bool,
}
Expand Down Expand Up @@ -262,17 +260,9 @@ impl SpectrumProcessor {
/// * `min_fragment_mz`: Keep only fragments >= this m/z
/// * `max_fragment_mz`: Keep only fragments <= this m/z
/// * `deisotope`: Perform deisotoping & charge state deconvolution
pub fn new(
take_top_n: usize,
min_fragment_mz: f32,
max_fragment_mz: f32,
deisotope: bool,
min_deisotope_mz: f32,
) -> Self {
pub fn new(take_top_n: usize, deisotope: bool, min_deisotope_mz: f32) -> Self {
Self {
take_top_n,
min_fragment_mz,
max_fragment_mz,
min_deisotope_mz,
deisotope,
}
Expand Down Expand Up @@ -310,11 +300,7 @@ impl SpectrumProcessor {

peaks
.into_iter()
.filter(|peak| {
peak.envelope.is_none()
&& peak.mz >= self.min_fragment_mz
&& peak.mz <= self.max_fragment_mz
})
.filter(|peak| peak.envelope.is_none())
.map(|peak| {
// Convert from MH* to M
let mass = (peak.mz - PROTON) * peak.charge.unwrap_or(1) as f32;
Expand All @@ -330,7 +316,6 @@ impl SpectrumProcessor {
.mz
.iter()
.zip(spectrum.intensity.iter())
.filter(|&(mz, _)| *mz >= self.min_fragment_mz && *mz <= self.max_fragment_mz)
.map(|(mz, &intensity)| {
let mass = (mz - PROTON) * 1.0;
Peak { mass, intensity }
Expand Down
2 changes: 1 addition & 1 deletion crates/sage/tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ fn check_all_ions_visited(target_fragment_mz: f32, bucket_size: usize) {
// are returned to us by searching the database.
let query = database.query(1000.0, Tolerance::Da(-5000.0, 5000.0), fragment_tol);

for fragment in query.page_search(target_fragment_mz) {
for fragment in query.page_search(target_fragment_mz, 1) {
visited[fragment.peptide_index.0 as usize] += 1;
}

Expand Down

0 comments on commit d1280a7

Please sign in to comment.