diff --git a/Cargo.lock b/Cargo.lock index 004815351..375ccb5b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4435,7 +4435,9 @@ dependencies = [ "rstest_reuse", "serde", "serde_json", + "serde_with", "smallvec", + "strum", "tempfile", "test_util", "thiserror", diff --git a/crates/voicevox_core/Cargo.toml b/crates/voicevox_core/Cargo.toml index bd3456abb..5c8dd440d 100644 --- a/crates/voicevox_core/Cargo.toml +++ b/crates/voicevox_core/Cargo.toml @@ -32,9 +32,11 @@ open_jtalk.workspace = true ouroboros.workspace = true rayon.workspace = true regex.workspace = true -serde = { workspace = true, features = ["derive"] } +serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true, features = ["preserve_order"] } +serde_with.workspace = true smallvec.workspace = true +strum = { workspace = true, features = ["derive"] } tempfile.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["rt"] } # FIXME: feature-gateする diff --git a/crates/voicevox_core/src/error.rs b/crates/voicevox_core/src/error.rs index 19d464d21..916964429 100644 --- a/crates/voicevox_core/src/error.rs +++ b/crates/voicevox_core/src/error.rs @@ -1,11 +1,12 @@ use crate::{ engine::{FullContextLabelError, KanaParseError}, user_dict::InvalidWordError, - StyleId, VoiceModelId, + StyleId, StyleType, VoiceModelId, }; //use engine:: use duplicate::duplicate_item; -use std::path::PathBuf; +use itertools::Itertools as _; +use std::{collections::BTreeSet, path::PathBuf}; use thiserror::Error; use uuid::Uuid; @@ -38,6 +39,7 @@ impl Error { LoadModelErrorKind::ReadZipEntry { .. } => ErrorKind::ReadZipEntry, LoadModelErrorKind::ModelAlreadyLoaded { .. } => ErrorKind::ModelAlreadyLoaded, LoadModelErrorKind::StyleAlreadyLoaded { .. } => ErrorKind::StyleAlreadyLoaded, + LoadModelErrorKind::InvalidModelFormat { .. } => ErrorKind::InvalidModelFormat, LoadModelErrorKind::InvalidModelData => ErrorKind::InvalidModelData, }, ErrorRepr::GetSupportedDevices(_) => ErrorKind::GetSupportedDevices, @@ -70,10 +72,14 @@ pub(crate) enum ErrorRepr { GetSupportedDevices(#[source] anyhow::Error), #[error( - "`{style_id}`に対するスタイルが見つかりませんでした。音声モデルが読み込まれていないか、読\ - み込みが解除されています" + "`{style_id}` ([{style_types}])に対するスタイルが見つかりませんでした。音声モデルが\ + 読み込まれていないか、読み込みが解除されています", + style_types = style_types.iter().format(", ") )] - StyleNotFound { style_id: StyleId }, + StyleNotFound { + style_id: StyleId, + style_types: &'static BTreeSet, + }, #[error( "`{model_id}`に対する音声モデルが見つかりませんでした。読み込まれていないか、読み込みが既\ @@ -117,6 +123,8 @@ pub enum ErrorKind { OpenZipFile, /// ZIP内のファイルが読めなかった。 ReadZipEntry, + /// モデルの形式が不正。 + InvalidModelFormat, /// すでに読み込まれている音声モデルを読み込もうとした。 ModelAlreadyLoaded, /// すでに読み込まれているスタイルを読み込もうとした。 @@ -165,6 +173,8 @@ pub(crate) enum LoadModelErrorKind { OpenZipFile, #[display(fmt = "`{filename}`を読み取れませんでした")] ReadZipEntry { filename: String }, + #[display(fmt = "モデルの形式が不正です")] + InvalidModelFormat, #[display(fmt = "モデル`{id}`は既に読み込まれています")] ModelAlreadyLoaded { id: VoiceModelId }, #[display(fmt = "スタイル`{id}`は既に読み込まれています")] diff --git a/crates/voicevox_core/src/infer.rs b/crates/voicevox_core/src/infer.rs index c816c9899..fc8954e7d 100644 --- a/crates/voicevox_core/src/infer.rs +++ b/crates/voicevox_core/src/infer.rs @@ -1,9 +1,9 @@ -pub(crate) mod domain; +pub(crate) mod domains; mod model_file; pub(crate) mod runtimes; -pub(crate) mod status; +pub(crate) mod session_set; -use std::{borrow::Cow, fmt::Debug}; +use std::{borrow::Cow, collections::BTreeSet, fmt::Debug}; use derive_new::new; use duplicate::duplicate_item; @@ -11,9 +11,10 @@ use enum_map::{Enum, EnumMap}; use ndarray::{Array, ArrayD, Dimension, ShapeError}; use thiserror::Error; -use crate::SupportedDevices; +use crate::{StyleType, SupportedDevices}; pub(crate) trait InferenceRuntime: 'static { + // TODO: "session"とは何なのかを定め、ドキュメントを書く。`InferenceSessionSet`も同様。 type Session: Sized + Send + 'static; type RunContext<'a>: From<&'a mut Self::Session> + PushInputTensor; @@ -32,9 +33,17 @@ pub(crate) trait InferenceRuntime: 'static { fn run(ctx: Self::RunContext<'_>) -> anyhow::Result>; } -/// ある`VoiceModel`が提供する推論操作の集合を示す。 -pub(crate) trait InferenceDomain { +/// 共に扱われるべき推論操作の集合を示す。 +pub(crate) trait InferenceDomain: Sized { type Operation: InferenceOperation; + + /// 対応する`StyleType`。 + /// + /// 複数の`InferenceDomain`に対応する`StyleType`があってもよい。 + /// + /// また、どの`InferenceDomain`にも属さない`StyleType`があってもよい。そのような`StyleType`は + /// 音声モデルのロード時に単に拒否されるべきである。 + fn style_types() -> &'static BTreeSet; } /// `InferenceDomain`の推論操作を表す列挙型。 diff --git a/crates/voicevox_core/src/infer/domains.rs b/crates/voicevox_core/src/infer/domains.rs new file mode 100644 index 000000000..687550399 --- /dev/null +++ b/crates/voicevox_core/src/infer/domains.rs @@ -0,0 +1,22 @@ +mod talk; + +pub(crate) use self::talk::{ + DecodeInput, DecodeOutput, PredictDurationInput, PredictDurationOutput, PredictIntonationInput, + PredictIntonationOutput, TalkDomain, TalkOperation, +}; + +pub(crate) struct InferenceDomainMap { + pub(crate) talk: V::Talk, +} + +pub(crate) trait InferenceDomainMapValues { + type Talk; +} + +impl InferenceDomainMapValues for (T,) { + type Talk = T; +} + +impl InferenceDomainMapValues for [A] { + type Talk = A; +} diff --git a/crates/voicevox_core/src/infer/domain.rs b/crates/voicevox_core/src/infer/domains/talk.rs similarity index 82% rename from crates/voicevox_core/src/infer/domain.rs rename to crates/voicevox_core/src/infer/domains/talk.rs index bb83886dd..e0716fa50 100644 --- a/crates/voicevox_core/src/infer/domain.rs +++ b/crates/voicevox_core/src/infer/domains/talk.rs @@ -1,22 +1,32 @@ +use std::collections::BTreeSet; + use enum_map::Enum; use macros::{InferenceInputSignature, InferenceOperation, InferenceOutputSignature}; use ndarray::{Array0, Array1, Array2}; +use once_cell::sync::Lazy; + +use crate::StyleType; -use super::{ +use super::super::{ InferenceDomain, InferenceInputSignature as _, InferenceOutputSignature as _, OutputTensor, }; -pub(crate) enum InferenceDomainImpl {} +pub(crate) enum TalkDomain {} + +impl InferenceDomain for TalkDomain { + type Operation = TalkOperation; -impl InferenceDomain for InferenceDomainImpl { - type Operation = InferenceOperationImpl; + fn style_types() -> &'static BTreeSet { + static STYLE_TYPES: Lazy> = Lazy::new(|| [StyleType::Talk].into()); + &STYLE_TYPES + } } #[derive(Clone, Copy, Enum, InferenceOperation)] #[inference_operation( - type Domain = InferenceDomainImpl; + type Domain = TalkDomain; )] -pub(crate) enum InferenceOperationImpl { +pub(crate) enum TalkOperation { #[inference_operation( type Input = PredictDurationInput; type Output = PredictDurationOutput; diff --git a/crates/voicevox_core/src/infer/session_set.rs b/crates/voicevox_core/src/infer/session_set.rs new file mode 100644 index 000000000..56d570f98 --- /dev/null +++ b/crates/voicevox_core/src/infer/session_set.rs @@ -0,0 +1,102 @@ +use std::{collections::HashMap, fmt::Display, marker::PhantomData, sync::Arc}; + +use anyhow::bail; +use enum_map::{Enum as _, EnumMap}; +use itertools::Itertools as _; + +use crate::error::ErrorRepr; + +use super::{ + model_file, InferenceDomain, InferenceInputSignature, InferenceOperation, InferenceRuntime, + InferenceSessionOptions, InferenceSignature, ParamInfo, +}; + +pub(crate) struct InferenceSessionSet( + EnumMap>>, +); + +impl InferenceSessionSet { + pub(crate) fn new( + model_bytes: &EnumMap>, + options: &EnumMap, + ) -> anyhow::Result { + let mut sessions = model_bytes + .iter() + .map(|(op, model_bytes)| { + let (expected_input_param_infos, expected_output_param_infos) = + ::PARAM_INFOS[op]; + + let (sess, actual_input_param_infos, actual_output_param_infos) = + R::new_session(|| model_file::decrypt(model_bytes), options[op])?; + + check_param_infos(expected_input_param_infos, &actual_input_param_infos)?; + check_param_infos(expected_output_param_infos, &actual_output_param_infos)?; + + Ok((op.into_usize(), std::sync::Mutex::new(sess).into())) + }) + .collect::>>()?; + + return Ok(Self(EnumMap::::from_fn(|k| { + sessions.remove(&k.into_usize()).expect("should exist") + }))); + + fn check_param_infos( + expected: &[ParamInfo], + actual: &[ParamInfo], + ) -> anyhow::Result<()> { + if !(expected.len() == actual.len() + && itertools::zip_eq(expected, actual) + .all(|(expected, actual)| expected.accepts(actual))) + { + let expected = display_param_infos(expected); + let actual = display_param_infos(actual); + bail!("expected {{{expected}}}, got {{{actual}}}") + } + Ok(()) + } + + fn display_param_infos(infos: &[ParamInfo]) -> impl Display { + infos + .iter() + .map(|ParamInfo { name, dt, ndim }| { + let brackets = match *ndim { + Some(ndim) => "[]".repeat(ndim), + None => "[]...".to_owned(), + }; + format!("{name}: {dt}{brackets}") + }) + .join(", ") + } + } +} + +impl InferenceSessionSet { + pub(crate) fn get(&self) -> InferenceSessionCell + where + I: InferenceInputSignature, + I::Signature: InferenceSignature, + { + InferenceSessionCell { + inner: self.0[I::Signature::OPERATION].clone(), + marker: PhantomData, + } + } +} + +pub(crate) struct InferenceSessionCell { + inner: Arc>, + marker: PhantomData, +} + +impl InferenceSessionCell { + pub(crate) fn run( + self, + input: I, + ) -> crate::Result<::Output> { + let inner = &mut self.inner.lock().unwrap(); + let ctx = input.make_run_context::(inner); + R::run(ctx) + .and_then(TryInto::try_into) + .map_err(|e| ErrorRepr::InferenceFailed(e).into()) + } +} diff --git a/crates/voicevox_core/src/infer/status.rs b/crates/voicevox_core/src/infer/status.rs deleted file mode 100644 index 2a575153d..000000000 --- a/crates/voicevox_core/src/infer/status.rs +++ /dev/null @@ -1,429 +0,0 @@ -use std::{ - collections::{BTreeMap, HashMap}, - fmt::Display, - marker::PhantomData, - sync::Arc, -}; - -use anyhow::bail; -use educe::Educe; -use enum_map::{Enum as _, EnumMap}; -use indexmap::IndexMap; -use itertools::{iproduct, Itertools as _}; - -use crate::{ - error::{ErrorRepr, LoadModelError, LoadModelErrorKind, LoadModelResult}, - infer::{InferenceOperation, ParamInfo}, - manifest::ModelInnerId, - metas::{self, SpeakerMeta, StyleId, StyleMeta, VoiceModelMeta}, - voice_model::{VoiceModelHeader, VoiceModelId}, - Result, -}; - -use super::{ - model_file, InferenceDomain, InferenceInputSignature, InferenceRuntime, - InferenceSessionOptions, InferenceSignature, -}; - -pub(crate) struct Status { - loaded_models: std::sync::Mutex>, - session_options: EnumMap, -} - -impl Status { - pub(crate) fn new(session_options: EnumMap) -> Self { - Self { - loaded_models: Default::default(), - session_options, - } - } - - pub(crate) fn insert_model( - &self, - model_header: &VoiceModelHeader, - model_bytes: &EnumMap>, - ) -> Result<()> { - self.loaded_models - .lock() - .unwrap() - .ensure_acceptable(model_header)?; - - let session_set = - SessionSet::new(model_bytes, &self.session_options).map_err(|source| { - LoadModelError { - path: model_header.path.clone(), - context: LoadModelErrorKind::InvalidModelData, - source: Some(source), - } - })?; - - self.loaded_models - .lock() - .unwrap() - .insert(model_header, session_set)?; - Ok(()) - } - - pub(crate) fn unload_model(&self, voice_model_id: &VoiceModelId) -> Result<()> { - self.loaded_models.lock().unwrap().remove(voice_model_id) - } - - pub(crate) fn metas(&self) -> VoiceModelMeta { - self.loaded_models.lock().unwrap().metas() - } - - pub(crate) fn ids_for(&self, style_id: StyleId) -> Result<(VoiceModelId, ModelInnerId)> { - self.loaded_models.lock().unwrap().ids_for(style_id) - } - - pub(crate) fn is_loaded_model(&self, voice_model_id: &VoiceModelId) -> bool { - self.loaded_models - .lock() - .unwrap() - .contains_voice_model(voice_model_id) - } - - pub(crate) fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool { - self.loaded_models.lock().unwrap().contains_style(style_id) - } - - pub(crate) fn validate_speaker_id(&self, style_id: StyleId) -> bool { - self.is_loaded_model_by_style_id(style_id) - } - - /// 推論を実行する。 - /// - /// # Performance - /// - /// CPU/GPU-boundな操作であるため、非同期ランタイム上では直接実行されるべきではない。 - /// - /// # Panics - /// - /// `self`が`model_id`を含んでいないとき、パニックする。 - pub(crate) fn run_session( - &self, - model_id: &VoiceModelId, - input: I, - ) -> Result<::Output> - where - I: InferenceInputSignature, - I::Signature: InferenceSignature, - { - let sess = self.loaded_models.lock().unwrap().get(model_id); - sess.run(input) - } -} - -/// 読み込んだモデルの`Session`とそのメタ情報を保有し、追加/削除/取得の操作を提供する。 -/// -/// この構造体のメソッドは、すべて一瞬で完了すべきである。 -#[derive(Educe)] -#[educe(Default(bound = "R: InferenceRuntime, D: InferenceDomain"))] -struct LoadedModels( - IndexMap>, -); - -struct LoadedModel { - model_inner_ids: BTreeMap, - metas: VoiceModelMeta, - session_set: SessionSet, -} - -impl LoadedModels { - fn metas(&self) -> VoiceModelMeta { - metas::merge(self.0.values().flat_map(|LoadedModel { metas, .. }| metas)) - } - - fn ids_for(&self, style_id: StyleId) -> Result<(VoiceModelId, ModelInnerId)> { - let ( - model_id, - LoadedModel { - model_inner_ids, .. - }, - ) = self - .0 - .iter() - .find(|(_, LoadedModel { metas, .. })| { - metas - .iter() - .flat_map(SpeakerMeta::styles) - .any(|style| *style.id() == style_id) - }) - .ok_or(ErrorRepr::StyleNotFound { style_id })?; - - let model_inner_id = *model_inner_ids - .get(&style_id) - .expect("`model_inner_ids` should contains all of the style IDs in the model"); - - Ok((model_id.clone(), model_inner_id)) - } - - /// # Panics - /// - /// `self`が`model_id`を含んでいないとき、パニックする。 - fn get(&self, model_id: &VoiceModelId) -> SessionCell - where - I: InferenceInputSignature, - I::Signature: InferenceSignature, - { - self.0[model_id].session_set.get() - } - - fn contains_voice_model(&self, model_id: &VoiceModelId) -> bool { - self.0.contains_key(model_id) - } - - fn contains_style(&self, style_id: StyleId) -> bool { - self.styles().any(|style| *style.id() == style_id) - } - - /// 音声モデルを受け入れ可能かをチェックする。 - /// - /// # Errors - /// - /// 次の場合にエラーを返す。 - /// - /// - 音声モデルIDかスタイルIDが`model_header`と重複するとき - fn ensure_acceptable(&self, model_header: &VoiceModelHeader) -> LoadModelResult<()> { - let error = |context| LoadModelError { - path: model_header.path.clone(), - context, - source: None, - }; - - let loaded = self.speakers(); - let external = model_header.metas.iter(); - for (loaded, external) in iproduct!(loaded, external) { - if loaded.speaker_uuid() == external.speaker_uuid() { - loaded.warn_diff_except_styles(external); - } - } - - let loaded = self.styles(); - let external = model_header - .metas - .iter() - .flat_map(|speaker| speaker.styles()); - if self.0.contains_key(&model_header.id) { - return Err(error(LoadModelErrorKind::ModelAlreadyLoaded { - id: model_header.id.clone(), - })); - } - if let Some((style, _)) = - iproduct!(loaded, external).find(|(loaded, external)| loaded.id() == external.id()) - { - return Err(error(LoadModelErrorKind::StyleAlreadyLoaded { - id: *style.id(), - })); - } - Ok(()) - } - - fn insert( - &mut self, - model_header: &VoiceModelHeader, - session_set: SessionSet, - ) -> Result<()> { - self.ensure_acceptable(model_header)?; - - let prev = self.0.insert( - model_header.id.clone(), - LoadedModel { - model_inner_ids: model_header.model_inner_ids(), - metas: model_header.metas.clone(), - session_set, - }, - ); - assert!(prev.is_none()); - Ok(()) - } - - fn remove(&mut self, model_id: &VoiceModelId) -> Result<()> { - if self.0.remove(model_id).is_none() { - return Err(ErrorRepr::ModelNotFound { - model_id: model_id.clone(), - } - .into()); - } - Ok(()) - } - - fn speakers(&self) -> impl Iterator + Clone { - self.0.values().flat_map(|LoadedModel { metas, .. }| metas) - } - - fn styles(&self) -> impl Iterator { - self.speakers().flat_map(|speaker| speaker.styles()) - } -} - -struct SessionSet( - EnumMap>>, -); - -impl SessionSet { - fn new( - model_bytes: &EnumMap>, - options: &EnumMap, - ) -> anyhow::Result { - let mut sessions = model_bytes - .iter() - .map(|(op, model_bytes)| { - let (expected_input_param_infos, expected_output_param_infos) = - ::PARAM_INFOS[op]; - - let (sess, actual_input_param_infos, actual_output_param_infos) = - R::new_session(|| model_file::decrypt(model_bytes), options[op])?; - - check_param_infos(expected_input_param_infos, &actual_input_param_infos)?; - check_param_infos(expected_output_param_infos, &actual_output_param_infos)?; - - Ok((op.into_usize(), std::sync::Mutex::new(sess).into())) - }) - .collect::>>()?; - - return Ok(Self(EnumMap::::from_fn(|k| { - sessions.remove(&k.into_usize()).expect("should exist") - }))); - - fn check_param_infos( - expected: &[ParamInfo], - actual: &[ParamInfo], - ) -> anyhow::Result<()> { - if !(expected.len() == actual.len() - && itertools::zip_eq(expected, actual) - .all(|(expected, actual)| expected.accepts(actual))) - { - let expected = display_param_infos(expected); - let actual = display_param_infos(actual); - bail!("expected {{{expected}}}, got {{{actual}}}") - } - Ok(()) - } - - fn display_param_infos(infos: &[ParamInfo]) -> impl Display { - infos - .iter() - .map(|ParamInfo { name, dt, ndim }| { - let brackets = match *ndim { - Some(ndim) => "[]".repeat(ndim), - None => "[]...".to_owned(), - }; - format!("{name}: {dt}{brackets}") - }) - .join(", ") - } - } -} - -impl SessionSet { - fn get(&self) -> SessionCell - where - I: InferenceInputSignature, - I::Signature: InferenceSignature, - { - SessionCell { - inner: self.0[I::Signature::OPERATION].clone(), - marker: PhantomData, - } - } -} - -struct SessionCell { - inner: Arc>, - marker: PhantomData, -} - -impl SessionCell { - fn run(self, input: I) -> crate::Result<::Output> { - let inner = &mut self.inner.lock().unwrap(); - let ctx = input.make_run_context::(inner); - R::run(ctx) - .and_then(TryInto::try_into) - .map_err(|e| ErrorRepr::InferenceFailed(e).into()) - } -} - -#[cfg(test)] -mod tests { - use enum_map::enum_map; - use pretty_assertions::assert_eq; - use rstest::rstest; - - use crate::{ - infer::domain::{InferenceDomainImpl, InferenceOperationImpl}, - macros::tests::assert_debug_fmt_eq, - synthesizer::InferenceRuntimeImpl, - test_util::open_default_vvm_file, - }; - - use super::{super::InferenceSessionOptions, Status}; - - #[rstest] - #[case(true, 0)] - #[case(true, 1)] - #[case(true, 8)] - #[case(false, 2)] - #[case(false, 4)] - #[case(false, 8)] - #[case(false, 0)] - fn status_new_works(#[case] use_gpu: bool, #[case] cpu_num_threads: u16) { - let light_session_options = InferenceSessionOptions::new(cpu_num_threads, false); - let heavy_session_options = InferenceSessionOptions::new(cpu_num_threads, use_gpu); - let session_options = enum_map! { - InferenceOperationImpl::PredictDuration - | InferenceOperationImpl::PredictIntonation => light_session_options, - InferenceOperationImpl::Decode => heavy_session_options, - }; - let status = Status::::new(session_options); - - assert_eq!( - light_session_options, - status.session_options[InferenceOperationImpl::PredictDuration], - ); - assert_eq!( - light_session_options, - status.session_options[InferenceOperationImpl::PredictIntonation], - ); - assert_eq!( - heavy_session_options, - status.session_options[InferenceOperationImpl::Decode], - ); - - assert!(status.loaded_models.lock().unwrap().0.is_empty()); - } - - #[rstest] - #[tokio::test] - async fn status_load_model_works() { - let status = Status::::new( - enum_map!(_ => InferenceSessionOptions::new(0, false)), - ); - let model = &open_default_vvm_file().await; - let model_bytes = &model.read_inference_models().await.unwrap(); - let result = status.insert_model(model.header(), model_bytes); - assert_debug_fmt_eq!(Ok(()), result); - assert_eq!(1, status.loaded_models.lock().unwrap().0.len()); - } - - #[rstest] - #[tokio::test] - async fn status_is_model_loaded_works() { - let status = Status::::new( - enum_map!(_ => InferenceSessionOptions::new(0, false)), - ); - let vvm = open_default_vvm_file().await; - let model_header = vvm.header(); - let model_bytes = &vvm.read_inference_models().await.unwrap(); - assert!( - !status.is_loaded_model(&model_header.id), - "model should not be loaded" - ); - let result = status.insert_model(model_header, model_bytes); - assert_debug_fmt_eq!(Ok(()), result); - assert!( - status.is_loaded_model(&model_header.id), - "model should be loaded", - ); - } -} diff --git a/crates/voicevox_core/src/lib.rs b/crates/voicevox_core/src/lib.rs index 29efba8f1..0f34c5962 100644 --- a/crates/voicevox_core/src/lib.rs +++ b/crates/voicevox_core/src/lib.rs @@ -9,6 +9,7 @@ mod macros; mod manifest; mod metas; mod result; +mod status; mod synthesizer; mod task; mod text_analyzer; @@ -33,7 +34,8 @@ pub use self::{ engine::{AccentPhraseModel, AudioQueryModel, FullcontextExtractor}, error::{Error, ErrorKind}, metas::{ - RawStyleId, RawStyleVersion, SpeakerMeta, StyleId, StyleMeta, StyleVersion, VoiceModelMeta, + RawStyleId, RawStyleVersion, SpeakerMeta, StyleId, StyleMeta, StyleType, StyleVersion, + VoiceModelMeta, }, result::Result, synthesizer::{AccelerationMode, InitializeOptions, SynthesisOptions, TtsOptions}, diff --git a/crates/voicevox_core/src/manifest.rs b/crates/voicevox_core/src/manifest.rs index 650e151d5..3b17ae3f1 100644 --- a/crates/voicevox_core/src/manifest.rs +++ b/crates/voicevox_core/src/manifest.rs @@ -1,8 +1,10 @@ -use std::{collections::BTreeMap, fmt::Display}; +use std::{collections::BTreeMap, fmt::Display, sync::Arc}; use derive_getters::Getters; +use derive_more::Deref; use derive_new::new; use serde::{Deserialize, Serialize}; +use serde_with::{serde_as, DisplayFromStr}; use crate::StyleId; @@ -41,9 +43,27 @@ pub struct Manifest { #[allow(dead_code)] manifest_version: ManifestVersion, metas_filename: String, - decode_filename: String, - predict_duration_filename: String, - predict_intonation_filename: String, + #[serde(flatten)] + domains: ManifestDomains, +} + +#[derive(Deserialize, Clone)] +pub(crate) struct ManifestDomains { + pub(crate) talk: Option, +} + +#[derive(Deserialize, Clone)] +pub(crate) struct TalkManifest { + pub(crate) predict_duration_filename: String, + pub(crate) predict_intonation_filename: String, + pub(crate) decode_filename: String, #[serde(default)] - style_id_to_model_inner_id: BTreeMap, + pub(crate) style_id_to_model_inner_id: StyleIdToModelInnerId, } + +#[serde_as] +#[derive(Default, Clone, Deref, Deserialize)] +#[deref(forward)] +pub(crate) struct StyleIdToModelInnerId( + #[serde_as(as = "Arc>")] Arc>, +); diff --git a/crates/voicevox_core/src/metas.rs b/crates/voicevox_core/src/metas.rs index 78314d52a..b9f274c48 100644 --- a/crates/voicevox_core/src/metas.rs +++ b/crates/voicevox_core/src/metas.rs @@ -49,7 +49,20 @@ pub type RawStyleId = u32; /// /// [**話者**(_speaker_)]: SpeakerMeta /// [**スタイル**(_style_)]: StyleMeta -#[derive(PartialEq, Eq, Clone, Copy, Ord, Hash, PartialOrd, Deserialize, Serialize, new, Debug)] +#[derive( + PartialEq, + Eq, + Clone, + Copy, + Ord, + Hash, + PartialOrd, + derive_more::FromStr, + Deserialize, + Serialize, + new, + Debug, +)] pub struct StyleId(RawStyleId); impl StyleId { @@ -154,12 +167,47 @@ pub struct StyleMeta { id: StyleId, /// スタイル名。 name: String, + /// スタイルに対応するモデルの種類。 + #[serde(default)] + r#type: StyleType, /// スタイルの順番。 /// /// [`SpeakerMeta::styles`]は、この値に対して昇順に並んでいるべきである。 order: Option, } +/// **スタイル**(_style_)に対応するモデルの種類。 +#[derive( + Default, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + Debug, + strum::Display, + Deserialize, + Serialize, +)] +#[strum(serialize_all = "snake_case")] +#[serde(rename_all = "snake_case")] +pub enum StyleType { + /// 音声合成クエリの作成と音声合成が可能。 + #[default] + Talk, + + /// 歌唱音声合成用のクエリの作成が可能。 + SingingTeacher, + + /// 歌唱音声合成が可能。 + FrameDecode, + + /// 歌唱音声合成用のクエリの作成と歌唱音声合成が可能。 + Sing, +} + #[cfg(test)] mod tests { use once_cell::sync::Lazy; @@ -175,6 +223,7 @@ mod tests { { "id": 3, "name": "B_1", + "type": "talk", "order": 0 } ], @@ -188,6 +237,7 @@ mod tests { { "id": 2, "name": "A_3", + "type": "talk", "order": 2 } ], @@ -201,11 +251,13 @@ mod tests { { "id": 1, "name": "A_1", + "type": "talk", "order": 0 }, { "id": 0, "name": "A_2", + "type": "talk", "order": 1 } ], @@ -224,16 +276,19 @@ mod tests { { "id": 1, "name": "A_1", + "type": "talk", "order": 0 }, { "id": 0, "name": "A_2", + "type": "talk", "order": 1 }, { "id": 2, "name": "A_3", + "type": "talk", "order": 2 } ], @@ -247,6 +302,7 @@ mod tests { { "id": 3, "name": "B_1", + "type": "talk", "order": 0 } ], diff --git a/crates/voicevox_core/src/status.rs b/crates/voicevox_core/src/status.rs new file mode 100644 index 000000000..40eef430e --- /dev/null +++ b/crates/voicevox_core/src/status.rs @@ -0,0 +1,429 @@ +use std::any; + +use duplicate::{duplicate, duplicate_item}; +use educe::Educe; +use enum_map::EnumMap; +use indexmap::IndexMap; +use itertools::iproduct; + +use crate::{ + error::{ErrorRepr, LoadModelError, LoadModelErrorKind, LoadModelResult}, + infer::{ + domains::{InferenceDomainMap, TalkDomain, TalkOperation}, + session_set::{InferenceSessionCell, InferenceSessionSet}, + InferenceDomain, InferenceInputSignature, InferenceRuntime, InferenceSessionOptions, + InferenceSignature, + }, + manifest::{ModelInnerId, StyleIdToModelInnerId}, + metas::{self, SpeakerMeta, StyleId, StyleMeta, VoiceModelMeta}, + voice_model::{ModelBytesWithInnerIdsByDomain, VoiceModelHeader, VoiceModelId}, + Result, +}; + +pub(crate) struct Status { + loaded_models: std::sync::Mutex>, + session_options: InferenceDomainMap, +} + +impl Status { + pub(crate) fn new(session_options: InferenceDomainMap) -> Self { + Self { + loaded_models: Default::default(), + session_options, + } + } + + pub(crate) fn insert_model( + &self, + model_header: &VoiceModelHeader, + model_contents: &InferenceDomainMap, + ) -> Result<()> { + self.loaded_models + .lock() + .unwrap() + .ensure_acceptable(model_header)?; + + let session_sets_with_inner_ids = model_contents + .create_session_sets(&self.session_options) + .map_err(|source| LoadModelError { + path: model_header.path.clone(), + context: LoadModelErrorKind::InvalidModelData, + source: Some(source), + })?; + + self.loaded_models + .lock() + .unwrap() + .insert(model_header, session_sets_with_inner_ids)?; + Ok(()) + } + + pub(crate) fn unload_model(&self, voice_model_id: &VoiceModelId) -> Result<()> { + self.loaded_models.lock().unwrap().remove(voice_model_id) + } + + pub(crate) fn metas(&self) -> VoiceModelMeta { + self.loaded_models.lock().unwrap().metas() + } + + /// あるスタイルに対応する`VoiceModelId`と`ModelInnerId`の組を返す。 + /// + /// `StyleId` → `ModelInnerId`のマッピングが存在しない場合は、`ModelInnerId`としては + /// `style_id`と同じ値を返す。 + pub(crate) fn ids_for( + &self, + style_id: StyleId, + ) -> Result<(VoiceModelId, ModelInnerId)> { + self.loaded_models.lock().unwrap().ids_for::(style_id) + } + + pub(crate) fn is_loaded_model(&self, voice_model_id: &VoiceModelId) -> bool { + self.loaded_models + .lock() + .unwrap() + .contains_voice_model(voice_model_id) + } + + // FIXME: この関数はcompatible_engineとテストでのみ使われるが、テストのために`StyleType`を + // 引数に含めるようにする + pub(crate) fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool { + self.loaded_models.lock().unwrap().contains_style(style_id) + } + + /// 推論を実行する。 + /// + /// # Performance + /// + /// CPU/GPU-boundな操作であるため、非同期ランタイム上では直接実行されるべきではない。 + /// + /// # Panics + /// + /// `self`が`model_id`を含んでいないとき、パニックする。 + pub(crate) fn run_session( + &self, + model_id: &VoiceModelId, + input: I, + ) -> Result<::Output> + where + I: InferenceInputSignature, + ::Domain: InferenceDomainExt, + { + let sess = self.loaded_models.lock().unwrap().get(model_id); + sess.run(input) + } +} + +/// 読み込んだモデルの`Session`とそのメタ情報を保有し、追加/削除/取得の操作を提供する。 +/// +/// この構造体のメソッドは、すべて一瞬で完了すべきである。 +#[derive(Educe)] +#[educe(Default(bound = "R: InferenceRuntime"))] +struct LoadedModels(IndexMap>); + +struct LoadedModel { + metas: VoiceModelMeta, + session_sets_with_inner_ids: InferenceDomainMap>, +} + +impl LoadedModels { + fn metas(&self) -> VoiceModelMeta { + metas::merge(self.0.values().flat_map(|LoadedModel { metas, .. }| metas)) + } + + fn ids_for( + &self, + style_id: StyleId, + ) -> Result<(VoiceModelId, ModelInnerId)> { + let ( + model_id, + LoadedModel { + session_sets_with_inner_ids, + .. + }, + ) = self + .0 + .iter() + .find(|(_, LoadedModel { metas, .. })| { + metas.iter().flat_map(SpeakerMeta::styles).any(|style| { + *style.id() == style_id && D::style_types().contains(style.r#type()) + }) + }) + .ok_or(ErrorRepr::StyleNotFound { + style_id, + style_types: D::style_types(), + })?; + + let model_inner_id = session_sets_with_inner_ids + .get::() + .as_ref() + .and_then(|(model_inner_ids, _)| model_inner_ids.get(&style_id).copied()) + .unwrap_or_else(|| ModelInnerId::new(style_id.raw_id())); + + Ok((model_id.clone(), model_inner_id)) + } + + /// # Panics + /// + /// 次の場合にパニックする。 + /// + /// - `self`が`model_id`を含んでいないとき + /// - 対応する`InferenceDomain`が欠けているとき + fn get(&self, model_id: &VoiceModelId) -> InferenceSessionCell + where + I: InferenceInputSignature, + ::Domain: InferenceDomainExt, + { + let (_, session_set) = self.0[model_id] + .session_sets_with_inner_ids + .get::<::Domain>() + .as_ref() + .unwrap_or_else(|| { + let type_name = any::type_name::<::Domain>() + .split("::") + .last() + .unwrap(); + panic!( + "missing session set for `{type_name}` (should be checked in \ + `VoiceModelHeader::new` and `ids_for`)", + ); + }); + session_set.get() + } + + fn contains_voice_model(&self, model_id: &VoiceModelId) -> bool { + self.0.contains_key(model_id) + } + + fn contains_style(&self, style_id: StyleId) -> bool { + self.styles().any(|style| *style.id() == style_id) + } + + /// 音声モデルを受け入れ可能かをチェックする。 + /// + /// # Errors + /// + /// 次の場合にエラーを返す。 + /// + /// - 現在持っている音声モデルIDかスタイルIDが`model_header`と重複するとき + /// - 必要であるはずの`InferenceDomain`のモデルデータが欠けているとき + // FIXME: コメントとテストを書く + // - https://github.com/VOICEVOX/voicevox_core/pull/761#discussion_r1589978521 + // - https://github.com/VOICEVOX/voicevox_core/pull/761#discussion_r1589976759 + fn ensure_acceptable(&self, model_header: &VoiceModelHeader) -> LoadModelResult<()> { + let error = |context| LoadModelError { + path: model_header.path.clone(), + context, + source: None, + }; + + if self.0.contains_key(&model_header.id) { + return Err(error(LoadModelErrorKind::ModelAlreadyLoaded { + id: model_header.id.clone(), + })); + } + + // FIXME: https://github.com/VOICEVOX/voicevox_core/pull/761#discussion_r1590200343 + + let loaded = self.speakers(); + let external = model_header.metas.iter(); + for (loaded, external) in iproduct!(loaded, external) { + if loaded.speaker_uuid() == external.speaker_uuid() { + loaded.warn_diff_except_styles(external); + } + } + + let loaded = self.styles(); + let external = model_header + .metas + .iter() + .flat_map(|speaker| speaker.styles()); + if let Some((style, _)) = + iproduct!(loaded, external).find(|(loaded, external)| loaded.id() == external.id()) + { + return Err(error(LoadModelErrorKind::StyleAlreadyLoaded { + id: *style.id(), + })); + } + Ok(()) + } + + fn insert( + &mut self, + model_header: &VoiceModelHeader, + session_sets_with_inner_ids: InferenceDomainMap>, + ) -> Result<()> { + self.ensure_acceptable(model_header)?; + + let prev = self.0.insert( + model_header.id.clone(), + LoadedModel { + metas: model_header.metas.clone(), + session_sets_with_inner_ids, + }, + ); + assert!(prev.is_none()); + Ok(()) + } + + fn remove(&mut self, model_id: &VoiceModelId) -> Result<()> { + if self.0.remove(model_id).is_none() { + return Err(ErrorRepr::ModelNotFound { + model_id: model_id.clone(), + } + .into()); + } + Ok(()) + } + + fn speakers(&self) -> impl Iterator + Clone { + self.0.values().flat_map(|LoadedModel { metas, .. }| metas) + } + + fn styles(&self) -> impl Iterator { + self.speakers().flat_map(|speaker| speaker.styles()) + } +} + +pub(crate) trait InferenceDomainExt: InferenceDomain { + fn visit( + map: &InferenceDomainMap>, + ) -> Option<&(StyleIdToModelInnerId, InferenceSessionSet)>; +} + +#[duplicate_item( + T field; + [ TalkDomain ] [ talk ]; +)] +impl InferenceDomainExt for T { + fn visit( + map: &InferenceDomainMap>, + ) -> Option<&(StyleIdToModelInnerId, InferenceSessionSet)> { + map.field.as_ref() + } +} + +impl InferenceDomainMap> { + fn get( + &self, + ) -> Option<&(StyleIdToModelInnerId, InferenceSessionSet)> { + D::visit(self) + } +} + +impl InferenceDomainMap { + fn create_session_sets( + &self, + session_options: &InferenceDomainMap, + ) -> anyhow::Result>> { + duplicate! { + [ + field; + [ talk ]; + ] + let field = self + .field + .as_ref() + .map(|(model_inner_ids, model_bytes)| { + let session_set = InferenceSessionSet::new(model_bytes, &session_options.field)?; + Ok::<_, anyhow::Error>((model_inner_ids.clone(), session_set)) + }) + .transpose()?; + } + + Ok(InferenceDomainMap { talk }) + } +} + +type SessionOptionsByDomain = (EnumMap,); + +type SessionSetsWithInnerIdsByDomain = + (Option<(StyleIdToModelInnerId, InferenceSessionSet)>,); + +#[cfg(test)] +mod tests { + use enum_map::enum_map; + use pretty_assertions::assert_eq; + use rstest::rstest; + + use crate::{ + infer::{ + domains::{InferenceDomainMap, TalkOperation}, + InferenceSessionOptions, + }, + macros::tests::assert_debug_fmt_eq, + synthesizer::InferenceRuntimeImpl, + test_util::open_default_vvm_file, + }; + + use super::Status; + + #[rstest] + #[case(true, 0)] + #[case(true, 1)] + #[case(true, 8)] + #[case(false, 2)] + #[case(false, 4)] + #[case(false, 8)] + #[case(false, 0)] + fn status_new_works(#[case] use_gpu: bool, #[case] cpu_num_threads: u16) { + let light_session_options = InferenceSessionOptions::new(cpu_num_threads, false); + let heavy_session_options = InferenceSessionOptions::new(cpu_num_threads, use_gpu); + let session_options = InferenceDomainMap { + talk: enum_map! { + TalkOperation::PredictDuration + | TalkOperation::PredictIntonation => light_session_options, + TalkOperation::Decode => heavy_session_options, + }, + }; + let status = Status::::new(session_options); + + assert_eq!( + light_session_options, + status.session_options.talk[TalkOperation::PredictDuration], + ); + assert_eq!( + light_session_options, + status.session_options.talk[TalkOperation::PredictIntonation], + ); + assert_eq!( + heavy_session_options, + status.session_options.talk[TalkOperation::Decode], + ); + + assert!(status.loaded_models.lock().unwrap().0.is_empty()); + } + + #[rstest] + #[tokio::test] + async fn status_load_model_works() { + let status = Status::::new(InferenceDomainMap { + talk: enum_map!(_ => InferenceSessionOptions::new(0, false)), + }); + let model = &open_default_vvm_file().await; + let model_contents = &model.read_inference_models().await.unwrap(); + let result = status.insert_model(model.header(), model_contents); + assert_debug_fmt_eq!(Ok(()), result); + assert_eq!(1, status.loaded_models.lock().unwrap().0.len()); + } + + #[rstest] + #[tokio::test] + async fn status_is_model_loaded_works() { + let status = Status::::new(InferenceDomainMap { + talk: enum_map!(_ => InferenceSessionOptions::new(0, false)), + }); + let vvm = open_default_vvm_file().await; + let model_header = vvm.header(); + let model_contents = &vvm.read_inference_models().await.unwrap(); + assert!( + !status.is_loaded_model(&model_header.id), + "model should not be loaded" + ); + let result = status.insert_model(model_header, model_contents); + assert_debug_fmt_eq!(Ok(()), result); + assert!( + status.is_loaded_model(&model_header.id), + "model should be loaded", + ); + } +} diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs index b90139434..d90cdce3c 100644 --- a/crates/voicevox_core/src/synthesizer.rs +++ b/crates/voicevox_core/src/synthesizer.rs @@ -83,14 +83,14 @@ pub(crate) mod blocking { engine::{create_kana, mora_to_text, MoraModel, OjtPhoneme}, error::ErrorRepr, infer::{ - domain::{ - DecodeInput, DecodeOutput, InferenceDomainImpl, InferenceOperationImpl, - PredictDurationInput, PredictDurationOutput, PredictIntonationInput, - PredictIntonationOutput, + domains::{ + DecodeInput, DecodeOutput, InferenceDomainMap, PredictDurationInput, + PredictDurationOutput, PredictIntonationInput, PredictIntonationOutput, TalkDomain, + TalkOperation, }, - status::Status, InferenceSessionOptions, }, + status::Status, text_analyzer::{KanaAnalyzer, OpenJTalkAnalyzer, TextAnalyzer}, AccentPhraseModel, AudioQueryModel, FullcontextExtractor, Result, StyleId, SupportedDevices, SynthesisOptions, VoiceModelId, VoiceModelMeta, @@ -102,7 +102,7 @@ pub(crate) mod blocking { /// 音声シンセサイザ。 pub struct Synthesizer { - pub(super) status: Status, + pub(super) status: Status, open_jtalk_analyzer: OpenJTalkAnalyzer, kana_analyzer: KanaAnalyzer, use_gpu: bool, @@ -169,10 +169,12 @@ pub(crate) mod blocking { let heavy_session_options = InferenceSessionOptions::new(options.cpu_num_threads, use_gpu); - let status = Status::new(enum_map! { - InferenceOperationImpl::PredictDuration - | InferenceOperationImpl::PredictIntonation => light_session_options, - InferenceOperationImpl::Decode => heavy_session_options, + let status = Status::new(InferenceDomainMap { + talk: enum_map! { + TalkOperation::PredictDuration + | TalkOperation::PredictIntonation => light_session_options, + TalkOperation::Decode => heavy_session_options, + }, }); return Ok(Self { @@ -830,12 +832,7 @@ pub(crate) mod blocking { impl PerformInference for self::Synthesizer { fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result> { - // FIXME: `Status::ids_for`があるため、ここは不要なはず - if !self.status.validate_speaker_id(style_id) { - return Err(ErrorRepr::StyleNotFound { style_id }.into()); - } - - let (model_id, model_inner_id) = self.status.ids_for(style_id)?; + let (model_id, model_inner_id) = self.status.ids_for::(style_id)?; let PredictDurationOutput { phoneme_length: output, @@ -870,12 +867,7 @@ pub(crate) mod blocking { end_accent_phrase_vector: &[i64], style_id: StyleId, ) -> Result> { - // FIXME: `Status::ids_for`があるため、ここは不要なはず - if !self.status.validate_speaker_id(style_id) { - return Err(ErrorRepr::StyleNotFound { style_id }.into()); - } - - let (model_id, model_inner_id) = self.status.ids_for(style_id)?; + let (model_id, model_inner_id) = self.status.ids_for::(style_id)?; let PredictIntonationOutput { f0_list: output } = self.status.run_session( &model_id, @@ -902,12 +894,7 @@ pub(crate) mod blocking { phoneme_vector: &[f32], style_id: StyleId, ) -> Result> { - // FIXME: `Status::ids_for`があるため、ここは不要なはず - if !self.status.validate_speaker_id(style_id) { - return Err(ErrorRepr::StyleNotFound { style_id }.into()); - } - - let (model_id, model_inner_id) = self.status.ids_for(style_id)?; + let (model_id, model_inner_id) = self.status.ids_for::(style_id)?; // 音が途切れてしまうのを避けるworkaround処理が入っている // TODO: 改善したらここのpadding処理を取り除く diff --git a/crates/voicevox_core/src/test_data/model_sources/load_model_works1/manifest.json b/crates/voicevox_core/src/test_data/model_sources/load_model_works1/manifest.json index a8a7adebf..2c6721d08 100644 --- a/crates/voicevox_core/src/test_data/model_sources/load_model_works1/manifest.json +++ b/crates/voicevox_core/src/test_data/model_sources/load_model_works1/manifest.json @@ -1,11 +1,13 @@ { "manifest_version": "0.0.0", "metas_filename": "metas.json", - "decode_filename": "decode.onnx", - "predict_duration_filename": "predict_duration.onnx", - "predict_intonation_filename": "predict_intonation.onnx", - "style_id_to_model_inner_id": { - "302": 2, - "303": 3 + "talk": { + "predict_duration_filename": "predict_duration.onnx", + "predict_intonation_filename": "predict_intonation.onnx", + "decode_filename": "decode.onnx", + "style_id_to_model_inner_id": { + "302": 2, + "303": 3 + } } } diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs index 76f692eaa..364c8db0a 100644 --- a/crates/voicevox_core/src/voice_model.rs +++ b/crates/voicevox_core/src/voice_model.rs @@ -2,21 +2,33 @@ //! //! VVM ファイルの定義と形式は[ドキュメント](../../../docs/vvm.md)を参照。 +use anyhow::anyhow; use derive_getters::Getters; use derive_new::new; +use easy_ext::ext; +use enum_map::EnumMap; +use itertools::Itertools as _; use serde::Deserialize; use crate::{ - manifest::{Manifest, ModelInnerId}, - SpeakerMeta, StyleId, StyleMeta, VoiceModelMeta, + error::{LoadModelError, LoadModelErrorKind, LoadModelResult}, + infer::{ + domains::{TalkDomain, TalkOperation}, + InferenceDomain, + }, + manifest::{Manifest, ManifestDomains, StyleIdToModelInnerId}, + SpeakerMeta, StyleMeta, StyleType, VoiceModelMeta, }; -use std::{collections::BTreeMap, path::PathBuf}; +use std::path::{Path, PathBuf}; /// [`VoiceModelId`]の実体。 /// /// [`VoiceModelId`]: VoiceModelId pub type RawVoiceModelId = String; +pub(crate) type ModelBytesWithInnerIdsByDomain = + (Option<(StyleIdToModelInnerId, EnumMap>)>,); + /// 音声モデルID。 #[derive( PartialEq, @@ -45,29 +57,91 @@ pub(crate) struct VoiceModelHeader { pub(crate) id: VoiceModelId, manifest: Manifest, /// メタ情報。 + /// + /// `manifest`が対応していない`StyleType`のスタイルは含まれるべきではない。 pub(crate) metas: VoiceModelMeta, pub(crate) path: PathBuf, } impl VoiceModelHeader { - /// モデル内のすべてのスタイルに対するモデル内IDを取得する。 + fn new( + id: VoiceModelId, + manifest: Manifest, + metas: &[u8], + path: &Path, + ) -> LoadModelResult { + let metas = + serde_json::from_slice::(metas).map_err(|source| LoadModelError { + path: path.to_owned(), + context: LoadModelErrorKind::InvalidModelFormat, + source: Some( + anyhow::Error::from(source) + .context(format!("{}が不正です", manifest.metas_filename())), + ), + })?; + + manifest + .domains() + .check_acceptable(&metas) + .map_err(|style_type| LoadModelError { + path: path.to_owned(), + context: LoadModelErrorKind::InvalidModelFormat, + source: Some(anyhow!( + "{metas_filename}には`{style_type}`のスタイルが存在しますが、manifest.jsonでの\ + 対応がありません", + metas_filename = manifest.metas_filename(), + )), + })?; + + Ok(Self { + id, + manifest, + metas, + path: path.to_owned(), + }) + } +} + +impl ManifestDomains { + /// manifestとして対応していない`StyleType`に対してエラーを発する。 /// - /// モデル内IDのマッピングが存在しない場合はそのままスタイルIDを返す。 - pub(crate) fn model_inner_ids(&self) -> BTreeMap { - self.metas + /// `Status`はこのバリデーションを信頼し、`InferenceDomain`の不足時にパニックする。 + fn check_acceptable(&self, metas: &[SpeakerMeta]) -> std::result::Result<(), StyleType> { + let err = metas .iter() .flat_map(SpeakerMeta::styles) - .map(StyleMeta::id) - .map(|&style_id| { - let model_inner_id = self - .manifest - .style_id_to_model_inner_id() - .get(&style_id) - .copied() - .unwrap_or_else(|| ModelInnerId::new(style_id.raw_id())); - (style_id, model_inner_id) - }) - .collect() + .map(StyleMeta::r#type) + .copied() + .unique() + .find(|&style_type| !self.accepts(style_type)); + + match err { + Some(err) => Err(err), + None => Ok(()), + } + } + + /// メタ情報にタイプが`style_type`のスタイルが含まれることを許容するかどうか。 + /// + /// 例えば`self.talk`が`None`のとき、`StyleType::Talk`に対して`false`を返す。 + fn accepts(&self, style_type: StyleType) -> bool { + let Self { talk } = self; + + return TalkDomain::contains(style_type).implies(|| talk.is_some()); + + #[ext] + impl D { + fn contains(style_type: StyleType) -> bool { + Self::style_types().contains(&style_type) + } + } + + #[ext] + impl bool { + fn implies(self, other: impl FnOnce() -> Self) -> Self { + !self || other() + } + } } } @@ -85,12 +159,12 @@ pub(crate) mod blocking { use crate::{ error::{LoadModelError, LoadModelErrorKind, LoadModelResult}, - infer::domain::InferenceOperationImpl, - manifest::Manifest, + infer::domains::InferenceDomainMap, + manifest::{Manifest, TalkManifest}, VoiceModelMeta, }; - use super::{VoiceModelHeader, VoiceModelId}; + use super::{ModelBytesWithInnerIdsByDomain, VoiceModelHeader, VoiceModelId}; /// 音声モデル。 /// @@ -103,39 +177,52 @@ pub(crate) mod blocking { impl self::VoiceModel { pub(crate) fn read_inference_models( &self, - ) -> LoadModelResult>> { + ) -> LoadModelResult> { let reader = BlockingVvmEntryReader::open(&self.header.path)?; - let model_bytes = [ - self.header.manifest.predict_duration_filename(), - self.header.manifest.predict_intonation_filename(), - self.header.manifest.decode_filename(), - ] - .into_par_iter() - .map(|filename| reader.read_vvm_entry(filename)) - .collect::, _>>()? - .try_into() - .unwrap_or_else(|_| panic!("should be same length")); - - Ok(EnumMap::from_array(model_bytes)) + let talk = self + .header + .manifest + .domains() + .talk + .as_ref() + .map( + |TalkManifest { + predict_duration_filename, + predict_intonation_filename, + decode_filename, + style_id_to_model_inner_id, + }| { + let model_bytes = [ + predict_duration_filename, + predict_intonation_filename, + decode_filename, + ] + .into_par_iter() + .map(|filename| reader.read_vvm_entry(filename)) + .collect::, _>>()? + .try_into() + .unwrap_or_else(|_| panic!("should be same length")); + + let model_bytes = EnumMap::from_array(model_bytes); + + Ok((style_id_to_model_inner_id.clone(), model_bytes)) + }, + ) + .transpose()?; + + Ok(InferenceDomainMap { talk }) } /// VVMファイルから`VoiceModel`をコンストラクトする。 pub fn from_path(path: impl AsRef) -> crate::Result { - let path = path.as_ref().to_owned(); - let reader = BlockingVvmEntryReader::open(&path)?; + let path = path.as_ref(); + let reader = BlockingVvmEntryReader::open(path)?; let manifest = reader.read_vvm_json::("manifest.json")?; - let metas = reader.read_vvm_json(manifest.metas_filename())?; + let metas = &reader.read_vvm_entry(manifest.metas_filename())?; let id = VoiceModelId::new(nanoid!()); - - Ok(Self { - header: VoiceModelHeader { - id, - metas, - manifest, - path, - }, - }) + let header = VoiceModelHeader::new(id, manifest, metas, path)?; + Ok(Self { header }) } /// ID。 @@ -177,12 +264,13 @@ pub(crate) mod blocking { }) } + // FIXME: manifest.json専用になっているので、そういう関数名にする fn read_vvm_json(&self, filename: &str) -> LoadModelResult { let bytes = &self.read_vvm_entry(filename)?; serde_json::from_slice(bytes).map_err(|source| LoadModelError { path: self.borrow_path().clone(), - context: LoadModelErrorKind::OpenZipFile, - source: Some(source.into()), + context: LoadModelErrorKind::InvalidModelFormat, + source: Some(anyhow::Error::from(source).context(format!("{filename}が不正です"))), }) } @@ -208,18 +296,18 @@ pub(crate) mod tokio { use derive_new::new; use enum_map::EnumMap; - use futures::future::join3; + use futures::future::{join3, OptionFuture}; use nanoid::nanoid; use serde::de::DeserializeOwned; use crate::{ error::{LoadModelError, LoadModelErrorKind, LoadModelResult}, - infer::domain::InferenceOperationImpl, - manifest::Manifest, + infer::domains::InferenceDomainMap, + manifest::{Manifest, TalkManifest}, Result, VoiceModelMeta, }; - use super::{VoiceModelHeader, VoiceModelId}; + use super::{ModelBytesWithInnerIdsByDomain, VoiceModelHeader, VoiceModelId}; /// 音声モデル。 /// @@ -232,42 +320,49 @@ pub(crate) mod tokio { impl self::VoiceModel { pub(crate) async fn read_inference_models( &self, - ) -> LoadModelResult>> { + ) -> LoadModelResult> { let reader = AsyncVvmEntryReader::open(&self.header.path).await?; - let ( - decode_model_result, - predict_duration_model_result, - predict_intonation_model_result, - ) = join3( - reader.read_vvm_entry(self.header.manifest.decode_filename()), - reader.read_vvm_entry(self.header.manifest.predict_duration_filename()), - reader.read_vvm_entry(self.header.manifest.predict_intonation_filename()), - ) - .await; - - Ok(EnumMap::from_array([ - predict_duration_model_result?, - predict_intonation_model_result?, - decode_model_result?, - ])) + + let talk = OptionFuture::from(self.header.manifest.domains().talk.as_ref().map( + |TalkManifest { + predict_duration_filename, + predict_intonation_filename, + decode_filename, + style_id_to_model_inner_id, + }| async { + let ( + decode_model_result, + predict_duration_model_result, + predict_intonation_model_result, + ) = join3( + reader.read_vvm_entry(decode_filename), + reader.read_vvm_entry(predict_duration_filename), + reader.read_vvm_entry(predict_intonation_filename), + ) + .await; + + let model_bytes = EnumMap::from_array([ + predict_duration_model_result?, + predict_intonation_model_result?, + decode_model_result?, + ]); + + Ok((style_id_to_model_inner_id.clone(), model_bytes)) + }, + )) + .await + .transpose()?; + + Ok(InferenceDomainMap { talk }) } /// VVMファイルから`VoiceModel`をコンストラクトする。 pub async fn from_path(path: impl AsRef) -> Result { let reader = AsyncVvmEntryReader::open(path.as_ref()).await?; let manifest = reader.read_vvm_json::("manifest.json").await?; - let metas = reader - .read_vvm_json::(manifest.metas_filename()) - .await?; + let metas = &reader.read_vvm_entry(manifest.metas_filename()).await?; let id = VoiceModelId::new(nanoid!()); - - Ok(Self { - header: VoiceModelHeader { - id, - metas, - manifest, - path: path.as_ref().into(), - }, - }) + let header = VoiceModelHeader::new(id, manifest, metas, path.as_ref())?; + Ok(Self { header }) } /// ID。 @@ -324,14 +419,13 @@ pub(crate) mod tokio { .collect(); Ok(AsyncVvmEntryReader::new(path, reader, entry_map)) } + // FIXME: manifest.json専用になっているので、そういう関数名にする async fn read_vvm_json(&self, filename: &str) -> LoadModelResult { let bytes = self.read_vvm_entry(filename).await?; serde_json::from_slice(&bytes).map_err(|source| LoadModelError { path: self.path.to_owned(), - context: LoadModelErrorKind::ReadZipEntry { - filename: filename.to_owned(), - }, - source: Some(source.into()), + context: LoadModelErrorKind::InvalidModelFormat, + source: Some(anyhow::Error::from(source).context(format!("{filename}が不正です"))), }) } @@ -357,3 +451,102 @@ pub(crate) mod tokio { } } } + +#[cfg(test)] +mod tests { + use once_cell::sync::Lazy; + use rstest::{fixture, rstest}; + use serde_json::json; + + use crate::{ + manifest::{ManifestDomains, TalkManifest}, + SpeakerMeta, StyleType, + }; + + #[rstest] + #[case( + &ManifestDomains { + talk: None, + }, + &[], + Ok(()) + )] + #[case( + &ManifestDomains { + talk: Some(TALK_MANIFEST.clone()), + }, + &[speaker(&[StyleType::Talk])], + Ok(()) + )] + #[case( + &ManifestDomains { + talk: Some(TALK_MANIFEST.clone()), + }, + &[speaker(&[StyleType::Talk, StyleType::Sing])], + Ok(()) + )] + #[case( + &ManifestDomains { + talk: None, + }, + &[speaker(&[StyleType::Talk])], + Err(()) + )] + fn check_acceptable_works( + #[case] manifest: &ManifestDomains, + #[case] metas: &[SpeakerMeta], + #[case] expected: std::result::Result<(), ()>, + ) { + let actual = manifest.check_acceptable(metas).map_err(|_| ()); + assert_eq!(expected, actual); + } + + static TALK_MANIFEST: Lazy = Lazy::new(|| TalkManifest { + predict_duration_filename: "".to_owned(), + predict_intonation_filename: "".to_owned(), + decode_filename: "".to_owned(), + style_id_to_model_inner_id: Default::default(), + }); + + #[fixture] + fn talk_speaker() -> SpeakerMeta { + serde_json::from_value(json!({ + "name": "dummy", + "styles": [ + { + "id": 0, + "name": "style1", + "type": "talk", + "order": 0 + } + ], + "version": "0.0.1", + "speaker_uuid": "574bc678-8370-44be-b941-08e46e7b47d7", + "order": 0 + })) + .unwrap() + } + + fn speaker(style_types: &'static [StyleType]) -> SpeakerMeta { + let styles = style_types + .iter() + .map(|style_type| { + json!({ + "id": 0, + "name": "style1", + "type": style_type, + "order": null + }) + }) + .collect::>(); + + serde_json::from_value(json!({ + "name": "dummy", + "styles": styles, + "version": "0.0.1", + "speaker_uuid": "574bc678-8370-44be-b941-08e46e7b47d7", + "order": null + })) + .unwrap() + } +} diff --git a/crates/voicevox_core_c_api/include/voicevox_core.h b/crates/voicevox_core_c_api/include/voicevox_core.h index 2275bd1d7..592ec8abb 100644 --- a/crates/voicevox_core_c_api/include/voicevox_core.h +++ b/crates/voicevox_core_c_api/include/voicevox_core.h @@ -142,6 +142,10 @@ enum VoicevoxResultCode * ZIP内のファイルが読めなかった */ VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR = 17, + /** + * モデルの形式が不正 + */ + VOICEVOX_RESULT_INVALID_MODEL_HEADER_ERROR = 28, /** * すでに読み込まれている音声モデルを読み込もうとした */ diff --git a/crates/voicevox_core_c_api/src/helpers.rs b/crates/voicevox_core_c_api/src/helpers.rs index 74177db9c..1c163a0d0 100644 --- a/crates/voicevox_core_c_api/src/helpers.rs +++ b/crates/voicevox_core_c_api/src/helpers.rs @@ -37,6 +37,7 @@ pub(crate) fn into_result_code_with_error(result: CApiResult<()>) -> VoicevoxRes GpuSupport => VOICEVOX_RESULT_GPU_SUPPORT_ERROR, OpenZipFile => VOICEVOX_RESULT_OPEN_ZIP_FILE_ERROR, ReadZipEntry => VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR, + InvalidModelFormat => VOICEVOX_RESULT_INVALID_MODEL_HEADER_ERROR, ModelAlreadyLoaded => VOICEVOX_RESULT_MODEL_ALREADY_LOADED_ERROR, StyleAlreadyLoaded => VOICEVOX_RESULT_STYLE_ALREADY_LOADED_ERROR, InvalidModelData => VOICEVOX_RESULT_INVALID_MODEL_DATA_ERROR, diff --git a/crates/voicevox_core_c_api/src/result_code.rs b/crates/voicevox_core_c_api/src/result_code.rs index 65236ada4..0897dfa87 100644 --- a/crates/voicevox_core_c_api/src/result_code.rs +++ b/crates/voicevox_core_c_api/src/result_code.rs @@ -37,6 +37,8 @@ pub enum VoicevoxResultCode { VOICEVOX_RESULT_OPEN_ZIP_FILE_ERROR = 16, /// ZIP内のファイルが読めなかった VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR = 17, + /// モデルの形式が不正 + VOICEVOX_RESULT_INVALID_MODEL_HEADER_ERROR = 28, /// すでに読み込まれている音声モデルを読み込もうとした VOICEVOX_RESULT_MODEL_ALREADY_LOADED_ERROR = 18, /// すでに読み込まれているスタイルを読み込もうとした @@ -90,6 +92,7 @@ pub(crate) const fn error_result_to_message(result_code: VoicevoxResultCode) -> VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR => { cstr!("ZIP内のファイルを読むことができませんでした") } + VOICEVOX_RESULT_INVALID_MODEL_HEADER_ERROR => cstr!("モデルの形式が不正です"), VOICEVOX_RESULT_MODEL_ALREADY_LOADED_ERROR => cstr!("同じIDのモデルを読むことはできません"), VOICEVOX_RESULT_STYLE_ALREADY_LOADED_ERROR => { cstr!("同じIDのスタイルを読むことはできません") diff --git a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml index 25926487e..151074cb3 100644 --- a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml +++ b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml @@ -7,6 +7,7 @@ metas = ''' { "id": 0, "name": "style1", + "type": "talk", "order": null } ], @@ -20,6 +21,7 @@ metas = ''' { "id": 1, "name": "style2", + "type": "talk", "order": null } ], @@ -33,11 +35,13 @@ metas = ''' { "id": 302, "name": "style3-1", + "type": "talk", "order": null }, { "id": 303, "name": "style3-2", + "type": "talk", "order": null } ], @@ -97,6 +101,7 @@ metas = ''' { "id": 0, "name": "style1", + "type": "talk", "order": null } ], @@ -110,6 +115,7 @@ metas = ''' { "id": 1, "name": "style2", + "type": "talk", "order": null } ], @@ -123,11 +129,13 @@ metas = ''' { "id": 302, "name": "style3-1", + "type": "talk", "order": null }, { "id": 303, "name": "style3-2", + "type": "talk", "order": null } ], diff --git a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java index ba4881566..576629515 100644 --- a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java +++ b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java @@ -103,6 +103,12 @@ public static class StyleMeta { @Expose public final int id; + /** スタイルに対応するモデルの種類。 */ + @SerializedName("type") + @Expose + @Nonnull + public final StyleType type; + /** * 話者の順番。 * @@ -116,7 +122,16 @@ public static class StyleMeta { private StyleMeta() { this.name = ""; this.id = 0; + this.type = StyleType.TALK; this.order = null; } } + + /** スタイル(style)に対応するモデルの種類。 */ + public static enum StyleType { + /** 音声合成クエリの作成と音声合成が可能。 */ + @SerializedName("talk") + @Expose + TALK, + } } diff --git a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/exceptions/InvalidModelFormatException.java b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/exceptions/InvalidModelFormatException.java new file mode 100644 index 000000000..b82016164 --- /dev/null +++ b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/exceptions/InvalidModelFormatException.java @@ -0,0 +1,14 @@ +package jp.hiroshiba.voicevoxcore.exceptions; + +import java.io.IOException; + +/** モデルの形式が不正。 */ +public class InvalidModelFormatException extends IOException { + public InvalidModelFormatException(String message) { + super(message); + } + + public InvalidModelFormatException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/crates/voicevox_core_java_api/src/common.rs b/crates/voicevox_core_java_api/src/common.rs index 71a60f5f3..bdb37f4ff 100644 --- a/crates/voicevox_core_java_api/src/common.rs +++ b/crates/voicevox_core_java_api/src/common.rs @@ -69,6 +69,7 @@ where GpuSupport, OpenZipFile, ReadZipEntry, + InvalidModelFormat, ModelAlreadyLoaded, StyleAlreadyLoaded, InvalidModelData, diff --git a/crates/voicevox_core_macros/src/lib.rs b/crates/voicevox_core_macros/src/lib.rs index 5f2f26809..98a2fdc5c 100644 --- a/crates/voicevox_core_macros/src/lib.rs +++ b/crates/voicevox_core_macros/src/lib.rs @@ -17,17 +17,18 @@ use syn::parse_macro_input; /// use enum_map::Enum; /// use macros::InferenceOperation; /// -/// pub(crate) enum InferenceDomainImpl {} +/// pub(crate) enum TalkDomain {} /// -/// impl InferenceDomain for InferenceDomainImpl { -/// type Operation = InferenceOperationImpl; +/// impl InferenceDomain for TalkDomain { +/// type Operation = TalkOperation; +/// // ... /// } /// /// #[derive(Clone, Copy, Enum, InferenceOperation)] /// #[inference_operation( -/// type Domain = InferenceDomainImpl; +/// type Domain = TalkDomain; /// )] -/// pub(crate) enum InferenceOperationImpl { +/// pub(crate) enum TalkOperation { /// #[inference_operation( /// type Input = PredictDurationInput; /// type Output = PredictDurationOutput; diff --git a/crates/voicevox_core_python_api/python/test/test_asyncio_metas.py b/crates/voicevox_core_python_api/python/test/test_asyncio_metas.py new file mode 100644 index 000000000..ec69032b1 --- /dev/null +++ b/crates/voicevox_core_python_api/python/test/test_asyncio_metas.py @@ -0,0 +1,26 @@ +""" +メタ情報の出力が可能かどうかをテストする。 + +``test_blocking_metas`` と対になる。 +""" + +import conftest +import pytest +import pytest_asyncio +from voicevox_core.asyncio import OpenJtalk, Synthesizer, VoiceModel + + +def test_voice_model_metas_works(voice_model: VoiceModel) -> None: + _ = voice_model.metas + + +@pytest.mark.asyncio +async def test_synthesizer_metas_works(voice_model: VoiceModel) -> None: + synthesizer = Synthesizer(await OpenJtalk.new(conftest.open_jtalk_dic_dir)) + await synthesizer.load_voice_model(voice_model) + _ = synthesizer.metas + + +@pytest_asyncio.fixture +async def voice_model() -> VoiceModel: + return await VoiceModel.from_path(conftest.model_dir) diff --git a/crates/voicevox_core_python_api/python/test/test_blocking_metas.py b/crates/voicevox_core_python_api/python/test/test_blocking_metas.py new file mode 100644 index 000000000..c305e2cdb --- /dev/null +++ b/crates/voicevox_core_python_api/python/test/test_blocking_metas.py @@ -0,0 +1,24 @@ +""" +メタ情報の出力が可能かどうかをテストする。 + +``test_asyncio_metas`` と対になる。 +""" + +import conftest +import pytest +from voicevox_core.blocking import OpenJtalk, Synthesizer, VoiceModel + + +def test_voice_model_metas_works(voice_model: VoiceModel) -> None: + _ = voice_model.metas + + +def test_synthesizer_metas_works(voice_model: VoiceModel) -> None: + synthesizer = Synthesizer(OpenJtalk(conftest.open_jtalk_dic_dir)) + synthesizer.load_voice_model(voice_model) + _ = synthesizer.metas + + +@pytest.fixture +def voice_model() -> VoiceModel: + return VoiceModel.from_path(conftest.model_dir) diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_models.py b/crates/voicevox_core_python_api/python/voicevox_core/_models.py index c72bbcbf0..21a2016fe 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_models.py +++ b/crates/voicevox_core_python_api/python/voicevox_core/_models.py @@ -34,6 +34,13 @@ """ +class StyleType(str, Enum): + """**スタイル** (_style_)に対応するモデルの種類。""" + + TALK = "talk" + """音声合成クエリの作成と音声合成が可能。""" + + @pydantic.dataclasses.dataclass class StyleMeta: """**スタイル** (_style_)のメタ情報。""" @@ -44,6 +51,9 @@ class StyleMeta: id: StyleId """スタイルID。""" + type: StyleType = dataclasses.field(default=StyleType.TALK) + """スタイルに対応するモデルの種類。""" + order: Optional[int] = None """ 話者の順番。 diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.pyi b/crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.pyi index 3a47ef02b..89a50d230 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.pyi +++ b/crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.pyi @@ -37,6 +37,11 @@ class ReadZipEntryError(Exception): ... +class InvalidModelFormatError(Exception): + """モデルの形式が不正。""" + + ... + class ModelAlreadyLoadedError(Exception): """すでに読み込まれている音声モデルを読み込もうとした。""" diff --git a/crates/voicevox_core_python_api/src/convert.rs b/crates/voicevox_core_python_api/src/convert.rs index 3f629f90e..3cee4186b 100644 --- a/crates/voicevox_core_python_api/src/convert.rs +++ b/crates/voicevox_core_python_api/src/convert.rs @@ -16,10 +16,10 @@ use voicevox_core::{ use crate::{ ExtractFullContextLabelError, GetSupportedDevicesError, GpuSupportError, InferenceFailedError, - InvalidModelDataError, InvalidWordError, LoadUserDictError, ModelAlreadyLoadedError, - ModelNotFoundError, NotLoadedOpenjtalkDictError, OpenZipFileError, ParseKanaError, - ReadZipEntryError, SaveUserDictError, StyleAlreadyLoadedError, StyleNotFoundError, - UseUserDictError, WordNotFoundError, + InvalidModelDataError, InvalidModelFormatError, InvalidWordError, LoadUserDictError, + ModelAlreadyLoadedError, ModelNotFoundError, NotLoadedOpenjtalkDictError, OpenZipFileError, + ParseKanaError, ReadZipEntryError, SaveUserDictError, StyleAlreadyLoadedError, + StyleNotFoundError, UseUserDictError, WordNotFoundError, }; pub(crate) fn from_acceleration_mode(ob: &PyAny) -> PyResult { @@ -194,6 +194,7 @@ pub(crate) impl voicevox_core::Result { ErrorKind::ReadZipEntry => ReadZipEntryError::new_err(msg), ErrorKind::ModelAlreadyLoaded => ModelAlreadyLoadedError::new_err(msg), ErrorKind::StyleAlreadyLoaded => StyleAlreadyLoadedError::new_err(msg), + ErrorKind::InvalidModelFormat => InvalidModelFormatError::new_err(msg), ErrorKind::InvalidModelData => InvalidModelDataError::new_err(msg), ErrorKind::GetSupportedDevices => GetSupportedDevicesError::new_err(msg), ErrorKind::StyleNotFound => StyleNotFoundError::new_err(msg), diff --git a/crates/voicevox_core_python_api/src/lib.rs b/crates/voicevox_core_python_api/src/lib.rs index 4d190333d..492d18f0e 100644 --- a/crates/voicevox_core_python_api/src/lib.rs +++ b/crates/voicevox_core_python_api/src/lib.rs @@ -71,6 +71,7 @@ exceptions! { ReadZipEntryError: PyException; ModelAlreadyLoadedError: PyException; StyleAlreadyLoadedError: PyException; + InvalidModelFormatError: PyException; InvalidModelDataError: PyException; GetSupportedDevicesError: PyException; StyleNotFoundError: PyKeyError; diff --git a/model/sample.vvm b/model/sample.vvm index 48d23745d..f541ae082 100644 Binary files a/model/sample.vvm and b/model/sample.vvm differ