Skip to content

Commit

Permalink
feat: pause_length{,_scale}をデフォルト値限定で受け入れる (#872)
Browse files Browse the repository at this point in the history
feat: `pause_length{,_scale}`をデフォルト値限定で受け入れる

`AudioQuery`に`pause_length{,_scale}`を追加する。ただしそれぞれ`null`と
`1.`のみを許し、無音時間調整自体はまだ実装しない。

VOICEVOX/voicevox_engine#1308 と VOICEVOX/voicevox_engine#1425 の一部
を参考にコードを書いた。

@Hiroshiba さんと以下の2名の許諾のもと、 #874 にのっとりMITライセンスと
してライセンスする。

* @X-20A (VOICEVOX/voicevox_engine#1308)
* @sabonerune (VOICEVOX/voicevox_engine#1425)

Co-authored-by: X-20A <[email protected]>
Co-authored-by: sabonerune <[email protected]>
Co-authored-by: Hiroshiba <[email protected]>
Refs: VOICEVOX/voicevox_engine#1308, VOICEVOX/voicevox_engine#1425
Refs: #874 (comment)
Refs: #874 (comment)
  • Loading branch information
4 people authored Nov 23, 2024
1 parent 52b3d0d commit 38517cc
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 1 deletion.
141 changes: 140 additions & 1 deletion crates/voicevox_core/src/engine/model.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use serde::{Deserialize, Serialize};
use std::fmt;

use duplicate::duplicate_item;
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};

/* 各フィールドのjsonフィールド名はsnake_caseとする*/

Expand Down Expand Up @@ -64,6 +67,21 @@ pub struct AudioQuery {
pub output_sampling_rate: u32,
/// 音声データをステレオ出力するか否か。
pub output_stereo: bool,
// TODO: VOICEVOX/voicevox_engine#1308 を実装する
/// 句読点などの無音時間。`null`のときは無視される。デフォルト値は`null`。
#[serde(
default,
deserialize_with = "deserialize_pause_length",
serialize_with = "serialize_pause_length"
)]
pub pause_length: (),
/// 読点などの無音時間(倍率)。デフォルト値は`1`。
#[serde(
default,
deserialize_with = "deserialize_pause_length_scale",
serialize_with = "serialize_pause_length_scale"
)]
pub pause_length_scale: (),
/// \[読み取り専用\] AquesTalk風記法。
///
/// [`Synthesizer::audio_query`]が返すもののみ`Some`となる。入力としてのAudioQueryでは無視され
Expand All @@ -73,6 +91,87 @@ pub struct AudioQuery {
pub kana: Option<String>,
}

fn deserialize_pause_length<'de, D>(deserializer: D) -> Result<(), D::Error>
where
D: Deserializer<'de>,
{
return deserializer.deserialize_any(Visitor);

struct Visitor;

impl<'de> de::Visitor<'de> for Visitor {
type Value = ();

fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
formatter.write_str("`null`")
}

#[duplicate_item(
method T;
[ visit_i64 ] [ i64 ];
[ visit_u64 ] [ u64 ];
[ visit_f64 ] [ f64 ];
)]
fn method<E>(self, _: T) -> Result<Self::Value, E>
where
E: de::Error,
{
Err(E::custom("currently `pause_length` must be `null`"))
}

fn visit_unit<E>(self) -> Result<Self::Value, E> {
Ok(())
}
}
}

/// Serializes the `pause_length` field of [`AudioQuery`] as a unit value.
///
/// The field itself holds no information, so the argument is ignored.
fn serialize_pause_length<S: Serializer>(
    _pause_length: &(),
    serializer: S,
) -> Result<S::Ok, S::Error> {
    serializer.serialize_unit()
}

fn deserialize_pause_length_scale<'de, D>(deserializer: D) -> Result<(), D::Error>
where
D: Deserializer<'de>,
{
return deserializer.deserialize_any(Visitor);

struct Visitor;

impl<'de> de::Visitor<'de> for Visitor {
type Value = ();

fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
formatter.write_str("`1.`")
}

#[duplicate_item(
method T ONE;
[ visit_i64 ] [ i64 ] [ 1 ];
[ visit_u64 ] [ u64 ] [ 1 ];
[ visit_f64 ] [ f64 ] [ 1. ];
)]
fn method<E>(self, v: T) -> Result<Self::Value, E>
where
E: de::Error,
{
if v != ONE {
return Err(E::custom("currently `pause_length_scale` must be `1.`"));
}
Ok(())
}
}
}

/// Serializes the `pause_length_scale` field of [`AudioQuery`] as the floating-point value
/// `1.0`.
///
/// The field itself holds no information, so the argument is ignored and the constant is
/// always written.
fn serialize_pause_length_scale<S: Serializer>(
    _pause_length_scale: &(),
    serializer: S,
) -> Result<S::Ok, S::Error> {
    serializer.serialize_f64(1.0)
}

impl AudioQuery {
pub(crate) fn with_kana(self, kana: Option<String>) -> Self {
Self { kana, ..self }
Expand All @@ -99,6 +198,8 @@ mod tests {
post_phoneme_length: 0.0,
output_sampling_rate: 0,
output_stereo: false,
pause_length: (),
pause_length_scale: (),
kana: None,
};
let val = serde_json::to_value(audio_query_model).unwrap();
Expand Down Expand Up @@ -152,4 +253,42 @@ mod tests {
}))?;
Ok(())
}

// TODO: Remove this test once the types make this property self-evident.
/// Checks that a string given for `pause_length` makes deserialization of `AudioQuery` fail.
#[rstest]
fn it_denies_non_null_for_pause_length() {
    let input = json!({
        "accent_phrases": [],
        "speed_scale": 1.0,
        "pitch_scale": 0.0,
        "intonation_scale": 1.0,
        "volume_scale": 1.0,
        "pre_phoneme_length": 0.1,
        "post_phoneme_length": 0.1,
        "output_sampling_rate": 24000,
        "output_stereo": false,
        "pause_length": "aaaaa"
    });

    let outcome = serde_json::from_value::<AudioQuery>(input);

    // Discard any successfully parsed query so that only the error side matters; the test
    // panics here if deserialization unexpectedly succeeded.
    outcome.map(drop).unwrap_err();
}

// TODO: Remove this test once the types make this property self-evident.
/// Checks that a string given for `pause_length_scale` makes deserialization of `AudioQuery`
/// fail.
#[rstest]
fn it_denies_non_float_for_pause_length_scale() {
    let input = json!({
        "accent_phrases": [],
        "speed_scale": 1.0,
        "pitch_scale": 0.0,
        "intonation_scale": 1.0,
        "volume_scale": 1.0,
        "pre_phoneme_length": 0.1,
        "post_phoneme_length": 0.1,
        "output_sampling_rate": 24000,
        "output_stereo": false,
        "pause_length_scale": "aaaaa",
    });

    let outcome = serde_json::from_value::<AudioQuery>(input);

    // Discard any successfully parsed query so that only the error side matters; the test
    // panics here if deserialization unexpectedly succeeded.
    outcome.map(drop).unwrap_err();
}
}
2 changes: 2 additions & 0 deletions crates/voicevox_core/src/synthesizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1187,6 +1187,8 @@ mod inner {
post_phoneme_length: 0.1,
output_sampling_rate: DEFAULT_SAMPLING_RATE,
output_stereo: false,
pause_length: (),
pause_length_scale: (),
kana: Some(kana),
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,17 @@ public class AudioQuery {
@Expose
public boolean outputStereo;

/** 句読点などの無音時間。{@code null}のときは無視される。デフォルト値は{@code null}。 */
@SerializedName("pause_length")
@Expose
@Nullable
public Double pauseLength;

/** 読点などの無音時間(倍率)。デフォルト値は{@code 1.}。 */
@SerializedName("pause_length_scale")
@Expose
public double pauseLengthScale;

/**
* [読み取り専用] AquesTalk風記法。
*
Expand All @@ -75,6 +86,8 @@ public AudioQuery() {
this.prePhonemeLength = 0.1;
this.postPhonemeLength = 0.1;
this.outputSamplingRate = 24000;
this.pauseLength = null;
this.pauseLengthScale = 1.0;
this.kana = null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,12 @@ class AudioQuery:
output_stereo: bool
"""音声データをステレオ出力するか否か。"""

pause_length: None = None
"""句読点などの無音時間。 ``None`` のときは無視される。デフォルト値は ``None`` 。"""

pause_length_scale: float = 1.0
"""読点などの無音時間(倍率)。デフォルト値は ``1.0`` 。"""

kana: Optional[str] = None
"""
[読み取り専用] AquesTalk風記法。
Expand Down

0 comments on commit 38517cc

Please sign in to comment.