Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions core/src/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ extern "C" {
// Result codes returned by the voicevox C API functions.
typedef enum {
// Success
VOICEVOX_RESULT_SUCCEED = 0,
// OpenJTalk initialization failed
VOICEVOX_RESULT_NOT_INITIALIZE_OPEN_JTALK_ERR = 1,
// The OpenJTalk dictionary has not been loaded
VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT = 1,
} VoicevoxResultCode;
/**
* @fn
Expand Down Expand Up @@ -125,10 +125,10 @@ VOICEVOX_CORE_API const char *last_error_message();

/**
* @fn
* Initializes open jtalk
* Loads the open jtalk dictionary
* @return result code
*/
VOICEVOX_CORE_API VoicevoxResultCode voicevox_initialize_openjtalk(const char *dict_path);
VOICEVOX_CORE_API VoicevoxResultCode voicevox_load_openjtalk_dict(const char *dict_path);

/**
* @fn
Expand Down
26 changes: 10 additions & 16 deletions core/src/engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,29 @@

#include "core.h"
#include "engine/model.h"
#include "engine/openjtalk.h"
#include "engine/synthesis_engine.h"

using namespace voicevox::core::engine;

// TODO: openjtalk is a shared_ptr only so that SynthesisEngine can hold it; we would like to stop doing that
static std::shared_ptr<OpenJTalk> openjtalk;
static std::unique_ptr<SynthesisEngine> engine;
static SynthesisEngine engine;

// C API entry point that loads the OpenJTalk dictionary located at dict_path.
// Always returns VOICEVOX_RESULT_SUCCEED: load failures are not yet translated
// into a result code (see the TODO below).
VoicevoxResultCode voicevox_initialize_openjtalk(const char *dict_path) {
VoicevoxResultCode voicevox_load_openjtalk_dict(const char *dict_path) {
// TODO: error handling
openjtalk = std::make_shared<OpenJTalk>(dict_path);
engine.load_openjtalk_dict(dict_path);
return VOICEVOX_RESULT_SUCCEED;
}

VoicevoxResultCode voicevox_tts(const char *text, int64_t speaker_id, int *output_binary_size, uint8_t **output_wav) {
if (!openjtalk) {
return VOICEVOX_RESULT_NOT_INITIALIZE_OPEN_JTALK_ERR;
}
if (!engine) {
engine = std::make_unique<SynthesisEngine>(openjtalk);
if (!engine.is_openjtalk_dict_loaded()) {
return VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT;
}

std::vector<AccentPhraseModel> accent_phrases = engine->create_accent_phrases(std::string(text), &speaker_id);
std::vector<AccentPhraseModel> accent_phrases = engine.create_accent_phrases(std::string(text), &speaker_id);
const AudioQueryModel audio_query = {
accent_phrases, 1.0f, 0.0f, 1.0f, 1.0f, 0.1f, 0.1f, engine->default_sampling_rate, false, "",
accent_phrases, 1.0f, 0.0f, 1.0f, 1.0f, 0.1f, 0.1f, engine.default_sampling_rate, false, "",
};

const auto wav = engine->synthesis_wave_format(audio_query, &speaker_id, output_binary_size);
const auto wav = engine.synthesis_wave_format(audio_query, &speaker_id, output_binary_size);
auto *wav_heap = new uint8_t[*output_binary_size];
std::copy(wav.begin(), wav.end(), wav_heap);
*output_wav = wav_heap;
Expand All @@ -44,8 +38,8 @@ void voicevox_wav_free(uint8_t *wav) { delete wav; }

const char *voicevox_error_result_to_message(VoicevoxResultCode result_code) {
switch (result_code) {
case VOICEVOX_RESULT_NOT_INITIALIZE_OPEN_JTALK_ERR:
return "Call initialize_openjtalk() first.";
case VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT:
return "Call voicevox_load_openjtalk_dict() first.";

default:
throw std::runtime_error("Unexpected error result code.");
Expand Down
1 change: 1 addition & 0 deletions core/src/engine/openjtalk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ void OpenJTalk::load(const std::string& dn_mecab) {
clear();
throw std::runtime_error("failed to initialize mecab");
}
dict_loaded = true;
}

void OpenJTalk::clear() {
Expand Down
6 changes: 4 additions & 2 deletions core/src/engine/openjtalk.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@ class OpenJTalk {
JPCommon_initialize(&jpcommon);
}

OpenJTalk(const std::string& dn_mecab) : OpenJTalk() { load(dn_mecab); }

~OpenJTalk() { clear(); }

std::vector<std::string> extract_fullcontext(std::string text);

void load(const std::string& dn_mecab);
void clear();
// Returns the current value of the dict_loaded flag.
bool is_dict_loaded() const {
  return dict_loaded;
}

private:
bool dict_loaded = false;
};
} // namespace voicevox::core::engine
4 changes: 3 additions & 1 deletion core/src/engine/synthesis_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ std::vector<AccentPhraseModel> SynthesisEngine::create_accent_phrases(std::strin
return {};
}

Utterance utterance = extract_full_context_label(*m_openjtalk, text);
Utterance utterance = extract_full_context_label(m_openjtalk, text);
if (utterance.breath_groups.empty()) {
return {};
}
Expand Down Expand Up @@ -513,6 +513,8 @@ std::vector<float> SynthesisEngine::synthesis(AudioQueryModel query, int64_t *sp
return wave;
}

// Loads the OpenJTalk dictionary at dict_path by delegating to the engine's
// own OpenJTalk member.
void SynthesisEngine::load_openjtalk_dict(const std::string &dict_path) {
  m_openjtalk.load(dict_path);
}

void SynthesisEngine::initial_process(std::vector<AccentPhraseModel> &accent_phrases,
std::vector<MoraModel> &flatten_moras, std::vector<std::string> &phoneme_str_list,
std::vector<OjtPhoneme> &phoneme_data_list) {
Expand Down
7 changes: 5 additions & 2 deletions core/src/engine/synthesis_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class SynthesisEngine {
public:
const unsigned int default_sampling_rate = 24000;

SynthesisEngine(std::shared_ptr<OpenJTalk> openjtalk) : m_openjtalk(openjtalk) {}
SynthesisEngine() {}

std::vector<AccentPhraseModel> create_accent_phrases(std::string text, int64_t *speaker_id);
std::vector<AccentPhraseModel> replace_mora_data(std::vector<AccentPhraseModel> accent_phrases, int64_t *speaker_id);
Expand All @@ -37,8 +37,11 @@ class SynthesisEngine {
std::vector<uint8_t> synthesis_wave_format(AudioQueryModel query, int64_t *speaker_id, int *binary_size,
bool enable_interrogative_upspeak = true);

void load_openjtalk_dict(const std::string &dict_path);
// Forwards to is_dict_loaded() on the engine's OpenJTalk member.
bool is_openjtalk_dict_loaded() const {
  return m_openjtalk.is_dict_loaded();
}

private:
std::shared_ptr<OpenJTalk> m_openjtalk;
OpenJTalk m_openjtalk;

void initial_process(std::vector<AccentPhraseModel> &accent_phrases, std::vector<MoraModel> &flatten_moras,
std::vector<std::string> &phoneme_str_list, std::vector<OjtPhoneme> &phoneme_data_list);
Expand Down