Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ project(VoiceVoxCore)

# TODO: download onnxruntime
set(ONNXRUNTIME_DIR "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime" CACHE PATH "Path to ONNX Runtime")
set(MODEL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/model" CACHE PATH "Path to model")

set(CMAKE_POSITION_INDEPENDENT_CODE ON)

option(DIRECTML "Enables building for DirectML" OFF)
Expand Down
19 changes: 19 additions & 0 deletions core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@ project(ONNXCore)

set(CMAKE_MACOSX_RPATH 1)

# Embed the model files into the "core" library.
# Configuration is aborted when any of the required files is missing from MODEL_DIR.
include(src/embedBin/FindEmbed.cmake)
if(NOT (EXISTS "${MODEL_DIR}/metas.json"
        AND EXISTS "${MODEL_DIR}/yukarin_s.onnx"
        AND EXISTS "${MODEL_DIR}/yukarin_sa.onnx"
        AND EXISTS "${MODEL_DIR}/decode.onnx"))
  message(FATAL_ERROR "Unable to find Model. Use option -DMODEL_DIR=...")
endif()

# All required files are present; the order of the calls below is kept as-is.
message("Models exist.")
EMBED_TARGET(YUKARIN_S "${MODEL_DIR}/yukarin_s.onnx" "core")
EMBED_TARGET(YUKARIN_SA "${MODEL_DIR}/yukarin_sa.onnx" "core")
EMBED_TARGET(DECODE "${MODEL_DIR}/decode.onnx" "core")
EMBED_TARGET(METAS "${MODEL_DIR}/metas.json" "core")

# coreライブラリのインストール先設定。デフォルトはCMakeLists.txtと同じ位置
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH "Path to install" FORCE)
Expand All @@ -13,6 +28,10 @@ message("core will be installed to: ${CMAKE_INSTALL_PREFIX}")
# coreライブラリのビルド設定
add_library(core
SHARED src/core.cpp
${EMBED_YUKARIN_S_OUTPUTS}
${EMBED_YUKARIN_SA_OUTPUTS}
${EMBED_DECODE_OUTPUTS}
${EMBED_METAS_OUTPUTS}
src/engine/full_context_label.cpp
src/engine/acoustic_feature_extractor.cpp
src/engine/openjtalk.cpp
Expand Down
7 changes: 3 additions & 4 deletions core/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
lib = cdll.LoadLibrary(str(core_dll_path))

# 関数型定義
lib.initialize.argtypes = (c_char_p, c_bool, c_int)
lib.initialize.argtypes = (c_bool, c_int)
lib.initialize.restype = c_bool

lib.finalize.argtypes = ()
Expand All @@ -52,9 +52,8 @@


# ラッパー関数
def initialize(root_dir_path: str, use_gpu: bool, cpu_num_threads=0):
path = create_string_buffer(root_dir_path.encode())
success = lib.initialize(path, use_gpu, cpu_num_threads)
def initialize(use_gpu: bool, cpu_num_threads=0):
success = lib.initialize(use_gpu, cpu_num_threads)
if not success:
raise Exception(lib.last_error_message().decode())

Expand Down
86 changes: 28 additions & 58 deletions core/src/core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,11 @@

#include <array>
#include <exception>
#include <filesystem>
#include <fstream>
#include <memory>
#include <string>
#include <unordered_set>

#include "embedBin/embed.h"
#include "nlohmann/json.hpp"

#ifndef VOICEVOX_CORE_EXPORTS
Expand All @@ -30,50 +29,17 @@

constexpr float PHONEME_LENGTH_MINIMAL = 0.01f;

namespace fs = std::filesystem;
constexpr std::array<int64_t, 0> scalar_shape{};
constexpr std::array<int64_t, 1> speaker_shape{1};

static std::string error_message;
static bool initialized = false;
static std::string supported_devices_str;

bool open_models(const fs::path &yukarin_s_path, const fs::path &yukarin_sa_path, const fs::path &decode_path,
std::vector<unsigned char> &yukarin_s_model, std::vector<unsigned char> &yukarin_sa_model,
std::vector<unsigned char> &decode_model) {
std::ifstream yukarin_s_file(yukarin_s_path, std::ios::binary), yukarin_sa_file(yukarin_sa_path, std::ios::binary),
decode_file(decode_path, std::ios::binary);
if (!yukarin_s_file.is_open() || !yukarin_sa_file.is_open() || !decode_file.is_open()) {
error_message = FAILED_TO_OPEN_MODEL_ERR;
return false;
}

yukarin_s_model = std::vector<unsigned char>(std::istreambuf_iterator<char>(yukarin_s_file), {});
yukarin_sa_model = std::vector<unsigned char>(std::istreambuf_iterator<char>(yukarin_sa_file), {});
decode_model = std::vector<unsigned char>(std::istreambuf_iterator<char>(decode_file), {});
return true;
}

/**
* Loads the metas.json.
*
* schema:
* [{
* name: string,
* styles: [{name: string, id: int}],
* speaker_uuid: string,
* version: string
* }]
*/
bool open_metas(const fs::path &metas_path, nlohmann::json &metas) {
std::ifstream metas_file(metas_path);
if (!metas_file.is_open()) {
error_message = FAILED_TO_OPEN_METAS_ERR;
return false;
}
metas_file >> metas;
return true;
}
EMBED_DECL(YUKARIN_SA);
EMBED_DECL(YUKARIN_S);
EMBED_DECL(DECODE);
EMBED_DECL(METAS);

struct SupportedDevices {
bool cpu = true;
Expand All @@ -96,21 +62,30 @@ SupportedDevices get_supported_devices() {
}

struct Status {
Status(const char *root_dir_path_utf8, bool use_gpu_)
: root_dir_path(root_dir_path_utf8),
use_gpu(use_gpu_),
Status(bool use_gpu_)
: use_gpu(use_gpu_),
memory_info(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU)),
yukarin_s(nullptr),
yukarin_sa(nullptr),
decode(nullptr) {}

/**
* Loads the metas.json.
*
* schema:
* [{
* name: string,
* styles: [{name: string, id: int}],
* speaker_uuid: string,
* version: string
* }]
*/
bool load(int cpu_num_threads) {
// deprecated in C++20; Use char8_t for utf-8 char in the future.
fs::path root = fs::u8path(root_dir_path);
embed::Resource yukarin_s_model = YUKARIN_S();
embed::Resource yukarin_sa_model = YUKARIN_SA();
embed::Resource decode_model = DECODE();
embed::Resource metas_file = METAS();

if (!open_metas(root / "metas.json", metas)) {
return false;
}
metas = nlohmann::json::parse(metas_file.data, metas_file.data + metas_file.size);
metas_str = metas.dump();
supported_styles.clear();
for (const auto &meta : metas) {
Expand All @@ -119,15 +94,10 @@ struct Status {
}
}

std::vector<unsigned char> yukarin_s_model, yukarin_sa_model, decode_model;
if (!open_models(root / "yukarin_s.onnx", root / "yukarin_sa.onnx", root / "decode.onnx", yukarin_s_model,
yukarin_sa_model, decode_model)) {
return false;
}
Ort::SessionOptions session_options;
session_options.SetInterOpNumThreads(cpu_num_threads).SetIntraOpNumThreads(cpu_num_threads);
yukarin_s = Ort::Session(env, yukarin_s_model.data(), yukarin_s_model.size(), session_options);
yukarin_sa = Ort::Session(env, yukarin_sa_model.data(), yukarin_sa_model.size(), session_options);
yukarin_s = Ort::Session(env, yukarin_s_model.data, yukarin_s_model.size, session_options);
yukarin_sa = Ort::Session(env, yukarin_sa_model.data, yukarin_sa_model.size, session_options);
if (use_gpu) {
#ifdef DIRECTML
session_options.DisableMemPattern().SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
Expand All @@ -137,7 +107,7 @@ struct Status {
session_options.AppendExecutionProvider_CUDA(cuda_options);
#endif
}
decode = Ort::Session(env, decode_model.data(), decode_model.size(), session_options);
decode = Ort::Session(env, decode_model.data, decode_model.size, session_options);
return true;
}

Expand Down Expand Up @@ -172,7 +142,7 @@ bool validate_speaker_id(int64_t speaker_id) {
return true;
}

bool initialize(const char *root_dir_path, bool use_gpu, int cpu_num_threads) {
bool initialize(bool use_gpu, int cpu_num_threads) {
initialized = false;

#ifdef DIRECTML
Expand All @@ -184,7 +154,7 @@ bool initialize(const char *root_dir_path, bool use_gpu, int cpu_num_threads) {
return false;
}
try {
status = std::make_unique<Status>(root_dir_path, use_gpu);
status = std::make_unique<Status>(use_gpu);
if (!status->load(cpu_num_threads)) {
return false;
}
Expand Down
3 changes: 1 addition & 2 deletions core/src/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,14 @@ typedef enum {
* @fn
* 初期化する
* @brief 音声合成するための初期化を行う。他の関数を正しく実行するには先に初期化が必要
* @param root_dir_path 必要なファイルがあるディレクトリ。相対パス・絶対パスどちらも指定可能。文字コードはUTF-8
* @param use_gpu trueならGPU用、falseならCPU用の初期化を行う
* @param cpu_num_threads 推論に用いるスレッド数を設定する。0の場合論理コア数の半分か、物理コア数が設定される
* @return 成功したらtrue、失敗したらfalse
* @detail
* 何度も実行可能。use_gpuを変更して実行しなおすことも可能。
* 最後に実行したuse_gpuに従って他の関数が実行される。
*/
VOICEVOX_CORE_API bool initialize(const char *root_dir_path, bool use_gpu, int cpu_num_threads = 0);
VOICEVOX_CORE_API bool initialize(bool use_gpu, int cpu_num_threads = 0);

/**
* @fn
Expand Down
99 changes: 99 additions & 0 deletions core/src/embedBin/FindEmbed.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# - Provide a macro to embed binary files into the executable.
#
# This file is part of the Embed project: https://github.com/magcks/embed
#
# The module defines the macros:
#
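# EMBED_TARGET(<Name> <BinFile> <LibName>)
#
# which generates a C source file (plus a .rc resource script on Windows)
# that embeds <BinFile>. <BinFile> is the path to the binary file and
# <LibName> is the module name passed to GetModuleHandle() on Windows.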
#
# The macro defines a set of variables:
# EMBED_${Name}_DEFINED - true if the macro ran successfully
# EMBED_${Name}_INPUT - The input source file, an alias for <BinFile>
# EMBED_${Name}_OUTPUTS - The generated source file(s)
#
# ====================================================================
# Example:
#
# find_package(Embed REQUIRED)
# EMBED_TARGET(SHADER source.glsl example)
# add_executable(example main.cc ${EMBED_SHADER_OUTPUTS})
# ====================================================================


cmake_minimum_required(VERSION 3.16)

# First resource ID used for the RCDATA entries written by EMBED_TARGET on
# Windows; EMBED_TARGET increments it after each call in the WIN32 branch.
set(RES_ID 16384)
# C prelude inserted at the top of every source file generated by EMBED_TARGET:
# it declares the `struct Res` (data pointer + size) returned by the generated
# accessor function.
# Semicolons are written as `\;` so they are not treated as list separators
# when the text is later expanded unquoted.
set(STRUCT
"#include \"stddef.h\"
struct Res {
const char *data\;
const size_t size\;
}\;"
)

# EMBED_TARGET(<Name> <Input> <LibName>)
#
# Generates, at configure time, a C source file in CMAKE_CURRENT_BINARY_DIR
# that defines `struct Res <Name>(void)`, an accessor returning a pointer to
# the bytes of <Input> and their size.
#
# Name    - name of the generated C function and base name of the generated files.
# Input   - path to the file to embed.
# LibName - module name passed to GetModuleHandle() in the Windows variant;
#           not referenced in the non-Windows variant.
#
# Because this is a macro, every variable set below (InputAbs, OutputC,
# Outputs, CODE, RES_ID, ...) is set in the caller's scope. The documented
# results are EMBED_<Name>_DEFINED, EMBED_<Name>_INPUT and EMBED_<Name>_OUTPUTS.
macro(EMBED_TARGET Name Input LibName)
# Resolve the input to a real path so the generated code can reference it.
get_filename_component(InputAbs "${Input}" REALPATH)
if(WIN32)
# Windows: the file is attached as an RCDATA resource through a generated .rc
# script, and the generated C accessor looks it up by resource ID.
set(OutputRC "${CMAKE_CURRENT_BINARY_DIR}/${Name}.rc")
set(OutputC "${CMAKE_CURRENT_BINARY_DIR}/${Name}.c")
set(Outputs ${OutputRC} ${OutputC})
set(RCCODE "${RES_ID} RCDATA \"${InputAbs}\"\n")
set(CODE
"#include \"windows.h\"
${STRUCT}
struct Res ${Name}(void) {
HMODULE handle = GetModuleHandle(\"${LibName}\")\;
HRSRC res = FindResource(handle, MAKEINTRESOURCE(${RES_ID}), RT_RCDATA)\;
struct Res r = {
(const char*) LockResource(LoadResource(handle, res)),
SizeofResource(handle, res)
}\;
return r\;
}"
)
# NOTE(review): ${RCCODE} / ${CODE} are expanded unquoted; presumably this is
# what turns the escaped `\;` back into plain `;` in the written file. Confirm
# before quoting these arguments.
file(WRITE ${OutputRC} ${RCCODE})
file(WRITE ${OutputC} ${CODE})
# Give the next embedded resource a distinct ID.
math(EXPR RES_ID "${RES_ID}+1")
# NOTE(review): unlike the branch below, no dependency on ${Input} is
# registered here; verify that changes to the input trigger a rebuild.
else()
# Non-Windows: the file is pulled in with an inline-assembly `.incbin`
# directive placed between two labels; the accessor returns the span between them.
if(APPLE)
# Apple uses a different section directive and underscore-prefixed labels,
# presumably to match how the toolchain mangles the C names `data` /
# `end_data` declared below. TODO confirm.
set(Section ".const_data")
set(DataName "_data")
set(EndName "_end_data")
else()
set(Section ".section .rodata")
set(DataName "data")
set(EndName "end_data")
endif()
# NOTE(review): every generated file uses the same label names (data /
# end_data); this appears to rely on the labels staying local to each
# generated translation unit. Confirm.
set(CODE
"${STRUCT}
asm(
\"${Section}\\n\"
\".balign ${CMAKE_SIZEOF_VOID_P}\\n\"
\"${DataName}: .incbin \\\"${InputAbs}\\\"\\n\"
\"${EndName}:\\n\"
\".text\\n\"
)\;
extern const char data[]\;
extern const char end_data[]\;
struct Res ${Name}(void) {
struct Res r = { data, end_data - data }\;
return r\;
}"
)
set(OutputC "${CMAKE_CURRENT_BINARY_DIR}/${Name}.c")
set(Outputs ${OutputC})
# ${CODE} is expanded unquoted here as well (see the note in the WIN32 branch).
file(WRITE ${OutputC} ${CODE})

# Touch the generated source whenever the input file changes, presumably so
# that the build recompiles it and re-reads the embedded file.
add_custom_command(
OUTPUT ${OutputC}
COMMAND ${CMAKE_COMMAND} -E touch ${OutputC}
DEPENDS ${Input}
)
endif()
# Results exposed to the caller.
set(EMBED_${Name}_DEFINED TRUE)
set(EMBED_${Name}_INPUT ${Input})
set(EMBED_${Name}_OUTPUTS ${Outputs})
endmacro()
9 changes: 9 additions & 0 deletions core/src/embedBin/LICENCE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
The MIT License (MIT)

Copyright (c) 2018 Max von Buelow

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
39 changes: 39 additions & 0 deletions core/src/embedBin/embed.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
 * embed.h - declarations for accessing embedded binary resources.
 *
 * Provides a small resource struct (pointer + size) and the EMBED_DECL(NAME)
 * macro, which declares an accessor function `NAME(void)` returning that
 * struct. The header is usable from both C and C++.
 *
 * Every name below can be overridden by defining the corresponding macro
 * before including this header:
 *   EMBED_DATA_NAME - name of the pointer member          (default: data)
 *   EMBED_SIZE_NAME - name of the size member             (default: size)
 *   EMBED_NS        - C++ namespace holding the struct    (default: embed)
 *   EMBED_RES       - struct name (default: Resource in C++, embed_resource in C)
 *   EMBED_RES_TYPE  - C typedef name for the struct       (default: embed_resource_t)
 */
#ifndef __EMBED_H
#define __EMBED_H
#ifndef EMBED_DATA_NAME
#define EMBED_DATA_NAME data
#endif
#ifndef EMBED_SIZE_NAME
#define EMBED_SIZE_NAME size
#endif
#ifndef EMBED_NS
#define EMBED_NS embed
#endif
/*
 * Only supply the default struct name when the caller has not chosen one.
 * The previous guard tested EMBED_STRUCT alone, so pre-defining EMBED_RES
 * (the macro that is actually used) led to a macro redefinition. EMBED_STRUCT
 * is still honoured as an opt-out for backward compatibility.
 */
#if !defined(EMBED_STRUCT) && !defined(EMBED_RES)
#ifdef __cplusplus
#define EMBED_RES Resource
#else
#define EMBED_RES embed_resource
#endif
#endif
#ifndef EMBED_RES_TYPE
#define EMBED_RES_TYPE embed_resource_t
#endif
#ifdef __cplusplus
#include <cstddef>
/* Declares a C-linkage accessor NAME() returning EMBED_NS::EMBED_RES. */
#define EMBED_DECL(NAME) extern "C" EMBED_NS::EMBED_RES NAME(void)
namespace EMBED_NS {
/* Pointer to the first byte of an embedded resource and its size. */
struct EMBED_RES {
  const char *EMBED_DATA_NAME;
  std::size_t EMBED_SIZE_NAME;
};
}  // namespace EMBED_NS
#else
#include <stddef.h>
/* Declares an accessor NAME() returning struct EMBED_RES. */
#define EMBED_DECL(NAME) extern struct EMBED_RES NAME(void)
/* Pointer to the first byte of an embedded resource and its size. */
typedef struct EMBED_RES {
  const char *EMBED_DATA_NAME;
  size_t EMBED_SIZE_NAME;
} EMBED_RES_TYPE;
#endif
#endif
Loading