VOICEVOX · PickledChair · Mar 23, 2022 · Mar 9, 2022 · Mar 9, 2022 · Mar 10, 2022
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -3,6 +3,8 @@ project(VoiceVoxCore)
 
 # TODO: download onnxruntime
 set(ONNXRUNTIME_DIR "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime" CACHE PATH "Path to ONNX Runtime")
+set(MODEL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/model" CACHE PATH "Path to model")
+
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
 option(DIRECTML "Enables building for DirectML" OFF)

diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
@@ -4,6 +4,21 @@ project(ONNXCore)
 
 set(CMAKE_MACOSX_RPATH 1)
 
+# Embed the model files (ONNX models and metas.json) into the core library.
+include(src/embedBin/FindEmbed.cmake)
+# Fail early and name the first file that is missing from MODEL_DIR.
+foreach(model_file metas.json yukarin_s.onnx yukarin_sa.onnx decode.onnx)
+	if(NOT EXISTS "${MODEL_DIR}/${model_file}")
+		message(FATAL_ERROR "Unable to find Model. Use option -DMODEL_DIR=... (missing: ${MODEL_DIR}/${model_file})")
+	endif()
+endforeach()
+message(STATUS "Models exist.")
+# The last argument is the library name EMBED_TARGET uses on Windows to locate the resource.
+EMBED_TARGET(YUKARIN_S "${MODEL_DIR}/yukarin_s.onnx" "core")
+EMBED_TARGET(YUKARIN_SA "${MODEL_DIR}/yukarin_sa.onnx" "core")
+EMBED_TARGET(DECODE "${MODEL_DIR}/decode.onnx" "core")
+EMBED_TARGET(METAS "${MODEL_DIR}/metas.json" "core")
+
 # coreライブラリのインストール先設定。デフォルトはCMakeLists.txtと同じ位置
 if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
 	set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH "Path to install" FORCE)
@@ -13,6 +28,10 @@ message("core will be installed to: ${CMAKE_INSTALL_PREFIX}")
 # coreライブラリのビルド設定
 add_library(core
 		SHARED src/core.cpp
+    ${EMBED_YUKARIN_S_OUTPUTS} 
+		${EMBED_YUKARIN_SA_OUTPUTS} 
+		${EMBED_DECODE_OUTPUTS} 
+		${EMBED_METAS_OUTPUTS}
 		src/engine/full_context_label.cpp
 		src/engine/acoustic_feature_extractor.cpp
 		src/engine/openjtalk.cpp

diff --git a/core/_core.py b/core/_core.py
@@ -27,7 +27,7 @@
 lib = cdll.LoadLibrary(str(core_dll_path))
 
 # 関数型定義
-lib.initialize.argtypes = (c_char_p, c_bool, c_int)
+lib.initialize.argtypes = (c_bool, c_int)
 lib.initialize.restype = c_bool
 
 lib.finalize.argtypes = ()
@@ -52,9 +52,8 @@
 
 
 # ラッパー関数
-def initialize(root_dir_path: str, use_gpu: bool, cpu_num_threads=0):
-    path = create_string_buffer(root_dir_path.encode())
-    success = lib.initialize(path, use_gpu, cpu_num_threads)
+def initialize(use_gpu: bool, cpu_num_threads=0):
+    success = lib.initialize(use_gpu, cpu_num_threads)
     if not success:
         raise Exception(lib.last_error_message().decode())
 

diff --git a/core/src/core.cpp b/core/src/core.cpp
@@ -6,12 +6,11 @@
 
 #include <array>
 #include <exception>
-#include <filesystem>
-#include <fstream>
 #include <memory>
 #include <string>
 #include <unordered_set>
 
+#include "embedBin/embed.h"
 #include "nlohmann/json.hpp"
 
 #ifndef VOICEVOX_CORE_EXPORTS
@@ -30,50 +29,17 @@
 
 constexpr float PHONEME_LENGTH_MINIMAL = 0.01f;
 
-namespace fs = std::filesystem;
 constexpr std::array<int64_t, 0> scalar_shape{};
 constexpr std::array<int64_t, 1> speaker_shape{1};
 
 static std::string error_message;
 static bool initialized = false;
 static std::string supported_devices_str;
 
-bool open_models(const fs::path &yukarin_s_path, const fs::path &yukarin_sa_path, const fs::path &decode_path,
-                 std::vector<unsigned char> &yukarin_s_model, std::vector<unsigned char> &yukarin_sa_model,
-                 std::vector<unsigned char> &decode_model) {
-  std::ifstream yukarin_s_file(yukarin_s_path, std::ios::binary), yukarin_sa_file(yukarin_sa_path, std::ios::binary),
-      decode_file(decode_path, std::ios::binary);
-  if (!yukarin_s_file.is_open() || !yukarin_sa_file.is_open() || !decode_file.is_open()) {
-    error_message = FAILED_TO_OPEN_MODEL_ERR;
-    return false;
-  }
-
-  yukarin_s_model = std::vector<unsigned char>(std::istreambuf_iterator<char>(yukarin_s_file), {});
-  yukarin_sa_model = std::vector<unsigned char>(std::istreambuf_iterator<char>(yukarin_sa_file), {});
-  decode_model = std::vector<unsigned char>(std::istreambuf_iterator<char>(decode_file), {});
-  return true;
-}
-
-/**
- * Loads the metas.json.
- *
- * schema:
- * [{
- *  name: string,
- *  styles: [{name: string, id: int}],
- *  speaker_uuid: string,
- *  version: string
- * }]
- */
-bool open_metas(const fs::path &metas_path, nlohmann::json &metas) {
-  std::ifstream metas_file(metas_path);
-  if (!metas_file.is_open()) {
-    error_message = FAILED_TO_OPEN_METAS_ERR;
-    return false;
-  }
-  metas_file >> metas;
-  return true;
-}
+EMBED_DECL(YUKARIN_SA);
+EMBED_DECL(YUKARIN_S);
+EMBED_DECL(DECODE);
+EMBED_DECL(METAS);
 
 struct SupportedDevices {
   bool cpu = true;
@@ -96,21 +62,30 @@ SupportedDevices get_supported_devices() {
 }
 
 struct Status {
-  Status(const char *root_dir_path_utf8, bool use_gpu_)
-      : root_dir_path(root_dir_path_utf8),
-        use_gpu(use_gpu_),
+  Status(bool use_gpu_)
+      : use_gpu(use_gpu_),
         memory_info(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU)),
         yukarin_s(nullptr),
         yukarin_sa(nullptr),
         decode(nullptr) {}
-
+  /**
+   * Loads the embedded metas.json and models, then creates the sessions.
+   *
+   * metas.json schema:
+   * [{
+   *  name: string,
+   *  styles: [{name: string, id: int}],
+   *  speaker_uuid: string,
+   *  version: string
+   * }]
+   */
   bool load(int cpu_num_threads) {
-    // deprecated in C++20; Use char8_t for utf-8 char in the future.
-    fs::path root = fs::u8path(root_dir_path);
+    embed::Resource yukarin_s_model = YUKARIN_S();
+    embed::Resource yukarin_sa_model = YUKARIN_SA();
+    embed::Resource decode_model = DECODE();
+    embed::Resource metas_file = METAS();
 
-    if (!open_metas(root / "metas.json", metas)) {
-      return false;
-    }
+    metas = nlohmann::json::parse(metas_file.data, metas_file.data + metas_file.size);
     metas_str = metas.dump();
     supported_styles.clear();
     for (const auto &meta : metas) {
@@ -119,15 +94,10 @@ struct Status {
       }
     }
 
-    std::vector<unsigned char> yukarin_s_model, yukarin_sa_model, decode_model;
-    if (!open_models(root / "yukarin_s.onnx", root / "yukarin_sa.onnx", root / "decode.onnx", yukarin_s_model,
-                     yukarin_sa_model, decode_model)) {
-      return false;
-    }
     Ort::SessionOptions session_options;
     session_options.SetInterOpNumThreads(cpu_num_threads).SetIntraOpNumThreads(cpu_num_threads);
-    yukarin_s = Ort::Session(env, yukarin_s_model.data(), yukarin_s_model.size(), session_options);
-    yukarin_sa = Ort::Session(env, yukarin_sa_model.data(), yukarin_sa_model.size(), session_options);
+    yukarin_s = Ort::Session(env, yukarin_s_model.data, yukarin_s_model.size, session_options);
+    yukarin_sa = Ort::Session(env, yukarin_sa_model.data, yukarin_sa_model.size, session_options);
     if (use_gpu) {
 #ifdef DIRECTML
       session_options.DisableMemPattern().SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
@@ -137,7 +107,7 @@ struct Status {
       session_options.AppendExecutionProvider_CUDA(cuda_options);
 #endif
     }
-    decode = Ort::Session(env, decode_model.data(), decode_model.size(), session_options);
+    decode = Ort::Session(env, decode_model.data, decode_model.size, session_options);
     return true;
   }
 
@@ -172,7 +142,7 @@ bool validate_speaker_id(int64_t speaker_id) {
   return true;
 }
 
-bool initialize(const char *root_dir_path, bool use_gpu, int cpu_num_threads) {
+bool initialize(bool use_gpu, int cpu_num_threads) {
   initialized = false;
 
 #ifdef DIRECTML
@@ -184,7 +154,7 @@ bool initialize(const char *root_dir_path, bool use_gpu, int cpu_num_threads) {
     return false;
   }
   try {
-    status = std::make_unique<Status>(root_dir_path, use_gpu);
+    status = std::make_unique<Status>(use_gpu);
     if (!status->load(cpu_num_threads)) {
       return false;
     }

diff --git a/core/src/core.h b/core/src/core.h
@@ -36,15 +36,14 @@ typedef enum {
  * @fn
  * 初期化する
  * @brief 音声合成するための初期化を行う。他の関数を正しく実行するには先に初期化が必要
- * @param root_dir_path 必要なファイルがあるディレクトリ。相対パス・絶対パスどちらも指定可能。文字コードはUTF-8
  * @param use_gpu trueならGPU用、falseならCPU用の初期化を行う
  * @param cpu_num_threads 推論に用いるスレッド数を設定する。0の場合論理コア数の半分か、物理コア数が設定される
  * @return 成功したらtrue、失敗したらfalse
  * @detail
  * 何度も実行可能。use_gpuを変更して実行しなおすことも可能。
  * 最後に実行したuse_gpuに従って他の関数が実行される。
  */
-VOICEVOX_CORE_API bool initialize(const char *root_dir_path, bool use_gpu, int cpu_num_threads = 0);
+VOICEVOX_CORE_API bool initialize(bool use_gpu, int cpu_num_threads = 0);
 
 /**
  * @fn

diff --git a/core/src/embedBin/FindEmbed.cmake b/core/src/embedBin/FindEmbed.cmake
@@ -0,0 +1,113 @@
+# - Provide a macro to embed binary files into an executable or library.
+#
+# This file is part of the Embed project: https://github.com/magcks/embed
+#
+# The module defines the macro:
+#
+#  EMBED_TARGET(<Name> <BinFile> <LibName>)
+#
+# which writes a C source file defining `struct Res <Name>(void)`, a
+# function returning a pointer to the contents of <BinFile> and their size.
+#
+#  <Name>    - name of the generated function; also names the output files
+#  <BinFile> - path to the binary file to embed
+#  <LibName> - name passed to GetModuleHandleA() to locate the resource;
+#              only used on Windows
+#
+# The macro defines a set of variables:
+#  EMBED_${Name}_DEFINED       - true if the macro ran successfully
+#  EMBED_${Name}_INPUT         - The input source file, an alias for <BinFile>
+#  EMBED_${Name}_OUTPUTS       - The source file(s) generated
+#
+#  ====================================================================
+#  Example:
+#
+#   find_package(Embed REQUIRED)
+#   EMBED_TARGET(SHADER source.glsl example)
+#   add_executable(example main.cc ${EMBED_SHADER_OUTPUTS})
+#  ====================================================================
+
+
+cmake_minimum_required(VERSION 3.16)
+
+# Resource id given to the next embedded file on Windows (RCDATA id).
+set(RES_ID 16384)
+# C definition of the struct returned by the generated functions. The `\;`
+# escapes keep the semicolons from acting as list separators.
+set(STRUCT
+"#include \"stddef.h\"
+struct Res {
+	const char *data\;
+	const size_t size\;
+}\;"
+)
+
+macro(EMBED_TARGET Name Input LibName)
+	get_filename_component(InputAbs "${Input}" REALPATH)
+	if(WIN32)
+		# Windows: store the file as an RCDATA resource and look it up at run time.
+		set(OutputRC "${CMAKE_CURRENT_BINARY_DIR}/${Name}.rc")
+		set(OutputC "${CMAKE_CURRENT_BINARY_DIR}/${Name}.c")
+		set(Outputs ${OutputRC} ${OutputC})
+		set(RCCODE "${RES_ID} RCDATA \"${InputAbs}\"\n")
+		set(CODE
+"#include \"windows.h\"
+${STRUCT}
+struct Res ${Name}(void) {
+	HMODULE handle = GetModuleHandleA(\"${LibName}\")\;
+	HRSRC res = FindResource(handle, MAKEINTRESOURCE(${RES_ID}), RT_RCDATA)\;
+	struct Res r = {
+		(const char*) LockResource(LoadResource(handle, res)),
+		SizeofResource(handle, res)
+	}\;
+	return r\;
+}"
+		)
+		# Left unquoted on purpose: quoting would write the `\;` escapes literally.
+		file(WRITE ${OutputRC} ${RCCODE})
+		file(WRITE ${OutputC} ${CODE})
+		# Give the next embedded file its own resource id.
+		math(EXPR RES_ID "${RES_ID}+1")
+	else()
+		# Elsewhere: pull the file in with the assembler's .incbin directive.
+		if(APPLE)
+			set(Section ".const_data")
+			set(DataName "_data")
+			set(EndName "_end_data")
+		else()
+			set(Section ".section .rodata")
+			set(DataName "data")
+			set(EndName "end_data")
+		endif()
+		set(CODE
+"${STRUCT}
+asm(
+	\"${Section}\\n\"
+	\".balign ${CMAKE_SIZEOF_VOID_P}\\n\"
+	\"${DataName}: .incbin \\\"${InputAbs}\\\"\\n\"
+	\"${EndName}:\\n\"
+	\".text\\n\"
+)\;
+extern const char data[]\;
+extern const char end_data[]\;
+struct Res ${Name}(void) {
+	struct Res r = { data, end_data - data }\;
+	return r\;
+}"
+		)
+		set(OutputC "${CMAKE_CURRENT_BINARY_DIR}/${Name}.c")
+		set(Outputs ${OutputC})
+		file(WRITE ${OutputC} ${CODE})
+
+		# Touch the generated source when the input file is newer than it.
+		add_custom_command(
+			OUTPUT ${OutputC}
+			COMMAND ${CMAKE_COMMAND} -E touch ${OutputC}
+			DEPENDS "${Input}"
+			VERBATIM
+		)
+	endif()
+	set(EMBED_${Name}_DEFINED TRUE)
+	set(EMBED_${Name}_INPUT ${Input})
+	set(EMBED_${Name}_OUTPUTS ${Outputs})
+endmacro()
diff --git a/core/src/embedBin/LICENCE.md b/core/src/embedBin/LICENCE.md
@@ -0,0 +1,9 @@
+The MIT License (MIT)
+
+Copyright (c) 2018 Max von Buelow
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/core/src/embedBin/embed.h b/core/src/embedBin/embed.h
@@ -0,0 +1,55 @@
+/*
+ * Declares the resource type and the EMBED_DECL() helper used to access files
+ * embedded with EMBED_TARGET (see FindEmbed.cmake).
+ *
+ * Every name below can be overridden by defining the matching macro before
+ * including this header:
+ *   EMBED_DATA_NAME - member holding the pointer to the data (default: data)
+ *   EMBED_SIZE_NAME - member holding the size in bytes      (default: size)
+ *   EMBED_NS        - C++ namespace of the resource type    (default: embed)
+ *   EMBED_RES       - struct name (default: Resource in C++, embed_resource in C)
+ *   EMBED_RES_TYPE  - C typedef name                        (default: embed_resource_t)
+ */
+#ifndef __EMBED_H
+#define __EMBED_H
+#ifndef EMBED_DATA_NAME
+#define EMBED_DATA_NAME data
+#endif
+#ifndef EMBED_SIZE_NAME
+#define EMBED_SIZE_NAME size
+#endif
+#ifndef EMBED_NS
+#define EMBED_NS embed
+#endif
+/* EMBED_STRUCT is still honoured so that existing users who define it keep
+ * the previous behaviour; defining EMBED_RES alone is now enough. */
+#if !defined(EMBED_RES) && !defined(EMBED_STRUCT)
+#ifdef __cplusplus
+#define EMBED_RES Resource
+#else
+#define EMBED_RES embed_resource
+#endif
+#endif
+#ifndef EMBED_RES_TYPE
+#define EMBED_RES_TYPE embed_resource_t
+#endif
+#ifdef __cplusplus
+#include <cstddef>
+/* Declares the C-linkage accessor NAME() generated for an embedded file. */
+#define EMBED_DECL(NAME) extern "C" EMBED_NS::EMBED_RES NAME(void)
+namespace EMBED_NS {
+struct EMBED_RES {
+  const char *EMBED_DATA_NAME;
+  std::size_t EMBED_SIZE_NAME;
+};
+}  // namespace EMBED_NS
+#else
+#include <stddef.h>
+/* Declares the accessor NAME() generated for an embedded file. */
+#define EMBED_DECL(NAME) extern struct EMBED_RES NAME(void)
+typedef struct EMBED_RES {
+  const char *EMBED_DATA_NAME;
+  size_t EMBED_SIZE_NAME;
+} EMBED_RES_TYPE;
+#endif
+#endif