From 2ac370a40b138e973b14db1ab03eb397ada2f37c Mon Sep 17 00:00:00 2001
From: John Howe <89397553+timerring@users.noreply.github.com>
Date: Fri, 28 Mar 2025 15:11:16 +0800
Subject: [PATCH] feat: introduce auto slice video

fix #233
---
 .dockerignore                      |  1 +
 .gitignore                         |  1 +
 .gitmodules                        |  3 ++
 src/autoslice/__init__.py          |  5 +-
 src/autoslice/auto_slice_video     |  1 +
 src/autoslice/calculate_density.py | 51 --------------------
 src/autoslice/inject_metadata.py   | 23 ++++++++++
 src/autoslice/slice_video.py       | 74 ------------------------------
 src/autoslice/zhipu_sdk.py         | 35 ++++++++++++++
 src/burn/render_video.py           | 36 +++++++--------
 src/config.py                      |  5 +-
 11 files changed, 89 insertions(+), 146 deletions(-)
 create mode 160000 src/autoslice/auto_slice_video
 delete mode 100644 src/autoslice/calculate_density.py
 create mode 100644 src/autoslice/inject_metadata.py
 delete mode 100644 src/autoslice/slice_video.py
 create mode 100644 src/autoslice/zhipu_sdk.py

diff --git a/.dockerignore b/.dockerignore
index c68e752..20fe58e 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -6,6 +6,7 @@ pnpm-lock.yaml
 **/__pycache__
 test/*
 Videos/*
+!Videos/.gitkeep
 settings-production.toml
 startRecord-production.sh
 src/utils/cookies.json
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index dc8db39..efef367 100755
--- a/.gitignore
+++ b/.gitignore
@@ -350,6 +350,7 @@ $RECYCLE.BIN/
 # End of https://www.toptal.com/developers/gitignore/api/intellij+all,python,pycharm+all,macos,windows
 # Data&test
 Videos/
+!Videos/.gitkeep
 test/
 .cache
 .vscode/
diff --git a/.gitmodules b/.gitmodules
index e56823f..e5ed06d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "src/danmaku/DanmakuConvert"]
 	path = src/danmaku/DanmakuConvert
 	url = https://github.com/timerring/DanmakuConvert.git
+[submodule "src/autoslice/auto_slice_video"]
+	path = src/autoslice/auto_slice_video
+	url = git@github.com:timerring/auto-slice-video.git
diff --git a/src/autoslice/__init__.py b/src/autoslice/__init__.py
index 3b36319..8e3fec0 100644
--- a/src/autoslice/__init__.py
+++ b/src/autoslice/__init__.py
@@ -2,4 +2,7 @@
 
 import sys
 import os
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
\ No newline at end of file
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from .auto_slice_video.autosv import slice_video_by_danmaku
+
+__all__ = ["slice_video_by_danmaku"]
\ No newline at end of file
diff --git a/src/autoslice/auto_slice_video b/src/autoslice/auto_slice_video
new file mode 160000
index 0000000..c26376d
--- /dev/null
+++ b/src/autoslice/auto_slice_video
@@ -0,0 +1 @@
+Subproject commit c26376d5d14e8528a7d2600ae6d710ca1211b1ed
diff --git a/src/autoslice/calculate_density.py b/src/autoslice/calculate_density.py
deleted file mode 100644
index dff9dc6..0000000
--- a/src/autoslice/calculate_density.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (c) 2024 bilive.
-
-import re
-from collections import defaultdict
-from src.config import SLICE_DURATION
-
-def parse_time(time_str):
-    """Convert ASS time format to seconds with milliseconds."""
-    h, m, s = time_str.split(':')
-    s, ms = s.split('.')
-    return int(h) * 3600 + int(m) * 60 + int(s) + int(ms) / 100
-
-def format_time(seconds):
-    """Format seconds to hh:mm:ss.xx."""
-    h = int(seconds // 3600)
-    m = int((seconds % 3600) // 60)
-    s = int(seconds % 60)
-    ms = int((seconds - int(seconds)) * 100)
-    return f"{h:02}:{m:02}:{s:02}.{ms:02}"
-
-def extract_dialogues(file_path):
-    """Extract dialogue start times from the ASS file."""
-    dialogues = []
-    with open(file_path, 'r', encoding='utf-8') as file:
-        for line in file:
-            if line.startswith('Dialogue:'):
-                parts = line.split(',')
-                start_time = parse_time(parts[1].strip())
-                dialogues.append(start_time)
-    return dialogues
-
-def calculate_density(dialogues, window_size=SLICE_DURATION):
-    """Calculate the maximum density of dialogues in a given window size."""
-    time_counts = defaultdict(int)
-    for time in dialogues:
-        time_counts[time] += 1
-
-    max_density = 0
-    max_start_time = 0
-
-    # Use a sliding window to calculate density
-    sorted_times = sorted(time_counts.keys())
-    for i in range(len(sorted_times)):
-        start_time = sorted_times[i]
-        end_time = start_time + window_size
-        current_density = sum(count for time, count in time_counts.items() if start_time <= time < end_time)
-        if current_density > max_density:
-            max_density = current_density
-            max_start_time = start_time
-
-    return max_start_time, max_density
\ No newline at end of file
diff --git a/src/autoslice/inject_metadata.py b/src/autoslice/inject_metadata.py
new file mode 100644
index 0000000..2f8459b
--- /dev/null
+++ b/src/autoslice/inject_metadata.py
@@ -0,0 +1,23 @@
+# Copyright (c) 2024 bilive.
+
+import subprocess
+from src.log.logger import scan_log
+
+# https://stackoverflow.com/questions/64849478/cant-insert-stream-metadata-into-mp4
+def inject_metadata(video_path, generate_title, output_path):  # returns None; ffmpeg failures are logged, not raised
+    """Copy the streams of video_path into output_path with ffmpeg, storing generate_title in the 'generate' metadata tag."""
+    command = [
+        'ffmpeg',
+        '-i', video_path,
+        '-metadata:g', f'generate={generate_title}',  # ':g' selects global (container-level) metadata
+        '-c:v', 'copy',  # stream copy: video is not re-encoded
+        '-c:a', 'copy',  # stream copy: audio is not re-encoded
+        output_path
+    ]
+    try:
+        result = subprocess.run(command, check=True, capture_output=True, text=True)  # check=True raises on a non-zero exit status
+        scan_log.debug(f"FFmpeg output: {result.stdout}")
+        if result.stderr:
+            scan_log.debug(f"FFmpeg debug: {result.stderr}")
+    except subprocess.CalledProcessError as e:
+        scan_log.error(f"Error: {e.stderr}")  # NOTE(review): the error is swallowed, so callers cannot tell that output_path was not produced
\ No newline at end of file
diff --git a/src/autoslice/slice_video.py b/src/autoslice/slice_video.py
deleted file mode 100644
index 3560ce4..0000000
--- a/src/autoslice/slice_video.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# Copyright (c) 2024 bilive.
-
-import base64
-import subprocess
-from src.config import Your_API_KEY, SLICE_DURATION
-from zhipuai import ZhipuAI
-from src.autoslice.calculate_density import extract_dialogues, calculate_density, format_time
-from src.log.logger import scan_log
-
-def zhipu_glm_4v_plus_generate_title(video_path, artist):
-    with open(video_path, 'rb') as video_file:
-        video_base = base64.b64encode(video_file.read()).decode('utf-8')
-
-    client = ZhipuAI(api_key=Your_API_KEY)
-    response = client.chat.completions.create(
-        model="glm-4v-plus",
-        messages=[
-        {
-            "role": "user",
-            "content": [
-            {
-                "type": "video_url",
-                "video_url": {
-                    "url" : video_base
-                }
-            },
-            {
-                "type": "text",
-                "text": f"视频是{artist}的直播的切片，请根据该视频中的内容及弹幕信息，为这段视频起一个调皮并且吸引眼球的标题，注意标题中如果有“主播”请替换成{artist}。"
-            }
-            ]
-        }
-        ]
-    )
-    return response.choices[0].message.content.replace("《", "").replace("》", "")
-
-# https://stackoverflow.com/questions/64849478/cant-insert-stream-metadata-into-mp4
-def inject_metadata(video_path, generate_title, output_path):
-    """Slice the video using ffmpeg."""
-    command = [
-        'ffmpeg',
-        '-i', video_path,
-        '-metadata:g', f'generate={generate_title}',
-        '-c:v', 'copy',
-        '-c:a', 'copy',
-        output_path
-    ]
-    try:
-        result = subprocess.run(command, check=True, capture_output=True, text=True)
-        scan_log.debug(f"FFmpeg output: {result.stdout}")
-        if result.stderr:
-            scan_log.debug(f"FFmpeg debug: {result.stderr}")
-    except subprocess.CalledProcessError as e:
-        scan_log.error(f"Error: {e.stderr}")
-
-def slice_video(video_path, start_time, output_path, duration=f'00:00:{SLICE_DURATION}'):
-    """Slice the video using ffmpeg."""
-    command = [
-        'ffmpeg',
-        '-ss', format_time(start_time),
-        '-i', video_path,
-        '-t', duration,
-        '-map_metadata', '-1',
-        '-c:v', 'copy',
-        '-c:a', 'copy',
-        output_path
-    ]
-    try:
-        result = subprocess.run(command, check=True, capture_output=True, text=True)
-        scan_log.debug(f"FFmpeg output: {result.stdout}")
-        if result.stderr:
-            scan_log.debug(f"FFmpeg debug: {result.stderr}")
-    except subprocess.CalledProcessError as e:
-        scan_log.error(f"Error: {e.stderr}")
\ No newline at end of file
diff --git a/src/autoslice/zhipu_sdk.py b/src/autoslice/zhipu_sdk.py
new file mode 100644
index 0000000..3cb9c81
--- /dev/null
+++ b/src/autoslice/zhipu_sdk.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2024 bilive.
+
+import base64
+from src.config import Your_API_KEY
+from zhipuai import ZhipuAI
+from src.log.logger import scan_log
+
+def zhipu_glm_4v_plus_generate_title(video_path, artist):  # returns the model's reply text with any 《 》 characters removed
+    with open(video_path, 'rb') as video_file:
+        video_base = base64.b64encode(video_file.read()).decode('utf-8')  # the whole file is read into memory and base64-encoded
+
+    client = ZhipuAI(api_key=Your_API_KEY)
+    response = client.chat.completions.create(
+        model="glm-4v-plus-0111",
+        messages=[
+        {
+            "role": "user",
+            "content": [
+            {
+                "type": "video_url",
+                "video_url": {
+                    "url" : video_base  # the base64 string itself is sent in the "url" field, not a link
+                }
+            },
+            {
+                "type": "text",
+                "text": f"视频是{artist}的直播的切片，请根据该视频中的内容及弹幕信息，为这段视频起一个调皮并且吸引眼球的标题，注意标题中如果有“主播”请替换成{artist}"
+            }
+            ]
+        }
+        ]
+    )
+    scan_log.info(f"Prompt: 视频是{artist}的直播的切片，请根据该视频中的内容及弹幕信息，为这段视频起一个调皮并且吸引眼球的标题，注意标题中如果有“主播”请替换成{artist}")  # NOTE(review): prompt text is duplicated from the request above; keep both in sync
+    scan_log.info(f"生成的切片标题为: {response.choices[0].message.content}")  # logs the raw reply, before the 《 》 characters are stripped
+    return response.choices[0].message.content.replace("《", "").replace("》", "")
\ No newline at end of file
diff --git a/src/burn/render_video.py b/src/burn/render_video.py
index 7a1bbc2..1242bc8 100644
--- a/src/burn/render_video.py
+++ b/src/burn/render_video.py
@@ -3,12 +3,13 @@
 import argparse
 import os
 import subprocess
-from src.config import GPU_EXIST, SRC_DIR, MODEL_TYPE, AUTO_SLICE, SLICE_DURATION, MIN_VIDEO_SIZE
+from src.config import GPU_EXIST, SRC_DIR, MODEL_TYPE, AUTO_SLICE, SLICE_DURATION, MIN_VIDEO_SIZE, SLICE_NUM, SLICE_OVERLAP, SLICE_STEP
 from src.danmaku.generate_danmakus import get_resolution, process_danmakus
 from src.subtitle.generate_subtitles import generate_subtitles
 from src.burn.render_command import render_command
-from src.autoslice.slice_video import slice_video, inject_metadata, zhipu_glm_4v_plus_generate_title
-from src.autoslice.calculate_density import extract_dialogues, calculate_density, format_time
+from autoslice import slice_video_by_danmaku
+from src.autoslice.inject_metadata import inject_metadata
+from src.autoslice.zhipu_sdk import zhipu_glm_4v_plus_generate_title
 from src.upload.extract_video_info import get_video_info
 from src.log.logger import scan_log
 
@@ -61,16 +62,18 @@ def render_video(video_path):
     if AUTO_SLICE:
         if check_file_size(format_video_path) > MIN_VIDEO_SIZE:
             title, artist, date = get_video_info(format_video_path)
-            slice_video_path = format_video_path[:-4] + '_slice.mp4'
-            dialogues = extract_dialogues(ass_path)
-            max_start_time, max_density = calculate_density(dialogues)
-            formatted_time = format_time(max_start_time)
-            scan_log.info(f"The 30-second window with the highest density starts at {formatted_time} seconds with {max_density} danmakus.")
-            slice_video(format_video_path, max_start_time, slice_video_path)
-            glm_title = zhipu_glm_4v_plus_generate_title(slice_video_path, artist)
-            slice_video_flv_path = slice_video_path[:-4] + '.flv'
-            inject_metadata(slice_video_path, glm_title, slice_video_flv_path)
-            os.remove(slice_video_path)
+            slices_path = slice_video_by_danmaku(ass_path, format_video_path, SLICE_DURATION, SLICE_NUM, SLICE_OVERLAP, SLICE_STEP)
+            for slice_path in slices_path:
+                try:
+                    glm_title = zhipu_glm_4v_plus_generate_title(slice_path, artist)
+                    slice_video_flv_path = slice_path[:-4] + '.flv'
+                    inject_metadata(slice_path, glm_title, slice_video_flv_path)
+                    os.remove(slice_path)
+                    with open(f"{SRC_DIR}/upload/uploadVideoQueue.txt", "a") as file:
+                        scan_log.info(f"Complete {slice_video_flv_path} and wait for uploading!")
+                        file.write(f"{slice_video_flv_path}\n")
+                except Exception as e:
+                    scan_log.error(f"Error in {slice_path}: {e}")
 
     # Delete relative files
     for remove_path in [original_video_path, xml_path, ass_path, srt_path, jsonl_path]:
@@ -82,9 +85,4 @@ def render_video(video_path):
     # os.rename(original_video_path, test_path)
 
     with open(f"{SRC_DIR}/upload/uploadVideoQueue.txt", "a") as file:
-        file.write(f"{format_video_path}\n")
-        if AUTO_SLICE:
-            scan_log.info("Complete slice video and wait for uploading!")
-            slice_video_path = format_video_path[:-4] + '_slice.mp4'
-            slice_video_flv_path = slice_video_path[:-4] + '.flv'
-            file.write(f"{slice_video_flv_path}\n")
\ No newline at end of file
+        file.write(f"{format_video_path}\n")
\ No newline at end of file
diff --git a/src/config.py b/src/config.py
index d665605..5711223 100644
--- a/src/config.py
+++ b/src/config.py
@@ -12,7 +12,10 @@
 Inference_Model = "small"
 # ============================ The video slice configuration ==================
 AUTO_SLICE = False
-SLICE_DURATION = 30
+SLICE_DURATION = 60 # length of each slice in seconds; better not to exceed 300
+SLICE_NUM = 2 # presumably how many slices are cut from each video -- TODO confirm against auto_slice_video
+SLICE_OVERLAP = 30 # presumably the allowed overlap between slices, in seconds -- TODO confirm
+SLICE_STEP = 1 # presumably the step of the danmaku-density scan, in seconds -- TODO confirm
 # The minimum video size to be sliced (MB)
 MIN_VIDEO_SIZE = 200
 # Apply for your own GLM-4v-Plus API key at https://www.bigmodel.cn/invite?icode=shBtZUfNE6FfdMH1R6NybGczbXFgPRGIalpycrEwJ28%3D