diff --git a/.config/hakari.toml b/.config/hakari.toml
index 851be7019518e..8a1b9b8ab642b 100644
--- a/.config/hakari.toml
+++ b/.config/hakari.toml
@@ -3,8 +3,8 @@
 hakari-package = "workspace-hack"
-# Format for `workspace-hack = ...` lines in other Cargo.tomls. Requires cargo-hakari 0.9.8 or above.
-dep-format-version = "2"
+# Format for `workspace-hack = ...` lines in other Cargo.tomls. Requires cargo-hakari 0.9.22 or above.
+dep-format-version = "4"
 # Setting workspace.resolver = "2" in the root Cargo.toml is HIGHLY recommended.
 # Hakari works much better with the new feature resolver.
@@ -24,4 +24,7 @@ platforms = [
 # exact-versions = true
 [traversal-excludes]
-workspace-members = ["workspace-config"]
+workspace-members = ["workspace-config", "risingwave_object_store"]
+third-party = [
+ { name = "opendal" },
+]
\ No newline at end of file
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 717ebe5da03be..5b64ba3cbd751 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,7 +1,9 @@
-I hereby agree to the terms of the [Singularity Data, Inc. Contributor License Agreement](https://gist.github.com/skyzh/0663682a70b0edde7ae991492f2314cb#file-s9y_cla).
+I hereby agree to the terms of the [RisingWave Labs, Inc. Contributor License Agreement](https://gist.github.com/TennyZhuang/f00be7f16996ea48effb049aa7be4d66#file-rw_cla).
 ## What's changed and what's your intention?
+
 ## Checklist
 - [ ] I have written necessary rustdoc comments
 - [ ] I have added necessary unit tests and integration tests
 - [ ] I have added fuzzing tests or opened an issue to track them. (Optional, recommended for new SQL features).
+- [ ] I have demonstrated that backward compatibility is not broken by breaking changes and created issues to track deprecated features to be removed in the future. (Please refer to the issue)
 - [ ] All checks passed in `./risedev check` (or alias, `./risedev c`)
 ## Documentation
-If your pull request contains user-facing changes, please specify the types of the changes, and create a release note. Otherwise, please feel free to remove this section.
+- [ ] My PR **DOES NOT** contain user-facing changes.
+
+
+
Click here for Documentation
 ### Types of user-facing changes
-Please keep the types that apply to your changes, and remove those that do not apply.
+Please keep the types that apply to your changes, and remove the others.
-* Installation and deployment
-* Connector (sources & sinks)
-* SQL commands, functions, and operators
-* RisingWave cluster configuration changes
-* Other (please specify in the release note below)
+- Installation and deployment
+- Connector (sources & sinks)
+- SQL commands, functions, and operators
+- RisingWave cluster configuration changes
+- Other (please specify in the release note below)
 ### Release note
-Please create a release note for your changes. In the release note, focus on the impact on users, and mention the environment or conditions where the impact may occur.
+
-## Refer to a related PR or issue link (optional)
+
diff --git a/.github/workflows/dashboard_main.yml b/.github/workflows/dashboard_main.yml index d8b697b05583b..b20fb0956d4a8 100644 --- a/.github/workflows/dashboard_main.yml +++ b/.github/workflows/dashboard_main.yml @@ -7,7 +7,7 @@ jobs: dashboard-ui-deploy: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-node@v2 with: node-version: '18' diff --git a/.github/workflows/dashboard_pr.yml b/.github/workflows/dashboard_pr.yml index f2bd578debf45..5df7869e964ea 100644 --- a/.github/workflows/dashboard_pr.yml +++ b/.github/workflows/dashboard_pr.yml @@ -5,7 +5,7 @@ on: paths: [dashboard/**, proto/**] concurrency: - group: dashbaord-build-${{ github.ref }} + group: dashboard-build-${{ github.ref }} cancel-in-progress: true jobs: diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml index a9b51042456dd..e88d030f46a14 100644 --- a/.github/workflows/doc.yml +++ b/.github/workflows/doc.yml @@ -9,7 +9,7 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions-rs/toolchain@v1 - name: Install dependencies run: sudo apt-get update && sudo apt-get install -y make build-essential cmake protobuf-compiler curl openssl libssl-dev libsasl2-dev libcurl4-openssl-dev pkg-config postgresql-client tmux lld diff --git a/.github/workflows/license_check.yml b/.github/workflows/license_check.yml index 2f9c6de615d1b..a73852a3404e8 100644 --- a/.github/workflows/license_check.yml +++ b/.github/workflows/license_check.yml @@ -7,11 +7,13 @@ on: - "forks/*" pull_request: branches: [main] + merge_group: + types: [checks_requested] jobs: license-header-check: runs-on: ubuntu-latest name: license-header-check steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Check License Header uses: apache/skywalking-eyes/header@main diff --git a/.github/workflows/protobuf-breaking.yml b/.github/workflows/protobuf-breaking.yml new file mode 100644 index 0000000000000..d68f1d5d5eba0 --- /dev/null +++ b/.github/workflows/protobuf-breaking.yml @@ -0,0 +1,21 @@ +name: Protobuf Breaking Check + +on: + pull_request: + branches: [main] + paths: [proto/**] + +jobs: + buf-breaking-check: + runs-on: ubuntu-latest + name: Check breaking changes in Protobuf files + steps: + - uses: actions/checkout@v2 + - uses: bufbuild/buf-setup-action@v1 + with: + github_token: ${{ github.token }} + # Run breaking change detection against the `main` branch + - uses: bufbuild/buf-breaking-action@v1 + with: + input: 'proto' + against: 'https://github.com/risingwavelabs/risingwave.git#branch=main,subdir=proto' diff --git a/.github/workflows/typo.yml b/.github/workflows/typo.yml index 5acb16ee24ea6..51f1f221b4fba 100644 --- a/.github/workflows/typo.yml +++ b/.github/workflows/typo.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Actions Repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Check spelling of the entire repository uses: crate-ci/typos@v1.11.1 diff --git a/.gitignore b/.gitignore index d322f7c8f4b56..907131a7ac187 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,8 @@ .DS_Store # Visual Studio Code -.vscode/ +.vscode/* +!.vscode/*.example .devcontainer/ # JetBrains diff --git a/.licenserc.yaml b/.licenserc.yaml index 192c4ae5e70f4..3589dbe6fd971 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -1,7 +1,7 @@ header: license: spdx-id: Apache-2.0 - copyright-owner: Singularity Data + copyright-owner: RisingWave Labs paths: - "src/**/*.rs" diff 
--git a/.mergify.yml b/.mergify.yml index 2b7541e9a0151..c4e86d18cd74c 100644 --- a/.mergify.yml +++ b/.mergify.yml @@ -22,23 +22,6 @@ queue_rules: - "label=mergify/can-merge" pull_request_rules: - - name: Automatic merge on approval (high priority) - conditions: - - "base=main" - - "#approved-reviews-by>=1" - - "#changes-requested-reviews-by=0" - - "check-success=buildkite/pull-request" - - "check-failure!=buildkite/docker" - - "check-success=task-list-completed" - - "label=mergify/can-merge" - - "label=mergify/high-priority" - actions: - queue: - name: default - method: squash - priority: high - commit_message_template: *commit_message_template - - name: Automatic merge on approval conditions: - "base=main" @@ -52,7 +35,6 @@ pull_request_rules: queue: name: default method: squash - priority: medium commit_message_template: *commit_message_template - name: Notify author on queue failure diff --git a/.typos.toml b/.typos.toml index 93b8b43f9b58e..4d60ef0ffcafd 100644 --- a/.typos.toml +++ b/.typos.toml @@ -9,4 +9,10 @@ numer = "numer" nd = "nd" [files] -extend-exclude = ["src/tests/regress/data", "e2e_test", "**/*.svg", "scripts"] +extend-exclude = [ + "src/tests/regress/data", + "e2e_test", + "**/*.svg", + "scripts", + "src/frontend/planner_test/tests/testdata", +] diff --git a/.vscode/launch.json.example b/.vscode/launch.json.example new file mode 100644 index 0000000000000..6f8fbb18d4fe7 --- /dev/null +++ b/.vscode/launch.json.example @@ -0,0 +1,18 @@ +// Copy this file to .vscode/launch.json and edit it to your needs +{ + "version": "0.2.0", + "configurations": [ + { + // Use this if you want to debug risedev p locally + "name": "Launch playground debug", + "type": "lldb", + "request": "launch", + "program": "${workspaceRoot}/target/debug/risingwave", + "args": [ + "playground" + ], + "cwd": "${workspaceRoot}", + "preLaunchTask": "build rw bin" + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json.example b/.vscode/settings.json.example new file mode 100644 index 0000000000000..341fd7937804c --- /dev/null +++ b/.vscode/settings.json.example @@ -0,0 +1,42 @@ +// Copy this file to .vscode/settings.json and edit it to your needs +{ + "protoc": { + "options": [ + "--proto_path=proto" + ] + }, + "yaml.schemas": { + "src/risedevtool/schemas/risedev.json": "risedev.yml", + "src/risedevtool/schemas/risedev-profiles.user.json": "risedev-profiles.user.yml" + }, + // // ============================================================================ + // // BEGIN SECTION: configurations for the development of deterministic testing + // // ============================================================================ + // "rust-analyzer.cargo.extraEnv": { + // "RUSTFLAGS": "--cfg tokio_unstable --cfg madsim" + // }, + // "rust-analyzer.runnableEnv": { + // "RUSTFLAGS": "--cfg tokio_unstable --cfg madsim" + // }, + // "rust-analyzer.checkOnSave.overrideCommand": [ + // "cargo", + // "check", + // "-p", + // "risingwave_simulation_scale", // Replace with your crate + // "--tests", + // "--message-format=json", + // ], + // "rust-analyzer.runnables.extraArgs": [ + // "--profile", + // "ci-release" // Replace with your target profile + // ], + // "rust-analyzer.server.extraEnv": { + // "CARGO_TARGET_DIR": "target/analyzer" + // }, + // "terminal.integrated.env.linux": { + // "RUSTFLAGS": "--cfg tokio_unstable --cfg madsim" + // }, + // // =========================================================================== + // // END SECTION + // // 
=========================================================================== +} \ No newline at end of file diff --git a/.vscode/tasks.json.example b/.vscode/tasks.json.example new file mode 100644 index 0000000000000..6f4d809eaa9eb --- /dev/null +++ b/.vscode/tasks.json.example @@ -0,0 +1,15 @@ +// Copy this file to .vscode/tasks.json and edit it to your needs +{ + "version": "2.0.0", + "tasks": [ + { + "label": "build rw bin", + "type": "shell", + "command": "/bin/bash", + "args": [ + "-c", + "risedev k ; cargo build --bin risingwave" + ] + } + ] +} \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d6314087b5a68..d0391fca98781 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,6 +11,8 @@ To report bugs, create a [GitHub issue](https://github.com/risingwavelabs/rising ## Table of contents +- [Contribution guidelines](#contribution-guidelines) + - [Table of contents](#table-of-contents) - [Tests and miscellaneous checks](#tests-and-miscellaneous-checks) - [Submit a PR](#submit-a-pr) - [Pull Request title](#pull-request-title) @@ -53,5 +55,4 @@ You may also check out previous PRs in the [PR list](https://github.com/risingwa ### Sign the CLA -Contributors will need to sign Singularity Data's CLA. - +Contributors will need to sign RisingWave Labs' CLA. diff --git a/Cargo.lock b/Cargo.lock index 85f8fb87f6b7b..f840a4d5bb80a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,9 +10,9 @@ checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" [[package]] name = "addr2line" -version = "0.17.0" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9ecd88a8c8378ca913a680cd98f0f13ac67383d35993f86c90a70e3f137816b" +checksum = "a76fd60b23679b7d19bd066031410fb7e458ccc5e958eb5c325888ce4baedc97" dependencies = [ "gimli", ] @@ -42,9 +42,9 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107" +checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" dependencies = [ "cfg-if", "const-random", @@ -55,9 +55,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "0.7.19" +version = "0.7.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" dependencies = [ "memchr", ] @@ -85,9 +85,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anyhow" -version = "1.0.66" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6" +checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" dependencies = [ "backtrace", ] @@ -95,7 +95,7 @@ dependencies = [ [[package]] name = "apache-avro" version = "0.14.0" -source = "git+https://github.com/risingwavelabs/avro?branch=master#024f422ebda0d33b153e2ceecd2c3d538f90af21" +source = "git+https://github.com/risingwavelabs/avro?branch=waruto/modify-decimal#99acf32c68d17d1a94caf08b5918848fc5036cdf" dependencies = [ "byteorder", "bzip2", @@ -122,9 +122,9 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.5.1" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"983cd8b9d4b02a6dc6ffa557262eb5858a27a0038ffffe21a0f133eaa819a164" +checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" [[package]] name = "arrayvec" @@ -134,32 +134,35 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow-array" -version = "30.0.0" -source = "git+https://github.com/apache/arrow-rs?rev=6139d898#6139d8984ca702fa744aaf3b0b2193dd85468cf7" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1e6e839764618a911cc460a58ebee5ad3d42bc12d9a5e96a29b7cc296303aa1" dependencies = [ - "ahash 0.8.2", + "ahash 0.8.3", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "half 2.1.0", - "hashbrown 0.13.1", + "half 2.2.1", + "hashbrown 0.13.2", "num", ] [[package]] name = "arrow-buffer" -version = "30.0.0" -source = "git+https://github.com/apache/arrow-rs?rev=6139d898#6139d8984ca702fa744aaf3b0b2193dd85468cf7" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03a21d232b1bc1190a3fdd2f9c1e39b7cd41235e95a0d44dd4f522bc5f495748" dependencies = [ - "half 2.1.0", + "half 2.2.1", "num", ] [[package]] name = "arrow-cast" -version = "30.0.0" -source = "git+https://github.com/apache/arrow-rs?rev=6139d898#6139d8984ca702fa744aaf3b0b2193dd85468cf7" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83dcdb1436cac574f1c1b30fda91c53c467534337bef4064bbd4ea2d6fbc6e04" dependencies = [ "arrow-array", "arrow-buffer", @@ -173,26 +176,28 @@ dependencies = [ [[package]] name = "arrow-data" -version = "30.0.0" -source = "git+https://github.com/apache/arrow-rs?rev=6139d898#6139d8984ca702fa744aaf3b0b2193dd85468cf7" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14e3e69c9fd98357eeeab4aa0f626ecf7ecf663e68e8fc04eac87c424a414477" dependencies = [ "arrow-buffer", "arrow-schema", - "half 2.1.0", + "half 2.2.1", "num", ] [[package]] name = "arrow-flight" -version = "30.0.0" -source = "git+https://github.com/apache/arrow-rs?rev=6139d898#6139d8984ca702fa744aaf3b0b2193dd85468cf7" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd3ce08d31a1a24497bcf144029f8475539984aa50e41585e01b2057cf3dbb21" dependencies = [ "arrow-array", "arrow-buffer", "arrow-cast", "arrow-ipc", "arrow-schema", - "base64 0.20.0", + "base64 0.21.0", "bytes", "futures", "proc-macro2", @@ -206,8 +211,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "30.0.0" -source = "git+https://github.com/apache/arrow-rs?rev=6139d898#6139d8984ca702fa744aaf3b0b2193dd85468cf7" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64cac2706acbd796965b6eaf0da30204fe44aacf70273f8cb3c9b7d7f3d4c190" dependencies = [ "arrow-array", "arrow-buffer", @@ -219,13 +225,15 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "30.0.0" -source = "git+https://github.com/apache/arrow-rs?rev=6139d898#6139d8984ca702fa744aaf3b0b2193dd85468cf7" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73ca49d010b27e2d73f70c1d1f90c1b378550ed0f4ad379c4dea0c997d97d723" [[package]] name = "arrow-select" -version = "30.0.0" -source = "git+https://github.com/apache/arrow-rs?rev=6139d898#6139d8984ca702fa744aaf3b0b2193dd85468cf7" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"976cbaeb1a85c09eea81f3f9c149c758630ff422ed0238624c5c3f4704b6a53c" dependencies = [ "arrow-array", "arrow-buffer", @@ -234,12 +242,6 @@ dependencies = [ "num", ] -[[package]] -name = "assert-impl" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3464313de0c867016e3e69d7e1e9ae3499bcc4c18e12283d381359ed38b5b9e" - [[package]] name = "assert-json-diff" version = "2.0.2" @@ -258,15 +260,28 @@ checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" [[package]] name = "async-channel" -version = "1.7.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14485364214912d3b19cc3435dde4df66065127f05fa0d75c712f36f12c2f28" +checksum = "cf46fee83e5ccffc220104713af3292ff9bc7c64c7de289f66dae8e38d826833" dependencies = [ - "concurrent-queue 1.2.4", + "concurrent-queue", "event-listener", "futures-core", ] +[[package]] +name = "async-compat" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b48b4ff0c2026db683dea961cd8ea874737f56cffca86fa84415eaddc51c00d" +dependencies = [ + "futures-core", + "futures-io", + "once_cell", + "pin-project-lite", + "tokio", +] + [[package]] name = "async-executor" version = "1.5.0" @@ -275,7 +290,7 @@ checksum = "17adb73da160dfb475c183343c8cccd80721ea5a605d3eb57125f0a7b7a92d0b" dependencies = [ "async-lock", "async-task", - "concurrent-queue 2.0.0", + "concurrent-queue", "fastrand", "futures-lite", "slab", @@ -298,13 +313,13 @@ dependencies = [ [[package]] name = "async-io" -version = "1.10.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8121296a9f05be7f34aa4196b1747243b3b62e048bb7906f644f3fbfc490cf7" +checksum = "8c374dda1ed3e7d8f0d9ba58715f924862c63eae6849c92d3a18e7fbde9e2794" dependencies = [ "async-lock", "autocfg", - "concurrent-queue 1.2.4", + "concurrent-queue", "futures-lite", "libc", "log", @@ -313,7 +328,7 @@ dependencies = [ "slab", "socket2", "waker-fn", - "winapi", + "windows-sys 0.42.0", ] [[package]] @@ -328,9 +343,9 @@ dependencies = [ [[package]] name = "async-recursion" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cda8f4bcc10624c4e85bc66b3f452cca98cfa5ca002dc83a16aad2367641bea" +checksum = "3b015a331cc64ebd1774ba119538573603427eaace0a1950c423ab971f903796" dependencies = [ "proc-macro2", "quote", @@ -392,9 +407,9 @@ checksum = "7a40729d2133846d9ed0ea60a8b9541bccddab49cd30f0715a1da672fe9a2524" [[package]] name = "async-trait" -version = "0.1.58" +version = "0.1.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e805d94e6b5001b651426cf4cd446b1ab5f319d27bab5c644f61de0a804360c" +checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2" dependencies = [ "proc-macro2", "quote", @@ -428,9 +443,9 @@ dependencies = [ [[package]] name = "atomic-waker" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "065374052e7df7ee4047b1160cca5e1467a12351a40b3da123c870ba0b8eda2a" +checksum = "debc29dde2e69f9e47506b525f639ed42300fc014a3e007832592448fa8e4599" [[package]] name = "atty" @@ -438,7 +453,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -513,7 +528,7 @@ dependencies = [ "http", 
"hyper", "ring", - "time 0.3.15", + "time 0.3.17", "tokio", "tower", "tracing", @@ -602,29 +617,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "aws-sdk-sqs" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b26bb3d12238492cb12bde0de8486679b007daada21fdb110913b32a2a38275" -dependencies = [ - "aws-endpoint", - "aws-http", - "aws-sig-auth", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-query", - "aws-smithy-types", - "aws-smithy-xml", - "aws-types", - "bytes", - "http", - "tokio-stream", - "tower", -] - [[package]] name = "aws-sdk-sso" version = "0.21.0" @@ -699,7 +691,7 @@ dependencies = [ "percent-encoding", "regex", "ring", - "time 0.3.15", + "time 0.3.17", "tracing", ] @@ -751,7 +743,9 @@ dependencies = [ "http", "http-body", "hyper", + "hyper-rustls", "hyper-tls", + "lazy_static", "pin-project-lite", "tokio", "tower", @@ -835,7 +829,7 @@ dependencies = [ "itoa", "num-integer", "ryu", - "time 0.3.15", + "time 0.3.17", ] [[package]] @@ -858,16 +852,16 @@ dependencies = [ "aws-smithy-http", "aws-smithy-types", "http", - "rustc_version 0.4.0", + "rustc_version", "tracing", "zeroize", ] [[package]] name = "axum" -version = "0.6.1" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08b108ad2665fa3f6e6a517c3d80ec3e77d224c47d605167aefaa5d7ef97fa48" +checksum = "e5694b64066a2459918d8074c2ce0d5a88f409431994c2356617c8ae0c4721fc" dependencies = [ "async-trait", "axum-core", @@ -898,9 +892,9 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.3.0" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79b8558f5a0581152dc94dcd289132a1d377494bdeafcd41869b3258e3e2ad92" +checksum = "1cae3e661676ffbacb30f1a824089a8c9150e71017f7e1e38f2aa32009188d34" dependencies = [ "async-trait", "bytes", @@ -913,11 +907,35 @@ dependencies = [ "tower-service", ] +[[package]] +name = "backon" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cd1a59bc091e593ee9ed62df4e4a07115e00a0e0a52fd7e0e04540773939b80" +dependencies = [ + "futures", + "pin-project", + "rand 0.8.5", + "tokio", +] + +[[package]] +name = "backon" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f34fac4d7cdaefa2deded0eda2d5d59dbfd43370ff3f856209e72340ae84c294" +dependencies = [ + "futures", + "pin-project", + "rand 0.8.5", + "tokio", +] + [[package]] name = "backtrace" -version = "0.3.66" +version = "0.3.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cab84319d616cfb654d03394f38ab7e6f0919e181b1b57e1fd15e7fb4077d9a7" +checksum = "233d376d6d185f2a3093e58f283f60f880315b6c60075b01f36b3b85154564ca" dependencies = [ "addr2line", "cc", @@ -928,6 +946,19 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "bae" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33b8de67cc41132507eeece2584804efcb15f85ba516e34c944b7667f480397a" +dependencies = [ + "heck 0.3.3", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "base64" version = "0.13.1" @@ -936,9 +967,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" [[package]] name = "base64" -version = "0.20.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0ea22880d78093b0cbe17c89f64a7d457941e65759157ec6cb31a31d652b05e5" +checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" [[package]] name = "bcc" @@ -961,42 +992,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f40afb3abbf90895dda3ddbc6d8734d24215130a22d646067690f5e318f81bc" [[package]] -name = "bigdecimal" -version = "0.3.0" +name = "bincode" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aaf33151a6429fe9211d1b276eafdf70cdff28b071e76c0b0e1503221ea3744" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" dependencies = [ - "num-bigint", - "num-integer", - "num-traits", + "serde", ] [[package]] name = "bincode" -version = "1.3.3" +version = "2.0.0-rc.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +checksum = "7bb50c5a2ef4b9b1e7ae73e3a73b52ea24b20312d629f9c4df28260b7ad2c3c4" dependencies = [ + "bincode_derive", "serde", ] [[package]] -name = "bindgen" -version = "0.59.2" +name = "bincode_derive" +version = "2.0.0-rc.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8" +checksum = "0a45a23389446d2dd25dc8e73a7a3b3c43522b630cac068927f0649d43d719d2" dependencies = [ - "bitflags", - "cexpr", - "clang-sys", - "lazy_static", - "lazycell", - "peeking_take_while", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", + "virtue", ] [[package]] @@ -1012,15 +1032,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] -name = "bitvec" -version = "1.0.1" +name = "bk-tree" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +checksum = "6121f6e107e94d717b5ca2631d56e7c2ed1542a21b2eb87b4bda1d6c1420ef3f" dependencies = [ - "funty", - "radium", - "tap", - "wyz", + "fnv", + "triple_accel", ] [[package]] @@ -1034,32 +1052,99 @@ dependencies = [ [[package]] name = "blocking" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6ccb65d468978a086b69884437ded69a90faab3bbe6e67f242173ea728acccc" +checksum = "3c67b173a56acffd6d2326fb7ab938ba0b00a71480e14902b2591c87bc5741e8" dependencies = [ "async-channel", + "async-lock", "async-task", "atomic-waker", "fastrand", "futures-lite", - "once_cell", +] + +[[package]] +name = "borsh" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3ef05d137e34b7ac51dbec170ee523a9b728cff71385796771d259771d592f8" +dependencies = [ + "borsh-derive", + "hashbrown 0.13.2", +] + +[[package]] +name = "borsh-derive" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "190b1188f062217531748807129459c8c14641b648e887e39681a433db7fc939" +dependencies = [ + "borsh-derive-internal", + "borsh-schema-derive-internal", + "proc-macro-crate 0.1.5", + "proc-macro2", + "syn", +] + +[[package]] +name = "borsh-derive-internal" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fcf747a3e4eb47869441664df09d0eb88dcc9a85d499860efb82c2cfe6affc" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "borsh-schema-derive-internal" 
+version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f671d085f791c5fd3331c843ded45454b034b6188bf0f78ed28e7fd66a8b3f69" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] name = "bstr" -version = "0.2.17" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +checksum = "b7f0778972c64420fdedc63f09919c8a88bda7b25135357fd25a5d9f3257e832" dependencies = [ "memchr", + "serde", ] [[package]] name = "bumpalo" -version = "3.11.1" +version = "3.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" + +[[package]] +name = "bytecheck" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d11cac2c12b5adc6570dad2ee1b87eff4955dac476fe12d81e5fdd352e52406f" +dependencies = [ + "bytecheck_derive", + "ptr_meta", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" +checksum = "13e576ebe98e605500b3c8041bb888e966653577172df6dd97398714eb30b9bf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "bytecount" @@ -1069,9 +1154,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" [[package]] name = "bytemuck" -version = "1.12.3" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aaa3a8d9a1ca92e282c96a32d6511b695d7d994d1d102ba85d279f9b2756947f" +checksum = "c041d3eab048880cb0b86b256447da3f18859a163c3b8d8893f4e6368abe6393" [[package]] name = "byteorder" @@ -1081,9 +1166,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "bytes" -version = "1.2.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db" +checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" dependencies = [ "serde", ] @@ -1109,9 +1194,9 @@ dependencies = [ [[package]] name = "bzip2" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6afcd980b5f3a45017c57e57a2fcccbb351cc43a356ce117ef760ef8052b89b0" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" dependencies = [ "bzip2-sys", "libc", @@ -1128,17 +1213,11 @@ dependencies = [ "pkg-config", ] -[[package]] -name = "cache-padded" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1db59621ec70f09c5e9b597b220c7a2b43611f4710dc03ceb8748637775692c" - [[package]] name = "camino" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ad0e1e3e88dd237a156ab9f571021b8a158caa0ae44b1968a241efb5144c1e" +checksum = "c77df041dc383319cc661b428b6961a005db4d6808d5e12536931b1ca9556055" dependencies = [ "serde", ] @@ -1160,7 +1239,7 @@ checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa" dependencies = [ "camino", "cargo-platform", - "semver 1.0.14", + "semver", "serde", "serde_json", ] @@ -1179,9 +1258,9 @@ checksum = "a2698f953def977c68f935bb0dfa959375ad4638570e969e2f1e9f433cbf1af6" [[package]] name = "cc" -version = "1.0.76" +version = "1.0.79" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "76a284da2e6fe2092f2353e51713435363112dfd60030e22add80be333fb928f" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" dependencies = [ "jobserver", ] @@ -1192,15 +1271,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom 7.1.1", -] - [[package]] name = "cfg-if" version = "1.0.0" @@ -1218,7 +1288,7 @@ dependencies = [ "num-integer", "num-traits", "serde", - "time 0.1.44", + "time 0.1.45", "wasm-bindgen", "winapi", ] @@ -1274,17 +1344,6 @@ dependencies = [ "half 1.8.2", ] -[[package]] -name = "clang-sys" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa2e27ae6ab525c3d369ded447057bca5438d86dc3a68f6faafb8269ba82ebf3" -dependencies = [ - "glob", - "libc", - "libloading", -] - [[package]] name = "clap" version = "3.2.23" @@ -1304,14 +1363,14 @@ dependencies = [ [[package]] name = "clap" -version = "4.0.26" +version = "4.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2148adefda54e14492fb9bddcc600b4344c5d1a3123bd666dcb939c6f0e0e57e" +checksum = "f13b9c79b5d1dd500d20ef541215a6423c75829ef43117e1b4d17fd8af0b5d76" dependencies = [ - "atty", "bitflags", - "clap_derive 4.0.21", - "clap_lex 0.3.0", + "clap_derive 4.1.0", + "clap_lex 0.3.1", + "is-terminal", "once_cell", "strsim", "termcolor", @@ -1323,7 +1382,7 @@ version = "3.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro-error", "proc-macro2", "quote", @@ -1332,11 +1391,11 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.0.21" +version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014" +checksum = "684a277d672e91966334af371f1a7b5833f9aa00b07c84e92fbce95e00208ce8" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro-error", "proc-macro2", "quote", @@ -1354,9 +1413,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8" +checksum = "783fe232adfca04f90f56201b26d79682d4cd2625e0bc7290b95123afe558ade" dependencies = [ "os_str_bytes", ] @@ -1404,9 +1463,9 @@ dependencies = [ [[package]] name = "comfy-table" -version = "6.1.2" +version = "6.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1090f39f45786ec6dc6286f8ea9c75d0a7ef0a0d3cda674cef0c3af7b307fbc2" +checksum = "6e7b787b0dc42e8111badfdbe4c3059158ccb2db8780352fa1b01e8ccf45cc4d" dependencies = [ "crossterm", "strum", @@ -1416,34 +1475,24 @@ dependencies = [ [[package]] name = "concurrent-queue" -version = "1.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af4780a44ab5696ea9e28294517f1fffb421a83a25af521333c838635509db9c" -dependencies = [ - "cache-padded", -] - -[[package]] -name = "concurrent-queue" -version = "2.0.0" +version = "2.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd7bef69dc86e3c610e4e7aed41035e2a7ed12e72dd7530f61327a6579a4390b" +checksum = "c278839b831783b70278b14df4d45e1beb1aad306c07bb796637de9a0e323e8e" dependencies = [ "crossbeam-utils", ] [[package]] name = "console" -version = "0.15.2" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c050367d967ced717c04b65d8c619d863ef9292ce0c5760028655a2fb298718c" +checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60" dependencies = [ "encode_unicode", "lazy_static", "libc", - "terminal_size", "unicode-width", - "winapi", + "windows-sys 0.42.0", ] [[package]] @@ -1549,11 +1598,11 @@ dependencies = [ [[package]] name = "crc" -version = "3.0.0" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53757d12b596c16c78b83458d732a5d1a17ab3f53f2f7412f6fb57cc8a140ab3" +checksum = "86ec7a15cbe22e59248fc7eadb1907dab5ba09372595da4d73dd805ed4417dfe" dependencies = [ - "crc-catalog 2.1.0", + "crc-catalog 2.2.0", ] [[package]] @@ -1564,9 +1613,9 @@ checksum = "ccaeedb56da03b09f598226e25e80088cb4cd25f316e6e4df7d695f0feeb1403" [[package]] name = "crc-catalog" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d0165d2900ae6778e36e80bbc4da3b5eefccee9ba939761f9c2882a5d9af3ff" +checksum = "9cace84e55f07e7301bae1c519df89cdad8cc3cd868413d3fdbdeca9ff3db484" [[package]] name = "crc32c" @@ -1574,7 +1623,7 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dfea2db42e9927a3845fb268a10a72faed6d416065f77873f05e411457c363e" dependencies = [ - "rustc_version 0.4.0", + "rustc_version", ] [[package]] @@ -1586,6 +1635,19 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crepe" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "493fa2eaae544a976a9e60394d012254515885d19015e75f5d7cfb28a5db1069" +dependencies = [ + "petgraph", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "criterion" version = "0.4.0" @@ -1661,23 +1723,22 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.10" +version = "0.9.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" +checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", - "memoffset", - "once_cell", + "memoffset 0.7.1", "scopeguard", ] [[package]] name = "crossbeam-queue" -version = "0.3.6" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd42583b04998a5363558e5f9291ee5a5ff6b49944332103f251e7479a82aa7" +checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" dependencies = [ "cfg-if", "crossbeam-utils", @@ -1685,12 +1746,11 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.11" +version = "0.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" +checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" dependencies = [ "cfg-if", - "once_cell", ] [[package]] @@ -1785,9 +1845,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.81" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"97abf9f0eca9e52b7f81b945524e76710e6cb2366aead23b7d4fbf72e281f888" +checksum = "bc831ee6a32dd495436e317595e639a587aa9907bef96fe6e6abc290ab6204e9" dependencies = [ "cc", "cxxbridge-flags", @@ -1797,9 +1857,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.81" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cc32cc5fea1d894b77d269ddb9f192110069a8a9c1f1d441195fba90553dea3" +checksum = "94331d54f1b1a8895cd81049f7eaaaef9d05a7dcb4d1fd08bf3ff0806246789d" dependencies = [ "cc", "codespan-reporting", @@ -1812,15 +1872,15 @@ dependencies = [ [[package]] name = "cxxbridge-flags" -version = "1.0.81" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ca220e4794c934dc6b1207c3b42856ad4c302f2df1712e9f8d2eec5afaacf1f" +checksum = "48dcd35ba14ca9b40d6e4b4b39961f23d835dbb8eed74565ded361d93e1feb8a" [[package]] name = "cxxbridge-macro" -version = "1.0.81" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b846f081361125bfc8dc9d3940c84e1fd83ba54bbca7b17cd29483c828be0704" +checksum = "81bbeb29798b407ccd82a3324ade1a7286e0d29851475990b612670f6f5124d2" dependencies = [ "proc-macro2", "quote", @@ -1829,9 +1889,9 @@ dependencies = [ [[package]] name = "darling" -version = "0.14.2" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0dd3cd20dc6b5a876612a6e5accfe7f3dd883db6d07acfbf14c128f61550dfa" +checksum = "c0808e1bd8671fb44a113a14e13497557533369847788fa2ae912b6ebfce9fa8" dependencies = [ "darling_core", "darling_macro", @@ -1839,9 +1899,9 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.14.2" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a784d2ccaf7c98501746bf0be29b2022ba41fd62a2e622af997a03e9f972859f" +checksum = "001d80444f28e193f30c2f293455da62dcf9a6b29918a4253152ae2b1de592cb" dependencies = [ "fnv", "ident_case", @@ -1853,9 +1913,9 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.14.2" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7618812407e9402654622dd402b0a89dff9ba93badd6540781526117b92aab7e" +checksum = "b36230598a2d5de7ec1c6f51f72d8a99a9208daff41de2084d06e3fd3ea56685" dependencies = [ "darling_core", "quote", @@ -1887,7 +1947,7 @@ dependencies = [ "hashbrown 0.12.3", "lock_api", "once_cell", - "parking_lot_core 0.9.4", + "parking_lot_core 0.9.7", ] [[package]] @@ -1942,11 +2002,12 @@ dependencies = [ [[package]] name = "dialoguer" -version = "0.10.2" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a92e7e37ecef6857fdc0c0c5d42fd5b0938e46590c2183cc92dd310a6d078eb1" +checksum = "af3c796f3b0b408d9fd581611b47fa850821fcb84aa640b83a3c1a5be2d691f2" dependencies = [ "console", + "shell-words", "tempfile", "zeroize", ] @@ -1957,12 +2018,6 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" -[[package]] -name = "difference" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" - [[package]] name = "digest" version = "0.10.6" @@ -1974,6 +2029,32 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + +[[package]] +name = "dlv-list" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257" + [[package]] name = "downcast-rs" version = "1.2.0" @@ -1982,9 +2063,9 @@ checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" [[package]] name = "duct" -version = "0.13.5" +version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fc6a0a59ed0888e0041cf708e66357b7ae1a82f1c67247e1f93b5e0818f7d8d" +checksum = "37ae3fc31835f74c2a7ceda3aeede378b0ae2e74c8f1c36559fcc9ae2a4e7d3e" dependencies = [ "libc", "once_cell", @@ -1992,17 +2073,49 @@ dependencies = [ "shared_child", ] +[[package]] +name = "duration-str" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f94be4825ff6a563f1bfbdb786ae10c687333c7524fade954e2271170e7f7e6d" +dependencies = [ + "chrono", + "nom", + "rust_decimal", + "serde", + "thiserror", + "time 0.3.17", +] + [[package]] name = "dyn-clone" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f94fa09c2aeea5b8839e414b7b841bf429fd25b9c522116ac97ee87856d88b2" +checksum = "c9b0705efd4599c15a38151f4721f7bc388306f61084d3bfd50bd07fbca5cb60" + +[[package]] +name = "easy-ext" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49457524c7e65648794c98283282a0b7c73b10018e7091f1cdcfff314fd7ae59" + +[[package]] +name = "educe" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb0188e3c3ba8df5753894d54461f0e39bc91741dc5b22e1c46999ec2c71f4e4" +dependencies = [ + "enum-ordinalize", + "proc-macro2", + "quote", + "syn", +] [[package]] name = "either" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" [[package]] name = "encode_unicode" @@ -2012,9 +2125,9 @@ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" [[package]] name = "encoding_rs" -version = "0.8.31" +version = "0.8.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b" +checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394" dependencies = [ "cfg-if", ] @@ -2025,7 +2138,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro2", "quote", "syn", @@ -2051,6 +2164,20 @@ dependencies = [ "syn", ] +[[package]] +name = "enum-ordinalize" +version = "3.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62bb1df8b45ecb7ffa78dca1c17a438fb193eb083db0b1b494d2a61bcb5096a" +dependencies = [ + "num-bigint", + "num-traits", + "proc-macro2", + "quote", + "rustc_version", + "syn", +] + [[package]] 
name = "errno" version = "0.2.8" @@ -2120,12 +2247,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" -[[package]] -name = "farmhash" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f35ce9c8fb9891c75ceadbc330752951a4e369b50af10775955aeb9af3eee34b" - [[package]] name = "fastrand" version = "1.8.0" @@ -2162,6 +2283,12 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "flagset" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cda653ca797810c02f7ca4b804b40b8b95ae046eb989d356bce17919a8c25499" + [[package]] name = "flatbuffers" version = "22.9.29" @@ -2174,12 +2301,11 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.24" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6" +checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841" dependencies = [ "crc32fast", - "libz-sys", "miniz_oxide", ] @@ -2222,70 +2348,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "frunk" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a89c703bf50009f383a0873845357cc400a95fc535f836feddfe015d7df6e1e0" -dependencies = [ - "frunk_core", - "frunk_derives", - "frunk_proc_macros", -] - -[[package]] -name = "frunk_core" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a446d01a558301dca28ef43222864a9fa2bd9a2e71370f769d5d5d5ec9f3537" - -[[package]] -name = "frunk_derives" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b83164912bb4c97cfe0772913c7af7387ee2e00cb6d4636fb65a35b3d0c8f173" -dependencies = [ - "frunk_proc_macro_helpers", - "quote", - "syn", -] - -[[package]] -name = "frunk_proc_macro_helpers" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "015425591bbeb0f5b8a75593340f1789af428e9f887a4f1e36c0c471f067ef50" -dependencies = [ - "frunk_core", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "frunk_proc_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea01524f285deab48affffb342b97f186e657b119c3f1821ac531780e0fbfae0" -dependencies = [ - "frunk_core", - "frunk_proc_macros_impl", - "proc-macro-hack", -] - -[[package]] -name = "frunk_proc_macros_impl" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a802d974cc18ee7fe1a7868fc9ce31086294fd96ba62f8da64ecb44e92a2653" -dependencies = [ - "frunk_core", - "frunk_proc_macro_helpers", - "proc-macro-hack", - "quote", - "syn", -] - [[package]] name = "fs-err" version = "2.9.0" @@ -2302,12 +2364,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "fs_extra" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394" - [[package]] name = "function_name" version = "0.3.0" @@ -2323,17 +2379,11 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "673464e1e314dd67a0fd9544abc99e8eb28d0c7e3b69b033bcff9b2d00b87333" -[[package]] -name 
= "funty" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" - [[package]] name = "futures" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38390104763dc37a5145a53c29c63c1290b5d316d6086ec32c293f6736051bb0" +checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" dependencies = [ "futures-channel", "futures-core", @@ -2346,9 +2396,9 @@ dependencies = [ [[package]] name = "futures-async-stream" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b492e1173b06145d9324d105cca85fb9249f48676800a2c085138f0d9bae19e6" +checksum = "78501ea1cd42fff5ac783fa99765af9179a782dcfb62af50bae7a8ac4154df69" dependencies = [ "futures-async-stream-macro", "futures-core", @@ -2357,9 +2407,9 @@ dependencies = [ [[package]] name = "futures-async-stream-macro" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6716fcdbbcebe690099a18cad71b61fbba10a0a3f8a8c0c1ed36583b42b06590" +checksum = "fb6646d5c7b236481975efca1f025165b1eeec61fa4abf27842825121e9abf19" dependencies = [ "proc-macro2", "quote", @@ -2368,36 +2418,25 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52ba265a92256105f45b719605a571ffe2d1f0fea3807304b522c1d778f79eed" +checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" dependencies = [ "futures-core", "futures-sink", ] -[[package]] -name = "futures-concurrency" -version = "3.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "113411b30b3a4fd9aba3eb9654f436976b71bc7f709318aeae7f8e90f74a71d6" -dependencies = [ - "async-trait", - "futures-core", - "pin-project", -] - [[package]] name = "futures-core" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac" +checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" [[package]] name = "futures-executor" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7acc85df6714c176ab5edf386123fafe217be88c0840ec11f199441134a074e2" +checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" dependencies = [ "futures-core", "futures-task", @@ -2406,9 +2445,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00f5fb52a06bdcadeb54e8d3671f8888a39697dcb0b81b23b55174030427f4eb" +checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" [[package]] name = "futures-lite" @@ -2427,9 +2466,9 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdfb8ce053d86b91919aad980c220b1fb8401a9394410e1c289ed7e66b61835d" +checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70" dependencies = [ "proc-macro2", "quote", @@ -2438,15 +2477,15 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.25" +version = "0.3.26" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "39c15cf1a4aa79df40f1bb462fb39676d0ad9e366c2a33b590d7c66f4f81fcf9" +checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" [[package]] name = "futures-task" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffb393ac5d9a6eaa9d3fdf37ae2776656b706e200c8e16b1bdb227f5198e6ea" +checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" [[package]] name = "futures-timer" @@ -2456,9 +2495,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" [[package]] name = "futures-util" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "197676987abd2f9cadff84926f410af1c183608d36641465df73ae8211dc65d6" +checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" dependencies = [ "futures-channel", "futures-core", @@ -2542,21 +2581,21 @@ dependencies = [ [[package]] name = "gimli" -version = "0.26.2" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d" +checksum = "221996f774192f0f718773def8201c4ae31f02616a54ccfc2d358bb0e5cefdec" [[package]] name = "glob" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "globset" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a1e17342619edbc21a964c2afbeb6c820c6a2560032872f397bb97ea127bd0a" +checksum = "029d74589adefde59de1a0c4f4732695c32805624aec7b68d91503d4dba79afc" dependencies = [ "aho-corasick", "bstr", @@ -2567,9 +2606,9 @@ dependencies = [ [[package]] name = "gloo-timers" -version = "0.2.4" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fb7d06c1c8cc2a29bee7ec961009a0b2caa0793ee4900c2ffb348734ba1c8f9" +checksum = "9b995a66bb87bebce9a0f4a95aed01daca4872c050bfcb21653361c03bc35e5c" dependencies = [ "futures-channel", "futures-core", @@ -2683,9 +2722,9 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" [[package]] name = "half" -version = "2.1.0" +version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad6a9459c9c30b177b925162351f97e7d967c7ea8bab3b8352805327daf45554" +checksum = "02b4af3693f1b705df946e9fe5631932443781d0aabb423b62fcd4d73f6d2fd0" dependencies = [ "crunchy", "num-traits", @@ -2693,11 +2732,11 @@ dependencies = [ [[package]] name = "halfbrown" -version = "0.1.16" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff8ba437813c5a31783dd9a21ce4f555583dc9b048af6bd2b12217394ed9c199" +checksum = "9e2a3c70a9c00cc1ee87b54e89f9505f73bb17d63f1b25c9a462ba8ef885444f" dependencies = [ - "hashbrown 0.12.3", + "hashbrown 0.13.2", "serde", ] @@ -2712,9 +2751,12 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash 0.8.3", +] [[package]] name 
= "hdrhistogram" @@ -2726,15 +2768,24 @@ dependencies = [ "byteorder", "crossbeam-channel", "flate2", - "nom 7.1.1", + "nom", "num-traits", ] [[package]] name = "heck" -version = "0.4.0" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "heck" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" @@ -2745,6 +2796,21 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" +dependencies = [ + "libc", +] + +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + [[package]] name = "hex" version = "0.4.3" @@ -2848,9 +2914,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.23" +version = "0.14.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "034711faac9d2166cb1baf1a2fb0b60b1f277f8492fd72176c17f3515e1abd3c" +checksum = "5e011372fa0b68db8350aa7a248930ecc7839bf46d8485577d69f117a75f164c" dependencies = [ "bytes", "futures-channel", @@ -2870,6 +2936,21 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c" +dependencies = [ + "http", + "hyper", + "log", + "rustls", + "rustls-native-certs", + "tokio", + "tokio-rustls", +] + [[package]] name = "hyper-timeout" version = "0.4.1" @@ -2950,9 +3031,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.1" +version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" +checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" dependencies = [ "autocfg", "hashbrown 0.12.3", @@ -2967,9 +3048,9 @@ checksum = "497f036ac2fae75c34224648a77802e5dd4e9cfb56f4713ab6b12b7160a0523b" [[package]] name = "indicatif" -version = "0.17.2" +version = "0.17.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4295cbb7573c16d310e99e713cf9e75101eb190ab31fccd35f2d2691b4352b19" +checksum = "cef509aa9bc73864d6756f0d34d35504af3cf0844373afe9b8669a5b8005a729" dependencies = [ "console", "number_prefix", @@ -2985,18 +3066,18 @@ checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" [[package]] name = "inferno" -version = "0.11.12" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd2fa5a9ad16dedcfabbc87f048ee6dd40d4944736fe4c5d362fb01df1209de1" +checksum = "d6e66fa9bb3c52f40d05c11b78919ff2f18993c2305bd8a62556d20cb3e9606f" dependencies = [ - "ahash 0.7.6", + "ahash 0.8.3", "atty", "indexmap", "itoa", "log", "num-format", "once_cell", - "quick-xml", + "quick-xml 0.26.0", "rgb", "str_stack", ] @@ -3018,15 +3099,31 @@ checksum = 
"8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "io-lifetimes" -version = "0.7.5" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ce5ef949d49ee85593fc4d3f3f95ad61657076395cbbce23e2121fc5542074" +checksum = "1abeb7a0dd0f8181267ff8adc397075586500b81b28a73e8a0208b00fc170fb3" +dependencies = [ + "libc", + "windows-sys 0.45.0", +] [[package]] name = "ipnet" -version = "2.5.1" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" + +[[package]] +name = "is-terminal" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f88c5561171189e69df9d98bcf18fd5f9558300f7ea7b801eb8a0fd748bd8745" +checksum = "22e18b0a45d56fe973d6db23972bf5bc46f988a4a2385deac9cc29572f09daef" +dependencies = [ + "hermit-abi 0.3.1", + "io-lifetimes", + "rustix", + "windows-sys 0.45.0", +] [[package]] name = "isahc" @@ -3072,9 +3169,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" +checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" [[package]] name = "jni" @@ -3107,18 +3204,18 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.60" +version = "0.3.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" +checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" dependencies = [ "wasm-bindgen", ] [[package]] name = "jsonwebtoken" -version = "8.1.1" +version = "8.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aa4b4af834c6cfd35d8763d359661b90f2e45d8f750a0849156c7f4671af09c" +checksum = "09f4f04699947111ec1733e71778d763555737579e44b85844cae8e1940a1828" dependencies = [ "base64 0.13.1", "pem", @@ -3167,21 +3264,6 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - -[[package]] -name = "lexical" -version = "6.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7aefb36fd43fef7003334742cbf77b243fcd36418a1d1bdd480d613a67968f6" -dependencies = [ - "lexical-core", -] - [[package]] name = "lexical-core" version = "0.8.5" @@ -3248,9 +3330,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.137" +version = "0.2.139" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" [[package]] name = "libflate" @@ -3272,16 +3354,6 @@ dependencies = [ "rle-decode-fast", ] -[[package]] -name = "libloading" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" -dependencies = [ - "cfg-if", - "winapi", -] - [[package]] name = "libm" version = "0.2.6" @@ -3294,7 +3366,7 @@ version = "0.6.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d7b603516767d1ab23d0de09d023e62966c3322f7148297c35cf3d97aa8b37fa" dependencies = [ - "clap 4.0.26", + "clap 4.1.4", "termcolor", "threadpool", ] @@ -3313,9 +3385,9 @@ dependencies = [ [[package]] name = "link-cplusplus" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9272ab7b96c9046fbc5bc56c06c117cb639fe2d509df0c421cad82d2915cf369" +checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" dependencies = [ "cc", ] @@ -3328,9 +3400,9 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" [[package]] name = "linux-raw-sys" -version = "0.0.46" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d2456c373231a208ad294c33dc5bff30051eafd954cd4caae83a712b12854d" +checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" [[package]] name = "local_stats_alloc" @@ -3383,15 +3455,6 @@ dependencies = [ "hashbrown 0.12.3", ] -[[package]] -name = "lru" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6e8aaa3f231bb4bd57b84b2d5dc3ae7f350265df8aa96492e0bc394a1571909" -dependencies = [ - "hashbrown 0.12.3", -] - [[package]] name = "lz4" version = "1.24.0" @@ -3434,14 +3497,14 @@ dependencies = [ [[package]] name = "madsim" -version = "0.2.13" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d2469ca5362ee2674302c477af49577be8d08f2922e4005e83a63661d7ceb5" +checksum = "7e3c98b41d46214f4ae435a95e246710ad7fb1100754f809dd7c18606a7607c4" dependencies = [ "ahash 0.7.6", "async-channel", "async-task", - "bincode", + "bincode 1.3.3", "bytes", "downcast-rs", "futures-util", @@ -3452,19 +3515,36 @@ dependencies = [ "rand 0.8.5", "rustversion", "serde", - "spin 0.9.4", + "spin 0.9.5", "tokio", "tokio-util", - "toml", + "toml 0.7.2", "tracing", "tracing-subscriber", ] +[[package]] +name = "madsim-aws-sdk-s3" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eeb7fb1e7d0d15c979ee36d9c44074298ff5662b317a1e122807e350d9b92b6" +dependencies = [ + "aws-sdk-s3", + "aws-smithy-http", + "aws-smithy-types", + "aws-types", + "bytes", + "http", + "madsim", + "spin 0.9.5", + "tracing", +] + [[package]] name = "madsim-etcd-client" -version = "0.2.13" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a282949bceaf35a250b48d9c5dab411a3948d0155a0cccfbcb0db953f67cef65" +checksum = "bb918383c4f5966f29760ec48820e1c2846739e4ae411c2a8aaa4466ce1421b7" dependencies = [ "etcd-client", "futures-util", @@ -3472,10 +3552,10 @@ dependencies = [ "madsim", "serde", "serde_with", - "spin 0.9.4", + "spin 0.9.5", "thiserror", "tokio", - "toml", + "toml 0.7.2", "tonic", "tracing", ] @@ -3494,9 +3574,9 @@ dependencies = [ [[package]] name = "madsim-rdkafka" -version = "0.2.13-alpha" +version = "0.2.14-alpha" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "934e420610a054eed044d02780a3f9c472ccd5f478a30054c5231dd66193cfec" +checksum = "945034b3d7c612f5bed8a34dcefd9278801bab180470e92d4b2297ddb3023cc8" dependencies = [ "async-channel", "async-trait", @@ -3510,7 +3590,7 @@ dependencies = [ "serde_derive", "serde_json", "slab", - "spin 0.9.4", + "spin 0.9.5", "thiserror", "tokio", "tracing", @@ -3518,20 +3598,19 @@ dependencies = [ [[package]] name = "madsim-tokio" -version = "0.2.11" +version = 
"0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a95122ce38cd63608a480fa7ebd8216f79ba8228cea34744e1a93e9c49f21872" +checksum = "ebc35d85610c81cdefc44f71aad0781b0093fa2d956360e418466cb1d5b1adf2" dependencies = [ - "futures-lite", "madsim", "tokio", ] [[package]] name = "madsim-tonic" -version = "0.2.11" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "796f59e9fe0c88487a23a946a52c25c7761d32d41c5afd6e3e212fea8e291d0e" +checksum = "420ca55ac297f5a3555cb03fdb085e7e91b1287dd872751a6b30dd3c3573277c" dependencies = [ "async-stream", "chrono", @@ -3572,9 +3651,9 @@ dependencies = [ [[package]] name = "matches" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" +checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" [[package]] name = "matchit" @@ -3633,6 +3712,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.16" @@ -3696,9 +3784,9 @@ dependencies = [ [[package]] name = "miniz_oxide" -version = "0.5.4" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34" +checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" dependencies = [ "adler", ] @@ -3728,9 +3816,9 @@ dependencies = [ [[package]] name = "moka" -version = "0.9.6" +version = "0.9.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b49a05f67020456541f4f29cbaa812016a266a86ec76f96d3873d459c68fe5e" +checksum = "19b9268097a2cf211ac9955b1cc95e80fa84fff5c2d13ba292916445dc8a311f" dependencies = [ "async-io", "async-lock", @@ -3742,7 +3830,7 @@ dependencies = [ "once_cell", "parking_lot 0.12.1", "quanta", - "rustc_version 0.4.0", + "rustc_version", "scheduled-thread-pool", "skeptic", "smallvec", @@ -3768,85 +3856,16 @@ dependencies = [ ] [[package]] -name = "mysql_async" -version = "0.31.0" +name = "naive-timer" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8fbd756177cfa8248baa7c5f555b9446349822bb94810c22336ec7597a72652" -dependencies = [ - "bytes", - "crossbeam", - "flate2", - "futures-core", - "futures-sink", - "futures-util", - "lazy_static", - "lru 0.8.1", - "mio", - "mysql_common", - "native-tls", - "once_cell", - "pem", - "percent-encoding", - "pin-project", - "serde", - "serde_json", - "socket2", - "thiserror", - "tokio", - "tokio-native-tls", - "tokio-util", - "twox-hash", - "url", -] +checksum = "034a0ad7deebf0c2abcf2435950a6666c3c15ea9d8fad0c0f48efa8a7f843fed" [[package]] -name = "mysql_common" -version = "0.29.1" +name = "native-tls" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "522f2f30f72de409fc04f88df25a031f98cfc5c398a94e0b892cabb33a1464cb" -dependencies = [ - "base64 0.13.1", - "bigdecimal", - "bindgen", - "bitflags", - "bitvec", - "byteorder", - "bytes", - "cc", - "cmake", - "crc32fast", - "flate2", - "frunk", - "lazy_static", - "lexical", - "num-bigint", - "num-traits", - "rand 0.8.5", - "regex", - "rust_decimal", - "saturating", - "serde", - "serde_json", - "sha-1", - "sha2", - 
"smallvec", - "subprocess", - "thiserror", - "time 0.3.15", - "uuid", -] - -[[package]] -name = "naive-timer" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "034a0ad7deebf0c2abcf2435950a6666c3c15ea9d8fad0c0f48efa8a7f843fed" - -[[package]] -name = "native-tls" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" +checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" dependencies = [ "lazy_static", "libc", @@ -3873,9 +3892,9 @@ dependencies = [ [[package]] name = "nix" -version = "0.24.2" +version = "0.24.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "195cdbc1741b8134346d515b3a56a1c94b0912758009cfd53f99ea0f57b065fc" +checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069" dependencies = [ "bitflags", "cfg-if", @@ -3884,32 +3903,35 @@ dependencies = [ [[package]] name = "nix" -version = "0.25.0" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e322c04a9e3440c327fca7b6c8a63e6890a32fa2ad689db972425f07e0d22abb" +checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" dependencies = [ "autocfg", "bitflags", "cfg-if", "libc", - "memoffset", + "memoffset 0.6.5", "pin-utils", ] [[package]] name = "nom" -version = "2.2.1" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf51a729ecf40266a2368ad335a5fdde43471f545a967109cd62146ecf8b66ff" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] [[package]] -name = "nom" -version = "7.1.1" +name = "nom8" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" +checksum = "ae01545c9c7fc4486ab7debaf2aad7003ac19431791868fb2e8066df97fad2f8" dependencies = [ "memchr", - "minimal-lexical", ] [[package]] @@ -3958,9 +3980,9 @@ dependencies = [ [[package]] name = "num-complex" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ae39348c8bc5fbd7f40c727a9925f03517afd2ab27d46702108b6a7e5414c19" +checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" dependencies = [ "num-traits", ] @@ -3978,9 +4000,9 @@ dependencies = [ [[package]] name = "num-format" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54b862ff8df690cf089058c98b183676a7ed0f974cc08b426800093227cbff3b" +checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" dependencies = [ "arrayvec", "itoa", @@ -4031,30 +4053,30 @@ dependencies = [ [[package]] name = "num_cpus" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6058e64324c71e02bc2b150e4f3bc8286db6c83092132ffa3f6b1eab0f9def5" +checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" dependencies = [ - "hermit-abi", + "hermit-abi 0.2.6", "libc", ] [[package]] name = "num_enum" -version = "0.5.7" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf5395665662ef45796a4ff5486c5d41d29e0c09640af4c5f17fd94ee2c119c9" +checksum = "8d829733185c1ca374f17e52b762f24f535ec625d2cc1f070e34c8a9068f341b" dependencies = [ 
"num_enum_derive", ] [[package]] name = "num_enum_derive" -version = "0.5.7" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b0498641e53dd6ac1a4f22547548caa6864cc4933784319cd1775271c5a46ce" +checksum = "2be1598bf1c313dcdd12092e3f1920f463462525a21b7b4e11b4168353d0123e" dependencies = [ - "proc-macro-crate", + "proc-macro-crate 1.3.0", "proc-macro2", "quote", "syn", @@ -4077,18 +4099,18 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "object" -version = "0.29.0" +version = "0.30.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21158b2c33aa6d4561f1c0a6ea283ca92bc54802a93b263e910746d679a7eb53" +checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" +checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" [[package]] name = "oorandom" @@ -4096,11 +4118,45 @@ version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +[[package]] +name = "opendal" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c40ed33cc9fed187ce8293587416e0afd6ac9fcde17f2a20ad0dca14dd685ebe" +dependencies = [ + "anyhow", + "async-compat", + "async-trait", + "backon 0.2.0", + "base64 0.21.0", + "bincode 2.0.0-rc.2", + "bytes", + "flagset", + "futures", + "http", + "hyper", + "log", + "md-5", + "once_cell", + "parking_lot 0.12.1", + "percent-encoding", + "pin-project", + "quick-xml 0.27.1", + "reqsign", + "reqwest", + "serde", + "serde_json", + "time 0.3.17", + "tokio", + "ureq", + "uuid", +] + [[package]] name = "openssl" -version = "0.10.42" +version = "0.10.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12fc0523e3bd51a692c8850d075d74dc062ccf251c0110668cbd921917118a13" +checksum = "b102428fd03bc5edf97f62620f7298614c45cedf287c271e7ed450bbaf83f2e1" dependencies = [ "bitflags", "cfg-if", @@ -4139,9 +4195,9 @@ dependencies = [ [[package]] name = "openssl-sys" -version = "0.9.77" +version = "0.9.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03b84c3b2d099b81f0953422b4d4ad58761589d0229b5506356afca05a3670a" +checksum = "23bbbf7854cd45b83958ebe919f0e8e516793727652e27fda10a8384cfc790b7" dependencies = [ "autocfg", "cc", @@ -4222,29 +4278,30 @@ dependencies = [ ] [[package]] -name = "ordered-float" -version = "2.10.0" +name = "ordered-multimap" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" +checksum = "ccd746e37177e1711c20dd619a1620f34f5c8b569c53590a72dedd5344d8924a" dependencies = [ - "num-traits", + "dlv-list", + "hashbrown 0.12.3", ] [[package]] name = "os_pipe" -version = "0.9.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb233f06c2307e1f5ce2ecad9f8121cffbbee2c95428f44ea85222e460d0d213" +checksum = "a53dbb20faf34b16087a931834cba2d7a73cc74af2b7ef345a4c8324e2409a12" dependencies = [ "libc", - "winapi", + "windows-sys 0.45.0", ] [[package]] name = "os_str_bytes" -version = "6.4.0" +version = "6.4.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5bf27447411e9ee3ff51186bf7a08e16c341efdde93f4d823e8844429bed7e" +checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" [[package]] name = "ouroboros" @@ -4304,7 +4361,7 @@ checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" dependencies = [ "instant", "lock_api", - "parking_lot_core 0.8.5", + "parking_lot_core 0.8.6", ] [[package]] @@ -4314,14 +4371,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core 0.9.4", + "parking_lot_core 0.9.7", ] [[package]] name = "parking_lot_core" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" dependencies = [ "cfg-if", "instant", @@ -4333,9 +4390,9 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.4" +version = "0.9.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dc9e0dc2adc1c69d09143aff38d3d30c5c3f0df0dad82e6d25547af174ebec0" +checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" dependencies = [ "backtrace", "cfg-if", @@ -4344,7 +4401,7 @@ dependencies = [ "redox_syscall", "smallvec", "thread-id", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -4384,9 +4441,9 @@ dependencies = [ [[package]] name = "paste" -version = "1.0.9" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1de2e551fb905ac83f73f7aedf2f0cb4a0da7e35efa24a202a936269f1f18e1" +checksum = "d01a5bd0424d00070b0098dd17ebca6f961a959dead1dbcbbbc1d1cd8d3deeba" [[package]] name = "path-absolutize" @@ -4422,23 +4479,17 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bdbb7b706f2afc610f3853550cdbbf6372fd324824a087806bd4480ea4996e24" dependencies = [ - "heck", + "heck 0.4.1", "itertools", "prost 0.11.6", "prost-types", ] -[[package]] -name = "peeking_take_while" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" - [[package]] name = "pem" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c64931a1a212348ec4f3b4362585eca7159d0d09cbdf4a7f74f02173596fd4" +checksum = "a8835c273a76a90455d7344889b0964598e3316e2a79ede8e36f16bdcf2228b8" dependencies = [ "base64 0.13.1", ] @@ -4451,9 +4502,9 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" [[package]] name = "petgraph" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5014253a1331579ce62aa67443b4a658c5e7dd03d4bc6d302b94474888143" +checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" dependencies = [ "fixedbitset", "indexmap", @@ -4487,6 +4538,7 @@ dependencies = [ "postgres-types", "regex", "risingwave_common", + "risingwave_sqlparser", "rust_decimal", "thiserror", "tokio-openssl", @@ -4602,23 +4654,23 @@ dependencies = [ [[package]] name = "polling" -version = "2.4.0" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ab4609a838d88b73d8238967b60dd115cc08d38e2bbaf51ee1e4b695f89122e2" +checksum = "22122d5ec4f9fe1b3916419b76be1e80bcb93f618d071d2edf841b137b2a2bd6" dependencies = [ "autocfg", "cfg-if", "libc", "log", "wepoll-ffi", - "winapi", + "windows-sys 0.42.0", ] [[package]] name = "portable-atomic" -version = "0.3.15" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15eb2c6e362923af47e13c23ca5afb859e83d54452c55b0b9ac763b8f7c1ac16" +checksum = "26f6a7b87c2e435a3241addceeeff740ff8b7e76b74c13bf9acb17fa454ea00b" [[package]] name = "postgres" @@ -4671,6 +4723,8 @@ dependencies = [ "fallible-iterator", "postgres-derive", "postgres-protocol", + "serde", + "serde_json", ] [[package]] @@ -4685,7 +4739,7 @@ dependencies = [ "inferno", "libc", "log", - "nix 0.24.2", + "nix 0.24.3", "once_cell", "parking_lot 0.12.1", "smallvec", @@ -4710,6 +4764,16 @@ dependencies = [ "madsim-tokio", ] +[[package]] +name = "pretty-bytes" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009d6edd2c1dbf2e1c0cd48a2f7766e03498d49ada7109a01c6911815c685316" +dependencies = [ + "atty", + "getopts", +] + [[package]] name = "pretty_assertions" version = "1.3.0" @@ -4724,9 +4788,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.1.21" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c142c0e46b57171fe0c528bee8c5b7569e80f0c17e377cd0e30ea57dbc11bb51" +checksum = "e97e3215779627f01ee256d2fad52f3d95e8e1c11e9fc6fd08f7cd455d5d5c78" dependencies = [ "proc-macro2", "syn", @@ -4734,13 +4798,21 @@ dependencies = [ [[package]] name = "proc-macro-crate" -version = "1.2.1" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d6ea3c4595b96363c13943497db34af4460fb474a95c43f4446ad341b8c9785" +dependencies = [ + "toml 0.5.11", +] + +[[package]] +name = "proc-macro-crate" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eda0fc3b0fb7c975631757e14d9049da17374063edb6ebbcbc54d880d4fe94e9" +checksum = "66618389e4ec1c7afe67d51a9bf34ff9236480f8d51e7489b7d5ab0303c13f34" dependencies = [ "once_cell", - "thiserror", - "toml", + "toml_edit 0.18.1", ] [[package]] @@ -4769,9 +4841,9 @@ dependencies = [ [[package]] name = "proc-macro-hack" -version = "0.5.19" +version = "0.5.20+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" +checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" @@ -4797,9 +4869,9 @@ dependencies = [ [[package]] name = "procfs" -version = "0.14.1" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dfb6451c91904606a1abe93e83a8ec851f45827fa84273f256ade45dc095818" +checksum = "b1de8dacb0873f77e6aefc6d71e044761fcc68060290f5b1089fcdf84626bb69" dependencies = [ "bitflags", "byteorder", @@ -4808,17 +4880,6 @@ dependencies = [ "rustix", ] -[[package]] -name = "procinfo" -version = "0.4.2" -source = "git+https://github.com/tikv/procinfo-rs?rev=6599eb9dca74229b2c1fcc44118bef7eff127128#6599eb9dca74229b2c1fcc44118bef7eff127128" -dependencies = [ - "byteorder", - "libc", - "nom 2.2.1", - "rustc_version 0.2.3", -] - [[package]] name = "prometheus" version = "0.13.3" @@ -4831,21 +4892,21 @@ dependencies = [ "libc", "memchr", "parking_lot 0.12.1", - "procfs 0.14.1", + "procfs 0.14.2", "protobuf", 
"thiserror", ] [[package]] name = "prometheus-http-query" -version = "0.6.3" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "544213c26792ce82b2d01e5b0aca35cd70761a0eb4d635ca3f86f6ad372ecd4a" +checksum = "fbb5eca70036162115bebb7647b47d0c1e38bb039c70898f19d0b74c8c388674" dependencies = [ "reqwest", "serde", "serde_json", - "time 0.3.15", + "time 0.3.17", "url", ] @@ -4871,12 +4932,12 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb5320c680de74ba083512704acb90fe00f28f79207286a848e730c45dd73ed6" +checksum = "a3f8ad728fb08fe212df3c05169e940fbb6d9d16a877ddde14644a983ba2012e" dependencies = [ "bytes", - "heck", + "heck 0.4.1", "itertools", "lazy_static", "log", @@ -4977,6 +5038,26 @@ dependencies = [ "autotools", ] +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "pulldown-cmark" version = "0.9.2" @@ -4999,13 +5080,13 @@ dependencies = [ "bit-vec", "bytes", "chrono", - "crc 3.0.0", + "crc 3.0.1", "futures", "futures-io", "futures-timer", "log", "native-tls", - "nom 7.1.1", + "nom", "pem", "prost 0.11.6", "prost-build", @@ -5036,27 +5117,31 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.23.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11bafc859c6815fbaffbbbf4229ecb767ac913fecb27f9ad4343662e9ef099ea" +checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd" dependencies = [ "memchr", ] [[package]] -name = "quote" -version = "1.0.21" +name = "quick-xml" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "ffc053f057dd768a56f62cd7e434c42c831d296968997e9ac1f76ea7c2d14c41" dependencies = [ - "proc-macro2", + "memchr", + "serde", ] [[package]] -name = "radium" -version = "0.7.0" +name = "quote" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] [[package]] name = "rand" @@ -5129,32 +5214,39 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "random-string" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4e63111ec5292d8af9c220f06fe3bb87991cc78b6f1f7e291d1ae6b8a60817" +dependencies = [ + "fastrand", +] + [[package]] name = "raw-cpuid" -version = "10.6.0" +version = "10.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6823ea29436221176fe662da99998ad3b4db2c7f31e7b6f5fe43adccd6320bb" +checksum = "c307f7aacdbab3f0adee67d52739a1d71112cc068d6fab169ddeb18e48877fad" dependencies = [ "bitflags", ] [[package]] name = "rayon" -version = "1.5.3" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" +checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" dependencies = [ - "autocfg", - "crossbeam-deque", "either", "rayon-core", ] [[package]] name = "rayon-core" -version = "1.9.3" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" +checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b" dependencies = [ "crossbeam-channel", "crossbeam-deque", @@ -5179,9 +5271,9 @@ dependencies = [ [[package]] name = "redis" -version = "0.22.1" +version = "0.22.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "513b3649f1a111c17954296e4a3b9eecb108b766c803e2b99f179ebe27005985" +checksum = "aa8455fa3621f6b41c514946de66ea0531f57ca017b2e6c7cc368035ea5b46df" dependencies = [ "combine", "itoa", @@ -5200,6 +5292,17 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +dependencies = [ + "getrandom 0.2.7", + "redox_syscall", + "thiserror", +] + [[package]] name = "ref_slice" version = "1.2.1" @@ -5208,9 +5311,9 @@ checksum = "f4ed1d73fb92eba9b841ba2aef69533a060ccc0d3ec71c90aeda5996d4afb7a9" [[package]] name = "regex" -version = "1.7.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a" +checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" dependencies = [ "aho-corasick", "memchr", @@ -5241,13 +5344,51 @@ dependencies = [ "winapi", ] +[[package]] +name = "rend" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79af64b4b6362ffba04eef3a4e10829718a4896dac19daa741851c86781edf95" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "reqsign" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e0154ced5e44389686689a3c96c49ca5a70ad9c708e0989982adae2e0378bbf" +dependencies = [ + "anyhow", + "backon 0.4.0", + "base64 0.21.0", + "bytes", + "dirs", + "form_urlencoded", + "hex", + "hmac", + "http", + "jsonwebtoken", + "log", + "once_cell", + "percent-encoding", + "quick-xml 0.27.1", + "rust-ini", + "serde", + "serde_json", + "sha1", + "sha2", + "time 0.3.17", + "ureq", +] + [[package]] name = "reqwest" -version = "0.11.13" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68cc60575865c7831548863cc02356512e3f1dc2f3f82cb837d7fc4cc8f3c97c" +checksum = "21eed90ec8570952d53b772ecf8f206aa1ec9a3d76b2521c56c42973f2d91ee9" dependencies = [ - "base64 0.13.1", + "base64 0.21.0", "bytes", "encoding_rs", "futures-core", @@ -5256,24 +5397,32 @@ dependencies = [ "http", "http-body", "hyper", + "hyper-rustls", "hyper-tls", "ipnet", "js-sys", "log", "mime", + "mime_guess", "native-tls", "once_cell", "percent-encoding", "pin-project-lite", + "rustls", + "rustls-native-certs", + "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", "tokio", "tokio-native-tls", + "tokio-rustls", + "tokio-util", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", "winreg", ] @@ -5286,9 +5435,9 @@ checksum = "4389f1d5789befaf6029ebd9f7dac4af7f7e3d61b69d4f30e2ac02b57e7712b0" [[package]] name = "rgb" -version 
= "0.8.34" +version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3603b7d71ca82644f79b5a06d1220e9a58ede60bd32255f698cb1af8838b8db3" +checksum = "7495acf66551cdb696b7711408144bcd3194fc78e32f3a09e809bfe7dd4a7ce3" dependencies = [ "bytemuck", ] @@ -5313,7 +5462,6 @@ name = "risedev" version = "0.2.0-alpha" dependencies = [ "anyhow", - "bytes", "chrono", "clap 3.2.23", "console", @@ -5333,7 +5481,6 @@ dependencies = [ "serde_with", "serde_yaml", "tempfile", - "toml", "workspace-hack", "yaml-rust", ] @@ -5353,7 +5500,6 @@ dependencies = [ "risingwave_object_store", "risingwave_pb", "serde", - "serde_derive", "serde_json", "thiserror", "twox-hash", @@ -5377,15 +5523,9 @@ dependencies = [ "anyhow", "assert_matches", "async-recursion", - "async-stream", "async-trait", - "byteorder", - "bytes", - "chrono", - "crc32fast", "criterion", "either", - "farmhash", "fixedbitset", "futures", "futures-async-stream", @@ -5396,9 +5536,7 @@ dependencies = [ "minitrace", "num-traits", "parking_lot 0.12.1", - "paste", "prometheus", - "prost 0.11.6", "rand 0.8.5", "risingwave_common", "risingwave_connector", @@ -5408,11 +5546,7 @@ dependencies = [ "risingwave_rpc_client", "risingwave_source", "risingwave_storage", - "serde", - "serde-value", "serde_json", - "smallvec", - "static_assertions", "task_stats_alloc", "tempfile", "thiserror", @@ -5420,9 +5554,6 @@ dependencies = [ "tokio-metrics", "tokio-stream", "tracing", - "tracing-futures", - "twox-hash", - "url", "uuid", "workspace-hack", ] @@ -5445,24 +5576,17 @@ dependencies = [ "itertools", "libc", "madsim-tokio", - "minitrace", - "minitrace-jaeger", - "moka", - "nix 0.25.0", + "nix 0.25.1", "opentelemetry", "opentelemetry-jaeger", "parking_lot 0.12.1", "prometheus", "rand 0.8.5", "risingwave_common", - "risingwave_hummock_sdk", - "risingwave_meta", - "risingwave_pb", - "risingwave_rpc_client", "risingwave_storage", "serde", "tokio-stream", - "toml", + "toml 0.5.11", "tracing", "tracing-opentelemetry", "tracing-subscriber", @@ -5475,10 +5599,8 @@ version = "0.2.0-alpha" dependencies = [ "anyhow", "clap 3.2.23", - "log", "madsim-tokio", "risingwave_common", - "risingwave_compaction_test", "risingwave_compactor", "risingwave_compute", "risingwave_ctl", @@ -5487,7 +5609,6 @@ dependencies = [ "risingwave_rt", "task_stats_alloc", "tikv-jemallocator", - "tracing", "workspace-config", "workspace-hack", ] @@ -5499,9 +5620,7 @@ dependencies = [ "anyhow", "clap 3.2.23", "console", - "log", "madsim-tokio", - "risedev", "risingwave_common", "risingwave_compactor", "risingwave_compute", @@ -5509,7 +5628,6 @@ dependencies = [ "risingwave_frontend", "risingwave_meta", "risingwave_rt", - "sync-point", "task_stats_alloc", "tempfile", "tikv-jemallocator", @@ -5525,7 +5643,6 @@ dependencies = [ "anyhow", "arrow-array", "arrow-schema", - "async-stream", "async-trait", "auto_enums", "bitflags", @@ -5533,13 +5650,12 @@ dependencies = [ "bytes", "chrono", "chrono-tz", + "clap 3.2.23", "comfy-table", "crc32fast", "criterion", "darwin-libproc", "derivative", - "either", - "enum-as-inner", "fixedbitset", "futures", "futures-async-stream", @@ -5548,7 +5664,6 @@ dependencies = [ "itertools", "itoa", "libc", - "lru 0.7.6", "mach", "madsim-tokio", "madsim-tonic", @@ -5558,10 +5673,8 @@ dependencies = [ "parking_lot 0.12.1", "parse-display", "paste", - "pin-project", "postgres-types", "procfs 0.12.0", - "procinfo", "prometheus", "prost 0.11.6", "rand 0.8.5", @@ -5570,24 +5683,32 @@ dependencies = [ "rust_decimal", "ryu", "serde", - "serde_derive", 
"serde_json", - "smallvec", - "spin 0.9.4", "static_assertions", "strum", "strum_macros", "sysinfo", "tempfile", "thiserror", - "tokio-stream", - "toml", + "toml 0.5.11", "tracing", "twox-hash", "url", "workspace-hack", ] +[[package]] +name = "risingwave_common_proc_macro" +version = "0.2.0-alpha" +dependencies = [ + "bae", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", + "workspace-hack", +] + [[package]] name = "risingwave_common_service" version = "0.2.0-alpha" @@ -5616,10 +5737,7 @@ dependencies = [ "clap 3.2.23", "futures", "itertools", - "lazy_static", "madsim-tokio", - "madsim-tonic", - "parking_lot 0.12.1", "rand 0.8.5", "risingwave_common", "risingwave_compactor", @@ -5632,8 +5750,6 @@ dependencies = [ "risingwave_rt", "risingwave_storage", "risingwave_tracing", - "serde", - "toml", "tracing", "workspace-hack", ] @@ -5646,20 +5762,15 @@ dependencies = [ "clap 3.2.23", "madsim-tokio", "madsim-tonic", - "parking_lot 0.12.1", "prometheus", "risingwave_common", + "risingwave_common_proc_macro", "risingwave_common_service", "risingwave_hummock_sdk", "risingwave_object_store", "risingwave_pb", "risingwave_rpc_client", "risingwave_storage", - "risingwave_tracing", - "serde", - "tokio-retry", - "tokio-stream", - "toml", "tracing", "workspace-hack", ] @@ -5668,35 +5779,24 @@ dependencies = [ name = "risingwave_compute" version = "0.2.0-alpha" dependencies = [ - "anyhow", - "async-stream", "async-trait", "async_stack_trace", - "byteorder", - "bytes", - "chrono", "clap 3.2.23", - "crc32fast", - "dyn-clone", "either", - "farmhash", "futures", "futures-async-stream", "hyper", "itertools", - "lru 0.7.6", "madsim-tokio", "madsim-tonic", "maplit", - "num-traits", - "parking_lot 0.12.1", - "paste", "pprof", + "pretty-bytes", "prometheus", - "prost 0.11.6", "rand 0.8.5", "risingwave_batch", "risingwave_common", + "risingwave_common_proc_macro", "risingwave_common_service", "risingwave_connector", "risingwave_hummock_sdk", @@ -5706,21 +5806,12 @@ dependencies = [ "risingwave_storage", "risingwave_stream", "risingwave_tracing", - "serde", - "serde-value", "serde_json", - "smallvec", - "static_assertions", - "sysinfo", "tempfile", - "thiserror", "tikv-jemalloc-ctl", "tokio-stream", "tower", - "tower-http", "tracing", - "twox-hash", - "url", "workspace-hack", ] @@ -5730,32 +5821,24 @@ version = "0.2.0-alpha" dependencies = [ "anyhow", "apache-avro", - "async-stream", "async-trait", "aws-config", "aws-sdk-kinesis", "aws-sdk-s3", - "aws-sdk-sqs", "aws-smithy-http", - "aws-smithy-types", "aws-types", "byteorder", "bytes", "chrono", - "crc32fast", "csv-core", - "either", + "duration-str", "enum-as-inner", - "farmhash", "futures", "futures-async-stream", - "futures-concurrency", "globset", - "google-cloud-googleapis", "google-cloud-pubsub", "http", "http-serde", - "humantime", "hyper", "itertools", "madsim-rdkafka", @@ -5763,10 +5846,9 @@ dependencies = [ "madsim-tonic", "maplit", "moka", - "mysql_async", "nexmark", "num-traits", - "paste", + "prometheus", "prost 0.11.6", "prost-reflect", "protobuf-native", @@ -5777,24 +5859,18 @@ dependencies = [ "risingwave_expr", "risingwave_pb", "risingwave_rpc_client", - "risingwave_storage", "rust_decimal", "serde", "serde_derive", "serde_json", "serde_with", "simd-json", - "smallvec", - "static_assertions", - "strum", - "strum_macros", "tempfile", "thiserror", "tokio-retry", "tokio-stream", "tokio-util", "tracing", - "twox-hash", "url", "urlencoding", "wiremock", @@ -5813,10 +5889,8 @@ dependencies = [ "futures", "itertools", "madsim-tokio", - 
"parking_lot 0.12.1", "regex", "risingwave_common", - "risingwave_common_service", "risingwave_frontend", "risingwave_hummock_sdk", "risingwave_object_store", @@ -5840,38 +5914,24 @@ dependencies = [ "anyhow", "arrow-array", "arrow-schema", - "async-stream", - "async-trait", - "byteorder", - "bytes", "chrono", "chrono-tz", - "crc32fast", "criterion", "dyn-clone", "either", - "hex", "itertools", "madsim-tokio", - "madsim-tonic", "md5", "num-traits", "ouroboros", "parse-display", "paste", - "postgres-types", - "prost 0.11.6", "regex", "risingwave_common", "risingwave_pb", "risingwave_udf", - "rust_decimal", - "serde", - "smallvec", "speedate", "thiserror", - "tokio-stream", - "toml", "workspace-hack", ] @@ -5881,10 +5941,10 @@ version = "0.2.0-alpha" dependencies = [ "anyhow", "arc-swap", - "assert-impl", "assert_matches", + "async-recursion", "async-trait", - "byteorder", + "bk-tree", "bytes", "clap 3.2.23", "derivative", @@ -5900,18 +5960,18 @@ dependencies = [ "maplit", "md5", "num-integer", - "num-traits", "parking_lot 0.12.1", "parse-display", "paste", + "petgraph", "pgwire", "pin-project-lite", "postgres-types", "prometheus", - "prost 0.11.6", "rand 0.8.5", "risingwave_batch", "risingwave_common", + "risingwave_common_proc_macro", "risingwave_common_service", "risingwave_connector", "risingwave_expr", @@ -5921,7 +5981,6 @@ dependencies = [ "risingwave_sqlparser", "risingwave_storage", "serde", - "serde_derive", "serde_json", "sha2", "smallvec", @@ -5945,7 +6004,6 @@ dependencies = [ "num-traits", "parking_lot 0.12.1", "parse-display", - "prost 0.11.6", "risingwave_common", "risingwave_pb", "tracing", @@ -5987,10 +6045,11 @@ version = "0.1.0" dependencies = [ "bytes", "futures", - "itertools", "jni", "madsim-tokio", + "prost 0.11.6", "risingwave_common", + "risingwave_hummock_sdk", "risingwave_object_store", "risingwave_pb", "risingwave_storage", @@ -6005,16 +6064,14 @@ dependencies = [ "anyhow", "arc-swap", "assert_matches", - "async-stream", "async-trait", "axum", - "byteorder", "bytes", - "chrono", "clap 3.2.23", - "crc32fast", - "derivative", + "crepe", + "easy-ext", "either", + "enum-as-inner", "fail", "function_name", "futures", @@ -6030,7 +6087,6 @@ dependencies = [ "num-integer", "num-traits", "parking_lot 0.12.1", - "paste", "prometheus", "prometheus-http-query", "prost 0.11.6", @@ -6038,6 +6094,7 @@ dependencies = [ "reqwest", "risingwave_backup", "risingwave_common", + "risingwave_common_proc_macro", "risingwave_common_service", "risingwave_connector", "risingwave_hummock_sdk", @@ -6047,24 +6104,19 @@ dependencies = [ "risingwave_test_runner", "scopeguard", "serde", - "serde_derive", "serde_json", - "serial_test", - "smallvec", "static_assertions", "sync-point", "tempfile", "thiserror", "tokio-retry", "tokio-stream", - "toml", "tower", "tower-http", "tracing", "url", "uuid", "workspace-hack", - "xxhash-rust", ] [[package]] @@ -6074,39 +6126,36 @@ dependencies = [ "async-trait", "async_stack_trace", "aws-config", - "aws-endpoint", - "aws-sdk-s3", "aws-smithy-http", "aws-smithy-types", - "aws-types", "bytes", "crc32fast", "fail", "futures", "hyper", "itertools", + "madsim-aws-sdk-s3", "madsim-tokio", + "opendal", "prometheus", + "random-string", "risingwave_common", - "spin 0.9.4", + "spin 0.9.5", "tempfile", "thiserror", "tracing", - "workspace-hack", ] [[package]] name = "risingwave_pb" version = "0.2.0-alpha" dependencies = [ - "bytes", "madsim-tonic", "madsim-tonic-build", "pbjson", "pbjson-build", "prost 0.11.6", "prost-helpers", - "prost-types", "serde", "workspace-hack", 
] @@ -6116,6 +6165,7 @@ name = "risingwave_planner_test" version = "0.2.0-alpha" dependencies = [ "anyhow", + "backtrace", "console", "futures", "itertools", @@ -6140,7 +6190,6 @@ dependencies = [ "madsim-tokio", "path-absolutize", "similar", - "tempfile", "tracing", "tracing-subscriber", "workspace-hack", @@ -6152,12 +6201,12 @@ version = "0.2.0-alpha" dependencies = [ "anyhow", "async-trait", - "async_stack_trace", + "either", "futures", + "itertools", "madsim-tokio", "madsim-tonic", "moka", - "paste", "rand 0.8.5", "risingwave_common", "risingwave_hummock_sdk", @@ -6166,6 +6215,7 @@ dependencies = [ "tokio-retry", "tokio-stream", "tracing", + "url", "workspace-hack", ] @@ -6173,7 +6223,6 @@ dependencies = [ name = "risingwave_rt" version = "0.2.0-alpha" dependencies = [ - "async-trait", "async_stack_trace", "console", "console-subscriber", @@ -6181,7 +6230,6 @@ dependencies = [ "madsim-tokio", "parking_lot 0.12.1", "pprof", - "tokio-stream", "tracing", "tracing-subscriber", "workspace-hack", @@ -6199,6 +6247,7 @@ dependencies = [ "glob", "itertools", "madsim", + "madsim-aws-sdk-s3", "madsim-etcd-client", "madsim-rdkafka", "madsim-tokio", @@ -6228,42 +6277,19 @@ version = "0.2.0-alpha" dependencies = [ "anyhow", "assert_matches", - "async-stream", - "async-trait", - "aws-config", - "aws-sdk-s3", - "byteorder", - "bytes", - "chrono", - "crc32fast", "criterion", - "enum-as-inner", - "farmhash", + "easy-ext", "futures", "futures-async-stream", "itertools", "madsim-tokio", - "madsim-tonic", - "maplit", - "num-traits", "parking_lot 0.12.1", - "paste", - "prometheus", "rand 0.8.5", "risingwave_common", "risingwave_connector", - "risingwave_expr", "risingwave_pb", - "risingwave_storage", - "serde", - "smallvec", - "static_assertions", "tempfile", - "thiserror", - "tokio-stream", "tracing", - "twox-hash", - "url", "workspace-hack", ] @@ -6274,7 +6300,6 @@ dependencies = [ "itertools", "matches", "serde", - "serde_json", "tracing", "workspace-hack", ] @@ -6284,11 +6309,9 @@ name = "risingwave_sqlparser_test_runner" version = "0.1.0" dependencies = [ "anyhow", - "itertools", "risingwave_sqlparser", "serde", "serde_yaml", - "tempfile", "walkdir", "workspace-hack", ] @@ -6297,14 +6320,11 @@ dependencies = [ name = "risingwave_sqlsmith" version = "0.2.0-alpha" dependencies = [ - "anyhow", "chrono", "clap 3.2.23", - "futures", "itertools", "libtest-mimic", "madsim-tokio", - "paste", "rand 0.8.5", "risingwave_common", "risingwave_expr", @@ -6320,46 +6340,33 @@ dependencies = [ name = "risingwave_storage" version = "0.2.0-alpha" dependencies = [ - "anyhow", "arc-swap", "async-trait", "async_stack_trace", "auto_enums", - "bitvec", - "byteorder", "bytes", - "chrono", - "crc32fast", "criterion", "crossbeam", "dashmap", "dyn-clone", - "either", "enum-as-inner", "fail", - "farmhash", "fiemap", "futures", "futures-async-stream", - "hyper", "itertools", "libc", "lz4", "madsim-tokio", - "madsim-tonic", "memcomparable", "minitrace", "minstant", "moka", - "nix 0.25.0", - "num-integer", - "num-traits", + "nix 0.25.1", "parking_lot 0.12.1", - "paste", "prometheus", "prost 0.11.6", "rand 0.8.5", - "regex", "risingwave_backup", "risingwave_common", "risingwave_common_service", @@ -6370,19 +6377,16 @@ dependencies = [ "risingwave_test_runner", "risingwave_tracing", "scopeguard", - "serde", "sled", - "smallvec", - "spin 0.9.4", + "spin 0.9.5", "sync-point", "tempfile", "thiserror", - "tokio-metrics", "tokio-retry", - "tokio-stream", "tracing", "uuid", "workspace-hack", + "xorf", "xxhash-rust", "zstd", ] @@ -6397,24 
+6401,19 @@ dependencies = [ "async-stream", "async-trait", "async_stack_trace", - "auto_enums", - "byteorder", "bytes", - "chrono", - "crc32fast", "dyn-clone", "either", "enum-as-inner", - "farmhash", "fixedbitset", "futures", "futures-async-stream", "gen-iter", - "hyper", + "hytra", "iter-chunks", "itertools", "local_stats_alloc", - "lru 0.7.6", + "lru", "madsim-tokio", "madsim-tonic", "maplit", @@ -6424,7 +6423,6 @@ dependencies = [ "multimap", "num-traits", "parking_lot 0.12.1", - "paste", "pin-project", "prometheus", "prost 0.11.6", @@ -6438,21 +6436,13 @@ dependencies = [ "risingwave_rpc_client", "risingwave_source", "risingwave_storage", - "serde", - "serde-value", - "serde_json", "smallvec", "static_assertions", "task_stats_alloc", "thiserror", - "tikv-jemalloc-ctl", "tokio-metrics", "tokio-stream", - "tower", "tracing", - "tracing-futures", - "twox-hash", - "url", "workspace-hack", ] @@ -6492,24 +6482,64 @@ dependencies = [ "thiserror", ] +[[package]] +name = "rkyv" +version = "0.7.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cec2b3485b07d96ddfd3134767b8a447b45ea4eb91448d0a35180ec0ffd5ed15" +dependencies = [ + "bytecheck", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eaedadc88b53e36dd32d940ed21ae4d850d5916f2581526921f553a72ac34c4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "rle-decode-fast" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" +[[package]] +name = "rust-ini" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6d5f2436026b4f6e79dc829837d467cc7e9a55ee40e750d716713540715a2df" +dependencies = [ + "cfg-if", + "ordered-multimap", +] + [[package]] name = "rust_decimal" -version = "1.26.1" +version = "1.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee9164faf726e4f3ece4978b25ca877ddc6802fa77f38cdccb32c7f805ecd70c" +checksum = "e13cf35f7140155d02ba4ec3294373d513a3c7baa8364c162b030e33c61520a8" dependencies = [ "arrayvec", + "borsh", + "bytecheck", "byteorder", "bytes", "num-traits", "postgres", + "rand 0.8.5", + "rkyv", "serde", + "serde_json", "tokio-postgres", ] @@ -6519,49 +6549,34 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "rustc_version" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -dependencies = [ - "semver 0.9.0", -] - [[package]] name = "rustc_version" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" dependencies = [ - "semver 1.0.14", + "semver", ] [[package]] name = "rustix" -version = "0.35.13" +version = "0.36.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727a1a6d65f786ec22df8a81ca3121107f235970dc1705ed681d3e6e8b9cd5f9" +checksum = 
"f43abb88211988493c1abb44a70efa56ff0ce98f233b7b276146f1f3f7ba9644" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] name = "rustls" -version = "0.20.7" +version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "539a2bfe908f471bfa933876bd1eb6a19cf2176d375f82ef7f99530a40e48c2c" +checksum = "fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f" dependencies = [ "log", "ring", @@ -6569,26 +6584,38 @@ dependencies = [ "webpki", ] +[[package]] +name = "rustls-native-certs" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0167bac7a9f490495f3c33013e7722b53cb087ecbe082fb0c6387c96f634ea50" +dependencies = [ + "openssl-probe", + "rustls-pemfile", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pemfile" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0864aeff53f8c05aa08d86e5ef839d3dfcf07aeba2db32f12db0ef716e87bd55" +checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b" dependencies = [ - "base64 0.13.1", + "base64 0.21.0", ] [[package]] name = "rustversion" -version = "1.0.9" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97477e48b4cf8603ad5f7aaf897467cf42ab4218a38ef76fb14c2d6773a6d6a8" +checksum = "5583e89e108996506031660fe09baa5011b9dd0341b89029313006d1fb508d70" [[package]] name = "ryu" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" +checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" [[package]] name = "same-file" @@ -6608,24 +6635,17 @@ dependencies = [ "cc", "duct", "krb5-src", - "libc", - "pkg-config", -] - -[[package]] -name = "saturating" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ece8e78b2f38ec51c51f5d475df0a7187ba5111b2a28bdc761ee05b075d40a71" + "libc", + "pkg-config", +] [[package]] name = "schannel" -version = "0.1.20" +version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d6731146462ea25d9244b2ed5fd1d716d25c52e4d54aa4fb0f3c4e9854dbe2" +checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3" dependencies = [ - "lazy_static", - "windows-sys 0.36.1", + "windows-sys 0.42.0", ] [[package]] @@ -6651,9 +6671,9 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "scratch" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8132065adcfd6e02db789d9285a0deb2f3fcb04002865ab67d5fb103533898" +checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2" [[package]] name = "sct" @@ -6665,11 +6685,17 @@ dependencies = [ "untrusted", ] +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "security-framework" -version = "2.7.0" +version = "2.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bc1bb97804af6631813c55739f771071e0f2ed33ee20b68c86ec505d906356c" +checksum = "a332be01508d814fed64bf28f798a146d73792121129962fdf335bb3c49a4254" dependencies = [ 
"bitflags", "core-foundation", @@ -6680,9 +6706,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.6.1" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0160a13a177a45bfb43ce71c01580998474f556ad854dcbca936dd2841a5c556" +checksum = "31c9bb296072e961fcbd8853511dd39c2d8be2deb1e17c6860b1d30732b323b4" dependencies = [ "core-foundation-sys", "libc", @@ -6690,28 +6716,13 @@ dependencies = [ [[package]] name = "semver" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -dependencies = [ - "semver-parser", -] - -[[package]] -name = "semver" -version = "1.0.14" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4" +checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" dependencies = [ "serde", ] -[[package]] -name = "semver-parser" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" - [[package]] name = "serde" version = "1.0.152" @@ -6721,16 +6732,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde-value" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" -dependencies = [ - "ordered-float 2.10.0", - "serde", -] - [[package]] name = "serde_derive" version = "1.0.152" @@ -6744,9 +6745,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.91" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883" +checksum = "cad406b69c91885b5107daf2c29572f6c8cdb3c66826821e286c533490c0bc76" dependencies = [ "itoa", "ryu", @@ -6755,9 +6756,9 @@ dependencies = [ [[package]] name = "serde_path_to_error" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "184c643044780f7ceb59104cef98a5a6f12cb2288a7bc701ab93a362b49fd47d" +checksum = "26b04f22b563c91331a10074bda3dd5492e3cc39d56bd557e91c0af42b6c7341" dependencies = [ "serde", ] @@ -6773,6 +6774,15 @@ dependencies = [ "thiserror", ] +[[package]] +name = "serde_spanned" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0efd8caf556a6cebd3b285caf480045fcc1ac04f6bd786b09a6f11af30c4fcf4" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -6787,9 +6797,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25bf4a5a814902cd1014dbccfa4d4560fb8432c779471e96e035602519f82eef" +checksum = "30d904179146de381af4c93d3af6ca4984b3152db687dacb9c3c35e86f39809c" dependencies = [ "base64 0.13.1", "chrono", @@ -6798,14 +6808,14 @@ dependencies = [ "serde", "serde_json", "serde_with_macros", - "time 0.3.15", + "time 0.3.17", ] [[package]] name = "serde_with_macros" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3452b4c0f6c1e357f73fdb87cd1efabaa12acf328c7a528e252893baeb3f4aa" +checksum = "a1966009f3c05f095697c537312f5415d1e3ed31ce0a56942bac4c771c5c335e" dependencies = [ "darling", 
"proc-macro2", @@ -6815,9 +6825,9 @@ dependencies = [ [[package]] name = "serde_yaml" -version = "0.9.14" +version = "0.9.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d232d893b10de3eb7258ff01974d6ee20663d8e833263c99409d4b13a0209da" +checksum = "8fb06d4b6cdaef0e0c51fa881acb721bed3c924cfaa71d9c94a3b771dfdf6567" dependencies = [ "indexmap", "itoa", @@ -6852,17 +6862,6 @@ dependencies = [ "syn", ] -[[package]] -name = "sha-1" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "028f48d513f9678cda28f6e4064755b3fbb2af6acd672f2c209b62323f7aea0f" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - [[package]] name = "sha1" version = "0.10.5" @@ -6902,19 +6901,19 @@ dependencies = [ [[package]] name = "shared_child" -version = "0.3.5" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6be9f7d5565b1483af3e72975e2dee33879b3b86bd48c0929fccf6585d79e65a" +checksum = "b0d94659ad3c2137fef23ae75b03d5241d633f8acded53d672decfa0e6e0caef" dependencies = [ "libc", "winapi", ] [[package]] -name = "shlex" +name = "shell-words" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" +checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" [[package]] name = "signal-hook" @@ -6980,7 +6979,7 @@ dependencies = [ "num-bigint", "num-traits", "thiserror", - "time 0.3.15", + "time 0.3.17", ] [[package]] @@ -7057,9 +7056,9 @@ dependencies = [ [[package]] name = "snap" -version = "1.0.5" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" +checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" [[package]] name = "socket2" @@ -7089,31 +7088,31 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "spin" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09" +checksum = "7dccf47db1b41fa1573ed27ccf5e08e3ca771cb994f776668c5ebda893b248fc" dependencies = [ "lock_api", ] [[package]] name = "sqllogictest" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b2f781e2de5df40f526b3d7c93cb972f87425771a27cc8882a4178794b2ad11" +checksum = "71378f7ef90bc4d448f2d84c11898adca45ced916d95df16d233a0e6da39f118" dependencies = [ "async-trait", - "derivative", - "difference", + "educe", "fs-err", "futures", "glob", "humantime", "itertools", "libtest-mimic", - "md5", + "md-5", "owo-colors", "regex", + "similar", "tempfile", "thiserror", "tracing", @@ -7191,23 +7190,13 @@ version = "0.24.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro2", "quote", "rustversion", "syn", ] -[[package]] -name = "subprocess" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2e86926081dda636c546d8c5e641661049d7562a68f5488be4a1f7f66f6086" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "subtle" version = "2.4.1" @@ -7216,9 +7205,9 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] 
name = "symbolic-common" -version = "10.1.5" +version = "10.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56de97fb15cc5db0100c22aed51cee31b013234bc43e00e4035cad048a4c7cd8" +checksum = "1b55cdc318ede251d0957f07afe5fed912119b8c1bc5a7804151826db999e737" dependencies = [ "debugid", "memmap2", @@ -7228,9 +7217,9 @@ dependencies = [ [[package]] name = "symbolic-demangle" -version = "10.1.5" +version = "10.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fe238e66ada3122c2efe810ba17459df9f7573b45dd523a8057e4c0f515e860" +checksum = "79be897be8a483a81fff6a3a4e195b4ac838ef73ca42d348b3f722da9902e489" dependencies = [ "cpp_demangle", "rustc-demangle", @@ -7254,21 +7243,21 @@ version = "0.1.0" dependencies = [ "futures-util", "madsim-tokio", - "spin 0.9.4", + "spin 0.9.5", "thiserror", ] [[package]] name = "sync_wrapper" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" [[package]] name = "sysinfo" -version = "0.26.7" +version = "0.26.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c375d5fd899e32847b8566e10598d6e9f1d9b55ec6de3cdf9e7da4bdc51371bc" +checksum = "5c18a6156d1f27a9592ee18c1a846ca8dd5c258b7179fc193ae87c74ebb666f5" dependencies = [ "cfg-if", "core-foundation-sys", @@ -7285,12 +7274,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" -[[package]] -name = "tap" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" - [[package]] name = "task_stats_alloc" version = "0.1.11" @@ -7315,23 +7298,13 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.1.3" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" dependencies = [ "winapi-util", ] -[[package]] -name = "terminal_size" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "textwrap" version = "0.16.0" @@ -7340,18 +7313,18 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.37" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e" +checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.37" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" +checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" dependencies = [ "proc-macro2", "quote", @@ -7396,7 +7369,7 @@ dependencies = [ "byteorder", "integer-encoding", "log", - "ordered-float 1.1.1", + "ordered-float", "threadpool", ] @@ -7423,12 +7396,11 @@ dependencies = [ 
[[package]] name = "tikv-jemalloc-sys" -version = "0.5.2+5.3.0-patched" +version = "0.5.3+5.3.0-patched" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec45c14da997d0925c7835883e4d5c181f196fa142f8c19d7643d1e9af2592c3" +checksum = "a678df20055b43e57ef8cddde41cdfda9a3c1a060b67f4c5836dfb1d78543ba8" dependencies = [ "cc", - "fs_extra", "libc", ] @@ -7444,9 +7416,9 @@ dependencies = [ [[package]] name = "time" -version = "0.1.44" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" dependencies = [ "libc", "wasi 0.10.0+wasi-snapshot-preview1", @@ -7455,22 +7427,32 @@ dependencies = [ [[package]] name = "time" -version = "0.3.15" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d634a985c4d4238ec39cacaed2e7ae552fbd3c476b552c1deac3021b7d7eaf0c" +checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376" dependencies = [ "itoa", "libc", "num_threads", "serde", + "time-core", "time-macros", ] +[[package]] +name = "time-core" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" + [[package]] name = "time-macros" -version = "0.2.4" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" +checksum = "d967f99f534ca7e495c575c62638eebc2898a8c84c119b89e250477bc4ba16b2" +dependencies = [ + "time-core", +] [[package]] name = "tiny-keccak" @@ -7502,15 +7484,15 @@ dependencies = [ [[package]] name = "tinyvec_macros" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.22.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76ce4a75fb488c605c54bf610f221cea8b0dafb53333c1a67e8ee199dcd2ae3" +checksum = "c8e00990ebabbe4c14c08aca901caed183ecd5c09562a12c824bb53d3c3fd3af" dependencies = [ "autocfg", "bytes", @@ -7524,7 +7506,7 @@ dependencies = [ "socket2", "tokio-macros", "tracing", - "winapi", + "windows-sys 0.42.0", ] [[package]] @@ -7539,9 +7521,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "1.8.0" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484" +checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" dependencies = [ "proc-macro2", "quote", @@ -7561,9 +7543,9 @@ dependencies = [ [[package]] name = "tokio-native-tls" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d995660bd2b7f8c1568414c1126076c13fbb725c40112dc0120b78eb9b717b" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" dependencies = [ "native-tls", "tokio", @@ -7638,9 +7620,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.4" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bb2e075f03b3d66d8d8785356224ba688d2906a371015e225beeb65ca92c740" +checksum 
= "bc6a3b08b64e6dfad376fa2432c7b1f01522e37a623c3050bc95db2d3ff21583" dependencies = [ "bytes", "futures-core", @@ -7652,11 +7634,62 @@ dependencies = [ [[package]] name = "toml" -version = "0.5.9" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + +[[package]] +name = "toml" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7afcae9e3f0fe2c370fd4657108972cbb2fa9db1b9f84849cefd80741b01cb6" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime 0.6.1", + "toml_edit 0.19.3", +] + +[[package]] +name = "toml_datetime" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4553f467ac8e3d374bc9a177a26801e5d0f9b211aa1673fb137a403afd1c9cf5" + +[[package]] +name = "toml_datetime" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c59d8dd7d0dcbc6428bf7aa2f0e823e26e43b3c9aca15bbc9475d23e5fa12b" +dependencies = [ + "indexmap", + "nom8", + "toml_datetime 0.5.1", +] + +[[package]] +name = "toml_edit" +version = "0.19.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" +checksum = "5e6a7712b49e1775fb9a7b998de6635b299237f48b404dde71704f2e0e7f37e5" dependencies = [ + "indexmap", + "nom8", "serde", + "serde_spanned", + "toml_datetime 0.6.1", ] [[package]] @@ -7730,9 +7763,9 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.3.4" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c530c8675c1dbf98facee631536fa116b5fb6382d7dd6dc1b118d970eafe3ba" +checksum = "f873044bf02dd1e8239e9c1293ea39dad76dc594ec16185d0a1bf31d8dc8d858" dependencies = [ "bitflags", "bytes", @@ -7848,7 +7881,7 @@ dependencies = [ "sharded-slab", "smallvec", "thread_local", - "time 0.3.15", + "time 0.3.17", "tracing", "tracing-core", "tracing-log", @@ -7893,11 +7926,17 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "triple_accel" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622b09ce2fe2df4618636fb92176d205662f59803f39e70d1c333393082de96c" + [[package]] name = "try-lock" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" +checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" [[package]] name = "twox-hash" @@ -7923,9 +7962,9 @@ dependencies = [ [[package]] name = "typenum" -version = "1.15.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "uncased" @@ -7947,15 +7986,15 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.8" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" +checksum = 
"d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" [[package]] name = "unicode-ident" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" [[package]] name = "unicode-normalization" @@ -7966,6 +8005,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + [[package]] name = "unicode-width" version = "0.1.10" @@ -7974,9 +8019,9 @@ checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" [[package]] name = "unsafe-libyaml" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1e5fa573d8ac5f1a856f8d7be41d390ee973daf97c806b2c1a465e4e1406e68" +checksum = "bc7ed8ba44ca06be78ea1ad2c3682a43349126c8818054231ee6f4748012aed2" [[package]] name = "untrusted" @@ -7984,6 +8029,22 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" +[[package]] +name = "ureq" +version = "2.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "338b31dd1314f68f3aabf3ed57ab922df95ffcd902476ca7ba3c4ce7b908c46d" +dependencies = [ + "base64 0.13.1", + "log", + "once_cell", + "rustls", + "rustls-native-certs", + "url", + "webpki", + "webpki-roots", +] + [[package]] name = "url" version = "2.3.1" @@ -8004,9 +8065,9 @@ checksum = "e8db7427f936968176eaa7cdf81b7f98b980b18495ec28f1b5791ac3bfe3eea9" [[package]] name = "uuid" -version = "1.2.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "422ee0de9031b5b948b97a8fc04e3aa35230001a722ddd27943e0be31564ce4c" +checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79" dependencies = [ "getrandom 0.2.7", "rand 0.8.5", @@ -8053,6 +8114,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "virtue" +version = "0.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b60dcd6a64dd45abf9bd426970c9843726da7fc08f44cd6fcebf68c21220a63" + [[package]] name = "waker-fn" version = "1.1.0" @@ -8106,9 +8173,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.83" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" +checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -8116,9 +8183,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.83" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" +checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" dependencies = [ "bumpalo", "log", @@ -8131,9 +8198,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.33" +version = "0.4.34" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "23639446165ca5a5de86ae1d8896b737ae80319560fbaa4c2887b7da6e7ebd7d" +checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" dependencies = [ "cfg-if", "js-sys", @@ -8143,9 +8210,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.83" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" +checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -8153,9 +8220,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.83" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" +checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" dependencies = [ "proc-macro2", "quote", @@ -8166,15 +8233,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.83" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" + +[[package]] +name = "wasm-streams" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" +checksum = "6bbae3363c08332cadccd13b67db371814cd214c2524020932f0804b8cf7c078" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] [[package]] name = "web-sys" -version = "0.3.60" +version = "0.3.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f" +checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" dependencies = [ "js-sys", "wasm-bindgen", @@ -8192,9 +8272,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368bfe657969fb01238bb756d351dcade285e0f6fcbd36dcb23359a5169975be" +checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87" dependencies = [ "webpki", ] @@ -8210,9 +8290,9 @@ dependencies = [ [[package]] name = "which" -version = "4.3.0" +version = "4.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c831fbbee9e129a8cf93e7747a82da9d95ba8e16621cae60ec2cdc849bacb7b" +checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" dependencies = [ "either", "libc", @@ -8265,43 +8345,48 @@ dependencies = [ [[package]] name = "windows-sys" -version = "0.36.1" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_msvc 0.36.1", - "windows_i686_gnu 0.36.1", - "windows_i686_msvc 0.36.1", - "windows_x86_64_gnu 0.36.1", - "windows_x86_64_msvc 0.36.1", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc 0.42.1", + "windows_i686_gnu 0.42.1", + "windows_i686_msvc 0.42.1", + "windows_x86_64_gnu 0.42.1", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc 0.42.1", ] [[package]] name = "windows-sys" -version = "0.42.0" +version = "0.45.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc 0.42.0", - "windows_i686_gnu 0.42.0", - "windows_i686_msvc 0.42.0", - "windows_x86_64_gnu 0.42.0", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc 0.42.0", + "windows-targets", ] [[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.0" +name = "windows-targets" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" +checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc 0.42.1", + "windows_i686_gnu 0.42.1", + "windows_i686_msvc 0.42.1", + "windows_x86_64_gnu 0.42.1", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc 0.42.1", +] [[package]] -name = "windows_aarch64_msvc" -version = "0.36.1" +name = "windows_aarch64_gnullvm" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" [[package]] name = "windows_aarch64_msvc" @@ -8311,15 +8396,9 @@ checksum = "ec7711666096bd4096ffa835238905bb33fb87267910e154b18b44eaabb340f2" [[package]] name = "windows_aarch64_msvc" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" - -[[package]] -name = "windows_i686_gnu" -version = "0.36.1" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" [[package]] name = "windows_i686_gnu" @@ -8329,15 +8408,9 @@ checksum = "763fc57100a5f7042e3057e7e8d9bdd7860d330070251a73d003563a3bb49e1b" [[package]] name = "windows_i686_gnu" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" - -[[package]] -name = "windows_i686_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" [[package]] name = "windows_i686_msvc" @@ -8347,15 +8420,9 @@ checksum = "7bc7cbfe58828921e10a9f446fcaaf649204dcfe6c1ddd712c5eebae6bda1106" [[package]] name = "windows_i686_msvc" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.36.1" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" [[package]] name = "windows_x86_64_gnu" @@ -8365,21 +8432,15 @@ checksum = "6868c165637d653ae1e8dc4d82c25d4f97dd6605eaa8d784b5c6e0ab2a252b65" [[package]] name = "windows_x86_64_gnu" -version = "0.42.0" +version = 
"0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" +checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.36.1" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" [[package]] name = "windows_x86_64_msvc" @@ -8389,9 +8450,9 @@ checksum = "5e4d40883ae9cae962787ca76ba76390ffa29214667a111db9e0a1ad8377e809" [[package]] name = "windows_x86_64_msvc" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" +checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" [[package]] name = "winreg" @@ -8404,9 +8465,9 @@ dependencies = [ [[package]] name = "wiremock" -version = "0.5.15" +version = "0.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "249dc68542861d17eae4b4e5e8fb381c2f9e8f255a84f6771d5fdf8b6c03ce3c" +checksum = "12316b50eb725e22b2f6b9c4cbede5b7b89984274d113a7440c86e5c3fc6f99b" dependencies = [ "assert-json-diff", "async-trait", @@ -8439,10 +8500,15 @@ dependencies = [ name = "workspace-hack" version = "0.2.0-alpha" dependencies = [ - "ahash 0.7.6", + "ahash 0.8.3", "anyhow", + "arrayvec", "auto_enums", "auto_enums_derive", + "aws-sdk-s3", + "aws-smithy-client", + "aws-types", + "base64 0.21.0", "bytes", "cc", "chrono", @@ -8452,11 +8518,11 @@ dependencies = [ "crossbeam-channel", "crossbeam-deque", "crossbeam-epoch", + "crossbeam-utils", + "digest", "either", "fail", "fixedbitset", - "flate2", - "frunk_core", "futures", "futures-channel", "futures-core", @@ -8466,9 +8532,9 @@ dependencies = [ "futures-task", "futures-util", "hashbrown 0.12.3", + "hashbrown 0.13.2", "hdrhistogram", "hyper", - "indexmap", "isahc", "itertools", "lexical-core", @@ -8478,19 +8544,17 @@ dependencies = [ "lexical-write-float", "lexical-write-integer", "libc", - "libz-sys", "lock_api", "log", "madsim-tokio", "memchr", - "minimal-lexical", + "miniz_oxide", "multimap", - "nom 7.1.1", "num-bigint", "num-integer", "num-traits", "parking_lot 0.12.1", - "parking_lot_core 0.9.4", + "parking_lot_core 0.9.7", "petgraph", "phf", "phf_shared", @@ -8511,7 +8575,7 @@ dependencies = [ "stable_deref_trait", "strum", "syn", - "time 0.3.15", + "time 0.3.17", "tokio", "tokio-stream", "tokio-util", @@ -8521,18 +8585,14 @@ dependencies = [ "tower-http", "tracing", "tracing-core", + "tracing-futures", "tracing-subscriber", + "triomphe", "url", "uuid", -] - -[[package]] -name = "wyz" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" -dependencies = [ - "tap", + "zstd", + "zstd-safe", + "zstd-sys", ] [[package]] @@ -8541,6 +8601,16 @@ version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd" +[[package]] +name = "xorf" +version = "0.8.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "57901b00e3f8e14f4d20b8955bf8087ecb545cfe2ed8741c2a2dbc89847a1a29" +dependencies = [ + "libm", + "rand 0.8.5", +] + [[package]] name = "xxhash-rust" version = "0.8.6" @@ -8619,10 +8689,11 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.1+zstd.1.5.2" +version = "2.0.7+zstd.1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" +checksum = "94509c3ba2fe55294d752b79842c530ccfab760192521df74a081a78d2b3c7f5" dependencies = [ "cc", "libc", + "pkg-config", ] diff --git a/Cargo.toml b/Cargo.toml index dc4e1f5b8c92f..7fb3e579492f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,7 @@ lto = 'off' [profile.release] debug = true +lto = 'thin' [profile.bench] opt-level = 3 @@ -112,6 +113,7 @@ opt-level = 3 [profile.ci-sim] inherits = "dev" opt-level = 2 +incremental = false # Patch third-party crates for deterministic simulation. [patch.crates-io] diff --git a/Makefile.toml b/Makefile.toml index 78694529e65e1..b83fd7d6e2a60 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -23,6 +23,7 @@ env_scripts = [ is_sanitizer_enabled = get_env ENABLE_SANITIZER is_all_in_one_enabled = get_env ENABLE_ALL_IN_ONE +is_hdfs_backend = get_env ENABLE_HDFS if ${is_sanitizer_enabled} set_env RISEDEV_CARGO_BUILD_EXTRA_ARGS "-Zbuild-std --target ${CARGO_MAKE_RUST_TARGET_TRIPLE}" @@ -38,9 +39,17 @@ if ${is_all_in_one_enabled} else set_env RISEDEV_CARGO_BUILD_CRATE "risingwave_cmd" end + +if ${is_hdfs_backend} + set_env BUILD_HDFS_BACKEND_CMD "-p risingwave_object_store --features hdfs-backend" +else + set_env BUILD_HDFS_BACKEND_CMD "" +end + ''', ] + [config] default_to_workspace = false skip_core_tasks = true @@ -314,7 +323,7 @@ echo "$(tput setaf 4)$(tput bold)[Reminder]$(tput sgr0) risedev will only build [[ -z "${RISEDEV_RUSTFLAGS}" ]] || export RUSTFLAGS="${RISEDEV_RUSTFLAGS}" echo + RUSTFLAGS="${RUSTFLAGS:-}" set -xe -cargo build -p ${RISEDEV_CARGO_BUILD_CRATE} -p risedev \ +cargo build -p ${RISEDEV_CARGO_BUILD_CRATE} -p risedev ${BUILD_HDFS_BACKEND_CMD}\ --profile "${RISINGWAVE_BUILD_PROFILE}" \ ${RISEDEV_CARGO_BUILD_EXTRA_ARGS} ''' @@ -501,17 +510,30 @@ script = """ set -e -for tool in cargo-llvm-cov cargo-nextest cargo-udeps cargo-hakari cargo-sort cargo-make cargo-upgrades typos-cli +cargo install cargo-quickinstall --locked +cargo quickinstall cargo-binstall + +for tool in cargo-llvm-cov cargo-nextest cargo-hakari cargo-sort cargo-make do echo "install: $(tput setaf 4)$tool$(tput sgr0)" - cargo install $tool --locked + cargo binstall -y --no-symlinks $tool echo done -echo "install: $(tput setaf 4)risinglightdb/sqllogictest-rs$(tput sgr0)" -cargo install --git https://github.com/risinglightdb/sqllogictest-rs --bin sqllogictest +# Need https://github.com/est31/cargo-udeps/pull/147 to make --exclude work +echo "install: $(tput setaf 4)cargo-udeps$(tput sgr0)" +cargo install cargo-udeps --locked --git https://github.com/est31/cargo-udeps --rev 63dd458 echo +# Tools that fallback to `cargo install` when using `cargo binstall`. +# We directly use `cargo install` here to be faster. +for tool in typos-cli sqllogictest-bin +do + echo "install: $(tput setaf 4)$tool$(tput sgr0)" + cargo install $tool --locked + echo +done + echo "check: $(tput setaf 4)tmux >= v3.2a$(tput sgr0)" tmux -V || echo "$(tput setaf 3)tmux$(tput sgr0) not found." 
echo @@ -524,11 +546,11 @@ echo "check: $(tput setaf 4)cmake$(tput sgr0)" cmake --version || echo "$(tput setaf 3)cmake$(tput sgr0) not found." echo -echo "check: $(tput setaf 4)protoc$(tput sgr0)" +echo "check: $(tput setaf 4)protoc >= 3.12.0$(tput sgr0)" protoc --version || echo "$(tput setaf 3)protoc$(tput sgr0) not found." echo """ -description = "Install required tools to do pre-CI check and run e2e tests" +description = "Install (or upgrade) required tools to do pre-CI check and run e2e tests" [tasks.warn-on-missing-tools] private = true @@ -733,6 +755,24 @@ cargo clippy --all-targets --all-features echo "If cargo clippy check failed or generates warning, you may run $(tput setaf 4)cargo clippy --workspace --all-targets --fix$(tput sgr0) to fix it. Note that clippy fix requires manual review, as not all auto fixes are guaranteed to be reasonable." """ +[tasks.check-clippy-fix] +category = "RiseDev - Check" +description = "Run cargo clippy check and fixes all files (including dirty and staged)" +script = """ +#!/usr/bin/env bash + +echo "Running $(tput setaf 4)cargo clippy$(tput sgr0) checks and attempting to fix" +if [ $# -gt 0 ]; then + ARGS=("$@") + + echo "Applying clippy --fix for $@ (including dirty and staged files)" + cargo clippy ${ARGS[@]/#/--package risingwave_} --all-features --fix --allow-dirty --allow-staged +else + echo "Applying clippy --fix for all targets to all files (including dirty and staged files)" + cargo clippy --all-targets --all-features --fix --allow-dirty --allow-staged +fi +""" + [tasks.check-typos] category = "RiseDev - Check" description = "Run cargo typos-cli check" @@ -747,6 +787,21 @@ if ! typos ; then fi """ +[tasks.check-udeps] +category = "RiseDev - Check" +description = "Check unused dependencies" +env = { RUSTFLAGS = "--cfg tokio_unstable" } +script = """ +#!/usr/bin/env bash + +# TODO: after cargo-machete supports excluding packages, we may use it instead of cargo udeps. +# It's much faster so we can add it to CI and [tasks.check]. + +echo "Running $(tput setaf 4)cargo udeps$(tput sgr0) checks" + +cargo udeps --workspace --all-targets --all-features --exclude workspace-hack --exclude risingwave_bench --exclude risingwave_udf --exclude risingwave_simulation +""" + [tasks.check] category = "RiseDev - Check" @@ -768,6 +823,27 @@ description = "Perform pre-CI checks and automatically fix cargo sort, cargo hak [tasks.c] alias = "check" +[tasks.check-fix] +category = "RiseDev - Check" +dependencies = [ + "warn-on-missing-tools", + "check-hakari", + "check-dep-sort", + "check-fmt", + "check-clippy-fix", + "check-typos", +] +script = """ +#!/usr/bin/env bash + +echo "Good work! You may run $(tput setaf 4)./risedev test$(tput sgr0) or $(tput setaf 4)./risedev test-cov$(tput sgr0) to run unit tests." 
+""" +description = "Perform pre-CI checks and automatically fix cargo sort, cargo hakari, fixes cargo fmt warnings" + +[tasks.cf] +alias = "check-fix" + + [tasks.install] category = "RiseDev - Prepare" description = "Install RiseDev to user local" @@ -827,7 +903,7 @@ description = "Run all streaming e2e tests" [tasks.slt-batch] category = "RiseDev - SQLLogicTest" extend = "slt" -args = ["${@}", "./e2e_test/batch/**/*.slt"] +args = ["${@}", "./e2e_test/batch/*.slt"] description = "Run all batch e2e tests" [tasks.slt-generated] diff --git a/README.md b/README.md index c32225a075152..68b02e0346971 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ -![RisingWave Logo](./docs/images/logo-title.svg) + +RisingWave Logo + [![Slack](https://badgen.net/badge/Slack/Join%20RisingWave/0abd59?icon=slack)](https://join.slack.com/t/risingwave-community/shared_invite/zt-120rft0mr-d8uGk3d~NZiZAQWPnElOfw) [![Build status](https://badge.buildkite.com/9394d2bca0f87e2e97aa78b25f765c92d4207c0b65e7f6648f.svg)](https://buildkite.com/risingwavelabs/main) @@ -22,9 +24,9 @@ There are two ways to install RisingWave: use a pre-built package or compile fro ```shell # Download the pre-built binary -wget https://github.com/risingwavelabs/risingwave/releases/download/v0.1.15/risingwave-v0.1.15-x86_64-unknown-linux.tar.gz +wget https://github.com/risingwavelabs/risingwave/releases/download/v0.1.17/risingwave-v0.1.17-x86_64-unknown-linux.tar.gz # Unzip the binary -tar xvf risingwave-v0.1.15-x86_64-unknown-linux.tar.gz +tar xvf risingwave-v0.1.17-x86_64-unknown-linux.tar.gz # Start RisingWave in single-binary playground mode ./risingwave playground ``` @@ -33,7 +35,7 @@ tar xvf risingwave-v0.1.15-x86_64-unknown-linux.tar.gz ```shell # Start RisingWave in single-binary playground mode -docker run -it --pull=always -p 4566:4566 -p 5691:5691 ghcr.io/risingwavelabs/risingwave:v0.1.15 playground +docker run -it --pull=always -p 4566:4566 -p 5691:5691 ghcr.io/risingwavelabs/risingwave:v0.1.17 playground ``` **Compile from Source with [RiseDev](docs/developer-guide.md#set-up-the-development-environment) (Linux and macOS)** diff --git a/ci/Dockerfile b/ci/Dockerfile index 40d0b3b418c90..e75678be4da54 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -6,7 +6,7 @@ ARG RUST_TOOLCHAIN RUN apt-get update -yy && \ DEBIAN_FRONTEND=noninteractive apt-get -y install make build-essential cmake protobuf-compiler curl parallel \ - openssl libssl-dev libsasl2-dev libcurl4-openssl-dev pkg-config bash openjdk-11-jdk wget unzip git tmux lld postgresql-client kafkacat \ + openssl libssl-dev libsasl2-dev libcurl4-openssl-dev pkg-config bash openjdk-11-jdk wget unzip git tmux lld postgresql-client kafkacat netcat mysql-client \ maven -yy \ && rm -rf /var/lib/{apt,dpkg,cache,log}/ @@ -27,9 +27,13 @@ ENV PATH /root/.cargo/bin/:$PATH RUN rustup component add rustfmt llvm-tools-preview clippy # install build tools -RUN cargo install cargo-llvm-cov cargo-nextest cargo-udeps cargo-hakari cargo-sort cargo-make cargo-cache \ +RUN cargo install cargo-llvm-cov cargo-nextest cargo-udeps cargo-hakari cargo-sort cargo-make cargo-cache sccache \ && cargo install --git https://github.com/risinglightdb/sqllogictest-rs --rev dbadddb --bin sqllogictest --locked \ && cargo cache -a \ && rm -rf "/root/.cargo/registry/index" \ && rm -rf "/root/.cargo/registry/cache" \ && rm -rf "/root/.cargo/git/db" + +ENV RUSTC_WRAPPER=sccache +ENV SCCACHE_BUCKET=ci-sccache-bucket +ENV CARGO_INCREMENTAL=0 \ No newline at end of file diff --git a/ci/build-ci-image.sh 
b/ci/build-ci-image.sh index f5a0b9fae3958..cfab60dac04ef 100755 --- a/ci/build-ci-image.sh +++ b/ci/build-ci-image.sh @@ -14,7 +14,7 @@ export RUST_TOOLCHAIN=$(cat ../rust-toolchain) # !!! CHANGE THIS WHEN YOU WANT TO BUMP CI IMAGE !!! # # AND ALSO docker-compose.yml # ###################################################### -export BUILD_ENV_VERSION=v20230116 +export BUILD_ENV_VERSION=v20230208_05 export BUILD_TAG="public.ecr.aws/x5u3w5h6/rw-build-env:${BUILD_ENV_VERSION}" diff --git a/ci/connector-node-version b/ci/connector-node-version new file mode 100644 index 0000000000000..1afd364109050 --- /dev/null +++ b/ci/connector-node-version @@ -0,0 +1 @@ +v0.1.17 \ No newline at end of file diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index b58c689fbb47e..525b56edb9d60 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -30,7 +30,7 @@ services: retries: 5 source-test-env: - image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230116 + image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230208_05 depends_on: - mysql - db @@ -38,7 +38,7 @@ services: - ..:/risingwave sink-test-env: - image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230116 + image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230208_05 depends_on: - mysql - db @@ -46,12 +46,12 @@ services: - ..:/risingwave rw-build-env: - image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230116 + image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230208_05 volumes: - ..:/risingwave regress-test-env: - image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230116 + image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230208_05 depends_on: db: condition: service_healthy diff --git a/ci/plugins/upload-failure-logs/hooks/post-command b/ci/plugins/upload-failure-logs/hooks/post-command index c1a6e1f5b872e..a0cd360c1c575 100755 --- a/ci/plugins/upload-failure-logs/hooks/post-command +++ b/ci/plugins/upload-failure-logs/hooks/post-command @@ -14,7 +14,7 @@ if [ $BUILDKITE_COMMAND_EXIT_STATUS -ne 0 ]; then buildkite-agent artifact upload "regress-test-logs/*" buildkite-agent artifact upload regress-test-logs.zip fi - if [ -e "$PWD/connector-service.log" ]; then - buildkite-agent artifact upload "$PWD/connector-service.log" + if [ -e "$PWD/connector-node.log" ]; then + buildkite-agent artifact upload "$PWD/connector-node.log" fi fi \ No newline at end of file diff --git a/ci/scripts/build-other.sh b/ci/scripts/build-other.sh index 4f0f7bf264af4..71095c019cab5 100755 --- a/ci/scripts/build-other.sh +++ b/ci/scripts/build-other.sh @@ -6,14 +6,14 @@ set -euo pipefail source ci/scripts/common.env.sh # Should set a stable version of connector node -STABLE_VERSION=bd12fb55c75f09b234d1b75b8671b7582ca533f3 +STABLE_VERSION=4380fc207d2a76defdcac38754a61606a2e8f83f echo "--- Build Java connector node" git clone https://"$GITHUB_TOKEN"@github.com/risingwavelabs/risingwave-connector-node.git cd risingwave-connector-node # checkout a stable version git checkout $STABLE_VERSION -mvn package -Dmaven.test.skip=true +mvn -B package -Dmaven.test.skip=true echo "--- Upload Java artifacts" -cp service/target/service-*.jar ./connector-service.jar -buildkite-agent artifact upload ./connector-service.jar +cp assembly/target/risingwave-connector-1.0.0.tar.gz ./risingwave-connector.tar.gz +buildkite-agent artifact upload ./risingwave-connector.tar.gz diff --git a/ci/scripts/build-simulation.sh b/ci/scripts/build-simulation.sh index f8550b9a5c3f7..93f7e9e22d978 100755 --- a/ci/scripts/build-simulation.sh +++ b/ci/scripts/build-simulation.sh @@ -19,3 +19,6 @@ mv 
target/sim/ci-sim/risingwave_simulation ./risingwave_simulation artifacts=(risingwave_simulation scale-test.tar.zst) echo -n "${artifacts[*]}" | parallel -d ' ' "buildkite-agent artifact upload ./{}" + +echo "--- Show sccache stats" +sccache --show-stats diff --git a/ci/scripts/build.sh b/ci/scripts/build.sh index 7772d1124580f..b04ba2aad59c6 100755 --- a/ci/scripts/build.sh +++ b/ci/scripts/build.sh @@ -29,6 +29,7 @@ cargo sort --check --workspace echo "--- Rust cargo-hakari check" cargo hakari generate --diff +cargo hakari verify echo "--- Rust format check" cargo fmt --all -- --check @@ -53,3 +54,6 @@ ldd target/"$target"/risingwave echo "--- Upload artifacts" echo -n "${artifacts[*]}" | parallel -d ' ' "mv target/$target/{} ./{}-$profile && buildkite-agent artifact upload ./{}-$profile" + +echo "--- Show sccache stats" +sccache --show-stats diff --git a/ci/scripts/common.env.sh b/ci/scripts/common.env.sh index f72d5c00acf3e..31e95d47737c9 100644 --- a/ci/scripts/common.env.sh +++ b/ci/scripts/common.env.sh @@ -3,3 +3,6 @@ export PROTOC_NO_VENDOR=true export CARGO_HOME=/risingwave/.cargo export RISINGWAVE_CI=true export RUST_BACKTRACE=1 +if [ -n "${BUILDKITE_COMMIT:-}" ]; then + export GIT_SHA=$BUILDKITE_COMMIT +fi diff --git a/ci/scripts/deterministic-e2e-test.sh b/ci/scripts/deterministic-e2e-test.sh index ed93f403cd292..1918c20ce889a 100755 --- a/ci/scripts/deterministic-e2e-test.sh +++ b/ci/scripts/deterministic-e2e-test.sh @@ -20,23 +20,23 @@ export LOGDIR=.risingwave/log mkdir -p $LOGDIR -echo "--- deterministic simulation e2e, ci-3cn-1fe, ddl" -seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation ./e2e_test/ddl/\*\*/\*.slt > $LOGDIR/ddl-{}.log && rm $LOGDIR/ddl-{}.log' +echo "--- deterministic simulation e2e, ci-3cn-2fe, ddl" +seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation ./e2e_test/ddl/\*\*/\*.slt 2> $LOGDIR/ddl-{}.log && rm $LOGDIR/ddl-{}.log' -echo "--- deterministic simulation e2e, ci-3cn-1fe, streaming" -seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation ./e2e_test/streaming/\*\*/\*.slt > $LOGDIR/streaming-{}.log && rm $LOGDIR/streaming-{}.log' +echo "--- deterministic simulation e2e, ci-3cn-2fe, streaming" +seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation ./e2e_test/streaming/\*\*/\*.slt 2> $LOGDIR/streaming-{}.log && rm $LOGDIR/streaming-{}.log' -echo "--- deterministic simulation e2e, ci-3cn-1fe, batch" -seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation ./e2e_test/batch/\*\*/\*.slt > $LOGDIR/batch-{}.log && rm $LOGDIR/batch-{}.log' +echo "--- deterministic simulation e2e, ci-3cn-2fe, batch" +seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation ./e2e_test/batch/\*\*/\*.slt 2> $LOGDIR/batch-{}.log && rm $LOGDIR/batch-{}.log' -echo "--- deterministic simulation e2e, ci-3cn-1fe, kafka source" -seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --kafka-datadir=./scripts/source/test_data ./e2e_test/source/kafka.slt > $LOGDIR/source-{}.log && rm $LOGDIR/source-{}.log' +echo "--- deterministic simulation e2e, ci-3cn-2fe, kafka source" +seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --kafka-datadir=./scripts/source/test_data ./e2e_test/source/basic/kafka\*.slt 2> $LOGDIR/source-{}.log && rm $LOGDIR/source-{}.log' echo "--- deterministic simulation e2e, ci-3cn-2fe, parallel, streaming" -seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation -j 16 ./e2e_test/streaming/\*\*/\*.slt > 
$LOGDIR/parallel-streaming-{}.log && rm $LOGDIR/parallel-streaming-{}.log' +seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation -j 16 ./e2e_test/streaming/\*\*/\*.slt 2> $LOGDIR/parallel-streaming-{}.log && rm $LOGDIR/parallel-streaming-{}.log' echo "--- deterministic simulation e2e, ci-3cn-2fe, parallel, batch" -seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation -j 16 ./e2e_test/batch/\*\*/\*.slt > $LOGDIR/parallel-batch-{}.log && rm $LOGDIR/parallel-batch-{}.log' +seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation -j 16 ./e2e_test/batch/\*\*/\*.slt 2> $LOGDIR/parallel-batch-{}.log && rm $LOGDIR/parallel-batch-{}.log' -echo "--- deterministic simulation e2e, ci-3cn-1fe, fuzzing" -seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --sqlsmith 100 ./src/tests/sqlsmith/tests/testdata > $LOGDIR/fuzzing-{}.log && rm $LOGDIR/fuzzing-{}.log' +echo "--- deterministic simulation e2e, ci-3cn-2fe, fuzzing" +seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --sqlsmith 100 ./src/tests/sqlsmith/tests/testdata 2> $LOGDIR/fuzzing-{}.log && rm $LOGDIR/fuzzing-{}.log' diff --git a/ci/scripts/deterministic-recovery-test.sh b/ci/scripts/deterministic-recovery-test.sh index 3b2fa9018ee02..6ff9f51c4ce40 100755 --- a/ci/scripts/deterministic-recovery-test.sh +++ b/ci/scripts/deterministic-recovery-test.sh @@ -14,8 +14,8 @@ export LOGDIR=.risingwave/log mkdir -p $LOGDIR -echo "--- deterministic simulation e2e, ci-3cn-1fe, recovery, streaming" -seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --kill --kill-rate=${KILL_RATE} ./e2e_test/streaming/\*\*/\*.slt > $LOGDIR/recovery-streaming-{}.log && rm $LOGDIR/recovery-streaming-{}.log' +echo "--- deterministic simulation e2e, ci-3cn-2fe-3meta, recovery, streaming" +seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --kill --kill-rate=${KILL_RATE} ./e2e_test/streaming/\*\*/\*.slt 2> $LOGDIR/recovery-streaming-{}.log && rm $LOGDIR/recovery-streaming-{}.log' -echo "--- deterministic simulation e2e, ci-3cn-1fe, recovery, batch" -seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --kill --kill-rate=${KILL_RATE} ./e2e_test/batch/\*\*/\*.slt > $LOGDIR/recovery-batch-{}.log && rm $LOGDIR/recovery-batch-{}.log' +echo "--- deterministic simulation e2e, ci-3cn-2fe-3meta, recovery, batch" +seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --kill --kill-rate=${KILL_RATE} ./e2e_test/batch/\*\*/\*.slt 2> $LOGDIR/recovery-batch-{}.log && rm $LOGDIR/recovery-batch-{}.log' diff --git a/ci/scripts/docker.sh b/ci/scripts/docker.sh index 999402ad90383..fff87329d684c 100755 --- a/ci/scripts/docker.sh +++ b/ci/scripts/docker.sh @@ -3,21 +3,26 @@ # Exits as soon as any line fails. set -euo pipefail -# Build docker image ${BUILDKITE_COMMIT}-${arch} - -date="$(date +%Y%m%d)" ghcraddr="ghcr.io/risingwavelabs/risingwave" dockerhubaddr="risingwavelabs/risingwave" arch="$(uname -m)" +connector_node_version=$(cat ci/connector-node-version) + +# Git clone risingwave-connector-node repo +git clone https://"$GITHUB_TOKEN"@github.com/risingwavelabs/risingwave-connector-node.git +cd risingwave-connector-node && git checkout ${connector_node_version} && cd .. +# Build RisingWave docker image ${BUILDKITE_COMMIT}-${arch} echo "--- docker build and tag" -docker build -f docker/Dockerfile -t "${ghcraddr}:${BUILDKITE_COMMIT}-${arch}" --target risingwave . 
+docker build -f docker/Dockerfile --build-arg "GIT_SHA=${BUILDKITE_COMMIT}" -t "${ghcraddr}:${BUILDKITE_COMMIT}-${arch}" --target risingwave . echo "--- check the image can start correctly" -container_id=$(docker run -d "${ghcraddr}:${BUILDKITE_COMMIT}-${arch}" risingwave) -ret_code=$(docker inspect --format='{{.State.ExitCode}}' "$container_id") -if [ "$ret_code" -ne 0 ]; then - echo "docker run failed with exit code $ret_code" +container_id=$(docker run -d "${ghcraddr}:${BUILDKITE_COMMIT}-${arch}" playground) +sleep 10 +container_status=$(docker inspect --format='{{.State.Status}}' "$container_id") +if [ "$container_status" != "running" ]; then + echo "docker run failed with status $container_status" + docker inspect "$container_id" docker logs "$container_id" exit 1 fi @@ -36,4 +41,4 @@ docker push "${ghcraddr}:${BUILDKITE_COMMIT}-${arch}" echo "--- docker push to dockerhub" docker tag "${ghcraddr}:${BUILDKITE_COMMIT}-${arch}" "${dockerhubaddr}:${BUILDKITE_COMMIT}-${arch}" -docker push "${dockerhubaddr}:${BUILDKITE_COMMIT}-${arch}" \ No newline at end of file +docker push "${dockerhubaddr}:${BUILDKITE_COMMIT}-${arch}" diff --git a/ci/scripts/e2e-iceberg-sink-test.sh b/ci/scripts/e2e-iceberg-sink-test.sh new file mode 100755 index 0000000000000..67a34a0e1466c --- /dev/null +++ b/ci/scripts/e2e-iceberg-sink-test.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.env.sh + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? ) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +echo "--- Download artifacts" +mkdir -p target/debug +buildkite-agent artifact download risingwave-"$profile" target/debug/ +buildkite-agent artifact download risedev-dev-"$profile" target/debug/ +mv target/debug/risingwave-"$profile" target/debug/risingwave +mv target/debug/risedev-dev-"$profile" target/debug/risedev-dev + +echo "--- Download connector node package" +buildkite-agent artifact download risingwave-connector.tar.gz ./ +mkdir ./connector-node +tar xf ./risingwave-connector.tar.gz -C ./connector-node + +echo "--- Adjust permission" +chmod +x ./target/debug/risingwave +chmod +x ./target/debug/risedev-dev + +echo "--- Generate RiseDev CI config" +cp ci/risedev-components.ci.source.env risedev-components.user.env + +echo "--- Prepare RiseDev dev cluster" +cargo make pre-start-dev +cargo make link-all-in-one-binaries + +echo "--- starting risingwave cluster with connector node" +./connector-node/start-service.sh -p 50051 > .risingwave/log/connector-sink.log 2>&1 & +cargo make ci-start ci-iceberg-test +sleep 1 + +# prepare minio iceberg sink +echo "--- preparing iceberg" +.risingwave/bin/mcli -C .risingwave/config/mcli mb hummock-minio/iceberg +wget https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz +tar -xf spark-3.3.1-bin-hadoop3.tgz --no-same-owner +DEPENDENCIES=org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.0.0,org.apache.hadoop:hadoop-aws:3.3.2 +spark-3.3.1-bin-hadoop3/bin/spark-sql --packages $DEPENDENCIES \ + --conf spark.sql.catalog.demo=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.catalog.demo.type=hadoop \ + --conf spark.sql.catalog.demo.warehouse=s3a://iceberg/ \ + --conf spark.sql.catalog.demo.hadoop.fs.s3a.endpoint=http://127.0.0.1:9301 \ + --conf spark.sql.catalog.demo.hadoop.fs.s3a.access.key=hummockadmin \ + --conf 
spark.sql.catalog.demo.hadoop.fs.s3a.secret.key=hummockadmin \ + --S --e "CREATE TABLE demo.demo_db.demo_table(v1 int, v2 int) TBLPROPERTIES ('format-version'='2');" + +echo "--- testing sinks" +sqllogictest -p 4566 -d dev './e2e_test/sink/iceberg_sink.slt' +sleep 1 + +# check sink destination iceberg +spark-3.3.1-bin-hadoop3/bin/spark-sql --packages $DEPENDENCIES \ + --conf spark.sql.catalog.demo=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.catalog.demo.type=hadoop \ + --conf spark.sql.catalog.demo.warehouse=s3a://iceberg/ \ + --conf spark.sql.catalog.demo.hadoop.fs.s3a.endpoint=http://127.0.0.1:9301 \ + --conf spark.sql.catalog.demo.hadoop.fs.s3a.access.key=hummockadmin \ + --conf spark.sql.catalog.demo.hadoop.fs.s3a.secret.key=hummockadmin \ + --S --e "INSERT OVERWRITE DIRECTORY './spark-output' USING CSV SELECT * FROM demo.demo_db.demo_table;" + +# check sink destination using shell +if cat ./spark-output/*.csv | sort | awk -F "," '{ +if ($1 == 1 && $2 == 2) c1++; + if ($1 == 13 && $2 == 2) c2++; + if ($1 == 21 && $2 == 2) c3++; + if ($1 == 2 && $2 == 2) c4++; + if ($1 == 3 && $2 == 2) c5++; + if ($1 == 5 && $2 == 2) c6++; + if ($1 == 8 && $2 == 2) c7++; } + END { exit !(c1 == 1 && c2 == 1 && c3 == 1 && c4 == 1 && c5 == 1 && c6 == 1 && c7 == 1); }'; then + echo "Iceberg sink check passed" +else + echo "The output is not as expected." + exit 1 +fi + +echo "--- Kill cluster" +pkill -f connector-node +cargo make ci-kill diff --git a/ci/scripts/e2e-sink-test.sh b/ci/scripts/e2e-sink-test.sh index 301bea04dc9c6..ad5854a2fd8bd 100755 --- a/ci/scripts/e2e-sink-test.sh +++ b/ci/scripts/e2e-sink-test.sh @@ -28,8 +28,11 @@ buildkite-agent artifact download risedev-dev-"$profile" target/debug/ mv target/debug/risingwave-"$profile" target/debug/risingwave mv target/debug/risedev-dev-"$profile" target/debug/risedev-dev -echo "--- Download connector node jar" -buildkite-agent artifact download connector-service.jar ./ +echo "--- Download connector node package" +buildkite-agent artifact download risingwave-connector.tar.gz ./ +mkdir ./connector-node +tar xf ./risingwave-connector.tar.gz -C ./connector-node + echo "--- Adjust permission" chmod +x ./target/debug/risingwave @@ -43,7 +46,6 @@ cargo make pre-start-dev cargo make link-all-in-one-binaries # prepare environment mysql sink -apt-get -y install mysql-client mysql --host=mysql --port=3306 -u root -p123456 -e "CREATE DATABASE IF NOT EXISTS test;" # grant access to `test` for ci test user mysql --host=mysql --port=3306 -u root -p123456 -e "GRANT ALL PRIVILEGES ON test.* TO 'mysqluser'@'%';" @@ -60,13 +62,33 @@ createdb -h db -U postgres test psql -h db -U postgres -d test -c "CREATE TABLE t4 (v1 int, v2 int);" psql -h db -U postgres -d test -c "CREATE TABLE t_remote (id serial PRIMARY KEY, name VARCHAR (50) NOT NULL);" +node_port=50051 +node_timeout=10 +./connector-node/start-service.sh -p $node_port > .risingwave/log/connector-source.log 2>&1 & +echo "waiting for connector node to start" +start_time=$(date +%s) +while : +do + if nc -z localhost $node_port; then + echo "Port $node_port is listened! Connector Node is up!" + break + fi + + current_time=$(date +%s) + elapsed_time=$((current_time - start_time)) + if [ $elapsed_time -ge $node_timeout ]; then + echo "Timeout waiting for port $node_port to be listened!" 
+ exit 1 + fi + sleep 0.1 +done + echo "--- starting risingwave cluster with connector node" cargo make ci-start ci-1cn-1fe -java -jar ./connector-service.jar --port 60061 > .risingwave/log/connector-source.log 2>&1 & -sleep 1 echo "--- testing sinks" -sqllogictest -p 4566 -d dev './e2e_test/sink/*.slt' +sqllogictest -p 4566 -d dev './e2e_test/sink/create_sink_as.slt' +sqllogictest -p 4566 -d dev './e2e_test/sink/blackhole_sink.slt' sleep 1 # check sink destination postgres @@ -90,5 +112,5 @@ else fi echo "--- Kill cluster" -pkill -f connector-service.jar +pkill -f connector-node cargo make ci-kill diff --git a/ci/scripts/e2e-source-test.sh b/ci/scripts/e2e-source-test.sh index 69f767646a222..886bd51f5e2ed 100755 --- a/ci/scripts/e2e-source-test.sh +++ b/ci/scripts/e2e-source-test.sh @@ -31,8 +31,11 @@ buildkite-agent artifact download risedev-dev-"$profile" target/debug/ mv target/debug/risingwave-"$profile" target/debug/risingwave mv target/debug/risedev-dev-"$profile" target/debug/risedev-dev -echo "--- Download connector node jar" -buildkite-agent artifact download connector-service.jar ./ + +echo "--- Download connector node package" +buildkite-agent artifact download risingwave-connector.tar.gz ./ +mkdir ./connector-node +tar xf ./risingwave-connector.tar.gz -C ./connector-node echo "--- Prepare data" cp src/connector/src/test_data/simple-schema.avsc ./avro-simple-schema.avsc @@ -51,8 +54,7 @@ cargo make pre-start-dev cargo make link-all-in-one-binaries echo "--- e2e, ci-1cn-1fe, mysql & postgres cdc" -# install mysql client -apt-get -y install mysql-client + # import data to mysql mysql --host=mysql --port=3306 -u root -p123456 < ./e2e_test/source/cdc/mysql_cdc.sql @@ -61,8 +63,28 @@ export PGPASSWORD='postgres'; createdb -h db -U postgres cdc_test psql -h db -U postgres -d cdc_test < ./e2e_test/source/cdc/postgres_cdc.sql -# start cdc connector node -nohup java -jar ./connector-service.jar --port 60061 > .risingwave/log/connector-node.log 2>&1 & +node_port=50051 +node_timeout=10 +./connector-node/start-service.sh -p $node_port > .risingwave/log/connector-source.log 2>&1 & + +echo "waiting for connector node to start" +start_time=$(date +%s) +while : +do + if nc -z localhost $node_port; then + echo "Port $node_port is listened! Connector Node is up!" + break + fi + + current_time=$(date +%s) + elapsed_time=$((current_time - start_time)) + if [ $elapsed_time -ge $node_timeout ]; then + echo "Timeout waiting for port $node_port to be listened!" + exit 1 + fi + sleep 0.1 +done + # start risingwave cluster cargo make ci-start ci-1cn-1fe-with-recovery sleep 2 @@ -92,7 +114,7 @@ echo "check mviews after cluster recovery" sqllogictest -p 4566 -d dev './e2e_test/source/cdc/cdc.check_new_rows.slt' echo "--- Kill cluster" -pkill -f connector-service.jar +pkill -f connector-node cargo make ci-kill echo "--- e2e, ci-1cn-1fe, nexmark endless" diff --git a/ci/scripts/release.sh b/ci/scripts/release.sh index 78d995873bde0..9ef91a726ead5 100755 --- a/ci/scripts/release.sh +++ b/ci/scripts/release.sh @@ -3,8 +3,10 @@ # Exits as soon as any line fails. 
set -euo pipefail +connector_node_version=$(cat ci/connector-node-version) + echo "--- Check env" -if [ "${BUILDKITE_SOURCE}" != "schedule" ] && [[ -z "${BINARY_NAME+x}" ]]; then +if [ "${BUILDKITE_SOURCE}" != "schedule" ] && [ "${BUILDKITE_SOURCE}" != "webhook" ] && [[ -z "${BINARY_NAME+x}" ]]; then exit 0 fi @@ -28,9 +30,10 @@ echo "--- Install aws cli" curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" unzip -q awscliv2.zip && ./aws/install && mv /usr/local/bin/aws /bin/aws -echo "--- Build release binary" +echo "--- Build risingwave release binary" cargo build -p risingwave_cmd_all --features "static-link static-log-level" --profile release -cd target/release && chmod +x risingwave +cargo build --bin risectl --features "static-link static-log-level" --profile release +cd target/release && chmod +x risingwave risectl echo "--- Upload nightly binary to s3" if [ "${BUILDKITE_SOURCE}" == "schedule" ]; then @@ -51,9 +54,19 @@ if [[ -n "${BUILDKITE_TAG+x}" ]]; then echo "--- Release create" gh release create "${BUILDKITE_TAG}" --notes "release ${BUILDKITE_TAG}" -d -p - echo "--- Release upload asset" + echo "--- Release upload risingwave asset" tar -czvf risingwave-"${BUILDKITE_TAG}"-x86_64-unknown-linux.tar.gz risingwave gh release upload "${BUILDKITE_TAG}" risingwave-"${BUILDKITE_TAG}"-x86_64-unknown-linux.tar.gz + + echo "--- Release upload risingwave asset" + tar -czvf risectl-"${BUILDKITE_TAG}"-x86_64-unknown-linux.tar.gz risectl + gh release upload "${BUILDKITE_TAG}" risectl-"${BUILDKITE_TAG}"-x86_64-unknown-linux.tar.gz + + echo "--- Release build and upload risingwave connector node jar asset" + git clone https://"$GITHUB_TOKEN"@github.com/risingwavelabs/risingwave-connector-node.git + cd risingwave-connector-node && git checkout ${connector_node_version} && mvn -B package -Dmaven.test.skip=true + cd assembly/target && mv risingwave-connector-1.0.0.tar.gz risingwave-connector-"${BUILDKITE_TAG}".tar.gz + gh release upload "${BUILDKITE_TAG}" risingwave-connector-"${BUILDKITE_TAG}".tar.gz fi diff --git a/ci/scripts/unit-test.sh b/ci/scripts/unit-test.sh index 89fffe46a6c63..d391c11bc8694 100755 --- a/ci/scripts/unit-test.sh +++ b/ci/scripts/unit-test.sh @@ -5,3 +5,6 @@ set -euo pipefail source ci/scripts/common.env.sh source ci/scripts/run-unit-test.sh + +echo "--- Show sccache stats" +sccache --show-stats diff --git a/ci/workflows/docker.yml b/ci/workflows/docker.yml index 757dbc13b28b2..977a15c5a5490 100644 --- a/ci/workflows/docker.yml +++ b/ci/workflows/docker.yml @@ -15,6 +15,7 @@ steps: GHCR_USERNAME: ghcr-username GHCR_TOKEN: ghcr-token DOCKER_TOKEN: docker-token + GITHUB_TOKEN: github-token retry: *auto-retry - label: "docker-build-push: aarch64" @@ -26,6 +27,7 @@ steps: GHCR_USERNAME: ghcr-username GHCR_TOKEN: ghcr-token DOCKER_TOKEN: docker-token + GITHUB_TOKEN: github-token retry: *auto-retry agents: queue: "linux-arm64" @@ -46,10 +48,14 @@ steps: - label: "pre build binary" command: "ci/scripts/release.sh" plugins: + - seek-oss/aws-sm#v2.3.1: + env: + GITHUB_TOKEN: github-token - docker-compose#v4.9.0: run: release-env config: ci/docker-compose.yml environment: - BINARY_NAME - BUILDKITE_SOURCE + - GITHUB_TOKEN retry: *auto-retry \ No newline at end of file diff --git a/ci/workflows/main-cron.yml b/ci/workflows/main-cron.yml index e7d3e03cc233a..44a0efeba929a 100644 --- a/ci/workflows/main-cron.yml +++ b/ci/workflows/main-cron.yml @@ -172,7 +172,7 @@ steps: retry: *auto-retry - label: "recovery test (deterministic simulation)" - command: 
"TEST_NUM=16 KILL_RATE=1.0 timeout 55m ci/scripts/deterministic-recovery-test.sh" + command: "TEST_NUM=12 KILL_RATE=1.0 timeout 55m ci/scripts/deterministic-recovery-test.sh" depends_on: "build-simulation" plugins: - gencer/cache#v2.4.10: *cargo-cache diff --git a/ci/workflows/main.yml b/ci/workflows/main.yml index 8dc3a9099534a..f2eae9d16375a 100644 --- a/ci/workflows/main.yml +++ b/ci/workflows/main.yml @@ -41,6 +41,8 @@ steps: run: rw-build-env config: ci/docker-compose.yml mount-buildkite-agent: true + env: + - BUILDKITE_COMMIT timeout_in_minutes: 20 retry: *auto-retry @@ -251,7 +253,7 @@ steps: retry: *auto-retry - label: "recovery test (deterministic simulation)" - command: "TEST_NUM=32 KILL_RATE=0.5 timeout 25m ci/scripts/deterministic-recovery-test.sh" + command: "TEST_NUM=16 KILL_RATE=0.5 timeout 25m ci/scripts/deterministic-recovery-test.sh" depends_on: "build-simulation" plugins: - gencer/cache#v2.4.10: *cargo-cache @@ -284,6 +286,21 @@ steps: timeout_in_minutes: 5 retry: *auto-retry + - label: "end-to-end iceberg sink test (release mode)" + command: "ci/scripts/e2e-iceberg-sink-test.sh -p ci-release" + depends_on: + - "build-release" + - "build-other" + plugins: + - gencer/cache#v2.4.10: *cargo-cache + - docker-compose#v4.9.0: + run: rw-build-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 5 + retry: *auto-retry + - label: "release" command: "ci/scripts/release.sh" if: build.tag != null @@ -299,6 +316,7 @@ steps: environment: - GITHUB_TOKEN - BUILDKITE_TAG + - BUILDKITE_SOURCE timeout_in_minutes: 60 retry: *auto-retry @@ -314,6 +332,7 @@ steps: GHCR_USERNAME: ghcr-username GHCR_TOKEN: ghcr-token DOCKER_TOKEN: docker-token + GITHUB_TOKEN: github-token timeout_in_minutes: 60 retry: *auto-retry @@ -329,6 +348,7 @@ steps: GHCR_USERNAME: ghcr-username GHCR_TOKEN: ghcr-token DOCKER_TOKEN: docker-token + GITHUB_TOKEN: github-token timeout_in_minutes: 60 retry: *auto-retry agents: diff --git a/ci/workflows/pull-request.yml b/ci/workflows/pull-request.yml index 9408df49f09b5..e6b36f3f68e78 100644 --- a/ci/workflows/pull-request.yml +++ b/ci/workflows/pull-request.yml @@ -156,6 +156,21 @@ steps: timeout_in_minutes: 5 retry: *auto-retry + - label: "end-to-end iceberg sink test" + command: "ci/scripts/e2e-iceberg-sink-test.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + plugins: + - gencer/cache#v2.4.10: *cargo-cache + - docker-compose#v4.9.0: + run: rw-build-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 5 + retry: *auto-retry + - label: "regress test" command: "ci/scripts/regress-test.sh -p ci-dev" depends_on: "build" @@ -247,7 +262,7 @@ steps: retry: *auto-retry - label: "recovery test (deterministic simulation)" - command: "TEST_NUM=16 KILL_RATE=0.5 timeout 14m ci/scripts/deterministic-recovery-test.sh" + command: "TEST_NUM=8 KILL_RATE=0.5 timeout 14m ci/scripts/deterministic-recovery-test.sh" depends_on: "build-simulation" plugins: - gencer/cache#v2.4.10: *cargo-cache @@ -275,4 +290,4 @@ steps: - shellcheck#v1.2.0: files: ./**/*.sh timeout_in_minutes: 5 - retry: *auto-retry \ No newline at end of file + retry: *auto-retry diff --git a/clippy.toml b/clippy.toml index b2dd111cd85f7..f5472c7dae0d3 100644 --- a/clippy.toml +++ b/clippy.toml @@ -1,6 +1,8 @@ disallowed-methods = [ - { path = "std::iter::Iterator::zip", reason = "Please use Itertools::zip_eq instead." 
}, + { path = "std::iter::Iterator::zip", reason = "Please use `zip_eq_fast` if it's available. Otherwise use `zip_eq_debug`" }, + { path = "itertools::Itertools::zip_eq", reason = "Please use `zip_eq_fast` if it's available. Otherwise use `zip_eq_debug`" }, { path = "futures::stream::select_all", reason = "Please use `risingwave_common::util::select_all` instead." }, + { path = "risingwave_common::array::JsonbVal::from_serde", reason = "Please add dedicated methods as part of `JsonbRef`/`JsonbVal`, rather than take inner `serde_json::Value` out, process, and put back." }, ] doc-valid-idents = [ "RisingWave", diff --git a/dashboard/components/FragmentGraph.tsx b/dashboard/components/FragmentGraph.tsx index 963772fecef23..bb58b7e95b58e 100644 --- a/dashboard/components/FragmentGraph.tsx +++ b/dashboard/components/FragmentGraph.tsx @@ -20,7 +20,7 @@ import { generateBoxLinks, layout, } from "../lib/layout" -import { StreamNode } from "../proto/gen/stream_plan" +import { PlanNodeDatum } from "../pages/streaming_plan" const ReactJson = loadable(() => import("react-json-view")) @@ -87,24 +87,19 @@ export default function FragmentGraph({ fragmentDependency, selectedFragmentId, }: { - planNodeDependencies: Map> + planNodeDependencies: Map> fragmentDependency: ActorBox[] selectedFragmentId: string | undefined }) { const svgRef = useRef() const { isOpen, onOpen, onClose } = useDisclosure() - const [currentStreamNode, setCurrentStreamNode] = useState() + const [currentStreamNode, setCurrentStreamNode] = useState() const openPlanNodeDetail = useCallback( - () => (node: d3.HierarchyNode) => { - const streamNode = cloneDeep(node.data.node as StreamNode) - - if (streamNode) { - streamNode.input = [] - setCurrentStreamNode(streamNode) - onOpen() - } + () => (node: d3.HierarchyNode) => { + setCurrentStreamNode(node.data) + onOpen() }, [onOpen] )() @@ -115,7 +110,7 @@ export default function FragmentGraph({ const layoutActorResult = new Map< string, { - layoutRoot: d3.HierarchyPointNode + layoutRoot: d3.HierarchyPointNode width: number height: number extraInfo: string @@ -138,7 +133,7 @@ export default function FragmentGraph({ layoutRoot, width, height, - extraInfo: fragmentRoot.data.extraInfo, + extraInfo: fragmentRoot.data.extraInfo ?? "", }) } const fragmentLayout = layout( @@ -169,7 +164,7 @@ export default function FragmentGraph({ }, [planNodeDependencies, fragmentDependency]) type PlanNodeDesc = { - layoutRoot: d3.HierarchyPointNode + layoutRoot: d3.HierarchyPointNode width: number height: number x: number @@ -400,14 +395,16 @@ export default function FragmentGraph({ - {currentStreamNode?.operatorId} -{" "} - {currentStreamNode?.nodeBody?.$case} + {currentStreamNode?.operatorId} - {currentStreamNode?.name} - {isOpen && currentStreamNode && ( + {isOpen && currentStreamNode?.node && ( + name === "input" || name === "fields" || name === "streamKey" + } // collapse top-level fields for better readability + src={currentStreamNode.node} collapsed={3} name={null} displayDataTypes={false} diff --git a/dashboard/components/Layout.tsx b/dashboard/components/Layout.tsx index 8511e06454003..47b33f3aa0a4d 100644 --- a/dashboard/components/Layout.tsx +++ b/dashboard/components/Layout.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
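A note on the FragmentGraph.tsx change above: the plan-node detail modal no longer clones a raw `StreamNode` and strips its `input`; it now stores the whole `PlanNodeDatum` the fragment graph is built from, and the bulky `input`/`fields`/`streamKey` fields are simply collapsed in the JSON viewer. The sketch below is not part of the diff — it only restates the datum shape introduced later in this changeset (dashboard/pages/streaming_plan.tsx); the `showDetail` helper is hypothetical and illustrates how the click callback consumes that shape.

```typescript
import { Dispatcher, StreamNode } from "../proto/gen/stream_plan"

// Map from actor id to its dispatchers, used as the payload of the synthetic
// "dispatcher" root node (see the DispatcherNode interface added in this diff).
interface DispatcherNode {
  [actorId: number]: Dispatcher[]
}

// Shape of the data attached to each node of the fragment graph, as defined in
// dashboard/pages/streaming_plan.tsx later in this diff.
export interface PlanNodeDatum {
  name: string
  children?: PlanNodeDatum[]
  operatorId: string | number
  node: StreamNode | DispatcherNode
  extraInfo?: string
}

// Hypothetical handler mirroring openPlanNodeDetail: store the datum as-is and
// open the modal; the ReactJson viewer collapses `input`, `fields`, and
// `streamKey` for readability instead of the code clearing them.
function showDetail(
  datum: PlanNodeDatum,
  setCurrentStreamNode: (d: PlanNodeDatum) => void,
  onOpen: () => void
) {
  setCurrentStreamNode(datum)
  onOpen()
}
```

This matches the new callback in FragmentGraph.tsx, which passes `node.data` straight through rather than deep-cloning the proto message.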
diff --git a/dashboard/components/NoData.tsx b/dashboard/components/NoData.tsx index e9b27effcb55a..c3b4014c4363a 100644 --- a/dashboard/components/NoData.tsx +++ b/dashboard/components/NoData.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/components/Relations.tsx b/dashboard/components/Relations.tsx index 0bcfa5d0ad3ce..0f3dc54b3c9c0 100644 --- a/dashboard/components/Relations.tsx +++ b/dashboard/components/Relations.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/components/SpinnerOverlay.tsx b/dashboard/components/SpinnerOverlay.tsx index d7ddee1ef5ef0..a9a43574a335d 100644 --- a/dashboard/components/SpinnerOverlay.tsx +++ b/dashboard/components/SpinnerOverlay.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/components/StatusLamp.js b/dashboard/components/StatusLamp.js index ecdcce64ae767..719a990ac2742 100644 --- a/dashboard/components/StatusLamp.js +++ b/dashboard/components/StatusLamp.js @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/components/StreamGraph.tsx b/dashboard/components/StreamGraph.tsx index 5bca067365cbc..3d35fd3577536 100644 --- a/dashboard/components/StreamGraph.tsx +++ b/dashboard/components/StreamGraph.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/components/Title.tsx b/dashboard/components/Title.tsx index 2afe03327e300..2f683b738d258 100644 --- a/dashboard/components/Title.tsx +++ b/dashboard/components/Title.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/components/utils/icons.tsx b/dashboard/components/utils/icons.tsx index de545b8662303..536d4b188e073 100644 --- a/dashboard/components/utils/icons.tsx +++ b/dashboard/components/utils/icons.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/hook/useWindowSize.js b/dashboard/hook/useWindowSize.js index ff57a0bf6030d..5c9d580bbb1f5 100644 --- a/dashboard/hook/useWindowSize.js +++ b/dashboard/hook/useWindowSize.js @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/dashboard/lib/algo.ts b/dashboard/lib/algo.ts index 8af59281a0642..0ed620d3f4135 100644 --- a/dashboard/lib/algo.ts +++ b/dashboard/lib/algo.ts @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/lib/color.js b/dashboard/lib/color.js index f30a146360950..b8f366319c8aa 100644 --- a/dashboard/lib/color.js +++ b/dashboard/lib/color.js @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/lib/extractInfo.ts b/dashboard/lib/extractInfo.ts index b2d0f23511ddb..fc6eccf669eed 100644 --- a/dashboard/lib/extractInfo.ts +++ b/dashboard/lib/extractInfo.ts @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/lib/graaphEngine/canvasEngine.js b/dashboard/lib/graaphEngine/canvasEngine.js index ce47b3a6773c2..ad53fa7b48074 100644 --- a/dashboard/lib/graaphEngine/canvasEngine.js +++ b/dashboard/lib/graaphEngine/canvasEngine.js @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/lib/graaphEngine/svgEngine.js b/dashboard/lib/graaphEngine/svgEngine.js index cc9446475de3e..3d21d70f0f8d2 100644 --- a/dashboard/lib/graaphEngine/svgEngine.js +++ b/dashboard/lib/graaphEngine/svgEngine.js @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/lib/layout.ts b/dashboard/lib/layout.ts index fff46b7cb7386..a4ff1c878be73 100644 --- a/dashboard/lib/layout.ts +++ b/dashboard/lib/layout.ts @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/lib/str.js b/dashboard/lib/str.js index a29607d814f5a..42418c98a6290 100644 --- a/dashboard/lib/str.js +++ b/dashboard/lib/str.js @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/lib/streamPlan/parser.js b/dashboard/lib/streamPlan/parser.js index f2ea410d1e3b1..a2d8e24a58df6 100644 --- a/dashboard/lib/streamPlan/parser.js +++ b/dashboard/lib/streamPlan/parser.js @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/dashboard/lib/streamPlan/streamChartHelper.js b/dashboard/lib/streamPlan/streamChartHelper.js index 1e29c7fef7925..e709eb573939b 100644 --- a/dashboard/lib/streamPlan/streamChartHelper.js +++ b/dashboard/lib/streamPlan/streamChartHelper.js @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/lib/util.js b/dashboard/lib/util.js index dfb95e7c06822..43cb0af966921 100644 --- a/dashboard/lib/util.js +++ b/dashboard/lib/util.js @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/mock-server.js b/dashboard/mock-server.js index 47ef71468252c..1f8d97ee3769b 100644 --- a/dashboard/mock-server.js +++ b/dashboard/mock-server.js @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/next.config.js b/dashboard/next.config.js index 5049c531453df..b9b76901086ee 100644 --- a/dashboard/next.config.js +++ b/dashboard/next.config.js @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/package-lock.json b/dashboard/package-lock.json index a895871384a4d..0cfd223220323 100644 --- a/dashboard/package-lock.json +++ b/dashboard/package-lock.json @@ -28,7 +28,7 @@ "react-flow-renderer": "10.3.16", "react-json-view": "^1.21.3", "react-syntax-highlighter": "^15.5.0", - "recharts": "^2.1.16", + "recharts": "^2.3.2", "styled-components": "5.3.0", "ts-proto": "^1.131.0" }, @@ -8638,24 +8638,19 @@ } }, "node_modules/recharts": { - "version": "2.1.16", - "resolved": "https://registry.npmjs.org/recharts/-/recharts-2.1.16.tgz", - "integrity": "sha512-aYn1plTjYzRCo3UGxtWsduslwYd+Cuww3h/YAAEoRdGe0LRnBgYgaXSlVrNFkWOOSXrBavpmnli9h7pvRuk5wg==", + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/recharts/-/recharts-2.3.2.tgz", + "integrity": "sha512-2II30fGzKaypHfHNQNUhCfiLMxrOS/gF0WFahDIEFgXtJkVEe2DpZWFfEfAn+RU3B7/h2V/B05Bwmqq3rTXwLw==", "dependencies": { - "@types/d3-interpolate": "^2.0.0", - "@types/d3-scale": "^3.0.0", - "@types/d3-shape": "^2.0.0", "classnames": "^2.2.5", - "d3-interpolate": "^2.0.0", - "d3-scale": "^3.0.0", - "d3-shape": "^2.0.0", "eventemitter3": "^4.0.1", "lodash": "^4.17.19", "react-is": "^16.10.2", "react-resize-detector": "^7.1.2", "react-smooth": "^2.0.1", "recharts-scale": "^0.4.4", - "reduce-css-calc": "^2.1.8" + "reduce-css-calc": "^2.1.8", + "victory-vendor": "^36.6.8" }, "engines": { "node": ">=12" @@ -8674,117 +8669,6 @@ "decimal.js-light": "^2.4.1" } }, - "node_modules/recharts/node_modules/@types/d3-color": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/@types/d3-color/-/d3-color-2.0.3.tgz", - "integrity": "sha512-+0EtEjBfKEDtH9Rk3u3kLOUXM5F+iZK+WvASPb0MhIZl8J8NUvGeZRwKCXl+P3HkYx5TdU4YtcibpqHkSR9n7w==" - }, - "node_modules/recharts/node_modules/@types/d3-interpolate": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/@types/d3-interpolate/-/d3-interpolate-2.0.2.tgz", - 
"integrity": "sha512-lElyqlUfIPyWG/cD475vl6msPL4aMU7eJvx1//Q177L8mdXoVPFl1djIESF2FKnc0NyaHvQlJpWwKJYwAhUoCw==", - "dependencies": { - "@types/d3-color": "^2" - } - }, - "node_modules/recharts/node_modules/@types/d3-path": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/@types/d3-path/-/d3-path-2.0.2.tgz", - "integrity": "sha512-3YHpvDw9LzONaJzejXLOwZ3LqwwkoXb9LI2YN7Hbd6pkGo5nIlJ09ul4bQhBN4hQZJKmUpX8HkVqbzgUKY48cg==" - }, - "node_modules/recharts/node_modules/@types/d3-scale": { - "version": "3.3.2", - "resolved": "https://registry.npmjs.org/@types/d3-scale/-/d3-scale-3.3.2.tgz", - "integrity": "sha512-gGqr7x1ost9px3FvIfUMi5XA/F/yAf4UkUDtdQhpH92XCT0Oa7zkkRzY61gPVJq+DxpHn/btouw5ohWkbBsCzQ==", - "dependencies": { - "@types/d3-time": "^2" - } - }, - "node_modules/recharts/node_modules/@types/d3-shape": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/@types/d3-shape/-/d3-shape-2.1.3.tgz", - "integrity": "sha512-HAhCel3wP93kh4/rq+7atLdybcESZ5bRHDEZUojClyZWsRuEMo3A52NGYJSh48SxfxEU6RZIVbZL2YFZ2OAlzQ==", - "dependencies": { - "@types/d3-path": "^2" - } - }, - "node_modules/recharts/node_modules/@types/d3-time": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/@types/d3-time/-/d3-time-2.1.1.tgz", - "integrity": "sha512-9MVYlmIgmRR31C5b4FVSWtuMmBHh2mOWQYfl7XAYOa8dsnb7iEmUmRSWSFgXFtkjxO65d7hTUHQC+RhR/9IWFg==" - }, - "node_modules/recharts/node_modules/d3-array": { - "version": "2.12.1", - "resolved": "https://registry.npmjs.org/d3-array/-/d3-array-2.12.1.tgz", - "integrity": "sha512-B0ErZK/66mHtEsR1TkPEEkwdy+WDesimkM5gpZr5Dsg54BiTA5RXtYW5qTLIAcekaS9xfZrzBLF/OAkB3Qn1YQ==", - "dependencies": { - "internmap": "^1.0.0" - } - }, - "node_modules/recharts/node_modules/d3-color": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/d3-color/-/d3-color-2.0.0.tgz", - "integrity": "sha512-SPXi0TSKPD4g9tw0NMZFnR95XVgUZiBH+uUTqQuDu1OsE2zomHU7ho0FISciaPvosimixwHFl3WHLGabv6dDgQ==" - }, - "node_modules/recharts/node_modules/d3-format": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/d3-format/-/d3-format-2.0.0.tgz", - "integrity": "sha512-Ab3S6XuE/Q+flY96HXT0jOXcM4EAClYFnRGY5zsjRGNy6qCYrQsMffs7cV5Q9xejb35zxW5hf/guKw34kvIKsA==" - }, - "node_modules/recharts/node_modules/d3-interpolate": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-2.0.1.tgz", - "integrity": "sha512-c5UhwwTs/yybcmTpAVqwSFl6vrQ8JZJoT5F7xNFK9pymv5C0Ymcc9/LIJHtYIggg/yS9YHw8i8O8tgb9pupjeQ==", - "dependencies": { - "d3-color": "1 - 2" - } - }, - "node_modules/recharts/node_modules/d3-path": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/d3-path/-/d3-path-2.0.0.tgz", - "integrity": "sha512-ZwZQxKhBnv9yHaiWd6ZU4x5BtCQ7pXszEV9CU6kRgwIQVQGLMv1oiL4M+MK/n79sYzsj+gcgpPQSctJUsLN7fA==" - }, - "node_modules/recharts/node_modules/d3-scale": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-3.3.0.tgz", - "integrity": "sha512-1JGp44NQCt5d1g+Yy+GeOnZP7xHo0ii8zsQp6PGzd+C1/dl0KGsp9A7Mxwp+1D1o4unbTTxVdU/ZOIEBoeZPbQ==", - "dependencies": { - "d3-array": "^2.3.0", - "d3-format": "1 - 2", - "d3-interpolate": "1.2.0 - 2", - "d3-time": "^2.1.1", - "d3-time-format": "2 - 3" - } - }, - "node_modules/recharts/node_modules/d3-shape": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/d3-shape/-/d3-shape-2.1.0.tgz", - "integrity": "sha512-PnjUqfM2PpskbSLTJvAzp2Wv4CZsnAgTfcVRTwW03QR3MkXF8Uo7B1y/lWkAsmbKwuecto++4NlsYcvYpXpTHA==", - "dependencies": { - "d3-path": "1 - 2" - } - }, - 
"node_modules/recharts/node_modules/d3-time": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/d3-time/-/d3-time-2.1.1.tgz", - "integrity": "sha512-/eIQe/eR4kCQwq7yxi7z4c6qEXf2IYGcjoWB5OOQy4Tq9Uv39/947qlDcN2TLkiTzQWzvnsuYPB9TrWaNfipKQ==", - "dependencies": { - "d3-array": "2" - } - }, - "node_modules/recharts/node_modules/d3-time-format": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/d3-time-format/-/d3-time-format-3.0.0.tgz", - "integrity": "sha512-UXJh6EKsHBTjopVqZBhFysQcoXSv/5yLONZvkQ5Kk3qbwiUYkdX17Xa1PT6U1ZWXGGfB1ey5L8dKMlFq2DO0Ag==", - "dependencies": { - "d3-time": "1 - 2" - } - }, - "node_modules/recharts/node_modules/internmap": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/internmap/-/internmap-1.0.1.tgz", - "integrity": "sha512-lDB5YccMydFBtasVtxnZ3MRBHuaoE8GKsppq+EchKL2U4nK/DmEpPHNH8MZe5HkMtpSiTSOZwfN0tzYjO/lJEw==" - }, "node_modules/recharts/node_modules/react-is": { "version": "16.13.1", "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", @@ -9683,9 +9567,9 @@ } }, "node_modules/ua-parser-js": { - "version": "0.7.31", - "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.31.tgz", - "integrity": "sha512-qLK/Xe9E2uzmYI3qLeOmI0tEOt+TBBQyUIAh4aAgU05FVYzeZrKUdkAZfBNVGRaHVgV0TDkdEngJSw/SyQchkQ==", + "version": "0.7.33", + "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.33.tgz", + "integrity": "sha512-s8ax/CeZdK9R/56Sui0WM6y9OFREJarMRHqLB2EwkovemBxNQ+Bqu8GAsUnVcXKgphb++ghr/B2BZx4mahujPw==", "funding": [ { "type": "opencollective", @@ -9888,6 +9772,27 @@ "node": ">= 0.8" } }, + "node_modules/victory-vendor": { + "version": "36.6.8", + "resolved": "https://registry.npmjs.org/victory-vendor/-/victory-vendor-36.6.8.tgz", + "integrity": "sha512-H3kyQ+2zgjMPvbPqAl7Vwm2FD5dU7/4bCTQakFQnpIsfDljeOMDojRsrmJfwh4oAlNnWhpAf+mbAoLh8u7dwyQ==", + "dependencies": { + "@types/d3-array": "^3.0.3", + "@types/d3-ease": "^3.0.0", + "@types/d3-interpolate": "^3.0.1", + "@types/d3-scale": "^4.0.2", + "@types/d3-shape": "^3.1.0", + "@types/d3-time": "^3.0.0", + "@types/d3-timer": "^3.0.0", + "d3-array": "^3.1.6", + "d3-ease": "^3.0.1", + "d3-interpolate": "^3.0.1", + "d3-scale": "^4.0.2", + "d3-shape": "^3.1.0", + "d3-time": "^3.0.0", + "d3-timer": "^3.0.1" + } + }, "node_modules/w3c-hr-time": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/w3c-hr-time/-/w3c-hr-time-1.0.2.tgz", @@ -16521,137 +16426,21 @@ } }, "recharts": { - "version": "2.1.16", - "resolved": "https://registry.npmjs.org/recharts/-/recharts-2.1.16.tgz", - "integrity": "sha512-aYn1plTjYzRCo3UGxtWsduslwYd+Cuww3h/YAAEoRdGe0LRnBgYgaXSlVrNFkWOOSXrBavpmnli9h7pvRuk5wg==", + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/recharts/-/recharts-2.3.2.tgz", + "integrity": "sha512-2II30fGzKaypHfHNQNUhCfiLMxrOS/gF0WFahDIEFgXtJkVEe2DpZWFfEfAn+RU3B7/h2V/B05Bwmqq3rTXwLw==", "requires": { - "@types/d3-interpolate": "^2.0.0", - "@types/d3-scale": "^3.0.0", - "@types/d3-shape": "^2.0.0", "classnames": "^2.2.5", - "d3-interpolate": "^2.0.0", - "d3-scale": "^3.0.0", - "d3-shape": "^2.0.0", "eventemitter3": "^4.0.1", "lodash": "^4.17.19", "react-is": "^16.10.2", "react-resize-detector": "^7.1.2", "react-smooth": "^2.0.1", "recharts-scale": "^0.4.4", - "reduce-css-calc": "^2.1.8" + "reduce-css-calc": "^2.1.8", + "victory-vendor": "^36.6.8" }, "dependencies": { - "@types/d3-color": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/@types/d3-color/-/d3-color-2.0.3.tgz", - "integrity": 
"sha512-+0EtEjBfKEDtH9Rk3u3kLOUXM5F+iZK+WvASPb0MhIZl8J8NUvGeZRwKCXl+P3HkYx5TdU4YtcibpqHkSR9n7w==" - }, - "@types/d3-interpolate": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/@types/d3-interpolate/-/d3-interpolate-2.0.2.tgz", - "integrity": "sha512-lElyqlUfIPyWG/cD475vl6msPL4aMU7eJvx1//Q177L8mdXoVPFl1djIESF2FKnc0NyaHvQlJpWwKJYwAhUoCw==", - "requires": { - "@types/d3-color": "^2" - } - }, - "@types/d3-path": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/@types/d3-path/-/d3-path-2.0.2.tgz", - "integrity": "sha512-3YHpvDw9LzONaJzejXLOwZ3LqwwkoXb9LI2YN7Hbd6pkGo5nIlJ09ul4bQhBN4hQZJKmUpX8HkVqbzgUKY48cg==" - }, - "@types/d3-scale": { - "version": "3.3.2", - "resolved": "https://registry.npmjs.org/@types/d3-scale/-/d3-scale-3.3.2.tgz", - "integrity": "sha512-gGqr7x1ost9px3FvIfUMi5XA/F/yAf4UkUDtdQhpH92XCT0Oa7zkkRzY61gPVJq+DxpHn/btouw5ohWkbBsCzQ==", - "requires": { - "@types/d3-time": "^2" - } - }, - "@types/d3-shape": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/@types/d3-shape/-/d3-shape-2.1.3.tgz", - "integrity": "sha512-HAhCel3wP93kh4/rq+7atLdybcESZ5bRHDEZUojClyZWsRuEMo3A52NGYJSh48SxfxEU6RZIVbZL2YFZ2OAlzQ==", - "requires": { - "@types/d3-path": "^2" - } - }, - "@types/d3-time": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/@types/d3-time/-/d3-time-2.1.1.tgz", - "integrity": "sha512-9MVYlmIgmRR31C5b4FVSWtuMmBHh2mOWQYfl7XAYOa8dsnb7iEmUmRSWSFgXFtkjxO65d7hTUHQC+RhR/9IWFg==" - }, - "d3-array": { - "version": "2.12.1", - "resolved": "https://registry.npmjs.org/d3-array/-/d3-array-2.12.1.tgz", - "integrity": "sha512-B0ErZK/66mHtEsR1TkPEEkwdy+WDesimkM5gpZr5Dsg54BiTA5RXtYW5qTLIAcekaS9xfZrzBLF/OAkB3Qn1YQ==", - "requires": { - "internmap": "^1.0.0" - } - }, - "d3-color": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/d3-color/-/d3-color-2.0.0.tgz", - "integrity": "sha512-SPXi0TSKPD4g9tw0NMZFnR95XVgUZiBH+uUTqQuDu1OsE2zomHU7ho0FISciaPvosimixwHFl3WHLGabv6dDgQ==" - }, - "d3-format": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/d3-format/-/d3-format-2.0.0.tgz", - "integrity": "sha512-Ab3S6XuE/Q+flY96HXT0jOXcM4EAClYFnRGY5zsjRGNy6qCYrQsMffs7cV5Q9xejb35zxW5hf/guKw34kvIKsA==" - }, - "d3-interpolate": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-2.0.1.tgz", - "integrity": "sha512-c5UhwwTs/yybcmTpAVqwSFl6vrQ8JZJoT5F7xNFK9pymv5C0Ymcc9/LIJHtYIggg/yS9YHw8i8O8tgb9pupjeQ==", - "requires": { - "d3-color": "1 - 2" - } - }, - "d3-path": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/d3-path/-/d3-path-2.0.0.tgz", - "integrity": "sha512-ZwZQxKhBnv9yHaiWd6ZU4x5BtCQ7pXszEV9CU6kRgwIQVQGLMv1oiL4M+MK/n79sYzsj+gcgpPQSctJUsLN7fA==" - }, - "d3-scale": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-3.3.0.tgz", - "integrity": "sha512-1JGp44NQCt5d1g+Yy+GeOnZP7xHo0ii8zsQp6PGzd+C1/dl0KGsp9A7Mxwp+1D1o4unbTTxVdU/ZOIEBoeZPbQ==", - "requires": { - "d3-array": "^2.3.0", - "d3-format": "1 - 2", - "d3-interpolate": "1.2.0 - 2", - "d3-time": "^2.1.1", - "d3-time-format": "2 - 3" - } - }, - "d3-shape": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/d3-shape/-/d3-shape-2.1.0.tgz", - "integrity": "sha512-PnjUqfM2PpskbSLTJvAzp2Wv4CZsnAgTfcVRTwW03QR3MkXF8Uo7B1y/lWkAsmbKwuecto++4NlsYcvYpXpTHA==", - "requires": { - "d3-path": "1 - 2" - } - }, - "d3-time": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/d3-time/-/d3-time-2.1.1.tgz", - "integrity": 
"sha512-/eIQe/eR4kCQwq7yxi7z4c6qEXf2IYGcjoWB5OOQy4Tq9Uv39/947qlDcN2TLkiTzQWzvnsuYPB9TrWaNfipKQ==", - "requires": { - "d3-array": "2" - } - }, - "d3-time-format": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/d3-time-format/-/d3-time-format-3.0.0.tgz", - "integrity": "sha512-UXJh6EKsHBTjopVqZBhFysQcoXSv/5yLONZvkQ5Kk3qbwiUYkdX17Xa1PT6U1ZWXGGfB1ey5L8dKMlFq2DO0Ag==", - "requires": { - "d3-time": "1 - 2" - } - }, - "internmap": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/internmap/-/internmap-1.0.1.tgz", - "integrity": "sha512-lDB5YccMydFBtasVtxnZ3MRBHuaoE8GKsppq+EchKL2U4nK/DmEpPHNH8MZe5HkMtpSiTSOZwfN0tzYjO/lJEw==" - }, "react-is": { "version": "16.13.1", "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", @@ -17342,9 +17131,9 @@ "dev": true }, "ua-parser-js": { - "version": "0.7.31", - "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.31.tgz", - "integrity": "sha512-qLK/Xe9E2uzmYI3qLeOmI0tEOt+TBBQyUIAh4aAgU05FVYzeZrKUdkAZfBNVGRaHVgV0TDkdEngJSw/SyQchkQ==" + "version": "0.7.33", + "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.33.tgz", + "integrity": "sha512-s8ax/CeZdK9R/56Sui0WM6y9OFREJarMRHqLB2EwkovemBxNQ+Bqu8GAsUnVcXKgphb++ghr/B2BZx4mahujPw==" }, "unbox-primitive": { "version": "1.0.2", @@ -17460,6 +17249,27 @@ "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==", "dev": true }, + "victory-vendor": { + "version": "36.6.8", + "resolved": "https://registry.npmjs.org/victory-vendor/-/victory-vendor-36.6.8.tgz", + "integrity": "sha512-H3kyQ+2zgjMPvbPqAl7Vwm2FD5dU7/4bCTQakFQnpIsfDljeOMDojRsrmJfwh4oAlNnWhpAf+mbAoLh8u7dwyQ==", + "requires": { + "@types/d3-array": "^3.0.3", + "@types/d3-ease": "^3.0.0", + "@types/d3-interpolate": "^3.0.1", + "@types/d3-scale": "^4.0.2", + "@types/d3-shape": "^3.1.0", + "@types/d3-time": "^3.0.0", + "@types/d3-timer": "^3.0.0", + "d3-array": "^3.1.6", + "d3-ease": "^3.0.1", + "d3-interpolate": "^3.0.1", + "d3-scale": "^4.0.2", + "d3-shape": "^3.1.0", + "d3-time": "^3.0.0", + "d3-timer": "^3.0.1" + } + }, "w3c-hr-time": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/w3c-hr-time/-/w3c-hr-time-1.0.2.tgz", diff --git a/dashboard/package.json b/dashboard/package.json index ed81cc551ad93..da787d102027e 100644 --- a/dashboard/package.json +++ b/dashboard/package.json @@ -34,7 +34,7 @@ "react-flow-renderer": "10.3.16", "react-json-view": "^1.21.3", "react-syntax-highlighter": "^15.5.0", - "recharts": "^2.1.16", + "recharts": "^2.3.2", "styled-components": "5.3.0", "ts-proto": "^1.131.0" }, diff --git a/dashboard/pages/_app.tsx b/dashboard/pages/_app.tsx index 4f9140c101f0c..cc696c8a29ad5 100644 --- a/dashboard/pages/_app.tsx +++ b/dashboard/pages/_app.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/pages/api/api.ts b/dashboard/pages/api/api.ts index 28d40aaec0205..c213243cc8bee 100644 --- a/dashboard/pages/api/api.ts +++ b/dashboard/pages/api/api.ts @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/dashboard/pages/api/cluster.ts b/dashboard/pages/api/cluster.ts index 6f7777c6b61fe..4f78d8814be2a 100644 --- a/dashboard/pages/api/cluster.ts +++ b/dashboard/pages/api/cluster.ts @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/pages/api/streaming.ts b/dashboard/pages/api/streaming.ts index cf4ddd9bf86e6..879c02a5ccfcd 100644 --- a/dashboard/pages/api/streaming.ts +++ b/dashboard/pages/api/streaming.ts @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/pages/batch_tasks.tsx b/dashboard/pages/batch_tasks.tsx index b167f907b0ca4..a83900ce9f1c5 100644 --- a/dashboard/pages/batch_tasks.tsx +++ b/dashboard/pages/batch_tasks.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/pages/cluster.tsx b/dashboard/pages/cluster.tsx index 7e6ba21a457a6..d58210534b77a 100644 --- a/dashboard/pages/cluster.tsx +++ b/dashboard/pages/cluster.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/pages/data_sources.tsx b/dashboard/pages/data_sources.tsx index c5bf986532e2b..568d2da9dcc75 100644 --- a/dashboard/pages/data_sources.tsx +++ b/dashboard/pages/data_sources.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/pages/explain_distsql.tsx b/dashboard/pages/explain_distsql.tsx index 93cd07ade3a7f..b9fc39058f9e9 100644 --- a/dashboard/pages/explain_distsql.tsx +++ b/dashboard/pages/explain_distsql.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/pages/index.tsx b/dashboard/pages/index.tsx index 56b4e5d67715f..1aa08fc1368d2 100644 --- a/dashboard/pages/index.tsx +++ b/dashboard/pages/index.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/pages/indexes.tsx b/dashboard/pages/indexes.tsx index fbf2a3361ba43..24ef116178de6 100644 --- a/dashboard/pages/indexes.tsx +++ b/dashboard/pages/indexes.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/dashboard/pages/internal_tables.tsx b/dashboard/pages/internal_tables.tsx index b66ae7232269d..e46da250c69ec 100644 --- a/dashboard/pages/internal_tables.tsx +++ b/dashboard/pages/internal_tables.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/pages/materialized_views.tsx b/dashboard/pages/materialized_views.tsx index 8cc330979fa34..3f2f8ccf3e0f7 100644 --- a/dashboard/pages/materialized_views.tsx +++ b/dashboard/pages/materialized_views.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/pages/node.tsx b/dashboard/pages/node.tsx index c16b211f785f4..b5de6cc8857f6 100644 --- a/dashboard/pages/node.tsx +++ b/dashboard/pages/node.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/pages/settings.tsx b/dashboard/pages/settings.tsx index 7d3f44ea9d981..7f9d217a5ba8e 100644 --- a/dashboard/pages/settings.tsx +++ b/dashboard/pages/settings.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/pages/sinks.tsx b/dashboard/pages/sinks.tsx index db59670afddcb..60e164bd5f471 100644 --- a/dashboard/pages/sinks.tsx +++ b/dashboard/pages/sinks.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/pages/streaming_graph.tsx b/dashboard/pages/streaming_graph.tsx index 5d8fb7b65ea7d..d11d24d96710c 100644 --- a/dashboard/pages/streaming_graph.tsx +++ b/dashboard/pages/streaming_graph.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/pages/streaming_plan.tsx b/dashboard/pages/streaming_plan.tsx index 7464e3be665a2..5825c9548ab1f 100644 --- a/dashboard/pages/streaming_plan.tsx +++ b/dashboard/pages/streaming_plan.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,15 +39,28 @@ import FragmentGraph from "../components/FragmentGraph" import Title from "../components/Title" import { ActorBox } from "../lib/layout" import { TableFragments, TableFragments_Fragment } from "../proto/gen/meta" -import { StreamNode } from "../proto/gen/stream_plan" +import { Dispatcher, StreamNode } from "../proto/gen/stream_plan" import { getFragments, getStreamingJobs } from "./api/streaming" +interface DispatcherNode { + [actorId: number]: Dispatcher[] +} + +/** Associated data of each plan node in the fragment graph, including the dispatchers. 
*/ +export interface PlanNodeDatum { + name: string + children?: PlanNodeDatum[] + operatorId: string | number + node: StreamNode | DispatcherNode + extraInfo?: string +} + function buildPlanNodeDependency( fragment: TableFragments_Fragment -): d3.HierarchyNode { - const actor = fragment.actors[0] +): d3.HierarchyNode { + const firstActor = fragment.actors[0] - const hierarchyActorNode = (node: StreamNode): any => { + const hierarchyActorNode = (node: StreamNode): PlanNodeDatum => { return { name: node.nodeBody?.$case.toString() || "unknown", children: (node.input || []).map(hierarchyActorNode), @@ -56,13 +69,24 @@ function buildPlanNodeDependency( } } + let dispatcherName = "noDispatcher" + if (firstActor.dispatcher.length > 1) { + dispatcherName = "multipleDispatchers" + } else if (firstActor.dispatcher.length === 1) { + dispatcherName = `${toLower(firstActor.dispatcher[0].type)}Dispatcher` + } + + const dispatcherNode = fragment.actors.reduce((obj, actor) => { + obj[actor.actorId] = actor.dispatcher + return obj + }, {} as DispatcherNode) + return d3.hierarchy({ - name: - actor.dispatcher.map((d) => `${toLower(d.type)}Dispatcher`).join(",") || - "noDispatcher", + name: dispatcherName, extraInfo: `Actor ${fragment.actors.map((a) => a.actorId).join(", ")}`, - children: actor.nodes ? [hierarchyActorNode(actor.nodes)] : [], + children: firstActor.nodes ? [hierarchyActorNode(firstActor.nodes)] : [], operatorId: "dispatcher", + node: dispatcherNode, }) } @@ -170,7 +194,10 @@ export default function Streaming() { const planNodeDependenciesCallback = useCallback(() => { const fragments_ = fragments?.fragments if (fragments_) { - const planNodeDependencies = new Map>() + const planNodeDependencies = new Map< + string, + d3.HierarchyNode + >() for (const fragmentId in fragments_) { const fragment = fragments_[fragmentId] const dep = buildPlanNodeDependency(fragment) diff --git a/dashboard/pages/tables.tsx b/dashboard/pages/tables.tsx index d74228c5c092a..17e3373f8d808 100644 --- a/dashboard/pages/tables.tsx +++ b/dashboard/pages/tables.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/dashboard/proto/gen/batch_plan.ts b/dashboard/proto/gen/batch_plan.ts index ef4c80e0d9379..a9935d332f9e7 100644 --- a/dashboard/proto/gen/batch_plan.ts +++ b/dashboard/proto/gen/batch_plan.ts @@ -38,6 +38,12 @@ export interface RowSeqScanNode { | undefined; /** Whether the order on output columns should be preserved. */ ordered: boolean; + /** If along with `batch_limit`, `chunk_size` will be set. */ + chunkSize: RowSeqScanNode_ChunkSize | undefined; +} + +export interface RowSeqScanNode_ChunkSize { + chunkSize: number; } export interface SysRowSeqScanNode { @@ -94,6 +100,8 @@ export interface FilterNode { export interface InsertNode { /** Id of the table to perform inserting. */ tableId: number; + /** Version of the table. */ + tableVersionId: number; columnIndices: number[]; /** * An optional field and will be `None` for tables without user-defined pk. @@ -107,12 +115,16 @@ export interface InsertNode { export interface DeleteNode { /** Id of the table to perform deleting. */ tableId: number; + /** Version of the table. */ + tableVersionId: number; returning: boolean; } export interface UpdateNode { /** Id of the table to perform updating. */ tableId: number; + /** Version of the table. 
*/ + tableVersionId: number; exprs: ExprNode[]; returning: boolean; } @@ -414,7 +426,14 @@ export interface PlanFragment { } function createBaseRowSeqScanNode(): RowSeqScanNode { - return { tableDesc: undefined, columnIds: [], scanRanges: [], vnodeBitmap: undefined, ordered: false }; + return { + tableDesc: undefined, + columnIds: [], + scanRanges: [], + vnodeBitmap: undefined, + ordered: false, + chunkSize: undefined, + }; } export const RowSeqScanNode = { @@ -425,6 +444,7 @@ export const RowSeqScanNode = { scanRanges: Array.isArray(object?.scanRanges) ? object.scanRanges.map((e: any) => ScanRange.fromJSON(e)) : [], vnodeBitmap: isSet(object.vnodeBitmap) ? Buffer.fromJSON(object.vnodeBitmap) : undefined, ordered: isSet(object.ordered) ? Boolean(object.ordered) : false, + chunkSize: isSet(object.chunkSize) ? RowSeqScanNode_ChunkSize.fromJSON(object.chunkSize) : undefined, }; }, @@ -445,6 +465,8 @@ export const RowSeqScanNode = { message.vnodeBitmap !== undefined && (obj.vnodeBitmap = message.vnodeBitmap ? Buffer.toJSON(message.vnodeBitmap) : undefined); message.ordered !== undefined && (obj.ordered = message.ordered); + message.chunkSize !== undefined && + (obj.chunkSize = message.chunkSize ? RowSeqScanNode_ChunkSize.toJSON(message.chunkSize) : undefined); return obj; }, @@ -459,6 +481,31 @@ export const RowSeqScanNode = { ? Buffer.fromPartial(object.vnodeBitmap) : undefined; message.ordered = object.ordered ?? false; + message.chunkSize = (object.chunkSize !== undefined && object.chunkSize !== null) + ? RowSeqScanNode_ChunkSize.fromPartial(object.chunkSize) + : undefined; + return message; + }, +}; + +function createBaseRowSeqScanNode_ChunkSize(): RowSeqScanNode_ChunkSize { + return { chunkSize: 0 }; +} + +export const RowSeqScanNode_ChunkSize = { + fromJSON(object: any): RowSeqScanNode_ChunkSize { + return { chunkSize: isSet(object.chunkSize) ? Number(object.chunkSize) : 0 }; + }, + + toJSON(message: RowSeqScanNode_ChunkSize): unknown { + const obj: any = {}; + message.chunkSize !== undefined && (obj.chunkSize = Math.round(message.chunkSize)); + return obj; + }, + + fromPartial, I>>(object: I): RowSeqScanNode_ChunkSize { + const message = createBaseRowSeqScanNode_ChunkSize(); + message.chunkSize = object.chunkSize ?? 0; return message; }, }; @@ -701,13 +748,14 @@ export const FilterNode = { }; function createBaseInsertNode(): InsertNode { - return { tableId: 0, columnIndices: [], rowIdIndex: undefined, returning: false }; + return { tableId: 0, tableVersionId: 0, columnIndices: [], rowIdIndex: undefined, returning: false }; } export const InsertNode = { fromJSON(object: any): InsertNode { return { tableId: isSet(object.tableId) ? Number(object.tableId) : 0, + tableVersionId: isSet(object.tableVersionId) ? Number(object.tableVersionId) : 0, columnIndices: Array.isArray(object?.columnIndices) ? object.columnIndices.map((e: any) => Number(e)) : [], rowIdIndex: isSet(object.rowIdIndex) ? ColumnIndex.fromJSON(object.rowIdIndex) : undefined, returning: isSet(object.returning) ? 
Boolean(object.returning) : false, @@ -717,6 +765,7 @@ export const InsertNode = { toJSON(message: InsertNode): unknown { const obj: any = {}; message.tableId !== undefined && (obj.tableId = Math.round(message.tableId)); + message.tableVersionId !== undefined && (obj.tableVersionId = Math.round(message.tableVersionId)); if (message.columnIndices) { obj.columnIndices = message.columnIndices.map((e) => Math.round(e)); } else { @@ -731,6 +780,7 @@ export const InsertNode = { fromPartial, I>>(object: I): InsertNode { const message = createBaseInsertNode(); message.tableId = object.tableId ?? 0; + message.tableVersionId = object.tableVersionId ?? 0; message.columnIndices = object.columnIndices?.map((e) => e) || []; message.rowIdIndex = (object.rowIdIndex !== undefined && object.rowIdIndex !== null) ? ColumnIndex.fromPartial(object.rowIdIndex) @@ -741,13 +791,14 @@ export const InsertNode = { }; function createBaseDeleteNode(): DeleteNode { - return { tableId: 0, returning: false }; + return { tableId: 0, tableVersionId: 0, returning: false }; } export const DeleteNode = { fromJSON(object: any): DeleteNode { return { tableId: isSet(object.tableId) ? Number(object.tableId) : 0, + tableVersionId: isSet(object.tableVersionId) ? Number(object.tableVersionId) : 0, returning: isSet(object.returning) ? Boolean(object.returning) : false, }; }, @@ -755,6 +806,7 @@ export const DeleteNode = { toJSON(message: DeleteNode): unknown { const obj: any = {}; message.tableId !== undefined && (obj.tableId = Math.round(message.tableId)); + message.tableVersionId !== undefined && (obj.tableVersionId = Math.round(message.tableVersionId)); message.returning !== undefined && (obj.returning = message.returning); return obj; }, @@ -762,19 +814,21 @@ export const DeleteNode = { fromPartial, I>>(object: I): DeleteNode { const message = createBaseDeleteNode(); message.tableId = object.tableId ?? 0; + message.tableVersionId = object.tableVersionId ?? 0; message.returning = object.returning ?? false; return message; }, }; function createBaseUpdateNode(): UpdateNode { - return { tableId: 0, exprs: [], returning: false }; + return { tableId: 0, tableVersionId: 0, exprs: [], returning: false }; } export const UpdateNode = { fromJSON(object: any): UpdateNode { return { tableId: isSet(object.tableId) ? Number(object.tableId) : 0, + tableVersionId: isSet(object.tableVersionId) ? Number(object.tableVersionId) : 0, exprs: Array.isArray(object?.exprs) ? object.exprs.map((e: any) => ExprNode.fromJSON(e)) : [], returning: isSet(object.returning) ? Boolean(object.returning) : false, }; @@ -783,6 +837,7 @@ export const UpdateNode = { toJSON(message: UpdateNode): unknown { const obj: any = {}; message.tableId !== undefined && (obj.tableId = Math.round(message.tableId)); + message.tableVersionId !== undefined && (obj.tableVersionId = Math.round(message.tableVersionId)); if (message.exprs) { obj.exprs = message.exprs.map((e) => e ? ExprNode.toJSON(e) : undefined); } else { @@ -795,6 +850,7 @@ export const UpdateNode = { fromPartial, I>>(object: I): UpdateNode { const message = createBaseUpdateNode(); message.tableId = object.tableId ?? 0; + message.tableVersionId = object.tableVersionId ?? 0; message.exprs = object.exprs?.map((e) => ExprNode.fromPartial(e)) || []; message.returning = object.returning ?? 
false; return message; diff --git a/dashboard/proto/gen/catalog.ts b/dashboard/proto/gen/catalog.ts index 489b1c1aaf81b..f032be4bd55b7 100644 --- a/dashboard/proto/gen/catalog.ts +++ b/dashboard/proto/gen/catalog.ts @@ -12,15 +12,69 @@ import { export const protobufPackage = "catalog"; +export const SinkType = { + UNSPECIFIED: "UNSPECIFIED", + APPEND_ONLY: "APPEND_ONLY", + FORCE_APPEND_ONLY: "FORCE_APPEND_ONLY", + UPSERT: "UPSERT", + UNRECOGNIZED: "UNRECOGNIZED", +} as const; + +export type SinkType = typeof SinkType[keyof typeof SinkType]; + +export function sinkTypeFromJSON(object: any): SinkType { + switch (object) { + case 0: + case "UNSPECIFIED": + return SinkType.UNSPECIFIED; + case 1: + case "APPEND_ONLY": + return SinkType.APPEND_ONLY; + case 2: + case "FORCE_APPEND_ONLY": + return SinkType.FORCE_APPEND_ONLY; + case 3: + case "UPSERT": + return SinkType.UPSERT; + case -1: + case "UNRECOGNIZED": + default: + return SinkType.UNRECOGNIZED; + } +} + +export function sinkTypeToJSON(object: SinkType): string { + switch (object) { + case SinkType.UNSPECIFIED: + return "UNSPECIFIED"; + case SinkType.APPEND_ONLY: + return "APPEND_ONLY"; + case SinkType.FORCE_APPEND_ONLY: + return "FORCE_APPEND_ONLY"; + case SinkType.UPSERT: + return "UPSERT"; + case SinkType.UNRECOGNIZED: + default: + return "UNRECOGNIZED"; + } +} + /** * The rust prost library always treats uint64 as required and message as - * optional. In order to allow `row_id_index` as optional field in - * `StreamSourceInfo` and `TableSourceInfo`, we wrap uint64 inside this message. + * optional. In order to allow `row_id_index` as an optional field, we wrap + * uint64 inside this message. */ export interface ColumnIndex { index: number; } +export interface WatermarkDesc { + /** The column idx the watermark is on */ + watermarkIdx: number; + /** The expression to calculate the watermark value. */ + expr: ExprNode | undefined; +} + export interface StreamSourceInfo { rowFormat: RowFormatType; rowSchemaLocation: string; @@ -52,7 +106,14 @@ export interface Source { /** Properties specified by the user in WITH clause. */ properties: { [key: string]: string }; owner: number; - info: StreamSourceInfo | undefined; + info: + | StreamSourceInfo + | undefined; + /** + * Define watermarks on the source. The `repeated` is just for forward + * compatibility, currently, only one watermark on the source + */ + watermarkDescs: WatermarkDesc[]; } export interface Source_PropertiesEntry { @@ -71,7 +132,7 @@ export interface Sink { distributionKey: number[]; /** pk_indices of the corresponding materialize operator's output. */ streamKey: number[]; - appendOnly: boolean; + sinkType: SinkType; owner: number; properties: { [key: string]: string }; definition: string; @@ -151,6 +212,7 @@ export interface Table { definition: string; handlePkConflict: boolean; readPrefixLenHint: number; + watermarkIndices: number[]; /** * Per-table catalog version, used by schema change. `None` for internal tables and tests. * Not to be confused with the global catalog version for notification service. @@ -282,6 +344,33 @@ export const ColumnIndex = { }, }; +function createBaseWatermarkDesc(): WatermarkDesc { + return { watermarkIdx: 0, expr: undefined }; +} + +export const WatermarkDesc = { + fromJSON(object: any): WatermarkDesc { + return { + watermarkIdx: isSet(object.watermarkIdx) ? Number(object.watermarkIdx) : 0, + expr: isSet(object.expr) ? 
ExprNode.fromJSON(object.expr) : undefined, + }; + }, + + toJSON(message: WatermarkDesc): unknown { + const obj: any = {}; + message.watermarkIdx !== undefined && (obj.watermarkIdx = Math.round(message.watermarkIdx)); + message.expr !== undefined && (obj.expr = message.expr ? ExprNode.toJSON(message.expr) : undefined); + return obj; + }, + + fromPartial, I>>(object: I): WatermarkDesc { + const message = createBaseWatermarkDesc(); + message.watermarkIdx = object.watermarkIdx ?? 0; + message.expr = (object.expr !== undefined && object.expr !== null) ? ExprNode.fromPartial(object.expr) : undefined; + return message; + }, +}; + function createBaseStreamSourceInfo(): StreamSourceInfo { return { rowFormat: RowFormatType.ROW_UNSPECIFIED, @@ -340,6 +429,7 @@ function createBaseSource(): Source { properties: {}, owner: 0, info: undefined, + watermarkDescs: [], }; } @@ -361,6 +451,9 @@ export const Source = { : {}, owner: isSet(object.owner) ? Number(object.owner) : 0, info: isSet(object.info) ? StreamSourceInfo.fromJSON(object.info) : undefined, + watermarkDescs: Array.isArray(object?.watermarkDescs) + ? object.watermarkDescs.map((e: any) => WatermarkDesc.fromJSON(e)) + : [], }; }, @@ -390,6 +483,11 @@ export const Source = { } message.owner !== undefined && (obj.owner = Math.round(message.owner)); message.info !== undefined && (obj.info = message.info ? StreamSourceInfo.toJSON(message.info) : undefined); + if (message.watermarkDescs) { + obj.watermarkDescs = message.watermarkDescs.map((e) => e ? WatermarkDesc.toJSON(e) : undefined); + } else { + obj.watermarkDescs = []; + } return obj; }, @@ -417,6 +515,7 @@ export const Source = { message.info = (object.info !== undefined && object.info !== null) ? StreamSourceInfo.fromPartial(object.info) : undefined; + message.watermarkDescs = object.watermarkDescs?.map((e) => WatermarkDesc.fromPartial(e)) || []; return message; }, }; @@ -456,7 +555,7 @@ function createBaseSink(): Sink { dependentRelations: [], distributionKey: [], streamKey: [], - appendOnly: false, + sinkType: SinkType.UNSPECIFIED, owner: 0, properties: {}, definition: "", @@ -479,7 +578,7 @@ export const Sink = { ? object.distributionKey.map((e: any) => Number(e)) : [], streamKey: Array.isArray(object?.streamKey) ? object.streamKey.map((e: any) => Number(e)) : [], - appendOnly: isSet(object.appendOnly) ? Boolean(object.appendOnly) : false, + sinkType: isSet(object.sinkType) ? sinkTypeFromJSON(object.sinkType) : SinkType.UNSPECIFIED, owner: isSet(object.owner) ? Number(object.owner) : 0, properties: isObject(object.properties) ? Object.entries(object.properties).reduce<{ [key: string]: string }>((acc, [key, value]) => { @@ -522,7 +621,7 @@ export const Sink = { } else { obj.streamKey = []; } - message.appendOnly !== undefined && (obj.appendOnly = message.appendOnly); + message.sinkType !== undefined && (obj.sinkType = sinkTypeToJSON(message.sinkType)); message.owner !== undefined && (obj.owner = Math.round(message.owner)); obj.properties = {}; if (message.properties) { @@ -545,7 +644,7 @@ export const Sink = { message.dependentRelations = object.dependentRelations?.map((e) => e) || []; message.distributionKey = object.distributionKey?.map((e) => e) || []; message.streamKey = object.streamKey?.map((e) => e) || []; - message.appendOnly = object.appendOnly ?? false; + message.sinkType = object.sinkType ?? SinkType.UNSPECIFIED; message.owner = object.owner ?? 0; message.properties = Object.entries(object.properties ?? 
{}).reduce<{ [key: string]: string }>( (acc, [key, value]) => { @@ -741,6 +840,7 @@ function createBaseTable(): Table { definition: "", handlePkConflict: false, readPrefixLenHint: 0, + watermarkIndices: [], version: undefined, }; } @@ -782,6 +882,9 @@ export const Table = { definition: isSet(object.definition) ? String(object.definition) : "", handlePkConflict: isSet(object.handlePkConflict) ? Boolean(object.handlePkConflict) : false, readPrefixLenHint: isSet(object.readPrefixLenHint) ? Number(object.readPrefixLenHint) : 0, + watermarkIndices: Array.isArray(object?.watermarkIndices) + ? object.watermarkIndices.map((e: any) => Number(e)) + : [], version: isSet(object.version) ? Table_TableVersion.fromJSON(object.version) : undefined, }; }, @@ -841,6 +944,11 @@ export const Table = { message.definition !== undefined && (obj.definition = message.definition); message.handlePkConflict !== undefined && (obj.handlePkConflict = message.handlePkConflict); message.readPrefixLenHint !== undefined && (obj.readPrefixLenHint = Math.round(message.readPrefixLenHint)); + if (message.watermarkIndices) { + obj.watermarkIndices = message.watermarkIndices.map((e) => Math.round(e)); + } else { + obj.watermarkIndices = []; + } message.version !== undefined && (obj.version = message.version ? Table_TableVersion.toJSON(message.version) : undefined); return obj; @@ -890,6 +998,7 @@ export const Table = { message.definition = object.definition ?? ""; message.handlePkConflict = object.handlePkConflict ?? false; message.readPrefixLenHint = object.readPrefixLenHint ?? 0; + message.watermarkIndices = object.watermarkIndices?.map((e) => e) || []; message.version = (object.version !== undefined && object.version !== null) ? Table_TableVersion.fromPartial(object.version) : undefined; diff --git a/dashboard/proto/gen/cdc_service.ts b/dashboard/proto/gen/cdc_service.ts deleted file mode 100644 index bf9ea97163dd5..0000000000000 --- a/dashboard/proto/gen/cdc_service.ts +++ /dev/null @@ -1,260 +0,0 @@ -/* eslint-disable */ - -export const protobufPackage = "cdc_service"; - -/** Notes: This proto needs to be self-contained */ -export interface Status { - code: Status_Code; - message: string; -} - -export const Status_Code = { UNSPECIFIED: "UNSPECIFIED", OK: "OK", UNRECOGNIZED: "UNRECOGNIZED" } as const; - -export type Status_Code = typeof Status_Code[keyof typeof Status_Code]; - -export function status_CodeFromJSON(object: any): Status_Code { - switch (object) { - case 0: - case "UNSPECIFIED": - return Status_Code.UNSPECIFIED; - case 1: - case "OK": - return Status_Code.OK; - case -1: - case "UNRECOGNIZED": - default: - return Status_Code.UNRECOGNIZED; - } -} - -export function status_CodeToJSON(object: Status_Code): string { - switch (object) { - case Status_Code.UNSPECIFIED: - return "UNSPECIFIED"; - case Status_Code.OK: - return "OK"; - case Status_Code.UNRECOGNIZED: - default: - return "UNRECOGNIZED"; - } -} - -export interface DbConnectorProperties { - databaseHost: string; - databasePort: string; - databaseUser: string; - databasePassword: string; - databaseName: string; - tableName: string; - partition: string; - startOffset: string; - includeSchemaEvents: boolean; -} - -export interface CdcMessage { - payload: string; - partition: string; - offset: string; -} - -export interface GetEventStreamRequest { - sourceId: number; - properties: DbConnectorProperties | undefined; -} - -export interface GetEventStreamResponse { - sourceId: number; - events: CdcMessage[]; -} - -function createBaseStatus(): Status { - return { code: 
Status_Code.UNSPECIFIED, message: "" };
-}
-
-export const Status = {
-  fromJSON(object: any): Status {
-    return {
-      code: isSet(object.code) ? status_CodeFromJSON(object.code) : Status_Code.UNSPECIFIED,
-      message: isSet(object.message) ? String(object.message) : "",
-    };
-  },
-
-  toJSON(message: Status): unknown {
-    const obj: any = {};
-    message.code !== undefined && (obj.code = status_CodeToJSON(message.code));
-    message.message !== undefined && (obj.message = message.message);
-    return obj;
-  },
-
-  fromPartial<I extends Exact<DeepPartial<Status>, I>>(object: I): Status {
-    const message = createBaseStatus();
-    message.code = object.code ?? Status_Code.UNSPECIFIED;
-    message.message = object.message ?? "";
-    return message;
-  },
-};
-
-function createBaseDbConnectorProperties(): DbConnectorProperties {
-  return {
-    databaseHost: "",
-    databasePort: "",
-    databaseUser: "",
-    databasePassword: "",
-    databaseName: "",
-    tableName: "",
-    partition: "",
-    startOffset: "",
-    includeSchemaEvents: false,
-  };
-}
-
-export const DbConnectorProperties = {
-  fromJSON(object: any): DbConnectorProperties {
-    return {
-      databaseHost: isSet(object.databaseHost) ? String(object.databaseHost) : "",
-      databasePort: isSet(object.databasePort) ? String(object.databasePort) : "",
-      databaseUser: isSet(object.databaseUser) ? String(object.databaseUser) : "",
-      databasePassword: isSet(object.databasePassword) ? String(object.databasePassword) : "",
-      databaseName: isSet(object.databaseName) ? String(object.databaseName) : "",
-      tableName: isSet(object.tableName) ? String(object.tableName) : "",
-      partition: isSet(object.partition) ? String(object.partition) : "",
-      startOffset: isSet(object.startOffset) ? String(object.startOffset) : "",
-      includeSchemaEvents: isSet(object.includeSchemaEvents) ? Boolean(object.includeSchemaEvents) : false,
-    };
-  },
-
-  toJSON(message: DbConnectorProperties): unknown {
-    const obj: any = {};
-    message.databaseHost !== undefined && (obj.databaseHost = message.databaseHost);
-    message.databasePort !== undefined && (obj.databasePort = message.databasePort);
-    message.databaseUser !== undefined && (obj.databaseUser = message.databaseUser);
-    message.databasePassword !== undefined && (obj.databasePassword = message.databasePassword);
-    message.databaseName !== undefined && (obj.databaseName = message.databaseName);
-    message.tableName !== undefined && (obj.tableName = message.tableName);
-    message.partition !== undefined && (obj.partition = message.partition);
-    message.startOffset !== undefined && (obj.startOffset = message.startOffset);
-    message.includeSchemaEvents !== undefined && (obj.includeSchemaEvents = message.includeSchemaEvents);
-    return obj;
-  },
-
-  fromPartial<I extends Exact<DeepPartial<DbConnectorProperties>, I>>(object: I): DbConnectorProperties {
-    const message = createBaseDbConnectorProperties();
-    message.databaseHost = object.databaseHost ?? "";
-    message.databasePort = object.databasePort ?? "";
-    message.databaseUser = object.databaseUser ?? "";
-    message.databasePassword = object.databasePassword ?? "";
-    message.databaseName = object.databaseName ?? "";
-    message.tableName = object.tableName ?? "";
-    message.partition = object.partition ?? "";
-    message.startOffset = object.startOffset ?? "";
-    message.includeSchemaEvents = object.includeSchemaEvents ?? false;
-    return message;
-  },
-};
-
-function createBaseCdcMessage(): CdcMessage {
-  return { payload: "", partition: "", offset: "" };
-}
-
-export const CdcMessage = {
-  fromJSON(object: any): CdcMessage {
-    return {
-      payload: isSet(object.payload) ? String(object.payload) : "",
-      partition: isSet(object.partition) ? String(object.partition) : "",
-      offset: isSet(object.offset) ? String(object.offset) : "",
-    };
-  },
-
-  toJSON(message: CdcMessage): unknown {
-    const obj: any = {};
-    message.payload !== undefined && (obj.payload = message.payload);
-    message.partition !== undefined && (obj.partition = message.partition);
-    message.offset !== undefined && (obj.offset = message.offset);
-    return obj;
-  },
-
-  fromPartial<I extends Exact<DeepPartial<CdcMessage>, I>>(object: I): CdcMessage {
-    const message = createBaseCdcMessage();
-    message.payload = object.payload ?? "";
-    message.partition = object.partition ?? "";
-    message.offset = object.offset ?? "";
-    return message;
-  },
-};
-
-function createBaseGetEventStreamRequest(): GetEventStreamRequest {
-  return { sourceId: 0, properties: undefined };
-}
-
-export const GetEventStreamRequest = {
-  fromJSON(object: any): GetEventStreamRequest {
-    return {
-      sourceId: isSet(object.sourceId) ? Number(object.sourceId) : 0,
-      properties: isSet(object.properties) ? DbConnectorProperties.fromJSON(object.properties) : undefined,
-    };
-  },
-
-  toJSON(message: GetEventStreamRequest): unknown {
-    const obj: any = {};
-    message.sourceId !== undefined && (obj.sourceId = Math.round(message.sourceId));
-    message.properties !== undefined &&
-      (obj.properties = message.properties ? DbConnectorProperties.toJSON(message.properties) : undefined);
-    return obj;
-  },
-
-  fromPartial<I extends Exact<DeepPartial<GetEventStreamRequest>, I>>(object: I): GetEventStreamRequest {
-    const message = createBaseGetEventStreamRequest();
-    message.sourceId = object.sourceId ?? 0;
-    message.properties = (object.properties !== undefined && object.properties !== null)
-      ? DbConnectorProperties.fromPartial(object.properties)
-      : undefined;
-    return message;
-  },
-};
-
-function createBaseGetEventStreamResponse(): GetEventStreamResponse {
-  return { sourceId: 0, events: [] };
-}
-
-export const GetEventStreamResponse = {
-  fromJSON(object: any): GetEventStreamResponse {
-    return {
-      sourceId: isSet(object.sourceId) ? Number(object.sourceId) : 0,
-      events: Array.isArray(object?.events) ? object.events.map((e: any) => CdcMessage.fromJSON(e)) : [],
-    };
-  },
-
-  toJSON(message: GetEventStreamResponse): unknown {
-    const obj: any = {};
-    message.sourceId !== undefined && (obj.sourceId = Math.round(message.sourceId));
-    if (message.events) {
-      obj.events = message.events.map((e) => e ? CdcMessage.toJSON(e) : undefined);
-    } else {
-      obj.events = [];
-    }
-    return obj;
-  },
-
-  fromPartial<I extends Exact<DeepPartial<GetEventStreamResponse>, I>>(object: I): GetEventStreamResponse {
-    const message = createBaseGetEventStreamResponse();
-    message.sourceId = object.sourceId ?? 0;
-    message.events = object.events?.map((e) => CdcMessage.fromPartial(e)) || [];
-    return message;
-  },
-};
-
-type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined;
-
-export type DeepPartial<T> = T extends Builtin ? T
-  : T extends Array<infer U> ? Array<DeepPartial<U>> : T extends ReadonlyArray<infer U> ? ReadonlyArray<DeepPartial<U>>
-  : T extends { $case: string } ? { [K in keyof Omit<T, "$case">]?: DeepPartial<T[K]> } & { $case: T["$case"] }
-  : T extends {} ? { [K in keyof T]?: DeepPartial<T[K]> }
-  : Partial<T>;
-
-type KeysOfUnion<T> = T extends T ? keyof T : never;
-export type Exact<P, I extends P> = P extends Builtin ?
P - : P & { [K in keyof P]: Exact } & { [K in Exclude>]: never }; - -function isSet(value: any): boolean { - return value !== null && value !== undefined; -} diff --git a/dashboard/proto/gen/common.ts b/dashboard/proto/gen/common.ts index 1042efe39a81a..0e5c3f88ce789 100644 --- a/dashboard/proto/gen/common.ts +++ b/dashboard/proto/gen/common.ts @@ -208,7 +208,6 @@ export function buffer_CompressionTypeToJSON(object: Buffer_CompressionType): st /** Vnode mapping for stream fragments. Stores mapping from virtual node to parallel unit id. */ export interface ParallelUnitMapping { - fragmentId: number; originalIndices: number[]; data: number[]; } @@ -408,13 +407,12 @@ export const Buffer = { }; function createBaseParallelUnitMapping(): ParallelUnitMapping { - return { fragmentId: 0, originalIndices: [], data: [] }; + return { originalIndices: [], data: [] }; } export const ParallelUnitMapping = { fromJSON(object: any): ParallelUnitMapping { return { - fragmentId: isSet(object.fragmentId) ? Number(object.fragmentId) : 0, originalIndices: Array.isArray(object?.originalIndices) ? object.originalIndices.map((e: any) => Number(e)) : [], data: Array.isArray(object?.data) ? object.data.map((e: any) => Number(e)) : [], }; @@ -422,7 +420,6 @@ export const ParallelUnitMapping = { toJSON(message: ParallelUnitMapping): unknown { const obj: any = {}; - message.fragmentId !== undefined && (obj.fragmentId = Math.round(message.fragmentId)); if (message.originalIndices) { obj.originalIndices = message.originalIndices.map((e) => Math.round(e)); } else { @@ -438,7 +435,6 @@ export const ParallelUnitMapping = { fromPartial, I>>(object: I): ParallelUnitMapping { const message = createBaseParallelUnitMapping(); - message.fragmentId = object.fragmentId ?? 0; message.originalIndices = object.originalIndices?.map((e) => e) || []; message.data = object.data?.map((e) => e) || []; return message; diff --git a/dashboard/proto/gen/data.ts b/dashboard/proto/gen/data.ts index 16e721937978d..f34bbab892582 100644 --- a/dashboard/proto/gen/data.ts +++ b/dashboard/proto/gen/data.ts @@ -20,6 +20,7 @@ export const RwArrayType = { STRUCT: "STRUCT", LIST: "LIST", BYTEA: "BYTEA", + JSONB: "JSONB", UNRECOGNIZED: "UNRECOGNIZED", } as const; @@ -75,6 +76,9 @@ export function rwArrayTypeFromJSON(object: any): RwArrayType { case 15: case "BYTEA": return RwArrayType.BYTEA; + case 16: + case "JSONB": + return RwArrayType.JSONB; case -1: case "UNRECOGNIZED": default: @@ -116,6 +120,8 @@ export function rwArrayTypeToJSON(object: RwArrayType): string { return "LIST"; case RwArrayType.BYTEA: return "BYTEA"; + case RwArrayType.JSONB: + return "JSONB"; case RwArrayType.UNRECOGNIZED: default: return "UNRECOGNIZED"; @@ -329,6 +335,7 @@ export const DataType_TypeName = { STRUCT: "STRUCT", LIST: "LIST", BYTEA: "BYTEA", + JSONB: "JSONB", UNRECOGNIZED: "UNRECOGNIZED", } as const; @@ -387,6 +394,9 @@ export function dataType_TypeNameFromJSON(object: any): DataType_TypeName { case 17: case "BYTEA": return DataType_TypeName.BYTEA; + case 18: + case "JSONB": + return DataType_TypeName.JSONB; case -1: case "UNRECOGNIZED": default: @@ -430,6 +440,8 @@ export function dataType_TypeNameToJSON(object: DataType_TypeName): string { return "LIST"; case DataType_TypeName.BYTEA: return "BYTEA"; + case DataType_TypeName.JSONB: + return "JSONB"; case DataType_TypeName.UNRECOGNIZED: default: return "UNRECOGNIZED"; diff --git a/dashboard/proto/gen/ddl_service.ts b/dashboard/proto/gen/ddl_service.ts index d5833043bd2b1..7ddf38241f3f3 100644 --- 
a/dashboard/proto/gen/ddl_service.ts +++ b/dashboard/proto/gen/ddl_service.ts @@ -217,6 +217,28 @@ export interface ReplaceTablePlanResponse { version: number; } +export interface GetTableRequest { + databaseName: string; + tableName: string; +} + +export interface GetTableResponse { + table: Table | undefined; +} + +export interface GetDdlProgressRequest { +} + +export interface DdlProgress { + id: number; + statement: string; + progress: string; +} + +export interface GetDdlProgressResponse { + ddlProgress: DdlProgress[]; +} + function createBaseCreateDatabaseRequest(): CreateDatabaseRequest { return { db: undefined }; } @@ -1335,6 +1357,135 @@ export const ReplaceTablePlanResponse = { }, }; +function createBaseGetTableRequest(): GetTableRequest { + return { databaseName: "", tableName: "" }; +} + +export const GetTableRequest = { + fromJSON(object: any): GetTableRequest { + return { + databaseName: isSet(object.databaseName) ? String(object.databaseName) : "", + tableName: isSet(object.tableName) ? String(object.tableName) : "", + }; + }, + + toJSON(message: GetTableRequest): unknown { + const obj: any = {}; + message.databaseName !== undefined && (obj.databaseName = message.databaseName); + message.tableName !== undefined && (obj.tableName = message.tableName); + return obj; + }, + + fromPartial, I>>(object: I): GetTableRequest { + const message = createBaseGetTableRequest(); + message.databaseName = object.databaseName ?? ""; + message.tableName = object.tableName ?? ""; + return message; + }, +}; + +function createBaseGetTableResponse(): GetTableResponse { + return { table: undefined }; +} + +export const GetTableResponse = { + fromJSON(object: any): GetTableResponse { + return { table: isSet(object.table) ? Table.fromJSON(object.table) : undefined }; + }, + + toJSON(message: GetTableResponse): unknown { + const obj: any = {}; + message.table !== undefined && (obj.table = message.table ? Table.toJSON(message.table) : undefined); + return obj; + }, + + fromPartial, I>>(object: I): GetTableResponse { + const message = createBaseGetTableResponse(); + message.table = (object.table !== undefined && object.table !== null) ? Table.fromPartial(object.table) : undefined; + return message; + }, +}; + +function createBaseGetDdlProgressRequest(): GetDdlProgressRequest { + return {}; +} + +export const GetDdlProgressRequest = { + fromJSON(_: any): GetDdlProgressRequest { + return {}; + }, + + toJSON(_: GetDdlProgressRequest): unknown { + const obj: any = {}; + return obj; + }, + + fromPartial, I>>(_: I): GetDdlProgressRequest { + const message = createBaseGetDdlProgressRequest(); + return message; + }, +}; + +function createBaseDdlProgress(): DdlProgress { + return { id: 0, statement: "", progress: "" }; +} + +export const DdlProgress = { + fromJSON(object: any): DdlProgress { + return { + id: isSet(object.id) ? Number(object.id) : 0, + statement: isSet(object.statement) ? String(object.statement) : "", + progress: isSet(object.progress) ? String(object.progress) : "", + }; + }, + + toJSON(message: DdlProgress): unknown { + const obj: any = {}; + message.id !== undefined && (obj.id = Math.round(message.id)); + message.statement !== undefined && (obj.statement = message.statement); + message.progress !== undefined && (obj.progress = message.progress); + return obj; + }, + + fromPartial, I>>(object: I): DdlProgress { + const message = createBaseDdlProgress(); + message.id = object.id ?? 0; + message.statement = object.statement ?? ""; + message.progress = object.progress ?? 
""; + return message; + }, +}; + +function createBaseGetDdlProgressResponse(): GetDdlProgressResponse { + return { ddlProgress: [] }; +} + +export const GetDdlProgressResponse = { + fromJSON(object: any): GetDdlProgressResponse { + return { + ddlProgress: Array.isArray(object?.ddlProgress) + ? object.ddlProgress.map((e: any) => DdlProgress.fromJSON(e)) + : [], + }; + }, + + toJSON(message: GetDdlProgressResponse): unknown { + const obj: any = {}; + if (message.ddlProgress) { + obj.ddlProgress = message.ddlProgress.map((e) => e ? DdlProgress.toJSON(e) : undefined); + } else { + obj.ddlProgress = []; + } + return obj; + }, + + fromPartial, I>>(object: I): GetDdlProgressResponse { + const message = createBaseGetDdlProgressResponse(); + message.ddlProgress = object.ddlProgress?.map((e) => DdlProgress.fromPartial(e)) || []; + return message; + }, +}; + type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined; export type DeepPartial = T extends Builtin ? T diff --git a/dashboard/proto/gen/expr.ts b/dashboard/proto/gen/expr.ts index 8ea49251de5db..039adaa0eedae 100644 --- a/dashboard/proto/gen/expr.ts +++ b/dashboard/proto/gen/expr.ts @@ -105,6 +105,8 @@ export const ExprNode_Type = { BIT_LENGTH: "BIT_LENGTH", OVERLAY: "OVERLAY", REGEXP_MATCH: "REGEXP_MATCH", + POW: "POW", + EXP: "EXP", /** IS_TRUE - Boolean comparison */ IS_TRUE: "IS_TRUE", IS_NOT_TRUE: "IS_NOT_TRUE", @@ -126,6 +128,7 @@ export const ExprNode_Type = { ARRAY_CAT: "ARRAY_CAT", ARRAY_APPEND: "ARRAY_APPEND", ARRAY_PREPEND: "ARRAY_PREPEND", + FORMAT_TYPE: "FORMAT_TYPE", /** * VNODE - Non-pure functions below (> 600) * ------------------------ @@ -338,6 +341,12 @@ export function exprNode_TypeFromJSON(object: any): ExprNode_Type { case 232: case "REGEXP_MATCH": return ExprNode_Type.REGEXP_MATCH; + case 233: + case "POW": + return ExprNode_Type.POW; + case 234: + case "EXP": + return ExprNode_Type.EXP; case 301: case "IS_TRUE": return ExprNode_Type.IS_TRUE; @@ -386,6 +395,9 @@ export function exprNode_TypeFromJSON(object: any): ExprNode_Type { case 533: case "ARRAY_PREPEND": return ExprNode_Type.ARRAY_PREPEND; + case 534: + case "FORMAT_TYPE": + return ExprNode_Type.FORMAT_TYPE; case 1101: case "VNODE": return ExprNode_Type.VNODE; @@ -534,6 +546,10 @@ export function exprNode_TypeToJSON(object: ExprNode_Type): string { return "OVERLAY"; case ExprNode_Type.REGEXP_MATCH: return "REGEXP_MATCH"; + case ExprNode_Type.POW: + return "POW"; + case ExprNode_Type.EXP: + return "EXP"; case ExprNode_Type.IS_TRUE: return "IS_TRUE"; case ExprNode_Type.IS_NOT_TRUE: @@ -566,6 +582,8 @@ export function exprNode_TypeToJSON(object: ExprNode_Type): string { return "ARRAY_APPEND"; case ExprNode_Type.ARRAY_PREPEND: return "ARRAY_PREPEND"; + case ExprNode_Type.FORMAT_TYPE: + return "FORMAT_TYPE"; case ExprNode_Type.VNODE: return "VNODE"; case ExprNode_Type.NOW: @@ -699,6 +717,10 @@ export const AggCall_Type = { ARRAY_AGG: "ARRAY_AGG", FIRST_VALUE: "FIRST_VALUE", SUM0: "SUM0", + VAR_POP: "VAR_POP", + VAR_SAMP: "VAR_SAMP", + STDDEV_POP: "STDDEV_POP", + STDDEV_SAMP: "STDDEV_SAMP", UNRECOGNIZED: "UNRECOGNIZED", } as const; @@ -739,6 +761,18 @@ export function aggCall_TypeFromJSON(object: any): AggCall_Type { case 10: case "SUM0": return AggCall_Type.SUM0; + case 11: + case "VAR_POP": + return AggCall_Type.VAR_POP; + case 12: + case "VAR_SAMP": + return AggCall_Type.VAR_SAMP; + case 13: + case "STDDEV_POP": + return AggCall_Type.STDDEV_POP; + case 14: + case "STDDEV_SAMP": + return AggCall_Type.STDDEV_SAMP; case -1: case 
"UNRECOGNIZED": default: @@ -770,6 +804,14 @@ export function aggCall_TypeToJSON(object: AggCall_Type): string { return "FIRST_VALUE"; case AggCall_Type.SUM0: return "SUM0"; + case AggCall_Type.VAR_POP: + return "VAR_POP"; + case AggCall_Type.VAR_SAMP: + return "VAR_SAMP"; + case AggCall_Type.STDDEV_POP: + return "STDDEV_POP"; + case AggCall_Type.STDDEV_SAMP: + return "STDDEV_SAMP"; case AggCall_Type.UNRECOGNIZED: default: return "UNRECOGNIZED"; diff --git a/dashboard/proto/gen/hummock.ts b/dashboard/proto/gen/hummock.ts index bc36fd28fc827..2deaff7c6e9a6 100644 --- a/dashboard/proto/gen/hummock.ts +++ b/dashboard/proto/gen/hummock.ts @@ -91,16 +91,23 @@ export interface GroupConstruct { /** If `parent_group_id` is not 0, it means `parent_group_id` splits into `parent_group_id` and this group, so this group is not empty initially. */ parentGroupId: number; tableIds: number[]; + groupId: number; +} + +export interface GroupMetaChange { + tableIdsAdd: number[]; + tableIdsRemove: number[]; } export interface GroupDestroy { } export interface GroupDelta { - deltaType?: { $case: "intraLevel"; intraLevel: IntraLevelDelta } | { - $case: "groupConstruct"; - groupConstruct: GroupConstruct; - } | { $case: "groupDestroy"; groupDestroy: GroupDestroy }; + deltaType?: + | { $case: "intraLevel"; intraLevel: IntraLevelDelta } + | { $case: "groupConstruct"; groupConstruct: GroupConstruct } + | { $case: "groupDestroy"; groupDestroy: GroupDestroy } + | { $case: "groupMetaChange"; groupMetaChange: GroupMetaChange }; } export interface UncommittedEpoch { @@ -123,6 +130,9 @@ export interface HummockVersion { export interface HummockVersion_Levels { levels: Level[]; l0: OverlappingLevel | undefined; + groupId: number; + parentGroupId: number; + memberTableIds: number[]; } export interface HummockVersion_LevelsEntry { @@ -166,13 +176,7 @@ export interface HummockSnapshot { currentEpoch: number; } -export interface PinVersionRequest { - contextId: number; - lastPinned: number; -} - -export interface PinVersionResponse { - status: Status | undefined; +export interface VersionUpdatePayload { payload?: { $case: "versionDeltas"; versionDeltas: HummockVersionDeltas } | { $case: "pinnedVersion"; pinnedVersion: HummockVersion; @@ -292,6 +296,8 @@ export interface CompactTask { tableOptions: { [key: number]: TableOption }; currentEpochTime: number; targetSubLevelId: number; + /** Identifies whether the task is space_reclaim, if the compact_task_type increases, it will be refactored to enum */ + taskType: CompactTask_TaskType; } export const CompactTask_TaskStatus = { @@ -389,6 +395,65 @@ export function compactTask_TaskStatusToJSON(object: CompactTask_TaskStatus): st } } +export const CompactTask_TaskType = { + TYPE_UNSPECIFIED: "TYPE_UNSPECIFIED", + DYNAMIC: "DYNAMIC", + SPACE_RECLAIM: "SPACE_RECLAIM", + MANUAL: "MANUAL", + SHARED_BUFFER: "SHARED_BUFFER", + TTL: "TTL", + UNRECOGNIZED: "UNRECOGNIZED", +} as const; + +export type CompactTask_TaskType = typeof CompactTask_TaskType[keyof typeof CompactTask_TaskType]; + +export function compactTask_TaskTypeFromJSON(object: any): CompactTask_TaskType { + switch (object) { + case 0: + case "TYPE_UNSPECIFIED": + return CompactTask_TaskType.TYPE_UNSPECIFIED; + case 1: + case "DYNAMIC": + return CompactTask_TaskType.DYNAMIC; + case 2: + case "SPACE_RECLAIM": + return CompactTask_TaskType.SPACE_RECLAIM; + case 3: + case "MANUAL": + return CompactTask_TaskType.MANUAL; + case 4: + case "SHARED_BUFFER": + return CompactTask_TaskType.SHARED_BUFFER; + case 5: + case "TTL": + return 
CompactTask_TaskType.TTL; + case -1: + case "UNRECOGNIZED": + default: + return CompactTask_TaskType.UNRECOGNIZED; + } +} + +export function compactTask_TaskTypeToJSON(object: CompactTask_TaskType): string { + switch (object) { + case CompactTask_TaskType.TYPE_UNSPECIFIED: + return "TYPE_UNSPECIFIED"; + case CompactTask_TaskType.DYNAMIC: + return "DYNAMIC"; + case CompactTask_TaskType.SPACE_RECLAIM: + return "SPACE_RECLAIM"; + case CompactTask_TaskType.MANUAL: + return "MANUAL"; + case CompactTask_TaskType.SHARED_BUFFER: + return "SHARED_BUFFER"; + case CompactTask_TaskType.TTL: + return "TTL"; + case CompactTask_TaskType.UNRECOGNIZED: + default: + return "UNRECOGNIZED"; + } +} + export interface CompactTask_TableOptionsEntry { key: number; value: TableOption | undefined; @@ -411,17 +476,21 @@ export interface CompactStatus { levelHandlers: LevelHandler[]; } +/** Config info of compaction group. */ export interface CompactionGroup { id: number; - parentId: number; - memberTableIds: number[]; compactionConfig: CompactionConfig | undefined; - tableIdToOptions: { [key: number]: TableOption }; } -export interface CompactionGroup_TableIdToOptionsEntry { - key: number; - value: TableOption | undefined; +/** + * Complete info of compaction group. + * The info is the aggregate of `HummockVersion` and `CompactionGroupConfig` + */ +export interface CompactionGroupInfo { + id: number; + parentId: number; + memberTableIds: number[]; + compactionConfig: CompactionConfig | undefined; } export interface CompactTaskAssignment { @@ -545,14 +614,6 @@ export interface ReportVacuumTaskResponse { status: Status | undefined; } -export interface GetCompactionGroupsRequest { -} - -export interface GetCompactionGroupsResponse { - status: Status | undefined; - compactionGroups: CompactionGroup[]; -} - export interface TriggerManualCompactionRequest { compactionGroupId: number; keyRange: KeyRange | undefined; @@ -627,7 +688,7 @@ export interface RiseCtlGetPinnedSnapshotsSummaryResponse { export interface InitMetadataForReplayRequest { tables: Table[]; - compactionGroups: CompactionGroup[]; + compactionGroups: CompactionGroupInfo[]; } export interface InitMetadataForReplayResponse { @@ -662,7 +723,7 @@ export interface RiseCtlListCompactionGroupRequest { export interface RiseCtlListCompactionGroupResponse { status: Status | undefined; - compactionGroups: CompactionGroup[]; + compactionGroups: CompactionGroupInfo[]; } export interface RiseCtlUpdateCompactionConfigRequest { @@ -694,6 +755,14 @@ export interface SetCompactorRuntimeConfigRequest { export interface SetCompactorRuntimeConfigResponse { } +export interface PinVersionRequest { + contextId: number; +} + +export interface PinVersionResponse { + pinnedVersion: HummockVersion | undefined; +} + export interface CompactionConfig { maxBytesForLevelBase: number; maxLevel: number; @@ -706,6 +775,7 @@ export interface CompactionConfig { targetFileSizeBase: number; compactionFilterMask: number; maxSubCompaction: number; + maxSpaceReclaimBytes: number; } export const CompactionConfig_CompactionMode = { @@ -969,7 +1039,7 @@ export const IntraLevelDelta = { }; function createBaseGroupConstruct(): GroupConstruct { - return { groupConfig: undefined, parentGroupId: 0, tableIds: [] }; + return { groupConfig: undefined, parentGroupId: 0, tableIds: [], groupId: 0 }; } export const GroupConstruct = { @@ -978,6 +1048,7 @@ export const GroupConstruct = { groupConfig: isSet(object.groupConfig) ? 
CompactionConfig.fromJSON(object.groupConfig) : undefined, parentGroupId: isSet(object.parentGroupId) ? Number(object.parentGroupId) : 0, tableIds: Array.isArray(object?.tableIds) ? object.tableIds.map((e: any) => Number(e)) : [], + groupId: isSet(object.groupId) ? Number(object.groupId) : 0, }; }, @@ -991,6 +1062,7 @@ export const GroupConstruct = { } else { obj.tableIds = []; } + message.groupId !== undefined && (obj.groupId = Math.round(message.groupId)); return obj; }, @@ -1001,6 +1073,42 @@ export const GroupConstruct = { : undefined; message.parentGroupId = object.parentGroupId ?? 0; message.tableIds = object.tableIds?.map((e) => e) || []; + message.groupId = object.groupId ?? 0; + return message; + }, +}; + +function createBaseGroupMetaChange(): GroupMetaChange { + return { tableIdsAdd: [], tableIdsRemove: [] }; +} + +export const GroupMetaChange = { + fromJSON(object: any): GroupMetaChange { + return { + tableIdsAdd: Array.isArray(object?.tableIdsAdd) ? object.tableIdsAdd.map((e: any) => Number(e)) : [], + tableIdsRemove: Array.isArray(object?.tableIdsRemove) ? object.tableIdsRemove.map((e: any) => Number(e)) : [], + }; + }, + + toJSON(message: GroupMetaChange): unknown { + const obj: any = {}; + if (message.tableIdsAdd) { + obj.tableIdsAdd = message.tableIdsAdd.map((e) => Math.round(e)); + } else { + obj.tableIdsAdd = []; + } + if (message.tableIdsRemove) { + obj.tableIdsRemove = message.tableIdsRemove.map((e) => Math.round(e)); + } else { + obj.tableIdsRemove = []; + } + return obj; + }, + + fromPartial, I>>(object: I): GroupMetaChange { + const message = createBaseGroupMetaChange(); + message.tableIdsAdd = object.tableIdsAdd?.map((e) => e) || []; + message.tableIdsRemove = object.tableIdsRemove?.map((e) => e) || []; return message; }, }; @@ -1038,6 +1146,8 @@ export const GroupDelta = { ? { $case: "groupConstruct", groupConstruct: GroupConstruct.fromJSON(object.groupConstruct) } : isSet(object.groupDestroy) ? { $case: "groupDestroy", groupDestroy: GroupDestroy.fromJSON(object.groupDestroy) } + : isSet(object.groupMetaChange) + ? { $case: "groupMetaChange", groupMetaChange: GroupMetaChange.fromJSON(object.groupMetaChange) } : undefined, }; }, @@ -1053,6 +1163,9 @@ export const GroupDelta = { message.deltaType?.$case === "groupDestroy" && (obj.groupDestroy = message.deltaType?.groupDestroy ? GroupDestroy.toJSON(message.deltaType?.groupDestroy) : undefined); + message.deltaType?.$case === "groupMetaChange" && (obj.groupMetaChange = message.deltaType?.groupMetaChange + ? GroupMetaChange.toJSON(message.deltaType?.groupMetaChange) + : undefined); return obj; }, @@ -1085,6 +1198,16 @@ export const GroupDelta = { groupDestroy: GroupDestroy.fromPartial(object.deltaType.groupDestroy), }; } + if ( + object.deltaType?.$case === "groupMetaChange" && + object.deltaType?.groupMetaChange !== undefined && + object.deltaType?.groupMetaChange !== null + ) { + message.deltaType = { + $case: "groupMetaChange", + groupMetaChange: GroupMetaChange.fromPartial(object.deltaType.groupMetaChange), + }; + } return message; }, }; @@ -1172,7 +1295,7 @@ export const HummockVersion = { }; function createBaseHummockVersion_Levels(): HummockVersion_Levels { - return { levels: [], l0: undefined }; + return { levels: [], l0: undefined, groupId: 0, parentGroupId: 0, memberTableIds: [] }; } export const HummockVersion_Levels = { @@ -1180,6 +1303,9 @@ export const HummockVersion_Levels = { return { levels: Array.isArray(object?.levels) ? object.levels.map((e: any) => Level.fromJSON(e)) : [], l0: isSet(object.l0) ? 
OverlappingLevel.fromJSON(object.l0) : undefined, + groupId: isSet(object.groupId) ? Number(object.groupId) : 0, + parentGroupId: isSet(object.parentGroupId) ? Number(object.parentGroupId) : 0, + memberTableIds: Array.isArray(object?.memberTableIds) ? object.memberTableIds.map((e: any) => Number(e)) : [], }; }, @@ -1191,6 +1317,13 @@ export const HummockVersion_Levels = { obj.levels = []; } message.l0 !== undefined && (obj.l0 = message.l0 ? OverlappingLevel.toJSON(message.l0) : undefined); + message.groupId !== undefined && (obj.groupId = Math.round(message.groupId)); + message.parentGroupId !== undefined && (obj.parentGroupId = Math.round(message.parentGroupId)); + if (message.memberTableIds) { + obj.memberTableIds = message.memberTableIds.map((e) => Math.round(e)); + } else { + obj.memberTableIds = []; + } return obj; }, @@ -1198,6 +1331,9 @@ export const HummockVersion_Levels = { const message = createBaseHummockVersion_Levels(); message.levels = object.levels?.map((e) => Level.fromPartial(e)) || []; message.l0 = (object.l0 !== undefined && object.l0 !== null) ? OverlappingLevel.fromPartial(object.l0) : undefined; + message.groupId = object.groupId ?? 0; + message.parentGroupId = object.parentGroupId ?? 0; + message.memberTableIds = object.memberTableIds?.map((e) => e) || []; return message; }, }; @@ -1419,41 +1555,13 @@ export const HummockSnapshot = { }, }; -function createBasePinVersionRequest(): PinVersionRequest { - return { contextId: 0, lastPinned: 0 }; -} - -export const PinVersionRequest = { - fromJSON(object: any): PinVersionRequest { - return { - contextId: isSet(object.contextId) ? Number(object.contextId) : 0, - lastPinned: isSet(object.lastPinned) ? Number(object.lastPinned) : 0, - }; - }, - - toJSON(message: PinVersionRequest): unknown { - const obj: any = {}; - message.contextId !== undefined && (obj.contextId = Math.round(message.contextId)); - message.lastPinned !== undefined && (obj.lastPinned = Math.round(message.lastPinned)); - return obj; - }, - - fromPartial, I>>(object: I): PinVersionRequest { - const message = createBasePinVersionRequest(); - message.contextId = object.contextId ?? 0; - message.lastPinned = object.lastPinned ?? 0; - return message; - }, -}; - -function createBasePinVersionResponse(): PinVersionResponse { - return { status: undefined, payload: undefined }; +function createBaseVersionUpdatePayload(): VersionUpdatePayload { + return { payload: undefined }; } -export const PinVersionResponse = { - fromJSON(object: any): PinVersionResponse { +export const VersionUpdatePayload = { + fromJSON(object: any): VersionUpdatePayload { return { - status: isSet(object.status) ? Status.fromJSON(object.status) : undefined, payload: isSet(object.versionDeltas) ? { $case: "versionDeltas", versionDeltas: HummockVersionDeltas.fromJSON(object.versionDeltas) } : isSet(object.pinnedVersion) @@ -1462,9 +1570,8 @@ export const PinVersionResponse = { }; }, - toJSON(message: PinVersionResponse): unknown { + toJSON(message: VersionUpdatePayload): unknown { const obj: any = {}; - message.status !== undefined && (obj.status = message.status ? Status.toJSON(message.status) : undefined); message.payload?.$case === "versionDeltas" && (obj.versionDeltas = message.payload?.versionDeltas ? 
HummockVersionDeltas.toJSON(message.payload?.versionDeltas) : undefined); @@ -1474,11 +1581,8 @@ export const PinVersionResponse = { return obj; }, - fromPartial, I>>(object: I): PinVersionResponse { - const message = createBasePinVersionResponse(); - message.status = (object.status !== undefined && object.status !== null) - ? Status.fromPartial(object.status) - : undefined; + fromPartial, I>>(object: I): VersionUpdatePayload { + const message = createBaseVersionUpdatePayload(); if ( object.payload?.$case === "versionDeltas" && object.payload?.versionDeltas !== undefined && @@ -2003,6 +2107,7 @@ function createBaseCompactTask(): CompactTask { tableOptions: {}, currentEpochTime: 0, targetSubLevelId: 0, + taskType: CompactTask_TaskType.TYPE_UNSPECIFIED, }; } @@ -2036,6 +2141,9 @@ export const CompactTask = { : {}, currentEpochTime: isSet(object.currentEpochTime) ? Number(object.currentEpochTime) : 0, targetSubLevelId: isSet(object.targetSubLevelId) ? Number(object.targetSubLevelId) : 0, + taskType: isSet(object.taskType) + ? compactTask_TaskTypeFromJSON(object.taskType) + : CompactTask_TaskType.TYPE_UNSPECIFIED, }; }, @@ -2078,6 +2186,7 @@ export const CompactTask = { } message.currentEpochTime !== undefined && (obj.currentEpochTime = Math.round(message.currentEpochTime)); message.targetSubLevelId !== undefined && (obj.targetSubLevelId = Math.round(message.targetSubLevelId)); + message.taskType !== undefined && (obj.taskType = compactTask_TaskTypeToJSON(message.taskType)); return obj; }, @@ -2107,6 +2216,7 @@ export const CompactTask = { ); message.currentEpochTime = object.currentEpochTime ?? 0; message.targetSubLevelId = object.targetSubLevelId ?? 0; + message.taskType = object.taskType ?? CompactTask_TaskType.TYPE_UNSPECIFIED; return message; }, }; @@ -2248,92 +2358,70 @@ export const CompactStatus = { }; function createBaseCompactionGroup(): CompactionGroup { - return { id: 0, parentId: 0, memberTableIds: [], compactionConfig: undefined, tableIdToOptions: {} }; + return { id: 0, compactionConfig: undefined }; } export const CompactionGroup = { fromJSON(object: any): CompactionGroup { return { id: isSet(object.id) ? Number(object.id) : 0, - parentId: isSet(object.parentId) ? Number(object.parentId) : 0, - memberTableIds: Array.isArray(object?.memberTableIds) ? object.memberTableIds.map((e: any) => Number(e)) : [], compactionConfig: isSet(object.compactionConfig) ? CompactionConfig.fromJSON(object.compactionConfig) : undefined, - tableIdToOptions: isObject(object.tableIdToOptions) - ? Object.entries(object.tableIdToOptions).reduce<{ [key: number]: TableOption }>((acc, [key, value]) => { - acc[Number(key)] = TableOption.fromJSON(value); - return acc; - }, {}) - : {}, }; }, toJSON(message: CompactionGroup): unknown { const obj: any = {}; message.id !== undefined && (obj.id = Math.round(message.id)); - message.parentId !== undefined && (obj.parentId = Math.round(message.parentId)); - if (message.memberTableIds) { - obj.memberTableIds = message.memberTableIds.map((e) => Math.round(e)); - } else { - obj.memberTableIds = []; - } message.compactionConfig !== undefined && (obj.compactionConfig = message.compactionConfig ? 
CompactionConfig.toJSON(message.compactionConfig) : undefined); - obj.tableIdToOptions = {}; - if (message.tableIdToOptions) { - Object.entries(message.tableIdToOptions).forEach(([k, v]) => { - obj.tableIdToOptions[k] = TableOption.toJSON(v); - }); - } return obj; }, fromPartial, I>>(object: I): CompactionGroup { const message = createBaseCompactionGroup(); message.id = object.id ?? 0; - message.parentId = object.parentId ?? 0; - message.memberTableIds = object.memberTableIds?.map((e) => e) || []; message.compactionConfig = (object.compactionConfig !== undefined && object.compactionConfig !== null) ? CompactionConfig.fromPartial(object.compactionConfig) : undefined; - message.tableIdToOptions = Object.entries(object.tableIdToOptions ?? {}).reduce<{ [key: number]: TableOption }>( - (acc, [key, value]) => { - if (value !== undefined) { - acc[Number(key)] = TableOption.fromPartial(value); - } - return acc; - }, - {}, - ); return message; }, }; -function createBaseCompactionGroup_TableIdToOptionsEntry(): CompactionGroup_TableIdToOptionsEntry { - return { key: 0, value: undefined }; +function createBaseCompactionGroupInfo(): CompactionGroupInfo { + return { id: 0, parentId: 0, memberTableIds: [], compactionConfig: undefined }; } -export const CompactionGroup_TableIdToOptionsEntry = { - fromJSON(object: any): CompactionGroup_TableIdToOptionsEntry { +export const CompactionGroupInfo = { + fromJSON(object: any): CompactionGroupInfo { return { - key: isSet(object.key) ? Number(object.key) : 0, - value: isSet(object.value) ? TableOption.fromJSON(object.value) : undefined, + id: isSet(object.id) ? Number(object.id) : 0, + parentId: isSet(object.parentId) ? Number(object.parentId) : 0, + memberTableIds: Array.isArray(object?.memberTableIds) ? object.memberTableIds.map((e: any) => Number(e)) : [], + compactionConfig: isSet(object.compactionConfig) ? CompactionConfig.fromJSON(object.compactionConfig) : undefined, }; }, - toJSON(message: CompactionGroup_TableIdToOptionsEntry): unknown { + toJSON(message: CompactionGroupInfo): unknown { const obj: any = {}; - message.key !== undefined && (obj.key = Math.round(message.key)); - message.value !== undefined && (obj.value = message.value ? TableOption.toJSON(message.value) : undefined); + message.id !== undefined && (obj.id = Math.round(message.id)); + message.parentId !== undefined && (obj.parentId = Math.round(message.parentId)); + if (message.memberTableIds) { + obj.memberTableIds = message.memberTableIds.map((e) => Math.round(e)); + } else { + obj.memberTableIds = []; + } + message.compactionConfig !== undefined && + (obj.compactionConfig = message.compactionConfig ? CompactionConfig.toJSON(message.compactionConfig) : undefined); return obj; }, - fromPartial, I>>( - object: I, - ): CompactionGroup_TableIdToOptionsEntry { - const message = createBaseCompactionGroup_TableIdToOptionsEntry(); - message.key = object.key ?? 0; - message.value = (object.value !== undefined && object.value !== null) - ? TableOption.fromPartial(object.value) + fromPartial, I>>(object: I): CompactionGroupInfo { + const message = createBaseCompactionGroupInfo(); + message.id = object.id ?? 0; + message.parentId = object.parentId ?? 0; + message.memberTableIds = object.memberTableIds?.map((e) => e) || []; + message.compactionConfig = (object.compactionConfig !== undefined && object.compactionConfig !== null) + ? 
CompactionConfig.fromPartial(object.compactionConfig) : undefined; return message; }, @@ -3042,61 +3130,6 @@ export const ReportVacuumTaskResponse = { }, }; -function createBaseGetCompactionGroupsRequest(): GetCompactionGroupsRequest { - return {}; -} - -export const GetCompactionGroupsRequest = { - fromJSON(_: any): GetCompactionGroupsRequest { - return {}; - }, - - toJSON(_: GetCompactionGroupsRequest): unknown { - const obj: any = {}; - return obj; - }, - - fromPartial, I>>(_: I): GetCompactionGroupsRequest { - const message = createBaseGetCompactionGroupsRequest(); - return message; - }, -}; - -function createBaseGetCompactionGroupsResponse(): GetCompactionGroupsResponse { - return { status: undefined, compactionGroups: [] }; -} - -export const GetCompactionGroupsResponse = { - fromJSON(object: any): GetCompactionGroupsResponse { - return { - status: isSet(object.status) ? Status.fromJSON(object.status) : undefined, - compactionGroups: Array.isArray(object?.compactionGroups) - ? object.compactionGroups.map((e: any) => CompactionGroup.fromJSON(e)) - : [], - }; - }, - - toJSON(message: GetCompactionGroupsResponse): unknown { - const obj: any = {}; - message.status !== undefined && (obj.status = message.status ? Status.toJSON(message.status) : undefined); - if (message.compactionGroups) { - obj.compactionGroups = message.compactionGroups.map((e) => e ? CompactionGroup.toJSON(e) : undefined); - } else { - obj.compactionGroups = []; - } - return obj; - }, - - fromPartial, I>>(object: I): GetCompactionGroupsResponse { - const message = createBaseGetCompactionGroupsResponse(); - message.status = (object.status !== undefined && object.status !== null) - ? Status.fromPartial(object.status) - : undefined; - message.compactionGroups = object.compactionGroups?.map((e) => CompactionGroup.fromPartial(e)) || []; - return message; - }, -}; - function createBaseTriggerManualCompactionRequest(): TriggerManualCompactionRequest { return { compactionGroupId: 0, keyRange: undefined, tableId: 0, level: 0, sstIds: [] }; } @@ -3591,7 +3624,7 @@ export const InitMetadataForReplayRequest = { return { tables: Array.isArray(object?.tables) ? object.tables.map((e: any) => Table.fromJSON(e)) : [], compactionGroups: Array.isArray(object?.compactionGroups) - ? object.compactionGroups.map((e: any) => CompactionGroup.fromJSON(e)) + ? object.compactionGroups.map((e: any) => CompactionGroupInfo.fromJSON(e)) : [], }; }, @@ -3604,7 +3637,7 @@ export const InitMetadataForReplayRequest = { obj.tables = []; } if (message.compactionGroups) { - obj.compactionGroups = message.compactionGroups.map((e) => e ? CompactionGroup.toJSON(e) : undefined); + obj.compactionGroups = message.compactionGroups.map((e) => e ? CompactionGroupInfo.toJSON(e) : undefined); } else { obj.compactionGroups = []; } @@ -3614,7 +3647,7 @@ export const InitMetadataForReplayRequest = { fromPartial, I>>(object: I): InitMetadataForReplayRequest { const message = createBaseInitMetadataForReplayRequest(); message.tables = object.tables?.map((e) => Table.fromPartial(e)) || []; - message.compactionGroups = object.compactionGroups?.map((e) => CompactionGroup.fromPartial(e)) || []; + message.compactionGroups = object.compactionGroups?.map((e) => CompactionGroupInfo.fromPartial(e)) || []; return message; }, }; @@ -3835,7 +3868,7 @@ export const RiseCtlListCompactionGroupResponse = { return { status: isSet(object.status) ? Status.fromJSON(object.status) : undefined, compactionGroups: Array.isArray(object?.compactionGroups) - ? 
object.compactionGroups.map((e: any) => CompactionGroup.fromJSON(e)) + ? object.compactionGroups.map((e: any) => CompactionGroupInfo.fromJSON(e)) : [], }; }, @@ -3844,7 +3877,7 @@ export const RiseCtlListCompactionGroupResponse = { const obj: any = {}; message.status !== undefined && (obj.status = message.status ? Status.toJSON(message.status) : undefined); if (message.compactionGroups) { - obj.compactionGroups = message.compactionGroups.map((e) => e ? CompactionGroup.toJSON(e) : undefined); + obj.compactionGroups = message.compactionGroups.map((e) => e ? CompactionGroupInfo.toJSON(e) : undefined); } else { obj.compactionGroups = []; } @@ -3858,7 +3891,7 @@ export const RiseCtlListCompactionGroupResponse = { message.status = (object.status !== undefined && object.status !== null) ? Status.fromPartial(object.status) : undefined; - message.compactionGroups = object.compactionGroups?.map((e) => CompactionGroup.fromPartial(e)) || []; + message.compactionGroups = object.compactionGroups?.map((e) => CompactionGroupInfo.fromPartial(e)) || []; return message; }, }; @@ -4123,6 +4156,53 @@ export const SetCompactorRuntimeConfigResponse = { }, }; +function createBasePinVersionRequest(): PinVersionRequest { + return { contextId: 0 }; +} + +export const PinVersionRequest = { + fromJSON(object: any): PinVersionRequest { + return { contextId: isSet(object.contextId) ? Number(object.contextId) : 0 }; + }, + + toJSON(message: PinVersionRequest): unknown { + const obj: any = {}; + message.contextId !== undefined && (obj.contextId = Math.round(message.contextId)); + return obj; + }, + + fromPartial, I>>(object: I): PinVersionRequest { + const message = createBasePinVersionRequest(); + message.contextId = object.contextId ?? 0; + return message; + }, +}; + +function createBasePinVersionResponse(): PinVersionResponse { + return { pinnedVersion: undefined }; +} + +export const PinVersionResponse = { + fromJSON(object: any): PinVersionResponse { + return { pinnedVersion: isSet(object.pinnedVersion) ? HummockVersion.fromJSON(object.pinnedVersion) : undefined }; + }, + + toJSON(message: PinVersionResponse): unknown { + const obj: any = {}; + message.pinnedVersion !== undefined && + (obj.pinnedVersion = message.pinnedVersion ? HummockVersion.toJSON(message.pinnedVersion) : undefined); + return obj; + }, + + fromPartial, I>>(object: I): PinVersionResponse { + const message = createBasePinVersionResponse(); + message.pinnedVersion = (object.pinnedVersion !== undefined && object.pinnedVersion !== null) + ? HummockVersion.fromPartial(object.pinnedVersion) + : undefined; + return message; + }, +}; + function createBaseCompactionConfig(): CompactionConfig { return { maxBytesForLevelBase: 0, @@ -4136,6 +4216,7 @@ function createBaseCompactionConfig(): CompactionConfig { targetFileSizeBase: 0, compactionFilterMask: 0, maxSubCompaction: 0, + maxSpaceReclaimBytes: 0, }; } @@ -4163,6 +4244,7 @@ export const CompactionConfig = { targetFileSizeBase: isSet(object.targetFileSizeBase) ? Number(object.targetFileSizeBase) : 0, compactionFilterMask: isSet(object.compactionFilterMask) ? Number(object.compactionFilterMask) : 0, maxSubCompaction: isSet(object.maxSubCompaction) ? Number(object.maxSubCompaction) : 0, + maxSpaceReclaimBytes: isSet(object.maxSpaceReclaimBytes) ? 
Number(object.maxSpaceReclaimBytes) : 0, }; }, @@ -4187,6 +4269,7 @@ export const CompactionConfig = { message.targetFileSizeBase !== undefined && (obj.targetFileSizeBase = Math.round(message.targetFileSizeBase)); message.compactionFilterMask !== undefined && (obj.compactionFilterMask = Math.round(message.compactionFilterMask)); message.maxSubCompaction !== undefined && (obj.maxSubCompaction = Math.round(message.maxSubCompaction)); + message.maxSpaceReclaimBytes !== undefined && (obj.maxSpaceReclaimBytes = Math.round(message.maxSpaceReclaimBytes)); return obj; }, @@ -4203,6 +4286,7 @@ export const CompactionConfig = { message.targetFileSizeBase = object.targetFileSizeBase ?? 0; message.compactionFilterMask = object.compactionFilterMask ?? 0; message.maxSubCompaction = object.maxSubCompaction ?? 0; + message.maxSpaceReclaimBytes = object.maxSpaceReclaimBytes ?? 0; return message; }, }; diff --git a/dashboard/proto/gen/java_binding.ts b/dashboard/proto/gen/java_binding.ts new file mode 100644 index 0000000000000..ed7c72438c87b --- /dev/null +++ b/dashboard/proto/gen/java_binding.ts @@ -0,0 +1,228 @@ +/* eslint-disable */ +import { Table } from "./catalog"; +import { HummockVersion } from "./hummock"; + +export const protobufPackage = "java_binding"; + +/** When `left` or `right` is none, it represents unbounded. */ +export interface KeyRange { + left: Uint8Array; + right: Uint8Array; + leftBound: KeyRange_Bound; + rightBound: KeyRange_Bound; +} + +export const KeyRange_Bound = { + UNSPECIFIED: "UNSPECIFIED", + UNBOUNDED: "UNBOUNDED", + INCLUDED: "INCLUDED", + EXCLUDED: "EXCLUDED", + UNRECOGNIZED: "UNRECOGNIZED", +} as const; + +export type KeyRange_Bound = typeof KeyRange_Bound[keyof typeof KeyRange_Bound]; + +export function keyRange_BoundFromJSON(object: any): KeyRange_Bound { + switch (object) { + case 0: + case "UNSPECIFIED": + return KeyRange_Bound.UNSPECIFIED; + case 1: + case "UNBOUNDED": + return KeyRange_Bound.UNBOUNDED; + case 2: + case "INCLUDED": + return KeyRange_Bound.INCLUDED; + case 3: + case "EXCLUDED": + return KeyRange_Bound.EXCLUDED; + case -1: + case "UNRECOGNIZED": + default: + return KeyRange_Bound.UNRECOGNIZED; + } +} + +export function keyRange_BoundToJSON(object: KeyRange_Bound): string { + switch (object) { + case KeyRange_Bound.UNSPECIFIED: + return "UNSPECIFIED"; + case KeyRange_Bound.UNBOUNDED: + return "UNBOUNDED"; + case KeyRange_Bound.INCLUDED: + return "INCLUDED"; + case KeyRange_Bound.EXCLUDED: + return "EXCLUDED"; + case KeyRange_Bound.UNRECOGNIZED: + default: + return "UNRECOGNIZED"; + } +} + +export interface ReadPlan { + objectStoreUrl: string; + dataDir: string; + keyRange: KeyRange | undefined; + tableId: number; + epoch: number; + version: HummockVersion | undefined; + tableCatalog: Table | undefined; +} + +function createBaseKeyRange(): KeyRange { + return { + left: new Uint8Array(), + right: new Uint8Array(), + leftBound: KeyRange_Bound.UNSPECIFIED, + rightBound: KeyRange_Bound.UNSPECIFIED, + }; +} + +export const KeyRange = { + fromJSON(object: any): KeyRange { + return { + left: isSet(object.left) ? bytesFromBase64(object.left) : new Uint8Array(), + right: isSet(object.right) ? bytesFromBase64(object.right) : new Uint8Array(), + leftBound: isSet(object.leftBound) ? keyRange_BoundFromJSON(object.leftBound) : KeyRange_Bound.UNSPECIFIED, + rightBound: isSet(object.rightBound) ? 
keyRange_BoundFromJSON(object.rightBound) : KeyRange_Bound.UNSPECIFIED, + }; + }, + + toJSON(message: KeyRange): unknown { + const obj: any = {}; + message.left !== undefined && + (obj.left = base64FromBytes(message.left !== undefined ? message.left : new Uint8Array())); + message.right !== undefined && + (obj.right = base64FromBytes(message.right !== undefined ? message.right : new Uint8Array())); + message.leftBound !== undefined && (obj.leftBound = keyRange_BoundToJSON(message.leftBound)); + message.rightBound !== undefined && (obj.rightBound = keyRange_BoundToJSON(message.rightBound)); + return obj; + }, + + fromPartial, I>>(object: I): KeyRange { + const message = createBaseKeyRange(); + message.left = object.left ?? new Uint8Array(); + message.right = object.right ?? new Uint8Array(); + message.leftBound = object.leftBound ?? KeyRange_Bound.UNSPECIFIED; + message.rightBound = object.rightBound ?? KeyRange_Bound.UNSPECIFIED; + return message; + }, +}; + +function createBaseReadPlan(): ReadPlan { + return { + objectStoreUrl: "", + dataDir: "", + keyRange: undefined, + tableId: 0, + epoch: 0, + version: undefined, + tableCatalog: undefined, + }; +} + +export const ReadPlan = { + fromJSON(object: any): ReadPlan { + return { + objectStoreUrl: isSet(object.objectStoreUrl) ? String(object.objectStoreUrl) : "", + dataDir: isSet(object.dataDir) ? String(object.dataDir) : "", + keyRange: isSet(object.keyRange) ? KeyRange.fromJSON(object.keyRange) : undefined, + tableId: isSet(object.tableId) ? Number(object.tableId) : 0, + epoch: isSet(object.epoch) ? Number(object.epoch) : 0, + version: isSet(object.version) ? HummockVersion.fromJSON(object.version) : undefined, + tableCatalog: isSet(object.tableCatalog) ? Table.fromJSON(object.tableCatalog) : undefined, + }; + }, + + toJSON(message: ReadPlan): unknown { + const obj: any = {}; + message.objectStoreUrl !== undefined && (obj.objectStoreUrl = message.objectStoreUrl); + message.dataDir !== undefined && (obj.dataDir = message.dataDir); + message.keyRange !== undefined && (obj.keyRange = message.keyRange ? KeyRange.toJSON(message.keyRange) : undefined); + message.tableId !== undefined && (obj.tableId = Math.round(message.tableId)); + message.epoch !== undefined && (obj.epoch = Math.round(message.epoch)); + message.version !== undefined && + (obj.version = message.version ? HummockVersion.toJSON(message.version) : undefined); + message.tableCatalog !== undefined && + (obj.tableCatalog = message.tableCatalog ? Table.toJSON(message.tableCatalog) : undefined); + return obj; + }, + + fromPartial, I>>(object: I): ReadPlan { + const message = createBaseReadPlan(); + message.objectStoreUrl = object.objectStoreUrl ?? ""; + message.dataDir = object.dataDir ?? ""; + message.keyRange = (object.keyRange !== undefined && object.keyRange !== null) + ? KeyRange.fromPartial(object.keyRange) + : undefined; + message.tableId = object.tableId ?? 0; + message.epoch = object.epoch ?? 0; + message.version = (object.version !== undefined && object.version !== null) + ? HummockVersion.fromPartial(object.version) + : undefined; + message.tableCatalog = (object.tableCatalog !== undefined && object.tableCatalog !== null) + ? 
Table.fromPartial(object.tableCatalog) + : undefined; + return message; + }, +}; + +declare var self: any | undefined; +declare var window: any | undefined; +declare var global: any | undefined; +var globalThis: any = (() => { + if (typeof globalThis !== "undefined") { + return globalThis; + } + if (typeof self !== "undefined") { + return self; + } + if (typeof window !== "undefined") { + return window; + } + if (typeof global !== "undefined") { + return global; + } + throw "Unable to locate global object"; +})(); + +function bytesFromBase64(b64: string): Uint8Array { + if (globalThis.Buffer) { + return Uint8Array.from(globalThis.Buffer.from(b64, "base64")); + } else { + const bin = globalThis.atob(b64); + const arr = new Uint8Array(bin.length); + for (let i = 0; i < bin.length; ++i) { + arr[i] = bin.charCodeAt(i); + } + return arr; + } +} + +function base64FromBytes(arr: Uint8Array): string { + if (globalThis.Buffer) { + return globalThis.Buffer.from(arr).toString("base64"); + } else { + const bin: string[] = []; + arr.forEach((byte) => { + bin.push(String.fromCharCode(byte)); + }); + return globalThis.btoa(bin.join("")); + } +} + +type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined; + +export type DeepPartial = T extends Builtin ? T + : T extends Array ? Array> : T extends ReadonlyArray ? ReadonlyArray> + : T extends { $case: string } ? { [K in keyof Omit]?: DeepPartial } & { $case: T["$case"] } + : T extends {} ? { [K in keyof T]?: DeepPartial } + : Partial; + +type KeysOfUnion = T extends T ? keyof T : never; +export type Exact = P extends Builtin ? P + : P & { [K in keyof P]: Exact } & { [K in Exclude>]: never }; + +function isSet(value: any): boolean { + return value !== null && value !== undefined; +} diff --git a/dashboard/proto/gen/leader.ts b/dashboard/proto/gen/leader.ts deleted file mode 100644 index 850f2a49943ca..0000000000000 --- a/dashboard/proto/gen/leader.ts +++ /dev/null @@ -1,160 +0,0 @@ -/* eslint-disable */ -import { HostAddress } from "./common"; - -export const protobufPackage = "leader"; - -export interface LeaderRequest { -} - -export interface LeaderResponse { - leaderAddr: HostAddress | undefined; -} - -export interface MembersRequest { -} - -export interface Member { - memberAddr: HostAddress | undefined; - leaseId: number; -} - -export interface MembersResponse { - members: Member[]; -} - -function createBaseLeaderRequest(): LeaderRequest { - return {}; -} - -export const LeaderRequest = { - fromJSON(_: any): LeaderRequest { - return {}; - }, - - toJSON(_: LeaderRequest): unknown { - const obj: any = {}; - return obj; - }, - - fromPartial, I>>(_: I): LeaderRequest { - const message = createBaseLeaderRequest(); - return message; - }, -}; - -function createBaseLeaderResponse(): LeaderResponse { - return { leaderAddr: undefined }; -} - -export const LeaderResponse = { - fromJSON(object: any): LeaderResponse { - return { leaderAddr: isSet(object.leaderAddr) ? HostAddress.fromJSON(object.leaderAddr) : undefined }; - }, - - toJSON(message: LeaderResponse): unknown { - const obj: any = {}; - message.leaderAddr !== undefined && - (obj.leaderAddr = message.leaderAddr ? HostAddress.toJSON(message.leaderAddr) : undefined); - return obj; - }, - - fromPartial, I>>(object: I): LeaderResponse { - const message = createBaseLeaderResponse(); - message.leaderAddr = (object.leaderAddr !== undefined && object.leaderAddr !== null) - ? 
HostAddress.fromPartial(object.leaderAddr) - : undefined; - return message; - }, -}; - -function createBaseMembersRequest(): MembersRequest { - return {}; -} - -export const MembersRequest = { - fromJSON(_: any): MembersRequest { - return {}; - }, - - toJSON(_: MembersRequest): unknown { - const obj: any = {}; - return obj; - }, - - fromPartial, I>>(_: I): MembersRequest { - const message = createBaseMembersRequest(); - return message; - }, -}; - -function createBaseMember(): Member { - return { memberAddr: undefined, leaseId: 0 }; -} - -export const Member = { - fromJSON(object: any): Member { - return { - memberAddr: isSet(object.memberAddr) ? HostAddress.fromJSON(object.memberAddr) : undefined, - leaseId: isSet(object.leaseId) ? Number(object.leaseId) : 0, - }; - }, - - toJSON(message: Member): unknown { - const obj: any = {}; - message.memberAddr !== undefined && - (obj.memberAddr = message.memberAddr ? HostAddress.toJSON(message.memberAddr) : undefined); - message.leaseId !== undefined && (obj.leaseId = Math.round(message.leaseId)); - return obj; - }, - - fromPartial, I>>(object: I): Member { - const message = createBaseMember(); - message.memberAddr = (object.memberAddr !== undefined && object.memberAddr !== null) - ? HostAddress.fromPartial(object.memberAddr) - : undefined; - message.leaseId = object.leaseId ?? 0; - return message; - }, -}; - -function createBaseMembersResponse(): MembersResponse { - return { members: [] }; -} - -export const MembersResponse = { - fromJSON(object: any): MembersResponse { - return { members: Array.isArray(object?.members) ? object.members.map((e: any) => Member.fromJSON(e)) : [] }; - }, - - toJSON(message: MembersResponse): unknown { - const obj: any = {}; - if (message.members) { - obj.members = message.members.map((e) => e ? Member.toJSON(e) : undefined); - } else { - obj.members = []; - } - return obj; - }, - - fromPartial, I>>(object: I): MembersResponse { - const message = createBaseMembersResponse(); - message.members = object.members?.map((e) => Member.fromPartial(e)) || []; - return message; - }, -}; - -type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined; - -export type DeepPartial = T extends Builtin ? T - : T extends Array ? Array> : T extends ReadonlyArray ? ReadonlyArray> - : T extends { $case: string } ? { [K in keyof Omit]?: DeepPartial } & { $case: T["$case"] } - : T extends {} ? { [K in keyof T]?: DeepPartial } - : Partial; - -type KeysOfUnion = T extends T ? keyof T : never; -export type Exact = P extends Builtin ? P - : P & { [K in keyof P]: Exact } & { [K in Exclude>]: never }; - -function isSet(value: any): boolean { - return value !== null && value !== undefined; -} diff --git a/dashboard/proto/gen/meta.ts b/dashboard/proto/gen/meta.ts index ede4e6b2b8e6f..a6db5e2353f4f 100644 --- a/dashboard/proto/gen/meta.ts +++ b/dashboard/proto/gen/meta.ts @@ -279,6 +279,12 @@ export interface TableFragments_ActorSplitsEntry { value: ConnectorSplits | undefined; } +/** / Parallel unit mapping with fragment id, used for notification. */ +export interface FragmentParallelUnitMapping { + fragmentId: number; + mapping: ParallelUnitMapping | undefined; +} + /** TODO: remove this when dashboard refactored. 
*/ export interface ActorLocation { node: WorkerNode | undefined; @@ -294,6 +300,20 @@ export interface FlushResponse { snapshot: HummockSnapshot | undefined; } +export interface CreatingJobInfo { + databaseId: number; + schemaId: number; + name: string; +} + +export interface CancelCreatingJobsRequest { + infos: CreatingJobInfo[]; +} + +export interface CancelCreatingJobsResponse { + status: Status | undefined; +} + export interface ListTableFragmentsRequest { tableIds: number[]; } @@ -378,7 +398,7 @@ export interface MetaSnapshot { views: View[]; functions: Function[]; users: UserInfo[]; - parallelUnitMappings: ParallelUnitMapping[]; + parallelUnitMappings: FragmentParallelUnitMapping[]; nodes: WorkerNode[]; hummockSnapshot: HummockSnapshot | undefined; hummockVersion: HummockVersion | undefined; @@ -406,7 +426,7 @@ export interface SubscribeResponse { | { $case: "view"; view: View } | { $case: "function"; function: Function } | { $case: "user"; user: UserInfo } - | { $case: "parallelUnitMapping"; parallelUnitMapping: ParallelUnitMapping } + | { $case: "parallelUnitMapping"; parallelUnitMapping: FragmentParallelUnitMapping } | { $case: "node"; node: WorkerNode } | { $case: "hummockSnapshot"; hummockSnapshot: HummockSnapshot } | { $case: "hummockVersionDeltas"; hummockVersionDeltas: HummockVersionDeltas } @@ -467,17 +487,6 @@ export function subscribeResponse_OperationToJSON(object: SubscribeResponse_Oper } } -export interface MetaLeaderInfo { - nodeAddress: string; - leaseId: number; -} - -export interface MetaLeaseInfo { - leader: MetaLeaderInfo | undefined; - leaseRegisterTime: number; - leaseExpireTime: number; -} - export interface PauseRequest { } @@ -529,6 +538,52 @@ export interface RescheduleResponse { success: boolean; } +export interface MembersRequest { +} + +export interface MetaMember { + address: HostAddress | undefined; + isLeader: boolean; +} + +export interface MembersResponse { + members: MetaMember[]; +} + +/** + * The schema for persisted system parameters. + * Note on backward compatibility: + * - Do not remove deprecated fields. + * - To rename, change the type or semantic of a field, introduce a new field postfixed by the version. + */ +export interface SystemParams { + barrierIntervalMs?: number | undefined; + checkpointFrequency?: number | undefined; + sstableSizeMb?: number | undefined; + blockSizeKb?: number | undefined; + bloomFalsePositive?: number | undefined; + stateStore?: string | undefined; + dataDirectory?: string | undefined; + backupStorageUrl?: string | undefined; + backupStorageDirectory?: string | undefined; +} + +export interface GetSystemParamsRequest { +} + +export interface GetSystemParamsResponse { + params: SystemParams | undefined; +} + +export interface SetSystemParamRequest { + param: string; + /** None means set to default value. */ + value?: string | undefined; +} + +export interface SetSystemParamResponse { +} + function createBaseHeartbeatRequest(): HeartbeatRequest { return { nodeId: 0, info: [] }; } @@ -918,6 +973,36 @@ export const TableFragments_ActorSplitsEntry = { }, }; +function createBaseFragmentParallelUnitMapping(): FragmentParallelUnitMapping { + return { fragmentId: 0, mapping: undefined }; +} + +export const FragmentParallelUnitMapping = { + fromJSON(object: any): FragmentParallelUnitMapping { + return { + fragmentId: isSet(object.fragmentId) ? Number(object.fragmentId) : 0, + mapping: isSet(object.mapping) ? 
ParallelUnitMapping.fromJSON(object.mapping) : undefined, + }; + }, + + toJSON(message: FragmentParallelUnitMapping): unknown { + const obj: any = {}; + message.fragmentId !== undefined && (obj.fragmentId = Math.round(message.fragmentId)); + message.mapping !== undefined && + (obj.mapping = message.mapping ? ParallelUnitMapping.toJSON(message.mapping) : undefined); + return obj; + }, + + fromPartial, I>>(object: I): FragmentParallelUnitMapping { + const message = createBaseFragmentParallelUnitMapping(); + message.fragmentId = object.fragmentId ?? 0; + message.mapping = (object.mapping !== undefined && object.mapping !== null) + ? ParallelUnitMapping.fromPartial(object.mapping) + : undefined; + return message; + }, +}; + function createBaseActorLocation(): ActorLocation { return { node: undefined, actors: [] }; } @@ -1005,6 +1090,86 @@ export const FlushResponse = { }, }; +function createBaseCreatingJobInfo(): CreatingJobInfo { + return { databaseId: 0, schemaId: 0, name: "" }; +} + +export const CreatingJobInfo = { + fromJSON(object: any): CreatingJobInfo { + return { + databaseId: isSet(object.databaseId) ? Number(object.databaseId) : 0, + schemaId: isSet(object.schemaId) ? Number(object.schemaId) : 0, + name: isSet(object.name) ? String(object.name) : "", + }; + }, + + toJSON(message: CreatingJobInfo): unknown { + const obj: any = {}; + message.databaseId !== undefined && (obj.databaseId = Math.round(message.databaseId)); + message.schemaId !== undefined && (obj.schemaId = Math.round(message.schemaId)); + message.name !== undefined && (obj.name = message.name); + return obj; + }, + + fromPartial, I>>(object: I): CreatingJobInfo { + const message = createBaseCreatingJobInfo(); + message.databaseId = object.databaseId ?? 0; + message.schemaId = object.schemaId ?? 0; + message.name = object.name ?? ""; + return message; + }, +}; + +function createBaseCancelCreatingJobsRequest(): CancelCreatingJobsRequest { + return { infos: [] }; +} + +export const CancelCreatingJobsRequest = { + fromJSON(object: any): CancelCreatingJobsRequest { + return { infos: Array.isArray(object?.infos) ? object.infos.map((e: any) => CreatingJobInfo.fromJSON(e)) : [] }; + }, + + toJSON(message: CancelCreatingJobsRequest): unknown { + const obj: any = {}; + if (message.infos) { + obj.infos = message.infos.map((e) => e ? CreatingJobInfo.toJSON(e) : undefined); + } else { + obj.infos = []; + } + return obj; + }, + + fromPartial, I>>(object: I): CancelCreatingJobsRequest { + const message = createBaseCancelCreatingJobsRequest(); + message.infos = object.infos?.map((e) => CreatingJobInfo.fromPartial(e)) || []; + return message; + }, +}; + +function createBaseCancelCreatingJobsResponse(): CancelCreatingJobsResponse { + return { status: undefined }; +} + +export const CancelCreatingJobsResponse = { + fromJSON(object: any): CancelCreatingJobsResponse { + return { status: isSet(object.status) ? Status.fromJSON(object.status) : undefined }; + }, + + toJSON(message: CancelCreatingJobsResponse): unknown { + const obj: any = {}; + message.status !== undefined && (obj.status = message.status ? Status.toJSON(message.status) : undefined); + return obj; + }, + + fromPartial, I>>(object: I): CancelCreatingJobsResponse { + const message = createBaseCancelCreatingJobsResponse(); + message.status = (object.status !== undefined && object.status !== null) + ? 
Status.fromPartial(object.status) + : undefined; + return message; + }, +}; + function createBaseListTableFragmentsRequest(): ListTableFragmentsRequest { return { tableIds: [] }; } @@ -1504,7 +1669,7 @@ export const MetaSnapshot = { functions: Array.isArray(object?.functions) ? object.functions.map((e: any) => Function.fromJSON(e)) : [], users: Array.isArray(object?.users) ? object.users.map((e: any) => UserInfo.fromJSON(e)) : [], parallelUnitMappings: Array.isArray(object?.parallelUnitMappings) - ? object.parallelUnitMappings.map((e: any) => ParallelUnitMapping.fromJSON(e)) + ? object.parallelUnitMappings.map((e: any) => FragmentParallelUnitMapping.fromJSON(e)) : [], nodes: Array.isArray(object?.nodes) ? object.nodes.map((e: any) => WorkerNode.fromJSON(e)) @@ -1566,7 +1731,9 @@ export const MetaSnapshot = { obj.users = []; } if (message.parallelUnitMappings) { - obj.parallelUnitMappings = message.parallelUnitMappings.map((e) => e ? ParallelUnitMapping.toJSON(e) : undefined); + obj.parallelUnitMappings = message.parallelUnitMappings.map((e) => + e ? FragmentParallelUnitMapping.toJSON(e) : undefined + ); } else { obj.parallelUnitMappings = []; } @@ -1598,7 +1765,8 @@ export const MetaSnapshot = { message.views = object.views?.map((e) => View.fromPartial(e)) || []; message.functions = object.functions?.map((e) => Function.fromPartial(e)) || []; message.users = object.users?.map((e) => UserInfo.fromPartial(e)) || []; - message.parallelUnitMappings = object.parallelUnitMappings?.map((e) => ParallelUnitMapping.fromPartial(e)) || []; + message.parallelUnitMappings = + object.parallelUnitMappings?.map((e) => FragmentParallelUnitMapping.fromPartial(e)) || []; message.nodes = object.nodes?.map((e) => WorkerNode.fromPartial(e)) || []; message.hummockSnapshot = (object.hummockSnapshot !== undefined && object.hummockSnapshot !== null) ? HummockSnapshot.fromPartial(object.hummockSnapshot) @@ -1682,7 +1850,7 @@ export const SubscribeResponse = { : isSet(object.parallelUnitMapping) ? { $case: "parallelUnitMapping", - parallelUnitMapping: ParallelUnitMapping.fromJSON(object.parallelUnitMapping), + parallelUnitMapping: FragmentParallelUnitMapping.fromJSON(object.parallelUnitMapping), } : isSet(object.node) ? { $case: "node", node: WorkerNode.fromJSON(object.node) } @@ -1725,7 +1893,7 @@ export const SubscribeResponse = { (obj.function = message.info?.function ? Function.toJSON(message.info?.function) : undefined); message.info?.$case === "user" && (obj.user = message.info?.user ? UserInfo.toJSON(message.info?.user) : undefined); message.info?.$case === "parallelUnitMapping" && (obj.parallelUnitMapping = message.info?.parallelUnitMapping - ? ParallelUnitMapping.toJSON(message.info?.parallelUnitMapping) + ? FragmentParallelUnitMapping.toJSON(message.info?.parallelUnitMapping) : undefined); message.info?.$case === "node" && (obj.node = message.info?.node ? 
WorkerNode.toJSON(message.info?.node) : undefined); @@ -1784,7 +1952,7 @@ export const SubscribeResponse = { ) { message.info = { $case: "parallelUnitMapping", - parallelUnitMapping: ParallelUnitMapping.fromPartial(object.info.parallelUnitMapping), + parallelUnitMapping: FragmentParallelUnitMapping.fromPartial(object.info.parallelUnitMapping), }; } if (object.info?.$case === "node" && object.info?.node !== undefined && object.info?.node !== null) { @@ -1827,65 +1995,6 @@ export const SubscribeResponse = { }, }; -function createBaseMetaLeaderInfo(): MetaLeaderInfo { - return { nodeAddress: "", leaseId: 0 }; -} - -export const MetaLeaderInfo = { - fromJSON(object: any): MetaLeaderInfo { - return { - nodeAddress: isSet(object.nodeAddress) ? String(object.nodeAddress) : "", - leaseId: isSet(object.leaseId) ? Number(object.leaseId) : 0, - }; - }, - - toJSON(message: MetaLeaderInfo): unknown { - const obj: any = {}; - message.nodeAddress !== undefined && (obj.nodeAddress = message.nodeAddress); - message.leaseId !== undefined && (obj.leaseId = Math.round(message.leaseId)); - return obj; - }, - - fromPartial, I>>(object: I): MetaLeaderInfo { - const message = createBaseMetaLeaderInfo(); - message.nodeAddress = object.nodeAddress ?? ""; - message.leaseId = object.leaseId ?? 0; - return message; - }, -}; - -function createBaseMetaLeaseInfo(): MetaLeaseInfo { - return { leader: undefined, leaseRegisterTime: 0, leaseExpireTime: 0 }; -} - -export const MetaLeaseInfo = { - fromJSON(object: any): MetaLeaseInfo { - return { - leader: isSet(object.leader) ? MetaLeaderInfo.fromJSON(object.leader) : undefined, - leaseRegisterTime: isSet(object.leaseRegisterTime) ? Number(object.leaseRegisterTime) : 0, - leaseExpireTime: isSet(object.leaseExpireTime) ? Number(object.leaseExpireTime) : 0, - }; - }, - - toJSON(message: MetaLeaseInfo): unknown { - const obj: any = {}; - message.leader !== undefined && (obj.leader = message.leader ? MetaLeaderInfo.toJSON(message.leader) : undefined); - message.leaseRegisterTime !== undefined && (obj.leaseRegisterTime = Math.round(message.leaseRegisterTime)); - message.leaseExpireTime !== undefined && (obj.leaseExpireTime = Math.round(message.leaseExpireTime)); - return obj; - }, - - fromPartial, I>>(object: I): MetaLeaseInfo { - const message = createBaseMetaLeaseInfo(); - message.leader = (object.leader !== undefined && object.leader !== null) - ? MetaLeaderInfo.fromPartial(object.leader) - : undefined; - message.leaseRegisterTime = object.leaseRegisterTime ?? 0; - message.leaseExpireTime = object.leaseExpireTime ?? 0; - return message; - }, -}; - function createBasePauseRequest(): PauseRequest { return {}; } @@ -2264,6 +2373,230 @@ export const RescheduleResponse = { }, }; +function createBaseMembersRequest(): MembersRequest { + return {}; +} + +export const MembersRequest = { + fromJSON(_: any): MembersRequest { + return {}; + }, + + toJSON(_: MembersRequest): unknown { + const obj: any = {}; + return obj; + }, + + fromPartial, I>>(_: I): MembersRequest { + const message = createBaseMembersRequest(); + return message; + }, +}; + +function createBaseMetaMember(): MetaMember { + return { address: undefined, isLeader: false }; +} + +export const MetaMember = { + fromJSON(object: any): MetaMember { + return { + address: isSet(object.address) ? HostAddress.fromJSON(object.address) : undefined, + isLeader: isSet(object.isLeader) ? 
Boolean(object.isLeader) : false, + }; + }, + + toJSON(message: MetaMember): unknown { + const obj: any = {}; + message.address !== undefined && (obj.address = message.address ? HostAddress.toJSON(message.address) : undefined); + message.isLeader !== undefined && (obj.isLeader = message.isLeader); + return obj; + }, + + fromPartial, I>>(object: I): MetaMember { + const message = createBaseMetaMember(); + message.address = (object.address !== undefined && object.address !== null) + ? HostAddress.fromPartial(object.address) + : undefined; + message.isLeader = object.isLeader ?? false; + return message; + }, +}; + +function createBaseMembersResponse(): MembersResponse { + return { members: [] }; +} + +export const MembersResponse = { + fromJSON(object: any): MembersResponse { + return { members: Array.isArray(object?.members) ? object.members.map((e: any) => MetaMember.fromJSON(e)) : [] }; + }, + + toJSON(message: MembersResponse): unknown { + const obj: any = {}; + if (message.members) { + obj.members = message.members.map((e) => e ? MetaMember.toJSON(e) : undefined); + } else { + obj.members = []; + } + return obj; + }, + + fromPartial, I>>(object: I): MembersResponse { + const message = createBaseMembersResponse(); + message.members = object.members?.map((e) => MetaMember.fromPartial(e)) || []; + return message; + }, +}; + +function createBaseSystemParams(): SystemParams { + return { + barrierIntervalMs: undefined, + checkpointFrequency: undefined, + sstableSizeMb: undefined, + blockSizeKb: undefined, + bloomFalsePositive: undefined, + stateStore: undefined, + dataDirectory: undefined, + backupStorageUrl: undefined, + backupStorageDirectory: undefined, + }; +} + +export const SystemParams = { + fromJSON(object: any): SystemParams { + return { + barrierIntervalMs: isSet(object.barrierIntervalMs) ? Number(object.barrierIntervalMs) : undefined, + checkpointFrequency: isSet(object.checkpointFrequency) ? Number(object.checkpointFrequency) : undefined, + sstableSizeMb: isSet(object.sstableSizeMb) ? Number(object.sstableSizeMb) : undefined, + blockSizeKb: isSet(object.blockSizeKb) ? Number(object.blockSizeKb) : undefined, + bloomFalsePositive: isSet(object.bloomFalsePositive) ? Number(object.bloomFalsePositive) : undefined, + stateStore: isSet(object.stateStore) ? String(object.stateStore) : undefined, + dataDirectory: isSet(object.dataDirectory) ? String(object.dataDirectory) : undefined, + backupStorageUrl: isSet(object.backupStorageUrl) ? String(object.backupStorageUrl) : undefined, + backupStorageDirectory: isSet(object.backupStorageDirectory) ? 
String(object.backupStorageDirectory) : undefined, + }; + }, + + toJSON(message: SystemParams): unknown { + const obj: any = {}; + message.barrierIntervalMs !== undefined && (obj.barrierIntervalMs = Math.round(message.barrierIntervalMs)); + message.checkpointFrequency !== undefined && (obj.checkpointFrequency = Math.round(message.checkpointFrequency)); + message.sstableSizeMb !== undefined && (obj.sstableSizeMb = Math.round(message.sstableSizeMb)); + message.blockSizeKb !== undefined && (obj.blockSizeKb = Math.round(message.blockSizeKb)); + message.bloomFalsePositive !== undefined && (obj.bloomFalsePositive = message.bloomFalsePositive); + message.stateStore !== undefined && (obj.stateStore = message.stateStore); + message.dataDirectory !== undefined && (obj.dataDirectory = message.dataDirectory); + message.backupStorageUrl !== undefined && (obj.backupStorageUrl = message.backupStorageUrl); + message.backupStorageDirectory !== undefined && (obj.backupStorageDirectory = message.backupStorageDirectory); + return obj; + }, + + fromPartial, I>>(object: I): SystemParams { + const message = createBaseSystemParams(); + message.barrierIntervalMs = object.barrierIntervalMs ?? undefined; + message.checkpointFrequency = object.checkpointFrequency ?? undefined; + message.sstableSizeMb = object.sstableSizeMb ?? undefined; + message.blockSizeKb = object.blockSizeKb ?? undefined; + message.bloomFalsePositive = object.bloomFalsePositive ?? undefined; + message.stateStore = object.stateStore ?? undefined; + message.dataDirectory = object.dataDirectory ?? undefined; + message.backupStorageUrl = object.backupStorageUrl ?? undefined; + message.backupStorageDirectory = object.backupStorageDirectory ?? undefined; + return message; + }, +}; + +function createBaseGetSystemParamsRequest(): GetSystemParamsRequest { + return {}; +} + +export const GetSystemParamsRequest = { + fromJSON(_: any): GetSystemParamsRequest { + return {}; + }, + + toJSON(_: GetSystemParamsRequest): unknown { + const obj: any = {}; + return obj; + }, + + fromPartial, I>>(_: I): GetSystemParamsRequest { + const message = createBaseGetSystemParamsRequest(); + return message; + }, +}; + +function createBaseGetSystemParamsResponse(): GetSystemParamsResponse { + return { params: undefined }; +} + +export const GetSystemParamsResponse = { + fromJSON(object: any): GetSystemParamsResponse { + return { params: isSet(object.params) ? SystemParams.fromJSON(object.params) : undefined }; + }, + + toJSON(message: GetSystemParamsResponse): unknown { + const obj: any = {}; + message.params !== undefined && (obj.params = message.params ? SystemParams.toJSON(message.params) : undefined); + return obj; + }, + + fromPartial, I>>(object: I): GetSystemParamsResponse { + const message = createBaseGetSystemParamsResponse(); + message.params = (object.params !== undefined && object.params !== null) + ? SystemParams.fromPartial(object.params) + : undefined; + return message; + }, +}; + +function createBaseSetSystemParamRequest(): SetSystemParamRequest { + return { param: "", value: undefined }; +} + +export const SetSystemParamRequest = { + fromJSON(object: any): SetSystemParamRequest { + return { + param: isSet(object.param) ? String(object.param) : "", + value: isSet(object.value) ? 
String(object.value) : undefined, + }; + }, + + toJSON(message: SetSystemParamRequest): unknown { + const obj: any = {}; + message.param !== undefined && (obj.param = message.param); + message.value !== undefined && (obj.value = message.value); + return obj; + }, + + fromPartial, I>>(object: I): SetSystemParamRequest { + const message = createBaseSetSystemParamRequest(); + message.param = object.param ?? ""; + message.value = object.value ?? undefined; + return message; + }, +}; + +function createBaseSetSystemParamResponse(): SetSystemParamResponse { + return {}; +} + +export const SetSystemParamResponse = { + fromJSON(_: any): SetSystemParamResponse { + return {}; + }, + + toJSON(_: SetSystemParamResponse): unknown { + const obj: any = {}; + return obj; + }, + + fromPartial, I>>(_: I): SetSystemParamResponse { + const message = createBaseSetSystemParamResponse(); + return message; + }, +}; + type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined; export type DeepPartial = T extends Builtin ? T diff --git a/dashboard/proto/gen/plan_common.ts b/dashboard/proto/gen/plan_common.ts index b419b8c14d235..bb5e8b7568f69 100644 --- a/dashboard/proto/gen/plan_common.ts +++ b/dashboard/proto/gen/plan_common.ts @@ -134,6 +134,8 @@ export const RowFormatType = { MAXWELL: "MAXWELL", CANAL_JSON: "CANAL_JSON", CSV: "CSV", + NATIVE: "NATIVE", + DEBEZIUM_AVRO: "DEBEZIUM_AVRO", UNRECOGNIZED: "UNRECOGNIZED", } as const; @@ -165,6 +167,12 @@ export function rowFormatTypeFromJSON(object: any): RowFormatType { case 7: case "CSV": return RowFormatType.CSV; + case 8: + case "NATIVE": + return RowFormatType.NATIVE; + case 9: + case "DEBEZIUM_AVRO": + return RowFormatType.DEBEZIUM_AVRO; case -1: case "UNRECOGNIZED": default: @@ -190,6 +198,10 @@ export function rowFormatTypeToJSON(object: RowFormatType): string { return "CANAL_JSON"; case RowFormatType.CSV: return "CSV"; + case RowFormatType.NATIVE: + return "NATIVE"; + case RowFormatType.DEBEZIUM_AVRO: + return "DEBEZIUM_AVRO"; case RowFormatType.UNRECOGNIZED: default: return "UNRECOGNIZED"; diff --git a/dashboard/proto/gen/stream_plan.ts b/dashboard/proto/gen/stream_plan.ts index caf9f97ce9e4a..0afafa37d4bb5 100644 --- a/dashboard/proto/gen/stream_plan.ts +++ b/dashboard/proto/gen/stream_plan.ts @@ -1,5 +1,13 @@ /* eslint-disable */ -import { ColumnIndex, StreamSourceInfo, Table } from "./catalog"; +import { + ColumnIndex, + SinkType, + sinkTypeFromJSON, + sinkTypeToJSON, + StreamSourceInfo, + Table, + WatermarkDesc, +} from "./catalog"; import { Buffer } from "./common"; import { DataType, Datum, Epoch, IntervalUnit, StreamChunk } from "./data"; import { AggCall, ExprNode, InputRefExpr, ProjectSetSelectItem } from "./expr"; @@ -25,6 +33,8 @@ export const ChainType = { REARRANGE: "REARRANGE", /** BACKFILL - BACKFILL is corresponding to the backfill executor. */ BACKFILL: "BACKFILL", + /** UPSTREAM_ONLY - UPSTREAM_ONLY is corresponding to the chain executor, but doesn't consume the snapshot. 
*/ + UPSTREAM_ONLY: "UPSTREAM_ONLY", UNRECOGNIZED: "UNRECOGNIZED", } as const; @@ -44,6 +54,9 @@ export function chainTypeFromJSON(object: any): ChainType { case 3: case "BACKFILL": return ChainType.BACKFILL; + case 4: + case "UPSTREAM_ONLY": + return ChainType.UPSTREAM_ONLY; case -1: case "UNRECOGNIZED": default: @@ -61,6 +74,8 @@ export function chainTypeToJSON(object: ChainType): string { return "REARRANGE"; case ChainType.BACKFILL: return "BACKFILL"; + case ChainType.UPSTREAM_ONLY: + return "UPSTREAM_ONLY"; case ChainType.UNRECOGNIZED: default: return "UNRECOGNIZED"; @@ -254,6 +269,14 @@ export interface UpdateMutation_MergeUpdate { /** Merge executor can be uniquely identified by a combination of actor id and upstream fragment id. */ actorId: number; upstreamFragmentId: number; + /** + * - For scaling, this is always `None`. + * - For plan change, the upstream fragment will be changed to a new one, and this will be `Some`. + * In this case, all the upstream actors should be removed and replaced by the `new` ones. + */ + newUpstreamFragmentId?: + | number + | undefined; /** Added upstream actors. */ addedUpstreamActorId: number[]; /** Removed upstream actors. */ @@ -350,18 +373,27 @@ export interface SourceNode { sourceInner: StreamSource | undefined; } -export interface SinkNode { - tableId: number; +export interface SinkDesc { + id: number; + name: string; + definition: string; + columns: ColumnDesc[]; + pk: ColumnOrder[]; + streamKey: number[]; + distributionKey: number[]; properties: { [key: string]: string }; - fields: Field[]; - sinkPk: number[]; + sinkType: SinkType; } -export interface SinkNode_PropertiesEntry { +export interface SinkDesc_PropertiesEntry { key: string; value: string; } +export interface SinkNode { + sinkDesc: SinkDesc | undefined; +} + export interface ProjectNode { selectList: ExprNode[]; /** @@ -437,6 +469,12 @@ export interface SimpleAggNode { * It is true when the input is append-only */ isAppendOnly: boolean; + distinctDedupTables: { [key: number]: Table }; +} + +export interface SimpleAggNode_DistinctDedupTablesEntry { + key: number; + value: Table | undefined; } export interface HashAggNode { @@ -451,6 +489,12 @@ export interface HashAggNode { * It is true when the input is append-only */ isAppendOnly: boolean; + distinctDedupTables: { [key: number]: Table }; +} + +export interface HashAggNode_DistinctDedupTablesEntry { + key: number; + value: Table | undefined; } export interface TopNNode { @@ -497,6 +541,20 @@ export interface HashJoinNode { | undefined; /** The output indices of current node */ outputIndices: number[]; + /** + * Left deduped input pk indices. The pk of the left_table and + * left_degree_table is [left_join_key | left_deduped_input_pk_indices] + * and is expected to be the shortest key which starts with + * the join key and satisfies unique constrain. + */ + leftDedupedInputPkIndices: number[]; + /** + * Right deduped input pk indices. The pk of the right_table and + * right_degree_table is [right_join_key | right_deduped_input_pk_indices] + * and is expected to be the shortest key which starts with + * the join key and satisfies unique constrain. + */ + rightDedupedInputPkIndices: number[]; nullSafe: boolean[]; /** * Whether to optimize for append only stream. @@ -592,8 +650,6 @@ export interface ChainNode { * ChainType is used to decide which implementation for the ChainNode. */ chainType: ChainType; - /** Whether to place this chain on the same worker node as upstream actors. 
*/ - sameWorkerNode: boolean; /** * Whether the upstream materialize is and this chain should be a singleton. * FIXME: This is a workaround for fragmenter since the distribution info will be lost if there's only one @@ -622,6 +678,8 @@ export interface ArrangementInfo { arrangeKeyOrders: ColumnOrder[]; /** Column descs of the arrangement */ columnDescs: ColumnDesc[]; + /** Used to build storage table by stream lookup join of delta join. */ + tableDesc: StorageTableDesc | undefined; } /** @@ -661,23 +719,15 @@ export interface LookupNode { | { $case: "tableId"; tableId: number } | { $case: "indexId"; indexId: number }; /** Info about the arrangement */ - arrangementTableInfo: - | ArrangementInfo - | undefined; - /** Internal table of arrangement. */ - arrangementTable: Table | undefined; + arrangementTableInfo: ArrangementInfo | undefined; } /** WatermarkFilter needs to filter the upstream data by the water mark. */ export interface WatermarkFilterNode { - /** The expression to calculate the watermark value. */ - watermarkExpr: - | ExprNode - | undefined; - /** The column the event time belongs. */ - eventTimeColIdx: number; - /** The table used to persist watermark, the key is vnode. */ - table: Table | undefined; + /** The watermark descs */ + watermarkDescs: WatermarkDesc[]; + /** The tables used to persist watermarks, the key is vnode. */ + tables: Table[]; } /** Acts like a merger, but on different inputs. */ @@ -715,6 +765,8 @@ export interface SortNode { export interface DmlNode { /** Id of the table on which DML performs. */ tableId: number; + /** Version of the table. */ + tableVersionId: number; /** Column descriptions of the table. */ columnDescs: ColumnDesc[]; } @@ -759,7 +811,8 @@ export interface StreamNode { | { $case: "watermarkFilter"; watermarkFilter: WatermarkFilterNode } | { $case: "dml"; dml: DmlNode } | { $case: "rowIdGen"; rowIdGen: RowIdGenNode } - | { $case: "now"; now: NowNode }; + | { $case: "now"; now: NowNode } + | { $case: "appendOnlyGroupTopN"; appendOnlyGroupTopN: GroupTopNNode }; /** * The id for the operator. This is local per mview. * TODO: should better be a uint32. @@ -799,10 +852,7 @@ export interface Dispatcher { | undefined; /** * Dispatcher can be uniquely identified by a combination of actor id and dispatcher id. - * - For dispatchers within actors, the id is the same as its downstream fragment id. - * We can't use the exchange operator id directly as the dispatch id, because an exchange - * could belong to more than one downstream in DAG. - * - For MV on MV, the id is the same as the actor id of chain node in the downstream MV. + * This is exactly the same as its downstream fragment id. */ dispatcherId: number; /** Number of downstreams decides how many endpoints a dispatcher should dispatch. */ @@ -822,8 +872,6 @@ export interface StreamActor { * We duplicate the information here to ease the parsing logic in stream manager. */ upstreamActorId: number[]; - /** Placement rule for actor, need to stay on the same node as upstream. */ - sameWorkerNodeAsUpstream: boolean; /** * Vnodes that the executors in this actor own. * If the fragment is a singleton, this field will not be set and leave a `None`. @@ -877,8 +925,6 @@ export interface StreamFragmentGraph_StreamFragmentEdge { dispatchStrategy: | DispatchStrategy | undefined; - /** Whether the two linked nodes should be placed on the same worker node */ - sameWorkerNode: boolean; /** * A unique identifier of this edge. Generally it should be exchange node's operator id. 
When * rewriting fragments into delta joins or when inserting 1-to-1 exchange, there will be @@ -1230,7 +1276,13 @@ export const UpdateMutation_DispatcherUpdate = { }; function createBaseUpdateMutation_MergeUpdate(): UpdateMutation_MergeUpdate { - return { actorId: 0, upstreamFragmentId: 0, addedUpstreamActorId: [], removedUpstreamActorId: [] }; + return { + actorId: 0, + upstreamFragmentId: 0, + newUpstreamFragmentId: undefined, + addedUpstreamActorId: [], + removedUpstreamActorId: [], + }; } export const UpdateMutation_MergeUpdate = { @@ -1238,6 +1290,7 @@ export const UpdateMutation_MergeUpdate = { return { actorId: isSet(object.actorId) ? Number(object.actorId) : 0, upstreamFragmentId: isSet(object.upstreamFragmentId) ? Number(object.upstreamFragmentId) : 0, + newUpstreamFragmentId: isSet(object.newUpstreamFragmentId) ? Number(object.newUpstreamFragmentId) : undefined, addedUpstreamActorId: Array.isArray(object?.addedUpstreamActorId) ? object.addedUpstreamActorId.map((e: any) => Number(e)) : [], @@ -1251,6 +1304,8 @@ export const UpdateMutation_MergeUpdate = { const obj: any = {}; message.actorId !== undefined && (obj.actorId = Math.round(message.actorId)); message.upstreamFragmentId !== undefined && (obj.upstreamFragmentId = Math.round(message.upstreamFragmentId)); + message.newUpstreamFragmentId !== undefined && + (obj.newUpstreamFragmentId = Math.round(message.newUpstreamFragmentId)); if (message.addedUpstreamActorId) { obj.addedUpstreamActorId = message.addedUpstreamActorId.map((e) => Math.round(e)); } else { @@ -1268,6 +1323,7 @@ export const UpdateMutation_MergeUpdate = { const message = createBaseUpdateMutation_MergeUpdate(); message.actorId = object.actorId ?? 0; message.upstreamFragmentId = object.upstreamFragmentId ?? 0; + message.newUpstreamFragmentId = object.newUpstreamFragmentId ?? undefined; message.addedUpstreamActorId = object.addedUpstreamActorId?.map((e) => e) || []; message.removedUpstreamActorId = object.removedUpstreamActorId?.map((e) => e) || []; return message; @@ -1803,54 +1859,86 @@ export const SourceNode = { }, }; -function createBaseSinkNode(): SinkNode { - return { tableId: 0, properties: {}, fields: [], sinkPk: [] }; +function createBaseSinkDesc(): SinkDesc { + return { + id: 0, + name: "", + definition: "", + columns: [], + pk: [], + streamKey: [], + distributionKey: [], + properties: {}, + sinkType: SinkType.UNSPECIFIED, + }; } -export const SinkNode = { - fromJSON(object: any): SinkNode { +export const SinkDesc = { + fromJSON(object: any): SinkDesc { return { - tableId: isSet(object.tableId) ? Number(object.tableId) : 0, + id: isSet(object.id) ? Number(object.id) : 0, + name: isSet(object.name) ? String(object.name) : "", + definition: isSet(object.definition) ? String(object.definition) : "", + columns: Array.isArray(object?.columns) + ? object.columns.map((e: any) => ColumnDesc.fromJSON(e)) + : [], + pk: Array.isArray(object?.pk) ? object.pk.map((e: any) => ColumnOrder.fromJSON(e)) : [], + streamKey: Array.isArray(object?.streamKey) ? object.streamKey.map((e: any) => Number(e)) : [], + distributionKey: Array.isArray(object?.distributionKey) ? object.distributionKey.map((e: any) => Number(e)) : [], properties: isObject(object.properties) ? Object.entries(object.properties).reduce<{ [key: string]: string }>((acc, [key, value]) => { acc[key] = String(value); return acc; }, {}) : {}, - fields: Array.isArray(object?.fields) - ? object.fields.map((e: any) => Field.fromJSON(e)) - : [], - sinkPk: Array.isArray(object?.sinkPk) - ? 
object.sinkPk.map((e: any) => Number(e)) - : [], + sinkType: isSet(object.sinkType) ? sinkTypeFromJSON(object.sinkType) : SinkType.UNSPECIFIED, }; }, - toJSON(message: SinkNode): unknown { + toJSON(message: SinkDesc): unknown { const obj: any = {}; - message.tableId !== undefined && (obj.tableId = Math.round(message.tableId)); + message.id !== undefined && (obj.id = Math.round(message.id)); + message.name !== undefined && (obj.name = message.name); + message.definition !== undefined && (obj.definition = message.definition); + if (message.columns) { + obj.columns = message.columns.map((e) => e ? ColumnDesc.toJSON(e) : undefined); + } else { + obj.columns = []; + } + if (message.pk) { + obj.pk = message.pk.map((e) => e ? ColumnOrder.toJSON(e) : undefined); + } else { + obj.pk = []; + } + if (message.streamKey) { + obj.streamKey = message.streamKey.map((e) => Math.round(e)); + } else { + obj.streamKey = []; + } + if (message.distributionKey) { + obj.distributionKey = message.distributionKey.map((e) => Math.round(e)); + } else { + obj.distributionKey = []; + } obj.properties = {}; if (message.properties) { Object.entries(message.properties).forEach(([k, v]) => { obj.properties[k] = v; }); } - if (message.fields) { - obj.fields = message.fields.map((e) => e ? Field.toJSON(e) : undefined); - } else { - obj.fields = []; - } - if (message.sinkPk) { - obj.sinkPk = message.sinkPk.map((e) => Math.round(e)); - } else { - obj.sinkPk = []; - } + message.sinkType !== undefined && (obj.sinkType = sinkTypeToJSON(message.sinkType)); return obj; }, - fromPartial, I>>(object: I): SinkNode { - const message = createBaseSinkNode(); - message.tableId = object.tableId ?? 0; + fromPartial, I>>(object: I): SinkDesc { + const message = createBaseSinkDesc(); + message.id = object.id ?? 0; + message.name = object.name ?? ""; + message.definition = object.definition ?? ""; + message.columns = object.columns?.map((e) => ColumnDesc.fromPartial(e)) || []; + message.pk = object.pk?.map((e) => ColumnOrder.fromPartial(e)) || []; + message.streamKey = object.streamKey?.map((e) => e) || []; + message.distributionKey = object.distributionKey?.map((e) => e) || []; message.properties = Object.entries(object.properties ?? {}).reduce<{ [key: string]: string }>( (acc, [key, value]) => { if (value !== undefined) { @@ -1860,36 +1948,59 @@ export const SinkNode = { }, {}, ); - message.fields = object.fields?.map((e) => Field.fromPartial(e)) || []; - message.sinkPk = object.sinkPk?.map((e) => e) || []; + message.sinkType = object.sinkType ?? SinkType.UNSPECIFIED; return message; }, }; -function createBaseSinkNode_PropertiesEntry(): SinkNode_PropertiesEntry { +function createBaseSinkDesc_PropertiesEntry(): SinkDesc_PropertiesEntry { return { key: "", value: "" }; } -export const SinkNode_PropertiesEntry = { - fromJSON(object: any): SinkNode_PropertiesEntry { +export const SinkDesc_PropertiesEntry = { + fromJSON(object: any): SinkDesc_PropertiesEntry { return { key: isSet(object.key) ? String(object.key) : "", value: isSet(object.value) ? 
String(object.value) : "" }; }, - toJSON(message: SinkNode_PropertiesEntry): unknown { + toJSON(message: SinkDesc_PropertiesEntry): unknown { const obj: any = {}; message.key !== undefined && (obj.key = message.key); message.value !== undefined && (obj.value = message.value); return obj; }, - fromPartial, I>>(object: I): SinkNode_PropertiesEntry { - const message = createBaseSinkNode_PropertiesEntry(); + fromPartial, I>>(object: I): SinkDesc_PropertiesEntry { + const message = createBaseSinkDesc_PropertiesEntry(); message.key = object.key ?? ""; message.value = object.value ?? ""; return message; }, }; +function createBaseSinkNode(): SinkNode { + return { sinkDesc: undefined }; +} + +export const SinkNode = { + fromJSON(object: any): SinkNode { + return { sinkDesc: isSet(object.sinkDesc) ? SinkDesc.fromJSON(object.sinkDesc) : undefined }; + }, + + toJSON(message: SinkNode): unknown { + const obj: any = {}; + message.sinkDesc !== undefined && (obj.sinkDesc = message.sinkDesc ? SinkDesc.toJSON(message.sinkDesc) : undefined); + return obj; + }, + + fromPartial, I>>(object: I): SinkNode { + const message = createBaseSinkNode(); + message.sinkDesc = (object.sinkDesc !== undefined && object.sinkDesc !== null) + ? SinkDesc.fromPartial(object.sinkDesc) + : undefined; + return message; + }, +}; + function createBaseProjectNode(): ProjectNode { return { selectList: [], watermarkInputKey: [], watermarkOutputKey: [] }; } @@ -2158,7 +2269,14 @@ export const AggCallState_MaterializedInputState = { }; function createBaseSimpleAggNode(): SimpleAggNode { - return { aggCalls: [], distributionKey: [], aggCallStates: [], resultTable: undefined, isAppendOnly: false }; + return { + aggCalls: [], + distributionKey: [], + aggCallStates: [], + resultTable: undefined, + isAppendOnly: false, + distinctDedupTables: {}, + }; } export const SimpleAggNode = { @@ -2171,6 +2289,12 @@ export const SimpleAggNode = { : [], resultTable: isSet(object.resultTable) ? Table.fromJSON(object.resultTable) : undefined, isAppendOnly: isSet(object.isAppendOnly) ? Boolean(object.isAppendOnly) : false, + distinctDedupTables: isObject(object.distinctDedupTables) + ? Object.entries(object.distinctDedupTables).reduce<{ [key: number]: Table }>((acc, [key, value]) => { + acc[Number(key)] = Table.fromJSON(value); + return acc; + }, {}) + : {}, }; }, @@ -2194,6 +2318,12 @@ export const SimpleAggNode = { message.resultTable !== undefined && (obj.resultTable = message.resultTable ? Table.toJSON(message.resultTable) : undefined); message.isAppendOnly !== undefined && (obj.isAppendOnly = message.isAppendOnly); + obj.distinctDedupTables = {}; + if (message.distinctDedupTables) { + Object.entries(message.distinctDedupTables).forEach(([k, v]) => { + obj.distinctDedupTables[k] = Table.toJSON(v); + }); + } return obj; }, @@ -2206,12 +2336,57 @@ export const SimpleAggNode = { ? Table.fromPartial(object.resultTable) : undefined; message.isAppendOnly = object.isAppendOnly ?? false; + message.distinctDedupTables = Object.entries(object.distinctDedupTables ?? 
{}).reduce<{ [key: number]: Table }>( + (acc, [key, value]) => { + if (value !== undefined) { + acc[Number(key)] = Table.fromPartial(value); + } + return acc; + }, + {}, + ); + return message; + }, +}; + +function createBaseSimpleAggNode_DistinctDedupTablesEntry(): SimpleAggNode_DistinctDedupTablesEntry { + return { key: 0, value: undefined }; +} + +export const SimpleAggNode_DistinctDedupTablesEntry = { + fromJSON(object: any): SimpleAggNode_DistinctDedupTablesEntry { + return { + key: isSet(object.key) ? Number(object.key) : 0, + value: isSet(object.value) ? Table.fromJSON(object.value) : undefined, + }; + }, + + toJSON(message: SimpleAggNode_DistinctDedupTablesEntry): unknown { + const obj: any = {}; + message.key !== undefined && (obj.key = Math.round(message.key)); + message.value !== undefined && (obj.value = message.value ? Table.toJSON(message.value) : undefined); + return obj; + }, + + fromPartial, I>>( + object: I, + ): SimpleAggNode_DistinctDedupTablesEntry { + const message = createBaseSimpleAggNode_DistinctDedupTablesEntry(); + message.key = object.key ?? 0; + message.value = (object.value !== undefined && object.value !== null) ? Table.fromPartial(object.value) : undefined; return message; }, }; function createBaseHashAggNode(): HashAggNode { - return { groupKey: [], aggCalls: [], aggCallStates: [], resultTable: undefined, isAppendOnly: false }; + return { + groupKey: [], + aggCalls: [], + aggCallStates: [], + resultTable: undefined, + isAppendOnly: false, + distinctDedupTables: {}, + }; } export const HashAggNode = { @@ -2224,6 +2399,12 @@ export const HashAggNode = { : [], resultTable: isSet(object.resultTable) ? Table.fromJSON(object.resultTable) : undefined, isAppendOnly: isSet(object.isAppendOnly) ? Boolean(object.isAppendOnly) : false, + distinctDedupTables: isObject(object.distinctDedupTables) + ? Object.entries(object.distinctDedupTables).reduce<{ [key: number]: Table }>((acc, [key, value]) => { + acc[Number(key)] = Table.fromJSON(value); + return acc; + }, {}) + : {}, }; }, @@ -2247,6 +2428,12 @@ export const HashAggNode = { message.resultTable !== undefined && (obj.resultTable = message.resultTable ? Table.toJSON(message.resultTable) : undefined); message.isAppendOnly !== undefined && (obj.isAppendOnly = message.isAppendOnly); + obj.distinctDedupTables = {}; + if (message.distinctDedupTables) { + Object.entries(message.distinctDedupTables).forEach(([k, v]) => { + obj.distinctDedupTables[k] = Table.toJSON(v); + }); + } return obj; }, @@ -2259,6 +2446,44 @@ export const HashAggNode = { ? Table.fromPartial(object.resultTable) : undefined; message.isAppendOnly = object.isAppendOnly ?? false; + message.distinctDedupTables = Object.entries(object.distinctDedupTables ?? {}).reduce<{ [key: number]: Table }>( + (acc, [key, value]) => { + if (value !== undefined) { + acc[Number(key)] = Table.fromPartial(value); + } + return acc; + }, + {}, + ); + return message; + }, +}; + +function createBaseHashAggNode_DistinctDedupTablesEntry(): HashAggNode_DistinctDedupTablesEntry { + return { key: 0, value: undefined }; +} + +export const HashAggNode_DistinctDedupTablesEntry = { + fromJSON(object: any): HashAggNode_DistinctDedupTablesEntry { + return { + key: isSet(object.key) ? Number(object.key) : 0, + value: isSet(object.value) ? 
Table.fromJSON(object.value) : undefined, + }; + }, + + toJSON(message: HashAggNode_DistinctDedupTablesEntry): unknown { + const obj: any = {}; + message.key !== undefined && (obj.key = Math.round(message.key)); + message.value !== undefined && (obj.value = message.value ? Table.toJSON(message.value) : undefined); + return obj; + }, + + fromPartial, I>>( + object: I, + ): HashAggNode_DistinctDedupTablesEntry { + const message = createBaseHashAggNode_DistinctDedupTablesEntry(); + message.key = object.key ?? 0; + message.value = (object.value !== undefined && object.value !== null) ? Table.fromPartial(object.value) : undefined; return message; }, }; @@ -2361,6 +2586,8 @@ function createBaseHashJoinNode(): HashJoinNode { leftDegreeTable: undefined, rightDegreeTable: undefined, outputIndices: [], + leftDedupedInputPkIndices: [], + rightDedupedInputPkIndices: [], nullSafe: [], isAppendOnly: false, }; @@ -2378,6 +2605,12 @@ export const HashJoinNode = { leftDegreeTable: isSet(object.leftDegreeTable) ? Table.fromJSON(object.leftDegreeTable) : undefined, rightDegreeTable: isSet(object.rightDegreeTable) ? Table.fromJSON(object.rightDegreeTable) : undefined, outputIndices: Array.isArray(object?.outputIndices) ? object.outputIndices.map((e: any) => Number(e)) : [], + leftDedupedInputPkIndices: Array.isArray(object?.leftDedupedInputPkIndices) + ? object.leftDedupedInputPkIndices.map((e: any) => Number(e)) + : [], + rightDedupedInputPkIndices: Array.isArray(object?.rightDedupedInputPkIndices) + ? object.rightDedupedInputPkIndices.map((e: any) => Number(e)) + : [], nullSafe: Array.isArray(object?.nullSafe) ? object.nullSafe.map((e: any) => Boolean(e)) : [], isAppendOnly: isSet(object.isAppendOnly) ? Boolean(object.isAppendOnly) : false, }; @@ -2411,6 +2644,16 @@ export const HashJoinNode = { } else { obj.outputIndices = []; } + if (message.leftDedupedInputPkIndices) { + obj.leftDedupedInputPkIndices = message.leftDedupedInputPkIndices.map((e) => Math.round(e)); + } else { + obj.leftDedupedInputPkIndices = []; + } + if (message.rightDedupedInputPkIndices) { + obj.rightDedupedInputPkIndices = message.rightDedupedInputPkIndices.map((e) => Math.round(e)); + } else { + obj.rightDedupedInputPkIndices = []; + } if (message.nullSafe) { obj.nullSafe = message.nullSafe.map((e) => e); } else { @@ -2441,6 +2684,8 @@ export const HashJoinNode = { ? Table.fromPartial(object.rightDegreeTable) : undefined; message.outputIndices = object.outputIndices?.map((e) => e) || []; + message.leftDedupedInputPkIndices = object.leftDedupedInputPkIndices?.map((e) => e) || []; + message.rightDedupedInputPkIndices = object.rightDedupedInputPkIndices?.map((e) => e) || []; message.nullSafe = object.nullSafe?.map((e) => e) || []; message.isAppendOnly = object.isAppendOnly ?? false; return message; @@ -2688,7 +2933,6 @@ function createBaseChainNode(): ChainNode { upstreamFields: [], upstreamColumnIndices: [], chainType: ChainType.CHAIN_UNSPECIFIED, - sameWorkerNode: false, isSingleton: false, tableDesc: undefined, }; @@ -2705,7 +2949,6 @@ export const ChainNode = { ? object.upstreamColumnIndices.map((e: any) => Number(e)) : [], chainType: isSet(object.chainType) ? chainTypeFromJSON(object.chainType) : ChainType.CHAIN_UNSPECIFIED, - sameWorkerNode: isSet(object.sameWorkerNode) ? Boolean(object.sameWorkerNode) : false, isSingleton: isSet(object.isSingleton) ? Boolean(object.isSingleton) : false, tableDesc: isSet(object.tableDesc) ? 
StorageTableDesc.fromJSON(object.tableDesc) : undefined, }; @@ -2725,7 +2968,6 @@ export const ChainNode = { obj.upstreamColumnIndices = []; } message.chainType !== undefined && (obj.chainType = chainTypeToJSON(message.chainType)); - message.sameWorkerNode !== undefined && (obj.sameWorkerNode = message.sameWorkerNode); message.isSingleton !== undefined && (obj.isSingleton = message.isSingleton); message.tableDesc !== undefined && (obj.tableDesc = message.tableDesc ? StorageTableDesc.toJSON(message.tableDesc) : undefined); @@ -2738,7 +2980,6 @@ export const ChainNode = { message.upstreamFields = object.upstreamFields?.map((e) => Field.fromPartial(e)) || []; message.upstreamColumnIndices = object.upstreamColumnIndices?.map((e) => e) || []; message.chainType = object.chainType ?? ChainType.CHAIN_UNSPECIFIED; - message.sameWorkerNode = object.sameWorkerNode ?? false; message.isSingleton = object.isSingleton ?? false; message.tableDesc = (object.tableDesc !== undefined && object.tableDesc !== null) ? StorageTableDesc.fromPartial(object.tableDesc) @@ -2782,7 +3023,7 @@ export const BatchPlanNode = { }; function createBaseArrangementInfo(): ArrangementInfo { - return { arrangeKeyOrders: [], columnDescs: [] }; + return { arrangeKeyOrders: [], columnDescs: [], tableDesc: undefined }; } export const ArrangementInfo = { @@ -2794,6 +3035,7 @@ export const ArrangementInfo = { columnDescs: Array.isArray(object?.columnDescs) ? object.columnDescs.map((e: any) => ColumnDesc.fromJSON(e)) : [], + tableDesc: isSet(object.tableDesc) ? StorageTableDesc.fromJSON(object.tableDesc) : undefined, }; }, @@ -2809,6 +3051,8 @@ export const ArrangementInfo = { } else { obj.columnDescs = []; } + message.tableDesc !== undefined && + (obj.tableDesc = message.tableDesc ? StorageTableDesc.toJSON(message.tableDesc) : undefined); return obj; }, @@ -2816,6 +3060,9 @@ export const ArrangementInfo = { const message = createBaseArrangementInfo(); message.arrangeKeyOrders = object.arrangeKeyOrders?.map((e) => ColumnOrder.fromPartial(e)) || []; message.columnDescs = object.columnDescs?.map((e) => ColumnDesc.fromPartial(e)) || []; + message.tableDesc = (object.tableDesc !== undefined && object.tableDesc !== null) + ? StorageTableDesc.fromPartial(object.tableDesc) + : undefined; return message; }, }; @@ -2868,7 +3115,6 @@ function createBaseLookupNode(): LookupNode { columnMapping: [], arrangementTableId: undefined, arrangementTableInfo: undefined, - arrangementTable: undefined, }; } @@ -2887,7 +3133,6 @@ export const LookupNode = { arrangementTableInfo: isSet(object.arrangementTableInfo) ? ArrangementInfo.fromJSON(object.arrangementTableInfo) : undefined, - arrangementTable: isSet(object.arrangementTable) ? Table.fromJSON(object.arrangementTable) : undefined, }; }, @@ -2914,8 +3159,6 @@ export const LookupNode = { message.arrangementTableInfo !== undefined && (obj.arrangementTableInfo = message.arrangementTableInfo ? ArrangementInfo.toJSON(message.arrangementTableInfo) : undefined); - message.arrangementTable !== undefined && - (obj.arrangementTable = message.arrangementTable ? Table.toJSON(message.arrangementTable) : undefined); return obj; }, @@ -2942,42 +3185,45 @@ export const LookupNode = { message.arrangementTableInfo = (object.arrangementTableInfo !== undefined && object.arrangementTableInfo !== null) ? ArrangementInfo.fromPartial(object.arrangementTableInfo) : undefined; - message.arrangementTable = (object.arrangementTable !== undefined && object.arrangementTable !== null) - ? 
Table.fromPartial(object.arrangementTable) - : undefined; return message; }, }; function createBaseWatermarkFilterNode(): WatermarkFilterNode { - return { watermarkExpr: undefined, eventTimeColIdx: 0, table: undefined }; + return { watermarkDescs: [], tables: [] }; } export const WatermarkFilterNode = { fromJSON(object: any): WatermarkFilterNode { return { - watermarkExpr: isSet(object.watermarkExpr) ? ExprNode.fromJSON(object.watermarkExpr) : undefined, - eventTimeColIdx: isSet(object.eventTimeColIdx) ? Number(object.eventTimeColIdx) : 0, - table: isSet(object.table) ? Table.fromJSON(object.table) : undefined, + watermarkDescs: Array.isArray(object?.watermarkDescs) + ? object.watermarkDescs.map((e: any) => WatermarkDesc.fromJSON(e)) + : [], + tables: Array.isArray(object?.tables) + ? object.tables.map((e: any) => Table.fromJSON(e)) + : [], }; }, toJSON(message: WatermarkFilterNode): unknown { const obj: any = {}; - message.watermarkExpr !== undefined && - (obj.watermarkExpr = message.watermarkExpr ? ExprNode.toJSON(message.watermarkExpr) : undefined); - message.eventTimeColIdx !== undefined && (obj.eventTimeColIdx = Math.round(message.eventTimeColIdx)); - message.table !== undefined && (obj.table = message.table ? Table.toJSON(message.table) : undefined); + if (message.watermarkDescs) { + obj.watermarkDescs = message.watermarkDescs.map((e) => e ? WatermarkDesc.toJSON(e) : undefined); + } else { + obj.watermarkDescs = []; + } + if (message.tables) { + obj.tables = message.tables.map((e) => e ? Table.toJSON(e) : undefined); + } else { + obj.tables = []; + } return obj; }, fromPartial, I>>(object: I): WatermarkFilterNode { const message = createBaseWatermarkFilterNode(); - message.watermarkExpr = (object.watermarkExpr !== undefined && object.watermarkExpr !== null) - ? ExprNode.fromPartial(object.watermarkExpr) - : undefined; - message.eventTimeColIdx = object.eventTimeColIdx ?? 0; - message.table = (object.table !== undefined && object.table !== null) ? Table.fromPartial(object.table) : undefined; + message.watermarkDescs = object.watermarkDescs?.map((e) => WatermarkDesc.fromPartial(e)) || []; + message.tables = object.tables?.map((e) => Table.fromPartial(e)) || []; return message; }, }; @@ -3147,13 +3393,14 @@ export const SortNode = { }; function createBaseDmlNode(): DmlNode { - return { tableId: 0, columnDescs: [] }; + return { tableId: 0, tableVersionId: 0, columnDescs: [] }; } export const DmlNode = { fromJSON(object: any): DmlNode { return { tableId: isSet(object.tableId) ? Number(object.tableId) : 0, + tableVersionId: isSet(object.tableVersionId) ? Number(object.tableVersionId) : 0, columnDescs: Array.isArray(object?.columnDescs) ? object.columnDescs.map((e: any) => ColumnDesc.fromJSON(e)) : [], }; }, @@ -3161,6 +3408,7 @@ export const DmlNode = { toJSON(message: DmlNode): unknown { const obj: any = {}; message.tableId !== undefined && (obj.tableId = Math.round(message.tableId)); + message.tableVersionId !== undefined && (obj.tableVersionId = Math.round(message.tableVersionId)); if (message.columnDescs) { obj.columnDescs = message.columnDescs.map((e) => e ? ColumnDesc.toJSON(e) : undefined); } else { @@ -3172,6 +3420,7 @@ export const DmlNode = { fromPartial, I>>(object: I): DmlNode { const message = createBaseDmlNode(); message.tableId = object.tableId ?? 0; + message.tableVersionId = object.tableVersionId ?? 0; message.columnDescs = object.columnDescs?.map((e) => ColumnDesc.fromPartial(e)) || []; return message; }, @@ -3291,6 +3540,8 @@ export const StreamNode = { ? 
{ $case: "rowIdGen", rowIdGen: RowIdGenNode.fromJSON(object.rowIdGen) } : isSet(object.now) ? { $case: "now", now: NowNode.fromJSON(object.now) } + : isSet(object.appendOnlyGroupTopN) + ? { $case: "appendOnlyGroupTopN", appendOnlyGroupTopN: GroupTopNNode.fromJSON(object.appendOnlyGroupTopN) } : undefined, operatorId: isSet(object.operatorId) ? Number(object.operatorId) : 0, input: Array.isArray(object?.input) @@ -3373,6 +3624,10 @@ export const StreamNode = { (obj.rowIdGen = message.nodeBody?.rowIdGen ? RowIdGenNode.toJSON(message.nodeBody?.rowIdGen) : undefined); message.nodeBody?.$case === "now" && (obj.now = message.nodeBody?.now ? NowNode.toJSON(message.nodeBody?.now) : undefined); + message.nodeBody?.$case === "appendOnlyGroupTopN" && + (obj.appendOnlyGroupTopN = message.nodeBody?.appendOnlyGroupTopN + ? GroupTopNNode.toJSON(message.nodeBody?.appendOnlyGroupTopN) + : undefined); message.operatorId !== undefined && (obj.operatorId = Math.round(message.operatorId)); if (message.input) { obj.input = message.input.map((e) => @@ -3594,6 +3849,16 @@ export const StreamNode = { if (object.nodeBody?.$case === "now" && object.nodeBody?.now !== undefined && object.nodeBody?.now !== null) { message.nodeBody = { $case: "now", now: NowNode.fromPartial(object.nodeBody.now) }; } + if ( + object.nodeBody?.$case === "appendOnlyGroupTopN" && + object.nodeBody?.appendOnlyGroupTopN !== undefined && + object.nodeBody?.appendOnlyGroupTopN !== null + ) { + message.nodeBody = { + $case: "appendOnlyGroupTopN", + appendOnlyGroupTopN: GroupTopNNode.fromPartial(object.nodeBody.appendOnlyGroupTopN), + }; + } message.operatorId = object.operatorId ?? 0; message.input = object.input?.map((e) => StreamNode.fromPartial(e)) || []; message.streamKey = object.streamKey?.map((e) => e) || []; @@ -3697,7 +3962,6 @@ function createBaseStreamActor(): StreamActor { nodes: undefined, dispatcher: [], upstreamActorId: [], - sameWorkerNodeAsUpstream: false, vnodeBitmap: undefined, mviewDefinition: "", }; @@ -3711,9 +3975,6 @@ export const StreamActor = { nodes: isSet(object.nodes) ? StreamNode.fromJSON(object.nodes) : undefined, dispatcher: Array.isArray(object?.dispatcher) ? object.dispatcher.map((e: any) => Dispatcher.fromJSON(e)) : [], upstreamActorId: Array.isArray(object?.upstreamActorId) ? object.upstreamActorId.map((e: any) => Number(e)) : [], - sameWorkerNodeAsUpstream: isSet(object.sameWorkerNodeAsUpstream) - ? Boolean(object.sameWorkerNodeAsUpstream) - : false, vnodeBitmap: isSet(object.vnodeBitmap) ? Buffer.fromJSON(object.vnodeBitmap) : undefined, mviewDefinition: isSet(object.mviewDefinition) ? String(object.mviewDefinition) : "", }; @@ -3734,7 +3995,6 @@ export const StreamActor = { } else { obj.upstreamActorId = []; } - message.sameWorkerNodeAsUpstream !== undefined && (obj.sameWorkerNodeAsUpstream = message.sameWorkerNodeAsUpstream); message.vnodeBitmap !== undefined && (obj.vnodeBitmap = message.vnodeBitmap ? Buffer.toJSON(message.vnodeBitmap) : undefined); message.mviewDefinition !== undefined && (obj.mviewDefinition = message.mviewDefinition); @@ -3750,7 +4010,6 @@ export const StreamActor = { : undefined; message.dispatcher = object.dispatcher?.map((e) => Dispatcher.fromPartial(e)) || []; message.upstreamActorId = object.upstreamActorId?.map((e) => e) || []; - message.sameWorkerNodeAsUpstream = object.sameWorkerNodeAsUpstream ?? false; message.vnodeBitmap = (object.vnodeBitmap !== undefined && object.vnodeBitmap !== null) ? 
Buffer.fromPartial(object.vnodeBitmap) : undefined; @@ -3914,14 +4173,13 @@ export const StreamFragmentGraph_StreamFragment = { }; function createBaseStreamFragmentGraph_StreamFragmentEdge(): StreamFragmentGraph_StreamFragmentEdge { - return { dispatchStrategy: undefined, sameWorkerNode: false, linkId: 0, upstreamId: 0, downstreamId: 0 }; + return { dispatchStrategy: undefined, linkId: 0, upstreamId: 0, downstreamId: 0 }; } export const StreamFragmentGraph_StreamFragmentEdge = { fromJSON(object: any): StreamFragmentGraph_StreamFragmentEdge { return { dispatchStrategy: isSet(object.dispatchStrategy) ? DispatchStrategy.fromJSON(object.dispatchStrategy) : undefined, - sameWorkerNode: isSet(object.sameWorkerNode) ? Boolean(object.sameWorkerNode) : false, linkId: isSet(object.linkId) ? Number(object.linkId) : 0, upstreamId: isSet(object.upstreamId) ? Number(object.upstreamId) : 0, downstreamId: isSet(object.downstreamId) ? Number(object.downstreamId) : 0, @@ -3932,7 +4190,6 @@ export const StreamFragmentGraph_StreamFragmentEdge = { const obj: any = {}; message.dispatchStrategy !== undefined && (obj.dispatchStrategy = message.dispatchStrategy ? DispatchStrategy.toJSON(message.dispatchStrategy) : undefined); - message.sameWorkerNode !== undefined && (obj.sameWorkerNode = message.sameWorkerNode); message.linkId !== undefined && (obj.linkId = Math.round(message.linkId)); message.upstreamId !== undefined && (obj.upstreamId = Math.round(message.upstreamId)); message.downstreamId !== undefined && (obj.downstreamId = Math.round(message.downstreamId)); @@ -3946,7 +4203,6 @@ export const StreamFragmentGraph_StreamFragmentEdge = { message.dispatchStrategy = (object.dispatchStrategy !== undefined && object.dispatchStrategy !== null) ? DispatchStrategy.fromPartial(object.dispatchStrategy) : undefined; - message.sameWorkerNode = object.sameWorkerNode ?? false; message.linkId = object.linkId ?? 0; message.upstreamId = object.upstreamId ?? 0; message.downstreamId = object.downstreamId ?? 0; diff --git a/dashboard/proto/gen/stream_service.ts b/dashboard/proto/gen/stream_service.ts index df0b07345db9b..7e74f1baa9bd4 100644 --- a/dashboard/proto/gen/stream_service.ts +++ b/dashboard/proto/gen/stream_service.ts @@ -84,6 +84,7 @@ export interface BarrierCompleteResponse_CreateMviewProgress { chainActorId: number; done: boolean; consumedEpoch: number; + consumedRows: number; } export interface BarrierCompleteResponse_GroupedSstableInfo { @@ -564,7 +565,7 @@ export const BarrierCompleteResponse = { }; function createBaseBarrierCompleteResponse_CreateMviewProgress(): BarrierCompleteResponse_CreateMviewProgress { - return { chainActorId: 0, done: false, consumedEpoch: 0 }; + return { chainActorId: 0, done: false, consumedEpoch: 0, consumedRows: 0 }; } export const BarrierCompleteResponse_CreateMviewProgress = { @@ -573,6 +574,7 @@ export const BarrierCompleteResponse_CreateMviewProgress = { chainActorId: isSet(object.chainActorId) ? Number(object.chainActorId) : 0, done: isSet(object.done) ? Boolean(object.done) : false, consumedEpoch: isSet(object.consumedEpoch) ? Number(object.consumedEpoch) : 0, + consumedRows: isSet(object.consumedRows) ? 
Number(object.consumedRows) : 0, }; }, @@ -581,6 +583,7 @@ export const BarrierCompleteResponse_CreateMviewProgress = { message.chainActorId !== undefined && (obj.chainActorId = Math.round(message.chainActorId)); message.done !== undefined && (obj.done = message.done); message.consumedEpoch !== undefined && (obj.consumedEpoch = Math.round(message.consumedEpoch)); + message.consumedRows !== undefined && (obj.consumedRows = Math.round(message.consumedRows)); return obj; }, @@ -591,6 +594,7 @@ export const BarrierCompleteResponse_CreateMviewProgress = { message.chainActorId = object.chainActorId ?? 0; message.done = object.done ?? false; message.consumedEpoch = object.consumedEpoch ?? 0; + message.consumedRows = object.consumedRows ?? 0; return message; }, }; diff --git a/dashboard/scripts/generate_proto.sh b/dashboard/scripts/generate_proto.sh index c409ba06019ab..860820d56fea1 100755 --- a/dashboard/scripts/generate_proto.sh +++ b/dashboard/scripts/generate_proto.sh @@ -14,6 +14,7 @@ else fi protoc --plugin=./node_modules/.bin/protoc-gen-ts_proto \ + --experimental_allow_proto3_optional \ --ts_proto_out=proto/gen/ \ --proto_path=tmp_gen \ --ts_proto_opt=outputServices=false \ diff --git a/dashboard/test/algo.test.js b/dashboard/test/algo.test.js index a4133a79c0b22..1bbfca8984a3c 100644 --- a/dashboard/test/algo.test.js +++ b/dashboard/test/algo.test.js @@ -1,5 +1,5 @@ /* - * Copyright 2023 Singularity Data + * Copyright 2023 RisingWave Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/docker/Dockerfile b/docker/Dockerfile index 4f435110be167..35e510a8d6b23 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,7 +2,7 @@ FROM ubuntu:22.04 as builder ENV LANG en_US.utf8 -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install make build-essential cmake protobuf-compiler curl pkg-config bash lld +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install make build-essential cmake protobuf-compiler curl pkg-config bash lld maven SHELL ["/bin/bash", "-c"] @@ -18,6 +18,9 @@ ENV PATH /root/.cargo/bin/:$PATH ENV IN_CONTAINER=1 +ARG GIT_SHA +ENV GIT_SHA=$GIT_SHA + # We need to add the `rustfmt` dependency, otherwise `risingwave_pb` will not compile RUN rustup self update \ && rustup set profile minimal \ @@ -25,14 +28,7 @@ RUN rustup self update \ && rustup component add rustfmt RUN cargo fetch -RUN mkdir -p /risingwave/bin - -ARG simd_disabled=false - -RUN if [ "$simd_disabled" == "true" ]; then \ - echo "Disabling SIMD build flags for risingwave" && \ - . 
scripts/cargo-config-disable-simd.sh; \ - fi +RUN mkdir -p /risingwave/bin/connector-node RUN cargo build -p risingwave_cmd -p risingwave_cmd_all --release --features "static-link static-log-level" && \ mv /risingwave/target/release/{frontend,compute-node,meta-node,compactor,risingwave} /risingwave/bin/ && \ @@ -41,13 +37,17 @@ RUN for component in "risingwave" "compute-node" "meta-node" "frontend" "compact objcopy --compress-debug-sections=zlib-gnu /risingwave/bin/${component}; \ done +RUN cd risingwave-connector-node && mvn -B package -Dmaven.test.skip=true +RUN tar -zxvf /risingwave/risingwave-connector-node/assembly/target/risingwave-connector-1.0.0.tar.gz -C /risingwave/bin/connector-node + FROM ubuntu:22.04 as image-base -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install ca-certificates && rm -rf /var/lib/{apt,dpkg,cache,log}/ +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install ca-certificates openjdk-11-jdk && rm -rf /var/lib/{apt,dpkg,cache,log}/ FROM image-base as risingwave LABEL org.opencontainers.image.source https://github.com/risingwavelabs/risingwave -RUN mkdir -p /risingwave/bin +RUN mkdir -p /risingwave/bin/connector-node COPY --from=builder /risingwave/bin/risingwave /risingwave/bin/risingwave +COPY --from=builder /risingwave/bin/connector-node /risingwave/bin/connector-node # Set default playground mode to docker-playground profile ENV PLAYGROUND_PROFILE docker-playground ENTRYPOINT [ "/risingwave/bin/risingwave" ] diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index b5261c0da8362..2c11d7a5f4964 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -5,7 +5,7 @@ services: image: "ghcr.io/risingwavelabs/risingwave:latest" command: - compactor-node - - "--host" + - "--listen-addr" - "0.0.0.0:6660" - "--prometheus-listener-addr" - "0.0.0.0:1260" @@ -42,11 +42,11 @@ services: image: "ghcr.io/risingwavelabs/risingwave:latest" command: - compute-node - - "--host" + - "--listen-addr" - "0.0.0.0:5688" - "--prometheus-listener-addr" - "0.0.0.0:1222" - - "--client-address" + - "--advertise-addr" - "compute-node-0:5688" - "--metrics-level" - "1" @@ -124,7 +124,7 @@ services: image: "ghcr.io/risingwavelabs/risingwave:latest" command: - frontend-node - - "--host" + - "--listen-addr" - "0.0.0.0:4566" - "--meta-addr" - "http://meta-node-0:5690" @@ -181,8 +181,8 @@ services: - meta-node - "--listen-addr" - "0.0.0.0:5690" - - "--host" - - meta-node-0 + - "--advertise-addr" + - "meta-node-0:5690" - "--dashboard-host" - "0.0.0.0:5691" - "--prometheus-host" diff --git a/docs/developer-guide.md b/docs/developer-guide.md index 1ee1913cef9d3..304c156873145 100644 --- a/docs/developer-guide.md +++ b/docs/developer-guide.md @@ -22,6 +22,7 @@ http://ecotrust-canada.github.io/markdown-toc/ * [Configure system variables](#configure-system-variables) * [Start the playground with RiseDev](#start-the-playground-with-risedev) * [Start the playground with cargo](#start-the-playground-with-cargo) +- [Debug playground using vscode](#debug-playground-using-vscode) - [Develop the dashboard](#develop-the-dashboard) * [Dashboard v1](#dashboard-v1) * [Dashboard v2](#dashboard-v2) @@ -66,10 +67,10 @@ RiseDev is the development mode of RisingWave. To develop RisingWave, you need t * Rust toolchain * CMake -* protobuf +* protobuf (>= 3.12.0) * OpenSSL * PostgreSQL (psql) (>= 14.1) -* Tmux +* Tmux (>= v3.2a) * LLVM 15 (To workaround some bugs in macOS toolchain, see https://github.com/risingwavelabs/risingwave/issues/6205). 
To install the dependencies on macOS, run: @@ -197,6 +198,10 @@ Then, connect to the playground instance via: psql -h localhost -p 4566 -d dev -U root ``` +## Debug playground using vscode + +To step through risingwave locally with a debugger you can use the `launch.json` and the `tasks.json` provided in `vscode_suggestions`. After adding these files to your local `.vscode` folder you can debug and set breakpoints by launching `Launch 'risingwave p' debug`. + ## Develop the dashboard Currently, RisingWave has two versions of dashboards. You can use RiseDev config to select which version to use. @@ -302,7 +307,7 @@ Use [sqllogictest-rs](https://github.com/risinglightdb/sqllogictest-rs) to run R sqllogictest installation is included when you install test tools with the `./risedev install-tools` command. You may also install it with: ```shell -cargo install --git https://github.com/risinglightdb/sqllogictest-rs --bin sqllogictest +cargo install sqllogictest-bin --locked ``` Before running end-to-end tests, you will need to start a full cluster first: @@ -450,15 +455,14 @@ license-eye -c .licenserc.yaml header fix ## Add new dependencies -To avoid rebuild some common dependencies across different crates in workspace, use +`./risedev check-hakari`: To avoid rebuild some common dependencies across different crates in workspace, use [cargo-hakari](https://docs.rs/cargo-hakari/latest/cargo_hakari/) to ensure all dependencies are built with the same feature set across workspace. You'll need to run `cargo hakari generate` after deps get updated. -Use [cargo-udeps](https://github.com/est31/cargo-udeps) to find unused dependencies in -workspace. +`./risedev check-udeps`: Use [cargo-udeps](https://github.com/est31/cargo-udeps) to find unused dependencies in workspace. -And use [cargo-sort](https://crates.io/crates/cargo-sort) to ensure all deps are get sorted. +`./risedev check-dep-sort`: Use [cargo-sort](https://crates.io/crates/cargo-sort) to ensure all deps are get sorted. ## Submit PRs diff --git a/docs/memory-profiling.md b/docs/memory-profiling.md index e65a1185c52ca..dc8a29b8433b8 100644 --- a/docs/memory-profiling.md +++ b/docs/memory-profiling.md @@ -12,7 +12,7 @@ RisingWave uses [tikv-jemallocator](https://crates.io/crates/tikv-jemallocator) Luckily, jemalloc provides built-in profiling support ([official wiki](https://github.com/jemalloc/jemalloc/wiki/Use-Case%3A-Heap-Profiling)). jemallocator exposes the feature via a cargo feature ‘profiling’. [Here](https://gist.github.com/ordian/928dc2bd45022cddd547528f64db9174) is a simple guide to profiling with jemallocator. -For RisingWave, [feat: support heap profiling from risedev by fuyufjh · Pull Request #4871](https://github.com/singularity-data/risingwave/pull/4871) added all things needed. Please just follow the below steps. +For RisingWave, [feat: support heap profiling from risedev by fuyufjh · Pull Request #4871](https://github.com/risingwave-labs/risingwave/pull/4871) added all things needed. Please just follow the below steps. 
## Step 1 - Deploy and Run diff --git a/e2e_test/batch/aggregate/distinct.slt.part b/e2e_test/batch/aggregate/distinct.slt.part index 67eb9a50ab392..41dcf59947089 100644 --- a/e2e_test/batch/aggregate/distinct.slt.part +++ b/e2e_test/batch/aggregate/distinct.slt.part @@ -5,25 +5,40 @@ statement ok create table t (v1 int, v2 int, v3 int); statement ok -insert into t values (1, 2, 3), (4, 3, 2), (4, 2, 3), (1, 3, 2); +insert into t values (1,2,3), (1,2,4), (5,3,8), (2,4,4); -query I rowsort -select distinct v1 from t; +query I +select count(distinct v1) from t; ---- -1 -4 +3 -query I -select distinct sum(v1) from t group by v2; +query II rowsort +select v2, count(distinct v1) from t group by v2; ---- -5 +2 1 +3 1 +4 1 -# v2, v3 can be either 2, 3 or 3, 2 -query I -select distinct on(v1) v2 + v3 from t order by v1; +query III rowsort +select v2, count(distinct v1), max(v3) from t group by v2; +---- +2 1 4 +3 1 8 +4 1 4 + +query IIII rowsort +select v1, count(distinct v2), count(distinct v3), max(v2) from t group by v1; +---- +1 1 2 2 +2 1 1 4 +5 1 1 3 + +query IIIII rowsort +select v1, count(distinct v2), min(distinct v2), count(distinct v3), max(v3) from t group by v1; ---- -5 -5 +1 1 2 2 4 +2 1 4 1 4 +5 1 3 1 8 statement ok -drop table t +drop table t; diff --git a/e2e_test/batch/aggregate/stddev_and_variance.slt.part b/e2e_test/batch/aggregate/stddev_and_variance.slt.part new file mode 100644 index 0000000000000..6f3c9b5ff4fdb --- /dev/null +++ b/e2e_test/batch/aggregate/stddev_and_variance.slt.part @@ -0,0 +1,45 @@ +statement ok +SET RW_IMPLICIT_FLUSH TO true; + +statement ok +create table t(v int) + +query RRRR +select stddev_pop(v), stddev_samp(v), var_pop(v), var_samp(v) from t +---- +NULL NULL NULL NULL + +statement ok +insert into t values (1) + +query RRRR +select stddev_pop(v), stddev_samp(v), var_pop(v), var_samp(v) from t +---- +0 NULL 0 NULL + +statement ok +insert into t values (2), (3), (4), (5), (6) + +query RRRR +select stddev_pop(v), stddev_samp(v), var_pop(v), var_samp(v) from t +---- +1.707825127659933 1.8708286933869707 2.9166666666666666666666666667 3.50 + +statement ok +delete from t where v = 3 + +query RRRR +select stddev_pop(v), stddev_samp(v), var_pop(v), var_samp(v) from t +---- +1.8547236990991407 2.073644135332772 3.44 4.30 + +statement ok +update t set v = 7 where v = 4 + +query RRRR +select stddev_pop(v), stddev_samp(v), var_pop(v), var_samp(v) from t +---- +2.3151673805580453 2.588435821108957 5.36 6.70 + +statement ok +drop table t diff --git a/e2e_test/batch/aggregate/sum.slt.part b/e2e_test/batch/aggregate/sum.slt.part index 30f922b52a4dd..7d847e4a2f81a 100644 --- a/e2e_test/batch/aggregate/sum.slt.part +++ b/e2e_test/batch/aggregate/sum.slt.part @@ -55,3 +55,30 @@ select sum(c1) from (select v1, sum(v2) as c1 from t group by v1) statement ok drop table t + +statement ok +create table t(d decimal); + +statement ok +insert into t values (9000000000000000000000000000), +(9000000000000000000000000000), +(9000000000000000000000000000), +(9000000000000000000000000000), +(9000000000000000000000000000), +(9000000000000000000000000000), +(9000000000000000000000000000), +(9000000000000000000000000000); + +query T +select sum(d) from t; +---- +72000000000000000000000000000 + +statement ok +insert into t values (9000000000000000000000000000); + +statement error QueryError: Expr error: Numeric out of range +select sum(d) from t; + +statement ok +drop table t; \ No newline at end of file diff --git a/e2e_test/batch/aggregate/two_phase_agg.slt.part 
b/e2e_test/batch/aggregate/two_phase_agg.slt.part new file mode 100644 index 0000000000000..bb540e036b3fe --- /dev/null +++ b/e2e_test/batch/aggregate/two_phase_agg.slt.part @@ -0,0 +1,44 @@ +statement ok +SET RW_IMPLICIT_FLUSH TO true; + +statement ok +SET RW_ENABLE_TWO_PHASE_AGG=false; + +# This should override `RW_ENABLE_TWO_PHASE_AGG`, enabling it. +statement ok +SET RW_FORCE_TWO_PHASE_AGG=true; + +query T +SHOW RW_ENABLE_TWO_PHASE_AGG; +---- +true + +statement ok +create table t(v1 int, v2 smallint, v3 varchar); + +statement ok +insert into t values +(1, 2, 'abc'), +(3, 4, 'aaaaaaaaaaaaabc'), +(1, 2222, 'ajhaajaj'), +(1, -2, 'aj22jj1j1'), +(0, 2, 'aaaaaaaaaanz'); + +query II +select min(v3), sum(v1) from t group by v1, v3, v2 order by v3 ASC; +---- +aaaaaaaaaaaaabc 3 +aaaaaaaaaanz 0 +abc 1 +aj22jj1j1 1 +ajhaajaj 1 + +query II +select min(v3), sum(v1), min(v1) from t group by v1 order by min(v3); +---- +aaaaaaaaaaaaabc 3 3 +aaaaaaaaaanz 0 0 +abc 3 1 + +statement ok +drop table t; \ No newline at end of file diff --git a/e2e_test/batch/basic/distinct.slt.part b/e2e_test/batch/basic/distinct.slt.part new file mode 100644 index 0000000000000..67eb9a50ab392 --- /dev/null +++ b/e2e_test/batch/basic/distinct.slt.part @@ -0,0 +1,29 @@ +statement ok +SET RW_IMPLICIT_FLUSH TO true; + +statement ok +create table t (v1 int, v2 int, v3 int); + +statement ok +insert into t values (1, 2, 3), (4, 3, 2), (4, 2, 3), (1, 3, 2); + +query I rowsort +select distinct v1 from t; +---- +1 +4 + +query I +select distinct sum(v1) from t group by v2; +---- +5 + +# v2, v3 can be either 2, 3 or 3, 2 +query I +select distinct on(v1) v2 + v3 from t order by v1; +---- +5 +5 + +statement ok +drop table t diff --git a/e2e_test/batch/basic/dml_returning.slt.part.disabled b/e2e_test/batch/basic/dml_returning.slt.part similarity index 80% rename from e2e_test/batch/basic/dml_returning.slt.part.disabled rename to e2e_test/batch/basic/dml_returning.slt.part index d31c5f7e94d7c..0fd05d7a85609 100644 --- a/e2e_test/batch/basic/dml_returning.slt.part.disabled +++ b/e2e_test/batch/basic/dml_returning.slt.part @@ -1,3 +1,6 @@ +statement ok +SET RW_IMPLICIT_FLUSH TO true; + statement ok create table t (v1 int); @@ -8,10 +11,6 @@ insert into t values (1 + 2), (2 + 2), (2 + 3) returning *; 4 5 -# FIXME: implicit flush for `RETURNING` does not work now. 
-statement ok -flush; - query I rowsort select * from t; ---- @@ -25,9 +24,6 @@ update t set v1 = v1 + 3 where v1 <= 4 returning *; 6 7 -statement ok -flush; - query I rowsort select * from t; ---- @@ -41,9 +37,6 @@ delete from t where v1 > 5 returning v1, v1 * 2; 6 12 7 14 -statement ok -flush; - query I rowsort select * from t; ---- diff --git a/e2e_test/batch/basic/shared_view.slt.part b/e2e_test/batch/basic/shared_view.slt.part new file mode 100644 index 0000000000000..ca36fd276555c --- /dev/null +++ b/e2e_test/batch/basic/shared_view.slt.part @@ -0,0 +1,25 @@ +statement ok +SET RW_IMPLICIT_FLUSH TO true; + +statement ok +create table t1 (a int, b int); + +statement ok +create view v1 (x, y, z) as select a + b, a, b from t1 where a < b; + +statement ok +insert into t1 values (1, 2), (3, 2), (1, 3); + +query I rowsort +select * from v1 as w1, v1 as w2 +---- +3 1 2 3 1 2 +3 1 2 4 1 3 +4 1 3 3 1 2 +4 1 3 4 1 3 + +statement ok +drop view v1; + +statement ok +drop table t1; diff --git a/e2e_test/batch/basic/time_window.slt.part b/e2e_test/batch/basic/time_window.slt.part index 42ab5ff499938..ec7187da29319 100644 --- a/e2e_test/batch/basic/time_window.slt.part +++ b/e2e_test/batch/basic/time_window.slt.part @@ -49,6 +49,27 @@ from hop(t1, created_at, interval '15' minute, interval '30' minute) order by ro 8 3 2022-01-01 11:02:00 2022-01-01 10:45:00 2022-01-01 11:15:00 8 3 2022-01-01 11:02:00 2022-01-01 11:00:00 2022-01-01 11:30:00 +query IIT rowsort +select row_id, uid, created_at +from hop(t1, created_at, interval '15' minute, interval '30' minute); +---- +1 1 2022-01-01 10:00:00 +1 1 2022-01-01 10:00:00 +2 3 2022-01-01 10:05:00 +2 3 2022-01-01 10:05:00 +3 2 2022-01-01 10:14:00 +3 2 2022-01-01 10:14:00 +4 1 2022-01-01 10:22:00 +4 1 2022-01-01 10:22:00 +5 3 2022-01-01 10:33:00 +5 3 2022-01-01 10:33:00 +6 2 2022-01-01 10:42:00 +6 2 2022-01-01 10:42:00 +7 1 2022-01-01 10:51:00 +7 1 2022-01-01 10:51:00 +8 3 2022-01-01 11:02:00 +8 3 2022-01-01 11:02:00 + query IT select sum(v), window_start from tumble(t1, created_at, interval '30' minute) diff --git a/e2e_test/batch/catalog/pg_cast.slt.part b/e2e_test/batch/catalog/pg_cast.slt.part index 51e26062be4ca..35fe3749e556a 100644 --- a/e2e_test/batch/catalog/pg_cast.slt.part +++ b/e2e_test/batch/catalog/pg_cast.slt.part @@ -55,18 +55,20 @@ SELECT * FROM pg_catalog.pg_cast 51 1043 1114 EXPLICIT 52 1043 1184 EXPLICIT 53 1043 1186 EXPLICIT -54 1083 1043 ASSIGN -55 1083 1186 IMPLICIT -56 1114 1082 ASSIGN -57 1114 1043 ASSIGN -58 1114 1083 ASSIGN -59 1114 1184 IMPLICIT -60 1184 1082 ASSIGN -61 1184 1043 ASSIGN -62 1184 1083 ASSIGN -63 1184 1114 ASSIGN -64 1186 1043 ASSIGN -65 1186 1083 ASSIGN +54 1043 3802 EXPLICIT +55 1083 1043 ASSIGN +56 1083 1186 IMPLICIT +57 1114 1082 ASSIGN +58 1114 1043 ASSIGN +59 1114 1083 ASSIGN +60 1114 1184 IMPLICIT +61 1184 1082 ASSIGN +62 1184 1043 ASSIGN +63 1184 1083 ASSIGN +64 1184 1114 ASSIGN +65 1186 1043 ASSIGN +66 1186 1083 ASSIGN +67 3802 1043 ASSIGN query TT rowsort SELECT s.typname, t.typname diff --git a/e2e_test/batch/catalog/pg_conversion.slt.part b/e2e_test/batch/catalog/pg_conversion.slt.part new file mode 100644 index 0000000000000..0ba897f8211bb --- /dev/null +++ b/e2e_test/batch/catalog/pg_conversion.slt.part @@ -0,0 +1,3 @@ +query ITIIIIIT +SELECT * FROM pg_catalog.pg_conversion; +---- diff --git a/e2e_test/batch/catalog/pg_enum.slt.part b/e2e_test/batch/catalog/pg_enum.slt.part new file mode 100644 index 0000000000000..e76cc989a0914 --- /dev/null +++ b/e2e_test/batch/catalog/pg_enum.slt.part @@ -0,0 +1,3 @@ 
+query IIIT +SELECT * FROM pg_catalog.pg_enum; +---- diff --git a/e2e_test/batch/catalog/version.slt.part b/e2e_test/batch/catalog/version.slt.part index a95a313413539..6e1a6fb6a75fc 100644 --- a/e2e_test/batch/catalog/version.slt.part +++ b/e2e_test/batch/catalog/version.slt.part @@ -1,4 +1,4 @@ query T -SELECT version(); +select substring(version() from 1 for 15); ---- -PostgreSQL 13.9-RW-0.2.0-alpha +PostgreSQL 13.9 diff --git a/e2e_test/batch/functions/format_type.slt.part b/e2e_test/batch/functions/format_type.slt.part new file mode 100644 index 0000000000000..2f762e7fe7750 --- /dev/null +++ b/e2e_test/batch/functions/format_type.slt.part @@ -0,0 +1,19 @@ +query T +SELECT format_type(16, 0); +---- +boolean + +query T +SELECT pg_catalog.format_type(9527, 1); +---- +??? + +query T +SELECT format_type(1000, NULL); +---- +boolean[] + +query T +SELECT format_type(NULL, 0); +---- +NULL diff --git a/e2e_test/batch/functions/now.slt.part b/e2e_test/batch/functions/now.slt.part index ee706e80e2eb9..8d3ce7867b3d9 100644 --- a/e2e_test/batch/functions/now.slt.part +++ b/e2e_test/batch/functions/now.slt.part @@ -10,6 +10,13 @@ create table t (a timestamp); statement ok insert into t values(now()); +# constant eval of now in batch plan +# query T +# explain select now() + interval '1 hour' = now() + interval '30 minutes' + interval '30 minutes' true; +# ---- +# BatchProject { exprs: [true:Boolean] } +# └─BatchValues { rows: [[]] } + statement ok drop table tz diff --git a/e2e_test/batch/functions/overlay.slt.part b/e2e_test/batch/functions/overlay.slt.part index 0f521061b9492..dd59d09eb933a 100644 --- a/e2e_test/batch/functions/overlay.slt.part +++ b/e2e_test/batch/functions/overlay.slt.part @@ -29,3 +29,6 @@ SELECT OVERLAY('abc' PLACING 'xyz' FOR 2) statement error SELECT OVERLAY('abc' PLACING 'xyz' FOR 2 FROM 1) + +statement error +SELECT OVERLAY('l2bWQBOIj9' PLACING 'DIHgr7AB4z' FROM (INT '-2147483648') FOR (INT '1')); diff --git a/e2e_test/batch/functions/pow.slt.part b/e2e_test/batch/functions/pow.slt.part new file mode 100644 index 0000000000000..ac67691dec08a --- /dev/null +++ b/e2e_test/batch/functions/pow.slt.part @@ -0,0 +1,91 @@ +query R +select pow(2.0, 3.0) +---- +8 + +query R +select power(2.0, 3.0) +---- +8 + +query R +select pow(2.0::decimal, 3.0::decimal) +---- +8 + +query R +select pow(2.0::double, 3.0::double) +---- +8 + +query R +select pow(2.0::smallint, 3.0::smallint) +---- +8 + +query R +select pow(2.0::bigint, 3.0::bigint) +---- +8 + +query R +select pow(2.0, -2); +---- +0.25 + +query R +select pow(2.23, -2.33); +---- +0.15432975583772085 + +query R +select pow(100000, 0); +---- +1 + +query R +select pow(100000, -200000000000000); +---- +0 + +statement error QueryError: Expr error: Numeric out of range +select pow(100000, 200000000000000); + + +statement error QueryError: Expr error: Numeric out of range +select pow(-100000, 200000000000001); + +query R +select exp(0::smallint); +---- +1 + +query R +select exp(0.0); +---- +1 + +query R +select exp(0.0::decimal); +---- +1 + +query R +select exp(2.0); +---- +7.38905609893065 + +query R +select exp(2::smallint) +---- +7.38905609893065 + +statement error QueryError: Expr error: Numeric out of range +select exp(10000000); + +# We remark that this test case underflows in PG. 
TODO: We can make it fully compatible if necessary +# https://github.com/postgres/postgres/blob/REL_15_2/src/backend/utils/adt/float.c#L1649 +query T +select exp(-10000000); +---- +0 \ No newline at end of file diff --git a/e2e_test/batch/functions/session_timezone.slt b/e2e_test/batch/functions/session_timezone.slt index 1e018f2980503..5339d37f566a1 100644 --- a/e2e_test/batch/functions/session_timezone.slt +++ b/e2e_test/batch/functions/session_timezone.slt @@ -55,5 +55,35 @@ select '2022-01-01 00:00:00-08:00'::timestamp with time zone::varchar; ---- 2022-01-01 00:00:00-08:00 +statement ok +set timezone = 'europe/london' + +# Add/Subtract timestamptz with interval across a daylight savings boundary +# Daylight savings falls on 2016-10-30 in London timezone + +# This should first add the 24 hours crossing the daylight saving boundary from UTC+1->UTC, then the day +query T +select '2016-10-29 12:00:00'::timestamptz + interval '24 hours' + interval '1 day'; +---- +2016-10-31 11:00:00+00:00 + +# This should first add the days at UTC+1->UTC boundary (no change to time), then the hours +query T +select (interval '24 hours' + interval '1 day') + '2016-10-29 12:00:00'::timestamptz; +---- +2016-10-31 12:00:00+00:00 + +# Test inverse for subtract, only -1 day is applied at the UTC->UTC+1 boundary (no change to time) +query T +select '2016-10-31 11:00:00+00:00'::timestamptz - interval '24 hours' - interval '1 day'; +---- +2016-10-29 11:00:00+01:00 + +# Test inverse for subtract, this time we apply diff 1 day first, then -24 hours at the UTC->UTC+1 boundary +query T +select '2016-10-31 11:00:00+00:00'::timestamptz - (interval '24 hours' + interval '1 day'); +---- +2016-10-29 12:00:00+01:00 + statement ok set timezone = 'UTC'; \ No newline at end of file diff --git a/e2e_test/batch/functions/substr.slt.part b/e2e_test/batch/functions/substr.slt.part new file mode 100644 index 0000000000000..651def53e84a8 --- /dev/null +++ b/e2e_test/batch/functions/substr.slt.part @@ -0,0 +1,29 @@ +# issue: https://github.com/risingwavelabs/risingwave/issues/7632 +query T +select substr('W7Jc3Vyufj', (INT '-2147483648')); +---- +W7Jc3Vyufj + +statement error length in substr should be non-negative +select substr('W7Jc3Vyufj', INT '-2147483648', INT '-2147483648'); + +query T +select substr('W7Jc3Vyufj', INT '2147483647', INT '2147483647'); +---- +(empty) + +query T +select substr('W7Jc3Vyufj', INT '-2147483645', INT '2147483647'); +---- +W + +query T +select substr('W7Jc3Vyufj', INT '-2147483648', INT '2147483647'); +---- +(empty) + +# issue: https://github.com/risingwavelabs/risingwave/issues/7601 +query T +select substr('a', 2147483646, 1); +---- +(empty) \ No newline at end of file diff --git a/e2e_test/batch/issue_7324.slt b/e2e_test/batch/issue_7324.slt new file mode 100644 index 0000000000000..e40e67064f742 --- /dev/null +++ b/e2e_test/batch/issue_7324.slt @@ -0,0 +1,31 @@ +# This is a test on error propagation of local mode. If we can not correctly handle the error report, it will hang up (#7324). 
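+# The final SELECT below is expected to fail: the table contains 32767 and -32767, and multiplying
+# either by smallint '2' overflows the int2 range, so local execution mode must propagate the error
+# back to the client instead of hanging.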
+ +statement ok +SET RW_IMPLICIT_FLUSH TO true; + +statement ok +SET CREATE_COMPACTION_GROUP_FOR_MV TO true; + +statement ok +CREATE TABLE INT2_TBL(f1 int2); + +statement ok +INSERT INTO INT2_TBL(f1) VALUES ('0 '); + +statement ok +INSERT INTO INT2_TBL(f1) VALUES (' 1234 '); + +statement ok +INSERT INTO INT2_TBL(f1) VALUES (' -1234'); + +statement ok +INSERT INTO INT2_TBL(f1) VALUES ('32767'); + +statement ok +INSERT INTO INT2_TBL(f1) VALUES ('-32767'); + +statement error +SELECT i.f1, i.f1 * smallint '2' AS x FROM INT2_TBL i; + +statement ok +drop table INT2_TBL; \ No newline at end of file diff --git a/e2e_test/batch/top_n/top_n_on_index.slt b/e2e_test/batch/top_n/top_n_on_index.slt new file mode 100644 index 0000000000000..616f551e92e34 --- /dev/null +++ b/e2e_test/batch/top_n/top_n_on_index.slt @@ -0,0 +1,62 @@ +statement ok +SET RW_IMPLICIT_FLUSH TO true; + +statement ok +create table t(x int, y int); + +statement ok +create index idx on t(x); + +statement ok +create index idx2 on t(x desc); + +statement ok +insert into t values (100, 3), (1, 0), (2, 3), (3, 4), (5, 4); + +query II +select * from t order by x limit 1; +---- +1 0 + +query II +select * from t order by x limit 3; +---- +1 0 +2 3 +3 4 + +query II +select * from t order by x desc limit 1; +---- +100 3 + +query II +select * from t order by x desc limit 3; +---- +100 3 +5 4 +3 4 + +statement ok +create table t1(x int primary key, y int); + +statement ok +insert into t1 values (100, 3), (1, 0), (2, 3), (3, 4), (5, 4); + +query II +select * from t1 order by x limit 1; +---- +1 0 + +query II +select * from t1 order by x limit 3; +---- +1 0 +2 3 +3 4 + +statement ok +drop table t; + +statement ok +drop table t1; diff --git a/e2e_test/batch/types/array_ty.slt.part b/e2e_test/batch/types/array_ty.slt.part index 3839d261dcfdd..5f1a58e685319 100644 --- a/e2e_test/batch/types/array_ty.slt.part +++ b/e2e_test/batch/types/array_ty.slt.part @@ -84,6 +84,20 @@ select max(ARRAY[1, v1*2]) from t; ---- {1,6} +query T +select CAST(NULL as bool[]) from t; +---- +NULL +NULL +NULL + +query T +select array[false, false] from t; +---- +{f,f} +{f,f} +{f,f} + statement ok drop table t; diff --git a/e2e_test/batch/types/bytea.slt.part b/e2e_test/batch/types/bytea.slt.part index 7e1c1ee05d550..c4df7c81889fe 100644 --- a/e2e_test/batch/types/bytea.slt.part +++ b/e2e_test/batch/types/bytea.slt.part @@ -32,6 +32,16 @@ select * from bytes; \x666768 \xdeadbeef +query T +select * from bytes order by i; +---- +\x12cd +\x3078666768 +\x31323334 +\x31324344 +\x666768 +\xdeadbeef + statement ok drop table bytes; diff --git a/e2e_test/batch/types/date.slt b/e2e_test/batch/types/date.slt index 0d9d49b32598c..ccdc36ba868a1 100644 --- a/e2e_test/batch/types/date.slt +++ b/e2e_test/batch/types/date.slt @@ -58,3 +58,20 @@ INSERT INTO dates VALUES ('1000000000-01-01'); statement ok DROP TABLE dates; + +# Issue #7566 +# date_int_sub +statement error out of range +select DATE '2022-08-09' - (INT '-2147483648'); + +# date_int_add +statement error out of range +select DATE '2022-08-09' + (INT '-2147483648'); + +# date_interval_sub +statement error out of range +select DATE '2022-08-09' - (INTERVAL '-2147483648 days'); + +# date_interval_add +statement error out of range +select DATE '2022-08-09' + (INTERVAL '-2147483648 days'); diff --git a/e2e_test/batch/types/jsonb.slt.part b/e2e_test/batch/types/jsonb.slt.part new file mode 100644 index 0000000000000..c34ba5850b17b --- /dev/null +++ b/e2e_test/batch/types/jsonb.slt.part @@ -0,0 +1,37 @@ +statement ok +SET 
RW_IMPLICIT_FLUSH TO true; + +query T rowsort +values ('{"a":[2, true, "", {}]}'::jsonb), ('1'), ('true'), ('null'), (null), ('[1, true]'); +---- +1 +NULL +[1,true] +null +true +{"a":[2,true,"",{}]} + +statement ok +create table t (v1 jsonb); + +statement ok +insert into t values ('1'), ('true'), ('null'), (null); + +query T rowsort +select * from t; +---- +1 +NULL +null +true + +query T +select * from t order by v1::varchar; +---- +1 +null +true +NULL + +statement ok +drop table t; diff --git a/e2e_test/batch/types/jsonb_ord.slt.part b/e2e_test/batch/types/jsonb_ord.slt.part new file mode 100644 index 0000000000000..59dc5406ab718 --- /dev/null +++ b/e2e_test/batch/types/jsonb_ord.slt.part @@ -0,0 +1,44 @@ +# We do not intend to support using `jsonb` type for `group by` / `order by` / `primary key` +# Before #7981 is done, we need these tests to make sure our system do not panic. +# After #7981, we need them to make sure proper errors are returned to user. + +statement ok +SET RW_IMPLICIT_FLUSH TO true; + +statement ok +values ('{"a":[2, true, "", {}]}'::jsonb), ('1'), ('true'), ('null'), (null), ('[1, true]') order by 1; + +statement ok +create table t (v1 jsonb); + +statement ok +insert into t values ('1'), ('true'), ('null'), (null); + +statement ok +select * from t order by v1; + +# deserialize length +statement ok +create materialized view mv1 as select * from t group by v1; + +statement ok +select * from mv1; + +statement ok +drop materialized view mv1; + +# deserialize pk +statement ok +create table t2 (v1 jsonb primary key); + +statement ok +insert into t2 values ('1'), ('true'), ('null'), (null); + +statement ok +select * from t2; + +statement ok +drop table t2; + +statement ok +drop table t; diff --git a/e2e_test/batch/types/list/multi-dimentional_list_cast.slt.part b/e2e_test/batch/types/list/multi-dimentional_list_cast.slt.part new file mode 100644 index 0000000000000..a69e52ad22872 --- /dev/null +++ b/e2e_test/batch/types/list/multi-dimentional_list_cast.slt.part @@ -0,0 +1,25 @@ +query I +select array[array[1, 2], array[3, 4]]; +---- +{{1,2},{3,4}} + +query I +select array[[1, 2], [3, 4]]; +---- +{{1,2},{3,4}} + +query I +select array[[array[1, 2]], [[3, 4]]]; +---- +{{{1,2}},{{3,4}}} + +query I +select array[[[1, 2]], [array[3, 4]]]; +---- +{{{1,2}},{{3,4}}} + +statement error syntax error at or near +select array[array[1, 2], [3, 4]]; + +statement error syntax error at or near +select array[[1, 2], array[3, 4]]; \ No newline at end of file diff --git a/e2e_test/batch/types/time.slt.part b/e2e_test/batch/types/time.slt.part index ee6612f175d09..7f35da5d37cbb 100644 --- a/e2e_test/batch/types/time.slt.part +++ b/e2e_test/batch/types/time.slt.part @@ -55,3 +55,12 @@ select v3 from t where v3 > '2022-01-01'::DATE; statement ok drop table t; + +# Issue #7566 +# timestamp_interval_sub +statement error out of range +select TIMESTAMP '2022-08-09 00:00:00' - (INTERVAL '-2147483648 days'); + +# timestamp_interval_add +statement error out of range +select TIMESTAMP '2022-08-09 00:00:00' + (INTERVAL '-2147483648 days'); diff --git a/e2e_test/batch/types/timestamptz_utc.slt.part b/e2e_test/batch/types/timestamptz_utc.slt.part index c632669df24f6..131ffb2e81941 100644 --- a/e2e_test/batch/types/timestamptz_utc.slt.part +++ b/e2e_test/batch/types/timestamptz_utc.slt.part @@ -119,3 +119,12 @@ query T select '2022-03-13 09:00:00Z'::timestamptz + interval '1' day - interval '24' hour; ---- 2022-03-13 09:00:00+00:00 + +# Issue #7566 +# timestamptz_interval_sub +statement error out of range 
+select TIMESTAMP WITH TIME ZONE '2022-08-09 00:00:00' - (INTERVAL '-2147483648 days'); + +# timestamptz_interval_add +statement error out of range +select TIMESTAMP WITH TIME ZONE '2022-08-09 00:00:00' + (INTERVAL '-2147483648 days'); diff --git a/e2e_test/compaction/ingest_rows.slt b/e2e_test/compaction/ingest_rows.slt index 5b6b8656ed31f..3b59de609f674 100644 --- a/e2e_test/compaction/ingest_rows.slt +++ b/e2e_test/compaction/ingest_rows.slt @@ -1,30 +1,30 @@ statement ok CREATE SOURCE person - (id INTEGER, name VARCHAR, "email_address" VARCHAR, "credit_card" VARCHAR, city VARCHAR, state VARCHAR, "date_time" TIMESTAMP) + (id BIGINT, name VARCHAR, "email_address" VARCHAR, "credit_card" VARCHAR, city VARCHAR, state VARCHAR, "date_time" TIMESTAMP, "extra" VARCHAR) with ( connector = 'nexmark', nexmark.table.type = 'Person', nexmark.split.num = '12', nexmark.min.event.gap.in.ns = '0' -) ROW FORMAT JSON; +); statement ok -CREATE SOURCE auction (id INTEGER, "item_name" VARCHAR, description VARCHAR, "initial_bid" INTEGER, reserve INTEGER, "date_time" TIMESTAMP, expires TIMESTAMP, seller INTEGER, category INTEGER) +CREATE SOURCE auction (id BIGINT, "item_name" VARCHAR, description VARCHAR, "initial_bid" BIGINT, reserve BIGINT, "date_time" TIMESTAMP, expires TIMESTAMP, seller BIGINT, category BIGINT, "extra" VARCHAR) with ( connector = 'nexmark', nexmark.table.type = 'Auction', nexmark.split.num = '12', nexmark.min.event.gap.in.ns = '0' -) ROW FORMAT JSON; +); statement ok -CREATE SOURCE bid (auction INTEGER, bidder INTEGER, price INTEGER, "date_time" TIMESTAMP) +CREATE SOURCE bid (auction BIGINT, bidder BIGINT, price BIGINT, "channel" VARCHAR, "url" VARCHAR, "date_time" TIMESTAMP, "extra" VARCHAR) with ( connector = 'nexmark', nexmark.table.type = 'Bid', nexmark.split.num = '12', nexmark.min.event.gap.in.ns = '0' -) ROW FORMAT JSON; +); statement ok CREATE MATERIALIZED VIEW nexmark_q7 AS diff --git a/e2e_test/ddl/show.slt b/e2e_test/ddl/show.slt index 7092a7d4fae7a..f49ae4d58edfa 100644 --- a/e2e_test/ddl/show.slt +++ b/e2e_test/ddl/show.slt @@ -13,6 +13,7 @@ describe t3; v1 Int32 v2 Int32 v3 Int32 +primary key _row_id query TT show columns from t3; @@ -30,6 +31,7 @@ describe t3; v1 Int32 v2 Int32 v3 Int32 +primary key _row_id idx1 index(v1, v2) include(v3) distributed by(v1, v2) statement ok @@ -125,6 +127,7 @@ definition Varchar matviewid Int32 matviewtimezone Varchar matviewgraph Varchar +primary key schemaname query TT show columns from pg_catalog.pg_matviews; diff --git a/e2e_test/ddl/table.slt b/e2e_test/ddl/table.slt index dbcb86cd9cceb..6cc3d7325c424 100644 --- a/e2e_test/ddl/table.slt +++ b/e2e_test/ddl/table.slt @@ -126,13 +126,27 @@ statement error create table t (v1 varchar collate "en_US"); # Test create-table-as -statement error alias must be specified +statement ok create table t as select 1; -# FIXME: this should be supported -statement error alias must be specified +statement ok +drop table t; + +statement error +create table t as select 1,2; + +statement ok +create table t as select 1 as a, 2 as b; + +statement ok +drop table t; + +statement ok create table t(v1) as select 1; +statement ok +drop table t; + statement ok create table t (v1 int,v2 int); diff --git a/e2e_test/extended_query/basic.slt b/e2e_test/extended_query/basic.slt index 76d9093110d6b..bcc8b51babc0d 100644 --- a/e2e_test/extended_query/basic.slt +++ b/e2e_test/extended_query/basic.slt @@ -50,6 +50,7 @@ describe t3; v1 Int32 v2 Int32 v3 Int32 +primary key _row_id query III show columns from t3; diff --git 
a/e2e_test/nexmark/create_sources.slt.part b/e2e_test/nexmark/create_sources.slt.part index 1b852ea5a410d..88e62229efe96 100644 --- a/e2e_test/nexmark/create_sources.slt.part +++ b/e2e_test/nexmark/create_sources.slt.part @@ -30,7 +30,7 @@ CREATE SOURCE nexmark ( connector = 'nexmark', nexmark.split.num = '2', nexmark.min.event.gap.in.ns = '100' -) ROW FORMAT JSON; +); statement ok CREATE VIEW PERSON as select (person).* from nexmark where event_type = 0; diff --git a/e2e_test/sink/iceberg_sink.slt b/e2e_test/sink/iceberg_sink.slt new file mode 100644 index 0000000000000..9b9e51c095cdd --- /dev/null +++ b/e2e_test/sink/iceberg_sink.slt @@ -0,0 +1,30 @@ +statement ok +CREATE TABLE t6 (v1 int primary key, v2 int); + +statement ok +CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6; + +statement ok +CREATE SINK s6 AS select mv6.v1 as v1, mv6.v2 as v2 from mv6 WITH ( + connector = 'iceberg', + sink.mode='append-only', + location.type='minio', + warehouse.path='minio://hummockadmin:hummockadmin@127.0.0.1:9301/iceberg', + database.name='demo_db', + table.name='demo_table' +); + +statement ok +INSERT INTO t6 VALUES (1, 2), (2, 2), (3, 2), (5, 2), (8, 2), (13, 2), (21, 2); + +statement ok +DROP SINK s6; + +statement ok +DROP MATERIALIZED VIEW mv6; + +statement ok +DROP TABLE t6; + +statement ok +FLUSH; diff --git a/e2e_test/source/basic/datagen.slt b/e2e_test/source/basic/datagen.slt index d14e5816ac557..d238f1baa9e75 100644 --- a/e2e_test/source/basic/datagen.slt +++ b/e2e_test/source/basic/datagen.slt @@ -9,7 +9,7 @@ create table s1 (v1 int, v2 float) with ( fields.v2.end = '20', datagen.rows.per.second='15', datagen.split.num = '1' -) row format json; +); # Wait enough time to ensure Datagen connector generate data sleep 2s @@ -19,7 +19,7 @@ flush; # Will only generate 10 records since `fields.v1.end` is 10 query II rowsort -select v1, v2 from s1 where v1 is not null limit 15; +select v1, v2 from s1 limit 15; ---- 1 11 10 20 @@ -43,7 +43,7 @@ create table s1 (v1 int) with ( fields.v1.end = '100', datagen.rows.per.second = '10', datagen.split.num = '5' -) row format json; +); # Wait enough time to ensure Datagen connector generate data sleep 2s @@ -166,4 +166,25 @@ select count(*) > 10 from s1; t statement ok -drop table s1; \ No newline at end of file +drop table s1; + +# Allow an extra comma at the end of With clause. +statement ok +create table s1 (v1 int) with ( + connector = 'datagen', + fields.v1.kind = 'sequence', + fields.v1.start = '1', + fields.v1.end = '100', + datagen.split.num = '5', +) row format json; + +statement ok +drop table s1; + +# Do NOT allow With clause to contain a comma only. +statement error QueryError: sql parser error: Expected identifier.* +create table s1 (v1 int) with (,) row format json; + +# Do NOT allow an empty With clause. 
+statement error QueryError: sql parser error: Expected identifier.* +create table s1 (v1 int) with () row format json; diff --git a/e2e_test/source/basic/kafka.slt b/e2e_test/source/basic/kafka.slt index 196bc64cc7d9f..a7c8b6c73205e 100644 --- a/e2e_test/source/basic/kafka.slt +++ b/e2e_test/source/basic/kafka.slt @@ -56,7 +56,8 @@ create table s5 (v1 int, v2 varchar, v3 int[], v4 struct) with ( connector = 'kafka', topic = 'kafka_4_partition_topic_with_100_message', properties.bootstrap.server = '127.0.0.1:29092', - scan.startup.mode = 'earliest' + scan.startup.mode = 'earliest', + appendonly = 'true' ) row format json statement ok @@ -77,8 +78,8 @@ statement ok create sink si from s5 with ( - kafka.brokers = '127.0.0.1:29092', - kafka.topic = 'sink_target', + properties.bootstrap.server = '127.0.0.1:29092', + topic = 'sink_target', format = 'append_only', connector = 'kafka' ) diff --git a/e2e_test/source/basic/kafka_batch.slt b/e2e_test/source/basic/kafka_batch.slt index ee5253890b99f..20aa4ff74d8f4 100644 --- a/e2e_test/source/basic/kafka_batch.slt +++ b/e2e_test/source/basic/kafka_batch.slt @@ -88,6 +88,30 @@ query IT select * from s1 where _rw_kafka_timestamp > '2045-01-01 0:00:00+00:00' ---- +query B +select _rw_kafka_timestamp > '1977-01-01 00:00:00+00:00' from s1 +---- +t +t +t +t + +query B +select _rw_kafka_timestamp < now() from s1 +---- +t +t +t +t + +query B +select _rw_kafka_timestamp < now() - interval '1 day' from s1 +---- +f +f +f +f + query IT rowsort select * from s1 limit 2 ---- diff --git a/e2e_test/streaming/append_only.slt b/e2e_test/streaming/append_only.slt index 01e869a3782c3..bbba0508548e7 100644 --- a/e2e_test/streaming/append_only.slt +++ b/e2e_test/streaming/append_only.slt @@ -18,7 +18,7 @@ insert into t2 values (1,5), (2,6), (3, 7); statement ok create materialized view mv1 as select t1.v1 as id, v2, v3 from t1 join t2 on t1.v1=t2.v1; -query I rowsort +query III rowsort select * from mv1; ---- 1 2 5 @@ -27,7 +27,7 @@ select * from mv1; statement ok insert into t1 values (3,4), (7,7); -query II rowsort +query III rowsort select * from mv1; ---- 1 2 5 @@ -45,7 +45,7 @@ insert into t4 values (1,1,4), (5,1,4), (1,9,1), (9,8,1), (0,2,3); statement ok create materialized view mv3 as select v3, sum(v1) as sum_v1, min(v1) as min_v1, max(v1) as max_v1 from t4 group by v3; -query III +query IIII select sum_v1, min_v1, max_v1, v3 from mv3 order by sum_v1; ---- 0 0 0 3 @@ -58,18 +58,31 @@ statement ok create materialized view mv4 as select v1, v3 from t4 order by v1 limit 3 offset 3; ## scan MV with ORDER BY isn't guaranteed to be ordered -query IV rowsort +query II rowsort select * from mv4; ---- 5 4 9 1 +## Group TopN +statement ok +create materialized view mv4_1 as +select v1, v3 from ( + select *, ROW_NUMBER() OVER (PARTITION BY v3 ORDER BY v1) as rank from t4 +) +where rank <= 2 AND rank > 1; + +query II rowsort +select * from mv4_1; +---- +5 4 +9 1 ## SimpleAgg statement ok create materialized view mv5 as select sum(v1) as sum_v1, max(v2) as max_v2, min(v3) as min_v3 from t4; -query V +query III select * from mv5; ---- 16 9 1 @@ -84,7 +97,7 @@ insert into t5 values (1,0), (1,1), (1,2), (1,3); statement ok create materialized view mv6 as select v1, v2 from t5 order by v1 fetch first 3 rows with ties; -query IV rowsort +query II rowsort select * from mv6; ---- 1 0 @@ -95,7 +108,7 @@ select * from mv6; statement ok insert into t5 values (0,1), (0,2); -query IV rowsort +query II rowsort select * from mv6; ---- 0 1 @@ -108,7 +121,7 @@ select * from mv6; statement ok 
insert into t5 values (0,3); -query IV rowsort +query II rowsort select * from mv6; ---- 0 1 @@ -121,6 +134,9 @@ drop materialized view mv6 statement ok drop materialized view mv5 +statement ok +drop materialized view mv4_1 + statement ok drop materialized view mv4 diff --git a/e2e_test/streaming/basic_agg.slt b/e2e_test/streaming/basic_agg.slt index b8d52b02c8e3c..24167237b0091 100644 --- a/e2e_test/streaming/basic_agg.slt +++ b/e2e_test/streaming/basic_agg.slt @@ -4,13 +4,6 @@ SET RW_IMPLICIT_FLUSH TO true; statement ok create table t (v1 int, v2 numeric, v3 double, v4 interval); -statement ok -insert into t values -(null, 2, 3, interval '1' second), -(4, null, 6, interval '1' minute), -(7, 8, null, interval '1' hour), -(10, 11, 12, null); - statement ok create materialized view mv_sum as select @@ -36,6 +29,20 @@ from t; statement ok flush; +# For basic/simple agg, there should be the trivial agg values +# even without any input rows. +query I +select * from mv_sum; +---- +0 0 0 0 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL + +statement ok +insert into t values +(null, 2, 3, interval '1' second), +(4, null, 6, interval '1' minute), +(7, 8, null, interval '1' hour), +(10, 11, 12, null); + query I select * from mv_sum; ---- diff --git a/e2e_test/streaming/delta_join/delta_join_snapshot.slt b/e2e_test/streaming/delta_join/delta_join_snapshot.slt new file mode 100644 index 0000000000000..1f973608aacb0 --- /dev/null +++ b/e2e_test/streaming/delta_join/delta_join_snapshot.slt @@ -0,0 +1,55 @@ +statement ok +set rw_implicit_flush = true; + +statement ok +set rw_streaming_enable_delta_join = true; + +statement ok +set streaming_parallelism = 2; + +statement ok +create table a (a1 int, a2 int); + +statement ok +create index i_a1 on a(a1); + +statement ok +set streaming_parallelism = 3; + +statement ok +create table b (b1 int, b2 int); + +statement ok +create index i_b1 on b(b1); + +statement ok +insert into A values (1,2), (1,3); + +statement ok +insert into B values (1,4), (1,5); + +statement ok +set streaming_parallelism = 0; + +statement ok +create materialized view v as select * from a join b on a.a1 = b.b1; + +query IIII rowsort +select * from v order by a1, a2, b1, b2; +---- +1 2 1 4 +1 2 1 5 +1 3 1 4 +1 3 1 5 + +statement ok +drop materialized view v; + +statement ok +drop table a; + +statement ok +drop table b; + +statement ok +set rw_streaming_enable_delta_join = false; diff --git a/e2e_test/streaming/delta_join/delta_join_snapshot_no_index.slt b/e2e_test/streaming/delta_join/delta_join_snapshot_no_index.slt new file mode 100644 index 0000000000000..a22ceff6423a4 --- /dev/null +++ b/e2e_test/streaming/delta_join/delta_join_snapshot_no_index.slt @@ -0,0 +1,47 @@ +statement ok +set rw_implicit_flush = true; + +statement ok +set rw_streaming_enable_delta_join = true; + +statement ok +set streaming_parallelism = 2; + +statement ok +create table a (a1 int primary key, a2 int); + +statement ok +set streaming_parallelism = 3; + +statement ok +create table b (b1 int primary key, b2 int); + +statement ok +insert into A values (1,2), (11, 22); + +statement ok +insert into B values (1,4), (11, 44); + +statement ok +set streaming_parallelism = 0; + +statement ok +create materialized view v as select * from a join b on a.a1 = b.b1; + +query IIII rowsort +select * from v order by a1, a2, b1, b2; +---- +1 2 1 4 +11 22 11 44 + +statement ok +drop materialized view v; + +statement ok +drop table a; + +statement ok +drop table b; + +statement ok +set rw_streaming_enable_delta_join = 
false; diff --git a/e2e_test/streaming/delta_join/delta_join_upstream.slt b/e2e_test/streaming/delta_join/delta_join_upstream.slt new file mode 100644 index 0000000000000..090e001d4885e --- /dev/null +++ b/e2e_test/streaming/delta_join/delta_join_upstream.slt @@ -0,0 +1,55 @@ +statement ok +set rw_implicit_flush = true; + +statement ok +set rw_streaming_enable_delta_join = true; + +statement ok +set streaming_parallelism = 2; + +statement ok +create table a (a1 int, a2 int); + +statement ok +create index i_a1 on a(a1); + +statement ok +set streaming_parallelism = 3; + +statement ok +create table b (b1 int, b2 int); + +statement ok +create index i_b1 on b(b1); + +statement ok +set streaming_parallelism = 0; + +statement ok +create materialized view v as select * from a join b on a.a1 = b.b1; + +statement ok +insert into A values (1,2), (1,3); + +statement ok +insert into B values (1,4), (1,5); + +query IIII rowsort +select * from v order by a1, a2, b1, b2; +---- +1 2 1 4 +1 2 1 5 +1 3 1 4 +1 3 1 5 + +statement ok +drop materialized view v; + +statement ok +drop table a; + +statement ok +drop table b; + +statement ok +set rw_streaming_enable_delta_join = false; diff --git a/e2e_test/streaming/delta_join/delta_join_upstream_no_index.slt b/e2e_test/streaming/delta_join/delta_join_upstream_no_index.slt new file mode 100644 index 0000000000000..956bdc31682f4 --- /dev/null +++ b/e2e_test/streaming/delta_join/delta_join_upstream_no_index.slt @@ -0,0 +1,47 @@ +statement ok +set rw_implicit_flush = true; + +statement ok +set rw_streaming_enable_delta_join = true; + +statement ok +set streaming_parallelism = 2; + +statement ok +create table a (a1 int primary key, a2 int); + +statement ok +set streaming_parallelism = 3; + +statement ok +create table b (b1 int primary key, b2 int); + +statement ok +set streaming_parallelism = 0; + +statement ok +create materialized view v as select * from a join b on a.a1 = b.b1; + +statement ok +insert into A values (1,2), (11, 22); + +statement ok +insert into B values (1,4), (11, 44); + +query IIII rowsort +select * from v order by a1, a2, b1, b2; +---- +1 2 1 4 +11 22 11 44 + +statement ok +drop materialized view v; + +statement ok +drop table a; + +statement ok +drop table b; + +statement ok +set rw_streaming_enable_delta_join = false; diff --git a/e2e_test/streaming/distinct_agg.slt b/e2e_test/streaming/distinct_agg.slt new file mode 100644 index 0000000000000..4eb7cae73bf50 --- /dev/null +++ b/e2e_test/streaming/distinct_agg.slt @@ -0,0 +1,74 @@ +statement ok +SET RW_IMPLICIT_FLUSH TO true; + +statement ok +create table t (v1 int, v2 int, v3 int); + +statement ok +create materialized view mv1 as select count(distinct v1) as c_d_v1 from t; + +statement ok +create materialized view mv2 as select v2, count(distinct v1) as c_d_v1 from t group by v2; + +statement ok +create materialized view mv3 as select v2, count(distinct v1) as c_d_v1, max(v3) as max_v3 from t group by v2; + +statement ok +create materialized view mv4 as select v1, count(distinct v2) as c_d_v2, count(distinct v3) as c_d_v3, max(v2) as max_v2 from t group by v1; + +statement ok +create materialized view mv5 as select v1, count(distinct v2) as c_d_v2, min(distinct v2) as min_d_v2, count(distinct v3) as c_d_v3, max(v3) as max_v3 from t group by v1; + +statement ok +insert into t values (1,2,3), (1,2,4), (5,3,8), (2,4,4); + +query I +select * from mv1; +---- +3 + +query II rowsort +select * from mv2; +---- +2 1 +3 1 +4 1 + +query III rowsort +select * from mv3; +---- +2 1 4 +3 1 8 +4 1 4 + +query IIII 
rowsort +select * from mv4; +---- +1 1 2 2 +2 1 1 4 +5 1 1 3 + +query IIIII rowsort +select * from mv5; +---- +1 1 2 2 4 +2 1 4 1 4 +5 1 3 1 8 + +statement ok +drop materialized view mv1; + +statement ok +drop materialized view mv2; + +statement ok +drop materialized view mv3; + +statement ok +drop materialized view mv4; + +statement ok +drop materialized view mv5; + +statement ok +drop table t; diff --git a/e2e_test/streaming/dynamic_filter.slt b/e2e_test/streaming/dynamic_filter.slt index 826cdc8e251b4..4f4790bac80d4 100644 --- a/e2e_test/streaming/dynamic_filter.slt +++ b/e2e_test/streaming/dynamic_filter.slt @@ -99,7 +99,7 @@ drop table t1; statement ok drop table t2; -# ~ Simple Agg with timestamp/timestamptz ~ +# Simple Agg with timestamp/timestamptz statement ok create table t1 (v1 timestamp); diff --git a/e2e_test/streaming/temporal_filter.slt b/e2e_test/streaming/temporal_filter.slt new file mode 100644 index 0000000000000..aaf9041586fb3 --- /dev/null +++ b/e2e_test/streaming/temporal_filter.slt @@ -0,0 +1,56 @@ +statement ok +SET RW_IMPLICIT_FLUSH TO true; + +statement ok +create table t1 (v1 timestamp); + + +# This statement should be correct for the next ~1000 years +# We cannot have a variable interval for now, so we use 2000 year's worth of days as the upper bound. +statement ok +create materialized view mv1 as select v1 from t1 where v1 between now() and now() + interval '1 day' * 365 * 2000; + +statement ok +insert into t1 values ('3031-01-01 19:00:00'), ('3031-01-01 20:00:00'), ('3031-01-01 21:00:00'), ('5031-01-01 21:00:00'), ('0001-01-01 21:00:00'); + +# Below lower bound and above upper bound are not shown +query I +select * from mv1 order by v1; +---- +3031-01-01 19:00:00 +3031-01-01 20:00:00 +3031-01-01 21:00:00 + +# Deleting visible and filtered values +statement ok +delete from t1 where v1 = '3031-01-01 19:00:00' or v1 = '5031-01-01 21:00:00'; + +# Updating visible and filtered values +query I rowsort +update t1 set v1 = v1 + interval '1 hour' where v1 = '3031-01-01 20:00:00' or v1 = '0001-01-01 21:00:00' returning v1; +---- +0001-01-01 22:00:00 +3031-01-01 21:00:00 + + +query I +select * from mv1 order by v1; +---- +3031-01-01 21:00:00 +3031-01-01 21:00:00 + +# Interaction with batch `now()`: both values should fall outside of the range +statement ok +insert into t1 values (now() - interval '1 minute'), ((now() + interval '1 day' * 365 * 3000)); + +query I +select * from mv1 order by v1; +---- +3031-01-01 21:00:00 +3031-01-01 21:00:00 + +statement ok +drop materialized view mv1; + +statement ok +drop table t1; \ No newline at end of file diff --git a/e2e_test/streaming/two_phase_agg.slt b/e2e_test/streaming/two_phase_agg.slt new file mode 100644 index 0000000000000..652ae61b34f56 --- /dev/null +++ b/e2e_test/streaming/two_phase_agg.slt @@ -0,0 +1,61 @@ +statement ok +SET RW_IMPLICIT_FLUSH TO true; + +statement ok +SET RW_ENABLE_TWO_PHASE_AGG=false; + +# This should override `RW_ENABLE_TWO_PHASE_AGG`, enabling it. 
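+# Force takes precedence over the explicit `false` set above; the SHOW query below confirms the effective value is true.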
+statement ok +SET RW_FORCE_TWO_PHASE_AGG=true; + +query T +SHOW RW_ENABLE_TWO_PHASE_AGG; +---- +true + +statement ok +create table t(v1 int, v2 smallint, v3 varchar); + +statement ok +insert into t values +(1, 2, 'abc'), +(3, 4, 'aaaaaaaaaaaaabc'), +(1, 2222, 'ajhaajaj'), +(1, -2, 'aj22jj1j1'), +(0, 2, 'aaaaaaaaaanz'); + +statement ok +create materialized view m1 as select min(v3) as c1, sum(v1) as c2 from t group by v1, v3, v2; + +query TI +select * from m1 order by c1 ASC; +---- +aaaaaaaaaaaaabc 3 +aaaaaaaaaanz 0 +abc 1 +aj22jj1j1 1 +ajhaajaj 1 + +statement ok +drop materialized view m1; + +statement ok +create materialized view m1 as +select + min(v3) as c1, + sum(v1) as c2, + min(v1) as c3 +from t group by v1; + +query TII +select * from m1 order by c1 ASC; +---- +aaaaaaaaaaaaabc 3 3 +aaaaaaaaaanz 0 0 +abc 3 1 + +statement ok +drop materialized view m1; + +statement ok +drop table t; \ No newline at end of file diff --git a/grafana/risingwave-dashboard.dashboard.py b/grafana/risingwave-dashboard.dashboard.py index d0914f01d5136..32b976d913acd 100644 --- a/grafana/risingwave-dashboard.dashboard.py +++ b/grafana/risingwave-dashboard.dashboard.py @@ -480,6 +480,7 @@ def metric(name, filter=None): def quantile(f, percentiles): quantile_map = { + "60": ["0.6", "60"], "50": ["0.5", "50"], "90": ["0.9", "90"], "99": ["0.99", "99"], @@ -557,8 +558,8 @@ def section_compaction(outer_panels): "num of compactions from each level to next level", [ panels.target( - f"sum({metric('storage_level_compact_frequency')}) by (compactor, group, result)", - "{{result}} - group-{{group}} @ {{compactor}}", + f"sum({metric('storage_level_compact_frequency')}) by (compactor, group, task_type, result)", + "{{task_type}} - {{result}} - group-{{group}} @ {{compactor}}", ), ], ), @@ -1319,7 +1320,7 @@ def section_streaming_actors(outer_panels): ), panels.target( f"rate({metric('stream_join_insert_cache_miss_count')}[$__rate_interval])", - "total lookups {{actor_id}} {{side}}", + "cache miss when insert{{actor_id}} {{side}}", ), ], ), @@ -1432,27 +1433,7 @@ def section_streaming_exchange(outer_panels): "Streaming Exchange", [ panels.timeseries_bytes_per_sec( - "Exchange Send Throughput", - "", - [ - panels.target( - f"rate({metric('stream_exchange_send_size')}[$__rate_interval])", - "{{up_actor_id}}->{{down_actor_id}}", - ), - ], - ), - panels.timeseries_bytes_per_sec( - "Exchange Recv Throughput", - "", - [ - panels.target( - f"rate({metric('stream_exchange_recv_size')}[$__rate_interval])", - "{{up_actor_id}}->{{down_actor_id}}", - ), - ], - ), - panels.timeseries_bytes_per_sec( - "Fragment Exchange Send Throughput", + "Fragment-level Remote Exchange Send Throughput", "", [ panels.target( @@ -1462,7 +1443,7 @@ def section_streaming_exchange(outer_panels): ], ), panels.timeseries_bytes_per_sec( - "Fragment Exchange Recv Throughput", + "Fragment-level Remote Exchange Recv Throughput", "", [ panels.target( @@ -1480,7 +1461,7 @@ def section_batch_exchange(outer_panels): panels = outer_panels.sub_panel() return [ outer_panels.row_collapsed( - "Batch Exchange", + "Batch Metrics", [ panels.timeseries_row( "Exchange Recv Row Number", @@ -1492,6 +1473,16 @@ def section_batch_exchange(outer_panels): ), ], ), + panels.timeseries_row( + "Batch Mpp Task Number", + "", + [ + panels.target( + f"{metric('batch_task_num')}", + "", + ), + ], + ), ], ), ] @@ -1599,18 +1590,6 @@ def section_hummock(panels): f"sum(rate({metric('state_store_iter_in_process_counts')}[$__rate_interval])) by(job,instance,table_id)", "iter - {{table_id}} @ 
{{job}} @ {{instance}}", ), - panels.target( - f"sum(rate({metric('state_store_read_req_bloom_filter_positive_counts')}[$__rate_interval])) by (job,instance,table_id,type)", - "read_req bloom filter positive - {{table_id}} - {{type}} @ {{job}} @ {{instance}}", - ), - panels.target( - f"sum(rate({metric('state_store_read_req_positive_but_non_exist_counts')}[$__rate_interval])) by (job,instance,table_id,type)", - "read_req bloom filter true positive - {{table_id}} - {{type}} @ {{job}} @ {{instance}}", - ), - panels.target( - f"sum(rate({metric('state_store_read_req_check_bloom_filter_counts')}[$__rate_interval])) by (job,instance,table_id,type)", - "read_req check bloom filter - {{table_id}} - {{type}} @ {{job}} @ {{instance}}", - ), ], ), panels.timeseries_latency( @@ -1637,13 +1616,13 @@ def section_hummock(panels): *quantile( lambda quantile, legend: panels.target( f"histogram_quantile({quantile}, sum(rate({metric('state_store_iter_duration_bucket')}[$__rate_interval])) by (le, job, instance, table_id))", - f"total_time p{legend} - {{{{table_id}}}} @ {{{{job}}}} @ {{{{instance}}}}", + f"create_iter_time p{legend} - {{{{table_id}}}} @ {{{{job}}}} @ {{{{instance}}}}", ), [90, 99, 999, "max"], ), panels.target( f"sum by(le, job, instance)(rate({metric('state_store_iter_duration_sum')}[$__rate_interval])) / sum by(le, job,instance) (rate({metric('state_store_iter_duration_count')}[$__rate_interval]))", - "total_time avg - {{job}} @ {{instance}}", + "create_iter_time avg - {{job}} @ {{instance}}", ), *quantile( lambda quantile, legend: panels.target( @@ -1717,6 +1696,23 @@ def section_hummock(panels): ), ], ), + panels.timeseries_latency( + "Read Duration - MayExist", + "", + [ + *quantile( + lambda quantile, legend: panels.target( + f"histogram_quantile({quantile}, sum(rate({metric('state_store_may_exist_duration_bucket')}[$__rate_interval])) by (le, job, instance, table_id))", + f"p{legend}" + " - {{table_id}} @ {{job}} @ {{instance}}", + ), + [50, 90, 99, "max"], + ), + panels.target( + f"sum by(le, job, instance, table_id)(rate({metric('state_store_may_exist_duration_sum')}[$__rate_interval])) / sum by(le, job, instance, table_id) (rate({metric('state_store_may_exist_duration_count')}[$__rate_interval]))", + "avg - {{table_id}} {{job}} @ {{instance}}", + ), + ], + ), panels.timeseries_ops( "Read Bloom Filter", "", @@ -1726,8 +1722,16 @@ def section_hummock(panels): "bloom filter true negative - {{table_id}} - {{type}} @ {{job}} @ {{instance}}", ), panels.target( - f"sum(rate({metric('state_bloom_filter_check_counts')}[$__rate_interval])) by (job,instance,table_id,type)", - "bloom filter check count - {{table_id}} - {{type}} @ {{job}} @ {{instance}}", + f"sum(rate({metric('state_store_read_req_positive_but_non_exist_counts')}[$__rate_interval])) by (job,instance,table_id,type)", + "bloom filter false positive count - {{table_id}} - {{type}} @ {{job}} @ {{instance}}", + ), + panels.target( + f"sum(rate({metric('state_store_read_req_bloom_filter_positive_counts')}[$__rate_interval])) by (job,instance,table_id,type)", + "read_req bloom filter positive - {{table_id}} - {{type}} @ {{job}} @ {{instance}}", + ), + panels.target( + f"sum(rate({metric('state_store_read_req_check_bloom_filter_counts')}[$__rate_interval])) by (job,instance,table_id,type)", + "read_req check bloom filter - {{table_id}} - {{type}} @ {{job}} @ {{instance}}", ), ], ), @@ -2105,6 +2109,14 @@ def section_hummock_manager(outer_panels): "table{{table_id}} {{metric}}"), ], ), + panels.timeseries_count( + "Stale SST Total 
Number", + "total number of SSTs that is no longer referenced by versions but is not yet deleted from storage", + [ + panels.target(f"{metric('storage_stale_ssts_count')}", + "stale SST total number"), + ], + ), ], ) ] @@ -2434,6 +2446,16 @@ def section_memory_manager(outer_panels): ), ], ), + panels.timeseries_memory( + "The memory allocated by streaming", + "", + [ + panels.target( + f"{metric('stream_total_mem_usage')}", + "", + ), + ], + ), ], ), ] diff --git a/grafana/risingwave-dashboard.json b/grafana/risingwave-dashboard.json index c722ea150c697..18a32b84cb724 100644 --- a/grafana/risingwave-dashboard.json +++ b/grafana/risingwave-dashboard.json @@ -1 +1 @@ -{"__inputs":[],"annotations":{"list":[]},"description":"RisingWave Dashboard","editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"cacheTimeout":null,"collapsed":false,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":1,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Cluster Node","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":1},"height":null,"hideTimeOverride":false,"id":2,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["last"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(worker_num) by (worker_type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{worker_type}}","metric":"","query":"sum(worker_num) by (worker_type)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Node 
Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":1},"height":null,"hideTimeOverride":false,"id":3,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"avg(process_resident_memory_bytes) by (job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{job}} @ {{instance}}","metric":"","query":"avg(process_resident_memory_bytes) by (job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Node Memory","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":9},"height":null,"hideTimeOverride":false,"id":4,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(process_cpu_seconds_total[$__rate_interval])) by (job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{job}} @ {{instance}}","metric":"","query":"sum(rate(process_cpu_seconds_total[$__rate_interval])) by (job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Node 
CPU","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"collapsed":false,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":17},"height":null,"hideTimeOverride":false,"id":5,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Streaming","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"rows/s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":18},"height":null,"hideTimeOverride":false,"id":6,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_source_output_rows_counts[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"source={{source_name}} {{source_id}} @ {{instance}}","metric":"","query":"rate(stream_source_output_rows_counts[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Source Throughput(rows)","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"rows/s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":18},"height":null,"hideTimeOverride":false,"id":7,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(partition_input_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"actor={{actor_id}} source={{source_id}} partition={{partition}}","metric":"","query":"rate(partition_input_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Source Throughput(rows) Per 
Partition","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"MB/s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":26},"height":null,"hideTimeOverride":false,"id":8,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"(sum by (source_id)(rate(partition_input_bytes[$__rate_interval])))/(1000*1000)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"source={{source_id}}","metric":"","query":"(sum by (source_id)(rate(partition_input_bytes[$__rate_interval])))/(1000*1000)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Source Throughput(bytes)","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"MB/s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":26},"height":null,"hideTimeOverride":false,"id":9,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"(rate(partition_input_bytes[$__rate_interval]))/(1000*1000)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"actor={{actor_id}} source={{source_id}} partition={{partition}}","metric":"","query":"(rate(partition_input_bytes[$__rate_interval]))/(1000*1000)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Source Throughput(bytes) Per 
Partition","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"rows/s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":34},"height":null,"hideTimeOverride":false,"id":10,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_source_rows_per_barrier_counts[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"actor={{actor_id}} source={{source_id}} @ {{instance}}","metric":"","query":"rate(stream_source_rows_per_barrier_counts[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Source Throughput(rows) per barrier","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":34},"height":null,"hideTimeOverride":false,"id":11,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"all_barrier_nums","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"all_barrier","metric":"","query":"all_barrier_nums","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"in_flight_barrier_nums","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"in_flight_barrier","metric":"","query":"in_flight_barrier_nums","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Barrier 
Number","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":42},"height":null,"hideTimeOverride":false,"id":12,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_send_latency_p50","metric":"","query":"histogram_quantile(0.5, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_send_latency_p90","metric":"","query":"histogram_quantile(0.9, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_send_latency_p99","metric":"","query":"histogram_quantile(0.99, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_send_latency_p999","metric":"","query":"histogram_quantile(0.999, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_send_latency_pmax","metric":"","query":"histogram_quantile(1.0, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(meta_barrier_send_duration_seconds_sum[$__rate_interval]) / 
rate(meta_barrier_send_duration_seconds_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_send_latency_avg","metric":"","query":"rate(meta_barrier_send_duration_seconds_sum[$__rate_interval]) / rate(meta_barrier_send_duration_seconds_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Barrier Send Latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":42},"height":null,"hideTimeOverride":false,"id":13,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_latency_p50","metric":"","query":"histogram_quantile(0.5, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_latency_p90","metric":"","query":"histogram_quantile(0.9, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_latency_p99","metric":"","query":"histogram_quantile(0.99, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_latency_p999","metric":"","query":"histogram_quantile(0.999, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_latency_pmax","metric":"","query":"histogram_quantile(1.0, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(meta_barrier_duration_seconds_sum[$__rate_interval]) / rate(meta_barrier_duration_seconds_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_latency_avg","metric":"","query":"rate(meta_barrier_duration_seconds_sum[$__rate_interval]) / rate(meta_barrier_duration_seconds_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Barrier Latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":50},"height":null,"hideTimeOverride":false,"id":14,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_inflight_latency_p50","metric":"","query":"histogram_quantile(0.5, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_inflight_latency_p90","metric":"","query":"histogram_quantile(0.9, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_inflight_latency_p99","metric":"","query":"histogram_quantile(0.99, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_inflight_latency_p999","metric":"","query":"histogram_quantile(0.999, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_inflight_latency_pmax","metric":"","query":"histogram_quantile(1.0, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"max(sum by(le, instance)(rate(stream_barrier_inflight_duration_seconds_sum[$__rate_interval])) / sum by(le, instance)(rate(stream_barrier_inflight_duration_seconds_count[$__rate_interval])))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_inflight_latency_avg","metric":"","query":"max(sum by(le, instance)(rate(stream_barrier_inflight_duration_seconds_sum[$__rate_interval])) / sum by(le, instance)(rate(stream_barrier_inflight_duration_seconds_count[$__rate_interval])))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Barrier In-Flight Latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":50},"height":null,"hideTimeOverride":false,"id":15,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_sync_latency_p50 - {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_sync_latency_p90 - {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by 
(le,instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_sync_latency_p99 - {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_sync_latency_p999 - {{instance}}","metric":"","query":"histogram_quantile(0.999, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_sync_latency_pmax - {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, instance)(rate(stream_barrier_sync_storage_duration_seconds_sum[$__rate_interval])) / sum by(le, instance)(rate(stream_barrier_sync_storage_duration_seconds_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_sync_latency_avg - {{instance}}","metric":"","query":"sum by(le, instance)(rate(stream_barrier_sync_storage_duration_seconds_sum[$__rate_interval])) / sum by(le, instance)(rate(stream_barrier_sync_storage_duration_seconds_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Barrier Sync Latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":58},"height":null,"hideTimeOverride":false,"id":16,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_wait_commit_latency_p50","metric":"","query":"histogram_quantile(0.5, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_wait_commit_latency_p90","metric":"","query":"histogram_quantile(0.9, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_wait_commit_latency_p99","metric":"","query":"histogram_quantile(0.99, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_wait_commit_latency_p999","metric":"","query":"histogram_quantile(0.999, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_wait_commit_latency_pmax","metric":"","query":"histogram_quantile(1.0, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(meta_barrier_wait_commit_duration_seconds_sum[$__rate_interval]) / rate(meta_barrier_wait_commit_duration_seconds_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_wait_commit_avg","metric":"","query":"rate(meta_barrier_wait_commit_duration_seconds_sum[$__rate_interval]) / rate(meta_barrier_wait_commit_duration_seconds_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Barrier Wait Commit 
Latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":66},"height":null,"hideTimeOverride":false,"id":17,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"rows/s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":18,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_executor_row_count[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}->{{executor_id}}","metric":"","query":"rate(stream_executor_row_count[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Executor Throughput","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":19,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_output_buffer_blocking_duration_ns[$__rate_interval]) / 1000000000","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_output_buffer_blocking_duration_ns[$__rate_interval]) / 1000000000","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor 
Backpressure","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":20,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(actor_memory_usage[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(actor_memory_usage[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor Memory Usage","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":21,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_input_buffer_blocking_duration_ns[$__rate_interval]) / 1000000000","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}->{{upstream_fragment_id}}","metric":"","query":"rate(stream_actor_input_buffer_blocking_duration_ns[$__rate_interval]) / 1000000000","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor Input Blocking Time 
Ratio","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":16},"height":null,"hideTimeOverride":false,"id":22,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_barrier_time[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_barrier_time[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor Barrier Latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":16},"height":null,"hideTimeOverride":false,"id":23,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_processing_time[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_processing_time[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor Processing 
Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":24},"height":null,"hideTimeOverride":false,"id":24,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_actor_execution_time[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_actor_execution_time[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor Execution Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"row"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":24},"height":null,"hideTimeOverride":false,"id":25,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_in_record_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_in_record_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor Input 
Row","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"row"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":32},"height":null,"hideTimeOverride":false,"id":26,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_out_record_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_out_record_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor Output Row","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":12,"y":32},"height":null,"hideTimeOverride":false,"id":27,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_fast_poll_duration[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_fast_poll_duration[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Fast Poll 
Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":40},"height":null,"hideTimeOverride":false,"id":28,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_fast_poll_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_fast_poll_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Fast Poll Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":40},"height":null,"hideTimeOverride":false,"id":29,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_fast_poll_duration[$__rate_interval]) / rate(stream_actor_fast_poll_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_fast_poll_duration[$__rate_interval]) / rate(stream_actor_fast_poll_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Fast Poll Avg 
Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":40},"height":null,"hideTimeOverride":false,"id":30,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_slow_poll_duration[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_slow_poll_duration[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Slow Poll Total Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":48},"height":null,"hideTimeOverride":false,"id":31,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_slow_poll_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_slow_poll_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Slow Poll 
Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":48},"height":null,"hideTimeOverride":false,"id":32,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_slow_poll_duration[$__rate_interval]) / rate(stream_actor_slow_poll_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_slow_poll_duration[$__rate_interval]) / rate(stream_actor_slow_poll_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Slow Poll Avg Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":48},"height":null,"hideTimeOverride":false,"id":33,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_poll_duration[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_poll_duration[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Poll Total 
Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":56},"height":null,"hideTimeOverride":false,"id":34,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_poll_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_poll_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Poll Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":56},"height":null,"hideTimeOverride":false,"id":35,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_poll_duration[$__rate_interval]) / rate(stream_actor_poll_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_poll_duration[$__rate_interval]) / rate(stream_actor_poll_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Poll Avg 
Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":56},"height":null,"hideTimeOverride":false,"id":36,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_idle_duration[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_idle_duration[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Idle Total Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":64},"height":null,"hideTimeOverride":false,"id":37,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_idle_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_idle_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Idle 
Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":64},"height":null,"hideTimeOverride":false,"id":38,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_idle_duration[$__rate_interval]) / rate(stream_actor_idle_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_idle_duration[$__rate_interval]) / rate(stream_actor_idle_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Idle Avg Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":64},"height":null,"hideTimeOverride":false,"id":39,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_scheduled_duration[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_scheduled_duration[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Scheduled Total 
Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":72},"height":null,"hideTimeOverride":false,"id":40,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_scheduled_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_scheduled_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Scheduled Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":72},"height":null,"hideTimeOverride":false,"id":41,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_scheduled_duration[$__rate_interval]) / rate(stream_actor_scheduled_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_scheduled_duration[$__rate_interval]) / rate(stream_actor_scheduled_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Scheduled Avg 
Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":80},"height":null,"hideTimeOverride":false,"id":42,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_join_lookup_miss_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"cache miss {{actor_id}} {{side}}","metric":"","query":"rate(stream_join_lookup_miss_count[$__rate_interval])","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_join_lookup_total_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"total lookups {{actor_id}} {{side}}","metric":"","query":"rate(stream_join_lookup_total_count[$__rate_interval])","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_join_insert_cache_miss_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"total lookups {{actor_id}} {{side}}","metric":"","query":"rate(stream_join_insert_cache_miss_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Join Executor Cache","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":80},"height":null,"hideTimeOverride":false,"id":43,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 {{actor_id}}.{{wait_side}} - {{job}} 
@ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 {{actor_id}}.{{wait_side}} - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p999 {{actor_id}}.{{wait_side}} - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.999, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax {{actor_id}}.{{wait_side}} - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, actor_id, wait_side, job, instance)(rate(stream_join_barrier_align_duration_sum[$__rate_interval])) / sum by(le,actor_id,wait_side,job,instance) (rate(stream_join_barrier_align_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg {{actor_id}}.{{wait_side}} - {{job}} @ {{instance}}","metric":"","query":"sum by(le, actor_id, wait_side, job, instance)(rate(stream_join_barrier_align_duration_sum[$__rate_interval])) / sum by(le,actor_id,wait_side,job,instance) (rate(stream_join_barrier_align_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Join Executor Barrier 
Align","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":88},"height":null,"hideTimeOverride":false,"id":44,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_join_actor_input_waiting_duration_ns[$__rate_interval]) / 1000000000","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_join_actor_input_waiting_duration_ns[$__rate_interval]) / 1000000000","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Join Actor Input Blocking Time Ratio","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":88},"height":null,"hideTimeOverride":false,"id":45,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_join_match_duration_ns[$__rate_interval]) / 1000000000","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}.{{side}}","metric":"","query":"rate(stream_join_match_duration_ns[$__rate_interval]) / 1000000000","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Join Actor Match Duration Per 
Second","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":96},"height":null,"hideTimeOverride":false,"id":46,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"stream_join_cached_entries","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}} {{side}}","metric":"","query":"stream_join_cached_entries","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Join Cached Entries","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":96},"height":null,"hideTimeOverride":false,"id":47,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"stream_join_cached_rows","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}} {{side}}","metric":"","query":"stream_join_cached_rows","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Join Cached 
Rows","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":104},"height":null,"hideTimeOverride":false,"id":48,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"stream_join_cached_estimated_size","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}} {{side}}","metric":"","query":"stream_join_cached_estimated_size","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Join Cached Estimated Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":104},"height":null,"hideTimeOverride":false,"id":49,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_agg_lookup_miss_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"cache miss {{actor_id}}","metric":"","query":"rate(stream_agg_lookup_miss_count[$__rate_interval])","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_agg_lookup_total_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"total lookups {{actor_id}}","metric":"","query":"rate(stream_agg_lookup_total_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Aggregation Executor Cache Statistics For Each 
Key/State","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":112},"height":null,"hideTimeOverride":false,"id":50,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_agg_chunk_lookup_miss_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"chunk-level cache miss {{actor_id}}","metric":"","query":"rate(stream_agg_chunk_lookup_miss_count[$__rate_interval])","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_agg_chunk_lookup_total_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"chunk-level total lookups {{actor_id}}","metric":"","query":"rate(stream_agg_chunk_lookup_total_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Aggregation Executor Cache Statistics For Each StreamChunk","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":112},"height":null,"hideTimeOverride":false,"id":51,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"stream_agg_cached_keys","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"stream_agg_cached_keys","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Aggregation Cached Keys","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Streaming 
Actors","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":67},"height":null,"hideTimeOverride":false,"id":52,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":53,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_exchange_send_size[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{up_actor_id}}->{{down_actor_id}}","metric":"","query":"rate(stream_exchange_send_size[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Exchange Send Throughput","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":54,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_exchange_recv_size[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{up_actor_id}}->{{down_actor_id}}","metric":"","query":"rate(stream_exchange_recv_size[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Exchange Recv 
Throughput","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":55,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_exchange_frag_send_size[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{up_fragment_id}}->{{down_fragment_id}}","metric":"","query":"rate(stream_exchange_frag_send_size[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Fragment Exchange Send Throughput","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":56,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_exchange_frag_recv_size[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{up_fragment_id}}->{{down_fragment_id}}","metric":"","query":"rate(stream_exchange_frag_recv_size[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Fragment Exchange Recv Throughput","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Streaming 
Exchange","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":68},"height":null,"hideTimeOverride":false,"id":57,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"row"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":58,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"batch_task_exchange_recv_row_number","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{query_id}} : {{source_stage_id}}.{{source_task_id}} -> {{target_stage_id}}.{{target_task_id}}","metric":"","query":"batch_task_exchange_recv_row_number","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Exchange Recv Row Number","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Batch 
Exchange","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":false,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":69},"height":null,"hideTimeOverride":false,"id":59,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Hummock","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":70},"height":null,"hideTimeOverride":false,"id":60,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance) 
(rate(state_store_sync_duration_sum[$__rate_interval])) / sum by(le, job, instance) (rate(state_store_sync_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance) (rate(state_store_sync_duration_sum[$__rate_interval])) / sum by(le, job, instance) (rate(state_store_sync_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Build and Sync Sstable Duration","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":70},"height":null,"hideTimeOverride":false,"id":61,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_sst_store_block_request_counts[$__rate_interval])) by (job, instance, table_id, type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{table_id}} @ {{type}} - {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_sst_store_block_request_counts[$__rate_interval])) by (job, instance, table_id, type)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(file_cache_latency_count[$__rate_interval])) by (op, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"file cache {{op}} @ {{instance}}","metric":"","query":"sum(rate(file_cache_latency_count[$__rate_interval])) by (op, instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(file_cache_miss[$__rate_interval])) by (instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"file cache miss @ {{instance}}","metric":"","query":"sum(rate(file_cache_miss[$__rate_interval])) by (instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Cache 
Ops","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":78},"height":null,"hideTimeOverride":false,"id":62,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_get_duration_count[$__rate_interval])) by (job,instanc,table_id)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_get_duration_count[$__rate_interval])) by (job,instanc,table_id)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_range_reverse_scan_duration_count[$__rate_interval])) by (job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"backward scan - {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_range_reverse_scan_duration_count[$__rate_interval])) by (job,instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_get_shared_buffer_hit_counts[$__rate_interval])) by (job,instance,table_id)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"shared_buffer hit - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_get_shared_buffer_hit_counts[$__rate_interval])) by (job,instance,table_id)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_iter_in_process_counts[$__rate_interval])) by(job,instance,table_id)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"iter - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_iter_in_process_counts[$__rate_interval])) by(job,instance,table_id)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_read_req_bloom_filter_positive_counts[$__rate_interval])) by (job,instance,table_id,type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"read_req bloom filter positive - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_read_req_bloom_filter_positive_counts[$__rate_interval])) by (job,instance,table_id,type)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_read_req_positive_but_non_exist_counts[$__rate_interval])) by 
(job,instance,table_id,type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"read_req bloom filter true positive - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_read_req_positive_but_non_exist_counts[$__rate_interval])) by (job,instance,table_id,type)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_read_req_check_bloom_filter_counts[$__rate_interval])) by (job,instance,table_id,type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"read_req check bloom filter - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_read_req_check_bloom_filter_counts[$__rate_interval])) by (job,instance,table_id,type)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Ops","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":78},"height":null,"hideTimeOverride":false,"id":63,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, 
table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance, table_id)(rate(state_store_get_duration_sum[$__rate_interval])) / sum by(le, job, instance, table_id) (rate(state_store_get_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg - {{table_id}} {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance, table_id)(rate(state_store_get_duration_sum[$__rate_interval])) / sum by(le, job, instance, table_id) (rate(state_store_get_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Duration - Get","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":86},"height":null,"hideTimeOverride":false,"id":64,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"total_time p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"total_time p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, 
table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"total_time p999 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.999, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"total_time pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance)(rate(state_store_iter_duration_sum[$__rate_interval])) / sum by(le, job,instance) (rate(state_store_iter_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"total_time avg - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance)(rate(state_store_iter_duration_sum[$__rate_interval])) / sum by(le, job,instance) (rate(state_store_iter_duration_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pure_scan_time p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pure_scan_time p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pure_scan_time p999 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.999, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pure_scan_time pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, 
sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance)(rate(state_store_scan_iter_duration_sum[$__rate_interval])) / sum by(le, job,instance) (rate(state_store_iter_scan_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pure_scan_time avg - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance)(rate(state_store_scan_iter_duration_sum[$__rate_interval])) / sum by(le, job,instance) (rate(state_store_iter_scan_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Duration - Iter","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":86},"height":null,"hideTimeOverride":false,"id":65,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(0.9, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{table_id}} {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(0.9, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(0.99, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{table_id}} {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(0.99, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + 
histogram_quantile(0.999, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p999 - {{table_id}} {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.999, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(0.999, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(1.0, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - {{table_id}} {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(1.0, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Item Size - Get","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":94},"height":null,"hideTimeOverride":false,"id":66,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, 
sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p999 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.999, sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Item Size - Iter","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":94},"height":null,"hideTimeOverride":false,"id":67,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p999 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.999, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, 
table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Item Count - Iter","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":102},"height":null,"hideTimeOverride":false,"id":68,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_get_key_size_sum[$__rate_interval])) by(job, instance) + sum(rate(state_store_get_value_size_sum[$__rate_interval])) by(job, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_get_key_size_sum[$__rate_interval])) by(job, instance) + sum(rate(state_store_get_value_size_sum[$__rate_interval])) by(job, instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Throughput - Get","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":102},"height":null,"hideTimeOverride":false,"id":69,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_iter_size_sum[$__rate_interval])) by(job, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{job}} @ 
{{instance}}","metric":"","query":"sum(rate(state_store_iter_size_sum[$__rate_interval])) by(job, instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Throughput - Iter","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":110},"height":null,"hideTimeOverride":false,"id":70,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_bloom_filter_true_negative_counts[$__rate_interval])) by (job,instance,table_id,type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"bloom filter true negative - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_bloom_filter_true_negative_counts[$__rate_interval])) by (job,instance,table_id,type)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_bloom_filter_check_counts[$__rate_interval])) by (job,instance,table_id,type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"bloom filter check count - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_bloom_filter_check_counts[$__rate_interval])) by (job,instance,table_id,type)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Bloom Filter","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":110},"height":null,"hideTimeOverride":false,"id":71,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_iter_scan_key_counts[$__rate_interval])) by (instance, type, 
table_id)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"iter keys flow - {{table_id}} @ {{type}} @ {{instance}} ","metric":"","query":"sum(rate(state_store_iter_scan_key_counts[$__rate_interval])) by (instance, type, table_id)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Iter keys flow","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":118},"height":null,"hideTimeOverride":false,"id":72,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"1 - (sum(rate(state_store_bloom_filter_true_negative_counts[$__rate_interval])) by (job,instance,table_id,type)) / (sum(rate(state_bloom_filter_check_counts[$__rate_interval])) by (job,instance,table_id,type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"bloom filter miss rate - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"1 - (sum(rate(state_store_bloom_filter_true_negative_counts[$__rate_interval])) by (job,instance,table_id,type)) / (sum(rate(state_bloom_filter_check_counts[$__rate_interval])) by (job,instance,table_id,type))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"(sum(rate(state_store_sst_store_block_request_counts{type='meta_miss'}[$__rate_interval])) by (job,instance,table_id)) / (sum(rate(state_store_sst_store_block_request_counts{type='meta_total'}[$__rate_interval])) by (job,instance,table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"meta cache miss rate - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"(sum(rate(state_store_sst_store_block_request_counts{type='meta_miss'}[$__rate_interval])) by (job,instance,table_id)) / (sum(rate(state_store_sst_store_block_request_counts{type='meta_total'}[$__rate_interval])) by (job,instance,table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"(sum(rate(state_store_sst_store_block_request_counts{type='data_miss'}[$__rate_interval])) by (job,instance,table_id)) / (sum(rate(state_store_sst_store_block_request_counts{type='data_total'}[$__rate_interval])) by (job,instance,table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"block cache miss rate - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"(sum(rate(state_store_sst_store_block_request_counts{type='data_miss'}[$__rate_interval])) by (job,instance,table_id)) / 
(sum(rate(state_store_sst_store_block_request_counts{type='data_total'}[$__rate_interval])) by (job,instance,table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"(sum(rate(file_cache_miss[$__rate_interval])) by (instance)) / (sum(rate(file_cache_latency_count{op='get'}[$__rate_interval])) by (instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"file cache miss rate @ {{instance}}","metric":"","query":"(sum(rate(file_cache_miss[$__rate_interval])) by (instance)) / (sum(rate(file_cache_latency_count{op='get'}[$__rate_interval])) by (instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"1 - (((sum(rate(state_store_read_req_bloom_filter_positive_counts[$__rate_interval])) by (job,instance,table_id,type))) / (sum(rate(state_store_read_req_check_bloom_filter_counts[$__rate_interval])) by (job,instance,table_id,type)))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"read req bloom filter filter rate - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"1 - (((sum(rate(state_store_read_req_bloom_filter_positive_counts[$__rate_interval])) by (job,instance,table_id,type))) / (sum(rate(state_store_read_req_check_bloom_filter_counts[$__rate_interval])) by (job,instance,table_id,type)))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"1 - (((sum(rate(state_store_read_req_positive_but_non_exist_counts[$__rate_interval])) by (job,instance,table_id,type))) / (sum(rate(state_store_read_req_bloom_filter_positive_counts[$__rate_interval])) by (job,instance,table_id,type)))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"read req bloom filter false positive rate - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"1 - (((sum(rate(state_store_read_req_positive_but_non_exist_counts[$__rate_interval])) by (job,instance,table_id,type))) / (sum(rate(state_store_read_req_bloom_filter_positive_counts[$__rate_interval])) by (job,instance,table_id,type)))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":" Filter/Cache Miss Rate","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":118},"height":null,"hideTimeOverride":false,"id":73,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_iter_merge_sstable_counts_bucket[$__rate_interval])) by (le, job, table_id, 
type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"# merged ssts p90 - {{table_id}} @ {{job}} @ {{type}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_iter_merge_sstable_counts_bucket[$__rate_interval])) by (le, job, table_id, type))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_iter_merge_sstable_counts_bucket[$__rate_interval])) by (le, job, table_id, type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"# merged ssts p99 - {{table_id}} @ {{job}} @ {{type}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_iter_merge_sstable_counts_bucket[$__rate_interval])) by (le, job, table_id, type))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_iter_merge_sstable_counts_bucket[$__rate_interval])) by (le, job, table_id, type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"# merged ssts pmax - {{table_id}} @ {{job}} @ {{type}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_iter_merge_sstable_counts_bucket[$__rate_interval])) by (le, job, table_id, type))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance, table_id)(rate(state_store_iter_merge_sstable_counts_sum[$__rate_interval])) / sum by(le, job, instance, table_id)(rate(state_store_iter_merge_sstable_counts_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"# merged ssts avg - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance, table_id)(rate(state_store_iter_merge_sstable_counts_sum[$__rate_interval])) / sum by(le, job, instance, table_id)(rate(state_store_iter_merge_sstable_counts_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Merged SSTs","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":126},"height":null,"hideTimeOverride":false,"id":74,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_write_batch_duration_count[$__rate_interval])) by (job,instance,table_id)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write batch - {{table_id}} @ {{job}} @ {{instance}} 
","metric":"","query":"sum(rate(state_store_write_batch_duration_count[$__rate_interval])) by (job,instance,table_id)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_sync_duration_count[$__rate_interval])) by (job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"l0 - {{job}} @ {{instance}} ","metric":"","query":"sum(rate(state_store_sync_duration_count[$__rate_interval])) by (job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Write Ops","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":126},"height":null,"hideTimeOverride":false,"id":75,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to shared_buffer p50 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to shared_buffer p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to shared_buffer p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, 
table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to shared_buffer pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance, table_id)(rate(state_store_write_batch_duration_sum[$__rate_interval])) / sum by(le, job, instance, table_id)(rate(state_store_write_batch_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to shared_buffer avg - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance, table_id)(rate(state_store_write_batch_duration_sum[$__rate_interval])) / sum by(le, job, instance, table_id)(rate(state_store_write_batch_duration_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to object_store p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to object_store p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to object_store p99 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to object_store pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance)(rate(state_store_write_shared_buffer_sync_time_sum[$__rate_interval])) / sum by(le, job, 
instance)(rate(state_store_write_shared_buffer_sync_time_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to object_store - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance)(rate(state_store_write_shared_buffer_sync_time_sum[$__rate_interval])) / sum by(le, job, instance)(rate(state_store_write_shared_buffer_sync_time_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Write Duration","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":134},"height":null,"hideTimeOverride":false,"id":76,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_write_batch_tuple_counts[$__rate_interval])) by (job,instance,table_id)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write_batch_kv_pair_count - {{table_id}} @ {{instance}} ","metric":"","query":"sum(rate(state_store_write_batch_tuple_counts[$__rate_interval])) by (job,instance,table_id)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Write Item Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":134},"height":null,"hideTimeOverride":false,"id":77,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_write_batch_size_sum[$__rate_interval]))by(job,instance) / sum(rate(state_store_write_batch_size_count[$__rate_interval]))by(job,instance,table_id)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"shared_buffer - {{table_id}} @ {{job}} @ 
{{instance}}","metric":"","query":"sum(rate(state_store_write_batch_size_sum[$__rate_interval]))by(job,instance) / sum(rate(state_store_write_batch_size_count[$__rate_interval]))by(job,instance,table_id)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(compactor_shared_buffer_to_sstable_size[$__rate_interval]))by(job,instance) / sum(rate(state_store_shared_buffer_to_sstable_size_count[$__rate_interval]))by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"sync - {{job}} @ {{instance}}","metric":"","query":"sum(rate(compactor_shared_buffer_to_sstable_size[$__rate_interval]))by(job,instance) / sum(rate(state_store_shared_buffer_to_sstable_size_count[$__rate_interval]))by(job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Write Throughput","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":142},"height":null,"hideTimeOverride":false,"id":78,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, 
instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance) (rate(state_store_sync_size_sum[$__rate_interval])) / sum by(le, job, instance) (rate(state_store_sync_size_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance) (rate(state_store_sync_size_sum[$__rate_interval])) / sum by(le, job, instance) (rate(state_store_sync_size_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Checkpoint Sync Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":142},"height":null,"hideTimeOverride":false,"id":79,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"avg(state_store_meta_cache_size) by (job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"meta cache - {{job}} @ {{instance}}","metric":"","query":"avg(state_store_meta_cache_size) by (job,instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"avg(state_store_block_cache_size) by (job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"data cache - {{job}} @ {{instance}}","metric":"","query":"avg(state_store_block_cache_size) by (job,instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(state_store_limit_memory_size) by (job)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"uploading memory - {{job}}","metric":"","query":"sum(state_store_limit_memory_size) by (job)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Cache 
Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":150},"height":null,"hideTimeOverride":false,"id":80,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"row_seq_scan next p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"row_seq_scan next p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"row_seq_scan next p99 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"row_seq_scan next pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance) (rate(batch_row_seq_scan_next_duration_sum[$__rate_interval])) / sum by(le, job, instance) (rate(batch_row_seq_scan_next_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"row_seq_scan next avg - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance) 
(rate(batch_row_seq_scan_next_duration_sum[$__rate_interval])) / sum by(le, job, instance) (rate(batch_row_seq_scan_next_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Row SeqScan Next Duration","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":150},"height":null,"hideTimeOverride":false,"id":81,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"fetch_meta_duration p50 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"fetch_meta_duration p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"fetch_meta_duration p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"fetch_meta_duration pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, 
table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance, table_id) (rate(state_store_iter_fetch_meta_duration_sum[$__rate_interval])) / sum by(le, job, instance, table_id) (rate(state_store_iter_fetch_meta_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"fetch_meta_duration avg - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance, table_id) (rate(state_store_iter_fetch_meta_duration_sum[$__rate_interval])) / sum by(le, job, instance, table_id) (rate(state_store_iter_fetch_meta_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Fetch Meta Duration","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":158},"height":null,"hideTimeOverride":false,"id":82,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"num of SSTs in each level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":83,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(storage_level_sst_num) by (instance, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}}","metric":"","query":"sum(storage_level_sst_num) by (instance, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"SST Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"KBs total file bytes in each 
level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"kbytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":84,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(storage_level_total_file_size) by (instance, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}}","metric":"","query":"sum(storage_level_total_file_size) by (instance, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"KBs level sst","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"num of compactions from each level to next level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":85,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(storage_level_compact_frequency) by (compactor, group, result)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{result}} - group-{{group}} @ {{compactor}}","metric":"","query":"sum(storage_level_compact_frequency) by (compactor, group, result)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compaction Success & Failure Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"num of compaction task which does not 
trigger","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":86,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_skip_compact_frequency[$__rate_interval])) by (level, type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{level}}-{{type}}","metric":"","query":"sum(rate(storage_skip_compact_frequency[$__rate_interval])) by (level, type)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compaction Skip Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"num of compactions from each level to next level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":16},"height":null,"hideTimeOverride":false,"id":87,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"avg(storage_compact_task_pending_num) by(job, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compactor_task_split_count - {{job}} @ {{instance}}","metric":"","query":"avg(storage_compact_task_pending_num) by(job, instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compactor Running Task Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"Total time of compact that have been issued to state 
store","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":16},"height":null,"hideTimeOverride":false,"id":88,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(compactor_compact_task_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compact-task p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(compactor_compact_task_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(compactor_compact_task_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compact-task p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(compactor_compact_task_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(compactor_compact_task_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compact-task pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(compactor_compact_task_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(compactor_compact_sst_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compact-key-range p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(compactor_compact_sst_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(compactor_compact_sst_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compact-key-range pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(compactor_compact_sst_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, 
sum(rate(compactor_get_table_id_total_time_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get-table-id p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(compactor_get_table_id_total_time_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(compactor_get_table_id_total_time_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get-table-id pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(compactor_get_table_id_total_time_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(compactor_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"remote-io p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(compactor_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(compactor_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"remote-io pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(compactor_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le)(rate(compactor_compact_task_duration_sum[$__rate_interval])) / sum by(le)(rate(compactor_compact_task_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compact-task avg","metric":"","query":"sum by(le)(rate(compactor_compact_task_duration_sum[$__rate_interval])) / sum by(le)(rate(compactor_compact_task_duration_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le)(rate(state_store_compact_sst_duration_sum[$__rate_interval])) / sum by(le)(rate(state_store_compact_sst_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compact-key-range avg","metric":"","query":"sum by(le)(rate(state_store_compact_sst_duration_sum[$__rate_interval])) / sum by(le)(rate(state_store_compact_sst_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compaction Duration","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"KBs read from next level during history compactions to next 
level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":24},"height":null,"hideTimeOverride":false,"id":89,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_read_next[$__rate_interval])) by(job,instance) + sum(rate(storage_level_compact_read_curr[$__rate_interval])) by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"read - {{job}} @ {{instance}}","metric":"","query":"sum(rate(storage_level_compact_read_next[$__rate_interval])) by(job,instance) + sum(rate(storage_level_compact_read_curr[$__rate_interval])) by(job,instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_write[$__rate_interval])) by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write - {{job}} @ {{instance}}","metric":"","query":"sum(rate(storage_level_compact_write[$__rate_interval])) by(job,instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(compactor_write_build_l0_bytes[$__rate_interval]))by (job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"flush - {{job}} @ {{instance}}","metric":"","query":"sum(rate(compactor_write_build_l0_bytes[$__rate_interval]))by (job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compaction Throughput","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"num of SSTs written into next level during history compactions to next level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":24},"height":null,"hideTimeOverride":false,"id":90,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(storage_level_compact_write) by 
(job)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write - {{job}}","metric":"","query":"sum(storage_level_compact_write) by (job)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(compactor_write_build_l0_bytes) by (job)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"flush - {{job}}","metric":"","query":"sum(compactor_write_build_l0_bytes) by (job)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compaction Write Bytes","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"num of SSTs written into next level during history compactions to next level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":32},"height":null,"hideTimeOverride":false,"id":91,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(storage_level_compact_write) / sum(state_store_write_build_l0_bytes)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write amplification","metric":"","query":"sum(storage_level_compact_write) / sum(state_store_write_build_l0_bytes)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compaction Write Amplification","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"num of SSTs to be merged to next level in each 
level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":32},"height":null,"hideTimeOverride":false,"id":92,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_level_compact_cnt","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}}","metric":"","query":"storage_level_compact_cnt","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compacting SST Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":40},"height":null,"hideTimeOverride":false,"id":93,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_read_next[$__rate_interval])) by (le, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}} read","metric":"","query":"sum(rate(storage_level_compact_read_next[$__rate_interval])) by (le, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"KBs Read from Next 
Level","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":40},"height":null,"hideTimeOverride":false,"id":94,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_read_curr[$__rate_interval])) by (le, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}} read","metric":"","query":"sum(rate(storage_level_compact_read_curr[$__rate_interval])) by (le, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"KBs Read from Current Level","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":48},"height":null,"hideTimeOverride":false,"id":95,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_read_sstn_curr[$__rate_interval])) by (le, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}} read","metric":"","query":"sum(rate(storage_level_compact_read_sstn_curr[$__rate_interval])) by (le, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Count of SSTs Read from Current 
Level","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":48},"height":null,"hideTimeOverride":false,"id":96,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_write[$__rate_interval])) by (le, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}} write","metric":"","query":"sum(rate(storage_level_compact_write[$__rate_interval])) by (le, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"KBs Written to Next Level","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":56},"height":null,"hideTimeOverride":false,"id":97,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_write_sstn[$__rate_interval])) by (le, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}} write","metric":"","query":"sum(rate(storage_level_compact_write_sstn[$__rate_interval])) by (le, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Count of SSTs Written to Next Level","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"num of SSTs read from next level during history compactions to next 
level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":56},"height":null,"hideTimeOverride":false,"id":98,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_read_sstn_next[$__rate_interval])) by (le, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}} read","metric":"","query":"sum(rate(storage_level_compact_read_sstn_next[$__rate_interval])) by (le, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Count of SSTs Read from Next Level","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"Total bytes gotten from sstable_bloom_filter, for observing bloom_filter size","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":64},"height":null,"hideTimeOverride":false,"id":99,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance)(rate(compactor_sstable_bloom_filter_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(compactor_sstable_bloom_filter_size_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg_meta - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance)(rate(compactor_sstable_bloom_filter_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(compactor_sstable_bloom_filter_size_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance)(rate(compactor_sstable_file_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(compactor_sstable_file_size_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg_file - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, 
instance)(rate(compactor_sstable_file_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(compactor_sstable_file_size_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Hummock Sstable Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"Total bytes gotten from sstable_avg_key_size, for observing sstable_avg_key_size","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":64},"height":null,"hideTimeOverride":false,"id":100,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance)(rate(compactor_sstable_avg_key_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(state_store_sstable_avg_key_size_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg_key_size - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance)(rate(compactor_sstable_avg_key_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(state_store_sstable_avg_key_size_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance)(rate(compactor_sstable_avg_value_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(compactor_sstable_avg_value_size_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg_value_size - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance)(rate(compactor_sstable_avg_value_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(compactor_sstable_avg_value_size_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Hummock Sstable Item Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"Total time of operations which read from remote storage when enable 
prefetch","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":72},"height":null,"hideTimeOverride":false,"id":101,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"remote-io p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"remote-io pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Hummock Remote Read Duration","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":72},"height":null,"hideTimeOverride":false,"id":102,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(compactor_iter_scan_key_counts[$__rate_interval])) by (instance, type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"iter keys flow - {{type}} @ {{instance}} ","metric":"","query":"sum(rate(compactor_iter_scan_key_counts[$__rate_interval])) by (instance, type)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compactor Iter 
keys","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Compaction","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":159},"height":null,"hideTimeOverride":false,"id":103,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":104,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(object_store_read_bytes[$__rate_interval]))by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"read - {{job}} @ {{instance}}","metric":"","query":"sum(rate(object_store_read_bytes[$__rate_interval]))by(job,instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(object_store_write_bytes[$__rate_interval]))by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write - {{job}} @ {{instance}}","metric":"","query":"sum(rate(object_store_write_bytes[$__rate_interval]))by(job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Operation Throughput","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":105,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, 
sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} p99 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, type)(rate(object_store_operation_latency_sum[$__rate_interval])) / sum by(le, type) (rate(object_store_operation_latency_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} avg","metric":"","query":"sum by(le, type)(rate(object_store_operation_latency_sum[$__rate_interval])) / sum by(le, type) (rate(object_store_operation_latency_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Operation 
Duration","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":106,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(object_store_operation_latency_count[$__rate_interval])) by (le, type, job, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} - {{job}} @ {{instance}}","metric":"","query":"sum(rate(object_store_operation_latency_count[$__rate_interval])) by (le, type, job, instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(object_store_operation_latency_count{type=~'upload|delete'}[$__rate_interval])) by (le, media_type, job, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{media_type}}-write - {{job}} @ {{instance}}","metric":"","query":"sum(rate(object_store_operation_latency_count{type=~'upload|delete'}[$__rate_interval])) by (le, media_type, job, instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(object_store_operation_latency_count{type=~'read|readv|list|metadata'}[$__rate_interval])) by (le, media_type, job, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{media_type}}-read - {{job}} @ {{instance}}","metric":"","query":"sum(rate(object_store_operation_latency_count{type=~'read|readv|list|metadata'}[$__rate_interval])) by (le, media_type, job, instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Operation 
Rate","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":107,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} p99 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Operation 
Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":16},"height":null,"hideTimeOverride":false,"id":108,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(object_store_failure_count[$__rate_interval])) by (instance, job, type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} - {{job}} @ {{instance}}","metric":"","query":"sum(rate(object_store_failure_count[$__rate_interval])) by (instance, job, type)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Operation Failure Rate","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"$"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":16},"height":null,"hideTimeOverride":false,"id":109,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(object_store_read_bytes) * 0.01 / 1000 / 1000 / 1000","format":"time_series","hide":true,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"(Cross Region) Data Transfer Cost","metric":"","query":"sum(object_store_read_bytes) * 0.01 / 1000 / 1000 / 1000","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(object_store_operation_latency_count{type=~'read|streaming_read_start|delete'}) * 0.0004 / 1000","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GET, SELECT, and all other Requests Cost","metric":"","query":"sum(object_store_operation_latency_count{type=~'read|streaming_read_start|delete'}) * 0.0004 / 
1000","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(object_store_operation_latency_count{type=~'upload|streaming_upload_start|s3_upload_part|streaming_upload_finish|delete_objects|list'}) * 0.005 / 1000","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"PUT, COPY, POST, LIST Requests Cost","metric":"","query":"sum(object_store_operation_latency_count{type=~'upload|streaming_upload_start|s3_upload_part|streaming_upload_finish|delete_objects|list'}) * 0.005 / 1000","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Estimated S3 Cost (Realtime)","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"$"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":24},"height":null,"hideTimeOverride":false,"id":110,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(storage_level_total_file_size) by (instance) * 0.023 / 1000 / 1000","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Monthly Storage Cost","metric":"","query":"sum(storage_level_total_file_size) by (instance) * 0.023 / 1000 / 1000","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Estimated S3 Cost (Monthly)","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Object 
Storage","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":160},"height":null,"hideTimeOverride":false,"id":111,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":112,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(file_cache_latency_count[$__rate_interval])) by (op, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"file cache {{op}} @ {{instance}}","metric":"","query":"sum(rate(file_cache_latency_count[$__rate_interval])) by (op, instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(file_cache_miss[$__rate_interval])) by (instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"file cache miss @ {{instance}}","metric":"","query":"sum(rate(file_cache_miss[$__rate_interval])) by (instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(file_cache_disk_latency_count[$__rate_interval])) by (op, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"file cache disk {{op}} @ {{instance}}","metric":"","query":"sum(rate(file_cache_disk_latency_count[$__rate_interval])) by (op, 
instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Ops","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":113,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - file cache - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - file cache - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - file cache - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - file cache - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, 
instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":114,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(file_cache_disk_bytes[$__rate_interval])) by (op, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"disk {{op}} @ {{instance}}","metric":"","query":"sum(rate(file_cache_disk_bytes[$__rate_interval])) by (op, 
instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Throughput","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":115,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - file cache disk read entry - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, 
instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - file cache disk read entry - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - file cache disk read entry - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - file cache disk read entry - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Disk IO Size","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Hummock Tiered Cache","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":161},"height":null,"hideTimeOverride":false,"id":116,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":117,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Lock Time p50 - {{lock_type}} @ 
{{lock_name}}","metric":"","query":"histogram_quantile(0.5, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Lock Time p99 - {{lock_type}} @ {{lock_name}}","metric":"","query":"histogram_quantile(0.99, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Lock Time p999 - {{lock_type}} @ {{lock_name}}","metric":"","query":"histogram_quantile(0.999, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Lock Time pmax - {{lock_type}} @ {{lock_name}}","metric":"","query":"histogram_quantile(1.0, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Lock Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":118,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, method))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Real Process Time p50 - {{method}}","metric":"","query":"histogram_quantile(0.5, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, method))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, 
method))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Real Process Time p99 - {{method}}","metric":"","query":"histogram_quantile(0.99, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, method))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, method))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Real Process Time p999 - {{method}}","metric":"","query":"histogram_quantile(0.999, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, method))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, method))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Real Process Time pmax - {{method}}","metric":"","query":"histogram_quantile(1.0, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, method))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Real Process Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":119,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_version_size","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"version size","metric":"","query":"storage_version_size","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Version 
Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":120,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_current_version_id","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"current version id","metric":"","query":"storage_current_version_id","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_checkpoint_version_id","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"checkpoint version id","metric":"","query":"storage_checkpoint_version_id","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_min_pinned_version_id","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"min pinned version id","metric":"","query":"storage_min_pinned_version_id","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_min_safepoint_version_id","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"min safepoint version id","metric":"","query":"storage_min_safepoint_version_id","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Version Id","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":16},"height":null,"hideTimeOverride":false,"id":121,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_max_committed_epoch","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"max committed 
epoch","metric":"","query":"storage_max_committed_epoch","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_safe_epoch","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"safe epoch","metric":"","query":"storage_safe_epoch","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_min_pinned_epoch","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"min pinned epoch","metric":"","query":"storage_min_pinned_epoch","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Epoch","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"kbytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":16},"height":null,"hideTimeOverride":false,"id":122,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_version_stats{metric='total_key_size'}/1024","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"table{{table_id}} {{metric}}","metric":"","query":"storage_version_stats{metric='total_key_size'}/1024","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_version_stats{metric='total_value_size'}/1024","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"table{{table_id}} {{metric}}","metric":"","query":"storage_version_stats{metric='total_value_size'}/1024","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Table KV 
Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":24},"height":null,"hideTimeOverride":false,"id":123,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_version_stats{metric='total_key_count'}","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"table{{table_id}} {{metric}}","metric":"","query":"storage_version_stats{metric='total_key_count'}","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Table KV Count","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Hummock Manager","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":162},"height":null,"hideTimeOverride":false,"id":124,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":125,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"backup_job_count","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"job count","metric":"","query":"backup_job_count","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Job 
Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":126,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Job Process Time p50 - {{state}}","metric":"","query":"histogram_quantile(0.5, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Job Process Time p99 - {{state}}","metric":"","query":"histogram_quantile(0.99, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Job Process Time p999 - {{state}}","metric":"","query":"histogram_quantile(0.999, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Job Process Time pmax - {{state}}","metric":"","query":"histogram_quantile(1.0, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Job Process Time","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Backup 
Manager","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":163},"height":null,"hideTimeOverride":false,"id":127,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":128,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Create'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Create_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Create'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Create'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Create_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Create'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Create'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Create_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Create'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.CatalogService/Create'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.CatalogService/Create'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Create_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.CatalogService/Create'}[$__rate_interval])) / 
sum(irate(meta_grpc_duration_seconds_count{path='/meta.CatalogService/Create'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Create latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":0},"height":null,"hideTimeOverride":false,"id":129,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Drop'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Drop_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Drop'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Drop'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Drop_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Drop'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Drop'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Drop_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Drop'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.CatalogService/Drop'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.CatalogService/Drop'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Drop_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.CatalogService/Drop'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.CatalogService/Drop'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Drop 
latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":0},"height":null,"hideTimeOverride":false,"id":130,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetCatalog_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetCatalog_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetCatalog_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.CatalogService/GetCatalog'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetCatalog_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.CatalogService/GetCatalog'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"GetCatalog latency","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"gRPC Meta: Catalog 
Service","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":164},"height":null,"hideTimeOverride":false,"id":131,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":132,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"AddWorkerNode_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"AddWorkerNode_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"AddWorkerNode_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"AddWorkerNode_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) / 
sum(irate(meta_grpc_duration_seconds_count{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"AddWorkerNode latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":0},"height":null,"hideTimeOverride":false,"id":133,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ListAllNodes_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ListAllNodes_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ListAllNodes_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ListAllNodes_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"ListAllNodes 
latency","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"gRPC Meta: Cluster Service","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":165},"height":null,"hideTimeOverride":false,"id":134,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":135,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"CreateMaterializedView_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"CreateMaterializedView_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"CreateMaterializedView_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) / 
sum(irate(meta_grpc_duration_seconds_count{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"CreateMaterializedView_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"CreateMaterializedView latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":0},"height":null,"hideTimeOverride":false,"id":136,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"DropMaterializedView_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"DropMaterializedView_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"DropMaterializedView_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) / 
sum(irate(meta_grpc_duration_seconds_count{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"DropMaterializedView_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"DropMaterializedView latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":0},"height":null,"hideTimeOverride":false,"id":137,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Flush_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Flush_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Flush_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) / 
sum(irate(meta_grpc_duration_seconds_count{path='/meta.StreamManagerService/Flush'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Flush_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.StreamManagerService/Flush'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Flush latency","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"gRPC Meta: Stream Manager","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":166},"height":null,"hideTimeOverride":false,"id":138,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":139,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinVersionBefore_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinVersionBefore_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinVersionBefore_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinVersionBefore_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"UnpinVersionBefore latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":0},"height":null,"hideTimeOverride":false,"id":140,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinSnapshotBefore_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinSnapshotBefore_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinSnapshotBefore_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinSnapshotBefore_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"UnpinSnapshotBefore latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":0},"height":null,"hideTimeOverride":false,"id":141,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ReportCompactionTasks_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ReportCompactionTasks_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ReportCompactionTasks_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ReportCompactionTasks_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"ReportCompactionTasks latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":142,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetNewSstIds_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetNewSstIds_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetNewSstIds_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetNewSstIds_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"GetNewSstIds latency","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"gRPC Meta: Hummock Manager","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":167},"height":null,"hideTimeOverride":false,"id":143,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":144,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_report_compaction_task_counts[$__rate_interval])) by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"report_compaction_task_counts - {{instance}} ","metric":"","query":"sum(irate(state_store_report_compaction_task_counts[$__rate_interval])) 
by(job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"compaction_count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":145,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(state_store_unpin_version_before_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_version_before_latency_p50 - {{instance}} ","metric":"","query":"histogram_quantile(0.5, sum(irate(state_store_unpin_version_before_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(state_store_unpin_version_before_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_version_before_latency_p99 - {{instance}} ","metric":"","query":"histogram_quantile(0.99, sum(irate(state_store_unpin_version_before_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_unpin_version_before_latency_sum[$__rate_interval])) / sum(irate(state_store_unpin_version_before_latency_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_version_before_latency_avg","metric":"","query":"sum(irate(state_store_unpin_version_before_latency_sum[$__rate_interval])) / sum(irate(state_store_unpin_version_before_latency_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.90, sum(irate(state_store_unpin_version_before_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_version_before_latency_p90 - {{instance}} ","metric":"","query":"histogram_quantile(0.90, sum(irate(state_store_unpin_version_before_latency_bucket[$__rate_interval])) by (le, job, 
instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"version_latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":146,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(state_store_pin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pin_snapshot_latency_p50 - {{instance}} ","metric":"","query":"histogram_quantile(0.5, sum(irate(state_store_pin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(state_store_pin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pin_snapshot_latency_p99 - {{instance}} ","metric":"","query":"histogram_quantile(0.99, sum(irate(state_store_pin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(state_store_pin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pin_snapshot_latencyp90 - {{instance}} ","metric":"","query":"histogram_quantile(0.9, sum(irate(state_store_pin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_pin_snapshot_latency_sum[$__rate_interval])) / sum(irate(state_store_pin_snapshot_latency_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pin_snapshot_latency_avg","metric":"","query":"sum(irate(state_store_pin_snapshot_latency_sum[$__rate_interval])) / sum(irate(state_store_pin_snapshot_latency_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(state_store_unpin_version_snapshot_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_snapshot_latency_p50 - {{instance}} ","metric":"","query":"histogram_quantile(0.5, 
sum(irate(state_store_unpin_version_snapshot_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(state_store_unpin_version_snapshot_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_snapshot_latency_p99 - {{instance}} ","metric":"","query":"histogram_quantile(0.99, sum(irate(state_store_unpin_version_snapshot_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_unpin_snapshot_latency_sum[$__rate_interval])) / sum(irate(state_store_unpin_snapshot_latency_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_snapshot_latency_avg","metric":"","query":"sum(irate(state_store_unpin_snapshot_latency_sum[$__rate_interval])) / sum(irate(state_store_unpin_snapshot_latency_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.90, sum(irate(state_store_unpin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_snapshot_latency_p90 - {{instance}} ","metric":"","query":"histogram_quantile(0.90, sum(irate(state_store_unpin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"snapshot_latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":147,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_pin_snapshot_counts[$__rate_interval])) by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pin_snapshot_counts - {{instance}} ","metric":"","query":"sum(irate(state_store_pin_snapshot_counts[$__rate_interval])) by(job,instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_unpin_snapshot_counts[$__rate_interval])) by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_snapshot_counts - {{instance}} 
","metric":"","query":"sum(irate(state_store_unpin_snapshot_counts[$__rate_interval])) by(job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"snapshot_count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":16},"height":null,"hideTimeOverride":false,"id":148,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(state_store_get_new_sst_ids_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get_new_sst_ids_latency_latency_p50 - {{instance}} ","metric":"","query":"histogram_quantile(0.5, sum(irate(state_store_get_new_sst_ids_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(state_store_get_new_sst_ids_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get_new_sst_ids_latency_latency_p99 - {{instance}} ","metric":"","query":"histogram_quantile(0.99, sum(irate(state_store_get_new_sst_ids_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_get_new_sst_ids_latency_sum[$__rate_interval])) / sum(irate(state_store_get_new_sst_ids_latency_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get_new_sst_ids_latency_latency_avg","metric":"","query":"sum(irate(state_store_get_new_sst_ids_latency_sum[$__rate_interval])) / sum(irate(state_store_get_new_sst_ids_latency_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.90, sum(irate(state_store_get_new_sst_ids_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get_new_sst_ids_latency_latency_p90 - {{instance}} ","metric":"","query":"histogram_quantile(0.90, sum(irate(state_store_get_new_sst_ids_latency_bucket[$__rate_interval])) by (le, job, 
instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"table_latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":16},"height":null,"hideTimeOverride":false,"id":149,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_get_new_sst_ids_latency_counts[$__rate_interval]))by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get_new_sst_ids_latency_counts - {{instance}} ","metric":"","query":"sum(irate(state_store_get_new_sst_ids_latency_counts[$__rate_interval]))by(job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"table_count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":24},"height":null,"hideTimeOverride":false,"id":150,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(state_store_report_compaction_task_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"report_compaction_task_latency_p50 - {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(irate(state_store_report_compaction_task_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(state_store_report_compaction_task_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"report_compaction_task_latency_p99 - {{instance}}","metric":"","query":"histogram_quantile(0.99, 
sum(irate(state_store_report_compaction_task_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_report_compaction_task_latency_sum[$__rate_interval])) / sum(irate(state_store_report_compaction_task_latency_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"report_compaction_task_latency_avg","metric":"","query":"sum(irate(state_store_report_compaction_task_latency_sum[$__rate_interval])) / sum(irate(state_store_report_compaction_task_latency_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.90, sum(irate(state_store_report_compaction_task_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"report_compaction_task_latency_p90 - {{instance}}","metric":"","query":"histogram_quantile(0.90, sum(irate(state_store_report_compaction_task_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"compaction_latency","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"gRPC: Hummock Meta Client","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":168},"height":null,"hideTimeOverride":false,"id":151,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Qps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":152,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(frontend_query_counter_local_execution[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"","metric":"","query":"rate(frontend_query_counter_local_execution[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Query Per second in Local Execution 
Mode","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":153,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(frontend_latency_local_execution_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(frontend_latency_local_execution_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(frontend_latency_local_execution_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(frontend_latency_local_execution_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.95, sum(rate(frontend_latency_local_execution_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.95, sum(rate(frontend_latency_local_execution_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Query Latency in Local Execution 
Mode","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Frontend","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":169},"height":null,"hideTimeOverride":false,"id":154,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":155,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(lru_runtime_loop_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"","metric":"","query":"rate(lru_runtime_loop_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"LRU manager loop count per sec","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":156,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"lru_watermark_step","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"","metric":"","query":"lru_watermark_step","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"LRU manager watermark 
steps","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":157,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"lru_current_watermark_time_ms","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"","metric":"","query":"lru_current_watermark_time_ms","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"lru_physical_now_ms","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"","metric":"","query":"lru_physical_now_ms","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"LRU manager watermark_time and physical_now","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":158,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"jemalloc_allocated_bytes","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"","metric":"","query":"jemalloc_allocated_bytes","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"The memory allocated by jemalloc","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Memory 
manager","transformations":[],"transparent":false,"type":"row"}],"refresh":"10s","rows":[],"schemaVersion":12,"sharedCrosshair":true,"style":"dark","tags":["risingwave"],"templating":{"list":[]},"time":{"from":"now-30m","to":"now"},"timepicker":{"hidden":false,"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"risingwave_dashboard","uid":"Ecy3uV1nz","version":0} +{"__inputs":[],"annotations":{"list":[]},"description":"RisingWave Dashboard","editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"cacheTimeout":null,"collapsed":false,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":1,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Cluster Node","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":1},"height":null,"hideTimeOverride":false,"id":2,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["last"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(worker_num) by (worker_type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{worker_type}}","metric":"","query":"sum(worker_num) by (worker_type)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Node 
Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":1},"height":null,"hideTimeOverride":false,"id":3,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"avg(process_resident_memory_bytes) by (job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{job}} @ {{instance}}","metric":"","query":"avg(process_resident_memory_bytes) by (job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Node Memory","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":9},"height":null,"hideTimeOverride":false,"id":4,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(process_cpu_seconds_total[$__rate_interval])) by (job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{job}} @ {{instance}}","metric":"","query":"sum(rate(process_cpu_seconds_total[$__rate_interval])) by (job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Node 
CPU","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"collapsed":false,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":17},"height":null,"hideTimeOverride":false,"id":5,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Streaming","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"rows/s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":18},"height":null,"hideTimeOverride":false,"id":6,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_source_output_rows_counts[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"source={{source_name}} {{source_id}} @ {{instance}}","metric":"","query":"rate(stream_source_output_rows_counts[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Source Throughput(rows)","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"rows/s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":18},"height":null,"hideTimeOverride":false,"id":7,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(partition_input_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"actor={{actor_id}} source={{source_id}} partition={{partition}}","metric":"","query":"rate(partition_input_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Source Throughput(rows) Per 
Partition","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"MB/s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":26},"height":null,"hideTimeOverride":false,"id":8,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"(sum by (source_id)(rate(partition_input_bytes[$__rate_interval])))/(1000*1000)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"source={{source_id}}","metric":"","query":"(sum by (source_id)(rate(partition_input_bytes[$__rate_interval])))/(1000*1000)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Source Throughput(bytes)","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"MB/s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":26},"height":null,"hideTimeOverride":false,"id":9,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"(rate(partition_input_bytes[$__rate_interval]))/(1000*1000)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"actor={{actor_id}} source={{source_id}} partition={{partition}}","metric":"","query":"(rate(partition_input_bytes[$__rate_interval]))/(1000*1000)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Source Throughput(bytes) Per 
Partition","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"rows/s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":34},"height":null,"hideTimeOverride":false,"id":10,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_source_rows_per_barrier_counts[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"actor={{actor_id}} source={{source_id}} @ {{instance}}","metric":"","query":"rate(stream_source_rows_per_barrier_counts[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Source Throughput(rows) per barrier","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":34},"height":null,"hideTimeOverride":false,"id":11,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"all_barrier_nums","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"all_barrier","metric":"","query":"all_barrier_nums","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"in_flight_barrier_nums","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"in_flight_barrier","metric":"","query":"in_flight_barrier_nums","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Barrier 
Number","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":42},"height":null,"hideTimeOverride":false,"id":12,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_send_latency_p50","metric":"","query":"histogram_quantile(0.5, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_send_latency_p90","metric":"","query":"histogram_quantile(0.9, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_send_latency_p99","metric":"","query":"histogram_quantile(0.99, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_send_latency_p999","metric":"","query":"histogram_quantile(0.999, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_send_latency_pmax","metric":"","query":"histogram_quantile(1.0, sum(rate(meta_barrier_send_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(meta_barrier_send_duration_seconds_sum[$__rate_interval]) / 
rate(meta_barrier_send_duration_seconds_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_send_latency_avg","metric":"","query":"rate(meta_barrier_send_duration_seconds_sum[$__rate_interval]) / rate(meta_barrier_send_duration_seconds_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Barrier Send Latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":42},"height":null,"hideTimeOverride":false,"id":13,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_latency_p50","metric":"","query":"histogram_quantile(0.5, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_latency_p90","metric":"","query":"histogram_quantile(0.9, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_latency_p99","metric":"","query":"histogram_quantile(0.99, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_latency_p999","metric":"","query":"histogram_quantile(0.999, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_latency_pmax","metric":"","query":"histogram_quantile(1.0, sum(rate(meta_barrier_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(meta_barrier_duration_seconds_sum[$__rate_interval]) / rate(meta_barrier_duration_seconds_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_latency_avg","metric":"","query":"rate(meta_barrier_duration_seconds_sum[$__rate_interval]) / rate(meta_barrier_duration_seconds_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Barrier Latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":50},"height":null,"hideTimeOverride":false,"id":14,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_inflight_latency_p50","metric":"","query":"histogram_quantile(0.5, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_inflight_latency_p90","metric":"","query":"histogram_quantile(0.9, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_inflight_latency_p99","metric":"","query":"histogram_quantile(0.99, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_inflight_latency_p999","metric":"","query":"histogram_quantile(0.999, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_inflight_latency_pmax","metric":"","query":"histogram_quantile(1.0, sum(rate(stream_barrier_inflight_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"max(sum by(le, instance)(rate(stream_barrier_inflight_duration_seconds_sum[$__rate_interval])) / sum by(le, instance)(rate(stream_barrier_inflight_duration_seconds_count[$__rate_interval])))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_inflight_latency_avg","metric":"","query":"max(sum by(le, instance)(rate(stream_barrier_inflight_duration_seconds_sum[$__rate_interval])) / sum by(le, instance)(rate(stream_barrier_inflight_duration_seconds_count[$__rate_interval])))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Barrier In-Flight Latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":50},"height":null,"hideTimeOverride":false,"id":15,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_sync_latency_p50 - {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_sync_latency_p90 - {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by 
(le,instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_sync_latency_p99 - {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_sync_latency_p999 - {{instance}}","metric":"","query":"histogram_quantile(0.999, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_sync_latency_pmax - {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(stream_barrier_sync_storage_duration_seconds_bucket[$__rate_interval])) by (le,instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, instance)(rate(stream_barrier_sync_storage_duration_seconds_sum[$__rate_interval])) / sum by(le, instance)(rate(stream_barrier_sync_storage_duration_seconds_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_sync_latency_avg - {{instance}}","metric":"","query":"sum by(le, instance)(rate(stream_barrier_sync_storage_duration_seconds_sum[$__rate_interval])) / sum by(le, instance)(rate(stream_barrier_sync_storage_duration_seconds_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Barrier Sync Latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":58},"height":null,"hideTimeOverride":false,"id":16,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_wait_commit_latency_p50","metric":"","query":"histogram_quantile(0.5, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_wait_commit_latency_p90","metric":"","query":"histogram_quantile(0.9, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_wait_commit_latency_p99","metric":"","query":"histogram_quantile(0.99, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_wait_commit_latency_p999","metric":"","query":"histogram_quantile(0.999, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_wait_commit_latency_pmax","metric":"","query":"histogram_quantile(1.0, sum(rate(meta_barrier_wait_commit_duration_seconds_bucket[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(meta_barrier_wait_commit_duration_seconds_sum[$__rate_interval]) / rate(meta_barrier_wait_commit_duration_seconds_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"barrier_wait_commit_avg","metric":"","query":"rate(meta_barrier_wait_commit_duration_seconds_sum[$__rate_interval]) / rate(meta_barrier_wait_commit_duration_seconds_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Barrier Wait Commit 
Latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":66},"height":null,"hideTimeOverride":false,"id":17,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"rows/s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":18,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_executor_row_count[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}->{{executor_id}}","metric":"","query":"rate(stream_executor_row_count[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Executor Throughput","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":19,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_output_buffer_blocking_duration_ns[$__rate_interval]) / 1000000000","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_output_buffer_blocking_duration_ns[$__rate_interval]) / 1000000000","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor 
Backpressure","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":20,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(actor_memory_usage[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(actor_memory_usage[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor Memory Usage","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":21,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_input_buffer_blocking_duration_ns[$__rate_interval]) / 1000000000","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}->{{upstream_fragment_id}}","metric":"","query":"rate(stream_actor_input_buffer_blocking_duration_ns[$__rate_interval]) / 1000000000","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor Input Blocking Time 
Ratio","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":16},"height":null,"hideTimeOverride":false,"id":22,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_barrier_time[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_barrier_time[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor Barrier Latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":16},"height":null,"hideTimeOverride":false,"id":23,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_processing_time[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_processing_time[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor Processing 
Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":24},"height":null,"hideTimeOverride":false,"id":24,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_actor_execution_time[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_actor_execution_time[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor Execution Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"row"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":24},"height":null,"hideTimeOverride":false,"id":25,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_in_record_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_in_record_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor Input 
Row","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"row"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":32},"height":null,"hideTimeOverride":false,"id":26,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_out_record_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_out_record_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Actor Output Row","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":12,"y":32},"height":null,"hideTimeOverride":false,"id":27,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_fast_poll_duration[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_fast_poll_duration[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Fast Poll 
Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":40},"height":null,"hideTimeOverride":false,"id":28,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_fast_poll_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_fast_poll_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Fast Poll Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":40},"height":null,"hideTimeOverride":false,"id":29,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_fast_poll_duration[$__rate_interval]) / rate(stream_actor_fast_poll_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_fast_poll_duration[$__rate_interval]) / rate(stream_actor_fast_poll_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Fast Poll Avg 
Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":40},"height":null,"hideTimeOverride":false,"id":30,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_slow_poll_duration[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_slow_poll_duration[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Slow Poll Total Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":48},"height":null,"hideTimeOverride":false,"id":31,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_slow_poll_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_slow_poll_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Slow Poll 
Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":48},"height":null,"hideTimeOverride":false,"id":32,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_slow_poll_duration[$__rate_interval]) / rate(stream_actor_slow_poll_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_slow_poll_duration[$__rate_interval]) / rate(stream_actor_slow_poll_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Slow Poll Avg Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":48},"height":null,"hideTimeOverride":false,"id":33,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_poll_duration[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_poll_duration[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Poll Total 
Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":56},"height":null,"hideTimeOverride":false,"id":34,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_poll_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_poll_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Poll Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":56},"height":null,"hideTimeOverride":false,"id":35,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_poll_duration[$__rate_interval]) / rate(stream_actor_poll_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_poll_duration[$__rate_interval]) / rate(stream_actor_poll_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Poll Avg 
Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":56},"height":null,"hideTimeOverride":false,"id":36,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_idle_duration[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_idle_duration[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Idle Total Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":64},"height":null,"hideTimeOverride":false,"id":37,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_idle_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_idle_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Idle 
Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":64},"height":null,"hideTimeOverride":false,"id":38,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_idle_duration[$__rate_interval]) / rate(stream_actor_idle_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_idle_duration[$__rate_interval]) / rate(stream_actor_idle_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Idle Avg Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":64},"height":null,"hideTimeOverride":false,"id":39,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_scheduled_duration[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_scheduled_duration[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Scheduled Total 
Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":72},"height":null,"hideTimeOverride":false,"id":40,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_scheduled_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_scheduled_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Scheduled Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":72},"height":null,"hideTimeOverride":false,"id":41,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_actor_scheduled_duration[$__rate_interval]) / rate(stream_actor_scheduled_cnt[$__rate_interval]) > 0","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_actor_scheduled_duration[$__rate_interval]) / rate(stream_actor_scheduled_cnt[$__rate_interval]) > 0","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Tokio: Actor Scheduled Avg 
Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":80},"height":null,"hideTimeOverride":false,"id":42,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_join_lookup_miss_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"cache miss {{actor_id}} {{side}}","metric":"","query":"rate(stream_join_lookup_miss_count[$__rate_interval])","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_join_lookup_total_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"total lookups {{actor_id}} {{side}}","metric":"","query":"rate(stream_join_lookup_total_count[$__rate_interval])","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_join_insert_cache_miss_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"cache miss when insert{{actor_id}} {{side}}","metric":"","query":"rate(stream_join_insert_cache_miss_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Join Executor Cache","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":80},"height":null,"hideTimeOverride":false,"id":43,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 {{actor_id}}.{{wait_side}} - 
{{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 {{actor_id}}.{{wait_side}} - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p999 {{actor_id}}.{{wait_side}} - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.999, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax {{actor_id}}.{{wait_side}} - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(stream_join_barrier_align_duration_bucket[$__rate_interval])) by (le, actor_id, wait_side, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, actor_id, wait_side, job, instance)(rate(stream_join_barrier_align_duration_sum[$__rate_interval])) / sum by(le,actor_id,wait_side,job,instance) (rate(stream_join_barrier_align_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg {{actor_id}}.{{wait_side}} - {{job}} @ {{instance}}","metric":"","query":"sum by(le, actor_id, wait_side, job, instance)(rate(stream_join_barrier_align_duration_sum[$__rate_interval])) / sum by(le,actor_id,wait_side,job,instance) (rate(stream_join_barrier_align_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Join Executor Barrier 
Align","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":88},"height":null,"hideTimeOverride":false,"id":44,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_join_actor_input_waiting_duration_ns[$__rate_interval]) / 1000000000","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"rate(stream_join_actor_input_waiting_duration_ns[$__rate_interval]) / 1000000000","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Join Actor Input Blocking Time Ratio","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":88},"height":null,"hideTimeOverride":false,"id":45,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_join_match_duration_ns[$__rate_interval]) / 1000000000","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}.{{side}}","metric":"","query":"rate(stream_join_match_duration_ns[$__rate_interval]) / 1000000000","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Join Actor Match Duration Per 
Second","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":96},"height":null,"hideTimeOverride":false,"id":46,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"stream_join_cached_entries","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}} {{side}}","metric":"","query":"stream_join_cached_entries","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Join Cached Entries","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":96},"height":null,"hideTimeOverride":false,"id":47,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"stream_join_cached_rows","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}} {{side}}","metric":"","query":"stream_join_cached_rows","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Join Cached 
Rows","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":104},"height":null,"hideTimeOverride":false,"id":48,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"stream_join_cached_estimated_size","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}} {{side}}","metric":"","query":"stream_join_cached_estimated_size","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Join Cached Estimated Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":104},"height":null,"hideTimeOverride":false,"id":49,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_agg_lookup_miss_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"cache miss {{actor_id}}","metric":"","query":"rate(stream_agg_lookup_miss_count[$__rate_interval])","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_agg_lookup_total_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"total lookups {{actor_id}}","metric":"","query":"rate(stream_agg_lookup_total_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Aggregation Executor Cache Statistics For Each 
Key/State","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":112},"height":null,"hideTimeOverride":false,"id":50,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_agg_chunk_lookup_miss_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"chunk-level cache miss {{actor_id}}","metric":"","query":"rate(stream_agg_chunk_lookup_miss_count[$__rate_interval])","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_agg_chunk_lookup_total_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"chunk-level total lookups {{actor_id}}","metric":"","query":"rate(stream_agg_chunk_lookup_total_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Aggregation Executor Cache Statistics For Each StreamChunk","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":112},"height":null,"hideTimeOverride":false,"id":51,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"stream_agg_cached_keys","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{actor_id}}","metric":"","query":"stream_agg_cached_keys","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Aggregation Cached Keys","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Streaming 
Actors","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":67},"height":null,"hideTimeOverride":false,"id":52,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":53,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_exchange_frag_send_size[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{up_fragment_id}}->{{down_fragment_id}}","metric":"","query":"rate(stream_exchange_frag_send_size[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Fragment-level Remote Exchange Send Throughput","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":54,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(stream_exchange_frag_recv_size[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{up_fragment_id}}->{{down_fragment_id}}","metric":"","query":"rate(stream_exchange_frag_recv_size[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Fragment-level Remote Exchange Recv Throughput","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Streaming 
Exchange","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":68},"height":null,"hideTimeOverride":false,"id":55,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"row"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":56,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"batch_task_exchange_recv_row_number","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{query_id}} : {{source_stage_id}}.{{source_task_id}} -> {{target_stage_id}}.{{target_task_id}}","metric":"","query":"batch_task_exchange_recv_row_number","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Exchange Recv Row Number","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"row"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":57,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"batch_task_num","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"","metric":"","query":"batch_task_num","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Batch Mpp Task Number","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Batch 
Metrics","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":false,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":69},"height":null,"hideTimeOverride":false,"id":58,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Hummock","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":70},"height":null,"hideTimeOverride":false,"id":59,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_sync_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance) 
(rate(state_store_sync_duration_sum[$__rate_interval])) / sum by(le, job, instance) (rate(state_store_sync_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance) (rate(state_store_sync_duration_sum[$__rate_interval])) / sum by(le, job, instance) (rate(state_store_sync_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Build and Sync Sstable Duration","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":70},"height":null,"hideTimeOverride":false,"id":60,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_sst_store_block_request_counts[$__rate_interval])) by (job, instance, table_id, type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{table_id}} @ {{type}} - {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_sst_store_block_request_counts[$__rate_interval])) by (job, instance, table_id, type)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(file_cache_latency_count[$__rate_interval])) by (op, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"file cache {{op}} @ {{instance}}","metric":"","query":"sum(rate(file_cache_latency_count[$__rate_interval])) by (op, instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(file_cache_miss[$__rate_interval])) by (instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"file cache miss @ {{instance}}","metric":"","query":"sum(rate(file_cache_miss[$__rate_interval])) by (instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Cache 
Ops","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":78},"height":null,"hideTimeOverride":false,"id":61,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_get_duration_count[$__rate_interval])) by (job,instanc,table_id)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_get_duration_count[$__rate_interval])) by (job,instanc,table_id)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_range_reverse_scan_duration_count[$__rate_interval])) by (job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"backward scan - {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_range_reverse_scan_duration_count[$__rate_interval])) by (job,instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_get_shared_buffer_hit_counts[$__rate_interval])) by (job,instance,table_id)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"shared_buffer hit - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_get_shared_buffer_hit_counts[$__rate_interval])) by (job,instance,table_id)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_iter_in_process_counts[$__rate_interval])) by(job,instance,table_id)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"iter - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_iter_in_process_counts[$__rate_interval])) by(job,instance,table_id)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read 
Ops","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":78},"height":null,"hideTimeOverride":false,"id":62,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_get_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance, table_id)(rate(state_store_get_duration_sum[$__rate_interval])) / sum by(le, job, instance, table_id) (rate(state_store_get_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg - {{table_id}} {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance, 
table_id)(rate(state_store_get_duration_sum[$__rate_interval])) / sum by(le, job, instance, table_id) (rate(state_store_get_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Duration - Get","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":86},"height":null,"hideTimeOverride":false,"id":63,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"create_iter_time p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"create_iter_time p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"create_iter_time p999 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.999, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"create_iter_time pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_iter_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance)(rate(state_store_iter_duration_sum[$__rate_interval])) / sum by(le, 
job,instance) (rate(state_store_iter_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"create_iter_time avg - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance)(rate(state_store_iter_duration_sum[$__rate_interval])) / sum by(le, job,instance) (rate(state_store_iter_duration_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pure_scan_time p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pure_scan_time p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pure_scan_time p999 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.999, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pure_scan_time pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_iter_scan_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance)(rate(state_store_scan_iter_duration_sum[$__rate_interval])) / sum by(le, job,instance) (rate(state_store_iter_scan_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pure_scan_time avg - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance)(rate(state_store_scan_iter_duration_sum[$__rate_interval])) / sum by(le, job,instance) (rate(state_store_iter_scan_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Duration - 
Iter","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":86},"height":null,"hideTimeOverride":false,"id":64,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(0.9, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{table_id}} {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(0.9, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(0.99, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{table_id}} {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(0.99, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(0.999, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p999 - {{table_id}} {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.999, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(0.999, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(1.0, 
sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - {{table_id}} {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_get_key_size_bucket[$__rate_interval])) by (le, job, instance, table_id)) + histogram_quantile(1.0, sum(rate(state_store_get_value_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Item Size - Get","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":94},"height":null,"hideTimeOverride":false,"id":65,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p999 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.999, sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, 
sum(rate(state_store_iter_size_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Item Size - Iter","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":94},"height":null,"hideTimeOverride":false,"id":66,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p999 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.999, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_iter_item_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Item Count - 
Iter","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":102},"height":null,"hideTimeOverride":false,"id":67,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_get_key_size_sum[$__rate_interval])) by(job, instance) + sum(rate(state_store_get_value_size_sum[$__rate_interval])) by(job, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_get_key_size_sum[$__rate_interval])) by(job, instance) + sum(rate(state_store_get_value_size_sum[$__rate_interval])) by(job, instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Throughput - Get","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":102},"height":null,"hideTimeOverride":false,"id":68,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_iter_size_sum[$__rate_interval])) by(job, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_iter_size_sum[$__rate_interval])) by(job, instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Throughput - 
Iter","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":110},"height":null,"hideTimeOverride":false,"id":69,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(state_store_may_exist_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(state_store_may_exist_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_may_exist_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_may_exist_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_may_exist_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_may_exist_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_may_exist_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_may_exist_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance, table_id)(rate(state_store_may_exist_duration_sum[$__rate_interval])) / sum by(le, job, instance, table_id) (rate(state_store_may_exist_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg - {{table_id}} {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, 
instance, table_id)(rate(state_store_may_exist_duration_sum[$__rate_interval])) / sum by(le, job, instance, table_id) (rate(state_store_may_exist_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Duration - MayExist","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":110},"height":null,"hideTimeOverride":false,"id":70,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_bloom_filter_true_negative_counts[$__rate_interval])) by (job,instance,table_id,type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"bloom filter true negative - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_bloom_filter_true_negative_counts[$__rate_interval])) by (job,instance,table_id,type)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_read_req_positive_but_non_exist_counts[$__rate_interval])) by (job,instance,table_id,type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"bloom filter false positive count - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_read_req_positive_but_non_exist_counts[$__rate_interval])) by (job,instance,table_id,type)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_read_req_bloom_filter_positive_counts[$__rate_interval])) by (job,instance,table_id,type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"read_req bloom filter positive - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_read_req_bloom_filter_positive_counts[$__rate_interval])) by (job,instance,table_id,type)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_read_req_check_bloom_filter_counts[$__rate_interval])) by (job,instance,table_id,type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"read_req check bloom filter - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_read_req_check_bloom_filter_counts[$__rate_interval])) by (job,instance,table_id,type)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Bloom 
Filter","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":118},"height":null,"hideTimeOverride":false,"id":71,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_iter_scan_key_counts[$__rate_interval])) by (instance, type, table_id)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"iter keys flow - {{table_id}} @ {{type}} @ {{instance}} ","metric":"","query":"sum(rate(state_store_iter_scan_key_counts[$__rate_interval])) by (instance, type, table_id)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Iter keys flow","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":118},"height":null,"hideTimeOverride":false,"id":72,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"1 - (sum(rate(state_store_bloom_filter_true_negative_counts[$__rate_interval])) by (job,instance,table_id,type)) / (sum(rate(state_bloom_filter_check_counts[$__rate_interval])) by (job,instance,table_id,type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"bloom filter miss rate - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"1 - (sum(rate(state_store_bloom_filter_true_negative_counts[$__rate_interval])) by (job,instance,table_id,type)) / (sum(rate(state_bloom_filter_check_counts[$__rate_interval])) by (job,instance,table_id,type))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"(sum(rate(state_store_sst_store_block_request_counts{type='meta_miss'}[$__rate_interval])) by (job,instance,table_id)) / 
(sum(rate(state_store_sst_store_block_request_counts{type='meta_total'}[$__rate_interval])) by (job,instance,table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"meta cache miss rate - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"(sum(rate(state_store_sst_store_block_request_counts{type='meta_miss'}[$__rate_interval])) by (job,instance,table_id)) / (sum(rate(state_store_sst_store_block_request_counts{type='meta_total'}[$__rate_interval])) by (job,instance,table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"(sum(rate(state_store_sst_store_block_request_counts{type='data_miss'}[$__rate_interval])) by (job,instance,table_id)) / (sum(rate(state_store_sst_store_block_request_counts{type='data_total'}[$__rate_interval])) by (job,instance,table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"block cache miss rate - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"(sum(rate(state_store_sst_store_block_request_counts{type='data_miss'}[$__rate_interval])) by (job,instance,table_id)) / (sum(rate(state_store_sst_store_block_request_counts{type='data_total'}[$__rate_interval])) by (job,instance,table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"(sum(rate(file_cache_miss[$__rate_interval])) by (instance)) / (sum(rate(file_cache_latency_count{op='get'}[$__rate_interval])) by (instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"file cache miss rate @ {{instance}}","metric":"","query":"(sum(rate(file_cache_miss[$__rate_interval])) by (instance)) / (sum(rate(file_cache_latency_count{op='get'}[$__rate_interval])) by (instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"1 - (((sum(rate(state_store_read_req_bloom_filter_positive_counts[$__rate_interval])) by (job,instance,table_id,type))) / (sum(rate(state_store_read_req_check_bloom_filter_counts[$__rate_interval])) by (job,instance,table_id,type)))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"read req bloom filter filter rate - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"1 - (((sum(rate(state_store_read_req_bloom_filter_positive_counts[$__rate_interval])) by (job,instance,table_id,type))) / (sum(rate(state_store_read_req_check_bloom_filter_counts[$__rate_interval])) by (job,instance,table_id,type)))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"1 - (((sum(rate(state_store_read_req_positive_but_non_exist_counts[$__rate_interval])) by (job,instance,table_id,type))) / (sum(rate(state_store_read_req_bloom_filter_positive_counts[$__rate_interval])) by (job,instance,table_id,type)))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"read req bloom filter false positive rate - {{table_id}} - {{type}} @ {{job}} @ {{instance}}","metric":"","query":"1 - (((sum(rate(state_store_read_req_positive_but_non_exist_counts[$__rate_interval])) by (job,instance,table_id,type))) / (sum(rate(state_store_read_req_bloom_filter_positive_counts[$__rate_interval])) by (job,instance,table_id,type)))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":" Filter/Cache Miss 
Rate","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":126},"height":null,"hideTimeOverride":false,"id":73,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_iter_merge_sstable_counts_bucket[$__rate_interval])) by (le, job, table_id, type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"# merged ssts p90 - {{table_id}} @ {{job}} @ {{type}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_iter_merge_sstable_counts_bucket[$__rate_interval])) by (le, job, table_id, type))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_iter_merge_sstable_counts_bucket[$__rate_interval])) by (le, job, table_id, type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"# merged ssts p99 - {{table_id}} @ {{job}} @ {{type}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_iter_merge_sstable_counts_bucket[$__rate_interval])) by (le, job, table_id, type))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_iter_merge_sstable_counts_bucket[$__rate_interval])) by (le, job, table_id, type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"# merged ssts pmax - {{table_id}} @ {{job}} @ {{type}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_iter_merge_sstable_counts_bucket[$__rate_interval])) by (le, job, table_id, type))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance, table_id)(rate(state_store_iter_merge_sstable_counts_sum[$__rate_interval])) / sum by(le, job, instance, table_id)(rate(state_store_iter_merge_sstable_counts_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"# merged ssts avg - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance, table_id)(rate(state_store_iter_merge_sstable_counts_sum[$__rate_interval])) / sum by(le, job, instance, table_id)(rate(state_store_iter_merge_sstable_counts_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Read Merged 
SSTs","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":126},"height":null,"hideTimeOverride":false,"id":74,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_write_batch_duration_count[$__rate_interval])) by (job,instance,table_id)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write batch - {{table_id}} @ {{job}} @ {{instance}} ","metric":"","query":"sum(rate(state_store_write_batch_duration_count[$__rate_interval])) by (job,instance,table_id)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_sync_duration_count[$__rate_interval])) by (job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"l0 - {{job}} @ {{instance}} ","metric":"","query":"sum(rate(state_store_sync_duration_count[$__rate_interval])) by (job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Write Ops","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":134},"height":null,"hideTimeOverride":false,"id":75,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to shared_buffer p50 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, 
table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to shared_buffer p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to shared_buffer p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to shared_buffer pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_write_batch_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance, table_id)(rate(state_store_write_batch_duration_sum[$__rate_interval])) / sum by(le, job, instance, table_id)(rate(state_store_write_batch_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to shared_buffer avg - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance, table_id)(rate(state_store_write_batch_duration_sum[$__rate_interval])) / sum by(le, job, instance, table_id)(rate(state_store_write_batch_duration_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to object_store p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to object_store p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, 
instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to object_store p99 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to object_store pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_write_shared_buffer_sync_time_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance)(rate(state_store_write_shared_buffer_sync_time_sum[$__rate_interval])) / sum by(le, job, instance)(rate(state_store_write_shared_buffer_sync_time_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write to object_store - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance)(rate(state_store_write_shared_buffer_sync_time_sum[$__rate_interval])) / sum by(le, job, instance)(rate(state_store_write_shared_buffer_sync_time_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Write Duration","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":134},"height":null,"hideTimeOverride":false,"id":76,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_write_batch_tuple_counts[$__rate_interval])) by (job,instance,table_id)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write_batch_kv_pair_count - {{table_id}} @ {{instance}} ","metric":"","query":"sum(rate(state_store_write_batch_tuple_counts[$__rate_interval])) by (job,instance,table_id)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Write Item 
Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":142},"height":null,"hideTimeOverride":false,"id":77,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(state_store_write_batch_size_sum[$__rate_interval]))by(job,instance) / sum(rate(state_store_write_batch_size_count[$__rate_interval]))by(job,instance,table_id)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"shared_buffer - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"sum(rate(state_store_write_batch_size_sum[$__rate_interval]))by(job,instance) / sum(rate(state_store_write_batch_size_count[$__rate_interval]))by(job,instance,table_id)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(compactor_shared_buffer_to_sstable_size[$__rate_interval]))by(job,instance) / sum(rate(state_store_shared_buffer_to_sstable_size_count[$__rate_interval]))by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"sync - {{job}} @ {{instance}}","metric":"","query":"sum(rate(compactor_shared_buffer_to_sstable_size[$__rate_interval]))by(job,instance) / sum(rate(state_store_shared_buffer_to_sstable_size_count[$__rate_interval]))by(job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Write Throughput","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":142},"height":null,"hideTimeOverride":false,"id":78,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, 
instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_sync_size_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance) (rate(state_store_sync_size_sum[$__rate_interval])) / sum by(le, job, instance) (rate(state_store_sync_size_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance) (rate(state_store_sync_size_sum[$__rate_interval])) / sum by(le, job, instance) (rate(state_store_sync_size_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Checkpoint Sync Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":150},"height":null,"hideTimeOverride":false,"id":79,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"avg(state_store_meta_cache_size) by 
(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"meta cache - {{job}} @ {{instance}}","metric":"","query":"avg(state_store_meta_cache_size) by (job,instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"avg(state_store_block_cache_size) by (job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"data cache - {{job}} @ {{instance}}","metric":"","query":"avg(state_store_block_cache_size) by (job,instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(state_store_limit_memory_size) by (job)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"uploading memory - {{job}}","metric":"","query":"sum(state_store_limit_memory_size) by (job)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Cache Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":150},"height":null,"hideTimeOverride":false,"id":80,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"row_seq_scan next p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"row_seq_scan next p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"row_seq_scan next p99 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, 
instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"row_seq_scan next pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(batch_row_seq_scan_next_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance) (rate(batch_row_seq_scan_next_duration_sum[$__rate_interval])) / sum by(le, job, instance) (rate(batch_row_seq_scan_next_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"row_seq_scan next avg - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance) (rate(batch_row_seq_scan_next_duration_sum[$__rate_interval])) / sum by(le, job, instance) (rate(batch_row_seq_scan_next_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Row SeqScan Next Duration","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":158},"height":null,"hideTimeOverride":false,"id":81,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"fetch_meta_duration p50 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"fetch_meta_duration p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by 
(le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"fetch_meta_duration p99 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"fetch_meta_duration pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_iter_fetch_meta_duration_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance, table_id) (rate(state_store_iter_fetch_meta_duration_sum[$__rate_interval])) / sum by(le, job, instance, table_id) (rate(state_store_iter_fetch_meta_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"fetch_meta_duration avg - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance, table_id) (rate(state_store_iter_fetch_meta_duration_sum[$__rate_interval])) / sum by(le, job, instance, table_id) (rate(state_store_iter_fetch_meta_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Fetch Meta Duration","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":166},"height":null,"hideTimeOverride":false,"id":82,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"num of SSTs in each level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":83,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(storage_level_sst_num) by (instance, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}}","metric":"","query":"sum(storage_level_sst_num) by (instance, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"SST 
Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"KBs total file bytes in each level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"kbytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":84,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(storage_level_total_file_size) by (instance, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}}","metric":"","query":"sum(storage_level_total_file_size) by (instance, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"KBs level sst","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"num of compactions from each level to next level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":85,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(storage_level_compact_frequency) by (compactor, group, task_type, result)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{task_type}} - {{result}} - group-{{group}} @ {{compactor}}","metric":"","query":"sum(storage_level_compact_frequency) by (compactor, group, task_type, result)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compaction Success & Failure Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"num of compaction task which does not 
trigger","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":86,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_skip_compact_frequency[$__rate_interval])) by (level, type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{level}}-{{type}}","metric":"","query":"sum(rate(storage_skip_compact_frequency[$__rate_interval])) by (level, type)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compaction Skip Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"num of compactions from each level to next level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":16},"height":null,"hideTimeOverride":false,"id":87,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"avg(storage_compact_task_pending_num) by(job, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compactor_task_split_count - {{job}} @ {{instance}}","metric":"","query":"avg(storage_compact_task_pending_num) by(job, instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compactor Running Task Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"Total time of compact that have been issued to state 
store","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":16},"height":null,"hideTimeOverride":false,"id":88,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(compactor_compact_task_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compact-task p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(compactor_compact_task_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(compactor_compact_task_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compact-task p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(compactor_compact_task_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(compactor_compact_task_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compact-task pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(compactor_compact_task_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(compactor_compact_sst_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compact-key-range p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(compactor_compact_sst_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(compactor_compact_sst_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compact-key-range pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(compactor_compact_sst_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, 
sum(rate(compactor_get_table_id_total_time_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get-table-id p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(compactor_get_table_id_total_time_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(compactor_get_table_id_total_time_duration_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get-table-id pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(compactor_get_table_id_total_time_duration_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(compactor_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"remote-io p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(compactor_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(compactor_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"remote-io pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(compactor_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le)(rate(compactor_compact_task_duration_sum[$__rate_interval])) / sum by(le)(rate(compactor_compact_task_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compact-task avg","metric":"","query":"sum by(le)(rate(compactor_compact_task_duration_sum[$__rate_interval])) / sum by(le)(rate(compactor_compact_task_duration_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le)(rate(state_store_compact_sst_duration_sum[$__rate_interval])) / sum by(le)(rate(state_store_compact_sst_duration_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"compact-key-range avg","metric":"","query":"sum by(le)(rate(state_store_compact_sst_duration_sum[$__rate_interval])) / sum by(le)(rate(state_store_compact_sst_duration_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compaction Duration","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"KBs read from next level during history compactions to next 
level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":24},"height":null,"hideTimeOverride":false,"id":89,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_read_next[$__rate_interval])) by(job,instance) + sum(rate(storage_level_compact_read_curr[$__rate_interval])) by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"read - {{job}} @ {{instance}}","metric":"","query":"sum(rate(storage_level_compact_read_next[$__rate_interval])) by(job,instance) + sum(rate(storage_level_compact_read_curr[$__rate_interval])) by(job,instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_write[$__rate_interval])) by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write - {{job}} @ {{instance}}","metric":"","query":"sum(rate(storage_level_compact_write[$__rate_interval])) by(job,instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(compactor_write_build_l0_bytes[$__rate_interval]))by (job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"flush - {{job}} @ {{instance}}","metric":"","query":"sum(rate(compactor_write_build_l0_bytes[$__rate_interval]))by (job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compaction Throughput","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"num of SSTs written into next level during history compactions to next level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":24},"height":null,"hideTimeOverride":false,"id":90,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(storage_level_compact_write) by 
(job)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write - {{job}}","metric":"","query":"sum(storage_level_compact_write) by (job)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(compactor_write_build_l0_bytes) by (job)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"flush - {{job}}","metric":"","query":"sum(compactor_write_build_l0_bytes) by (job)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compaction Write Bytes","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"num of SSTs written into next level during history compactions to next level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":32},"height":null,"hideTimeOverride":false,"id":91,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(storage_level_compact_write) / sum(state_store_write_build_l0_bytes)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write amplification","metric":"","query":"sum(storage_level_compact_write) / sum(state_store_write_build_l0_bytes)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compaction Write Amplification","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"num of SSTs to be merged to next level in each 
level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":32},"height":null,"hideTimeOverride":false,"id":92,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_level_compact_cnt","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}}","metric":"","query":"storage_level_compact_cnt","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compacting SST Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":40},"height":null,"hideTimeOverride":false,"id":93,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_read_next[$__rate_interval])) by (le, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}} read","metric":"","query":"sum(rate(storage_level_compact_read_next[$__rate_interval])) by (le, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"KBs Read from Next 
Level","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":40},"height":null,"hideTimeOverride":false,"id":94,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_read_curr[$__rate_interval])) by (le, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}} read","metric":"","query":"sum(rate(storage_level_compact_read_curr[$__rate_interval])) by (le, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"KBs Read from Current Level","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":48},"height":null,"hideTimeOverride":false,"id":95,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_read_sstn_curr[$__rate_interval])) by (le, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}} read","metric":"","query":"sum(rate(storage_level_compact_read_sstn_curr[$__rate_interval])) by (le, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Count of SSTs Read from Current 
Level","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":48},"height":null,"hideTimeOverride":false,"id":96,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_write[$__rate_interval])) by (le, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}} write","metric":"","query":"sum(rate(storage_level_compact_write[$__rate_interval])) by (le, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"KBs Written to Next Level","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":56},"height":null,"hideTimeOverride":false,"id":97,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_write_sstn[$__rate_interval])) by (le, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}} write","metric":"","query":"sum(rate(storage_level_compact_write_sstn[$__rate_interval])) by (le, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Count of SSTs Written to Next Level","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"num of SSTs read from next level during history compactions to next 
level","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":56},"height":null,"hideTimeOverride":false,"id":98,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(storage_level_compact_read_sstn_next[$__rate_interval])) by (le, level_index)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"L{{level_index}} read","metric":"","query":"sum(rate(storage_level_compact_read_sstn_next[$__rate_interval])) by (le, level_index)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Count of SSTs Read from Next Level","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"Total bytes gotten from sstable_bloom_filter, for observing bloom_filter size","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":64},"height":null,"hideTimeOverride":false,"id":99,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance)(rate(compactor_sstable_bloom_filter_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(compactor_sstable_bloom_filter_size_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg_meta - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance)(rate(compactor_sstable_bloom_filter_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(compactor_sstable_bloom_filter_size_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance)(rate(compactor_sstable_file_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(compactor_sstable_file_size_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg_file - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, 
instance)(rate(compactor_sstable_file_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(compactor_sstable_file_size_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Hummock Sstable Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"Total bytes gotten from sstable_avg_key_size, for observing sstable_avg_key_size","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":64},"height":null,"hideTimeOverride":false,"id":100,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance)(rate(compactor_sstable_avg_key_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(state_store_sstable_avg_key_size_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg_key_size - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance)(rate(compactor_sstable_avg_key_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(state_store_sstable_avg_key_size_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, job, instance)(rate(compactor_sstable_avg_value_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(compactor_sstable_avg_value_size_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"avg_value_size - {{job}} @ {{instance}}","metric":"","query":"sum by(le, job, instance)(rate(compactor_sstable_avg_value_size_sum[$__rate_interval])) / sum by(le, job, instance)(rate(compactor_sstable_avg_value_size_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Hummock Sstable Item Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"Total time of operations which read from remote storage when enable 
prefetch","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":72},"height":null,"hideTimeOverride":false,"id":101,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(state_store_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"remote-io p90 - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(state_store_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(state_store_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance, table_id))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"remote-io pmax - {{table_id}} @ {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(state_store_remote_read_time_per_task_bucket[$__rate_interval])) by (le, job, instance, table_id))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Hummock Remote Read Duration","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":72},"height":null,"hideTimeOverride":false,"id":102,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(compactor_iter_scan_key_counts[$__rate_interval])) by (instance, type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"iter keys flow - {{type}} @ {{instance}} ","metric":"","query":"sum(rate(compactor_iter_scan_key_counts[$__rate_interval])) by (instance, type)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Compactor Iter 
keys","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Compaction","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":167},"height":null,"hideTimeOverride":false,"id":103,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":104,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(object_store_read_bytes[$__rate_interval]))by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"read - {{job}} @ {{instance}}","metric":"","query":"sum(rate(object_store_read_bytes[$__rate_interval]))by(job,instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(object_store_write_bytes[$__rate_interval]))by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"write - {{job}} @ {{instance}}","metric":"","query":"sum(rate(object_store_write_bytes[$__rate_interval]))by(job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Operation Throughput","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":105,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, 
sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} p99 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(object_store_operation_latency_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum by(le, type)(rate(object_store_operation_latency_sum[$__rate_interval])) / sum by(le, type) (rate(object_store_operation_latency_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} avg","metric":"","query":"sum by(le, type)(rate(object_store_operation_latency_sum[$__rate_interval])) / sum by(le, type) (rate(object_store_operation_latency_count[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Operation 
Duration","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":106,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(object_store_operation_latency_count[$__rate_interval])) by (le, type, job, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} - {{job}} @ {{instance}}","metric":"","query":"sum(rate(object_store_operation_latency_count[$__rate_interval])) by (le, type, job, instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(object_store_operation_latency_count{type=~'upload|delete'}[$__rate_interval])) by (le, media_type, job, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{media_type}}-write - {{job}} @ {{instance}}","metric":"","query":"sum(rate(object_store_operation_latency_count{type=~'upload|delete'}[$__rate_interval])) by (le, media_type, job, instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(object_store_operation_latency_count{type=~'read|readv|list|metadata'}[$__rate_interval])) by (le, media_type, job, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{media_type}}-read - {{job}} @ {{instance}}","metric":"","query":"sum(rate(object_store_operation_latency_count{type=~'read|readv|list|metadata'}[$__rate_interval])) by (le, media_type, job, instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Operation 
Rate","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":107,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} p99 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} pmax - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(object_store_operation_bytes_bucket[$__rate_interval])) by (le, type, job, instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Operation 
Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":16},"height":null,"hideTimeOverride":false,"id":108,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(object_store_failure_count[$__rate_interval])) by (instance, job, type)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"{{type}} - {{job}} @ {{instance}}","metric":"","query":"sum(rate(object_store_failure_count[$__rate_interval])) by (instance, job, type)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Operation Failure Rate","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"$"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":16},"height":null,"hideTimeOverride":false,"id":109,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(object_store_read_bytes) * 0.01 / 1000 / 1000 / 1000","format":"time_series","hide":true,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"(Cross Region) Data Transfer Cost","metric":"","query":"sum(object_store_read_bytes) * 0.01 / 1000 / 1000 / 1000","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(object_store_operation_latency_count{type=~'read|streaming_read_start|delete'}) * 0.0004 / 1000","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GET, SELECT, and all other Requests Cost","metric":"","query":"sum(object_store_operation_latency_count{type=~'read|streaming_read_start|delete'}) * 0.0004 / 
1000","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(object_store_operation_latency_count{type=~'upload|streaming_upload_start|s3_upload_part|streaming_upload_finish|delete_objects|list'}) * 0.005 / 1000","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"PUT, COPY, POST, LIST Requests Cost","metric":"","query":"sum(object_store_operation_latency_count{type=~'upload|streaming_upload_start|s3_upload_part|streaming_upload_finish|delete_objects|list'}) * 0.005 / 1000","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Estimated S3 Cost (Realtime)","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"$"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":24},"height":null,"hideTimeOverride":false,"id":110,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(storage_level_total_file_size) by (instance) * 0.023 / 1000 / 1000","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Monthly Storage Cost","metric":"","query":"sum(storage_level_total_file_size) by (instance) * 0.023 / 1000 / 1000","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Estimated S3 Cost (Monthly)","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Object 
Storage","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":168},"height":null,"hideTimeOverride":false,"id":111,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":112,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(file_cache_latency_count[$__rate_interval])) by (op, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"file cache {{op}} @ {{instance}}","metric":"","query":"sum(rate(file_cache_latency_count[$__rate_interval])) by (op, instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(file_cache_miss[$__rate_interval])) by (instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"file cache miss @ {{instance}}","metric":"","query":"sum(rate(file_cache_miss[$__rate_interval])) by (instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(file_cache_disk_latency_count[$__rate_interval])) by (op, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"file cache disk {{op}} @ {{instance}}","metric":"","query":"sum(rate(file_cache_disk_latency_count[$__rate_interval])) by (op, 
instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Ops","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":113,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - file cache - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - file cache - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - file cache - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - file cache - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(file_cache_latency_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, 
instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(file_cache_disk_latency_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Bps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":114,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(rate(file_cache_disk_bytes[$__rate_interval])) by (op, instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"disk {{op}} @ {{instance}}","metric":"","query":"sum(rate(file_cache_disk_bytes[$__rate_interval])) by (op, 
instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Throughput","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":115,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - file cache disk - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(file_cache_disk_io_size_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - file cache disk read entry - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, 
instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - file cache disk read entry - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - file cache disk read entry - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(0.99, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pmax - file cache disk read entry - {{op}} @ {{instance}}","metric":"","query":"histogram_quantile(1.0, sum(rate(file_cache_disk_read_entry_size_bucket[$__rate_interval])) by (le, op, instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Disk IO Size","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Hummock Tiered Cache","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":169},"height":null,"hideTimeOverride":false,"id":116,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":117,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Lock Time p50 - {{lock_type}} @ 
{{lock_name}}","metric":"","query":"histogram_quantile(0.5, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Lock Time p99 - {{lock_type}} @ {{lock_name}}","metric":"","query":"histogram_quantile(0.99, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Lock Time p999 - {{lock_type}} @ {{lock_name}}","metric":"","query":"histogram_quantile(0.999, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Lock Time pmax - {{lock_type}} @ {{lock_name}}","metric":"","query":"histogram_quantile(1.0, sum(rate(hummock_manager_lock_time_bucket[$__rate_interval])) by (le, lock_name, lock_type))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Lock Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":118,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, method))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Real Process Time p50 - {{method}}","metric":"","query":"histogram_quantile(0.5, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, method))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, 
method))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Real Process Time p99 - {{method}}","metric":"","query":"histogram_quantile(0.99, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, method))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, method))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Real Process Time p999 - {{method}}","metric":"","query":"histogram_quantile(0.999, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, method))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, method))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Real Process Time pmax - {{method}}","metric":"","query":"histogram_quantile(1.0, sum(rate(meta_hummock_manager_real_process_time_bucket[$__rate_interval])) by (le, method))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Real Process Time","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":119,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_version_size","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"version size","metric":"","query":"storage_version_size","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Version 
Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":120,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_current_version_id","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"current version id","metric":"","query":"storage_current_version_id","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_checkpoint_version_id","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"checkpoint version id","metric":"","query":"storage_checkpoint_version_id","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_min_pinned_version_id","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"min pinned version id","metric":"","query":"storage_min_pinned_version_id","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_min_safepoint_version_id","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"min safepoint version id","metric":"","query":"storage_min_safepoint_version_id","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Version Id","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":16},"height":null,"hideTimeOverride":false,"id":121,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_max_committed_epoch","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"max committed 
epoch","metric":"","query":"storage_max_committed_epoch","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_safe_epoch","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"safe epoch","metric":"","query":"storage_safe_epoch","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_min_pinned_epoch","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"min pinned epoch","metric":"","query":"storage_min_pinned_epoch","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Epoch","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"kbytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":16},"height":null,"hideTimeOverride":false,"id":122,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_version_stats{metric='total_key_size'}/1024","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"table{{table_id}} {{metric}}","metric":"","query":"storage_version_stats{metric='total_key_size'}/1024","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_version_stats{metric='total_value_size'}/1024","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"table{{table_id}} {{metric}}","metric":"","query":"storage_version_stats{metric='total_value_size'}/1024","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Table KV 
Size","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":24},"height":null,"hideTimeOverride":false,"id":123,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_version_stats{metric='total_key_count'}","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"table{{table_id}} {{metric}}","metric":"","query":"storage_version_stats{metric='total_key_count'}","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Table KV Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"total number of SSTs that is no longer referenced by versions but is not yet deleted from storage","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":24},"height":null,"hideTimeOverride":false,"id":124,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"storage_stale_ssts_count","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"stale SST total number","metric":"","query":"storage_stale_ssts_count","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Stale SST Total Number","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Hummock 
Manager","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":170},"height":null,"hideTimeOverride":false,"id":125,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":126,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"backup_job_count","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"job count","metric":"","query":"backup_job_count","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Job Count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":127,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Job Process Time p50 - {{state}}","metric":"","query":"histogram_quantile(0.5, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Job Process Time p99 - {{state}}","metric":"","query":"histogram_quantile(0.99, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, 
state))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.999, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Job Process Time p999 - {{state}}","metric":"","query":"histogram_quantile(0.999, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(1.0, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Job Process Time pmax - {{state}}","metric":"","query":"histogram_quantile(1.0, sum(rate(backup_job_latency_bucket[$__rate_interval])) by (le, state))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Job Process Time","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Backup Manager","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":171},"height":null,"hideTimeOverride":false,"id":128,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":129,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Create'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Create_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Create'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Create'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Create_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Create'}[$__rate_interval])) by 
(le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Create'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Create_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Create'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.CatalogService/Create'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.CatalogService/Create'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Create_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.CatalogService/Create'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.CatalogService/Create'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Create latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":0},"height":null,"hideTimeOverride":false,"id":130,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Drop'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Drop_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Drop'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Drop'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Drop_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Drop'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Drop'}[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Drop_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/Drop'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.CatalogService/Drop'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.CatalogService/Drop'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Drop_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.CatalogService/Drop'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.CatalogService/Drop'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Drop latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":0},"height":null,"hideTimeOverride":false,"id":131,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetCatalog_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetCatalog_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetCatalog_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) by 
(le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.CatalogService/GetCatalog'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetCatalog_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.CatalogService/GetCatalog'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.CatalogService/GetCatalog'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"GetCatalog latency","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"gRPC Meta: Catalog Service","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":172},"height":null,"hideTimeOverride":false,"id":132,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":133,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"AddWorkerNode_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"AddWorkerNode_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) 
by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"AddWorkerNode_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"AddWorkerNode_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.ClusterService/AddWorkerNode'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"AddWorkerNode latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":0},"height":null,"hideTimeOverride":false,"id":134,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ListAllNodes_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ListAllNodes_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ListAllNodes_p99","metric":"","query":"histogram_quantile(0.99, 
sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ListAllNodes_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.ClusterService/ListAllNodes'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"ListAllNodes latency","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"gRPC Meta: Cluster Service","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":173},"height":null,"hideTimeOverride":false,"id":135,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":136,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"CreateMaterializedView_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"CreateMaterializedView_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) by 
(le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"CreateMaterializedView_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"CreateMaterializedView_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.StreamManagerService/CreateMaterializedView'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"CreateMaterializedView latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":0},"height":null,"hideTimeOverride":false,"id":137,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"DropMaterializedView_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"DropMaterializedView_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) by 
(le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"DropMaterializedView_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"DropMaterializedView_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.StreamManagerService/DropMaterializedView'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"DropMaterializedView latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":0},"height":null,"hideTimeOverride":false,"id":138,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Flush_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Flush_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, 
sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Flush_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.StreamManagerService/Flush'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"Flush_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.StreamManagerService/Flush'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.StreamManagerService/Flush'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Flush latency","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"gRPC Meta: Stream Manager","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":174},"height":null,"hideTimeOverride":false,"id":139,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":140,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinVersionBefore_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinVersionBefore_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinVersionBefore_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinVersionBefore_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/UnpinVersionBefore'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"UnpinVersionBefore latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":0},"height":null,"hideTimeOverride":false,"id":141,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinSnapshotBefore_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinSnapshotBefore_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinSnapshotBefore_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"UnpinSnapshotBefore_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/UnpinSnapshotBefore'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"UnpinSnapshotBefore latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":0},"height":null,"hideTimeOverride":false,"id":142,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ReportCompactionTasks_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ReportCompactionTasks_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ReportCompactionTasks_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"ReportCompactionTasks_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/ReportCompactionTasks'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"ReportCompactionTasks latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":143,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetNewSstIds_p50","metric":"","query":"histogram_quantile(0.5, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) by 
(le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetNewSstIds_p90","metric":"","query":"histogram_quantile(0.9, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) by (le))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetNewSstIds_p99","metric":"","query":"histogram_quantile(0.99, sum(irate(meta_grpc_duration_seconds_bucket{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) by (le))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"GetNewSstIds_avg","metric":"","query":"sum(irate(meta_grpc_duration_seconds_sum{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval])) / sum(irate(meta_grpc_duration_seconds_count{path='/meta.HummockManagerService/GetNewSstIds'}[$__rate_interval]))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"GetNewSstIds latency","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"gRPC Meta: Hummock Manager","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":175},"height":null,"hideTimeOverride":false,"id":144,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":145,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_report_compaction_task_counts[$__rate_interval])) by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"report_compaction_task_counts - {{instance}} 
","metric":"","query":"sum(irate(state_store_report_compaction_task_counts[$__rate_interval])) by(job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"compaction_count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":146,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(state_store_unpin_version_before_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_version_before_latency_p50 - {{instance}} ","metric":"","query":"histogram_quantile(0.5, sum(irate(state_store_unpin_version_before_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(state_store_unpin_version_before_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_version_before_latency_p99 - {{instance}} ","metric":"","query":"histogram_quantile(0.99, sum(irate(state_store_unpin_version_before_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_unpin_version_before_latency_sum[$__rate_interval])) / sum(irate(state_store_unpin_version_before_latency_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_version_before_latency_avg","metric":"","query":"sum(irate(state_store_unpin_version_before_latency_sum[$__rate_interval])) / sum(irate(state_store_unpin_version_before_latency_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.90, sum(irate(state_store_unpin_version_before_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_version_before_latency_p90 - {{instance}} ","metric":"","query":"histogram_quantile(0.90, sum(irate(state_store_unpin_version_before_latency_bucket[$__rate_interval])) by (le, job, 
instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"version_latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":147,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(state_store_pin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pin_snapshot_latency_p50 - {{instance}} ","metric":"","query":"histogram_quantile(0.5, sum(irate(state_store_pin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(state_store_pin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pin_snapshot_latency_p99 - {{instance}} ","metric":"","query":"histogram_quantile(0.99, sum(irate(state_store_pin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(irate(state_store_pin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pin_snapshot_latencyp90 - {{instance}} ","metric":"","query":"histogram_quantile(0.9, sum(irate(state_store_pin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_pin_snapshot_latency_sum[$__rate_interval])) / sum(irate(state_store_pin_snapshot_latency_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pin_snapshot_latency_avg","metric":"","query":"sum(irate(state_store_pin_snapshot_latency_sum[$__rate_interval])) / sum(irate(state_store_pin_snapshot_latency_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(state_store_unpin_version_snapshot_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_snapshot_latency_p50 - {{instance}} ","metric":"","query":"histogram_quantile(0.5, 
sum(irate(state_store_unpin_version_snapshot_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(state_store_unpin_version_snapshot_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_snapshot_latency_p99 - {{instance}} ","metric":"","query":"histogram_quantile(0.99, sum(irate(state_store_unpin_version_snapshot_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_unpin_snapshot_latency_sum[$__rate_interval])) / sum(irate(state_store_unpin_snapshot_latency_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_snapshot_latency_avg","metric":"","query":"sum(irate(state_store_unpin_snapshot_latency_sum[$__rate_interval])) / sum(irate(state_store_unpin_snapshot_latency_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.90, sum(irate(state_store_unpin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_snapshot_latency_p90 - {{instance}} ","metric":"","query":"histogram_quantile(0.90, sum(irate(state_store_unpin_snapshot_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"snapshot_latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":148,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_pin_snapshot_counts[$__rate_interval])) by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"pin_snapshot_counts - {{instance}} ","metric":"","query":"sum(irate(state_store_pin_snapshot_counts[$__rate_interval])) by(job,instance)","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_unpin_snapshot_counts[$__rate_interval])) by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"unpin_snapshot_counts - {{instance}} 
","metric":"","query":"sum(irate(state_store_unpin_snapshot_counts[$__rate_interval])) by(job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"snapshot_count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":16},"height":null,"hideTimeOverride":false,"id":149,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(state_store_get_new_sst_ids_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get_new_sst_ids_latency_latency_p50 - {{instance}} ","metric":"","query":"histogram_quantile(0.5, sum(irate(state_store_get_new_sst_ids_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(state_store_get_new_sst_ids_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get_new_sst_ids_latency_latency_p99 - {{instance}} ","metric":"","query":"histogram_quantile(0.99, sum(irate(state_store_get_new_sst_ids_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_get_new_sst_ids_latency_sum[$__rate_interval])) / sum(irate(state_store_get_new_sst_ids_latency_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get_new_sst_ids_latency_latency_avg","metric":"","query":"sum(irate(state_store_get_new_sst_ids_latency_sum[$__rate_interval])) / sum(irate(state_store_get_new_sst_ids_latency_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.90, sum(irate(state_store_get_new_sst_ids_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get_new_sst_ids_latency_latency_p90 - {{instance}} ","metric":"","query":"histogram_quantile(0.90, sum(irate(state_store_get_new_sst_ids_latency_bucket[$__rate_interval])) by (le, job, 
instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"table_latency","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":16},"height":null,"hideTimeOverride":false,"id":150,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_get_new_sst_ids_latency_counts[$__rate_interval]))by(job,instance)","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"get_new_sst_ids_latency_counts - {{instance}} ","metric":"","query":"sum(irate(state_store_get_new_sst_ids_latency_counts[$__rate_interval]))by(job,instance)","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"table_count","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":24},"height":null,"hideTimeOverride":false,"id":151,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(irate(state_store_report_compaction_task_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"report_compaction_task_latency_p50 - {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(irate(state_store_report_compaction_task_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.99, sum(irate(state_store_report_compaction_task_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"report_compaction_task_latency_p99 - {{instance}}","metric":"","query":"histogram_quantile(0.99, 
sum(irate(state_store_report_compaction_task_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"sum(irate(state_store_report_compaction_task_latency_sum[$__rate_interval])) / sum(irate(state_store_report_compaction_task_latency_count[$__rate_interval]))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"report_compaction_task_latency_avg","metric":"","query":"sum(irate(state_store_report_compaction_task_latency_sum[$__rate_interval])) / sum(irate(state_store_report_compaction_task_latency_count[$__rate_interval]))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.90, sum(irate(state_store_report_compaction_task_latency_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"report_compaction_task_latency_p90 - {{instance}}","metric":"","query":"histogram_quantile(0.90, sum(irate(state_store_report_compaction_task_latency_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"compaction_latency","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"gRPC: Hummock Meta Client","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":176},"height":null,"hideTimeOverride":false,"id":152,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"Qps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":153,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(frontend_query_counter_local_execution[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"","metric":"","query":"rate(frontend_query_counter_local_execution[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Query Per second in Local Execution 
Mode","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":154,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.5, sum(rate(frontend_latency_local_execution_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p50 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.5, sum(rate(frontend_latency_local_execution_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.9, sum(rate(frontend_latency_local_execution_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p90 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.9, sum(rate(frontend_latency_local_execution_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"histogram_quantile(0.95, sum(rate(frontend_latency_local_execution_bucket[$__rate_interval])) by (le, job, instance))","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"p99 - {{job}} @ {{instance}}","metric":"","query":"histogram_quantile(0.95, sum(rate(frontend_latency_local_execution_bucket[$__rate_interval])) by (le, job, instance))","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"Query Latency in Local Execution 
Mode","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Frontend","transformations":[],"transparent":false,"type":"row"},{"cacheTimeout":null,"collapsed":true,"datasource":null,"description":null,"editable":true,"error":false,"fieldConfig":{"defaults":{"thresholds":{"mode":"absolute","steps":[]}}},"gridPos":{"h":1,"w":24,"x":0,"y":177},"height":null,"hideTimeOverride":false,"id":155,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"panels":[{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":0},"height":null,"hideTimeOverride":false,"id":156,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"rate(lru_runtime_loop_count[$__rate_interval])","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"","metric":"","query":"rate(lru_runtime_loop_count[$__rate_interval])","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"LRU manager loop count per sec","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":0},"height":null,"hideTimeOverride":false,"id":157,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"lru_watermark_step","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"","metric":"","query":"lru_watermark_step","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"LRU manager watermark 
steps","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":""},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":8},"height":null,"hideTimeOverride":false,"id":158,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":["mean"],"displayMode":"table","placement":"right"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"lru_current_watermark_time_ms","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"","metric":"","query":"lru_current_watermark_time_ms","refId":"","step":10,"target":""},{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"lru_physical_now_ms","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"","metric":"","query":"lru_physical_now_ms","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"LRU manager watermark_time and physical_now","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":8},"height":null,"hideTimeOverride":false,"id":159,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"jemalloc_allocated_bytes","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"","metric":"","query":"jemalloc_allocated_bytes","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"The memory allocated by 
jemalloc","transformations":[],"transparent":false,"type":"timeseries"},{"cacheTimeout":null,"datasource":null,"description":"","editable":true,"error":false,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"log":2,"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":16},"height":null,"hideTimeOverride":false,"id":160,"interval":null,"links":[],"maxDataPoints":100,"maxPerRow":null,"minSpan":null,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"repeat":null,"repeatDirection":null,"span":null,"targets":[{"datasource":{"type":"prometheus","uid":"risedev-prometheus"},"expr":"stream_total_mem_usage","format":"time_series","hide":false,"instant":false,"interval":"","intervalFactor":2,"legendFormat":"","metric":"","query":"stream_total_mem_usage","refId":"","step":10,"target":""}],"timeFrom":null,"timeShift":null,"title":"The memory allocated by streaming","transformations":[],"transparent":false,"type":"timeseries"}],"repeat":null,"repeatDirection":null,"span":null,"targets":[],"timeFrom":null,"timeShift":null,"title":"Memory manager","transformations":[],"transparent":false,"type":"row"}],"refresh":"10s","rows":[],"schemaVersion":12,"sharedCrosshair":true,"style":"dark","tags":["risingwave"],"templating":{"list":[]},"time":{"from":"now-30m","to":"now"},"timepicker":{"hidden":false,"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"risingwave_dashboard","uid":"Ecy3uV1nz","version":0} diff --git a/proto/batch_plan.proto b/proto/batch_plan.proto index 210be1d88d4f0..4a9202fd267f2 100644 --- a/proto/batch_plan.proto +++ b/proto/batch_plan.proto @@ -24,6 +24,12 @@ message RowSeqScanNode { common.Buffer vnode_bitmap = 4; // Whether the order on output columns should be preserved. bool ordered = 5; + + message ChunkSize { + uint32 chunk_size = 1; + } + // If along with `batch_limit`, `chunk_size` will be set. + ChunkSize chunk_size = 6; } message SysRowSeqScanNode { @@ -71,6 +77,8 @@ message FilterNode { message InsertNode { // Id of the table to perform inserting. uint32 table_id = 1; + // Version of the table. + uint64 table_version_id = 5; repeated uint32 column_indices = 2; // An optional field and will be `None` for tables without user-defined pk. // The `BatchInsertExecutor` should add a column with NULL value which will @@ -82,12 +90,16 @@ message InsertNode { message DeleteNode { // Id of the table to perform deleting. uint32 table_id = 1; + // Version of the table. + uint64 table_version_id = 3; bool returning = 2; } message UpdateNode { // Id of the table to perform updating. uint32 table_id = 1; + // Version of the table. 
+ uint64 table_version_id = 4; repeated expr.ExprNode exprs = 2; bool returning = 3; } diff --git a/proto/buf.yaml b/proto/buf.yaml index 152bc6d602d47..6a4ea7ded34d2 100644 --- a/proto/buf.yaml +++ b/proto/buf.yaml @@ -1,4 +1,7 @@ version: v1 +breaking: + use: + - WIRE_JSON # https://docs.buf.build/breaking/rules lint: use: - DEFAULT diff --git a/proto/catalog.proto b/proto/catalog.proto index 9c08ebf646d4a..870d4562b74be 100644 --- a/proto/catalog.proto +++ b/proto/catalog.proto @@ -10,12 +10,19 @@ option java_package = "com.risingwave.proto"; option optimize_for = SPEED; // The rust prost library always treats uint64 as required and message as -// optional. In order to allow `row_id_index` as optional field in -// `StreamSourceInfo` and `TableSourceInfo`, we wrap uint64 inside this message. +// optional. In order to allow `row_id_index` as an optional field, we wrap +// uint64 inside this message. message ColumnIndex { uint64 index = 1; } +message WatermarkDesc { + // The column idx the watermark is on + uint32 watermark_idx = 1; + // The expression to calculate the watermark value. + expr.ExprNode expr = 2; +} + message StreamSourceInfo { plan_common.RowFormatType row_format = 1; string row_schema_location = 2; @@ -44,6 +51,16 @@ message Source { uint32 owner = 9; StreamSourceInfo info = 10; + // Define watermarks on the source. The `repeated` is just for forward + // compatibility, currently, only one watermark on the source + repeated WatermarkDesc watermark_descs = 11; +} + +enum SinkType { + UNSPECIFIED = 0; + APPEND_ONLY = 1; + FORCE_APPEND_ONLY = 2; + UPSERT = 3; } message Sink { @@ -57,7 +74,7 @@ message Sink { repeated int32 distribution_key = 8; // pk_indices of the corresponding materialize operator's output. repeated int32 stream_key = 9; - bool append_only = 10; + SinkType sink_type = 10; uint32 owner = 11; map properties = 12; string definition = 13; @@ -138,7 +155,7 @@ message Table { string definition = 21; bool handle_pk_conflict = 22; uint32 read_prefix_len_hint = 23; - + repeated int32 watermark_indices = 24; // Per-table catalog version, used by schema change. `None` for internal tables and tests. // Not to be confused with the global catalog version for notification service. TableVersion version = 100; diff --git a/proto/common.proto b/proto/common.proto index bfb07abcb10d5..e9d10e51d4e4c 100644 --- a/proto/common.proto +++ b/proto/common.proto @@ -63,9 +63,8 @@ message Buffer { // Vnode mapping for stream fragments. Stores mapping from virtual node to parallel unit id. message ParallelUnitMapping { - uint32 fragment_id = 1; - repeated uint64 original_indices = 2; - repeated uint32 data = 3; + repeated uint32 original_indices = 1; + repeated uint32 data = 2; } message BatchQueryEpoch { diff --git a/proto/data.proto b/proto/data.proto index 1d101f0a489b8..0db71cf446773 100644 --- a/proto/data.proto +++ b/proto/data.proto @@ -49,6 +49,7 @@ message DataType { STRUCT = 15; LIST = 16; BYTEA = 17; + JSONB = 18; } TypeName type_name = 1; // Data length for char. 
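The updated `ColumnIndex` comment in catalog.proto above hinges on how prost maps proto3 fields to Rust: message-typed fields become `Option<T>`, while bare scalars always carry a (default) value. A minimal sketch of that distinction, assuming prost's default codegen conventions (the struct names below are illustrative, not the actual generated code):

// Illustrative only: hand-written structs mirroring what prost would generate.
// A bare proto3 scalar field (`uint64 row_id_index = N;`) comes out as a plain
// integer, so "no row-id column" and "row-id column at index 0" cannot be told apart:
pub struct WithBareScalar {
    pub row_id_index: u64,
}

// Wrapping the scalar in a message (`ColumnIndex`) makes prost emit an `Option`,
// which is exactly why the wrapper message exists:
pub struct ColumnIndex {
    pub index: u64,
}

pub struct WithWrapper {
    pub row_id_index: Option<ColumnIndex>, // `None` means there is no row-id column
}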
@@ -95,6 +96,7 @@ enum ArrayType { STRUCT = 13; LIST = 14; BYTEA = 15; + JSONB = 16; } message Array { diff --git a/proto/ddl_service.proto b/proto/ddl_service.proto index 7b2a868c83549..364166d6d111e 100644 --- a/proto/ddl_service.proto +++ b/proto/ddl_service.proto @@ -214,6 +214,27 @@ message ReplaceTablePlanResponse { uint64 version = 2; } +message GetTableRequest { + string database_name = 1; + string table_name = 2; +} + +message GetTableResponse { + catalog.Table table = 1; +} + +message GetDdlProgressRequest {} + +message DdlProgress { + uint64 id = 1; + string statement = 2; + string progress = 3; +} + +message GetDdlProgressResponse { + repeated DdlProgress ddl_progress = 1; +} + service DdlService { rpc CreateDatabase(CreateDatabaseRequest) returns (CreateDatabaseResponse); rpc DropDatabase(DropDatabaseRequest) returns (DropDatabaseResponse); @@ -235,4 +256,6 @@ service DdlService { rpc CreateFunction(CreateFunctionRequest) returns (CreateFunctionResponse); rpc DropFunction(DropFunctionRequest) returns (DropFunctionResponse); rpc ReplaceTablePlan(ReplaceTablePlanRequest) returns (ReplaceTablePlanResponse); + rpc GetTable(GetTableRequest) returns (GetTableResponse); + rpc GetDdlProgress(GetDdlProgressRequest) returns (GetDdlProgressResponse); } diff --git a/proto/expr.proto b/proto/expr.proto index e59a1c3e75da9..519a4835aa1b0 100644 --- a/proto/expr.proto +++ b/proto/expr.proto @@ -93,6 +93,8 @@ message ExprNode { BIT_LENGTH = 230; OVERLAY = 231; REGEXP_MATCH = 232; + POW = 233; + EXP = 234; // Boolean comparison IS_TRUE = 301; @@ -115,6 +117,7 @@ message ExprNode { ARRAY_CAT = 531; ARRAY_APPEND = 532; ARRAY_PREPEND = 533; + FORMAT_TYPE = 534; // Non-pure functions below (> 600) // ------------------------ @@ -203,6 +206,10 @@ message AggCall { ARRAY_AGG = 8; FIRST_VALUE = 9; SUM0 = 10; + VAR_POP = 11; + VAR_SAMP = 12; + STDDEV_POP = 13; + STDDEV_SAMP = 14; } message Arg { InputRefExpr input = 1; diff --git a/proto/hummock.proto b/proto/hummock.proto index 2271e66e9fdcd..e7f84b6ac7273 100644 --- a/proto/hummock.proto +++ b/proto/hummock.proto @@ -58,6 +58,12 @@ message GroupConstruct { // If `parent_group_id` is not 0, it means `parent_group_id` splits into `parent_group_id` and this group, so this group is not empty initially. 
uint64 parent_group_id = 2; repeated uint32 table_ids = 3; + uint64 group_id = 4; +} + +message GroupMetaChange { + repeated uint32 table_ids_add = 1; + repeated uint32 table_ids_remove = 2; } message GroupDestroy {} @@ -67,6 +73,7 @@ message GroupDelta { IntraLevelDelta intra_level = 1; GroupConstruct group_construct = 2; GroupDestroy group_destroy = 3; + GroupMetaChange group_meta_change = 4; } } @@ -79,6 +86,9 @@ message HummockVersion { message Levels { repeated Level levels = 1; OverlappingLevel l0 = 2; + uint64 group_id = 3; + uint64 parent_group_id = 4; + repeated uint32 member_table_ids = 5; } uint64 id = 1; // Levels of each compaction group @@ -117,16 +127,10 @@ message HummockSnapshot { uint64 current_epoch = 2; } -message PinVersionRequest { - uint32 context_id = 1; - uint64 last_pinned = 2; -} - -message PinVersionResponse { - common.Status status = 1; +message VersionUpdatePayload { oneof payload { - HummockVersionDeltas version_deltas = 2; - HummockVersion pinned_version = 3; + HummockVersionDeltas version_deltas = 1; + HummockVersion pinned_version = 2; } } @@ -250,6 +254,18 @@ message CompactTask { map table_options = 17; uint64 current_epoch_time = 18; uint64 target_sub_level_id = 19; + + enum TaskType { + TYPE_UNSPECIFIED = 0; + DYNAMIC = 1; + SPACE_RECLAIM = 2; + MANUAL = 3; + SHARED_BUFFER = 4; + TTL = 5; + } + + // Identifies whether the task is space_reclaim, if the compact_task_type increases, it will be refactored to enum + TaskType task_type = 20; } message LevelHandler { @@ -268,12 +284,19 @@ message CompactStatus { repeated LevelHandler level_handlers = 2; } +// Config info of compaction group. message CompactionGroup { + uint64 id = 1; + CompactionConfig compaction_config = 4; +} + +// Complete info of compaction group. 
+// The info is the aggregate of `HummockVersion` and `CompactionGroupConfig` +message CompactionGroupInfo { uint64 id = 1; uint64 parent_id = 2; repeated uint32 member_table_ids = 3; CompactionConfig compaction_config = 4; - map table_id_to_options = 5; } message CompactTaskAssignment { @@ -383,13 +406,6 @@ message ReportVacuumTaskResponse { common.Status status = 1; } -message GetCompactionGroupsRequest {} - -message GetCompactionGroupsResponse { - common.Status status = 1; - repeated CompactionGroup compaction_groups = 2; -} - message TriggerManualCompactionRequest { uint64 compaction_group_id = 1; KeyRange key_range = 2; @@ -452,7 +468,7 @@ message RiseCtlGetPinnedSnapshotsSummaryResponse { message InitMetadataForReplayRequest { repeated catalog.Table tables = 1; - repeated CompactionGroup compaction_groups = 2; + repeated CompactionGroupInfo compaction_groups = 2; } message InitMetadataForReplayResponse {} @@ -483,7 +499,7 @@ message RiseCtlListCompactionGroupRequest {} message RiseCtlListCompactionGroupResponse { common.Status status = 1; - repeated CompactionGroup compaction_groups = 2; + repeated CompactionGroupInfo compaction_groups = 2; } message RiseCtlUpdateCompactionConfigRequest { @@ -514,6 +530,14 @@ message SetCompactorRuntimeConfigRequest { message SetCompactorRuntimeConfigResponse {} +message PinVersionRequest { + uint32 context_id = 1; +} + +message PinVersionResponse { + HummockVersion pinned_version = 1; +} + service HummockManagerService { rpc UnpinVersionBefore(UnpinVersionBeforeRequest) returns (UnpinVersionBeforeResponse); rpc GetCurrentVersion(GetCurrentVersionRequest) returns (GetCurrentVersionResponse); @@ -532,7 +556,6 @@ service HummockManagerService { rpc GetNewSstIds(GetNewSstIdsRequest) returns (GetNewSstIdsResponse); rpc SubscribeCompactTasks(SubscribeCompactTasksRequest) returns (stream SubscribeCompactTasksResponse); rpc ReportVacuumTask(ReportVacuumTaskRequest) returns (ReportVacuumTaskResponse); - rpc GetCompactionGroups(GetCompactionGroupsRequest) returns (GetCompactionGroupsResponse); rpc TriggerManualCompaction(TriggerManualCompactionRequest) returns (TriggerManualCompactionResponse); rpc ReportFullScanTask(ReportFullScanTaskRequest) returns (ReportFullScanTaskResponse); rpc TriggerFullGC(TriggerFullGCRequest) returns (TriggerFullGCResponse); @@ -542,6 +565,7 @@ service HummockManagerService { rpc RiseCtlUpdateCompactionConfig(RiseCtlUpdateCompactionConfigRequest) returns (RiseCtlUpdateCompactionConfigResponse); rpc InitMetadataForReplay(InitMetadataForReplayRequest) returns (InitMetadataForReplayResponse); rpc SetCompactorRuntimeConfig(SetCompactorRuntimeConfigRequest) returns (SetCompactorRuntimeConfigResponse); + rpc PinVersion(PinVersionRequest) returns (PinVersionResponse); } message CompactionConfig { @@ -560,6 +584,7 @@ message CompactionConfig { uint64 target_file_size_base = 10; uint32 compaction_filter_mask = 11; uint32 max_sub_compaction = 12; + uint64 max_space_reclaim_bytes = 13; } message TableStats { diff --git a/proto/java_binding.proto b/proto/java_binding.proto new file mode 100644 index 0000000000000..017655963f256 --- /dev/null +++ b/proto/java_binding.proto @@ -0,0 +1,35 @@ +syntax = "proto3"; + +package java_binding; + +import "catalog.proto"; +import "hummock.proto"; + +option java_package = "com.risingwave.proto"; +option optimize_for = SPEED; + +// When `left` or `right` is none, it represents unbounded. 
+message KeyRange { + enum Bound { + UNSPECIFIED = 0; + UNBOUNDED = 1; + INCLUDED = 2; + EXCLUDED = 3; + } + bytes left = 1; + bytes right = 2; + Bound left_bound = 3; + Bound right_bound = 4; +} + +message ReadPlan { + string object_store_url = 1; + string data_dir = 2; + + KeyRange key_range = 3; + uint32 table_id = 4; + uint64 epoch = 5; + + hummock.HummockVersion version = 6; + catalog.Table table_catalog = 7; +} diff --git a/proto/leader.proto b/proto/leader.proto deleted file mode 100644 index aa88f1f8ad539..0000000000000 --- a/proto/leader.proto +++ /dev/null @@ -1,27 +0,0 @@ -syntax = "proto3"; - -package leader; - -import "common.proto"; - -message LeaderRequest {} - -message LeaderResponse { - common.HostAddress leader_addr = 1; -} - -message MembersRequest {} - -message Member { - common.HostAddress member_addr = 1; - int64 lease_id = 2; -} - -message MembersResponse { - repeated Member members = 1; -} - -service LeaderService { - rpc Leader(LeaderRequest) returns (LeaderResponse); - rpc Members(MembersRequest) returns (MembersResponse); -} diff --git a/proto/meta.proto b/proto/meta.proto index e80c23a2e66a4..4c3a33d0068a3 100644 --- a/proto/meta.proto +++ b/proto/meta.proto @@ -89,6 +89,12 @@ message TableFragments { stream_plan.StreamEnvironment env = 6; } +/// Parallel unit mapping with fragment id, used for notification. +message FragmentParallelUnitMapping { + uint32 fragment_id = 1; + common.ParallelUnitMapping mapping = 2; +} + // TODO: remove this when dashboard refactored. message ActorLocation { common.WorkerNode node = 1; @@ -104,6 +110,20 @@ message FlushResponse { hummock.HummockSnapshot snapshot = 2; } +message CreatingJobInfo { + uint32 database_id = 1; + uint32 schema_id = 2; + string name = 3; +} + +message CancelCreatingJobsRequest { + repeated CreatingJobInfo infos = 1; +} + +message CancelCreatingJobsResponse { + common.Status status = 1; +} + message ListTableFragmentsRequest { repeated uint32 table_ids = 1; } @@ -127,6 +147,7 @@ message ListTableFragmentsResponse { service StreamManagerService { rpc Flush(FlushRequest) returns (FlushResponse); + rpc CancelCreatingJobs(CancelCreatingJobsRequest) returns (CancelCreatingJobsResponse); rpc ListTableFragments(ListTableFragmentsRequest) returns (ListTableFragmentsResponse); } @@ -139,6 +160,8 @@ message AddWorkerNodeRequest { } message AddWorkerNodeResponse { + reserved 3; + reserved "system_params"; common.Status status = 1; common.WorkerNode node = 2; } @@ -206,7 +229,7 @@ message MetaSnapshot { repeated catalog.View views = 7; repeated catalog.Function functions = 15; repeated user.UserInfo users = 8; - repeated common.ParallelUnitMapping parallel_unit_mappings = 9; + repeated FragmentParallelUnitMapping parallel_unit_mappings = 9; repeated common.WorkerNode nodes = 10; hummock.HummockSnapshot hummock_snapshot = 11; hummock.HummockVersion hummock_version = 12; @@ -236,7 +259,7 @@ message SubscribeResponse { catalog.View view = 10; catalog.Function function = 18; user.UserInfo user = 11; - common.ParallelUnitMapping parallel_unit_mapping = 12; + FragmentParallelUnitMapping parallel_unit_mapping = 12; common.WorkerNode node = 13; hummock.HummockSnapshot hummock_snapshot = 14; hummock.HummockVersionDeltas hummock_version_deltas = 15; @@ -249,17 +272,6 @@ service NotificationService { rpc Subscribe(SubscribeRequest) returns (stream SubscribeResponse); } -message MetaLeaderInfo { - string node_address = 1; - uint64 lease_id = 2; -} - -message MetaLeaseInfo { - MetaLeaderInfo leader = 1; - uint64 lease_register_time 
= 2; - uint64 lease_expire_time = 3; -} - message PauseRequest {} message PauseResponse {} @@ -297,3 +309,53 @@ service ScaleService { rpc GetClusterInfo(GetClusterInfoRequest) returns (GetClusterInfoResponse); rpc Reschedule(RescheduleRequest) returns (RescheduleResponse); } + +message MembersRequest {} + +message MetaMember { + common.HostAddress address = 1; + bool is_leader = 2; +} + +message MembersResponse { + repeated MetaMember members = 1; +} + +service MetaMemberService { + rpc Members(MembersRequest) returns (MembersResponse); +} + +// The schema for persisted system parameters. +// Note on backward compatibility: +// - Do not remove deprecated fields. +// - To rename, change the type or semantic of a field, introduce a new field postfixed by the version. +message SystemParams { + optional uint32 barrier_interval_ms = 1; + optional uint64 checkpoint_frequency = 2; + optional uint32 sstable_size_mb = 3; + optional uint32 block_size_kb = 4; + optional double bloom_false_positive = 5; + optional string state_store = 6; + optional string data_directory = 7; + optional string backup_storage_url = 8; + optional string backup_storage_directory = 9; +} + +message GetSystemParamsRequest {} + +message GetSystemParamsResponse { + SystemParams params = 1; +} + +message SetSystemParamRequest { + string param = 1; + // None means set to default value. + optional string value = 2; +} + +message SetSystemParamResponse {} + +service SystemParamsService { + rpc GetSystemParams(GetSystemParamsRequest) returns (GetSystemParamsResponse); + rpc SetSystemParam(SetSystemParamRequest) returns (SetSystemParamResponse); +} diff --git a/proto/plan_common.proto b/proto/plan_common.proto index bedcd6eacefaa..3b0941af36e92 100644 --- a/proto/plan_common.proto +++ b/proto/plan_common.proto @@ -79,4 +79,6 @@ enum RowFormatType { MAXWELL = 5; CANAL_JSON = 6; CSV = 7; + NATIVE = 8; + DEBEZIUM_AVRO = 9; } diff --git a/proto/stream_plan.proto b/proto/stream_plan.proto index 5ca7c9d4f52e9..fc900da738405 100644 --- a/proto/stream_plan.proto +++ b/proto/stream_plan.proto @@ -44,6 +44,10 @@ message UpdateMutation { // Merge executor can be uniquely identified by a combination of actor id and upstream fragment id. uint32 actor_id = 1; uint32 upstream_fragment_id = 2; + // - For scaling, this is always `None`. + // - For plan change, the upstream fragment will be changed to a new one, and this will be `Some`. + // In this case, all the upstream actors should be removed and replaced by the `new` ones. + optional uint32 new_upstream_fragment_id = 5; // Added upstream actors. repeated uint32 added_upstream_actor_id = 3; // Removed upstream actors. @@ -114,7 +118,7 @@ message StreamMessage { // Hash mapping for compute node. Stores mapping from virtual node to actor id. 
message ActorMapping { - repeated uint64 original_indices = 1; + repeated uint32 original_indices = 1; repeated uint32 data = 2; } @@ -135,11 +139,20 @@ message SourceNode { StreamSource source_inner = 1; } +message SinkDesc { + uint32 id = 1; + string name = 2; + string definition = 3; + repeated plan_common.ColumnDesc columns = 4; + repeated plan_common.ColumnOrder pk = 5; + repeated uint32 stream_key = 6; + repeated uint32 distribution_key = 7; + map properties = 8; + catalog.SinkType sink_type = 9; +} + message SinkNode { - uint32 table_id = 1; - map properties = 3; - repeated plan_common.Field fields = 4; - repeated uint32 sink_pk = 5; + SinkDesc sink_desc = 1; } message ProjectNode { @@ -205,6 +218,7 @@ message SimpleAggNode { // Whether to optimize for append only stream. // It is true when the input is append-only bool is_append_only = 5; + map distinct_dedup_tables = 6; } message HashAggNode { @@ -215,6 +229,7 @@ message HashAggNode { // Whether to optimize for append only stream. // It is true when the input is append-only bool is_append_only = 5; + map distinct_dedup_tables = 6; } message TopNNode { @@ -251,10 +266,21 @@ message HashJoinNode { catalog.Table right_degree_table = 9; // The output indices of current node repeated uint32 output_indices = 10; - repeated bool null_safe = 11; + // Left deduped input pk indices. The pk of the left_table and + // left_degree_table is [left_join_key | left_deduped_input_pk_indices] + // and is expected to be the shortest key which starts with + // the join key and satisfies unique constraint. + repeated uint32 left_deduped_input_pk_indices = 11; + // Right deduped input pk indices. The pk of the right_table and + // right_degree_table is [right_join_key | right_deduped_input_pk_indices] + // and is expected to be the shortest key which starts with + // the join key and satisfies unique constraint. + repeated uint32 right_deduped_input_pk_indices = 12; + + repeated bool null_safe = 13; // Whether to optimize for append only stream. // It is true when the input is append-only - bool is_append_only = 12; + bool is_append_only = 14; } message DynamicFilterNode { @@ -325,6 +351,9 @@ enum ChainType { // BACKFILL is corresponding to the backfill executor. BACKFILL = 3; + + // UPSTREAM_ONLY is corresponding to the chain executor, but doesn't consume the snapshot. + UPSTREAM_ONLY = 4; } // ChainNode is used for mv on mv. // 1. MergeNode (as a placeholder) for streaming read. // 2. BatchPlanNode for snapshot read. message ChainNode { + reserved 5; + reserved "same_worker_node"; + uint32 table_id = 1; // The schema of input stream, which will be used to build a MergeNode repeated plan_common.Field upstream_fields = 2; @@ -342,8 +374,6 @@ message ChainNode { // large. However, in some cases, e.g., shared state, the barrier cannot be rearranged in ChainNode. // ChainType is used to decide which implementation for the ChainNode. ChainType chain_type = 4; - // Whether to place this chain on the same worker node as upstream actors. - bool same_worker_node = 5; // Whether the upstream materialize is and this chain should be a singleton. // FIXME: This is a workaround for fragmenter since the distribution info will be lost if there's only one // fragment in the downstream mview. Remove this when we refactor the fragmenter. 
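The `*_deduped_input_pk_indices` comments above describe a state-table key layout of `[join_key | deduped_input_pk_indices]`. A minimal sketch of how such deduped indices could be derived, assuming deduplication simply drops the input pk columns already covered by the join key (the helper names are hypothetical, not RisingWave's actual implementation):

// Hypothetical helper: keep only the input pk columns not already covered by the
// join key, so that [join_key | deduped] becomes the shortest unique prefix key.
fn deduped_input_pk_indices(join_key: &[u32], input_pk: &[u32]) -> Vec<u32> {
    input_pk
        .iter()
        .copied()
        .filter(|idx| !join_key.contains(idx))
        .collect()
}

// The resulting state-table pk starts with the join key, as the comment requires.
fn state_table_pk(join_key: &[u32], input_pk: &[u32]) -> Vec<u32> {
    let mut pk = join_key.to_vec();
    pk.extend(deduped_input_pk_indices(join_key, input_pk));
    pk
}

For example, with a join key of `[1]` and an input pk of `[0, 1]`, the deduped indices would be `[0]` and the state-table pk `[1, 0]`.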
@@ -367,6 +397,8 @@ message ArrangementInfo { repeated plan_common.ColumnOrder arrange_key_orders = 1; // Column descs of the arrangement repeated plan_common.ColumnDesc column_descs = 2; + // Used to build storage table by stream lookup join of delta join. + plan_common.StorageTableDesc table_desc = 4; } // Special node for shared state, which will only be produced in fragmenter. ArrangeNode will @@ -402,18 +434,14 @@ message LookupNode { } // Info about the arrangement ArrangementInfo arrangement_table_info = 7; - // Internal table of arrangement. - catalog.Table arrangement_table = 8; } // WatermarkFilter needs to filter the upstream data by the water mark. message WatermarkFilterNode { - // The expression to calculate the watermark value. - expr.ExprNode watermark_expr = 1; - // The column the event time belongs. - uint64 event_time_col_idx = 2; - // The table used to persist watermark, the key is vnode. - catalog.Table table = 3; + // The watermark descs + repeated catalog.WatermarkDesc watermark_descs = 1; + // The tables used to persist watermarks, the key is vnode. + repeated catalog.Table tables = 2; } // Acts like a merger, but on different inputs. @@ -447,6 +475,8 @@ message SortNode { message DmlNode { // Id of the table on which DML performs. uint32 table_id = 1; + // Version of the table. + uint64 table_version_id = 3; // Column descriptions of the table. repeated plan_common.ColumnDesc column_descs = 2; } @@ -492,6 +522,7 @@ message StreamNode { DmlNode dml = 127; RowIdGenNode row_id_gen = 128; NowNode now = 129; + GroupTopNNode append_only_group_top_n = 130; } // The id for the operator. This is local per mview. // TODO: should better be a uint32. @@ -538,10 +569,7 @@ message Dispatcher { // For dispatcher types other than HASH, this is ignored. ActorMapping hash_mapping = 3; // Dispatcher can be uniquely identified by a combination of actor id and dispatcher id. - // - For dispatchers within actors, the id is the same as its downstream fragment id. - // We can't use the exchange operator id directly as the dispatch id, because an exchange - // could belong to more than one downstream in DAG. - // - For MV on MV, the id is the same as the actor id of chain node in the downstream MV. + // This is exactly the same as its downstream fragment id. uint64 dispatcher_id = 4; // Number of downstreams decides how many endpoints a dispatcher should dispatch. repeated uint32 downstream_actor_id = 5; @@ -549,6 +577,9 @@ message Dispatcher { // A StreamActor is a running fragment of the overall stream graph, message StreamActor { + reserved 7; + reserved "colocated_upstream_actor_id"; + uint32 actor_id = 1; uint32 fragment_id = 2; StreamNode nodes = 3; @@ -558,8 +589,6 @@ message StreamActor { // It is painstaking to traverse through the node tree and get upstream actor id from the root StreamNode. // We duplicate the information here to ease the parsing logic in stream manager. repeated uint32 upstream_actor_id = 6; - // Placement rule for actor, need to stay on the same node as upstream. - bool same_worker_node_as_upstream = 7; // Vnodes that the executors in this actor own. // If the fragment is a singleton, this field will not be set and leave a `None`. common.Buffer vnode_bitmap = 8; @@ -599,10 +628,11 @@ message StreamFragmentGraph { } message StreamFragmentEdge { + reserved 2; + reserved "same_worker_node"; + // Dispatch strategy for the fragment. 
DispatchStrategy dispatch_strategy = 1; - // Whether the two linked nodes should be placed on the same worker node - bool same_worker_node = 2; // A unique identifier of this edge. Generally it should be exchange node's operator id. When // rewriting fragments into delta joins or when inserting 1-to-1 exchange, there will be // virtual links generated. diff --git a/proto/stream_service.proto b/proto/stream_service.proto index b7d26ac92980d..3603b584e136c 100644 --- a/proto/stream_service.proto +++ b/proto/stream_service.proto @@ -80,6 +80,7 @@ message BarrierCompleteResponse { uint32 chain_actor_id = 1; bool done = 2; uint64 consumed_epoch = 3; + uint64 consumed_rows = 4; } string request_id = 1; common.Status status = 2; diff --git a/risedev b/risedev index 8b4f6032e875f..23aee797e777a 100755 --- a/risedev +++ b/risedev @@ -2,7 +2,7 @@ if [ -z "$(which cargo-make)" ]; then echo "Installing cargo-make..." - cargo install cargo-make --version "^0.35" + cargo install cargo-make --version "^0.35" --locked fi touch risedev-components.user.env diff --git a/risedev.yml b/risedev.yml index ab2a5b9063138..1a310b3222673 100644 --- a/risedev.yml +++ b/risedev.yml @@ -22,12 +22,16 @@ profile: # If you want to use the local s3 storage, enable the following line # - use: minio + # If you want to use aws-s3, configure AK and SK in env var and enable the following lines: # - use: aws-s3 # bucket: test-bucket # If you want to use other s3 compatible object store, open this flag: # s3-compatible: false + # If you want to create CDC source table, uncomment the following line + # - use: connector-node + # if you want to enable etcd backend, uncomment the following lines. # - use: etcd # unsafe-no-fsync: true @@ -35,9 +39,6 @@ profile: - use: compute-node - use: frontend - # If you want to create CDC source, uncomment the following line - # - use: connector-node - # If you want to enable compactor, uncomment the following line, and enable either minio or aws-s3 as well. 
# - use: compactor @@ -113,6 +114,36 @@ profile: - use: kafka persist-data: true + hdfs: + steps: + # - use: etcd + - use: meta-node + - use: compute-node + - use: frontend + # If you want to use hdfs as storage backend, configure hdfs namenode and root path: + - use: opendal + engine: hdfs + namenode: "127.0.0.1:9000" + root: risingwave + - use: compactor + # - use: prometheus + # - use: grafana + + oss: + steps: + # - use: etcd + - use: meta-node + - use: compute-node + - use: frontend + # If you want to use OSS as storage backend, configure the bucket and root path: + - use: opendal + engine: oss + bucket: "risingwave-oss-wcy" + root: risingwave + - use: compactor + # - use: prometheus + # - use: grafana + full-benchmark: steps: - use: minio @@ -131,7 +162,6 @@ profile: - use: kafka persist-data: true - 3etcd-3meta: steps: - use: etcd @@ -194,6 +224,21 @@ profile: - use: compute-node - use: frontend + java-binding-demo: + steps: + - use: minio + address: "127.0.0.1" + port: 9301 + root-user: hummockadmin + root-password: hummockadmin + hummock-bucket: hummock001 + - use: meta-node + address: "127.0.0.1" + port: 5690 + - use: compute-node + - use: frontend + - use: compactor + ###################################### ### Configurations used in Compose ### ###################################### @@ -496,6 +541,7 @@ profile: - use: meta-node - use: compute-node enable-tiered-cache: true + total-memory-bytes: 17179869184 - use: frontend - use: compactor @@ -531,6 +577,16 @@ profile: steps: - use: minio + ci-iceberg-test: + config-path: src/config/ci-iceberg-test.toml + steps: + - use: minio + - use: meta-node + - use: compute-node + enable-tiered-cache: true + - use: frontend + - use: compactor + compose: risingwave: "ghcr.io/risingwavelabs/risingwave:latest" prometheus: "prom/prometheus:latest" @@ -621,11 +677,14 @@ template: enable-tiered-cache: false # RPC endpoint for connector node - connector-rpc-endpoint: "127.0.0.1:60061" + connector-rpc-endpoint: "127.0.0.1:50051" # Minio instances used by this compute node provide-minio: "minio*" + # OpenDAL backend used by this compute node + provide-opendal: "opendal*" + # AWS s3 bucket used by this compute node provide-aws-s3: "aws-s3*" @@ -647,7 +706,7 @@ template: # Whether to enable in-memory pure KV state backend enable-in-memory-kv-state-backend: false - # Total available memory to LRU Manager in bytes + # Total available memory for the compute node in bytes total-memory-bytes: 8589934592 # Parallelism of tasks per compute node @@ -673,7 +732,7 @@ template: id: meta-node-${port} # RPC endpoint for connector node colocated with Meta - connector-rpc-endpoint: "127.0.0.1:60061" + connector-rpc-endpoint: "127.0.0.1:50051" # If `user-managed` is true, this service will be started by user with the above config user-managed: false @@ -771,6 +830,8 @@ template: # Minio instances used by this compute node provide-minio: "minio*" + # OpenDAL backend used by this compute node + provide-opendal: "opendal*" # AWS s3 bucket used by this compute node provide-aws-s3: "aws-s3*" @@ -788,7 +849,7 @@ template: address: "127.0.0.1" # Connector node listen port - port: 60061 + port: 50051 # Id of this instance id: connector-${port} @@ -822,6 +883,18 @@ template: # Jaeger has a lot of ports open, and we don't want to make this config more complex. # So we keep the default value of jaeger instead of making it part of RiseDev config.
+ # opendal: opendal: id: opendal + + engine: hdfs + + namenode: "127.0.0.1:9000" + + bucket: risingwave-test + + root: risingwave + # aws-s3 is a placeholder service to provide configurations aws-s3: # Id to be picked-up by services diff --git a/rust-toolchain b/rust-toolchain index 2f95ad6c7a554..c8e7c8562bc9d 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly-2022-12-12 +nightly-2023-01-18 diff --git a/scripts/cargo-config-disable-simd.sh b/scripts/cargo-config-disable-simd.sh deleted file mode 100644 index ea4476ea47fc4..0000000000000 --- a/scripts/cargo-config-disable-simd.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -# Exits as soon as any line fails. -# set -e - -# this needs to be run from risingwave root dir -cat << EOF > .cargo/config.toml -# Add "-Ctarget-feature=+avx2" if your x86_64 target supports AVX2 vector extensions -[target.x86_64-unknown-linux-gnu] -rustflags = [ -"-Clink-arg=-fuse-ld=lld", "-Clink-arg=-Wl,--no-rosegment", "--cfg", "tokio_unstable" -] - -# Add "-Ctarget-feature=+neon" if your aarch64 target supports NEON vector extensions -[target.aarch64-unknown-linux-gnu] -rustflags = [ -"-Clink-arg=-fuse-ld=lld", "-Clink-arg=-Wl,--no-rosegment", "--cfg", "tokio_unstable" -] - -[build] -rustflags = ["-Ctarget-cpu=native", "--cfg", "tokio_unstable"] -EOF \ No newline at end of file diff --git a/scripts/source/prepare_ci_pubsub/Cargo.toml b/scripts/source/prepare_ci_pubsub/Cargo.toml index 4e167f1530b9e..7d7331d3c0d48 100644 --- a/scripts/source/prepare_ci_pubsub/Cargo.toml +++ b/scripts/source/prepare_ci_pubsub/Cargo.toml @@ -4,6 +4,12 @@ version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" google-cloud-googleapis = { version = "0.6.0", features = ["pubsub"] } diff --git a/src/batch/Cargo.toml b/src/batch/Cargo.toml index aae6fb115d580..ca3202bfe29b9 100644 --- a/src/batch/Cargo.toml +++ b/src/batch/Cargo.toml @@ -7,18 +7,18 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" assert_matches = "1" async-recursion = "1" -async-stream = "0.3" async-trait = "0.1" -byteorder = "1" -bytes = "1" -chrono = { version = "0.4", default-features = false, features = ["clock", "std"] } -crc32fast = "1" either = "1" -farmhash = "1" fixedbitset = { version = "0.4", features = ["std"] } futures = { version = "0.3", default-features = false, features = ["alloc"] } futures-async-stream = "0.2" @@ -27,9 +27,7 @@ itertools = "0.10" minitrace = "0.4" num-traits = "0.2" parking_lot = { version = "0.12", features = ["arc_lock"] } -paste = "1" prometheus = { version = "0.13", features = ["process"] } -prost = "0.11" risingwave_common = { path = "../common" } risingwave_connector = { path = "../connector" } risingwave_expr = { path = "../expr" } @@ -38,13 +36,8 @@ risingwave_pb = { path = "../prost" } risingwave_rpc_client = { path = "../rpc_client" } risingwave_source = { path = "../source" } risingwave_storage = { path = "../storage" } -serde = { version = "1", features = ["derive"] } -serde-value = "0.7" serde_json = "1" -smallvec = "1" -static_assertions = "1" task_stats_alloc = { path =
"../utils/task_stats_alloc" } -tempfile = "3" thiserror = "1" tokio = { version = "0.2", package = "madsim-tokio", features = [ "rt", @@ -59,9 +52,6 @@ tokio-metrics = "0.1.0" tokio-stream = "0.1" tonic = { version = "0.2", package = "madsim-tonic" } tracing = "0.1" -tracing-futures = "0.2" -twox-hash = "1" -url = "2" uuid = "1" [target.'cfg(not(madsim))'.dependencies] diff --git a/src/batch/benches/expand.rs b/src/batch/benches/expand.rs index af0a5abea8fda..9096c86d9ab0e 100644 --- a/src/batch/benches/expand.rs +++ b/src/batch/benches/expand.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/benches/filter.rs b/src/batch/benches/filter.rs index 67ea2032f3922..c2f56b7895ef7 100644 --- a/src/batch/benches/filter.rs +++ b/src/batch/benches/filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/benches/hash_agg.rs b/src/batch/benches/hash_agg.rs index 8d307ba1e4cac..a1b070d5f2e84 100644 --- a/src/batch/benches/hash_agg.rs +++ b/src/batch/benches/hash_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/benches/hash_join.rs b/src/batch/benches/hash_join.rs index a03641130fd40..6bfa93495d406 100644 --- a/src/batch/benches/hash_join.rs +++ b/src/batch/benches/hash_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/benches/limit.rs b/src/batch/benches/limit.rs index c3a16add5a2d4..90136ce705dda 100644 --- a/src/batch/benches/limit.rs +++ b/src/batch/benches/limit.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/benches/nested_loop_join.rs b/src/batch/benches/nested_loop_join.rs index 9bf833c1d65db..90d282d35b413 100644 --- a/src/batch/benches/nested_loop_join.rs +++ b/src/batch/benches/nested_loop_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/benches/sort.rs b/src/batch/benches/sort.rs index 7665473e1c7dd..f83af98369e98 100644 --- a/src/batch/benches/sort.rs +++ b/src/batch/benches/sort.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/batch/benches/top_n.rs b/src/batch/benches/top_n.rs index 977b8b3788f3b..91032866ea175 100644 --- a/src/batch/benches/top_n.rs +++ b/src/batch/benches/top_n.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/benches/utils/mod.rs b/src/batch/benches/utils/mod.rs index a0e9c454747a8..500fa752feb2a 100644 --- a/src/batch/benches/utils/mod.rs +++ b/src/batch/benches/utils/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/error.rs b/src/batch/src/error.rs index bd8d53a2697e3..2f41c3d0875e2 100644 --- a/src/batch/src/error.rs +++ b/src/batch/src/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/exchange_source.rs b/src/batch/src/exchange_source.rs index a0de01b84d52e..5c34922a7c6df 100644 --- a/src/batch/src/exchange_source.rs +++ b/src/batch/src/exchange_source.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/execution/grpc_exchange.rs b/src/batch/src/execution/grpc_exchange.rs index 6f3efc9794de9..9ed1c8b659a95 100644 --- a/src/batch/src/execution/grpc_exchange.rs +++ b/src/batch/src/execution/grpc_exchange.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -77,7 +77,9 @@ impl ExchangeSource for GrpcExchangeSource { fn take_data(&mut self) -> Self::TakeDataFuture<'_> { async { let res = match self.stream.next().await { - None => return Ok(None), + None => { + return Ok(None); + } Some(r) => r, }; let task_data = res?; diff --git a/src/batch/src/execution/local_exchange.rs b/src/batch/src/execution/local_exchange.rs index 861bd1e33d937..cb2e97e7d9eb3 100644 --- a/src/batch/src/execution/local_exchange.rs +++ b/src/batch/src/execution/local_exchange.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/execution/mod.rs b/src/batch/src/execution/mod.rs index 3096fb739171d..f6e33951a0f61 100644 --- a/src/batch/src/execution/mod.rs +++ b/src/batch/src/execution/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/batch/src/executor/delete.rs b/src/batch/src/executor/delete.rs index a8c3010fde864..6c2f0529906cb 100644 --- a/src/batch/src/executor/delete.rs +++ b/src/batch/src/executor/delete.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ use anyhow::anyhow; use futures::future::try_join_all; use futures_async_stream::try_stream; use risingwave_common::array::{ArrayBuilder, DataChunk, Op, PrimitiveArrayBuilder, StreamChunk}; -use risingwave_common::catalog::{Field, Schema, TableId}; +use risingwave_common::catalog::{Field, Schema, TableId, TableVersionId}; use risingwave_common::error::{Result, RwError}; use risingwave_common::types::DataType; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; @@ -35,6 +35,7 @@ use crate::task::BatchTaskContext; pub struct DeleteExecutor { /// Target table id. table_id: TableId, + table_version_id: TableVersionId, dml_manager: DmlManagerRef, child: BoxedExecutor, chunk_size: usize, @@ -46,6 +47,7 @@ pub struct DeleteExecutor { impl DeleteExecutor { pub fn new( table_id: TableId, + table_version_id: TableVersionId, dml_manager: DmlManagerRef, child: BoxedExecutor, chunk_size: usize, @@ -55,6 +57,7 @@ impl DeleteExecutor { let table_schema = child.schema().clone(); Self { table_id, + table_version_id, dml_manager, child, chunk_size, @@ -94,14 +97,13 @@ impl DeleteExecutor { let mut notifiers = Vec::new(); // Transform the data chunk to a stream chunk, then write to the source. - let mut write_chunk = |chunk: DataChunk| -> Result<()> { + let write_chunk = |chunk: DataChunk| async { let cap = chunk.capacity(); let stream_chunk = StreamChunk::from_parts(vec![Op::Delete; cap], chunk); - let notifier = self.dml_manager.write_chunk(&self.table_id, stream_chunk)?; - notifiers.push(notifier); - - Ok(()) + self.dml_manager + .write_chunk(self.table_id, self.table_version_id, stream_chunk) + .await }; #[for_await] @@ -111,12 +113,12 @@ impl DeleteExecutor { yield data_chunk.clone(); } for chunk in builder.append_chunk(data_chunk) { - write_chunk(chunk)?; + notifiers.push(write_chunk(chunk).await?); } } if let Some(chunk) = builder.consume_all() { - write_chunk(chunk)?; + notifiers.push(write_chunk(chunk).await?); } // Wait for all chunks to be taken / written. @@ -155,6 +157,7 @@ impl BoxedExecutorBuilder for DeleteExecutor { Ok(Box::new(Self::new( table_id, + delete_node.table_version_id, source.context().dml_manager(), child, source.context.get_config().developer.batch_chunk_size, @@ -171,7 +174,9 @@ mod tests { use futures::StreamExt; use itertools::Itertools; use risingwave_common::array::Array; - use risingwave_common::catalog::{schema_test_utils, ColumnDesc, ColumnId}; + use risingwave_common::catalog::{ + schema_test_utils, ColumnDesc, ColumnId, INITIAL_TABLE_VERSION_ID, + }; use risingwave_common::test_prelude::DataChunkTestExt; use risingwave_source::dml_manager::DmlManager; @@ -207,16 +212,17 @@ mod tests { .enumerate() .map(|(i, field)| ColumnDesc::unnamed(ColumnId::new(i as _), field.data_type.clone())) .collect_vec(); - // We must create a variable to hold this `Arc` here, or it will be dropped due - // to the `Weak` reference in `DmlManager`. + // We must create a variable to hold this `Arc` here, or it will be dropped + // due to the `Weak` reference in `DmlManager`. 
let reader = dml_manager - .register_reader(table_id, &column_descs) + .register_reader(table_id, INITIAL_TABLE_VERSION_ID, &column_descs) .unwrap(); - let mut reader = reader.stream_reader_v2().into_stream_v2(); + let mut reader = reader.stream_reader().into_stream(); // Delete let delete_executor = Box::new(DeleteExecutor::new( table_id, + INITIAL_TABLE_VERSION_ID, dml_manager, Box::new(mock_executor), 1024, @@ -245,10 +251,10 @@ mod tests { // Read let chunk = reader.next().await.unwrap()?; - assert_eq!(chunk.ops().to_vec(), vec![Op::Delete; 5]); + assert_eq!(chunk.chunk.ops().to_vec(), vec![Op::Delete; 5]); assert_eq!( - chunk.columns()[0] + chunk.chunk.columns()[0] .array() .as_int32() .iter() @@ -257,7 +263,7 @@ mod tests { ); assert_eq!( - chunk.columns()[1] + chunk.chunk.columns()[1] .array() .as_int32() .iter() diff --git a/src/batch/src/executor/expand.rs b/src/batch/src/executor/expand.rs index f3c36e1343d95..1314225d465e2 100644 --- a/src/batch/src/executor/expand.rs +++ b/src/batch/src/executor/expand.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/executor/filter.rs b/src/batch/src/executor/filter.rs index fdba1f43134d8..3c7a2c5998c82 100644 --- a/src/batch/src/executor/filter.rs +++ b/src/batch/src/executor/filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/executor/generic_exchange.rs b/src/batch/src/executor/generic_exchange.rs index 6ca200542754f..b3f1565352ad0 100644 --- a/src/batch/src/executor/generic_exchange.rs +++ b/src/batch/src/executor/generic_exchange.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ use itertools::Itertools; use risingwave_common::array::DataChunk; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::error::{Result, RwError}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::select_all; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::ExchangeSource as ProstExchangeSource; @@ -166,7 +167,7 @@ impl GenericExchangeExec let mut stream = select_all( self.proto_sources .into_iter() - .zip_eq(self.source_creators) + .zip_eq_fast(self.source_creators) .map(|(prost_source, source_creator)| { Self::data_chunk_stream( prost_source, diff --git a/src/batch/src/executor/group_top_n.rs b/src/batch/src/executor/group_top_n.rs index 791e1a3de9df8..a4a7fe1b0dbc1 100644 --- a/src/batch/src/executor/group_top_n.rs +++ b/src/batch/src/executor/group_top_n.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -26,6 +26,7 @@ use risingwave_common::hash::{HashKey, HashKeyDispatcher}; use risingwave_common::types::DataType; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; use risingwave_common::util::encoding_for_comparison::encode_chunk; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::sort_util::OrderPair; use risingwave_pb::batch_plan::plan_node::NodeBody; @@ -187,7 +188,7 @@ impl GroupTopNExecutor { for (row_id, (encoded_row, key)) in encode_chunk(&chunk, &self.order_pairs) .into_iter() - .zip_eq(keys.into_iter()) + .zip_eq_fast(keys.into_iter()) .enumerate() { let heap = groups diff --git a/src/batch/src/executor/hash_agg.rs b/src/batch/src/executor/hash_agg.rs index 68e71975b3844..9585bc856f015 100644 --- a/src/batch/src/executor/hash_agg.rs +++ b/src/batch/src/executor/hash_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ use risingwave_common::catalog::{Field, Schema}; use risingwave_common::error::{Result, RwError}; use risingwave_common::hash::{HashKey, HashKeyDispatcher, PrecomputedBuildHasher}; use risingwave_common::types::DataType; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::vector_op::agg::{AggStateFactory, BoxedAggState}; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::HashAggNode; @@ -245,7 +246,7 @@ impl HashAggExecutor { key.deserialize_to_builders(&mut group_builders[..], &self.group_key_types)?; states .into_iter() - .zip_eq(&mut agg_builders) + .zip_eq_fast(&mut agg_builders) .try_for_each(|(mut aggregator, builder)| aggregator.output(builder))?; } if !has_next { diff --git a/src/batch/src/executor/hop_window.rs b/src/batch/src/executor/hop_window.rs index fca45e46bbac3..675ada448bea7 100644 --- a/src/batch/src/executor/hop_window.rs +++ b/src/batch/src/executor/hop_window.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,8 +23,7 @@ use risingwave_common::array::{DataChunk, Vis}; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::error::{Result, RwError}; use risingwave_common::types::{DataType, IntervalUnit, ScalarImpl}; -use risingwave_expr::expr::expr_binary_nonnull::new_binary_expr; -use risingwave_expr::expr::{Expression, InputRefExpression, LiteralExpression}; +use risingwave_expr::expr::{new_binary_expr, Expression, InputRefExpression, LiteralExpression}; use risingwave_expr::ExprError; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::expr::expr_node; @@ -239,8 +238,15 @@ impl HopWindowExecutor { let len = hop_start.len(); let hop_start_chunk = DataChunk::new(vec![Column::new(hop_start)], len); let (origin_cols, visibility) = data_chunk.into_parts(); - // SAFETY: Already compacted. - assert!(matches!(visibility, Vis::Compact(_))); + let len = match visibility { + Vis::Compact(len) => len, + Vis::Bitmap(_) => { + return Err(BatchError::Internal(anyhow!( + "Input array should have already been compacted!" + )) + .into()); + } + }; for i in 0..units { let window_start_col = if contains_window_start { Some(window_start_exprs[i].eval(&hop_start_chunk)?)
@@ -266,18 +272,7 @@ impl HopWindowExecutor { } }) .collect_vec(); - let len = { - if let Some(col) = &window_start_col { - col.len() - } else if let Some(col) = &window_end_col { - col.len() - } else { - // SAFETY: Either window_start or window_end is in output indices. - unreachable!(); - } - }; - let new_chunk = DataChunk::new(new_cols, len); - yield new_chunk; + yield DataChunk::new(new_cols, len); } } } diff --git a/src/batch/src/executor/insert.rs b/src/batch/src/executor/insert.rs index 148f0be46bf83..c7aa4ff969950 100644 --- a/src/batch/src/executor/insert.rs +++ b/src/batch/src/executor/insert.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ use futures_async_stream::try_stream; use risingwave_common::array::{ ArrayBuilder, DataChunk, I64Array, Op, PrimitiveArrayBuilder, StreamChunk, }; -use risingwave_common::catalog::{Field, Schema, TableId}; +use risingwave_common::catalog::{Field, Schema, TableId, TableVersionId}; use risingwave_common::error::{Result, RwError}; use risingwave_common::types::DataType; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; @@ -36,6 +36,7 @@ use crate::task::BatchTaskContext; pub struct InsertExecutor { /// Target table id. table_id: TableId, + table_version_id: TableVersionId, dml_manager: DmlManagerRef, child: BoxedExecutor, @@ -52,6 +53,7 @@ impl InsertExecutor { #[allow(clippy::too_many_arguments)] pub fn new( table_id: TableId, + table_version_id: TableVersionId, dml_manager: DmlManagerRef, child: BoxedExecutor, chunk_size: usize, @@ -63,6 +65,7 @@ impl InsertExecutor { let table_schema = child.schema().clone(); Self { table_id, + table_version_id, dml_manager, child, chunk_size, @@ -104,7 +107,7 @@ impl InsertExecutor { let mut notifiers = Vec::new(); // Transform the data chunk to a stream chunk, then write to the source. - let mut write_chunk = |chunk: DataChunk| -> Result<()> { + let write_chunk = |chunk: DataChunk| async { let cap = chunk.capacity(); let (mut columns, vis) = chunk.into_parts(); @@ -127,10 +130,9 @@ impl InsertExecutor { let stream_chunk = StreamChunk::new(vec![Op::Insert; cap], columns, vis.into_visibility()); - let notifier = self.dml_manager.write_chunk(&self.table_id, stream_chunk)?; - notifiers.push(notifier); - - Ok(()) + self.dml_manager + .write_chunk(self.table_id, self.table_version_id, stream_chunk) + .await }; #[for_await] @@ -140,12 +142,12 @@ impl InsertExecutor { yield data_chunk.clone(); } for chunk in builder.append_chunk(data_chunk) { - write_chunk(chunk)?; + notifiers.push(write_chunk(chunk).await?); } } if let Some(chunk) = builder.consume_all() { - write_chunk(chunk)?; + notifiers.push(write_chunk(chunk).await?); } // Wait for all chunks to be taken / written. 
@@ -190,6 +192,7 @@ impl BoxedExecutorBuilder for InsertExecutor { Ok(Box::new(Self::new( table_id, + insert_node.table_version_id, source.context().dml_manager(), child, source.context.get_config().developer.batch_chunk_size, @@ -212,7 +215,9 @@ mod tests { use futures::StreamExt; use itertools::Itertools; use risingwave_common::array::{Array, ArrayImpl, I32Array, StructArray}; - use risingwave_common::catalog::{schema_test_utils, ColumnDesc, ColumnId}; + use risingwave_common::catalog::{ + schema_test_utils, ColumnDesc, ColumnId, INITIAL_TABLE_VERSION_ID, + }; use risingwave_common::column_nonnull; use risingwave_common::types::DataType; use risingwave_source::dml_manager::DmlManager; @@ -271,16 +276,17 @@ mod tests { .enumerate() .map(|(i, field)| ColumnDesc::unnamed(ColumnId::new(i as _), field.data_type.clone())) .collect_vec(); - // We must create a variable to hold this `Arc` here, or it will be dropped due - // to the `Weak` reference in `DmlManager`. + // We must create a variable to hold this `Arc` here, or it will be dropped + // due to the `Weak` reference in `DmlManager`. let reader = dml_manager - .register_reader(table_id, &column_descs) + .register_reader(table_id, INITIAL_TABLE_VERSION_ID, &column_descs) .unwrap(); - let mut reader = reader.stream_reader_v2().into_stream_v2(); + let mut reader = reader.stream_reader().into_stream(); // Insert let insert_executor = Box::new(InsertExecutor::new( table_id, + INITIAL_TABLE_VERSION_ID, dml_manager, Box::new(mock_executor), 1024, @@ -308,7 +314,7 @@ mod tests { let chunk = reader.next().await.unwrap()?; assert_eq!( - chunk.columns()[0] + chunk.chunk.columns()[0] .array() .as_int32() .iter() @@ -317,7 +323,7 @@ mod tests { ); assert_eq!( - chunk.columns()[1] + chunk.chunk.columns()[1] .array() .as_int32() .iter() @@ -335,7 +341,7 @@ mod tests { vec![DataType::Int32, DataType::Int32, DataType::Int32], ) .into(); - assert_eq!(*chunk.columns()[2].array(), array); + assert_eq!(*chunk.chunk.columns()[2].array(), array); let epoch = u64::MAX; let full_range = (Bound::>::Unbounded, Bound::>::Unbounded); diff --git a/src/batch/src/executor/join/chunked_data.rs b/src/batch/src/executor/join/chunked_data.rs index e5af1561b65fd..a195069a6eec4 100644 --- a/src/batch/src/executor/join/chunked_data.rs +++ b/src/batch/src/executor/join/chunked_data.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/executor/join/distributed_lookup_join.rs b/src/batch/src/executor/join/distributed_lookup_join.rs index 26474b8cc8051..39cbaf28bf3dc 100644 --- a/src/batch/src/executor/join/distributed_lookup_join.rs +++ b/src/batch/src/executor/join/distributed_lookup_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -23,10 +23,10 @@ use risingwave_common::hash::{HashKey, HashKeyDispatcher}; use risingwave_common::row::OwnedRow; use risingwave_common::types::{DataType, Datum}; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::scan_range::ScanRange; use risingwave_common::util::sort_util::OrderType; -use risingwave_expr::expr::expr_unary::new_unary_expr; -use risingwave_expr::expr::{build_from_prost, BoxedExpression, LiteralExpression}; +use risingwave_expr::expr::{build_from_prost, new_unary_expr, BoxedExpression, LiteralExpression}; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::common::BatchQueryEpoch; use risingwave_pb::expr::expr_node::Type; @@ -354,12 +354,12 @@ impl LookupExecutorBuilder for InnerSideExecutorBuilder { for ((datum, outer_type), inner_type) in key_datums .into_iter() - .zip_eq( + .zip_eq_fast( self.outer_side_key_types .iter() .take(self.lookup_prefix_len), ) - .zip_eq( + .zip_eq_fast( self.inner_side_key_types .iter() .take(self.lookup_prefix_len), diff --git a/src/batch/src/executor/join/hash_join.rs b/src/batch/src/executor/join/hash_join.rs index 3558cda2b8077..c33f10baa6d76 100644 --- a/src/batch/src/executor/join/hash_join.rs +++ b/src/batch/src/executor/join/hash_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ use risingwave_common::hash::{HashKey, HashKeyDispatcher, PrecomputedBuildHasher use risingwave_common::row::{repeat_n, RowExt}; use risingwave_common::types::{DataType, Datum}; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::expr::{build_from_prost, BoxedExpression, Expression}; use risingwave_pb::batch_plan::plan_node::NodeBody; @@ -1529,7 +1530,7 @@ impl DataChunkMutator { ) -> Self { let mut new_visibility = BitmapBuilder::zeroed(self.0.capacity()); for (output_row_id, (output_row_non_null, &build_row_id)) in - filter.iter().zip_eq(build_row_ids.iter()).enumerate() + filter.iter().zip_eq_fast(build_row_ids.iter()).enumerate() { if output_row_non_null { build_row_matched[build_row_id] = true; @@ -1549,7 +1550,8 @@ impl DataChunkMutator { build_row_ids: &mut Vec, build_row_matched: &mut ChunkedData, ) { - for (output_row_non_null, &build_row_id) in filter.iter().zip_eq(build_row_ids.iter()) { + for (output_row_non_null, &build_row_id) in filter.iter().zip_eq_fast(build_row_ids.iter()) + { if output_row_non_null { build_row_matched[build_row_id] = true; } @@ -1605,7 +1607,7 @@ impl DataChunkMutator { first_output_row_id.clear(); for (output_row_id, (output_row_non_null, &build_row_id)) in - filter.iter().zip_eq(build_row_ids.iter()).enumerate() + filter.iter().zip_eq_fast(build_row_ids.iter()).enumerate() { if output_row_non_null { build_row_matched[build_row_id] = true; @@ -1778,15 +1780,14 @@ impl HashJoinExecutor { mod tests { use futures::StreamExt; - use itertools::Itertools; use risingwave_common::array::{ArrayBuilderImpl, DataChunk}; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::error::Result; use risingwave_common::hash::Key32; use risingwave_common::test_prelude::DataChunkTestExt; use risingwave_common::types::DataType; - use risingwave_expr::expr::expr_binary_nonnull::new_binary_expr; - use 
risingwave_expr::expr::{BoxedExpression, InputRefExpression}; + use risingwave_common::util::iter_util::ZipEqDebug; + use risingwave_expr::expr::{new_binary_expr, BoxedExpression, InputRefExpression}; use risingwave_pb::expr::expr_node::Type; use super::{ @@ -1847,7 +1848,7 @@ mod tests { } left.rows() - .zip_eq(right.rows()) + .zip_eq_debug(right.rows()) .all(|(row1, row2)| row1 == row2) } diff --git a/src/batch/src/executor/join/local_lookup_join.rs b/src/batch/src/executor/join/local_lookup_join.rs index af9e6f7ebbd36..2ea96c7906667 100644 --- a/src/batch/src/executor/join/local_lookup_join.rs +++ b/src/batch/src/executor/join/local_lookup_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,15 +20,15 @@ use risingwave_common::buffer::BitmapBuilder; use risingwave_common::catalog::{ColumnDesc, Field, Schema}; use risingwave_common::error::{internal_error, Result}; use risingwave_common::hash::{ - HashKey, HashKeyDispatcher, ParallelUnitId, VirtualNode, VnodeMapping, + ExpandedParallelUnitMapping, HashKey, HashKeyDispatcher, ParallelUnitId, VirtualNode, }; use risingwave_common::row::OwnedRow; use risingwave_common::types::{DataType, Datum}; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::scan_range::ScanRange; use risingwave_common::util::worker_util::get_pu_to_worker_mapping; -use risingwave_expr::expr::expr_unary::new_unary_expr; -use risingwave_expr::expr::{build_from_prost, BoxedExpression, LiteralExpression}; +use risingwave_expr::expr::{build_from_prost, new_unary_expr, BoxedExpression, LiteralExpression}; use risingwave_pb::batch_plan::exchange_info::DistributionMode; use risingwave_pb::batch_plan::exchange_source::LocalExecutePlan::Plan; use risingwave_pb::batch_plan::plan_node::NodeBody; @@ -50,7 +50,7 @@ use crate::task::{BatchTaskContext, TaskId}; /// Inner side executor builder for the `LocalLookupJoinExecutor` struct InnerSideExecutorBuilder { table_desc: StorageTableDesc, - vnode_mapping: VnodeMapping, + vnode_mapping: ExpandedParallelUnitMapping, outer_side_key_types: Vec, inner_side_schema: Schema, inner_side_column_ids: Vec, @@ -114,6 +114,7 @@ impl InnerSideExecutorBuilder { scan_ranges, ordered: false, vnode_bitmap: Some(vnode_bitmap.finish().to_protobuf()), + chunk_size: None, }); Ok(row_seq_scan_node) @@ -173,12 +174,12 @@ impl LookupExecutorBuilder for InnerSideExecutorBuilder for ((datum, outer_type), inner_type) in key_datums .into_iter() - .zip_eq( + .zip_eq_fast( self.outer_side_key_types .iter() .take(self.lookup_prefix_len), ) - .zip_eq( + .zip_eq_fast( self.inner_side_key_types .iter() .take(self.lookup_prefix_len), @@ -471,8 +472,9 @@ mod tests { use risingwave_common::types::{DataType, ScalarImpl}; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; use risingwave_common::util::sort_util::{OrderPair, OrderType}; - use risingwave_expr::expr::expr_binary_nonnull::new_binary_expr; - use risingwave_expr::expr::{BoxedExpression, InputRefExpression, LiteralExpression}; + use risingwave_expr::expr::{ + new_binary_expr, BoxedExpression, InputRefExpression, LiteralExpression, + }; use risingwave_pb::expr::expr_node::Type; use super::LocalLookupJoinExecutorArgs; @@ -553,7 +555,7 @@ mod tests { lookup_prefix_len: 1, chunk_builder: 
DataChunkBuilder::new(original_schema.data_types(), CHUNK_SIZE), schema: original_schema.clone(), - output_indices: (0..original_schema.len()).into_iter().collect(), + output_indices: (0..original_schema.len()).collect(), chunk_size: CHUNK_SIZE, identity: "TestLookupJoinExecutor".to_string(), } diff --git a/src/batch/src/executor/join/lookup_join_base.rs b/src/batch/src/executor/join/lookup_join_base.rs index 6f27e7ccdc095..7dccc98a1eb79 100644 --- a/src/batch/src/executor/join/lookup_join_base.rs +++ b/src/batch/src/executor/join/lookup_join_base.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/executor/join/mod.rs b/src/batch/src/executor/join/mod.rs index 0faf14064d2b9..2c25ccbd6769e 100644 --- a/src/batch/src/executor/join/mod.rs +++ b/src/batch/src/executor/join/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -30,12 +30,14 @@ use risingwave_common::array::{DataChunk, RowRef, Vis}; use risingwave_common::error::Result; use risingwave_common::row::Row; use risingwave_common::types::{DataType, DatumRef}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::plan_common::JoinType as JoinTypeProst; use crate::error::BatchError; -use crate::executor::join::JoinType::Inner; -#[derive(Copy, Clone, Debug, PartialEq)] + +#[derive(Copy, Clone, Debug, Default, PartialEq)] pub enum JoinType { + #[default] Inner, LeftOuter, /// Semi join when probe side should output when matched @@ -103,12 +105,6 @@ impl JoinType { } } -impl Default for JoinType { - fn default() -> Self { - Inner - } -} - /// The layout be like: /// /// [ `left` chunk | `right` chunk ] @@ -153,7 +149,7 @@ fn convert_datum_refs_to_chunk( .map(|data_type| data_type.create_array_builder(num_tuples)) .collect(); for _i in 0..num_tuples { - for (builder, datum_ref) in output_array_builders.iter_mut().zip_eq(datum_refs) { + for (builder, datum_ref) in output_array_builders.iter_mut().zip_eq_fast(datum_refs) { builder.append_datum(*datum_ref); } } diff --git a/src/batch/src/executor/join/nested_loop_join.rs b/src/batch/src/executor/join/nested_loop_join.rs index 29434509a5764..c0a46d6865d9b 100644 --- a/src/batch/src/executor/join/nested_loop_join.rs +++ b/src/batch/src/executor/join/nested_loop_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -14,7 +14,6 @@ use futures::TryStreamExt; use futures_async_stream::try_stream; -use itertools::Itertools; use risingwave_common::array::data_chunk_iter::RowRef; use risingwave_common::array::{Array, DataChunk}; use risingwave_common::buffer::BitmapBuilder; @@ -23,6 +22,7 @@ use risingwave_common::error::{Result, RwError}; use risingwave_common::row::{repeat_n, RowExt}; use risingwave_common::types::{DataType, Datum}; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; +use risingwave_common::util::iter_util::ZipEqDebug; use risingwave_expr::expr::{ build_from_prost as expr_build_from_prost, BoxedExpression, Expression, }; @@ -277,7 +277,7 @@ impl NestedLoopJoinExecutor { for (left_row, _) in left .iter() .flat_map(|chunk| chunk.rows()) - .zip_eq(matched.finish().iter()) + .zip_eq_debug(matched.finish().iter()) .filter(|(_, matched)| !*matched) { let row = left_row.chain(repeat_n(Datum::None, right_data_types.len())); @@ -317,7 +317,7 @@ impl NestedLoopJoinExecutor { for (left_row, _) in left .iter() .flat_map(|chunk| chunk.rows()) - .zip_eq(matched.finish().iter()) + .zip_eq_debug(matched.finish().iter()) .filter(|(_, matched)| if ANTI_JOIN { !*matched } else { *matched }) { if let Some(chunk) = chunk_builder.append_one_row(left_row) { @@ -356,7 +356,7 @@ impl NestedLoopJoinExecutor { } for (right_row, _) in right_chunk .rows() - .zip_eq(matched.iter()) + .zip_eq_debug(matched.iter()) .filter(|(_, matched)| !*matched) { let row = repeat_n(Datum::None, left_data_types.len()).chain(right_row); @@ -436,7 +436,7 @@ impl NestedLoopJoinExecutor { // Yield unmatched rows in the right table for (right_row, _) in right_chunk .rows() - .zip_eq(right_matched.iter()) + .zip_eq_debug(right_matched.iter()) .filter(|(_, matched)| !*matched) { let row = repeat_n(Datum::None, left_data_types.len()).chain(right_row); @@ -449,7 +449,7 @@ impl NestedLoopJoinExecutor { for (left_row, _) in left .iter() .flat_map(|chunk| chunk.rows()) - .zip_eq(left_matched.finish().iter()) + .zip_eq_debug(left_matched.finish().iter()) .filter(|(_, matched)| !*matched) { let row = left_row.chain(repeat_n(Datum::None, right_data_types.len())); @@ -464,8 +464,7 @@ mod tests { use risingwave_common::array::*; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::types::DataType; - use risingwave_expr::expr::expr_binary_nonnull::new_binary_expr; - use risingwave_expr::expr::InputRefExpression; + use risingwave_expr::expr::{new_binary_expr, InputRefExpression}; use risingwave_pb::expr::expr_node::Type; use crate::executor::join::nested_loop_join::NestedLoopJoinExecutor; diff --git a/src/batch/src/executor/limit.rs b/src/batch/src/executor/limit.rs index 14995fdb11232..69e685a59eb92 100644 --- a/src/batch/src/executor/limit.rs +++ b/src/batch/src/executor/limit.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -63,6 +63,9 @@ impl BoxedExecutorBuilder for LimitExecutor { impl LimitExecutor { #[try_stream(boxed, ok = DataChunk, error = RwError)] async fn do_execute(self: Box) { + if self.limit == 0 { + return Ok(()); + } // the number of rows have been skipped due to offset let mut skipped = 0; // the number of rows have been returned as execute result @@ -150,6 +153,7 @@ mod tests { use risingwave_common::array::{Array, BoolArray, DataChunk, PrimitiveArray}; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::types::DataType; + use risingwave_common::util::iter_util::ZipEqDebug; use super::*; use crate::executor::test_utils::MockExecutor; @@ -166,7 +170,6 @@ mod tests { ) { let col = create_column( (0..row_num) - .into_iter() .map(|x| Some(x as i32)) .collect_vec() .as_slice(), @@ -281,7 +284,6 @@ mod tests { assert_eq!(visible.len(), row_num); let col0 = create_column( (0..row_num) - .into_iter() .map(|x| Some(x as i32)) .collect_vec() .as_slice(), @@ -340,7 +342,7 @@ mod tests { result.cardinality() ); MockLimitIter::new(row_num, limit, offset, visible) - .zip_eq(0..result.cardinality()) + .zip_eq_debug(0..result.cardinality()) .for_each(|(expect, chunk_idx)| { assert_eq!(col1.array().as_bool().value_at(chunk_idx), Some(true)); assert_eq!( diff --git a/src/batch/src/executor/merge_sort_exchange.rs b/src/batch/src/executor/merge_sort_exchange.rs index 4028bfcf9b28c..6e9b47dfa304f 100644 --- a/src/batch/src/executor/merge_sort_exchange.rs +++ b/src/batch/src/executor/merge_sort_exchange.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/executor/mod.rs b/src/batch/src/executor/mod.rs index 5adbb9321862e..130d451a087ba 100644 --- a/src/batch/src/executor/mod.rs +++ b/src/batch/src/executor/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/executor/monitor/mod.rs b/src/batch/src/executor/monitor/mod.rs index d3bea48c110a1..1b938ddae600f 100644 --- a/src/batch/src/executor/monitor/mod.rs +++ b/src/batch/src/executor/monitor/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/executor/monitor/stats.rs b/src/batch/src/executor/monitor/stats.rs index bf09a33ffb852..347a39f025631 100644 --- a/src/batch/src/executor/monitor/stats.rs +++ b/src/batch/src/executor/monitor/stats.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -17,7 +17,7 @@ use std::sync::Arc; use prometheus::core::{AtomicF64, AtomicU64, Collector, Desc, GenericCounterVec, GenericGaugeVec}; use prometheus::{ exponential_buckets, opts, proto, GaugeVec, HistogramOpts, HistogramVec, IntCounterVec, - Registry, + IntGauge, Registry, }; use crate::task::TaskId; @@ -207,3 +207,22 @@ impl BatchTaskMetricsWithTaskLabels { self.task_labels.iter().map(AsRef::as_ref).collect() } } + +#[derive(Clone)] +pub struct BatchManagerMetrics { + pub task_num: IntGauge, +} + +impl BatchManagerMetrics { + pub fn new(registry: Registry) -> Self { + let task_num = IntGauge::new("batch_task_num", "Number of batch task in memory").unwrap(); + + registry.register(Box::new(task_num.clone())).unwrap(); + Self { task_num } + } + + #[cfg(test)] + pub fn for_test() -> Self { + Self::new(Registry::new()) + } +} diff --git a/src/batch/src/executor/order_by.rs b/src/batch/src/executor/order_by.rs index 47c73c999a0d3..6688016ab1c62 100644 --- a/src/batch/src/executor/order_by.rs +++ b/src/batch/src/executor/order_by.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/executor/project.rs b/src/batch/src/executor/project.rs index 540701538b9dc..ed8f951e3bbb9 100644 --- a/src/batch/src/executor/project.rs +++ b/src/batch/src/executor/project.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/executor/project_set.rs b/src/batch/src/executor/project_set.rs index b007ac650054a..249728c1e15d6 100644 --- a/src/batch/src/executor/project_set.rs +++ b/src/batch/src/executor/project_set.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ use risingwave_common::array::{ArrayBuilder, DataChunk, I64ArrayBuilder}; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::error::{Result, RwError}; use risingwave_common::types::DataType; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::table_function::ProjectSetSelectItem; use risingwave_pb::batch_plan::plan_node::NodeBody; @@ -108,7 +109,7 @@ impl ProjectSetExecutor { projected_row_id_builder.append(Some(i as i64)); } - for (item, builder) in items.into_iter().zip_eq(builders.iter_mut()) { + for (item, builder) in items.into_iter().zip_eq_fast(builders.iter_mut()) { match item { Either::Left(array_ref) => { builder.append_array(&array_ref); diff --git a/src/batch/src/executor/row_seq_scan.rs b/src/batch/src/executor/row_seq_scan.rs index 00367ac5454f6..c5913072dd10a 100644 --- a/src/batch/src/executor/row_seq_scan.rs +++ b/src/batch/src/executor/row_seq_scan.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -241,7 +241,13 @@ impl BoxedExecutorBuilder for RowSeqScanExecutorBuilder { let ordered = seq_scan_node.ordered; let epoch = source.epoch.clone(); - let chunk_size = source.context.get_config().developer.batch_chunk_size; + let chunk_size = if let Some(chunk_size_) = &seq_scan_node.chunk_size { + chunk_size_ + .get_chunk_size() + .min(source.context.get_config().developer.batch_chunk_size as u32) + } else { + source.context.get_config().developer.batch_chunk_size as u32 + }; let metrics = source.context().task_metrics(); dispatch_state_store!(source.context().state_store(), state_store, { @@ -262,7 +268,7 @@ impl BoxedExecutorBuilder for RowSeqScanExecutorBuilder { scan_ranges, ordered, epoch, - chunk_size, + chunk_size as usize, source.plan_node().get_identity().clone(), metrics, ))) diff --git a/src/batch/src/executor/sort_agg.rs b/src/batch/src/executor/sort_agg.rs index 9b7802241c761..ee78047952f7f 100644 --- a/src/batch/src/executor/sort_agg.rs +++ b/src/batch/src/executor/sort_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ use itertools::Itertools; use risingwave_common::array::{ArrayBuilderImpl, ArrayRef, DataChunk}; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::error::{Result, RwError}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::expr::{build_from_prost, BoxedExpression}; use risingwave_expr::vector_op::agg::{ create_sorted_grouper, AggStateFactory, BoxedAggState, BoxedSortedGrouper, EqGroups, @@ -131,7 +132,7 @@ impl SortAggExecutor { let groups: Vec<_> = self .sorted_groupers .iter() - .zip_eq(&group_columns) + .zip_eq_fast(&group_columns) .map(|(grouper, array)| grouper.detect_groups(array)) .try_collect()?; @@ -222,7 +223,7 @@ impl SortAggExecutor { ) -> Result<()> { sorted_groupers .iter_mut() - .zip_eq(group_columns) + .zip_eq_fast(group_columns) .try_for_each(|(grouper, column)| grouper.update(column, start_row_idx, end_row_idx)) .map_err(Into::into) } @@ -245,7 +246,7 @@ impl SortAggExecutor { ) -> Result<()> { sorted_groupers .iter_mut() - .zip_eq(group_builders) + .zip_eq_fast(group_builders) .try_for_each(|(grouper, builder)| grouper.output(builder)) .map_err(Into::into) } @@ -256,7 +257,7 @@ impl SortAggExecutor { ) -> Result<()> { agg_states .iter_mut() - .zip_eq(agg_builders) + .zip_eq_fast(agg_builders) .try_for_each(|(state, builder)| state.output(builder)) .map_err(Into::into) } diff --git a/src/batch/src/executor/source.rs b/src/batch/src/executor/source.rs index 2b6d7f46314c2..88f8779eb5282 100644 --- a/src/batch/src/executor/source.rs +++ b/src/batch/src/executor/source.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -23,13 +23,14 @@ use risingwave_common::catalog::{ColumnDesc, ColumnId, Field, Schema, TableId}; use risingwave_common::error::ErrorCode::{ConnectorError, ProtocolError}; use risingwave_common::error::{Result, RwError}; use risingwave_common::types::DataType; -use risingwave_connector::parser::SourceParserImpl; -use risingwave_connector::source::{ConnectorProperties, SplitImpl, SplitMetaData}; -use risingwave_connector::{SourceColumnDesc, SourceFormat}; +use risingwave_connector::parser::SpecificParserConfig; +use risingwave_connector::source::monitor::SourceMetrics; +use risingwave_connector::source::{ + ConnectorProperties, SourceColumnDesc, SourceFormat, SourceInfo, SplitImpl, SplitMetaData, +}; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::plan_common::RowFormatType; -use risingwave_source::connector_source::{ConnectorSource, SourceContext}; -use risingwave_source::monitor::SourceMetrics; +use risingwave_source::connector_source::ConnectorSource; use super::Executor; use crate::error::BatchError; @@ -67,7 +68,7 @@ impl BoxedExecutorBuilder for SourceExecutor { let config = ConnectorProperties::extract(source_props) .map_err(|e| RwError::from(ConnectorError(e.into())))?; - let info = &source_node.get_info().unwrap(); + let info = source_node.get_info().unwrap(); let format = match info.get_row_format()? { RowFormatType::Json => SourceFormat::Json, RowFormatType::Protobuf => SourceFormat::Protobuf, @@ -75,6 +76,8 @@ impl BoxedExecutorBuilder for SourceExecutor { RowFormatType::Avro => SourceFormat::Avro, RowFormatType::Maxwell => SourceFormat::Maxwell, RowFormatType::CanalJson => SourceFormat::CanalJson, + RowFormatType::Native => SourceFormat::Native, + RowFormatType::DebeziumAvro => SourceFormat::DebeziumAvro, _ => unreachable!(), }; if format == SourceFormat::Protobuf && info.row_schema_location.is_empty() { @@ -82,19 +85,9 @@ impl BoxedExecutorBuilder for SourceExecutor { "protobuf file location not provided".to_string(), ))); } - let source_parser_rs = SourceParserImpl::create( - &format, - &source_node.properties, - info.row_schema_location.as_str(), - info.use_schema_registry, - info.proto_message_name.clone(), - ) - .await; - let parser = if let Ok(source_parser) = source_parser_rs { - source_parser - } else { - return Err(source_parser_rs.err().unwrap()); - }; + + let parser_config = + SpecificParserConfig::new(format, info, &source_node.properties).await?; let columns: Vec<_> = source_node .columns @@ -105,7 +98,7 @@ impl BoxedExecutorBuilder for SourceExecutor { let connector_source = ConnectorSource { config, columns, - parser, + parser_config, connector_message_buffer_size: source .context() .get_config() @@ -162,18 +155,16 @@ impl Executor for SourceExecutor { impl SourceExecutor { #[try_stream(ok = DataChunk, error = RwError)] async fn do_execute(self: Box) { - let reader = self + let stream = self .connector_source .stream_reader( Some(vec![self.split]), self.column_ids, self.metrics, - SourceContext::new(u32::MAX, self.source_id), + SourceInfo::new(u32::MAX, self.source_id), ) .await?; - let stream = reader.into_stream(); - #[for_await] for chunk in stream { match chunk { diff --git a/src/batch/src/executor/sys_row_seq_scan.rs b/src/batch/src/executor/sys_row_seq_scan.rs index dd5f71377eb94..4447257b26107 100644 --- a/src/batch/src/executor/sys_row_seq_scan.rs +++ b/src/batch/src/executor/sys_row_seq_scan.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 
2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/executor/table_function.rs b/src/batch/src/executor/table_function.rs index 862efe5cf77b2..82ca4521b28cc 100644 --- a/src/batch/src/executor/table_function.rs +++ b/src/batch/src/executor/table_function.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/executor/test_utils.rs b/src/batch/src/executor/test_utils.rs index 0b812a99a5328..2870e700fe6ae 100644 --- a/src/batch/src/executor/test_utils.rs +++ b/src/batch/src/executor/test_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ use risingwave_common::error::{Result, RwError}; use risingwave_common::field_generator::FieldGeneratorImpl; use risingwave_common::row::Row; use risingwave_common::types::{DataType, Datum, ToOwnedDatum}; +use risingwave_common::util::iter_util::{ZipEqDebug, ZipEqFast}; use risingwave_expr::expr::BoxedExpression; use risingwave_pb::batch_plan::ExchangeSource as ProstExchangeSource; @@ -219,7 +220,7 @@ pub async fn diff_executor_output(actual: BoxedExecutor, expect: BoxedExecutor) expect .columns() .iter() - .zip_eq(actual.columns().iter()) + .zip_eq_fast(actual.columns().iter()) .for_each(|(c1, c2)| assert_eq!(c1.array().to_protobuf(), c2.array().to_protobuf())); is_data_chunk_eq(&expect, &actual) @@ -236,7 +237,7 @@ fn is_data_chunk_eq(left: &DataChunk, right: &DataChunk) { ); left.rows() - .zip_eq(right.rows()) + .zip_eq_debug(right.rows()) .for_each(|(row1, row2)| assert_eq!(row1, row2)); } diff --git a/src/batch/src/executor/top_n.rs b/src/batch/src/executor/top_n.rs index 286fa4314a34a..0ba9b36b53559 100644 --- a/src/batch/src/executor/top_n.rs +++ b/src/batch/src/executor/top_n.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/executor/trace.rs b/src/batch/src/executor/trace.rs index f42bbad067a57..9c9a041b820ed 100644 --- a/src/batch/src/executor/trace.rs +++ b/src/batch/src/executor/trace.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/executor/union.rs b/src/batch/src/executor/union.rs index 196cfd89f4767..e94a5b41b8e88 100644 --- a/src/batch/src/executor/union.rs +++ b/src/batch/src/executor/union.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
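Several hunks in this patch (sort_agg, test_utils, and the values and update executors further below) replace itertools' `zip_eq` with the workspace's `ZipEqFast`/`ZipEqDebug` helpers from `risingwave_common::util::iter_util`. The exact implementations live in that module; as a rough sketch of the idea, a strict zip over exact-size iterators can verify lengths once (or only in debug builds) instead of paying a per-item check:

```rust
/// Rough sketch only; the real `zip_eq_fast`/`zip_eq_debug` are defined in
/// `risingwave_common::util::iter_util` and may differ in detail.
fn zip_eq_debug_sketch<A, B>(a: A, b: B) -> impl Iterator<Item = (A::Item, B::Item)>
where
    A: IntoIterator,
    B: IntoIterator,
    A::IntoIter: ExactSizeIterator,
    B::IntoIter: ExactSizeIterator,
{
    let (a, b) = (a.into_iter(), b.into_iter());
    // Check the lengths once up front (here only in debug builds), then use the
    // plain std `zip`, which has no per-item equality bookkeeping.
    debug_assert_eq!(a.len(), b.len(), "iterators must have equal length");
    a.zip(b)
}
```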
diff --git a/src/batch/src/executor/update.rs b/src/batch/src/executor/update.rs index 8ac2c6c1eb938..f820d126fee55 100644 --- a/src/batch/src/executor/update.rs +++ b/src/batch/src/executor/update.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,10 +18,11 @@ use futures_async_stream::try_stream; use itertools::Itertools; use risingwave_common::array::column::Column; use risingwave_common::array::{ArrayBuilder, DataChunk, Op, PrimitiveArrayBuilder, StreamChunk}; -use risingwave_common::catalog::{Field, Schema, TableId}; +use risingwave_common::catalog::{Field, Schema, TableId, TableVersionId}; use risingwave_common::error::{Result, RwError}; use risingwave_common::types::DataType; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; +use risingwave_common::util::iter_util::ZipEqDebug; use risingwave_expr::expr::{build_from_prost, BoxedExpression}; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_source::dml_manager::DmlManagerRef; @@ -38,6 +39,7 @@ use crate::task::BatchTaskContext; pub struct UpdateExecutor { /// Target table id. table_id: TableId, + table_version_id: TableVersionId, dml_manager: DmlManagerRef, child: BoxedExecutor, exprs: Vec, @@ -48,8 +50,10 @@ pub struct UpdateExecutor { } impl UpdateExecutor { + #[allow(clippy::too_many_arguments)] pub fn new( table_id: TableId, + table_version_id: TableVersionId, dml_manager: DmlManagerRef, child: BoxedExecutor, exprs: Vec, @@ -68,6 +72,7 @@ impl UpdateExecutor { Self { table_id, + table_version_id, dml_manager, child, exprs, @@ -108,7 +113,7 @@ impl UpdateExecutor { let mut notifiers = Vec::new(); // Transform the data chunk to a stream chunk, then write to the source. - let mut write_chunk = |chunk: DataChunk| -> Result<()> { + let write_chunk = |chunk: DataChunk| async { // TODO: if the primary key is updated, we should use plain `+,-` instead of `U+,U-`. let ops = [Op::UpdateDelete, Op::UpdateInsert] .into_iter() @@ -117,10 +122,9 @@ impl UpdateExecutor { .collect_vec(); let stream_chunk = StreamChunk::from_parts(ops, chunk); - let notifier = self.dml_manager.write_chunk(&self.table_id, stream_chunk)?; - notifiers.push(notifier); - - Ok(()) + self.dml_manager + .write_chunk(self.table_id, self.table_version_id, stream_chunk) + .await }; #[for_await] @@ -141,18 +145,20 @@ impl UpdateExecutor { yield updated_data_chunk.clone(); } - for (row_delete, row_insert) in data_chunk.rows().zip_eq(updated_data_chunk.rows()) { + for (row_delete, row_insert) in + data_chunk.rows().zip_eq_debug(updated_data_chunk.rows()) + { let None = builder.append_one_row(row_delete) else { unreachable!("no chunk should be yielded when appending the deleted row as the chunk size is always even"); }; if let Some(chunk) = builder.append_one_row(row_insert) { - write_chunk(chunk)?; + notifiers.push(write_chunk(chunk).await?); } } } if let Some(chunk) = builder.consume_all() { - write_chunk(chunk)?; + notifiers.push(write_chunk(chunk).await?); } // Wait for all chunks to be taken / written. 
@@ -199,6 +205,7 @@ impl BoxedExecutorBuilder for UpdateExecutor { Ok(Box::new(Self::new( table_id, + update_node.table_version_id, source.context().dml_manager(), child, exprs, @@ -215,7 +222,9 @@ mod tests { use futures::StreamExt; use risingwave_common::array::Array; - use risingwave_common::catalog::{schema_test_utils, ColumnDesc, ColumnId}; + use risingwave_common::catalog::{ + schema_test_utils, ColumnDesc, ColumnId, INITIAL_TABLE_VERSION_ID, + }; use risingwave_common::test_prelude::DataChunkTestExt; use risingwave_expr::expr::InputRefExpression; use risingwave_source::dml_manager::DmlManager; @@ -260,16 +269,17 @@ mod tests { .enumerate() .map(|(i, field)| ColumnDesc::unnamed(ColumnId::new(i as _), field.data_type.clone())) .collect_vec(); - // We must create a variable to hold this `Arc` here, or it will be dropped due - // to the `Weak` reference in `DmlManager`. + // We must create a variable to hold this `Arc` here, or it will be dropped + // due to the `Weak` reference in `DmlManager`. let reader = dml_manager - .register_reader(table_id, &column_descs) + .register_reader(table_id, INITIAL_TABLE_VERSION_ID, &column_descs) .unwrap(); - let mut reader = reader.stream_reader_v2().into_stream_v2(); + let mut reader = reader.stream_reader().into_stream(); // Update let update_executor = Box::new(UpdateExecutor::new( table_id, + INITIAL_TABLE_VERSION_ID, dml_manager, Box::new(mock_executor), exprs, @@ -303,12 +313,12 @@ mod tests { let chunk = reader.next().await.unwrap()?; assert_eq!( - chunk.ops().chunks(2).collect_vec(), + chunk.chunk.ops().chunks(2).collect_vec(), vec![&[Op::UpdateDelete, Op::UpdateInsert]; updated_rows.clone().count()] ); assert_eq!( - chunk.columns()[0] + chunk.chunk.columns()[0] .array() .as_int32() .iter() @@ -321,7 +331,7 @@ mod tests { ); assert_eq!( - chunk.columns()[1] + chunk.chunk.columns()[1] .array() .as_int32() .iter() diff --git a/src/batch/src/executor/utils.rs b/src/batch/src/executor/utils.rs index 2cad0f2c13012..90f6f190ffd39 100644 --- a/src/batch/src/executor/utils.rs +++ b/src/batch/src/executor/utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/executor/values.rs b/src/batch/src/executor/values.rs index 3547e0efc0d4e..c6cc8bf83c611 100644 --- a/src/batch/src/executor/values.rs +++ b/src/batch/src/executor/values.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
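In the update-executor hunk above, `write_chunk` becomes an `async` closure that is awaited at each call site, and the returned notifier is pushed onto `notifiers`, which the executor later awaits to learn how many rows were written. A hedged sketch of that collect-then-await pattern, with a oneshot channel standing in for whatever notifier type `DmlManager::write_chunk` actually returns:

```rust
use tokio::sync::oneshot;

/// Stand-in for `DmlManager::write_chunk`: accept a chunk, return a notifier
/// that later resolves to the number of rows written. Illustrative only.
async fn write_chunk_sketch(rows: Vec<i32>) -> oneshot::Receiver<usize> {
    let (tx, rx) = oneshot::channel();
    let _ = tx.send(rows.len()); // acknowledge immediately in this sketch
    rx
}

async fn run_update_sketch() -> usize {
    let mut notifiers = Vec::new();
    for chunk in [vec![1, 2], vec![3, 4, 5]] {
        // Awaited inline, mirroring `notifiers.push(write_chunk(chunk).await?)`.
        notifiers.push(write_chunk_sketch(chunk).await);
    }
    // Wait for all chunks to be taken / written.
    let mut total = 0;
    for n in notifiers {
        total += n.await.unwrap_or(0);
    }
    total
}
```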
@@ -19,6 +19,7 @@ use itertools::Itertools; use risingwave_common::array::DataChunk; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::error::{Result, RwError}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::expr::{build_from_prost, BoxedExpression}; use risingwave_pb::batch_plan::plan_node::NodeBody; @@ -81,7 +82,7 @@ impl ValuesExecutor { let chunk_size = self.chunk_size.min(self.rows.len()); let mut array_builders = self.schema.create_array_builders(chunk_size); for row in self.rows.by_ref().take(chunk_size) { - for (expr, builder) in row.into_iter().zip_eq(&mut array_builders) { + for (expr, builder) in row.into_iter().zip_eq_fast(&mut array_builders) { let out = expr.eval(&one_row_chunk)?; builder.append_array(&out); } diff --git a/src/batch/src/lib.rs b/src/batch/src/lib.rs index 89722fd54fc08..46ba2a6620a47 100644 --- a/src/batch/src/lib.rs +++ b/src/batch/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/rpc/mod.rs b/src/batch/src/rpc/mod.rs index e69b19cd2dbff..d74c1be710567 100644 --- a/src/batch/src/rpc/mod.rs +++ b/src/batch/src/rpc/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/rpc/service/exchange.rs b/src/batch/src/rpc/service/exchange.rs index b1c6bedf242ff..1642304061e5e 100644 --- a/src/batch/src/rpc/service/exchange.rs +++ b/src/batch/src/rpc/service/exchange.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/rpc/service/mod.rs b/src/batch/src/rpc/service/mod.rs index 4a4db4851e1c5..7dd9fa86bb80a 100644 --- a/src/batch/src/rpc/service/mod.rs +++ b/src/batch/src/rpc/service/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/rpc/service/task_service.rs b/src/batch/src/rpc/service/task_service.rs index 5ab487319d800..d9e82821623e1 100644 --- a/src/batch/src/rpc/service/task_service.rs +++ b/src/batch/src/rpc/service/task_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -26,7 +26,8 @@ use tonic::{Request, Response, Status}; use crate::rpc::service::exchange::GrpcExchangeWriter; use crate::task::{ - self, BatchEnvironment, BatchManager, BatchTaskExecution, ComputeNodeContext, TaskId, + BatchEnvironment, BatchManager, BatchTaskExecution, ComputeNodeContext, StateReporter, TaskId, + TASK_STATUS_BUFFER_SIZE, }; const LOCAL_EXECUTE_BUFFER_SIZE: usize = 64; @@ -43,6 +44,7 @@ impl BatchServiceImpl { } } pub(crate) type TaskInfoResponseResult = std::result::Result; +pub(crate) type GetDataResponseResult = std::result::Result; #[async_trait::async_trait] impl TaskService for BatchServiceImpl { type CreateTaskStream = ReceiverStream; @@ -59,6 +61,8 @@ impl TaskService for BatchServiceImpl { epoch, } = request.into_inner(); + let (state_tx, state_rx) = tokio::sync::mpsc::channel(TASK_STATUS_BUFFER_SIZE); + let state_reporter = StateReporter::new_with_dist_sender(state_tx); let res = self .mgr .fire_task( @@ -69,6 +73,7 @@ impl TaskService for BatchServiceImpl { self.env.clone(), TaskId::from(task_id.as_ref().expect("no task id found")), ), + state_reporter, ) .await; match res { @@ -78,8 +83,7 @@ impl TaskService for BatchServiceImpl { // Will be used for receive task status update. // Note: we introduce this hack cuz `.execute()` do not produce a status stream, // but still share `.async_execute()` and `.try_execute()`. - self.mgr - .get_task_receiver(&task::TaskId::from(&task_id.unwrap())), + state_rx, ))), Err(e) => { error!("failed to fire task {}", e); @@ -94,6 +98,7 @@ impl TaskService for BatchServiceImpl { req: Request, ) -> Result, Status> { let req = req.into_inner(); + tracing::trace!("Aborting task: {:?}", req.get_task_id().unwrap()); self.mgr .abort_task(req.get_task_id().expect("no task id found")); Ok(Response::new(AbortTaskResponse { status: None })) @@ -120,8 +125,9 @@ impl TaskService for BatchServiceImpl { ); let task = BatchTaskExecution::new(&task_id, plan, context, epoch, self.mgr.runtime())?; let task = Arc::new(task); - - if let Err(e) = task.clone().async_execute().await { + let (tx, rx) = tokio::sync::mpsc::channel(LOCAL_EXECUTE_BUFFER_SIZE); + let state_reporter = StateReporter::new_with_local_sender(tx.clone()); + if let Err(e) = task.clone().async_execute(state_reporter).await { error!( "failed to build executors and trigger execution of Task {:?}: {}", task_id, e @@ -142,20 +148,14 @@ impl TaskService for BatchServiceImpl { ); e })?; - let (tx, rx) = tokio::sync::mpsc::channel(LOCAL_EXECUTE_BUFFER_SIZE); - let mut writer = GrpcExchangeWriter::new(tx.clone()); - let finish = output - .take_data_with_num(&mut writer, tx.capacity()) - .await?; - if !finish { - self.mgr.runtime().spawn(async move { - match output.take_data(&mut writer).await { - Ok(_) => Ok(()), - Err(e) => tx.send(Err(e.into())).await, - } - }); - } + // Always spawn a task and do not block current function. + self.mgr.runtime().spawn(async move { + match output.take_data(&mut writer).await { + Ok(_) => Ok(()), + Err(e) => tx.send(Err(e.into())).await, + } + }); Ok(Response::new(ReceiverStream::new(rx))) } } diff --git a/src/batch/src/task/broadcast_channel.rs b/src/batch/src/task/broadcast_channel.rs index 5a3684915b06a..b8307c1bc33a9 100644 --- a/src/batch/src/task/broadcast_channel.rs +++ b/src/batch/src/task/broadcast_channel.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
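The task-service hunk above threads an explicit status reporter into `fire_task` and local execution; the reporter itself (`StateReporter`) is defined later in the `task_execution.rs` hunk. In distributed mode every status update is forwarded to the frontend over a dedicated channel, while in local mode the data channel's sender doubles as the error path and success updates are simply dropped. A hedged sketch of that pattern, with a plain string standing in for the generated response types:

```rust
use tokio::sync::mpsc;

/// Illustrative stand-in for the real response protos.
type StatusMsg = Result<String, String>;

enum ReporterSketch {
    /// Distributed mode: forward every update to the frontend.
    Distributed(mpsc::Sender<StatusMsg>),
    /// Local mode: only failures matter; the data stream carries the results.
    Local(mpsc::Sender<StatusMsg>),
    /// Test-only: discard everything.
    Mock,
}

impl ReporterSketch {
    async fn send(&mut self, msg: StatusMsg) -> Result<(), &'static str> {
        match self {
            Self::Distributed(tx) => tx.send(msg).await.map_err(|_| "receiver dropped"),
            Self::Local(tx) => match msg {
                Err(e) => tx.send(Err(e)).await.map_err(|_| "receiver dropped"),
                Ok(_) => Ok(()), // swallow success updates locally
            },
            Self::Mock => Ok(()),
        }
    }
}
```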
@@ -28,6 +28,7 @@ use crate::task::channel::{ChanReceiver, ChanReceiverImpl, ChanSender, ChanSende use crate::task::data_chunk_in_channel::DataChunkInChannel; /// `BroadcastSender` sends the same chunk to a number of `BroadcastReceiver`s. +#[derive(Clone)] pub struct BroadcastSender { senders: Vec>>, broadcast_info: BroadcastInfo, diff --git a/src/batch/src/task/channel.rs b/src/batch/src/task/channel.rs index 941b1975acc1f..06332f2067a6e 100644 --- a/src/batch/src/task/channel.rs +++ b/src/batch/src/task/channel.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ pub(super) trait ChanSender: Send { fn send(&mut self, chunk: Option) -> Self::SendFuture<'_>; } -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum ChanSenderImpl { HashShuffle(HashShuffleSender), ConsistentHashShuffle(ConsistentHashShuffleSender), diff --git a/src/batch/src/task/consistent_hash_shuffle_channel.rs b/src/batch/src/task/consistent_hash_shuffle_channel.rs index 7bcde8b808a84..5ac15257cb074 100644 --- a/src/batch/src/task/consistent_hash_shuffle_channel.rs +++ b/src/batch/src/task/consistent_hash_shuffle_channel.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -32,6 +32,7 @@ use crate::error::Result as BatchResult; use crate::task::channel::{ChanReceiver, ChanReceiverImpl, ChanSender, ChanSenderImpl}; use crate::task::data_chunk_in_channel::DataChunkInChannel; +#[derive(Clone)] pub struct ConsistentHashShuffleSender { senders: Vec>>, consistent_hash_info: ConsistentHashInfo, diff --git a/src/batch/src/task/context.rs b/src/batch/src/task/context.rs index 3849529af0c3a..3449004fa1978 100644 --- a/src/batch/src/task/context.rs +++ b/src/batch/src/task/context.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,9 +19,9 @@ use risingwave_common::catalog::SysCatalogReaderRef; use risingwave_common::config::BatchConfig; use risingwave_common::error::Result; use risingwave_common::util::addr::{is_local_address, HostAddr}; +use risingwave_connector::source::monitor::SourceMetrics; use risingwave_rpc_client::ComputeClientPoolRef; use risingwave_source::dml_manager::DmlManagerRef; -use risingwave_source::monitor::SourceMetrics; use risingwave_storage::StateStoreImpl; use super::TaskId; @@ -62,7 +62,7 @@ pub trait BatchTaskContext: Clone + Send + Sync + 'static { fn store_mem_usage(&self, val: usize); - fn get_mem_usage(&self) -> usize; + fn mem_usage(&self) -> usize; } /// Batch task context on compute node. 
@@ -130,7 +130,7 @@ impl BatchTaskContext for ComputeNodeContext { self.cur_mem_val.store(val, Ordering::Relaxed); } - fn get_mem_usage(&self) -> usize { + fn mem_usage(&self) -> usize { self.cur_mem_val.load(Ordering::Relaxed) } } diff --git a/src/batch/src/task/data_chunk_in_channel.rs b/src/batch/src/task/data_chunk_in_channel.rs index 290f59d6f6ebe..984c451e8119a 100644 --- a/src/batch/src/task/data_chunk_in_channel.rs +++ b/src/batch/src/task/data_chunk_in_channel.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/task/env.rs b/src/batch/src/task/env.rs index db02467500915..2a4c8f916578b 100644 --- a/src/batch/src/task/env.rs +++ b/src/batch/src/task/env.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,9 +16,9 @@ use std::sync::Arc; use risingwave_common::config::BatchConfig; use risingwave_common::util::addr::HostAddr; +use risingwave_connector::source::monitor::SourceMetrics; use risingwave_rpc_client::ComputeClientPoolRef; use risingwave_source::dml_manager::DmlManagerRef; -use risingwave_source::monitor::SourceMetrics; use risingwave_storage::StateStoreImpl; use crate::executor::BatchTaskMetrics; @@ -91,8 +91,13 @@ impl BatchEnvironment { use risingwave_source::dml_manager::DmlManager; use risingwave_storage::monitor::MonitoredStorageMetrics; + use crate::executor::monitor::BatchManagerMetrics; + BatchEnvironment { - task_manager: Arc::new(BatchManager::new(BatchConfig::default())), + task_manager: Arc::new(BatchManager::new( + BatchConfig::default(), + BatchManagerMetrics::for_test(), + )), server_addr: "127.0.0.1:5688".parse().unwrap(), config: Arc::new(BatchConfig::default()), worker_id: WorkerNodeId::default(), diff --git a/src/batch/src/task/fifo_channel.rs b/src/batch/src/task/fifo_channel.rs index a1a5ffe218804..3d6ae1fe8a6da 100644 --- a/src/batch/src/task/fifo_channel.rs +++ b/src/batch/src/task/fifo_channel.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ use crate::error::BatchError::SenderError; use crate::error::Result as BatchResult; use crate::task::channel::{ChanReceiver, ChanReceiverImpl, ChanSender, ChanSenderImpl}; use crate::task::data_chunk_in_channel::DataChunkInChannel; +#[derive(Clone)] pub struct FifoSender { sender: mpsc::Sender>, } diff --git a/src/batch/src/task/hash_shuffle_channel.rs b/src/batch/src/task/hash_shuffle_channel.rs index 0b0909a5ef587..cb4edbfa3f618 100644 --- a/src/batch/src/task/hash_shuffle_channel.rs +++ b/src/batch/src/task/hash_shuffle_channel.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
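The `#[derive(Clone)]` additions on the channel senders in this area (broadcast, fifo, consistent-hash, and hash shuffle) exist so that `BatchTaskExecution` can create its output channel once at construction time and clone the stored sender when execution actually starts, as the `task_execution.rs` hunk below shows. A small sketch of that ownership pattern, with simplified stand-in types:

```rust
use tokio::sync::mpsc;

/// Simplified stand-in for the concrete `ChanSenderImpl` variants.
#[derive(Clone)]
struct SenderSketch(mpsc::Sender<String>);

struct TaskSketch {
    sender: SenderSketch,
}

impl TaskSketch {
    /// Create the output channel up front, as the task constructor now does.
    fn new() -> (Self, mpsc::Receiver<String>) {
        let (tx, rx) = mpsc::channel(16);
        (Self { sender: SenderSketch(tx) }, rx)
    }

    /// Clone the sender when execution starts, leaving the field in place.
    async fn execute(&self) {
        let sender = self.sender.clone();
        let _ = sender.0.send("chunk".to_string()).await;
    }
}
```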
@@ -30,7 +30,7 @@ use crate::error::BatchError::SenderError; use crate::error::Result as BatchResult; use crate::task::channel::{ChanReceiver, ChanReceiverImpl, ChanSender, ChanSenderImpl}; use crate::task::data_chunk_in_channel::DataChunkInChannel; - +#[derive(Clone)] pub struct HashShuffleSender { senders: Vec>>, hash_info: HashInfo, diff --git a/src/batch/src/task/mod.rs b/src/batch/src/task/mod.rs index 713e193971e25..3d6b7c6eaba2f 100644 --- a/src/batch/src/task/mod.rs +++ b/src/batch/src/task/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/batch/src/task/task_execution.rs b/src/batch/src/task/task_execution.rs index fc29b90c1af49..61e1ea2a2a51e 100644 --- a/src/batch/src/task/task_execution.rs +++ b/src/batch/src/task/task_execution.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -39,12 +39,12 @@ use crate::error::BatchError::SenderError; use crate::error::{BatchError, Result as BatchResult}; use crate::executor::{BoxedExecutor, ExecutorBuilder}; use crate::rpc::service::exchange::ExchangeWriter; -use crate::rpc::service::task_service::TaskInfoResponseResult; +use crate::rpc::service::task_service::{GetDataResponseResult, TaskInfoResponseResult}; use crate::task::channel::{create_output_channel, ChanReceiverImpl, ChanSenderImpl}; use crate::task::BatchTaskContext; // Now we will only at most have 2 status for each status channel. Running -> Failed or Finished. -const TASK_STATUS_BUFFER_SIZE: usize = 2; +pub const TASK_STATUS_BUFFER_SIZE: usize = 2; /// A special version for batch allocation stat, passed in another task `context` C to report task /// mem usage 0 bytes at the end. @@ -76,8 +76,8 @@ where biased; _ = monitor => unreachable!(), output = future => { - // Report mem usage as 0 after ends immediately. - context.store_mem_usage(0); + // Report bytes allocated when actor ends. Note we should not report 0, cuz actor may allocate memory in block cache and may not be dealloc. + BYTES_ALLOCATED.with(|bytes| context.store_mem_usage(bytes.val())); output }, }; @@ -86,6 +86,48 @@ where .await } +/// Send batch task status (local/distributed) to frontend. +/// +/// +/// Local mode use `StateReporter::Local`, Distributed mode use `StateReporter::Distributed` to send +/// status (Failed/Finished) update. `StateReporter::Mock` is only used in test and do not takes any +/// effect. Local sender only report Failed update, Distributed sender will also report +/// Finished/Pending/Starting/Aborted etc. +pub enum StateReporter { + Local(tokio::sync::mpsc::Sender), + Distributed(tokio::sync::mpsc::Sender), + Mock(), +} + +impl StateReporter { + pub async fn send(&mut self, val: TaskInfoResponseResult) -> BatchResult<()> { + match self { + Self::Local(s) => { + if let Err(e) = val { + s.send(Err(e)).await.map_err(|_| SenderError) + } else { + // do nothing and just return. 
+ Ok(()) + } + } + Self::Distributed(s) => s.send(val).await.map_err(|_| SenderError), + Self::Mock() => Ok(()), + } + } + + pub fn new_with_local_sender(s: tokio::sync::mpsc::Sender) -> Self { + Self::Local(s) + } + + pub fn new_with_dist_sender(s: tokio::sync::mpsc::Sender) -> Self { + Self::Distributed(s) + } + + pub fn new_with_test() -> Self { + Self::Mock() + } +} + #[derive(PartialEq, Eq, Hash, Clone, Debug, Default)] pub struct TaskId { pub task_id: u32, @@ -247,6 +289,9 @@ pub struct BatchTaskExecution { /// Receivers data of the task. receivers: Mutex>>, + /// Sender for sending chunks between different executors. + sender: ChanSenderImpl, + /// Context for task execution context: C, @@ -275,17 +320,27 @@ impl BatchTaskExecution { runtime: &'static Runtime, ) -> Result { let task_id = TaskId::from(prost_tid); + + let (sender, receivers) = create_output_channel( + plan.get_exchange_info()?, + context.get_config().developer.batch_output_channel_size, + )?; + + let mut rts = Vec::new(); + rts.extend(receivers.into_iter().map(Some)); + Ok(Self { task_id, plan, state: Mutex::new(TaskStatus::Pending), - receivers: Mutex::new(Vec::new()), + receivers: Mutex::new(rts), failure: Arc::new(Mutex::new(None)), epoch, shutdown_tx: Mutex::new(None), state_rx: Mutex::new(None), context, runtime, + sender, }) } @@ -299,7 +354,8 @@ impl BatchTaskExecution { /// hash partitioned across multiple channels. /// To obtain the result, one must pick one of the channels to consume via [`TaskOutputId`]. As /// such, parallel consumers are able to consume the result independently. - pub async fn async_execute(self: Arc) -> Result<()> { + pub async fn async_execute(self: Arc, state_tx: StateReporter) -> Result<()> { + let mut state_tx = state_tx; trace!( "Prepare executing plan [{:?}]: {}", self.task_id, @@ -316,26 +372,15 @@ impl BatchTaskExecution { .await?; // Init shutdown channel and data receivers. - let (sender, receivers) = create_output_channel( - self.plan.get_exchange_info()?, - self.context - .get_config() - .developer - .batch_output_channel_size, - )?; + let sender = self.sender.clone(); let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::(); *self.shutdown_tx.lock() = Some(shutdown_tx); - self.receivers - .lock() - .extend(receivers.into_iter().map(Some)); let failure = self.failure.clone(); let task_id = self.task_id.clone(); // After we init the output receivers, it's must safe to schedule next stage -- able to send // TaskStatus::Running here. - let (mut state_tx, state_rx) = tokio::sync::mpsc::channel(TASK_STATUS_BUFFER_SIZE); // Init the state receivers. Swap out later. - *self.state_rx.lock() = Some(state_rx); self.change_state_notify(TaskStatus::Running, &mut state_tx, None) .await?; @@ -373,6 +418,11 @@ impl BatchTaskExecution { // It's possible to send fail. Same reason in `.try_execute`. warn!("send task execution error message fail!"); } + + // There will be no more chunks, so send None. + if let Err(_e) = sender.send(None).await { + warn!("failed to send None to annotate end"); + } } }; @@ -437,15 +487,12 @@ impl BatchTaskExecution { pub async fn change_state_notify( &self, task_status: TaskStatus, - state_tx: &mut tokio::sync::mpsc::Sender, + state_tx: &mut StateReporter, err_str: Option, ) -> BatchResult<()> { self.change_state(task_status); if let Some(err_str) = err_str { - state_tx - .send(Err(Status::internal(err_str))) - .await - .map_err(|_| SenderError) + state_tx.send(Err(Status::internal(err_str))).await } else { // Notify frontend the task status. 
state_tx @@ -458,7 +505,6 @@ impl BatchTaskExecution { ..Default::default() })) .await - .map_err(|_| SenderError) } } @@ -471,7 +517,7 @@ impl BatchTaskExecution { root: BoxedExecutor, sender: &mut ChanSenderImpl, mut shutdown_rx: Receiver, - state_tx: &mut tokio::sync::mpsc::Sender, + state_tx: &mut StateReporter, ) -> Result<()> { let mut data_chunk_stream = root.execute(); let mut state = TaskStatus::Unspecified; @@ -537,7 +583,7 @@ impl BatchTaskExecution { // No need to set state to be Aborted here cuz it will be set by shutdown receiver. // Stop task execution. if sender.send(0).is_err() { - warn!("The task has already died before this request, so the abort did no-op") + debug!("The task has already died before this request, so the abort did no-op") } else { info!("Abort task {:?} done", self.task_id); } @@ -593,8 +639,8 @@ impl BatchTaskExecution { .expect("The state receivers must have been inited!") } - pub fn get_mem_usage(&self) -> usize { - self.context.get_mem_usage() + pub fn mem_usage(&self) -> usize { + self.context.mem_usage() } /// Check the task status: whether has ended. diff --git a/src/batch/src/task/task_manager.rs b/src/batch/src/task/task_manager.rs index d4cb284e63427..c19e7a549179c 100644 --- a/src/batch/src/task/task_manager.rs +++ b/src/batch/src/task/task_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -30,9 +30,12 @@ use tokio::runtime::Runtime; use tokio::sync::mpsc::Sender; use tonic::Status; +use crate::executor::BatchManagerMetrics; use crate::rpc::service::exchange::GrpcExchangeWriter; use crate::rpc::service::task_service::TaskInfoResponseResult; -use crate::task::{BatchTaskExecution, ComputeNodeContext, TaskId, TaskOutput, TaskOutputId}; +use crate::task::{ + BatchTaskExecution, ComputeNodeContext, StateReporter, TaskId, TaskOutput, TaskOutputId, +}; /// `BatchManager` is responsible for managing all batch tasks. #[derive(Clone)] @@ -50,10 +53,13 @@ pub struct BatchManager { /// When each task context report their own usage, it will apply the diff into this total mem /// value for all tasks. total_mem_val: Arc>, + + /// Metrics for batch manager. + metrics: BatchManagerMetrics, } impl BatchManager { - pub fn new(config: BatchConfig) -> Self { + pub fn new(config: BatchConfig, metrics: BatchManagerMetrics) -> Self { let runtime = { let mut builder = tokio::runtime::Builder::new_multi_thread(); if let Some(worker_threads_num) = config.worker_threads_num { @@ -73,6 +79,7 @@ impl BatchManager { runtime: Box::leak(Box::new(runtime)), config, total_mem_val: TrAdder::new().into(), + metrics, } } @@ -82,6 +89,7 @@ impl BatchManager { plan: PlanFragment, epoch: BatchQueryEpoch, context: ComputeNodeContext, + state_reporter: StateReporter, ) -> Result<()> { trace!("Received task id: {:?}, plan: {:?}", tid, plan); let task = BatchTaskExecution::new(tid, plan, context, epoch, self.runtime)?; @@ -92,6 +100,7 @@ impl BatchManager { // it's possible do not found parent task id in theory. 
let ret = if let hash_map::Entry::Vacant(e) = self.tasks.lock().entry(task_id.clone()) { e.insert(task.clone()); + self.metrics.task_num.inc(); Ok(()) } else { Err(ErrorCode::InternalError(format!( @@ -100,7 +109,7 @@ impl BatchManager { )) .into()) }; - task.clone().async_execute().await?; + task.clone().async_execute(state_reporter).await?; ret } @@ -141,25 +150,18 @@ impl BatchManager { pub fn abort_task(&self, sid: &ProstTaskId) { let sid = TaskId::from(sid); - match self.tasks.lock().get(&sid) { - Some(task) => task.abort_task(), + match self.tasks.lock().remove(&sid) { + Some(task) => { + tracing::trace!("Removed task: {:?}", task.get_task_id()); + task.abort_task(); + self.metrics.task_num.dec() + } None => { warn!("Task id not found for abort task") } }; } - pub fn remove_task( - &self, - sid: &ProstTaskId, - ) -> Result>>> { - let task_id = TaskId::from(sid); - match self.tasks.lock().remove(&task_id) { - Some(t) => Ok(Some(t)), - None => Err(TaskNotFound.into()), - } - } - /// Returns error if task is not running. pub fn check_if_task_running(&self, task_id: &TaskId) -> Result<()> { match self.tasks.lock().get(task_id) { @@ -223,7 +225,7 @@ impl BatchManager { } // Alternatively, we can use a bool flag to indicate end of execution. // Now we use only store 0 bytes in Context after execution ends. - let mem_usage = t.get_mem_usage(); + let mem_usage = t.mem_usage(); if mem_usage > max_mem { max_mem = mem_usage; max_mem_task_id = Some(t_id.clone()); @@ -231,13 +233,15 @@ impl BatchManager { } if let Some(id) = max_mem_task_id { let t = guard.get(&id).unwrap(); + // FIXME: `Abort` will not report error but truncated results to user. We should + // consider throw error. t.abort_task(); } } /// Called by global memory manager for total usage of batch tasks. 
This op is designed to be /// light-weight - pub fn get_all_memory_usage(&self) -> usize { + pub fn total_mem_usage(&self) -> usize { self.total_mem_val.get() as usize } @@ -248,17 +252,11 @@ impl BatchManager { } } -impl Default for BatchManager { - fn default() -> Self { - BatchManager::new(BatchConfig::default()) - } -} - #[cfg(test)] mod tests { use risingwave_common::config::BatchConfig; use risingwave_common::types::DataType; - use risingwave_expr::expr::make_i32_literal; + use risingwave_expr::expr::test_utils::make_i32_literal; use risingwave_hummock_sdk::to_committed_batch_query_epoch; use risingwave_pb::batch_plan::exchange_info::DistributionMode; use risingwave_pb::batch_plan::plan_node::NodeBody; @@ -270,12 +268,13 @@ mod tests { use risingwave_pb::expr::TableFunction; use tonic::Code; - use crate::task::{BatchManager, ComputeNodeContext, TaskId}; + use crate::executor::BatchManagerMetrics; + use crate::task::{BatchManager, ComputeNodeContext, StateReporter, TaskId}; #[test] fn test_task_not_found() { use tonic::Status; - let manager = BatchManager::new(BatchConfig::default()); + let manager = BatchManager::new(BatchConfig::default(), BatchManagerMetrics::for_test()); let task_id = TaskId { task_id: 0, stage_id: 0, @@ -303,7 +302,7 @@ mod tests { #[tokio::test] async fn test_task_id_conflict() { - let manager = BatchManager::new(BatchConfig::default()); + let manager = BatchManager::new(BatchConfig::default(), BatchManagerMetrics::for_test()); let plan = PlanFragment { root: Some(PlanNode { children: vec![], @@ -330,11 +329,18 @@ mod tests { plan.clone(), to_committed_batch_query_epoch(0), context.clone(), + StateReporter::new_with_test(), ) .await .unwrap(); let err = manager - .fire_task(&task_id, plan, to_committed_batch_query_epoch(0), context) + .fire_task( + &task_id, + plan, + to_committed_batch_query_epoch(0), + context, + StateReporter::new_with_test(), + ) .await .unwrap_err(); assert!(err @@ -344,7 +350,7 @@ mod tests { #[tokio::test] async fn test_task_aborted() { - let manager = BatchManager::new(BatchConfig::default()); + let manager = BatchManager::new(BatchConfig::default(), BatchManagerMetrics::for_test()); let plan = PlanFragment { root: Some(PlanNode { children: vec![], @@ -380,12 +386,12 @@ mod tests { plan.clone(), to_committed_batch_query_epoch(0), context.clone(), + StateReporter::new_with_test(), ) .await .unwrap(); manager.abort_task(&task_id); let task_id = TaskId::from(&task_id); - let res = manager.wait_until_task_aborted(&task_id).await; - assert_eq!(res, Ok(())); + assert!(!manager.tasks.lock().contains_key(&task_id)); } } diff --git a/src/bench/Cargo.toml b/src/bench/Cargo.toml index db35545149df3..4183f44423152 100644 --- a/src/bench/Cargo.toml +++ b/src/bench/Cargo.toml @@ -21,10 +21,6 @@ hdrhistogram = "7" isahc = { version = "1", default-features = false } itertools = "0.10" libc = "0.2" -minitrace = "0.4" -minitrace-jaeger = "0.4" -moka = { version = "0.9", features = ["future"] } -nix = { version = "0.25", features = ["fs", "mman"] } opentelemetry = { version = "0.17", optional = true, features = ["rt-tokio"] } opentelemetry-jaeger = { version = "0.16", optional = true, features = [ "rt-tokio", @@ -36,10 +32,6 @@ parking_lot = "0.12" prometheus = { version = "0.13", features = ["process"] } rand = "0.8" risingwave_common = { path = "../common" } -risingwave_hummock_sdk = { path = "../storage/hummock_sdk" } -risingwave_meta = { path = "../meta", features = ["test"] } -risingwave_pb = { path = "../prost" } -risingwave_rpc_client = { path = 
"../rpc_client" } risingwave_storage = { path = "../storage" } serde = { version = "1", features = ["derive"] } tokio = { version = "0.2", package = "madsim-tokio", features = [ @@ -60,6 +52,9 @@ tracing-subscriber = "0.3.16" [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../workspace-hack" } +[target.'cfg(target_os = "linux")'.dependencies] +nix = { version = "0.25", features = ["fs", "mman"] } + [[bin]] name = "file-cache-bench" path = "file_cache_bench/main.rs" diff --git a/src/bench/file_cache_bench/analyze.rs b/src/bench/file_cache_bench/analyze.rs index a44b97a59ae11..77e1c9f6adb9b 100644 --- a/src/bench/file_cache_bench/analyze.rs +++ b/src/bench/file_cache_bench/analyze.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/bench/file_cache_bench/bench.rs b/src/bench/file_cache_bench/bench.rs index abd9656340a8c..d99955f10b174 100644 --- a/src/bench/file_cache_bench/bench.rs +++ b/src/bench/file_cache_bench/bench.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/bench/file_cache_bench/bpf.rs b/src/bench/file_cache_bench/bpf.rs index 1761430788a63..1fbb9d440f298 100644 --- a/src/bench/file_cache_bench/bpf.rs +++ b/src/bench/file_cache_bench/bpf.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/bench/file_cache_bench/main.rs b/src/bench/file_cache_bench/main.rs index db4d52b29f83a..286298784770d 100644 --- a/src/bench/file_cache_bench/main.rs +++ b/src/bench/file_cache_bench/main.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/bench/file_cache_bench/rate.rs b/src/bench/file_cache_bench/rate.rs index e8408b9430a59..a2d0853ab7695 100644 --- a/src/bench/file_cache_bench/rate.rs +++ b/src/bench/file_cache_bench/rate.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/bench/file_cache_bench/utils.rs b/src/bench/file_cache_bench/utils.rs index 7ac22402e208b..511f8946869ee 100644 --- a/src/bench/file_cache_bench/utils.rs +++ b/src/bench/file_cache_bench/utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/bench/s3_bench/main.rs b/src/bench/s3_bench/main.rs index d06df40830566..93a3e241ad8c6 100644 --- a/src/bench/s3_bench/main.rs +++ b/src/bench/s3_bench/main.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -225,7 +225,6 @@ async fn multi_part_upload( }; let chunks = obj .chunks(part_size.as_u64() as usize) - .into_iter() .map(|chunk| chunk.to_owned()) .collect_vec(); diff --git a/src/cmd/Cargo.toml b/src/cmd/Cargo.toml index e7eff00e37130..71f4725eec957 100644 --- a/src/cmd/Cargo.toml +++ b/src/cmd/Cargo.toml @@ -11,12 +11,16 @@ repository = { workspace = true } static-link = ["workspace-config/enable-static-link"] static-log-level = ["workspace-config/enable-static-log-level"] +[package.metadata.cargo-machete] +ignored = ["workspace-hack", "workspace-config", "task_stats_alloc"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack", "workspace-config", "task_stats_alloc"] + [dependencies] anyhow = "1" clap = { version = "3", features = ["derive"] } -log = { version = "0.4" } risingwave_common = { path = "../common" } -risingwave_compaction_test = { path = "../tests/compaction_test" } risingwave_compactor = { path = "../storage/compactor" } risingwave_compute = { path = "../compute" } risingwave_ctl = { path = "../ctl" } @@ -34,7 +38,6 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "fs", "tracing", ] } -tracing = { version = "0.1" } workspace-config = { path = "../utils/workspace-config", optional = true } workspace-hack = { path = "../workspace-hack" } @@ -61,4 +64,3 @@ path = "src/bin/compactor.rs" [[bin]] name = "risectl" path = "src/bin/ctl.rs" - diff --git a/src/cmd/src/bin/compactor.rs b/src/cmd/src/bin/compactor.rs index 0d4eb83749042..c684b6420d555 100644 --- a/src/cmd/src/bin/compactor.rs +++ b/src/cmd/src/bin/compactor.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ fn main() { let opts = risingwave_compactor::CompactorOpts::parse(); - risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new_default()); + risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new()); risingwave_rt::main_okk(risingwave_compactor::start(opts)) } diff --git a/src/cmd/src/bin/compute_node.rs b/src/cmd/src/bin/compute_node.rs index 49309d563e21d..eb31a290a274c 100644 --- a/src/cmd/src/bin/compute_node.rs +++ b/src/cmd/src/bin/compute_node.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,7 +24,9 @@ fn main() { let opts = risingwave_compute::ComputeNodeOpts::parse(); - risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new(false)); + risingwave_rt::init_risingwave_logger( + risingwave_rt::LoggerSettings::new().enable_tokio_console(false), + ); risingwave_rt::main_okk(risingwave_compute::start(opts)) } diff --git a/src/cmd/src/bin/ctl.rs b/src/cmd/src/bin/ctl.rs index 2ed47be73bca0..a97fa1fa2b0cf 100644 --- a/src/cmd/src/bin/ctl.rs +++ b/src/cmd/src/bin/ctl.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -25,7 +25,7 @@ fn main() -> Result<()> { let opts = risingwave_ctl::CliOpts::parse(); - risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new_default()); + risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new()); // Note: Use a simple current thread runtime for ctl. // When there's a heavy workload, multiple thread runtime seems to respond slowly. May need diff --git a/src/cmd/src/bin/frontend_node.rs b/src/cmd/src/bin/frontend_node.rs index d130c0a7f4b57..de02b05e92a8b 100644 --- a/src/cmd/src/bin/frontend_node.rs +++ b/src/cmd/src/bin/frontend_node.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ fn main() { let opts = risingwave_frontend::FrontendOpts::parse(); - risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new_default()); + risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new()); risingwave_rt::main_okk(risingwave_frontend::start(opts)) } diff --git a/src/cmd/src/bin/meta_node.rs b/src/cmd/src/bin/meta_node.rs index 8c71639bfe31e..a4e3f19795c02 100644 --- a/src/cmd/src/bin/meta_node.rs +++ b/src/cmd/src/bin/meta_node.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ fn main() { let opts = risingwave_meta::MetaNodeOpts::parse(); - risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new_default()); + risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new()); risingwave_rt::main_okk(risingwave_meta::start(opts)) } diff --git a/src/cmd_all/Cargo.toml b/src/cmd_all/Cargo.toml index 713f40424fde2..8d66f7f6425f1 100644 --- a/src/cmd_all/Cargo.toml +++ b/src/cmd_all/Cargo.toml @@ -11,12 +11,16 @@ repository = { workspace = true } static-link = ["workspace-config/enable-static-link"] static-log-level = ["workspace-config/enable-static-log-level"] +[package.metadata.cargo-machete] +ignored = ["workspace-hack", "workspace-config", "task_stats_alloc"] + +[package.metadata.cargo-udeps.ignore] +ignored = ["workspace-hack", "workspace-config", "task_stats_alloc"] + [dependencies] anyhow = "1" clap = { version = "3", features = ["derive"] } console = "0.15.2" -log = { version = "0.4" } -risedev = { path = "../risedevtool" } risingwave_common = { path = "../common" } risingwave_compactor = { path = "../storage/compactor" } risingwave_compute = { path = "../compute" } @@ -24,10 +28,8 @@ risingwave_ctl = { path = "../ctl" } risingwave_frontend = { path = "../frontend" } risingwave_meta = { path = "../meta" } risingwave_rt = { path = "../utils/runtime" } -sync-point = { path = "../utils/sync-point" } task_stats_alloc = { path = "../utils/task_stats_alloc" } tempfile = "3.3.0" -tikv-jemallocator = { version = "0.5", features = ["profiling", "stats"] } tokio = { version = "0.2", package = "madsim-tokio", features = [ "rt", "rt-multi-thread", @@ -41,6 +43,9 @@ tracing = { version = "0.1" } workspace-config = { path = "../utils/workspace-config", optional = true } workspace-hack = { path = "../workspace-hack" } +[target.'cfg(target_os = "linux")'.dependencies] +tikv-jemallocator = { version = "0.5", features = ["profiling", "stats"] } + [[bin]] name = "risingwave" path = "src/bin/risingwave.rs" diff --git 
a/src/cmd_all/src/bin/risingwave.rs b/src/cmd_all/src/bin/risingwave.rs index 6fd7627abd89d..67b3e4a1a33b1 100644 --- a/src/cmd_all/src/bin/risingwave.rs +++ b/src/cmd_all/src/bin/risingwave.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ use anyhow::{bail, Result}; use clap::StructOpt; use risingwave_cmd_all::playground; use risingwave_common::enable_task_local_jemalloc_on_linux; +use tracing::Level; enable_task_local_jemalloc_on_linux!(); @@ -40,7 +41,9 @@ fn main() -> Result<()> { let opts = risingwave_compute::ComputeNodeOpts::parse_from(args); - risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new(false)); + risingwave_rt::init_risingwave_logger( + risingwave_rt::LoggerSettings::new().enable_tokio_console(false), + ); risingwave_rt::main_okk(risingwave_compute::start(opts)); @@ -58,7 +61,7 @@ fn main() -> Result<()> { let opts = risingwave_meta::MetaNodeOpts::parse_from(args); - risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new_default()); + risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new()); risingwave_rt::main_okk(risingwave_meta::start(opts)); @@ -76,7 +79,7 @@ fn main() -> Result<()> { let opts = risingwave_frontend::FrontendOpts::parse_from(args); - risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new_default()); + risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new()); risingwave_rt::main_okk(risingwave_frontend::start(opts)); @@ -85,7 +88,7 @@ fn main() -> Result<()> { ); } - // frontend node configuration + // compactor node configuration for fn_name in ["compactor", "compactor-node", "compactor_node"] { fns.insert( fn_name, @@ -94,7 +97,7 @@ fn main() -> Result<()> { let opts = risingwave_compactor::CompactorOpts::parse_from(args); - risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new_default()); + risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new()); risingwave_rt::main_okk(risingwave_compactor::start(opts)); @@ -111,7 +114,7 @@ fn main() -> Result<()> { eprintln!("launching risectl"); let opts = risingwave_ctl::CliOpts::parse_from(args); - risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new_default()); + risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new()); risingwave_rt::main_okk(risingwave_ctl::start(opts)) }), @@ -123,11 +126,10 @@ fn main() -> Result<()> { fns.insert( fn_name, Box::new(move |_: Vec| { - risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new_default()); - - // Enable tokio console for `./risedev p` by replacing the above statement to: - // risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new(false, - // true)); + let settings = risingwave_rt::LoggerSettings::new() + .enable_tokio_console(false) + .with_target("risingwave_storage", Level::INFO); + risingwave_rt::init_risingwave_logger(settings); risingwave_rt::main_okk(playground()) }), diff --git a/src/cmd_all/src/lib.rs b/src/cmd_all/src/lib.rs index 66a312f057ae9..b31ce9f7e57b6 100644 --- a/src/cmd_all/src/lib.rs +++ b/src/cmd_all/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
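Across the binaries in this patch, logger initialization moves from the `LoggerSettings::new_default()` / `new(bool)` constructors to a builder-style `LoggerSettings::new()` with chained options such as `enable_tokio_console(false)` and `with_target("risingwave_storage", Level::INFO)`. A hypothetical sketch of such a builder; the real type lives in `risingwave_rt`, and its fields and defaults may differ:

```rust
/// Illustrative builder only; not the actual `risingwave_rt::LoggerSettings`.
#[derive(Default)]
struct LoggerSettingsSketch {
    enable_tokio_console: bool,
    targets: Vec<(String, tracing::Level)>,
}

impl LoggerSettingsSketch {
    fn new() -> Self {
        Self::default()
    }

    fn enable_tokio_console(mut self, enable: bool) -> Self {
        self.enable_tokio_console = enable;
        self
    }

    fn with_target(mut self, target: impl Into<String>, level: tracing::Level) -> Self {
        self.targets.push((target.into(), level));
        self
    }
}

// Usage mirroring the playground entry point above:
fn build_settings() -> LoggerSettingsSketch {
    LoggerSettingsSketch::new()
        .enable_tokio_console(false)
        .with_target("risingwave_storage", tracing::Level::INFO)
}
```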
diff --git a/src/cmd_all/src/playground.rs b/src/cmd_all/src/playground.rs index 295611a1f3c4e..2ab99e7aa1fa5 100644 --- a/src/cmd_all/src/playground.rs +++ b/src/cmd_all/src/playground.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,12 +14,14 @@ use std::ffi::OsString; use std::io::Write; +use std::path::Path; use std::sync::LazyLock; use anyhow::Result; use clap::StructOpt; -use risingwave_common::config::load_config; use tempfile::TempPath; +use tokio::io::{AsyncBufReadExt, BufReader}; +use tokio::process::Command; use tokio::signal; pub enum RisingWaveService { @@ -27,6 +29,7 @@ pub enum RisingWaveService { Meta(Vec), Frontend(Vec), Compactor(Vec), + ConnectorNode(Vec), } impl RisingWaveService { @@ -36,7 +39,10 @@ impl RisingWaveService { RisingWaveService::Compute(args0) | RisingWaveService::Meta(args0) | RisingWaveService::Frontend(args0) - | RisingWaveService::Compactor(args0) => args0.extend(args.iter().map(|s| s.into())), + | RisingWaveService::Compactor(args0) + | RisingWaveService::ConnectorNode(args0) => { + args0.extend(args.iter().map(|s| s.into())) + } } } } @@ -60,33 +66,40 @@ max_heartbeat_interval_secs = 600", fn get_services(profile: &str) -> (Vec, bool) { let mut services = match profile { "playground" => vec![ - RisingWaveService::Meta(osstrs([])), - RisingWaveService::Compute(osstrs([])), + RisingWaveService::Meta(osstrs([ + "--dashboard-host", + "0.0.0.0:5691", + "--state-store", + "hummock+memory", + "--connector-rpc-endpoint", + "127.0.0.1:50051", + ])), + RisingWaveService::Compute(osstrs(["--connector-rpc-endpoint", "127.0.0.1:50051"])), RisingWaveService::Frontend(osstrs([])), + RisingWaveService::ConnectorNode(osstrs([])), ], "playground-3cn" => vec![ - RisingWaveService::Meta(osstrs([])), - RisingWaveService::Compute(osstrs([ - "--host", - "127.0.0.1:5687", + RisingWaveService::Meta(osstrs([ + "--dashboard-host", + "0.0.0.0:5691", "--state-store", "hummock+memory-shared", + ])), + RisingWaveService::Compute(osstrs([ + "--listen-addr", + "127.0.0.1:5687", "--parallelism", "4", ])), RisingWaveService::Compute(osstrs([ - "--host", + "--listen-addr", "127.0.0.1:5688", - "--state-store", - "hummock+memory-shared", "--parallelism", "4", ])), RisingWaveService::Compute(osstrs([ - "--host", + "--listen-addr", "127.0.0.1:5689", - "--state-store", - "hummock+memory-shared", "--parallelism", "4", ])), @@ -97,23 +110,30 @@ fn get_services(profile: &str) -> (Vec, bool) { RisingWaveService::Meta(osstrs([ "--listen-addr", "0.0.0.0:5690", - "--host", - "127.0.0.1", + "--advertise-addr", + "127.0.0.1:5690", "--dashboard-host", "0.0.0.0:5691", + "--state-store", + "hummock+memory", + "--connector-rpc-endpoint", + "127.0.0.1:50051", ])), RisingWaveService::Compute(osstrs([ - "--host", + "--listen-addr", "0.0.0.0:5688", - "--client-address", + "--advertise-addr", "127.0.0.1:5688", + "--connector-rpc-endpoint", + "127.0.0.1:50051", ])), RisingWaveService::Frontend(osstrs([ - "--host", + "--listen-addr", "0.0.0.0:4566", - "--client-address", + "--advertise-addr", "127.0.0.1:4566", ])), + RisingWaveService::ConnectorNode(osstrs([])), ] } _ => { @@ -154,10 +174,6 @@ pub async fn playground() -> Result<()> { opts.insert(0, "meta-node".into()); tracing::info!("starting meta-node thread with cli args: {:?}", opts); let opts = risingwave_meta::MetaNodeOpts::parse_from(opts); - - let _config = 
load_config(&opts.config_path); - - tracing::info!("opts: {:#?}", opts); let _meta_handle = tokio::spawn(async move { risingwave_meta::start(opts).await; tracing::warn!("meta is stopped, shutdown all nodes"); @@ -177,7 +193,6 @@ pub async fn playground() -> Result<()> { opts.insert(0, "compute-node".into()); tracing::info!("starting compute-node thread with cli args: {:?}", opts); let opts = risingwave_compute::ComputeNodeOpts::parse_from(opts); - tracing::info!("opts: {:#?}", opts); let _compute_handle = tokio::spawn(async move { risingwave_compute::start(opts).await }); } @@ -185,7 +200,6 @@ pub async fn playground() -> Result<()> { opts.insert(0, "frontend-node".into()); tracing::info!("starting frontend-node thread with cli args: {:?}", opts); let opts = risingwave_frontend::FrontendOpts::parse_from(opts); - tracing::info!("opts: {:#?}", opts); let _frontend_handle = tokio::spawn(async move { risingwave_frontend::start(opts).await }); } @@ -193,10 +207,41 @@ pub async fn playground() -> Result<()> { opts.insert(0, "compactor".into()); tracing::info!("starting compactor thread with cli args: {:?}", opts); let opts = risingwave_compactor::CompactorOpts::parse_from(opts); - tracing::info!("opts: {:#?}", opts); let _compactor_handle = tokio::spawn(async move { risingwave_compactor::start(opts).await }); } + // connector node only supports in docker-playground profile + RisingWaveService::ConnectorNode(_) => { + let prefix_bin = match profile.as_str() { + "docker-playground" | "online-docker-playground" => { + "/risingwave/bin".to_string() + } + "playground" => std::env::var("PREFIX_BIN").unwrap_or_default(), + _ => "".to_string(), + }; + let cmd_path = Path::new(&prefix_bin) + .join("connector-node") + .join("start-service.sh"); + if cmd_path.exists() { + tracing::info!("start connector-node with prefix_bin {}", prefix_bin); + let mut cmd = Command::new(cmd_path); + cmd.arg("-p").arg("50051"); + cmd.stdout(std::process::Stdio::piped()); + let mut child = cmd.spawn().expect("failed to start connector node"); + let stdout = child.stdout.take().expect("failed to open stdout"); + let _child_handle = tokio::spawn(async move { child.wait().await }); + let _stdout_handle = tokio::spawn(async move { + let mut reader = BufReader::new(stdout).lines(); + while let Some(line) = + reader.next_line().await.expect("failed to read line") + { + eprintln!("{}", line); + } + }); + } else { + eprintln!("connector node path not exist!"); + } + } } } diff --git a/src/common/Cargo.toml b/src/common/Cargo.toml index 33951be1d903f..fe34788209496 100644 --- a/src/common/Cargo.toml +++ b/src/common/Cargo.toml @@ -7,11 +7,16 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" -arrow-array = { git = "https://github.com/apache/arrow-rs", rev = "6139d898" } -arrow-schema = { git = "https://github.com/apache/arrow-rs", rev = "6139d898" } -async-stream = "0.3" +arrow-array = "31" +arrow-schema = "31" async-trait = "0.1" auto_enums = "0.7" bitflags = "1.3.2" @@ -19,11 +24,10 @@ byteorder = "1" bytes = "1" chrono = { version = "0.4", default-features = false, features = ["clock", "std"] } chrono-tz = { version = "0.7", features = ["case-insensitive"] } +clap = { version = "3", features = ["derive"] } comfy-table = "6" crc32fast = "1" derivative = "2" -either = "1" -enum-as-inner = "0.5" fixedbitset = { version = 
"0.4", features = ["std"] } futures = { version = "0.3", default-features = false, features = ["alloc"] } futures-async-stream = "0.2" @@ -31,15 +35,12 @@ hex = "0.4.3" humantime = "2.1" itertools = "0.10" itoa = "1.0" -lru = { git = "https://github.com/risingwavelabs/lru-rs.git", branch = "evict_by_timestamp" } memcomparable = { version = "0.1", features = ["decimal"] } -more-asserts = "0.3" num-traits = "0.2" parking_lot = "0.12" parse-display = "0.6" paste = "1" -pin-project = "1" -postgres-types = { version = "0.2.4", features = ["derive","with-chrono-0_4"] } +postgres-types = { version = "0.2.4", features = ["derive","with-chrono-0_4","with-serde_json-1"] } prometheus = { version = "0.13" } prost = "0.11" rand = "0.8" @@ -48,28 +49,22 @@ risingwave_pb = { path = "../prost" } rust_decimal = { version = "1", features = ["db-tokio-postgres"] } ryu = "1.0" serde = { version = "1", features = ["derive"] } -serde_derive = "1" serde_json = "1" -smallvec = "1" -spin = "0.9" static_assertions = "1" strum = "0.24" strum_macros = "0.24" sysinfo = "0.26" thiserror = "1" tokio = { version = "0.2", package = "madsim-tokio", features = ["rt", "rt-multi-thread", "sync", "macros", "time", "signal"] } -tokio-stream = "0.1" toml = "0.5" tonic = { version = "0.2", package = "madsim-tonic" } tracing = "0.1" -twox-hash = "1" url = "2" [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../workspace-hack" } [target.'cfg(target_os = "linux")'.dependencies] -procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229b2c1fcc44118bef7eff127128" } procfs = { version = "0.12", default-features = false } libc = "0.2" @@ -80,13 +75,19 @@ mach = "0.3.2" [dev-dependencies] criterion = "0.4" +more-asserts = "0.3" rand = "0.8" tempfile = "3" +twox-hash = "1" [[bench]] name = "bench_encoding" harness = false +[[bench]] +name = "bench_row" +harness = false + [[bench]] name = "bitmap" harness = false diff --git a/src/common/benches/bench_encoding.rs b/src/common/benches/bench_encoding.rs index 68d2ef76ea0a1..24eecabd407f4 100644 --- a/src/common/benches/bench_encoding.rs +++ b/src/common/benches/bench_encoding.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/benches/bench_row.rs b/src/common/benches/bench_row.rs new file mode 100644 index 0000000000000..2b58df0ae02b6 --- /dev/null +++ b/src/common/benches/bench_row.rs @@ -0,0 +1,253 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::collections::BTreeMap; + +use criterion::{criterion_group, criterion_main, Criterion}; +use risingwave_common::catalog::ColumnId; +use risingwave_common::error::Result; +use risingwave_common::row::{OwnedRow, Row, RowDeserializer}; +use risingwave_common::types::{DataType, Datum, ScalarImpl}; +use risingwave_common::util::ordered::OrderedRowSerde; +use risingwave_common::util::sort_util::OrderType; +use risingwave_common::util::value_encoding::column_aware_row_encoding; + +struct Case { + name: String, + schema: Vec, + column_ids: Vec, + rows: Vec, + needed_schema: Vec, + needed_ids: Vec, +} + +impl Case { + pub fn new( + name: &str, + schema: Vec, + column_ids: Vec, + rows: Vec, + needed_schema: Option>, + needed_ids: Option>, + ) -> Self { + Self { + name: name.to_string(), + schema: schema.clone(), + column_ids: column_ids.clone(), + rows, + needed_ids: needed_ids.unwrap_or(column_ids), + needed_schema: needed_schema.unwrap_or(schema), + } + } +} + +fn memcmp_encode(c: &Case) -> Vec> { + let serde = OrderedRowSerde::new( + c.schema.clone(), + vec![OrderType::Descending; c.schema.len()], + ); + let mut array = vec![]; + for row in &c.rows { + let mut row_bytes = vec![]; + serde.serialize(row, &mut row_bytes); + array.push(row_bytes); + } + array +} + +fn basic_encode(c: &Case) -> Vec> { + let mut array = vec![]; + for row in &c.rows { + let row_encoding = row.value_serialize(); + array.push(row_encoding); + } + array +} + +fn column_aware_encode(c: &Case) -> Vec> { + let seralizer = column_aware_row_encoding::Serializer::new(&c.column_ids); + let mut array = vec![]; + for row in &c.rows { + let row_bytes = seralizer.serialize_row_column_aware(row); + array.push(row_bytes); + } + array +} + +fn memcmp_decode(c: &Case, bytes: &Vec>) -> Result>> { + let serde = OrderedRowSerde::new( + c.schema.clone(), + vec![OrderType::Descending; c.schema.len()], + ); + let mut res = vec![]; + if c.column_ids == c.needed_ids { + for byte in bytes { + let row = serde.deserialize(byte)?.into_inner(); + res.push(row); + } + } else { + let column_id_to_index = c + .column_ids + .iter() + .enumerate() + .map(|(v, k)| (k, v)) + .collect::>(); + let needed_to_row = c + .needed_ids + .iter() + .map(|id| (id, *column_id_to_index.get(id).unwrap_or(&65536))) + .collect::>(); + + for byte in bytes.iter().enumerate() { + let row = serde.deserialize(byte.1)?.into_inner(); + let mut needed = vec![None; c.needed_ids.len()]; + for (i, c) in c.needed_ids.iter().enumerate() { + let ri = *needed_to_row.get(c).unwrap(); + if ri != 65536 { + if let Some(v) = &row[ri] { + needed[i] = Some(v.clone()); + } + } + } + res.push(needed); + } + } + + Ok(res) +} + +fn basic_decode(c: &Case, bytes: &Vec>) -> Result>> { + let deserializer = RowDeserializer::new(c.schema.clone()); + let mut res = vec![]; + if c.column_ids == c.needed_ids { + for byte in bytes { + let row = deserializer.deserialize(&byte[..])?.into_inner(); + res.push(row); + } + } else { + let column_id_to_index = c + .column_ids + .iter() + .enumerate() + .map(|(v, k)| (k, v)) + .collect::>(); + let needed_to_row = c + .needed_ids + .iter() + .map(|id| (id, *column_id_to_index.get(id).unwrap_or(&65536))) + .collect::>(); + for byte in bytes { + let row = deserializer.deserialize(&byte[..])?.into_inner(); + let mut needed = vec![None; c.needed_ids.len()]; + for (i, c) in c.needed_ids.iter().enumerate() { + let ri = *needed_to_row.get(c).unwrap(); + if ri != 65536 { + if let Some(v) = &row[ri] { + needed[i] = Some(v.clone()); + } + } + } + res.push(needed); + } 
+ } + + Ok(res) +} + +fn column_aware_decode(c: &Case, bytes: &Vec<Vec<u8>>) -> Result<Vec<Vec<Datum>>> { + let deserializer = + column_aware_row_encoding::Deserializer::new(&c.needed_ids, &c.needed_schema); + let mut res = vec![]; + for byte in bytes { + let row = deserializer.decode(byte)?; + res.push(row); + } + Ok(res) +} + +fn bench_row(c: &mut Criterion) { + let cases = vec![ + Case::new( + "Int16", + vec![DataType::Int16], + vec![ColumnId::new(0)], + vec![OwnedRow::new(vec![Some(ScalarImpl::Int16(5))]); 100000], + None, + None, + ), + Case::new( + "Int16 and String", + vec![DataType::Int16, DataType::Varchar], + vec![ColumnId::new(0), ColumnId::new(1)], + vec![ + OwnedRow::new(vec![ + Some(ScalarImpl::Int16(5)), + Some(ScalarImpl::Utf8("abc".into())) + ]); + 100000 + ], + None, + None, + ), + Case::new( + "Int16 and String (Only need String)", + vec![DataType::Int16, DataType::Varchar], + vec![ColumnId::new(0), ColumnId::new(1)], + vec![ + OwnedRow::new(vec![ + Some(ScalarImpl::Int16(5)), + Some(ScalarImpl::Utf8("abc".into())) + ]); + 100000 + ], + Some(vec![DataType::Varchar]), + Some(vec![ColumnId::new(1)]), + ), + ]; + for case in &cases { + c.bench_function( + format!("memcmp encoding on {}", case.name).as_str(), + |bencher| bencher.iter(|| memcmp_encode(case)), + ); + c.bench_function( + format!("basic encoding on {}", case.name).as_str(), + |bencher| bencher.iter(|| basic_encode(case)), + ); + c.bench_function( + format!("column aware encoding on {}", case.name).as_str(), + |bencher| bencher.iter(|| column_aware_encode(case)), + ); + } + + for case in &cases { + let encode_result = memcmp_encode(case); + c.bench_function( + format!("memcmp decoding on {}", case.name).as_str(), + |bencher| bencher.iter(|| memcmp_decode(case, &encode_result)), + ); + let encode_result = basic_encode(case); + c.bench_function( + format!("basic decoding on {}", case.name).as_str(), + |bencher| bencher.iter(|| basic_decode(case, &encode_result)), + ); + let encode_result = column_aware_encode(case); + c.bench_function( + format!("column aware decoding on {}", case.name).as_str(), + |bencher| bencher.iter(|| column_aware_decode(case, &encode_result)), + ); + } +} + +criterion_group!(benches, bench_row); +criterion_main!(benches); diff --git a/src/common/benches/bitmap.rs b/src/common/benches/bitmap.rs index c9ebcda2abfa2..776e546896b3f 100644 --- a/src/common/benches/bitmap.rs +++ b/src/common/benches/bitmap.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
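For reference, the plain "basic" value encoding exercised by `basic_encode`/`basic_decode` in the row benchmark above can be driven directly. A minimal sketch, reusing only the `risingwave_common` row APIs that already appear in the benchmark (the schema and values mirror the "Int16 and String" case):

```rust
use risingwave_common::row::{OwnedRow, Row, RowDeserializer};
use risingwave_common::types::{DataType, ScalarImpl};

fn main() {
    // One row matching the "Int16 and String" benchmark case.
    let row = OwnedRow::new(vec![
        Some(ScalarImpl::Int16(5)),
        Some(ScalarImpl::Utf8("abc".into())),
    ]);

    // Serialize with the plain (non-order-preserving) value encoding.
    let bytes = row.value_serialize();

    // Deserialize it back with the schema that was used for serialization.
    let deserializer = RowDeserializer::new(vec![DataType::Int16, DataType::Varchar]);
    let decoded = deserializer.deserialize(&bytes[..]).unwrap();
    assert_eq!(decoded.into_inner(), row.into_inner());
}
```

The memcomparable and column-aware paths follow the same round-trip shape but go through `OrderedRowSerde` and `column_aware_row_encoding` respectively, as shown in the benchmark.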
diff --git a/src/common/common_service/Cargo.toml b/src/common/common_service/Cargo.toml index 988210c4d1d24..b2e43e386227f 100644 --- a/src/common/common_service/Cargo.toml +++ b/src/common/common_service/Cargo.toml @@ -8,6 +8,12 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] async-trait = "0.1" hyper = "0.14" diff --git a/src/common/common_service/src/lib.rs b/src/common/common_service/src/lib.rs index d514e2743b53a..1d2db407ceba3 100644 --- a/src/common/common_service/src/lib.rs +++ b/src/common/common_service/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/common_service/src/metrics_manager.rs b/src/common/common_service/src/metrics_manager.rs index aadee32f64662..e9885b3fc41b2 100644 --- a/src/common/common_service/src/metrics_manager.rs +++ b/src/common/common_service/src/metrics_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/common_service/src/observer_manager.rs b/src/common/common_service/src/observer_manager.rs index 9a641a141dedc..a63892ae0095a 100644 --- a/src/common/common_service/src/observer_manager.rs +++ b/src/common/common_service/src/observer_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/proc_macro/Cargo.toml b/src/common/proc_macro/Cargo.toml new file mode 100644 index 0000000000000..99aeb5f33403d --- /dev/null +++ b/src/common/proc_macro/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "risingwave_common_proc_macro" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +repository = { workspace = true } + +[lib] +proc-macro = true + +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + +[dependencies] +proc-macro-error = "1.0" +quote = "1" +proc-macro2 = { version = "1", default-features = false } +syn = "1" +bae = "0.1.7" + +[target.'cfg(not(madsim))'.dependencies] +workspace-hack = { path = "../../workspace-hack" } \ No newline at end of file diff --git a/src/common/proc_macro/src/config.rs b/src/common/proc_macro/src/config.rs new file mode 100644 index 0000000000000..55f5c98559be5 --- /dev/null +++ b/src/common/proc_macro/src/config.rs @@ -0,0 +1,64 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use bae::FromAttributes; +use proc_macro2::TokenStream; +use proc_macro_error::ResultExt; +use quote::quote; +use syn::DeriveInput; + +#[derive(FromAttributes)] +pub struct OverrideOpts { + pub path: Option, + pub optional_in_config: Option<()>, +} + +#[cfg_attr(coverage, no_coverage)] +pub fn produce_override_config(input: DeriveInput) -> TokenStream { + let struct_ident = input.ident; + let mut override_stmts = Vec::new(); + + if let syn::Data::Struct(syn::DataStruct { fields, .. }) = input.data { + for field in fields { + let override_opts = OverrideOpts::from_attributes(&field.attrs) + .expect_or_abort("Failed to parse `override_opts` attribute"); + let path = override_opts.path.expect("`path` must exist"); + let field_ident = field.ident; + + let override_stmt = if override_opts.optional_in_config.is_some() { + quote! { + if self.#field_ident.is_some() { + config.#path = self.#field_ident; + } + } + } else { + quote! { + if let Some(v) = self.#field_ident { + config.#path = v; + } + } + }; + + override_stmts.push(override_stmt); + } + } + + quote! { + impl risingwave_common::config::OverrideConfig for #struct_ident { + fn r#override(self, config: &mut risingwave_common::config::RwConfig) { + #(#override_stmts)* + } + } + } +} diff --git a/src/common/proc_macro/src/lib.rs b/src/common/proc_macro/src/lib.rs new file mode 100644 index 0000000000000..33808a2c496a4 --- /dev/null +++ b/src/common/proc_macro/src/lib.rs @@ -0,0 +1,57 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![cfg_attr(coverage, feature(no_coverage))] + +use proc_macro_error::proc_macro_error; +use syn::parse_macro_input; + +mod config; + +/// Sections in the configuration file can use `#[derive(OverrideConfig)]` to generate the +/// implementation of overwriting configs from the file. +/// +/// In the struct definition, use #[override_opts(path = ...)] on a field to indicate the field in +/// `RwConfig` to override. 
+/// +/// An example: +/// +/// ```ignore +/// #[derive(OverrideConfig)] +/// struct Opts { +/// #[override_opts(path = meta.listen_addr)] +/// listen_addr: Option<String>, +/// } +/// ``` +/// +/// will generate +/// +/// ```ignore +/// impl OverrideConfig for Opts { +/// fn r#override(self, config: &mut RwConfig) { +/// if let Some(v) = self.listen_addr { +/// config.meta.listen_addr = v; +/// } +/// } +/// } +/// ``` +#[cfg_attr(coverage, no_coverage)] +#[proc_macro_derive(OverrideConfig, attributes(override_opts))] +#[proc_macro_error] +pub fn override_config(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let input = parse_macro_input!(input); + + let gen = config::produce_override_config(input); + + gen.into() +} diff --git a/src/common/src/array/arrow.rs b/src/common/src/array/arrow.rs index af04c3a75040f..4ef6b89a0b5fa 100644 --- a/src/common/src/array/arrow.rs +++ b/src/common/src/array/arrow.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,11 +15,11 @@ //! Converts between arrays and Apache Arrow arrays. use arrow_schema::Field; use chrono::{NaiveDateTime, NaiveTime}; -use itertools::Itertools; use super::column::Column; use super::*; use crate::types::struct_type::StructType; +use crate::util::iter_util::ZipEqFast; // Implement bi-directional `From` between `DataChunk` and `arrow_array::RecordBatch`. @@ -179,7 +179,7 @@ fn get_field_vector_from_struct_type(struct_type: &StructType) -> Vec<Field> { struct_type .fields .iter() - .zip_eq(struct_type.field_names.clone()) + .zip_eq_fast(struct_type.field_names.clone()) .map(|(f, f_name)| Field::new(f_name, f.into(), true)) .collect() } @@ -443,6 +443,7 @@ impl From<&ListArray> for arrow_array::ListArray { Time64NanosecondBuilder::with_capacity(a.len()), |b, v| b.append_option(v.map(|d| d.into_arrow())), ), + ArrayImpl::Jsonb(_) => todo!("list of jsonb"), ArrayImpl::Struct(_) => todo!("list of struct"), ArrayImpl::List(_) => todo!("list of list"), ArrayImpl::Bytea(a) => build( @@ -469,15 +470,15 @@ impl From<&StructArray> for arrow_array::StructArray { array .field_arrays() .iter() - .zip_eq(array.children_array_types()) + .zip_eq_fast(array.children_array_types()) .map(|(arr, datatype)| (Field::new("", datatype.into(), true), (*arr).into())) .collect() } else { array .field_arrays() .iter() - .zip_eq(array.children_array_types()) - .zip_eq(array.children_names()) + .zip_eq_fast(array.children_array_types()) + .zip_eq_fast(array.children_names()) .map(|((arr, datatype), field_name)| { (Field::new(field_name, datatype.into(), true), (*arr).into()) }) diff --git a/src/common/src/array/bool_array.rs b/src/common/src/array/bool_array.rs index 350f31498e2fd..6e392b34ce041 100644 --- a/src/common/src/array/bool_array.rs +++ b/src/common/src/array/bool_array.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
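As a usage note for the `OverrideConfig` derive above: `produce_override_config` treats a field marked `optional_in_config` as targeting a config entry that is itself an `Option`, so the CLI value is assigned as-is instead of being unwrapped first. A hypothetical sketch, not compiled against the real `RwConfig`; the attribute spelling follows `bae`'s bare-flag convention and the second config path is illustrative only:

```rust
#[derive(OverrideConfig)]
struct Opts {
    // Target is a plain config value: only overridden when the flag is given.
    #[override_opts(path = meta.listen_addr)]
    listen_addr: Option<String>,

    // Target is an `Option` in the config file as well: assigned directly.
    #[override_opts(path = meta.dashboard_addr, optional_in_config)]
    dashboard_addr: Option<String>,
}
```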
@@ -170,6 +170,7 @@ mod tests { use super::*; use crate::array::{read_bool_array, NULL_VAL_FOR_HASH}; + use crate::util::iter_util::ZipEqFast; fn helper_test_builder(data: Vec>) -> BoolArray { let mut builder = BoolArrayBuilder::new(data.len()); @@ -193,7 +194,7 @@ mod tests { }) .collect_vec(); let array = helper_test_builder(v.clone()); - let res = v.iter().zip_eq(array.iter()).all(|(a, b)| *a == b); + let res = v.iter().zip_eq_fast(array.iter()).all(|(a, b)| *a == b); assert!(res); } @@ -217,7 +218,10 @@ mod tests { let encoded = array.to_protobuf(); let decoded = read_bool_array(&encoded, num_bits).unwrap().into_bool(); - let equal = array.iter().zip_eq(decoded.iter()).all(|(a, b)| a == b); + let equal = array + .iter() + .zip_eq_fast(decoded.iter()) + .all(|(a, b)| a == b); assert!(equal); } } @@ -258,10 +262,12 @@ mod tests { let hasher_builder = RandomXxHashBuilder64::default(); let mut states = vec![hasher_builder.build_hasher(); ARR_LEN]; vecs.iter().for_each(|v| { - v.iter().zip_eq(&mut states).for_each(|(x, state)| match x { - Some(inner) => inner.hash(state), - None => NULL_VAL_FOR_HASH.hash(state), - }) + v.iter() + .zip_eq_fast(&mut states) + .for_each(|(x, state)| match x { + Some(inner) => inner.hash(state), + None => NULL_VAL_FOR_HASH.hash(state), + }) }); let hashes = hash_finish(&mut states[..]); diff --git a/src/common/src/array/bytes_array.rs b/src/common/src/array/bytes_array.rs index d6f3d0e7f4ed5..ea27ee37131de 100644 --- a/src/common/src/array/bytes_array.rs +++ b/src/common/src/array/bytes_array.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,7 +15,6 @@ use std::iter; use std::mem::size_of; -use itertools::Itertools; use risingwave_pb::common::buffer::CompressionType; use risingwave_pb::common::Buffer; use risingwave_pb::data::{Array as ProstArray, ArrayType}; @@ -23,6 +22,7 @@ use risingwave_pb::data::{Array as ProstArray, ArrayType}; use super::{Array, ArrayBuilder, ArrayMeta}; use crate::array::ArrayBuilderImpl; use crate::buffer::{Bitmap, BitmapBuilder}; +use crate::util::iter_util::ZipEqDebug; /// `BytesArray` is a collection of Rust `[u8]`s. #[derive(Debug, Clone, PartialEq, Eq)] @@ -55,7 +55,7 @@ impl Array for BytesArray { // of null_bitmap is n, chain iterator of null_bitmapƒ // with one single true here to push the end of offset // to offset_buffer - .zip_eq(self.null_bitmap().iter().chain(iter::once(true))) + .zip_eq_debug(self.null_bitmap().iter().chain(iter::once(true))) .fold( Vec::::with_capacity(self.data.len() * size_of::()), |mut buffer, (offset, not_null)| { diff --git a/src/common/src/array/chrono_array.rs b/src/common/src/array/chrono_array.rs index c4d477169f63d..5d8461508e350 100644 --- a/src/common/src/array/chrono_array.rs +++ b/src/common/src/array/chrono_array.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -29,6 +29,7 @@ mod tests { use super::*; use crate::array::{Array, ArrayBuilder}; + use crate::util::iter_util::ZipEqFast; #[test] fn test_naivedate_builder() { @@ -41,7 +42,7 @@ mod tests { builder.append(*i); } let a = builder.finish(); - let res = v.iter().zip_eq(a.iter()).all(|(a, b)| *a == b); + let res = v.iter().zip_eq_fast(a.iter()).all(|(a, b)| *a == b); assert!(res) } diff --git a/src/common/src/array/column.rs b/src/common/src/array/column.rs index dd3c8bff66120..edc0952a9d9b1 100644 --- a/src/common/src/array/column.rs +++ b/src/common/src/array/column.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/array/column_proto_readers.rs b/src/common/src/array/column_proto_readers.rs index 5e88a2aa197a1..f1e85f2e40d22 100644 --- a/src/common/src/array/column_proto_readers.rs +++ b/src/common/src/array/column_proto_readers.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -55,6 +55,8 @@ pub fn read_numeric_array> } } let arr = builder.finish(); + ensure_eq!(arr.len(), cardinality); + Ok(arr.into()) } @@ -68,7 +70,7 @@ pub fn read_bool_array(array: &ProstArray, cardinality: usize) -> ArrayResult>( offset ) })?; - let v = R::read(buf.as_slice())?; - builder.append(Some(v)); + R::read(buf.as_slice(), &mut builder)?; } else { builder.append(None); } } let arr = builder.finish(); + ensure_eq!(arr.len(), cardinality); + Ok(arr.into()) } diff --git a/src/common/src/array/data_chunk.rs b/src/common/src/array/data_chunk.rs index 2559e3e43f13c..7270fc8a0768f 100644 --- a/src/common/src/array/data_chunk.rs +++ b/src/common/src/array/data_chunk.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -31,6 +31,7 @@ use crate::types::struct_type::StructType; use crate::types::to_text::ToText; use crate::types::{DataType, Datum, NaiveDateTimeWrapper, ToOwnedDatum}; use crate::util::hash_util::finalize_hashers; +use crate::util::iter_util::{ZipEqDebug, ZipEqFast}; use crate::util::value_encoding::serialize_datum_into; /// `DataChunk` is a collection of arrays with visibility mask. @@ -70,7 +71,7 @@ impl DataChunk { .collect::>(); for row in rows { - for (datum, builder) in row.iter().zip_eq(array_builders.iter_mut()) { + for (datum, builder) in row.iter().zip_eq_debug(array_builders.iter_mut()) { builder.append_datum(datum); } } @@ -241,7 +242,7 @@ impl DataChunk { let end_row_idx = start_row_idx + actual_acquire - 1; array_builders .iter_mut() - .zip_eq(chunks[chunk_idx].columns()) + .zip_eq_fast(chunks[chunk_idx].columns()) .for_each(|(builder, column)| { let mut array_builder = column .array_ref() @@ -344,11 +345,7 @@ impl DataChunk { /// `[c, b, a]`. If `column_mapping` is [2, 0], then the output will be `[c, a]` /// If the input mapping is identity mapping, no reorder will be performed. 
pub fn reorder_columns(self, column_mapping: &[usize]) -> Self { - if column_mapping - .iter() - .copied() - .eq((0..self.columns().len()).into_iter()) - { + if column_mapping.iter().copied().eq(0..self.columns().len()) { return self; } let mut new_columns = Vec::with_capacity(column_mapping.len()); @@ -369,7 +366,7 @@ impl DataChunk { .map(|col| col.array_ref().create_builder(indexes.len())) .collect(); for &i in indexes { - for (builder, col) in array_builders.iter_mut().zip_eq(&self.columns) { + for (builder, col) in array_builders.iter_mut().zip_eq_fast(&self.columns) { builder.append_datum(col.array_ref().value_at(i)); } } @@ -552,7 +549,7 @@ impl DataChunkTestExt for DataChunk { assert!(s.starts_with('{') && s.ends_with('}')); let fields = s[1..s.len() - 1] .split(',') - .zip_eq(&builder.children_array) + .zip_eq_debug(&builder.children_array) .map(|(s, builder)| parse_datum(s, builder)) .collect_vec(); ScalarImpl::Struct(StructValue::new(fields)) diff --git a/src/common/src/array/data_chunk_iter.rs b/src/common/src/array/data_chunk_iter.rs index 66a320489b057..07c2483b6b139 100644 --- a/src/common/src/array/data_chunk_iter.rs +++ b/src/common/src/array/data_chunk_iter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -64,6 +64,22 @@ impl<'a> Iterator for DataChunkRefIter<'a> { } } } + + fn size_hint(&self) -> (usize, Option) { + if let Some(idx) = self.idx { + ( + // if all following rows are invisible + 0, + // if all following rows are visible + Some(std::cmp::min( + self.chunk.capacity() - idx, + self.chunk.cardinality(), + )), + ) + } else { + (0, Some(0)) + } + } } pub struct DataChunkRefIterWithHoles<'a> { @@ -92,8 +108,16 @@ impl<'a> Iterator for DataChunkRefIterWithHoles<'a> { ret } } + + fn size_hint(&self) -> (usize, Option) { + let size = self.chunk.capacity() - self.idx; + (size, Some(size)) + } } +impl ExactSizeIterator for DataChunkRefIterWithHoles<'_> {} +unsafe impl TrustedLen for DataChunkRefIterWithHoles<'_> {} + #[derive(Clone, Copy)] pub struct RowRef<'a> { chunk: &'a DataChunk, @@ -199,4 +223,5 @@ impl<'a> Iterator for RowRefIter<'a> { } } +impl ExactSizeIterator for RowRefIter<'_> {} unsafe impl TrustedLen for RowRefIter<'_> {} diff --git a/src/common/src/array/decimal_array.rs b/src/common/src/array/decimal_array.rs index 150253856f94b..cf64149d9d929 100644 --- a/src/common/src/array/decimal_array.rs +++ b/src/common/src/array/decimal_array.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -28,6 +28,7 @@ mod tests { use super::*; use crate::array::{Array, ArrayBuilder, ArrayImpl, NULL_VAL_FOR_HASH}; + use crate::util::iter_util::ZipEqFast; #[test] fn test_decimal_builder() { @@ -37,7 +38,7 @@ mod tests { builder.append(*i); } let a = builder.finish(); - let res = v.iter().zip_eq(a.iter()).all(|(a, b)| *a == b); + let res = v.iter().zip_eq_fast(a.iter()).all(|(a, b)| *a == b); assert!(res); } @@ -59,7 +60,7 @@ mod tests { assert_eq!(prost_array.values.len(), 1); - let decoded_array = ArrayImpl::from_protobuf(&prost_array, 4) + let decoded_array = ArrayImpl::from_protobuf(&prost_array, 8) .unwrap() .into_decimal(); @@ -121,10 +122,12 @@ mod tests { let hasher_builder = RandomXxHashBuilder64::default(); let mut states = vec![hasher_builder.build_hasher(); ARR_LEN]; vecs.iter().for_each(|v| { - v.iter().zip_eq(&mut states).for_each(|(x, state)| match x { - Some(inner) => inner.hash(state), - None => NULL_VAL_FOR_HASH.hash(state), - }) + v.iter() + .zip_eq_fast(&mut states) + .for_each(|(x, state)| match x { + Some(inner) => inner.hash(state), + None => NULL_VAL_FOR_HASH.hash(state), + }) }); let hashes = hash_finish(&mut states[..]); diff --git a/src/common/src/array/error.rs b/src/common/src/array/error.rs index fb5e47ea8d0e1..ed6decb050685 100644 --- a/src/common/src/array/error.rs +++ b/src/common/src/array/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/array/interval_array.rs b/src/common/src/array/interval_array.rs index d89ef1f6c10a4..aca1cefed79a4 100644 --- a/src/common/src/array/interval_array.rs +++ b/src/common/src/array/interval_array.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/array/iterator.rs b/src/common/src/array/iterator.rs index 26f6ddf9b4399..5072903489a4b 100644 --- a/src/common/src/array/iterator.rs +++ b/src/common/src/array/iterator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,8 +27,6 @@ impl<'a, A: Array> ArrayIterator<'a, A> { } } -unsafe impl<'a, A: Array> TrustedLen for ArrayIterator<'a, A> {} - impl<'a, A: Array> Iterator for ArrayIterator<'a, A> { type Item = Option>; @@ -48,6 +46,9 @@ impl<'a, A: Array> Iterator for ArrayIterator<'a, A> { } } +impl<'a, A: Array> ExactSizeIterator for ArrayIterator<'a, A> {} +unsafe impl<'a, A: Array> TrustedLen for ArrayIterator<'a, A> {} + #[cfg(test)] mod tests { use paste::paste; diff --git a/src/common/src/array/jsonb_array.rs b/src/common/src/array/jsonb_array.rs new file mode 100644 index 0000000000000..7690a38f9baac --- /dev/null +++ b/src/common/src/array/jsonb_array.rs @@ -0,0 +1,317 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use postgres_types::{FromSql as _, ToSql as _, Type}; +use serde_json::Value; + +use super::{Array, ArrayBuilder}; +use crate::buffer::{Bitmap, BitmapBuilder}; +use crate::types::{Scalar, ScalarImpl, ScalarRef}; +use crate::util::iter_util::ZipEqFast; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct JsonbVal(Box); // The `Box` is just to keep `size_of::` smaller. + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct JsonbRef<'a>(&'a Value); + +impl Scalar for JsonbVal { + type ScalarRefType<'a> = JsonbRef<'a>; + + fn as_scalar_ref(&self) -> Self::ScalarRefType<'_> { + JsonbRef(self.0.as_ref()) + } + + fn to_scalar_value(self) -> ScalarImpl { + ScalarImpl::Jsonb(self) + } +} + +impl<'a> ScalarRef<'a> for JsonbRef<'a> { + type ScalarType = JsonbVal; + + fn to_owned_scalar(&self) -> Self::ScalarType { + JsonbVal(self.0.clone().into()) + } + + fn hash_scalar(&self, state: &mut H) { + // We do not intend to support hashing `jsonb` type. + // Before #7981 is done, we do not panic but just hash its string representation. + // Note that `serde_json` without feature `preserve_order` uses `BTreeMap` for json object. + // So its string form always have keys sorted. + use std::hash::Hash as _; + self.0.to_string().hash(state) + } +} + +impl PartialOrd for JsonbVal { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for JsonbVal { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.as_scalar_ref().cmp(&other.as_scalar_ref()) + } +} + +impl PartialOrd for JsonbRef<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for JsonbRef<'_> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + // We do not intend to support ordering `jsonb` type. + // Before #7981 is done, we do not panic but just compare its string representation. + // Note that `serde_json` without feature `preserve_order` uses `BTreeMap` for json object. + // So its string form always have keys sorted. + // + // In PostgreSQL, Object > Array > Boolean > Number > String > Null. + // But here we have Object > true > Null > false > Array > Number > String. + // Because in ascii: `{` > `t` > `n` > `f` > `[` > `9` `-` > `"`. + // + // This is just to keep consistent with the memcomparable encoding, which uses string form. + // If we implemented the same typed comparison as PostgreSQL, we would need a corresponding + // memcomparable encoding for it. 
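A small illustration of the divergence described in the comment above, using plain `serde_json` rather than the production code path: ordering by the string form ranks a boolean above an array, while PostgreSQL's typed `jsonb` ordering puts arrays above booleans.

```rust
use std::cmp::Ordering;

use serde_json::{json, Value};

// Compare two json values the way the interim implementation does: by string form.
fn string_form_cmp(a: &Value, b: &Value) -> Ordering {
    a.to_string().cmp(&b.to_string())
}

fn main() {
    // "true" > "[1]" because 't' > '[' in ASCII, unlike PostgreSQL's Array > Boolean.
    assert_eq!(string_form_cmp(&json!(true), &json!([1])), Ordering::Greater);
}
```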
+ self.0.to_string().cmp(&other.0.to_string()) + } +} + +impl crate::types::to_text::ToText for JsonbRef<'_> { + fn write(&self, f: &mut W) -> std::fmt::Result { + write!(f, "{}", self.0) + } + + fn write_with_type( + &self, + _ty: &crate::types::DataType, + f: &mut W, + ) -> std::fmt::Result { + self.write(f) + } +} + +impl crate::types::to_binary::ToBinary for JsonbRef<'_> { + fn to_binary_with_type( + &self, + _ty: &crate::types::DataType, + ) -> crate::error::Result> { + let mut output = bytes::BytesMut::new(); + self.0.to_sql(&Type::JSONB, &mut output).unwrap(); + Ok(Some(output.freeze())) + } +} + +impl std::str::FromStr for JsonbVal { + type Err = ::Err; + + fn from_str(s: &str) -> Result { + let v: Value = s.parse()?; + Ok(Self(v.into())) + } +} + +impl JsonbVal { + /// Avoid this function (or `impl From`) which is leak of abstraction. + /// In most cases you would be using `JsonbRef`. + pub fn from_serde(v: Value) -> Self { + Self(v.into()) + } + + /// Constructs a value without specific meaning. Usually used as a lightweight placeholder. + pub fn dummy() -> Self { + Self(Value::Null.into()) + } + + pub fn memcmp_deserialize( + deserializer: &mut memcomparable::Deserializer, + ) -> memcomparable::Result { + let v: Value = ::deserialize(deserializer)? + .parse() + .map_err(|_| memcomparable::Error::Message("invalid json".into()))?; + Ok(Self(v.into())) + } + + pub fn value_deserialize(buf: &[u8]) -> Option { + let v = Value::from_sql(&Type::JSONB, buf).ok()?; + Some(Self(v.into())) + } +} + +impl JsonbRef<'_> { + pub fn memcmp_serialize( + &self, + serializer: &mut memcomparable::Serializer, + ) -> memcomparable::Result<()> { + // As mentioned with `cmp`, this implementation is not intended to be used. + // But before #7981 is done, we do not want to `panic` here. + let s = self.0.to_string(); + serde::Serialize::serialize(&s, serializer) + } + + pub fn value_serialize(&self) -> Vec { + // Reuse the pgwire "BINARY" encoding for jsonb type. + // It is not truly binary, but one byte of version `1u8` followed by string form. + // This version number helps us maintain compatibility when we switch to more efficient + // encoding later. 
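The layout described in the comment above can be observed directly with `postgres-types` (the `with-serde_json-1` feature is enabled by the `Cargo.toml` change earlier in this diff): the JSONB wire format is a one-byte version tag followed by the JSON text. A minimal sketch under that assumption:

```rust
use bytes::BytesMut;
use postgres_types::{ToSql, Type};
use serde_json::json;

fn main() {
    let value = json!({"a": 1});
    let mut buf = BytesMut::new();
    value.to_sql(&Type::JSONB, &mut buf).unwrap();

    // First byte is the jsonb format version tag.
    assert_eq!(buf[0], 1);
    // The remaining bytes are the JSON text (compact form, matching `to_string()` here).
    assert_eq!(&buf[1..], value.to_string().as_bytes());
}
```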
+ let mut output = bytes::BytesMut::new(); + self.0.to_sql(&Type::JSONB, &mut output).unwrap(); + output.freeze().into() + } +} + +#[derive(Debug)] +pub struct JsonbArrayBuilder { + bitmap: BitmapBuilder, + data: Vec, +} + +#[derive(Debug, Clone)] +pub struct JsonbArray { + bitmap: Bitmap, + data: Vec, +} + +impl ArrayBuilder for JsonbArrayBuilder { + type ArrayType = JsonbArray; + + fn with_meta(capacity: usize, _meta: super::ArrayMeta) -> Self { + Self { + bitmap: BitmapBuilder::with_capacity(capacity), + data: Vec::with_capacity(capacity), + } + } + + fn append_n(&mut self, n: usize, value: Option<::RefItem<'_>>) { + match value { + Some(x) => { + self.bitmap.append_n(n, true); + self.data + .extend(std::iter::repeat(x).take(n).map(|x| x.0.clone())); + } + None => { + self.bitmap.append_n(n, false); + self.data + .extend(std::iter::repeat(*JsonbVal::dummy().0).take(n)); + } + } + } + + fn append_array(&mut self, other: &Self::ArrayType) { + for bit in other.bitmap.iter() { + self.bitmap.append(bit); + } + self.data.extend_from_slice(&other.data); + } + + fn pop(&mut self) -> Option<()> { + self.data.pop().map(|_| self.bitmap.pop().unwrap()) + } + + fn finish(self) -> Self::ArrayType { + Self::ArrayType { + bitmap: self.bitmap.finish(), + data: self.data, + } + } +} + +impl JsonbArrayBuilder { + pub fn append_move( + &mut self, + value: <::ArrayType as Array>::OwnedItem, + ) { + self.bitmap.append(true); + self.data.push(*value.0); + } +} + +impl Array for JsonbArray { + type Builder = JsonbArrayBuilder; + type OwnedItem = JsonbVal; + type RefItem<'a> = JsonbRef<'a>; + + unsafe fn raw_value_at_unchecked(&self, idx: usize) -> Self::RefItem<'_> { + JsonbRef(self.data.get_unchecked(idx)) + } + + fn len(&self) -> usize { + self.data.len() + } + + fn to_protobuf(&self) -> super::ProstArray { + // The memory layout contains `serde_json::Value` trees, but in protobuf we transmit this as + // variable length bytes in value encoding. That is, one buffer of length n+1 containing + // start and end offsets into the 2nd buffer containing all value bytes concatenated. 
+ + use risingwave_pb::common::buffer::CompressionType; + use risingwave_pb::common::Buffer; + + let mut offset_buffer = + Vec::::with_capacity((1 + self.data.len()) * std::mem::size_of::()); + let mut data_buffer = Vec::::with_capacity(self.data.len()); + + let mut offset = 0; + for (v, not_null) in self.data.iter().zip_eq_fast(self.null_bitmap().iter()) { + if !not_null { + continue; + } + let d = JsonbRef(v).value_serialize(); + offset_buffer.extend_from_slice(&(offset as u64).to_be_bytes()); + data_buffer.extend_from_slice(&d); + offset += d.len(); + } + offset_buffer.extend_from_slice(&(offset as u64).to_be_bytes()); + + let values = vec![ + Buffer { + compression: CompressionType::None as i32, + body: offset_buffer, + }, + Buffer { + compression: CompressionType::None as i32, + body: data_buffer, + }, + ]; + + let null_bitmap = self.null_bitmap().to_protobuf(); + super::ProstArray { + null_bitmap: Some(null_bitmap), + values, + array_type: super::ProstArrayType::Jsonb as i32, + struct_array_data: None, + list_array_data: None, + } + } + + fn null_bitmap(&self) -> &Bitmap { + &self.bitmap + } + + fn into_null_bitmap(self) -> Bitmap { + self.bitmap + } + + fn set_bitmap(&mut self, bitmap: Bitmap) { + self.bitmap = bitmap; + } + + fn create_builder(&self, capacity: usize) -> super::ArrayBuilderImpl { + let array_builder = Self::Builder::new(capacity); + super::ArrayBuilderImpl::Jsonb(array_builder) + } +} diff --git a/src/common/src/array/list_array.rs b/src/common/src/array/list_array.rs index 2c1473e445f66..1c060b92c8c5e 100644 --- a/src/common/src/array/list_array.rs +++ b/src/common/src/array/list_array.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -32,8 +32,6 @@ use crate::types::{ DatumRef, Scalar, ScalarRefImpl, ToDatumRef, }; -/// This is a naive implementation of list array. -/// We will eventually move to a more efficient flatten implementation. #[derive(Debug)] pub struct ListArrayBuilder { bitmap: BitmapBuilder, @@ -146,8 +144,15 @@ impl ListArrayBuilder { } } -/// This is a naive implementation of list array. -/// We will eventually move to a more efficient flatten implementation. +/// Each item of this `ListArray` is a `List`, or called `T[]` (T array). +/// +/// * As other arrays, there is a null bitmap, with `1` meaning nonnull and `0` meaning null. +/// * As [`BytesArray`], there is an offsets `Vec` and a value `Array`. The value `Array` has all +/// items concatenated, and the offsets `Vec` stores start and end indices into it for slicing. +/// Effectively, the inner array is the flattened form, and `offsets.len() == n + 1`. +/// +/// For example, `values (array[1]), (array[]::int[]), (null), (array[2, 3]);` stores an inner +/// `I32Array` with `[1, 2, 3]`, along with offsets `[0, 1, 1, 1, 3]` and null bitmap `TTFT`. 
#[derive(Debug, Clone, PartialEq)] pub struct ListArray { bitmap: Bitmap, @@ -221,7 +226,11 @@ impl ListArray { ); let bitmap: Bitmap = array.get_null_bitmap()?.into(); let array_data = array.get_list_array_data()?.to_owned(); - let value = ArrayImpl::from_protobuf(array_data.value.as_ref().unwrap(), bitmap.len())?; + let flatten_len = match array_data.offsets.last() { + Some(&n) => n as usize, + None => bail!("Must have at least one element in offsets"), + }; + let value = ArrayImpl::from_protobuf(array_data.value.as_ref().unwrap(), flatten_len)?; let arr = ListArray { bitmap, offsets: array_data.offsets, @@ -297,12 +306,17 @@ impl Ord for ListValue { // Used to display ListValue in explain for better readibilty. pub fn display_for_explain(list: &ListValue) -> String { - // Example of ListValue display: ARRAY[1, 2] + // Example of ListValue display: ARRAY[1, 2, null] format!( "ARRAY[{}]", list.values .iter() - .map(|v| v.as_ref().unwrap().as_scalar_ref_impl().to_text()) + .map(|v| { + match v.as_ref() { + None => "null".into(), + Some(scalar) => scalar.as_scalar_ref_impl().to_text(), + } + }) .collect::>() .join(", ") ) diff --git a/src/common/src/array/macros.rs b/src/common/src/array/macros.rs index 9fe4bc37c51f3..d0fed5ee5ffd8 100644 --- a/src/common/src/array/macros.rs +++ b/src/common/src/array/macros.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/array/mod.rs b/src/common/src/array/mod.rs index 58eb8012fe82a..48dd6d7543497 100644 --- a/src/common/src/array/mod.rs +++ b/src/common/src/array/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ mod decimal_array; pub mod error; pub mod interval_array; mod iterator; +mod jsonb_array; pub mod list_array; mod macros; mod primitive_array; @@ -52,6 +53,7 @@ pub use data_chunk_iter::RowRef; pub use decimal_array::{DecimalArray, DecimalArrayBuilder}; pub use interval_array::{IntervalArray, IntervalArrayBuilder}; pub use iterator::ArrayIterator; +pub use jsonb_array::{JsonbArray, JsonbArrayBuilder, JsonbRef, JsonbVal}; pub use list_array::{ListArray, ListArrayBuilder, ListRef, ListValue}; use paste::paste; pub use primitive_array::{PrimitiveArray, PrimitiveArrayBuilder, PrimitiveArrayItemType}; @@ -64,6 +66,7 @@ pub use vis::{Vis, VisRef}; pub use self::error::ArrayError; use crate::buffer::Bitmap; use crate::types::*; +use crate::util::iter_util::ZipEqFast; pub type ArrayResult = std::result::Result; pub type I64Array = PrimitiveArray; @@ -308,9 +311,8 @@ trait CompactableArray: Array { impl CompactableArray for A { fn compact(&self, visibility: &Bitmap, cardinality: usize) -> Self { - use itertools::Itertools; let mut builder = A::Builder::with_meta(cardinality, self.array_meta()); - for (elem, visible) in self.iter().zip_eq(visibility.iter()) { + for (elem, visible) in self.iter().zip_eq_fast(visibility.iter()) { if visible { builder.append(elem); } @@ -343,6 +345,7 @@ macro_rules! 
for_all_variants { { NaiveDate, naivedate, NaiveDateArray, NaiveDateArrayBuilder }, { NaiveDateTime, naivedatetime, NaiveDateTimeArray, NaiveDateTimeArrayBuilder }, { NaiveTime, naivetime, NaiveTimeArray, NaiveTimeArrayBuilder }, + { Jsonb, jsonb, JsonbArray, JsonbArrayBuilder }, { Struct, struct, StructArray, StructArrayBuilder }, { List, list, ListArray, ListArrayBuilder }, { Bytea, bytea, BytesArray, BytesArrayBuilder} @@ -381,6 +384,12 @@ impl From for ArrayImpl { } } +impl From for ArrayImpl { + fn from(arr: JsonbArray) -> Self { + Self::Jsonb(arr) + } +} + impl From for ArrayImpl { fn from(arr: StructArray) -> Self { Self::Struct(arr) @@ -646,7 +655,7 @@ macro_rules! impl_array { for_all_variants! { impl_array } impl ArrayImpl { - pub fn iter(&self) -> impl DoubleEndedIterator> { + pub fn iter(&self) -> impl DoubleEndedIterator> + ExactSizeIterator { (0..self.len()).map(|i| self.value_at(i)) } @@ -674,6 +683,9 @@ impl ArrayImpl { ProstArrayType::Time => read_naive_time_array(array, cardinality)?, ProstArrayType::Timestamp => read_naive_date_time_array(array, cardinality)?, ProstArrayType::Interval => read_interval_unit_array(array, cardinality)?, + ProstArrayType::Jsonb => { + read_string_array::(array, cardinality)? + } ProstArrayType::Struct => StructArray::from_protobuf(array)?, ProstArrayType::List => ListArray::from_protobuf(array)?, ProstArrayType::Unspecified => unreachable!(), @@ -702,7 +714,6 @@ impl PartialEq for ArrayImpl { #[cfg(test)] mod tests { - use itertools::Itertools; use super::*; @@ -743,7 +754,7 @@ mod tests { T3: PrimitiveArrayItemType + CheckedAdd, { let mut builder = PrimitiveArrayBuilder::::new(a.len()); - for (a, b) in a.iter().zip_eq(b.iter()) { + for (a, b) in a.iter().zip_eq_fast(b.iter()) { let item = match (a, b) { (Some(a), Some(b)) => Some(a.as_() + b.as_()), _ => None, @@ -780,9 +791,8 @@ mod tests { mod test_util { use std::hash::{BuildHasher, Hasher}; - use itertools::Itertools; - use super::Array; + use crate::util::iter_util::ZipEqFast; pub fn hash_finish(hashers: &mut [H]) -> Vec { return hashers @@ -808,8 +818,8 @@ mod test_util { itertools::cons_tuples( expects .iter() - .zip_eq(hash_finish(&mut states_scalar[..])) - .zip_eq(hash_finish(&mut states_vec[..])), + .zip_eq_fast(hash_finish(&mut states_scalar[..])) + .zip_eq_fast(hash_finish(&mut states_vec[..])), ) .all(|(a, b, c)| *a == b && b == c); } diff --git a/src/common/src/array/primitive_array.rs b/src/common/src/array/primitive_array.rs index 0a4d6ceec40c8..8d521bea3fcb0 100644 --- a/src/common/src/array/primitive_array.rs +++ b/src/common/src/array/primitive_array.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/array/stream_chunk.rs b/src/common/src/array/stream_chunk.rs index 5b1eb27712905..46fbb511e0603 100644 --- a/src/common/src/array/stream_chunk.rs +++ b/src/common/src/array/stream_chunk.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ use crate::buffer::Bitmap; use crate::row::{OwnedRow, Row}; use crate::types::to_text::ToText; use crate::types::DataType; +use crate::util::iter_util::ZipEqFast; /// `Op` represents three operations in `StreamChunk`. 
/// @@ -110,7 +111,7 @@ impl StreamChunk { for (op, row) in rows { ops.push(*op); - for (datum, builder) in row.iter().zip_eq(array_builders.iter_mut()) { + for (datum, builder) in row.iter().zip_eq_fast(array_builders.iter_mut()) { builder.append_datum(datum); } } @@ -165,7 +166,7 @@ impl StreamChunk { }) .collect(); let mut new_ops = Vec::with_capacity(cardinality); - for (op, visible) in ops.into_iter().zip_eq(visibility.iter()) { + for (op, visible) in ops.into_iter().zip_eq_fast(visibility.iter()) { if visible { new_ops.push(op); } @@ -254,7 +255,7 @@ impl StreamChunk { if column_mapping .iter() .copied() - .eq((0..self.data.columns().len()).into_iter()) + .eq(0..self.data.columns().len()) { // no reorder is needed self diff --git a/src/common/src/array/stream_chunk_iter.rs b/src/common/src/array/stream_chunk_iter.rs index 5db5d2cc7af4b..9b3b6f7584a12 100644 --- a/src/common/src/array/stream_chunk_iter.rs +++ b/src/common/src/array/stream_chunk_iter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,12 +16,13 @@ use std::iter::once; use auto_enums::auto_enum; +use super::data_chunk_iter::DataChunkRefIter; use super::RowRef; use crate::array::{Op, StreamChunk}; impl StreamChunk { /// Return an iterator on stream records of this stream chunk. - pub fn records(&self) -> impl Iterator> { + pub fn records(&self) -> StreamChunkRefIter<'_> { StreamChunkRefIter { chunk: self, inner: self.data.rows(), @@ -42,12 +43,10 @@ impl StreamChunk { } } -type RowRefIter<'a> = impl Iterator>; - -struct StreamChunkRefIter<'a> { +pub struct StreamChunkRefIter<'a> { chunk: &'a StreamChunk, - inner: RowRefIter<'a>, + inner: DataChunkRefIter<'a>, } impl<'a> Iterator for StreamChunkRefIter<'a> { @@ -75,6 +74,11 @@ impl<'a> Iterator for StreamChunkRefIter<'a> { Op::UpdateInsert => panic!("expect a U- before U+"), } } + + fn size_hint(&self) -> (usize, Option) { + let (lower, upper) = self.inner.size_hint(); + (lower / 2, upper) + } } #[derive(Debug, Clone)] diff --git a/src/common/src/array/struct_array.rs b/src/common/src/array/struct_array.rs index 576f5caec0944..e1d52cb1a7234 100644 --- a/src/common/src/array/struct_array.rs +++ b/src/common/src/array/struct_array.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -30,6 +30,7 @@ use crate::types::{ hash_datum, memcmp_deserialize_datum_from, memcmp_serialize_datum_into, DataType, Datum, DatumRef, Scalar, ScalarRefImpl, ToDatumRef, }; +use crate::util::iter_util::ZipEqFast; #[derive(Debug)] pub struct StructArrayBuilder { @@ -93,7 +94,7 @@ impl ArrayBuilder for StructArrayBuilder { self.bitmap.append_n(n, true); let fields = v.fields_ref(); assert_eq!(fields.len(), self.children_array.len()); - for (child, f) in self.children_array.iter_mut().zip_eq(fields) { + for (child, f) in self.children_array.iter_mut().zip_eq_fast(fields) { child.append_datum_n(n, f); } } @@ -153,7 +154,7 @@ impl StructArrayBuilder { self.bitmap.append(true); } self.len += len; - for (a, r) in self.children_array.iter_mut().zip_eq(refs.iter()) { + for (a, r) in self.children_array.iter_mut().zip_eq_fast(refs.iter()) { a.append_array(r); } } diff --git a/src/common/src/array/utf8_array.rs b/src/common/src/array/utf8_array.rs index 0b90d339d3d8c..b2d080e6cacf4 100644 --- a/src/common/src/array/utf8_array.rs +++ b/src/common/src/array/utf8_array.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -201,6 +201,7 @@ mod tests { use super::*; use crate::array::NULL_VAL_FOR_HASH; + use crate::util::iter_util::ZipEqFast; #[test] fn test_utf8_builder() { @@ -350,10 +351,12 @@ mod tests { let hasher_builder = RandomXxHashBuilder64::default(); let mut states = vec![hasher_builder.build_hasher(); ARR_LEN]; vecs.iter().for_each(|v| { - v.iter().zip_eq(&mut states).for_each(|(x, state)| match x { - Some(inner) => inner.hash(state), - None => NULL_VAL_FOR_HASH.hash(state), - }) + v.iter() + .zip_eq_fast(&mut states) + .for_each(|(x, state)| match x { + Some(inner) => inner.hash(state), + None => NULL_VAL_FOR_HASH.hash(state), + }) }); let hashes = hash_finish(&mut states[..]); diff --git a/src/common/src/array/value_reader.rs b/src/common/src/array/value_reader.rs index d036105f74c57..371f08bcd8002 100644 --- a/src/common/src/array/value_reader.rs +++ b/src/common/src/array/value_reader.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -19,7 +19,7 @@ use byteorder::{BigEndian, ReadBytesExt}; use super::ArrayResult; use crate::array::{ - Array, ArrayBuilder, BytesArrayBuilder, PrimitiveArrayItemType, Utf8ArrayBuilder, + ArrayBuilder, BytesArrayBuilder, JsonbArrayBuilder, PrimitiveArrayItemType, Utf8ArrayBuilder, }; use crate::types::{Decimal, OrderedF32, OrderedF64}; @@ -62,24 +62,39 @@ impl PrimitiveValueReader for DecimalValueReader { } pub trait VarSizedValueReader { - fn read(buf: &[u8]) -> ArrayResult<<::ArrayType as Array>::RefItem<'_>>; + fn read(buf: &[u8], builder: &mut AB) -> ArrayResult<()>; } pub struct Utf8ValueReader; impl VarSizedValueReader for Utf8ValueReader { - fn read(buf: &[u8]) -> ArrayResult<&str> { - match from_utf8(buf) { - Ok(s) => Ok(s), + fn read(buf: &[u8], builder: &mut Utf8ArrayBuilder) -> ArrayResult<()> { + let s = match from_utf8(buf) { + Ok(s) => s, Err(e) => bail!("failed to read utf8 string from bytes: {}", e), - } + }; + builder.append(Some(s)); + Ok(()) } } pub struct BytesValueReader; impl VarSizedValueReader for BytesValueReader { - fn read(buf: &[u8]) -> ArrayResult<&[u8]> { - Ok(buf) + fn read(buf: &[u8], builder: &mut BytesArrayBuilder) -> ArrayResult<()> { + builder.append(Some(buf)); + Ok(()) + } +} + +pub struct JsonbValueReader; + +impl VarSizedValueReader for JsonbValueReader { + fn read(buf: &[u8], builder: &mut JsonbArrayBuilder) -> ArrayResult<()> { + let Some(v) = super::JsonbVal::value_deserialize(buf) else { + bail!("failed to read jsonb from bytes"); + }; + builder.append_move(v); + Ok(()) } } diff --git a/src/common/src/array/vis.rs b/src/common/src/array/vis.rs index fbd3672e1102f..02607e6836f99 100644 --- a/src/common/src/array/vis.rs +++ b/src/common/src/array/vis.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::iter; - use auto_enums::auto_enum; +use itertools::repeat_n; use crate::buffer::{Bitmap, BitmapBuilder}; @@ -53,7 +52,7 @@ impl Vis { self.as_ref().is_set(idx) } - pub fn iter(&self) -> impl Iterator + '_ { + pub fn iter(&self) -> impl ExactSizeIterator + '_ { self.as_ref().iter() } @@ -160,11 +159,11 @@ impl<'a> VisRef<'a> { } } - #[auto_enum(Iterator)] - pub fn iter(self) -> impl Iterator + 'a { + #[auto_enum(ExactSizeIterator)] + pub fn iter(self) -> impl ExactSizeIterator + 'a { match self { VisRef::Bitmap(b) => b.iter(), - VisRef::Compact(c) => iter::repeat(true).take(c), + VisRef::Compact(c) => repeat_n(true, c), } } diff --git a/src/common/src/buffer/bitmap.rs b/src/common/src/buffer/bitmap.rs index 929d09f855dfd..d149dc6096c27 100644 --- a/src/common/src/buffer/bitmap.rs +++ b/src/common/src/buffer/bitmap.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -550,6 +550,8 @@ impl From<&ProstBuffer> for Bitmap { } /// Bitmap iterator. +/// +/// TODO: add `count_ones` to make it [`ExactSizeIterator`]? 
pub struct BitmapIter<'a> { bits: &'a [usize], idx: usize, @@ -579,6 +581,7 @@ impl<'a> iter::Iterator for BitmapIter<'a> { } } +impl ExactSizeIterator for BitmapIter<'_> {} unsafe impl TrustedLen for BitmapIter<'_> {} #[cfg(test)] diff --git a/src/common/src/buffer/mod.rs b/src/common/src/buffer/mod.rs index eeb3fcc190213..457bf25809da9 100644 --- a/src/common/src/buffer/mod.rs +++ b/src/common/src/buffer/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/cache.rs b/src/common/src/cache.rs index 7ebae3d455243..8e9cf19d35b4c 100644 --- a/src/common/src/cache.rs +++ b/src/common/src/cache.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,8 +27,11 @@ use std::ptr::null_mut; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; +use futures::FutureExt; use parking_lot::Mutex; +use tokio::sync::oneshot::error::RecvError; use tokio::sync::oneshot::{channel, Receiver, Sender}; +use tokio::task::JoinHandle; const IN_CACHE: u8 = 1; const REVERSE_IN_CACHE: u8 = !IN_CACHE; @@ -143,6 +146,10 @@ impl LruHandle { self.refs += 1; } + fn add_multi_refs(&mut self, ref_count: u32) { + self.refs += ref_count; + } + fn unref(&mut self) -> bool { debug_assert!(self.refs > 0); self.refs -= 1; @@ -319,7 +326,7 @@ impl LruHandleTable { } } -type RequestQueue = Vec>; +type RequestQueue = Vec>>; pub struct LruCacheShard { /// The dummy header node of a ring linked list. The linked list is a LRU list, holding the /// cache handles that are not used externally. @@ -327,7 +334,7 @@ pub struct LruCacheShard { table: LruHandleTable, // TODO: may want to use an atomic object linked list shared by all shards. object_pool: Vec>>, - write_request: HashMap, + write_request: HashMap>, lru_usage: Arc, usage: Arc, capacity: usize, @@ -679,21 +686,28 @@ impl LruCache { ) -> CacheableEntry { let mut to_delete = vec![]; // Drop the entries outside lock to avoid deadlock. + let mut senders = vec![]; let handle = unsafe { let mut shard = self.shards[self.shard(hash)].lock(); let pending_request = shard.write_request.remove(&key); let ptr = shard.insert(key, hash, charge, value, &mut to_delete); debug_assert!(!ptr.is_null()); - if let Some(que) = pending_request { - for sender in que { - let _ = sender.send(()); - } + if let Some(mut que) = pending_request { + (*ptr).add_multi_refs(que.len() as u32); + senders = std::mem::take(&mut que); } CacheableEntry { cache: self.clone(), handle: ptr, } }; + for sender in senders { + let _ = sender.send(CacheableEntry { + cache: self.clone(), + handle: handle.handle, + }); + } + // do not deallocate data with holding mutex. 
if let Some(listener) = &self.listener { for (key, value) in to_delete { @@ -769,62 +783,101 @@ impl LruCache { pub struct CleanCacheGuard<'a, K: LruKey + Clone + 'static, T: LruValue + 'static> { cache: &'a Arc>, - key: K, + key: Option, hash: u64, - success: bool, +} + +impl<'a, K: LruKey + Clone + 'static, T: LruValue + 'static> CleanCacheGuard<'a, K, T> { + fn mark_success(mut self) -> K { + self.key.take().unwrap() + } } impl<'a, K: LruKey + Clone + 'static, T: LruValue + 'static> Drop for CleanCacheGuard<'a, K, T> { fn drop(&mut self) { - if !self.success { - self.cache.clear_pending_request(&self.key, self.hash); + if let Some(key) = self.key.as_ref() { + self.cache.clear_pending_request(key, self.hash); + } + } +} + +/// `lookup_with_request_dedup.await` can directly return `Result, E>`, but if +/// we do not want to wait when cache hit does not happen, we can directly call +/// `lookup_with_request_dedup` which will return a `LookupResponse` which contains +/// `Receiver>` or `JoinHandle, E>>` when cache hit +/// does not happen. +pub enum LookupResponse { + Invalid, + Cached(CacheableEntry), + WaitPendingRequest(Receiver>), + Miss(JoinHandle, E>>), +} + +impl Default for LookupResponse { + fn default() -> Self { + Self::Invalid + } +} + +impl> Future + for LookupResponse +{ + type Output = Result, E>; + + fn poll( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll { + match &mut *self { + Self::Invalid => unreachable!(), + Self::Cached(_) => std::task::Poll::Ready(Ok( + must_match!(std::mem::take(&mut *self), Self::Cached(entry) => entry), + )), + Self::WaitPendingRequest(receiver) => { + receiver.poll_unpin(cx).map_err(|recv_err| recv_err.into()) + } + Self::Miss(join_handle) => join_handle + .poll_unpin(cx) + .map(|join_result| join_result.unwrap()), } } } -/// Only implement `lookup_with_request_dedup` for static values, as they can be sent across tokio -/// spawned futures. +/// Only implement `lookup_with_request_dedup` for static +/// values, as they can be sent across tokio spawned futures. 
impl LruCache { - pub async fn lookup_with_request_dedup( + pub fn lookup_with_request_dedup( self: &Arc, hash: u64, key: K, fetch_value: F, - ) -> Result, E> + ) -> LookupResponse where F: FnOnce() -> VC, - E: Error + Send + 'static, + E: Error + Send + 'static + From, VC: Future> + Send + 'static, { - loop { - match self.lookup_for_request(hash, key.clone()) { - LookupResult::Cached(entry) => return Ok(entry), - LookupResult::WaitPendingRequest(recv) => { - let _ = recv.await; - continue; - } - LookupResult::Miss => { - let this = self.clone(); - let fetch_value = fetch_value(); - let key2 = key.clone(); - let mut guard = CleanCacheGuard { - cache: self, - key, + match self.lookup_for_request(hash, key.clone()) { + LookupResult::Cached(entry) => LookupResponse::Cached(entry), + LookupResult::WaitPendingRequest(receiver) => { + LookupResponse::WaitPendingRequest(receiver) + } + LookupResult::Miss => { + let this = self.clone(); + let fetch_value = fetch_value(); + let key2 = key; + let join_handle = tokio::spawn(async move { + let guard = CleanCacheGuard { + cache: &this, + key: Some(key2), hash, - success: false, }; - let ret = tokio::spawn(async move { - let (value, charge) = fetch_value.await?; - let entry = this.insert(key2, hash, charge, value); - Ok(entry) - }) - .await - .unwrap(); - if ret.is_ok() { - guard.success = true; - } - return ret; - } + let (value, charge) = fetch_value.await?; + let key2 = guard.mark_success(); + let entry = this.insert(key2, hash, charge, value); + Ok(entry) + }); + LookupResponse::Miss(join_handle) } } } @@ -838,7 +891,7 @@ pub struct CacheableEntry { pub enum LookupResult { Cached(CacheableEntry), Miss, - WaitPendingRequest(Receiver<()>), + WaitPendingRequest(Receiver>), } unsafe impl Send for CacheableEntry {} diff --git a/src/common/src/catalog/column.rs b/src/common/src/catalog/column.rs index 8503518b80e06..98fdd758090fc 100644 --- a/src/common/src/catalog/column.rs +++ b/src/common/src/catalog/column.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,10 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::borrow::Cow; + use itertools::Itertools; -use risingwave_pb::plan_common::ColumnDesc as ProstColumnDesc; +use risingwave_pb::plan_common::{ + ColumnCatalog as ProstColumnCatalog, ColumnDesc as ProstColumnDesc, +}; -use crate::catalog::Field; +use super::row_id_column_desc; +use crate::catalog::{Field, ROW_ID_COLUMN_ID}; use crate::error::ErrorCode; use crate::types::DataType; @@ -46,6 +51,12 @@ impl ColumnId { pub const fn next(self) -> Self { Self(self.0 + 1) } + + pub fn apply_delta_if_not_row_id(&mut self, delta: i32) { + if self.0 != ROW_ID_COLUMN_ID.get_id() { + self.0 += delta; + } + } } impl From for ColumnId { @@ -223,6 +234,99 @@ impl From<&ColumnDesc> for ProstColumnDesc { } } +#[derive(Debug, Clone, PartialEq)] +pub struct ColumnCatalog { + pub column_desc: ColumnDesc, + pub is_hidden: bool, +} + +impl ColumnCatalog { + /// Get the column catalog's is hidden. + pub fn is_hidden(&self) -> bool { + self.is_hidden + } + + /// Get a reference to the column desc's data type. + pub fn data_type(&self) -> &DataType { + &self.column_desc.data_type + } + + /// Get the column desc's column id. 
+ pub fn column_id(&self) -> ColumnId { + self.column_desc.column_id + } + + /// Get a reference to the column desc's name. + pub fn name(&self) -> &str { + self.column_desc.name.as_ref() + } + + /// Convert column catalog to proto + pub fn to_protobuf(&self) -> ProstColumnCatalog { + ProstColumnCatalog { + column_desc: Some(self.column_desc.to_protobuf()), + is_hidden: self.is_hidden, + } + } + + /// Creates a row ID column (for implicit primary key). + pub fn row_id_column() -> Self { + Self { + column_desc: row_id_column_desc(), + is_hidden: true, + } + } +} + +impl From for ColumnCatalog { + fn from(prost: ProstColumnCatalog) -> Self { + Self { + column_desc: prost.column_desc.unwrap().into(), + is_hidden: prost.is_hidden, + } + } +} + +impl ColumnCatalog { + pub fn name_with_hidden(&self) -> Cow<'_, str> { + if self.is_hidden { + Cow::Owned(format!("{}(hidden)", self.column_desc.name)) + } else { + Cow::Borrowed(&self.column_desc.name) + } + } +} + +pub fn columns_extend(preserved_columns: &mut Vec, columns: Vec) { + debug_assert_eq!(ROW_ID_COLUMN_ID.get_id(), 0); + let mut max_incoming_column_id = ROW_ID_COLUMN_ID.get_id(); + columns.iter().for_each(|column| { + let column_id = column.column_id().get_id(); + if column_id > max_incoming_column_id { + max_incoming_column_id = column_id; + } + }); + preserved_columns.iter_mut().for_each(|column| { + column + .column_desc + .column_id + .apply_delta_if_not_row_id(max_incoming_column_id) + }); + + preserved_columns.extend(columns); +} + +pub fn is_column_ids_dedup(columns: &[ColumnCatalog]) -> bool { + let mut column_ids = columns + .iter() + .map(|column| column.column_id().get_id()) + .collect_vec(); + column_ids.sort(); + let original_len = column_ids.len(); + column_ids.dedup(); + column_ids.len() == original_len +} + #[cfg(test)] pub mod tests { use risingwave_pb::plan_common::ColumnDesc as ProstColumnDesc; diff --git a/src/common/src/catalog/internal_table.rs b/src/common/src/catalog/internal_table.rs index 8bd7c34fc346e..1fa483eac9f64 100644 --- a/src/common/src/catalog/internal_table.rs +++ b/src/common/src/catalog/internal_table.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/catalog/mod.rs b/src/common/src/catalog/mod.rs index 506bd8bc76028..5368017974873 100644 --- a/src/common/src/catalog/mod.rs +++ b/src/common/src/catalog/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -31,10 +31,16 @@ pub use schema::{test_utils as schema_test_utils, Field, FieldDisplay, Schema}; pub use crate::constants::hummock; use crate::error::Result; use crate::row::OwnedRow; +use crate::types::DataType; /// The global version of the catalog. pub type CatalogVersion = u64; +/// The version number of the per-table catalog. +pub type TableVersionId = u64; +/// The default version ID for a new table. 
+pub const INITIAL_TABLE_VERSION_ID: u64 = 0; + pub const DEFAULT_DATABASE_NAME: &str = "dev"; pub const DEFAULT_SCHEMA_NAME: &str = "public"; pub const PG_CATALOG_SCHEMA_NAME: &str = "pg_catalog"; @@ -56,6 +62,35 @@ pub const SYSTEM_SCHEMAS: [&str; 3] = [ RW_CATALOG_SCHEMA_NAME, ]; +pub const ROWID_PREFIX: &str = "_row_id"; + +pub fn row_id_column_name() -> String { + ROWID_PREFIX.to_string() +} + +pub fn is_row_id_column_name(name: &str) -> bool { + name.starts_with(ROWID_PREFIX) +} + +/// The column ID preserved for the row ID column. +pub const ROW_ID_COLUMN_ID: ColumnId = ColumnId::new(0); + +/// The column ID offset for user-defined columns. +/// +/// All IDs of user-defined columns must be greater or equal to this value. +pub const USER_COLUMN_ID_OFFSET: i32 = ROW_ID_COLUMN_ID.next().get_id(); + +/// Creates a row ID column (for implicit primary key). It'll always have the ID `0` for now. +pub fn row_id_column_desc() -> ColumnDesc { + ColumnDesc { + data_type: DataType::Int64, + column_id: ROW_ID_COLUMN_ID, + name: row_id_column_name(), + field_descs: vec![], + type_name: "".to_string(), + } +} + /// The local system catalog reader in the frontend node. #[async_trait] pub trait SysCatalogReader: Sync + Send + 'static { @@ -66,31 +101,71 @@ pub type SysCatalogReaderRef = Arc; #[derive(Clone, Debug, Default, Hash, PartialOrd, PartialEq, Eq)] pub struct DatabaseId { - database_id: i32, + pub database_id: u32, } impl DatabaseId { - pub fn new(database_id: i32) -> Self { + pub fn new(database_id: u32) -> Self { DatabaseId { database_id } } - pub fn placeholder() -> i32 { - i32::MAX - 1 + pub fn placeholder() -> Self { + DatabaseId { + database_id: u32::MAX - 1, + } + } +} + +impl From for DatabaseId { + fn from(id: u32) -> Self { + Self::new(id) + } +} + +impl From<&u32> for DatabaseId { + fn from(id: &u32) -> Self { + Self::new(*id) + } +} + +impl From for u32 { + fn from(id: DatabaseId) -> Self { + id.database_id } } #[derive(Clone, Debug, Default, Hash, PartialOrd, PartialEq, Eq)] pub struct SchemaId { - schema_id: i32, + pub schema_id: u32, } impl SchemaId { - pub fn new(schema_id: i32) -> Self { + pub fn new(schema_id: u32) -> Self { SchemaId { schema_id } } - pub fn placeholder() -> i32 { - i32::MAX - 1 + pub fn placeholder() -> Self { + SchemaId { + schema_id: u32::MAX - 1, + } + } +} + +impl From for SchemaId { + fn from(id: u32) -> Self { + Self::new(id) + } +} + +impl From<&u32> for SchemaId { + fn from(id: &u32) -> Self { + Self::new(*id) + } +} + +impl From for u32 { + fn from(id: SchemaId) -> Self { + id.schema_id } } @@ -134,8 +209,6 @@ impl From for u32 { } } -// TODO: TableOption is duplicated with the properties in table catalog, We can refactor later to -// directly fetch such options from catalog when creating compaction jobs. 
#[derive(Clone, Debug, PartialEq, Default, Copy)] pub struct TableOption { pub retention_seconds: Option, // second @@ -253,3 +326,38 @@ impl From for u32 { id.0 } } + +#[derive(Clone, Copy, Debug, Display, Default, Hash, PartialOrd, PartialEq, Eq, Ord)] +pub struct UserId { + pub user_id: u32, +} + +impl UserId { + pub const fn new(user_id: u32) -> Self { + UserId { user_id } + } + + pub const fn placeholder() -> Self { + UserId { + user_id: u32::MAX - 1, + } + } +} + +impl From for UserId { + fn from(id: u32) -> Self { + Self::new(id) + } +} + +impl From<&u32> for UserId { + fn from(id: &u32) -> Self { + Self::new(*id) + } +} + +impl From for u32 { + fn from(id: UserId) -> Self { + id.user_id + } +} diff --git a/src/common/src/catalog/physical_table.rs b/src/common/src/catalog/physical_table.rs index b3d13402255c0..8f5963ff2473e 100644 --- a/src/common/src/catalog/physical_table.rs +++ b/src/common/src/catalog/physical_table.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ use std::collections::HashMap; +use fixedbitset::FixedBitSet; use risingwave_pb::plan_common::{ColumnOrder, StorageTableDesc}; use super::{ColumnDesc, ColumnId, TableId}; @@ -46,6 +47,9 @@ pub struct TableDesc { /// The prefix len of pk, used in bloom filter. pub read_prefix_len_hint: usize, + + /// the column indices which could receive watermarks. + pub watermark_columns: FixedBitSet, } impl TableDesc { diff --git a/src/common/src/catalog/schema.rs b/src/common/src/catalog/schema.rs index 0c2b09f413eae..b00606a6c98dc 100644 --- a/src/common/src/catalog/schema.rs +++ b/src/common/src/catalog/schema.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ use std::ops::Index; use itertools::Itertools; -use risingwave_pb::plan_common::Field as ProstField; +use risingwave_pb::plan_common::{ColumnDesc as ProstColumnDesc, Field as ProstField}; use super::ColumnDesc; use crate::array::ArrayBuilderImpl; @@ -74,6 +74,17 @@ impl From for Field { } } +impl From<&ProstColumnDesc> for Field { + fn from(pb_column_desc: &ProstColumnDesc) -> Self { + Self { + data_type: pb_column_desc.column_type.as_ref().unwrap().into(), + name: pb_column_desc.name.clone(), + sub_fields: pb_column_desc.field_descs.iter().map(Into::into).collect(), + type_name: pb_column_desc.type_name.clone(), + } + } +} + pub struct FieldDisplay<'a>(pub &'a Field); impl std::fmt::Debug for FieldDisplay<'_> { diff --git a/src/common/src/catalog/test_utils.rs b/src/common/src/catalog/test_utils.rs index 9952f980f451a..54251062b7908 100644 --- a/src/common/src/catalog/test_utils.rs +++ b/src/common/src/catalog/test_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
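// A minimal standalone sketch of the ID-shifting idea behind the `columns_extend`
// helper added in catalog/column.rs above: every preserved column except the
// row-ID column (ID 0) is shifted past the largest incoming column ID before the
// incoming columns are appended, so the two sets cannot collide. The types below
// are simplified stand-ins, not the real `ColumnCatalog` API.

const ROW_ID: i32 = 0;

#[derive(Debug, Clone, PartialEq)]
struct Col {
    id: i32,
    name: String,
}

fn columns_extend_sketch(preserved: &mut Vec<Col>, incoming: Vec<Col>) {
    // Largest incoming column ID, floored at the row-ID column's ID.
    let max_incoming = incoming.iter().map(|c| c.id).max().unwrap_or(ROW_ID);
    for col in preserved.iter_mut() {
        // Mirrors `apply_delta_if_not_row_id`: the row-ID column keeps ID 0.
        if col.id != ROW_ID {
            col.id += max_incoming;
        }
    }
    preserved.extend(incoming);
}

fn main() {
    let mut preserved = vec![
        Col { id: ROW_ID, name: "_row_id".into() },
        Col { id: 1, name: "a".into() },
    ];
    let incoming = vec![
        Col { id: 1, name: "b".into() },
        Col { id: 2, name: "c".into() },
    ];
    columns_extend_sketch(&mut preserved, incoming);
    // The preserved user column `a` moves to ID 3; `_row_id` stays at 0.
    let ids: Vec<i32> = preserved.iter().map(|c| c.id).collect();
    assert_eq!(ids, vec![0, 3, 1, 2]);
}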
diff --git a/src/common/src/collection/estimate_size.rs b/src/common/src/collection/estimate_size.rs index 00f8dab610256..5d3ed13f0c17b 100644 --- a/src/common/src/collection/estimate_size.rs +++ b/src/common/src/collection/estimate_size.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/collection/mod.rs b/src/common/src/collection/mod.rs index 8308e89ed15ae..aee11b88bd18f 100644 --- a/src/common/src/collection/mod.rs +++ b/src/common/src/collection/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/config.rs b/src/common/src/config.rs index 997a6513bf288..0760b787b9b8e 100644 --- a/src/common/src/config.rs +++ b/src/common/src/config.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ use std::fs; +use clap::ArgEnum; use serde::{Deserialize, Serialize}; /// Use the maximum value for HTTP/2 connection window size to avoid deadlock among multiplexed @@ -27,17 +28,50 @@ pub const MAX_CONNECTION_WINDOW_SIZE: u32 = (1 << 31) - 1; /// Use a large value for HTTP/2 stream window size to improve the performance of remote exchange, /// as we don't rely on this for back-pressure. pub const STREAM_WINDOW_SIZE: u32 = 32 * 1024 * 1024; // 32 MB +/// For non-user-facing components where the CLI arguments do not override the config file. +pub const NO_OVERRIDE: Option = None; -pub fn load_config(path: &str) -> RwConfig +/// A workaround for a bug in clap where the attribute `from_flag` on `Option` results in +/// compilation error. +pub type Flag = Option; + +pub fn load_config(path: &str, cli_override: Option) -> RwConfig where { - if path.is_empty() { + let mut config = if path.is_empty() { tracing::warn!("risingwave.toml not found, using default config."); - return RwConfig::default(); + RwConfig::default() + } else { + let config_str = fs::read_to_string(path) + .unwrap_or_else(|e| panic!("failed to open config file '{}': {}", path, e)); + toml::from_str(config_str.as_str()).unwrap_or_else(|e| panic!("parse error {}", e)) + }; + // TODO(zhidong): warn deprecated config + if let Some(cli_override) = cli_override { + cli_override.r#override(&mut config); } - let config_str = fs::read_to_string(path) - .unwrap_or_else(|e| panic!("failed to open config file '{}': {}", path, e)); - toml::from_str(config_str.as_str()).unwrap_or_else(|e| panic!("parse error {}", e)) + config +} + +/// Map command line flag to `Flag`. Should only be used in `#[derive(OverrideConfig)]`. +pub fn true_if_present(b: bool) -> Flag { + if b { + Some(true) + } else { + None + } +} + +pub trait OverrideConfig { + fn r#override(self, config: &mut RwConfig); +} + +/// A dummy struct for `NO_OVERRIDE`. Do NOT use it directly. +#[derive(Clone, Copy)] +pub struct NoOverride {} + +impl OverrideConfig for NoOverride { + fn r#override(self, _config: &mut RwConfig) {} } /// [`RwConfig`] corresponds to the whole config file `risingwave.toml`. 
Each field corresponds to a @@ -63,6 +97,13 @@ pub struct RwConfig { pub backup: BackupConfig, } +#[derive(Copy, Clone, Debug, Default, ArgEnum, Serialize, Deserialize)] +pub enum MetaBackend { + #[default] + Mem, + Etcd, +} + /// The section `[meta]` in `risingwave.toml`. #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(deny_unknown_fields)] @@ -111,6 +152,13 @@ pub struct MetaConfig { #[serde(default = "default::meta::node_num_monitor_interval_sec")] pub node_num_monitor_interval_sec: u64, + + #[serde(default = "default::meta::backend")] + pub backend: MetaBackend, + + /// Schedule space_reclaim compaction for all compaction groups with this interval. + #[serde(default = "default::meta::periodic_space_reclaim_compaction_interval_sec")] + pub periodic_space_reclaim_compaction_interval_sec: u64, } impl Default for MetaConfig { @@ -133,6 +181,12 @@ pub struct ServerConfig { #[serde(default = "default::server::connection_pool_size")] pub connection_pool_size: u16, + + #[serde(default = "default::server::metrics_level")] + /// Used for control the metrics level, similar to log level. + /// 0 = close metrics + /// >0 = open metrics + pub metrics_level: u32, } impl Default for ServerConfig { @@ -181,6 +235,14 @@ pub struct StreamingConfig { #[serde(default)] pub actor_runtime_worker_threads_num: Option, + /// Enable reporting tracing information to jaeger. + #[serde(default = "default::streaming::enable_jaegar_tracing")] + pub enable_jaeger_tracing: bool, + + /// Enable async stack tracing for risectl. + #[serde(default = "default::streaming::async_stack_trace")] + pub async_stack_trace: AsyncStackTraceOption, + #[serde(default)] pub developer: DeveloperConfig, } @@ -195,14 +257,20 @@ impl Default for StreamingConfig { #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct StorageConfig { + // TODO(zhidong): Remove in 0.1.18 release + // NOTE: It is now a system parameter and should not be used directly. /// Target size of the Sstable. #[serde(default = "default::storage::sst_size_mb")] pub sstable_size_mb: u32, + // TODO(zhidong): Remove in 0.1.18 release + // NOTE: It is now a system parameter and should not be used directly. /// Size of each block in bytes in SST. #[serde(default = "default::storage::block_size_kb")] pub block_size_kb: u32, + // TODO(zhidong): Remove in 0.1.18 release + // NOTE: It is now a system parameter and should not be used directly. /// False positive probability of bloom filter. #[serde(default = "default::storage::bloom_false_positive")] pub bloom_false_positive: f64, @@ -219,8 +287,10 @@ pub struct StorageConfig { /// Maximum shared buffer size, writes attempting to exceed the capacity will stall until there /// is enough space. #[serde(default = "default::storage::shared_buffer_capacity_mb")] - pub shared_buffer_capacity_mb: u32, + pub shared_buffer_capacity_mb: usize, + // TODO(zhidong): Remove in 0.1.18 release + // NOTE: It is now a system parameter and should not be used directly. /// Remote directory for storing data and metadata objects. 
#[serde(default = "default::storage::data_directory")] pub data_directory: String, @@ -270,8 +340,8 @@ pub struct StorageConfig { #[serde(default = "default::storage::max_sub_compaction")] pub max_sub_compaction: u32, - #[serde(default = "default::storage::object_store_use_batch_delete")] - pub object_store_use_batch_delete: bool, + #[serde(default = "default::storage::max_concurrent_compaction_task_number")] + pub max_concurrent_compaction_task_number: u64, /// Whether to enable state_store_v1 for hummock #[serde(default = "default::storage::enable_state_store_v1")] @@ -290,6 +360,9 @@ impl Default for StorageConfig { #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct FileCacheConfig { + #[serde(default = "default::file_cache::dir")] + pub dir: String, + #[serde(default = "default::file_cache::capacity_mb")] pub capacity_mb: usize, @@ -312,6 +385,14 @@ impl Default for FileCacheConfig { } } +#[derive(Debug, Default, Clone, ArgEnum, Serialize, Deserialize)] +pub enum AsyncStackTraceOption { + Off, + #[default] + On, + Verbose, +} + /// The subsections `[batch.developer]` and `[streaming.developer]`. /// /// It is put at [`BatchConfig::developer`] and [`StreamingConfig::developer`]. @@ -366,9 +447,13 @@ impl Default for DeveloperConfig { #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct BackupConfig { + // TODO: Remove in 0.1.18 release + // NOTE: It is now a system parameter and should not be used directly. /// Remote storage url for storing snapshots. #[serde(default = "default::backup::storage_url")] pub storage_url: String, + // TODO: Remove in 0.1.18 release + // NOTE: It is now a system parameter and should not be used directly. /// Remote directory for storing snapshots. 
#[serde(default = "default::backup::storage_directory")] pub storage_directory: String, @@ -382,6 +467,8 @@ impl Default for BackupConfig { mod default { pub mod meta { + use crate::config::MetaBackend; + pub fn min_sst_retention_time_sec() -> u64 { 604800 } @@ -409,6 +496,14 @@ mod default { pub fn node_num_monitor_interval_sec() -> u64 { 10 } + + pub fn backend() -> MetaBackend { + MetaBackend::Mem + } + + pub fn periodic_space_reclaim_compaction_interval_sec() -> u64 { + 3600 // 60min + } } pub mod server { @@ -424,6 +519,10 @@ mod default { pub fn connection_pool_size() -> u16 { 16 } + + pub fn metrics_level() -> u32 { + 0 + } } pub mod storage { @@ -448,7 +547,7 @@ mod default { 4 } - pub fn shared_buffer_capacity_mb() -> u32 { + pub fn shared_buffer_capacity_mb() -> usize { 1024 } @@ -501,15 +600,18 @@ mod default { 4 } - pub fn object_store_use_batch_delete() -> bool { - true + pub fn max_concurrent_compaction_task_number() -> u64 { + 16 } + pub fn enable_state_store_v1() -> bool { false } } pub mod streaming { + use crate::config::AsyncStackTraceOption; + pub fn barrier_interval_ms() -> u32 { 1000 } @@ -523,10 +625,22 @@ mod default { pub fn checkpoint_frequency() -> usize { 10 } + + pub fn enable_jaegar_tracing() -> bool { + false + } + + pub fn async_stack_trace() -> AsyncStackTraceOption { + AsyncStackTraceOption::On + } } pub mod file_cache { + pub fn dir() -> String { + "".to_string() + } + pub fn capacity_mb() -> usize { 1024 } @@ -549,6 +663,7 @@ mod default { } pub mod developer { + pub fn batch_output_channel_size() -> usize { 64 } diff --git a/src/common/src/constants.rs b/src/common/src/constants.rs index b4a287e81a9aa..6c15e62fb06b3 100644 --- a/src/common/src/constants.rs +++ b/src/common/src/constants.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/error.rs b/src/common/src/error.rs index 285b53a7039a5..afb768e7ed25d 100644 --- a/src/common/src/error.rs +++ b/src/common/src/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
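// A minimal standalone sketch of the CLI-override pattern introduced for
// `load_config` in config.rs above: the config file (or default) is parsed
// first, then an optional override built from CLI flags patches individual
// fields, with `None` meaning "flag not given, keep the file value". The types
// here are simplified stand-ins, not the real `RwConfig`/`OverrideConfig`.

#[derive(Debug, Default)]
struct Config {
    metrics_level: u32,
    data_directory: String,
}

trait Override {
    fn apply(self, config: &mut Config);
}

#[derive(Default)]
struct CliOverride {
    // Mirrors the `Flag`/`Option` idea: only `Some(_)` values override.
    metrics_level: Option<u32>,
    data_directory: Option<String>,
}

impl Override for CliOverride {
    fn apply(self, config: &mut Config) {
        if let Some(v) = self.metrics_level {
            config.metrics_level = v;
        }
        if let Some(v) = self.data_directory {
            config.data_directory = v;
        }
    }
}

fn load_config_sketch<O: Override>(from_file: Config, cli: Option<O>) -> Config {
    let mut config = from_file;
    if let Some(cli) = cli {
        cli.apply(&mut config);
    }
    config
}

fn main() {
    let file_config = Config { metrics_level: 0, data_directory: "hummock_data".into() };
    let cli = CliOverride { metrics_level: Some(1), data_directory: None };
    let merged = load_config_sketch(file_config, Some(cli));
    assert_eq!(merged.metrics_level, 1);
    assert_eq!(merged.data_directory, "hummock_data");
}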
@@ -128,18 +128,12 @@ pub enum ErrorCode { InvalidParameterValue(String), #[error("Sink error: {0}")] SinkError(BoxedError), - #[error("Permission denied: {0}")] PermissionDenied(String), - #[error("unrecognized configuration parameter \"{0}\"")] UnrecognizedConfigurationParameter(String), } -pub fn internal_err(msg: impl Into) -> RwError { - ErrorCode::InternalError(msg.into().to_string()).into() -} - pub fn internal_error(msg: impl Into) -> RwError { ErrorCode::InternalError(msg.into()).into() } @@ -272,6 +266,7 @@ impl From for RwError { ErrorCode::CatalogError(err.message().to_string().into()).into() } Code::PermissionDenied => ErrorCode::PermissionDenied(err.message().to_string()).into(), + Code::Cancelled => ErrorCode::SchedulerError(err.message().to_string().into()).into(), _ => ErrorCode::InternalError(err.message().to_string()).into(), } } diff --git a/src/common/src/field_generator/mod.rs b/src/common/src/field_generator/mod.rs index bdd57320d0757..d313097cfe5ca 100644 --- a/src/common/src/field_generator/mod.rs +++ b/src/common/src/field_generator/mod.rs @@ -1,10 +1,10 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, @@ -61,17 +61,13 @@ pub trait NumericFieldSequenceGenerator { } /// the way that datagen create the field data. such as 'sequence' or 'random'. +#[derive(Default)] pub enum FieldKind { Sequence, + #[default] Random, } -impl Default for FieldKind { - fn default() -> Self { - FieldKind::Random - } -} - pub enum FieldGeneratorImpl { I16Sequence(I16SequenceField), I32Sequence(I32SequenceField), @@ -189,7 +185,7 @@ impl FieldGeneratorImpl { Ok(FieldGeneratorImpl::List(Box::new(field), list_length)) } - pub fn generate(&mut self, offset: u64) -> Value { + pub fn generate_json(&mut self, offset: u64) -> Value { match self { FieldGeneratorImpl::I16Sequence(f) => f.generate(), FieldGeneratorImpl::I32Sequence(f) => f.generate(), @@ -206,13 +202,13 @@ impl FieldGeneratorImpl { FieldGeneratorImpl::Struct(fields) => { let map = fields .iter_mut() - .map(|(name, gen)| (name.clone(), gen.generate(offset))) + .map(|(name, gen)| (name.clone(), gen.generate_json(offset))) .collect(); Value::Object(map) } FieldGeneratorImpl::List(field, list_length) => { let vec = (0..*list_length) - .map(|_| field.generate(offset)) + .map(|_| field.generate_json(offset)) .collect::>(); Value::Array(vec) } @@ -273,7 +269,7 @@ mod tests { for step in 0..5 { for (index, i32_field) in i32_fields.iter_mut().enumerate() { - let value = i32_field.generate(0); + let value = i32_field.generate_json(0); assert!(value.is_number()); let num = value.as_u64(); let expected_num = split_num * step + 1 + index as u64; @@ -302,13 +298,13 @@ mod tests { _ => FieldGeneratorImpl::with_number_random(data_type, None, None, seed).unwrap(), }; - let val1 = generator.generate(1); - let val2 = generator.generate(2); + let val1 = generator.generate_json(1); + let val2 = generator.generate_json(2); assert_ne!(val1, val2); - let val1_new = generator.generate(1); - let val2_new = generator.generate(2); + let val1_new = generator.generate_json(1); + let val2_new = generator.generate_json(2); 
assert_eq!(val1_new, val1); assert_eq!(val2_new, val2); diff --git a/src/common/src/field_generator/numeric.rs b/src/common/src/field_generator/numeric.rs index d9c38176e828c..18c0ec4fe6091 100644 --- a/src/common/src/field_generator/numeric.rs +++ b/src/common/src/field_generator/numeric.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/field_generator/timestamp.rs b/src/common/src/field_generator/timestamp.rs index b0244a42eeb19..2959913958734 100644 --- a/src/common/src/field_generator/timestamp.rs +++ b/src/common/src/field_generator/timestamp.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/field_generator/varchar.rs b/src/common/src/field_generator/varchar.rs index 4284d0081c14c..fcb1e8d1e277c 100644 --- a/src/common/src/field_generator/varchar.rs +++ b/src/common/src/field_generator/varchar.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/hash/consistent_hash/mapping.rs b/src/common/src/hash/consistent_hash/mapping.rs new file mode 100644 index 0000000000000..b9b9f467da3b1 --- /dev/null +++ b/src/common/src/hash/consistent_hash/mapping.rs @@ -0,0 +1,430 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::fmt::Debug; +use std::hash::Hash; +use std::ops::Index; + +use derivative::Derivative; +use itertools::Itertools; +use risingwave_pb::common::{ParallelUnit, ParallelUnitMapping as ParallelUnitMappingProto}; +use risingwave_pb::stream_plan::ActorMapping as ActorMappingProto; + +use super::vnode::{ParallelUnitId, VirtualNode}; +use crate::buffer::{Bitmap, BitmapBuilder}; +use crate::util::compress::compress_data; +use crate::util::iter_util::ZipEqDebug; + +// TODO: find a better place for this. +pub type ActorId = u32; + +/// Trait for items that can be used as keys in [`VnodeMapping`]. +pub trait VnodeMappingItem { + /// The type of the item. + /// + /// Currently, there are two types of items: [`ParallelUnitId`] and [`ActorId`]. We don't use + /// them directly as the generic parameter because they're the same type aliases. + type Item: Copy + Ord + Hash + Debug; +} + +/// Exapnded mapping from virtual nodes to items, essentially a vector of items and can be indexed +/// by virtual nodes. +pub type ExpandedMapping = Vec<::Item>; + +/// Generic mapping from virtual nodes to items. 
+/// +/// The representation is compressed as described in [`compress_data`], which is optimized for the +/// mapping with a small number of items and good locality. +#[derive(Derivative)] +#[derivative(Debug, Clone, PartialEq, Eq, Hash)] +pub struct VnodeMapping { + original_indices: Vec, + data: Vec, +} + +#[expect( + clippy::len_without_is_empty, + reason = "empty vnode mapping makes no sense" +)] +impl VnodeMapping { + /// Create a uniform vnode mapping with a **set** of items. + /// + /// For example, if `items` is `[0, 1, 2]`, and the total vnode count is 10, we'll generate + /// mapping like `[0, 0, 0, 0, 1, 1, 1, 2, 2, 2]`. + pub fn new_uniform(items: impl ExactSizeIterator) -> Self { + // If the number of items is greater than the total vnode count, no vnode will be mapped to + // some items and the mapping will be invalid. + assert!(items.len() <= VirtualNode::COUNT); + + let mut original_indices = Vec::with_capacity(items.len()); + let mut data = Vec::with_capacity(items.len()); + + let hash_shard_size = VirtualNode::COUNT / items.len(); + let mut one_more_count = VirtualNode::COUNT % items.len(); + let mut init_bound = 0; + + for item in items { + let vnode_count = if one_more_count > 0 { + one_more_count -= 1; + hash_shard_size + 1 + } else { + hash_shard_size + }; + init_bound += vnode_count; + + original_indices.push(init_bound as u32 - 1); + data.push(item); + } + + // Assert that there's no duplicated items. + debug_assert_eq!(data.iter().duplicates().count(), 0); + + Self { + original_indices, + data, + } + } + + /// Create a vnode mapping where all vnodes are mapped to the same single item. + pub fn new_single(item: T::Item) -> Self { + Self::new_uniform(std::iter::once(item)) + } + + /// The length of the vnode in this mapping, typically [`VirtualNode::COUNT`]. + pub fn len(&self) -> usize { + self.original_indices + .last() + .map(|&i| i as usize + 1) + .unwrap_or(0) + } + + /// Get the item mapped to the given `vnode` by binary search. + /// + /// Note: to achieve better mapping performance, one should convert the mapping to the + /// [`ExpandedMapping`] first and directly access the item by index. + pub fn get(&self, vnode: VirtualNode) -> T::Item { + self[vnode] + } + + /// Get the item matched by the virtual nodes indicated by high bits in the given `bitmap`. + /// Returns `None` if the no virtual node is set in the bitmap. + pub fn get_matched(&self, bitmap: &Bitmap) -> Option { + bitmap + .iter_ones() + .next() // only need to check the first one + .map(|i| self.get(VirtualNode::from_index(i))) + } + + /// Iterate over all items in this mapping, in the order of vnodes. + pub fn iter(&self) -> impl Iterator + '_ { + self.data + .iter() + .copied() + .zip_eq_debug( + std::iter::once(0) + .chain(self.original_indices.iter().copied().map(|i| i + 1)) + .tuple_windows() + .map(|(a, b)| (b - a) as usize), + ) + .flat_map(|(item, c)| std::iter::repeat(item).take(c)) + } + + /// Iterate over all vnode-item pairs in this mapping. + pub fn iter_with_vnode(&self) -> impl Iterator + '_ { + self.iter() + .enumerate() + .map(|(v, item)| (VirtualNode::from_index(v), item)) + } + + /// Iterate over all unique items in this mapping. The order is deterministic. + pub fn iter_unique(&self) -> impl Iterator + '_ { + // Note: we can't ensure there's no duplicated items in the `data` after some scaling. + self.data.iter().copied().sorted().dedup() + } + + /// Returns the item if it's the only item in this mapping, otherwise returns `None`. 
+ pub fn to_single(&self) -> Option { + self.data.iter().copied().dedup().exactly_one().ok() + } + + /// Convert this vnode mapping to a mapping from items to bitmaps, where each bitmap represents + /// the vnodes mapped to the item. + pub fn to_bitmaps(&self) -> HashMap { + let mut vnode_bitmaps = HashMap::new(); + + for (vnode, item) in self.iter_with_vnode() { + vnode_bitmaps + .entry(item) + .or_insert_with(|| BitmapBuilder::zeroed(VirtualNode::COUNT)) + .set(vnode.to_index(), true); + } + + vnode_bitmaps + .into_iter() + .map(|(item, b)| (item, b.finish())) + .collect() + } + + /// Create a vnode mapping from the given mapping from items to bitmaps, where each bitmap + /// represents the vnodes mapped to the item. + pub fn from_bitmaps(bitmaps: &HashMap) -> Self { + let mut items = vec![None; VirtualNode::COUNT]; + + for (&item, bitmap) in bitmaps { + assert_eq!(bitmap.len(), VirtualNode::COUNT); + for idx in bitmap.iter_ones() { + if let Some(prev) = items[idx].replace(item) { + panic!("mapping at index `{idx}` is set to both `{prev:?}` and `{item:?}`"); + } + } + } + + let items = items + .into_iter() + .enumerate() + .map(|(i, o)| o.unwrap_or_else(|| panic!("mapping at index `{i}` is not set"))) + .collect_vec(); + Self::from_expanded(&items) + } + + /// Create a vnode mapping from the expanded slice of items with length [`VirtualNode::COUNT`]. + pub fn from_expanded(items: &[T::Item]) -> Self { + assert_eq!(items.len(), VirtualNode::COUNT); + let (original_indices, data) = compress_data(items); + Self { + original_indices, + data, + } + } + + /// Convert this vnode mapping to a expanded vector of items with length [`VirtualNode::COUNT`]. + pub fn to_expanded(&self) -> ExpandedMapping { + self.iter().collect() + } + + /// Transform this vnode mapping to another type of vnode mapping, with the given mapping from + /// items of this mapping to items of the other mapping. + pub fn transform( + &self, + to_map: &HashMap, + ) -> VnodeMapping { + VnodeMapping { + original_indices: self.original_indices.clone(), + data: self.data.iter().map(|item| to_map[item]).collect(), + } + } +} + +impl Index for VnodeMapping { + type Output = T::Item; + + fn index(&self, vnode: VirtualNode) -> &Self::Output { + let index = self + .original_indices + .partition_point(|&i| i < vnode.to_index() as u32); + &self.data[index] + } +} + +pub mod marker { + use super::*; + + /// A marker type for items of [`ActorId`]. + pub struct Actor; + impl VnodeMappingItem for Actor { + type Item = ActorId; + } + + /// A marker type for items of [`ParallelUnitId`]. + pub struct ParallelUnit; + impl VnodeMappingItem for ParallelUnit { + type Item = ParallelUnitId; + } +} + +/// A mapping from [`VirtualNode`] to [`ActorId`]. +pub type ActorMapping = VnodeMapping; +/// An expanded mapping from [`VirtualNode`] to [`ActorId`]. +pub type ExpandedActorMapping = ExpandedMapping; + +/// A mapping from [`VirtualNode`] to [`ParallelUnitId`]. +pub type ParallelUnitMapping = VnodeMapping; +/// An expanded mapping from [`VirtualNode`] to [`ParallelUnitId`]. +pub type ExpandedParallelUnitMapping = ExpandedMapping; + +impl ActorMapping { + /// Transform this actor mapping to a parallel unit mapping, essentially `transform`. + pub fn to_parallel_unit( + &self, + to_map: &HashMap, + ) -> ParallelUnitMapping { + self.transform(to_map) + } + + /// Create an actor mapping from the protobuf representation. 
+ pub fn from_protobuf(proto: &ActorMappingProto) -> Self { + assert_eq!(proto.original_indices.len(), proto.data.len()); + Self { + original_indices: proto.original_indices.clone(), + data: proto.data.clone(), + } + } + + /// Convert this actor mapping to the protobuf representation. + pub fn to_protobuf(&self) -> ActorMappingProto { + ActorMappingProto { + original_indices: self.original_indices.clone(), + data: self.data.clone(), + } + } +} + +impl ParallelUnitMapping { + /// Create a uniform parallel unit mapping from the given parallel units, essentially + /// `new_uniform`. + pub fn build(parallel_units: &[ParallelUnit]) -> Self { + Self::new_uniform(parallel_units.iter().map(|pu| pu.id)) + } + + /// Transform this parallel unit mapping to an actor mapping, essentially `transform`. + pub fn to_actor(&self, to_map: &HashMap) -> ActorMapping { + self.transform(to_map) + } + + /// Create a parallel unit mapping from the protobuf representation. + pub fn from_protobuf(proto: &ParallelUnitMappingProto) -> Self { + assert_eq!(proto.original_indices.len(), proto.data.len()); + Self { + original_indices: proto.original_indices.clone(), + data: proto.data.clone(), + } + } + + /// Convert this parallel unit mapping to the protobuf representation. + pub fn to_protobuf(&self) -> ParallelUnitMappingProto { + ParallelUnitMappingProto { + original_indices: self.original_indices.clone(), + data: self.data.clone(), + } + } +} + +#[cfg(test)] +mod tests { + use std::iter::repeat_with; + + use rand::Rng; + + use super::*; + use crate::util::iter_util::ZipEqDebug; + + struct Test; + impl VnodeMappingItem for Test { + type Item = u32; + } + + struct Test2; + impl VnodeMappingItem for Test2 { + type Item = u32; + } + + type TestMapping = VnodeMapping; + type Test2Mapping = VnodeMapping; + + const COUNTS: &[usize] = &[1, 3, 12, 42, VirtualNode::COUNT]; + + fn uniforms() -> impl Iterator { + COUNTS + .iter() + .map(|&count| TestMapping::new_uniform(0..count as u32)) + } + + fn randoms() -> impl Iterator { + COUNTS.iter().map(|&count| { + let raw = repeat_with(|| rand::thread_rng().gen_range(0..count as u32)) + .take(VirtualNode::COUNT) + .collect_vec(); + TestMapping::from_expanded(&raw) + }) + } + + fn mappings() -> impl Iterator { + uniforms().chain(randoms()) + } + + #[test] + fn test_uniform() { + for vnode_mapping in uniforms() { + assert_eq!(vnode_mapping.len(), VirtualNode::COUNT); + let item_count = vnode_mapping.iter_unique().count(); + + let mut check: HashMap> = HashMap::new(); + for (vnode, item) in vnode_mapping.iter_with_vnode() { + check.entry(item).or_default().push(vnode); + } + + assert_eq!(check.len(), item_count); + + let (min, max) = check + .values() + .map(|indexes| indexes.len()) + .minmax() + .into_option() + .unwrap(); + + assert!(max - min <= 1); + } + } + + #[test] + fn test_iter_with_get() { + for vnode_mapping in mappings() { + for (vnode, item) in vnode_mapping.iter_with_vnode() { + assert_eq!(vnode_mapping.get(vnode), item); + } + } + } + + #[test] + fn test_from_to_bitmaps() { + for vnode_mapping in mappings() { + let bitmaps = vnode_mapping.to_bitmaps(); + let new_vnode_mapping = TestMapping::from_bitmaps(&bitmaps); + + assert_eq!(vnode_mapping, new_vnode_mapping); + } + } + + #[test] + fn test_transform() { + for vnode_mapping in mappings() { + let transform_map: HashMap<_, _> = vnode_mapping + .iter_unique() + .map(|item| (item, item + 1)) + .collect(); + let vnode_mapping_2: Test2Mapping = vnode_mapping.transform(&transform_map); + + for (item, item_2) in 
vnode_mapping.iter().zip_eq_debug(vnode_mapping_2.iter()) { + assert_eq!(item + 1, item_2); + } + + let transform_back_map: HashMap<_, _> = + transform_map.into_iter().map(|(k, v)| (v, k)).collect(); + let new_vnode_mapping: TestMapping = vnode_mapping_2.transform(&transform_back_map); + + assert_eq!(vnode_mapping, new_vnode_mapping); + } + } +} diff --git a/src/common/src/hash/consistent_hash/mod.rs b/src/common/src/hash/consistent_hash/mod.rs new file mode 100644 index 0000000000000..85339419f6e4e --- /dev/null +++ b/src/common/src/hash/consistent_hash/mod.rs @@ -0,0 +1,16 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub mod mapping; +pub mod vnode; diff --git a/src/common/src/hash/vnode.rs b/src/common/src/hash/consistent_hash/vnode.rs similarity index 95% rename from src/common/src/hash/vnode.rs rename to src/common/src/hash/consistent_hash/vnode.rs index c54e66000effb..89528a784cfd0 100644 --- a/src/common/src/hash/vnode.rs +++ b/src/common/src/hash/consistent_hash/vnode.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,7 +19,6 @@ use crate::hash::HashCode; /// Parallel unit is the minimal scheduling unit. // TODO: make it a newtype pub type ParallelUnitId = u32; -pub type VnodeMapping = Vec; /// `VirtualNode` (a.k.a. VNode) is a minimal partition that a set of keys belong to. It is used for /// consistent hashing. @@ -49,7 +48,7 @@ impl VirtualNode { } /// An iterator over all virtual nodes. -pub type AllVirtualNodeIter = impl Iterator; +pub type AllVirtualNodeIter = std::iter::Map, fn(usize) -> VirtualNode>; impl VirtualNode { /// The maximum value of the virtual node. diff --git a/src/common/src/hash/dispatcher.rs b/src/common/src/hash/dispatcher.rs index 9fd61a7ca96f6..f4eaba71da1ca 100644 --- a/src/common/src/hash/dispatcher.rs +++ b/src/common/src/hash/dispatcher.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -101,6 +101,7 @@ fn hash_key_size(data_type: &DataType) -> HashKeySize { DataType::Varchar => HashKeySize::Variable, DataType::Bytea => HashKeySize::Variable, + DataType::Jsonb => HashKeySize::Variable, DataType::Struct { .. } => HashKeySize::Variable, DataType::List { .. } => HashKeySize::Variable, } diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index 98bc93b217e37..48901111f3288 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
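// A minimal standalone sketch of the compressed representation used by
// `VnodeMapping` above: `original_indices[i]` stores the last vnode index
// covered by run `i`, `data[i]` stores the item of that run, and a lookup is a
// binary search (`partition_point`), exactly as in the `Index` impl above.
// Types are simplified; the real mapping goes from `VirtualNode` to
// `ActorId`/`ParallelUnitId`.

struct RunLengthMapping {
    /// Last vnode index of each run, strictly increasing.
    original_indices: Vec<u32>,
    /// Item of each run.
    data: Vec<u32>,
}

impl RunLengthMapping {
    /// Build from an expanded `vnode -> item` vector.
    fn from_expanded(items: &[u32]) -> Self {
        let mut original_indices = Vec::new();
        let mut data = Vec::new();
        for (i, &item) in items.iter().enumerate() {
            if data.last() == Some(&item) {
                // Extend the current run.
                *original_indices.last_mut().unwrap() = i as u32;
            } else {
                original_indices.push(i as u32);
                data.push(item);
            }
        }
        Self { original_indices, data }
    }

    /// Item mapped to `vnode`, found by binary search over run boundaries.
    fn get(&self, vnode: usize) -> u32 {
        let run = self.original_indices.partition_point(|&i| (i as usize) < vnode);
        self.data[run]
    }
}

fn main() {
    // Expanded mapping over 10 vnodes: item 7 on [0, 4), item 8 on [4, 7), item 9 on [7, 10).
    let expanded = [7, 7, 7, 7, 8, 8, 8, 9, 9, 9];
    let mapping = RunLengthMapping::from_expanded(&expanded);
    assert_eq!(mapping.original_indices, vec![3, 6, 9]);
    for (vnode, &item) in expanded.iter().enumerate() {
        assert_eq!(mapping.get(vnode), item);
    }
}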
@@ -29,20 +29,20 @@ use std::io::{Cursor, Read}; use chrono::{Datelike, Timelike}; use fixedbitset::FixedBitSet; -use itertools::Itertools; use crate::array::{ - Array, ArrayBuilder, ArrayBuilderImpl, ArrayError, ArrayImpl, ArrayResult, DataChunk, ListRef, - StructRef, + Array, ArrayBuilder, ArrayBuilderImpl, ArrayError, ArrayImpl, ArrayResult, DataChunk, JsonbRef, + ListRef, StructRef, }; use crate::collection::estimate_size::EstimateSize; -use crate::hash::vnode::VirtualNode; +use crate::hash::VirtualNode; use crate::row::{OwnedRow, RowDeserializer}; use crate::types::{ DataType, Decimal, IntervalUnit, NaiveDateTimeWrapper, NaiveDateWrapper, NaiveTimeWrapper, OrderedF32, OrderedF64, ScalarRef, }; use crate::util::hash_util::Crc32FastBuilder; +use crate::util::iter_util::ZipEqFast; use crate::util::value_encoding::{deserialize_datum, serialize_datum_into}; /// A wrapper for u64 hash result. @@ -463,6 +463,20 @@ impl HashKeySerDe<'_> for NaiveTimeWrapper { } } +impl<'a> HashKeySerDe<'a> for JsonbRef<'a> { + type S = Vec; + + /// This should never be called + fn serialize(self) -> Self::S { + todo!() + } + + /// This should never be called + fn deserialize(_source: &mut R) -> Self { + todo!() + } +} + impl<'a> HashKeySerDe<'a> for StructRef<'a> { type S = Vec; @@ -618,7 +632,7 @@ where A::RefItem<'a>: HashKeySerDe<'a>, S: HashKeySerializer, { - for (item, serializer) in array.iter().zip_eq(serializers.iter_mut()) { + for (item, serializer) in array.iter().zip_eq_fast(serializers.iter_mut()) { serializer.append(item); } } @@ -719,7 +733,7 @@ impl HashKey for SerializedKey { for (datum_result, array_builder) in data_types .iter() .map(|ty| deserialize_datum(&mut key_buffer, ty)) - .zip_eq(array_builders.iter_mut()) + .zip_eq_fast(array_builders.iter_mut()) { array_builder.append_datum(&datum_result.map_err(ArrayError::internal)?); } @@ -736,6 +750,8 @@ mod tests { use std::collections::HashMap; use std::str::FromStr; + use itertools::Itertools; + use super::*; use crate::array; use crate::array::column::Column; diff --git a/src/common/src/hash/mod.rs b/src/common/src/hash/mod.rs index 3c73cca4b8e19..91f3e917b3483 100644 --- a/src/common/src/hash/mod.rs +++ b/src/common/src/hash/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,9 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -mod key; -pub use key::*; +mod consistent_hash; // TODO: move this to a separate module mod dispatcher; +mod key; + +pub use consistent_hash::mapping::*; +pub use consistent_hash::vnode::*; pub use dispatcher::HashKeyDispatcher; -mod vnode; -pub use vnode::*; +pub use key::*; diff --git a/src/common/src/jemalloc.rs b/src/common/src/jemalloc.rs index 22be75c3d82cc..3c13db1866ca3 100644 --- a/src/common/src/jemalloc.rs +++ b/src/common/src/jemalloc.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
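// Several hunks above replace `itertools::zip_eq` with `zip_eq_fast` /
// `zip_eq_debug` from `util::iter_util`. Those helpers are not part of this
// diff, so the following is only a plausible standalone model of the idea: a
// zip that checks equal lengths once up front (debug-only here) instead of
// paying a per-element length check.

fn zip_eq_debug_sketch<A, B>(
    a: impl ExactSizeIterator<Item = A>,
    b: impl ExactSizeIterator<Item = B>,
) -> impl Iterator<Item = (A, B)> {
    // Single length check, compiled out in release builds.
    debug_assert_eq!(a.len(), b.len(), "iterators have different lengths");
    a.zip(b)
}

fn main() {
    let types = ["Int16", "Varchar"];
    let values = ["1", "foo"];
    for (ty, v) in zip_eq_debug_sketch(types.iter(), values.iter()) {
        println!("{ty}: {v}");
    }
}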
diff --git a/src/common/src/lib.rs b/src/common/src/lib.rs index 3b6f98e919d15..cce75fade92c8 100644 --- a/src/common/src/lib.rs +++ b/src/common/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,11 +13,9 @@ // limitations under the License. #![allow(rustdoc::private_intra_doc_links)] -#![allow(clippy::derive_partial_eq_without_eq)] #![feature(trait_alias)] #![feature(binary_heap_drain_sorted)] #![feature(is_sorted)] -#![feature(fn_traits)] #![feature(type_alias_impl_trait)] #![feature(test)] #![feature(trusted_len)] @@ -32,7 +30,9 @@ #![feature(return_position_impl_trait_in_trait)] #![feature(portable_simd)] #![feature(array_chunks)] +#![feature(inline_const_pat)] #![allow(incomplete_features)] +#![feature(const_option_ext)] #[macro_use] pub mod jemalloc; @@ -53,6 +53,7 @@ pub mod hash; pub mod monitor; pub mod row; pub mod session_config; +pub mod system_param; #[cfg(test)] pub mod test_utils; pub mod types; @@ -63,3 +64,5 @@ pub mod test_prelude { } pub const RW_VERSION: &str = env!("CARGO_PKG_VERSION"); + +pub const GIT_SHA: &str = option_env!("GIT_SHA").unwrap_or("unknown"); diff --git a/src/common/src/monitor/mod.rs b/src/common/src/monitor/mod.rs index bd5df43331c2e..1db7bb5d91247 100644 --- a/src/common/src/monitor/mod.rs +++ b/src/common/src/monitor/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/monitor/my_stats.rs b/src/common/src/monitor/my_stats.rs index a13768440da04..08add49223956 100644 --- a/src/common/src/monitor/my_stats.rs +++ b/src/common/src/monitor/my_stats.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,6 +17,8 @@ use std::fmt::{Display, Formatter}; use itertools::Itertools; use prometheus::proto::Histogram; +use crate::util::iter_util::ZipEqFast; + #[derive(Clone, Default, Debug)] pub struct MyHistogram { pub upper_bound_list: Vec, @@ -57,7 +59,7 @@ impl MyHistogram { false => prev .count_list .iter() - .zip_eq(cur.count_list.iter()) + .zip_eq_fast(cur.count_list.iter()) .map(|(&pb, &cb)| cb - pb) .collect_vec(), }, @@ -76,7 +78,11 @@ impl MyHistogram { let threshold = (sample_count as f64 * (p / 100.0_f64)).ceil() as u64; let mut last_upper_bound = 0.0; let mut last_count = 0; - for (&upper_bound, &count) in self.upper_bound_list.iter().zip_eq(self.count_list.iter()) { + for (&upper_bound, &count) in self + .upper_bound_list + .iter() + .zip_eq_fast(self.count_list.iter()) + { if count >= threshold { // assume scale linearly within this bucket, // return a value between last_upper_bound and upper_bound diff --git a/src/common/src/monitor/process_linux.rs b/src/common/src/monitor/process_linux.rs index 9505c38597a04..b7aa65e375c46 100644 --- a/src/common/src/monitor/process_linux.rs +++ b/src/common/src/monitor/process_linux.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
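// A standalone sketch of the percentile estimation in `MyHistogram` above:
// buckets hold cumulative counts, the target rank is
// `ceil(sample_count * p / 100)`, and the result is linearly interpolated
// inside the first bucket whose cumulative count reaches that rank. The bucket
// layout in `main` is hypothetical example data.

fn percentile(upper_bounds: &[f64], cumulative_counts: &[u64], p: f64) -> f64 {
    let sample_count = *cumulative_counts.last().unwrap_or(&0);
    if sample_count == 0 {
        return 0.0;
    }
    let threshold = (sample_count as f64 * (p / 100.0)).ceil() as u64;

    let mut last_upper_bound = 0.0;
    let mut last_count = 0;
    for (&upper_bound, &count) in upper_bounds.iter().zip(cumulative_counts.iter()) {
        if count >= threshold {
            // Assume values are uniformly distributed within this bucket and
            // interpolate between the previous and current upper bound.
            let fraction = (threshold - last_count) as f64 / (count - last_count) as f64;
            return last_upper_bound + (upper_bound - last_upper_bound) * fraction;
        }
        last_upper_bound = upper_bound;
        last_count = count;
    }
    *upper_bounds.last().unwrap()
}

fn main() {
    // 100 samples: 50 in (0, 10], 30 in (10, 100], 20 in (100, 1000].
    let bounds = [10.0, 100.0, 1000.0];
    let counts = [50, 80, 100];
    // p50 falls exactly at the end of the first bucket.
    assert_eq!(percentile(&bounds, &counts, 50.0), 10.0);
    // p90 needs rank 90, i.e. half of the 20 samples in the last bucket.
    assert_eq!(percentile(&bounds, &counts, 90.0), 100.0 + 900.0 * 0.5);
}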
diff --git a/src/common/src/monitor/rwlock.rs b/src/common/src/monitor/rwlock.rs index dc6f1cfc6b438..983aeb8d5affa 100644 --- a/src/common/src/monitor/rwlock.rs +++ b/src/common/src/monitor/rwlock.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/row/chain.rs b/src/common/src/row/chain.rs index a76595f68186f..d154f490a19c9 100644 --- a/src/common/src/row/chain.rs +++ b/src/common/src/row/chain.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/row/compacted_row.rs b/src/common/src/row/compacted_row.rs index cf0cc280bc5b4..2408e4058d774 100644 --- a/src/common/src/row/compacted_row.rs +++ b/src/common/src/row/compacted_row.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/row/empty.rs b/src/common/src/row/empty.rs index f07f83e854d22..519ee8d765988 100644 --- a/src/common/src/row/empty.rs +++ b/src/common/src/row/empty.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/row/mod.rs b/src/common/src/row/mod.rs index 0de114b74091f..58419ff917ea9 100644 --- a/src/common/src/row/mod.rs +++ b/src/common/src/row/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/row/once.rs b/src/common/src/row/once.rs index 33b09b4b43009..c6ff7643279a9 100644 --- a/src/common/src/row/once.rs +++ b/src/common/src/row/once.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/row/owned_row.rs b/src/common/src/row/owned_row.rs index 9f1fd75336ced..7d0029af70821 100644 --- a/src/common/src/row/owned_row.rs +++ b/src/common/src/row/owned_row.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,20 +14,19 @@ use std::ops::{self, Deref}; -use itertools::Itertools; - use super::Row; use crate::collection::estimate_size::EstimateSize; use crate::types::{ DataType, Datum, DatumRef, Decimal, IntervalUnit, NaiveDateTimeWrapper, NaiveDateWrapper, NaiveTimeWrapper, ScalarImpl, ToDatumRef, }; +use crate::util::iter_util::ZipEqDebug; use crate::util::value_encoding; use crate::util::value_encoding::deserialize_datum; /// An owned row type with a `Vec`. #[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] -pub struct OwnedRow(Vec); // made private to avoid abuse +pub struct OwnedRow(Vec); /// Do not implement `IndexMut` to make it immutable. 
impl ops::Index for OwnedRow { @@ -69,7 +68,7 @@ impl OwnedRow { pub fn from_pretty_with_tys(tys: &[DataType], s: impl AsRef) -> Self { let datums: Vec<_> = tys .iter() - .zip_eq(s.as_ref().split_ascii_whitespace()) + .zip_eq_debug(s.as_ref().split_ascii_whitespace()) .map(|(ty, x)| { let scalar: ScalarImpl = match ty { DataType::Int16 => x.parse::().unwrap().into(), diff --git a/src/common/src/row/project.rs b/src/common/src/row/project.rs index 53c0ba0562aa8..84280a2925aea 100644 --- a/src/common/src/row/project.rs +++ b/src/common/src/row/project.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/row/repeat_n.rs b/src/common/src/row/repeat_n.rs index 384afbe6affdd..b349c7961c185 100644 --- a/src/common/src/row/repeat_n.rs +++ b/src/common/src/row/repeat_n.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/session_config/mod.rs b/src/common/src/session_config/mod.rs index 769f4c4be43f8..fa763cd26f61a 100644 --- a/src/common/src/session_config/mod.rs +++ b/src/common/src/session_config/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -32,7 +32,7 @@ use crate::util::epoch::Epoch; // This is a hack, &'static str is not allowed as a const generics argument. // TODO: refine this using the adt_const_params feature. -const CONFIG_KEYS: [&str; 15] = [ +const CONFIG_KEYS: [&str; 19] = [ "RW_IMPLICIT_FLUSH", "CREATE_COMPACTION_GROUP_FOR_MV", "QUERY_MODE", @@ -48,6 +48,10 @@ const CONFIG_KEYS: [&str; 15] = [ "VISIBILITY_MODE", "TIMEZONE", "STREAMING_PARALLELISM", + "RW_STREAMING_ENABLE_DELTA_JOIN", + "RW_ENABLE_TWO_PHASE_AGG", + "RW_FORCE_TWO_PHASE_AGG", + "RW_ENABLE_SHARE_PLAN", ]; // MUST HAVE 1v1 relationship to CONFIG_KEYS. e.g. CONFIG_KEYS[IMPLICIT_FLUSH] = @@ -67,6 +71,10 @@ const BATCH_ENABLE_SORT_AGG: usize = 11; const VISIBILITY_MODE: usize = 12; const TIMEZONE: usize = 13; const STREAMING_PARALLELISM: usize = 14; +const STREAMING_ENABLE_DELTA_JOIN: usize = 15; +const ENABLE_TWO_PHASE_AGG: usize = 16; +const FORCE_TWO_PHASE_AGG: usize = 17; +const RW_ENABLE_SHARE_PLAN: usize = 18; trait ConfigEntry: Default + for<'a> TryFrom<&'a [&'a str], Error = RwError> { fn entry_name() -> &'static str; @@ -262,6 +270,10 @@ type MaxSplitRangeGap = ConfigI32; type QueryEpoch = ConfigU64; type Timezone = ConfigString; type StreamingParallelism = ConfigU64; +type StreamingEnableDeltaJoin = ConfigBool; +type EnableTwoPhaseAgg = ConfigBool; +type ForceTwoPhaseAgg = ConfigBool; +type EnableSharePlan = ConfigBool; #[derive(Derivative)] #[derivative(Default)] @@ -317,6 +329,23 @@ pub struct ConfigMap { /// If `STREAMING_PARALLELISM` is non-zero, CREATE MATERIALIZED VIEW/TABLE/INDEX will use it as /// streaming parallelism. streaming_parallelism: StreamingParallelism, + + /// Enable delta join in streaming query. Defaults to false. + streaming_enable_delta_join: StreamingEnableDeltaJoin, + + /// Enable two phase agg optimization. Defaults to true. + /// Setting this to true will always set `FORCE_TWO_PHASE_AGG` to false. 
+    enable_two_phase_agg: EnableTwoPhaseAgg,
+
+    /// Force two phase agg optimization whenever there's a choice between
+    /// optimizations. Defaults to false.
+    /// Setting this to true will always set `ENABLE_TWO_PHASE_AGG` to true.
+    force_two_phase_agg: ForceTwoPhaseAgg,
+
+    /// Enable sharing of common sub-plans.
+    /// This means that DAG structured query plans can be constructed,
+    /// rather than only tree structured query plans.
+    enable_share_plan: EnableSharePlan,
 }

 impl ConfigMap {
@@ -356,6 +385,20 @@ impl ConfigMap {
             self.timezone = raw;
         } else if key.eq_ignore_ascii_case(StreamingParallelism::entry_name()) {
             self.streaming_parallelism = val.as_slice().try_into()?;
+        } else if key.eq_ignore_ascii_case(StreamingEnableDeltaJoin::entry_name()) {
+            self.streaming_enable_delta_join = val.as_slice().try_into()?;
+        } else if key.eq_ignore_ascii_case(EnableTwoPhaseAgg::entry_name()) {
+            self.enable_two_phase_agg = val.as_slice().try_into()?;
+            if !*self.enable_two_phase_agg {
+                self.force_two_phase_agg = ConfigBool(false);
+            }
+        } else if key.eq_ignore_ascii_case(ForceTwoPhaseAgg::entry_name()) {
+            self.force_two_phase_agg = val.as_slice().try_into()?;
+            if *self.force_two_phase_agg {
+                self.enable_two_phase_agg = ConfigBool(true);
+            }
+        } else if key.eq_ignore_ascii_case(EnableSharePlan::entry_name()) {
+            self.enable_share_plan = val.as_slice().try_into()?;
         } else {
             return Err(ErrorCode::UnrecognizedConfigurationParameter(key.to_string()).into());
         }
@@ -394,6 +437,14 @@ impl ConfigMap {
             Ok(self.timezone.clone())
         } else if key.eq_ignore_ascii_case(StreamingParallelism::entry_name()) {
             Ok(self.streaming_parallelism.to_string())
+        } else if key.eq_ignore_ascii_case(StreamingEnableDeltaJoin::entry_name()) {
+            Ok(self.streaming_enable_delta_join.to_string())
+        } else if key.eq_ignore_ascii_case(EnableTwoPhaseAgg::entry_name()) {
+            Ok(self.enable_two_phase_agg.to_string())
+        } else if key.eq_ignore_ascii_case(ForceTwoPhaseAgg::entry_name()) {
+            Ok(self.force_two_phase_agg.to_string())
+        } else if key.eq_ignore_ascii_case(EnableSharePlan::entry_name()) {
+            Ok(self.enable_share_plan.to_string())
         } else {
             Err(ErrorCode::UnrecognizedConfigurationParameter(key.to_string()).into())
         }
@@ -470,7 +521,27 @@ impl ConfigMap {
                 name : StreamingParallelism::entry_name().to_lowercase(),
                 setting : self.streaming_parallelism.to_string(),
                 description: String::from("Sets the parallelism for streaming. If 0, use default value.")
-            }
+            },
+            VariableInfo{
+                name : StreamingEnableDeltaJoin::entry_name().to_lowercase(),
+                setting : self.streaming_enable_delta_join.to_string(),
+                description: String::from("Enable delta join in streaming query.")
+            },
+            VariableInfo{
+                name : EnableTwoPhaseAgg::entry_name().to_lowercase(),
+                setting : self.enable_two_phase_agg.to_string(),
+                description: String::from("Enable two phase aggregation.")
+            },
+            VariableInfo{
+                name : ForceTwoPhaseAgg::entry_name().to_lowercase(),
+                setting : self.force_two_phase_agg.to_string(),
+                description: String::from("Force two phase aggregation.")
+            },
+            VariableInfo{
+                name : EnableSharePlan::entry_name().to_lowercase(),
+                setting : self.enable_share_plan.to_string(),
+                description: String::from("Enable sharing of common sub-plans.
This means that DAG structured query plans can be constructed, rather than only tree structured query plans.") + }, ] } @@ -539,4 +610,20 @@ impl ConfigMap { } None } + + pub fn get_streaming_enable_delta_join(&self) -> bool { + *self.streaming_enable_delta_join + } + + pub fn get_enable_two_phase_agg(&self) -> bool { + *self.enable_two_phase_agg + } + + pub fn get_force_two_phase_agg(&self) -> bool { + *self.force_two_phase_agg + } + + pub fn get_enable_share_plan(&self) -> bool { + *self.enable_share_plan + } } diff --git a/src/common/src/session_config/query_mode.rs b/src/common/src/session_config/query_mode.rs index ac2bc93ee2410..37a8dfbeb363c 100644 --- a/src/common/src/session_config/query_mode.rs +++ b/src/common/src/session_config/query_mode.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/session_config/search_path.rs b/src/common/src/session_config/search_path.rs index dd3d1128405ea..a7bc457039931 100644 --- a/src/common/src/session_config/search_path.rs +++ b/src/common/src/session_config/search_path.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/session_config/transaction_isolation_level.rs b/src/common/src/session_config/transaction_isolation_level.rs index 77026619b99d4..7f50f74866c91 100644 --- a/src/common/src/session_config/transaction_isolation_level.rs +++ b/src/common/src/session_config/transaction_isolation_level.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/session_config/visibility_mode.rs b/src/common/src/session_config/visibility_mode.rs index a9e37ae2c8821..88e63814acbe4 100644 --- a/src/common/src/session_config/visibility_mode.rs +++ b/src/common/src/session_config/visibility_mode.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,20 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Copyright 2022 Singularity Data -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - //! Contains configurations that could be accessed via "set" command. 
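The two aggregation flags added to `ConfigMap` above are coupled: disabling `RW_ENABLE_TWO_PHASE_AGG` clears `RW_FORCE_TWO_PHASE_AGG`, while forcing implies enabling. A minimal sketch of that rule, using illustrative names rather than the real `ConfigMap` API:

```rust
/// Illustrative model of the coupling implemented in `ConfigMap::set` above;
/// the struct and methods are not part of the RisingWave API.
struct TwoPhaseAggFlags {
    enable: bool, // RW_ENABLE_TWO_PHASE_AGG, defaults to true
    force: bool,  // RW_FORCE_TWO_PHASE_AGG, defaults to false
}

impl TwoPhaseAggFlags {
    fn set_enable(&mut self, value: bool) {
        self.enable = value;
        // Disabling two-phase aggregation also clears the force flag.
        if !value {
            self.force = false;
        }
    }

    fn set_force(&mut self, value: bool) {
        self.force = value;
        // Forcing two-phase aggregation implies enabling it.
        if value {
            self.enable = true;
        }
    }
}

fn main() {
    let mut flags = TwoPhaseAggFlags { enable: true, force: false };

    flags.set_force(true);
    assert!(flags.enable && flags.force);

    flags.set_enable(false);
    assert!(!flags.enable && !flags.force);
}
```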
use std::fmt::Formatter; diff --git a/src/common/src/system_param.rs b/src/common/src/system_param.rs new file mode 100644 index 0000000000000..d2d0e57f12bc1 --- /dev/null +++ b/src/common/src/system_param.rs @@ -0,0 +1,268 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashSet; +use std::fmt::Debug; +use std::ops::RangeBounds; + +use paste::paste; +use risingwave_pb::meta::SystemParams; + +pub type SystemParamsError = String; + +type Result = core::result::Result; + +// Only includes undeprecated params. +// Macro input is { field identifier, default value } +macro_rules! for_all_undeprecated_params { + ($macro:ident) => { + $macro! { + { barrier_interval_ms, u32, 1000_u32 }, + { checkpoint_frequency, u64, 10_u64 }, + { sstable_size_mb, u32, 256_u32 }, + { block_size_kb, u32, 64_u32 }, + { bloom_false_positive, f64, 0.001_f64 }, + { state_store, String, "hummock+memory".to_string() }, + { data_directory, String, "hummock_001".to_string() }, + { backup_storage_url, String, "memory".to_string() }, + { backup_storage_directory, String, "backup".to_string() }, + } + }; +} + +// Only includes deprecated params. Used to define key constants. +// Macro input is { field identifier, default value } +macro_rules! for_all_deprecated_params { + ($macro:ident) => { + $macro! {} + }; +} + +/// Convert field name to string. +macro_rules! key_of { + ($field:ident) => { + stringify!($field) + }; +} + +// Define key constants for fields in `SystemParams` for use of other modules. +macro_rules! def_key { + ($({ $field:ident, $type:ty, $default:expr },)*) => { + paste! { + $( + pub const [<$field:upper _KEY>]: &str = key_of!($field); + )* + } + }; +} + +for_all_undeprecated_params!(def_key); +for_all_deprecated_params!(def_key); + +// Derive serialization to kv pairs. +macro_rules! impl_system_params_to_kv { + ($({ $field:ident, $type:ty, $default:expr },)*) => { + /// All undeprecated fields are guaranteed to be contained in the returned map. + /// Return error if there are missing fields. + pub fn system_params_to_kv(params: &SystemParams) -> Result> { + let mut ret = Vec::with_capacity(9); + $(ret.push(( + key_of!($field).to_string(), + params + .$field.as_ref() + .ok_or_else(||format!( + "missing system param {:?}", + key_of!($field) + ))? + .to_string(), + ));)* + Ok(ret) + } + }; +} + +// Derive deserialization from kv pairs. +macro_rules! impl_system_params_from_kv { + ($({ $field:ident, $type:ty, $default:expr },)*) => { + /// For each field in `SystemParams`, one of these rules apply: + /// - Up-to-date: Guaranteed to be `Some`. If it is not present, may try to derive it from previous + /// versions of this field. + /// - Deprecated: Guaranteed to be `None`. + /// - Unrecognized: Not allowed. 
+ pub fn system_params_from_kv(kvs: Vec<(impl AsRef<[u8]>, impl AsRef<[u8]>)>) -> Result { + let mut ret = SystemParams::default(); + let mut expected_keys: HashSet<_> = [ + $(key_of!($field),)* + ] + .iter() + .cloned() + .collect(); + for (k, v) in kvs { + let k = std::str::from_utf8(k.as_ref()).unwrap(); + let v = std::str::from_utf8(v.as_ref()).unwrap(); + match k { + $( + key_of!($field) => ret.$field = Some(v.parse().unwrap()), + )* + _ => { + return Err(format!( + "unrecognized system param {:?}", + k + )); + } + } + expected_keys.remove(k); + } + if !expected_keys.is_empty() { + return Err(format!( + "missing system param {:?}", + expected_keys + )); + } + Ok(ret) + } + }; +} + +// Define check rules when a field is changed. By default all fields are immutable. +// If you want custom rules, please override the default implementation in +// `OverrideValidateOnSet` below. +macro_rules! impl_default_validation_on_set { + ($({ $field:ident, $type:ty, $default:expr },)*) => { + #[allow(clippy::ptr_arg)] + trait ValidateOnSet { + $( + fn $field(_v: &$type) -> Result<()> { + Self::expect_immutable(key_of!($field)) + } + )* + + fn expect_immutable(field: &str) -> Result<()> { + Err(format!("{:?} is immutable", field)) + } + + fn expect_range(v: T, range: R) -> Result<()> + where + T: Debug + PartialOrd, + R: RangeBounds + Debug, + { + if !range.contains::(&v) { + Err(format!("value {:?} out of range, expect {:?}", v, range)) + } else { + Ok(()) + } + } + } + } +} + +macro_rules! impl_set_system_param { + ($({ $field:ident, $type:ty, $default:expr },)*) => { + pub fn set_system_param(params: &mut SystemParams, key: &str, value: Option) -> Result<()> { + match key { + $( + key_of!($field) => { + let v = if let Some(v) = value { + v.parse().map_err(|_| format!("cannot parse parameter value"))? + } else { + $default + }; + OverrideValidateOnSet::$field(&v)?; + params.$field = Some(v); + }, + )* + _ => { + return Err(format!( + "unrecognized system param {:?}", + key + )); + } + }; + Ok(()) + } + }; +} + +for_all_undeprecated_params!(impl_system_params_from_kv); + +for_all_undeprecated_params!(impl_system_params_to_kv); + +for_all_undeprecated_params!(impl_set_system_param); + +for_all_undeprecated_params!(impl_default_validation_on_set); + +struct OverrideValidateOnSet; +impl ValidateOnSet for OverrideValidateOnSet { + fn barrier_interval_ms(v: &u32) -> Result<()> { + Self::expect_range(*v, 1..) + } + + fn checkpoint_frequency(v: &u64) -> Result<()> { + Self::expect_range(*v, 1..) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_to_from_kv() { + // Include all fields (deprecated also). + let kvs = vec![ + (BARRIER_INTERVAL_MS_KEY, "1"), + (CHECKPOINT_FREQUENCY_KEY, "1"), + (SSTABLE_SIZE_MB_KEY, "1"), + (BLOCK_SIZE_KB_KEY, "1"), + (BLOOM_FALSE_POSITIVE_KEY, "1"), + (STATE_STORE_KEY, "a"), + (DATA_DIRECTORY_KEY, "a"), + (BACKUP_STORAGE_URL_KEY, "a"), + (BACKUP_STORAGE_DIRECTORY_KEY, "a"), + ]; + + // To kv - missing field. + let p = SystemParams::default(); + assert!(system_params_to_kv(&p).is_err()); + + // From kv - missing field. + assert!(system_params_from_kv(vec![(BARRIER_INTERVAL_MS_KEY, "1")]).is_err()); + + // From kv - unrecognized field. + assert!(system_params_from_kv(vec![("?", "?")]).is_err()); + + // Deser & ser. + let p = system_params_from_kv(kvs).unwrap(); + assert_eq!( + p, + system_params_from_kv(system_params_to_kv(&p).unwrap()).unwrap() + ); + } + + #[test] + fn test_set() { + let mut p = SystemParams::default(); + // Unrecognized param. 
+ assert!(set_system_param(&mut p, "?", Some("?".to_string())).is_err()); + // Value out of range. + assert!(set_system_param(&mut p, BARRIER_INTERVAL_MS_KEY, Some("-1".to_string())).is_err()); + // Set immutable. + assert!(set_system_param(&mut p, STATE_STORE_KEY, Some("?".to_string())).is_err()); + // Parse error. + assert!(set_system_param(&mut p, BARRIER_INTERVAL_MS_KEY, Some("?".to_string())).is_err()); + // Normal set. + assert!(set_system_param(&mut p, BARRIER_INTERVAL_MS_KEY, Some("500".to_string())).is_ok()); + assert_eq!(p.barrier_interval_ms, Some(500)); + } +} diff --git a/src/common/src/test_utils/mod.rs b/src/common/src/test_utils/mod.rs index b36ea66c84b02..bd2c56a0f7917 100644 --- a/src/common/src/test_utils/mod.rs +++ b/src/common/src/test_utils/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/test_utils/rand_array.rs b/src/common/src/test_utils/rand_array.rs index aabd79cf14430..01365d85c325d 100644 --- a/src/common/src/test_utils/rand_array.rs +++ b/src/common/src/test_utils/rand_array.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ use rand::prelude::Distribution; use rand::rngs::SmallRng; use rand::{Rng, SeedableRng}; -use crate::array::{Array, ArrayBuilder, ArrayRef, ListValue, StructValue}; +use crate::array::{Array, ArrayBuilder, ArrayRef, JsonbVal, ListValue, StructValue}; use crate::types::{ Decimal, IntervalUnit, NaiveDateTimeWrapper, NaiveDateWrapper, NaiveTimeWrapper, NativeType, Scalar, @@ -117,6 +117,12 @@ impl RandValue for bool { } } +impl RandValue for JsonbVal { + fn rand_value(_rand: &mut R) -> Self { + JsonbVal::dummy() + } +} + impl RandValue for StructValue { fn rand_value(_rand: &mut R) -> Self { StructValue::new(vec![]) diff --git a/src/common/src/test_utils/test_stream_chunk.rs b/src/common/src/test_utils/test_stream_chunk.rs index 0eb531f650dac..68cd27478f1c2 100644 --- a/src/common/src/test_utils/test_stream_chunk.rs +++ b/src/common/src/test_utils/test_stream_chunk.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/types/chrono_wrapper.rs b/src/common/src/types/chrono_wrapper.rs index 0b9881789ba2b..1a3c4ba397553 100644 --- a/src/common/src/types/chrono_wrapper.rs +++ b/src/common/src/types/chrono_wrapper.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/types/decimal.rs b/src/common/src/types/decimal.rs index adb24c357b56c..7cf41a2b2c16a 100644 --- a/src/common/src/types/decimal.rs +++ b/src/common/src/types/decimal.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
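Usage of the helpers generated by the macros in `system_param.rs` above mirrors its unit tests. The sketch below assumes the module's public items (`system_params_from_kv`, `set_system_param`, `system_params_to_kv` and the `*_KEY` constants) are in scope, e.g. via `risingwave_common::system_param::*`; it is illustrative only:

```rust
// Assumes `use risingwave_common::system_param::*;` and
// `risingwave_pb::meta::SystemParams` are available.
fn example() -> Result<(), String> {
    // Rebuild params from kv pairs, as they would be read back from the meta store.
    // All undeprecated keys must be present, otherwise an error is returned.
    let mut params = system_params_from_kv(vec![
        (BARRIER_INTERVAL_MS_KEY, "1000"),
        (CHECKPOINT_FREQUENCY_KEY, "10"),
        (SSTABLE_SIZE_MB_KEY, "256"),
        (BLOCK_SIZE_KB_KEY, "64"),
        (BLOOM_FALSE_POSITIVE_KEY, "0.001"),
        (STATE_STORE_KEY, "hummock+memory"),
        (DATA_DIRECTORY_KEY, "hummock_001"),
        (BACKUP_STORAGE_URL_KEY, "memory"),
        (BACKUP_STORAGE_DIRECTORY_KEY, "backup"),
    ])?;

    // Mutable params go through the per-field validation in `OverrideValidateOnSet`;
    // `barrier_interval_ms` must stay >= 1, so this succeeds...
    set_system_param(&mut params, BARRIER_INTERVAL_MS_KEY, Some("500".to_string()))?;
    // ...while immutable params such as `state_store` are rejected.
    assert!(set_system_param(&mut params, STATE_STORE_KEY, Some("other".to_string())).is_err());

    // Round-trip back to kv pairs for persistence.
    let _kvs = system_params_to_kv(&params)?;
    Ok(())
}
```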
@@ -636,9 +636,9 @@ impl Zero for Decimal { #[cfg(test)] mod tests { - use itertools::Itertools; use super::*; + use crate::util::iter_util::ZipEqFast; fn check(lhs: f32, rhs: f32) -> bool { if lhs.is_nan() && rhs.is_nan() { @@ -674,8 +674,8 @@ mod tests { -1.0f32, 0.0f32, ]; - for (d_lhs, f_lhs) in decimals.iter().zip_eq(floats.iter()) { - for (d_rhs, f_rhs) in decimals.iter().zip_eq(floats.iter()) { + for (d_lhs, f_lhs) in decimals.iter().zip_eq_fast(floats.iter()) { + for (d_rhs, f_rhs) in decimals.iter().zip_eq_fast(floats.iter()) { assert!(check((*d_lhs + *d_rhs).to_f32().unwrap(), f_lhs + f_rhs)); assert!(check((*d_lhs - *d_rhs).to_f32().unwrap(), f_lhs - f_rhs)); assert!(check((*d_lhs * *d_rhs).to_f32().unwrap(), f_lhs * f_rhs)); diff --git a/src/common/src/types/interval.rs b/src/common/src/types/interval.rs index c323011bcdd79..2f4c73f9a5417 100644 --- a/src/common/src/types/interval.rs +++ b/src/common/src/types/interval.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ use std::ops::{Add, Neg, Sub}; use anyhow::anyhow; use byteorder::{BigEndian, NetworkEndian, ReadBytesExt, WriteBytesExt}; use bytes::BytesMut; -use num_traits::{CheckedAdd, CheckedSub, Zero}; +use num_traits::{CheckedAdd, CheckedNeg, CheckedSub, Zero}; use postgres_types::{to_sql_checked, FromSql}; use risingwave_pb::data::IntervalUnit as IntervalUnitProto; @@ -128,15 +128,6 @@ impl IntervalUnit { interval } - #[must_use] - pub fn negative(&self) -> Self { - IntervalUnit { - months: -self.months, - days: -self.days, - ms: -self.ms, - } - } - #[must_use] pub fn from_total_ms(ms: i64) -> Self { let mut remaining_ms = ms; @@ -585,6 +576,15 @@ impl Ord for IntervalUnit { } } +impl CheckedNeg for IntervalUnit { + fn checked_neg(&self) -> Option { + let months = self.months.checked_neg()?; + let days = self.days.checked_neg()?; + let ms = self.ms.checked_neg()?; + Some(IntervalUnit { months, days, ms }) + } +} + impl CheckedAdd for IntervalUnit { fn checked_add(&self, other: &Self) -> Option { let months = self.months.checked_add(other.months)?; @@ -686,11 +686,17 @@ impl Display for IntervalUnit { write(format_args!("{days} days"))?; } if self.ms != 0 || self.months == 0 && self.days == 0 { - let hours = self.ms / 1000 / 3600; - let minutes = (self.ms / 1000 / 60) % 60; - let seconds = self.ms % 60000 / 1000; - let secs_fract = self.ms % 1000; - write(format_args!("{hours:0>2}:{minutes:0>2}:{seconds:0>2}"))?; + let ms = self.ms.abs(); + let hours = ms / 1000 / 3600; + let minutes = (ms / 1000 / 60) % 60; + let seconds = ms % 60000 / 1000; + let secs_fract = ms % 1000; + + if self.ms < 0 { + write(format_args!("-{hours:0>2}:{minutes:0>2}:{seconds:0>2}"))?; + } else { + write(format_args!("{hours:0>2}:{minutes:0>2}:{seconds:0>2}"))?; + } if secs_fract != 0 { let mut buf = [0u8; 4]; write!(buf.as_mut_slice(), ".{:03}", secs_fract).unwrap(); @@ -1122,6 +1128,15 @@ mod tests { "-1 years -2 mons 3 days" ); assert_eq!(IntervalUnit::default().to_string(), "00:00:00"); + assert_eq!( + IntervalUnit::new( + -14, + 3, + -(11 * 3600 * 1000 + 45 * 60 * 1000 + 14 * 1000 + 233) + ) + .to_string(), + "-1 years -2 mons 3 days -11:45:14.233" + ); } #[test] diff --git a/src/common/src/types/mod.rs b/src/common/src/types/mod.rs index 3da640c988898..6045aa9f2961d 100644 --- a/src/common/src/types/mod.rs +++ b/src/common/src/types/mod.rs @@ -1,4 
+1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -28,6 +28,7 @@ use crate::error::BoxedError; mod native_type; mod ops; mod scalar_impl; +mod successor; use std::fmt::Debug; use std::io::Cursor; @@ -37,6 +38,7 @@ pub use native_type::*; use risingwave_pb::data::data_type::IntervalType::*; use risingwave_pb::data::data_type::{IntervalType, TypeName}; pub use scalar_impl::*; +pub use successor::*; pub mod chrono_wrapper; pub mod decimal; pub mod interval; @@ -64,8 +66,8 @@ use self::struct_type::StructType; use self::to_binary::ToBinary; use self::to_text::ToText; use crate::array::{ - read_interval_unit, ArrayBuilderImpl, ListRef, ListValue, PrimitiveArrayItemType, StructRef, - StructValue, + read_interval_unit, ArrayBuilderImpl, JsonbRef, JsonbVal, ListRef, ListValue, + PrimitiveArrayItemType, StructRef, StructValue, }; use crate::error::Result as RwResult; @@ -126,6 +128,9 @@ pub enum DataType { #[display("bytea")] #[from_str(regex = "(?i)^bytea$")] Bytea, + #[display("jsonb")] + #[from_str(regex = "(?i)^jsonb$")] + Jsonb, } impl std::str::FromStr for Box { @@ -152,6 +157,7 @@ impl DataTypeName { | DataTypeName::Timestamptz | DataTypeName::Time | DataTypeName::Bytea + | DataTypeName::Jsonb | DataTypeName::Interval => true, DataTypeName::Struct | DataTypeName::List => false, @@ -174,6 +180,7 @@ impl DataTypeName { DataTypeName::Timestamptz => DataType::Timestamptz, DataTypeName::Time => DataType::Time, DataTypeName::Interval => DataType::Interval, + DataTypeName::Jsonb => DataType::Jsonb, DataTypeName::Struct | DataTypeName::List => { return None; } @@ -212,6 +219,7 @@ impl From<&ProstDataType> for DataType { TypeName::Decimal => DataType::Decimal, TypeName::Interval => DataType::Interval, TypeName::Bytea => DataType::Bytea, + TypeName::Jsonb => DataType::Jsonb, TypeName::Struct => { let fields: Vec = proto.field_type.iter().map(|f| f.into()).collect_vec(); let field_names: Vec = proto.field_names.iter().cloned().collect_vec(); @@ -257,6 +265,7 @@ impl DataType { DataType::Timestamp => NaiveDateTimeArrayBuilder::new(capacity).into(), DataType::Timestamptz => PrimitiveArrayBuilder::::new(capacity).into(), DataType::Interval => IntervalArrayBuilder::new(capacity).into(), + DataType::Jsonb => JsonbArrayBuilder::new(capacity).into(), DataType::Struct(t) => { StructArrayBuilder::with_meta(capacity, t.to_array_meta()).into() } @@ -286,6 +295,7 @@ impl DataType { DataType::Timestamptz => TypeName::Timestamptz, DataType::Decimal => TypeName::Decimal, DataType::Interval => TypeName::Interval, + DataType::Jsonb => TypeName::Jsonb, DataType::Struct { .. } => TypeName::Struct, DataType::List { .. } => TypeName::List, DataType::Bytea => TypeName::Bytea, @@ -340,18 +350,6 @@ impl DataType { } } - /// Checks if memcomparable encoding of datatype is equivalent to its value encoding. 
- pub fn mem_cmp_eq_value_enc(&self) -> bool { - use DataType::*; - match self { - Boolean | Int16 | Int32 | Int64 => true, - Float32 | Float64 | Decimal | Date | Varchar | Time | Timestamp | Timestamptz - | Interval | Bytea => false, - Struct(t) => t.fields.iter().all(|dt| dt.mem_cmp_eq_value_enc()), - List { datatype } => datatype.mem_cmp_eq_value_enc(), - } - } - pub fn new_struct(fields: Vec, field_names: Vec) -> Self { Self::Struct( StructType { @@ -383,6 +381,7 @@ impl DataType { DataType::Timestamptz => ScalarImpl::Int64(i64::MIN), DataType::Decimal => ScalarImpl::Decimal(Decimal::NegativeInf), DataType::Interval => ScalarImpl::Interval(IntervalUnit::MIN), + DataType::Jsonb => ScalarImpl::Jsonb(JsonbVal::dummy()), // NOT `min` #7981 DataType::Struct(data_types) => ScalarImpl::Struct(StructValue::new( data_types .fields @@ -479,6 +478,7 @@ macro_rules! for_all_scalar_variants { { NaiveDate, naivedate, NaiveDateWrapper, NaiveDateWrapper }, { NaiveDateTime, naivedatetime, NaiveDateTimeWrapper, NaiveDateTimeWrapper }, { NaiveTime, naivetime, NaiveTimeWrapper, NaiveTimeWrapper }, + { Jsonb, jsonb, JsonbVal, JsonbRef<'scalar> }, { Struct, struct, StructValue, StructRef<'scalar> }, { List, list, ListValue, ListRef<'scalar> }, { Bytea, bytea, Box<[u8]>, &'scalar [u8] } @@ -802,7 +802,6 @@ macro_rules! scalar_impl_hash { } } - #[expect(clippy::derive_hash_xor_eq)] impl Hash for ScalarImpl { fn hash(&self, state: &mut H) { match self { @@ -865,6 +864,7 @@ impl ScalarRefImpl<'_> { v.0.num_seconds_from_midnight().serialize(&mut *ser)?; v.0.nanosecond().serialize(ser)?; } + Self::Jsonb(v) => v.memcmp_serialize(ser)?, Self::Struct(v) => v.memcmp_serialize(ser)?, Self::List(v) => v.memcmp_serialize(ser)?, }; @@ -916,6 +916,7 @@ impl ScalarImpl { NaiveDateWrapper::with_days(days) .map_err(|e| memcomparable::Error::Message(format!("{e}")))? }), + Ty::Jsonb => Self::Jsonb(JsonbVal::memcmp_deserialize(de)?), Ty::Struct(t) => StructValue::memcmp_deserialize(&t.fields, de)?.to_scalar_value(), Ty::List { datatype } => ListValue::memcmp_deserialize(datatype, de)?.to_scalar_value(), }) @@ -961,6 +962,7 @@ impl ScalarImpl { .iter() .map(|field| Self::encoding_data_size(field, deserializer)) .try_fold(0, |a, b| b.map(|b| a + b))?, + DataType::Jsonb => deserializer.skip_bytes()?, DataType::Varchar => deserializer.skip_bytes()?, DataType::Bytea => deserializer.skip_bytes()?, }; @@ -997,6 +999,7 @@ pub fn literal_type_match(data_type: &DataType, literal: Option<&ScalarImpl>) -> | (DataType::Timestamptz, ScalarImpl::Int64(_)) | (DataType::Decimal, ScalarImpl::Decimal(_)) | (DataType::Interval, ScalarImpl::Interval(_)) + | (DataType::Jsonb, ScalarImpl::Jsonb(_)) | (DataType::Struct { .. }, ScalarImpl::Struct(_)) | (DataType::List { .. }, ScalarImpl::List(_)) ) @@ -1198,6 +1201,7 @@ mod tests { ScalarImpl::Interval(IntervalUnit::new(2, 3, 3333)), DataType::Interval, ), + DataTypeName::Jsonb => (ScalarImpl::Jsonb(JsonbVal::dummy()), DataType::Jsonb), DataTypeName::Struct => ( ScalarImpl::Struct(StructValue::new(vec![ ScalarImpl::Int64(233).into(), diff --git a/src/common/src/types/native_type.rs b/src/common/src/types/native_type.rs index 8d09ab16870fe..ed42ead8865e8 100644 --- a/src/common/src/types/native_type.rs +++ b/src/common/src/types/native_type.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/common/src/types/ops.rs b/src/common/src/types/ops.rs index 43b3b2c5ad33b..a2ac37d899a56 100644 --- a/src/common/src/types/ops.rs +++ b/src/common/src/types/ops.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/types/ordered_float.rs b/src/common/src/types/ordered_float.rs index 8bbf2df6080a3..20e0412e23ff1 100644 --- a/src/common/src/types/ordered_float.rs +++ b/src/common/src/types/ordered_float.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -54,7 +54,7 @@ use core::str::FromStr; pub use num_traits::Float; use num_traits::{ AsPrimitive, Bounded, CheckedAdd, CheckedDiv, CheckedMul, CheckedNeg, CheckedRem, CheckedSub, - FromPrimitive, Num, NumCast, One, Signed, ToPrimitive, Zero, + FromPrimitive, Num, NumCast, One, Pow, Signed, ToPrimitive, Zero, }; // masks for the parts of the IEEE 754 float @@ -389,6 +389,17 @@ impl_ordered_float_binop! {Mul, mul, MulAssign, mul_assign} impl_ordered_float_binop! {Div, div, DivAssign, div_assign} impl_ordered_float_binop! {Rem, rem, RemAssign, rem_assign} +impl Pow> for OrderedFloat +where + T: Float, +{ + type Output = OrderedFloat; + + fn pow(self, rhs: Self) -> Self::Output { + OrderedFloat(self.0.powf(rhs.0)) + } +} + impl CheckedAdd for OrderedFloat where T: Float, diff --git a/src/common/src/types/postgres_type.rs b/src/common/src/types/postgres_type.rs index bc56c5f2eef15..c520260ca91f8 100644 --- a/src/common/src/types/postgres_type.rs +++ b/src/common/src/types/postgres_type.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -31,6 +31,7 @@ impl DataType { | DataType::Varchar | DataType::Bytea | DataType::Interval + | DataType::Jsonb | DataType::Struct(_) | DataType::List { .. } => -1, } @@ -57,6 +58,7 @@ impl DataType { 1114 => Ok(DataType::Timestamp), 1184 => Ok(DataType::Timestamptz), 1186 => Ok(DataType::Interval), + 3802 => Ok(DataType::Jsonb), 1000 => Ok(DataType::List { datatype: Box::new(DataType::Boolean), }), @@ -99,6 +101,9 @@ impl DataType { 1187 => Ok(DataType::List { datatype: Box::new(DataType::Interval), }), + 3807 => Ok(DataType::List { + datatype: Box::new(DataType::Jsonb), + }), _ => Err(ErrorCode::InternalError(format!("Unsupported oid {}", oid)).into()), } } @@ -121,6 +126,7 @@ impl DataType { // NOTE: Struct type don't have oid in postgres, here we use varchar oid so that struct // will be considered as a varchar. DataType::Struct(_) => 1043, + DataType::Jsonb => 3802, DataType::Bytea => 17, DataType::List { datatype } => match unnested_list_type(datatype.as_ref().clone()) { DataType::Boolean => 1000, @@ -137,6 +143,7 @@ impl DataType { DataType::Timestamp => 1115, DataType::Timestamptz => 1185, DataType::Interval => 1187, + DataType::Jsonb => 3807, DataType::Struct(_) => 1015, DataType::List { .. 
} => unreachable!("Never reach here!"), }, diff --git a/src/common/src/types/scalar_impl.rs b/src/common/src/types/scalar_impl.rs index 2230be54f41d0..78444daf4e5e0 100644 --- a/src/common/src/types/scalar_impl.rs +++ b/src/common/src/types/scalar_impl.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/types/struct_type.rs b/src/common/src/types/struct_type.rs index b861a77f363c8..45d3664f54b4f 100644 --- a/src/common/src/types/struct_type.rs +++ b/src/common/src/types/struct_type.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ use itertools::Itertools; use super::DataType; use crate::array::ArrayMeta; +use crate::util::iter_util::ZipEqFast; /// Details about a struct type. There are 2 cases for a struct: /// 1. `field_names.len() == fields.len()`: it represents a struct with named fields, e.g. @@ -61,7 +62,7 @@ impl Display for StructType { f, "struct<{}>", (self.fields.iter()) - .zip_eq(self.field_names.iter()) + .zip_eq_fast(self.field_names.iter()) .map(|(d, s)| format!("{} {}", s, d)) .join(",") ) diff --git a/src/common/src/types/successor.rs b/src/common/src/types/successor.rs new file mode 100644 index 0000000000000..f3c4b26eb9c7d --- /dev/null +++ b/src/common/src/types/successor.rs @@ -0,0 +1,84 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use chrono::Duration; + +use super::{NaiveDateTimeWrapper, NaiveDateWrapper, ScalarImpl}; + +/// A successor is a term that comes right after a particular value. Suppose n is a number (where n +/// belongs to any whole number), then the successor of n is 'n+1'. The other terminologies used for +/// a successor are just after, immediately after, and next value. +pub trait Successor { + /// Returns the successor of the current value if it exists, otherwise returns None. + fn successor(&self) -> Option + where + Self: Sized, + { + None + } +} + +impl Successor for i16 { + fn successor(&self) -> Option { + self.checked_add(1) + } +} + +impl Successor for i32 { + fn successor(&self) -> Option { + self.checked_add(1) + } +} + +impl Successor for i64 { + fn successor(&self) -> Option { + self.checked_add(1) + } +} + +impl Successor for NaiveDateTimeWrapper { + fn successor(&self) -> Option { + self.0 + .checked_add_signed(Duration::nanoseconds(1)) + .map(NaiveDateTimeWrapper) + } +} + +impl Successor for NaiveDateWrapper { + fn successor(&self) -> Option { + self.0 + .checked_add_signed(Duration::days(1)) + .map(NaiveDateWrapper) + } +} + +impl ScalarImpl { + /// Returns the successor of the current value if it exists. + /// + /// See also [`Successor`]. 
+ /// + /// The function may return None when: + /// 1. The current value is the maximum value of the type. + /// 2. The successor value of the type is not well-defined. + pub fn successor(&self) -> Option { + match self { + ScalarImpl::Int16(v) => v.successor().map(ScalarImpl::Int16), + ScalarImpl::Int32(v) => v.successor().map(ScalarImpl::Int32), + ScalarImpl::Int64(v) => v.successor().map(ScalarImpl::Int64), + ScalarImpl::NaiveDateTime(v) => v.successor().map(ScalarImpl::NaiveDateTime), + ScalarImpl::NaiveDate(v) => v.successor().map(ScalarImpl::NaiveDate), + _ => None, + } + } +} diff --git a/src/common/src/types/to_binary.rs b/src/common/src/types/to_binary.rs index 64757378d7e39..6ade3ed70f7b1 100644 --- a/src/common/src/types/to_binary.rs +++ b/src/common/src/types/to_binary.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -93,6 +93,7 @@ impl ToBinary for ScalarRefImpl<'_> { ScalarRefImpl::NaiveDateTime(v) => v.to_binary_with_type(ty), ScalarRefImpl::NaiveTime(v) => v.to_binary_with_type(ty), ScalarRefImpl::Bytea(v) => v.to_binary_with_type(ty), + ScalarRefImpl::Jsonb(v) => v.to_binary_with_type(ty), ScalarRefImpl::Struct(_) => todo!(), ScalarRefImpl::List(_) => todo!(), } diff --git a/src/common/src/types/to_text.rs b/src/common/src/types/to_text.rs index 18a03c7ba8fd4..e62b070bb7209 100644 --- a/src/common/src/types/to_text.rs +++ b/src/common/src/types/to_text.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/util/addr.rs b/src/common/src/util/addr.rs index c31ad8ed67878..a3050a8f66149 100644 --- a/src/common/src/util/addr.rs +++ b/src/common/src/util/addr.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,16 +16,9 @@ use std::net::SocketAddr; use std::str::FromStr; use risingwave_pb::common::HostAddress as ProstHostAddress; -use risingwave_pb::meta::MetaLeaderInfo; use crate::error::{internal_error, Result}; -pub fn leader_info_to_host_addr(mli: MetaLeaderInfo) -> HostAddr { - mli.node_address - .parse::() - .expect("invalid leader addr") -} - /// General host address and port. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct HostAddr { diff --git a/src/common/src/util/chunk_coalesce.rs b/src/common/src/util/chunk_coalesce.rs index bbc7789c4e4ef..ac5e053bb427c 100644 --- a/src/common/src/util/chunk_coalesce.rs +++ b/src/common/src/util/chunk_coalesce.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
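For the `Successor` trait introduced in `types/successor.rs` above, a short usage sketch (assuming `risingwave_common::types::{ScalarImpl, Successor}` are in scope): integers step by one, dates by one day, datetimes by one nanosecond, and everything else has no successor.

```rust
use risingwave_common::types::{ScalarImpl, Successor};

fn example() {
    // Integers: plain checked increment, `None` on overflow.
    assert_eq!(41i32.successor(), Some(42));
    assert_eq!(i32::MAX.successor(), None);

    // `ScalarImpl::successor` dispatches to the per-type impls above...
    assert_eq!(
        ScalarImpl::Int64(7).successor(),
        Some(ScalarImpl::Int64(8))
    );
    // ...and yields `None` for types without a well-defined successor.
    assert_eq!(ScalarImpl::Float32(1.0.into()).successor(), None);
}
```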
@@ -15,8 +15,7 @@ use std::iter::FusedIterator; use std::mem::swap; -use itertools::Itertools; - +use super::iter_util::ZipEqDebug; use crate::array::column::Column; use crate::array::{ArrayBuilderImpl, ArrayImpl, DataChunk}; use crate::row::Row; @@ -149,7 +148,7 @@ impl DataChunkBuilder { } fn do_append_one_row_from_datums(&mut self, datums: impl Iterator) { - for (array_builder, datum) in self.array_builders.iter_mut().zip_eq(datums) { + for (array_builder, datum) in self.array_builders.iter_mut().zip_eq_debug(datums) { array_builder.append_datum(datum); } self.buffered_count += 1; @@ -187,7 +186,7 @@ impl DataChunkBuilder { assert!(self.buffered_count < self.batch_size); self.ensure_builders(); - for (array_builder, (array, row_id)) in self.array_builders.iter_mut().zip_eq( + for (array_builder, (array, row_id)) in self.array_builders.iter_mut().zip_eq_debug( left_arrays .map(|array| (array, left_row_id)) .chain(right_arrays.map(|array| (array, right_row_id))), diff --git a/src/common/src/util/compress.rs b/src/common/src/util/compress.rs index 898665291ebba..0e402b4079852 100644 --- a/src/common/src/util/compress.rs +++ b/src/common/src/util/compress.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -use itertools::Itertools; +use super::iter_util::ZipEqFast; /// This function compresses sequential repeated data in a vector. The compression result contains /// two vectors, one for the last indices of sequential repeated elements, and another for the /// repeated data. For example, [14, 14, 14, 27, 27] will be compressed to [2, 4], [14, 27]. -pub fn compress_data(original_data: &[T]) -> (Vec, Vec) +pub fn compress_data(original_data: &[T]) -> (Vec, Vec) where T: PartialEq + Copy, { @@ -26,13 +26,13 @@ where for i in 1..original_data.len() { if original_data[i - 1] != original_data[i] { - original_indices.push(i as u64 - 1); + original_indices.push(i as u32 - 1); data.push(original_data[i - 1]); } } if let Some(&last) = original_data.last() { - original_indices.push(original_data.len() as u64 - 1); + original_indices.push(original_data.len() as u32 - 1); data.push(last); } @@ -40,16 +40,19 @@ where } /// Works in a reversed way as `compress_data`. 
-pub fn decompress_data(original_indices: &[u64], data: &[T]) -> Vec +pub fn decompress_data(original_indices: &[u32], data: &[T]) -> Vec where T: Copy, { match original_indices.last() { Some(last_idx) => { let mut original_data = Vec::with_capacity(*last_idx as usize + 1); - original_indices.iter().zip_eq(data).for_each(|(&idx, &x)| { - original_data.resize(idx as usize + 1, x); - }); + original_indices + .iter() + .zip_eq_fast(data) + .for_each(|(&idx, &x)| { + original_data.resize(idx as usize + 1, x); + }); original_data } None => Vec::new(), @@ -65,7 +68,7 @@ mod tests { // Simple let original_data = [3u32, 3, 3, 3, 3, 4, 4, 5, 5, 6, 7, 8, 8, 8, 9]; let (compressed_original_indices, compressed_data) = compress_data(&original_data); - let expect_original_indices = Vec::from([4u64, 6, 8, 9, 10, 13, 14]); + let expect_original_indices = Vec::from([4u32, 6, 8, 9, 10, 13, 14]); let expect_data = Vec::from([3u32, 4, 5, 6, 7, 8, 9]); assert_eq!(compressed_original_indices, expect_original_indices); assert_eq!(compressed_data, expect_data); @@ -81,7 +84,7 @@ mod tests { long_original_data[0] = 5; long_original_data[2046] = 5; let (compressed_original_indices, compressed_data) = compress_data(&long_original_data); - let expect_original_indices = Vec::from([0u64, 511, 1023, 1535, 2045, 2046, 2047]); + let expect_original_indices = Vec::from([0u32, 511, 1023, 1535, 2045, 2046, 2047]); let expect_data = Vec::from([5u32, 1, 2, 3, 4, 5, 4]); assert_eq!(compressed_original_indices, expect_original_indices); assert_eq!(compressed_data, expect_data); diff --git a/src/common/src/util/encoding_for_comparison.rs b/src/common/src/util/encoding_for_comparison.rs index 5200d1ac8a8df..10e845a16218b 100644 --- a/src/common/src/util/encoding_for_comparison.rs +++ b/src/common/src/util/encoding_for_comparison.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,27 +14,13 @@ use itertools::Itertools; +use super::iter_util::ZipEqFast; use crate::array::{ArrayImpl, DataChunk}; use crate::error::Result; use crate::row::OwnedRow; -use crate::types::{memcmp_serialize_datum_into, DataType, ScalarRefImpl}; +use crate::types::{memcmp_serialize_datum_into, ScalarRefImpl}; use crate::util::sort_util::{OrderPair, OrderType}; -/// This function is used to check whether we can perform encoding on this type. -/// TODO: based on `memcomparable`, we may support more data type in the future. 
-pub fn is_type_encodable(t: DataType) -> bool { - matches!( - t, - DataType::Boolean - | DataType::Int16 - | DataType::Int32 - | DataType::Int64 - | DataType::Float32 - | DataType::Float64 - | DataType::Varchar - ) -} - fn encode_value(value: Option>, order: &OrderType) -> Result> { let mut serializer = memcomparable::Serializer::new(vec![]); serializer.set_reverse(order == &OrderType::Descending); @@ -63,7 +49,7 @@ pub fn encode_chunk(chunk: &DataChunk, order_pairs: &[OrderPair]) -> Vec let mut encoded_chunk = vec![vec![]; chunk.capacity()]; for encoded_column in encoded_columns { - for (encoded_row, data) in encoded_chunk.iter_mut().zip_eq(encoded_column) { + for (encoded_row, data) in encoded_chunk.iter_mut().zip_eq_fast(encoded_column) { encoded_row.extend(data); } } diff --git a/src/common/src/util/env_var.rs b/src/common/src/util/env_var.rs index 74143deb9055d..166f75fe10201 100644 --- a/src/common/src/util/env_var.rs +++ b/src/common/src/util/env_var.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/util/epoch.rs b/src/common/src/util/epoch.rs index c20c01b36bb2b..3cfcd9976f222 100644 --- a/src/common/src/util/epoch.rs +++ b/src/common/src/util/epoch.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,12 +18,12 @@ use std::time::{Duration, SystemTime}; use parse_display::Display; -static UNIX_SINGULARITY_DATE_SEC: u64 = 1_617_235_200; +static UNIX_RISINGWAVE_DATE_SEC: u64 = 1_617_235_200; -/// `UNIX_SINGULARITY_DATE_EPOCH` represents the singularity date of the UNIX epoch: +/// [`UNIX_RISINGWAVE_DATE_EPOCH`] represents the risingwave date of the UNIX epoch: /// 2021-04-01T00:00:00Z. -pub static UNIX_SINGULARITY_DATE_EPOCH: LazyLock = - LazyLock::new(|| SystemTime::UNIX_EPOCH + Duration::from_secs(UNIX_SINGULARITY_DATE_SEC)); +pub static UNIX_RISINGWAVE_DATE_EPOCH: LazyLock = + LazyLock::new(|| SystemTime::UNIX_EPOCH + Duration::from_secs(UNIX_RISINGWAVE_DATE_SEC)); #[derive(Clone, Copy, Debug, Display, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Epoch(pub u64); @@ -68,19 +68,19 @@ impl Epoch { } pub fn physical_now() -> u64 { - UNIX_SINGULARITY_DATE_EPOCH + UNIX_RISINGWAVE_DATE_EPOCH .elapsed() - .expect("system clock set earlier than singularity date!") + .expect("system clock set earlier than risingwave date!") .as_millis() as u64 } pub fn as_unix_millis(&self) -> u64 { - UNIX_SINGULARITY_DATE_SEC * 1000 + self.physical_time() + UNIX_RISINGWAVE_DATE_SEC * 1000 + self.physical_time() } /// Returns the epoch in real system time. 
pub fn as_system_time(&self) -> SystemTime { - *UNIX_SINGULARITY_DATE_EPOCH + Duration::from_millis(self.physical_time()) + *UNIX_RISINGWAVE_DATE_EPOCH + Duration::from_millis(self.physical_time()) } /// Returns the epoch subtract `relative_time_ms`, which used for ttl to get epoch corresponding @@ -131,11 +131,11 @@ mod tests { use super::*; #[test] - fn test_singularity_system_time() { + fn test_risingwave_system_time() { let utc = Utc.with_ymd_and_hms(2021, 4, 1, 0, 0, 0).unwrap(); - let singularity_dt = Local.from_utc_datetime(&utc.naive_utc()); - let singularity_st = SystemTime::from(singularity_dt); - assert_eq!(singularity_st, *UNIX_SINGULARITY_DATE_EPOCH); + let risingwave_dt = Local.from_utc_datetime(&utc.naive_utc()); + let risingwave_st = SystemTime::from(risingwave_dt); + assert_eq!(risingwave_st, *UNIX_RISINGWAVE_DATE_EPOCH); } #[test] diff --git a/src/common/src/util/future_utils.rs b/src/common/src/util/future_utils.rs index 402ad596d98b9..0dec62f91e128 100644 --- a/src/common/src/util/future_utils.rs +++ b/src/common/src/util/future_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/util/hash_util.rs b/src/common/src/util/hash_util.rs index 1f9ffbdaf3d03..077c18aac8f3a 100644 --- a/src/common/src/util/hash_util.rs +++ b/src/common/src/util/hash_util.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/util/iter_util.rs b/src/common/src/util/iter_util.rs new file mode 100644 index 0000000000000..037673cc72eed --- /dev/null +++ b/src/common/src/util/iter_util.rs @@ -0,0 +1,56 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub trait ZipEqFast: ExactSizeIterator + Sized +where + B::IntoIter: ExactSizeIterator, +{ + /// A specialized version of `zip_eq` for [`ExactSizeIterator`]. + /// + /// It's a separate trait because Rust doesn't support specialization yet. + /// See [tracking issue for specialization (RFC 1210)](https://github.com/rust-lang/rust/issues/31844). + #[expect(clippy::disallowed_methods)] + fn zip_eq_fast(self, other: B) -> impl ExactSizeIterator { + let other = other.into_iter(); + assert_eq!(self.len(), other.len()); + self.zip(other) + } +} + +impl ZipEqFast for A where B::IntoIter: ExactSizeIterator {} + +pub trait ZipEqDebug: itertools::Itertools + Sized { + /// Use `zip_eq` when `debug_assertions` is enabled, otherwise use `zip`. + /// + /// It's because `zip_eq` has a very large overhead of checking each item in the iterators. 
+ #[expect(clippy::disallowed_methods)] + fn zip_eq_debug(self, other: B) -> impl Iterator { + #[cfg(debug_assertions)] + return self.zip_eq(other); + #[cfg(not(debug_assertions))] + return self.zip(other); + } +} + +impl ZipEqDebug for A {} + +pub fn zip_eq_fast(a: A, b: B) -> impl Iterator +where + A: IntoIterator, + B: IntoIterator, + A::IntoIter: ExactSizeIterator, + B::IntoIter: ExactSizeIterator, +{ + a.into_iter().zip_eq_fast(b) +} diff --git a/src/common/src/util/match_util.rs b/src/common/src/util/match_util.rs index 0e6916a58f669..26982812d6499 100644 --- a/src/common/src/util/match_util.rs +++ b/src/common/src/util/match_util.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/util/mod.rs b/src/common/src/util/mod.rs index 3fe66c5d5e8da..310c6d047e913 100644 --- a/src/common/src/util/mod.rs +++ b/src/common/src/util/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ pub mod env_var; pub mod epoch; mod future_utils; pub mod hash_util; +pub mod iter_util; pub mod ordered; pub mod prost; pub mod resource_util; diff --git a/src/common/src/util/ordered/mod.rs b/src/common/src/util/ordered/mod.rs index 7c7331eba694a..ee3629b5803e2 100644 --- a/src/common/src/util/ordered/mod.rs +++ b/src/common/src/util/ordered/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,10 +16,10 @@ mod serde; use std::cmp::Reverse; -use itertools::Itertools; use OrderedDatum::{NormalOrder, ReversedOrder}; pub use self::serde::*; +use super::iter_util::ZipEqFast; use crate::row::OwnedRow; use crate::types::{memcmp_serialize_datum_into, Datum}; use crate::util::sort_util::OrderType; @@ -61,7 +61,7 @@ impl OrderedRow { OrderedRow( row.into_inner() .into_iter() - .zip_eq(order_types.iter()) + .zip_eq_fast(order_types.iter()) .map(|(datum, order_type)| match order_type { OrderType::Ascending => NormalOrder(datum), OrderType::Descending => ReversedOrder(Reverse(datum)), diff --git a/src/common/src/util/ordered/serde.rs b/src/common/src/util/ordered/serde.rs index a3a19dd7d9072..ac878c02418d3 100644 --- a/src/common/src/util/ordered/serde.rs +++ b/src/common/src/util/ordered/serde.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,12 +15,12 @@ use std::borrow::Cow; use bytes::BufMut; -use itertools::Itertools; use crate::row::{OwnedRow, Row}; use crate::types::{ memcmp_deserialize_datum_from, memcmp_serialize_datum_into, DataType, ToDatumRef, }; +use crate::util::iter_util::{ZipEqDebug, ZipEqFast}; use crate::util::sort_util::OrderType; /// `OrderedRowSerde` is responsible for serializing and deserializing Ordered Row. 
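Many call sites in this change swap `itertools::zip_eq` for the new `util::iter_util` helpers. A small usage sketch, assuming the module is in scope as `risingwave_common::util::iter_util`:

```rust
use risingwave_common::util::iter_util::{zip_eq_fast, ZipEqDebug, ZipEqFast};

fn example() {
    let xs = vec![1, 2, 3];
    let ys = ["a", "b", "c"];

    // `zip_eq_fast`: both sides are `ExactSizeIterator`s, so the equal-length
    // check happens once up front instead of per item.
    for (x, y) in xs.iter().zip_eq_fast(ys.iter()) {
        println!("{x} {y}");
    }

    // `zip_eq_debug`: the length check only runs under `debug_assertions`;
    // release builds fall back to a plain `zip`.
    for (x, y) in xs.iter().map(|x| x * 2).zip_eq_debug(ys.iter()) {
        println!("{x} {y}");
    }

    // Free-function form, for when the extension-trait import is not wanted.
    let _pairs: Vec<_> = zip_eq_fast(&xs, &ys).collect();
}
```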
@@ -64,7 +64,7 @@ impl OrderedRowSerde { datum_refs: impl Iterator, mut append_to: impl BufMut, ) { - for (datum, order_type) in datum_refs.zip_eq(self.order_types.iter()) { + for (datum, order_type) in datum_refs.zip_eq_debug(self.order_types.iter()) { let mut serializer = memcomparable::Serializer::new(&mut append_to); serializer.set_reverse(*order_type == OrderType::Descending); memcmp_serialize_datum_into(datum, &mut serializer).unwrap(); @@ -74,7 +74,7 @@ impl OrderedRowSerde { pub fn deserialize(&self, data: &[u8]) -> memcomparable::Result { let mut values = Vec::with_capacity(self.schema.len()); let mut deserializer = memcomparable::Deserializer::new(data); - for (data_type, order_type) in self.schema.iter().zip_eq(self.order_types.iter()) { + for (data_type, order_type) in self.schema.iter().zip_eq_fast(self.order_types.iter()) { deserializer.set_reverse(*order_type == OrderType::Descending); let datum = memcmp_deserialize_datum_from(data_type, &mut deserializer)?; values.push(datum); diff --git a/src/common/src/util/prost.rs b/src/common/src/util/prost.rs index 071eb06d854db..60f6a85ed4935 100644 --- a/src/common/src/util/prost.rs +++ b/src/common/src/util/prost.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/util/resource_util.rs b/src/common/src/util/resource_util.rs index 9674871940c26..9b40e720eaae0 100644 --- a/src/common/src/util/resource_util.rs +++ b/src/common/src/util/resource_util.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -109,15 +109,10 @@ pub mod memory { match get_memory_used_in_container(super::util::get_cgroup_version()) { Ok(mem_used) => std::cmp::min(mem_used, get_system_memory_used()), Err(err) => { - match err.kind() { - std::io::ErrorKind::InvalidData => { - println!("Invalid data error: {}", err) - } - std::io::ErrorKind::NotFound => { - println!("Cgroup interface file was not found: {}", err) - } - _ => panic!("Unexpected error: {}", err), - } + tracing::warn!( + err = err.to_string(), + "failed to get memory used in container, use system value instead" + ); get_system_memory_used() } } @@ -147,15 +142,10 @@ pub mod memory { match get_container_memory_limit(super::util::get_cgroup_version()) { Ok(mem_limit) => std::cmp::min(mem_limit, get_system_memory()), Err(err) => { - match err.kind() { - std::io::ErrorKind::InvalidData => { - println!("Invalid data error: {}", err) - } - std::io::ErrorKind::NotFound => { - println!("Cgroup interface file was not found: {}", err) - } - _ => panic!("Unexpected error: {}", err), - } + tracing::warn!( + err = err.to_string(), + "failed to get memory available in container, use system value instead" + ); get_system_memory() } } @@ -241,15 +231,10 @@ pub mod cpu { match get_container_cpu_limit(super::util::get_cgroup_version()) { Ok(cpu_limit) => cpu_limit, Err(err) => { - match err.kind() { - std::io::ErrorKind::InvalidData => { - println!("Invalid data error: {}", err) - } - std::io::ErrorKind::NotFound => { - println!("Cgroup interface file was not found: {}", err) - } - _ => panic!("Unexpected error: {}", err), - }; + tracing::warn!( + err = err.to_string(), + "failed to get cpu quota in container, use system value instead" + ); get_system_cpu() } } diff --git a/src/common/src/util/scan_range.rs b/src/common/src/util/scan_range.rs index ed07bcd64fbf2..24ddeb88a20eb 100644 --- a/src/common/src/util/scan_range.rs +++ b/src/common/src/util/scan_range.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/util/schema_check.rs b/src/common/src/util/schema_check.rs index 6111bb33577a5..981d7370790dc 100644 --- a/src/common/src/util/schema_check.rs +++ b/src/common/src/util/schema_check.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/util/sort_util.rs b/src/common/src/util/sort_util.rs index 00f040d8096f8..080b58136ce80 100644 --- a/src/common/src/util/sort_util.rs +++ b/src/common/src/util/sort_util.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
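The `resource_util` hunks above replace ad-hoc `println!` handling with a warn-and-fall-back policy: if the cgroup limit cannot be read, log a warning and use the host value. A minimal, dependency-free sketch of that policy for the memory paths (the helper is hypothetical; the real code uses `tracing::warn!` and the cgroup readers shown above, and the CPU path does not clamp to the system value):

```rust
use std::io;

/// Hypothetical helper mirroring the fallback used by the memory functions in
/// `resource_util`: prefer the container (cgroup) limit when readable, never
/// exceed the system value, and fall back to the system value on any error.
fn effective_limit(
    read_container_limit: impl FnOnce() -> io::Result<u64>,
    system_value: u64,
) -> u64 {
    match read_container_limit() {
        Ok(limit) => limit.min(system_value),
        Err(err) => {
            // The real code logs via `tracing::warn!`; eprintln keeps this sketch standalone.
            eprintln!("failed to read cgroup limit ({err}), using system value");
            system_value
        }
    }
}

fn main() {
    // Container limit readable and smaller than the host value: use it.
    assert_eq!(effective_limit(|| Ok(4096), 8192), 4096);
    // Cgroup interface missing (e.g. bare metal): fall back to the host value.
    let missing = || -> io::Result<u64> {
        Err(io::Error::new(io::ErrorKind::NotFound, "no cgroup file"))
    };
    assert_eq!(effective_limit(missing, 8192), 8192);
}
```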
@@ -20,8 +20,6 @@ use risingwave_pb::plan_common::{ColumnOrder, OrderType as ProstOrderType}; use crate::array::{Array, ArrayImpl, DataChunk}; use crate::error::ErrorCode::InternalError; use crate::error::Result; -use crate::row::OwnedRow; -use crate::types::ScalarImpl; #[derive(PartialEq, Eq, Copy, Clone, Debug)] pub enum OrderType { @@ -146,52 +144,6 @@ where } } -pub fn compare_rows(lhs: &OwnedRow, rhs: &OwnedRow, order_pairs: &[OrderPair]) -> Result { - for order_pair in order_pairs.iter() { - let lhs = lhs[order_pair.column_idx].as_ref(); - let rhs = rhs[order_pair.column_idx].as_ref(); - - macro_rules! gen_match { - ($lhs: ident, $rhs: ident, [$( $tt: ident), *]) => { - match ($lhs, $rhs) { - $((Some(ScalarImpl::$tt(l)), Some(ScalarImpl::$tt(r))) => Ok(compare_values(Some(l), Some(r), &order_pair.order_type)),)* - $((Some(ScalarImpl::$tt(l)), None) => Ok(compare_values(Some(l), None, &order_pair.order_type)),)* - $((None, Some(ScalarImpl::$tt(r))) => Ok(compare_values(None, Some(r), &order_pair.order_type)),)* - (None, None) => Ok(compare_values::<()>(None, None, &order_pair.order_type)), - (Some(l), Some(r)) => Err(InternalError(format!("Unmatched scalar types, lhs is: {:?}, rhs is: {:?}", l, r))), - }? - } - } - - let res = gen_match!( - lhs, - rhs, - [ - Int16, - Int32, - Int64, - Float32, - Float64, - Utf8, - Bool, - Decimal, - Interval, - NaiveDate, - NaiveDateTime, - NaiveTime, - Struct, - List, - Bytea - ] - ); - - if res != Ordering::Equal { - return Ok(res); - } - } - Ok(Ordering::Equal) -} - fn compare_values_in_array<'a, T>( lhs_array: &'a T, lhs_idx: usize, @@ -221,34 +173,14 @@ pub fn compare_rows_in_chunk( let lhs_array = lhs_data_chunk.column_at(order_pair.column_idx).array(); let rhs_array = rhs_data_chunk.column_at(order_pair.column_idx).array(); macro_rules! gen_match { - ($lhs: ident, $rhs: ident, [$( $tt: ident), *]) => { - match ($lhs, $rhs) { - $((ArrayImpl::$tt(lhs_inner), ArrayImpl::$tt(rhs_inner)) => Ok(compare_values_in_array(lhs_inner, lhs_idx, rhs_inner, rhs_idx, &order_pair.order_type)),)* + ( $( { $variant_name:ident, $suffix_name:ident, $array:ty, $builder:ty } ),*) => { + match (lhs_array.as_ref(), rhs_array.as_ref()) { + $((ArrayImpl::$variant_name(lhs_inner), ArrayImpl::$variant_name(rhs_inner)) => Ok(compare_values_in_array(lhs_inner, lhs_idx, rhs_inner, rhs_idx, &order_pair.order_type)),)* (l_arr, r_arr) => Err(InternalError(format!("Unmatched array types, lhs array is: {}, rhs array is: {}", l_arr.get_ident(), r_arr.get_ident()))), }? } } - let (lhs_array, rhs_array) = (lhs_array.as_ref(), rhs_array.as_ref()); - let res = gen_match!( - lhs_array, - rhs_array, - [ - Int16, - Int32, - Int64, - Float32, - Float64, - Utf8, - Bool, - Decimal, - Interval, - NaiveDate, - NaiveDateTime, - NaiveTime, - Struct, - List - ] - ); + let res = for_all_variants! 
{ gen_match }; if res != Ordering::Equal { return Ok(res); } @@ -262,38 +194,12 @@ mod tests { use itertools::Itertools; - use super::{compare_rows, OrderPair, OrderType}; + use super::{OrderPair, OrderType}; use crate::array::{DataChunk, ListValue, StructValue}; use crate::row::{OwnedRow, Row}; use crate::types::{DataType, ScalarImpl}; use crate::util::sort_util::compare_rows_in_chunk; - #[test] - fn test_compare_rows() { - let v10 = Some(ScalarImpl::Int32(42)); - let v11 = Some(ScalarImpl::Utf8("hello".into())); - let v12 = Some(ScalarImpl::Float32(4.0.into())); - let v20 = Some(ScalarImpl::Int32(42)); - let v21 = Some(ScalarImpl::Utf8("hell".into())); - let v22 = Some(ScalarImpl::Float32(3.0.into())); - - let row1 = OwnedRow::new(vec![v10, v11, v12]); - let row2 = OwnedRow::new(vec![v20, v21, v22]); - let order_pairs = vec![ - OrderPair::new(0, OrderType::Ascending), - OrderPair::new(1, OrderType::Descending), - ]; - - assert_eq!( - Ordering::Equal, - compare_rows(&row1, &row1, &order_pairs).unwrap() - ); - assert_eq!( - Ordering::Less, - compare_rows(&row1, &row2, &order_pairs).unwrap() - ); - } - #[test] fn test_compare_rows_in_chunk() { let v10 = Some(ScalarImpl::Int32(42)); @@ -374,14 +280,6 @@ mod tests { let order_pairs = (0..row1.len()) .map(|i| OrderPair::new(i, OrderType::Ascending)) .collect_vec(); - assert_eq!( - Ordering::Equal, - compare_rows(&row1, &row1, &order_pairs).unwrap() - ); - assert_eq!( - Ordering::Less, - compare_rows(&row1, &row2, &order_pairs).unwrap() - ); let chunk = DataChunk::from_rows( &[row1, row2], diff --git a/src/common/src/util/stream_cancel.rs b/src/common/src/util/stream_cancel.rs index 4242287dd9eea..80915723bdc85 100644 --- a/src/common/src/util/stream_cancel.rs +++ b/src/common/src/util/stream_cancel.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/common/src/util/value_encoding/column_aware_row_encoding.rs b/src/common/src/util/value_encoding/column_aware_row_encoding.rs new file mode 100644 index 0000000000000..713938295604d --- /dev/null +++ b/src/common/src/util/value_encoding/column_aware_row_encoding.rs @@ -0,0 +1,301 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Column-aware row encoding is an encoding format which converts row into a binary form that +//! remains explanable after schema changes +//! Current design of flag just contains 1 meaningful information: the 2 LSBs represents +//! the size of offsets: `u8`/`u16`/`u32` +//! We have a `Serializer` and a `Deserializer` for each schema of `Row`, which can be reused +//! 
until schema changes + +use std::collections::BTreeMap; + +use bitflags::bitflags; + +use super::*; +use crate::catalog::ColumnId; +use crate::row::Row; + +// deprecated design of have a Width to represent number of datum +// may be considered should `ColumnId` representation be optimized +// #[derive(Clone, Copy)] +// enum Width { +// Mid(u8), +// Large(u16), +// Extra(u32), +// } + +bitflags! { + struct Flag: u8 { + const EMPTY = 0b_1000_0000; + const OFFSET8 = 0b01; + const OFFSET16 = 0b10; + const OFFSET32 = 0b11; + } +} + +/// `RowEncoding` holds row-specific information for Column-Aware Encoding +struct RowEncoding { + flag: Flag, + offsets: Vec, + buf: Vec, +} + +impl RowEncoding { + fn new() -> Self { + RowEncoding { + flag: Flag::EMPTY, + offsets: vec![], + buf: vec![], + } + } + + fn set_offsets(&mut self, usize_offsets: &[usize], max_offset: usize) { + debug_assert!(self.offsets.is_empty()); + match max_offset { + _n @ ..=const { u8::MAX as usize } => { + self.flag |= Flag::OFFSET8; + usize_offsets + .iter() + .for_each(|m| self.offsets.put_u8(*m as u8)); + } + _n @ ..=const { u16::MAX as usize } => { + self.flag |= Flag::OFFSET16; + usize_offsets + .iter() + .for_each(|m| self.offsets.put_u16(*m as u16)); + } + _n @ ..=const { u32::MAX as usize } => { + self.flag |= Flag::OFFSET32; + usize_offsets + .iter() + .for_each(|m| self.offsets.put_u32(*m as u32)); + } + _ => unreachable!("encoding length exceeds u32"), + } + } + + fn encode(&mut self, datum_refs: impl Iterator) { + debug_assert!( + self.buf.is_empty(), + "should not encode one RowEncoding object multiple times." + ); + let mut offset_usize = vec![]; + for datum in datum_refs { + offset_usize.push(self.buf.len()); + if let Some(v) = datum.to_datum_ref() { + serialize_scalar(v, &mut self.buf); + } + } + let max_offset = *offset_usize + .last() + .expect("should encode at least one column"); + self.set_offsets(&offset_usize, max_offset); + } +} + +/// Column-Aware `Serializer` holds schema related information, and shall be +/// created again once the schema changes +pub struct Serializer { + encoded_column_ids: Vec, + datum_num: u32, +} + +impl Serializer { + /// Create a new `Serializer` with current `column_ids` + pub fn new(column_ids: &[ColumnId]) -> Self { + // currently we hard-code ColumnId as i32 + let mut encoded_column_ids = Vec::with_capacity(column_ids.len() * 4); + for id in column_ids { + encoded_column_ids.put_i32_le(id.get_id()); + } + let datum_num = column_ids.len() as u32; + Self { + encoded_column_ids, + datum_num, + } + } + + /// Serialize a row under the schema of the Serializer + pub fn serialize_row_column_aware(&self, row: impl Row) -> Vec { + assert_eq!(row.len(), self.datum_num as usize); + let mut encoding = RowEncoding::new(); + encoding.encode(row.iter()); + self.serialize(encoding) + } + + fn serialize(&self, encoding: RowEncoding) -> Vec { + let mut row_bytes = Vec::with_capacity( + 5 + self.encoded_column_ids.len() + encoding.offsets.len() + encoding.buf.len(), /* 5 comes from u8+u32 */ + ); + row_bytes.put_u8(encoding.flag.bits); + row_bytes.put_u32_le(self.datum_num); + row_bytes.extend(&self.encoded_column_ids); + row_bytes.extend(&encoding.offsets); + row_bytes.extend(&encoding.buf); + + row_bytes + } +} + +/// Column-Aware `Deserializer` holds needed `ColumnIds` and their corresponding schema +/// Should non-null default values be specified, a new field could be added to Deserializer +pub struct Deserializer<'a> { + needed_column_ids: BTreeMap, + schema: &'a [DataType], +} + +impl<'a> 
Deserializer<'a> { + pub fn new(column_ids: &'a [ColumnId], schema: &'a [DataType]) -> Self { + assert_eq!(column_ids.len(), schema.len()); + Self { + needed_column_ids: column_ids + .iter() + .enumerate() + .map(|(i, c)| (c.get_id(), i)) + .collect::>(), + schema, + } + } + + pub fn decode(&self, mut encoded_bytes: &[u8]) -> Result> { + let flag = Flag::from_bits(encoded_bytes.get_u8()).expect("should be a valid flag"); + let offset_bytes = match flag - Flag::EMPTY { + Flag::OFFSET8 => 1, + Flag::OFFSET16 => 2, + Flag::OFFSET32 => 4, + _ => return Err(ValueEncodingError::InvalidFlag(flag.bits)), + }; + let datum_num = encoded_bytes.get_u32_le() as usize; + let offsets_start_idx = 4 * datum_num; + let data_start_idx = offsets_start_idx + datum_num * offset_bytes; + let offsets = &encoded_bytes[offsets_start_idx..data_start_idx]; + let data = &encoded_bytes[data_start_idx..]; + let mut datums = vec![None; self.schema.len()]; + for i in 0..datum_num { + let this_id = encoded_bytes.get_i32_le(); + if let Some(&decoded_idx) = self.needed_column_ids.get(&this_id) { + let this_offset_start_idx = i * offset_bytes; + let mut this_offset_slice = + &offsets[this_offset_start_idx..(this_offset_start_idx + offset_bytes)]; + let this_offset = deserialize_width(offset_bytes, &mut this_offset_slice); + let data = if i + 1 < datum_num { + let mut next_offset_slice = &offsets[(this_offset_start_idx + offset_bytes) + ..(this_offset_start_idx + 2 * offset_bytes)]; + let next_offset = deserialize_width(offset_bytes, &mut next_offset_slice); + if this_offset == next_offset { + None + } else { + let mut data_slice = &data[this_offset..next_offset]; + Some(deserialize_value( + &self.schema[decoded_idx], + &mut data_slice, + )?) + } + } else if this_offset == data.len() { + None + } else { + let mut data_slice = &data[this_offset..]; + Some(deserialize_value( + &self.schema[decoded_idx], + &mut data_slice, + )?) 
+ }; + datums[decoded_idx] = data; + } + } + Ok(datums) + } +} + +fn deserialize_width(len: usize, data: &mut impl Buf) -> usize { + match len { + 1 => data.get_u8() as usize, + 2 => data.get_u16_le() as usize, + 4 => data.get_u32_le() as usize, + _ => unreachable!("Width's len should be either 1, 2, or 4"), + } +} + +#[cfg(test)] +mod tests { + use column_aware_row_encoding; + + use super::*; + use crate::catalog::ColumnId; + use crate::row::OwnedRow; + use crate::types::ScalarImpl::*; + + #[test] + fn test_row_encoding() { + let column_ids = vec![ColumnId::new(0), ColumnId::new(1)]; + let row1 = OwnedRow::new(vec![Some(Int16(5)), Some(Utf8("abc".into()))]); + let row2 = OwnedRow::new(vec![Some(Int16(5)), Some(Utf8("abd".into()))]); + let row3 = OwnedRow::new(vec![Some(Int16(6)), Some(Utf8("abc".into()))]); + let rows = vec![row1, row2, row3]; + let mut array = vec![]; + let serializer = column_aware_row_encoding::Serializer::new(&column_ids); + for row in &rows { + let row_bytes = serializer.serialize_row_column_aware(row); + array.push(row_bytes); + } + let zero_le_bytes = 0_i32.to_le_bytes(); + let one_le_bytes = 1_i32.to_le_bytes(); + + assert_eq!( + array[0], + [ + 0b10000001, // flag mid WW mid BB + 2, + 0, + 0, + 0, // column nums + zero_le_bytes[0], // start id 0 + zero_le_bytes[1], + zero_le_bytes[2], + zero_le_bytes[3], + one_le_bytes[0], // start id 1 + one_le_bytes[1], + one_le_bytes[2], + one_le_bytes[3], + 0, // offset0: 0 + 2, // offset1: 2 + 5, // i16: 5 + 0, + 3, // str: abc + 0, + 0, + 0, + b'a', + b'b', + b'c' + ] + ); + } + #[test] + fn test_row_decoding() { + let column_ids = vec![ColumnId::new(0), ColumnId::new(1)]; + let row1 = OwnedRow::new(vec![Some(Int16(5)), Some(Utf8("abc".into()))]); + let serializer = column_aware_row_encoding::Serializer::new(&column_ids); + let row_bytes = serializer.serialize_row_column_aware(row1); + let data_types = vec![DataType::Int16, DataType::Varchar]; + let deserializer = + column_aware_row_encoding::Deserializer::new(&column_ids[..], &data_types[..]); + let decoded = deserializer.decode(&row_bytes[..]); + assert_eq!( + decoded.unwrap(), + vec![Some(Int16(5)), Some(Utf8("abc".into()))] + ); + } +} diff --git a/src/common/src/util/value_encoding/error.rs b/src/common/src/util/value_encoding/error.rs index 3f40de41401c4..e4252b2f91aa9 100644 --- a/src/common/src/util/value_encoding/error.rs +++ b/src/common/src/util/value_encoding/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
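For reference, the column-aware encoding added above lays a row out as: one flag byte (its two low bits select a u8/u16/u32 offset width), a little-endian u32 datum count, the i32 column ids, the per-datum start offsets, then the concatenated datum bytes. The std-only sketch below rebuilds just that header layout with plain Vec<u8> pushes instead of the bytes crate; the datum payloads here are arbitrary bytes, not RisingWave's real value encoding.

/// Minimal sketch of the layout described above (assumption: datum payloads
/// are already value-encoded; here they are arbitrary byte slices).
fn encode_row(column_ids: &[i32], datums: &[&[u8]]) -> Vec<u8> {
    assert_eq!(column_ids.len(), datums.len());

    // Per-datum start offsets into the data section.
    let mut offsets = Vec::with_capacity(datums.len());
    let mut data = Vec::new();
    for d in datums {
        offsets.push(data.len());
        data.extend_from_slice(d);
    }

    // Pick the narrowest offset width that fits, as the Flag bits do above;
    // keeping offsets at 1 byte for small rows is the point of the flag.
    let max_offset = offsets.last().copied().unwrap_or(0);
    let (flag, width) = match max_offset {
        m if m <= u8::MAX as usize => (0b1000_0000u8 | 0b01, 1),
        m if m <= u16::MAX as usize => (0b1000_0000u8 | 0b10, 2),
        _ => (0b1000_0000u8 | 0b11, 4),
    };

    let mut out = Vec::new();
    out.push(flag); // flag byte: EMPTY | offset width
    out.extend_from_slice(&(datums.len() as u32).to_le_bytes()); // datum count
    for id in column_ids {
        out.extend_from_slice(&id.to_le_bytes()); // column ids, i32 LE
    }
    for off in &offsets {
        out.extend_from_slice(&(*off as u32).to_le_bytes()[..width]); // offsets
    }
    out.extend_from_slice(&data); // datum payloads
    out
}

fn main() {
    // Two columns with ids 0 and 1; payloads mimic an i16 and a length-prefixed string.
    let payload_a = 5i16.to_le_bytes();
    let payload_b = b"\x03\x00\x00\x00abc";
    let datums: [&[u8]; 2] = [&payload_a, payload_b];
    let bytes = encode_row(&[0, 1], &datums);
    assert_eq!(bytes[0], 0b1000_0001); // EMPTY | OFFSET8, as in the unit test above
    println!("{bytes:?}");
}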
@@ -28,8 +28,12 @@ pub enum ValueEncodingError { InvalidNaiveTimeEncoding(u32, u32), #[error("Invalid null tag value encoding: {0}")] InvalidTagEncoding(u8), + #[error("Invalid jsonb encoding")] + InvalidJsonbEncoding, #[error("Invalid struct encoding: {0}")] InvalidStructEncoding(crate::array::ArrayError), #[error("Invalid list encoding: {0}")] InvalidListEncoding(crate::array::ArrayError), + #[error("Invalid flag: {0}")] + InvalidFlag(u8), } diff --git a/src/common/src/util/value_encoding/mod.rs b/src/common/src/util/value_encoding/mod.rs index f75f2abcaa184..dcf471a0b2f33 100644 --- a/src/common/src/util/value_encoding/mod.rs +++ b/src/common/src/util/value_encoding/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ use bytes::{Buf, BufMut}; use chrono::{Datelike, Timelike}; use itertools::Itertools; -use crate::array::{ListRef, ListValue, StructRef, StructValue}; +use crate::array::{JsonbVal, ListRef, ListValue, StructRef, StructValue}; use crate::types::struct_type::StructType; use crate::types::{ DataType, Datum, Decimal, IntervalUnit, NaiveDateTimeWrapper, NaiveDateWrapper, @@ -28,6 +28,7 @@ use crate::types::{ pub mod error; use error::ValueEncodingError; +pub mod column_aware_row_encoding; pub type Result = std::result::Result; @@ -83,6 +84,7 @@ fn serialize_scalar(value: ScalarRefImpl<'_>, buf: &mut impl BufMut) { ScalarRefImpl::NaiveTime(v) => { serialize_naivetime(v.0.num_seconds_from_midnight(), v.0.nanosecond(), buf) } + ScalarRefImpl::Jsonb(v) => serialize_str(&v.value_serialize(), buf), ScalarRefImpl::Struct(s) => serialize_struct(s, buf), ScalarRefImpl::List(v) => serialize_list(v, buf), } @@ -154,6 +156,10 @@ fn deserialize_value(ty: &DataType, data: &mut impl Buf) -> Result { DataType::Timestamp => ScalarImpl::NaiveDateTime(deserialize_naivedatetime(data)?), DataType::Timestamptz => ScalarImpl::Int64(data.get_i64_le()), DataType::Date => ScalarImpl::NaiveDate(deserialize_naivedate(data)?), + DataType::Jsonb => ScalarImpl::Jsonb( + JsonbVal::value_deserialize(&deserialize_bytea(data)) + .ok_or(ValueEncodingError::InvalidJsonbEncoding)?, + ), DataType::Struct(struct_def) => deserialize_struct(struct_def, data)?, DataType::Bytea => ScalarImpl::Bytea(deserialize_bytea(data).into()), DataType::List { diff --git a/src/common/src/util/worker_util.rs b/src/common/src/util/worker_util.rs index ec818c2583028..4abfb0fd958d8 100644 --- a/src/common/src/util/worker_util.rs +++ b/src/common/src/util/worker_util.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/compute/Cargo.toml b/src/compute/Cargo.toml index 9731fb96ab248..b9162b1156a4b 100644 --- a/src/compute/Cargo.toml +++ b/src/compute/Cargo.toml @@ -7,36 +7,28 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] -anyhow = "1" -async-stream = "0.3" async-trait = "0.1" async_stack_trace = { path = "../utils/async_stack_trace" } -byteorder = "1" -bytes = "1" -chrono = { version = "0.4", default-features = false, features = [ - "clock", - "std", -] } clap = { version = "3", features = ["derive"] } -crc32fast = "1" -dyn-clone = "1" either = "1" -farmhash = "1" futures = { version = "0.3", default-features = false, features = ["alloc"] } futures-async-stream = "0.2" hyper = "0.14" itertools = "0.10" -lru = { git = "https://github.com/risingwavelabs/lru-rs.git", branch = "evict_by_timestamp" } maplit = "1.0.2" -num-traits = "0.2" -parking_lot = "0.12" -paste = "1" pprof = { version = "0.11", features = ["flamegraph"] } +pretty-bytes = "0.2.2" prometheus = { version = "0.13" } -prost = "0.11" risingwave_batch = { path = "../batch" } risingwave_common = { path = "../common" } +risingwave_common_proc_macro = { path = "../common/proc_macro" } risingwave_common_service = { path = "../common/common_service" } risingwave_connector = { path = "../connector" } risingwave_hummock_sdk = { path = "../storage/hummock_sdk" } @@ -46,13 +38,7 @@ risingwave_source = { path = "../source" } risingwave_storage = { path = "../storage" } risingwave_stream = { path = "../stream" } risingwave_tracing = { path = "../tracing" } -serde = { version = "1", features = ["derive"] } -serde-value = "0.7" serde_json = "1" -smallvec = "1" -static_assertions = "1" -sysinfo = "0.26" -thiserror = "1" tokio = { version = "0.2", package = "madsim-tokio", features = [ "rt", "rt-multi-thread", @@ -65,10 +51,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ tokio-stream = "0.1" tonic = { version = "0.2", package = "madsim-tonic" } tower = { version = "0.4", features = ["util", "load-shed"] } -tower-http = { version = "0.3", features = ["add-extension", "cors"] } tracing = "0.1" -twox-hash = "1" -url = "2" [target.'cfg(target_os = "linux")'.dependencies] tikv-jemalloc-ctl = "0.5" diff --git a/src/compute/src/lib.rs b/src/compute/src/lib.rs index ff3ae06dc3a73..459fc91261194 100644 --- a/src/compute/src/lib.rs +++ b/src/compute/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -28,79 +28,99 @@ pub mod memory_management; pub mod rpc; pub mod server; -use clap::clap_derive::ArgEnum; use clap::Parser; +use risingwave_common::config::{true_if_present, AsyncStackTraceOption, Flag}; use risingwave_common::util::resource_util::cpu::total_cpu_available; use risingwave_common::util::resource_util::memory::total_memory_available_bytes; - -#[derive(Debug, Clone, ArgEnum)] -pub enum AsyncStackTraceOption { - Off, - On, // default - Verbose, -} +use risingwave_common_proc_macro::OverrideConfig; /// Command-line arguments for compute-node. #[derive(Parser, Clone, Debug)] pub struct ComputeNodeOpts { - // TODO: rename to listen_address and separate out the port. 
- #[clap(long, default_value = "127.0.0.1:5688")] - pub host: String, - - /// The address of the compute node's meta client. - /// - /// Optional, we will use listen_address if not specified. - #[clap(long)] - pub client_address: Option, - - #[clap(long, default_value = "hummock+memory")] - pub state_store: String, - - #[clap(long, default_value = "127.0.0.1:1222")] + // TODO: rename to listen_addr and separate out the port. + /// The address that this service listens to. + /// Usually the localhost + desired port. + #[clap( + long, + alias = "host", + env = "RW_LISTEN_ADDR", + default_value = "127.0.0.1:5688" + )] + pub listen_addr: String, + + /// The address for contacting this instance of the service. + /// This would be synonymous with the service's "public address" + /// or "identifying address". + /// Optional, we will use listen_addr if not specified. + #[clap(long, alias = "client-address", env = "RW_ADVERTISE_ADDR", long)] + pub advertise_addr: Option, + + #[clap( + long, + env = "RW_PROMETHEUS_LISTENER_ADDR", + default_value = "127.0.0.1:1222" + )] pub prometheus_listener_addr: String, - /// Used for control the metrics level, similar to log level. - /// 0 = close metrics - /// >0 = open metrics - #[clap(long, default_value = "0")] - pub metrics_level: u32, - - #[clap(long, default_value = "http://127.0.0.1:5690")] + #[clap(long, env = "RW_META_ADDRESS", default_value = "http://127.0.0.1:5690")] pub meta_address: String, - /// Enable reporting tracing information to jaeger. - #[clap(long)] - pub enable_jaeger_tracing: bool, - - /// Enable async stack tracing for risectl. - #[clap(long, arg_enum, default_value_t = AsyncStackTraceOption::On)] - pub async_stack_trace: AsyncStackTraceOption, - - /// Path to file cache data directory. - /// Left empty to disable file cache. - #[clap(long, default_value = "")] - pub file_cache_dir: String, - /// Endpoint of the connector node - #[clap(long, env = "CONNECTOR_RPC_ENDPOINT")] + #[clap(long, env = "RW_CONNECTOR_RPC_ENDPOINT")] pub connector_rpc_endpoint: Option, + /// One of: + /// 1. `hummock+{object_store}` where `object_store` + /// is one of `s3://{path}`, `s3-compatible://{path}`, `minio://{path}`, `disk://{path}`, + /// `memory` or `memory-shared`. + /// 2. `in-memory` + /// 3. `sled://{path}` + #[clap(long, env = "RW_STATE_STORE")] + pub state_store: Option, + /// The path of `risingwave.toml` configuration file. /// /// If empty, default configuration values will be used. - /// - /// Note that internal system parameters should be defined in the configuration file at - /// [`risingwave_common::config`] instead of command line arguments. - #[clap(long, default_value = "")] + #[clap(long, env = "RW_CONFIG_PATH", default_value = "")] pub config_path: String, - /// Total available memory in bytes, used by LRU Manager - #[clap(long, default_value_t = default_total_memory_bytes())] + /// Total available memory for the compute node in bytes. Used by both computing and storage. + #[clap(long, env = "RW_TOTAL_MEMORY_BYTES", default_value_t = default_total_memory_bytes())] pub total_memory_bytes: usize, /// The parallelism that the compute node will register to the scheduler of the meta service. - #[clap(long, default_value_t = default_parallelism())] + #[clap(long, env = "RW_PARALLELISM", default_value_t = default_parallelism())] pub parallelism: usize, + + #[clap(flatten)] + override_config: OverrideConfigOpts, +} + +/// Command-line arguments for compute-node that overrides the config file. 
+#[derive(Parser, Clone, Debug, OverrideConfig)] +struct OverrideConfigOpts { + /// Used for control the metrics level, similar to log level. + /// 0 = close metrics + /// >0 = open metrics + #[clap(long, env = "RW_METRICS_LEVEL")] + #[override_opts(path = server.metrics_level)] + pub metrics_level: Option, + + /// Path to file cache data directory. + /// Left empty to disable file cache. + #[clap(long, env = "RW_FILE_CACHE_DIR")] + #[override_opts(path = storage.file_cache.dir)] + pub file_cache_dir: Option, + + /// Enable reporting tracing information to jaeger. + #[clap(long, env = "RW_ENABLE_JAEGER_TRACING", parse(from_flag = true_if_present))] + #[override_opts(path = streaming.enable_jaeger_tracing)] + pub enable_jaeger_tracing: Flag, + + /// Enable async stack tracing for risectl. + #[clap(long, env = "RW_ASYNC_STACK_TRACE", arg_enum)] + #[override_opts(path = streaming.async_stack_trace)] + pub async_stack_trace: Option, } fn validate_opts(opts: &ComputeNodeOpts) { @@ -135,25 +155,26 @@ pub fn start(opts: ComputeNodeOpts) -> Pin + Send>> // WARNING: don't change the function signature. Making it `async fn` will cause // slow compile in release mode. Box::pin(async move { - tracing::info!("Compute node options: {:?}", opts); + tracing::info!("options: {:?}", opts); + warn_future_deprecate_options(&opts); validate_opts(&opts); - let listen_address = opts.host.parse().unwrap(); - tracing::info!("Server Listening at {}", listen_address); + let listen_addr = opts.listen_addr.parse().unwrap(); + tracing::info!("Server Listening at {}", listen_addr); - let client_address = opts - .client_address + let advertise_addr = opts + .advertise_addr .as_ref() .unwrap_or_else(|| { - tracing::warn!("Client address is not specified, defaulting to host address"); - &opts.host + tracing::warn!("advertise addr is not specified, defaulting to listen_addr"); + &opts.listen_addr }) .parse() .unwrap(); - tracing::info!("Client address is {}", client_address); + tracing::info!("advertise addr is {}", advertise_addr); let (join_handle_vec, _shutdown_send) = - compute_node_serve(listen_address, client_address, opts).await; + compute_node_serve(listen_addr, advertise_addr, opts).await; for join_handle in join_handle_vec { join_handle.await.unwrap(); @@ -168,3 +189,9 @@ fn default_total_memory_bytes() -> usize { fn default_parallelism() -> usize { total_cpu_available().ceil() as usize } + +fn warn_future_deprecate_options(opts: &ComputeNodeOpts) { + if opts.state_store.is_some() { + tracing::warn!("`--state-store` will not be accepted by compute node in the next release. Please consider moving this argument to the meta node."); + } +} diff --git a/src/compute/src/memory_management/memory_manager.rs b/src/compute/src/memory_management/memory_manager.rs index c0865c0c9de9a..2307811a11683 100644 --- a/src/compute/src/memory_management/memory_manager.rs +++ b/src/compute/src/memory_management/memory_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
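OverrideConfigOpts above routes optional CLI flags (each tagged with #[override_opts(path = ...)]) over values loaded from risingwave.toml: a flag left unset keeps the file's value. A hand-rolled sketch of that precedence rule, without the derive macro or clap and with hypothetical field names:

#[derive(Debug)]
struct ServerConfig {
    metrics_level: u32,
    file_cache_dir: String,
}

/// Stand-in for the flattened `OverrideConfigOpts`: every field is optional,
/// and `None` means "keep whatever the config file said".
#[derive(Default)]
struct Overrides {
    metrics_level: Option<u32>,
    file_cache_dir: Option<String>,
}

fn apply(mut config: ServerConfig, o: Overrides) -> ServerConfig {
    // Conceptually what the OverrideConfig derive generates per
    // `#[override_opts(path = ...)]` field: overwrite only when the flag was given.
    if let Some(v) = o.metrics_level {
        config.metrics_level = v;
    }
    if let Some(v) = o.file_cache_dir {
        config.file_cache_dir = v;
    }
    config
}

fn main() {
    let from_file = ServerConfig { metrics_level: 0, file_cache_dir: String::new() };
    let from_cli = Overrides { metrics_level: Some(1), ..Default::default() };
    println!("{:?}", apply(from_file, from_cli));
}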
@@ -14,18 +14,18 @@ use std::sync::atomic::AtomicU64; use std::sync::Arc; -#[cfg(target_os = "linux")] -use std::time::Duration; use risingwave_batch::task::BatchManager; #[cfg(target_os = "linux")] use risingwave_common::util::epoch::Epoch; use risingwave_stream::executor::monitor::StreamingMetrics; use risingwave_stream::task::LocalStreamManager; -#[cfg(target_os = "linux")] -use tikv_jemalloc_ctl::{epoch as jemalloc_epoch, stats as jemalloc_stats}; -#[cfg(target_os = "linux")] -use tracing; + +/// The minimal memory requirement of computing tasks in megabytes. +pub const MIN_COMPUTE_MEMORY_MB: usize = 512; +/// The memory reserved for system usage (stack and code segment of processes, allocation overhead, +/// network buffer, etc.) in megabytes. +pub const SYSTEM_RESERVED_MEMORY_MB: usize = 512; /// When `enable_managed_cache` is set, compute node will launch a [`GlobalMemoryManager`] to limit /// the memory usage. @@ -94,6 +94,9 @@ impl GlobalMemoryManager { _batch_mgr: Arc, _stream_mgr: Arc, ) { + use std::time::Duration; + + use tikv_jemalloc_ctl::{epoch as jemalloc_epoch, stats as jemalloc_stats}; let mem_threshold_graceful = (self.total_memory_available_bytes as f64 * Self::EVICTION_THRESHOLD_GRACEFUL) as usize; let mem_threshold_aggressive = (self.total_memory_available_bytes as f64 @@ -180,6 +183,9 @@ impl GlobalMemoryManager { self.metrics .jemalloc_allocated_bytes .set(cur_total_bytes_used as i64); + self.metrics + .stream_total_mem_usage + .set(_stream_mgr.get_total_mem_val().get()); self.set_watermark_time_ms(watermark_time_ms); } diff --git a/src/compute/src/memory_management/mod.rs b/src/compute/src/memory_management/mod.rs index c5c6c90b95a32..242dc4e8e67e9 100644 --- a/src/compute/src/memory_management/mod.rs +++ b/src/compute/src/memory_management/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/compute/src/rpc/mod.rs b/src/compute/src/rpc/mod.rs index e69b19cd2dbff..d74c1be710567 100644 --- a/src/compute/src/rpc/mod.rs +++ b/src/compute/src/rpc/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/compute/src/rpc/service/config_service.rs b/src/compute/src/rpc/service/config_service.rs index 1ea5994163c67..b191263bda090 100644 --- a/src/compute/src/rpc/service/config_service.rs +++ b/src/compute/src/rpc/service/config_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/compute/src/rpc/service/exchange_metrics.rs b/src/compute/src/rpc/service/exchange_metrics.rs index 9e3b04143d02f..7be8739eb0818 100644 --- a/src/compute/src/rpc/service/exchange_metrics.rs +++ b/src/compute/src/rpc/service/exchange_metrics.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -17,21 +17,12 @@ use prometheus::{register_int_counter_vec_with_registry, Registry}; pub struct ExchangeServiceMetrics { pub registry: Registry, - pub stream_exchange_bytes: GenericCounterVec, pub stream_fragment_exchange_bytes: GenericCounterVec, pub actor_sampled_serialize_duration_ns: GenericCounterVec, } impl ExchangeServiceMetrics { pub fn new(registry: Registry) -> Self { - let stream_exchange_bytes = register_int_counter_vec_with_registry!( - "stream_exchange_send_size", - "Total size of messages that have been send to downstream Actor", - &["up_actor_id", "down_actor_id"], - registry - ) - .unwrap(); - let stream_fragment_exchange_bytes = register_int_counter_vec_with_registry!( "stream_exchange_frag_send_size", "Total size of messages that have been send to downstream Fragment", @@ -50,7 +41,6 @@ impl ExchangeServiceMetrics { Self { registry, - stream_exchange_bytes, stream_fragment_exchange_bytes, actor_sampled_serialize_duration_ns, } diff --git a/src/compute/src/rpc/service/exchange_service.rs b/src/compute/src/rpc/service/exchange_service.rs index dede714e74ea7..f6b6cabd7e91a 100644 --- a/src/compute/src/rpc/service/exchange_service.rs +++ b/src/compute/src/rpc/service/exchange_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -140,7 +140,6 @@ impl ExchangeServiceImpl { ) { tracing::trace!(target: "events::compute::exchange", peer_addr = %peer_addr, "serve stream exchange RPC"); let up_actor_id = up_down_actor_ids.0.to_string(); - let down_actor_id = up_down_actor_ids.1.to_string(); let up_fragment_id = up_down_fragment_ids.0.to_string(); let down_fragment_id = up_down_fragment_ids.1.to_string(); @@ -189,10 +188,6 @@ impl ExchangeServiceImpl { yield response; - metrics - .stream_exchange_bytes - .with_label_values(&[&up_actor_id, &down_actor_id]) - .inc_by(bytes as u64); metrics .stream_fragment_exchange_bytes .with_label_values(&[&up_fragment_id, &down_fragment_id]) diff --git a/src/compute/src/rpc/service/health_service.rs b/src/compute/src/rpc/service/health_service.rs index c5164a9633a75..5ff1f4a85f8c6 100644 --- a/src/compute/src/rpc/service/health_service.rs +++ b/src/compute/src/rpc/service/health_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/compute/src/rpc/service/mod.rs b/src/compute/src/rpc/service/mod.rs index fa3cd3b1bea5e..12a16266b1793 100644 --- a/src/compute/src/rpc/service/mod.rs +++ b/src/compute/src/rpc/service/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/compute/src/rpc/service/monitor_service.rs b/src/compute/src/rpc/service/monitor_service.rs index fbc90179e024b..f4712d344920a 100644 --- a/src/compute/src/rpc/service/monitor_service.rs +++ b/src/compute/src/rpc/service/monitor_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
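The exchange metrics hunks above drop the per-actor counter and keep only the per-fragment one. A minimal sketch of how that remaining counter is registered and bumped, assuming the prometheus 0.13 crate already used by these crates; the label values are made up.

use prometheus::{register_int_counter_vec_with_registry, Registry};

fn main() {
    let registry = Registry::new();

    // Same shape as `stream_fragment_exchange_bytes` above: one counter per
    // (upstream fragment, downstream fragment) pair.
    let fragment_exchange_bytes = register_int_counter_vec_with_registry!(
        "stream_exchange_frag_send_size",
        "Total size of messages that have been sent to downstream Fragment",
        &["up_fragment_id", "down_fragment_id"],
        registry
    )
    .unwrap();

    // On every outgoing message, bump the counter for that fragment pair.
    fragment_exchange_bytes
        .with_label_values(&["1", "2"])
        .inc_by(1024);

    assert_eq!(
        fragment_exchange_bytes.with_label_values(&["1", "2"]).get(),
        1024
    );
}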
diff --git a/src/compute/src/rpc/service/stream_service.rs b/src/compute/src/rpc/service/stream_service.rs index 825ff0e91ef70..e787e65d0f944 100644 --- a/src/compute/src/rpc/service/stream_service.rs +++ b/src/compute/src/rpc/service/stream_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/compute/src/server.rs b/src/compute/src/server.rs index ed815b99f5e4e..3fc6b3b4e179b 100644 --- a/src/compute/src/server.rs +++ b/src/compute/src/server.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,13 +17,19 @@ use std::sync::Arc; use std::time::Duration; use async_stack_trace::StackTraceManager; -use risingwave_batch::executor::BatchTaskMetrics; +use pretty_bytes::converter::convert; +use risingwave_batch::executor::{BatchManagerMetrics, BatchTaskMetrics}; use risingwave_batch::rpc::service::task_service::BatchServiceImpl; use risingwave_batch::task::{BatchEnvironment, BatchManager}; -use risingwave_common::config::{load_config, MAX_CONNECTION_WINDOW_SIZE, STREAM_WINDOW_SIZE}; +use risingwave_common::config::{ + load_config, AsyncStackTraceOption, StorageConfig, MAX_CONNECTION_WINDOW_SIZE, + STREAM_WINDOW_SIZE, +}; use risingwave_common::monitor::process_linux::monitor_process; use risingwave_common::util::addr::HostAddr; +use risingwave_common::{GIT_SHA, RW_VERSION}; use risingwave_common_service::metrics_manager::MetricsManager; +use risingwave_connector::source::monitor::SourceMetrics; use risingwave_hummock_sdk::compact::CompactorRuntimeConfig; use risingwave_pb::common::WorkerType; use risingwave_pb::compute::config_service_server::ConfigServiceServer; @@ -34,10 +40,7 @@ use risingwave_pb::task_service::exchange_service_server::ExchangeServiceServer; use risingwave_pb::task_service::task_service_server::TaskServiceServer; use risingwave_rpc_client::{ComputeClientPool, ExtraInfoSourceRef, MetaClient}; use risingwave_source::dml_manager::DmlManager; -use risingwave_source::monitor::SourceMetrics; -use risingwave_storage::hummock::compactor::{ - CompactionExecutor, Compactor, CompactorContext, Context, -}; +use risingwave_storage::hummock::compactor::{CompactionExecutor, Compactor, CompactorContext}; use risingwave_storage::hummock::hummock_meta_client::MonitoredHummockMetaClient; use risingwave_storage::hummock::{ HummockMemoryCollector, MemoryLimiter, TieredCacheMetricsBuilder, @@ -46,13 +49,16 @@ use risingwave_storage::monitor::{ monitor_cache, CompactorMetrics, HummockMetrics, HummockStateStoreMetrics, MonitoredStorageMetrics, ObjectStoreMetrics, }; +use risingwave_storage::opts::StorageOpts; use risingwave_storage::StateStoreImpl; use risingwave_stream::executor::monitor::StreamingMetrics; use risingwave_stream::task::{LocalStreamManager, StreamEnvironment}; use tokio::sync::oneshot::Sender; use tokio::task::JoinHandle; -use crate::memory_management::memory_manager::GlobalMemoryManager; +use crate::memory_management::memory_manager::{ + GlobalMemoryManager, MIN_COMPUTE_MEMORY_MB, SYSTEM_RESERVED_MEMORY_MB, +}; use crate::rpc::service::config_service::ConfigServiceImpl; use crate::rpc::service::exchange_metrics::ExchangeServiceMetrics; use crate::rpc::service::exchange_service::ExchangeServiceImpl; @@ -61,35 
+67,51 @@ use crate::rpc::service::monitor_service::{ GrpcStackTraceManagerRef, MonitorServiceImpl, StackTraceMiddlewareLayer, }; use crate::rpc::service::stream_service::StreamServiceImpl; -use crate::{AsyncStackTraceOption, ComputeNodeOpts}; +use crate::ComputeNodeOpts; /// Bootstraps the compute-node. pub async fn compute_node_serve( listen_addr: SocketAddr, - client_addr: HostAddr, + advertise_addr: HostAddr, opts: ComputeNodeOpts, ) -> (Vec>, Sender<()>) { // Load the configuration. - let config = load_config(&opts.config_path); + let config = load_config(&opts.config_path, Some(opts.override_config)); + info!("Starting compute node",); + info!("> config: {:?}", config); info!( - "Starting compute node with config {:?} with debug assertions {}", - config, + "> debug assertions: {}", if cfg!(debug_assertions) { "on" } else { "off" } ); + info!("> version: {} ({})", RW_VERSION, GIT_SHA); + // Initialize all the configs - let storage_config = Arc::new(config.storage.clone()); let stream_config = Arc::new(config.streaming.clone()); let batch_config = Arc::new(config.batch.clone()); // Register to the cluster. We're not ready to serve until activate is called. - let meta_client = MetaClient::register_new( + let (meta_client, system_params) = MetaClient::register_new( &opts.meta_address, WorkerType::ComputeNode, - &client_addr, + &advertise_addr, opts.parallelism, ) .await .unwrap(); + let storage_opts = Arc::new(StorageOpts::from((&config, &system_params))); + + let state_store_url = { + let from_local = opts.state_store.unwrap_or("hummock+memory".to_string()); + system_params.state_store(from_local) + }; + + let embedded_compactor_enabled = + embedded_compactor_enabled(&state_store_url, config.storage.disable_remote_compactor); + validate_compute_node_memory_config( + opts.total_memory_bytes, + embedded_compactor_enabled, + &config.storage, + ); let worker_id = meta_client.worker_id(); info!("Assigned worker node id {}", worker_id); @@ -102,6 +124,7 @@ pub async fn compute_node_serve( let hummock_metrics = Arc::new(HummockMetrics::new(registry.clone())); let streaming_metrics = Arc::new(StreamingMetrics::new(registry.clone())); let batch_task_metrics = Arc::new(BatchTaskMetrics::new(registry.clone())); + let batch_manager_metrics = BatchManagerMetrics::new(registry.clone()); let exchange_srv_metrics = Arc::new(ExchangeServiceMetrics::new(registry.clone())); // Initialize state store. @@ -118,14 +141,13 @@ pub async fn compute_node_serve( let mut join_handle_vec = vec![]; let state_store = StateStoreImpl::new( - &opts.state_store, - &opts.file_cache_dir, - &config, + &state_store_url, + storage_opts.clone(), hummock_meta_client.clone(), state_store_metrics.clone(), object_store_metrics, TieredCacheMetricsBuilder::new(registry.clone()), - if opts.enable_jaeger_tracing { + if config.streaming.enable_jaeger_tracing { Arc::new( risingwave_tracing::RwTracingService::new(risingwave_tracing::TracingConfig::new( "127.0.0.1:6831".to_string(), @@ -144,18 +166,13 @@ pub async fn compute_node_serve( let mut extra_info_sources: Vec = vec![]; if let Some(storage) = state_store.as_hummock_trait() { extra_info_sources.push(storage.sstable_id_manager().clone()); - // Note: we treat `hummock+memory-shared` as a shared storage, so we won't start the - // compactor along with compute node. 
- if opts.state_store == "hummock+memory" - || opts.state_store.starts_with("hummock+disk") - || storage_config.disable_remote_compactor - { + if embedded_compactor_enabled { tracing::info!("start embedded compactor"); let read_memory_limiter = Arc::new(MemoryLimiter::new( - storage_config.compactor_memory_limit_mb as u64 * 1024 * 1024 / 2, + storage_opts.compactor_memory_limit_mb as u64 * 1024 * 1024 / 2, )); - let context = Arc::new(Context { - options: storage_config, + let compactor_context = Arc::new(CompactorContext { + storage_opts, hummock_meta_client: hummock_meta_client.clone(), sstable_store: storage.sstable_store(), compactor_metrics: compactor_metrics.clone(), @@ -165,13 +182,12 @@ pub async fn compute_node_serve( read_memory_limiter, sstable_id_manager: storage.sstable_id_manager().clone(), task_progress_manager: Default::default(), + compactor_runtime_config: Arc::new(tokio::sync::Mutex::new( + CompactorRuntimeConfig { + max_concurrent_task_number: 1, + }, + )), }); - let compactor_context = Arc::new(CompactorContext::with_config( - context, - CompactorRuntimeConfig { - max_concurrent_task_number: 1, - }, - )); let (handle, shutdown_sender) = Compactor::start_compactor(compactor_context, hummock_meta_client); @@ -192,7 +208,7 @@ pub async fn compute_node_serve( extra_info_sources, )); - let async_stack_trace_config = match opts.async_stack_trace { + let async_stack_trace_config = match &config.streaming.async_stack_trace { AsyncStackTraceOption::Off => None, c => Some(async_stack_trace::TraceConfig { report_detached: true, @@ -202,9 +218,12 @@ pub async fn compute_node_serve( }; // Initialize the managers. - let batch_mgr = Arc::new(BatchManager::new(config.batch.clone())); + let batch_mgr = Arc::new(BatchManager::new( + config.batch.clone(), + batch_manager_metrics, + )); let stream_mgr = Arc::new(LocalStreamManager::new( - client_addr.clone(), + advertise_addr.clone(), state_store.clone(), streaming_metrics.clone(), config.streaming.clone(), @@ -216,7 +235,7 @@ pub async fn compute_node_serve( let stream_mgr_clone = stream_mgr.clone(); let mgr = GlobalMemoryManager::new( opts.total_memory_bytes, - config.streaming.barrier_interval_ms, + system_params.barrier_interval_ms(), streaming_metrics.clone(), ); // Run a background memory monitor @@ -235,7 +254,7 @@ pub async fn compute_node_serve( let client_pool = Arc::new(ComputeClientPool::new(config.server.connection_pool_size)); let batch_env = BatchEnvironment::new( batch_mgr.clone(), - client_addr.clone(), + advertise_addr.clone(), batch_config, worker_id, state_store.clone(), @@ -250,7 +269,7 @@ pub async fn compute_node_serve( }; // Initialize the streaming environment. let stream_env = StreamEnvironment::new( - client_addr.clone(), + advertise_addr.clone(), connector_params, stream_config, worker_id, @@ -314,7 +333,7 @@ pub async fn compute_node_serve( join_handle_vec.push(join_handle); // Boot metrics service. - if opts.metrics_level > 0 { + if config.server.metrics_level > 0 { MetricsManager::boot_metrics_service( opts.prometheus_listener_addr.clone(), registry.clone(), @@ -322,7 +341,52 @@ pub async fn compute_node_serve( } // All set, let the meta service know we're ready. - meta_client.activate(&client_addr).await.unwrap(); + meta_client.activate(&advertise_addr).await.unwrap(); (join_handle_vec, shutdown_send) } + +/// Check whether the compute node has enough memory to perform computing tasks. 
Apart from storage, +/// it must reserve at least `MIN_COMPUTE_MEMORY_MB` for computing and `SYSTEM_RESERVED_MEMORY_MB` +/// for other system usage. Otherwise, it is not allowed to start. +fn validate_compute_node_memory_config( + cn_total_memory_bytes: usize, + embedded_compactor_enabled: bool, + storage_config: &StorageConfig, +) { + let storage_memory_mb = { + let total_memory = storage_config.block_cache_capacity_mb + + storage_config.meta_cache_capacity_mb + + storage_config.shared_buffer_capacity_mb + + storage_config.file_cache.total_buffer_capacity_mb; + if embedded_compactor_enabled { + total_memory + storage_config.compactor_memory_limit_mb + } else { + total_memory + } + }; + if storage_memory_mb << 20 > cn_total_memory_bytes { + panic!( + "The storage memory exceeds the total compute node memory:\nTotal compute node memory: {}\nStorage memory: {}\nAt least 1 GB memory should be reserved apart from the storage memory. Please increase the total compute node memory or decrease the storage memory in configurations and restart the compute node.", + convert(cn_total_memory_bytes as _), + convert((storage_memory_mb << 20) as _) + ); + } else if (storage_memory_mb + MIN_COMPUTE_MEMORY_MB + SYSTEM_RESERVED_MEMORY_MB) << 20 + >= cn_total_memory_bytes + { + panic!( + "No enough memory for computing and other system usage:\nTotal compute node memory: {}\nStorage memory: {}\nAt least 1 GB memory should be reserved apart from the storage memory. Please increase the total compute node memory or decrease the storage memory in configurations and restart the compute node.", + convert(cn_total_memory_bytes as _), + convert((storage_memory_mb << 20) as _) + ); + } +} + +/// Checks whether an embedded compactor starts with a compute node. +fn embedded_compactor_enabled(state_store_url: &str, disable_remote_compactor: bool) -> bool { + // We treat `hummock+memory-shared` as a shared storage, so we won't start the compactor + // along with the compute node. + state_store_url == "hummock+memory" + || state_store_url.starts_with("hummock+disk") + || disable_remote_compactor +} diff --git a/src/compute/tests/integration_tests.rs b/src/compute/tests/integration_tests.rs index eae0c541c2bdd..690188a81e563 100644 --- a/src/compute/tests/integration_tests.rs +++ b/src/compute/tests/integration_tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
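validate_compute_node_memory_config above sums the storage caches (block cache, meta cache, shared buffer, file-cache buffer, plus the compactor limit when the embedded compactor runs) and refuses to start unless the node still has MIN_COMPUTE_MEMORY_MB + SYSTEM_RESERVED_MEMORY_MB left over. A std-only sketch of the same arithmetic with made-up numbers; it returns Err instead of panicking:

const MIN_COMPUTE_MEMORY_MB: usize = 512;
const SYSTEM_RESERVED_MEMORY_MB: usize = 512;

/// Same checks as `validate_compute_node_memory_config` above. All figures are
/// in MB except the total, which is in bytes, mirroring the real signature.
fn check_memory(
    total_bytes: usize,
    block_cache_mb: usize,
    meta_cache_mb: usize,
    shared_buffer_mb: usize,
    file_cache_buffer_mb: usize,
    compactor_limit_mb: usize,
    embedded_compactor: bool,
) -> Result<(), String> {
    let mut storage_mb = block_cache_mb + meta_cache_mb + shared_buffer_mb + file_cache_buffer_mb;
    if embedded_compactor {
        storage_mb += compactor_limit_mb;
    }
    if storage_mb << 20 > total_bytes {
        return Err(format!("storage memory {storage_mb} MB exceeds the total memory"));
    }
    if (storage_mb + MIN_COMPUTE_MEMORY_MB + SYSTEM_RESERVED_MEMORY_MB) << 20 >= total_bytes {
        return Err(format!(
            "storage memory {storage_mb} MB leaves less than {} MB for computing and system usage",
            MIN_COMPUTE_MEMORY_MB + SYSTEM_RESERVED_MEMORY_MB
        ));
    }
    Ok(())
}

fn main() {
    // 8 GiB node, 4 GiB of storage caches, no embedded compactor: passes.
    assert!(check_memory(8 << 30, 2048, 1024, 1024, 0, 0, false).is_ok());
    // 4 GiB node with the same caches fails: nothing would be left for compute.
    assert!(check_memory(4 << 30, 2048, 1024, 1024, 0, 0, false).is_err());
}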
@@ -28,13 +28,16 @@ use risingwave_batch::executor::{ }; use risingwave_common::array::{Array, DataChunk, F64Array, I64Array}; use risingwave_common::buffer::Bitmap; -use risingwave_common::catalog::{ColumnDesc, ColumnId, Field, Schema, TableId}; +use risingwave_common::catalog::{ + ColumnDesc, ColumnId, Field, Schema, TableId, INITIAL_TABLE_VERSION_ID, +}; use risingwave_common::column_nonnull; use risingwave_common::error::{Result, RwError}; use risingwave_common::row::OwnedRow; use risingwave_common::test_prelude::DataChunkTestExt; use risingwave_common::types::{DataType, IntoOrdered}; use risingwave_common::util::epoch::EpochPair; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::sort_util::{OrderPair, OrderType}; use risingwave_hummock_sdk::to_committed_batch_query_epoch; use risingwave_pb::catalog::StreamSourceInfo; @@ -49,7 +52,7 @@ use risingwave_stream::error::StreamResult; use risingwave_stream::executor::dml::DmlExecutor; use risingwave_stream::executor::monitor::StreamingMetrics; use risingwave_stream::executor::row_id_gen::RowIdGenExecutor; -use risingwave_stream::executor::source_executor_v2::SourceExecutorV2; +use risingwave_stream::executor::source_executor::SourceExecutor; use risingwave_stream::executor::{ ActorContext, Barrier, Executor, MaterializeExecutor, Message, PkIndices, }; @@ -147,7 +150,7 @@ async fn test_table_materialize() -> StreamResult<()> { let pk_indices = PkIndices::from([0]); let column_descs = all_column_ids .iter() - .zip_eq(all_schema.fields.iter().cloned()) + .zip_eq_fast(all_schema.fields.iter().cloned()) .map(|(column_id, field)| ColumnDesc { data_type: field.data_type, column_id: *column_id, @@ -162,7 +165,7 @@ async fn test_table_materialize() -> StreamResult<()> { let actor_ctx = ActorContext::create(0x3f3f3f); // Create a `SourceExecutor` to read the changes. 
- let source_executor = SourceExecutorV2::::new( + let source_executor = SourceExecutor::::new( actor_ctx.clone(), all_schema.clone(), pk_indices.clone(), @@ -181,6 +184,7 @@ async fn test_table_materialize() -> StreamResult<()> { 2, dml_manager.clone(), table_id, + INITIAL_TABLE_VERSION_ID, column_descs.clone(), ); @@ -225,6 +229,7 @@ async fn test_table_materialize() -> StreamResult<()> { )); let insert = Box::new(InsertExecutor::new( table_id, + INITIAL_TABLE_VERSION_ID, dml_manager.clone(), insert_inner, 1024, @@ -345,6 +350,7 @@ async fn test_table_materialize() -> StreamResult<()> { let delete_inner: BoxedExecutor = Box::new(SingleChunkExecutor::new(chunk, all_schema.clone())); let delete = Box::new(DeleteExecutor::new( table_id, + INITIAL_TABLE_VERSION_ID, dml_manager.clone(), delete_inner, 1024, diff --git a/src/config/ci-iceberg-test.toml b/src/config/ci-iceberg-test.toml new file mode 100644 index 0000000000000..a7a5db0a603fd --- /dev/null +++ b/src/config/ci-iceberg-test.toml @@ -0,0 +1,9 @@ +[meta] +disable_recovery = true +enable_committed_sst_sanity_check = true +max_heartbeat_interval_secs = 600 + +[streaming] +barrier_interval_ms = 5000 +in_flight_barrier_nums = 10 +checkpoint_frequency = 1 \ No newline at end of file diff --git a/src/connector/Cargo.toml b/src/connector/Cargo.toml index 96a4bdf63b164..4c4196df2f15b 100644 --- a/src/connector/Cargo.toml +++ b/src/connector/Cargo.toml @@ -7,65 +7,56 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" -apache-avro = { git = "https://github.com/risingwavelabs/avro", branch = "master", features = ["snappy", "zstandard", "bzip", "xz"] } -async-stream = "0.3" +apache-avro = { git = "https://github.com/risingwavelabs/avro", branch = "waruto/modify-decimal", features = ["snappy", "zstandard", "bzip", "xz"] } async-trait = "0.1" aws-config = { workspace = true } aws-sdk-kinesis = { workspace = true } aws-sdk-s3 = { workspace = true } -aws-sdk-sqs = { workspace = true } aws-smithy-http = { workspace = true } -aws-smithy-types = { workspace = true } aws-types = { workspace = true } byteorder = "1" bytes = { version = "1", features = ["serde"] } chrono = { version = "0.4", default-features = false, features = ["clock", "std"] } -crc32fast = "1" csv-core = "0.1.10" -either = "1" +duration-str = "0.5.0" enum-as-inner = "0.5" -farmhash = "1" futures = { version = "0.3", default-features = false, features = ["alloc"] } futures-async-stream = "0.2" -futures-concurrency = "3" globset = "0.4.8" -google-cloud-googleapis = { version = "0.6.0", features = ["pubsub"] } google-cloud-pubsub = "0.7.0" http = "0.2" http-serde = "1.1.0" -humantime = "2.1" hyper = "0.14" itertools = "0.10" maplit = "1.0.2" moka = { version = "0.9", features = ["future"] } -mysql_async = "0.31" nexmark = { version = "0.2", features = ["serde"] } num-traits = "0.2" -paste = "1" +prometheus = { version = "0.13", features = ["process"] } prost = { version = "0.11.0", features = ["no-recursion-limit"] } prost-reflect = "0.9.2" protobuf-native = "0.2.1" pulsar = { version = "4.2", default-features = false, features = ["tokio-runtime"] } -rand = "0.8" -rdkafka = { package = "madsim-rdkafka", version = "=0.2.13-alpha", features = ["cmake-build", "ssl-vendored", "gssapi"] } +rdkafka = { package = "madsim-rdkafka", version = "=0.2.14-alpha", features = ["cmake-build", 
"ssl-vendored", "gssapi"] } reqwest = { version = "0.11", features = ["json"] } risingwave_common = { path = "../common" } risingwave_expr = { path = "../expr" } risingwave_pb = { path = "../prost" } risingwave_rpc_client = { path = "../rpc_client" } -risingwave_storage = { path = "../storage" } +rust_decimal = "1" serde = { version = "1", features = ["derive", "rc"] } serde_derive = "1" serde_json = "1" serde_with = "2" simd-json = { git = "https://github.com/tabVersion/simd-json.git", branch = "main", features = ["key-to-lowercase"] } -smallvec = "1" -static_assertions = "1" -strum = { version = "0.24", features = ["derive"] } -strum_macros = "0.24" -tempfile = "3" thiserror = "1" tokio = { version = "0.2", package = "madsim-tokio", features = ["rt", "rt-multi-thread", "sync", "macros", "time", "signal", "fs"] } tokio-retry = "0.3" @@ -73,7 +64,6 @@ tokio-stream = "0.1" tokio-util = { version = "0.7", features = ["codec", "io"] } tonic = { version = "0.2", package = "madsim-tonic" } tracing = "0.1" -twox-hash = "1" url = "2" urlencoding = "2" @@ -82,6 +72,5 @@ workspace-hack = { path = "../workspace-hack" } [dev-dependencies] rand = "0.8" -rust_decimal = "1" tempfile = "3" wiremock = "0.5" diff --git a/src/connector/src/aws_utils.rs b/src/connector/src/aws_utils.rs index f3852c19f4659..aef2794ae4d03 100644 --- a/src/connector/src/aws_utils.rs +++ b/src/connector/src/aws_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/common.rs b/src/connector/src/common.rs new file mode 100644 index 0000000000000..a27db4c043f9f --- /dev/null +++ b/src/connector/src/common.rs @@ -0,0 +1,201 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use aws_sdk_kinesis::Client as KinesisClient; +use http::Uri; +use rdkafka::ClientConfig; +use serde_derive::{Deserialize, Serialize}; + +use crate::source::kinesis::config::AwsConfigInfo; + +// The file describes the common abstractions for each connector and can be used in both source and +// sink. + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct KafkaCommon { + #[serde(rename = "properties.bootstrap.server", alias = "kafka.brokers")] + pub brokers: String, + + #[serde(rename = "topic", alias = "kafka.topic")] + pub topic: String, + + /// Security protocol used for RisingWave to communicate with Kafka brokers. Could be + /// PLAINTEXT, SSL, SASL_PLAINTEXT or SASL_SSL. + #[serde(rename = "properties.security.protocol")] + security_protocol: Option, + + // For the properties below, please refer to [librdkafka](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) for more information. + /// Path to CA certificate file for verifying the broker's key. + #[serde(rename = "properties.ssl.ca.location")] + ssl_ca_location: Option, + + /// Path to client's certificate file (PEM). 
+ #[serde(rename = "properties.ssl.certificate.location")] + ssl_certificate_location: Option, + + /// Path to client's private key file (PEM). + #[serde(rename = "properties.ssl.key.location")] + ssl_key_location: Option, + + /// Passphrase of client's private key. + #[serde(rename = "properties.ssl.key.password")] + ssl_key_password: Option, + + /// SASL mechanism if SASL is enabled. Currently support PLAIN, SCRAM and GSSAPI. + #[serde(rename = "properties.sasl.mechanism")] + sasl_mechanism: Option, + + /// SASL username for SASL/PLAIN and SASL/SCRAM. + #[serde(rename = "properties.sasl.username")] + sasl_username: Option, + + /// SASL password for SASL/PLAIN and SASL/SCRAM. + #[serde(rename = "properties.sasl.password")] + sasl_password: Option, + + /// Kafka server's Kerberos principal name under SASL/GSSAPI, not including /hostname@REALM. + #[serde(rename = "properties.sasl.kerberos.service.name")] + sasl_kerberos_service_name: Option, + + /// Path to client's Kerberos keytab file under SASL/GSSAPI. + #[serde(rename = "properties.sasl.kerberos.keytab")] + sasl_kerberos_keytab: Option, + + /// Client's Kerberos principal name under SASL/GSSAPI. + #[serde(rename = "properties.sasl.kerberos.principal")] + sasl_kerberos_principal: Option, + + /// Shell command to refresh or acquire the client's Kerberos ticket under SASL/GSSAPI. + #[serde(rename = "properties.sasl.kerberos.kinit.cmd")] + sasl_kerberos_kinit_cmd: Option, + + /// Minimum time in milliseconds between key refresh attempts under SASL/GSSAPI. + #[serde(rename = "properties.sasl.kerberos.min.time.before.relogin")] + sasl_kerberos_min_time_before_relogin: Option, + + /// Configurations for SASL/OAUTHBEARER. + #[serde(rename = "properties.sasl.oauthbearer.config")] + sasl_oathbearer_config: Option, +} + +impl KafkaCommon { + pub(crate) fn set_security_properties(&self, config: &mut ClientConfig) { + // Security protocol + if let Some(security_protocol) = self.security_protocol.as_ref() { + config.set("security.protocol", security_protocol); + } + + // SSL + if let Some(ssl_ca_location) = self.ssl_ca_location.as_ref() { + config.set("ssl.ca.location", ssl_ca_location); + } + if let Some(ssl_certificate_location) = self.ssl_certificate_location.as_ref() { + config.set("ssl.certificate.location", ssl_certificate_location); + } + if let Some(ssl_key_location) = self.ssl_key_location.as_ref() { + config.set("ssl.key.location", ssl_key_location); + } + if let Some(ssl_key_password) = self.ssl_key_password.as_ref() { + config.set("ssl.key.password", ssl_key_password); + } + + // SASL mechanism + if let Some(sasl_mechanism) = self.sasl_mechanism.as_ref() { + config.set("sasl.mechanism", sasl_mechanism); + } + + // SASL/PLAIN & SASL/SCRAM + if let Some(sasl_username) = self.sasl_username.as_ref() { + config.set("sasl.username", sasl_username); + } + if let Some(sasl_password) = self.sasl_password.as_ref() { + config.set("sasl.password", sasl_password); + } + + // SASL/GSSAPI + if let Some(sasl_kerberos_service_name) = self.sasl_kerberos_service_name.as_ref() { + config.set("sasl.kerberos.service.name", sasl_kerberos_service_name); + } + if let Some(sasl_kerberos_keytab) = self.sasl_kerberos_keytab.as_ref() { + config.set("sasl.kerberos.keytab", sasl_kerberos_keytab); + } + if let Some(sasl_kerberos_principal) = self.sasl_kerberos_principal.as_ref() { + config.set("sasl.kerberos.principal", sasl_kerberos_principal); + } + if let Some(sasl_kerberos_kinit_cmd) = self.sasl_kerberos_kinit_cmd.as_ref() { + config.set("sasl.kerberos.kinit.cmd", 
sasl_kerberos_kinit_cmd); + } + if let Some(sasl_kerberos_min_time_before_relogin) = + self.sasl_kerberos_min_time_before_relogin.as_ref() + { + config.set( + "sasl.kerberos.min.time.before.relogin", + sasl_kerberos_min_time_before_relogin, + ); + } + + // SASL/OAUTHBEARER + if let Some(sasl_oathbearer_config) = self.sasl_oathbearer_config.as_ref() { + config.set("sasl.oauthbearer.config", sasl_oathbearer_config); + } + // Currently, we only support unsecured OAUTH. + config.set("enable.sasl.oauthbearer.unsecure.jwt", "true"); + } +} + +#[derive(Deserialize, Serialize, Debug, Clone)] +pub struct KinesisCommon { + #[serde(rename = "stream", alias = "kinesis.stream.name")] + pub stream_name: String, + #[serde(rename = "aws.region", alias = "kinesis.stream.region")] + pub stream_region: String, + #[serde(rename = "endpoint", alias = "kinesis.endpoint")] + pub endpoint: Option, + #[serde( + rename = "aws.credentials.access_key_id", + alias = "kinesis.credentials.access" + )] + pub credentials_access_key: Option, + #[serde( + rename = "aws.credentials.secret_access_key", + alias = "kinesis.credentials.secret" + )] + pub credentials_secret_access_key: Option, + #[serde( + rename = "aws.credentials.session_token", + alias = "kinesis.credentials.session_token" + )] + pub session_token: Option, + #[serde(rename = "aws.credentials.role.arn", alias = "kinesis.assumerole.arn")] + pub assume_role_arn: Option, + #[serde( + rename = "aws.credentials.role.external_id", + alias = "kinesis.assumerole.external_id" + )] + pub assume_role_external_id: Option, +} + +impl KinesisCommon { + pub(crate) async fn build_client(&self) -> anyhow::Result { + let config = AwsConfigInfo::build(self.clone())?; + let aws_config = config.load().await?; + let mut builder = aws_sdk_kinesis::config::Builder::from(&aws_config); + if let Some(endpoint) = &config.endpoint { + let uri = endpoint.clone().parse::().unwrap(); + builder = + builder.endpoint_resolver(aws_smithy_http::endpoint::Endpoint::immutable(uri)); + } + Ok(KinesisClient::from_conf(builder.build())) + } +} diff --git a/src/connector/src/error.rs b/src/connector/src/error.rs index 4bd07d02be858..23a24f3768ae9 100644 --- a/src/connector/src/error.rs +++ b/src/connector/src/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/lib.rs b/src/connector/src/lib.rs index d56c0727af95d..ea2a45df11e1d 100644 --- a/src/connector/src/lib.rs +++ b/src/connector/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
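KafkaCommon and KinesisCommon above rely on #[serde(rename = ..., alias = ...)] so that a property map accepts both the new `properties.*` keys and the legacy `kafka.*` / `kinesis.*` ones. A reduced sketch of that dual-key deserialization, assuming serde (with derive) and serde_json as already pulled in by the connector crate; the struct here is hypothetical:

use serde::Deserialize;

// Reduced stand-in for `KafkaCommon`: either key form deserializes into the same field.
#[derive(Debug, Deserialize)]
struct DemoCommon {
    #[serde(rename = "properties.bootstrap.server", alias = "kafka.brokers")]
    brokers: String,
    #[serde(rename = "topic", alias = "kafka.topic")]
    topic: String,
}

fn main() {
    let new_style = serde_json::json!({
        "properties.bootstrap.server": "broker:9092",
        "topic": "t1",
    });
    let legacy_style = serde_json::json!({
        "kafka.brokers": "broker:9092",
        "kafka.topic": "t1",
    });

    let a: DemoCommon = serde_json::from_value(new_style).unwrap();
    let b: DemoCommon = serde_json::from_value(legacy_style).unwrap();
    assert_eq!(a.brokers, b.brokers);
    assert_eq!(a.topic, b.topic);
    println!("{a:?}");
}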
@@ -27,22 +27,21 @@ #![feature(box_into_inner)] #![feature(type_alias_impl_trait)] -use std::collections::HashMap; +use std::time::Duration; -use futures::stream::BoxStream; -use risingwave_common::array::StreamChunk; -use risingwave_common::error::RwError; -use source::SplitId; +use duration_str::parse_std; +use serde::de; pub mod aws_utils; pub mod error; mod macros; -mod manager; -pub use manager::SourceColumnDesc; + pub mod parser; pub mod sink; pub mod source; +pub mod common; + #[derive(Clone, Debug, Default)] pub struct ConnectorParams { pub connector_rpc_endpoint: Option, @@ -56,35 +55,31 @@ impl ConnectorParams { } } -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum SourceFormat { - Invalid, - Json, - Protobuf, - DebeziumJson, - Avro, - Maxwell, - CanalJson, - Csv, -} - -pub type BoxSourceWithStateStream = BoxStream<'static, Result>; - -/// [`StreamChunkWithState`] returns stream chunk together with offset for each split. In the -/// current design, one connector source can have multiple split reader. The keys are unique -/// `split_id` and values are the latest offset for each split. -#[derive(Clone, Debug)] -pub struct StreamChunkWithState { - pub chunk: StreamChunk, - pub split_offset_mapping: Option>, +pub(crate) fn deserialize_bool_from_string<'de, D>(deserializer: D) -> Result +where + D: de::Deserializer<'de>, +{ + let s: String = de::Deserialize::deserialize(deserializer)?; + let s = s.to_ascii_lowercase(); + match s.as_str() { + "true" => Ok(true), + "false" => Ok(false), + _ => Err(de::Error::invalid_value( + de::Unexpected::Str(&s), + &"true or false", + )), + } } -/// The `split_offset_mapping` field is unused for the table source, so we implement `From` for it. -impl From for StreamChunkWithState { - fn from(chunk: StreamChunk) -> Self { - Self { - chunk, - split_offset_mapping: None, - } - } +pub(crate) fn deserialize_duration_from_string<'de, D>( + deserializer: D, +) -> Result +where + D: de::Deserializer<'de>, +{ + let s: String = de::Deserialize::deserialize(deserializer)?; + parse_std(&s).map_err(|_| de::Error::invalid_value( + de::Unexpected::Str(&s), + &"The String value unit support for one of:[“y”,“mon”,“w”,“d”,“h”,“m”,“s”, “ms”, “µs”, “ns”]", + )) } diff --git a/src/connector/src/macros.rs b/src/connector/src/macros.rs index d5cecb12f03c4..f369bf1ef9845 100644 --- a/src/connector/src/macros.rs +++ b/src/connector/src/macros.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -106,24 +106,25 @@ macro_rules! impl_split { macro_rules! 
impl_split_reader { ($({ $variant_name:ident, $split_reader_name:ident} ),*) => { impl SplitReaderImpl { - pub fn into_stream(self) -> BoxSourceStream { + pub fn into_stream(self) -> BoxSourceWithStateStream { match self { - $( Self::$variant_name(inner) => $crate::source::SplitReader::into_stream(*inner), )* - } + $( Self::$variant_name(inner) => inner.into_stream(), )* } } pub async fn create( config: ConnectorProperties, state: ConnectorState, + parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, columns: Option>, ) -> Result { if state.is_none() { return Ok(Self::Dummy(Box::new(DummySplitReader {}))); } - + let splits = state.unwrap(); let connector = match config { - $( ConnectorProperties::$variant_name(props) => Self::$variant_name(Box::new(<$split_reader_name as $crate::source::SplitReader>::new(*props, state, columns).await?)), )* - _ => todo!() + $( ConnectorProperties::$variant_name(props) => Self::$variant_name(Box::new($split_reader_name::new(*props, splits, parser_config, metrics, source_info, columns).await?)), )* }; Ok(connector) @@ -158,3 +159,118 @@ macro_rules! impl_connector_properties { } } } + +#[macro_export] +macro_rules! impl_common_parser_logic { + ($parser_name:ty) => { + impl $parser_name { + #[try_stream(boxed, ok = $crate::source::StreamChunkWithState, error = RwError)] + async fn into_chunk_stream(self, data_stream: $crate::source::BoxSourceStream) { + #[for_await] + for batch in data_stream { + let batch = batch?; + let mut builder = + $crate::parser::SourceStreamChunkBuilder::with_capacity(self.rw_columns.clone(), batch.len()); + let mut split_offset_mapping: std::collections::HashMap<$crate::source::SplitId, String> = std::collections::HashMap::new(); + + for msg in batch { + if let Some(content) = msg.payload { + split_offset_mapping.insert(msg.split_id, msg.offset); + + let old_op_num = builder.op_num(); + + if let Err(e) = self.parse_inner(content.as_ref(), builder.row_writer()) + .await + { + tracing::warn!("message parsing failed {}, skipping", e.to_string()); + continue; + } + + let new_op_num = builder.op_num(); + + // new_op_num - old_op_num is the number of rows added to the builder + for _ in old_op_num..new_op_num { + // TODO: support more kinds of SourceMeta + if let $crate::source::SourceMeta::Kafka(kafka_meta) = msg.meta.clone() { + let f = |desc: &SourceColumnDesc| -> Option { + if !desc.is_meta { + return None; + } + match desc.name.as_str() { + "_rw_kafka_timestamp" => Some( + kafka_meta + .timestamp + .map(|ts| risingwave_expr::vector_op::cast::i64_to_timestamptz(ts).unwrap().into()), + ), + _ => unreachable!( + "kafka will not have this meta column: {}", + desc.name + ), + } + }; + builder.row_writer().fulfill_meta_column(f)?; + } + } + } + } + yield $crate::source::StreamChunkWithState { + chunk: builder.finish(), + split_offset_mapping: Some(split_offset_mapping), + }; + } + } + } + + impl $crate::parser::ByteStreamSourceParser for $parser_name { + fn into_stream(self, data_stream: $crate::source::BoxSourceStream) -> $crate::source::BoxSourceWithStateStream { + self.into_chunk_stream(data_stream) + } + } + + } +} + +#[macro_export] +macro_rules! 
impl_common_split_reader_logic { + ($reader:ty, $props:ty) => { + impl $reader { + #[try_stream(boxed, ok = $crate::source::StreamChunkWithState, error = risingwave_common::error::RwError)] + pub(crate) async fn into_chunk_stream(self) { + let parser_config = self.parser_config.clone(); + let actor_id = self.source_info.actor_id.to_string(); + let source_id = self.source_info.source_id.to_string(); + let split_id = self.split_id.clone(); + let metrics = self.metrics.clone(); + + let data_stream = self.into_data_stream(); + + let data_stream = data_stream + .map_ok(move |data_batch| { + metrics + .partition_input_count + .with_label_values(&[&actor_id, &source_id, &split_id]) + .inc_by(data_batch.len() as u64); + let sum_bytes = data_batch + .iter() + .map(|msg| match &msg.payload { + None => 0, + Some(payload) => payload.len() as u64, + }) + .sum(); + metrics + .partition_input_bytes + .with_label_values(&[&actor_id, &source_id, &split_id]) + .inc_by(sum_bytes); + data_batch + }) + .boxed(); + let parser = + $crate::parser::ByteStreamSourceParserImpl::create(parser_config)?; + #[for_await] + for msg_batch in parser.into_stream(data_stream) { + yield msg_batch?; + } + } + } + }; +} diff --git a/src/connector/src/parser/avro/mod.rs b/src/connector/src/parser/avro/mod.rs index c284b44bfdf00..77aba903ef447 100644 --- a/src/connector/src/parser/avro/mod.rs +++ b/src/connector/src/parser/avro/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ // limitations under the License. mod parser; -mod schema_resolver; +pub mod schema_resolver; +pub mod util; pub use parser::*; diff --git a/src/connector/src/parser/avro/parser.rs b/src/connector/src/parser/avro/parser.rs index 6d350231e48ee..25e41e2cca7a3 100644 --- a/src/connector/src/parser/avro/parser.rs +++ b/src/connector/src/parser/avro/parser.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
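To make `impl_common_split_reader_logic!` above easier to follow, here is a minimal standalone sketch of the pattern its expansion follows: wrap the raw batch stream so a byte counter is bumped per batch, then hand the unchanged batches to the next stage. `SourceMessage`, the atomic counter, and the error type are simplified stand-ins, not the crate's real definitions.

use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;

use futures::executor::block_on;
use futures::stream::{self, TryStreamExt};

// Simplified stand-in for a source message carrying an optional payload.
struct SourceMessage {
    payload: Option<Vec<u8>>,
}

fn main() {
    let input_bytes = Arc::new(AtomicU64::new(0));
    let counter = input_bytes.clone();

    // A fake upstream of message batches, as a split reader would yield them.
    let data_stream = stream::iter(vec![Ok::<_, std::io::Error>(vec![
        SourceMessage { payload: Some(b"hello".to_vec()) },
        SourceMessage { payload: None },
    ])]);

    // Same shape as the macro body: observe each batch to update the byte
    // counter, then pass the batch along unchanged to the parser stage.
    let instrumented = data_stream.map_ok(move |batch| {
        let sum: u64 = batch
            .iter()
            .map(|msg| msg.payload.as_ref().map_or(0, |p| p.len() as u64))
            .sum();
        counter.fetch_add(sum, Ordering::Relaxed);
        batch
    });

    let batches: Vec<Vec<SourceMessage>> = block_on(instrumented.try_collect()).unwrap();
    assert_eq!(batches.len(), 1);
    assert_eq!(input_bytes.load(Ordering::Relaxed), 5);
}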
@@ -14,54 +14,56 @@ use std::collections::HashMap; use std::fmt::Debug; +use std::sync::Arc; use apache_avro::types::Value; use apache_avro::{from_avro_datum, Reader, Schema}; -use chrono::Datelike; -use itertools::Itertools; -use risingwave_common::array::{ListValue, StructValue}; +use futures_async_stream::try_stream; use risingwave_common::error::ErrorCode::{InternalError, ProtocolError}; use risingwave_common::error::{Result, RwError}; -use risingwave_common::types::{ - DataType, Datum, IntervalUnit, NaiveDateTimeWrapper, NaiveDateWrapper, OrderedF32, OrderedF64, - ScalarImpl, -}; use risingwave_pb::plan_common::ColumnDesc; use url::Url; use super::schema_resolver::*; +use super::util::{extract_inner_field_schema, from_avro_value}; +use crate::impl_common_parser_logic; +use crate::parser::avro::util::avro_field_to_column_desc; use crate::parser::schema_registry::{extract_schema_id, Client}; use crate::parser::util::get_kafka_topic; -use crate::parser::{ParseFuture, SourceParser, SourceStreamChunkRowWriter, WriteGuard}; +use crate::parser::{SourceStreamChunkRowWriter, WriteGuard}; +use crate::source::SourceColumnDesc; -fn unix_epoch_days() -> i32 { - NaiveDateWrapper::from_ymd_uncheck(1970, 1, 1) - .0 - .num_days_from_ce() -} +impl_common_parser_logic!(AvroParser); #[derive(Debug)] pub struct AvroParser { - schema: Schema, - schema_resolver: Option, + schema: Arc, + schema_resolver: Option>, + rw_columns: Vec, } -// confluent_wire_format, kafka only, subject-name: "${topic-name}-value" -impl AvroParser { + +#[derive(Debug, Clone)] +pub struct AvroParserConfig { + pub schema: Arc, + pub schema_resolver: Option>, +} + +impl AvroParserConfig { pub async fn new( + props: &HashMap, schema_location: &str, use_schema_registry: bool, - props: HashMap, ) -> Result { let url = Url::parse(schema_location).map_err(|e| { InternalError(format!("failed to parse url ({}): {}", schema_location, e)) })?; let (schema, schema_resolver) = if use_schema_registry { - let kafka_topic = get_kafka_topic(&props)?; - let client = Client::new(url, &props)?; + let kafka_topic = get_kafka_topic(props)?; + let client = Client::new(url, props)?; let (schema, resolver) = ConfluentSchemaResolver::new(format!("{}-value", kafka_topic).as_str(), client) .await?; - (schema, Some(resolver)) + (Arc::new(schema), Some(Arc::new(resolver))) } else { let schema_content = match url.scheme() { "file" => read_schema_from_local(url.path()), @@ -75,7 +77,7 @@ impl AvroParser { let schema = Schema::parse_str(&schema_content).map_err(|e| { RwError::from(InternalError(format!("Avro schema parse error {}", e))) })?; - (schema, None) + (Arc::new(schema), None) }; Ok(Self { schema, @@ -85,15 +87,12 @@ impl AvroParser { pub fn map_to_columns(&self) -> Result> { // there must be a record at top level - if let Schema::Record { fields, .. } = &self.schema { + if let Schema::Record { fields, .. } = self.schema.as_ref() { let mut index = 0; let fields = fields .iter() - .map(|field| { - Self::avro_field_to_column_desc(&field.name, &field.schema, &mut index) - }) + .map(|field| avro_field_to_column_desc(&field.name, &field.schema, &mut index)) .collect::>>()?; - tracing::info!("fields is {:?}", fields); Ok(fields) } else { Err(RwError::from(InternalError( @@ -101,83 +100,23 @@ impl AvroParser { ))) } } +} - fn avro_field_to_column_desc( - name: &str, - schema: &Schema, - index: &mut i32, - ) -> Result { - let data_type = Self::avro_type_mapping(schema)?; - match schema { - Schema::Record { - name: schema_name, - fields, - .. 
- } => { - let vec_column = fields - .iter() - .map(|f| Self::avro_field_to_column_desc(&f.name, &f.schema, index)) - .collect::>>()?; - *index += 1; - Ok(ColumnDesc { - column_type: Some(data_type.to_protobuf()), - column_id: *index, - name: name.to_owned(), - field_descs: vec_column, - type_name: schema_name.to_string(), - }) - } - _ => { - *index += 1; - Ok(ColumnDesc { - column_type: Some(data_type.to_protobuf()), - column_id: *index, - name: name.to_owned(), - ..Default::default() - }) - } - } - } - - fn avro_type_mapping(schema: &Schema) -> Result { - let data_type = match schema { - Schema::String => DataType::Varchar, - Schema::Int => DataType::Int32, - Schema::Long => DataType::Int64, - Schema::Boolean => DataType::Boolean, - Schema::Float => DataType::Float32, - Schema::Double => DataType::Float64, - Schema::Date => DataType::Date, - Schema::TimestampMillis => DataType::Timestamp, - Schema::TimestampMicros => DataType::Timestamp, - Schema::Duration => DataType::Interval, - Schema::Enum { .. } => DataType::Varchar, - Schema::Record { fields, .. } => { - let struct_fields = fields - .iter() - .map(|f| Self::avro_type_mapping(&f.schema)) - .collect::>>()?; - let struct_names = fields.iter().map(|f| f.name.clone()).collect_vec(); - DataType::new_struct(struct_fields, struct_names) - } - Schema::Array(item_schema) => { - let item_type = Self::avro_type_mapping(item_schema.as_ref())?; - DataType::List { - datatype: Box::new(item_type), - } - } - _ => { - return Err(RwError::from(InternalError(format!( - "unsupported type in Avro: {:?}", - schema - )))); - } - }; - - Ok(data_type) +// confluent_wire_format, kafka only, subject-name: "${topic-name}-value" +impl AvroParser { + pub fn new(rw_columns: Vec, config: AvroParserConfig) -> Result { + let AvroParserConfig { + schema, + schema_resolver, + } = config; + Ok(Self { + schema, + schema_resolver, + rw_columns, + }) } - async fn parse_inner( + pub(crate) async fn parse_inner( &self, payload: &[u8], mut writer: SourceStreamChunkRowWriter<'_>, @@ -198,15 +137,25 @@ impl AvroParser { None => { return Err(RwError::from(ProtocolError( "avro parse unexpected eof".to_string(), - ))) + ))); } } }; - // parse the valur to rw value + + // parse the value to rw value if let Value::Record(fields) = avro_value { writer.insert(|column| { - let tuple = fields.iter().find(|val| column.name.eq(&val.0)).unwrap(); - from_avro_value(tuple.1.clone()).map_err(|e| { + let tuple = fields + .iter() + .find(|val| column.name.eq(&val.0)) + .ok_or_else(|| { + RwError::from(InternalError(format!( + "no field named {} in avro msg", + column.name + ))) + })?; + let field_schema = extract_inner_field_schema(&self.schema, Some(&column.name))?; + from_avro_value(tuple.1.clone(), field_schema).map_err(|e| { tracing::error!( "failed to process value ({}): {}", String::from_utf8_lossy(payload), @@ -223,102 +172,6 @@ impl AvroParser { } } -/// Convert Avro value to datum.For now, support the following [Avro type](https://avro.apache.org/docs/current/spec.html). 
-/// - boolean -/// - int : i32 -/// - long: i64 -/// - float: f32 -/// - double: f64 -/// - string: String -/// - Date (the number of days from the unix epoch, 1970-1-1 UTC) -/// - Timestamp (the number of milliseconds from the unix epoch, 1970-1-1 00:00:00.000 UTC) -#[inline] -fn from_avro_value(value: Value) -> Result { - let v = match value { - Value::Boolean(b) => ScalarImpl::Bool(b), - Value::String(s) => ScalarImpl::Utf8(s.into_boxed_str()), - Value::Int(i) => ScalarImpl::Int32(i), - Value::Long(i) => ScalarImpl::Int64(i), - Value::Float(f) => ScalarImpl::Float32(OrderedF32::from(f)), - Value::Double(f) => ScalarImpl::Float64(OrderedF64::from(f)), - Value::Date(days) => ScalarImpl::NaiveDate( - NaiveDateWrapper::with_days(days + unix_epoch_days()).map_err(|e| { - let err_msg = format!("avro parse error.wrong date value {}, err {:?}", days, e); - RwError::from(InternalError(err_msg)) - })?, - ), - Value::TimestampMillis(millis) => ScalarImpl::NaiveDateTime( - NaiveDateTimeWrapper::with_secs_nsecs( - millis / 1_000, - (millis % 1_000) as u32 * 1_000_000, - ) - .map_err(|e| { - let err_msg = format!( - "avro parse error.wrong timestamp millis value {}, err {:?}", - millis, e - ); - RwError::from(InternalError(err_msg)) - })?, - ), - Value::TimestampMicros(micros) => ScalarImpl::NaiveDateTime( - NaiveDateTimeWrapper::with_secs_nsecs( - micros / 1_000_000, - (micros % 1_000_000) as u32 * 1_000, - ) - .map_err(|e| { - let err_msg = format!( - "avro parse error.wrong timestamp micros value {}, err {:?}", - micros, e - ); - RwError::from(InternalError(err_msg)) - })?, - ), - Value::Duration(duration) => { - let months = u32::from(duration.months()) as i32; - let days = u32::from(duration.days()) as i32; - let millis = u32::from(duration.millis()) as i64; - ScalarImpl::Interval(IntervalUnit::new(months, days, millis)) - } - Value::Enum(_, symbol) => ScalarImpl::Utf8(symbol.into_boxed_str()), - Value::Record(descs) => { - let rw_values = descs - .into_iter() - .map(|(_, value)| from_avro_value(value)) - .collect::>>()?; - ScalarImpl::Struct(StructValue::new(rw_values)) - } - Value::Array(values) => { - let rw_values = values - .into_iter() - .map(from_avro_value) - .collect::>>()?; - ScalarImpl::List(ListValue::new(rw_values)) - } - _ => { - let err_msg = format!("avro parse error.unsupported value {:?}", value); - return Err(RwError::from(InternalError(err_msg))); - } - }; - - Ok(Some(v)) -} - -impl SourceParser for AvroParser { - type ParseResult<'a> = impl ParseFuture<'a, Result>; - - fn parse<'a, 'b, 'c>( - &'a self, - payload: &'b [u8], - writer: SourceStreamChunkRowWriter<'c>, - ) -> Self::ParseResult<'a> - where - 'b: 'a, - 'c: 'a, - { - self.parse_inner(payload, writer) - } -} - #[cfg(test)] mod test { use std::collections::HashMap; @@ -326,7 +179,8 @@ mod test { use std::ops::Sub; use apache_avro::types::{Record, Value}; - use apache_avro::{Codec, Days, Duration, Millis, Months, Schema, Writer}; + use apache_avro::{Codec, Days, Duration, Millis, Months, Reader, Schema, Writer}; + use itertools::Itertools; use risingwave_common::array::Op; use risingwave_common::catalog::ColumnId; use risingwave_common::error; @@ -337,11 +191,12 @@ mod test { use url::Url; use super::{ - read_schema_from_http, read_schema_from_local, read_schema_from_s3, unix_epoch_days, - AvroParser, + read_schema_from_http, read_schema_from_local, read_schema_from_s3, AvroParser, + AvroParserConfig, }; - use crate::parser::{SourceParser, SourceStreamChunkBuilder}; - use crate::SourceColumnDesc; + use 
crate::parser::avro::util::unix_epoch_days; + use crate::parser::SourceStreamChunkBuilder; + use crate::source::SourceColumnDesc; fn test_data_path(file_name: &str) -> String { let curr_dir = env::current_dir().unwrap().into_os_string(); @@ -362,7 +217,7 @@ mod test { let mut s3_config_props = HashMap::new(); s3_config_props.insert("region".to_string(), "ap-southeast-1".to_string()); let url = Url::parse(&schema_location).unwrap(); - let schema_content = read_schema_from_s3(&url, s3_config_props).await; + let schema_content = read_schema_from_s3(&url, &s3_config_props).await; assert!(schema_content.is_ok()); let schema = Schema::parse_str(&schema_content.unwrap()); assert!(schema.is_ok()); @@ -392,22 +247,26 @@ mod test { println!("schema = {:?}", schema.unwrap()); } - async fn new_avro_parser_from_local(file_name: &str) -> error::Result { + async fn new_avro_conf_from_local(file_name: &str) -> error::Result { let schema_path = "file://".to_owned() + &test_data_path(file_name); - AvroParser::new(schema_path.as_str(), false, HashMap::new()).await + AvroParserConfig::new(&HashMap::new(), schema_path.as_str(), false).await + } + + async fn new_avro_parser_from_local(file_name: &str) -> error::Result { + let conf = new_avro_conf_from_local(file_name).await?; + AvroParser::new(Vec::default(), conf) } #[tokio::test] async fn test_avro_parser() { - let avro_parser_rs = new_avro_parser_from_local("simple-schema.avsc").await; - assert!(avro_parser_rs.is_ok()); - let avro_parser = avro_parser_rs.unwrap(); + let avro_parser = new_avro_parser_from_local("simple-schema.avsc") + .await + .unwrap(); let schema = &avro_parser.schema; let record = build_avro_data(schema); assert_eq!(record.fields.len(), 10); let mut writer = Writer::with_codec(schema, Vec::new(), Codec::Snappy); - let append_rs = writer.append(record.clone()); - assert!(append_rs.is_ok()); + writer.append(record.clone()).unwrap(); let flush = writer.flush().unwrap(); assert!(flush > 0); let input_data = writer.into_inner().unwrap(); @@ -415,7 +274,10 @@ mod test { let mut builder = SourceStreamChunkBuilder::with_capacity(columns, 1); { let writer = builder.row_writer(); - avro_parser.parse(&input_data[..], writer).await.unwrap(); + avro_parser + .parse_inner(&input_data[..], writer) + .await + .unwrap(); } let chunk = builder.finish(); let (op, row) = chunk.rows().next().unwrap(); @@ -424,7 +286,7 @@ mod test { for (i, field) in record.fields.iter().enumerate() { let value = field.clone().1; match value { - Value::String(str) => { + Value::String(str) | Value::Union(_, box Value::String(str)) => { assert_eq!(row[i], Some(ScalarImpl::Utf8(str.into_boxed_str()))); } Value::Boolean(bool_val) => { @@ -569,6 +431,65 @@ mod test { ] } + fn build_field(schema: &Schema) -> Option { + match schema { + Schema::String => Some(Value::String("str_value".to_string())), + Schema::Int => Some(Value::Int(32_i32)), + Schema::Long => Some(Value::Long(64_i64)), + Schema::Float => Some(Value::Float(32_f32)), + Schema::Double => Some(Value::Double(64_f64)), + Schema::Boolean => Some(Value::Boolean(true)), + + Schema::Date => { + let original_date = + NaiveDateWrapper::from_ymd_uncheck(1970, 1, 1).and_hms_uncheck(0, 0, 0); + let naive_date = + NaiveDateWrapper::from_ymd_uncheck(1970, 1, 1).and_hms_uncheck(0, 0, 0); + let num_days = naive_date.0.sub(original_date.0).num_days() as i32; + Some(Value::Date(num_days)) + } + Schema::TimestampMillis => { + let datetime = + NaiveDateWrapper::from_ymd_uncheck(1970, 1, 1).and_hms_uncheck(0, 0, 0); + let timestamp_mills = 
Value::TimestampMillis(datetime.0.timestamp() * 1_000); + Some(timestamp_mills) + } + Schema::TimestampMicros => { + let datetime = + NaiveDateWrapper::from_ymd_uncheck(1970, 1, 1).and_hms_uncheck(0, 0, 0); + let timestamp_micros = Value::TimestampMicros(datetime.0.timestamp() * 1_000_000); + Some(timestamp_micros) + } + Schema::Duration => { + let months = Months::new(1); + let days = Days::new(1); + let millis = Millis::new(1000); + Some(Value::Duration(Duration::new(months, days, millis))) + } + + Schema::Union(union_schema) => { + let inner_schema = union_schema + .variants() + .iter() + .find_or_first(|s| s != &&Schema::Null) + .unwrap(); + + match build_field(inner_schema) { + None => { + let index_of_union = + union_schema.find_schema(&Value::Null).unwrap().0 as u32; + Some(Value::Union(index_of_union, Box::new(Value::Null))) + } + Some(value) => { + let index_of_union = union_schema.find_schema(&value).unwrap().0 as u32; + Some(Value::Union(index_of_union, Box::new(value))) + } + } + } + _ => None, + } + } + fn build_avro_data(schema: &Schema) -> Record<'_> { let mut record = Record::new(schema).unwrap(); if let Schema::Record { @@ -576,60 +497,9 @@ mod test { } = schema.clone() { for field in &fields { - match field.schema { - Schema::String => { - record.put(field.name.as_str(), "str_value".to_string()); - } - Schema::Int => { - record.put(field.name.as_str(), 32_i32); - } - Schema::Long => { - record.put(field.name.as_str(), 64_i64); - } - Schema::Float => { - record.put(field.name.as_str(), 32_f32); - } - Schema::Double => { - record.put(field.name.as_str(), 64_f64); - } - Schema::Boolean => { - record.put(field.name.as_str(), true); - } - Schema::Date => { - let original_date = - NaiveDateWrapper::from_ymd_uncheck(1970, 1, 1).and_hms_uncheck(0, 0, 0); - let naive_date = - NaiveDateWrapper::from_ymd_uncheck(1970, 1, 1).and_hms_uncheck(0, 0, 0); - let num_days = naive_date.0.sub(original_date.0).num_days() as i32; - record.put(field.name.as_str(), Value::Date(num_days)); - } - Schema::TimestampMillis => { - let datetime = - NaiveDateWrapper::from_ymd_uncheck(1970, 1, 1).and_hms_uncheck(0, 0, 0); - let timestamp_mills = - Value::TimestampMillis(datetime.0.timestamp() * 1_000); - record.put(field.name.as_str(), timestamp_mills); - } - Schema::TimestampMicros => { - let datetime = - NaiveDateWrapper::from_ymd_uncheck(1970, 1, 1).and_hms_uncheck(0, 0, 0); - let timestamp_micros = - Value::TimestampMicros(datetime.0.timestamp() * 1_000_000); - record.put(field.name.as_str(), timestamp_micros); - } - Schema::Duration => { - let months = Months::new(1); - let days = Days::new(1); - let millis = Millis::new(1000); - record.put( - field.name.as_str(), - Value::Duration(Duration::new(months, days, millis)), - ); - } - _ => { - unreachable!() - } - } + let value = build_field(&field.schema) + .unwrap_or_else(|| panic!("No value defined for field, {}", field.name)); + record.put(field.name.as_str(), value) } } record @@ -637,10 +507,12 @@ mod test { #[tokio::test] async fn test_map_to_columns() { - let avro_parser_rs = new_avro_parser_from_local("simple-schema.avsc") + let conf = new_avro_conf_from_local("simple-schema.avsc") .await .unwrap(); - println!("{:?}", avro_parser_rs.map_to_columns().unwrap()); + let columns = conf.map_to_columns().unwrap(); + assert_eq!(columns.len(), 10); + println!("{:?}", columns); } #[tokio::test] @@ -650,4 +522,73 @@ mod test { let avro_parser = avro_parser_rs.unwrap(); println!("avro_parser = {:?}", avro_parser); } + + #[tokio::test] + async fn 
test_avro_union_type() { + let avro_parser = new_avro_parser_from_local("union-schema.avsc") + .await + .unwrap(); + let schema = &avro_parser.schema; + let mut null_record = Record::new(schema).unwrap(); + null_record.put("id", Value::Int(5)); + null_record.put("age", Value::Union(0, Box::new(Value::Null))); + null_record.put("sequence_id", Value::Union(0, Box::new(Value::Null))); + null_record.put("name", Value::Union(0, Box::new(Value::Null))); + null_record.put("score", Value::Union(1, Box::new(Value::Null))); + null_record.put("avg_score", Value::Union(0, Box::new(Value::Null))); + null_record.put("is_lasted", Value::Union(0, Box::new(Value::Null))); + null_record.put("entrance_date", Value::Union(0, Box::new(Value::Null))); + null_record.put("birthday", Value::Union(0, Box::new(Value::Null))); + null_record.put("anniversary", Value::Union(0, Box::new(Value::Null))); + + let mut writer = Writer::new(schema, Vec::new()); + writer.append(null_record).unwrap(); + writer.flush().unwrap(); + + let record = build_avro_data(schema); + writer.append(record).unwrap(); + writer.flush().unwrap(); + + let records = writer.into_inner().unwrap(); + + let reader: Vec<_> = Reader::with_schema(schema, &records[..]).unwrap().collect(); + assert_eq!(2, reader.len()); + let null_record_expected: Vec<(String, Value)> = vec![ + ("id".to_string(), Value::Int(5)), + ("age".to_string(), Value::Union(0, Box::new(Value::Null))), + ( + "sequence_id".to_string(), + Value::Union(0, Box::new(Value::Null)), + ), + ("name".to_string(), Value::Union(0, Box::new(Value::Null))), + ("score".to_string(), Value::Union(1, Box::new(Value::Null))), + ( + "avg_score".to_string(), + Value::Union(0, Box::new(Value::Null)), + ), + ( + "is_lasted".to_string(), + Value::Union(0, Box::new(Value::Null)), + ), + ( + "entrance_date".to_string(), + Value::Union(0, Box::new(Value::Null)), + ), + ( + "birthday".to_string(), + Value::Union(0, Box::new(Value::Null)), + ), + ( + "anniversary".to_string(), + Value::Union(0, Box::new(Value::Null)), + ), + ]; + let null_record_value = reader.get(0).unwrap().as_ref().unwrap(); + match null_record_value { + Value::Record(values) => { + assert_eq!(values, &null_record_expected) + } + _ => unreachable!(), + } + } } diff --git a/src/connector/src/parser/avro/schema_resolver.rs b/src/connector/src/parser/avro/schema_resolver.rs index cb65e8ce930fb..6e30599dec547 100644 --- a/src/connector/src/parser/avro/schema_resolver.rs +++ b/src/connector/src/parser/avro/schema_resolver.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -32,7 +32,7 @@ const AVRO_SCHEMA_LOCATION_S3_REGION: &str = "region"; /// S3 file location format: pub(super) async fn read_schema_from_s3( url: &Url, - properties: HashMap, + properties: &HashMap, ) -> Result { let bucket = url .domain() diff --git a/src/connector/src/parser/avro/util.rs b/src/connector/src/parser/avro/util.rs new file mode 100644 index 0000000000000..4761855bd7970 --- /dev/null +++ b/src/connector/src/parser/avro/util.rs @@ -0,0 +1,367 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use apache_avro::types::Value; +use apache_avro::{Decimal as AvroDecimal, Schema}; +use chrono::Datelike; +use itertools::Itertools; +use risingwave_common::array::{ListValue, StructValue}; +use risingwave_common::error::ErrorCode::{InternalError, ProtocolError}; +use risingwave_common::error::{Result, RwError}; +use risingwave_common::types::{ + DataType, Datum, IntervalUnit, NaiveDateTimeWrapper, NaiveDateWrapper, OrderedF32, OrderedF64, + ScalarImpl, +}; +use risingwave_pb::plan_common::ColumnDesc; + +const RW_DECIMAL_MAX_PRECISION: usize = 28; + +pub(crate) fn avro_field_to_column_desc( + name: &str, + schema: &Schema, + index: &mut i32, +) -> Result { + let data_type = avro_type_mapping(schema)?; + match schema { + Schema::Record { + name: schema_name, + fields, + .. + } => { + let vec_column = fields + .iter() + .map(|f| avro_field_to_column_desc(&f.name, &f.schema, index)) + .collect::>>()?; + *index += 1; + Ok(ColumnDesc { + column_type: Some(data_type.to_protobuf()), + column_id: *index, + name: name.to_owned(), + field_descs: vec_column, + type_name: schema_name.to_string(), + }) + } + _ => { + *index += 1; + Ok(ColumnDesc { + column_type: Some(data_type.to_protobuf()), + column_id: *index, + name: name.to_owned(), + ..Default::default() + }) + } + } +} + +fn avro_type_mapping(schema: &Schema) -> Result { + let data_type = match schema { + Schema::String => DataType::Varchar, + Schema::Int => DataType::Int32, + Schema::Long => DataType::Int64, + Schema::Boolean => DataType::Boolean, + Schema::Float => DataType::Float32, + Schema::Double => DataType::Float64, + Schema::Decimal { .. } => DataType::Decimal, + Schema::Date => DataType::Date, + Schema::TimestampMillis => DataType::Timestamp, + Schema::TimestampMicros => DataType::Timestamp, + Schema::Duration => DataType::Interval, + Schema::Enum { .. } => DataType::Varchar, + Schema::Record { fields, .. } => { + let struct_fields = fields + .iter() + .map(|f| avro_type_mapping(&f.schema)) + .collect::>>()?; + let struct_names = fields.iter().map(|f| f.name.clone()).collect_vec(); + DataType::new_struct(struct_fields, struct_names) + } + Schema::Array(item_schema) => { + let item_type = avro_type_mapping(item_schema.as_ref())?; + DataType::List { + datatype: Box::new(item_type), + } + } + Schema::Union(union_schema) => { + let nested_schema = union_schema + .variants() + .iter() + .find_or_first(|s| **s != Schema::Null) + .ok_or_else(|| { + RwError::from(InternalError(format!( + "unsupported type in Avro: {:?}", + union_schema + ))) + })?; + + avro_type_mapping(nested_schema)? 
+ } + _ => { + return Err(RwError::from(InternalError(format!( + "unsupported type in Avro: {:?}", + schema + )))); + } + }; + + Ok(data_type) +} + +pub(crate) fn get_field_from_avro_value<'a>( + avro_value: &'a Value, + field_name: &str, +) -> Result<&'a Value> { + match avro_value { + Value::Record(fields) => fields + .iter() + .find(|val| val.0.eq(field_name)) + .map(|entry| &entry.1) + .ok_or_else(|| { + RwError::from(ProtocolError(format!( + "field {} not found in debezium event", + field_name + ))) + }), + Value::Union(_, boxed_value) => get_field_from_avro_value(boxed_value.as_ref(), field_name), + _ => Err(RwError::from(ProtocolError(format!( + "avro parse unexpected field {}", + field_name + )))), + } +} + +pub(crate) fn avro_decimal_to_rust_decimal( + avro_decimal: AvroDecimal, + precision: usize, + scale: usize, +) -> Result { + if precision > RW_DECIMAL_MAX_PRECISION { + return Err(RwError::from(ProtocolError(format!( + "only support decimal with max precision {} but given avro decimal with precision {}", + RW_DECIMAL_MAX_PRECISION, precision + )))); + } + + let negative = !avro_decimal.is_positive(); + let bytes = avro_decimal.to_vec_unsigned(); + + let (lo, mid, hi) = match bytes.len() { + len @ 0..=4 => { + let mut pad = vec![0; 4 - len]; + pad.extend_from_slice(&bytes); + let lo = u32::from_be_bytes(pad.try_into().unwrap()); + (lo, 0, 0) + } + len @ 5..=8 => { + let mid = u32::from_be_bytes(bytes[..4].to_owned().try_into().unwrap()); + let mut pad = vec![0; 8 - len]; + pad.extend_from_slice(&bytes[4..]); + let lo = u32::from_be_bytes(pad.try_into().unwrap()); + (lo, mid, 0) + } + len @ 9..=12 => { + let hi = u32::from_be_bytes(bytes[..4].to_owned().try_into().unwrap()); + let mid = u32::from_be_bytes(bytes[4..8].to_owned().try_into().unwrap()); + let mut pad = vec![0; 12 - len]; + pad.extend_from_slice(&bytes[8..]); + let lo = u32::from_be_bytes(pad.try_into().unwrap()); + (lo, mid, hi) + } + _ => unreachable!(), + }; + Ok(rust_decimal::Decimal::from_parts( + lo, + mid, + hi, + negative, + scale as u32, + )) +} + +pub(crate) fn unix_epoch_days() -> i32 { + NaiveDateWrapper::from_ymd_uncheck(1970, 1, 1) + .0 + .num_days_from_ce() +} + +// extract inner filed/item schema of record/array/union +pub(crate) fn extract_inner_field_schema<'a>( + schema: &'a Schema, + name: Option<&'a str>, +) -> Result<&'a Schema> { + match schema { + Schema::Record { fields, lookup, .. } => { + let name = name.ok_or_else(|| { + RwError::from(InternalError( + "no name provided for a field in record".to_owned(), + )) + })?; + let index = lookup.get(name).ok_or_else(|| { + RwError::from(InternalError(format!( + "No filed named {} in record {:?}", + name, schema + ))) + })?; + let field = fields.get(*index).ok_or_else(|| { + RwError::from(InternalError(format!( + "illegal avro record schema {:?}", + schema + ))) + })?; + Ok(&field.schema) + } + Schema::Array(schema) => Ok(schema), + Schema::Union(union_schema) => { + let inner_schema = union_schema + .variants() + .iter() + .find(|s| **s != Schema::Null) + .ok_or_else(|| { + RwError::from(InternalError(format!( + "illegal avro record schema {:?}", + union_schema + ))) + })?; + Ok(inner_schema) + } + _ => Err(RwError::from(InternalError( + "avro schema is not a record or array".to_owned(), + ))), + } +} + +/// Convert Avro value to datum.For now, support the following [Avro type](https://avro.apache.org/docs/current/spec.html). 
+/// - boolean +/// - int : i32 +/// - long: i64 +/// - float: f32 +/// - double: f64 +/// - string: String +/// - Date (the number of days from the unix epoch, 1970-1-1 UTC) +/// - Timestamp (the number of milliseconds from the unix epoch, 1970-1-1 00:00:00.000 UTC) +#[inline] +pub(crate) fn from_avro_value(value: Value, value_schema: &Schema) -> Result { + let v = match value { + Value::Null => { + return Ok(None); + } + Value::Boolean(b) => ScalarImpl::Bool(b), + Value::String(s) => ScalarImpl::Utf8(s.into_boxed_str()), + Value::Int(i) => ScalarImpl::Int32(i), + Value::Long(i) => ScalarImpl::Int64(i), + Value::Float(f) => ScalarImpl::Float32(OrderedF32::from(f)), + Value::Double(f) => ScalarImpl::Float64(OrderedF64::from(f)), + Value::Decimal(avro_decimal) => { + let (precision, scale) = match value_schema { + Schema::Decimal { + precision, scale, .. + } => (*precision, *scale), + _ => { + return Err(RwError::from(InternalError( + "avro value is and decimal but schema not".to_owned(), + ))); + } + }; + let decimal = avro_decimal_to_rust_decimal(avro_decimal, precision, scale)?; + ScalarImpl::Decimal(risingwave_common::types::Decimal::Normalized(decimal)) + } + Value::Date(days) => ScalarImpl::NaiveDate( + NaiveDateWrapper::with_days(days + unix_epoch_days()).map_err(|e| { + let err_msg = format!("avro parse error.wrong date value {}, err {:?}", days, e); + RwError::from(InternalError(err_msg)) + })?, + ), + Value::TimestampMillis(millis) => ScalarImpl::NaiveDateTime( + NaiveDateTimeWrapper::with_secs_nsecs( + millis / 1_000, + (millis % 1_000) as u32 * 1_000_000, + ) + .map_err(|e| { + let err_msg = format!( + "avro parse error.wrong timestamp millis value {}, err {:?}", + millis, e + ); + RwError::from(InternalError(err_msg)) + })?, + ), + Value::TimestampMicros(micros) => ScalarImpl::NaiveDateTime( + NaiveDateTimeWrapper::with_secs_nsecs( + micros / 1_000_000, + (micros % 1_000_000) as u32 * 1_000, + ) + .map_err(|e| { + let err_msg = format!( + "avro parse error.wrong timestamp micros value {}, err {:?}", + micros, e + ); + RwError::from(InternalError(err_msg)) + })?, + ), + Value::Duration(duration) => { + let months = u32::from(duration.months()) as i32; + let days = u32::from(duration.days()) as i32; + let millis = u32::from(duration.millis()) as i64; + ScalarImpl::Interval(IntervalUnit::new(months, days, millis)) + } + Value::Enum(_, symbol) => ScalarImpl::Utf8(symbol.into_boxed_str()), + Value::Record(descs) => { + let rw_values = descs + .into_iter() + .map(|(field_name, field_value)| { + extract_inner_field_schema(value_schema, Some(&field_name)) + .and_then(|field_schema| from_avro_value(field_value, field_schema)) + }) + .collect::>>()?; + ScalarImpl::Struct(StructValue::new(rw_values)) + } + Value::Array(values) => { + let item_schema = extract_inner_field_schema(value_schema, None)?; + let rw_values = values + .into_iter() + .map(|item_value| from_avro_value(item_value, item_schema)) + .collect::>>()?; + ScalarImpl::List(ListValue::new(rw_values)) + } + Value::Union(_, value) => { + let inner_schema = extract_inner_field_schema(value_schema, None)?; + return from_avro_value(*value, inner_schema); + } + _ => { + let err_msg = format!("avro parse error.unsupported value {:?}", value); + return Err(RwError::from(InternalError(err_msg))); + } + }; + + Ok(Some(v)) +} + +#[cfg(test)] +mod tests { + use num_traits::FromPrimitive; + + use super::*; + #[test] + fn test_convert_decimal() { + // 280 + let v = vec![1, 24]; + let avro_decimal = AvroDecimal::from(v); + let rust_decimal 
= avro_decimal_to_rust_decimal(avro_decimal, 28, 0).unwrap(); + assert_eq!(rust_decimal, rust_decimal::Decimal::from_i32(280).unwrap()); + + // 28.1 + let v = vec![1, 25]; + let avro_decimal = AvroDecimal::from(v); + let rust_decimal = avro_decimal_to_rust_decimal(avro_decimal, 28, 1).unwrap(); + assert_eq!(rust_decimal, rust_decimal::Decimal::from_f32(28.1).unwrap()); + } +} diff --git a/src/connector/src/parser/canal/mod.rs b/src/connector/src/parser/canal/mod.rs index 2d7e3f1650336..4b25ea7efb4ee 100644 --- a/src/connector/src/parser/canal/mod.rs +++ b/src/connector/src/parser/canal/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,131 +18,3 @@ mod operators; mod util; pub use simd_json_parser::*; - -#[cfg(test)] -mod tests { - - use std::str::FromStr; - - use risingwave_common::array::Op; - use risingwave_common::row::Row; - use risingwave_common::types::{DataType, Decimal, ScalarImpl, ToOwnedDatum}; - use risingwave_expr::vector_op::cast::str_to_timestamp; - - use super::*; - use crate::parser::{SourceParser, SourceStreamChunkBuilder}; - use crate::SourceColumnDesc; - - #[tokio::test] - async fn test_json_parser() { - let payload = br#"{"data":[{"id":"1","name":"mike","is_adult":"0","balance":"1500.62","reg_time":"2018-01-01 00:00:01","win_rate":"0.65"}],"database":"demo","es":1668673476000,"id":7,"isDdl":false,"mysqlType":{"id":"int","name":"varchar(40)","is_adult":"boolean","balance":"decimal(10,2)","reg_time":"timestamp","win_rate":"double"},"old":[{"balance":"1000.62"}],"pkNames":null,"sql":"","sqlType":{"id":4,"name":12,"is_adult":-6,"balance":3,"reg_time":93,"win_rate":8},"table":"demo","ts":1668673476732,"type":"UPDATE"}"#; - let parser = CanalJsonParser; - let descs = vec![ - SourceColumnDesc::simple("ID", DataType::Int64, 0.into()), - SourceColumnDesc::simple("NAME", DataType::Varchar, 1.into()), - SourceColumnDesc::simple("is_adult", DataType::Boolean, 2.into()), - SourceColumnDesc::simple("balance", DataType::Decimal, 3.into()), - SourceColumnDesc::simple("reg_time", DataType::Timestamp, 4.into()), - SourceColumnDesc::simple("win_rate", DataType::Float64, 5.into()), - ]; - - let mut builder = SourceStreamChunkBuilder::with_capacity(descs, 2); - - let writer = builder.row_writer(); - parser.parse(payload, writer).await.unwrap(); - - let chunk = builder.finish(); - - let mut rows = chunk.rows(); - - { - let (op, row) = rows.next().unwrap(); - assert_eq!(op, Op::UpdateDelete); - assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int64(1))); - assert_eq!( - row.datum_at(1).to_owned_datum(), - (Some(ScalarImpl::Utf8("mike".into()))) - ); - assert_eq!( - row.datum_at(2).to_owned_datum(), - (Some(ScalarImpl::Bool(false))) - ); - assert_eq!( - row.datum_at(3).to_owned_datum(), - (Some(Decimal::from_str("1000.62").unwrap().into())) - ); - assert_eq!( - row.datum_at(4).to_owned_datum(), - (Some(ScalarImpl::NaiveDateTime( - str_to_timestamp("2018-01-01 00:00:01").unwrap() - ))) - ); - assert_eq!( - row.datum_at(5).to_owned_datum(), - (Some(ScalarImpl::Float64(0.65.into()))) - ); - } - - { - let (op, row) = rows.next().unwrap(); - assert_eq!(op, Op::UpdateInsert); - assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int64(1))); - assert_eq!( - row.datum_at(1).to_owned_datum(), - (Some(ScalarImpl::Utf8("mike".into()))) - ); - assert_eq!( - row.datum_at(2).to_owned_datum(), - 
(Some(ScalarImpl::Bool(false))) - ); - assert_eq!( - row.datum_at(3).to_owned_datum(), - (Some(Decimal::from_str("1500.62").unwrap().into())) - ); - assert_eq!( - row.datum_at(4).to_owned_datum(), - (Some(ScalarImpl::NaiveDateTime( - str_to_timestamp("2018-01-01 00:00:01").unwrap() - ))) - ); - assert_eq!( - row.datum_at(5).to_owned_datum(), - (Some(ScalarImpl::Float64(0.65.into()))) - ); - } - } - - #[tokio::test] - async fn test_parse_multi_rows() { - let payload = br#"{"data": [{"v1": "1", "v2": "2"}, {"v1": "3", "v2": "4"}], "old": null, "mysqlType":{"v1": "int", "v2": "int"}, "sqlType":{"v1": 4, "v2": 4}, "database":"demo","es":1668673394000,"id":5,"isDdl":false, "table":"demo","ts":1668673394788,"type":"INSERT"}"#; - - let parser = CanalJsonParser; - let descs = vec![ - SourceColumnDesc::simple("v1", DataType::Int32, 0.into()), - SourceColumnDesc::simple("v2", DataType::Int32, 1.into()), - ]; - - let mut builder = SourceStreamChunkBuilder::with_capacity(descs, 2); - - let writer = builder.row_writer(); - parser.parse(payload, writer).await.unwrap(); - - let chunk = builder.finish(); - - let mut rows = chunk.rows(); - - { - let (op, row) = rows.next().unwrap(); - assert_eq!(op, Op::Insert); - assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int32(1))); - assert_eq!(row.datum_at(1).to_owned_datum(), Some(ScalarImpl::Int32(2))); - } - - { - let (op, row) = rows.next().unwrap(); - assert_eq!(op, Op::Insert); - assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int32(3))); - assert_eq!(row.datum_at(1).to_owned_datum(), Some(ScalarImpl::Int32(4))); - } - } -} diff --git a/src/connector/src/parser/canal/operators.rs b/src/connector/src/parser/canal/operators.rs index e3e0cbdbc7126..06cdce0bb06e1 100644 --- a/src/connector/src/parser/canal/operators.rs +++ b/src/connector/src/parser/canal/operators.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/parser/canal/simd_json_parser.rs b/src/connector/src/parser/canal/simd_json_parser.rs index a41adcc6b6835..aec5423bc9c23 100644 --- a/src/connector/src/parser/canal/simd_json_parser.rs +++ b/src/connector/src/parser/canal/simd_json_parser.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
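One detail worth keeping in mind for the Canal JSON parser below: Canal serializes every column value as a JSON string (for example "id": "1", "balance": "1500.62"), so each field has to be coerced back into its SQL type before it can be written to a row. A rough sketch of that coercion is shown here; `parse_typed` is a hypothetical helper, while the real parser goes through its own `ensure_rust_type`-style machinery.

use std::str::FromStr;

// Re-parse a string-encoded column into its real type. Illustrative only.
fn parse_typed<T: FromStr>(raw: &str) -> Result<T, String>
where
    T::Err: std::fmt::Display,
{
    raw.trim()
        .parse::<T>()
        .map_err(|e| format!("failed to parse {raw:?}: {e}"))
}

fn main() {
    let id: i64 = parse_typed("1").unwrap();
    let balance: f64 = parse_typed("1500.62").unwrap();
    // Canal represents booleans as "0"/"1", which needs its own mapping.
    let is_adult = matches!("0", "1" | "true");
    assert_eq!(id, 1);
    assert!((balance - 1500.62).abs() < f64::EPSILON);
    assert!(!is_adult);
    println!("id={id} balance={balance} is_adult={is_adult}");
}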
@@ -15,11 +15,11 @@ use std::str::FromStr; use anyhow::anyhow; -use futures::future::ready; -use itertools::Itertools; +use futures_async_stream::try_stream; use risingwave_common::error::ErrorCode::{InternalError, ProtocolError}; use risingwave_common::error::{Result, RwError}; use risingwave_common::types::{DataType, Datum, Decimal, ScalarImpl}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::vector_op::cast::{ str_to_date, str_to_timestamp, str_with_time_zone_to_timestamptz, }; @@ -27,19 +27,28 @@ use simd_json::{BorrowedValue, StaticNode, ValueAccess}; use super::util::at_least_one_ok; use crate::parser::canal::operators::*; -use crate::parser::{ParseFuture, SourceParser, SourceStreamChunkRowWriter, WriteGuard}; -use crate::{ensure_rust_type, ensure_str}; +use crate::parser::{SourceStreamChunkRowWriter, WriteGuard}; +use crate::source::SourceColumnDesc; +use crate::{ensure_rust_type, ensure_str, impl_common_parser_logic}; const AFTER: &str = "data"; const BEFORE: &str = "old"; const OP: &str = "type"; const IS_DDL: &str = "isddl"; +impl_common_parser_logic!(CanalJsonParser); #[derive(Debug)] -pub struct CanalJsonParser; +pub struct CanalJsonParser { + pub(crate) rw_columns: Vec, +} impl CanalJsonParser { - fn parse_inner( + pub fn new(rw_columns: Vec) -> Result { + Ok(Self { rw_columns }) + } + + #[allow(clippy::unused_async)] + pub async fn parse_inner( &self, payload: &[u8], mut writer: SourceStreamChunkRowWriter<'_>, @@ -116,7 +125,7 @@ impl CanalJsonParser { })?; let results = before - .zip_eq(after) + .zip_eq_fast(after) .map(|(before, after)| { writer.update(|column| { // in origin canal, old only contains the changed columns but data @@ -172,22 +181,6 @@ impl CanalJsonParser { } } -impl SourceParser for CanalJsonParser { - type ParseResult<'a> = impl ParseFuture<'a, Result>; - - fn parse<'a, 'b, 'c>( - &'a self, - payload: &'b [u8], - writer: SourceStreamChunkRowWriter<'c>, - ) -> Self::ParseResult<'a> - where - 'b: 'a, - 'c: 'a, - { - ready(self.parse_inner(payload, writer)) - } -} - #[inline] fn cannal_simd_json_parse_value( dtype: &DataType, @@ -234,3 +227,134 @@ fn cannal_do_parse_simd_json_value(dtype: &DataType, v: &BorrowedValue<'_>) -> R }; Ok(v) } + +#[cfg(test)] +mod tests { + + use std::str::FromStr; + + use risingwave_common::array::Op; + use risingwave_common::row::Row; + use risingwave_common::types::{DataType, Decimal, ScalarImpl, ToOwnedDatum}; + use risingwave_expr::vector_op::cast::str_to_timestamp; + + use super::*; + use crate::parser::SourceStreamChunkBuilder; + use crate::source::SourceColumnDesc; + + #[tokio::test] + async fn test_json_parser() { + let payload = br#"{"data":[{"id":"1","name":"mike","is_adult":"0","balance":"1500.62","reg_time":"2018-01-01 00:00:01","win_rate":"0.65"}],"database":"demo","es":1668673476000,"id":7,"isDdl":false,"mysqlType":{"id":"int","name":"varchar(40)","is_adult":"boolean","balance":"decimal(10,2)","reg_time":"timestamp","win_rate":"double"},"old":[{"balance":"1000.62"}],"pkNames":null,"sql":"","sqlType":{"id":4,"name":12,"is_adult":-6,"balance":3,"reg_time":93,"win_rate":8},"table":"demo","ts":1668673476732,"type":"UPDATE"}"#; + + let descs = vec![ + SourceColumnDesc::simple("ID", DataType::Int64, 0.into()), + SourceColumnDesc::simple("NAME", DataType::Varchar, 1.into()), + SourceColumnDesc::simple("is_adult", DataType::Boolean, 2.into()), + SourceColumnDesc::simple("balance", DataType::Decimal, 3.into()), + SourceColumnDesc::simple("reg_time", DataType::Timestamp, 4.into()), + 
SourceColumnDesc::simple("win_rate", DataType::Float64, 5.into()), + ]; + + let parser = CanalJsonParser::new(descs.clone()).unwrap(); + + let mut builder = SourceStreamChunkBuilder::with_capacity(descs, 2); + + let writer = builder.row_writer(); + parser.parse_inner(payload, writer).await.unwrap(); + + let chunk = builder.finish(); + + let mut rows = chunk.rows(); + + { + let (op, row) = rows.next().unwrap(); + assert_eq!(op, Op::UpdateDelete); + assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int64(1))); + assert_eq!( + row.datum_at(1).to_owned_datum(), + (Some(ScalarImpl::Utf8("mike".into()))) + ); + assert_eq!( + row.datum_at(2).to_owned_datum(), + (Some(ScalarImpl::Bool(false))) + ); + assert_eq!( + row.datum_at(3).to_owned_datum(), + (Some(Decimal::from_str("1000.62").unwrap().into())) + ); + assert_eq!( + row.datum_at(4).to_owned_datum(), + (Some(ScalarImpl::NaiveDateTime( + str_to_timestamp("2018-01-01 00:00:01").unwrap() + ))) + ); + assert_eq!( + row.datum_at(5).to_owned_datum(), + (Some(ScalarImpl::Float64(0.65.into()))) + ); + } + + { + let (op, row) = rows.next().unwrap(); + assert_eq!(op, Op::UpdateInsert); + assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int64(1))); + assert_eq!( + row.datum_at(1).to_owned_datum(), + (Some(ScalarImpl::Utf8("mike".into()))) + ); + assert_eq!( + row.datum_at(2).to_owned_datum(), + (Some(ScalarImpl::Bool(false))) + ); + assert_eq!( + row.datum_at(3).to_owned_datum(), + (Some(Decimal::from_str("1500.62").unwrap().into())) + ); + assert_eq!( + row.datum_at(4).to_owned_datum(), + (Some(ScalarImpl::NaiveDateTime( + str_to_timestamp("2018-01-01 00:00:01").unwrap() + ))) + ); + assert_eq!( + row.datum_at(5).to_owned_datum(), + (Some(ScalarImpl::Float64(0.65.into()))) + ); + } + } + + #[tokio::test] + async fn test_parse_multi_rows() { + let payload = br#"{"data": [{"v1": "1", "v2": "2"}, {"v1": "3", "v2": "4"}], "old": null, "mysqlType":{"v1": "int", "v2": "int"}, "sqlType":{"v1": 4, "v2": 4}, "database":"demo","es":1668673394000,"id":5,"isDdl":false, "table":"demo","ts":1668673394788,"type":"INSERT"}"#; + + let descs = vec![ + SourceColumnDesc::simple("v1", DataType::Int32, 0.into()), + SourceColumnDesc::simple("v2", DataType::Int32, 1.into()), + ]; + + let parser = CanalJsonParser::new(descs.clone()).unwrap(); + + let mut builder = SourceStreamChunkBuilder::with_capacity(descs, 2); + + let writer = builder.row_writer(); + parser.parse_inner(payload, writer).await.unwrap(); + + let chunk = builder.finish(); + + let mut rows = chunk.rows(); + + { + let (op, row) = rows.next().unwrap(); + assert_eq!(op, Op::Insert); + assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int32(1))); + assert_eq!(row.datum_at(1).to_owned_datum(), Some(ScalarImpl::Int32(2))); + } + + { + let (op, row) = rows.next().unwrap(); + assert_eq!(op, Op::Insert); + assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int32(3))); + assert_eq!(row.datum_at(1).to_owned_datum(), Some(ScalarImpl::Int32(4))); + } + } +} diff --git a/src/connector/src/parser/canal/util.rs b/src/connector/src/parser/canal/util.rs index 361fdfc00496b..863432cd18424 100644 --- a/src/connector/src/parser/canal/util.rs +++ b/src/connector/src/parser/canal/util.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/connector/src/parser/common.rs b/src/connector/src/parser/common.rs index e91d376f5440d..ccc696509761c 100644 --- a/src/connector/src/parser/common.rs +++ b/src/connector/src/parser/common.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,10 +13,10 @@ // limitations under the License. use anyhow::{anyhow, Result}; -use itertools::Itertools; use num_traits::FromPrimitive; use risingwave_common::array::{ListValue, StructValue}; use risingwave_common::types::{DataType, Datum, Decimal, ScalarImpl}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::vector_op::cast::{ i64_to_timestamp, i64_to_timestamptz, str_to_date, str_to_time, str_to_timestamp, str_with_time_zone_to_timestamptz, @@ -62,11 +62,16 @@ fn do_parse_simd_json_value(dtype: &DataType, v: &BorrowedValue<'_>) -> Result i64_to_timestamptz(ensure_int!(v, i64))?.into(), _ => anyhow::bail!("expect timestamptz, but found {v}"), }, + DataType::Jsonb => { + let v: serde_json::Value = v.clone().try_into()?; + #[expect(clippy::disallowed_methods)] + ScalarImpl::Jsonb(risingwave_common::array::JsonbVal::from_serde(v)) + } DataType::Struct(struct_type_info) => { let fields = struct_type_info .field_names .iter() - .zip_eq(struct_type_info.fields.iter()) + .zip_eq_fast(struct_type_info.fields.iter()) .map(|field| { simd_json_parse_value(field.1, v.get(field.0.to_ascii_lowercase().as_str())) }) diff --git a/src/connector/src/parser/csv_parser.rs b/src/connector/src/parser/csv_parser.rs index 06e8150c1d7cd..a113f1d7f49fa 100644 --- a/src/connector/src/parser/csv_parser.rs +++ b/src/connector/src/parser/csv_parser.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -161,7 +161,7 @@ impl CsvParser { } #[try_stream(boxed, ok = StreamChunkWithState, error = RwError)] - pub async fn into_stream(mut self, data_stream: BoxSourceStream) { + async fn into_stream(mut self, data_stream: BoxSourceStream) { // the remain length of the last seen message let mut remain_len = 0; // current offset diff --git a/src/connector/src/parser/debezium/avro_parser.rs b/src/connector/src/parser/debezium/avro_parser.rs new file mode 100644 index 0000000000000..51376d3947d06 --- /dev/null +++ b/src/connector/src/parser/debezium/avro_parser.rs @@ -0,0 +1,409 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
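Before the new Debezium Avro parser below decodes anything, `extract_schema_id` strips the Confluent wire-format framing from the payload: a zero magic byte, a 4-byte big-endian schema id, then the Avro-encoded body. Here is a minimal sketch of that framing, assuming only the wire format itself; the `split_confluent_header` function is illustrative and is not the crate's helper.

// Confluent wire format: [0u8 magic][4-byte big-endian schema id][Avro datum ...]
fn split_confluent_header(payload: &[u8]) -> Result<(i32, &[u8]), String> {
    if payload.len() < 5 {
        return Err("payload shorter than the 5-byte wire-format header".to_string());
    }
    if payload[0] != 0 {
        return Err(format!("unexpected magic byte {}", payload[0]));
    }
    let schema_id = i32::from_be_bytes([payload[1], payload[2], payload[3], payload[4]]);
    Ok((schema_id, &payload[5..]))
}

fn main() {
    // Zero magic byte, schema id 3, then the raw Avro datum bytes.
    let payload: [u8; 7] = [0x00, 0x00, 0x00, 0x00, 0x03, 0x02, 0xd2];
    let (schema_id, datum) = split_confluent_header(&payload).unwrap();
    assert_eq!(schema_id, 3);
    assert_eq!(datum, &[0x02u8, 0xd2][..]);
    println!("schema id {schema_id}, {} datum bytes", datum.len());
}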
+ +use std::collections::HashMap; +use std::fmt::Debug; +use std::sync::Arc; + +use apache_avro::types::Value; +use apache_avro::{from_avro_datum, Schema}; +use futures_async_stream::try_stream; +use reqwest::Url; +use risingwave_common::error::ErrorCode::{InternalError, ProtocolError}; +use risingwave_common::error::{Result, RwError}; +use risingwave_pb::plan_common::ColumnDesc; + +use super::operators::*; +use crate::impl_common_parser_logic; +use crate::parser::avro::util::{ + avro_field_to_column_desc, extract_inner_field_schema, from_avro_value, + get_field_from_avro_value, +}; +use crate::parser::schema_registry::{extract_schema_id, Client}; +use crate::parser::schema_resolver::ConfluentSchemaResolver; +use crate::parser::util::get_kafka_topic; +use crate::parser::{SourceStreamChunkRowWriter, WriteGuard}; +use crate::source::SourceColumnDesc; + +const BEFORE: &str = "before"; +const AFTER: &str = "after"; +const OP: &str = "op"; +const PAYLOAD: &str = "payload"; + +impl_common_parser_logic!(DebeziumAvroParser); + +// TODO: avoid duplicated codes with `AvroParser` +#[derive(Debug)] +pub struct DebeziumAvroParser { + outer_schema: Arc, + inner_schema: Arc, + schema_resolver: Arc, + rw_columns: Vec, +} + +#[derive(Debug, Clone)] +pub struct DebeziumAvroParserConfig { + pub outer_schema: Arc, + pub inner_schema: Arc, + pub schema_resolver: Arc, +} + +impl DebeziumAvroParserConfig { + pub async fn new(props: &HashMap, schema_location: &str) -> Result { + let url = Url::parse(schema_location).map_err(|e| { + InternalError(format!("failed to parse url ({}): {}", schema_location, e)) + })?; + let kafka_topic = get_kafka_topic(props)?; + let client = Client::new(url, props)?; + let (outer_schema, resolver) = + ConfluentSchemaResolver::new(format!("{}-value", kafka_topic).as_str(), client).await?; + let inner_schema = Self::extract_inner_schema(&outer_schema)?; + Ok(Self { + outer_schema: Arc::new(outer_schema), + inner_schema: Arc::new(inner_schema), + schema_resolver: Arc::new(resolver), + }) + } + + fn extract_inner_schema(outer_schema: &Schema) -> Result { + match outer_schema { + Schema::Record { fields, lookup, .. } => { + let index = lookup.get(BEFORE).ok_or_else(|| { + RwError::from(ProtocolError( + "debezium avro msg schema invalid, before field required".to_owned(), + )) + })?; + let before_schema = &fields + .get(*index) + .ok_or_else(|| { + RwError::from(ProtocolError("debezium avro msg schema illegal".to_owned())) + })? + .schema; + match before_schema { + Schema::Union(union_schema) => { + let inner_schema = union_schema + .variants() + .iter() + .find(|s| **s != Schema::Null) + .ok_or_else(|| { + RwError::from(InternalError( + "before field of debezium avro msg schema invalid".to_owned(), + )) + })? + .clone(); + Ok(inner_schema) + } + _ => Err(RwError::from(ProtocolError( + "before field of debezium avro msg schema invalid, union required" + .to_owned(), + ))), + } + } + _ => Err(RwError::from(ProtocolError( + "debezium avro msg schema invalid, record required".to_owned(), + ))), + } + } + + pub fn map_to_columns(&self) -> Result> { + Self::map_to_columns_inner(&self.inner_schema) + } + + // more convenient for testing + pub(crate) fn map_to_columns_inner(schema: &Schema) -> Result> { + // there must be a record at top level + if let Schema::Record { fields, .. 
} = schema { + let mut index = 0; + let fields = fields + .iter() + .map(|field| avro_field_to_column_desc(&field.name, &field.schema, &mut index)) + .collect::>>()?; + Ok(fields) + } else { + Err(RwError::from(InternalError( + "inner avro schema invalid, record required".into(), + ))) + } + } +} + +impl DebeziumAvroParser { + pub fn new( + rw_columns: Vec, + config: DebeziumAvroParserConfig, + ) -> Result { + let DebeziumAvroParserConfig { + outer_schema, + inner_schema, + schema_resolver, + } = config; + Ok(Self { + outer_schema, + inner_schema, + schema_resolver, + rw_columns, + }) + } + + pub(crate) async fn parse_inner( + &self, + payload: &[u8], + mut writer: SourceStreamChunkRowWriter<'_>, + ) -> Result { + let (schema_id, mut raw_payload) = extract_schema_id(payload)?; + let writer_schema = self.schema_resolver.get(schema_id).await?; + + let avro_value = from_avro_datum(writer_schema.as_ref(), &mut raw_payload, None) + .map_err(|e| RwError::from(ProtocolError(e.to_string())))?; + + let op = get_field_from_avro_value(&avro_value, OP)?; + if let Value::String(op_str) = op { + match op_str.as_str() { + DEBEZIUM_UPDATE_OP => { + let before = get_field_from_avro_value(&avro_value, BEFORE) + .map_err(|_| { + RwError::from(ProtocolError( + "before is missing for updating event. If you are using postgres, you may want to try ALTER TABLE $TABLE_NAME REPLICA IDENTITY FULL;".to_string(), + )) + })?; + let after = get_field_from_avro_value(&avro_value, AFTER)?; + + writer.update(|column| { + let field_schema = + extract_inner_field_schema(&self.inner_schema, Some(&column.name))?; + let before = from_avro_value( + get_field_from_avro_value(before, column.name.as_str())?.clone(), + field_schema, + )?; + let after = from_avro_value( + get_field_from_avro_value(after, column.name.as_str())?.clone(), + field_schema, + )?; + + Ok((before, after)) + }) + } + DEBEZIUM_CREATE_OP | DEBEZIUM_READ_OP => { + let after = get_field_from_avro_value(&avro_value, AFTER)?; + + writer.insert(|column| { + let field_schema = + extract_inner_field_schema(&self.inner_schema, Some(&column.name))?; + from_avro_value( + get_field_from_avro_value(after, column.name.as_str())?.clone(), + field_schema, + ) + }) + } + DEBEZIUM_DELETE_OP => { + let before = get_field_from_avro_value(&avro_value, BEFORE) + .map_err(|_| { + RwError::from(ProtocolError( + "before is missing for updating event. 
If you are using postgres, you may want to try ALTER TABLE $TABLE_NAME REPLICA IDENTITY FULL;".to_string(), + )) + })?; + + writer.delete(|column| { + let field_schema = + extract_inner_field_schema(&self.inner_schema, Some(&column.name))?; + from_avro_value( + get_field_from_avro_value(before, column.name.as_str())?.clone(), + field_schema, + ) + }) + } + _ => Err(RwError::from(ProtocolError(format!( + "unknown debezium op: {}", + op_str + )))), + } + } else { + Err(RwError::from(ProtocolError( + "payload op is not a string ".to_owned(), + ))) + } + } +} + +#[cfg(test)] +mod tests { + use std::io::Read; + use std::path::PathBuf; + + use apache_avro::Schema; + use itertools::Itertools; + use maplit::{convert_args, hashmap}; + use risingwave_common::array::Op; + use risingwave_common::catalog::ColumnDesc as CatColumnDesc; + use risingwave_common::row::{OwnedRow, Row}; + use risingwave_common::types::{DataType, ScalarImpl}; + + use super::*; + use crate::parser::{DebeziumAvroParserConfig, SourceStreamChunkBuilder}; + + const DEBEZIUM_AVRO_DATA: &[u8] = b"\x00\x00\x00\x00\x06\x00\x02\xd2\x0f\x0a\x53\x61\x6c\x6c\x79\x0c\x54\x68\x6f\x6d\x61\x73\x2a\x73\x61\x6c\x6c\x79\x2e\x74\x68\x6f\x6d\x61\x73\x40\x61\x63\x6d\x65\x2e\x63\x6f\x6d\x16\x32\x2e\x31\x2e\x32\x2e\x46\x69\x6e\x61\x6c\x0a\x6d\x79\x73\x71\x6c\x12\x64\x62\x73\x65\x72\x76\x65\x72\x31\xc0\xb4\xe8\xb7\xc9\x61\x00\x30\x66\x69\x72\x73\x74\x5f\x69\x6e\x5f\x64\x61\x74\x61\x5f\x63\x6f\x6c\x6c\x65\x63\x74\x69\x6f\x6e\x12\x69\x6e\x76\x65\x6e\x74\x6f\x72\x79\x00\x02\x12\x63\x75\x73\x74\x6f\x6d\x65\x72\x73\x00\x00\x20\x6d\x79\x73\x71\x6c\x2d\x62\x69\x6e\x2e\x30\x30\x30\x30\x30\x33\x8c\x06\x00\x00\x00\x02\x72\x02\x92\xc3\xe8\xb7\xc9\x61\x00"; + + fn schema_dir() -> String { + let dir = PathBuf::from("src/test_data"); + std::fs::canonicalize(dir) + .unwrap() + .to_string_lossy() + .to_string() + } + + async fn parse_one( + parser: DebeziumAvroParser, + columns: Vec, + payload: &[u8], + ) -> Vec<(Op, OwnedRow)> { + let mut builder = SourceStreamChunkBuilder::with_capacity(columns, 2); + { + let writer = builder.row_writer(); + parser.parse_inner(payload, writer).await.unwrap(); + } + let chunk = builder.finish(); + chunk + .rows() + .map(|(op, row_ref)| (op, row_ref.into_owned_row())) + .collect::>() + } + + fn get_outer_schema() -> Schema { + let mut outer_schema_str = String::new(); + let location = schema_dir() + "/debezium_avro_msg_schema.avsc"; + std::fs::File::open(location) + .unwrap() + .read_to_string(&mut outer_schema_str) + .unwrap(); + Schema::parse_str(&outer_schema_str).unwrap() + } + + #[test] + fn test_extract_inner_schema() { + let inner_shema_str = r#"{ + "type": "record", + "name": "Value", + "fields": [ + { + "name": "id", + "type": "int" + }, + { + "name": "first_name", + "type": "string" + }, + { + "name": "last_name", + "type": "string" + }, + { + "name": "email", + "type": "string" + } + ] +}"#; + + let outer_schema = get_outer_schema(); + let expected_inner_schema = Schema::parse_str(inner_shema_str).unwrap(); + let extracted_inner_schema = + DebeziumAvroParserConfig::extract_inner_schema(&outer_schema).unwrap(); + assert_eq!(expected_inner_schema, extracted_inner_schema); + } + + #[test] + fn test_map_to_columns() { + let outer_schema = get_outer_schema(); + let inner_schema = DebeziumAvroParserConfig::extract_inner_schema(&outer_schema).unwrap(); + let columns = DebeziumAvroParserConfig::map_to_columns_inner(&inner_schema) + .unwrap() + .into_iter() + .map(CatColumnDesc::from) + .collect_vec(); + + assert_eq!(columns.len(), 
4); + assert_eq!( + CatColumnDesc { + data_type: DataType::Int32, + column_id: 1.into(), + name: "id".to_owned(), + field_descs: Vec::new(), + type_name: "".to_owned() + }, + columns[0] + ); + + assert_eq!( + CatColumnDesc { + data_type: DataType::Varchar, + column_id: 2.into(), + name: "first_name".to_owned(), + field_descs: Vec::new(), + type_name: "".to_owned() + }, + columns[1] + ); + + assert_eq!( + CatColumnDesc { + data_type: DataType::Varchar, + column_id: 3.into(), + name: "last_name".to_owned(), + field_descs: Vec::new(), + type_name: "".to_owned() + }, + columns[2] + ); + + assert_eq!( + CatColumnDesc { + data_type: DataType::Varchar, + column_id: 4.into(), + name: "email".to_owned(), + field_descs: Vec::new(), + type_name: "".to_owned() + }, + columns[3] + ); + } + + #[ignore] + #[tokio::test] + async fn test_debezium_avro_parser() -> Result<()> { + let props = convert_args!(hashmap!( + "kafka.topic" => "dbserver1.inventory.customers" + )); + let config = DebeziumAvroParserConfig::new(&props, "http://127.0.0.1:8081").await?; + let columns = config + .map_to_columns()? + .into_iter() + .map(CatColumnDesc::from) + .map(|c| SourceColumnDesc::from(&c)) + .collect_vec(); + + let parser = DebeziumAvroParser::new(columns.clone(), config)?; + let [(op, row)]: [_; 1] = parse_one(parser, columns, DEBEZIUM_AVRO_DATA) + .await + .try_into() + .unwrap(); + assert_eq!(op, Op::Insert); + assert_eq!(row[0], Some(ScalarImpl::Int32(1001))); + assert_eq!(row[1], Some(ScalarImpl::Utf8("Sally".into()))); + assert_eq!(row[2], Some(ScalarImpl::Utf8("Thomas".into()))); + assert_eq!( + row[3], + Some(ScalarImpl::Utf8("sally.thomas@acme.com".into())) + ); + Ok(()) + } +} diff --git a/src/connector/src/parser/debezium/mod.rs b/src/connector/src/parser/debezium/mod.rs index cb2866dc9ced7..39eabe3875784 100644 --- a/src/connector/src/parser/debezium/mod.rs +++ b/src/connector/src/parser/debezium/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -14,197 +14,7 @@ pub use simd_json_parser::*; +mod avro_parser; mod operators; mod simd_json_parser; - -#[cfg(test)] -mod test { - - use std::convert::TryInto; - - use risingwave_common::array::Op; - use risingwave_common::catalog::ColumnId; - use risingwave_common::row::{OwnedRow, Row}; - use risingwave_common::types::{DataType, ScalarImpl}; - - use super::*; - use crate::parser::{SourceColumnDesc, SourceParser, SourceStreamChunkBuilder}; - - fn get_test_columns() -> Vec { - let descs = vec![ - SourceColumnDesc { - name: "id".to_string(), - data_type: DataType::Int32, - column_id: ColumnId::from(0), - is_row_id: false, - is_meta: false, - fields: vec![], - }, - SourceColumnDesc { - name: "name".to_string(), - data_type: DataType::Varchar, - column_id: ColumnId::from(1), - is_row_id: false, - is_meta: false, - fields: vec![], - }, - SourceColumnDesc { - name: "description".to_string(), - data_type: DataType::Varchar, - column_id: ColumnId::from(2), - is_row_id: false, - is_meta: false, - fields: vec![], - }, - SourceColumnDesc { - name: "weight".to_string(), - data_type: DataType::Float64, - column_id: ColumnId::from(3), - is_row_id: false, - is_meta: false, - fields: vec![], - }, - ]; - - descs - } - - async fn parse_one( - parser: impl SourceParser, - columns: Vec, - payload: &[u8], - ) -> Vec<(Op, OwnedRow)> { - let mut builder = SourceStreamChunkBuilder::with_capacity(columns, 2); - { - let writer = builder.row_writer(); - parser.parse(payload, writer).await.unwrap(); - } - let chunk = builder.finish(); - chunk - .rows() - .map(|(op, row_ref)| (op, row_ref.into_owned_row())) - .collect::>() - } - - #[tokio::test] - async fn test_debezium_json_parser_read() { - // "before": null, - // "after": { - // "id": 101, - // "name": "scooter", - // "description": "Small 2-wheel scooter", - // "weight": 1.234 - // }, - let data = 
br#"{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":true,"field":"table"},{"type":"int64","optional":false,"field":"server_id"},{"type":"string","optional":true,"field":"gtid"},{"type":"string","optional":false,"field":"file"},{"type":"int64","optional":false,"field":"pos"},{"type":"int32","optional":false,"field":"row"},{"type":"int64","optional":true,"field":"thread"},{"type":"string","optional":true,"field":"query"}],"optional":false,"name":"io.debezium.connector.mysql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.products.Envelope"},"payload":{"before":null,"after":{"id":101,"name":"scooter","description":"Small 2-wheel scooter","weight":1.234},"source":{"version":"1.7.1.Final","connector":"mysql","name":"dbserver1","ts_ms":1639547113601,"snapshot":"true","db":"inventory","sequence":null,"table":"products","server_id":0,"gtid":null,"file":"mysql-bin.000003","pos":156,"row":0,"thread":null,"query":null},"op":"r","ts_ms":1639547113602,"transaction":null}}"#; - let parser = DebeziumJsonParser; - let columns = get_test_columns(); - - let [(_op, row)]: [_; 1] = parse_one(parser, columns, data).await.try_into().unwrap(); - - assert!(row[0].eq(&Some(ScalarImpl::Int32(101)))); - assert!(row[1].eq(&Some(ScalarImpl::Utf8("scooter".into())))); - assert!(row[2].eq(&Some(ScalarImpl::Utf8("Small 2-wheel scooter".into())))); - assert!(row[3].eq(&Some(ScalarImpl::Float64(1.234.into())))); - } - - #[tokio::test] - async fn test_debezium_json_parser_insert() { - // "before": null, - // "after": { - // "id": 102, - // "name": "car battery", - // "description": "12V car battery", - // "weight": 8.1 - // }, - let data = 
br#"{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":true,"field":"table"},{"type":"int64","optional":false,"field":"server_id"},{"type":"string","optional":true,"field":"gtid"},{"type":"string","optional":false,"field":"file"},{"type":"int64","optional":false,"field":"pos"},{"type":"int32","optional":false,"field":"row"},{"type":"int64","optional":true,"field":"thread"},{"type":"string","optional":true,"field":"query"}],"optional":false,"name":"io.debezium.connector.mysql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.products.Envelope"},"payload":{"before":null,"after":{"id":102,"name":"car battery","description":"12V car battery","weight":8.1},"source":{"version":"1.7.1.Final","connector":"mysql","name":"dbserver1","ts_ms":1639551564000,"snapshot":"false","db":"inventory","sequence":null,"table":"products","server_id":223344,"gtid":null,"file":"mysql-bin.000003","pos":717,"row":0,"thread":null,"query":null},"op":"c","ts_ms":1639551564960,"transaction":null}}"#; - let parser = DebeziumJsonParser; - let columns = get_test_columns(); - let [(op, row)]: [_; 1] = parse_one(parser, columns, data).await.try_into().unwrap(); - assert_eq!(op, Op::Insert); - - assert!(row[0].eq(&Some(ScalarImpl::Int32(102)))); - assert!(row[1].eq(&Some(ScalarImpl::Utf8("car battery".into())))); - assert!(row[2].eq(&Some(ScalarImpl::Utf8("12V car battery".into())))); - assert!(row[3].eq(&Some(ScalarImpl::Float64(8.1.into())))); - } - - #[tokio::test] - async fn test_debezium_json_parser_delete() { - // "before": { - // "id": 101, - // "name": "scooter", - // "description": "Small 2-wheel scooter", - // "weight": 1.234 - // }, - // "after": null, - let data = 
br#"{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":true,"field":"table"},{"type":"int64","optional":false,"field":"server_id"},{"type":"string","optional":true,"field":"gtid"},{"type":"string","optional":false,"field":"file"},{"type":"int64","optional":false,"field":"pos"},{"type":"int32","optional":false,"field":"row"},{"type":"int64","optional":true,"field":"thread"},{"type":"string","optional":true,"field":"query"}],"optional":false,"name":"io.debezium.connector.mysql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.products.Envelope"},"payload":{"before":{"id":101,"name":"scooter","description":"Small 2-wheel scooter","weight":1.234},"after":null,"source":{"version":"1.7.1.Final","connector":"mysql","name":"dbserver1","ts_ms":1639551767000,"snapshot":"false","db":"inventory","sequence":null,"table":"products","server_id":223344,"gtid":null,"file":"mysql-bin.000003","pos":1045,"row":0,"thread":null,"query":null},"op":"d","ts_ms":1639551767775,"transaction":null}}"#; - let parser = DebeziumJsonParser {}; - let columns = get_test_columns(); - let [(op, row)]: [_; 1] = parse_one(parser, columns, data).await.try_into().unwrap(); - - assert_eq!(op, Op::Delete); - - assert!(row[0].eq(&Some(ScalarImpl::Int32(101)))); - assert!(row[1].eq(&Some(ScalarImpl::Utf8("scooter".into())))); - assert!(row[2].eq(&Some(ScalarImpl::Utf8("Small 2-wheel scooter".into())))); - assert!(row[3].eq(&Some(ScalarImpl::Float64(1.234.into())))); - } - - #[tokio::test] - async fn test_debezium_json_parser_update() { - // "before": { - // "id": 102, - // "name": "car battery", - // "description": "12V car battery", - // "weight": 8.1 - // }, - // "after": { - // "id": 102, - // "name": "car battery", - // "description": "24V car battery", - // "weight": 9.1 - // }, - let data = 
br#"{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":true,"field":"table"},{"type":"int64","optional":false,"field":"server_id"},{"type":"string","optional":true,"field":"gtid"},{"type":"string","optional":false,"field":"file"},{"type":"int64","optional":false,"field":"pos"},{"type":"int32","optional":false,"field":"row"},{"type":"int64","optional":true,"field":"thread"},{"type":"string","optional":true,"field":"query"}],"optional":false,"name":"io.debezium.connector.mysql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.products.Envelope"},"payload":{"before":{"id":102,"name":"car battery","description":"12V car battery","weight":8.1},"after":{"id":102,"name":"car battery","description":"24V car battery","weight":9.1},"source":{"version":"1.7.1.Final","connector":"mysql","name":"dbserver1","ts_ms":1639551901000,"snapshot":"false","db":"inventory","sequence":null,"table":"products","server_id":223344,"gtid":null,"file":"mysql-bin.000003","pos":1382,"row":0,"thread":null,"query":null},"op":"u","ts_ms":1639551901165,"transaction":null}}"#; - let parser = DebeziumJsonParser {}; - let columns = get_test_columns(); - - let [(op1, row1), (op2, row2)]: [_; 2] = - parse_one(parser, columns, data).await.try_into().unwrap(); - - assert_eq!(op1, Op::UpdateDelete); - assert_eq!(op2, Op::UpdateInsert); - - assert!(row1[0].eq(&Some(ScalarImpl::Int32(102)))); - assert!(row1[1].eq(&Some(ScalarImpl::Utf8("car battery".into())))); - assert!(row1[2].eq(&Some(ScalarImpl::Utf8("12V car battery".into())))); - assert!(row1[3].eq(&Some(ScalarImpl::Float64(8.1.into())))); - - assert!(row2[0].eq(&Some(ScalarImpl::Int32(102)))); - assert!(row2[1].eq(&Some(ScalarImpl::Utf8("car battery".into())))); - assert!(row2[2].eq(&Some(ScalarImpl::Utf8("24V car battery".into())))); - assert!(row2[3].eq(&Some(ScalarImpl::Float64(9.1.into())))); - } - - #[tokio::test] - async fn test_update_with_before_null() { - // the test case it identical with test_debezium_json_parser_insert but op is 'u' - // "before": null, - // "after": { - // "id": 102, - // "name": "car battery", 
- // "description": "12V car battery", - // "weight": 8.1 - // }, - let data = br#"{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":true,"field":"table"},{"type":"int64","optional":false,"field":"server_id"},{"type":"string","optional":true,"field":"gtid"},{"type":"string","optional":false,"field":"file"},{"type":"int64","optional":false,"field":"pos"},{"type":"int32","optional":false,"field":"row"},{"type":"int64","optional":true,"field":"thread"},{"type":"string","optional":true,"field":"query"}],"optional":false,"name":"io.debezium.connector.mysql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.products.Envelope"},"payload":{"before":null,"after":{"id":102,"name":"car battery","description":"12V car battery","weight":8.1},"source":{"version":"1.7.1.Final","connector":"mysql","name":"dbserver1","ts_ms":1639551564000,"snapshot":"false","db":"inventory","sequence":null,"table":"products","server_id":223344,"gtid":null,"file":"mysql-bin.000003","pos":717,"row":0,"thread":null,"query":null},"op":"u","ts_ms":1639551564960,"transaction":null}}"#; - let parser = DebeziumJsonParser; - let columns = get_test_columns(); - - let mut builder = SourceStreamChunkBuilder::with_capacity(columns, 2); - let writer = builder.row_writer(); - if let Err(e) = parser.parse(data, writer).await { - println!("{:?}", e.to_string()); - } else { - panic!("the test case is expected to be failed"); - } - } -} +pub use avro_parser::*; diff --git a/src/connector/src/parser/debezium/operators.rs b/src/connector/src/parser/debezium/operators.rs index ec447ee10f102..ad99dbf0e0e6a 100644 --- a/src/connector/src/parser/debezium/operators.rs +++ b/src/connector/src/parser/debezium/operators.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/connector/src/parser/debezium/simd_json_parser.rs b/src/connector/src/parser/debezium/simd_json_parser.rs index 136732e09559c..d0891697e4096 100644 --- a/src/connector/src/parser/debezium/simd_json_parser.rs +++ b/src/connector/src/parser/debezium/simd_json_parser.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,14 +14,16 @@ use std::fmt::Debug; -use futures::future::ready; +use futures_async_stream::try_stream; use risingwave_common::error::ErrorCode::ProtocolError; use risingwave_common::error::{Result, RwError}; use simd_json::{BorrowedValue, StaticNode, ValueAccess}; use super::operators::*; +use crate::impl_common_parser_logic; use crate::parser::common::simd_json_parse_value; -use crate::parser::{ParseFuture, SourceParser, SourceStreamChunkRowWriter, WriteGuard}; +use crate::parser::{SourceStreamChunkRowWriter, WriteGuard}; +use crate::source::SourceColumnDesc; const BEFORE: &str = "before"; const AFTER: &str = "after"; @@ -36,11 +38,20 @@ fn ensure_not_null<'a, 'b: 'a>(value: &'a BorrowedValue<'b>) -> Option<&'a Borro } } +impl_common_parser_logic!(DebeziumJsonParser); + #[derive(Debug)] -pub struct DebeziumJsonParser; +pub struct DebeziumJsonParser { + pub(crate) rw_columns: Vec, +} impl DebeziumJsonParser { - fn parse_inner( + pub fn new(rw_columns: Vec) -> Result { + Ok(Self { rw_columns }) + } + + #[allow(clippy::unused_async)] + pub async fn parse_inner( &self, payload: &[u8], mut writer: SourceStreamChunkRowWriter<'_>, @@ -136,18 +147,200 @@ impl DebeziumJsonParser { } } -impl SourceParser for DebeziumJsonParser { - type ParseResult<'a> = impl ParseFuture<'a, Result>; - - fn parse<'a, 'b, 'c>( - &'a self, - payload: &'b [u8], - writer: SourceStreamChunkRowWriter<'c>, - ) -> Self::ParseResult<'a> - where - 'b: 'a, - 'c: 'a, - { - ready(self.parse_inner(payload, writer)) +#[cfg(test)] +mod tests { + + use std::convert::TryInto; + + use risingwave_common::array::Op; + use risingwave_common::catalog::ColumnId; + use risingwave_common::row::{OwnedRow, Row}; + use risingwave_common::types::{DataType, ScalarImpl}; + + use super::*; + use crate::parser::{SourceColumnDesc, SourceStreamChunkBuilder}; + + fn get_test_columns() -> Vec { + let descs = vec![ + SourceColumnDesc { + name: "id".to_string(), + data_type: DataType::Int32, + column_id: ColumnId::from(0), + is_row_id: false, + is_meta: false, + fields: vec![], + }, + SourceColumnDesc { + name: "name".to_string(), + data_type: DataType::Varchar, + column_id: ColumnId::from(1), + is_row_id: false, + is_meta: false, + fields: vec![], + }, + SourceColumnDesc { + name: "description".to_string(), + data_type: DataType::Varchar, + column_id: ColumnId::from(2), + is_row_id: false, + is_meta: false, + fields: vec![], + }, + SourceColumnDesc { + name: "weight".to_string(), + data_type: DataType::Float64, + column_id: ColumnId::from(3), + is_row_id: false, + is_meta: false, + fields: vec![], + }, + ]; + + descs + } + + async fn parse_one( + parser: DebeziumJsonParser, + columns: Vec, + payload: &[u8], + ) -> Vec<(Op, OwnedRow)> { + let mut builder = SourceStreamChunkBuilder::with_capacity(columns, 2); + { + let writer = builder.row_writer(); + parser.parse_inner(payload, writer).await.unwrap(); + } + let chunk = builder.finish(); + chunk + .rows() + .map(|(op, row_ref)| (op, row_ref.into_owned_row())) + .collect::>() + } + + #[tokio::test] + 
async fn test_debezium_json_parser_read() { + // "before": null, + // "after": { + // "id": 101, + // "name": "scooter", + // "description": "Small 2-wheel scooter", + // "weight": 1.234 + // }, + let data = br#"{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":true,"field":"table"},{"type":"int64","optional":false,"field":"server_id"},{"type":"string","optional":true,"field":"gtid"},{"type":"string","optional":false,"field":"file"},{"type":"int64","optional":false,"field":"pos"},{"type":"int32","optional":false,"field":"row"},{"type":"int64","optional":true,"field":"thread"},{"type":"string","optional":true,"field":"query"}],"optional":false,"name":"io.debezium.connector.mysql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.products.Envelope"},"payload":{"before":null,"after":{"id":101,"name":"scooter","description":"Small 2-wheel scooter","weight":1.234},"source":{"version":"1.7.1.Final","connector":"mysql","name":"dbserver1","ts_ms":1639547113601,"snapshot":"true","db":"inventory","sequence":null,"table":"products","server_id":0,"gtid":null,"file":"mysql-bin.000003","pos":156,"row":0,"thread":null,"query":null},"op":"r","ts_ms":1639547113602,"transaction":null}}"#; + + let columns = get_test_columns(); + + let parser = DebeziumJsonParser::new(columns.clone()).unwrap(); + + let [(_op, row)]: [_; 1] = parse_one(parser, columns, data).await.try_into().unwrap(); + + assert!(row[0].eq(&Some(ScalarImpl::Int32(101)))); + assert!(row[1].eq(&Some(ScalarImpl::Utf8("scooter".into())))); + assert!(row[2].eq(&Some(ScalarImpl::Utf8("Small 2-wheel scooter".into())))); + assert!(row[3].eq(&Some(ScalarImpl::Float64(1.234.into())))); + } + + #[tokio::test] + async fn test_debezium_json_parser_insert() { + // "before": null, + // "after": { + // "id": 102, + // "name": "car battery", + // "description": "12V car battery", + // "weight": 8.1 + // }, + let data = 
br#"{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":true,"field":"table"},{"type":"int64","optional":false,"field":"server_id"},{"type":"string","optional":true,"field":"gtid"},{"type":"string","optional":false,"field":"file"},{"type":"int64","optional":false,"field":"pos"},{"type":"int32","optional":false,"field":"row"},{"type":"int64","optional":true,"field":"thread"},{"type":"string","optional":true,"field":"query"}],"optional":false,"name":"io.debezium.connector.mysql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.products.Envelope"},"payload":{"before":null,"after":{"id":102,"name":"car battery","description":"12V car battery","weight":8.1},"source":{"version":"1.7.1.Final","connector":"mysql","name":"dbserver1","ts_ms":1639551564000,"snapshot":"false","db":"inventory","sequence":null,"table":"products","server_id":223344,"gtid":null,"file":"mysql-bin.000003","pos":717,"row":0,"thread":null,"query":null},"op":"c","ts_ms":1639551564960,"transaction":null}}"#; + + let columns = get_test_columns(); + let parser = DebeziumJsonParser::new(columns.clone()).unwrap(); + let [(op, row)]: [_; 1] = parse_one(parser, columns, data).await.try_into().unwrap(); + assert_eq!(op, Op::Insert); + + assert!(row[0].eq(&Some(ScalarImpl::Int32(102)))); + assert!(row[1].eq(&Some(ScalarImpl::Utf8("car battery".into())))); + assert!(row[2].eq(&Some(ScalarImpl::Utf8("12V car battery".into())))); + assert!(row[3].eq(&Some(ScalarImpl::Float64(8.1.into())))); + } + + #[tokio::test] + async fn test_debezium_json_parser_delete() { + // "before": { + // "id": 101, + // "name": "scooter", + // "description": "Small 2-wheel scooter", + // "weight": 1.234 + // }, + // "after": null, + let data = 
br#"{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":true,"field":"table"},{"type":"int64","optional":false,"field":"server_id"},{"type":"string","optional":true,"field":"gtid"},{"type":"string","optional":false,"field":"file"},{"type":"int64","optional":false,"field":"pos"},{"type":"int32","optional":false,"field":"row"},{"type":"int64","optional":true,"field":"thread"},{"type":"string","optional":true,"field":"query"}],"optional":false,"name":"io.debezium.connector.mysql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.products.Envelope"},"payload":{"before":{"id":101,"name":"scooter","description":"Small 2-wheel scooter","weight":1.234},"after":null,"source":{"version":"1.7.1.Final","connector":"mysql","name":"dbserver1","ts_ms":1639551767000,"snapshot":"false","db":"inventory","sequence":null,"table":"products","server_id":223344,"gtid":null,"file":"mysql-bin.000003","pos":1045,"row":0,"thread":null,"query":null},"op":"d","ts_ms":1639551767775,"transaction":null}}"#; + + let columns = get_test_columns(); + let parser = DebeziumJsonParser::new(columns.clone()).unwrap(); + let [(op, row)]: [_; 1] = parse_one(parser, columns, data).await.try_into().unwrap(); + + assert_eq!(op, Op::Delete); + + assert!(row[0].eq(&Some(ScalarImpl::Int32(101)))); + assert!(row[1].eq(&Some(ScalarImpl::Utf8("scooter".into())))); + assert!(row[2].eq(&Some(ScalarImpl::Utf8("Small 2-wheel scooter".into())))); + assert!(row[3].eq(&Some(ScalarImpl::Float64(1.234.into())))); + } + + #[tokio::test] + async fn test_debezium_json_parser_update() { + // "before": { + // "id": 102, + // "name": "car battery", + // "description": "12V car battery", + // "weight": 8.1 + // }, + // "after": { + // "id": 102, + // "name": "car battery", + // "description": "24V car battery", + // "weight": 9.1 + // }, + let data = 
br#"{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":true,"field":"table"},{"type":"int64","optional":false,"field":"server_id"},{"type":"string","optional":true,"field":"gtid"},{"type":"string","optional":false,"field":"file"},{"type":"int64","optional":false,"field":"pos"},{"type":"int32","optional":false,"field":"row"},{"type":"int64","optional":true,"field":"thread"},{"type":"string","optional":true,"field":"query"}],"optional":false,"name":"io.debezium.connector.mysql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.products.Envelope"},"payload":{"before":{"id":102,"name":"car battery","description":"12V car battery","weight":8.1},"after":{"id":102,"name":"car battery","description":"24V car battery","weight":9.1},"source":{"version":"1.7.1.Final","connector":"mysql","name":"dbserver1","ts_ms":1639551901000,"snapshot":"false","db":"inventory","sequence":null,"table":"products","server_id":223344,"gtid":null,"file":"mysql-bin.000003","pos":1382,"row":0,"thread":null,"query":null},"op":"u","ts_ms":1639551901165,"transaction":null}}"#; + + let columns = get_test_columns(); + + let parser = DebeziumJsonParser::new(columns.clone()).unwrap(); + let [(op1, row1), (op2, row2)]: [_; 2] = + parse_one(parser, columns, data).await.try_into().unwrap(); + + assert_eq!(op1, Op::UpdateDelete); + assert_eq!(op2, Op::UpdateInsert); + + assert!(row1[0].eq(&Some(ScalarImpl::Int32(102)))); + assert!(row1[1].eq(&Some(ScalarImpl::Utf8("car battery".into())))); + assert!(row1[2].eq(&Some(ScalarImpl::Utf8("12V car battery".into())))); + assert!(row1[3].eq(&Some(ScalarImpl::Float64(8.1.into())))); + + assert!(row2[0].eq(&Some(ScalarImpl::Int32(102)))); + assert!(row2[1].eq(&Some(ScalarImpl::Utf8("car battery".into())))); + assert!(row2[2].eq(&Some(ScalarImpl::Utf8("24V car battery".into())))); + assert!(row2[3].eq(&Some(ScalarImpl::Float64(9.1.into())))); + } + + #[tokio::test] + async fn test_update_with_before_null() { + // the test case it identical with test_debezium_json_parser_insert but op is 'u' + // "before": null, + // "after": { + // "id": 
102, + // "name": "car battery", + // "description": "12V car battery", + // "weight": 8.1 + // }, + let data = br#"{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"name"},{"type":"string","optional":true,"field":"description"},{"type":"double","optional":true,"field":"weight"}],"optional":true,"name":"dbserver1.inventory.products.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":true,"field":"table"},{"type":"int64","optional":false,"field":"server_id"},{"type":"string","optional":true,"field":"gtid"},{"type":"string","optional":false,"field":"file"},{"type":"int64","optional":false,"field":"pos"},{"type":"int32","optional":false,"field":"row"},{"type":"int64","optional":true,"field":"thread"},{"type":"string","optional":true,"field":"query"}],"optional":false,"name":"io.debezium.connector.mysql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.products.Envelope"},"payload":{"before":null,"after":{"id":102,"name":"car battery","description":"12V car battery","weight":8.1},"source":{"version":"1.7.1.Final","connector":"mysql","name":"dbserver1","ts_ms":1639551564000,"snapshot":"false","db":"inventory","sequence":null,"table":"products","server_id":223344,"gtid":null,"file":"mysql-bin.000003","pos":717,"row":0,"thread":null,"query":null},"op":"u","ts_ms":1639551564960,"transaction":null}}"#; + + let columns = get_test_columns(); + let parser = DebeziumJsonParser::new(columns.clone()).unwrap(); + + let mut builder = SourceStreamChunkBuilder::with_capacity(columns, 2); + let writer = builder.row_writer(); + if let Err(e) = parser.parse_inner(data, writer).await { + println!("{:?}", e.to_string()); + } else { + panic!("the test case is expected to be failed"); + } } } diff --git a/src/connector/src/parser/json_parser.rs b/src/connector/src/parser/json_parser.rs index 97c8182a29530..3c20bf62acaf3 100644 --- a/src/connector/src/parser/json_parser.rs +++ b/src/connector/src/parser/json_parser.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,26 +12,35 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use futures::future::ready; +use futures_async_stream::try_stream; use risingwave_common::error::ErrorCode::ProtocolError; use risingwave_common::error::{Result, RwError}; +use simd_json::{BorrowedValue, ValueAccess}; -use crate::parser::{ParseFuture, SourceParser, SourceStreamChunkRowWriter, WriteGuard}; +use crate::impl_common_parser_logic; +use crate::parser::common::simd_json_parse_value; +use crate::parser::{SourceStreamChunkRowWriter, WriteGuard}; +use crate::source::SourceColumnDesc; + +impl_common_parser_logic!(JsonParser); /// Parser for JSON format #[derive(Debug)] -pub struct JsonParser; +pub struct JsonParser { + rw_columns: Vec, +} impl JsonParser { - fn parse_inner( + pub fn new(rw_columns: Vec) -> Result { + Ok(Self { rw_columns }) + } + + #[allow(clippy::unused_async)] + pub async fn parse_inner( &self, payload: &[u8], mut writer: SourceStreamChunkRowWriter<'_>, ) -> Result { - use simd_json::{BorrowedValue, ValueAccess}; - - use crate::parser::common::simd_json_parse_value; - let mut payload_mut = payload.to_vec(); let value: BorrowedValue<'_> = simd_json::to_borrowed_value(&mut payload_mut) @@ -54,22 +63,6 @@ impl JsonParser { } } -impl SourceParser for JsonParser { - type ParseResult<'a> = impl ParseFuture<'a, Result>; - - fn parse<'a, 'b, 'c>( - &'a self, - payload: &'b [u8], - writer: SourceStreamChunkRowWriter<'c>, - ) -> Self::ParseResult<'a> - where - 'b: 'a, - 'c: 'a, - { - ready(self.parse_inner(payload, writer)) - } -} - #[cfg(test)] mod tests { use std::str::FromStr; @@ -82,11 +75,10 @@ mod tests { use risingwave_common::types::{DataType, Decimal, ScalarImpl, ToOwnedDatum}; use risingwave_expr::vector_op::cast::{str_to_date, str_to_timestamp}; - use crate::parser::{JsonParser, SourceColumnDesc, SourceParser, SourceStreamChunkBuilder}; + use crate::parser::{JsonParser, SourceColumnDesc, SourceStreamChunkBuilder}; #[tokio::test] async fn test_json_parser() { - let parser = JsonParser; let descs = vec![ SourceColumnDesc::simple("i32", DataType::Int32, 0.into()), SourceColumnDesc::simple("bool", DataType::Boolean, 2.into()), @@ -100,6 +92,8 @@ mod tests { SourceColumnDesc::simple("decimal", DataType::Decimal, 10.into()), ]; + let parser = JsonParser::new(descs.clone()).unwrap(); + let mut builder = SourceStreamChunkBuilder::with_capacity(descs, 2); for payload in [ @@ -107,7 +101,7 @@ mod tests { br#"{"i32":1,"f32":12345e+10,"f64":12345,"decimal":12345}"#.as_slice(), ] { let writer = builder.row_writer(); - parser.parse(payload, writer).await.unwrap(); + parser.parse_inner(payload, writer).await.unwrap(); } let chunk = builder.finish(); @@ -185,19 +179,19 @@ mod tests { #[tokio::test] async fn test_json_parser_failed() { - let parser = JsonParser; let descs = vec![ SourceColumnDesc::simple("v1", DataType::Int32, 0.into()), SourceColumnDesc::simple("v2", DataType::Int16, 1.into()), SourceColumnDesc::simple("v3", DataType::Varchar, 2.into()), ]; + let parser = JsonParser::new(descs.clone()).unwrap(); let mut builder = SourceStreamChunkBuilder::with_capacity(descs, 3); // Parse a correct record. { let writer = builder.row_writer(); let payload = br#"{"v1": 1, "v2": 2, "v3": "3"}"#; - parser.parse(payload, writer).await.unwrap(); + parser.parse_inner(payload, writer).await.unwrap(); } // Parse an incorrect record. @@ -205,14 +199,14 @@ mod tests { let writer = builder.row_writer(); // `v2` overflowed. 
let payload = br#"{"v1": 1, "v2": 65536, "v3": "3"}"#; - parser.parse(payload, writer).await.unwrap_err(); + parser.parse_inner(payload, writer).await.unwrap_err(); } // Parse a correct record. { let writer = builder.row_writer(); let payload = br#"{"v1": 1, "v2": 2, "v3": "3"}"#; - parser.parse(payload, writer).await.unwrap(); + parser.parse_inner(payload, writer).await.unwrap(); } let chunk = builder.finish(); @@ -223,8 +217,6 @@ mod tests { #[tokio::test] async fn test_json_parse_struct() { - let parser = JsonParser; - let descs = vec![ ColumnDesc::new_struct( "data", @@ -252,6 +244,8 @@ mod tests { .iter() .map(SourceColumnDesc::from) .collect_vec(); + + let parser = JsonParser::new(descs.clone()).unwrap(); let payload = br#" { "data": { @@ -271,7 +265,7 @@ mod tests { let mut builder = SourceStreamChunkBuilder::with_capacity(descs, 1); { let writer = builder.row_writer(); - parser.parse(payload, writer).await.unwrap(); + parser.parse_inner(payload, writer).await.unwrap(); } let chunk = builder.finish(); let (op, row) = chunk.rows().next().unwrap(); diff --git a/src/connector/src/parser/macros.rs b/src/connector/src/parser/macros.rs index 4c530f9dfccad..10058f0bd8aa0 100644 --- a/src/connector/src/parser/macros.rs +++ b/src/connector/src/parser/macros.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/parser/maxwell/mod.rs b/src/connector/src/parser/maxwell/mod.rs index d9af4223ef436..f21a7dd647ac7 100644 --- a/src/connector/src/parser/maxwell/mod.rs +++ b/src/connector/src/parser/maxwell/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,120 +16,3 @@ mod operators; mod simd_json_parser; pub use simd_json_parser::*; - -#[cfg(test)] -mod test { - use risingwave_common::array::Op; - use risingwave_common::row::Row; - use risingwave_common::types::{DataType, ScalarImpl, ToOwnedDatum}; - use risingwave_expr::vector_op::cast::str_to_timestamp; - - use super::*; - use crate::parser::{SourceColumnDesc, SourceParser, SourceStreamChunkBuilder}; - #[tokio::test] - async fn test_json_parser() { - let parser = MaxwellParser; - let descs = vec![ - SourceColumnDesc::simple("ID", DataType::Int32, 0.into()), - SourceColumnDesc::simple("NAME", DataType::Varchar, 1.into()), - SourceColumnDesc::simple("is_adult", DataType::Int16, 2.into()), - SourceColumnDesc::simple("birthday", DataType::Timestamp, 3.into()), - ]; - - let mut builder = SourceStreamChunkBuilder::with_capacity(descs, 4); - let payloads = vec![ - br#"{"database":"test","table":"t","type":"insert","ts":1666937996,"xid":1171,"commit":true,"data":{"id":1,"name":"tom","is_adult":0,"birthday":"2017-12-31 16:00:01"}}"#.as_slice(), - br#"{"database":"test","table":"t","type":"insert","ts":1666938023,"xid":1254,"commit":true,"data":{"id":2,"name":"alex","is_adult":1,"birthday":"1999-12-31 16:00:01"}}"#.as_slice(), - br#"{"database":"test","table":"t","type":"update","ts":1666938068,"xid":1373,"commit":true,"data":{"id":2,"name":"chi","is_adult":1,"birthday":"1999-12-31 16:00:01"},"old":{"name":"alex"}}"#.as_slice() - ]; - - for payload in payloads { - let writer = builder.row_writer(); - parser.parse(payload, writer).await.unwrap(); - } - - let chunk = builder.finish(); - - let mut rows = chunk.rows(); - - { - let (op, row) = rows.next().unwrap(); - assert_eq!(op, Op::Insert); - assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int32(1))); - assert_eq!( - row.datum_at(1).to_owned_datum(), - (Some(ScalarImpl::Utf8("tom".into()))) - ); - assert_eq!( - row.datum_at(2).to_owned_datum(), - (Some(ScalarImpl::Int16(0))) - ); - assert_eq!( - row.datum_at(3).to_owned_datum(), - (Some(ScalarImpl::NaiveDateTime( - str_to_timestamp("2017-12-31 16:00:01").unwrap() - ))) - ) - } - - { - let (op, row) = rows.next().unwrap(); - assert_eq!(op, Op::Insert); - assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int32(2))); - assert_eq!( - row.datum_at(1).to_owned_datum(), - (Some(ScalarImpl::Utf8("alex".into()))) - ); - assert_eq!( - row.datum_at(2).to_owned_datum(), - (Some(ScalarImpl::Int16(1))) - ); - assert_eq!( - row.datum_at(3).to_owned_datum(), - (Some(ScalarImpl::NaiveDateTime( - str_to_timestamp("1999-12-31 16:00:01").unwrap() - ))) - ) - } - - { - let (op, row) = rows.next().unwrap(); - assert_eq!(op, Op::UpdateDelete); - assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int32(2))); - assert_eq!( - row.datum_at(1).to_owned_datum(), - (Some(ScalarImpl::Utf8("alex".into()))) - ); - assert_eq!( - row.datum_at(2).to_owned_datum(), - (Some(ScalarImpl::Int16(1))) - ); - assert_eq!( - row.datum_at(3).to_owned_datum(), - (Some(ScalarImpl::NaiveDateTime( - str_to_timestamp("1999-12-31 16:00:01").unwrap() - ))) - ) - } - - { - let (op, row) = rows.next().unwrap(); - assert_eq!(op, Op::UpdateInsert); - assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int32(2))); - assert_eq!( - row.datum_at(1).to_owned_datum(), - (Some(ScalarImpl::Utf8("chi".into()))) - ); - assert_eq!( - row.datum_at(2).to_owned_datum(), - (Some(ScalarImpl::Int16(1))) - ); - assert_eq!( - row.datum_at(3).to_owned_datum(), - (Some(ScalarImpl::NaiveDateTime( - 
str_to_timestamp("1999-12-31 16:00:01").unwrap() - ))) - ) - } - } -} diff --git a/src/connector/src/parser/maxwell/operators.rs b/src/connector/src/parser/maxwell/operators.rs index da7ee2878e0ba..f269598b19cc9 100644 --- a/src/connector/src/parser/maxwell/operators.rs +++ b/src/connector/src/parser/maxwell/operators.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/parser/maxwell/simd_json_parser.rs b/src/connector/src/parser/maxwell/simd_json_parser.rs index ca985c322c720..712b52747ab9e 100644 --- a/src/connector/src/parser/maxwell/simd_json_parser.rs +++ b/src/connector/src/parser/maxwell/simd_json_parser.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,24 +14,35 @@ use std::fmt::Debug; -use futures::future::ready; +use futures_async_stream::try_stream; use risingwave_common::error::ErrorCode::ProtocolError; use risingwave_common::error::{Result, RwError}; use simd_json::{BorrowedValue, ValueAccess}; use super::operators::*; +use crate::impl_common_parser_logic; use crate::parser::common::simd_json_parse_value; -use crate::parser::{ParseFuture, SourceParser, SourceStreamChunkRowWriter, WriteGuard}; +use crate::parser::{SourceStreamChunkRowWriter, WriteGuard}; +use crate::source::SourceColumnDesc; const AFTER: &str = "data"; const BEFORE: &str = "old"; const OP: &str = "type"; +impl_common_parser_logic!(MaxwellParser); + #[derive(Debug)] -pub struct MaxwellParser; +pub struct MaxwellParser { + pub(crate) rw_columns: Vec, +} impl MaxwellParser { - fn parse_inner( + pub fn new(rw_columns: Vec) -> Result { + Ok(Self { rw_columns }) + } + + #[allow(clippy::unused_async)] + pub async fn parse_inner( &self, payload: &[u8], mut writer: SourceStreamChunkRowWriter<'_>, @@ -105,18 +116,120 @@ impl MaxwellParser { } } -impl SourceParser for MaxwellParser { - type ParseResult<'a> = impl ParseFuture<'a, Result>; - - fn parse<'a, 'b, 'c>( - &'a self, - payload: &'b [u8], - writer: SourceStreamChunkRowWriter<'c>, - ) -> Self::ParseResult<'a> - where - 'b: 'a, - 'c: 'a, - { - ready(self.parse_inner(payload, writer)) +#[cfg(test)] +mod tests { + use risingwave_common::array::Op; + use risingwave_common::row::Row; + use risingwave_common::types::{DataType, ScalarImpl, ToOwnedDatum}; + use risingwave_expr::vector_op::cast::str_to_timestamp; + + use super::*; + use crate::parser::{SourceColumnDesc, SourceStreamChunkBuilder}; + #[tokio::test] + async fn test_json_parser() { + let descs = vec![ + SourceColumnDesc::simple("ID", DataType::Int32, 0.into()), + SourceColumnDesc::simple("NAME", DataType::Varchar, 1.into()), + SourceColumnDesc::simple("is_adult", DataType::Int16, 2.into()), + SourceColumnDesc::simple("birthday", DataType::Timestamp, 3.into()), + ]; + + let parser = MaxwellParser::new(descs.clone()).unwrap(); + + let mut builder = SourceStreamChunkBuilder::with_capacity(descs, 4); + let payloads = vec![ + br#"{"database":"test","table":"t","type":"insert","ts":1666937996,"xid":1171,"commit":true,"data":{"id":1,"name":"tom","is_adult":0,"birthday":"2017-12-31 16:00:01"}}"#.as_slice(), + 
br#"{"database":"test","table":"t","type":"insert","ts":1666938023,"xid":1254,"commit":true,"data":{"id":2,"name":"alex","is_adult":1,"birthday":"1999-12-31 16:00:01"}}"#.as_slice(), + br#"{"database":"test","table":"t","type":"update","ts":1666938068,"xid":1373,"commit":true,"data":{"id":2,"name":"chi","is_adult":1,"birthday":"1999-12-31 16:00:01"},"old":{"name":"alex"}}"#.as_slice() + ]; + + for payload in payloads { + let writer = builder.row_writer(); + parser.parse_inner(payload, writer).await.unwrap(); + } + + let chunk = builder.finish(); + + let mut rows = chunk.rows(); + + { + let (op, row) = rows.next().unwrap(); + assert_eq!(op, Op::Insert); + assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int32(1))); + assert_eq!( + row.datum_at(1).to_owned_datum(), + (Some(ScalarImpl::Utf8("tom".into()))) + ); + assert_eq!( + row.datum_at(2).to_owned_datum(), + (Some(ScalarImpl::Int16(0))) + ); + assert_eq!( + row.datum_at(3).to_owned_datum(), + (Some(ScalarImpl::NaiveDateTime( + str_to_timestamp("2017-12-31 16:00:01").unwrap() + ))) + ) + } + + { + let (op, row) = rows.next().unwrap(); + assert_eq!(op, Op::Insert); + assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int32(2))); + assert_eq!( + row.datum_at(1).to_owned_datum(), + (Some(ScalarImpl::Utf8("alex".into()))) + ); + assert_eq!( + row.datum_at(2).to_owned_datum(), + (Some(ScalarImpl::Int16(1))) + ); + assert_eq!( + row.datum_at(3).to_owned_datum(), + (Some(ScalarImpl::NaiveDateTime( + str_to_timestamp("1999-12-31 16:00:01").unwrap() + ))) + ) + } + + { + let (op, row) = rows.next().unwrap(); + assert_eq!(op, Op::UpdateDelete); + assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int32(2))); + assert_eq!( + row.datum_at(1).to_owned_datum(), + (Some(ScalarImpl::Utf8("alex".into()))) + ); + assert_eq!( + row.datum_at(2).to_owned_datum(), + (Some(ScalarImpl::Int16(1))) + ); + assert_eq!( + row.datum_at(3).to_owned_datum(), + (Some(ScalarImpl::NaiveDateTime( + str_to_timestamp("1999-12-31 16:00:01").unwrap() + ))) + ) + } + + { + let (op, row) = rows.next().unwrap(); + assert_eq!(op, Op::UpdateInsert); + assert_eq!(row.datum_at(0).to_owned_datum(), Some(ScalarImpl::Int32(2))); + assert_eq!( + row.datum_at(1).to_owned_datum(), + (Some(ScalarImpl::Utf8("chi".into()))) + ); + assert_eq!( + row.datum_at(2).to_owned_datum(), + (Some(ScalarImpl::Int16(1))) + ); + assert_eq!( + row.datum_at(3).to_owned_datum(), + (Some(ScalarImpl::NaiveDateTime( + str_to_timestamp("1999-12-31 16:00:01").unwrap() + ))) + ) + } } } diff --git a/src/connector/src/parser/mod.rs b/src/connector/src/parser/mod.rs index 119be717e4842..e75b49c1f8c0c 100644 --- a/src/connector/src/parser/mod.rs +++ b/src/connector/src/parser/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -14,14 +14,11 @@ use std::collections::HashMap; use std::fmt::Debug; -use std::sync::Arc; pub use avro::*; pub use canal::*; use csv_parser::CsvParser; pub use debezium::*; -use enum_as_inner::EnumAsInner; -use futures::Future; use itertools::Itertools; pub use json_parser::*; pub use protobuf::*; @@ -29,12 +26,14 @@ use risingwave_common::array::{ArrayBuilderImpl, Op, StreamChunk}; use risingwave_common::error::ErrorCode::ProtocolError; use risingwave_common::error::{Result, RwError}; use risingwave_common::types::Datum; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::catalog::StreamSourceInfo; pub use self::csv_parser::CsvParserConfig; use crate::parser::maxwell::MaxwellParser; -use crate::source::BoxSourceStream; -use crate::{BoxSourceWithStateStream, SourceColumnDesc, SourceFormat, StreamChunkWithState}; +use crate::source::{ + BoxSourceStream, BoxSourceWithStateStream, SourceColumnDesc, SourceFormat, StreamChunkWithState, +}; mod avro; mod canal; @@ -191,12 +190,15 @@ impl OpAction for OpActionUpdate { } impl SourceStreamChunkRowWriter<'_> { + #[expect( + clippy::disallowed_methods, + reason = "FIXME: why zip_eq_fast leads to compile error?" + )] fn do_action( &mut self, mut f: impl FnMut(&SourceColumnDesc) -> Result, ) -> Result { let mut modify_col = vec![]; - self.descs .iter() .zip_eq(self.builders.iter_mut()) @@ -253,7 +255,7 @@ impl SourceStreamChunkRowWriter<'_> { ) -> Result { self.descs .iter() - .zip_eq(self.builders.iter_mut()) + .zip_eq_fast(self.builders.iter_mut()) .for_each(|(desc, builder)| { if let Some(output) = f(desc) { builder.append_datum(output); @@ -291,179 +293,168 @@ impl SourceStreamChunkRowWriter<'_> { } } -pub trait ParseFuture<'a, Out> = Future + Send + 'a; +/// `ByteStreamSourceParser` is a new message parser, the parser should consume +/// the input data stream and return a stream of parsed msgs. +pub trait ByteStreamSourceParser: Send + Debug + 'static { + /// Parse a data stream of one source split into a stream of [`StreamChunk`]. -// TODO: use `async_fn_in_traits` to implement it -/// `SourceParser` is the message parser, `ChunkReader` will parse the messages in `SourceReader` -/// one by one through `SourceParser` and assemble them into `DataChunk` -/// Note that the `skip_parse` parameter in `SourceColumnDesc`, when it is true, should skip the -/// parse and return `Datum` of `None` -pub trait SourceParser: Send + Debug + 'static { - type ParseResult<'a>: ParseFuture<'a, Result>; - /// Parse the payload and append the result to the [`StreamChunk`] directly. - /// /// # Arguments - /// - /// - `self`: A needs to be a member method because some format like Protobuf needs to be - /// pre-compiled. - /// - writer: Write exactly one record during a `parse` call. + /// - `data_stream`: A data stream of one source split. + /// To be able to split multiple messages from mq, so it is not a pure byte stream /// /// # Returns /// - /// A [`WriteGuard`] to ensure that at least one record was appended or error occurred. - fn parse<'a, 'b, 'c>( - &'a self, - payload: &'b [u8], - writer: SourceStreamChunkRowWriter<'c>, - ) -> Self::ParseResult<'a> - where - 'b: 'a, - 'c: 'a; + /// A [`BoxSourceWithStateStream`] which is a stream of parsed msgs. 
+ fn into_stream(self, data_stream: BoxSourceStream) -> BoxSourceWithStateStream; } #[derive(Debug)] -pub enum SourceParserImpl { +pub enum ByteStreamSourceParserImpl { + Csv(CsvParser), Json(JsonParser), Protobuf(ProtobufParser), DebeziumJson(DebeziumJsonParser), Avro(AvroParser), Maxwell(MaxwellParser), CanalJson(CanalJsonParser), + DebeziumAvro(DebeziumAvroParser), } -impl SourceParserImpl { - pub async fn parse( - &self, - payload: &[u8], - writer: SourceStreamChunkRowWriter<'_>, - ) -> Result { +impl ByteStreamSourceParserImpl { + pub fn into_stream(self, msg_stream: BoxSourceStream) -> BoxSourceWithStateStream { match self { - Self::Json(parser) => parser.parse(payload, writer).await, - Self::Protobuf(parser) => parser.parse(payload, writer).await, - Self::DebeziumJson(parser) => parser.parse(payload, writer).await, - Self::Avro(avro_parser) => avro_parser.parse(payload, writer).await, - Self::Maxwell(maxwell_parser) => maxwell_parser.parse(payload, writer).await, - Self::CanalJson(parser) => parser.parse(payload, writer).await, + Self::Csv(parser) => parser.into_stream(msg_stream), + Self::Json(parser) => parser.into_stream(msg_stream), + Self::Protobuf(parser) => parser.into_stream(msg_stream), + Self::DebeziumJson(parser) => parser.into_stream(msg_stream), + Self::Avro(parser) => parser.into_stream(msg_stream), + Self::Maxwell(parser) => parser.into_stream(msg_stream), + Self::CanalJson(parser) => parser.into_stream(msg_stream), + Self::DebeziumAvro(parser) => parser.into_stream(msg_stream), } } - pub async fn create( - format: &SourceFormat, - properties: &HashMap, - schema_location: &str, - use_schema_registry: bool, - proto_message_name: String, - ) -> Result> { - const PROTOBUF_MESSAGE_KEY: &str = "proto.message"; - const USE_SCHEMA_REGISTRY: &str = "use_schema_registry"; - let parser = match format { - SourceFormat::Json => SourceParserImpl::Json(JsonParser), - SourceFormat::Protobuf => SourceParserImpl::Protobuf( - ProtobufParser::new( - schema_location, - &proto_message_name, - use_schema_registry, - properties.clone(), - ) - .await?, - ), - SourceFormat::DebeziumJson => SourceParserImpl::DebeziumJson(DebeziumJsonParser), - SourceFormat::Avro => SourceParserImpl::Avro( - AvroParser::new(schema_location, use_schema_registry, properties.clone()).await?, - ), - SourceFormat::Maxwell => SourceParserImpl::Maxwell(MaxwellParser), - SourceFormat::CanalJson => SourceParserImpl::CanalJson(CanalJsonParser), - _ => { - return Err(RwError::from(ProtocolError( - "format not support".to_string(), - ))); + pub fn create(parser_config: ParserConfig) -> Result { + let CommonParserConfig { rw_columns } = parser_config.common; + match parser_config.specific { + SpecificParserConfig::Csv(config) => CsvParser::new(rw_columns, config).map(Self::Csv), + SpecificParserConfig::Avro(config) => { + AvroParser::new(rw_columns, config).map(Self::Avro) } - }; - Ok(Arc::new(parser)) + SpecificParserConfig::Protobuf(config) => { + ProtobufParser::new(rw_columns, config).map(Self::Protobuf) + } + SpecificParserConfig::Json => JsonParser::new(rw_columns).map(Self::Json), + SpecificParserConfig::CanalJson => { + CanalJsonParser::new(rw_columns).map(Self::CanalJson) + } + SpecificParserConfig::DebeziumJson => { + DebeziumJsonParser::new(rw_columns).map(Self::DebeziumJson) + } + SpecificParserConfig::Maxwell => MaxwellParser::new(rw_columns).map(Self::Maxwell), + SpecificParserConfig::DebeziumAvro(config) => { + DebeziumAvroParser::new(rw_columns, config).map(Self::DebeziumAvro) + } + 
SpecificParserConfig::Native => { + unreachable!("Native parser should not be created") + } + } } } -/// `ByteStreamSourceParser` is a new message parser, the parser should consume -/// the input data stream and return a stream of parsed msgs. -pub trait ByteStreamSourceParser: Send + Debug + 'static { - /// Parse a data stream of one source split into a stream of [`StreamChunk`]. - - /// # Arguments - /// - `data_stream`: A data stream of one source split. - /// To be able to split multiple messages from mq, so it is not a pure byte stream - /// - /// # Returns - /// - /// A [`BoxSourceWithStateStream`] which is a stream of parsed msgs. - fn into_stream(self, data_stream: BoxSourceStream) -> BoxSourceWithStateStream; -} - -#[derive(Debug)] -pub enum ByteStreamSourceParserImpl { - Csv(CsvParser), -} - -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct ParserConfig { pub common: CommonParserConfig, pub specific: SpecificParserConfig, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct CommonParserConfig { - pub props: HashMap, pub rw_columns: Vec, } -#[derive(Debug, Clone, EnumAsInner)] +#[derive(Debug, Clone, Default)] pub enum SpecificParserConfig { Csv(CsvParserConfig), + Avro(AvroParserConfig), + Protobuf(ProtobufParserConfig), + Json, + DebeziumJson, + Maxwell, + CanalJson, + #[default] + Native, + DebeziumAvro(DebeziumAvroParserConfig), } impl SpecificParserConfig { - pub fn new(format: &SourceFormat, info: &StreamSourceInfo) -> Self { - match format { + pub fn get_source_format(&self) -> SourceFormat { + match self { + SpecificParserConfig::Avro(_) => SourceFormat::Avro, + SpecificParserConfig::Csv(_) => SourceFormat::Csv, + SpecificParserConfig::Protobuf(_) => SourceFormat::Protobuf, + SpecificParserConfig::Json => SourceFormat::Json, + SpecificParserConfig::DebeziumJson => SourceFormat::DebeziumJson, + SpecificParserConfig::Maxwell => SourceFormat::Maxwell, + SpecificParserConfig::CanalJson => SourceFormat::CanalJson, + SpecificParserConfig::Native => SourceFormat::Native, + SpecificParserConfig::DebeziumAvro(_) => SourceFormat::DebeziumAvro, + } + } + + pub async fn new( + format: SourceFormat, + info: &StreamSourceInfo, + props: &HashMap, + ) -> Result { + let conf = match format { SourceFormat::Csv => SpecificParserConfig::Csv(CsvParserConfig { delimiter: info.csv_delimiter as u8, has_header: info.csv_has_header, }), - _ => unreachable!(), - } + SourceFormat::Avro => SpecificParserConfig::Avro( + AvroParserConfig::new(props, &info.row_schema_location, info.use_schema_registry) + .await?, + ), + SourceFormat::Protobuf => SpecificParserConfig::Protobuf( + ProtobufParserConfig::new( + props, + &info.row_schema_location, + &info.proto_message_name, + info.use_schema_registry, + ) + .await?, + ), + SourceFormat::Json => SpecificParserConfig::Json, + SourceFormat::DebeziumJson => SpecificParserConfig::DebeziumJson, + SourceFormat::Maxwell => SpecificParserConfig::Maxwell, + SourceFormat::CanalJson => SpecificParserConfig::CanalJson, + SourceFormat::Native => SpecificParserConfig::Native, + SourceFormat::DebeziumAvro => SpecificParserConfig::DebeziumAvro( + DebeziumAvroParserConfig::new(props, &info.row_schema_location).await?, + ), + _ => { + return Err(RwError::from(ProtocolError( + "invalid source format".to_string(), + ))); + } + }; + Ok(conf) } } impl ParserConfig { - pub fn new( - format: &SourceFormat, + pub async fn new( + format: SourceFormat, info: &StreamSourceInfo, props: &HashMap, rw_columns: &Vec, - ) -> Self { + ) -> Result { let 
common = CommonParserConfig { - props: props.clone(), rw_columns: rw_columns.to_owned(), }; - let specific = SpecificParserConfig::new(format, info); - - Self { common, specific } - } -} + let specific = SpecificParserConfig::new(format, info, props).await?; -impl ByteStreamSourceParserImpl { - pub fn into_stream(self, msg_stream: BoxSourceStream) -> BoxSourceWithStateStream { - match self { - Self::Csv(parser) => parser.into_stream(msg_stream), - } - } - - // Keep this `async` in consideration of other parsers in the future. - #[allow(clippy::unused_async)] - pub async fn create(parser_config: ParserConfig) -> Result { - let CommonParserConfig { rw_columns, .. } = parser_config.common; - match parser_config.specific { - SpecificParserConfig::Csv(csv_parser_config) => { - CsvParser::new(rw_columns, csv_parser_config).map(Self::Csv) - } - } + Ok(Self { common, specific }) } } diff --git a/src/connector/src/parser/protobuf/mod.rs b/src/connector/src/parser/protobuf/mod.rs index 442e99971f032..8870ee8f67b48 100644 --- a/src/connector/src/parser/protobuf/mod.rs +++ b/src/connector/src/parser/protobuf/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/parser/protobuf/parser.rs b/src/connector/src/parser/protobuf/parser.rs index e138d8a5ba140..bd4e9f69a483d 100644 --- a/src/connector/src/parser/protobuf/parser.rs +++ b/src/connector/src/parser/protobuf/parser.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
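With parser construction moved behind `ParserConfig`, a source reader first resolves the format-specific settings (possibly fetching a schema, hence the new `async`) and only then instantiates the parser. A rough sketch of the intended call sequence, assuming the signatures shown in this hunk and restoring the generic parameters that appear stripped in this patch text; `create_parsed_stream` is a hypothetical wrapper, not an existing function.

```rust
use std::collections::HashMap;

use risingwave_common::error::Result;
use risingwave_pb::catalog::StreamSourceInfo;

use crate::parser::{ByteStreamSourceParserImpl, ParserConfig};
use crate::source::{BoxSourceStream, BoxSourceWithStateStream, SourceColumnDesc, SourceFormat};

/// Hypothetical wiring: build a `ParserConfig` (async because Avro/Protobuf/
/// DebeziumAvro may hit a schema registry), create the parser enum, then turn a
/// raw byte stream into a stream of parsed chunks with state.
async fn create_parsed_stream(
    format: SourceFormat,
    info: &StreamSourceInfo,
    props: &HashMap<String, String>,
    rw_columns: &Vec<SourceColumnDesc>,
    msg_stream: BoxSourceStream,
) -> Result<BoxSourceWithStateStream> {
    let parser_config = ParserConfig::new(format, info, props, rw_columns).await?;
    let parser = ByteStreamSourceParserImpl::create(parser_config)?;
    Ok(parser.into_stream(msg_stream))
}
```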
@@ -15,7 +15,7 @@ use std::collections::HashMap; use std::path::Path; -use futures::future::ready; +use futures_async_stream::try_stream; use itertools::Itertools; use prost_reflect::{ Cardinality, DescriptorPool, DynamicMessage, FieldDescriptor, Kind, MessageDescriptor, @@ -29,29 +29,40 @@ use risingwave_pb::plan_common::ColumnDesc; use url::Url; use super::schema_resolver::*; +use crate::impl_common_parser_logic; use crate::parser::schema_registry::{extract_schema_id, Client}; use crate::parser::util::get_kafka_topic; -use crate::parser::{ParseFuture, SourceParser, SourceStreamChunkRowWriter, WriteGuard}; +use crate::parser::{SourceStreamChunkRowWriter, WriteGuard}; +use crate::source::SourceColumnDesc; + +impl_common_parser_logic!(ProtobufParser); #[derive(Debug, Clone)] pub struct ProtobufParser { message_descriptor: MessageDescriptor, confluent_wire_type: bool, + rw_columns: Vec, } -impl ProtobufParser { +#[derive(Debug, Clone)] +pub struct ProtobufParserConfig { + confluent_wire_type: bool, + message_descriptor: MessageDescriptor, +} + +impl ProtobufParserConfig { pub async fn new( + props: &HashMap, location: &str, message_name: &str, use_schema_registry: bool, - props: HashMap, ) -> Result { let url = Url::parse(location) .map_err(|e| InternalError(format!("failed to parse url ({}): {}", location, e)))?; let schema_bytes = if use_schema_registry { - let kafka_topic = get_kafka_topic(&props)?; - let client = Client::new(url, &props)?; + let kafka_topic = get_kafka_topic(props)?; + let client = Client::new(url, props)?; compile_file_descriptor_from_schema_registry( format!("{}-value", kafka_topic).as_str(), &client, @@ -153,8 +164,23 @@ impl ProtobufParser { }) } } +} + +impl ProtobufParser { + pub fn new(rw_columns: Vec, config: ProtobufParserConfig) -> Result { + let ProtobufParserConfig { + confluent_wire_type, + message_descriptor, + } = config; + Ok(Self { + message_descriptor, + confluent_wire_type, + rw_columns, + }) + } - fn parse_inner( + #[allow(clippy::unused_async)] + pub async fn parse_inner( &self, mut payload: &[u8], mut writer: SourceStreamChunkRowWriter<'_>, @@ -304,22 +330,6 @@ pub(crate) fn resolve_pb_header(payload: &[u8]) -> Result<&[u8]> { } } -impl SourceParser for ProtobufParser { - type ParseResult<'a> = impl ParseFuture<'a, Result>; - - fn parse<'a, 'b, 'c>( - &'a self, - payload: &'b [u8], - writer: SourceStreamChunkRowWriter<'c>, - ) -> Self::ParseResult<'a> - where - 'b: 'a, - 'c: 'a, - { - ready(self.parse_inner(payload, writer)) - } -} - #[cfg(test)] mod test { @@ -350,7 +360,9 @@ mod test { let location = schema_dir() + "/simple-schema"; let message_name = "test.TestRecord"; println!("location: {}", location); - let parser = ProtobufParser::new(&location, message_name, false, HashMap::new()).await?; + let conf = + ProtobufParserConfig::new(&HashMap::new(), &location, message_name, false).await?; + let parser = ProtobufParser::new(Vec::default(), conf)?; let value = DynamicMessage::decode(parser.message_descriptor, PRE_GEN_PROTO_DATA).unwrap(); assert_eq!( @@ -386,8 +398,9 @@ mod test { let location = schema_dir() + "/complex-schema"; let message_name = "test.User"; - let parser = ProtobufParser::new(&location, message_name, false, HashMap::new()).await?; - let columns = parser.map_to_columns().unwrap(); + let conf = + ProtobufParserConfig::new(&HashMap::new(), &location, message_name, false).await?; + let columns = conf.map_to_columns().unwrap(); assert_eq!(columns[0].name, "id".to_string()); assert_eq!(columns[1].name, "code".to_string()); diff --git 
a/src/connector/src/parser/protobuf/schema_resolver.rs b/src/connector/src/parser/protobuf/schema_resolver.rs index 472c8d35c0cc0..ac6f19ff3d392 100644 --- a/src/connector/src/parser/protobuf/schema_resolver.rs +++ b/src/connector/src/parser/protobuf/schema_resolver.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -31,7 +31,7 @@ const PB_SCHEMA_LOCATION_S3_REGION: &str = "region"; // TODO(Tao): Probably we should never allow to use S3 URI. pub(super) async fn load_file_descriptor_from_s3( location: &Url, - properties: HashMap, + properties: &HashMap, ) -> Result> { let bucket = location.domain().ok_or_else(|| { RwError::from(InternalError(format!( diff --git a/src/connector/src/parser/schema_registry/client.rs b/src/connector/src/parser/schema_registry/client.rs index 5616d1189a9cc..6ece063ac043c 100644 --- a/src/connector/src/parser/schema_registry/client.rs +++ b/src/connector/src/parser/schema_registry/client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/parser/schema_registry/mod.rs b/src/connector/src/parser/schema_registry/mod.rs index 0006a23dd9775..402ad3b614d30 100644 --- a/src/connector/src/parser/schema_registry/mod.rs +++ b/src/connector/src/parser/schema_registry/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/parser/schema_registry/util.rs b/src/connector/src/parser/schema_registry/util.rs index c49d6d88c2a89..b4309a40f7ead 100644 --- a/src/connector/src/parser/schema_registry/util.rs +++ b/src/connector/src/parser/schema_registry/util.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/parser/util.rs b/src/connector/src/parser/util.rs index 36b363ad98ec2..fb4f0ee0191ee 100644 --- a/src/connector/src/parser/util.rs +++ b/src/connector/src/parser/util.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/sink/catalog/desc.rs b/src/connector/src/sink/catalog/desc.rs new file mode 100644 index 0000000000000..81c619a32b0bd --- /dev/null +++ b/src/connector/src/sink/catalog/desc.rs @@ -0,0 +1,101 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; + +use itertools::Itertools; +use risingwave_common::catalog::{ColumnCatalog, DatabaseId, SchemaId, TableId, UserId}; +use risingwave_common::util::sort_util::OrderPair; +use risingwave_pb::plan_common::ColumnDesc as ProstColumnDesc; +use risingwave_pb::stream_plan::SinkDesc as ProstSinkDesc; + +use super::{SinkCatalog, SinkId, SinkType}; + +#[derive(Debug, Clone)] +pub struct SinkDesc { + /// Id of the sink. For debug now. + pub id: SinkId, + + /// Name of the sink. For debug now. + pub name: String, + + /// Full SQL definition of the sink. For debug now. + pub definition: String, + + /// All columns of the sink. Note that this is NOT sorted by columnId in the vector. + pub columns: Vec, + + /// Primiary keys of the sink (connector). Now the sink does not care about a field's + /// order (ASC/DESC). + pub pk: Vec, + + /// Primary key indices of the corresponding sink operator's output. + pub stream_key: Vec, + + /// Distribution key indices of the sink. For example, if `distribution_key = [1, 2]`, then the + /// distribution keys will be `columns[1]` and `columns[2]`. + pub distribution_key: Vec, + + /// The properties of the sink. + pub properties: HashMap, + + // The append-only behavior of the physical sink connector. Frontend will determine `sink_type` + // based on both its own derivation on the append-only attribute and other user-specified + // options in `properties`. + pub sink_type: SinkType, +} + +impl SinkDesc { + pub fn into_catalog( + self, + schema_id: SchemaId, + database_id: DatabaseId, + owner: UserId, + dependent_relations: Vec, + ) -> SinkCatalog { + SinkCatalog { + id: self.id, + schema_id, + database_id, + name: self.name, + definition: self.definition, + columns: self.columns, + pk: self.pk, + stream_key: self.stream_key, + distribution_key: self.distribution_key, + owner, + dependent_relations, + properties: self.properties, + sink_type: self.sink_type, + } + } + + pub fn to_proto(&self) -> ProstSinkDesc { + ProstSinkDesc { + id: self.id.sink_id, + name: self.name.clone(), + definition: self.definition.clone(), + columns: self + .columns + .iter() + .map(|column| Into::::into(&column.column_desc)) + .collect_vec(), + pk: self.pk.iter().map(|k| k.to_protobuf()).collect_vec(), + stream_key: self.stream_key.iter().map(|idx| *idx as _).collect_vec(), + distribution_key: self.distribution_key.iter().map(|k| *k as _).collect_vec(), + properties: self.properties.clone(), + sink_type: self.sink_type.to_proto() as i32, + } + } +} diff --git a/src/connector/src/sink/catalog/mod.rs b/src/connector/src/sink/catalog/mod.rs new file mode 100644 index 0000000000000..f258995c50bfc --- /dev/null +++ b/src/connector/src/sink/catalog/mod.rs @@ -0,0 +1,203 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
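`SinkDesc` carries only what the stream plan knows at planning time; schema, database, owner and dependent relations are attached when it is persisted as a `SinkCatalog`. A small illustration of that hand-off using the identifiers from the new files in this patch; `register_sink` is a hypothetical helper.

```rust
use risingwave_common::catalog::{DatabaseId, SchemaId, TableId, UserId};

use crate::sink::catalog::desc::SinkDesc;
use crate::sink::catalog::SinkCatalog;

/// Hypothetical helper: turn a planner-side `SinkDesc` into the persisted
/// `SinkCatalog`, filling in the context that only the frontend/meta layer knows.
/// The proto form for the stream graph is produced separately via `SinkDesc::to_proto`.
fn register_sink(
    desc: SinkDesc,
    schema_id: SchemaId,
    database_id: DatabaseId,
    owner: UserId,
    dependent_relations: Vec<TableId>,
) -> SinkCatalog {
    desc.into_catalog(schema_id, database_id, owner, dependent_relations)
}
```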
+ +pub mod desc; + +use std::collections::HashMap; + +use itertools::Itertools; +use risingwave_common::catalog::{ColumnCatalog, DatabaseId, SchemaId, TableId, UserId}; +use risingwave_common::util::sort_util::OrderPair; +use risingwave_pb::catalog::{Sink as ProstSink, SinkType as ProstSinkType}; + +#[derive(Clone, Copy, Debug, Default, Hash, PartialOrd, PartialEq, Eq)] +pub struct SinkId { + pub sink_id: u32, +} + +impl SinkId { + pub const fn new(sink_id: u32) -> Self { + SinkId { sink_id } + } + + /// Sometimes the id field is filled later, we use this value for better debugging. + pub const fn placeholder() -> Self { + SinkId { + sink_id: u32::MAX - 1, + } + } + + pub fn sink_id(&self) -> u32 { + self.sink_id + } +} + +impl From for SinkId { + fn from(id: u32) -> Self { + Self::new(id) + } +} +impl From for u32 { + fn from(id: SinkId) -> Self { + id.sink_id + } +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum SinkType { + /// The data written into the sink connector can only be INSERT. No UPDATE or DELETE is + /// allowed. + AppendOnly, + /// The input of the sink operator can be INSERT, UPDATE, or DELETE, but it must drop any + /// UPDATE or DELETE and write only INSERT into the sink connector. + ForceAppendOnly, + /// The data written into the sink connector can be INSERT, UPDATE, or DELETE. + Upsert, +} + +impl SinkType { + pub fn is_append_only(&self) -> bool { + self == &Self::AppendOnly || self == &Self::ForceAppendOnly + } + + pub fn is_upsert(&self) -> bool { + self == &Self::Upsert + } + + pub fn to_proto(self) -> ProstSinkType { + match self { + SinkType::AppendOnly => ProstSinkType::AppendOnly, + SinkType::ForceAppendOnly => ProstSinkType::ForceAppendOnly, + SinkType::Upsert => ProstSinkType::Upsert, + } + } + + pub fn from_proto(pb: ProstSinkType) -> Self { + match pb { + ProstSinkType::AppendOnly => SinkType::AppendOnly, + ProstSinkType::ForceAppendOnly => SinkType::ForceAppendOnly, + ProstSinkType::Upsert => SinkType::Upsert, + ProstSinkType::Unspecified => unreachable!(), + } + } +} + +#[derive(Clone, Debug)] +pub struct SinkCatalog { + /// Id of the sink. + pub id: SinkId, + + /// Schema of the sink. + pub schema_id: SchemaId, + + /// Database of the sink. + pub database_id: DatabaseId, + + /// Name of the sink. + pub name: String, + + /// The full `CREATE SINK` definition of the sink. + pub definition: String, + + /// All columns of the sink. Note that this is NOT sorted by columnId in the vector. + pub columns: Vec, + + /// Primiary keys of the sink (connector). Now the sink does not care about a field's + /// order (ASC/DESC). + pub pk: Vec, + + /// Primary key indices of the corresponding sink operator's output. + pub stream_key: Vec, + + /// Distribution key indices of the sink. For example, if `distribution_key = [1, 2]`, then the + /// distribution keys will be `columns[1]` and `columns[2]`. + pub distribution_key: Vec, + + /// The properties of the sink. + pub properties: HashMap, + + /// Owner of the sink. + pub owner: UserId, + + // Relations on which the sink depends. + pub dependent_relations: Vec, + + // The append-only behavior of the physical sink connector. Frontend will determine `sink_type` + // based on both its own derivation on the append-only attribute and other user-specified + // options in `properties`. 
+ pub sink_type: SinkType, +} + +impl SinkCatalog { + pub fn to_proto(&self) -> ProstSink { + ProstSink { + id: self.id.into(), + schema_id: self.schema_id.schema_id, + database_id: self.database_id.database_id, + name: self.name.clone(), + definition: self.definition.clone(), + columns: self.columns.iter().map(|c| c.to_protobuf()).collect_vec(), + pk: self.pk.iter().map(|o| o.to_protobuf()).collect(), + stream_key: self.stream_key.iter().map(|idx| *idx as i32).collect_vec(), + dependent_relations: self + .dependent_relations + .iter() + .map(|id| id.table_id) + .collect_vec(), + distribution_key: self + .distribution_key + .iter() + .map(|k| *k as i32) + .collect_vec(), + owner: self.owner.into(), + properties: self.properties.clone(), + sink_type: self.sink_type.to_proto() as i32, + } + } +} + +impl From for SinkCatalog { + fn from(pb: ProstSink) -> Self { + let sink_type = pb.get_sink_type().unwrap(); + SinkCatalog { + id: pb.id.into(), + name: pb.name.clone(), + schema_id: pb.schema_id.into(), + database_id: pb.database_id.into(), + definition: pb.definition.clone(), + columns: pb + .columns + .into_iter() + .map(ColumnCatalog::from) + .collect_vec(), + pk: pb.pk.iter().map(OrderPair::from_prost).collect_vec(), + stream_key: pb.stream_key.iter().map(|k| *k as _).collect_vec(), + distribution_key: pb.distribution_key.iter().map(|k| *k as _).collect_vec(), + properties: pb.properties.clone(), + owner: pb.owner.into(), + dependent_relations: pb + .dependent_relations + .into_iter() + .map(TableId::from) + .collect_vec(), + sink_type: SinkType::from_proto(sink_type), + } + } +} + +impl From<&ProstSink> for SinkCatalog { + fn from(pb: &ProstSink) -> Self { + pb.clone().into() + } +} diff --git a/src/connector/src/sink/console.rs b/src/connector/src/sink/console.rs index 37671419048c8..7d9b67297795e 100644 --- a/src/connector/src/sink/console.rs +++ b/src/connector/src/sink/console.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/sink/kafka.rs b/src/connector/src/sink/kafka.rs index 7393958287c97..b6063225c1ecc 100644 --- a/src/connector/src/sink/kafka.rs +++ b/src/connector/src/sink/kafka.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
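The three `SinkType` variants encode how changes may reach the connector; `ForceAppendOnly` still accepts updates and deletes upstream but drops them before writing. A quick sanity-check snippet, based only on the definitions above, showing the helper semantics and the lossless proto round trip.

```rust
use crate::sink::catalog::SinkType;

fn sink_type_sanity_check() {
    // Both append-only flavours report `is_append_only`; only `Upsert` is an upsert.
    assert!(SinkType::AppendOnly.is_append_only());
    assert!(SinkType::ForceAppendOnly.is_append_only());
    assert!(SinkType::Upsert.is_upsert());

    // Converting to proto and back is lossless for the three concrete variants.
    for ty in [SinkType::AppendOnly, SinkType::ForceAppendOnly, SinkType::Upsert] {
        assert_eq!(SinkType::from_proto(ty.to_proto()), ty);
    }
}
```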
@@ -17,7 +17,7 @@ use std::fmt::{Debug, Formatter}; use std::future::Future; use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use itertools::Itertools; +use anyhow::anyhow; use rdkafka::error::{KafkaError, KafkaResult}; use rdkafka::message::ToBytes; use rdkafka::producer::{BaseRecord, DefaultProducerContext, Producer, ThreadedProducer}; @@ -28,64 +28,78 @@ use risingwave_common::catalog::{Field, Schema}; use risingwave_common::row::Row; use risingwave_common::types::to_text::ToText; use risingwave_common::types::{DataType, DatumRef, ScalarRefImpl}; -use serde::Deserialize; +use risingwave_common::util::iter_util::ZipEqFast; +use serde_derive::Deserialize; use serde_json::{json, Map, Value}; use tracing::warn; -use super::{Sink, SinkError}; +use super::{Sink, SinkError, SINK_FORMAT_APPEND_ONLY, SINK_FORMAT_DEBEZIUM}; +use crate::common::KafkaCommon; use crate::sink::Result; +use crate::{deserialize_bool_from_string, deserialize_duration_from_string}; pub const KAFKA_SINK: &str = "kafka"; -#[derive(Debug, Clone, Deserialize)] -pub struct KafkaConfig { - #[serde(rename = "kafka.brokers")] - pub brokers: String, +const fn _default_timeout() -> Duration { + Duration::from_secs(5) +} - #[serde(rename = "kafka.topic")] - pub topic: String, +const fn _default_max_retries() -> u32 { + 3 +} - // Optional. If not specified, the default value is None and messages are sent to random - // partition. if we want to guarantee exactly once delivery, we need to specify the - // partition number. The partition number should set by meta. - pub partition: Option, +const fn _default_retry_backoff() -> Duration { + Duration::from_millis(100) +} + +const fn _default_use_transaction() -> bool { + true +} + +#[derive(Debug, Clone, Deserialize)] +pub struct KafkaConfig { + #[serde(flatten)] + pub common: KafkaCommon, pub format: String, // accept "append_only" or "debezium" pub identifier: String, + #[serde( + rename = "properties.timeout", + default = "_default_timeout", + deserialize_with = "deserialize_duration_from_string" + )] pub timeout: Duration, + + #[serde(rename = "properties.retry.max", default = "_default_max_retries")] pub max_retry_num: u32, + + #[serde( + rename = "properties.retry.interval", + default = "_default_retry_backoff", + deserialize_with = "deserialize_duration_from_string" + )] pub retry_interval: Duration, + + #[serde( + deserialize_with = "deserialize_bool_from_string", + default = "_default_use_transaction" + )] + pub use_transaction: bool, } impl KafkaConfig { pub fn from_hashmap(values: HashMap) -> Result { - let brokers = values - .get("kafka.brokers") - .expect("kafka.brokers must be set"); - let identifier = values - .get("identifier") - .expect("kafka.identifier must be set"); - let format = values.get("format").expect("format must be set"); - if format != "append_only" && format != "debezium" { - return Err(SinkError::Config( - "format must be set to \"append_only\" or \"debezium\"".to_string(), - )); - } + let config = serde_json::from_value::(serde_json::to_value(values).unwrap()) + .map_err(|e| SinkError::Config(anyhow!(e)))?; - let topic = values.get("kafka.topic").expect("kafka.topic must be set"); - - Ok(KafkaConfig { - brokers: brokers.to_string(), - topic: topic.to_string(), - identifier: identifier.to_owned(), - partition: None, - timeout: Duration::from_secs(5), // default timeout is 5 seconds - max_retry_num: 3, // default max retry num is 3 - retry_interval: Duration::from_millis(100), // default retry interval is 100ms - format: format.to_string(), - }) + if 
config.format != SINK_FORMAT_APPEND_ONLY && config.format != SINK_FORMAT_DEBEZIUM { + return Err(SinkError::Config(anyhow!( + "format must be either append_only or debezium" + ))); + } + Ok(config) } } @@ -96,7 +110,7 @@ enum KafkaSinkState { Running(u64), } -pub struct KafkaSink { +pub struct KafkaSink { pub config: KafkaConfig, pub conductor: KafkaTransactionConductor, state: KafkaSinkState, @@ -104,7 +118,7 @@ pub struct KafkaSink { in_transaction_epoch: Option, } -impl KafkaSink { +impl KafkaSink { pub async fn new(config: KafkaConfig, schema: Schema) -> Result { Ok(KafkaSink { config: config.clone(), @@ -215,7 +229,7 @@ impl KafkaSink { }; if let Some(obj) = event_object { self.send( - BaseRecord::to(self.config.topic.as_str()) + BaseRecord::to(self.config.common.topic.as_str()) .key(self.gen_message_key().as_bytes()) .payload(obj.to_string().as_bytes()), ) @@ -230,7 +244,7 @@ impl KafkaSink { if op == Op::Insert { let record = Value::Object(record_to_json(row, schema.fields.clone())?).to_string(); self.send( - BaseRecord::to(self.config.topic.as_str()) + BaseRecord::to(self.config.common.topic.as_str()) .key(self.gen_message_key().as_bytes()) .payload(record.as_bytes()), ) @@ -242,31 +256,25 @@ impl KafkaSink { } #[async_trait::async_trait] -impl Sink for KafkaSink { +impl Sink for KafkaSink { async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> { - // when sinking the snapshot, it is required to begin epoch 0 for transaction - // if let (KafkaSinkState::Running(epoch), in_txn_epoch) = (&self.state, - // &self.in_transaction_epoch.unwrap()) && in_txn_epoch <= epoch { return Ok(()) - // } - - match self.config.format.as_str() { - "append_only" => self.append_only(chunk, &self.schema).await, - "debezium" => { - self.debezium_update( - chunk, - &self.schema, - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_millis() as u64, - ) - .await - } - _ => unreachable!(), + if APPEND_ONLY { + self.append_only(chunk, &self.schema).await + } else { + // TODO: Distinguish "upsert" from "debezium" later. + self.debezium_update( + chunk, + &self.schema, + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_millis() as u64, + ) + .await } } - // Note that epoch 0 is reserved for initializing, so we should not use epoch 0 for + // Note that epoch 0 is reserved for initializing, so we should not use epoch 0 for // transaction. 
async fn begin_epoch(&mut self, epoch: u64) -> Result<()> { self.in_transaction_epoch = Some(epoch); @@ -304,7 +312,7 @@ impl Sink for KafkaSink { } } -impl Debug for KafkaSink { +impl Debug for KafkaSink { fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result { unimplemented!(); } @@ -368,10 +376,10 @@ fn datum_to_json_object(field: &Field, datum: DatumRef<'_>) -> ArrayResult { let mut map = Map::with_capacity(st.fields.len()); - for (sub_datum_ref, sub_field) in struct_ref.fields_ref().into_iter().zip_eq( + for (sub_datum_ref, sub_field) in struct_ref.fields_ref().into_iter().zip_eq_fast( st.fields .iter() - .zip_eq(st.field_names.iter()) + .zip_eq_fast(st.field_names.iter()) .map(|(dt, name)| Field::with_name(dt.clone(), name)), ) { let value = datum_to_json_object(&sub_field, sub_datum_ref)?; @@ -391,7 +399,7 @@ fn datum_to_json_object(field: &Field, datum: DatumRef<'_>) -> ArrayResult, schema: Vec) -> Result> { let mut mappings = Map::with_capacity(schema.len()); - for (field, datum_ref) in schema.iter().zip_eq(row.iter()) { + for (field, datum_ref) in schema.iter().zip_eq_fast(row.iter()) { let key = field.name.clone(); let value = datum_to_json_object(field, datum_ref) .map_err(|e| SinkError::JsonParse(e.to_string()))?; @@ -452,14 +460,20 @@ pub struct KafkaTransactionConductor { impl KafkaTransactionConductor { async fn new(config: KafkaConfig) -> Result { - let inner: ThreadedProducer = ClientConfig::new() - .set("bootstrap.servers", &config.brokers) - .set("message.timeout.ms", "5000") - .set("transactional.id", &config.identifier) // required by kafka transaction - .create() - .await?; + let inner: ThreadedProducer = { + let mut c = ClientConfig::new(); + config.common.set_security_properties(&mut c); + c.set("bootstrap.servers", &config.common.brokers) + .set("message.timeout.ms", "5000"); + if config.use_transaction { + c.set("transactional.id", &config.identifier); // required by kafka transaction + } + c.create().await? + }; - inner.init_transactions(config.timeout).await?; + if config.use_transaction { + inner.init_transactions(config.timeout).await?; + } Ok(KafkaTransactionConductor { properties: config, @@ -469,15 +483,27 @@ impl KafkaTransactionConductor { #[expect(clippy::unused_async)] async fn start_transaction(&self) -> KafkaResult<()> { - self.inner.begin_transaction() + if self.properties.use_transaction { + self.inner.begin_transaction() + } else { + Ok(()) + } } async fn commit_transaction(&self) -> KafkaResult<()> { - self.inner.commit_transaction(self.properties.timeout).await + if self.properties.use_transaction { + self.inner.commit_transaction(self.properties.timeout).await + } else { + Ok(()) + } } async fn abort_transaction(&self) -> KafkaResult<()> { - self.inner.abort_transaction(self.properties.timeout).await + if self.properties.use_transaction { + self.inner.abort_transaction(self.properties.timeout).await + } else { + Ok(()) + } } async fn flush(&self) -> KafkaResult<()> { @@ -504,6 +530,25 @@ mod test { use super::*; + #[test] + fn parse_kafka_config() { + let properties: HashMap = hashmap! 
{ + "properties.bootstrap.server".to_string() => "localhost:9092".to_string(), + "topic".to_string() => "test".to_string(), + "format".to_string() => "append_only".to_string(), + "use_transaction".to_string() => "False".to_string(), + "security_protocol".to_string() => "SASL".to_string(), + "sasl_mechanism".to_string() => "SASL".to_string(), + "sasl_username".to_string() => "test".to_string(), + "sasl_password".to_string() => "test".to_string(), + "identifier".to_string() => "test_sink_1".to_string(), + "properties.timeout".to_string() => "5s".to_string(), + }; + + let config = KafkaConfig::from_hashmap(properties).unwrap(); + println!("{:?}", config); + } + #[ignore] #[tokio::test] async fn test_kafka_producer() -> Result<()> { @@ -528,7 +573,9 @@ mod test { }, ]); let kafka_config = KafkaConfig::from_hashmap(properties)?; - let mut sink = KafkaSink::new(kafka_config.clone(), schema).await.unwrap(); + let mut sink = KafkaSink::::new(kafka_config.clone(), schema) + .await + .unwrap(); for i in 0..10 { let mut fail_flag = false; @@ -536,7 +583,7 @@ mod test { for i in 0..100 { match sink .send( - BaseRecord::to(kafka_config.topic.as_str()) + BaseRecord::to(kafka_config.common.topic.as_str()) .payload(format!("value-{}", i).as_bytes()) .key(sink.gen_message_key().as_bytes()), ) diff --git a/src/connector/src/sink/mod.rs b/src/connector/src/sink/mod.rs index 2a41a89ae198d..6cfb9266689d8 100644 --- a/src/connector/src/sink/mod.rs +++ b/src/connector/src/sink/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+pub mod catalog; pub mod console; pub mod kafka; pub mod redis; @@ -19,6 +20,7 @@ pub mod remote; use std::collections::HashMap; +use anyhow::anyhow; use async_trait::async_trait; use enum_as_inner::EnumAsInner; use risingwave_common::array::StreamChunk; @@ -29,12 +31,18 @@ use serde::{Deserialize, Serialize}; use thiserror::Error; pub use tracing; +use self::catalog::SinkType; use crate::sink::console::{ConsoleConfig, ConsoleSink, CONSOLE_SINK}; use crate::sink::kafka::{KafkaConfig, KafkaSink, KAFKA_SINK}; use crate::sink::redis::{RedisConfig, RedisSink}; use crate::sink::remote::{RemoteConfig, RemoteSink}; use crate::ConnectorParams; +pub const SINK_FORMAT_OPTION: &str = "format"; +pub const SINK_FORMAT_APPEND_ONLY: &str = "append_only"; +pub const SINK_FORMAT_DEBEZIUM: &str = "debezium"; +pub const SINK_USER_FORCE_APPEND_ONLY_OPTION: &str = "force_append_only"; + #[async_trait] pub trait Sink { async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()>; @@ -54,7 +62,7 @@ pub trait Sink { #[derive(Clone, Debug, EnumAsInner)] pub enum SinkConfig { Redis(RedisConfig), - Kafka(KafkaConfig), + Kafka(Box), Remote(RemoteConfig), Console(ConsoleConfig), BlackHole, @@ -76,9 +84,11 @@ impl SinkConfig { const SINK_TYPE_KEY: &str = "connector"; let sink_type = properties .get(SINK_TYPE_KEY) - .ok_or_else(|| SinkError::Config(format!("missing config: {}", SINK_TYPE_KEY)))?; + .ok_or_else(|| SinkError::Config(anyhow!("missing config: {}", SINK_TYPE_KEY)))?; match sink_type.to_lowercase().as_str() { - KAFKA_SINK => Ok(SinkConfig::Kafka(KafkaConfig::from_hashmap(properties)?)), + KAFKA_SINK => Ok(SinkConfig::Kafka(Box::new(KafkaConfig::from_hashmap( + properties, + )?))), CONSOLE_SINK => Ok(SinkConfig::Console(ConsoleConfig::from_hashmap( properties, )?)), @@ -101,8 +111,10 @@ impl SinkConfig { #[derive(Debug)] pub enum SinkImpl { Redis(Box), - Kafka(Box), - Remote(Box), + Kafka(Box>), + UpsertKafka(Box>), + Remote(Box>), + UpsertRemote(Box>), Console(Box), Blackhole, } @@ -113,60 +125,80 @@ impl SinkImpl { schema: Schema, pk_indices: Vec, connector_params: ConnectorParams, + sink_type: SinkType, ) -> Result { Ok(match cfg { SinkConfig::Redis(cfg) => SinkImpl::Redis(Box::new(RedisSink::new(cfg, schema)?)), - SinkConfig::Kafka(cfg) => SinkImpl::Kafka(Box::new(KafkaSink::new(cfg, schema).await?)), + SinkConfig::Kafka(cfg) => { + if sink_type.is_append_only() { + // Append-only Kafka sink + SinkImpl::Kafka(Box::new(KafkaSink::::new(*cfg, schema).await?)) + } else { + // Upsert Kafka sink + SinkImpl::UpsertKafka(Box::new(KafkaSink::::new(*cfg, schema).await?)) + } + } SinkConfig::Console(cfg) => SinkImpl::Console(Box::new(ConsoleSink::new(cfg, schema)?)), - SinkConfig::Remote(cfg) => SinkImpl::Remote(Box::new( - RemoteSink::new(cfg, schema, pk_indices, connector_params).await?, - )), + SinkConfig::Remote(cfg) => { + if sink_type.is_append_only() { + // Append-only remote sink + SinkImpl::Remote(Box::new( + RemoteSink::::new(cfg, schema, pk_indices, connector_params).await?, + )) + } else { + // Upsert remote sink + SinkImpl::UpsertRemote(Box::new( + RemoteSink::::new(cfg, schema, pk_indices, connector_params).await?, + )) + } + } SinkConfig::BlackHole => SinkImpl::Blackhole, }) } } -#[async_trait] -impl Sink for SinkImpl { - async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> { - match self { - SinkImpl::Redis(sink) => sink.write_batch(chunk).await, - SinkImpl::Kafka(sink) => sink.write_batch(chunk).await, - SinkImpl::Remote(sink) => sink.write_batch(chunk).await, - 
SinkImpl::Console(sink) => sink.write_batch(chunk).await, - SinkImpl::Blackhole => Ok(()), - } - } - - async fn begin_epoch(&mut self, epoch: u64) -> Result<()> { - match self { - SinkImpl::Redis(sink) => sink.begin_epoch(epoch).await, - SinkImpl::Kafka(sink) => sink.begin_epoch(epoch).await, - SinkImpl::Remote(sink) => sink.begin_epoch(epoch).await, - SinkImpl::Console(sink) => sink.begin_epoch(epoch).await, - SinkImpl::Blackhole => Ok(()), - } - } - - async fn commit(&mut self) -> Result<()> { - match self { - SinkImpl::Redis(sink) => sink.commit().await, - SinkImpl::Kafka(sink) => sink.commit().await, - SinkImpl::Remote(sink) => sink.commit().await, - SinkImpl::Console(sink) => sink.commit().await, - SinkImpl::Blackhole => Ok(()), +macro_rules! impl_sink { + ($($variant_name:ident),*) => { + #[async_trait] + impl Sink for SinkImpl { + async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> { + match self { + $( SinkImpl::$variant_name(inner) => inner.write_batch(chunk).await, )* + SinkImpl::Blackhole => Ok(()), + } + } + + async fn begin_epoch(&mut self, epoch: u64) -> Result<()> { + match self { + $( SinkImpl::$variant_name(inner) => inner.begin_epoch(epoch).await, )* + SinkImpl::Blackhole => Ok(()), + } + } + + async fn commit(&mut self) -> Result<()> { + match self { + $( SinkImpl::$variant_name(inner) => inner.commit().await, )* + SinkImpl::Blackhole => Ok(()), + } + } + + async fn abort(&mut self) -> Result<()> { + match self { + $( SinkImpl::$variant_name(inner) => inner.abort().await, )* + SinkImpl::Blackhole => Ok(()), + } + } } } +} - async fn abort(&mut self) -> Result<()> { - match self { - SinkImpl::Redis(sink) => sink.abort().await, - SinkImpl::Kafka(sink) => sink.abort().await, - SinkImpl::Remote(sink) => sink.abort().await, - SinkImpl::Console(sink) => sink.abort().await, - SinkImpl::Blackhole => Ok(()), - } - } +impl_sink! { + Redis, + Kafka, + UpsertKafka, + Remote, + UpsertRemote, + Console } pub type Result = std::result::Result; @@ -180,7 +212,7 @@ pub enum SinkError { #[error("Json parse error: {0}")] JsonParse(String), #[error("config error: {0}")] - Config(String), + Config(#[from] anyhow::Error), } impl From for SinkError { diff --git a/src/connector/src/sink/redis.rs b/src/connector/src/sink/redis.rs index c8fa4be2c0293..68b7839417a3a 100644 --- a/src/connector/src/sink/redis.rs +++ b/src/connector/src/sink/redis.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/sink/remote.rs b/src/connector/src/sink/remote.rs index 6f69153d7955b..266a2425622d0 100644 --- a/src/connector/src/sink/remote.rs +++ b/src/connector/src/sink/remote.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
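The `impl_sink!` macro above removes the four hand-written `match` blocks by generating one arm per enum variant for every trait method, with `Blackhole` handled as the no-op fallback. The pattern is ordinary `macro_rules!` repetition; below is a freestanding miniature of it, where the trait, enum and variant names are illustrative and not the crate's.

```rust
trait Writer {
    fn write(&mut self, record: &str) -> Result<(), String>;
    fn commit(&mut self) -> Result<(), String>;
}

struct Stdout;
impl Writer for Stdout {
    fn write(&mut self, record: &str) -> Result<(), String> {
        println!("{record}");
        Ok(())
    }
    fn commit(&mut self) -> Result<(), String> {
        Ok(())
    }
}

struct Counter(usize);
impl Writer for Counter {
    fn write(&mut self, _record: &str) -> Result<(), String> {
        self.0 += 1;
        Ok(())
    }
    fn commit(&mut self) -> Result<(), String> {
        Ok(())
    }
}

enum WriterImpl {
    Stdout(Stdout),
    Counter(Counter),
    Blackhole,
}

// One arm per listed variant for every method; `Blackhole` is the no-op fallback,
// mirroring how `impl_sink!` treats `SinkImpl::Blackhole`.
macro_rules! impl_writer {
    ($($variant:ident),*) => {
        impl Writer for WriterImpl {
            fn write(&mut self, record: &str) -> Result<(), String> {
                match self {
                    $( WriterImpl::$variant(inner) => inner.write(record), )*
                    WriterImpl::Blackhole => Ok(()),
                }
            }
            fn commit(&mut self) -> Result<(), String> {
                match self {
                    $( WriterImpl::$variant(inner) => inner.commit(), )*
                    WriterImpl::Blackhole => Ok(()),
                }
            }
        }
    };
}

impl_writer!(Stdout, Counter);

fn main() -> Result<(), String> {
    let mut w = WriterImpl::Counter(Counter(0));
    w.write("hello")?;
    w.commit()
}
```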
@@ -15,8 +15,8 @@ use std::collections::HashMap; use std::time::Duration; +use anyhow::anyhow; use async_trait::async_trait; -use itertools::Itertools; use risingwave_common::array::StreamChunk; #[cfg(test)] use risingwave_common::catalog::Field; @@ -26,6 +26,7 @@ use risingwave_common::row::Row; #[cfg(test)] use risingwave_common::types::DataType; use risingwave_common::types::{DatumRef, ScalarRefImpl}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::connector_service::connector_service_client::ConnectorServiceClient; use risingwave_pb::connector_service::sink_stream_request::write_batch::json_payload::RowOp; use risingwave_pb::connector_service::sink_stream_request::write_batch::{JsonPayload, Payload}; @@ -46,10 +47,10 @@ use tonic::{Request, Status, Streaming}; use crate::sink::{Result, Sink, SinkError}; use crate::ConnectorParams; -pub const VALID_REMOTE_SINKS: [&str; 2] = ["jdbc", "file"]; +pub const VALID_REMOTE_SINKS: [&str; 3] = ["jdbc", "file", "iceberg"]; -pub fn is_valid_remote_sink(sink_type: String) -> bool { - return VALID_REMOTE_SINKS.contains(&sink_type.as_str()); +pub fn is_valid_remote_sink(sink_type: &str) -> bool { + VALID_REMOTE_SINKS.contains(&sink_type) } #[derive(Clone, Debug)] @@ -65,8 +66,8 @@ impl RemoteConfig { .expect("sink type must be specified") .to_string(); - if !is_valid_remote_sink(sink_type.clone()) { - return Err(SinkError::Config(format!("invalid sink type: {sink_type}"))); + if !is_valid_remote_sink(sink_type.as_str()) { + return Err(SinkError::Config(anyhow!("invalid sink type: {sink_type}"))); } Ok(RemoteConfig { @@ -84,7 +85,7 @@ enum ResponseStreamImpl { impl ResponseStreamImpl { pub async fn next(&mut self) -> Result { - return match self { + match self { ResponseStreamImpl::Grpc(ref mut response) => response .next() .await @@ -95,12 +96,12 @@ impl ResponseStreamImpl { SinkError::Remote("response stream closed unexpectedly".to_string()) }) } - }; + } } } #[derive(Debug)] -pub struct RemoteSink { +pub struct RemoteSink { pub sink_type: String, properties: HashMap, epoch: Option, @@ -111,7 +112,7 @@ pub struct RemoteSink { response_stream: ResponseStreamImpl, } -impl RemoteSink { +impl RemoteSink { pub async fn new( config: RemoteConfig, schema: Schema, @@ -172,14 +173,11 @@ impl RemoteSink { }) .map_err(|e| SinkError::Remote(e.to_string()))?; - let mut response = tokio::time::timeout( - Duration::from_secs(3), - client.sink_stream(Request::new(UnboundedReceiverStream::new(request_receiver))), - ) - .await - .map_err(|e| SinkError::Remote(format!("failed to start sink: {:?}", e)))? - .map_err(|e| SinkError::Remote(format!("{:?}", e)))? - .into_inner(); + let mut response = client + .sink_stream(Request::new(UnboundedReceiverStream::new(request_receiver))) + .await + .map_err(|e| SinkError::Remote(format!("failed to start sink: {:?}", e)))? 
+ .into_inner(); let _ = response.next().await.unwrap(); Ok(RemoteSink { @@ -236,14 +234,14 @@ impl RemoteSink { } #[async_trait] -impl Sink for RemoteSink { +impl Sink for RemoteSink { async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> { let mut row_ops = vec![]; for (op, row_ref) in chunk.rows() { let mut map = serde_json::Map::new(); row_ref .iter() - .zip_eq(self.schema.fields.iter()) + .zip_eq_fast(self.schema.fields.iter()) .for_each(|(v, f)| { map.insert(f.name.clone(), parse_datum(v)); }); @@ -359,7 +357,7 @@ mod test { let (request_sender, mut request_recv) = mpsc::unbounded_channel(); let (_, resp_recv) = mpsc::unbounded_channel(); - let mut sink = RemoteSink::for_test(resp_recv, request_sender); + let mut sink = RemoteSink::::for_test(resp_recv, request_sender); let chunk = StreamChunk::new( vec![Op::Insert], vec![ @@ -402,7 +400,7 @@ mod test { async fn test_remote_sink() { let (request_sender, mut request_receiver) = mpsc::unbounded_channel(); let (response_sender, response_receiver) = mpsc::unbounded_channel(); - let mut sink = RemoteSink::for_test(response_receiver, request_sender); + let mut sink = RemoteSink::::for_test(response_receiver, request_sender); let chunk_a = StreamChunk::new( vec![Op::Insert, Op::Insert, Op::Insert], diff --git a/src/connector/src/source/base.rs b/src/connector/src/source/base.rs index df10b878d0c45..ea9e659938d8f 100644 --- a/src/connector/src/source/base.rs +++ b/src/connector/src/source/base.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,27 +13,27 @@ // limitations under the License. use std::collections::HashMap; -use std::sync::{Arc, LazyLock}; +use std::sync::Arc; use anyhow::{anyhow, Result}; use async_trait::async_trait; use bytes::Bytes; use enum_as_inner::EnumAsInner; use futures::stream::BoxStream; -use futures::{pin_mut, Stream, StreamExt}; use itertools::Itertools; use prost::Message; -use risingwave_common::error::ErrorCode; +use risingwave_common::array::StreamChunk; +use risingwave_common::catalog::TableId; +use risingwave_common::error::{ErrorCode, RwError}; use risingwave_pb::connector_service::TableSchema; use risingwave_pb::source::ConnectorSplit; use serde::{Deserialize, Serialize}; -use tokio::runtime::Runtime; -use tokio::sync::mpsc; use super::datagen::DatagenMeta; use super::filesystem::{FsSplit, S3FileReader, S3Properties, S3SplitEnumerator, S3_CONNECTOR}; use super::google_pubsub::GooglePubsubMeta; use super::kafka::KafkaMeta; +use super::monitor::SourceMetrics; use super::nexmark::source::message::NexmarkMeta; use crate::parser::ParserConfig; use crate::source::cdc::{ @@ -63,10 +63,7 @@ use crate::source::pulsar::source::reader::PulsarSplitReader; use crate::source::pulsar::{ PulsarProperties, PulsarSplit, PulsarSplitEnumerator, PULSAR_CONNECTOR, }; -use crate::{ - impl_connector_properties, impl_split, impl_split_enumerator, impl_split_reader, - BoxSourceWithStateStream, -}; +use crate::{impl_connector_properties, impl_split, impl_split_enumerator, impl_split_reader}; /// [`SplitEnumerator`] fetches the split metadata from the external source service. /// NOTE: It runs in the meta server, so probably it should be moved to the `meta` crate. 
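Both the Kafka and remote sinks are now parameterised by a `const APPEND_ONLY: bool`; the angle-bracketed parameters are not visible in this patch text, but the `if APPEND_ONLY { ... }` branch in `write_batch` and the `KafkaSink::<true>` / `RemoteSink::<true>` turbofish in the tests imply it. The append-only/upsert decision is therefore made once at construction and the write path branches on a compile-time constant. A standalone sketch of the idea with illustrative names:

```rust
#[derive(Debug)]
enum Op {
    Insert,
    Delete,
}

/// Illustrative sink parameterised on its append-only behaviour, in the spirit
/// of the const-generic `KafkaSink` / `RemoteSink` in this patch.
struct DemoSink<const APPEND_ONLY: bool> {
    written: Vec<String>,
}

impl<const APPEND_ONLY: bool> DemoSink<APPEND_ONLY> {
    fn new() -> Self {
        Self { written: Vec::new() }
    }

    fn write_batch(&mut self, batch: &[(Op, &str)]) {
        for (op, payload) in batch {
            if APPEND_ONLY {
                // Append-only flavour: only inserts reach the external system.
                if matches!(op, Op::Insert) {
                    self.written.push((*payload).to_string());
                }
            } else {
                // Changelog/upsert flavour: every op is encoded and forwarded.
                self.written.push(format!("{op:?}: {payload}"));
            }
        }
    }
}

fn main() {
    let batch = [(Op::Insert, "a"), (Op::Delete, "a"), (Op::Insert, "b")];

    let mut append_only = DemoSink::<true>::new();
    append_only.write_batch(&batch);
    assert_eq!(append_only.written, vec!["a", "b"]);

    let mut changelog = DemoSink::<false>::new();
    changelog.write_batch(&batch);
    assert_eq!(changelog.written.len(), 3);
}
```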
@@ -79,41 +76,76 @@ pub trait SplitEnumerator: Sized { async fn list_splits(&mut self) -> Result>; } -/// [`SplitReader`] is an abstraction of the external connector read interface, -/// used to read messages from the outside and transform them into source-oriented -/// [`SourceMessage`], in order to improve throughput, it is recommended to return a batch of -/// messages at a time. -#[async_trait] -pub trait SplitReader: Sized { - type Properties; +#[derive(Clone, Copy, Debug, Default)] +pub struct SourceInfo { + pub actor_id: u32, + pub source_id: TableId, +} - async fn new( - properties: Self::Properties, - state: ConnectorState, - columns: Option>, - ) -> Result; +impl SourceInfo { + pub fn new(actor_id: u32, source_id: TableId) -> Self { + SourceInfo { + actor_id, + source_id, + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum SourceFormat { + Invalid, + Json, + Protobuf, + DebeziumJson, + Avro, + Maxwell, + CanalJson, + Csv, + Native, + DebeziumAvro, +} - fn into_stream(self) -> BoxSourceStream; +pub type BoxSourceStream = BoxStream<'static, Result>>; +pub type BoxSourceWithStateStream = BoxStream<'static, Result>; + +/// [`StreamChunkWithState`] returns stream chunk together with offset for each split. In the +/// current design, one connector source can have multiple split reader. The keys are unique +/// `split_id` and values are the latest offset for each split. +#[derive(Clone, Debug, PartialEq)] +pub struct StreamChunkWithState { + pub chunk: StreamChunk, + pub split_offset_mapping: Option>, } -/// [`SplitReaderV2`] is a new abstraction of the external connector read interface which is +/// The `split_offset_mapping` field is unused for the table source, so we implement `From` for it. +impl From for StreamChunkWithState { + fn from(chunk: StreamChunk) -> Self { + Self { + chunk, + split_offset_mapping: None, + } + } +} + +/// [`SplitReader`] is a new abstraction of the external connector read interface which is /// responsible for parsing, it is used to read messages from the outside and transform them into a /// stream of parsed [`StreamChunk`] #[async_trait] -pub trait SplitReaderV2: Sized { +pub trait SplitReader: Sized { type Properties; async fn new( properties: Self::Properties, state: Vec, parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, + columns: Option>, ) -> Result; fn into_stream(self) -> BoxSourceWithStateStream; } -pub type BoxSourceStream = BoxStream<'static, Result>>; - /// The max size of a chunk yielded by source stream. 
pub const MAX_CHUNK_SIZE: usize = 1024; @@ -200,9 +232,10 @@ impl SplitImpl { } pub enum SplitReaderImpl { + S3(Box), + Dummy(Box), Kinesis(Box), Kafka(Box), - Dummy(Box), Nexmark(Box), Pulsar(Box), Datagen(Box), @@ -211,39 +244,6 @@ pub enum SplitReaderImpl { GooglePubsub(Box), } -pub enum SplitReaderV2Impl { - S3(Box), - Dummy(Box), -} - -impl SplitReaderV2Impl { - pub fn into_stream(self) -> BoxSourceWithStateStream { - match self { - Self::S3(s3_reader) => SplitReaderV2::into_stream(*s3_reader), - Self::Dummy(dummy_reader) => SplitReaderV2::into_stream(*dummy_reader), - } - } - - pub async fn create( - config: ConnectorProperties, - state: ConnectorState, - parser_config: ParserConfig, - _columns: Option>, - ) -> Result { - if state.is_none() { - return Ok(Self::Dummy(Box::new(DummySplitReader {}))); - } - let state = state.unwrap(); - let reader = match config { - ConnectorProperties::S3(s3_props) => Self::S3(Box::new( - S3FileReader::new(*s3_props, state, parser_config).await?, - )), - _ => todo!(), - }; - Ok(reader) - } -} - pub enum SplitEnumeratorImpl { Kafka(KafkaSplitEnumerator), Pulsar(PulsarSplitEnumerator), @@ -293,6 +293,7 @@ impl_split! { } impl_split_reader! { + { S3, S3FileReader }, { Kafka, KafkaSplitReader }, { Pulsar, PulsarSplitReader }, { Kinesis, KinesisSplitReader }, @@ -346,16 +347,6 @@ impl PartialEq for SourceMessage { } impl Eq for SourceMessage {} -/// The message pumped from the external source service. -/// The third-party message structs will eventually be transformed into this struct. -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct FsSourceMessage { - pub payload: Option, - pub offset: usize, - pub split_size: usize, - pub split_id: SplitId, -} - /// The metadata of a split. pub trait SplitMetaData: Sized { fn id(&self) -> SplitId; @@ -364,40 +355,11 @@ pub trait SplitMetaData: Sized { } /// [`ConnectorState`] maintains the consuming splits' info. In specific split readers, -/// `ConnectorState` cannot be [`None`] and only contains one [`SplitImpl`]. If no split is assigned -/// to source executor, `ConnectorState` is [`None`] and [`DummySplitReader`] is up instead of other -/// split readers. +/// `ConnectorState` cannot be [`None`] and contains one(for mq split readers) or many(for fs +/// split readers) [`SplitImpl`]. If no split is assigned to source executor, `ConnectorState` is +/// [`None`] and [`DummySplitReader`] is up instead of other split readers. pub type ConnectorState = Option>; -/// Spawn the data generator to a dedicated runtime, returns a channel receiver -/// for acquiring the generated data. This is used for the [`DatagenSplitReader`] and -/// [`NexmarkSplitReader`] in case that they are CPU intensive and may block the streaming actors. 
-pub fn spawn_data_generation_stream( - stream: impl Stream + Send + 'static, - buffer_size: usize, -) -> impl Stream + Send + 'static { - static RUNTIME: LazyLock = LazyLock::new(|| { - tokio::runtime::Builder::new_multi_thread() - .thread_name("risingwave-data-generation") - .enable_all() - .build() - .expect("failed to build data-generation runtime") - }); - - let (generation_tx, generation_rx) = mpsc::channel(buffer_size); - RUNTIME.spawn(async move { - pin_mut!(stream); - while let Some(result) = stream.next().await { - if generation_tx.send(result).await.is_err() { - tracing::warn!("failed to send next event to reader, exit"); - break; - } - } - }); - - tokio_stream::wrappers::ReceiverStream::new(generation_rx) -} - #[cfg(test)] mod tests { use maplit::*; diff --git a/src/connector/src/source/cdc/enumerator/mod.rs b/src/connector/src/source/cdc/enumerator/mod.rs index 2ee2419250aca..6858337875813 100644 --- a/src/connector/src/source/cdc/enumerator/mod.rs +++ b/src/connector/src/source/cdc/enumerator/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/cdc/mod.rs b/src/connector/src/source/cdc/mod.rs index a1da7fb30c982..e97e1a17c3243 100644 --- a/src/connector/src/source/cdc/mod.rs +++ b/src/connector/src/source/cdc/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/cdc/source/message.rs b/src/connector/src/source/cdc/source/message.rs index e9c3505eb9fc3..cbc8fc929a2d1 100644 --- a/src/connector/src/source/cdc/source/message.rs +++ b/src/connector/src/source/cdc/source/message.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/cdc/source/mod.rs b/src/connector/src/source/cdc/source/mod.rs index e272070dc8b86..4a2141fa0fe7c 100644 --- a/src/connector/src/source/cdc/source/mod.rs +++ b/src/connector/src/source/cdc/source/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/cdc/source/reader.rs b/src/connector/src/source/cdc/source/reader.rs index 9f94fe435479b..4fbfb45ed64b3 100644 --- a/src/connector/src/source/cdc/source/reader.rs +++ b/src/connector/src/source/cdc/source/reader.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,70 +13,78 @@ // limitations under the License. 
use std::str::FromStr; +use std::sync::Arc; use anyhow::{anyhow, Result}; use async_trait::async_trait; -use futures::pin_mut; +use futures::{pin_mut, StreamExt, TryStreamExt}; use futures_async_stream::try_stream; -use itertools::Itertools; use risingwave_common::util::addr::HostAddr; use risingwave_pb::connector_service::GetEventStreamResponse; use risingwave_rpc_client::ConnectorClient; -use crate::source::base::{SourceMessage, SplitReader}; +use crate::impl_common_split_reader_logic; +use crate::parser::ParserConfig; +use crate::source::base::SourceMessage; use crate::source::cdc::CdcProperties; -use crate::source::{BoxSourceStream, Column, ConnectorState, SplitImpl}; +use crate::source::monitor::SourceMetrics; +use crate::source::{ + BoxSourceWithStateStream, Column, SourceInfo, SplitId, SplitImpl, SplitMetaData, SplitReader, +}; + +impl_common_split_reader_logic!(CdcSplitReader, CdcProperties); pub struct CdcSplitReader { source_id: u64, start_offset: Option, conn_props: CdcProperties, + + split_id: SplitId, + parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, } #[async_trait] impl SplitReader for CdcSplitReader { type Properties = CdcProperties; + #[allow(clippy::unused_async)] async fn new( conn_props: CdcProperties, - state: ConnectorState, + splits: Vec, + parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, _columns: Option>, ) -> Result { - if let Some(splits) = state { - let split = splits - .into_iter() - .exactly_one() - .map_err(|e| anyhow!("failed to create cdc split reader: {e}"))?; - - match split { - SplitImpl::MySqlCdc(split) => { - return Ok(Self { - source_id: split.source_id as u64, - start_offset: split.start_offset, - conn_props, - }); - } - SplitImpl::PostgresCdc(split) => { - return Ok(Self { - source_id: split.source_id as u64, - start_offset: split.start_offset, - conn_props, - }); - } - _ => {} - } + assert!(splits.len() == 1); + let split = splits.into_iter().next().unwrap(); + let split_id = split.id(); + match split { + SplitImpl::MySqlCdc(split) | SplitImpl::PostgresCdc(split) => Ok(Self { + source_id: split.source_id as u64, + start_offset: split.start_offset, + conn_props, + split_id, + parser_config, + metrics, + source_info, + }), + _ => Err(anyhow!( + "failed to create cdc split reader: invalid splis info" + )), } - Err(anyhow!("failed to create cdc split reader: invalid state")) } - fn into_stream(self) -> BoxSourceStream { - self.into_stream() + fn into_stream(self) -> BoxSourceWithStateStream { + self.into_chunk_stream() } } impl CdcSplitReader { #[try_stream(boxed, ok = Vec, error = anyhow::Error)] - pub async fn into_stream(self) { + async fn into_data_stream(self) { tracing::debug!("cdc props: {:?}", self.conn_props); let cdc_client = ConnectorClient::new(HostAddr::from_str(&self.conn_props.connector_node_addr)?).await?; diff --git a/src/connector/src/source/cdc/split.rs b/src/connector/src/source/cdc/split.rs index b541e6ba2312a..f92a4bc674975 100644 --- a/src/connector/src/source/cdc/split.rs +++ b/src/connector/src/source/cdc/split.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
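The reworked `SplitReader` above no longer yields raw `SourceMessage` batches from `into_stream`; it returns a `BoxSourceWithStateStream` of parsed `StreamChunkWithState`, and `impl_common_split_reader_logic!` presumably generates the glue that turns the reader's raw `into_data_stream` into that `into_chunk_stream`. Below is a self-contained sketch of the idea only: `SourceMessage` and `StreamChunkWithState` are simplified stand-ins, and `batch_to_chunk` is not the real macro expansion.

    use std::collections::HashMap;

    // Stand-ins for the connector types; the real ones live in risingwave_connector.
    struct SourceMessage {
        payload: Option<Vec<u8>>,
        offset: String,
        split_id: String,
    }

    struct StreamChunkWithState {
        rows: Vec<String>,
        split_offset_mapping: Option<HashMap<String, String>>,
    }

    // Conceptually, the common reader logic parses each raw batch and re-emits it
    // together with the latest offset seen per split, so the source executor can
    // checkpoint its consumption state straight from the chunk.
    fn batch_to_chunk(batch: Vec<SourceMessage>) -> StreamChunkWithState {
        let mut mapping = HashMap::new();
        let rows = batch
            .into_iter()
            .map(|msg| {
                mapping.insert(msg.split_id, msg.offset);
                // A real parser would build typed columns here; we keep the payload as text.
                String::from_utf8_lossy(&msg.payload.unwrap_or_default()).into_owned()
            })
            .collect();
        StreamChunkWithState {
            rows,
            split_offset_mapping: Some(mapping),
        }
    }

    fn main() {
        let batch = vec![SourceMessage {
            payload: Some(b"{\"v\":1}".to_vec()),
            offset: "42".to_owned(),
            split_id: "split-0".to_owned(),
        }];
        let chunk = batch_to_chunk(batch);
        assert_eq!(chunk.split_offset_mapping.unwrap()["split-0"], "42");
        assert_eq!(chunk.rows.len(), 1);
    }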
diff --git a/src/connector/src/source/data_gen_util.rs b/src/connector/src/source/data_gen_util.rs new file mode 100644 index 0000000000000..1780807bdef51 --- /dev/null +++ b/src/connector/src/source/data_gen_util.rs @@ -0,0 +1,48 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::LazyLock; + +use futures::{pin_mut, Stream, StreamExt}; +use tokio::runtime::Runtime; +use tokio::sync::mpsc; + +/// Spawn the data generator to a dedicated runtime, returns a channel receiver +/// for acquiring the generated data. This is used for the [`DatagenSplitReader`] and +/// [`NexmarkSplitReader`] in case that they are CPU intensive and may block the streaming actors. +pub fn spawn_data_generation_stream( + stream: impl Stream + Send + 'static, + buffer_size: usize, +) -> impl Stream + Send + 'static { + static RUNTIME: LazyLock = LazyLock::new(|| { + tokio::runtime::Builder::new_multi_thread() + .thread_name("risingwave-data-generation") + .enable_all() + .build() + .expect("failed to build data-generation runtime") + }); + + let (generation_tx, generation_rx) = mpsc::channel(buffer_size); + RUNTIME.spawn(async move { + pin_mut!(stream); + while let Some(result) = stream.next().await { + if generation_tx.send(result).await.is_err() { + tracing::warn!("failed to send next event to reader, exit"); + break; + } + } + }); + + tokio_stream::wrappers::ReceiverStream::new(generation_rx) +} diff --git a/src/connector/src/source/datagen/enumerator/mod.rs b/src/connector/src/source/datagen/enumerator/mod.rs index 9df05bd9c333d..cfeab47031efa 100644 --- a/src/connector/src/source/datagen/enumerator/mod.rs +++ b/src/connector/src/source/datagen/enumerator/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/datagen/mod.rs b/src/connector/src/source/datagen/mod.rs index 2182a05dae991..c0d9717db5366 100644 --- a/src/connector/src/source/datagen/mod.rs +++ b/src/connector/src/source/datagen/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/datagen/source/generator.rs b/src/connector/src/source/datagen/source/generator.rs index 02d49805c4292..3d747921aef02 100644 --- a/src/connector/src/source/datagen/source/generator.rs +++ b/src/connector/src/source/datagen/source/generator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,19 +11,27 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use anyhow::Result; use bytes::Bytes; use futures_async_stream::try_stream; +use maplit::hashmap; +use risingwave_common::array::{Op, StreamChunk}; +use risingwave_common::error::RwError; use risingwave_common::field_generator::FieldGeneratorImpl; -use serde_json::Value; +use risingwave_common::row::OwnedRow; +use risingwave_common::types::DataType; +use risingwave_common::util::iter_util::ZipEqFast; -use crate::source::{SourceMessage, SourceMeta, SplitId}; +use crate::source::{SourceFormat, SourceMessage, SourceMeta, SplitId, StreamChunkWithState}; pub struct DatagenEventGenerator { - fields_map: HashMap, + // fields_map: HashMap, + field_names: Vec, + fields_vec: Vec, + source_format: SourceFormat, + data_types: Vec, offset: u64, split_id: SplitId, partition_rows_per_second: u64, @@ -36,8 +44,12 @@ pub struct DatagenMeta { } impl DatagenEventGenerator { + #[allow(clippy::too_many_arguments)] pub fn new( - fields_map: HashMap, + fields_vec: Vec, + field_names: Vec, + source_format: SourceFormat, + data_types: Vec, rows_per_second: u64, offset: u64, split_id: SplitId, @@ -50,38 +62,60 @@ impl DatagenEventGenerator { rows_per_second / split_num }; Ok(Self { - fields_map, + field_names, + fields_vec, + source_format, + data_types, offset, split_id, partition_rows_per_second, }) } - #[try_stream(ok = Vec, error = anyhow::Error)] - pub async fn into_stream(mut self) { + #[try_stream(boxed, ok = Vec, error = anyhow::Error)] + pub async fn into_msg_stream(mut self) { let mut interval = tokio::time::interval(Duration::from_secs(1)); const MAX_ROWS_PER_YIELD: u64 = 1024; + let mut reach_end = false; loop { // generate `partition_rows_per_second` rows per second interval.tick().await; let mut rows_generated_this_second = 0; while rows_generated_this_second < self.partition_rows_per_second { - let mut msgs = vec![]; let num_rows_to_generate = std::cmp::min( MAX_ROWS_PER_YIELD, self.partition_rows_per_second - rows_generated_this_second, ); - for _ in 0..num_rows_to_generate { - let value = Value::Object( - self.fields_map - .iter_mut() - .map(|(name, field_generator)| { - (name.to_string(), field_generator.generate(self.offset)) - }) - .collect(), - ); + let mut msgs = Vec::with_capacity(num_rows_to_generate as usize); + 'outer: for _ in 0..num_rows_to_generate { + let payload = match self.source_format { + SourceFormat::Json => { + let mut map = serde_json::Map::with_capacity(self.fields_vec.len()); + for (name, field_generator) in self + .field_names + .iter() + .zip_eq_fast(self.fields_vec.iter_mut()) + { + let value = field_generator.generate_json(self.offset); + if value.is_null() { + reach_end = true; + tracing::info!( + "datagen split {} stop generate, offset {}", + self.split_id, + self.offset + ); + break 'outer; + } + map.insert(name.clone(), value); + } + Bytes::from(serde_json::Value::from(map).to_string()) + } + _ => { + unimplemented!("only json format is supported for now") + } + }; msgs.push(SourceMessage { - payload: Some(Bytes::from(value.to_string())), + payload: Some(payload), offset: self.offset.to_string(), split_id: self.split_id.clone(), meta: SourceMeta::Datagen(DatagenMeta { @@ -96,7 +130,67 @@ impl DatagenEventGenerator { self.offset += 1; rows_generated_this_second += 1; } - yield msgs; + if !msgs.is_empty() { + yield msgs; + } + + if reach_end { + return Ok(()); + } + } + } + } + + 
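Both generator streams in this file follow the same throttling pattern: tick once per second, emit at most `partition_rows_per_second` rows in batches capped at `MAX_ROWS_PER_YIELD`, and stop once a sequence field is exhausted. A minimal stand-alone sketch of that loop follows; it uses generic items instead of rows and made-up closure parameters, so it is an illustration of the pattern rather than the generator itself.

    use tokio::time::{interval, Duration};

    // `next` yields one generated item or None once the sequence is exhausted;
    // `emit` stands in for `yield`-ing a batch from the async stream.
    async fn throttled_generate<T>(
        mut next: impl FnMut() -> Option<T>,
        rows_per_second: u64,
        mut emit: impl FnMut(Vec<T>),
    ) {
        const MAX_ROWS_PER_YIELD: u64 = 1024;
        let mut ticker = interval(Duration::from_secs(1));
        loop {
            // One tick per second bounds the overall generation rate.
            ticker.tick().await;
            let mut generated_this_second = 0;
            while generated_this_second < rows_per_second {
                let target = MAX_ROWS_PER_YIELD.min(rows_per_second - generated_this_second);
                let mut batch = Vec::with_capacity(target as usize);
                for _ in 0..target {
                    match next() {
                        Some(item) => batch.push(item),
                        None => {
                            // Sequence reached its end: flush what we have and stop.
                            if !batch.is_empty() {
                                emit(batch);
                            }
                            return;
                        }
                    }
                }
                generated_this_second += batch.len() as u64;
                if !batch.is_empty() {
                    emit(batch);
                }
            }
        }
    }

    #[tokio::main]
    async fn main() {
        let mut seq = 0u64..5;
        // Emits [0, 1, 2] in the first second and [3, 4] in the next, then returns.
        throttled_generate(|| seq.next(), 3, |batch| println!("{batch:?}")).await;
    }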
#[try_stream(ok = StreamChunkWithState, error = RwError)] + pub async fn into_native_stream(mut self) { + let mut interval = tokio::time::interval(Duration::from_secs(1)); + const MAX_ROWS_PER_YIELD: u64 = 1024; + let mut reach_end = false; + loop { + // generate `partition_rows_per_second` rows per second + interval.tick().await; + let mut rows_generated_this_second = 0; + while rows_generated_this_second < self.partition_rows_per_second { + let mut rows = vec![]; + let num_rows_to_generate = std::cmp::min( + MAX_ROWS_PER_YIELD, + self.partition_rows_per_second - rows_generated_this_second, + ); + 'outer: for _ in 0..num_rows_to_generate { + let mut row = Vec::with_capacity(self.fields_vec.len()); + for field_generator in &mut self.fields_vec { + let datum = field_generator.generate_datum(self.offset); + if datum.is_none() { + reach_end = true; + tracing::info!( + "datagen split {} stop generate, offset {}", + self.split_id, + self.offset + ); + break 'outer; + } + row.push(datum); + } + + rows.push((Op::Insert, OwnedRow::new(row))); + self.offset += 1; + rows_generated_this_second += 1; + } + + if !rows.is_empty() { + let chunk = StreamChunk::from_rows(&rows, &self.data_types); + let mapping = hashmap! { + self.split_id.clone() => (self.offset - 1).to_string() + }; + yield StreamChunkWithState { + chunk, + split_offset_mapping: Some(mapping), + }; + } + + if reach_end { + return Ok(()); + } } } } @@ -115,33 +209,34 @@ mod tests { expected_length: usize, ) { let split_id = format!("{}-{}", split_num, split_index).into(); - let mut fields_map = HashMap::new(); - fields_map.insert( - "v1".to_string(), + let start = 1; + let end = 10; + + let data_types = vec![DataType::Int32, DataType::Float32]; + let fields_vec = vec![ FieldGeneratorImpl::with_number_sequence( - risingwave_common::types::DataType::Int32, - Some("1".to_string()), - Some("10".to_string()), + data_types[0].clone(), + Some(start.to_string()), + Some(end.to_string()), split_index, split_num, ) .unwrap(), - ); - - fields_map.insert( - "v2".to_string(), FieldGeneratorImpl::with_number_sequence( - risingwave_common::types::DataType::Float32, - Some("1".to_string()), - Some("10".to_string()), + data_types[1].clone(), + Some(start.to_string()), + Some(end.to_string()), split_index, split_num, ) .unwrap(), - ); + ]; let generator = DatagenEventGenerator::new( - fields_map, + fields_vec, + vec!["c1".to_owned(), "c2".to_owned()], + SourceFormat::Json, + data_types, rows_per_second, 0, split_id, @@ -150,14 +245,17 @@ mod tests { ) .unwrap(); - let chunk = generator - .into_stream() - .boxed() - .next() - .await - .unwrap() - .unwrap(); + let mut stream = generator.into_msg_stream().boxed(); + + let chunk = stream.next().await.unwrap().unwrap(); assert_eq!(expected_length, chunk.len()); + + let empty_chunk = stream.next().await; + if rows_per_second >= (end - start + 1) { + assert!(empty_chunk.is_none()); + } else { + assert!(empty_chunk.is_some()); + } } #[tokio::test] @@ -177,4 +275,22 @@ mod tests { check_sequence_partition_result(3, 1, 10, 3).await; check_sequence_partition_result(3, 2, 10, 3).await; } + + #[tokio::test] + async fn test_one_partition_sequence_reach_end() { + check_sequence_partition_result(1, 0, 15, 10).await; + } + + #[tokio::test] + async fn test_two_partition_sequence_reach_end() { + check_sequence_partition_result(2, 0, 15, 5).await; + check_sequence_partition_result(2, 1, 15, 5).await; + } + + #[tokio::test] + async fn test_three_partition_sequence_reach_end() { + check_sequence_partition_result(3, 0, 15, 
4).await; + check_sequence_partition_result(3, 1, 15, 3).await; + check_sequence_partition_result(3, 2, 15, 3).await; + } } diff --git a/src/connector/src/source/datagen/source/mod.rs b/src/connector/src/source/datagen/source/mod.rs index 4e7e3797d5cb6..435c86fe43988 100644 --- a/src/connector/src/source/datagen/source/mod.rs +++ b/src/connector/src/source/datagen/source/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/datagen/source/reader.rs b/src/connector/src/source/datagen/source/reader.rs index f1129cb9aa1a1..3066303769c69 100644 --- a/src/connector/src/source/datagen/source/reader.rs +++ b/src/connector/src/source/datagen/source/reader.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,53 +13,67 @@ // limitations under the License. use std::collections::HashMap; +use std::sync::Arc; use anyhow::Result; use async_trait::async_trait; -use futures::StreamExt; -use itertools::zip_eq; +use futures::{StreamExt, TryStreamExt}; +use futures_async_stream::try_stream; use risingwave_common::field_generator::FieldGeneratorImpl; +use risingwave_common::util::iter_util::zip_eq_fast; use super::generator::DatagenEventGenerator; +use crate::impl_common_split_reader_logic; +use crate::parser::{ParserConfig, SpecificParserConfig}; +use crate::source::data_gen_util::spawn_data_generation_stream; use crate::source::datagen::source::SEQUENCE_FIELD_KIND; use crate::source::datagen::{DatagenProperties, DatagenSplit}; +use crate::source::monitor::SourceMetrics; use crate::source::{ - spawn_data_generation_stream, BoxSourceStream, Column, ConnectorState, DataType, SplitId, - SplitImpl, SplitMetaData, SplitReader, + BoxSourceStream, BoxSourceWithStateStream, Column, DataType, SourceInfo, SplitId, SplitImpl, + SplitMetaData, SplitReader, }; +impl_common_split_reader_logic!(DatagenSplitReader, DatagenProperties); + pub struct DatagenSplitReader { generator: DatagenEventGenerator, assigned_split: DatagenSplit, + + split_id: SplitId, + parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, } #[async_trait] impl SplitReader for DatagenSplitReader { type Properties = DatagenProperties; + #[allow(clippy::unused_async)] async fn new( properties: DatagenProperties, - state: ConnectorState, + splits: Vec, + parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, columns: Option>, ) -> Result { let mut assigned_split = DatagenSplit::default(); - let mut split_id: SplitId = "".into(); let mut events_so_far = u64::default(); - if let Some(splits) = state { - tracing::debug!("Splits for datagen found! {:?}", splits); - for split in splits { - // TODO: currently, assume there's only on split in one reader - split_id = split.id(); - if let SplitImpl::Datagen(n) = split { - if let Some(s) = n.start_offset { - // start_offset in `SplitImpl` indicates the latest successfully generated - // index, so here we use start_offset+1 - events_so_far = s + 1; - }; - assigned_split = n; - break; - } - } + tracing::debug!("Splits for datagen found! 
{:?}", splits); + + assert!(splits.len() == 1); + let split = splits.into_iter().next().unwrap(); + // TODO: currently, assume there's only on split in one reader + let split_id = split.id(); + if let SplitImpl::Datagen(n) = split { + if let Some(s) = n.start_offset { + // start_offset in `SplitImpl` indicates the latest successfully generated + // index, so here we use start_offset+1 + events_so_far = s + 1; + }; + assigned_split = n; } let split_index = assigned_split.split_index as u64; @@ -67,11 +81,13 @@ impl SplitReader for DatagenSplitReader { let rows_per_second = properties.rows_per_second; let fields_option_map = properties.fields; - let mut fields_map = HashMap::::new(); // check columns - assert!(columns.as_ref().is_some()); + assert!(columns.is_some()); let columns = columns.unwrap(); + let mut fields_vec = Vec::with_capacity(columns.len()); + let mut data_types = Vec::with_capacity(columns.len()); + let mut field_names = Vec::with_capacity(columns.len()); // parse field connector option to build FieldGeneratorImpl // for example: @@ -92,22 +108,28 @@ impl SplitReader for DatagenSplitReader { // ) for column in columns { - let name = column.name.clone(); + // let name = column.name.clone(); + let data_type = column.data_type.clone(); let gen = generator_from_data_type( column.data_type, &fields_option_map, - &name, + &column.name, split_index, split_num, )?; - fields_map.insert(name, gen); + fields_vec.push(gen); + data_types.push(data_type); + field_names.push(column.name); } let generator = DatagenEventGenerator::new( - fields_map, + fields_vec, + field_names, + parser_config.specific.get_source_format(), + data_types, rows_per_second, events_so_far, - split_id, + split_id.clone(), split_num, split_index, )?; @@ -115,13 +137,32 @@ impl SplitReader for DatagenSplitReader { Ok(DatagenSplitReader { generator, assigned_split, + split_id, + parser_config, + metrics, + source_info, }) } - fn into_stream(self) -> BoxSourceStream { + fn into_stream(self) -> BoxSourceWithStateStream { + // Will buffer at most 4 event chunks. + const BUFFER_SIZE: usize = 4; + // spawn_data_generation_stream(self.generator.into_native_stream(), BUFFER_SIZE).boxed() + match self.parser_config.specific { + SpecificParserConfig::Native => { + spawn_data_generation_stream(self.generator.into_native_stream(), BUFFER_SIZE) + .boxed() + } + _ => self.into_chunk_stream(), + } + } +} + +impl DatagenSplitReader { + pub(crate) fn into_data_stream(self) -> BoxSourceStream { // Will buffer at most 4 event chunks. 
const BUFFER_SIZE: usize = 4; - spawn_data_generation_stream(self.generator.into_stream(), BUFFER_SIZE).boxed() + spawn_data_generation_stream(self.generator.into_msg_stream(), BUFFER_SIZE).boxed() } } @@ -172,18 +213,19 @@ fn generator_from_data_type( FieldGeneratorImpl::with_varchar(length_value, random_seed) } DataType::Struct(struct_type) => { - let struct_fields = zip_eq(struct_type.field_names.clone(), struct_type.fields.clone()) - .map(|(field_name, data_type)| { - let gen = generator_from_data_type( - data_type, - fields_option_map, - &format!("{}.{}", name, field_name), - split_index, - split_num, - )?; - Ok((field_name, gen)) - }) - .collect::>()?; + let struct_fields = + zip_eq_fast(struct_type.field_names.clone(), struct_type.fields.clone()) + .map(|(field_name, data_type)| { + let gen = generator_from_data_type( + data_type, + fields_option_map, + &format!("{}.{}", name, field_name), + split_index, + split_num, + )?; + Ok((field_name, gen)) + }) + .collect::>()?; FieldGeneratorImpl::with_struct_fields(struct_fields) } DataType::List { datatype } => { @@ -234,7 +276,10 @@ mod tests { use std::sync::Arc; use maplit::{convert_args, hashmap}; + use risingwave_common::array::{Op, StructValue}; + use risingwave_common::row::Row; use risingwave_common::types::struct_type::StructType; + use risingwave_common::types::{ScalarImpl, ToDatumRef}; use super::*; @@ -261,11 +306,11 @@ mod tests { })), }, ]; - let state = Some(vec![SplitImpl::Datagen(DatagenSplit { + let state = vec![SplitImpl::Datagen(DatagenSplit { split_index: 0, split_num: 1, start_offset: None, - })]); + })]; let properties = DatagenProperties { split_num: None, rows_per_second: 10, @@ -288,14 +333,32 @@ mod tests { )), }; - let mut reader = DatagenSplitReader::new(properties, state, Some(mock_datum)) - .await? - .into_stream(); + let mut reader = DatagenSplitReader::new( + properties, + state, + Default::default(), + Default::default(), + Default::default(), + Some(mock_datum), + ) + .await? + .into_stream(); - let msg = reader.next().await.unwrap().unwrap(); + let stream_chunk = reader.next().await.unwrap().unwrap(); + let (op, row) = stream_chunk.chunk.rows().next().unwrap(); + assert_eq!(op, Op::Insert); + assert_eq!(row.datum_at(0), Some(ScalarImpl::Int32(533)).to_datum_ref(),); assert_eq!( - std::str::from_utf8(msg[0].payload.as_ref().unwrap().as_ref()).unwrap(), - "{\"random_float\":533.1488647460938,\"random_int\":533,\"sequence_int\":1,\"struct\":{\"random_int\":1533}}" + row.datum_at(1), + Some(ScalarImpl::Float32(533.148_86.into())).to_datum_ref(), + ); + assert_eq!(row.datum_at(2), Some(ScalarImpl::Int32(1)).to_datum_ref()); + assert_eq!( + row.datum_at(3), + Some(ScalarImpl::Struct(StructValue::new(vec![Some( + ScalarImpl::Int32(1533) + )]))) + .to_datum_ref() ); Ok(()) @@ -313,32 +376,48 @@ mod tests { data_type: DataType::Int32, }, ]; - let state = Some(vec![SplitImpl::Datagen(DatagenSplit { + let state = vec![SplitImpl::Datagen(DatagenSplit { split_index: 0, split_num: 1, start_offset: None, - })]); + })]; let properties = DatagenProperties { split_num: None, rows_per_second: 10, fields: HashMap::new(), }; - let stream = DatagenSplitReader::new(properties.clone(), state, Some(mock_datum.clone())) - .await? - .into_stream(); + let stream = DatagenSplitReader::new( + properties.clone(), + state, + Default::default(), + Default::default(), + Default::default(), + Some(mock_datum.clone()), + ) + .await? 
+ .into_stream(); + let v1 = stream.skip(1).next().await.unwrap()?; - let state = Some(vec![SplitImpl::Datagen(DatagenSplit { + let state = vec![SplitImpl::Datagen(DatagenSplit { split_index: 0, split_num: 1, start_offset: Some(9), - })]); - let mut stream = DatagenSplitReader::new(properties, state, Some(mock_datum)) - .await? - .into_stream(); + })]; + let mut stream = DatagenSplitReader::new( + properties, + state, + Default::default(), + Default::default(), + Default::default(), + Some(mock_datum), + ) + .await? + .into_stream(); let v2 = stream.next().await.unwrap()?; assert_eq!(v1, v2); + Ok(()) } } diff --git a/src/connector/src/source/datagen/split.rs b/src/connector/src/source/datagen/split.rs index e650bacfc8167..61b8c266a5371 100644 --- a/src/connector/src/source/datagen/split.rs +++ b/src/connector/src/source/datagen/split.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/dummy_connector.rs b/src/connector/src/source/dummy_connector.rs index 31ac1b7a80366..bd627643f589e 100644 --- a/src/connector/src/source/dummy_connector.rs +++ b/src/connector/src/source/dummy_connector.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,14 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::sync::Arc; + use anyhow::Result; use async_trait::async_trait; use futures::StreamExt; -use super::{SplitImpl, SplitReaderV2}; +use super::monitor::SourceMetrics; +use super::{SourceInfo, SplitImpl, SplitReader}; use crate::parser::ParserConfig; -use crate::source::{BoxSourceStream, Column, ConnectorState, SplitReader}; -use crate::BoxSourceWithStateStream; +use crate::source::{BoxSourceWithStateStream, Column}; /// [`DummySplitReader`] is a placeholder for source executor that is assigned no split. It will /// wait forever when calling `next`. @@ -30,27 +32,13 @@ pub struct DummySplitReader; impl SplitReader for DummySplitReader { type Properties = (); - async fn new( - _properties: Self::Properties, - _state: ConnectorState, - _columns: Option>, - ) -> Result { - Ok(Self {}) - } - - fn into_stream(self) -> BoxSourceStream { - futures::stream::pending().boxed() - } -} - -#[async_trait] -impl SplitReaderV2 for DummySplitReader { - type Properties = (); - async fn new( _properties: Self::Properties, _state: Vec, _parser_config: ParserConfig, + _metrics: Arc, + _source_info: SourceInfo, + _columns: Option>, ) -> Result { Ok(Self {}) } diff --git a/src/connector/src/source/filesystem/file_common.rs b/src/connector/src/source/filesystem/file_common.rs index 95b0a84307ac5..2647f3497a892 100644 --- a/src/connector/src/source/filesystem/file_common.rs +++ b/src/connector/src/source/filesystem/file_common.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
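`DummySplitReader` above is documented to "wait forever when calling `next`"; with the futures crate that is simply a stream that never yields. A tiny sketch of the pattern (the item type is a placeholder, not the real chunk type):

    use futures::stream::{self, BoxStream, StreamExt};

    // A reader with no split assigned can expose a stream that never resolves on
    // `next()`, so the source executor idles until splits are actually assigned.
    fn pending_source<T: Send + 'static>() -> BoxStream<'static, T> {
        stream::pending().boxed()
    }

    fn main() {
        let _stream: BoxStream<'static, u64> = pending_source();
        // Awaiting `_stream.next()` would never complete, which is the desired behavior.
    }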
diff --git a/src/connector/src/source/filesystem/mod.rs b/src/connector/src/source/filesystem/mod.rs index fa0fd439f1f8b..62ac85589d57c 100644 --- a/src/connector/src/source/filesystem/mod.rs +++ b/src/connector/src/source/filesystem/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/filesystem/s3/enumerator.rs b/src/connector/src/source/filesystem/s3/enumerator.rs index f94de94d4645c..458092ad314b4 100644 --- a/src/connector/src/source/filesystem/s3/enumerator.rs +++ b/src/connector/src/source/filesystem/s3/enumerator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,8 +14,10 @@ use std::collections::HashMap; +use anyhow::Context; use async_trait::async_trait; use aws_sdk_s3::client::Client; +use globset::{Glob, GlobMatcher}; use itertools::Itertools; use crate::aws_utils::{default_conn_config, s3_client, AwsConfigV2}; @@ -23,9 +25,45 @@ use crate::source::filesystem::file_common::FsSplit; use crate::source::filesystem::s3::S3Properties; use crate::source::SplitEnumerator; +/// Get the prefix from a glob +fn get_prefix(glob: &str) -> String { + let mut escaped = false; + let mut escaped_filter = false; + glob.chars() + .take_while(|c| match (c, &escaped) { + ('*', false) => false, + ('[', false) => false, + ('{', false) => false, + ('\\', false) => { + escaped = true; + true + } + (_, false) => true, + (_, true) => { + escaped = false; + true + } + }) + .filter(|c| match (c, &escaped_filter) { + (_, true) => { + escaped_filter = false; + true + } + ('\\', false) => { + escaped_filter = true; + false + } + (_, _) => true, + }) + .collect() +} + #[derive(Debug, Clone)] pub struct S3SplitEnumerator { bucket_name: String, + // prefix is used to reduce the number of objects to be listed + prefix: Option, + matcher: Option, client: Client, } @@ -38,8 +76,19 @@ impl SplitEnumerator for S3SplitEnumerator { let config = AwsConfigV2::from(HashMap::from(properties.clone())); let sdk_config = config.load_config(None).await; let s3_client = s3_client(&sdk_config, Some(default_conn_config())); + let matcher = if let Some(pattern) = properties.match_pattern.as_ref() { + let glob = Glob::new(pattern) + .with_context(|| format!("Invalid match_pattern: {}", pattern))?; + Some(glob.compile_matcher()) + } else { + None + }; + let prefix = matcher.as_ref().map(|m| get_prefix(m.glob().glob())); + Ok(S3SplitEnumerator { bucket_name: properties.bucket_name, + matcher, + prefix, client: s3_client, }) } @@ -49,20 +98,63 @@ impl SplitEnumerator for S3SplitEnumerator { .client .list_objects_v2() .bucket(&self.bucket_name) + .set_prefix(self.prefix.clone()) .send() .await?; let objects = list_obj_out.contents(); - let splits = objects - .map(|objs| { - objs.iter() - .map(|obj| { - let obj_name = obj.key().unwrap().to_string(); - FsSplit::new(obj_name, 0, obj.size() as usize) - }) - .collect_vec() - }) - .unwrap_or_else(Vec::default); + let splits = if let Some(objs) = objects { + let matched_objs = objs + .iter() + .filter(|obj| obj.key().is_some()) + .filter(|obj| { + self.matcher + .as_ref() + .map(|m| m.is_match(obj.key().unwrap())) + .unwrap_or(true) + }) + .collect_vec(); + + matched_objs + .into_iter() + .map(|obj| 
FsSplit::new(obj.key().unwrap().to_owned(), 0, obj.size() as usize)) + .collect_vec() + } else { + Vec::new() + }; Ok(splits) } } + +#[cfg(test)] +mod tests { + + #[test] + fn test_get_prefix() { + assert_eq!(&get_prefix("a/"), "a/"); + assert_eq!(&get_prefix("a/**"), "a/"); + assert_eq!(&get_prefix("[ab]*"), ""); + assert_eq!(&get_prefix("a/{a,b}*"), "a/"); + assert_eq!(&get_prefix(r"a/\{a,b}"), "a/{a,b}"); + assert_eq!(&get_prefix(r"a/\[ab]"), "a/[ab]"); + } + + use super::*; + #[tokio::test] + #[ignore] + async fn test_s3_split_enumerator() { + let props = S3Properties { + region_name: "ap-southeast-1".to_owned(), + bucket_name: "mingchao-s3-source".to_owned(), + match_pattern: Some("happy[0-9].csv".to_owned()), + access: None, + secret: None, + }; + let mut enumerator = S3SplitEnumerator::new(props.clone()).await.unwrap(); + let splits = enumerator.list_splits().await.unwrap(); + let names = splits.into_iter().map(|split| split.name).collect_vec(); + assert_eq!(names.len(), 2); + assert!(names.contains(&"happy1.csv".to_owned())); + assert!(names.contains(&"happy2.csv".to_owned())); + } +} diff --git a/src/connector/src/source/filesystem/s3/mod.rs b/src/connector/src/source/filesystem/s3/mod.rs index 23184b9348c55..e239d00b485d7 100644 --- a/src/connector/src/source/filesystem/s3/mod.rs +++ b/src/connector/src/source/filesystem/s3/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/filesystem/s3/source/mod.rs b/src/connector/src/source/filesystem/s3/source/mod.rs index c18366024459e..124372482a2c9 100644 --- a/src/connector/src/source/filesystem/s3/source/mod.rs +++ b/src/connector/src/source/filesystem/s3/source/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/filesystem/s3/source/reader.rs b/src/connector/src/source/filesystem/s3/source/reader.rs index 4ad13e9bb0404..b14eca23708ce 100644 --- a/src/connector/src/source/filesystem/s3/source/reader.rs +++ b/src/connector/src/source/filesystem/s3/source/reader.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ // limitations under the License. 
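The `match_pattern` handling in the S3 enumerator above combines a literal prefix derived by `get_prefix` (to narrow `ListObjectsV2`) with a compiled `globset` matcher for the final key filter. A small stand-alone sketch of the matching half, with made-up keys and pattern:

    use globset::Glob;

    // Compile the user-supplied glob once, then filter listed object keys with it.
    fn filter_keys<'a>(keys: &[&'a str], pattern: &str) -> Vec<&'a str> {
        let matcher = Glob::new(pattern)
            .expect("invalid match_pattern")
            .compile_matcher();
        keys.iter()
            .copied()
            .filter(|key| matcher.is_match(*key))
            .collect()
    }

    fn main() {
        let keys = ["dir/happy1.csv", "dir/happy2.csv", "dir/other.json"];
        // Only the keys matching the glob would become FsSplits.
        assert_eq!(
            filter_keys(&keys, "dir/happy[0-9].csv"),
            vec!["dir/happy1.csv", "dir/happy2.csv"]
        );
    }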
use std::collections::HashMap; +use std::sync::Arc; use anyhow::{anyhow, Result}; use async_trait::async_trait; @@ -28,11 +29,14 @@ use tokio_util::io::ReaderStream; use crate::aws_utils::{default_conn_config, s3_client, AwsConfigV2}; use crate::parser::{ByteStreamSourceParserImpl, ParserConfig}; -use crate::source::base::{SplitMetaData, SplitReaderV2, MAX_CHUNK_SIZE}; +use crate::source::base::{SplitMetaData, SplitReader, MAX_CHUNK_SIZE}; use crate::source::filesystem::file_common::FsSplit; use crate::source::filesystem::s3::S3Properties; -use crate::source::{SourceMessage, SourceMeta, SplitImpl}; -use crate::{BoxSourceWithStateStream, StreamChunkWithState}; +use crate::source::monitor::SourceMetrics; +use crate::source::{ + BoxSourceWithStateStream, Column, SourceInfo, SourceMessage, SourceMeta, SplitImpl, + StreamChunkWithState, +}; const MAX_CHANNEL_BUFFER_SIZE: usize = 2048; const STREAM_READER_CAPACITY: usize = 4096; @@ -43,11 +47,24 @@ pub struct S3FileReader { s3_client: s3_client::Client, splits: Vec, parser_config: ParserConfig, + // for stats + metrics: Arc, + source_info: SourceInfo, } impl S3FileReader { #[try_stream(boxed, ok = Vec, error = anyhow::Error)] - async fn stream_read(client_for_s3: s3_client::Client, bucket_name: String, split: FsSplit) { + async fn stream_read_object( + client_for_s3: s3_client::Client, + bucket_name: String, + split: FsSplit, + metrics: Arc, + source_info: SourceInfo, + ) { + let actor_id = source_info.actor_id.to_string(); + let source_id = source_info.source_id.to_string(); + let split_id = split.id(); + let object_name = split.name.clone(); let byte_stream = S3FileReader::get_object(&client_for_s3, &bucket_name, &object_name, split.offset) @@ -61,6 +78,7 @@ impl S3FileReader { let stream = ReaderStream::with_capacity(reader, STREAM_READER_CAPACITY); let mut offset: usize = split.offset; + let mut batch_size: usize = 0; let mut batch = Vec::new(); #[for_await] for read in stream { @@ -73,13 +91,23 @@ impl S3FileReader { meta: SourceMeta::Empty, }; offset += len; + batch_size += len; batch.push(msg); if batch.len() >= MAX_CHUNK_SIZE { + metrics + .partition_input_bytes + .with_label_values(&[&actor_id, &source_id, &split_id]) + .inc_by(batch_size as u64); + batch_size = 0; yield batch.clone(); batch.clear(); } } if !batch.is_empty() { + metrics + .partition_input_bytes + .with_label_values(&[&actor_id, &source_id, &split_id]) + .inc_by(batch_size as u64); yield batch; } } @@ -116,13 +144,16 @@ impl S3FileReader { } #[async_trait] -impl SplitReaderV2 for S3FileReader { +impl SplitReader for S3FileReader { type Properties = S3Properties; async fn new( props: S3Properties, state: Vec, parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, + _columns: Option>, ) -> Result { let config = AwsConfigV2::from(HashMap::from(props.clone())); let sdk_config = config.load_config(None).await; @@ -140,27 +171,44 @@ impl SplitReaderV2 for S3FileReader { s3_client, splits, parser_config, + metrics, + source_info, }; Ok(s3_file_reader) } fn into_stream(self) -> BoxSourceWithStateStream { - self.into_stream() + self.into_chunk_stream() } } impl S3FileReader { #[try_stream(boxed, ok = StreamChunkWithState, error = RwError)] - pub async fn into_stream(self) { + async fn into_chunk_stream(self) { for split in self.splits { - let data_stream = - Self::stream_read(self.s3_client.clone(), self.bucket_name.clone(), split); - let parser = ByteStreamSourceParserImpl::create(self.parser_config.clone()).await?; - let msg_stream = 
parser.into_stream(data_stream); + let actor_id = self.source_info.actor_id.to_string(); + let source_id = self.source_info.source_id.to_string(); + let split_id = split.id(); + + let data_stream = Self::stream_read_object( + self.s3_client.clone(), + self.bucket_name.clone(), + split, + self.metrics.clone(), + self.source_info, + ); + + let parser = ByteStreamSourceParserImpl::create(self.parser_config.clone())?; + let msg_stream = parser.into_stream(Box::pin(data_stream)); #[for_await] for msg in msg_stream { - yield msg?; + let msg = msg?; + self.metrics + .partition_input_count + .with_label_values(&[&actor_id, &source_id, &split_id]) + .inc_by(msg.chunk.cardinality() as u64); + yield msg; } } } @@ -174,8 +222,7 @@ mod tests { use super::*; use crate::parser::{CommonParserConfig, CsvParserConfig, SpecificParserConfig}; use crate::source::filesystem::{S3Properties, S3SplitEnumerator}; - use crate::source::SplitEnumerator; - use crate::SourceColumnDesc; + use crate::source::{SourceColumnDesc, SplitEnumerator}; #[tokio::test] #[ignore] @@ -205,16 +252,22 @@ mod tests { }; let config = ParserConfig { - common: CommonParserConfig { - props: HashMap::new(), - rw_columns: descs, - }, + common: CommonParserConfig { rw_columns: descs }, specific: SpecificParserConfig::Csv(csv_config), }; - let reader = S3FileReader::new(props, splits, config).await.unwrap(); + let reader = S3FileReader::new( + props, + splits, + config, + Arc::new(SourceMetrics::default()), + SourceInfo::default(), + None, + ) + .await + .unwrap(); - let msg_stream = reader.into_stream(); + let msg_stream = reader.into_chunk_stream(); #[for_await] for msg in msg_stream { println!("msg {:?}", msg); diff --git a/src/connector/src/source/google_pubsub/enumerator/client.rs b/src/connector/src/source/google_pubsub/enumerator/client.rs index 4c7212aafa790..8a21bbefd9f3b 100644 --- a/src/connector/src/source/google_pubsub/enumerator/client.rs +++ b/src/connector/src/source/google_pubsub/enumerator/client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/google_pubsub/enumerator/mod.rs b/src/connector/src/source/google_pubsub/enumerator/mod.rs index 8f3f74c7acdf8..b2474885cb8ad 100644 --- a/src/connector/src/source/google_pubsub/enumerator/mod.rs +++ b/src/connector/src/source/google_pubsub/enumerator/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/google_pubsub/mod.rs b/src/connector/src/source/google_pubsub/mod.rs index 07272b89e75c6..0619edde5a8bd 100644 --- a/src/connector/src/source/google_pubsub/mod.rs +++ b/src/connector/src/source/google_pubsub/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
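The per-split counters threaded through `S3FileReader::stream_read_object` and `into_chunk_stream` above (`partition_input_bytes`, `partition_input_count`) follow the usual labelled-counter pattern. Here is a minimal sketch with the prometheus crate; the registration details are assumptions, and RisingWave's actual `SourceMetrics` may be wired differently.

    use prometheus::{IntCounterVec, Opts, Registry};

    // One counter family, labelled per actor / source / split, so every reader can
    // report how many bytes it pulled for each object it is reading.
    fn register_partition_input_bytes(registry: &Registry) -> IntCounterVec {
        let counter = IntCounterVec::new(
            Opts::new("partition_input_bytes", "bytes read from the external source per split"),
            &["actor_id", "source_id", "split_id"],
        )
        .unwrap();
        registry.register(Box::new(counter.clone())).unwrap();
        counter
    }

    fn main() {
        let registry = Registry::new();
        let partition_input_bytes = register_partition_input_bytes(&registry);
        // Inside the read loop: bump the counter by the size of each yielded batch.
        partition_input_bytes
            .with_label_values(&["1", "1001", "happy1.csv"])
            .inc_by(4096);
    }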
diff --git a/src/connector/src/source/google_pubsub/source/message.rs b/src/connector/src/source/google_pubsub/source/message.rs index 2813adf6a954b..dfdd2f2e3f535 100644 --- a/src/connector/src/source/google_pubsub/source/message.rs +++ b/src/connector/src/source/google_pubsub/source/message.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/google_pubsub/source/mod.rs b/src/connector/src/source/google_pubsub/source/mod.rs index f47fa279d9d2d..b1aee99db99d1 100644 --- a/src/connector/src/source/google_pubsub/source/mod.rs +++ b/src/connector/src/source/google_pubsub/source/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/google_pubsub/source/reader.rs b/src/connector/src/source/google_pubsub/source/reader.rs index 8f9793b9eaf4a..6a55713461e03 100644 --- a/src/connector/src/source/google_pubsub/source/reader.rs +++ b/src/connector/src/source/google_pubsub/source/reader.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,33 +12,45 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::sync::Arc; + use anyhow::{anyhow, ensure, Context, Result}; use async_trait::async_trait; use chrono::{NaiveDateTime, TimeZone, Utc}; +use futures::{StreamExt, TryStreamExt}; use futures_async_stream::try_stream; use google_cloud_pubsub::client::Client; use google_cloud_pubsub::subscription::{SeekTo, Subscription}; -use risingwave_common::{bail, try_match_expand}; +use risingwave_common::bail; use tonic::Code; use super::TaggedReceivedMessage; +use crate::impl_common_split_reader_logic; +use crate::parser::ParserConfig; use crate::source::google_pubsub::PubsubProperties; +use crate::source::monitor::SourceMetrics; use crate::source::{ - BoxSourceStream, Column, ConnectorState, SourceMessage, SplitId, SplitImpl, SplitMetaData, + BoxSourceWithStateStream, Column, SourceInfo, SourceMessage, SplitId, SplitImpl, SplitMetaData, SplitReader, }; const PUBSUB_MAX_FETCH_MESSAGES: usize = 1024; +impl_common_split_reader_logic!(PubsubSplitReader, PubsubProperties); + pub struct PubsubSplitReader { subscription: Subscription, - split_id: SplitId, stop_offset: Option, + + split_id: SplitId, + parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, } impl PubsubSplitReader { #[try_stream(boxed, ok = Vec, error = anyhow::Error)] - pub async fn into_stream(self) { + async fn into_data_stream(self) { loop { let pull_result = self .subscription @@ -106,16 +118,22 @@ impl SplitReader for PubsubSplitReader { async fn new( properties: PubsubProperties, - state: ConnectorState, + splits: Vec, + parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, _columns: Option>, ) -> Result { - let splits = state.ok_or_else(|| anyhow!("no default state for reader"))?; ensure!( splits.len() == 1, "the pubsub reader only supports a single split" ); - let split = try_match_expand!(splits.into_iter().next().unwrap(), SplitImpl::GooglePubsub) - 
.map_err(|e| anyhow!(e))?; + let split = splits + .into_iter() + .next() + .unwrap() + .into_google_pubsub() + .unwrap(); // Set environment variables consumed by `google_cloud_pubsub` properties.initialize_env(); @@ -152,10 +170,13 @@ impl SplitReader for PubsubSplitReader { subscription, split_id: split.id(), stop_offset, + parser_config, + metrics, + source_info, }) } - fn into_stream(self) -> BoxSourceStream { - self.into_stream() + fn into_stream(self) -> BoxSourceWithStateStream { + self.into_chunk_stream() } } diff --git a/src/connector/src/source/google_pubsub/split.rs b/src/connector/src/source/google_pubsub/split.rs index e4e3f46f86dd2..359aa8f48f648 100644 --- a/src/connector/src/source/google_pubsub/split.rs +++ b/src/connector/src/source/google_pubsub/split.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 5c8ec803b7659..7f90cbe5ff68d 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -50,8 +50,13 @@ impl SplitEnumerator for KafkaSplitEnumerator { type Split = KafkaSplit; async fn new(properties: KafkaProperties) -> anyhow::Result { - let broker_address = properties.brokers.clone(); - let topic = properties.topic.clone(); + let mut config = rdkafka::ClientConfig::new(); + let common_props = &properties.common; + + let broker_address = common_props.brokers.clone(); + let topic = common_props.topic.clone(); + config.set("bootstrap.servers", &broker_address); + common_props.set_security_properties(&mut config); let mut scan_start_offset = match properties .scan_startup_mode @@ -74,9 +79,6 @@ impl SplitEnumerator for KafkaSplitEnumerator { scan_start_offset = KafkaEnumeratorOffset::Timestamp(time_offset) } - let mut config = rdkafka::ClientConfig::new(); - config.set("bootstrap.servers", &broker_address); - properties.set_security_properties(&mut config); let client: BaseConsumer = config.create_with_context(DefaultConsumerContext).await?; Ok(Self { diff --git a/src/connector/src/source/kafka/enumerator/mod.rs b/src/connector/src/source/kafka/enumerator/mod.rs index 8f3f74c7acdf8..b2474885cb8ad 100644 --- a/src/connector/src/source/kafka/enumerator/mod.rs +++ b/src/connector/src/source/kafka/enumerator/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/kafka/mod.rs b/src/connector/src/source/kafka/mod.rs index 9192ef7a35045..24e350b1a0783 100644 --- a/src/connector/src/source/kafka/mod.rs +++ b/src/connector/src/source/kafka/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
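The `into_google_pubsub()` call that replaces `try_match_expand!` in the Pub/Sub reader above is one of the accessors generated by the `EnumAsInner` derive (see the `enum_as_inner::EnumAsInner` import in base.rs). A toy sketch of that derive follows; the variant payloads are made up, and the exact return type of the generated `into_*` method depends on the enum_as_inner version.

    use enum_as_inner::EnumAsInner;

    // The derive adds `as_*` / `into_*` helpers named after each variant, which is
    // handier than try_match_expand! when exactly one variant is expected.
    #[derive(Debug, EnumAsInner)]
    enum SplitSketch {
        Kafka(String),
        GooglePubsub(String),
    }

    fn main() {
        let split = SplitSketch::GooglePubsub("subscription-1".to_owned());
        let subscription = split.into_google_pubsub().unwrap();
        assert_eq!(subscription, "subscription-1");
    }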
@@ -14,7 +14,6 @@ use std::time::Duration; -use rdkafka::ClientConfig; use serde::Deserialize; pub mod enumerator; @@ -25,6 +24,8 @@ pub use enumerator::*; pub use source::*; pub use split::*; +use crate::common::KafkaCommon; + pub const KAFKA_CONNECTOR: &str = "kafka"; #[derive(Clone, Debug, Deserialize)] @@ -43,12 +44,6 @@ pub struct KafkaProperties { #[serde(rename = "max.num.messages", alias = "kafka.max.num.messages")] pub max_num_messages: Option, - #[serde(rename = "properties.bootstrap.server", alias = "kafka.brokers")] - pub brokers: String, - - #[serde(rename = "topic", alias = "kafka.topic")] - pub topic: String, - #[serde(rename = "scan.startup.mode", alias = "kafka.scan.startup.mode")] pub scan_startup_mode: Option, @@ -58,128 +53,8 @@ pub struct KafkaProperties { #[serde(rename = "properties.group.id", alias = "kafka.consumer.group")] pub consumer_group: Option, - /// Security protocol used for RisingWave to communicate with Kafka brokers. Could be - /// PLAINTEXT, SSL, SASL_PLAINTEXT or SASL_SSL. - #[serde(rename = "properties.security.protocol")] - security_protocol: Option, - - // For the properties below, please refer to [librdkafka](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) for more information. - /// Path to CA certificate file for verifying the broker's key. - #[serde(rename = "properties.ssl.ca.location")] - ssl_ca_location: Option, - - /// Path to client's certificate file (PEM). - #[serde(rename = "properties.ssl.certificate.location")] - ssl_certificate_location: Option, - - /// Path to client's private key file (PEM). - #[serde(rename = "properties.ssl.key.location")] - ssl_key_location: Option, - - /// Passphrase of client's private key. - #[serde(rename = "properties.ssl.key.password")] - ssl_key_password: Option, - - /// SASL mechanism if SASL is enabled. Currently support PLAIN, SCRAM and GSSAPI. - #[serde(rename = "properties.sasl.mechanism")] - sasl_mechanism: Option, - - /// SASL username for SASL/PLAIN and SASL/SCRAM. - #[serde(rename = "properties.sasl.username")] - sasl_username: Option, - - /// SASL password for SASL/PLAIN and SASL/SCRAM. - #[serde(rename = "properties.sasl.password")] - sasl_password: Option, - - /// Kafka server's Kerberos principal name under SASL/GSSAPI, not including /hostname@REALM. - #[serde(rename = "properties.sasl.kerberos.service.name")] - sasl_kerberos_service_name: Option, - - /// Path to client's Kerberos keytab file under SASL/GSSAPI. - #[serde(rename = "properties.sasl.kerberos.keytab")] - sasl_kerberos_keytab: Option, - - /// Client's Kerberos principal name under SASL/GSSAPI. - #[serde(rename = "properties.sasl.kerberos.principal")] - sasl_kerberos_principal: Option, - - /// Shell command to refresh or acquire the client's Kerberos ticket under SASL/GSSAPI. - #[serde(rename = "properties.sasl.kerberos.kinit.cmd")] - sasl_kerberos_kinit_cmd: Option, - - /// Minimum time in milliseconds between key refresh attempts under SASL/GSSAPI. - #[serde(rename = "properties.sasl.kerberos.min.time.before.relogin")] - sasl_kerberos_min_time_before_relogin: Option, - - /// Configurations for SASL/OAUTHBEARER. 
- #[serde(rename = "properties.sasl.oauthbearer.config")] - sasl_oathbearer_config: Option, -} - -impl KafkaProperties { - fn set_security_properties(&self, config: &mut ClientConfig) { - // Security protocol - if let Some(security_protocol) = self.security_protocol.as_ref() { - config.set("security.protocol", security_protocol); - } - - // SSL - if let Some(ssl_ca_location) = self.ssl_ca_location.as_ref() { - config.set("ssl.ca.location", ssl_ca_location); - } - if let Some(ssl_certificate_location) = self.ssl_certificate_location.as_ref() { - config.set("ssl.certificate.location", ssl_certificate_location); - } - if let Some(ssl_key_location) = self.ssl_key_location.as_ref() { - config.set("ssl.key.location", ssl_key_location); - } - if let Some(ssl_key_password) = self.ssl_key_password.as_ref() { - config.set("ssl.key.password", ssl_key_password); - } - - // SASL mechanism - if let Some(sasl_mechanism) = self.sasl_mechanism.as_ref() { - config.set("sasl.mechanism", sasl_mechanism); - } - - // SASL/PLAIN & SASL/SCRAM - if let Some(sasl_username) = self.sasl_username.as_ref() { - config.set("sasl.username", sasl_username); - } - if let Some(sasl_password) = self.sasl_password.as_ref() { - config.set("sasl.password", sasl_password); - } - - // SASL/GSSAPI - if let Some(sasl_kerberos_service_name) = self.sasl_kerberos_service_name.as_ref() { - config.set("sasl.kerberos.service.name", sasl_kerberos_service_name); - } - if let Some(sasl_kerberos_keytab) = self.sasl_kerberos_keytab.as_ref() { - config.set("sasl.kerberos.keytab", sasl_kerberos_keytab); - } - if let Some(sasl_kerberos_principal) = self.sasl_kerberos_principal.as_ref() { - config.set("sasl.kerberos.principal", sasl_kerberos_principal); - } - if let Some(sasl_kerberos_kinit_cmd) = self.sasl_kerberos_kinit_cmd.as_ref() { - config.set("sasl.kerberos.kinit.cmd", sasl_kerberos_kinit_cmd); - } - if let Some(sasl_kerberos_min_time_before_relogin) = - self.sasl_kerberos_min_time_before_relogin.as_ref() - { - config.set( - "sasl.kerberos.min.time.before.relogin", - sasl_kerberos_min_time_before_relogin, - ); - } - - // SASL/OAUTHBEARER - if let Some(sasl_oathbearer_config) = self.sasl_oathbearer_config.as_ref() { - config.set("sasl.oauthbearer.config", sasl_oathbearer_config); - } - // Currently, we only support unsecured OAUTH. - config.set("enable.sasl.oauthbearer.unsecure.jwt", "true"); - } + #[serde(flatten)] + pub common: KafkaCommon, } const KAFKA_SYNC_CALL_TIMEOUT: Duration = Duration::from_secs(1); diff --git a/src/connector/src/source/kafka/source/message.rs b/src/connector/src/source/kafka/source/message.rs index 86e5f94cf8dd3..6c52455c779e7 100644 --- a/src/connector/src/source/kafka/source/message.rs +++ b/src/connector/src/source/kafka/source/message.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/kafka/source/mod.rs b/src/connector/src/source/kafka/source/mod.rs index 10c1681b9a96b..53f8cdc0bdbef 100644 --- a/src/connector/src/source/kafka/source/mod.rs +++ b/src/connector/src/source/kafka/source/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/connector/src/source/kafka/source/reader.rs b/src/connector/src/source/kafka/source/reader.rs index 3a08d3009f65b..2ad056953327f 100644 --- a/src/connector/src/source/kafka/source/reader.rs +++ b/src/connector/src/source/kafka/source/reader.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,19 +13,27 @@ // limitations under the License. use std::mem::swap; +use std::sync::Arc; use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use anyhow::{Context, Result}; +use anyhow::{anyhow, Result}; use async_trait::async_trait; -use futures::StreamExt; +use futures::{StreamExt, TryStreamExt}; use futures_async_stream::try_stream; use rdkafka::config::RDKafkaLogLevel; use rdkafka::consumer::{Consumer, DefaultConsumerContext, StreamConsumer}; use rdkafka::{ClientConfig, Message, Offset, TopicPartitionList}; -use crate::source::base::{SourceMessage, SplitReader, MAX_CHUNK_SIZE}; +use crate::impl_common_split_reader_logic; +use crate::parser::ParserConfig; +use crate::source::base::{SourceMessage, MAX_CHUNK_SIZE}; use crate::source::kafka::KafkaProperties; -use crate::source::{BoxSourceStream, Column, ConnectorState, SplitImpl}; +use crate::source::monitor::SourceMetrics; +use crate::source::{ + BoxSourceWithStateStream, Column, SourceInfo, SplitId, SplitImpl, SplitMetaData, SplitReader, +}; + +impl_common_split_reader_logic!(KafkaSplitReader, KafkaProperties); pub struct KafkaSplitReader { consumer: StreamConsumer, @@ -33,6 +41,11 @@ pub struct KafkaSplitReader { stop_offset: Option, bytes_per_second: usize, max_num_messages: usize, + + split_id: SplitId, + parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, } #[async_trait] @@ -41,20 +54,23 @@ impl SplitReader for KafkaSplitReader { async fn new( properties: KafkaProperties, - state: ConnectorState, + splits: Vec, + parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, _columns: Option>, ) -> Result { - let bootstrap_servers = &properties.brokers; - let mut config = ClientConfig::new(); + let bootstrap_servers = &properties.common.brokers; + // disable partition eof config.set("enable.partition.eof", "false"); config.set("enable.auto.commit", "false"); config.set("auto.offset.reset", "smallest"); config.set("bootstrap.servers", bootstrap_servers); - properties.set_security_properties(&mut config); + properties.common.set_security_properties(&mut config); if config.get("group.id").is_none() { config.set( @@ -73,32 +89,30 @@ impl SplitReader for KafkaSplitReader { .set_log_level(RDKafkaLogLevel::Info) .create_with_context(DefaultConsumerContext) .await - .context("failed to create kafka consumer")?; + .map_err(|e| anyhow!("failed to create kafka consumer: {}", e))?; let mut start_offset = None; - let mut stop_offset = None; - if let Some(splits) = state { - assert_eq!(splits.len(), 1); - let mut tpl = TopicPartitionList::with_capacity(splits.len()); - - for split in &splits { - if let SplitImpl::Kafka(k) = split { - if let Some(offset) = k.start_offset { - start_offset = Some(offset); - tpl.add_partition_offset( - k.topic.as_str(), - k.partition, - Offset::Offset(offset + 1), - )?; - } else { - tpl.add_partition(k.topic.as_str(), k.partition); - } - stop_offset = k.stop_offset; - } - } - consumer.assign(&tpl)?; + assert_eq!(splits.len(), 1); + let mut tpl = TopicPartitionList::with_capacity(splits.len()); + + let split 
= splits.into_iter().next().unwrap().into_kafka().unwrap(); + + let split_id = split.id(); + + if let Some(offset) = split.start_offset { + start_offset = Some(offset); + tpl.add_partition_offset( + split.topic.as_str(), + split.partition, + Offset::Offset(offset + 1), + )?; + } else { + tpl.add_partition(split.topic.as_str(), split.partition); } + let stop_offset = split.stop_offset; + + consumer.assign(&tpl)?; // The two parameters below are only used by developers for performance testing purposes, // so we panic here on purpose if the input is not correctly recognized. @@ -121,17 +135,21 @@ impl SplitReader for KafkaSplitReader { stop_offset, bytes_per_second, max_num_messages, + split_id, + parser_config, + metrics, + source_info, }) } - fn into_stream(self) -> BoxSourceStream { - self.into_stream() + fn into_stream(self) -> BoxSourceWithStateStream { + self.into_chunk_stream() } } impl KafkaSplitReader { #[try_stream(boxed, ok = Vec, error = anyhow::Error)] - pub async fn into_stream(self) { + pub async fn into_data_stream(self) { if let Some(stop_offset) = self.stop_offset { if let Some(start_offset) = self.start_offset && (start_offset+1) >= stop_offset { yield Vec::new(); diff --git a/src/connector/src/source/kafka/split.rs b/src/connector/src/source/kafka/split.rs index c89c489a5fde4..4fdb5a71f04fc 100644 --- a/src/connector/src/source/kafka/split.rs +++ b/src/connector/src/source/kafka/split.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/kinesis/config.rs b/src/connector/src/source/kinesis/config.rs index f372f0dd2ec5d..da834f825dde4 100644 --- a/src/connector/src/source/kinesis/config.rs +++ b/src/connector/src/source/kinesis/config.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -17,14 +17,12 @@ use std::collections::HashMap; use anyhow::{anyhow, Result}; use aws_config::default_provider::credentials::DefaultCredentialsChain; use aws_config::sts::AssumeRoleProvider; -use aws_sdk_kinesis::Client; use aws_types::credentials::SharedCredentialsProvider; use aws_types::region::Region; -use http::Uri; use maplit::hashmap; use serde::{Deserialize, Serialize}; -use crate::source::kinesis::KinesisProperties; +use crate::common::KinesisCommon; #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct AwsAssumeRole { @@ -91,7 +89,7 @@ impl AwsConfigInfo { Ok(config_loader.load().await) } - pub fn build(properties: KinesisProperties) -> Result { + pub fn build(properties: KinesisCommon) -> Result { let stream_name = properties.stream_name; let region = properties.stream_region; @@ -140,14 +138,3 @@ pub fn kinesis_demo_properties() -> HashMap { properties } - -pub async fn build_client(properties: KinesisProperties) -> Result { - let config = AwsConfigInfo::build(properties)?; - let aws_config = config.load().await?; - let mut builder = aws_sdk_kinesis::config::Builder::from(&aws_config); - if let Some(endpoint) = &config.endpoint { - let uri = endpoint.clone().parse::().unwrap(); - builder = builder.endpoint_resolver(aws_smithy_http::endpoint::Endpoint::immutable(uri)); - } - Ok(Client::from_conf(builder.build())) -} diff --git a/src/connector/src/source/kinesis/enumerator/client.rs b/src/connector/src/source/kinesis/enumerator/client.rs index 26392fc1bf26a..c11f444a9c853 100644 --- a/src/connector/src/source/kinesis/enumerator/client.rs +++ b/src/connector/src/source/kinesis/enumerator/client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -34,8 +34,8 @@ impl SplitEnumerator for KinesisSplitEnumerator { type Split = KinesisSplit; async fn new(properties: KinesisProperties) -> Result { - let client = build_client(properties.clone()).await?; - let stream_name = properties.stream_name.clone(); + let client = properties.common.build_client().await?; + let stream_name = properties.common.stream_name.clone(); Ok(Self { stream_name, client, diff --git a/src/connector/src/source/kinesis/enumerator/mod.rs b/src/connector/src/source/kinesis/enumerator/mod.rs index 78eebb055a11c..3ba0b65d40d2e 100644 --- a/src/connector/src/source/kinesis/enumerator/mod.rs +++ b/src/connector/src/source/kinesis/enumerator/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/kinesis/mod.rs b/src/connector/src/source/kinesis/mod.rs index a3aea1b46c24d..33bfe563c8f6c 100644 --- a/src/connector/src/source/kinesis/mod.rs +++ b/src/connector/src/source/kinesis/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
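The free `build_client` helper deleted from `config.rs` above is replaced by a method on the shared `KinesisCommon` config; the enumerator now calls `properties.common.build_client()`. `KinesisCommon` is defined in `crate::common` and is not shown in this diff; presumably the method mirrors the removed function, roughly:

    use anyhow::Result;
    use aws_sdk_kinesis::Client;
    use http::Uri;

    impl KinesisCommon {
        // Sketch based on the free function removed above; the actual method may differ.
        pub(crate) async fn build_client(&self) -> Result<Client> {
            // AwsConfigInfo::build now takes a KinesisCommon (see the signature change above).
            let config = AwsConfigInfo::build(self.clone())?;
            let aws_config = config.load().await?;
            let mut builder = aws_sdk_kinesis::config::Builder::from(&aws_config);
            if let Some(endpoint) = &config.endpoint {
                let uri = endpoint.clone().parse::<Uri>().unwrap();
                builder = builder
                    .endpoint_resolver(aws_smithy_http::endpoint::Endpoint::immutable(uri));
            }
            Ok(Client::from_conf(builder.build()))
        }
    }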
@@ -17,42 +17,14 @@ pub mod enumerator; pub mod source; pub mod split; -pub use config::build_client; use serde::Deserialize; +use crate::common::KinesisCommon; + pub const KINESIS_CONNECTOR: &str = "kinesis"; #[derive(Clone, Debug, Deserialize)] pub struct KinesisProperties { - #[serde(rename = "stream", alias = "kinesis.stream.name")] - pub stream_name: String, - #[serde(rename = "aws.region", alias = "kinesis.stream.region")] - pub stream_region: String, - #[serde(rename = "endpoint", alias = "kinesis.endpoint")] - pub endpoint: Option, - #[serde( - rename = "aws.credentials.access_key_id", - alias = "kinesis.credentials.access" - )] - pub credentials_access_key: Option, - #[serde( - rename = "aws.credentials.secret_access_key", - alias = "kinesis.credentials.secret" - )] - pub credentials_secret_access_key: Option, - #[serde( - rename = "aws.credentials.session_token", - alias = "kinesis.credentials.session_token" - )] - pub session_token: Option, - #[serde(rename = "aws.credentials.role.arn", alias = "kinesis.assumerole.arn")] - pub assume_role_arn: Option, - #[serde( - rename = "aws.credentials.role.external_id", - alias = "kinesis.assumerole.external_id" - )] - pub assume_role_external_id: Option, - #[serde(rename = "scan.startup.mode", alias = "kinesis.scan.startup.mode")] // accepted values: "latest", "earliest", "sequence_number" pub scan_startup_mode: Option, @@ -61,4 +33,7 @@ pub struct KinesisProperties { alias = "kinesis.scan.startup.sequence_number" )] pub seq_offset: Option, + + #[serde(flatten)] + pub common: KinesisCommon, } diff --git a/src/connector/src/source/kinesis/source/message.rs b/src/connector/src/source/kinesis/source/message.rs index 39c6ea097873f..f29a83c4afc72 100644 --- a/src/connector/src/source/kinesis/source/message.rs +++ b/src/connector/src/source/kinesis/source/message.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/kinesis/source/mod.rs b/src/connector/src/source/kinesis/source/mod.rs index f1a1d9aeb16c1..cd90a79f28913 100644 --- a/src/connector/src/source/kinesis/source/mod.rs +++ b/src/connector/src/source/kinesis/source/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/kinesis/source/reader.rs b/src/connector/src/source/kinesis/source/reader.rs index e385410738f95..24b09cbe660e5 100644 --- a/src/connector/src/source/kinesis/source/reader.rs +++ b/src/connector/src/source/kinesis/source/reader.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::sync::Arc; use std::time::Duration; use anyhow::{anyhow, Result}; @@ -21,16 +22,23 @@ use aws_sdk_kinesis::model::ShardIteratorType; use aws_sdk_kinesis::output::GetRecordsOutput; use aws_sdk_kinesis::types::SdkError; use aws_sdk_kinesis::Client as KinesisClient; +use futures::{StreamExt, TryStreamExt}; use futures_async_stream::try_stream; use tokio_retry; +use crate::impl_common_split_reader_logic; +use crate::parser::ParserConfig; use crate::source::kinesis::source::message::KinesisMessage; use crate::source::kinesis::split::KinesisOffset; -use crate::source::kinesis::{build_client, KinesisProperties}; +use crate::source::kinesis::KinesisProperties; +use crate::source::monitor::SourceMetrics; use crate::source::{ - BoxSourceStream, Column, ConnectorState, SourceMessage, SplitId, SplitImpl, SplitReader, + BoxSourceWithStateStream, Column, SourceInfo, SourceMessage, SplitId, SplitImpl, SplitMetaData, + SplitReader, }; +impl_common_split_reader_logic!(KinesisSplitReader, KinesisProperties); + #[derive(Debug, Clone)] pub struct KinesisSplitReader { client: KinesisClient, @@ -40,6 +48,11 @@ pub struct KinesisSplitReader { shard_iter: Option, start_position: KinesisOffset, end_position: KinesisOffset, + + split_id: SplitId, + parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, } #[async_trait] @@ -48,13 +61,15 @@ impl SplitReader for KinesisSplitReader { async fn new( properties: KinesisProperties, - state: ConnectorState, + splits: Vec, + parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, _columns: Option>, ) -> Result { - let split = match state.unwrap().into_iter().next().unwrap() { - SplitImpl::Kinesis(ks) => ks, - split => return Err(anyhow!("expect KinesisSplit, got {:?}", split)), - }; + assert!(splits.len() == 1); + + let split = splits.into_iter().next().unwrap().into_kinesis().unwrap(); let start_position = match &split.start_position { KinesisOffset::None => match &properties.scan_startup_mode { @@ -79,9 +94,10 @@ impl SplitReader for KinesisSplitReader { start_position => start_position.to_owned(), }; - let stream_name = properties.stream_name.clone(); - let client = build_client(properties).await?; + let stream_name = properties.common.stream_name.clone(); + let client = properties.common.build_client().await?; + let split_id = split.id(); Ok(Self { client, stream_name, @@ -90,17 +106,21 @@ impl SplitReader for KinesisSplitReader { latest_offset: None, start_position, end_position: split.end_position, + split_id, + parser_config, + metrics, + source_info, }) } - fn into_stream(self) -> BoxSourceStream { - self.into_stream() + fn into_stream(self) -> BoxSourceWithStateStream { + self.into_chunk_stream() } } impl KinesisSplitReader { #[try_stream(boxed, ok = Vec, error = anyhow::Error)] - pub async fn into_stream(mut self) { + pub(crate) async fn into_data_stream(mut self) { self.new_shard_iter().await?; loop { if self.shard_iter.is_none() { @@ -248,50 +268,60 @@ mod tests { use futures::StreamExt; use super::*; + use crate::common::KinesisCommon; use crate::source::kinesis::split::KinesisSplit; #[tokio::test] #[ignore] async fn test_single_thread_kinesis_reader() -> Result<()> { let properties = KinesisProperties { - assume_role_arn: None, - credentials_access_key: None, - credentials_secret_access_key: None, - stream_name: "kinesis_debug".to_string(), - stream_region: "cn-northwest-1".to_string(), - endpoint: None, - session_token: None, - assume_role_external_id: None, + common: KinesisCommon { + assume_role_arn: None, + 
credentials_access_key: None, + credentials_secret_access_key: None, + stream_name: "kinesis_debug".to_string(), + stream_region: "cn-northwest-1".to_string(), + endpoint: None, + session_token: None, + assume_role_external_id: None, + }, + scan_startup_mode: None, seq_offset: None, }; let mut trim_horizen_reader = KinesisSplitReader::new( properties.clone(), - Some(vec![SplitImpl::Kinesis(KinesisSplit { + vec![SplitImpl::Kinesis(KinesisSplit { shard_id: "shardId-000000000001".to_string().into(), start_position: KinesisOffset::Earliest, end_position: KinesisOffset::None, - })]), + })], + Default::default(), + Default::default(), + Default::default(), None, ) .await? - .into_stream(); + .into_data_stream(); println!("{:?}", trim_horizen_reader.next().await.unwrap()?); let mut offset_reader = KinesisSplitReader::new( properties.clone(), - Some(vec![SplitImpl::Kinesis(KinesisSplit { + vec![SplitImpl::Kinesis(KinesisSplit { shard_id: "shardId-000000000001".to_string().into(), start_position: KinesisOffset::SequenceNumber( "49629139817504901062972448413535783695568426186596941842".to_string(), ), end_position: KinesisOffset::None, - })]), + })], + Default::default(), + Default::default(), + Default::default(), None, ) .await? - .into_stream(); + .into_data_stream(); println!("{:?}", offset_reader.next().await.unwrap()?); Ok(()) diff --git a/src/connector/src/source/kinesis/split.rs b/src/connector/src/source/kinesis/split.rs index 71a3d9b95aa67..941c3edfe9038 100644 --- a/src/connector/src/source/kinesis/split.rs +++ b/src/connector/src/source/kinesis/split.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/manager.rs b/src/connector/src/source/manager.rs similarity index 98% rename from src/connector/src/manager.rs rename to src/connector/src/source/manager.rs index c3ea5f5924902..25afb4f55917c 100644 --- a/src/connector/src/manager.rs +++ b/src/connector/src/source/manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/mod.rs b/src/connector/src/source/mod.rs index 1b099a81c4a3f..ac10ded25c688 100644 --- a/src/connector/src/source/mod.rs +++ b/src/connector/src/source/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
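As with Kafka, the Kinesis connection settings now sit on a nested `KinesisCommon` (see the test above), but because the field is marked `#[serde(flatten)]` the options accepted from a `WITH (...)` clause stay flat; only the Rust-side layout changes. A quick sketch of that behaviour, with a hypothetical option map and serde_json standing in for the real property plumbing:

    use serde_json::json;

    // Hypothetical options; the keys are the serde renames/aliases shown above.
    let props: KinesisProperties = serde_json::from_value(json!({
        "stream": "kinesis_debug",
        "aws.region": "cn-northwest-1",
        "scan.startup.mode": "earliest",
    }))
    .unwrap();

    // Flattened keys land on the nested struct; optional fields default to None.
    assert_eq!(props.common.stream_name, "kinesis_debug");
    assert_eq!(props.scan_startup_mode.as_deref(), Some("earliest"));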
@@ -14,18 +14,22 @@ pub mod base; pub mod cdc; +pub mod data_gen_util; pub mod datagen; pub mod dummy_connector; pub mod filesystem; pub mod google_pubsub; pub mod kafka; pub mod kinesis; +pub mod monitor; pub mod nexmark; pub mod pulsar; pub use base::*; pub use google_pubsub::GOOGLE_PUBSUB_CONNECTOR; pub use kafka::KAFKA_CONNECTOR; pub use kinesis::KINESIS_CONNECTOR; +mod manager; +pub use manager::SourceColumnDesc; pub use crate::source::nexmark::NEXMARK_CONNECTOR; pub use crate::source::pulsar::PULSAR_CONNECTOR; diff --git a/src/source/src/monitor/metrics.rs b/src/connector/src/source/monitor/metrics.rs similarity index 98% rename from src/source/src/monitor/metrics.rs rename to src/connector/src/source/monitor/metrics.rs index 5b8d6a8c0fa68..c0e922bb734df 100644 --- a/src/source/src/monitor/metrics.rs +++ b/src/connector/src/source/monitor/metrics.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/source/src/monitor/mod.rs b/src/connector/src/source/monitor/mod.rs similarity index 94% rename from src/source/src/monitor/mod.rs rename to src/connector/src/source/monitor/mod.rs index 70c0c6f0bedad..873f7622a79d1 100644 --- a/src/source/src/monitor/mod.rs +++ b/src/connector/src/source/monitor/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/nexmark/enumerator/mod.rs b/src/connector/src/source/nexmark/enumerator/mod.rs index a65e27e9e2fe3..e65378887371d 100644 --- a/src/connector/src/source/nexmark/enumerator/mod.rs +++ b/src/connector/src/source/nexmark/enumerator/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/nexmark/mod.rs b/src/connector/src/source/nexmark/mod.rs index b52fb1763b6ae..679306cf96b22 100644 --- a/src/connector/src/source/nexmark/mod.rs +++ b/src/connector/src/source/nexmark/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/nexmark/source/combined_event.rs b/src/connector/src/source/nexmark/source/combined_event.rs index 8d9c50d603181..290ee1596c26b 100644 --- a/src/connector/src/source/nexmark/source/combined_event.rs +++ b/src/connector/src/source/nexmark/source/combined_event.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,7 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use nexmark::event::{Auction, Bid, Person}; +use nexmark::event::{Auction, Bid, Event, EventType, Person}; +use risingwave_common::array::StructValue; +use risingwave_common::row::OwnedRow; +use risingwave_common::types::struct_type::StructType; +use risingwave_common::types::{DataType, Datum, NaiveDateTimeWrapper, ScalarImpl}; use serde::{Deserialize, Serialize}; #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] @@ -53,3 +57,235 @@ impl CombinedEvent { Self::new(2, None, None, Some(bid)) } } + +pub fn new_combined_event(event: Event) -> CombinedEvent { + match event { + Event::Person(p) => CombinedEvent::person(p), + Event::Auction(a) => CombinedEvent::auction(a), + Event::Bid(b) => CombinedEvent::bid(b), + } +} + +pub(crate) fn get_event_data_types( + event_type: Option, + row_id_index: Option, +) -> Vec { + let mut fields = match event_type { + None => { + vec![ + DataType::Int64, + DataType::Struct(get_person_struct_type().into()), + DataType::Struct(get_auction_struct_type().into()), + DataType::Struct(get_bid_struct_type().into()), + ] + } + Some(EventType::Person) => get_person_struct_type().fields, + Some(EventType::Auction) => get_auction_struct_type().fields, + Some(EventType::Bid) => get_bid_struct_type().fields, + }; + + if let Some(row_id_index) = row_id_index { + // _row_id + fields.insert(row_id_index, DataType::Int64); + } + + fields +} + +pub(crate) fn get_person_struct_type() -> StructType { + let fields = vec![ + DataType::Int64, + DataType::Varchar, + DataType::Varchar, + DataType::Varchar, + DataType::Varchar, + DataType::Varchar, + DataType::Timestamp, + DataType::Varchar, + ]; + let field_names = vec![ + "id", + "name", + "email_address", + "credit_card", + "city", + "state", + "date_time", + "extra", + ] + .into_iter() + .map(ToOwned::to_owned) + .collect(); + StructType { + fields, + field_names, + } +} + +pub(crate) fn get_auction_struct_type() -> StructType { + let fields = vec![ + DataType::Int64, + DataType::Varchar, + DataType::Varchar, + DataType::Int64, + DataType::Int64, + DataType::Timestamp, + DataType::Timestamp, + DataType::Int64, + DataType::Int64, + DataType::Varchar, + ]; + let field_names = vec![ + "id", + "item_name", + "description", + "initial_bid", + "reserve", + "date_time", + "expires", + "seller", + "category", + "extra", + ] + .into_iter() + .map(ToOwned::to_owned) + .collect(); + + StructType { + fields, + field_names, + } +} + +pub(crate) fn get_bid_struct_type() -> StructType { + let fields = vec![ + DataType::Int64, + DataType::Int64, + DataType::Int64, + DataType::Varchar, + DataType::Varchar, + DataType::Timestamp, + DataType::VARCHAR, + ]; + let field_names = vec![ + "auction", + "bidder", + "price", + "channel", + "url", + "date_time", + "extra", + ] + .into_iter() + .map(ToOwned::to_owned) + .collect(); + + StructType { + fields, + field_names, + } +} + +pub(crate) fn combined_event_to_row(e: CombinedEvent, row_id_index: Option) -> OwnedRow { + let mut fields = vec![ + Some(ScalarImpl::Int64(e.event_type as i64)), + e.person + .map(person_to_datum) + .map(|fields| StructValue::new(fields).into()), + e.auction + .map(auction_to_datum) + .map(|fields| StructValue::new(fields).into()), + e.bid + .map(bid_to_datum) + .map(|fields| StructValue::new(fields).into()), + ]; + + if let Some(row_id_index) = row_id_index { + // _row_id + fields.insert(row_id_index, None); + } + + OwnedRow::new(fields) +} + +pub(crate) fn event_to_row(e: Event, row_id_index: Option) -> OwnedRow { + let mut fields = match e { + Event::Person(p) => 
person_to_datum(p), + Event::Auction(a) => auction_to_datum(a), + Event::Bid(b) => bid_to_datum(b), + }; + if let Some(row_id_index) = row_id_index { + // _row_id + fields.insert(row_id_index, None); + } + OwnedRow::new(fields) +} + +fn person_to_datum(p: Person) -> Vec { + let fields = vec![ + Some(ScalarImpl::Int64(p.id as i64)), + Some(ScalarImpl::Utf8(p.name.into())), + Some(ScalarImpl::Utf8(p.email_address.into())), + Some(ScalarImpl::Utf8(p.credit_card.into())), + Some(ScalarImpl::Utf8(p.city.into())), + Some(ScalarImpl::Utf8(p.state.into())), + Some(ScalarImpl::NaiveDateTime( + NaiveDateTimeWrapper::with_secs_nsecs( + (p.date_time / 1_000) as i64, + (p.date_time % 1_000) as u32 * 1_000_000, + ) + .unwrap(), + )), + Some(ScalarImpl::Utf8(p.extra.into())), + ]; + fields +} + +fn auction_to_datum(a: Auction) -> Vec { + let fields = vec![ + Some(ScalarImpl::Int64(a.id as i64)), + Some(ScalarImpl::Utf8(a.item_name.into())), + Some(ScalarImpl::Utf8(a.description.into())), + Some(ScalarImpl::Int64(a.initial_bid as i64)), + Some(ScalarImpl::Int64(a.reserve as i64)), + Some(ScalarImpl::NaiveDateTime( + NaiveDateTimeWrapper::with_secs_nsecs( + (a.date_time / 1_000) as i64, + (a.date_time % 1_000) as u32 * 1_000_000, + ) + .unwrap(), + )), + Some(ScalarImpl::NaiveDateTime( + NaiveDateTimeWrapper::with_secs_nsecs( + (a.expires / 1_000) as i64, + (a.expires % 1_000) as u32 * 1_000_000, + ) + .unwrap(), + )), + Some(ScalarImpl::Int64(a.seller as i64)), + Some(ScalarImpl::Int64(a.category as i64)), + Some(ScalarImpl::Utf8(a.extra.into())), + ]; + + fields +} + +fn bid_to_datum(b: Bid) -> Vec { + let fields = vec![ + Some(ScalarImpl::Int64(b.auction as i64)), + Some(ScalarImpl::Int64(b.bidder as i64)), + Some(ScalarImpl::Int64(b.price as i64)), + Some(ScalarImpl::Utf8(b.channel.into())), + Some(ScalarImpl::Utf8(b.url.into())), + Some(ScalarImpl::NaiveDateTime( + NaiveDateTimeWrapper::with_secs_nsecs( + (b.date_time / 1_000) as i64, + (b.date_time % 1_000) as u32 * 1_000_000, + ) + .unwrap(), + )), + Some(ScalarImpl::Utf8(b.extra.into())), + ]; + + fields +} diff --git a/src/connector/src/source/nexmark/source/message.rs b/src/connector/src/source/nexmark/source/message.rs index 5328e7b4e830b..ab96bcdee312d 100644 --- a/src/connector/src/source/nexmark/source/message.rs +++ b/src/connector/src/source/nexmark/source/message.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
- use std::time::{SystemTime, UNIX_EPOCH}; use bytes::Bytes; @@ -19,12 +18,10 @@ use nexmark::event::Event; use crate::source::nexmark::source::combined_event::CombinedEvent; use crate::source::{SourceMessage, SourceMeta, SplitId}; - #[derive(Clone, Debug)] pub struct NexmarkMeta { pub timestamp: Option, } - #[derive(Clone, Debug)] pub struct NexmarkMessage { pub split_id: SplitId, diff --git a/src/connector/src/source/nexmark/source/mod.rs b/src/connector/src/source/nexmark/source/mod.rs index fc4b2b4500f0e..79623d242a958 100644 --- a/src/connector/src/source/nexmark/source/mod.rs +++ b/src/connector/src/source/nexmark/source/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/nexmark/source/reader.rs b/src/connector/src/source/nexmark/source/reader.rs index 55bd997288a80..a7cf1f5f15c80 100644 --- a/src/connector/src/source/nexmark/source/reader.rs +++ b/src/connector/src/source/nexmark/source/reader.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,63 +12,72 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::sync::Arc; use std::time::Duration; use anyhow::Result; use async_trait::async_trait; use futures::StreamExt; use futures_async_stream::try_stream; -use itertools::Itertools; +use maplit::hashmap; use nexmark::config::NexmarkConfig; use nexmark::event::EventType; use nexmark::EventGenerator; +use risingwave_common::array::{Op, StreamChunk}; +use risingwave_common::error::RwError; use tokio::time::Instant; -use crate::source::nexmark::source::message::NexmarkMessage; +use crate::parser::ParserConfig; +use crate::source::data_gen_util::spawn_data_generation_stream; +use crate::source::monitor::SourceMetrics; +use crate::source::nexmark::source::combined_event::{ + combined_event_to_row, event_to_row, get_event_data_types, new_combined_event, +}; use crate::source::nexmark::{NexmarkProperties, NexmarkSplit}; use crate::source::{ - spawn_data_generation_stream, BoxSourceStream, Column, ConnectorState, SourceMessage, SplitId, - SplitMetaData, SplitReader, + BoxSourceWithStateStream, Column, SourceInfo, SplitId, SplitImpl, SplitMetaData, SplitReader, + StreamChunkWithState, }; #[derive(Debug)] pub struct NexmarkSplitReader { generator: EventGenerator, assigned_split: NexmarkSplit, - split_id: SplitId, event_num: u64, event_type: Option, use_real_time: bool, min_event_gap_in_ns: u64, max_chunk_size: u64, + + row_id_index: Option, + split_id: SplitId, + metrics: Arc, + source_info: SourceInfo, } #[async_trait] impl SplitReader for NexmarkSplitReader { type Properties = NexmarkProperties; + #[allow(clippy::unused_async)] async fn new( properties: NexmarkProperties, - state: ConnectorState, + splits: Vec, + parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, _columns: Option>, ) -> Result { - let mut assigned_split = NexmarkSplit::default(); - let mut split_id = "".into(); - let mut split_num = 1; - let mut offset = 0; - - if let Some(splits) = state { - tracing::debug!("Splits for nexmark found! 
{:?}", splits); - // TODO: currently, assume there's only one split in one reader - let split = splits.into_iter().exactly_one().unwrap(); - split_id = split.id(); - let split = split.into_nexmark().unwrap(); - - let split_index = split.split_index as u64; - split_num = split.split_num as u64; - offset = split.start_offset.unwrap_or(split_index); - assigned_split = split; - } + tracing::debug!("Splits for nexmark found! {:?}", splits); + assert!(splits.len() == 1); + // TODO: currently, assume there's only one split in one reader + let split = splits.into_iter().next().unwrap().into_nexmark().unwrap(); + let split_id = split.id(); + + let split_index = split.split_index as u64; + let split_num = split.split_num as u64; + let offset = split.start_offset.unwrap_or(split_index); + let assigned_split = split; let mut generator = EventGenerator::new(NexmarkConfig::from(&*properties)) .with_offset(offset) @@ -79,6 +88,12 @@ impl SplitReader for NexmarkSplitReader { generator = generator.with_type_filter(*event_type); } + let row_id_index = parser_config + .common + .rw_columns + .into_iter() + .position(|column| column.is_row_id); + Ok(NexmarkSplitReader { generator, assigned_split, @@ -88,44 +103,40 @@ impl SplitReader for NexmarkSplitReader { event_type: properties.table_type, use_real_time: properties.use_real_time, min_event_gap_in_ns: properties.min_event_gap_in_ns, + row_id_index, + metrics, + source_info, }) } - fn into_stream(self) -> BoxSourceStream { + fn into_stream(self) -> BoxSourceWithStateStream { // Will buffer at most 4 event chunks. const BUFFER_SIZE: usize = 4; - spawn_data_generation_stream(self.into_stream(), BUFFER_SIZE).boxed() + spawn_data_generation_stream(self.into_chunk_stream(), BUFFER_SIZE).boxed() } } impl NexmarkSplitReader { - #[try_stream(boxed, ok = Vec, error = anyhow::Error)] - async fn into_stream(mut self) { + #[try_stream(boxed, ok = StreamChunkWithState, error = RwError)] + async fn into_chunk_stream(mut self) { let start_time = Instant::now(); let start_offset = self.generator.global_offset(); let start_ts = self.generator.timestamp(); + let event_dtypes = get_event_data_types(self.event_type, self.row_id_index); loop { - let mut msgs: Vec = vec![]; - while (msgs.len() as u64) < self.max_chunk_size { + let mut rows = vec![]; + while (rows.len() as u64) < self.max_chunk_size { if self.generator.global_offset() >= self.event_num { break; } let event = self.generator.next().unwrap(); - let event = match self.event_type { - Some(_) => NexmarkMessage::new_single_event( - self.split_id.clone(), - self.generator.offset(), - event, - ), - None => NexmarkMessage::new_combined_event( - self.split_id.clone(), - self.generator.offset(), - event, - ), + let row = match self.event_type { + Some(_) => event_to_row(event, self.row_id_index), + None => combined_event_to_row(new_combined_event(event), self.row_id_index), }; - msgs.push(event.into()); + rows.push((Op::Insert, row)); } - if msgs.is_empty() { + if rows.is_empty() { break; } if self.use_real_time { @@ -143,7 +154,12 @@ impl NexmarkSplitReader { ) .await; } - yield msgs; + let mapping = hashmap! 
{self.split_id.clone() => self.generator.offset().to_string()}; + let stream_chunk = StreamChunk::from_rows(&rows, &event_dtypes); + yield StreamChunkWithState { + chunk: stream_chunk, + split_offset_mapping: Some(mapping), + }; } tracing::debug!(?self.event_type, "nexmark generator finished"); @@ -179,10 +195,17 @@ mod tests { assert_eq!(list_splits_resp.len(), 2); for split in list_splits_resp { - let state = Some(vec![split]); - let mut reader = NexmarkSplitReader::new(props.clone(), state, None) - .await? - .into_stream(); + let state = vec![split]; + let mut reader = NexmarkSplitReader::new( + props.clone(), + state, + Default::default(), + Default::default(), + Default::default(), + None, + ) + .await? + .into_stream(); let _chunk = reader.next().await.unwrap()?; } diff --git a/src/connector/src/source/nexmark/split.rs b/src/connector/src/source/nexmark/split.rs index 269ab077e8f8e..9bb765a8acf7d 100644 --- a/src/connector/src/source/nexmark/split.rs +++ b/src/connector/src/source/nexmark/split.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/pulsar/admin/client.rs b/src/connector/src/source/pulsar/admin/client.rs index 24a3a52dc77cd..c446bc217b456 100644 --- a/src/connector/src/source/pulsar/admin/client.rs +++ b/src/connector/src/source/pulsar/admin/client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/pulsar/admin/mod.rs b/src/connector/src/source/pulsar/admin/mod.rs index e331e367c7d76..ea9e0a396a09d 100644 --- a/src/connector/src/source/pulsar/admin/mod.rs +++ b/src/connector/src/source/pulsar/admin/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/pulsar/enumerator/client.rs b/src/connector/src/source/pulsar/enumerator/client.rs index 9e8faf04b70b7..527193bca943d 100644 --- a/src/connector/src/source/pulsar/enumerator/client.rs +++ b/src/connector/src/source/pulsar/enumerator/client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -92,7 +92,6 @@ impl SplitEnumerator for PulsarSplitEnumerator { let splits = if topic_metadata.partitions > 0 { // partitioned topic (0..topic_metadata.partitions as i32) - .into_iter() .map(|p| PulsarSplit { topic: self.topic.sub_topic(p).unwrap(), start_offset: offset.clone(), diff --git a/src/connector/src/source/pulsar/enumerator/mod.rs b/src/connector/src/source/pulsar/enumerator/mod.rs index 8f3f74c7acdf8..b2474885cb8ad 100644 --- a/src/connector/src/source/pulsar/enumerator/mod.rs +++ b/src/connector/src/source/pulsar/enumerator/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
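With this change the Nexmark reader no longer emits `Vec<SourceMessage>` batches for a downstream parser; it builds rows itself and yields state-carrying chunks, which is the contract behind `BoxSourceWithStateStream`. Condensed from the loop above (local names shortened from the `self.*` fields used there):

    use maplit::hashmap;
    use risingwave_common::array::{Op, StreamChunk};

    // `rows` is the Vec<(Op, OwnedRow)> filled by event_to_row()/combined_event_to_row(),
    // `event_dtypes` the Vec<DataType> returned by get_event_data_types().
    let chunk = StreamChunk::from_rows(&rows, &event_dtypes);

    let chunk_with_state = StreamChunkWithState {
        chunk,
        // split_id and offset stand for self.split_id and self.generator.offset(): the
        // mapping records this reader's position so the source executor can resume from it.
        split_offset_mapping: Some(hashmap! {
            split_id.clone() => offset.to_string()
        }),
    };
    // The #[try_stream] generator then yields chunk_with_state to the executor.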
diff --git a/src/connector/src/source/pulsar/mod.rs b/src/connector/src/source/pulsar/mod.rs index fe44eaa13d97f..950c6d09053f4 100644 --- a/src/connector/src/source/pulsar/mod.rs +++ b/src/connector/src/source/pulsar/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/pulsar/source/message.rs b/src/connector/src/source/pulsar/source/message.rs index 1e5f00d746fae..91c4b0f4ed78c 100644 --- a/src/connector/src/source/pulsar/source/message.rs +++ b/src/connector/src/source/pulsar/source/message.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/pulsar/source/mod.rs b/src/connector/src/source/pulsar/source/mod.rs index f1a1d9aeb16c1..cd90a79f28913 100644 --- a/src/connector/src/source/pulsar/source/mod.rs +++ b/src/connector/src/source/pulsar/source/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/pulsar/source/reader.rs b/src/connector/src/source/pulsar/source/reader.rs index 1176a32e249e3..26138dd83ed8a 100644 --- a/src/connector/src/source/pulsar/source/reader.rs +++ b/src/connector/src/source/pulsar/source/reader.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; use anyhow::{anyhow, ensure, Result}; use async_trait::async_trait; -use futures::StreamExt; +use futures::{StreamExt, TryStreamExt}; use futures_async_stream::try_stream; use itertools::Itertools; use pulsar::consumer::InitialPosition; @@ -24,16 +25,27 @@ use pulsar::message::proto::MessageIdData; use pulsar::{Consumer, ConsumerBuilder, ConsumerOptions, Pulsar, SubType, TokioExecutor}; use risingwave_common::try_match_expand; +use crate::impl_common_split_reader_logic; +use crate::parser::ParserConfig; +use crate::source::monitor::SourceMetrics; use crate::source::pulsar::split::PulsarSplit; use crate::source::pulsar::{PulsarEnumeratorOffset, PulsarProperties}; use crate::source::{ - BoxSourceStream, Column, ConnectorState, SourceMessage, SplitImpl, SplitReader, MAX_CHUNK_SIZE, + BoxSourceWithStateStream, Column, SourceInfo, SourceMessage, SplitId, SplitImpl, SplitMetaData, + SplitReader, MAX_CHUNK_SIZE, }; +impl_common_split_reader_logic!(PulsarSplitReader, PulsarProperties); + pub struct PulsarSplitReader { pulsar: Pulsar, consumer: Consumer, TokioExecutor>, split: PulsarSplit, + + split_id: SplitId, + parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, } // {ledger_id}:{entry_id}:{partition}:{batch_index} @@ -84,10 +96,12 @@ impl SplitReader for PulsarSplitReader { async fn new( props: PulsarProperties, - state: ConnectorState, + splits: Vec, + parser_config: ParserConfig, + metrics: Arc, + source_info: SourceInfo, _columns: Option>, ) -> Result { - let splits = state.ok_or_else(|| anyhow!("no default state for reader"))?; ensure!(splits.len() == 1, "only support single split"); let split = try_match_expand!(splits.into_iter().next().unwrap(), SplitImpl::Pulsar)?; @@ -140,18 +154,22 @@ impl SplitReader for PulsarSplitReader { Ok(Self { pulsar, consumer, + split_id: split.id(), split, + parser_config, + metrics, + source_info, }) } - fn into_stream(self) -> BoxSourceStream { - self.into_stream() + fn into_stream(self) -> BoxSourceWithStateStream { + self.into_chunk_stream() } } impl PulsarSplitReader { #[try_stream(boxed, ok = Vec, error = anyhow::Error)] - pub async fn into_stream(self) { + pub(crate) async fn into_data_stream(self) { #[for_await] for msgs in self.consumer.ready_chunks(MAX_CHUNK_SIZE) { let mut res = Vec::with_capacity(msgs.len()); diff --git a/src/connector/src/source/pulsar/split.rs b/src/connector/src/source/pulsar/split.rs index 8ef4be829d09c..d97508d6c581f 100644 --- a/src/connector/src/source/pulsar/split.rs +++ b/src/connector/src/source/pulsar/split.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/connector/src/source/pulsar/topic.rs b/src/connector/src/source/pulsar/topic.rs index f5b0f19ab287f..9ce22f6ce6206 100644 --- a/src/connector/src/source/pulsar/topic.rs +++ b/src/connector/src/source/pulsar/topic.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
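The Kafka, Kinesis, Nexmark and Pulsar readers above all adopt the same new constructor shape: assigned splits are passed in directly (no more `ConnectorState` option), and the parser config, metrics and source info travel with the reader so it can produce parsed, state-aware chunks. A sketch of the trait they implement, with generic parameters inferred from these hunks (the authoritative trait lives elsewhere in the crate and is not part of the hunks shown here, so the exact bounds may differ):

    use std::sync::Arc;

    use anyhow::Result;
    use async_trait::async_trait;

    #[async_trait]
    pub trait SplitReader: Sized {
        type Properties;

        async fn new(
            properties: Self::Properties,
            splits: Vec<SplitImpl>,
            parser_config: ParserConfig,
            metrics: Arc<SourceMetrics>,
            source_info: SourceInfo,
            columns: Option<Vec<Column>>,
        ) -> Result<Self>;

        // Readers now return a stream of StreamChunkWithState rather than raw message
        // batches. For Kafka, Kinesis and Pulsar, into_stream() delegates to an
        // into_chunk_stream() that impl_common_split_reader_logic! presumably generates
        // by feeding each reader's into_data_stream() output through the parser.
        fn into_stream(self) -> BoxSourceWithStateStream;
    }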
diff --git a/src/connector/src/test_data/debezium_avro_msg_schema.avsc b/src/connector/src/test_data/debezium_avro_msg_schema.avsc new file mode 100644 index 0000000000000..2c6944a58e5f8 --- /dev/null +++ b/src/connector/src/test_data/debezium_avro_msg_schema.avsc @@ -0,0 +1,190 @@ +{ + "type": "record", + "name": "Envelope", + "namespace": "dbserver1.inventory.customers", + "fields": [ + { + "name": "before", + "type": [ + "null", + { + "type": "record", + "name": "Value", + "fields": [ + { + "name": "id", + "type": "int" + }, + { + "name": "first_name", + "type": "string" + }, + { + "name": "last_name", + "type": "string" + }, + { + "name": "email", + "type": "string" + } + ], + "connect.name": "dbserver1.inventory.customers.Value" + } + ], + "default": null + }, + { + "name": "after", + "type": [ + "null", + "Value" + ], + "default": null + }, + { + "name": "source", + "type": { + "type": "record", + "name": "Source", + "namespace": "io.debezium.connector.mysql", + "fields": [ + { + "name": "version", + "type": "string" + }, + { + "name": "connector", + "type": "string" + }, + { + "name": "name", + "type": "string" + }, + { + "name": "ts_ms", + "type": "long" + }, + { + "name": "snapshot", + "type": [ + { + "type": "string", + "connect.version": 1, + "connect.parameters": { + "allowed": "true,last,false,incremental" + }, + "connect.default": "false", + "connect.name": "io.debezium.data.Enum" + }, + "null" + ], + "default": "false" + }, + { + "name": "db", + "type": "string" + }, + { + "name": "sequence", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "table", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "server_id", + "type": "long" + }, + { + "name": "gtid", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "file", + "type": "string" + }, + { + "name": "pos", + "type": "long" + }, + { + "name": "row", + "type": "int" + }, + { + "name": "thread", + "type": [ + "null", + "long" + ], + "default": null + }, + { + "name": "query", + "type": [ + "null", + "string" + ], + "default": null + } + ], + "connect.name": "io.debezium.connector.mysql.Source" + } + }, + { + "name": "op", + "type": "string" + }, + { + "name": "ts_ms", + "type": [ + "null", + "long" + ], + "default": null + }, + { + "name": "transaction", + "type": [ + "null", + { + "type": "record", + "name": "block", + "namespace": "event", + "fields": [ + { + "name": "id", + "type": "string" + }, + { + "name": "total_order", + "type": "long" + }, + { + "name": "data_collection_order", + "type": "long" + } + ], + "connect.version": 1, + "connect.name": "event.block" + } + ], + "default": null + } + ], + "connect.version": 1, + "connect.name": "dbserver1.inventory.customers.Envelope" +} \ No newline at end of file diff --git a/src/connector/src/test_data/simple-schema.avsc b/src/connector/src/test_data/simple-schema.avsc index b476ee910d059..7f24ab16499d7 100644 --- a/src/connector/src/test_data/simple-schema.avsc +++ b/src/connector/src/test_data/simple-schema.avsc @@ -14,8 +14,7 @@ }, { "name": "name", - "type": "string", - "default": "" + "type": ["null", "string"] }, { "name": "score", diff --git a/src/connector/src/test_data/union-schema.avsc b/src/connector/src/test_data/union-schema.avsc new file mode 100644 index 0000000000000..4176d8f771f09 --- /dev/null +++ b/src/connector/src/test_data/union-schema.avsc @@ -0,0 +1,88 @@ +{ + "name": "test_student", + "type": "record", + "fields": [ + { + "name": "id", + "type": "int", + "default": 0 + }, + { + 
"name": "age", + "type": ["null", "int"] + }, + { + "name": "sequence_id", + "type": ["null", "long"] + }, + { + "name": "name", + "type": ["null", "string"], + "default": null + }, + { + "name": "score", + "type": [ "float", "null" ], + "default": 1.0 + }, + { + "name": "avg_score", + "type": ["null", "double"] + }, + { + "name": "is_lasted", + "type": ["null", "boolean"] + }, + { + "name": "entrance_date", + "type": [ + "null", + { + "type": "int", + "logicalType": "date", + "arg.properties": { + "range": { + "min": 1, + "max": 19374 + } + } + } + ], + "default": null + }, + { + "name": "birthday", + "type": [ + "null", + { + "type": "long", + "logicalType": "timestamp-millis", + "arg.properties": { + "range": { + "min": 1, + "max": 1673970376213 + } + } + } + ], + "default": null + }, + { + "name": "anniversary", + "type": [ + "null", + { + "type" : "long", + "logicalType": "timestamp-micros", + "arg.properties": { + "range": { + "min": 1, + "max": 1673970376213000 + } + } + } + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/src/ctl/Cargo.toml b/src/ctl/Cargo.toml index cb272d1c0a8b6..e1db5f276c4e1 100644 --- a/src/ctl/Cargo.toml +++ b/src/ctl/Cargo.toml @@ -7,6 +7,12 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" bytes = "1" @@ -15,10 +21,8 @@ clap = { version = "3", features = ["derive"] } comfy-table = "6" futures = { version = "0.3", default-features = false, features = ["alloc"] } itertools = "0.10" -parking_lot = "0.12" regex = "1.6.0" risingwave_common = { path = "../common" } -risingwave_common_service = { path = "../common/common_service" } risingwave_frontend = { path = "../frontend" } risingwave_hummock_sdk = { path = "../storage/hummock_sdk" } risingwave_object_store = { path = "../object_store" } diff --git a/src/ctl/src/cmd_impl.rs b/src/ctl/src/cmd_impl.rs index d1563ba3e9742..ce3f86d49157e 100644 --- a/src/ctl/src/cmd_impl.rs +++ b/src/ctl/src/cmd_impl.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/bench.rs b/src/ctl/src/cmd_impl/bench.rs index 2d57252e6891d..59e5c71debda7 100644 --- a/src/ctl/src/cmd_impl/bench.rs +++ b/src/ctl/src/cmd_impl/bench.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/compute.rs b/src/ctl/src/cmd_impl/compute.rs index 60ae34242246e..61cde8ffa85df 100644 --- a/src/ctl/src/cmd_impl/compute.rs +++ b/src/ctl/src/cmd_impl/compute.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -19,8 +19,8 @@ use risingwave_rpc_client::ComputeClient; use serde_json; pub async fn show_config(host: &str) -> anyhow::Result<()> { - let listen_address = HostAddr::try_from(host)?; - let client = ComputeClient::new(listen_address).await?; + let listen_addr = HostAddr::try_from(host)?; + let client = ComputeClient::new(listen_addr).await?; let config_response = client.show_config().await?; let batch_config: BatchConfig = serde_json::from_str(&config_response.batch_config)?; let stream_config: StreamingConfig = serde_json::from_str(&config_response.stream_config)?; diff --git a/src/ctl/src/cmd_impl/hummock.rs b/src/ctl/src/cmd_impl/hummock.rs index acc45580534b2..c820482705a48 100644 --- a/src/ctl/src/cmd_impl/hummock.rs +++ b/src/ctl/src/cmd_impl/hummock.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/hummock/compaction_group.rs b/src/ctl/src/cmd_impl/hummock/compaction_group.rs index a8cb08d40d578..ec5cf1ab3435d 100644 --- a/src/ctl/src/cmd_impl/hummock/compaction_group.rs +++ b/src/ctl/src/cmd_impl/hummock/compaction_group.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/hummock/disable_commit_epoch.rs b/src/ctl/src/cmd_impl/hummock/disable_commit_epoch.rs index 31e3faafba9f3..5b4b5356dcb7a 100644 --- a/src/ctl/src/cmd_impl/hummock/disable_commit_epoch.rs +++ b/src/ctl/src/cmd_impl/hummock/disable_commit_epoch.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/hummock/list_kv.rs b/src/ctl/src/cmd_impl/hummock/list_kv.rs index c3eaafe73610d..8d9372841cb4c 100644 --- a/src/ctl/src/cmd_impl/hummock/list_kv.rs +++ b/src/ctl/src/cmd_impl/hummock/list_kv.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/hummock/list_version.rs b/src/ctl/src/cmd_impl/hummock/list_version.rs index 384595acab19c..6336c1d32fc96 100644 --- a/src/ctl/src/cmd_impl/hummock/list_version.rs +++ b/src/ctl/src/cmd_impl/hummock/list_version.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/hummock/list_version_deltas.rs b/src/ctl/src/cmd_impl/hummock/list_version_deltas.rs index 035fe7837a5a7..a80ca3985e5e1 100644 --- a/src/ctl/src/cmd_impl/hummock/list_version_deltas.rs +++ b/src/ctl/src/cmd_impl/hummock/list_version_deltas.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/ctl/src/cmd_impl/hummock/sst_dump.rs b/src/ctl/src/cmd_impl/hummock/sst_dump.rs index 10be3dd0c9127..40570c4d17fec 100644 --- a/src/ctl/src/cmd_impl/hummock/sst_dump.rs +++ b/src/ctl/src/cmd_impl/hummock/sst_dump.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ use bytes::{Buf, Bytes}; use itertools::Itertools; use risingwave_common::row::{Row, RowDeserializer}; use risingwave_common::types::to_text::ToText; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_frontend::TableCatalog; use risingwave_hummock_sdk::compaction_group::hummock_version_ext::HummockVersionExt; use risingwave_hummock_sdk::key::FullKey; @@ -212,7 +213,7 @@ fn print_table_column( .collect_vec(); let row_deserializer = RowDeserializer::new(data_types); let row = row_deserializer.deserialize(user_val)?; - for (c, v) in column_desc.iter().zip_eq(row.iter()) { + for (c, v) in column_desc.iter().zip_eq_fast(row.iter()) { println!("\t\t column: {} {}", c, v.to_text()); } diff --git a/src/ctl/src/cmd_impl/hummock/trigger_full_gc.rs b/src/ctl/src/cmd_impl/hummock/trigger_full_gc.rs index 90d4d8a431e2c..2b245ec03fbe9 100644 --- a/src/ctl/src/cmd_impl/hummock/trigger_full_gc.rs +++ b/src/ctl/src/cmd_impl/hummock/trigger_full_gc.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/hummock/trigger_manual_compaction.rs b/src/ctl/src/cmd_impl/hummock/trigger_manual_compaction.rs index 0f21ae613658b..961a44662bafa 100644 --- a/src/ctl/src/cmd_impl/hummock/trigger_manual_compaction.rs +++ b/src/ctl/src/cmd_impl/hummock/trigger_manual_compaction.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/meta.rs b/src/ctl/src/cmd_impl/meta.rs index d8cc9e37acbc0..97dbdf48c32f7 100644 --- a/src/ctl/src/cmd_impl/meta.rs +++ b/src/ctl/src/cmd_impl/meta.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/meta/backup_meta.rs b/src/ctl/src/cmd_impl/meta/backup_meta.rs index 0eeca9b4a9417..f93804035555c 100644 --- a/src/ctl/src/cmd_impl/meta/backup_meta.rs +++ b/src/ctl/src/cmd_impl/meta/backup_meta.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/meta/cluster_info.rs b/src/ctl/src/cmd_impl/meta/cluster_info.rs index cc310fd6b13ca..1226e39f2bbd4 100644 --- a/src/ctl/src/cmd_impl/meta/cluster_info.rs +++ b/src/ctl/src/cmd_impl/meta/cluster_info.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/ctl/src/cmd_impl/meta/pause_resume.rs b/src/ctl/src/cmd_impl/meta/pause_resume.rs index 0ab178361ac70..a75b653fa5bc4 100644 --- a/src/ctl/src/cmd_impl/meta/pause_resume.rs +++ b/src/ctl/src/cmd_impl/meta/pause_resume.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/meta/reschedule.rs b/src/ctl/src/cmd_impl/meta/reschedule.rs index 34461694d71ab..41117f7ff65e7 100644 --- a/src/ctl/src/cmd_impl/meta/reschedule.rs +++ b/src/ctl/src/cmd_impl/meta/reschedule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/profile.rs b/src/ctl/src/cmd_impl/profile.rs index c178b5948602c..41d03bfdcfa06 100644 --- a/src/ctl/src/cmd_impl/profile.rs +++ b/src/ctl/src/cmd_impl/profile.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/table.rs b/src/ctl/src/cmd_impl/table.rs index 604deaf707f46..dfbe3eaf973b9 100644 --- a/src/ctl/src/cmd_impl/table.rs +++ b/src/ctl/src/cmd_impl/table.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/table/list.rs b/src/ctl/src/cmd_impl/table/list.rs index 40e15fa5ae9e1..f60e5edc58a3b 100644 --- a/src/ctl/src/cmd_impl/table/list.rs +++ b/src/ctl/src/cmd_impl/table/list.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/table/scan.rs b/src/ctl/src/cmd_impl/table/scan.rs index 1caa3d319a5d1..92340710d6279 100644 --- a/src/ctl/src/cmd_impl/table/scan.rs +++ b/src/ctl/src/cmd_impl/table/scan.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/cmd_impl/trace.rs b/src/ctl/src/cmd_impl/trace.rs index 4b3e32098242f..c69a8ade6c463 100644 --- a/src/ctl/src/cmd_impl/trace.rs +++ b/src/ctl/src/cmd_impl/trace.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/common.rs b/src/ctl/src/common.rs index 250bb7ebdb1d1..e914961f86eff 100644 --- a/src/ctl/src/common.rs +++ b/src/ctl/src/common.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/ctl/src/common/context.rs b/src/ctl/src/common/context.rs index 7b0fd35ea08ee..e47917052d87f 100644 --- a/src/ctl/src/common/context.rs +++ b/src/ctl/src/common/context.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/ctl/src/common/hummock_service.rs b/src/ctl/src/common/hummock_service.rs index 403aafda6e527..763892e4b89af 100644 --- a/src/ctl/src/common/hummock_service.rs +++ b/src/ctl/src/common/hummock_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ use std::sync::Arc; use std::time::Duration; use anyhow::{anyhow, bail, Result}; -use risingwave_common::config::{RwConfig, StorageConfig}; use risingwave_rpc_client::MetaClient; use risingwave_storage::hummock::hummock_meta_client::MonitoredHummockMetaClient; use risingwave_storage::hummock::{HummockStorage, TieredCacheMetricsBuilder}; @@ -25,6 +24,7 @@ use risingwave_storage::monitor::{ CompactorMetrics, HummockMetrics, HummockStateStoreMetrics, MonitoredStateStore, MonitoredStorageMetrics, ObjectStoreMetrics, }; +use risingwave_storage::opts::StorageOpts; use risingwave_storage::{StateStore, StateStoreImpl}; use tokio::sync::oneshot::Sender; use tokio::task::JoinHandle; @@ -93,16 +93,12 @@ For `./risedev apply-compose-deploy` users, self.heartbeat_shutdown_sender = Some(heartbeat_shutdown_sender); // FIXME: allow specify custom config - let config = StorageConfig { + let opts = StorageOpts { share_buffer_compaction_worker_threads_number: 0, ..Default::default() }; - let rw_config = RwConfig { - storage: config.clone(), - ..Default::default() - }; - tracing::info!("using Hummock config: {:#?}", config); + tracing::info!("using StorageOpts: {:#?}", opts); let metrics = Metrics { hummock_metrics: Arc::new(HummockMetrics::unused()), @@ -114,8 +110,7 @@ For `./risedev apply-compose-deploy` users, let state_store_impl = StateStoreImpl::new( &self.hummock_url, - "", - &rw_config, + Arc::new(opts), Arc::new(MonitoredHummockMetaClient::new( meta_client.clone(), metrics.hummock_metrics.clone(), diff --git a/src/ctl/src/common/meta_service.rs b/src/ctl/src/common/meta_service.rs index 607074895c359..c0617cfa2473d 100644 --- a/src/ctl/src/common/meta_service.rs +++ b/src/ctl/src/common/meta_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -53,7 +53,7 @@ Note: the default value of `RW_META_ADDR` is 'http://127.0.0.1:5690'."; /// Create meta client from options, and register as rise-ctl worker pub async fn create_meta_client(&self) -> Result { - let client = MetaClient::register_new( + let (client, _) = MetaClient::register_new( &self.meta_addr, WorkerType::RiseCtl, &get_new_ctl_identity(), diff --git a/src/ctl/src/lib.rs b/src/ctl/src/lib.rs index 8fee16b205d59..bd1a5bc2b7af7 100644 --- a/src/ctl/src/lib.rs +++ b/src/ctl/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
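The `hummock_service.rs` hunk above stops building a full `RwConfig` and instead constructs a `StorageOpts` directly, overriding a single field and taking the rest from `Default`, then passes it to `StateStoreImpl::new` wrapped in an `Arc`; `meta_service.rs` likewise destructures the tuple now returned by `MetaClient::register_new`. A minimal sketch of the options-construction pattern, with a hypothetical `Opts` type standing in for `StorageOpts`:

```rust
use std::sync::Arc;

/// Hypothetical stand-in for `StorageOpts`; only the override pattern matters here.
#[derive(Debug, Default)]
struct Opts {
    share_buffer_compaction_worker_threads_number: u32,
    block_cache_capacity_mb: usize,
}

fn new_state_store(opts: Arc<Opts>) {
    // Placeholder for `StateStoreImpl::new(&url, Arc::new(opts), ...)`.
    println!("constructing state store with {opts:?}");
}

fn main() {
    // Override one field, keep defaults for everything else.
    let opts = Opts {
        share_buffer_compaction_worker_threads_number: 0,
        ..Default::default()
    };
    println!("using StorageOpts-like options: {opts:#?}");
    new_state_store(Arc::new(opts));
}
```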
diff --git a/src/expr/Cargo.toml b/src/expr/Cargo.toml index 64dc828462f0a..cc09f522ca9d8 100644 --- a/src/expr/Cargo.toml +++ b/src/expr/Cargo.toml @@ -8,42 +8,34 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] aho-corasick = "0.7" anyhow = "1" -arrow-array = { git = "https://github.com/apache/arrow-rs", rev = "6139d898" } -arrow-schema = { git = "https://github.com/apache/arrow-rs", rev = "6139d898" } -async-stream = "0.3" -async-trait = "0.1" -byteorder = "1" -bytes = "1" +arrow-array = "31" +arrow-schema = "31" chrono = { version = "0.4", default-features = false, features = ["clock", "std"] } chrono-tz = { version = "0.7", features = ["case-insensitive"] } -crc32fast = "1" dyn-clone = "1" either = "1" -hex = "0.4.3" itertools = "0.10" md5 = "0.7.0" num-traits = "0.2" ouroboros = "0.15" parse-display = "0.6" paste = "1" -postgres-types = { version = "0.2.4", features = ["derive","with-chrono-0_4"] } -prost = "0.11" regex = "1" risingwave_common = { path = "../common" } risingwave_pb = { path = "../prost" } risingwave_udf = { path = "../udf" } -rust_decimal = "1" -serde = { version = "1", features = ["derive"] } -smallvec = "1" speedate = "0.7.0" thiserror = "1" tokio = { version = "0.2", package = "madsim-tokio", features = ["rt", "rt-multi-thread", "sync", "macros", "time", "signal"] } -tokio-stream = "0.1" -toml = "0.5" -tonic = { version = "0.2", package = "madsim-tonic" } [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../workspace-hack" } diff --git a/src/expr/benches/expr.rs b/src/expr/benches/expr.rs index 5d54c5a4afae9..b09126f8f93b3 100644 --- a/src/expr/benches/expr.rs +++ b/src/expr/benches/expr.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ use risingwave_common::types::{ DataType, DataTypeName, Decimal, IntervalUnit, NaiveDateTimeWrapper, NaiveDateWrapper, NaiveTimeWrapper, OrderedF32, OrderedF64, }; -use risingwave_expr::expr::expr_unary::new_unary_expr; +use risingwave_expr::expr::test_utils::{make_expression, make_string_literal}; use risingwave_expr::expr::*; use risingwave_expr::sig::agg::agg_func_sigs; use risingwave_expr::sig::cast::cast_sigs; diff --git a/src/expr/src/error.rs b/src/expr/src/error.rs index f6ce638e0fa4f..5e39e2aac9480 100644 --- a/src/expr/src/error.rs +++ b/src/expr/src/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,14 +14,16 @@ use std::borrow::Cow; -pub use anyhow::anyhow; -use regex; use risingwave_common::array::ArrayError; use risingwave_common::error::{ErrorCode, RwError}; use risingwave_common::types::DataType; use risingwave_pb::ProstFieldNotFound; use thiserror::Error; +/// A specialized Result type for expression operations. +pub type Result = std::result::Result; + +/// The error type for expression operations. #[derive(Error, Debug)] pub enum ExprError { // Ideally "Unsupported" errors are caught by frontend. 
But when the match arms between diff --git a/src/expr/src/expr/README.md b/src/expr/src/expr/README.md deleted file mode 100644 index 2c35e1e31d28e..0000000000000 --- a/src/expr/src/expr/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Expression - -Files start with `expr_` is a concrete expression struct in RisingWave. \ No newline at end of file diff --git a/src/expr/src/expr/agg.rs b/src/expr/src/expr/agg.rs index c92947b4d50df..6506930eddcdf 100644 --- a/src/expr/src/expr/agg.rs +++ b/src/expr/src/expr/agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -34,6 +34,10 @@ pub enum AggKind { ApproxCountDistinct, ArrayAgg, FirstValue, + VarPop, + VarSamp, + StddevPop, + StddevSamp, } impl TryFrom for AggKind { @@ -51,6 +55,10 @@ impl TryFrom for AggKind { Type::ApproxCountDistinct => Ok(AggKind::ApproxCountDistinct), Type::ArrayAgg => Ok(AggKind::ArrayAgg), Type::FirstValue => Ok(AggKind::FirstValue), + Type::StddevPop => Ok(AggKind::StddevPop), + Type::StddevSamp => Ok(AggKind::StddevSamp), + Type::VarPop => Ok(AggKind::VarPop), + Type::VarSamp => Ok(AggKind::VarSamp), Type::Unspecified => bail!("Unrecognized agg."), } } @@ -69,6 +77,10 @@ impl AggKind { Self::ApproxCountDistinct => Type::ApproxCountDistinct, Self::ArrayAgg => Type::ArrayAgg, Self::FirstValue => Type::FirstValue, + Self::StddevPop => Type::StddevPop, + Self::StddevSamp => Type::StddevSamp, + Self::VarPop => Type::VarPop, + Self::VarSamp => Type::VarSamp, } } } diff --git a/src/expr/src/expr/build_expr_from_prost.rs b/src/expr/src/expr/build_expr_from_prost.rs index b13c5507b886e..5273cf1a76560 100644 --- a/src/expr/src/expr/build_expr_from_prost.rs +++ b/src/expr/src/expr/build_expr_from_prost.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
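The `error.rs` hunk above adds a crate-level `Result` alias over `ExprError` (re-exported from `lib.rs` later in this patch). A small, generic sketch of that idiom, using a trimmed-down error enum rather than the real `ExprError` variant set:

```rust
use thiserror::Error;

/// Trimmed-down illustration of the `ExprError` idea, not the real variant set.
#[derive(Error, Debug)]
pub enum ExprError {
    #[error("unsupported function: {0}")]
    UnsupportedFunction(String),
    #[error("division by zero")]
    DivisionByZero,
}

/// A specialized Result type for expression operations, mirroring the alias
/// added to `error.rs` in the patch.
pub type Result<T> = std::result::Result<T, ExprError>;

fn checked_div(a: i64, b: i64) -> Result<i64> {
    if b == 0 {
        return Err(ExprError::DivisionByZero);
    }
    Ok(a / b)
}

fn main() {
    assert_eq!(checked_div(10, 2).unwrap(), 5);
    assert!(matches!(checked_div(1, 0), Err(ExprError::DivisionByZero)));
}
```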
@@ -18,32 +18,103 @@ use risingwave_common::util::value_encoding::deserialize_datum; use risingwave_pb::expr::expr_node::{RexNode, Type}; use risingwave_pb::expr::{ExprNode, FunctionCall, InputRefExpr}; -use super::expr_some_all::SomeAllExpression; -use crate::expr::expr_binary_bytes::{ +use super::expr_array_concat::ArrayConcatExpression; +use super::expr_binary_bytes::{ new_ltrim_characters, new_repeat, new_rtrim_characters, new_substr_start, new_to_char, new_trim_characters, }; -use crate::expr::expr_binary_nonnull::{ +use super::expr_binary_nonnull::{ new_binary_expr, new_date_trunc_expr, new_like_default, new_to_timestamp, }; -use crate::expr::expr_binary_nullable::new_nullable_binary_expr; -use crate::expr::expr_quaternary_bytes::new_overlay_for_exp; -use crate::expr::expr_ternary_bytes::{ +use super::expr_binary_nullable::new_nullable_binary_expr; +use super::expr_case::CaseExpression; +use super::expr_coalesce::CoalesceExpression; +use super::expr_concat_ws::ConcatWsExpression; +use super::expr_field::FieldExpression; +use super::expr_in::InExpression; +use super::expr_nested_construct::NestedConstructExpression; +use super::expr_quaternary_bytes::new_overlay_for_exp; +use super::expr_regexp::RegexpMatchExpression; +use super::expr_some_all::SomeAllExpression; +use super::expr_ternary_bytes::{ new_overlay_exp, new_replace_expr, new_split_part_expr, new_substr_start_end, new_translate_expr, }; -use crate::expr::expr_to_char_const_tmpl::{ExprToCharConstTmpl, ExprToCharConstTmplContext}; -use crate::expr::expr_to_timestamp_const_tmpl::{ +use super::expr_to_char_const_tmpl::{ExprToCharConstTmpl, ExprToCharConstTmplContext}; +use super::expr_to_timestamp_const_tmpl::{ ExprToTimestampConstTmpl, ExprToTimestampConstTmplContext, }; -use crate::expr::expr_unary::{ +use super::expr_udf::UdfExpression; +use super::expr_unary::{ new_length_default, new_ltrim_expr, new_rtrim_expr, new_trim_expr, new_unary_expr, }; +use super::expr_vnode::VnodeExpression; use crate::expr::{ - build_from_prost as expr_build_from_prost, BoxedExpression, Expression, LiteralExpression, + build_from_prost as expr_build_from_prost, BoxedExpression, Expression, InputRefExpression, + LiteralExpression, }; use crate::vector_op::to_char::compile_pattern_to_chrono; -use crate::{bail, ensure, Result}; +use crate::{bail, ensure, ExprError, Result}; + +pub fn build_from_prost(prost: &ExprNode) -> Result { + use risingwave_pb::expr::expr_node::Type::*; + + match prost.get_expr_type().unwrap() { + // Fixed number of arguments and based on `Unary/Binary/Ternary/...Expression` + Cast | Upper | Lower | Md5 | Not | IsTrue | IsNotTrue | IsFalse | IsNotFalse | IsNull + | IsNotNull | Neg | Ascii | Abs | Ceil | Floor | Round | Exp | BitwiseNot | CharLength + | BoolOut | OctetLength | BitLength | ToTimestamp => build_unary_expr_prost(prost), + Equal | NotEqual | LessThan | LessThanOrEqual | GreaterThan | GreaterThanOrEqual | Add + | Subtract | Multiply | Divide | Modulus | Extract | RoundDigit | Pow | TumbleStart + | Position | BitwiseShiftLeft | BitwiseShiftRight | BitwiseAnd | BitwiseOr | BitwiseXor + | ConcatOp | AtTimeZone | CastWithTimeZone => build_binary_expr_prost(prost), + And | Or | IsDistinctFrom | IsNotDistinctFrom | ArrayAccess | FormatType => { + build_nullable_binary_expr_prost(prost) + } + ToChar => build_to_char_expr(prost), + ToTimestamp1 => build_to_timestamp_expr(prost), + Length => build_length_expr(prost), + Replace => build_replace_expr(prost), + Like => build_like_expr(prost), + Repeat => 
build_repeat_expr(prost), + SplitPart => build_split_part_expr(prost), + Translate => build_translate_expr(prost), + + // Variable number of arguments and based on `Unary/Binary/Ternary/...Expression` + Substr => build_substr_expr(prost), + Overlay => build_overlay_expr(prost), + Trim => build_trim_expr(prost), + Ltrim => build_ltrim_expr(prost), + Rtrim => build_rtrim_expr(prost), + DateTrunc => build_date_trunc_expr(prost), + + // Dedicated types + All | Some => build_some_all_expr_prost(prost), + In => InExpression::try_from(prost).map(Expression::boxed), + Case => CaseExpression::try_from(prost).map(Expression::boxed), + Coalesce => CoalesceExpression::try_from(prost).map(Expression::boxed), + ConcatWs => ConcatWsExpression::try_from(prost).map(Expression::boxed), + ConstantValue => LiteralExpression::try_from(prost).map(Expression::boxed), + InputRef => InputRefExpression::try_from(prost).map(Expression::boxed), + Field => FieldExpression::try_from(prost).map(Expression::boxed), + Array => NestedConstructExpression::try_from(prost).map(Expression::boxed), + Row => NestedConstructExpression::try_from(prost).map(Expression::boxed), + RegexpMatch => RegexpMatchExpression::try_from(prost).map(Expression::boxed), + ArrayCat | ArrayAppend | ArrayPrepend => { + // Now we implement these three functions as a single expression for the + // sake of simplicity. If performance matters at some time, we can split + // the implementation to improve performance. + ArrayConcatExpression::try_from(prost).map(Expression::boxed) + } + Vnode => VnodeExpression::try_from(prost).map(Expression::boxed), + Now => build_now_expr(prost), + Udf => UdfExpression::try_from(prost).map(Expression::boxed), + _ => Err(ExprError::UnsupportedFunction(format!( + "{:?}", + prost.get_expr_type() + ))), + } +} fn get_children_and_return_type(prost: &ExprNode) -> Result<(Vec, DataType)> { let ret_type = DataType::from(prost.get_return_type().unwrap()); @@ -54,14 +125,14 @@ fn get_children_and_return_type(prost: &ExprNode) -> Result<(Vec, Data } } -pub fn build_unary_expr_prost(prost: &ExprNode) -> Result { +fn build_unary_expr_prost(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; let [child]: [_; 1] = children.try_into().unwrap(); let child_expr = expr_build_from_prost(&child)?; new_unary_expr(prost.get_expr_type().unwrap(), ret_type, child_expr) } -pub fn build_binary_expr_prost(prost: &ExprNode) -> Result { +fn build_binary_expr_prost(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; let [left_child, right_child]: [_; 2] = children.try_into().unwrap(); let left_expr = expr_build_from_prost(&left_child)?; @@ -74,7 +145,7 @@ pub fn build_binary_expr_prost(prost: &ExprNode) -> Result { ) } -pub fn build_nullable_binary_expr_prost(prost: &ExprNode) -> Result { +fn build_nullable_binary_expr_prost(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; let [left_child, right_child]: [_; 2] = children.try_into().unwrap(); let left_expr = expr_build_from_prost(&left_child)?; @@ -87,7 +158,7 @@ pub fn build_nullable_binary_expr_prost(prost: &ExprNode) -> Result Result { +fn build_overlay_expr(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; ensure!(children.len() == 3 || children.len() == 4); @@ -105,7 +176,7 @@ pub fn build_overlay_expr(prost: &ExprNode) -> Result { } } -pub fn build_repeat_expr(prost: &ExprNode) -> Result { +fn build_repeat_expr(prost: 
&ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; let [left_child, right_child]: [_; 2] = children.try_into().unwrap(); let left_expr = expr_build_from_prost(&left_child)?; @@ -113,7 +184,7 @@ pub fn build_repeat_expr(prost: &ExprNode) -> Result { Ok(new_repeat(left_expr, right_expr, ret_type)) } -pub fn build_substr_expr(prost: &ExprNode) -> Result { +fn build_substr_expr(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; let child = expr_build_from_prost(&children[0])?; ensure!(children.len() == 2 || children.len() == 3); @@ -129,7 +200,7 @@ pub fn build_substr_expr(prost: &ExprNode) -> Result { } } -pub fn build_trim_expr(prost: &ExprNode) -> Result { +fn build_trim_expr(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; ensure!(!children.is_empty() && children.len() <= 2); let original = expr_build_from_prost(&children[0])?; @@ -143,7 +214,7 @@ pub fn build_trim_expr(prost: &ExprNode) -> Result { } } -pub fn build_ltrim_expr(prost: &ExprNode) -> Result { +fn build_ltrim_expr(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; ensure!(!children.is_empty() && children.len() <= 2); let original = expr_build_from_prost(&children[0])?; @@ -157,7 +228,7 @@ pub fn build_ltrim_expr(prost: &ExprNode) -> Result { } } -pub fn build_rtrim_expr(prost: &ExprNode) -> Result { +fn build_rtrim_expr(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; ensure!(!children.is_empty() && children.len() <= 2); let original = expr_build_from_prost(&children[0])?; @@ -171,7 +242,7 @@ pub fn build_rtrim_expr(prost: &ExprNode) -> Result { } } -pub fn build_replace_expr(prost: &ExprNode) -> Result { +fn build_replace_expr(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; ensure!(children.len() == 3); let s = expr_build_from_prost(&children[0])?; @@ -180,7 +251,7 @@ pub fn build_replace_expr(prost: &ExprNode) -> Result { Ok(new_replace_expr(s, from_str, to_str, ret_type)) } -pub fn build_date_trunc_expr(prost: &ExprNode) -> Result { +fn build_date_trunc_expr(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; ensure!(children.len() == 2 || children.len() == 3); let field = expr_build_from_prost(&children[0])?; @@ -193,7 +264,7 @@ pub fn build_date_trunc_expr(prost: &ExprNode) -> Result { Ok(new_date_trunc_expr(ret_type, field, source, time_zone)) } -pub fn build_length_expr(prost: &ExprNode) -> Result { +fn build_length_expr(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; // TODO: add encoding length expr let [child]: [_; 1] = children.try_into().unwrap(); @@ -201,7 +272,7 @@ pub fn build_length_expr(prost: &ExprNode) -> Result { Ok(new_length_default(child, ret_type)) } -pub fn build_like_expr(prost: &ExprNode) -> Result { +fn build_like_expr(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; ensure!(children.len() == 2); let expr_ia1 = expr_build_from_prost(&children[0])?; @@ -209,7 +280,7 @@ pub fn build_like_expr(prost: &ExprNode) -> Result { Ok(new_like_default(expr_ia1, expr_ia2, ret_type)) } -pub fn build_translate_expr(prost: &ExprNode) -> Result { +fn build_translate_expr(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; ensure!(children.len() == 3); let s = 
expr_build_from_prost(&children[0])?; @@ -218,7 +289,7 @@ pub fn build_translate_expr(prost: &ExprNode) -> Result { Ok(new_translate_expr(s, match_str, replace_str, ret_type)) } -pub fn build_split_part_expr(prost: &ExprNode) -> Result { +fn build_split_part_expr(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; ensure!(children.len() == 3); let string_expr = expr_build_from_prost(&children[0])?; @@ -232,7 +303,7 @@ pub fn build_split_part_expr(prost: &ExprNode) -> Result { )) } -pub fn build_to_char_expr(prost: &ExprNode) -> Result { +fn build_to_char_expr(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; ensure!(children.len() == 2); let data_expr = expr_build_from_prost(&children[0])?; diff --git a/src/expr/src/expr/data_types.rs b/src/expr/src/expr/data_types.rs index 20d6ac3251a41..6fe46a9fc37b7 100644 --- a/src/expr/src/expr/data_types.rs +++ b/src/expr/src/expr/data_types.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -71,6 +71,18 @@ macro_rules! list { pub(crate) use list; +#[macro_export] +macro_rules! jsonb { + ($macro:ident) => { + $macro! { + risingwave_common::types::DataType::Jsonb, + risingwave_common::array::JsonbArray + } + }; +} + +pub(crate) use jsonb; + #[macro_export] macro_rules! int16 { ($macro:ident) => { diff --git a/src/expr/src/expr/expr_array_concat.rs b/src/expr/src/expr/expr_array_concat.rs index 6cb90f86bda35..7e0723cf82e33 100644 --- a/src/expr/src/expr/expr_array_concat.rs +++ b/src/expr/src/expr/expr_array_concat.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,10 +14,10 @@ use std::sync::Arc; -use itertools::Itertools; use risingwave_common::array::{ArrayRef, DataChunk, ListValue}; use risingwave_common::row::OwnedRow; use risingwave_common::types::{DataType, Datum, DatumRef, ScalarRefImpl, ToDatumRef}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::expr::expr_node::{RexNode, Type}; use risingwave_pb::expr::ExprNode; @@ -327,7 +327,7 @@ impl Expression for ArrayConcatExpression { for (vis, (left, right)) in input .vis() .iter() - .zip_eq(left_array.iter().zip_eq(right_array.iter())) + .zip_eq_fast(left_array.iter().zip_eq_fast(right_array.iter())) { if !vis { builder.append_null(); @@ -382,6 +382,7 @@ impl<'a> TryFrom<&'a ExprNode> for ArrayConcatExpression { #[cfg(test)] mod tests { + use itertools::Itertools; use risingwave_common::array::DataChunk; use risingwave_common::types::ScalarImpl; use risingwave_pb::data::Datum as ProstDatum; diff --git a/src/expr/src/expr/expr_binary_bytes.rs b/src/expr/src/expr/expr_binary_bytes.rs index a64c664244726..3ab6b822b35de 100644 --- a/src/expr/src/expr/expr_binary_bytes.rs +++ b/src/expr/src/expr/expr_binary_bytes.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
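The relocated `build_from_prost` above is essentially a factory: it matches on the prost expression type and routes each variant to the right builder, returning a `BoxedExpression` or an `ExprError::UnsupportedFunction` for anything unknown. A toy version of that dispatch shape (the enum, node, and expression types below are stand-ins, not the real prost definitions):

```rust
use std::fmt::Debug;

/// Toy stand-ins for the prost `expr_node::Type` enum and `ExprNode` message;
/// the real enum has far more variants.
#[derive(Debug, Clone, Copy)]
enum ExprType {
    Not,
    Add,
    ConstantValue,
    Other,
}

#[derive(Debug)]
struct ExprNode {
    expr_type: ExprType,
}

trait Expression: Debug {}
type BoxedExpression = Box<dyn Expression>;

#[derive(Debug)]
struct UnaryExpr;
#[derive(Debug)]
struct BinaryExpr;
#[derive(Debug)]
struct LiteralExpr;
impl Expression for UnaryExpr {}
impl Expression for BinaryExpr {}
impl Expression for LiteralExpr {}

#[derive(Debug)]
enum ExprError {
    UnsupportedFunction(String),
}

/// Mirrors the shape of the real `build_from_prost`: one match, one builder
/// per group, and a catch-all that reports an unsupported function.
fn build_from_prost(prost: &ExprNode) -> Result<BoxedExpression, ExprError> {
    match prost.expr_type {
        ExprType::Not => Ok(Box::new(UnaryExpr)),
        ExprType::Add => Ok(Box::new(BinaryExpr)),
        ExprType::ConstantValue => Ok(Box::new(LiteralExpr)),
        other => Err(ExprError::UnsupportedFunction(format!("{other:?}"))),
    }
}

fn main() {
    let node = ExprNode { expr_type: ExprType::Add };
    println!("built: {:?}", build_from_prost(&node));
    let bad = ExprNode { expr_type: ExprType::Other };
    println!("rejected: {:?}", build_from_prost(&bad));
}
```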
diff --git a/src/expr/src/expr/expr_binary_nonnull.rs b/src/expr/src/expr/expr_binary_nonnull.rs index dacc53ba8cf38..910b563af67f8 100644 --- a/src/expr/src/expr/expr_binary_nonnull.rs +++ b/src/expr/src/expr/expr_binary_nonnull.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,8 +13,8 @@ // limitations under the License. use risingwave_common::array::{ - Array, BoolArray, DecimalArray, I32Array, I64Array, IntervalArray, ListArray, NaiveDateArray, - NaiveDateTimeArray, StructArray, Utf8Array, + Array, BoolArray, DecimalArray, F64Array, I32Array, I64Array, IntervalArray, ListArray, + NaiveDateArray, NaiveDateTimeArray, StructArray, Utf8Array, }; use risingwave_common::types::*; use risingwave_pb::expr::expr_node::Type; @@ -502,6 +502,7 @@ pub fn new_date_trunc_expr( } } +/// Create a new binary expression. pub fn new_binary_expr( expr_type: Type, ret: DataType, @@ -662,6 +663,9 @@ pub fn new_binary_expr( }, } } + Type::Pow => Box::new(BinaryExpression::::new( + l, r, ret, pow_f64, + )), Type::Extract => build_extract_expr(ret, l, r)?, Type::AtTimeZone => build_at_time_zone_expr(ret, l, r)?, Type::CastWithTimeZone => build_cast_with_time_zone_expr(ret, l, r)?, diff --git a/src/expr/src/expr/expr_binary_nullable.rs b/src/expr/src/expr/expr_binary_nullable.rs index d17588f706a25..6e3748f2d1852 100644 --- a/src/expr/src/expr/expr_binary_nullable.rs +++ b/src/expr/src/expr/expr_binary_nullable.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -31,6 +31,7 @@ use crate::vector_op::cmp::{ str_is_not_distinct_from, }; use crate::vector_op::conjunction::{and, or}; +use crate::vector_op::format_type::format_type; use crate::{for_all_cmp_variants, ExprError, Result}; macro_rules! gen_is_distinct_from_impl { @@ -164,6 +165,7 @@ pub fn new_nullable_binary_expr( Type::Or => Box::new(BinaryShortCircuitExpression::new(l, r, expr_type)), Type::IsDistinctFrom => new_distinct_from_expr(l, r, ret)?, Type::IsNotDistinctFrom => new_not_distinct_from_expr(l, r, ret)?, + Type::FormatType => new_format_type_expr(l, r, ret), tp => { return Err(ExprError::UnsupportedFunction(format!( "{:?}({:?}, {:?})", @@ -209,6 +211,7 @@ fn build_array_access_expr( DataType::Timestamp => array_access_expression!(NaiveDateTimeArray), DataType::Timestamptz => array_access_expression!(PrimitiveArray::), DataType::Interval => array_access_expression!(IntervalArray), + DataType::Jsonb => array_access_expression!(JsonbArray), DataType::Struct { .. } => array_access_expression!(StructArray), DataType::List { .. 
} => array_access_expression!(ListArray), } @@ -282,6 +285,21 @@ pub fn new_not_distinct_from_expr( Ok(expr) } +pub fn new_format_type_expr( + expr_ia1: BoxedExpression, + expr_ia2: BoxedExpression, + return_type: DataType, +) -> BoxedExpression { + Box::new( + BinaryNullableExpression::::new( + expr_ia1, + expr_ia2, + return_type, + format_type, + ), + ) +} + #[cfg(test)] mod tests { use risingwave_common::row::OwnedRow; @@ -445,4 +463,32 @@ mod tests { assert_eq!(res, expected); } } + + #[test] + fn test_format_type() { + let l = vec![Some(16), Some(21), Some(9527), None]; + let r = vec![Some(0), None, Some(0), Some(0)]; + let target: Vec> = vec![ + Some("boolean".into()), + Some("smallint".into()), + Some("???".into()), + None, + ]; + let expr = make_expression( + Type::FormatType, + &[TypeName::Int32, TypeName::Int32], + &[0, 1], + ); + let vec_executor = build_from_prost(&expr).unwrap(); + + for i in 0..l.len() { + let row = OwnedRow::new(vec![ + l[i].map(|x| x.to_scalar_value()), + r[i].map(|x| x.to_scalar_value()), + ]); + let res = vec_executor.eval_row(&row).unwrap(); + let expected = target[i].as_ref().map(|x| x.into()); + assert_eq!(res, expected); + } + } } diff --git a/src/expr/src/expr/expr_case.rs b/src/expr/src/expr/expr_case.rs index f2088c6df0de3..e1d6dfc2814b7 100644 --- a/src/expr/src/expr/expr_case.rs +++ b/src/expr/src/expr/expr_case.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/expr/expr_coalesce.rs b/src/expr/src/expr/expr_coalesce.rs index 71dc8da8b5c7c..d2e51e7404222 100644 --- a/src/expr/src/expr/expr_coalesce.rs +++ b/src/expr/src/expr/expr_coalesce.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/expr/expr_concat_ws.rs b/src/expr/src/expr/expr_concat_ws.rs index 73e839393058d..6c5da2acafc4a 100644 --- a/src/expr/src/expr/expr_concat_ws.rs +++ b/src/expr/src/expr/expr_concat_ws.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/expr/expr_field.rs b/src/expr/src/expr/expr_field.rs index ece518d49dc8c..06e91130d1c2f 100644 --- a/src/expr/src/expr/expr_field.rs +++ b/src/expr/src/expr/expr_field.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
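The new `test_format_type` above follows the table-driven pattern used throughout these tests: parallel input vectors, an expected output vector, and a loop asserting the per-row result. The dependency-free sketch below keeps the same expected values (OID 16 maps to `boolean`, 21 to `smallint`, unknown OIDs to `???`, NULL to NULL) but replaces the built expression with a plain function and drops the second (type modifier) argument.

```rust
/// Stand-in for the evaluated expression; the OID table mirrors the test's
/// expected output rather than the full `format_type` implementation.
fn format_type(oid: Option<i32>) -> Option<String> {
    match oid {
        Some(16) => Some("boolean".to_string()),
        Some(21) => Some("smallint".to_string()),
        Some(_) => Some("???".to_string()),
        None => None,
    }
}

fn main() {
    let inputs = vec![Some(16), Some(21), Some(9527), None];
    let expected: Vec<Option<String>> = vec![
        Some("boolean".into()),
        Some("smallint".into()),
        Some("???".into()),
        None,
    ];

    for (input, want) in inputs.iter().zip(expected.iter()) {
        let got = format_type(*input);
        assert_eq!(&got, want, "format_type({input:?})");
    }
    println!("all cases passed");
}
```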
@@ -17,7 +17,7 @@ use std::convert::TryFrom; use anyhow::anyhow; use risingwave_common::array::{ArrayImpl, ArrayRef, DataChunk}; use risingwave_common::row::OwnedRow; -use risingwave_common::types::{DataType, Datum}; +use risingwave_common::types::{DataType, Datum, ScalarImpl}; use risingwave_common::util::value_encoding::deserialize_datum; use risingwave_pb::expr::expr_node::{RexNode, Type}; use risingwave_pb::expr::ExprNode; @@ -47,8 +47,15 @@ impl Expression for FieldExpression { } } - fn eval_row(&self, _input: &OwnedRow) -> Result { - Err(anyhow!("expects a struct array ref").into()) + fn eval_row(&self, input: &OwnedRow) -> Result { + let struct_datum = self.input.eval_row(input)?; + struct_datum + .map(|s| match s { + ScalarImpl::Struct(v) => Ok(v.fields()[self.index].clone()), + _ => Err(anyhow!("expects a struct array ref").into()), + }) + .transpose() + .map(|x| x.flatten()) } } diff --git a/src/expr/src/expr/expr_in.rs b/src/expr/src/expr/expr_in.rs index 493a40ce6085d..e62b9f290d46f 100644 --- a/src/expr/src/expr/expr_in.rs +++ b/src/expr/src/expr/expr_in.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,10 +16,10 @@ use std::collections::HashSet; use std::fmt::Debug; use std::sync::Arc; -use itertools::Itertools; use risingwave_common::array::{ArrayBuilder, ArrayRef, BoolArrayBuilder, DataChunk}; use risingwave_common::row::OwnedRow; use risingwave_common::types::{DataType, Datum, Scalar, ToOwnedDatum}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::{bail, ensure}; use risingwave_pb::expr::expr_node::{RexNode, Type}; use risingwave_pb::expr::ExprNode; @@ -28,7 +28,7 @@ use crate::expr::{build_from_prost, BoxedExpression, Expression}; use crate::{ExprError, Result}; #[derive(Debug)] -pub(crate) struct InExpression { +pub struct InExpression { left: BoxedExpression, set: HashSet, return_type: DataType, @@ -74,7 +74,7 @@ impl Expression for InExpression { fn eval(&self, input: &DataChunk) -> Result { let input_array = self.left.eval_checked(input)?; let mut output_array = BoolArrayBuilder::new(input_array.len()); - for (data, vis) in input_array.iter().zip_eq(input.vis().iter()) { + for (data, vis) in input_array.iter().zip_eq_fast(input.vis().iter()) { if vis { let ret = self.exists(&data.to_owned_datum()); output_array.append(ret); diff --git a/src/expr/src/expr/expr_input_ref.rs b/src/expr/src/expr/expr_input_ref.rs index c64258d25c105..1a68736580859 100644 --- a/src/expr/src/expr/expr_input_ref.rs +++ b/src/expr/src/expr/expr_input_ref.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ use risingwave_pb::expr::ExprNode; use crate::expr::Expression; use crate::{bail, ensure, ExprError, Result}; -/// `InputRefExpression` references to a column in input relation +/// A reference to a column in input relation. 
#[derive(Debug, Clone)] pub struct InputRefExpression { return_type: DataType, diff --git a/src/expr/src/expr/expr_is_null.rs b/src/expr/src/expr/expr_is_null.rs index 71de06b0c87fe..35022becdc0a2 100644 --- a/src/expr/src/expr/expr_is_null.rs +++ b/src/expr/src/expr/expr_is_null.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/expr/expr_literal.rs b/src/expr/src/expr/expr_literal.rs index c1cb4f4465f08..40d8b21083f5f 100644 --- a/src/expr/src/expr/expr_literal.rs +++ b/src/expr/src/expr/expr_literal.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ use risingwave_pb::expr::ExprNode; use crate::expr::Expression; use crate::{bail, ensure, ExprError, Result}; +/// A literal expression. #[derive(Debug)] pub struct LiteralExpression { return_type: DataType, diff --git a/src/expr/src/expr/expr_nested_construct.rs b/src/expr/src/expr/expr_nested_construct.rs index fc6693fc47253..038907ed51039 100644 --- a/src/expr/src/expr/expr_nested_construct.rs +++ b/src/expr/src/expr/expr_nested_construct.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/expr/expr_quaternary_bytes.rs b/src/expr/src/expr/expr_quaternary_bytes.rs index b1d0cf7835362..74f9e6904c518 100644 --- a/src/expr/src/expr/expr_quaternary_bytes.rs +++ b/src/expr/src/expr/expr_quaternary_bytes.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/expr/expr_regexp.rs b/src/expr/src/expr/expr_regexp.rs index 0f184aadb663e..2f3348f6cea4d 100644 --- a/src/expr/src/expr/expr_regexp.rs +++ b/src/expr/src/expr/expr_regexp.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ use risingwave_common::array::{ }; use risingwave_common::row::OwnedRow; use risingwave_common::types::{DataType, Datum, ScalarImpl}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::value_encoding::deserialize_datum; use risingwave_pb::expr::expr_node::{RexNode, Type}; use risingwave_pb::expr::ExprNode; @@ -103,21 +104,21 @@ impl<'a> TryFrom<&'a ExprNode> for RegexpMatchExpression { let Some(pattern_node) = children.next() else { bail!("Expected argument pattern"); }; - let mut pattern = match &pattern_node.rex_node { - Some(RexNode::Constant(pattern_value)) => { - let pattern_scalar = deserialize_datum( + let mut pattern = match &pattern_node.get_rex_node()? { + RexNode::Constant(pattern_value) => { + let pattern_datum = deserialize_datum( pattern_value.get_body().as_slice(), &DataType::from(pattern_node.get_return_type().unwrap()), ) - .map_err(|e| ExprError::Internal(e.into()))? 
- .unwrap(); - let ScalarImpl::Utf8(pattern) = pattern_scalar else { - bail!("Expected pattern to be an String"); - }; - pattern.to_string() + .map_err(|e| ExprError::Internal(e.into()))?; + + match pattern_datum { + Some(ScalarImpl::Utf8(pattern)) => pattern.to_string(), + // NULL pattern + None => NULL_PATTERN.to_string(), + _ => bail!("Expected pattern to be an String"), + } } - // NULL pattern - None => NULL_PATTERN.to_string(), _ => { return Err(ExprError::UnsupportedFunction( "non-constant pattern in regexp_match".to_string(), @@ -126,23 +127,23 @@ impl<'a> TryFrom<&'a ExprNode> for RegexpMatchExpression { }; let flags = if let Some(flags_node) = children.next() { - match &flags_node.rex_node { - Some(RexNode::Constant(flags_value)) => { - let flags_scalar = deserialize_datum( + match &flags_node.get_rex_node()? { + RexNode::Constant(flags_value) => { + let flags_datum = deserialize_datum( flags_value.get_body().as_slice(), &DataType::from(flags_node.get_return_type().unwrap()), ) - .map_err(|e| ExprError::Internal(e.into()))? - .unwrap(); - let ScalarImpl::Utf8(flags) = flags_scalar else { - bail!("Expected flags to be an String"); - }; - flags.to_string() - } - // NULL flag - None => { - pattern = NULL_PATTERN.to_string(); - "".to_string() + .map_err(|e| ExprError::Internal(e.into()))?; + + match flags_datum { + Some(ScalarImpl::Utf8(flags)) => flags.to_string(), + // NULL flag + None => { + pattern = NULL_PATTERN.to_string(); + "".to_string() + } + _ => bail!("Expected flags to be an String"), + } } _ => { return Err(ExprError::UnsupportedFunction( @@ -205,7 +206,7 @@ impl Expression for RegexpMatchExpression { }, ); - for (text, vis) in text_arr.iter().zip_eq(input.vis().iter()) { + for (text, vis) in text_arr.iter().zip_eq_fast(input.vis().iter()) { if !vis { output.append_null(); } else if let Some(list) = self.match_one(text) { diff --git a/src/expr/src/expr/expr_some_all.rs b/src/expr/src/expr/expr_some_all.rs index 05cc950c5f641..58ce0094463f6 100644 --- a/src/expr/src/expr/expr_some_all.rs +++ b/src/expr/src/expr/expr_some_all.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ use itertools::{multizip, Itertools}; use risingwave_common::array::{Array, ArrayMeta, ArrayRef, BoolArray, DataChunk}; use risingwave_common::row::OwnedRow; use risingwave_common::types::{DataType, Datum, Scalar, ScalarImpl, ScalarRefImpl}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::expr::expr_node::Type; use super::{BoxedExpression, Expression}; @@ -120,8 +121,10 @@ impl Expression for SomeAllExpression { match bitmap { Some(bitmap) => { - for ((left, right), visible) in - multizip((arr_left.iter(), arr_right.iter())).zip_eq(bitmap.iter()) + for ((left, right), visible) in arr_left + .iter() + .zip_eq_fast(arr_right.iter()) + .zip_eq_fast(bitmap.iter()) { if !visible { num_array.push(None); diff --git a/src/expr/src/expr/expr_ternary_bytes.rs b/src/expr/src/expr/expr_ternary_bytes.rs index c868330d561da..e12ae34dd10a5 100644 --- a/src/expr/src/expr/expr_ternary_bytes.rs +++ b/src/expr/src/expr/expr_ternary_bytes.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
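`FieldExpression::eval_row` (a few hunks above) turns an `Option` of a fallible field lookup into a `Result` of an optional value via `.transpose()` followed by `Option::flatten()`. The same idiom in isolation, with plain std types as stand-ins for the real scalar/struct values:

```rust
#[derive(Debug, Clone, PartialEq)]
enum Scalar {
    Int(i32),
    Struct(Vec<Option<Scalar>>),
}

type Datum = Option<Scalar>;

/// Extract field `index` from a (possibly NULL) struct datum, mirroring the
/// control flow of the new `FieldExpression::eval_row`.
fn field_of(datum: Datum, index: usize) -> Result<Datum, String> {
    datum
        .map(|s| match s {
            Scalar::Struct(fields) => Ok(fields[index].clone()),
            _ => Err("expects a struct".to_string()),
        })
        // Option<Result<Datum, _>> -> Result<Option<Datum>, _>
        .transpose()
        // Option<Datum> == Option<Option<Scalar>> -> Datum
        .map(|x| x.flatten())
}

fn main() {
    let row = Some(Scalar::Struct(vec![Some(Scalar::Int(42)), None]));
    assert_eq!(field_of(row.clone(), 0), Ok(Some(Scalar::Int(42))));
    assert_eq!(field_of(row, 1), Ok(None));
    // A NULL struct yields a NULL field rather than an error.
    assert_eq!(field_of(None, 0), Ok(None));
    // A non-struct input is an error.
    assert!(field_of(Some(Scalar::Int(1)), 0).is_err());
}
```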
diff --git a/src/expr/src/expr/expr_to_char_const_tmpl.rs b/src/expr/src/expr/expr_to_char_const_tmpl.rs index ee9741758eedb..aa3fe82623f71 100644 --- a/src/expr/src/expr/expr_to_char_const_tmpl.rs +++ b/src/expr/src/expr/expr_to_char_const_tmpl.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,10 +15,10 @@ use std::fmt::Write; use std::sync::Arc; -use itertools::Itertools; use risingwave_common::array::{Array, ArrayBuilder, NaiveDateTimeArray, Utf8ArrayBuilder}; use risingwave_common::row::OwnedRow; use risingwave_common::types::{DataType, Datum, ScalarImpl}; +use risingwave_common::util::iter_util::ZipEqFast; use super::Expression; use crate::vector_op::to_char::ChronoPattern; @@ -46,7 +46,7 @@ impl Expression for ExprToCharConstTmpl { let data_arr = self.child.eval_checked(input)?; let data_arr: &NaiveDateTimeArray = data_arr.as_ref().into(); let mut output = Utf8ArrayBuilder::new(input.capacity()); - for (data, vis) in data_arr.iter().zip_eq(input.vis().iter()) { + for (data, vis) in data_arr.iter().zip_eq_fast(input.vis().iter()) { if !vis { output.append_null(); } else if let Some(data) = data { diff --git a/src/expr/src/expr/expr_to_timestamp_const_tmpl.rs b/src/expr/src/expr/expr_to_timestamp_const_tmpl.rs index 7e5c57f462929..94003542b0af5 100644 --- a/src/expr/src/expr/expr_to_timestamp_const_tmpl.rs +++ b/src/expr/src/expr/expr_to_timestamp_const_tmpl.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,10 +14,10 @@ use std::sync::Arc; -use itertools::Itertools; use risingwave_common::array::{Array, ArrayBuilder, NaiveDateTimeArrayBuilder, Utf8Array}; use risingwave_common::row::OwnedRow; use risingwave_common::types::{DataType, Datum, ScalarImpl}; +use risingwave_common::util::iter_util::ZipEqFast; use super::Expression; use crate::vector_op::to_char::ChronoPattern; @@ -46,7 +46,7 @@ impl Expression for ExprToTimestampConstTmpl { let data_arr = self.child.eval_checked(input)?; let data_arr: &Utf8Array = data_arr.as_ref().into(); let mut output = NaiveDateTimeArrayBuilder::new(input.capacity()); - for (data, vis) in data_arr.iter().zip_eq(input.vis().iter()) { + for (data, vis) in data_arr.iter().zip_eq_fast(input.vis().iter()) { if !vis { output.append_null(); } else if let Some(data) = data { diff --git a/src/expr/src/expr/expr_udf.rs b/src/expr/src/expr/expr_udf.rs index d06ae5fa600c8..b64f01add35c0 100644 --- a/src/expr/src/expr/expr_udf.rs +++ b/src/expr/src/expr/expr_udf.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/expr/expr_unary.rs b/src/expr/src/expr/expr_unary.rs index c4876c5cf418c..00f9151078a0c 100644 --- a/src/expr/src/expr/expr_unary.rs +++ b/src/expr/src/expr/expr_unary.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -19,16 +19,17 @@ use risingwave_common::buffer::Bitmap; use risingwave_common::types::*; use risingwave_pb::expr::expr_node::Type as ProstType; +use super::expr_is_null::{IsNotNullExpression, IsNullExpression}; use super::template::{UnaryBytesExpression, UnaryExpression}; -use crate::expr::expr_is_null::{IsNotNullExpression, IsNullExpression}; -use crate::expr::template_fast::BooleanUnaryExpression; -use crate::expr::{template_fast, BoxedExpression, Expression}; +use super::template_fast::BooleanUnaryExpression; +use super::{template_fast, BoxedExpression, Expression}; use crate::vector_op::arithmetic_op::{decimal_abs, general_abs, general_neg}; use crate::vector_op::ascii::ascii; use crate::vector_op::bitwise_op::general_bitnot; use crate::vector_op::cast::*; use crate::vector_op::cmp::{is_false, is_not_false, is_not_true, is_true}; use crate::vector_op::conjunction; +use crate::vector_op::exp::exp_f64; use crate::vector_op::length::{bit_length, length_default, octet_length}; use crate::vector_op::lower::lower; use crate::vector_op::ltrim::ltrim; @@ -121,6 +122,7 @@ macro_rules! gen_round_expr { }; } +/// Create a new unary expression. pub fn new_unary_expr( expr_type: ProstType, return_type: DataType, @@ -287,12 +289,17 @@ pub fn new_unary_expr( (ProstType::Ceil, _, _) => { gen_round_expr! {"Ceil", child_expr, return_type, ceil_f64, ceil_decimal} } - (ProstType::Floor, _, _) => { + (ProstType::Floor, DataType::Float64, DataType::Float64) => { gen_round_expr! {"Floor", child_expr, return_type, floor_f64, floor_decimal} } (ProstType::Round, _, _) => { gen_round_expr! {"Ceil", child_expr, return_type, round_f64, round_decimal} } + (ProstType::Exp, _, _) => Box::new(UnaryExpression::::new( + child_expr, + return_type, + exp_f64, + )), (ProstType::ToTimestamp, DataType::Timestamptz, DataType::Float64) => { Box::new(UnaryExpression::::new( child_expr, @@ -351,7 +358,7 @@ mod tests { use risingwave_pb::data::data_type::TypeName; use risingwave_pb::data::DataType; use risingwave_pb::expr::expr_node::{RexNode, Type}; - use risingwave_pb::expr::FunctionCall; + use risingwave_pb::expr::{ExprNode, FunctionCall}; use super::super::*; use crate::expr::test_utils::{make_expression, make_input_ref}; diff --git a/src/expr/src/expr/expr_vnode.rs b/src/expr/src/expr/expr_vnode.rs index 2075355cac558..7f951e12accbb 100644 --- a/src/expr/src/expr/expr_vnode.rs +++ b/src/expr/src/expr/expr_vnode.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/expr/mod.rs b/src/expr/src/expr/mod.rs index 572f85d8c58d2..cc77ebfe5d049 100644 --- a/src/expr/src/expr/mod.rs +++ b/src/expr/src/expr/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,13 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. -mod agg; -pub mod build_expr_from_prost; -pub mod data_types; +//! Expressions in RisingWave. +//! +//! All expressions are implemented under the [`Expression`] trait. +//! +//! ## Construction +//! +//! Expressions can be constructed by functions like [`new_binary_expr`], +//! which returns a [`BoxedExpression`]. +//! +//! 
They can also be transformed from the prost [`ExprNode`] using the [`build_from_prost`] +//! function. +//! +//! ## Evaluation +//! +//! Expressions can be evaluated using the [`eval`] function. +//! +//! [`ExprNode`]: risingwave_pb::expr::ExprNode +//! [`eval`]: Expression::eval + +// These modules define concrete expression structures. mod expr_array_concat; mod expr_binary_bytes; -pub mod expr_binary_nonnull; -pub mod expr_binary_nullable; +mod expr_binary_nonnull; +mod expr_binary_nullable; mod expr_case; mod expr_coalesce; mod expr_concat_ws; @@ -35,40 +52,33 @@ mod expr_ternary_bytes; mod expr_to_char_const_tmpl; mod expr_to_timestamp_const_tmpl; mod expr_udf; -pub mod expr_unary; +mod expr_unary; mod expr_vnode; + +mod agg; +mod build_expr_from_prost; +pub(crate) mod data_types; mod template; mod template_fast; +pub mod test_utils; -use std::convert::TryFrom; use std::sync::Arc; -pub use agg::AggKind; -pub use expr_input_ref::InputRefExpression; -pub use expr_literal::*; use risingwave_common::array::{ArrayRef, DataChunk}; use risingwave_common::row::OwnedRow; use risingwave_common::types::{DataType, Datum}; -use risingwave_pb::expr::ExprNode; +pub use self::agg::AggKind; +pub use self::build_expr_from_prost::build_from_prost; +pub use self::expr_binary_nonnull::new_binary_expr; +pub use self::expr_input_ref::InputRefExpression; +pub use self::expr_literal::LiteralExpression; +pub use self::expr_unary::new_unary_expr; use super::Result; -use crate::expr::build_expr_from_prost::*; -use crate::expr::expr_array_concat::ArrayConcatExpression; -use crate::expr::expr_case::CaseExpression; -use crate::expr::expr_coalesce::CoalesceExpression; -use crate::expr::expr_concat_ws::ConcatWsExpression; -use crate::expr::expr_field::FieldExpression; -use crate::expr::expr_in::InExpression; -use crate::expr::expr_nested_construct::NestedConstructExpression; -use crate::expr::expr_regexp::RegexpMatchExpression; -use crate::expr::expr_udf::UdfExpression; -use crate::expr::expr_vnode::VnodeExpression; -use crate::ExprError; - -pub type ExpressionRef = Arc; /// Instance of an expression pub trait Expression: std::fmt::Debug + Sync + Send { + /// Get the return data type. fn return_type(&self) -> DataType; /// Eval the result with extra checks. @@ -91,6 +101,7 @@ pub trait Expression: std::fmt::Debug + Sync + Send { /// Evaluate the expression in row-based execution. fn eval_row(&self, input: &OwnedRow) -> Result; + /// Wrap the expression in a Box. fn boxed(self) -> BoxedExpression where Self: Sized + Send + 'static, @@ -99,67 +110,8 @@ pub trait Expression: std::fmt::Debug + Sync + Send { } } +/// An owned dynamically typed [`Expression`]. 
pub type BoxedExpression = Box; -pub fn build_from_prost(prost: &ExprNode) -> Result { - use risingwave_pb::expr::expr_node::Type::*; - - match prost.get_expr_type().unwrap() { - // Fixed number of arguments and based on `Unary/Binary/Ternary/...Expression` - Cast | Upper | Lower | Md5 | Not | IsTrue | IsNotTrue | IsFalse | IsNotFalse | IsNull - | IsNotNull | Neg | Ascii | Abs | Ceil | Floor | Round | BitwiseNot | CharLength - | BoolOut | OctetLength | BitLength | ToTimestamp => build_unary_expr_prost(prost), - Equal | NotEqual | LessThan | LessThanOrEqual | GreaterThan | GreaterThanOrEqual | Add - | Subtract | Multiply | Divide | Modulus | Extract | RoundDigit | TumbleStart - | Position | BitwiseShiftLeft | BitwiseShiftRight | BitwiseAnd | BitwiseOr | BitwiseXor - | ConcatOp | AtTimeZone | CastWithTimeZone => build_binary_expr_prost(prost), - And | Or | IsDistinctFrom | IsNotDistinctFrom | ArrayAccess => { - build_nullable_binary_expr_prost(prost) - } - ToChar => build_to_char_expr(prost), - ToTimestamp1 => build_to_timestamp_expr(prost), - Length => build_length_expr(prost), - Replace => build_replace_expr(prost), - Like => build_like_expr(prost), - Repeat => build_repeat_expr(prost), - SplitPart => build_split_part_expr(prost), - Translate => build_translate_expr(prost), - - // Variable number of arguments and based on `Unary/Binary/Ternary/...Expression` - Substr => build_substr_expr(prost), - Overlay => build_overlay_expr(prost), - Trim => build_trim_expr(prost), - Ltrim => build_ltrim_expr(prost), - Rtrim => build_rtrim_expr(prost), - DateTrunc => build_date_trunc_expr(prost), - - // Dedicated types - All | Some => build_some_all_expr_prost(prost), - In => InExpression::try_from(prost).map(Expression::boxed), - Case => CaseExpression::try_from(prost).map(Expression::boxed), - Coalesce => CoalesceExpression::try_from(prost).map(Expression::boxed), - ConcatWs => ConcatWsExpression::try_from(prost).map(Expression::boxed), - ConstantValue => LiteralExpression::try_from(prost).map(Expression::boxed), - InputRef => InputRefExpression::try_from(prost).map(Expression::boxed), - Field => FieldExpression::try_from(prost).map(Expression::boxed), - Array => NestedConstructExpression::try_from(prost).map(Expression::boxed), - Row => NestedConstructExpression::try_from(prost).map(Expression::boxed), - RegexpMatch => RegexpMatchExpression::try_from(prost).map(Expression::boxed), - ArrayCat | ArrayAppend | ArrayPrepend => { - // Now we implement these three functions as a single expression for the - // sake of simplicity. If performance matters at some time, we can split - // the implementation to improve performance. - ArrayConcatExpression::try_from(prost).map(Expression::boxed) - } - Vnode => VnodeExpression::try_from(prost).map(Expression::boxed), - Now => build_now_expr(prost), - Udf => UdfExpression::try_from(prost).map(Expression::boxed), - _ => Err(ExprError::UnsupportedFunction(format!( - "{:?}", - prost.get_expr_type() - ))), - } -} - -mod test_utils; -pub use test_utils::*; +/// A reference to a dynamically typed [`Expression`]. +pub type ExpressionRef = Arc; diff --git a/src/expr/src/expr/template.rs b/src/expr/src/expr/template.rs index cf1803c2baef8..9b3206a6c0c2b 100644 --- a/src/expr/src/expr/template.rs +++ b/src/expr/src/expr/template.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
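The reorganized `expr/mod.rs` above keeps the concrete `expr_*` modules private and exposes only the `Expression` trait, the `BoxedExpression`/`ExpressionRef` aliases, and a `boxed()` helper. A cut-down sketch of that trait-object facade; the `eval_row` signature is simplified to plain integers instead of the real `DataChunk`/`OwnedRow` types:

```rust
use std::sync::Arc;

/// Simplified stand-in for the real `Expression` trait.
trait Expression: std::fmt::Debug + Send + Sync {
    fn eval_row(&self, input: &[i64]) -> i64;

    /// Wrap the expression in a `Box`, as the trait in the patch does.
    fn boxed(self) -> BoxedExpression
    where
        Self: Sized + Send + 'static,
    {
        Box::new(self)
    }
}

/// An owned dynamically typed expression.
type BoxedExpression = Box<dyn Expression>;
/// A shared reference to a dynamically typed expression.
type ExpressionRef = Arc<dyn Expression>;

#[derive(Debug)]
struct InputRef(usize);

impl Expression for InputRef {
    fn eval_row(&self, input: &[i64]) -> i64 {
        input[self.0]
    }
}

fn main() {
    let boxed: BoxedExpression = InputRef(1).boxed();
    let shared: ExpressionRef = Arc::new(InputRef(0));
    let row = [10, 20];
    println!("boxed -> {}, shared -> {}", boxed.eval_row(&row), shared.eval_row(&row));
}
```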
@@ -17,11 +17,12 @@ use std::fmt; use std::sync::Arc; -use itertools::{multizip, Itertools}; +use itertools::multizip; use paste::paste; use risingwave_common::array::{Array, ArrayBuilder, ArrayImpl, ArrayRef, DataChunk, Utf8Array}; use risingwave_common::row::OwnedRow; use risingwave_common::types::{option_as_scalar_ref, DataType, Datum, Scalar}; +use risingwave_common::util::iter_util::ZipEqDebug; use crate::expr::{BoxedExpression, Expression}; @@ -38,7 +39,7 @@ macro_rules! gen_eval { let mut output_array = <$OA as Array>::Builder::with_meta(data_chunk.capacity(), (&self.return_type).into()); Ok(Arc::new(match bitmap { Some(bitmap) => { - for (($([], )*), visible) in multizip(($([].iter(), )*)).zip_eq(bitmap.iter()) { + for (($([], )*), visible) in multizip(($([].iter(), )*)).zip_eq_debug(bitmap.iter()) { if !visible { output_array.append_null(); continue; diff --git a/src/expr/src/expr/template_fast.rs b/src/expr/src/expr/template_fast.rs index 78cd0838f93e2..70eeca15bf2c6 100644 --- a/src/expr/src/expr/template_fast.rs +++ b/src/expr/src/expr/template_fast.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/expr/test_utils.rs b/src/expr/src/expr/test_utils.rs index eb5036c95f79a..d3107e2faf01d 100644 --- a/src/expr/src/expr/test_utils.rs +++ b/src/expr/src/expr/test_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use itertools::Itertools; +//! Helper functions to construct prost [`ExprNode`] for test. + use risingwave_common::types::ScalarImpl; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::value_encoding::serialize_datum; use risingwave_pb::data::data_type::TypeName; use risingwave_pb::data::{DataType as ProstDataType, DataType, Datum as ProstDatum}; @@ -23,7 +25,7 @@ use risingwave_pb::expr::{ExprNode, FunctionCall, InputRefExpr}; pub fn make_expression(kind: Type, rets: &[TypeName], indices: &[i32]) -> ExprNode { let mut exprs = Vec::new(); - for (idx, ret) in indices.iter().zip_eq(rets.iter()) { + for (idx, ret) in indices.iter().zip_eq_fast(rets.iter()) { exprs.push(make_input_ref(*idx, *ret)); } let function_call = FunctionCall { children: exprs }; diff --git a/src/expr/src/lib.rs b/src/expr/src/lib.rs index a87b2336da6f7..d7f964766c84d 100644 --- a/src/expr/src/lib.rs +++ b/src/expr/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,26 +13,20 @@ // limitations under the License. 
#![allow(rustdoc::private_intra_doc_links)] -#![feature(trait_alias)] #![feature(let_chains)] -#![feature(binary_heap_drain_sorted)] -#![feature(binary_heap_into_iter_sorted)] -#![feature(is_sorted)] #![feature(fn_traits)] #![feature(assert_matches)] #![feature(lint_reasons)] -#![feature(type_alias_impl_trait)] -#![feature(generators)] #![feature(iterator_try_collect)] #![feature(exclusive_range_pattern)] #![feature(once_cell)] +#![feature(try_blocks)] -pub mod error; +mod error; pub mod expr; pub mod sig; pub mod table_function; pub mod vector_op; -pub use error::ExprError; -pub use risingwave_common::{bail, ensure}; -pub type Result = std::result::Result; +pub use error::{ExprError, Result}; +use risingwave_common::{bail, ensure}; diff --git a/src/expr/src/sig/agg.rs b/src/expr/src/sig/agg.rs index 1e6c24ce38059..5ea282ba37571 100644 --- a/src/expr/src/sig/agg.rs +++ b/src/expr/src/sig/agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -143,8 +143,25 @@ pub fn infer_return_type(agg_kind: &AggKind, inputs: &[DataType]) -> Option DataType::Interval, _ => return None, }, + (AggKind::Sum, _) => return None, + // StdDev/Var, stddev_pop, stddev_samp, var_pop, var_samp + ( + AggKind::StddevPop | AggKind::StddevSamp | AggKind::VarPop | AggKind::VarSamp, + [input], + ) => match input { + DataType::Int16 | DataType::Int32 | DataType::Int64 | DataType::Decimal => { + DataType::Decimal + } + DataType::Float32 | DataType::Float64 => DataType::Float64, + _ => return None, + }, + + (AggKind::StddevPop | AggKind::StddevSamp | AggKind::VarPop | AggKind::VarSamp, _) => { + return None + } + (AggKind::Sum0, [DataType::Int64]) => DataType::Int64, (AggKind::Sum0, _) => return None, diff --git a/src/expr/src/sig/cast.rs b/src/expr/src/sig/cast.rs index 7c44ea3dbad9a..8a02c47176338 100644 --- a/src/expr/src/sig/cast.rs +++ b/src/expr/src/sig/cast.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -90,6 +90,7 @@ pub static CAST_MAP: LazyLock = LazyLock::new(|| { T::Timestamptz, T::Time, T::Interval, + T::Jsonb, ] { m.insert((t, T::Varchar), CastContext::Assign); m.insert((T::Varchar, t), CastContext::Explicit); diff --git a/src/expr/src/sig/func.rs b/src/expr/src/sig/func.rs index 1654b41080e0d..aaf47d1ca3664 100644 --- a/src/expr/src/sig/func.rs +++ b/src/expr/src/sig/func.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
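The new arms above give stddev/variance aggregates a return type derived from their input: exact numeric inputs stay exact, floats stay floating point. A toy version of that rule, using an illustrative enum rather than the real `DataType`:

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Ty {
    Int16,
    Int32,
    Int64,
    Decimal,
    Float32,
    Float64,
    Varchar,
}

// Mirror of the inference rule: integers/decimals -> Decimal, floats -> Float64,
// anything else is not a valid input for stddev/var aggregates.
fn stddev_var_return_type(input: Ty) -> Option<Ty> {
    match input {
        Ty::Int16 | Ty::Int32 | Ty::Int64 | Ty::Decimal => Some(Ty::Decimal),
        Ty::Float32 | Ty::Float64 => Some(Ty::Float64),
        _ => None,
    }
}

fn main() {
    assert_eq!(stddev_var_return_type(Ty::Int32), Some(Ty::Decimal));
    assert_eq!(stddev_var_return_type(Ty::Float32), Some(Ty::Float64));
    assert_eq!(stddev_var_return_type(Ty::Varchar), None);
}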
@@ -172,6 +172,8 @@ fn build_type_derive_map() -> FuncSigMap { &[T::Int16, T::Int32, T::Int64, T::Decimal], ); map.insert(E::RoundDigit, vec![T::Decimal, T::Int32], T::Decimal); + map.insert(E::Pow, vec![T::Float64, T::Float64], T::Float64); + map.insert(E::Exp, vec![T::Float64], T::Float64); // build bitwise operator // bitwise operator @@ -270,6 +272,7 @@ fn build_type_derive_map() -> FuncSigMap { for e in [E::Replace, E::Translate] { map.insert(e, vec![T::Varchar, T::Varchar, T::Varchar], T::Varchar); } + map.insert(E::FormatType, vec![T::Int32, T::Int32], T::Varchar); map.insert( E::Overlay, vec![T::Varchar, T::Varchar, T::Int32], diff --git a/src/expr/src/sig/mod.rs b/src/expr/src/sig/mod.rs index 1912e72759a74..c2cb8af843ac2 100644 --- a/src/expr/src/sig/mod.rs +++ b/src/expr/src/sig/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/table_function/generate_series.rs b/src/expr/src/table_function/generate_series.rs index da83446156f83..f6555ef2ad3e0 100644 --- a/src/expr/src/table_function/generate_series.rs +++ b/src/expr/src/table_function/generate_series.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ use risingwave_common::array::{ NaiveDateTimeArray, }; use risingwave_common::types::{CheckedAdd, IsNegative, Scalar, ScalarRef}; +use risingwave_common::util::iter_util::ZipEqDebug; use super::*; use crate::ExprError; @@ -121,7 +122,7 @@ where Some(bitmap) => { for ((start, stop, step), visible) in multizip((arr_start.iter(), arr_stop.iter(), arr_step.iter())) - .zip_eq(bitmap.iter()) + .zip_eq_debug(bitmap.iter()) { let array = if !visible { empty_array(self.return_type()) diff --git a/src/expr/src/table_function/mod.rs b/src/expr/src/table_function/mod.rs index 0719a7c35debc..77905964e64b9 100644 --- a/src/expr/src/table_function/mod.rs +++ b/src/expr/src/table_function/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/table_function/regexp_matches.rs b/src/expr/src/table_function/regexp_matches.rs index 90637e3bbedd8..aa459de433d51 100644 --- a/src/expr/src/table_function/regexp_matches.rs +++ b/src/expr/src/table_function/regexp_matches.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
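The `map.insert(E::Pow, ...)` and `map.insert(E::Exp, ...)` lines above register fixed signatures for the newly added functions. A minimal stand-alone sketch of such a signature table (the real `FuncSigMap` and the protobuf type enums differ; everything here is illustrative):

use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum Func {
    Pow,
    Exp,
}

#[derive(Clone, PartialEq, Eq, Hash, Debug)]
enum Type {
    Float64,
}

fn main() {
    // (function, argument types) -> return type
    let mut sigs: HashMap<(Func, Vec<Type>), Type> = HashMap::new();
    sigs.insert((Func::Pow, vec![Type::Float64, Type::Float64]), Type::Float64);
    sigs.insert((Func::Exp, vec![Type::Float64]), Type::Float64);

    // Return-type inference is then a plain lookup keyed by the call's argument types.
    let ret = sigs.get(&(Func::Pow, vec![Type::Float64, Type::Float64]));
    assert_eq!(ret, Some(&Type::Float64));
}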
@@ -16,6 +16,7 @@ use std::sync::Arc; use risingwave_common::array::{Array, ArrayRef, DataChunk, ListValue, Utf8Array}; use risingwave_common::types::ScalarImpl; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::value_encoding::deserialize_datum; use risingwave_common::{bail, ensure}; use risingwave_pb::expr::expr_node::RexNode; @@ -73,7 +74,7 @@ impl TableFunction for RegexpMatches { match bitmap { Some(bitmap) => { - for (text, visible) in text_arr.iter().zip_eq(bitmap.iter()) { + for (text, visible) in text_arr.iter().zip_eq_fast(bitmap.iter()) { let array = if !visible { empty_array(self.return_type()) } else if let Some(text) = text { @@ -122,21 +123,21 @@ pub fn new_regexp_matches( let Some(pattern_node) = args.next() else { bail!("Expected argument pattern"); }; - let mut pattern = match &pattern_node.rex_node { - Some(RexNode::Constant(pattern_value)) => { - let pattern_scalar = deserialize_datum( + let mut pattern = match &pattern_node.get_rex_node()? { + RexNode::Constant(pattern_value) => { + let pattern_datum = deserialize_datum( pattern_value.get_body().as_slice(), &DataType::from(pattern_node.get_return_type().unwrap()), ) - .map_err(|e| ExprError::Internal(e.into()))? - .unwrap(); - let ScalarImpl::Utf8(pattern) = pattern_scalar else { - bail!("Expected pattern to be an String"); - }; - pattern.to_string() + .map_err(|e| ExprError::Internal(e.into()))?; + + match pattern_datum { + Some(ScalarImpl::Utf8(pattern)) => pattern.to_string(), + // NULL pattern + None => NULL_PATTERN.to_string(), + _ => bail!("Expected pattern to be an String"), + } } - // NULL pattern - None => NULL_PATTERN.to_string(), _ => { return Err(ExprError::UnsupportedFunction( "non-constant pattern in regexp_match".to_string(), @@ -145,23 +146,23 @@ pub fn new_regexp_matches( }; let flags = if let Some(flags_node) = args.next() { - match &flags_node.rex_node { - Some(RexNode::Constant(flags_value)) => { - let flags_scalar = deserialize_datum( + match &flags_node.get_rex_node()? { + RexNode::Constant(flags_value) => { + let flags_datum = deserialize_datum( flags_value.get_body().as_slice(), &DataType::from(flags_node.get_return_type().unwrap()), ) - .map_err(|e| ExprError::Internal(e.into()))? - .unwrap(); - let ScalarImpl::Utf8(flags) = flags_scalar else { - bail!("Expected flags to be an String"); - }; - flags.to_string() - } - // NULL flag - None => { - pattern = NULL_PATTERN.to_string(); - "".to_string() + .map_err(|e| ExprError::Internal(e.into()))?; + + match flags_datum { + Some(ScalarImpl::Utf8(flags)) => flags.to_string(), + // NULL flag + None => { + pattern = NULL_PATTERN.to_string(); + "".to_string() + } + _ => bail!("Expected flags to be an String"), + } } _ => { return Err(ExprError::UnsupportedFunction( diff --git a/src/expr/src/table_function/unnest.rs b/src/expr/src/table_function/unnest.rs index 86ea531063d37..c43463c250ece 100644 --- a/src/expr/src/table_function/unnest.rs +++ b/src/expr/src/table_function/unnest.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -15,6 +15,7 @@ use std::sync::Arc; use risingwave_common::array::{Array, ArrayRef, DataChunk, ListArray, ListRef}; +use risingwave_common::util::iter_util::ZipEqFast; use super::*; @@ -49,7 +50,7 @@ impl TableFunction for Unnest { match bitmap { Some(bitmap) => { - for (list, visible) in arr_list.iter().zip_eq(bitmap.iter()) { + for (list, visible) in arr_list.iter().zip_eq_fast(bitmap.iter()) { let array = if !visible { empty_array(self.return_type()) } else if let Some(list) = list { diff --git a/src/expr/src/vector_op/agg/aggregator.rs b/src/expr/src/vector_op/agg/aggregator.rs index 7a70c62620915..40658b25f5dbe 100644 --- a/src/expr/src/vector_op/agg/aggregator.rs +++ b/src/expr/src/vector_op/agg/aggregator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/agg/approx_count_distinct.rs b/src/expr/src/vector_op/agg/approx_count_distinct.rs index d6cbcb367f998..fde8e04c545a2 100644 --- a/src/expr/src/vector_op/agg/approx_count_distinct.rs +++ b/src/expr/src/vector_op/agg/approx_count_distinct.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/agg/array_agg.rs b/src/expr/src/vector_op/agg/array_agg.rs index c7e1a575e7b4c..175c7c8564108 100644 --- a/src/expr/src/vector_op/agg/array_agg.rs +++ b/src/expr/src/vector_op/agg/array_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/agg/count_star.rs b/src/expr/src/vector_op/agg/count_star.rs index 281e714c10468..12021cd69c548 100644 --- a/src/expr/src/vector_op/agg/count_star.rs +++ b/src/expr/src/vector_op/agg/count_star.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/agg/filter.rs b/src/expr/src/vector_op/agg/filter.rs index 64430d8382cbb..018bd4bdb9062 100644 --- a/src/expr/src/vector_op/agg/filter.rs +++ b/src/expr/src/vector_op/agg/filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -111,8 +111,7 @@ mod tests { use risingwave_pb::expr::expr_node::Type as ProstType; use super::*; - use crate::expr::expr_binary_nonnull::new_binary_expr; - use crate::expr::{Expression, InputRefExpression, LiteralExpression}; + use crate::expr::{new_binary_expr, Expression, InputRefExpression, LiteralExpression}; #[derive(Clone)] struct MockAgg { diff --git a/src/expr/src/vector_op/agg/functions.rs b/src/expr/src/vector_op/agg/functions.rs index 03c824f0f293a..4ad6a29a5c4ff 100644 --- a/src/expr/src/vector_op/agg/functions.rs +++ b/src/expr/src/vector_op/agg/functions.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ use risingwave_common::array::{Array, ListRef, StructRef}; -use crate::Result; +use crate::{ExprError, Result}; /// Essentially `RTFn` is an alias of the specific Fn. It was aliased not to /// shorten the `where` clause of `GeneralAgg`, but to workaround an compiler @@ -55,18 +55,20 @@ where } use std::convert::From; -use std::ops::Add; +use num_traits::CheckedAdd; use risingwave_common::types::ScalarRef; pub fn sum(result: Option, input: Option) -> Result> where - R: From + Add + Copy, + R: From + CheckedAdd + Copy, { let res = match (result, input) { (_, None) => result, (None, Some(i)) => Some(R::from(i)), - (Some(r), Some(i)) => Some(r + R::from(i)), + (Some(r), Some(i)) => r + .checked_add(&R::from(i)) + .map_or(Err(ExprError::NumericOutOfRange), |x| Ok(Some(x)))?, }; Ok(res) } diff --git a/src/expr/src/vector_op/agg/general_agg.rs b/src/expr/src/vector_op/agg/general_agg.rs index e1ca033671e82..c9e81644e6412 100644 --- a/src/expr/src/vector_op/agg/general_agg.rs +++ b/src/expr/src/vector_op/agg/general_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/agg/general_distinct_agg.rs b/src/expr/src/vector_op/agg/general_distinct_agg.rs index 90c5f073a5be1..97d68565f726c 100644 --- a/src/expr/src/vector_op/agg/general_distinct_agg.rs +++ b/src/expr/src/vector_op/agg/general_distinct_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/agg/general_sorted_grouper.rs b/src/expr/src/vector_op/agg/general_sorted_grouper.rs index 1c70b79efaaab..e4987b21f3463 100644 --- a/src/expr/src/vector_op/agg/general_sorted_grouper.rs +++ b/src/expr/src/vector_op/agg/general_sorted_grouper.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/agg/mod.rs b/src/expr/src/vector_op/agg/mod.rs index 7b5f76433ac8c..5d53fe2c7c016 100644 --- a/src/expr/src/vector_op/agg/mod.rs +++ b/src/expr/src/vector_op/agg/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
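The `sum` change above swaps plain `+` for `num_traits::CheckedAdd`, so integer overflow surfaces as `ExprError::NumericOutOfRange` instead of wrapping or panicking. A self-contained sketch of that accumulation step, assuming the `num-traits` crate (which the hunk itself imports) and a stand-in error type:

use num_traits::CheckedAdd;

#[derive(Debug)]
struct NumericOutOfRange; // stand-in for ExprError::NumericOutOfRange

fn checked_sum<R, I>(acc: Option<R>, input: Option<I>) -> Result<Option<R>, NumericOutOfRange>
where
    R: From<I> + CheckedAdd + Copy,
{
    Ok(match (acc, input) {
        // NULL input leaves the accumulator unchanged.
        (acc, None) => acc,
        // First non-NULL value initializes the accumulator.
        (None, Some(i)) => Some(R::from(i)),
        // Otherwise add with overflow detection.
        (Some(a), Some(i)) => Some(a.checked_add(&R::from(i)).ok_or(NumericOutOfRange)?),
    })
}

fn main() {
    // i16 values widened into an i64 accumulator; overflow would yield Err(NumericOutOfRange).
    let acc = checked_sum::<i64, i16>(Some(40), Some(2)).unwrap();
    assert_eq!(acc, Some(42));
}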
diff --git a/src/expr/src/vector_op/agg/string_agg.rs b/src/expr/src/vector_op/agg/string_agg.rs index 7a8f7ef283f07..a607fe096a67b 100644 --- a/src/expr/src/vector_op/agg/string_agg.rs +++ b/src/expr/src/vector_op/agg/string_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use itertools::Itertools; use risingwave_common::array::{ Array, ArrayBuilder, ArrayBuilderImpl, ArrayImpl, DataChunk, RowRef, }; use risingwave_common::bail; use risingwave_common::row::{Row, RowExt}; use risingwave_common::types::DataType; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::ordered::OrderedRow; use risingwave_common::util::sort_util::{OrderPair, OrderType}; @@ -88,7 +88,7 @@ impl Aggregator for StringAggUnordered { ) { for (value, delim) in agg_col .iter() - .zip_eq(delim_col.iter()) + .zip_eq_fast(delim_col.iter()) .skip(start_row_id) .take(end_row_id - start_row_id) .filter(|(v, _)| v.is_some()) @@ -207,7 +207,7 @@ impl Aggregator for StringAggOrdered { ) { for (row_id, (value, delim)) in agg_col .iter() - .zip_eq(delim_col.iter()) + .zip_eq_fast(delim_col.iter()) .enumerate() .skip(start_row_id) .take(end_row_id - start_row_id) diff --git a/src/expr/src/vector_op/arithmetic_op.rs b/src/expr/src/vector_op/arithmetic_op.rs index 6f311449b0463..0b52185798433 100644 --- a/src/expr/src/vector_op/arithmetic_op.rs +++ b/src/expr/src/vector_op/arithmetic_op.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,9 +14,9 @@ use std::convert::TryInto; use std::fmt::Debug; -use std::ops::Sub; use chrono::{Duration, NaiveDateTime}; +use num_traits::real::Real; use num_traits::{CheckedDiv, CheckedMul, CheckedNeg, CheckedRem, CheckedSub, Signed, Zero}; use risingwave_common::types::{ CheckedAdd, Decimal, IntervalUnit, NaiveDateTimeWrapper, NaiveDateWrapper, NaiveTimeWrapper, @@ -109,6 +109,15 @@ pub fn decimal_abs(decimal: Decimal) -> Result { Ok(Decimal::abs(&decimal)) } +pub fn pow_f64(l: OrderedF64, r: OrderedF64) -> Result { + let res = l.powf(r); + if res.is_infinite() { + Err(ExprError::NumericOutOfRange) + } else { + Ok(res) + } +} + #[inline(always)] pub fn general_atm(l: T1, r: T2, atm: F) -> Result where @@ -124,15 +133,15 @@ pub fn timestamp_timestamp_sub( l: NaiveDateTimeWrapper, r: NaiveDateTimeWrapper, ) -> Result { - let tmp = l.0 - r.0; + let tmp = l.0 - r.0; // this does not overflow or underflow let days = tmp.num_days(); - let ms = tmp.sub(Duration::days(tmp.num_days())).num_milliseconds(); + let ms = (tmp - Duration::days(tmp.num_days())).num_milliseconds(); Ok(IntervalUnit::new(0, days as i32, ms)) } #[inline(always)] pub fn date_date_sub(l: NaiveDateWrapper, r: NaiveDateWrapper) -> Result { - Ok((l.0 - r.0).num_days() as i32) + Ok((l.0 - r.0).num_days() as i32) // this does not overflow or underflow } #[inline(always)] @@ -172,7 +181,9 @@ pub fn date_interval_sub( l: NaiveDateWrapper, r: IntervalUnit, ) -> Result { - interval_date_add::(r.negative(), l) + // TODO: implement `checked_sub` for `NaiveDateTimeWrapper` to handle the edge case of negation + // overflowing. 
+ interval_date_add::(r.checked_neg().ok_or(ExprError::NumericOutOfRange)?, l) } #[inline(always)] @@ -192,7 +203,12 @@ pub fn int_date_add(l: i32, r: NaiveDateWrapper) -> Result(l: NaiveDateWrapper, r: i32) -> Result { - date_int_add::(l, -r) + let date = l.0; + let date_wrapper = date + .checked_sub_signed(chrono::Duration::days(r as i64)) + .map(NaiveDateWrapper::new); + + date_wrapper.ok_or(ExprError::NumericOutOfRange) } #[inline(always)] @@ -208,34 +224,43 @@ pub fn timestamp_interval_sub( l: NaiveDateTimeWrapper, r: IntervalUnit, ) -> Result { - interval_timestamp_add::(r.negative(), l) + interval_timestamp_add::(r.checked_neg().ok_or(ExprError::NumericOutOfRange)?, l) } #[inline(always)] pub fn timestamptz_interval_add(l: i64, r: IntervalUnit) -> Result { - interval_timestamptz_add::(r, l) + timestamptz_interval_inner(l, r, i64::checked_add) } #[inline(always)] pub fn timestamptz_interval_sub(l: i64, r: IntervalUnit) -> Result { - interval_timestamptz_add::(r.negative(), l) + timestamptz_interval_inner(l, r, i64::checked_sub) } #[inline(always)] pub fn interval_timestamptz_add(l: IntervalUnit, r: i64) -> Result { + timestamptz_interval_add::(r, l) +} + +#[inline(always)] +fn timestamptz_interval_inner( + l: i64, + r: IntervalUnit, + f: fn(i64, i64) -> Option, +) -> Result { // Without session TimeZone, we cannot add month/day in local time. See #5826. - // However, we only reject months but accept days, assuming them are always 24-hour and ignoring - // Daylight Saving. - // This is to keep consistent with `tumble_start` of RisingWave / `date_bin` of PostgreSQL. - if l.get_months() != 0 { + if r.get_months() != 0 || r.get_days() != 0 { return Err(ExprError::UnsupportedFunction( - "timestamp with time zone +/- interval of months".into(), + "timestamp with time zone +/- interval of days".into(), )); } - let delta_usecs = l.get_days() as i64 * 24 * 60 * 60 * 1_000_000 + l.get_ms() * 1000; - r.checked_add(delta_usecs) - .ok_or(ExprError::NumericOutOfRange) + let result: Option = try { + let delta_usecs = r.get_ms().checked_mul(1000)?; + f(l, delta_usecs)? 
+ }; + + result.ok_or(ExprError::NumericOutOfRange) } #[inline(always)] @@ -273,7 +298,7 @@ pub fn time_date_add( #[inline(always)] pub fn time_time_sub(l: NaiveTimeWrapper, r: NaiveTimeWrapper) -> Result { - let tmp = l.0 - r.0; + let tmp = l.0 - r.0; // this does not overflow or underflow let ms = tmp.num_milliseconds(); Ok(IntervalUnit::new(0, 0, ms)) } @@ -283,7 +308,13 @@ pub fn time_interval_sub( l: NaiveTimeWrapper, r: IntervalUnit, ) -> Result { - time_interval_add::(l, r.negative()) + let time = l.0; + let (new_time, ignored) = time.overflowing_sub_signed(Duration::milliseconds(r.get_ms())); + if ignored == 0 { + Ok(NaiveTimeWrapper::new(new_time)) + } else { + Err(ExprError::NumericOutOfRange) + } } #[inline(always)] @@ -292,8 +323,12 @@ pub fn time_interval_add( r: IntervalUnit, ) -> Result { let time = l.0; - let new_time = time + Duration::milliseconds(r.get_ms()); - Ok(NaiveTimeWrapper::new(new_time)) + let (new_time, ignored) = time.overflowing_add_signed(Duration::milliseconds(r.get_ms())); + if ignored == 0 { + Ok(NaiveTimeWrapper::new(new_time)) + } else { + Err(ExprError::NumericOutOfRange) + } } #[inline(always)] diff --git a/src/expr/src/vector_op/array_access.rs b/src/expr/src/vector_op/array_access.rs index 17f5fa33026f6..1e460acfddb9d 100644 --- a/src/expr/src/vector_op/array_access.rs +++ b/src/expr/src/vector_op/array_access.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/ascii.rs b/src/expr/src/vector_op/ascii.rs index 27d071eff8b18..e0421d39e0952 100644 --- a/src/expr/src/vector_op/ascii.rs +++ b/src/expr/src/vector_op/ascii.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/bitwise_op.rs b/src/expr/src/vector_op/bitwise_op.rs index 16eb1263356c3..9e45cc8e3fa21 100644 --- a/src/expr/src/vector_op/bitwise_op.rs +++ b/src/expr/src/vector_op/bitwise_op.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/cast.rs b/src/expr/src/vector_op/cast.rs index 0baf309fadcbe..39fc4c3fa6fc9 100644 --- a/src/expr/src/vector_op/cast.rs +++ b/src/expr/src/vector_op/cast.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ use risingwave_common::types::{ DataType, Decimal, IntervalUnit, NaiveDateTimeWrapper, NaiveDateWrapper, NaiveTimeWrapper, OrderedF32, OrderedF64, Scalar, ScalarImpl, ScalarRefImpl, }; +use risingwave_common::util::iter_util::ZipEqFast; use speedate::{Date as SpeedDate, DateTime as SpeedDateTime, Time as SpeedTime}; use crate::{ExprError, Result}; @@ -426,6 +427,7 @@ pub fn literal_parsing( // evaluation). DataType::List { .. } => return Err(None), DataType::Struct(_) => return Err(None), + DataType::Jsonb => return Err(None), DataType::Bytea => str_to_bytea(s)?.into(), }; Ok(scalar) @@ -454,6 +456,7 @@ macro_rules! 
for_all_cast_variants { { varchar, decimal, str_parse, false }, { varchar, boolean, str_to_bool, false }, { varchar, bytea, str_to_bytea, false }, + { varchar, jsonb, str_parse, false }, // `str_to_list` requires `target_elem_type` and is handled elsewhere { boolean, varchar, bool_to_varchar, false }, @@ -467,6 +470,7 @@ macro_rules! for_all_cast_variants { { interval, varchar, general_to_text, false }, { date, varchar, general_to_text, false }, { timestamp, varchar, general_to_text, false }, + { jsonb, varchar, |x, w| general_to_text(x, w), false }, { list, varchar, |x, w| general_to_text(x, w), false }, { boolean, int32, try_cast, false }, @@ -620,8 +624,8 @@ pub fn struct_cast( input .fields_ref() .into_iter() - .zip_eq(source_elem_type.fields.iter()) - .zip_eq(target_elem_type.fields.iter()) + .zip_eq_fast(source_elem_type.fields.iter()) + .zip_eq_fast(target_elem_type.fields.iter()) .map(|((datum_ref, source_elem_type), target_elem_type)| { if source_elem_type == target_elem_type { return Ok(datum_ref.map(|scalar_ref| scalar_ref.into_scalar_impl())); diff --git a/src/expr/src/vector_op/cmp.rs b/src/expr/src/vector_op/cmp.rs index 560a47e604c06..f329595b4d8e3 100644 --- a/src/expr/src/vector_op/cmp.rs +++ b/src/expr/src/vector_op/cmp.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/concat_op.rs b/src/expr/src/vector_op/concat_op.rs index 1ac642bdfa8c0..0dc6da2e3f138 100644 --- a/src/expr/src/vector_op/concat_op.rs +++ b/src/expr/src/vector_op/concat_op.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/conjunction.rs b/src/expr/src/vector_op/conjunction.rs index 2239dc8e50837..2cf0f6c5eda33 100644 --- a/src/expr/src/vector_op/conjunction.rs +++ b/src/expr/src/vector_op/conjunction.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/date_trunc.rs b/src/expr/src/vector_op/date_trunc.rs index 72f6749d6e20f..64f462c7838ff 100644 --- a/src/expr/src/vector_op/date_trunc.rs +++ b/src/expr/src/vector_op/date_trunc.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/exp.rs b/src/expr/src/vector_op/exp.rs new file mode 100644 index 0000000000000..bd52273612a0f --- /dev/null +++ b/src/expr/src/vector_op/exp.rs @@ -0,0 +1,27 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use num_traits::Float; +use risingwave_common::types::OrderedF64; + +use crate::{ExprError, Result}; + +pub fn exp_f64(input: OrderedF64) -> Result { + let res = input.exp(); + if res.is_infinite() { + Err(ExprError::NumericOutOfRange) + } else { + Ok(res) + } +} diff --git a/src/expr/src/vector_op/extract.rs b/src/expr/src/vector_op/extract.rs index 35b309c5f3950..c099c76ea6652 100644 --- a/src/expr/src/vector_op/extract.rs +++ b/src/expr/src/vector_op/extract.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/format_type.rs b/src/expr/src/vector_op/format_type.rs new file mode 100644 index 0000000000000..b18b734989cf6 --- /dev/null +++ b/src/expr/src/vector_op/format_type.rs @@ -0,0 +1,27 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_common::types::DataType; + +use crate::Result; + +#[inline(always)] +pub fn format_type(oid: Option, _typemod: Option) -> Result>> { + // since we don't support type modifier, ignore it. + Ok(oid.map(|i| { + DataType::from_oid(i) + .map(|dt| format!("{}", dt).into_boxed_str()) + .unwrap_or("???".into()) + })) +} diff --git a/src/expr/src/vector_op/length.rs b/src/expr/src/vector_op/length.rs index 3b7b298ba4359..dc071b807afb0 100644 --- a/src/expr/src/vector_op/length.rs +++ b/src/expr/src/vector_op/length.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/like.rs b/src/expr/src/vector_op/like.rs index 4fff0c8cd9efd..c6778ab3e1016 100644 --- a/src/expr/src/vector_op/like.rs +++ b/src/expr/src/vector_op/like.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/lower.rs b/src/expr/src/vector_op/lower.rs index 9e22f10999d8b..c895c10c97f11 100644 --- a/src/expr/src/vector_op/lower.rs +++ b/src/expr/src/vector_op/lower.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
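`pow_f64` (in arithmetic_op.rs above) and the new `exp_f64` follow the same rule: evaluate in `f64` and report an infinite result as out of range rather than returning +/-inf. A plain-`f64` sketch of that behavior, with a stand-in error type:

#[derive(Debug, PartialEq)]
struct NumericOutOfRange; // stand-in for ExprError::NumericOutOfRange

fn pow_f64(l: f64, r: f64) -> Result<f64, NumericOutOfRange> {
    let res = l.powf(r);
    // Overflow of the f64 range shows up as +/-inf; report it as an error.
    if res.is_infinite() { Err(NumericOutOfRange) } else { Ok(res) }
}

fn exp_f64(input: f64) -> Result<f64, NumericOutOfRange> {
    let res = input.exp();
    if res.is_infinite() { Err(NumericOutOfRange) } else { Ok(res) }
}

fn main() {
    assert!((pow_f64(2.0, 10.0).unwrap() - 1024.0).abs() < 1e-9);
    assert!(pow_f64(10.0, 400.0).is_err()); // 10^400 overflows f64
    assert!(exp_f64(1000.0).is_err()); // e^1000 overflows f64
}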
diff --git a/src/expr/src/vector_op/ltrim.rs b/src/expr/src/vector_op/ltrim.rs index ab630a811d9b1..c542d9039ffff 100644 --- a/src/expr/src/vector_op/ltrim.rs +++ b/src/expr/src/vector_op/ltrim.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/md5.rs b/src/expr/src/vector_op/md5.rs index f4a6f2dc2060c..780526d893c84 100644 --- a/src/expr/src/vector_op/md5.rs +++ b/src/expr/src/vector_op/md5.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/mod.rs b/src/expr/src/vector_op/mod.rs index ae10b61addaed..d3f07ad7fed9d 100644 --- a/src/expr/src/vector_op/mod.rs +++ b/src/expr/src/vector_op/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,7 +22,9 @@ pub mod cmp; pub mod concat_op; pub mod conjunction; pub mod date_trunc; +pub mod exp; pub mod extract; +pub mod format_type; pub mod length; pub mod like; pub mod lower; diff --git a/src/expr/src/vector_op/overlay.rs b/src/expr/src/vector_op/overlay.rs index 8bd26e34994d7..e83251f7ff362 100644 --- a/src/expr/src/vector_op/overlay.rs +++ b/src/expr/src/vector_op/overlay.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ use std::fmt::Write; -use crate::Result; +use crate::{ExprError, Result}; #[inline(always)] pub fn overlay(s: &str, new_sub_str: &str, start: i32, writer: &mut dyn Write) -> Result<()> { @@ -34,7 +34,11 @@ pub fn overlay_for( // If start is out of range, attach it to the end. // Note that indices are 1-based. - let start = ((start - 1).max(0) as usize).min(s.len()); + let start = (start + .checked_sub(1) + .ok_or(ExprError::NumericOutOfRange)? + .max(0) as usize) + .min(s.len()); let remaining = start + count; diff --git a/src/expr/src/vector_op/position.rs b/src/expr/src/vector_op/position.rs index 4bc2f6949a953..5e2721687f24e 100644 --- a/src/expr/src/vector_op/position.rs +++ b/src/expr/src/vector_op/position.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/repeat.rs b/src/expr/src/vector_op/repeat.rs index baea10483692e..bd5f490de6008 100644 --- a/src/expr/src/vector_op/repeat.rs +++ b/src/expr/src/vector_op/repeat.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
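The overlay change above replaces `(start - 1).max(0)` with `checked_sub`, so `start == i32::MIN` can no longer underflow; it now becomes a numeric-out-of-range error. A standalone sketch of that clamping (byte offsets, like the surrounding code):

#[derive(Debug)]
struct NumericOutOfRange; // stand-in for ExprError::NumericOutOfRange

// Clamp a 1-based `start` into a valid 0-based byte offset of `s`,
// reporting i32 underflow (start == i32::MIN) as an error.
fn clamp_start(s: &str, start: i32) -> Result<usize, NumericOutOfRange> {
    let zero_based = start.checked_sub(1).ok_or(NumericOutOfRange)?;
    Ok((zero_based.max(0) as usize).min(s.len()))
}

fn main() {
    assert_eq!(clamp_start("abcdef", 3).unwrap(), 2);
    assert_eq!(clamp_start("abcdef", -5).unwrap(), 0); // before the string: clamp to 0
    assert_eq!(clamp_start("abcdef", 100).unwrap(), 6); // past the end: clamp to len
    assert!(clamp_start("abcdef", i32::MIN).is_err()); // would underflow i32
}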
diff --git a/src/expr/src/vector_op/replace.rs b/src/expr/src/vector_op/replace.rs index cad8fd652777f..ef530d4ba02bb 100644 --- a/src/expr/src/vector_op/replace.rs +++ b/src/expr/src/vector_op/replace.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/round.rs b/src/expr/src/vector_op/round.rs index d9d614c79474c..dfb43b73f5c69 100644 --- a/src/expr/src/vector_op/round.rs +++ b/src/expr/src/vector_op/round.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,7 +20,8 @@ pub fn round_digits>(input: Decimal, digits: D) -> Decimal { if digits < 0 { Decimal::zero() } else { - input.round_dp(digits as u32) + // rust_decimal can only handle up to 28 digits of scale + input.round_dp(std::cmp::min(digits as u32, 28)) } } @@ -77,6 +78,8 @@ mod tests { do_test("84818.33333333333333333333333", 4, "84818.3333"); do_test("84818.15", 1, "84818.2"); do_test("21.372736", -1, "0"); + // Maximum of 28 digits + do_test("0", 340, &format!("0.{}", "0".repeat(28))); } #[test] diff --git a/src/expr/src/vector_op/rtrim.rs b/src/expr/src/vector_op/rtrim.rs index 35bc88dc9c531..3257044edeb5d 100644 --- a/src/expr/src/vector_op/rtrim.rs +++ b/src/expr/src/vector_op/rtrim.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/split_part.rs b/src/expr/src/vector_op/split_part.rs index 39797f5c4141a..a3d4a33b6c6a3 100644 --- a/src/expr/src/vector_op/split_part.rs +++ b/src/expr/src/vector_op/split_part.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/substr.rs b/src/expr/src/vector_op/substr.rs index 74cbe69116fba..c3f81cae2c063 100644 --- a/src/expr/src/vector_op/substr.rs +++ b/src/expr/src/vector_op/substr.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ use crate::{bail, Result}; #[inline(always)] pub fn substr_start(s: &str, start: i32, writer: &mut dyn Write) -> Result<()> { - let start = min(max(start - 1, 0) as usize, s.len()); + let start = (start.saturating_sub(1).max(0) as usize).min(s.len()); writer.write_str(&s[start..]).unwrap(); Ok(()) } @@ -36,8 +36,13 @@ pub fn substr_start_for(s: &str, start: i32, count: i32, writer: &mut dyn Write) if count < 0 { bail!("length in substr should be non-negative: {}", count); } - let begin = max(start - 1, 0) as usize; - let end = min(max(start - 1 + count, 0) as usize, s.len()); + let start = start.saturating_sub(1); + // NOTE: we use `s.len()` here as an upper bound. + // This is so it will return an empty slice if it exceeds + // the length of `s`. 
+ // 0 <= begin <= s.len() + let begin = min(max(start, 0) as usize, s.len()); + let end = (start.saturating_add(count).max(0) as usize).min(s.len()); writer.write_str(&s[begin..end]).unwrap(); Ok(()) } diff --git a/src/expr/src/vector_op/tests.rs b/src/expr/src/vector_op/tests.rs index e5559eb099494..e4eca3187f671 100644 --- a/src/expr/src/vector_op/tests.rs +++ b/src/expr/src/vector_op/tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/timestamptz.rs b/src/expr/src/vector_op/timestamptz.rs index bed9d2941f059..12a7ce3d74657 100644 --- a/src/expr/src/vector_op/timestamptz.rs +++ b/src/expr/src/vector_op/timestamptz.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -102,7 +102,7 @@ pub fn timestamptz_at_time_zone(input: i64, time_zone: &str) -> Result ChronoPattern { }); ChronoPatternBuilder { tmpl: chrono_tmpl, - items_builder: |tmpl| StrftimeItems::new(tmpl).into_iter().collect::>(), + items_builder: |tmpl| StrftimeItems::new(tmpl).collect::>(), } .build() } diff --git a/src/expr/src/vector_op/to_timestamp.rs b/src/expr/src/vector_op/to_timestamp.rs index 2b8460b56b463..2ccfc66df8921 100644 --- a/src/expr/src/vector_op/to_timestamp.rs +++ b/src/expr/src/vector_op/to_timestamp.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/translate.rs b/src/expr/src/vector_op/translate.rs index 893ca89fc4ef5..b2c44aa43d9f9 100644 --- a/src/expr/src/vector_op/translate.rs +++ b/src/expr/src/vector_op/translate.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -64,10 +64,10 @@ mod tests { "a之初,b本善cb相近,习相远c", ), ( - "奇点无限 Singularity Data", - "Data ", + "奇点无限 RisingWave Labs", + "Labs ", "1234", - "奇点无限Singul2ri3y1232", + "奇点无限Ri4ingW2ve1234", ), ]; diff --git a/src/expr/src/vector_op/trim.rs b/src/expr/src/vector_op/trim.rs index 520c839c3d939..165991fa04618 100644 --- a/src/expr/src/vector_op/trim.rs +++ b/src/expr/src/vector_op/trim.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/trim_characters.rs b/src/expr/src/vector_op/trim_characters.rs index 5d8cbb9d2b4fc..e61f72600b645 100644 --- a/src/expr/src/vector_op/trim_characters.rs +++ b/src/expr/src/vector_op/trim_characters.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
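The substr hunk above moves to saturating arithmetic so extreme `start`/`count` values cannot overflow `i32` while the slice bounds are computed. A self-contained sketch under the same rules (1-based start, byte indices, empty output when the range misses the string):

fn substr_start_for(s: &str, start: i32, count: i32) -> Result<String, String> {
    if count < 0 {
        return Err(format!("length in substr should be non-negative: {}", count));
    }
    let start = start.saturating_sub(1);
    // Clamp both bounds into 0..=s.len(); saturating_add keeps start + count from
    // wrapping when both arguments are near i32::MAX.
    let begin = (start.max(0) as usize).min(s.len());
    let end = (start.saturating_add(count).max(0) as usize).min(s.len());
    Ok(s[begin..end].to_string())
}

fn main() {
    assert_eq!(substr_start_for("hello", 2, 3).unwrap(), "ell");
    assert_eq!(substr_start_for("hello", -2, 4).unwrap(), "h"); // starts before the string
    assert_eq!(substr_start_for("hello", 10, 5).unwrap(), ""); // starts past the end
    assert_eq!(substr_start_for("hello", i32::MAX, i32::MAX).unwrap(), ""); // no overflow
}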
diff --git a/src/expr/src/vector_op/tumble.rs b/src/expr/src/vector_op/tumble.rs index c09e3302b7695..d5381b00f76d3 100644 --- a/src/expr/src/vector_op/tumble.rs +++ b/src/expr/src/vector_op/tumble.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/expr/src/vector_op/upper.rs b/src/expr/src/vector_op/upper.rs index f5170c5c7ead2..9bd54ba13cb4d 100644 --- a/src/expr/src/vector_op/upper.rs +++ b/src/expr/src/vector_op/upper.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/Cargo.toml b/src/frontend/Cargo.toml index da192a1012fe0..620cccdabb91a 100644 --- a/src/frontend/Cargo.toml +++ b/src/frontend/Cargo.toml @@ -8,12 +8,18 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" arc-swap = "1" -assert-impl = "0.1" +async-recursion = "1.0.2" async-trait = "0.1" -byteorder = "1.4" +bk-tree = "0.4.0" bytes = "1" clap = { version = "3", features = ["derive"] } derivative = "2" @@ -27,18 +33,18 @@ itertools = "0.10" maplit = "1" md5 = "0.7.0" num-integer = "0.1" -num-traits = "0.2" parking_lot = "0.12" parse-display = "0.6" paste = "1" +petgraph = "0.6" pgwire = { path = "../utils/pgwire" } pin-project-lite = "0.2" postgres-types = { version = "0.2.4" } prometheus = { version = "0.13", features = ["process"] } -prost = "0.11" rand = "0.8" risingwave_batch = { path = "../batch" } risingwave_common = { path = "../common" } +risingwave_common_proc_macro = { path = "../common/proc_macro" } risingwave_common_service = { path = "../common/common_service" } risingwave_connector = { path = "../connector" } risingwave_expr = { path = "../expr" } @@ -48,7 +54,6 @@ risingwave_source = { path = "../source" } risingwave_sqlparser = { path = "../sqlparser" } risingwave_storage = { path = "../storage" } serde = { version = "1", features = ["derive"] } -serde_derive = "1" serde_json = "1" sha2 = "0.10.2" smallvec = { version = "1.6.1", features = ["serde"] } diff --git a/src/frontend/planner_test/Cargo.toml b/src/frontend/planner_test/Cargo.toml index 4fec1655a2a52..47ab5ffa5975c 100644 --- a/src/frontend/planner_test/Cargo.toml +++ b/src/frontend/planner_test/Cargo.toml @@ -8,12 +8,18 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" +backtrace = "0.3.67" console = "0.15" futures = { version = "0.3", default-features = false, features = ["alloc"] } itertools = "0.10" -libtest-mimic = "0.6" risingwave_frontend = { path = ".." 
} risingwave_sqlparser = { path = "../../sqlparser" } serde = { version = "1", features = ["derive"] } @@ -34,6 +40,7 @@ walkdir = "2" workspace-hack = { path = "../../workspace-hack" } [dev-dependencies] +libtest-mimic = "0.6" tempfile = "3" [build-dependencies] diff --git a/src/frontend/planner_test/planner_test.toml b/src/frontend/planner_test/planner_test.toml index 517bfb2b655f8..b157e346387c1 100644 --- a/src/frontend/planner_test/planner_test.toml +++ b/src/frontend/planner_test/planner_test.toml @@ -55,6 +55,10 @@ echo "$(tput setaf 2)Diff applied!$(tput sgr 0)" ''' category = "RiseDev - Test" +[tasks.dapt] +alias = "do-apply-planner-test" + + [tasks.run-planner-test] description = "Run planner test" category = "RiseDev - Test" diff --git a/src/frontend/planner_test/src/bin/apply.rs b/src/frontend/planner_test/src/bin/apply.rs index ab7bc6ce49463..a4452232b4b28 100644 --- a/src/frontend/planner_test/src/bin/apply.rs +++ b/src/frontend/planner_test/src/bin/apply.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ use std::path::Path; use std::thread::available_parallelism; use anyhow::{anyhow, Context, Result}; +use backtrace::Backtrace; use console::style; use futures::StreamExt; use risingwave_planner_test::{resolve_testcase_id, TestCase}; @@ -24,13 +25,15 @@ use risingwave_planner_test::{resolve_testcase_id, TestCase}; #[tokio::main] async fn main() -> Result<()> { std::panic::set_hook(Box::new(move |e| { + let backtrace = Backtrace::new(); println!( - "{}{}{}{}{}\n{e}", + "{}{}{}{}{}\n{:?}\n{e}", style("ERROR: ").red().bold(), style("apply-planner-test").yellow(), style(" panicked! Try ").red().bold(), style("run-planner-test --no-fail-fast").yellow(), - style(" to find which test case panicked.").red().bold() + style(" to find which test case panicked.").red().bold(), + backtrace, ); std::process::abort(); })); diff --git a/src/frontend/planner_test/src/lib.rs b/src/frontend/planner_test/src/lib.rs index 50b93908e2112..10efc505c9bcf 100644 --- a/src/frontend/planner_test/src/lib.rs +++ b/src/frontend/planner_test/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -26,8 +26,8 @@ use std::sync::Arc; use anyhow::{anyhow, bail, Result}; pub use resolve_id::*; use risingwave_frontend::handler::{ - create_index, create_mv, create_schema, create_source, create_table, drop_table, explain, - variable, HandlerArgs, + create_index, create_mv, create_schema, create_source, create_table, create_view, drop_table, + explain, variable, HandlerArgs, }; use risingwave_frontend::session::SessionImpl; use risingwave_frontend::test_utils::{create_proto_file, get_explain_output, LocalFrontend}; @@ -408,6 +408,16 @@ impl TestCase { } => { create_mv::handle_create_mv(handler_args, name, *query, columns).await?; } + Statement::CreateView { + materialized: false, + or_replace: false, + name, + query, + columns, + .. 
+ } => { + create_view::handle_create_view(handler_args, name, columns, *query).await?; + } Statement::Drop(drop_statement) => { drop_table::handle_drop_table( handler_args, @@ -431,7 +441,8 @@ impl TestCase { if result.is_some() { panic!("two queries in one test case"); } - let rsp = explain::handle_explain(handler_args, *statement, options, analyze)?; + let rsp = + explain::handle_explain(handler_args, *statement, options, analyze).await?; let explain_output = get_explain_output(rsp).await; let ret = TestCaseResult { diff --git a/src/frontend/planner_test/src/resolve_id.rs b/src/frontend/planner_test/src/resolve_id.rs index 01ba92b9f6860..d73770cce9b4b 100644 --- a/src/frontend/planner_test/src/resolve_id.rs +++ b/src/frontend/planner_test/src/resolve_id.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/planner_test/tests/planner_test_runner.rs b/src/frontend/planner_test/tests/planner_test_runner.rs index 83896be6d55aa..26a21b9aa20b1 100644 --- a/src/frontend/planner_test/tests/planner_test_runner.rs +++ b/src/frontend/planner_test/tests/planner_test_runner.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/planner_test/tests/testdata/agg.yaml b/src/frontend/planner_test/tests/testdata/agg.yaml index 30c09c81b5a2d..b020868b813da 100644 --- a/src/frontend/planner_test/tests/testdata/agg.yaml +++ b/src/frontend/planner_test/tests/testdata/agg.yaml @@ -20,18 +20,18 @@ select v1, min(v2) + max(v3) * count(v1) as agg from t group by v1; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [t.v1, (min(t.v2) + (max(t.v3) * count(t.v1)))] } + └─BatchProject { exprs: [t.v1, (min(t.v2) + (max(t.v3) * count(t.v1))) as $expr23] } └─BatchHashAgg { group_key: [t.v1], aggs: [min(t.v2), max(t.v3), count(t.v1)] } └─BatchExchange { order: [], dist: HashShard(t.v1) } └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } batch_local_plan: | - BatchProject { exprs: [t.v1, (min(t.v2) + (max(t.v3) * count(t.v1)))] } + BatchProject { exprs: [t.v1, (min(t.v2) + (max(t.v3) * count(t.v1))) as $expr45] } └─BatchHashAgg { group_key: [t.v1], aggs: [min(t.v2), max(t.v3), count(t.v1)] } └─BatchExchange { order: [], dist: Single } └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [v1, agg], pk_columns: [v1] } - └─StreamProject { exprs: [t.v1, (min(t.v2) + (max(t.v3) * count(t.v1)))] } + └─StreamProject { exprs: [t.v1, (min(t.v2) + (max(t.v3) * count(t.v1))) as $expr69] } └─StreamHashAgg { group_key: [t.v1], aggs: [count, min(t.v2), max(t.v3), count(t.v1)] } └─StreamExchange { dist: HashShard(t.v1) } └─StreamTableScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } @@ -39,23 +39,23 @@ create table t(v1 int, v2 int, v3 int); select min(v1) + max(v2) * count(v3) as agg from t; batch_plan: | - BatchProject { exprs: [(min(min(t.v1)) + (max(max(t.v2)) * sum0(count(t.v3))))] } + BatchProject { exprs: [(min(min(t.v1)) + (max(max(t.v2)) * sum0(count(t.v3)))) as $expr23] } └─BatchSimpleAgg { aggs: [min(min(t.v1)), max(max(t.v2)), sum0(count(t.v3))] 
} └─BatchExchange { order: [], dist: Single } └─BatchSimpleAgg { aggs: [min(t.v1), max(t.v2), count(t.v3)] } └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } batch_local_plan: | - BatchProject { exprs: [(min(t.v1) + (max(t.v2) * count(t.v3)))] } + BatchProject { exprs: [(min(t.v1) + (max(t.v2) * count(t.v3))) as $expr45] } └─BatchSimpleAgg { aggs: [min(t.v1), max(t.v2), count(t.v3)] } └─BatchExchange { order: [], dist: Single } └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [agg], pk_columns: [] } - └─StreamProject { exprs: [(min(min(t.v1)) + (max(max(t.v2)) * sum0(count(t.v3))))] } + └─StreamProject { exprs: [(min(min(t.v1)) + (max(max(t.v2)) * sum0(count(t.v3)))) as $expr70] } └─StreamGlobalSimpleAgg { aggs: [count, min(min(t.v1)), max(max(t.v2)), sum0(count(t.v3))] } └─StreamExchange { dist: Single } - └─StreamHashAgg { group_key: [Vnode(t._row_id)], aggs: [count, min(t.v1), max(t.v2), count(t.v3)] } - └─StreamProject { exprs: [t.v1, t.v2, t.v3, t._row_id, Vnode(t._row_id)] } + └─StreamHashAgg { group_key: [$expr68], aggs: [count, min(t.v1), max(t.v2), count(t.v3)] } + └─StreamProject { exprs: [t.v1, t.v2, t.v3, t._row_id, Vnode(t._row_id) as $expr68] } └─StreamTableScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | create table t(v1 int, v2 int); @@ -70,50 +70,50 @@ select v3, min(v1) * avg(v1+v2) as agg from t group by v3; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [t.v3, (min(t.v1) * (sum((t.v1 + t.v2))::Decimal / count((t.v1 + t.v2))))] } - └─BatchHashAgg { group_key: [t.v3], aggs: [min(t.v1), sum((t.v1 + t.v2)), count((t.v1 + t.v2))] } + └─BatchProject { exprs: [t.v3, (min(t.v1) * (sum($expr45)::Decimal / count($expr45))) as $expr46] } + └─BatchHashAgg { group_key: [t.v3], aggs: [min(t.v1), sum($expr45), count($expr45)] } └─BatchExchange { order: [], dist: HashShard(t.v3) } - └─BatchProject { exprs: [t.v3, t.v1, (t.v1 + t.v2)] } + └─BatchProject { exprs: [t.v3, t.v1, (t.v1 + t.v2) as $expr45] } └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } batch_local_plan: | - BatchProject { exprs: [t.v3, (min(t.v1) * (sum((t.v1 + t.v2))::Decimal / count((t.v1 + t.v2))))] } - └─BatchHashAgg { group_key: [t.v3], aggs: [min(t.v1), sum((t.v1 + t.v2)), count((t.v1 + t.v2))] } + BatchProject { exprs: [t.v3, (min(t.v1) * (sum($expr89)::Decimal / count($expr89))) as $expr90] } + └─BatchHashAgg { group_key: [t.v3], aggs: [min(t.v1), sum($expr89), count($expr89)] } └─BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [t.v3, t.v1, (t.v1 + t.v2)] } + └─BatchProject { exprs: [t.v3, t.v1, (t.v1 + t.v2) as $expr89] } └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [v3, agg], pk_columns: [v3] } - └─StreamProject { exprs: [t.v3, (min(t.v1) * (sum((t.v1 + t.v2))::Decimal / count((t.v1 + t.v2))))] } - └─StreamHashAgg { group_key: [t.v3], aggs: [count, min(t.v1), sum((t.v1 + t.v2)), count((t.v1 + t.v2))] } + └─StreamProject { exprs: [t.v3, (min(t.v1) * (sum($expr135)::Decimal / count($expr135))) as $expr137] } + └─StreamHashAgg { group_key: [t.v3], aggs: [count, min(t.v1), sum($expr135), count($expr135)] } └─StreamExchange { dist: HashShard(t.v3) } - └─StreamProject { exprs: [t.v3, t.v1, (t.v1 + t.v2), t._row_id] } + └─StreamProject { exprs: [t.v3, t.v1, (t.v1 + t.v2) as $expr135, 
t._row_id] } └─StreamTableScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: test logical_agg with complex group expression sql: | create table t(v1 int, v2 int); select min(v1), sum(v1 + v2) from t group by v1 + v2; logical_plan: | - LogicalProject { exprs: [min(t.v1), sum((t.v1 + t.v2))] } - └─LogicalAgg { group_key: [(t.v1 + t.v2)], aggs: [min(t.v1), sum((t.v1 + t.v2))] } - └─LogicalProject { exprs: [(t.v1 + t.v2), t.v1] } + LogicalProject { exprs: [min(t.v1), sum($expr1)] } + └─LogicalAgg { group_key: [$expr1], aggs: [min(t.v1), sum($expr1)] } + └─LogicalProject { exprs: [(t.v1 + t.v2) as $expr1, t.v1] } └─LogicalScan { table: t, columns: [t.v1, t.v2, t._row_id] } - name: test logical_agg with complex group expression sql: | create table t(v1 int, v2 int, v3 int); select v1, sum(v1 * v2) as sum from t group by (v1 + v2) / v3, v1; logical_plan: | - LogicalProject { exprs: [t.v1, sum((t.v1 * t.v2))] } - └─LogicalAgg { group_key: [((t.v1 + t.v2) / t.v3), t.v1], aggs: [sum((t.v1 * t.v2))] } - └─LogicalProject { exprs: [((t.v1 + t.v2) / t.v3), t.v1, (t.v1 * t.v2)] } + LogicalProject { exprs: [t.v1, sum($expr2)] } + └─LogicalAgg { group_key: [$expr1, t.v1], aggs: [sum($expr2)] } + └─LogicalProject { exprs: [((t.v1 + t.v2) / t.v3) as $expr1, t.v1, (t.v1 * t.v2) as $expr2] } └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id] } - name: test logical_agg with complex group expression sql: | create table t(v1 int, v2 int); select v1 + v2 from t group by v1 + v2; logical_plan: | - LogicalProject { exprs: [(t.v1 + t.v2)] } - └─LogicalAgg { group_key: [(t.v1 + t.v2)], aggs: [] } - └─LogicalProject { exprs: [(t.v1 + t.v2)] } + LogicalProject { exprs: [$expr1] } + └─LogicalAgg { group_key: [$expr1], aggs: [] } + └─LogicalProject { exprs: [(t.v1 + t.v2) as $expr1] } └─LogicalScan { table: t, columns: [t.v1, t.v2, t._row_id] } - name: "test logical_agg with complex group expression \nshould complain about nested agg call \n" sql: | @@ -127,7 +127,7 @@ create table t(v1 int, v2 int); select v1 + v2 from t group by v1, v2; logical_plan: | - LogicalProject { exprs: [(t.v1 + t.v2)] } + LogicalProject { exprs: [(t.v1 + t.v2) as $expr1] } └─LogicalAgg { group_key: [t.v1, t.v2], aggs: [] } └─LogicalProject { exprs: [t.v1, t.v2] } └─LogicalScan { table: t, columns: [t.v1, t.v2, t._row_id] } @@ -139,40 +139,40 @@ create table t(v1 int, v2 int); select count(v1 + v2) as cnt, sum(v1 + v2) as sum from t; batch_plan: | - BatchSimpleAgg { aggs: [sum0(count((t.v1 + t.v2))), sum(sum((t.v1 + t.v2)))] } + BatchSimpleAgg { aggs: [sum0(count($expr23)), sum(sum($expr23))] } └─BatchExchange { order: [], dist: Single } - └─BatchSimpleAgg { aggs: [count((t.v1 + t.v2)), sum((t.v1 + t.v2))] } - └─BatchProject { exprs: [(t.v1 + t.v2)] } + └─BatchSimpleAgg { aggs: [count($expr23), sum($expr23)] } + └─BatchProject { exprs: [(t.v1 + t.v2) as $expr23] } └─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard } batch_local_plan: | - BatchSimpleAgg { aggs: [count((t.v1 + t.v2)), sum((t.v1 + t.v2))] } + BatchSimpleAgg { aggs: [count($expr45), sum($expr45)] } └─BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [(t.v1 + t.v2)] } + └─BatchProject { exprs: [(t.v1 + t.v2) as $expr45] } └─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [cnt, sum], pk_columns: [] } - └─StreamProject { exprs: [sum0(count((t.v1 + t.v2))), sum(sum((t.v1 + t.v2)))] } - 
└─StreamGlobalSimpleAgg { aggs: [count, sum0(count((t.v1 + t.v2))), sum(sum((t.v1 + t.v2)))] } + └─StreamProject { exprs: [sum0(count($expr68)), sum(sum($expr68))] } + └─StreamGlobalSimpleAgg { aggs: [count, sum0(count($expr68)), sum(sum($expr68))] } └─StreamExchange { dist: Single } - └─StreamStatelessLocalSimpleAgg { aggs: [count, count((t.v1 + t.v2)), sum((t.v1 + t.v2))] } - └─StreamProject { exprs: [(t.v1 + t.v2), t._row_id] } + └─StreamStatelessLocalSimpleAgg { aggs: [count, count($expr68), sum($expr68)] } + └─StreamProject { exprs: [(t.v1 + t.v2) as $expr68, t._row_id] } └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | create table t(v1 int, v2 int, v3 int); select v1, sum(v2 + v3) / count(v2 + v3) + max(v1) as agg from t group by v1; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [t.v1, ((sum((t.v2 + t.v3)) / count((t.v2 + t.v3))) + max(t.v1))] } - └─BatchHashAgg { group_key: [t.v1], aggs: [sum((t.v2 + t.v3)), count((t.v2 + t.v3)), max(t.v1)] } + └─BatchProject { exprs: [t.v1, ((sum($expr45) / count($expr45)) + max(t.v1)) as $expr46] } + └─BatchHashAgg { group_key: [t.v1], aggs: [sum($expr45), count($expr45), max(t.v1)] } └─BatchExchange { order: [], dist: HashShard(t.v1) } - └─BatchProject { exprs: [t.v1, (t.v2 + t.v3)] } + └─BatchProject { exprs: [t.v1, (t.v2 + t.v3) as $expr45] } └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [v1, agg], pk_columns: [v1] } - └─StreamProject { exprs: [t.v1, ((sum((t.v2 + t.v3)) / count((t.v2 + t.v3))) + max(t.v1))] } - └─StreamHashAgg { group_key: [t.v1], aggs: [count, sum((t.v2 + t.v3)), count((t.v2 + t.v3)), max(t.v1)] } + └─StreamProject { exprs: [t.v1, ((sum($expr91) / count($expr91)) + max(t.v1)) as $expr93] } + └─StreamHashAgg { group_key: [t.v1], aggs: [count, sum($expr91), count($expr91), max(t.v1)] } └─StreamExchange { dist: HashShard(t.v1) } - └─StreamProject { exprs: [t.v1, (t.v2 + t.v3), t._row_id] } + └─StreamProject { exprs: [t.v1, (t.v2 + t.v3) as $expr91, t._row_id] } └─StreamTableScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | create table t (v1 real); @@ -372,16 +372,16 @@ create table t (v1 int, v2 int, v3 int); select distinct on(v1) v2 + v3 from t order by v1; logical_plan: | - LogicalProject { exprs: [(t.v2 + t.v3)] } + LogicalProject { exprs: [$expr1] } └─LogicalTopN { order: "[t.v1 ASC]", limit: 1, offset: 0, group_key: [1] } - └─LogicalProject { exprs: [(t.v2 + t.v3), t.v1] } + └─LogicalProject { exprs: [(t.v2 + t.v3) as $expr1, t.v1] } └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id] } batch_plan: | - BatchProject { exprs: [(t.v2 + t.v3)] } + BatchProject { exprs: [$expr23] } └─BatchExchange { order: [t.v1 ASC], dist: Single } └─BatchGroupTopN { order: "[t.v1 ASC]", limit: 1, offset: 0, group_key: [1] } └─BatchExchange { order: [], dist: HashShard(t.v1) } - └─BatchProject { exprs: [(t.v2 + t.v3), t.v1] } + └─BatchProject { exprs: [(t.v2 + t.v3) as $expr23, t.v1] } └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } - name: arguments out-of-order sql: | @@ -405,27 +405,27 @@ create table t(v1 int, v2 int, v3 int); select min(v1) + max(v3) * count(v2) as agg from t; logical_plan: | - LogicalProject { exprs: [(min(t.v1) + (max(t.v3) * count(t.v2)))] } + LogicalProject { exprs: [(min(t.v1) + (max(t.v3) * count(t.v2))) as $expr1] 
} └─LogicalAgg { aggs: [min(t.v1), max(t.v3), count(t.v2)] } └─LogicalProject { exprs: [t.v1, t.v3, t.v2] } └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id] } optimized_logical_plan: | - LogicalProject { exprs: [(min(t.v1) + (max(t.v3) * count(t.v2)))] } + LogicalProject { exprs: [(min(t.v1) + (max(t.v3) * count(t.v2))) as $expr19] } └─LogicalAgg { aggs: [min(t.v1), max(t.v3), count(t.v2)] } └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3] } batch_plan: | - BatchProject { exprs: [(min(min(t.v1)) + (max(max(t.v3)) * sum0(count(t.v2))))] } + BatchProject { exprs: [(min(min(t.v1)) + (max(max(t.v3)) * sum0(count(t.v2)))) as $expr41] } └─BatchSimpleAgg { aggs: [min(min(t.v1)), max(max(t.v3)), sum0(count(t.v2))] } └─BatchExchange { order: [], dist: Single } └─BatchSimpleAgg { aggs: [min(t.v1), max(t.v3), count(t.v2)] } └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [agg], pk_columns: [] } - └─StreamProject { exprs: [(min(min(t.v1)) + (max(max(t.v3)) * sum0(count(t.v2))))] } + └─StreamProject { exprs: [(min(min(t.v1)) + (max(max(t.v3)) * sum0(count(t.v2)))) as $expr66] } └─StreamGlobalSimpleAgg { aggs: [count, min(min(t.v1)), max(max(t.v3)), sum0(count(t.v2))] } └─StreamExchange { dist: Single } - └─StreamHashAgg { group_key: [Vnode(t._row_id)], aggs: [count, min(t.v1), max(t.v3), count(t.v2)] } - └─StreamProject { exprs: [t.v1, t.v2, t.v3, t._row_id, Vnode(t._row_id)] } + └─StreamHashAgg { group_key: [$expr64], aggs: [count, min(t.v1), max(t.v3), count(t.v2)] } + └─StreamProject { exprs: [t.v1, t.v2, t.v3, t._row_id, Vnode(t._row_id) as $expr64] } └─StreamTableScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: dup group key sql: | @@ -588,52 +588,52 @@ create table t(a int, b int); select sum(a * b) filter (where a * b > 0) as sab from t; logical_plan: | - LogicalProject { exprs: [sum((t.a * t.b)) filter(((t.a * t.b) > 0:Int32))] } - └─LogicalAgg { aggs: [sum((t.a * t.b)) filter(((t.a * t.b) > 0:Int32))] } - └─LogicalProject { exprs: [t.a, t.b, (t.a * t.b)] } + LogicalProject { exprs: [sum($expr1) filter(((t.a * t.b) > 0:Int32))] } + └─LogicalAgg { aggs: [sum($expr1) filter(((t.a * t.b) > 0:Int32))] } + └─LogicalProject { exprs: [t.a, t.b, (t.a * t.b) as $expr1] } └─LogicalScan { table: t, columns: [t.a, t.b, t._row_id] } optimized_logical_plan: | - LogicalAgg { aggs: [sum((t.a * t.b)) filter(((t.a * t.b) > 0:Int32))] } - └─LogicalProject { exprs: [t.a, t.b, (t.a * t.b)] } + LogicalAgg { aggs: [sum($expr19) filter(((t.a * t.b) > 0:Int32))] } + └─LogicalProject { exprs: [t.a, t.b, (t.a * t.b) as $expr19] } └─LogicalScan { table: t, columns: [t.a, t.b] } - name: complex filter clause sql: | create table t(a int, b int); select max(a * b) FILTER (WHERE a < b AND a + b < 100 AND a * b != a + b - 1) AS sab from t; logical_plan: | - LogicalProject { exprs: [max((t.a * t.b)) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32)))] } - └─LogicalAgg { aggs: [max((t.a * t.b)) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32)))] } - └─LogicalProject { exprs: [t.a, t.b, (t.a * t.b)] } + LogicalProject { exprs: [max($expr1) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32)))] } + └─LogicalAgg { aggs: [max($expr1) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32)))] } + 
└─LogicalProject { exprs: [t.a, t.b, (t.a * t.b) as $expr1] } └─LogicalScan { table: t, columns: [t.a, t.b, t._row_id] } optimized_logical_plan: | - LogicalAgg { aggs: [max((t.a * t.b)) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32)))] } - └─LogicalProject { exprs: [t.a, t.b, (t.a * t.b)] } + LogicalAgg { aggs: [max($expr19) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32)))] } + └─LogicalProject { exprs: [t.a, t.b, (t.a * t.b) as $expr19] } └─LogicalScan { table: t, columns: [t.a, t.b] } stream_plan: | StreamMaterialize { columns: [sab], pk_columns: [] } - └─StreamProject { exprs: [max(max((t.a * t.b)) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32))))] } - └─StreamGlobalSimpleAgg { aggs: [count, max(max((t.a * t.b)) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32))))] } + └─StreamProject { exprs: [max(max($expr42) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32))))] } + └─StreamGlobalSimpleAgg { aggs: [count, max(max($expr42) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32))))] } └─StreamExchange { dist: Single } - └─StreamHashAgg { group_key: [Vnode(t._row_id)], aggs: [count, max((t.a * t.b)) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32)))] } - └─StreamProject { exprs: [t.a, t.b, (t.a * t.b), t._row_id, Vnode(t._row_id)] } - └─StreamProject { exprs: [t.a, t.b, (t.a * t.b), t._row_id] } + └─StreamHashAgg { group_key: [$expr43], aggs: [count, max($expr42) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32)))] } + └─StreamProject { exprs: [t.a, t.b, $expr42, t._row_id, Vnode(t._row_id) as $expr43] } + └─StreamProject { exprs: [t.a, t.b, (t.a * t.b) as $expr42, t._row_id] } └─StreamTableScan { table: t, columns: [t.a, t.b, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: avg filter clause + group by sql: | create table t(a int, b int); select avg(a) FILTER (WHERE a > b) AS avga from t group by b ; logical_plan: | - LogicalProject { exprs: [(sum(t.a) filter((t.a > t.b))::Decimal / count(t.a) filter((t.a > t.b)))] } + LogicalProject { exprs: [(sum(t.a) filter((t.a > t.b))::Decimal / count(t.a) filter((t.a > t.b))) as $expr1] } └─LogicalAgg { group_key: [t.b], aggs: [sum(t.a) filter((t.a > t.b)), count(t.a) filter((t.a > t.b))] } └─LogicalProject { exprs: [t.b, t.a] } └─LogicalScan { table: t, columns: [t.a, t.b, t._row_id] } optimized_logical_plan: | - LogicalProject { exprs: [(sum(t.a) filter((t.a > t.b))::Decimal / count(t.a) filter((t.a > t.b)))] } + LogicalProject { exprs: [(sum(t.a) filter((t.a > t.b))::Decimal / count(t.a) filter((t.a > t.b))) as $expr20] } └─LogicalAgg { group_key: [t.b], aggs: [sum(t.a) filter((t.a > t.b)), count(t.a) filter((t.a > t.b))] } └─LogicalScan { table: t, columns: [t.a, t.b] } stream_plan: | StreamMaterialize { columns: [avga, t.b(hidden)], pk_columns: [t.b] } - └─StreamProject { exprs: [(sum(t.a) filter((t.a > t.b))::Decimal / count(t.a) filter((t.a > t.b))), t.b] } + └─StreamProject { exprs: [(sum(t.a) filter((t.a > t.b))::Decimal / count(t.a) filter((t.a > t.b))) as $expr45, t.b] } └─StreamHashAgg { group_key: [t.b], aggs: [count, sum(t.a) filter((t.a > t.b)), count(t.a) filter((t.a > t.b))] } └─StreamExchange { dist: HashShard(t.b) } └─StreamTableScan { table: t, columns: [t.a, 
t.b, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } @@ -696,6 +696,63 @@ └─StreamExchange { dist: Single } └─StreamStatelessLocalSimpleAgg { aggs: [count, sum(t.v2) filter((t.v2 < 5:Int32))] } └─StreamTableScan { table: t, columns: [t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } +- name: force two phase aggregation + sql: | + SET QUERY_MODE TO DISTRIBUTED; + SET RW_FORCE_TWO_PHASE_AGG=true; + create table t(v1 int, v2 smallint, v3 varchar); + select min(v3), sum(v1) from t group by v1, v3, v2; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [min(min(t.v3)), sum(sum(t.v1))] } + └─BatchHashAgg { group_key: [t.v1, t.v3, t.v2], aggs: [min(min(t.v3)), sum(sum(t.v1))] } + └─BatchExchange { order: [], dist: HashShard(t.v1, t.v3, t.v2) } + └─BatchHashAgg { group_key: [t.v1, t.v3, t.v2], aggs: [min(t.v3), sum(t.v1)] } + └─BatchExchange { order: [], dist: HashShard(t.v1, t.v2, t.v3) } + └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } + stream_plan: | + StreamMaterialize { columns: [min, sum, t.v1(hidden), t.v3(hidden), t.v2(hidden)], pk_columns: [t.v1, t.v3, t.v2] } + └─StreamProject { exprs: [min(min(t.v3)), sum(sum(t.v1)), t.v1, t.v3, t.v2] } + └─StreamHashAgg { group_key: [t.v1, t.v3, t.v2], aggs: [count, min(min(t.v3)), sum(sum(t.v1))] } + └─StreamExchange { dist: HashShard(t.v1, t.v3, t.v2) } + └─StreamHashAgg { group_key: [t.v1, t.v3, t.v2, $expr1], aggs: [count, min(t.v3), sum(t.v1)] } + └─StreamProject { exprs: [t.v1, t.v2, t.v3, t._row_id, Vnode(t._row_id) as $expr1] } + └─StreamTableScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } +- name: enable two phase aggregation + sql: | + SET QUERY_MODE TO DISTRIBUTED; + SET RW_ENABLE_TWO_PHASE_AGG=true; + create table t(v1 int, v2 int); + select min(v1), sum(v2) from t; + batch_plan: | + BatchSimpleAgg { aggs: [min(min(t.v1)), sum(sum(t.v2))] } + └─BatchExchange { order: [], dist: Single } + └─BatchSimpleAgg { aggs: [min(t.v1), sum(t.v2)] } + └─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard } + stream_plan: | + StreamMaterialize { columns: [min, sum], pk_columns: [] } + └─StreamProject { exprs: [min(min(t.v1)), sum(sum(t.v2))] } + └─StreamGlobalSimpleAgg { aggs: [count, min(min(t.v1)), sum(sum(t.v2))] } + └─StreamExchange { dist: Single } + └─StreamHashAgg { group_key: [$expr1], aggs: [count, min(t.v1), sum(t.v2)] } + └─StreamProject { exprs: [t.v1, t.v2, t._row_id, Vnode(t._row_id) as $expr1] } + └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } +- name: disable two phase aggregation + sql: | + SET QUERY_MODE TO DISTRIBUTED; + SET RW_ENABLE_TWO_PHASE_AGG=false; + create table t(v1 int, v2 int); + select min(v1), sum(v2) from t; + batch_plan: | + BatchSimpleAgg { aggs: [min(t.v1), sum(t.v2)] } + └─BatchExchange { order: [], dist: Single } + └─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard } + stream_plan: | + StreamMaterialize { columns: [min, sum], pk_columns: [] } + └─StreamProject { exprs: [min(t.v1), sum(t.v2)] } + └─StreamGlobalSimpleAgg { aggs: [count, min(t.v1), sum(t.v2)] } + └─StreamExchange { dist: Single } + └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: only distinct agg sql: | create table t(a int, b int, c int); @@ -705,7 +762,7 @@ └─LogicalAgg { group_key: [t.a, t.b, t.c, 
flag], aggs: [count filter((t.c < 100:Int32))] } └─LogicalExpand { column_subsets: [[t.a, t.b], [t.a, t.c]] } └─LogicalScan { table: t, columns: [t.a, t.b, t.c] } -- name: distinct agg and non-disintct agg +- name: single distinct agg and non-disintct agg sql: | create table t(a int, b int, c int); select a, count(distinct b) as distinct_b_num, sum(c) as sum_c from t group by a; @@ -713,6 +770,13 @@ LogicalAgg { group_key: [t.a], aggs: [count(t.b), sum(sum(t.c))] } └─LogicalAgg { group_key: [t.a, t.b], aggs: [sum(t.c)] } └─LogicalScan { table: t, columns: [t.a, t.b, t.c] } + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchHashAgg { group_key: [t.a], aggs: [count(t.b), sum(sum(t.c))] } + └─BatchExchange { order: [], dist: HashShard(t.a) } + └─BatchHashAgg { group_key: [t.a, t.b], aggs: [sum(t.c)] } + └─BatchExchange { order: [], dist: HashShard(t.a, t.b) } + └─BatchScan { table: t, columns: [t.a, t.b, t.c], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [a, distinct_b_num, sum_c], pk_columns: [a] } └─StreamProject { exprs: [t.a, count(t.b), sum(sum(t.c))] } @@ -722,6 +786,33 @@ └─StreamHashAgg { group_key: [t.a, t.b], aggs: [count, sum(t.c)] } └─StreamExchange { dist: HashShard(t.a, t.b) } └─StreamTableScan { table: t, columns: [t.a, t.b, t.c, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } +- name: distinct agg and non-disintct agg with intersected argument + sql: | + create table t(a int, b int, c int); + select a, count(distinct b) as distinct_b_num, count(distinct c) as distinct_c_sum, sum(c) as sum_c from t group by a; + optimized_logical_plan: | + LogicalAgg { group_key: [t.a], aggs: [count(t.b) filter((flag = 1:Int64)), count(t.c) filter((flag = 0:Int64)), sum(sum(t.c)) filter((flag = 0:Int64))] } + └─LogicalAgg { group_key: [t.a, t.b, t.c, flag], aggs: [sum(t.c)] } + └─LogicalExpand { column_subsets: [[t.a, t.c], [t.a, t.b]] } + └─LogicalScan { table: t, columns: [t.a, t.b, t.c] } + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchHashAgg { group_key: [t.a], aggs: [count(t.b) filter((flag = 1:Int64)), count(t.c) filter((flag = 0:Int64)), sum(sum(t.c)) filter((flag = 0:Int64))] } + └─BatchExchange { order: [], dist: HashShard(t.a) } + └─BatchHashAgg { group_key: [t.a, t.b, t.c, flag], aggs: [sum(t.c)] } + └─BatchExchange { order: [], dist: HashShard(t.a, t.b, t.c, flag) } + └─BatchExpand { column_subsets: [[t.a, t.c], [t.a, t.b]] } + └─BatchScan { table: t, columns: [t.a, t.b, t.c], distribution: SomeShard } + stream_plan: | + StreamMaterialize { columns: [a, distinct_b_num, distinct_c_sum, sum_c], pk_columns: [a] } + └─StreamProject { exprs: [t.a, count(t.b) filter((flag = 1:Int64)), count(t.c) filter((flag = 0:Int64)), sum(sum(t.c)) filter((flag = 0:Int64))] } + └─StreamHashAgg { group_key: [t.a], aggs: [count, count(t.b) filter((flag = 1:Int64)), count(t.c) filter((flag = 0:Int64)), sum(sum(t.c)) filter((flag = 0:Int64))] } + └─StreamExchange { dist: HashShard(t.a) } + └─StreamProject { exprs: [t.a, t.b, t.c, flag, sum(t.c)] } + └─StreamHashAgg { group_key: [t.a, t.b, t.c, flag], aggs: [count, sum(t.c)] } + └─StreamExchange { dist: HashShard(t.a, t.b, t.c, flag) } + └─StreamExpand { column_subsets: [[t.a, t.c], [t.a, t.b]] } + └─StreamTableScan { table: t, columns: [t.a, t.b, t.c, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: distinct agg with filter sql: | create table t(a int, b int, c int); @@ -730,6 +821,22 @@ LogicalAgg { group_key: [t.a], aggs: [count(t.b) filter((count 
filter((t.b < 100:Int32)) > 0:Int64)), sum(sum(t.c))] } └─LogicalAgg { group_key: [t.a, t.b], aggs: [count filter((t.b < 100:Int32)), sum(t.c)] } └─LogicalScan { table: t, columns: [t.a, t.b, t.c] } + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchHashAgg { group_key: [t.a], aggs: [count(t.b) filter((count filter((t.b < 100:Int32)) > 0:Int64)), sum(sum(t.c))] } + └─BatchExchange { order: [], dist: HashShard(t.a) } + └─BatchHashAgg { group_key: [t.a, t.b], aggs: [count filter((t.b < 100:Int32)), sum(t.c)] } + └─BatchExchange { order: [], dist: HashShard(t.a, t.b) } + └─BatchScan { table: t, columns: [t.a, t.b, t.c], distribution: SomeShard } + stream_plan: | + StreamMaterialize { columns: [a, count, sum], pk_columns: [a] } + └─StreamProject { exprs: [t.a, count(t.b) filter((count filter((t.b < 100:Int32)) > 0:Int64)), sum(sum(t.c))] } + └─StreamHashAgg { group_key: [t.a], aggs: [count, count(t.b) filter((count filter((t.b < 100:Int32)) > 0:Int64)), sum(sum(t.c))] } + └─StreamExchange { dist: HashShard(t.a) } + └─StreamProject { exprs: [t.a, t.b, count filter((t.b < 100:Int32)), sum(t.c)] } + └─StreamHashAgg { group_key: [t.a, t.b], aggs: [count, count filter((t.b < 100:Int32)), sum(t.c)] } + └─StreamExchange { dist: HashShard(t.a, t.b) } + └─StreamTableScan { table: t, columns: [t.a, t.b, t.c, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: non-distinct agg with filter sql: | create table t(a int, b int, c int); @@ -743,21 +850,21 @@ create table t(a varchar, b int); select sum(length(a) * b order by length(a) + b) filter (where b < 100 AND b * 2 > 10) as s1 from t; logical_plan: | - LogicalProject { exprs: [sum((Length(t.a) * t.b)) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32))] } - └─LogicalAgg { aggs: [sum((Length(t.a) * t.b)) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32))] } - └─LogicalProject { exprs: [t.b, (Length(t.a) * t.b)] } + LogicalProject { exprs: [sum($expr1) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32))] } + └─LogicalAgg { aggs: [sum($expr1) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32))] } + └─LogicalProject { exprs: [t.b, (Length(t.a) * t.b) as $expr1] } └─LogicalScan { table: t, columns: [t.a, t.b, t._row_id] } optimized_logical_plan: | - LogicalAgg { aggs: [sum((Length(t.a) * t.b)) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32))] } - └─LogicalProject { exprs: [t.b, (Length(t.a) * t.b)] } + LogicalAgg { aggs: [sum($expr19) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32))] } + └─LogicalProject { exprs: [t.b, (Length(t.a) * t.b) as $expr19] } └─LogicalScan { table: t, columns: [t.a, t.b] } stream_plan: | StreamMaterialize { columns: [s1], pk_columns: [] } - └─StreamProject { exprs: [sum(sum((Length(t.a) * t.b)) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32)))] } - └─StreamGlobalSimpleAgg { aggs: [count, sum(sum((Length(t.a) * t.b)) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32)))] } + └─StreamProject { exprs: [sum(sum($expr42) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32)))] } + └─StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr42) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32)))] } └─StreamExchange { dist: Single } - └─StreamStatelessLocalSimpleAgg { aggs: [count, sum((Length(t.a) * t.b)) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32))] } - └─StreamProject { exprs: [t.b, (Length(t.a) * t.b), t._row_id] } + └─StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr42) filter((t.b < 100:Int32) AND ((t.b * 
2:Int32) > 10:Int32))] } + └─StreamProject { exprs: [t.b, (Length(t.a) * t.b) as $expr42, t._row_id] } └─StreamTableScan { table: t, columns: [t.a, t.b, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | create table t(x int, y varchar); @@ -966,3 +1073,33 @@ └─LogicalAgg { group_key: [t.v2], aggs: [min(t.v1)] } └─LogicalProject { exprs: [t.v2, t.v1] } └─LogicalScan { table: t, columns: [t.v1, t.v2, t._row_id] } +- name: stddev_samp + sql: | + create table t (v1 int); + select stddev_samp(v1), stddev_pop(v1) from t; + logical_plan: | + LogicalProject { exprs: [Case((count(t.v1) <= 1:Int64), null:Decimal::Float64, Pow(((sum($expr1)::Decimal - ((sum(t.v1)::Decimal * sum(t.v1)::Decimal) / count(t.v1))) / (count(t.v1) - 1:Int64))::Float64, 0.5:Float64)) as $expr2, Pow(((sum($expr1)::Decimal - ((sum(t.v1)::Decimal * sum(t.v1)::Decimal) / count(t.v1))) / count(t.v1))::Float64, 0.5:Float64) as $expr3] } + └─LogicalAgg { aggs: [sum($expr1), sum(t.v1), count(t.v1), sum($expr1), sum(t.v1), count(t.v1)] } + └─LogicalProject { exprs: [t.v1, (t.v1 * t.v1) as $expr1] } + └─LogicalScan { table: t, columns: [t.v1, t._row_id] } + batch_plan: | + BatchProject { exprs: [Case((sum0(count(t.v1)) <= 1:Int64), null:Decimal::Float64, Pow(((sum(sum($expr67))::Decimal - ((sum(sum(t.v1))::Decimal * sum(sum(t.v1))::Decimal) / sum0(count(t.v1)))) / (sum0(count(t.v1)) - 1:Int64))::Float64, 0.5:Float64)) as $expr68, Pow(((sum(sum($expr67))::Decimal - ((sum(sum(t.v1))::Decimal * sum(sum(t.v1))::Decimal) / sum0(count(t.v1)))) / sum0(count(t.v1)))::Float64, 0.5:Float64) as $expr69] } + └─BatchSimpleAgg { aggs: [sum(sum($expr67)), sum(sum(t.v1)), sum0(count(t.v1)), sum(sum($expr67)), sum(sum(t.v1)), sum0(count(t.v1))] } + └─BatchExchange { order: [], dist: Single } + └─BatchSimpleAgg { aggs: [sum($expr67), sum(t.v1), count(t.v1), sum($expr67), sum(t.v1), count(t.v1)] } + └─BatchProject { exprs: [t.v1, (t.v1 * t.v1) as $expr67] } + └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } + batch_local_plan: | + BatchProject { exprs: [Case((count(t.v1) <= 1:Int64), null:Decimal::Float64, Pow(((sum($expr133)::Decimal - ((sum(t.v1)::Decimal * sum(t.v1)::Decimal) / count(t.v1))) / (count(t.v1) - 1:Int64))::Float64, 0.5:Float64)) as $expr134, Pow(((sum($expr133)::Decimal - ((sum(t.v1)::Decimal * sum(t.v1)::Decimal) / count(t.v1))) / count(t.v1))::Float64, 0.5:Float64) as $expr135] } + └─BatchSimpleAgg { aggs: [sum($expr133), sum(t.v1), count(t.v1), sum($expr133), sum(t.v1), count(t.v1)] } + └─BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [t.v1, (t.v1 * t.v1) as $expr133] } + └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } + stream_plan: | + StreamMaterialize { columns: [stddev_samp, stddev_pop], pk_columns: [] } + └─StreamProject { exprs: [Case((sum0(count(t.v1)) <= 1:Int64), null:Decimal::Float64, Pow(((sum(sum($expr202))::Decimal - ((sum(sum(t.v1))::Decimal * sum(sum(t.v1))::Decimal) / sum0(count(t.v1)))) / (sum0(count(t.v1)) - 1:Int64))::Float64, 0.5:Float64)) as $expr205, Pow(((sum(sum($expr202))::Decimal - ((sum(sum(t.v1))::Decimal * sum(sum(t.v1))::Decimal) / sum0(count(t.v1)))) / sum0(count(t.v1)))::Float64, 0.5:Float64) as $expr206] } + └─StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr202)), sum(sum(t.v1)), sum0(count(t.v1)), sum(sum($expr202)), sum(sum(t.v1)), sum0(count(t.v1))] } + └─StreamExchange { dist: Single } + └─StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr202), sum(t.v1), count(t.v1), sum($expr202), sum(t.v1), count(t.v1)] } 
+ └─StreamProject { exprs: [t.v1, (t.v1 * t.v1) as $expr202, t._row_id] } + └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/array.yaml b/src/frontend/planner_test/tests/testdata/array.yaml index b436cbdff2d4e..422542ced1e2d 100644 --- a/src/frontend/planner_test/tests/testdata/array.yaml +++ b/src/frontend/planner_test/tests/testdata/array.yaml @@ -15,16 +15,16 @@ create table t (v1 int); select (ARRAY[1, v1]) from t; logical_plan: | - LogicalProject { exprs: [Array(1:Int32, t.v1)] } + LogicalProject { exprs: [Array(1:Int32, t.v1) as $expr1] } └─LogicalScan { table: t, columns: [t.v1, t._row_id] } batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [Array(1:Int32, t.v1)] } + └─BatchProject { exprs: [Array(1:Int32, t.v1) as $expr23] } └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } - sql: | select ARRAY[null]; logical_plan: | - LogicalProject { exprs: [Array(null:Varchar)] } + LogicalProject { exprs: [Array(null:Varchar) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | select ARRAY[]; @@ -34,12 +34,12 @@ - sql: | select ARRAY[]::int[]; logical_plan: | - LogicalProject { exprs: [Array::List { datatype: Int32 }] } + LogicalProject { exprs: [Array::List { datatype: Int32 } as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | select ARRAY[]::int[][]; logical_plan: | - LogicalProject { exprs: [Array::List { datatype: List { datatype: Int32 } }] } + LogicalProject { exprs: [Array::List { datatype: List { datatype: Int32 } } as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | select ARRAY[]::int; @@ -49,27 +49,24 @@ - sql: | select array_cat(array[66], array[123]); logical_plan: | - LogicalProject { exprs: [ArrayCat(Array(66:Int32), Array(123:Int32))] } + LogicalProject { exprs: [ArrayCat(Array(66:Int32), Array(123:Int32)) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } batch_plan: | - BatchProject { exprs: [ArrayCat(Array(66:Int32), Array(123:Int32))] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[ArrayCat(Array(66:Int32), Array(123:Int32))]] } - sql: | select array_cat(array[array[66]], array[233]); logical_plan: | - LogicalProject { exprs: [ArrayCat(Array(Array(66:Int32)), Array(233:Int32))] } + LogicalProject { exprs: [ArrayCat(Array(Array(66:Int32)), Array(233:Int32)) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } batch_plan: | - BatchProject { exprs: [ArrayCat(Array(Array(66:Int32)), Array(233:Int32))] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[ArrayCat(Array(Array(66:Int32)), Array(233:Int32))]] } - sql: | select array_cat(array[233], array[array[66]]); logical_plan: | - LogicalProject { exprs: [ArrayCat(Array(233:Int32), Array(Array(66:Int32)))] } + LogicalProject { exprs: [ArrayCat(Array(233:Int32), Array(Array(66:Int32))) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } batch_plan: | - BatchProject { exprs: [ArrayCat(Array(233:Int32), Array(Array(66:Int32)))] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[ArrayCat(Array(233:Int32), Array(Array(66:Int32)))]] } - sql: | select array_cat(array[233], array[array[array[66]]]); binder_error: 'Bind error: unable to find least restrictive type between integer[] and integer[][][]' @@ -82,11 +79,10 @@ - sql: | select array_append(array[66], 123); logical_plan: | - 
LogicalProject { exprs: [ArrayAppend(Array(66:Int32), 123:Int32)] } + LogicalProject { exprs: [ArrayAppend(Array(66:Int32), 123:Int32) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } batch_plan: | - BatchProject { exprs: [ArrayAppend(Array(66:Int32), 123:Int32)] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[ArrayAppend(Array(66:Int32), 123:Int32)]] } - sql: | select array_append(123, 234); binder_error: 'Bind error: Cannot append integer to integer' @@ -94,23 +90,22 @@ /* Combining multidimensional arrays as such is supported beyond what PostgresSQL allows */ select array_append(array[array[66]], array[233]); logical_plan: | - LogicalProject { exprs: [ArrayAppend(Array(Array(66:Int32)), Array(233:Int32))] } + LogicalProject { exprs: [ArrayAppend(Array(Array(66:Int32)), Array(233:Int32)) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | select array_prepend(123, array[66]); logical_plan: | - LogicalProject { exprs: [ArrayPrepend(123:Int32, Array(66:Int32))] } + LogicalProject { exprs: [ArrayPrepend(123:Int32, Array(66:Int32)) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } batch_plan: | - BatchProject { exprs: [ArrayPrepend(123:Int32, Array(66:Int32))] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[ArrayPrepend(123:Int32, Array(66:Int32))]] } - sql: | select array_prepend(123, 234); binder_error: 'Bind error: Cannot prepend integer to integer' - sql: | select array_prepend(array[233], array[array[66]]); logical_plan: | - LogicalProject { exprs: [ArrayPrepend(Array(233:Int32), Array(Array(66:Int32)))] } + LogicalProject { exprs: [ArrayPrepend(Array(233:Int32), Array(Array(66:Int32))) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: string from/to varchar[] in implicit context sql: | @@ -125,7 +120,7 @@ sql: | select ('{c,' || 'd}')::varchar[]; logical_plan: | - LogicalProject { exprs: [ConcatOp('{c,':Varchar, 'd}':Varchar)::List { datatype: Varchar }] } + LogicalProject { exprs: [ConcatOp('{c,':Varchar, 'd}':Varchar)::List { datatype: Varchar } as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: unknown to varchar[] in implicit context sql: | @@ -143,7 +138,7 @@ sql: | select ('{c,d}')::varchar[]; logical_plan: | - LogicalProject { exprs: ['{c,d}':Varchar::List { datatype: Varchar }] } + LogicalProject { exprs: ['{c,d}':Varchar::List { datatype: Varchar } as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: varchar[] to string in assign context sql: | @@ -156,31 +151,31 @@ sql: | select array['a', 'b']::varchar; logical_plan: | - LogicalProject { exprs: [Array('a':Varchar, 'b':Varchar)::Varchar] } + LogicalProject { exprs: [Array('a':Varchar, 'b':Varchar)::Varchar as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: bool[] cast in explicit context sql: | select array[1, true]::bool[]; logical_plan: | - LogicalProject { exprs: [Array(1:Int32::Boolean, true:Boolean)] } + LogicalProject { exprs: [Array(1:Int32::Boolean, true:Boolean) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: bool[][] cast in explicit context sql: | select array[array[1, true]]::bool[][]; logical_plan: | - LogicalProject { exprs: [Array(Array(1:Int32::Boolean, true:Boolean))] } + LogicalProject { exprs: [Array(Array(1:Int32::Boolean, true:Boolean)) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: compare with null sql: | 
select null = array[1]; logical_plan: | - LogicalProject { exprs: [(null:List { datatype: Int32 } = Array(1:Int32))] } + LogicalProject { exprs: [(null:List { datatype: Int32 } = Array(1:Int32)) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: compare with literal sql: | select array[1] = '{1}'; logical_plan: | - LogicalProject { exprs: [(Array(1:Int32) = '{1}':Varchar::List { datatype: Int32 })] } + LogicalProject { exprs: [(Array(1:Int32) = '{1}':Varchar::List { datatype: Int32 }) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: compare with different type sql: | diff --git a/src/frontend/planner_test/tests/testdata/array_access.yaml b/src/frontend/planner_test/tests/testdata/array_access.yaml index ffce9c03482b2..d1adfeb18d699 100644 --- a/src/frontend/planner_test/tests/testdata/array_access.yaml +++ b/src/frontend/planner_test/tests/testdata/array_access.yaml @@ -2,7 +2,7 @@ - sql: | select (ARRAY['foo', 'bar'])[1]; logical_plan: | - LogicalProject { exprs: [ArrayAccess(Array('foo':Varchar, 'bar':Varchar), 1:Int32)] } + LogicalProject { exprs: [ArrayAccess(Array('foo':Varchar, 'bar':Varchar), 1:Int32) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | create table t(i int[]); diff --git a/src/frontend/planner_test/tests/testdata/basic_query.yaml b/src/frontend/planner_test/tests/testdata/basic_query.yaml index 5b141abcc0d1f..7320d46d79e9a 100644 --- a/src/frontend/planner_test/tests/testdata/basic_query.yaml +++ b/src/frontend/planner_test/tests/testdata/basic_query.yaml @@ -98,8 +98,7 @@ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: select 1 batch_plan: | - BatchProject { exprs: [1:Int32] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[1:Int32]] } - sql: | create table t (v1 bigint, v2 double precision); select a from t as t2(a); @@ -194,3 +193,11 @@ stream_plan: | StreamMaterialize { columns: [a, b, mv.t._row_id(hidden)], pk_columns: [mv.t._row_id] } └─StreamTableScan { table: mv, columns: [mv.a, mv.b, mv.t._row_id], pk: [mv.t._row_id], dist: UpstreamHashShard(mv.t._row_id) } +- sql: | + create table t (id int primary key, col int); + create index idx on t(col); + select id from idx; + stream_plan: | + StreamMaterialize { columns: [id], pk_columns: [id] } + └─StreamExchange { dist: HashShard(idx.id) } + └─StreamTableScan { table: idx, columns: [idx.id], pk: [idx.id], dist: SomeShard } diff --git a/src/frontend/planner_test/tests/testdata/batch_index_join.yaml b/src/frontend/planner_test/tests/testdata/batch_index_join.yaml index 96634888eb334..9a3b831ac457d 100644 --- a/src/frontend/planner_test/tests/testdata/batch_index_join.yaml +++ b/src/frontend/planner_test/tests/testdata/batch_index_join.yaml @@ -66,3 +66,13 @@ └─BatchLookupJoin { type: Inner, predicate: t.a = t2.c, output: [t2.c, t2.d, t.a] } └─BatchExchange { order: [], dist: UpstreamHashShard(t.a) } └─BatchScan { table: t, columns: [t.a], distribution: SomeShard } +- sql: | + create table t1 (a int, b int); + create table t2 (c int, d int); + create index idx on t2(c) include (d); + select * from t1 join idx on t1.a = idx.c and t1.b = idx.d; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchLookupJoin { type: Inner, predicate: t1.a = idx.c AND t1.b = idx.d, output: all } + └─BatchExchange { order: [], dist: UpstreamHashShard(t1.a) } + └─BatchScan { table: t1, columns: [t1.a, t1.b], distribution: SomeShard } diff --git 
a/src/frontend/planner_test/tests/testdata/cast.yaml b/src/frontend/planner_test/tests/testdata/cast.yaml index e122dd0ace778..83c69c2134794 100644 --- a/src/frontend/planner_test/tests/testdata/cast.yaml +++ b/src/frontend/planner_test/tests/testdata/cast.yaml @@ -18,8 +18,7 @@ sql: | select case when NULL then 1 end; batch_plan: | - BatchProject { exprs: [Case(null:Boolean, 1:Int32)] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[Case(null:Boolean, 1:Int32)]] } - name: implicit cast boolean (JOIN ON NULL) sql: | create table t1(v1 int); @@ -37,8 +36,7 @@ sql: | select current_schemas(NULL); batch_plan: | - BatchProject { exprs: [null:List { datatype: Varchar }] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[null:List { datatype: Varchar }]] } - name: FILTER (FILTER NULL) sql: | create table t(v1 int); @@ -52,21 +50,17 @@ sql: | select 1 having 'y'; batch_plan: | - BatchProject { exprs: [1:Int32] } - └─BatchSimpleAgg { aggs: [] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[1:Int32]] } - name: implicit cast boolean (WHERE with literal 'y' of unknown type) sql: | select 1 where 'y'; batch_plan: | - BatchProject { exprs: [1:Int32] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[1:Int32]] } - name: implicit cast boolean (CASE with literal 'y' of unknown type) sql: | select case when 'y' then 1 end; batch_plan: | - BatchProject { exprs: [Case(true:Boolean, 1:Int32)] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[Case(true:Boolean, 1:Int32)]] } - name: implicit cast boolean (JOIN ON with literal 'y' of unknown type) sql: | create table t1(v1 int); @@ -82,8 +76,7 @@ sql: | select current_schemas('y'); batch_plan: | - BatchProject { exprs: [ARRAY[pg_catalog, public]:List { datatype: Varchar }] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[ARRAY[pg_catalog, public]:List { datatype: Varchar }]] } - name: FILTER (FILTER with literal 'y' of unknown type) sql: | create table t(v1 int); diff --git a/src/frontend/planner_test/tests/testdata/column_pruning.yaml b/src/frontend/planner_test/tests/testdata/column_pruning.yaml index 24f0cc0ab3485..8768fa59d7c1b 100644 --- a/src/frontend/planner_test/tests/testdata/column_pruning.yaml +++ b/src/frontend/planner_test/tests/testdata/column_pruning.yaml @@ -141,15 +141,18 @@ logical_plan: | LogicalProject { exprs: [t1.a, window_end] } └─LogicalHopWindow { time_col: t1.created_at, slide: 00:15:00, size: 00:30:00, output: all } - └─LogicalScan { table: t1, columns: [t1.a, t1.b, t1.created_at, t1._row_id] } + └─LogicalFilter { predicate: IsNotNull(t1.created_at) } + └─LogicalScan { table: t1, columns: [t1.a, t1.b, t1.created_at, t1._row_id] } optimized_logical_plan: | LogicalHopWindow { time_col: t1.created_at, slide: 00:15:00, size: 00:30:00, output: [t1.a, window_end] } - └─LogicalScan { table: t1, columns: [t1.a, t1.created_at] } + └─LogicalScan { table: t1, columns: [t1.a, t1.created_at], predicate: IsNotNull(t1.created_at) } batch_plan: | BatchHopWindow { time_col: t1.created_at, slide: 00:15:00, size: 00:30:00, output: [t1.a, window_end] } └─BatchExchange { order: [], dist: Single } - └─BatchScan { table: t1, columns: [t1.a, t1.created_at], distribution: SomeShard } + └─BatchFilter { predicate: IsNotNull(t1.created_at) } + └─BatchScan { table: t1, columns: [t1.a, t1.created_at], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [a, window_end, t1._row_id(hidden)], pk_columns: [t1._row_id, window_end] } └─StreamHopWindow { time_col: t1.created_at, slide: 00:15:00, size: 00:30:00, 
output: [t1.a, window_end, t1._row_id] } - └─StreamTableScan { table: t1, columns: [t1.a, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamFilter { predicate: IsNotNull(t1.created_at) } + └─StreamTableScan { table: t1, columns: [t1.a, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/delete.yaml b/src/frontend/planner_test/tests/testdata/delete.yaml index eee1e3b7f3e1f..d86b07028f370 100644 --- a/src/frontend/planner_test/tests/testdata/delete.yaml +++ b/src/frontend/planner_test/tests/testdata/delete.yaml @@ -4,12 +4,12 @@ create table t (a int, b int); delete from t returning *, a, a+b; logical_plan: | - LogicalProject { exprs: [t.a, t.b, t.a, (t.a + t.b)] } + LogicalProject { exprs: [t.a, t.b, t.a, (t.a + t.b) as $expr1] } └─LogicalDelete { table: t, returning: true } └─LogicalScan { table: t, columns: [t.a, t.b, t._row_id] } batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [t.a, t.b, t.a, (t.a + t.b)] } + └─BatchProject { exprs: [t.a, t.b, t.a, (t.a + t.b) as $expr23] } └─BatchDelete { table: t, returning: true } └─BatchExchange { order: [], dist: Single } └─BatchScan { table: t, columns: [t.a, t.b, t._row_id], distribution: UpstreamHashShard(t._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/delta_join.yaml b/src/frontend/planner_test/tests/testdata/delta_join.yaml new file mode 100644 index 0000000000000..331210d86a96c --- /dev/null +++ b/src/frontend/planner_test/tests/testdata/delta_join.yaml @@ -0,0 +1,40 @@ +# This file is automatically generated. See `src/frontend/planner_test/README.md` for more information. +- sql: | + set rw_streaming_enable_delta_join = true; + create table a (a1 int, a2 int); + create index i_a1 on a(a1); + create table b (b1 int, b2 int); + create index i_b1 on b(b1); + /* should generate delta join plan, and stream index scan */ + select * from a join b on a.a1 = b.b1 ; + stream_plan: | + StreamMaterialize { columns: [a1, a2, b1, b2, i_a1.a._row_id(hidden), i_b1.b._row_id(hidden)], pk_columns: [i_a1.a._row_id, i_b1.b._row_id, a1, b1] } + └─StreamExchange { dist: HashShard(i_a1.a1, i_b1.b1, i_a1.a._row_id, i_b1.b._row_id) } + └─StreamDeltaJoin { type: Inner, predicate: i_a1.a1 = i_b1.b1, output: [i_a1.a1, i_a1.a2, i_b1.b1, i_b1.b2, i_a1.a._row_id, i_b1.b._row_id] } + ├─StreamIndexScan { index: i_a1, columns: [i_a1.a1, i_a1.a2, i_a1.a._row_id], pk: [i_a1.a._row_id], dist: UpstreamHashShard(i_a1.a1) } + └─StreamIndexScan { index: i_b1, columns: [i_b1.b1, i_b1.b2, i_b1.b._row_id], pk: [i_b1.b._row_id], dist: UpstreamHashShard(i_b1.b1) } +- sql: | + set rw_streaming_enable_delta_join = true; + create table a (a1 int primary key, a2 int); + create table b (b1 int, b2 int); + create index i_b1 on b(b1); + /* should generate delta join plan, and stream index scan */ + select * from a join b on a.a1 = b.b1 ; + stream_plan: | + StreamMaterialize { columns: [a1, a2, b1, b2, i_b1.b._row_id(hidden)], pk_columns: [a1, i_b1.b._row_id, b1] } + └─StreamExchange { dist: HashShard(a.a1, i_b1.b1, i_b1.b._row_id) } + └─StreamDeltaJoin { type: Inner, predicate: a.a1 = i_b1.b1, output: all } + ├─StreamTableScan { table: a, columns: [a.a1, a.a2], pk: [a.a1], dist: UpstreamHashShard(a.a1) } + └─StreamIndexScan { index: i_b1, columns: [i_b1.b1, i_b1.b2, i_b1.b._row_id], pk: [i_b1.b._row_id], dist: UpstreamHashShard(i_b1.b1) } +- sql: | + set rw_streaming_enable_delta_join = true; + create table a (a1 
int primary key, a2 int); + create table b (b1 int primary key, b2 int); + /* should generate delta join plan, and stream index scan */ + select * from a join b on a.a1 = b.b1 ; + stream_plan: | + StreamMaterialize { columns: [a1, a2, b1, b2], pk_columns: [a1, b1] } + └─StreamExchange { dist: HashShard(a.a1, b.b1) } + └─StreamDeltaJoin { type: Inner, predicate: a.a1 = b.b1, output: all } + ├─StreamTableScan { table: a, columns: [a.a1, a.a2], pk: [a.a1], dist: UpstreamHashShard(a.a1) } + └─StreamTableScan { table: b, columns: [b.b1, b.b2], pk: [b.b1], dist: UpstreamHashShard(b.b1) } diff --git a/src/frontend/planner_test/tests/testdata/distribution_derive.yaml b/src/frontend/planner_test/tests/testdata/distribution_derive.yaml index 62f47e9271f88..c47a702581e21 100644 --- a/src/frontend/planner_test/tests/testdata/distribution_derive.yaml +++ b/src/frontend/planner_test/tests/testdata/distribution_derive.yaml @@ -1,6 +1,7 @@ # This file is automatically generated. See `src/frontend/planner_test/README.md` for more information. - id: create_tables sql: | + set rw_streaming_enable_delta_join = true; create table A (k1 int, k2 int, k3 int, v int); create index Ak1 on A(k1) include(k1,k2,k3,v); create index Ak1k2 on A(k1,k2) include(k1,k2,k3,v); @@ -16,12 +17,43 @@ └─BatchExchange { order: [], dist: UpstreamHashShard(a.k1) } └─BatchScan { table: a, columns: [a.k1, a.v], distribution: SomeShard } stream_plan: | - StreamMaterialize { columns: [v, bv, a._row_id(hidden), a.k1(hidden), b._row_id(hidden), b.k1(hidden)], pk_columns: [a._row_id, b._row_id, a.k1, b.k1] } - └─StreamHashJoin { type: Inner, predicate: a.k1 = b.k1, output: [a.v, b.v, a._row_id, a.k1, b._row_id, b.k1] } - ├─StreamExchange { dist: HashShard(a.k1) } - | └─StreamTableScan { table: a, columns: [a.k1, a.v, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } - └─StreamExchange { dist: HashShard(b.k1) } - └─StreamTableScan { table: b, columns: [b.k1, b.v, b._row_id], pk: [b._row_id], dist: UpstreamHashShard(b._row_id) } + StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), bk1.b._row_id(hidden), bk1.k1(hidden)], pk_columns: [ak1.a._row_id, bk1.b._row_id, ak1.k1, bk1.k1] } + └─StreamExchange { dist: HashShard(ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1) } + └─StreamDeltaJoin { type: Inner, predicate: ak1.k1 = bk1.k1, output: [ak1.v, bk1.v, ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1] } + ├─StreamIndexScan { index: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } + └─StreamIndexScan { index: bk1, columns: [bk1.k1, bk1.v, bk1.b._row_id], pk: [bk1.b._row_id], dist: UpstreamHashShard(bk1.k1) } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), bk1.b._row_id(hidden), bk1.k1(hidden)], pk_columns: [ak1.a._row_id, bk1.b._row_id, ak1.k1, bk1.k1] } + materialized table: 4294967294 + StreamExchange Hash([2, 3, 4, 5]) from 1 + + Fragment 1 + Union + StreamExchange Hash([2, 4, 3, 5]) from 4 + StreamExchange Hash([2, 4, 3, 5]) from 5 + + Fragment 2 + StreamIndexScan { index: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } + Upstream + BatchPlanNode + + Fragment 3 + StreamIndexScan { index: bk1, columns: [bk1.k1, bk1.v, bk1.b._row_id], pk: [bk1.b._row_id], dist: UpstreamHashShard(bk1.k1) } + Upstream + BatchPlanNode + + Fragment 4 + Lookup + StreamExchange Hash([0]) from 3 + StreamExchange NoShuffle from 2 + + Fragment 5 + Lookup + 
StreamExchange Hash([0]) from 2 + StreamExchange NoShuffle from 3 + + Table 4294967294 { columns: [v, bv, ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1], primary key: [$2 ASC, $4 ASC, $3 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [2, 3, 4, 5] } - id: Ak1_join_B_onk1 before: - create_tables @@ -32,36 +64,43 @@ └─BatchExchange { order: [], dist: UpstreamHashShard(ak1.k1) } └─BatchScan { table: ak1, columns: [ak1.k1, ak1.v], distribution: UpstreamHashShard(ak1.k1) } stream_plan: | - StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), b._row_id(hidden), b.k1(hidden)], pk_columns: [ak1.a._row_id, b._row_id, ak1.k1, b.k1] } - └─StreamHashJoin { type: Inner, predicate: ak1.k1 = b.k1, output: [ak1.v, b.v, ak1.a._row_id, ak1.k1, b._row_id, b.k1] } - ├─StreamExchange { dist: HashShard(ak1.k1) } - | └─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } - └─StreamExchange { dist: HashShard(b.k1) } - └─StreamTableScan { table: b, columns: [b.k1, b.v, b._row_id], pk: [b._row_id], dist: UpstreamHashShard(b._row_id) } + StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), bk1.b._row_id(hidden), bk1.k1(hidden)], pk_columns: [ak1.a._row_id, bk1.b._row_id, ak1.k1, bk1.k1] } + └─StreamExchange { dist: HashShard(ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1) } + └─StreamDeltaJoin { type: Inner, predicate: ak1.k1 = bk1.k1, output: [ak1.v, bk1.v, ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1] } + ├─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } + └─StreamIndexScan { index: bk1, columns: [bk1.k1, bk1.v, bk1.b._row_id], pk: [bk1.b._row_id], dist: UpstreamHashShard(bk1.k1) } stream_dist_plan: | Fragment 0 - StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), b._row_id(hidden), b.k1(hidden)], pk_columns: [ak1.a._row_id, b._row_id, ak1.k1, b.k1] } + StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), bk1.b._row_id(hidden), bk1.k1(hidden)], pk_columns: [ak1.a._row_id, bk1.b._row_id, ak1.k1, bk1.k1] } materialized table: 4294967294 - StreamHashJoin { type: Inner, predicate: ak1.k1 = b.k1, output: [ak1.v, b.v, ak1.a._row_id, ak1.k1, b._row_id, b.k1] } - left table: 0, right table 2, left degree table: 1, right degree table: 3, - StreamExchange Hash([0]) from 1 - StreamExchange Hash([0]) from 2 + StreamExchange Hash([2, 3, 4, 5]) from 1 Fragment 1 + Union + StreamExchange Hash([2, 4, 3, 5]) from 4 + StreamExchange Hash([2, 4, 3, 5]) from 5 + + Fragment 2 Chain { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } Upstream BatchPlanNode - Fragment 2 - Chain { table: b, columns: [b.k1, b.v, b._row_id], pk: [b._row_id], dist: UpstreamHashShard(b._row_id) } + Fragment 3 + StreamIndexScan { index: bk1, columns: [bk1.k1, bk1.v, bk1.b._row_id], pk: [bk1.b._row_id], dist: UpstreamHashShard(bk1.k1) } Upstream BatchPlanNode - Table 0 { columns: [ak1_k1, ak1_v, ak1_a__row_id], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 1 { columns: [ak1_k1, ak1_a__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 2 { columns: [b_k1, b_v, b__row_id], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 3 { columns: [b_k1, b__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], 
distribution key: [0] } - Table 4294967294 { columns: [v, bv, ak1.a._row_id, ak1.k1, b._row_id, b.k1], primary key: [$2 ASC, $4 ASC, $3 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [3] } + Fragment 4 + Lookup + StreamExchange Hash([0]) from 3 + StreamExchange NoShuffle from 2 + + Fragment 5 + Lookup + StreamExchange Hash([0]) from 2 + StreamExchange NoShuffle from 3 + + Table 4294967294 { columns: [v, bv, ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1], primary key: [$2 ASC, $4 ASC, $3 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [2, 3, 4, 5] } - id: A_join_Bk1_onk1 before: - create_tables @@ -72,36 +111,43 @@ └─BatchExchange { order: [], dist: UpstreamHashShard(a.k1) } └─BatchScan { table: a, columns: [a.k1, a.v], distribution: SomeShard } stream_plan: | - StreamMaterialize { columns: [v, bv, a._row_id(hidden), a.k1(hidden), bk1.b._row_id(hidden), bk1.k1(hidden)], pk_columns: [a._row_id, bk1.b._row_id, a.k1, bk1.k1] } - └─StreamHashJoin { type: Inner, predicate: a.k1 = bk1.k1, output: [a.v, bk1.v, a._row_id, a.k1, bk1.b._row_id, bk1.k1] } - ├─StreamExchange { dist: HashShard(a.k1) } - | └─StreamTableScan { table: a, columns: [a.k1, a.v, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } - └─StreamExchange { dist: HashShard(bk1.k1) } + StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), bk1.b._row_id(hidden), bk1.k1(hidden)], pk_columns: [ak1.a._row_id, bk1.b._row_id, ak1.k1, bk1.k1] } + └─StreamExchange { dist: HashShard(ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1) } + └─StreamDeltaJoin { type: Inner, predicate: ak1.k1 = bk1.k1, output: [ak1.v, bk1.v, ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1] } + ├─StreamIndexScan { index: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } └─StreamTableScan { table: bk1, columns: [bk1.k1, bk1.v, bk1.b._row_id], pk: [bk1.b._row_id], dist: UpstreamHashShard(bk1.k1) } stream_dist_plan: | Fragment 0 - StreamMaterialize { columns: [v, bv, a._row_id(hidden), a.k1(hidden), bk1.b._row_id(hidden), bk1.k1(hidden)], pk_columns: [a._row_id, bk1.b._row_id, a.k1, bk1.k1] } + StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), bk1.b._row_id(hidden), bk1.k1(hidden)], pk_columns: [ak1.a._row_id, bk1.b._row_id, ak1.k1, bk1.k1] } materialized table: 4294967294 - StreamHashJoin { type: Inner, predicate: a.k1 = bk1.k1, output: [a.v, bk1.v, a._row_id, a.k1, bk1.b._row_id, bk1.k1] } - left table: 0, right table 2, left degree table: 1, right degree table: 3, - StreamExchange Hash([0]) from 1 - StreamExchange Hash([0]) from 2 + StreamExchange Hash([2, 3, 4, 5]) from 1 Fragment 1 - Chain { table: a, columns: [a.k1, a.v, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } + Union + StreamExchange Hash([2, 4, 3, 5]) from 4 + StreamExchange Hash([2, 4, 3, 5]) from 5 + + Fragment 2 + StreamIndexScan { index: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } Upstream BatchPlanNode - Fragment 2 + Fragment 3 Chain { table: bk1, columns: [bk1.k1, bk1.v, bk1.b._row_id], pk: [bk1.b._row_id], dist: UpstreamHashShard(bk1.k1) } Upstream BatchPlanNode - Table 0 { columns: [a_k1, a_v, a__row_id], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 1 { columns: [a_k1, a__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 2 { columns: [bk1_k1, bk1_v, bk1_b__row_id], primary key: [$0 ASC, $2 
ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 3 { columns: [bk1_k1, bk1_b__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 4294967294 { columns: [v, bv, a._row_id, a.k1, bk1.b._row_id, bk1.k1], primary key: [$2 ASC, $4 ASC, $3 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [3] } + Fragment 4 + Lookup + StreamExchange Hash([0]) from 3 + StreamExchange NoShuffle from 2 + + Fragment 5 + Lookup + StreamExchange Hash([0]) from 2 + StreamExchange NoShuffle from 3 + + Table 4294967294 { columns: [v, bv, ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1], primary key: [$2 ASC, $4 ASC, $3 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [2, 3, 4, 5] } - id: Ak1_join_Bk1_onk1 before: - create_tables @@ -113,35 +159,42 @@ └─BatchScan { table: ak1, columns: [ak1.k1, ak1.v], distribution: UpstreamHashShard(ak1.k1) } stream_plan: | StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), bk1.b._row_id(hidden), bk1.k1(hidden)], pk_columns: [ak1.a._row_id, bk1.b._row_id, ak1.k1, bk1.k1] } - └─StreamHashJoin { type: Inner, predicate: ak1.k1 = bk1.k1, output: [ak1.v, bk1.v, ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1] } - ├─StreamExchange { dist: HashShard(ak1.k1) } - | └─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } - └─StreamExchange { dist: HashShard(bk1.k1) } + └─StreamExchange { dist: HashShard(ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1) } + └─StreamDeltaJoin { type: Inner, predicate: ak1.k1 = bk1.k1, output: [ak1.v, bk1.v, ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1] } + ├─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } └─StreamTableScan { table: bk1, columns: [bk1.k1, bk1.v, bk1.b._row_id], pk: [bk1.b._row_id], dist: UpstreamHashShard(bk1.k1) } stream_dist_plan: | Fragment 0 StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), bk1.b._row_id(hidden), bk1.k1(hidden)], pk_columns: [ak1.a._row_id, bk1.b._row_id, ak1.k1, bk1.k1] } materialized table: 4294967294 - StreamHashJoin { type: Inner, predicate: ak1.k1 = bk1.k1, output: [ak1.v, bk1.v, ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1] } - left table: 0, right table 2, left degree table: 1, right degree table: 3, - StreamExchange Hash([0]) from 1 - StreamExchange Hash([0]) from 2 + StreamExchange Hash([2, 3, 4, 5]) from 1 Fragment 1 + Union + StreamExchange Hash([2, 4, 3, 5]) from 4 + StreamExchange Hash([2, 4, 3, 5]) from 5 + + Fragment 2 Chain { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } Upstream BatchPlanNode - Fragment 2 + Fragment 3 Chain { table: bk1, columns: [bk1.k1, bk1.v, bk1.b._row_id], pk: [bk1.b._row_id], dist: UpstreamHashShard(bk1.k1) } Upstream BatchPlanNode - Table 0 { columns: [ak1_k1, ak1_v, ak1_a__row_id], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 1 { columns: [ak1_k1, ak1_a__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 2 { columns: [bk1_k1, bk1_v, bk1_b__row_id], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 3 { columns: [bk1_k1, bk1_b__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 4294967294 { columns: [v, bv, ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1], primary key: 
[$2 ASC, $4 ASC, $3 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [3] } + Fragment 4 + Lookup + StreamExchange Hash([0]) from 3 + StreamExchange NoShuffle from 2 + + Fragment 5 + Lookup + StreamExchange Hash([0]) from 2 + StreamExchange NoShuffle from 3 + + Table 4294967294 { columns: [v, bv, ak1.a._row_id, ak1.k1, bk1.b._row_id, bk1.k1], primary key: [$2 ASC, $4 ASC, $3 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [2, 3, 4, 5] } - id: aggk1_from_A before: - create_tables @@ -626,8 +679,8 @@ Table 0 { columns: [ak1_k1, ak1_v, ak1_a__row_id], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } Table 1 { columns: [ak1_k1, ak1_a__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 2 { columns: [count, a_k1], primary key: [$1 ASC, $1 ASC], value indices: [0, 1], distribution key: [1] } - Table 3 { columns: [a_k1, a_k1_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 2 { columns: [count, a_k1], primary key: [$1 ASC], value indices: [0, 1], distribution key: [1] } + Table 3 { columns: [a_k1, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4 { columns: [a_k1, count, count_0], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } Table 4294967294 { columns: [v, bv, ak1.a._row_id, ak1.k1, a.k1], primary key: [$2 ASC, $4 ASC, $3 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [3] } - id: aggk1_join_Ak1_onk1 @@ -680,8 +733,8 @@ Upstream BatchPlanNode - Table 0 { columns: [count, a_k1], primary key: [$1 ASC, $1 ASC], value indices: [0, 1], distribution key: [1] } - Table 1 { columns: [a_k1, a_k1_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 0 { columns: [count, a_k1], primary key: [$1 ASC], value indices: [0, 1], distribution key: [1] } + Table 1 { columns: [a_k1, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 2 { columns: [ak1_k1, ak1_v, ak1_a__row_id], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } Table 3 { columns: [ak1_k1, ak1_a__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 4 { columns: [a_k1, count, count_0], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } @@ -749,10 +802,10 @@ Upstream BatchPlanNode - Table 0 { columns: [count, a_k1], primary key: [$1 ASC, $1 ASC], value indices: [0, 1], distribution key: [1] } - Table 1 { columns: [a_k1, a_k1_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 2 { columns: [count, b_k1], primary key: [$1 ASC, $1 ASC], value indices: [0, 1], distribution key: [1] } - Table 3 { columns: [b_k1, b_k1_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 0 { columns: [count, a_k1], primary key: [$1 ASC], value indices: [0, 1], distribution key: [1] } + Table 1 { columns: [a_k1, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 2 { columns: [count, b_k1], primary key: [$1 ASC], value indices: [0, 1], distribution key: [1] } + Table 3 { columns: [b_k1, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4 { columns: [a_k1, count, count_0], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } Table 5 { columns: [b_k1, count, count_0], primary key: [$0 ASC], value indices: [1, 2], 
distribution key: [0] } Table 4294967294 { columns: [num, bv, a.k1, b.k1], primary key: [$2 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [2] } @@ -762,25 +815,29 @@ logical_plan: | LogicalProject { exprs: [t1.row_id, t1.uid, t1.v, t1.created_at, window_start, window_end] } └─LogicalHopWindow { time_col: t1.created_at, slide: 00:15:00, size: 00:30:00, output: all } - └─LogicalScan { table: t1, columns: [t1.row_id, t1.uid, t1.v, t1.created_at, t1._row_id] } + └─LogicalFilter { predicate: IsNotNull(t1.created_at) } + └─LogicalScan { table: t1, columns: [t1.row_id, t1.uid, t1.v, t1.created_at, t1._row_id] } optimized_logical_plan: | LogicalHopWindow { time_col: t1.created_at, slide: 00:15:00, size: 00:30:00, output: all } - └─LogicalScan { table: t1, columns: [t1.row_id, t1.uid, t1.v, t1.created_at] } + └─LogicalScan { table: t1, columns: [t1.row_id, t1.uid, t1.v, t1.created_at], predicate: IsNotNull(t1.created_at) } batch_plan: | BatchHopWindow { time_col: t1.created_at, slide: 00:15:00, size: 00:30:00, output: all } └─BatchExchange { order: [], dist: Single } - └─BatchScan { table: t1, columns: [t1.row_id, t1.uid, t1.v, t1.created_at], distribution: SomeShard } + └─BatchFilter { predicate: IsNotNull(t1.created_at) } + └─BatchScan { table: t1, columns: [t1.row_id, t1.uid, t1.v, t1.created_at], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [row_id, uid, v, created_at, window_start, window_end, t1._row_id(hidden)], pk_columns: [t1._row_id, window_start, window_end] } └─StreamHopWindow { time_col: t1.created_at, slide: 00:15:00, size: 00:30:00, output: [t1.row_id, t1.uid, t1.v, t1.created_at, window_start, window_end, t1._row_id] } - └─StreamTableScan { table: t1, columns: [t1.row_id, t1.uid, t1.v, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamFilter { predicate: IsNotNull(t1.created_at) } + └─StreamTableScan { table: t1, columns: [t1.row_id, t1.uid, t1.v, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } stream_dist_plan: | Fragment 0 StreamMaterialize { columns: [row_id, uid, v, created_at, window_start, window_end, t1._row_id(hidden)], pk_columns: [t1._row_id, window_start, window_end] } materialized table: 4294967294 StreamHopWindow { time_col: t1.created_at, slide: 00:15:00, size: 00:30:00, output: [t1.row_id, t1.uid, t1.v, t1.created_at, window_start, window_end, t1._row_id] } - Chain { table: t1, columns: [t1.row_id, t1.uid, t1.v, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - Upstream - BatchPlanNode + StreamFilter { predicate: IsNotNull(t1.created_at) } + Chain { table: t1, columns: [t1.row_id, t1.uid, t1.v, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + Upstream + BatchPlanNode Table 4294967294 { columns: [row_id, uid, v, created_at, window_start, window_end, t1._row_id], primary key: [$6 ASC, $4 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [6] } diff --git a/src/frontend/planner_test/tests/testdata/dynamic_filter.yaml b/src/frontend/planner_test/tests/testdata/dynamic_filter.yaml index e61c906616f5d..b361a6263eec4 100644 --- a/src/frontend/planner_test/tests/testdata/dynamic_filter.yaml +++ b/src/frontend/planner_test/tests/testdata/dynamic_filter.yaml @@ -21,8 +21,8 @@ └─StreamProject { exprs: [max(max(t2.v2))] } └─StreamGlobalSimpleAgg { aggs: [count, max(max(t2.v2))] } └─StreamExchange { dist: Single } - └─StreamHashAgg { group_key: [Vnode(t2._row_id)], aggs: 
[count, max(t2.v2)] } - └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id)] } + └─StreamHashAgg { group_key: [$expr1], aggs: [count, max(t2.v2)] } + └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] } └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - name: | With Top-1 on inner side @@ -50,23 +50,23 @@ sql: | with max_v2 as (select max(v2) max from t2) select v1 from t1, max_v2 where v1 + v1 > max; optimized_logical_plan: | - LogicalJoin { type: Inner, on: ((t1.v1 + t1.v1) > max(t2.v2)), output: [t1.v1] } - ├─LogicalProject { exprs: [t1.v1, (t1.v1 + t1.v1)] } + LogicalJoin { type: Inner, on: ($expr9 > max(t2.v2)), output: [t1.v1] } + ├─LogicalProject { exprs: [t1.v1, (t1.v1 + t1.v1) as $expr9] } | └─LogicalScan { table: t1, columns: [t1.v1] } └─LogicalAgg { aggs: [max(t2.v2)] } └─LogicalScan { table: t2, columns: [t2.v2] } stream_plan: | StreamMaterialize { columns: [v1, t1._row_id(hidden)], pk_columns: [t1._row_id] } └─StreamProject { exprs: [t1.v1, t1._row_id] } - └─StreamDynamicFilter { predicate: ((t1.v1 + t1.v1) > max(max(t2.v2))), output: [t1.v1, (t1.v1 + t1.v1), t1._row_id] } - ├─StreamProject { exprs: [t1.v1, (t1.v1 + t1.v1), t1._row_id] } + └─StreamDynamicFilter { predicate: ($expr21 > max(max(t2.v2))), output: [t1.v1, $expr21, t1._row_id] } + ├─StreamProject { exprs: [t1.v1, (t1.v1 + t1.v1) as $expr21, t1._row_id] } | └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } └─StreamExchange { dist: Broadcast } └─StreamProject { exprs: [max(max(t2.v2))] } └─StreamGlobalSimpleAgg { aggs: [count, max(max(t2.v2))] } └─StreamExchange { dist: Single } - └─StreamHashAgg { group_key: [Vnode(t2._row_id)], aggs: [count, max(t2.v2)] } - └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id)] } + └─StreamHashAgg { group_key: [$expr22], aggs: [count, max(t2.v2)] } + └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr22] } └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - name: Ensure error on multiple rows on inner side before: @@ -117,6 +117,68 @@ └─StreamProject { exprs: [max(max(t2.v2))] } └─StreamGlobalSimpleAgg { aggs: [count, max(max(t2.v2))] } └─StreamExchange { dist: Single } - └─StreamHashAgg { group_key: [Vnode(t2._row_id)], aggs: [count, max(t2.v2)] } - └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id)] } + └─StreamHashAgg { group_key: [$expr1], aggs: [count, max(t2.v2)] } + └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] } + └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } +- name: Dynamic filter join on unequal types + sql: | + create table t1 (v1 int); + create table t2 (v2 bigint); + with max_v2 as (select max(v2) max from t2) select v1 from t1, max_v2 where v1 > max; + optimized_logical_plan: | + LogicalJoin { type: Inner, on: ($expr9 > max(t2.v2)), output: [t1.v1] } + ├─LogicalProject { exprs: [t1.v1, t1.v1::Int64 as $expr9] } + | └─LogicalScan { table: t1, columns: [t1.v1] } + └─LogicalAgg { aggs: [max(t2.v2)] } + └─LogicalScan { table: t2, columns: [t2.v2] } + stream_plan: | + StreamMaterialize { columns: [v1, t1._row_id(hidden)], pk_columns: [t1._row_id] } + └─StreamProject { exprs: [t1.v1, t1._row_id] } + └─StreamDynamicFilter { predicate: ($expr21 > max(max(t2.v2))), output: [t1.v1, $expr21, t1._row_id] } + 
├─StreamProject { exprs: [t1.v1, t1.v1::Int64 as $expr21, t1._row_id] } + | └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: Broadcast } + └─StreamProject { exprs: [max(max(t2.v2))] } + └─StreamGlobalSimpleAgg { aggs: [count, max(max(t2.v2))] } + └─StreamExchange { dist: Single } + └─StreamHashAgg { group_key: [$expr22], aggs: [count, max(t2.v2)] } + └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr22] } + └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } +- name: Dynamic filter on semi join + sql: | + create table t1 (v1 int); + create table t2 (v2 int); + with max_v2 as (select max(v2) max from t2) select v1 from t1 where exists (select * from max_v2 where v1 > max); + stream_plan: | + StreamMaterialize { columns: [v1, t1._row_id(hidden)], pk_columns: [t1._row_id] } + └─StreamDynamicFilter { predicate: (t1.v1 > max(max(t2.v2))), output: [t1.v1, t1._row_id] } + ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: Broadcast } + └─StreamProject { exprs: [max(max(t2.v2))] } + └─StreamGlobalSimpleAgg { aggs: [count, max(max(t2.v2))] } + └─StreamExchange { dist: Single } + └─StreamHashAgg { group_key: [$expr1], aggs: [count, max(t2.v2)] } + └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] } + └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } +- name: Complex expression on RHS of condition will still result in dynamic filter + before: + - create_tables + sql: | + with max_v2 as (select max(v2) max from t2) select v1 from t1, max_v2 where v1 > 2 * max; + optimized_logical_plan: | + LogicalJoin { type: Inner, on: (t1.v1 > $expr9), output: [t1.v1] } + ├─LogicalScan { table: t1, columns: [t1.v1] } + └─LogicalProject { exprs: [(2:Int32 * max(t2.v2)) as $expr9] } + └─LogicalAgg { aggs: [max(t2.v2)] } + └─LogicalScan { table: t2, columns: [t2.v2] } + stream_plan: | + StreamMaterialize { columns: [v1, t1._row_id(hidden)], pk_columns: [t1._row_id] } + └─StreamDynamicFilter { predicate: (t1.v1 > $expr23), output: [t1.v1, t1._row_id] } + ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: Broadcast } + └─StreamProject { exprs: [(2:Int32 * max(max(t2.v2))) as $expr23] } + └─StreamGlobalSimpleAgg { aggs: [count, max(max(t2.v2))] } + └─StreamExchange { dist: Single } + └─StreamHashAgg { group_key: [$expr21], aggs: [count, max(t2.v2)] } + └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr21] } └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/explain.yaml b/src/frontend/planner_test/tests/testdata/explain.yaml index c89502dce3ade..f71b9299f9bb9 100644 --- a/src/frontend/planner_test/tests/testdata/explain.yaml +++ b/src/frontend/planner_test/tests/testdata/explain.yaml @@ -36,23 +36,27 @@ LogicalProject { exprs: [1:Int32] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } + Project Remove: + + apply TrivialProjectToValuesRule 1 time(s) + + LogicalValues { rows: [[1:Int32]], schema: Schema { fields: [1:Int32:Int32] } } + To Batch Physical Plan: - BatchProject { exprs: [1:Int32] } - └─BatchValues { rows: [[]] 
} + BatchValues { rows: [[1:Int32]] } To Batch Local Plan: - BatchProject { exprs: [1:Int32] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[1:Int32]] } { "root_stage_id": 0, "stages": { "0": { "root": { - "plan_node_id": 28, - "plan_node_type": "BatchProject", + "plan_node_id": 22, + "plan_node_type": "BatchValues", "schema": [ { "dataType": { @@ -62,15 +66,7 @@ "name": "1:Int32" } ], - "children": [ - { - "plan_node_id": 26, - "plan_node_type": "BatchValues", - "schema": [], - "children": [], - "source_stage_id": null - } - ], + "children": [], "source_stage_id": null }, "parallelism": 1, @@ -169,3 +165,19 @@ explain (logical) create table t1(v1 int); explain_output: | LogicalSource +- sql: | + explain create table t (v1 int, v2 varchar); + explain_output: | + StreamMaterialize { columns: [v1, v2, _row_id(hidden)], pk_columns: [_row_id] } + └─StreamExchange { dist: HashShard(_row_id) } + └─StreamRowIdGen { row_id_index: 2 } + └─StreamDml { columns: [v1, v2, _row_id] } + └─StreamSource +- sql: | + explain create table t (v1 int, v2 varchar) with ( connector = 'kafka', kafka.topic = 'kafka_3_partition_topic', kafka.brokers = '127.0.0.1:1234', kafka.scan.startup.mode='earliest' ) row format json; + explain_output: | + StreamMaterialize { columns: [v1, v2, _row_id(hidden)], pk_columns: [_row_id] } + └─StreamExchange { dist: HashShard(_row_id) } + └─StreamRowIdGen { row_id_index: 2 } + └─StreamDml { columns: [v1, v2, _row_id] } + └─StreamSource { source: "t", columns: ["v1", "v2", "_row_id"] } diff --git a/src/frontend/planner_test/tests/testdata/expr.yaml b/src/frontend/planner_test/tests/testdata/expr.yaml index 2716382d399b1..6803e6020bef4 100644 --- a/src/frontend/planner_test/tests/testdata/expr.yaml +++ b/src/frontend/planner_test/tests/testdata/expr.yaml @@ -29,50 +29,48 @@ select (((((false is not true) is true) is not false) is false) is not null) is null from t; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [IsNull(IsNotNull(IsFalse(IsNotFalse(IsTrue(IsNotTrue(false:Boolean))))))] } + └─BatchProject { exprs: [IsNull(IsNotNull(IsFalse(IsNotFalse(IsTrue(IsNotTrue(false:Boolean)))))) as $expr23] } └─BatchScan { table: t, columns: [], distribution: SomeShard } - name: bind between sql: | SELECT 1 between 2 and 3 logical_plan: | - LogicalProject { exprs: [((1:Int32 >= 2:Int32) AND (1:Int32 <= 3:Int32))] } + LogicalProject { exprs: [((1:Int32 >= 2:Int32) AND (1:Int32 <= 3:Int32)) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: null eq null sql: | SELECT null = null; logical_plan: | - LogicalProject { exprs: [(null:Varchar = null:Varchar)] } + LogicalProject { exprs: [(null:Varchar = null:Varchar) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: null lt null sql: | SELECT null < null; logical_plan: | - LogicalProject { exprs: [(null:Varchar < null:Varchar)] } + LogicalProject { exprs: [(null:Varchar < null:Varchar) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: bind is distinct from sql: | SELECT 1 IS DISTINCT FROM 2 logical_plan: | - LogicalProject { exprs: [IsDistinctFrom(1:Int32, 2:Int32)] } + LogicalProject { exprs: [IsDistinctFrom(1:Int32, 2:Int32) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: bind is not distinct from sql: | SELECT 1 IS NOT DISTINCT FROM 2 logical_plan: | - LogicalProject { exprs: [IsNotDistinctFrom(1:Int32, 2:Int32)] } + LogicalProject { exprs: [IsNotDistinctFrom(1:Int32, 2:Int32) 
as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: in-list with aligned types sql: | SELECT 1::real in (3, 1.0, 2); batch_plan: | - BatchProject { exprs: [In(1:Int32::Float32, 3:Int32::Float32, 1.0:Decimal::Float32, 2:Int32::Float32)] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[In(1:Int32::Float32, 3:Int32::Float32, 1.0:Decimal::Float32, 2:Int32::Float32)]] } - name: not in-list with aligned types sql: | SELECT 1::real not in (3, 1.0, 2); batch_plan: | - BatchProject { exprs: [Not(In(1:Int32::Float32, 3:Int32::Float32, 1.0:Decimal::Float32, 2:Int32::Float32))] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[Not(In(1:Int32::Float32, 3:Int32::Float32, 1.0:Decimal::Float32, 2:Int32::Float32))]] } - name: in-list with misaligned types sql: | SELECT true in (3, 1.0, 2); @@ -82,7 +80,7 @@ create table t (v1 int); SELECT 1 in (3, 0.5*2, min(v1)) from t; batch_plan: | - BatchProject { exprs: [(In(1:Int32::Decimal, 3:Int32::Decimal, (0.5:Decimal * 2:Int32)) OR (1:Int32 = min(min(t.v1))))] } + BatchProject { exprs: [(In(1:Int32::Decimal, 3:Int32::Decimal, (0.5:Decimal * 2:Int32)) OR (1:Int32 = min(min(t.v1)))) as $expr23] } └─BatchSimpleAgg { aggs: [min(min(t.v1))] } └─BatchExchange { order: [], dist: Single } └─BatchSimpleAgg { aggs: [min(t.v1)] } @@ -117,8 +115,7 @@ - sql: | select +1.0, -2.0; batch_plan: | - BatchProject { exprs: [1.0:Decimal, -2.0:Decimal] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[1.0:Decimal, -2.0:Decimal]] } - sql: | values(round(42.4382, 2)); batch_plan: | @@ -148,8 +145,6 @@ - sql: | -- When it is invalid, PostgreSQL reports error during explain, but we have to wait until execution as of now. #4235 values(round('abc')); - batch_plan: | - BatchValues { rows: [[Round('abc':Varchar::Float64)]] } - sql: | values(extract(hour from timestamp '2001-02-16 20:38:40')); batch_plan: | @@ -166,12 +161,11 @@ - sql: | select length(trim(trailing '1' from '12'))+length(trim(leading '2' from '23'))+length(trim(both '3' from '34')); batch_plan: | - BatchProject { exprs: [((Length(Rtrim('12':Varchar, '1':Varchar)) + Length(Ltrim('23':Varchar, '2':Varchar))) + Length(Trim('34':Varchar, '3':Varchar)))] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[((Length(Rtrim('12':Varchar, '1':Varchar)) + Length(Ltrim('23':Varchar, '2':Varchar))) + Length(Trim('34':Varchar, '3':Varchar)))]] } - sql: | select position(replace('1','1','2'),'123') where '12' like '%1'; batch_plan: | - BatchProject { exprs: [Position(Replace('1':Varchar, '1':Varchar, '2':Varchar), '123':Varchar)] } + BatchProject { exprs: [Position(Replace('1':Varchar, '1':Varchar, '2':Varchar), '123':Varchar) as $expr23] } └─BatchFilter { predicate: Like('12':Varchar, '%1':Varchar) } └─BatchValues { rows: [[]] } - name: case searched form with else @@ -180,11 +174,11 @@ select (case when v1=1 then 1 when v1=2 then 2 else 0.0 end) as expr from t; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [Case((t.v1 = 1:Int32), 1:Int32::Decimal, (t.v1 = 2:Int32), 2:Int32::Decimal, 0.0:Decimal)] } + └─BatchProject { exprs: [Case((t.v1 = 1:Int32), 1:Int32::Decimal, (t.v1 = 2:Int32), 2:Int32::Decimal, 0.0:Decimal) as $expr23] } └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [expr, t._row_id(hidden)], pk_columns: [t._row_id] } - └─StreamProject { exprs: [Case((t.v1 = 1:Int32), 1:Int32::Decimal, (t.v1 = 2:Int32), 2:Int32::Decimal, 0.0:Decimal), t._row_id] } + └─StreamProject { exprs: 
[Case((t.v1 = 1:Int32), 1:Int32::Decimal, (t.v1 = 2:Int32), 2:Int32::Decimal, 0.0:Decimal) as $expr46, t._row_id] } └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: case searched form without else sql: | @@ -192,7 +186,7 @@ select (case when v1=1 then 1 when v1=2 then 2.1 end) from t; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [Case((t.v1 = 1:Int32), 1:Int32::Decimal, (t.v1 = 2:Int32), 2.1:Decimal)] } + └─BatchProject { exprs: [Case((t.v1 = 1:Int32), 1:Int32::Decimal, (t.v1 = 2:Int32), 2.1:Decimal) as $expr23] } └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } - name: case simple form sql: | @@ -200,7 +194,7 @@ select (case v1 when 1 then 1 when 2.0 then 2 else 0.0 end) from t; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [Case((t.v1 = 1:Int32), 1:Int32::Decimal, (t.v1 = 2.0:Decimal), 2:Int32::Decimal, 0.0:Decimal)] } + └─BatchProject { exprs: [Case((t.v1 = 1:Int32), 1:Int32::Decimal, (t.v1 = 2.0:Decimal), 2:Int32::Decimal, 0.0:Decimal) as $expr23] } └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } - name: case misaligned result types sql: | @@ -219,11 +213,11 @@ select nullif(v1, 1) as expr from t; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [Case((t.v1 = 1:Int32), null:Int32, t.v1)] } + └─BatchProject { exprs: [Case((t.v1 = 1:Int32), null:Int32, t.v1) as $expr23] } └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [expr, t._row_id(hidden)], pk_columns: [t._row_id] } - └─StreamProject { exprs: [Case((t.v1 = 1:Int32), null:Int32, t.v1), t._row_id] } + └─StreamProject { exprs: [Case((t.v1 = 1:Int32), null:Int32, t.v1) as $expr46, t._row_id] } └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | create table t (v1 int); @@ -240,18 +234,18 @@ select coalesce(v1, 1) as expr from t; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [Coalesce(t.v1, 1:Int32)] } + └─BatchProject { exprs: [Coalesce(t.v1, 1:Int32) as $expr23] } └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [expr, t._row_id(hidden)], pk_columns: [t._row_id] } - └─StreamProject { exprs: [Coalesce(t.v1, 1:Int32), t._row_id] } + └─StreamProject { exprs: [Coalesce(t.v1, 1:Int32) as $expr46, t._row_id] } └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | create table t (v1 int); select coalesce(v1, 1.2) from t; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [Coalesce(t.v1::Decimal, 1.2:Decimal)] } + └─BatchProject { exprs: [Coalesce(t.v1::Decimal, 1.2:Decimal) as $expr23] } └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } - sql: | create table t (v1 int); @@ -266,18 +260,18 @@ select concat_ws(v1, 1) as expr from t; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [ConcatWs(t.v1, 1:Int32::Varchar)] } + └─BatchProject { exprs: [ConcatWs(t.v1, 1:Int32::Varchar) as $expr23] } └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [expr, t._row_id(hidden)], pk_columns: [t._row_id] } - └─StreamProject { exprs: [ConcatWs(t.v1, 1:Int32::Varchar), t._row_id] } + └─StreamProject { exprs: 
[ConcatWs(t.v1, 1:Int32::Varchar) as $expr46, t._row_id] } └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | create table t (v1 varchar); select concat_ws(v1, 1.2) from t; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [ConcatWs(t.v1, 1.2:Decimal::Varchar)] } + └─BatchProject { exprs: [ConcatWs(t.v1, 1.2:Decimal::Varchar) as $expr23] } └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } - sql: | create table t (v1 int); @@ -292,18 +286,18 @@ select concat(v1, v2, v3, 1) as expr from t; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [ConcatWs('':Varchar, t.v1, t.v2::Varchar, t.v3::Varchar, 1:Int32::Varchar)] } + └─BatchProject { exprs: [ConcatWs('':Varchar, t.v1, t.v2::Varchar, t.v3::Varchar, 1:Int32::Varchar) as $expr23] } └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [expr, t._row_id(hidden)], pk_columns: [t._row_id] } - └─StreamProject { exprs: [ConcatWs('':Varchar, t.v1, t.v2::Varchar, t.v3::Varchar, 1:Int32::Varchar), t._row_id] } + └─StreamProject { exprs: [ConcatWs('':Varchar, t.v1, t.v2::Varchar, t.v3::Varchar, 1:Int32::Varchar) as $expr46, t._row_id] } └─StreamTableScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | create table t (v1 float); select concat(v1) from t; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [ConcatWs('':Varchar, t.v1::Varchar)] } + └─BatchProject { exprs: [ConcatWs('':Varchar, t.v1::Varchar) as $expr23] } └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } - sql: | create table t (v1 int); @@ -312,75 +306,66 @@ - sql: | select concat(':', true); batch_plan: | - BatchProject { exprs: [ConcatWs('':Varchar, ':':Varchar, BoolOut(true:Boolean))] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[ConcatWs('':Varchar, ':':Varchar, BoolOut(true:Boolean))]] } - sql: | select ':' || true; batch_plan: | - BatchProject { exprs: [ConcatOp(':':Varchar, true:Boolean::Varchar)] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[ConcatOp(':':Varchar, true:Boolean::Varchar)]] } - sql: | select substr('hello', NULL); batch_plan: | - BatchProject { exprs: [Substr('hello':Varchar, null:Int32)] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[Substr('hello':Varchar, null:Int32)]] } - sql: | select substr(NULL, 1); batch_plan: | - BatchProject { exprs: [Substr(null:Varchar, 1:Int32)] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[Substr(null:Varchar, 1:Int32)]] } - sql: | select pg_typeof('123'); batch_plan: | - BatchProject { exprs: ['unknown':Varchar] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [['unknown':Varchar]] } - sql: | select pg_typeof(round(null)); batch_plan: | - BatchProject { exprs: ['double precision':Varchar] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [['double precision':Varchar]] } - sql: | select pg_typeof(row(true, 1, 'hello')); batch_plan: | - BatchProject { exprs: ['record':Varchar] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [['record':Varchar]] } - sql: | select pg_typeof(array[1, 2]); batch_plan: | - BatchProject { exprs: ['integer[]':Varchar] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [['integer[]':Varchar]] } - sql: | select pg_typeof(array[array[1, 2], array[3, 4]]); batch_plan: | - BatchProject { exprs: ['integer[][]':Varchar] } - 
└─BatchValues { rows: [[]] } + BatchValues { rows: [['integer[][]':Varchar]] } - sql: | select 1 = SOME(1); binder_error: 'Bind error: op ANY/ALL (array) requires array on right side' - sql: | select 1 < SOME(null); logical_plan: | - LogicalProject { exprs: [Some((1:Int32 < null:List { datatype: Int32 }))] } + LogicalProject { exprs: [Some((1:Int32 < null:List { datatype: Int32 })) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | select 1 < ANY(null); logical_plan: | - LogicalProject { exprs: [Some((1:Int32 < null:List { datatype: Int32 }))] } + LogicalProject { exprs: [Some((1:Int32 < null:List { datatype: Int32 })) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | select 1 < ALL(null); logical_plan: | - LogicalProject { exprs: [All((1:Int32 < null:List { datatype: Int32 }))] } + LogicalProject { exprs: [All((1:Int32 < null:List { datatype: Int32 })) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | select 1 < ALL('{2,3}'); logical_plan: | - LogicalProject { exprs: [All((1:Int32 < '{2,3}':Varchar::List { datatype: Int32 }))] } + LogicalProject { exprs: [All((1:Int32 < '{2,3}':Varchar::List { datatype: Int32 })) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | select 1 < SOME(null::integer[]); logical_plan: | - LogicalProject { exprs: [Some((1:Int32 < null:List { datatype: Int32 }))] } + LogicalProject { exprs: [Some((1:Int32 < null:List { datatype: Int32 })) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | select 1 < SOME(null::varchar[]); @@ -405,51 +390,49 @@ - sql: | select 1 < SOME(array[null]::integer[]); logical_plan: | - LogicalProject { exprs: [Some((1:Int32 < Array(null:Int32)))] } + LogicalProject { exprs: [Some((1:Int32 < Array(null:Int32))) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | select 1 < SOME(array[1, 2]); logical_plan: | - LogicalProject { exprs: [Some((1:Int32 < Array(1:Int32, 2:Int32)))] } + LogicalProject { exprs: [Some((1:Int32 < Array(1:Int32, 2:Int32))) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | select 1 < SOME((select array[1]) || array[2]); logical_plan: | - LogicalProject { exprs: [Some((1:Int32 < ArrayCat(Array(1:Int32), Array(2:Int32))))] } + LogicalProject { exprs: [Some((1:Int32 < ArrayCat($expr1, Array(2:Int32)))) as $expr2] } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 1, max_one_row: true } ├─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - └─LogicalProject { exprs: [Array(1:Int32)] } + └─LogicalProject { exprs: [Array(1:Int32) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } batch_plan: | - BatchProject { exprs: [Some((1:Int32 < ArrayCat(Array(1:Int32), Array(2:Int32))))] } + BatchProject { exprs: [Some((1:Int32 < ArrayCat($expr33, Array(2:Int32)))) as $expr40] } └─BatchNestedLoopJoin { type: LeftOuter, predicate: true, output: all } ├─BatchValues { rows: [[]] } - └─BatchProject { exprs: [Array(1:Int32)] } - └─BatchValues { rows: [[]] } + └─BatchValues { rows: [[Array(1:Int32)]] } - sql: | select 1 < ALL(array[null]::integer[]); logical_plan: | - LogicalProject { exprs: [All((1:Int32 < Array(null:Int32)))] } + LogicalProject { exprs: [All((1:Int32 < Array(null:Int32))) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | select 1 < ALL(array[1, 2]); logical_plan: | - LogicalProject { exprs: [All((1:Int32 < 
Array(1:Int32, 2:Int32)))] } + LogicalProject { exprs: [All((1:Int32 < Array(1:Int32, 2:Int32))) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | select 1 < ALL((select array[1]) || array[2]); logical_plan: | - LogicalProject { exprs: [All((1:Int32 < ArrayCat(Array(1:Int32), Array(2:Int32))))] } + LogicalProject { exprs: [All((1:Int32 < ArrayCat($expr1, Array(2:Int32)))) as $expr2] } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 1, max_one_row: true } ├─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - └─LogicalProject { exprs: [Array(1:Int32)] } + └─LogicalProject { exprs: [Array(1:Int32) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } batch_plan: | - BatchProject { exprs: [All((1:Int32 < ArrayCat(Array(1:Int32), Array(2:Int32))))] } + BatchProject { exprs: [All((1:Int32 < ArrayCat($expr33, Array(2:Int32)))) as $expr40] } └─BatchNestedLoopJoin { type: LeftOuter, predicate: true, output: all } ├─BatchValues { rows: [[]] } - └─BatchProject { exprs: [Array(1:Int32)] } - └─BatchValues { rows: [[]] } + └─BatchValues { rows: [[Array(1:Int32)]] } - name: now expression sql: | create table t (v1 timestamp with time zone); @@ -462,7 +445,8 @@ StreamMaterialize { columns: [v1, t._row_id(hidden)], pk_columns: [t._row_id] } └─StreamDynamicFilter { predicate: (t.v1 >= now), output: [t.v1, t._row_id] } ├─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamNow { output: [now] } + └─StreamExchange { dist: Broadcast } + └─StreamNow { output: [now] } - name: now expression with proj sql: | create table t (v1 timestamp with time zone); @@ -473,10 +457,11 @@ └─LogicalScan { table: t, columns: [t.v1, t._row_id] } stream_plan: | StreamMaterialize { columns: [v1, t._row_id(hidden)], pk_columns: [t._row_id] } - └─StreamDynamicFilter { predicate: (t.v1 >= (now - '00:00:02':Interval)), output: [t.v1, t._row_id] } + └─StreamDynamicFilter { predicate: (t.v1 >= $expr12), output: [t.v1, t._row_id] } ├─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamProject { exprs: [(now - '00:00:02':Interval)], watermark_columns: [(now - '00:00:02':Interval)] } - └─StreamNow { output: [now] } + └─StreamExchange { dist: Broadcast } + └─StreamProject { exprs: [(now - '00:00:02':Interval) as $expr12], watermark_columns: [(now - '00:00:02':Interval)] } + └─StreamNow { output: [now] } - name: and of two now expression condition sql: | create table t (v1 timestamp with time zone, v2 timestamp with time zone); @@ -486,13 +471,15 @@ └─StreamDynamicFilter { predicate: (t.v2 >= now), output: [t.v1, t.v2, t._row_id] } ├─StreamDynamicFilter { predicate: (t.v1 >= now), output: [t.v1, t.v2, t._row_id] } | ├─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - | └─StreamNow { output: [now] } - └─StreamNow { output: [now] } + | └─StreamExchange { dist: Broadcast } + | └─StreamNow { output: [now] } + └─StreamExchange { dist: Broadcast } + └─StreamNow { output: [now] } - name: or of two now expression condition sql: | create table t (v1 timestamp with time zone, v2 timestamp with time zone); select * from t where v1 >= now() or v2 >= now(); - stream_error: 'Expr error: Invalid parameter now: now expression must be placed in a comparison' + stream_error: 'internal error: Conditions containing now must be of the form `input_expr cmp now() [+- const_expr]` or `now() 
[+- const_expr] cmp input_expr`, where `input_expr` references a column and contains no `now()`.' - name: now inside HAVING clause sql: | create table t (v1 timestamp with time zone, v2 int); @@ -500,12 +487,13 @@ stream_plan: | StreamMaterialize { columns: [max_time, t.v2(hidden)], pk_columns: [t.v2] } └─StreamProject { exprs: [max(t.v1), t.v2] } - └─StreamDynamicFilter { predicate: (max(t.v1) >= now), output: [t.v2, max(t.v1), max(t.v1)] } - ├─StreamProject { exprs: [t.v2, max(t.v1), max(t.v1)] } + └─StreamDynamicFilter { predicate: (max(t.v1) >= now), output: [max(t.v1), max(t.v1), t.v2] } + ├─StreamProject { exprs: [max(t.v1), max(t.v1), t.v2] } | └─StreamHashAgg { group_key: [t.v2], aggs: [count, max(t.v1), max(t.v1)] } | └─StreamExchange { dist: HashShard(t.v2) } | └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamNow { output: [now] } + └─StreamExchange { dist: Broadcast } + └─StreamNow { output: [now] } - name: forbid now in group by for stream sql: | create table t (v1 timestamp with time zone, v2 int); @@ -521,3 +509,30 @@ create table t (v1 timestamp with time zone, v2 int); select sum(v2) filter (where v1 >= now()) as sum_v2 from t; stream_error: 'Invalid input syntax: For creation of materialized views, `NOW()` function is only allowed in `WHERE` and `HAVING`. Found in clause: Some(Filter)' +- name: typo pg_teminate_backend + sql: | + select pg_teminate_backend(1); + binder_error: |- + Feature is not yet implemented: unsupported function "pg_teminate_backend", do you mean "pg_terminate_backend"? + Tracking issue: https://github.com/risingwavelabs/risingwave/issues/112 +- name: regression (#7571) - literal debug display for array with NULL values + sql: | + select ARRAY[1, null] t; + logical_plan: | + LogicalProject { exprs: [Array(1:Int32, null:Int32) as $expr1] } + └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } +- name: regression (#7641) -fuzzing test failed at Bind error,types Boolean and Varchar cannot be matched + sql: | + select false >= 'LN1O0QP1yi' NOT IN (md5('4SeUPZhUbH')) + batch_plan: | + BatchValues { rows: [[(false:Boolean >= Not(In('LN1O0QP1yi':Varchar, Md5('4SeUPZhUbH':Varchar))))]] } +- name: const_eval of const expr + sql: | + create table t(v1 int); + select 1 + 2 + v1 from t; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [((1:Int32 + 2:Int32) + t.v1) as $expr23] } + └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } +- name: const_eval of division by 0 error + sql: select 1 / 0 t1; diff --git a/src/frontend/planner_test/tests/testdata/index.yaml b/src/frontend/planner_test/tests/testdata/index.yaml deleted file mode 100644 index 0f278a480ec7b..0000000000000 --- a/src/frontend/planner_test/tests/testdata/index.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# This file is automatically generated. See `src/frontend/planner_test/README.md` for more information. 
-- sql: | - create table t1 (v1 int, v2 float); - create table t2 (v3 int, v4 numeric, v5 bigint); - create index t1_v1 on t1(v1) include(v2); - create index t2_v3 on t2(v3) include(v4, v5); - /* should generate delta join plan, and stream index scan */ - select * from t1, t2 where t1.v1 = t2.v3; - stream_plan: | - StreamMaterialize { columns: [v1, v2, v3, v4, v5, t1._row_id(hidden), t2._row_id(hidden)], pk_columns: [t1._row_id, t2._row_id, v1, v3] } - └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t2.v5, t1._row_id, t2._row_id] } - ├─StreamExchange { dist: HashShard(t1.v1) } - | └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard(t2.v3) } - └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2.v5, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } -- id: index_slt - sql: | - create table iii_t1 (v1 int, v2 int); - create table iii_t2 (v3 int, v4 int); - create table iii_t3 (v5 int, v6 int); - create materialized view iii_mv1 as select * from iii_t1, iii_t2, iii_t3 where iii_t1.v1 = iii_t2.v3 and iii_t1.v1 = iii_t3.v5; - create index iii_index_1 on iii_t1(v1) include(v2); - create index iii_index_2 on iii_t2(v3) include(v4); -- before: - - index_slt - sql: | - select * from iii_t1, iii_t2 where iii_t1.v1 = iii_t2.v3; - stream_plan: | - StreamMaterialize { columns: [v1, v2, v3, v4, iii_t1._row_id(hidden), iii_t2._row_id(hidden)], pk_columns: [iii_t1._row_id, iii_t2._row_id, v1, v3] } - └─StreamHashJoin { type: Inner, predicate: iii_t1.v1 = iii_t2.v3, output: [iii_t1.v1, iii_t1.v2, iii_t2.v3, iii_t2.v4, iii_t1._row_id, iii_t2._row_id] } - ├─StreamExchange { dist: HashShard(iii_t1.v1) } - | └─StreamTableScan { table: iii_t1, columns: [iii_t1.v1, iii_t1.v2, iii_t1._row_id], pk: [iii_t1._row_id], dist: UpstreamHashShard(iii_t1._row_id) } - └─StreamExchange { dist: HashShard(iii_t2.v3) } - └─StreamTableScan { table: iii_t2, columns: [iii_t2.v3, iii_t2.v4, iii_t2._row_id], pk: [iii_t2._row_id], dist: UpstreamHashShard(iii_t2._row_id) } -- before: - - index_slt - sql: | - select v4 from iii_t1, iii_t2 where iii_t1.v1 = iii_t2.v3; - stream_plan: | - StreamMaterialize { columns: [v4, iii_t1._row_id(hidden), iii_t1.v1(hidden), iii_t2._row_id(hidden), iii_t2.v3(hidden)], pk_columns: [iii_t1._row_id, iii_t2._row_id, iii_t1.v1, iii_t2.v3] } - └─StreamHashJoin { type: Inner, predicate: iii_t1.v1 = iii_t2.v3, output: [iii_t2.v4, iii_t1._row_id, iii_t1.v1, iii_t2._row_id, iii_t2.v3] } - ├─StreamExchange { dist: HashShard(iii_t1.v1) } - | └─StreamTableScan { table: iii_t1, columns: [iii_t1.v1, iii_t1._row_id], pk: [iii_t1._row_id], dist: UpstreamHashShard(iii_t1._row_id) } - └─StreamExchange { dist: HashShard(iii_t2.v3) } - └─StreamTableScan { table: iii_t2, columns: [iii_t2.v3, iii_t2.v4, iii_t2._row_id], pk: [iii_t2._row_id], dist: UpstreamHashShard(iii_t2._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/index_selection.yaml b/src/frontend/planner_test/tests/testdata/index_selection.yaml index af18731a327f8..0e28d25116c0c 100644 --- a/src/frontend/planner_test/tests/testdata/index_selection.yaml +++ b/src/frontend/planner_test/tests/testdata/index_selection.yaml @@ -578,3 +578,45 @@ batch_plan: | BatchExchange { order: [], dist: Single } └─BatchScan { table: idx1, columns: [idx1.a, idx1.b, idx1.c], scan_ranges: [idx1.a = Int32(1)], distribution: UpstreamHashShard(idx1.a) } +- name: topn on index + sql: | + create 
table t1 (a int, b int); + create index idx1 on t1(a); + select * from t1 order by a limit 1 + batch_plan: | + BatchLimit { limit: 1, offset: 0 } + └─BatchExchange { order: [idx1.a ASC], dist: Single } + └─BatchLimit { limit: 1, offset: 0 } + └─BatchScan { table: idx1, columns: [idx1.a, idx1.b], distribution: UpstreamHashShard(idx1.a) } +- name: topn on primary key + sql: | + create table t1 (a int primary key, b int); + create index idx1 on t1(a); + select * from t1 order by a limit 1 + batch_plan: | + BatchLimit { limit: 1, offset: 0 } + └─BatchExchange { order: [t1.a ASC], dist: Single } + └─BatchLimit { limit: 1, offset: 0 } + └─BatchScan { table: t1, columns: [t1.a, t1.b], distribution: UpstreamHashShard(t1.a) } +- name: topn on index with descending ordering + sql: | + create table t1 (a int, b int); + create index idx1 on t1(a desc); + select * from t1 order by a desc limit 1 + batch_plan: | + BatchLimit { limit: 1, offset: 0 } + └─BatchExchange { order: [idx1.a DESC], dist: Single } + └─BatchLimit { limit: 1, offset: 0 } + └─BatchScan { table: idx1, columns: [idx1.a, idx1.b], distribution: UpstreamHashShard(idx1.a) } +- name: topn on pk streaming case, should NOT optimized + sql: | + create table t1 (a int primary key); + select * from t1 order by a limit 1; + stream_plan: | + StreamMaterialize { columns: [a], pk_columns: [a] } + └─StreamProject { exprs: [t1.a] } + └─StreamTopN { order: "[t1.a ASC]", limit: 1, offset: 0 } + └─StreamExchange { dist: Single } + └─StreamGroupTopN { order: "[t1.a ASC]", limit: 1, offset: 0, group_key: [1] } + └─StreamProject { exprs: [t1.a, Vnode(t1.a) as $expr1] } + └─StreamTableScan { table: t1, columns: [t1.a], pk: [t1.a], dist: UpstreamHashShard(t1.a) } diff --git a/src/frontend/planner_test/tests/testdata/insert.yaml b/src/frontend/planner_test/tests/testdata/insert.yaml index aa67c803d90a2..8c138433b476c 100644 --- a/src/frontend/planner_test/tests/testdata/insert.yaml +++ b/src/frontend/planner_test/tests/testdata/insert.yaml @@ -15,6 +15,67 @@ BatchExchange { order: [], dist: Single } └─BatchInsert { table: t } └─BatchValues { rows: [[22.33:Decimal::Float32, 33:Int32], [44:Int32::Float32, 55.0:Decimal::Int32]] } +- name: prohibit inserting different number of nulls per row + sql: | + create table t (v1 real, v2 int, v3 varchar); + insert into t values (1, 2), (3, 4, 5); + binder_error: 'Bind error: VALUES lists must all be the same length' +- name: insert values with implicit null (single row) + sql: | + create table t (v1 real, v2 int, v3 varchar); + insert into t values (1, 2); + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchInsert { table: t } + └─BatchValues { rows: [[1:Int32::Float32, 2:Int32, null:Varchar]] } +- name: insert values with implicit null (multiple rows) + sql: | + create table t (v1 real, v2 int, v3 varchar); + insert into t values (1, 2), (3, 4); + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchInsert { table: t } + └─BatchValues { rows: [[1:Int32::Float32, 2:Int32, null:Varchar], [3:Int32::Float32, 4:Int32, null:Varchar]] } +- name: implicit null user defined columns 1 + sql: | + create table t (v1 int, v2 int); + insert into t (v1) values (5); + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchInsert { table: t } + └─BatchValues { rows: [[5:Int32, null:Int32]] } +- name: implicit null user defined columns 2 + sql: | + create table t (v1 int, v2 int); + insert into t (v2) values (6); + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchInsert { 
table: t } + └─BatchValues { rows: [[6:Int32, null:Int32]] } +- name: implicit null user defined columns 3 + sql: | + create table t (v1 int, v2 int, v3 int); + insert into t (v3) values (6); + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchInsert { table: t } + └─BatchValues { rows: [[6:Int32, null:Int32, null:Int32]] } +- name: implicit null user defined columns 4 + sql: | + create table t (v1 int, v2 int, v3 int); + insert into t (v1) values (6); + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchInsert { table: t } + └─BatchValues { rows: [[6:Int32, null:Int32, null:Int32]] } +- name: implicit null user defined columns 5 + sql: | + create table t (v1 int, v2 int, v3 int); + insert into t (v2) values (6); + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchInsert { table: t } + └─BatchValues { rows: [[6:Int32, null:Int32, null:Int32]] } - name: insert values on non-assign-castable types sql: | create table t (v1 real, v2 int); @@ -53,11 +114,6 @@ create table t (v1 int, v2 int); insert into t (v1) values (5, 6); binder_error: 'Bind error: INSERT has less target columns than values' -- name: insert values mismatch columns length - sql: | - create table t (v1 real, v2 int, v3 varchar); - insert into t values (1, 2), (3, 4); - binder_error: 'Bind error: INSERT has more target columns than expressions' - name: insert literal null sql: | create table t(v1 int); @@ -115,7 +171,7 @@ BatchExchange { order: [], dist: Single } └─BatchInsert { table: t } └─BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: ['2020-01-01 01:02:03':Timestamp::Time, 11:Int32, 4.5:Decimal::Float32] } + └─BatchProject { exprs: ['2020-01-01 01:02:03':Timestamp::Time as $expr47, 11:Int32, 4.5:Decimal::Float32 as $expr48] } └─BatchScan { table: t, columns: [], distribution: SomeShard } - name: insert into select with cast error sql: | @@ -147,12 +203,12 @@ create table t (a int, b int); insert into t values (0,1), (1,2) returning *, a, a+b; logical_plan: | - LogicalProject { exprs: [*VALUES*_0.column_0, *VALUES*_0.column_1, *VALUES*_0.column_0, (*VALUES*_0.column_0 + *VALUES*_0.column_1)] } + LogicalProject { exprs: [*VALUES*_0.column_0, *VALUES*_0.column_1, *VALUES*_0.column_0, (*VALUES*_0.column_0 + *VALUES*_0.column_1) as $expr1] } └─LogicalInsert { table: t, returning: true } └─LogicalValues { rows: [[0:Int32, 1:Int32], [1:Int32, 2:Int32]], schema: Schema { fields: [*VALUES*_0.column_0:Int32, *VALUES*_0.column_1:Int32] } } batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [*VALUES*_0.column_0, *VALUES*_0.column_1, *VALUES*_0.column_0, (*VALUES*_0.column_0 + *VALUES*_0.column_1)] } + └─BatchProject { exprs: [*VALUES*_0.column_0, *VALUES*_0.column_1, *VALUES*_0.column_0, (*VALUES*_0.column_0 + *VALUES*_0.column_1) as $expr23] } └─BatchInsert { table: t, returning: true } └─BatchValues { rows: [[0:Int32, 1:Int32], [1:Int32, 2:Int32]] } - name: insert with select and returning @@ -160,14 +216,14 @@ create table t (a int, b int); insert into t select distinct * from t returning *, a, a+b; logical_plan: | - LogicalProject { exprs: [t.a, t.b, t.a, (t.a + t.b)] } + LogicalProject { exprs: [t.a, t.b, t.a, (t.a + t.b) as $expr1] } └─LogicalInsert { table: t, returning: true } └─LogicalAgg { group_key: [t.a, t.b], aggs: [] } └─LogicalProject { exprs: [t.a, t.b] } └─LogicalScan { table: t, columns: [t.a, t.b, t._row_id] } batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [t.a, t.b, t.a, (t.a + 
t.b)] } + └─BatchProject { exprs: [t.a, t.b, t.a, (t.a + t.b) as $expr23] } └─BatchInsert { table: t, returning: true } └─BatchExchange { order: [], dist: Single } └─BatchHashAgg { group_key: [t.a, t.b], aggs: [] } diff --git a/src/frontend/planner_test/tests/testdata/join.yaml b/src/frontend/planner_test/tests/testdata/join.yaml index d75814194dcbf..28b470b1e124e 100644 --- a/src/frontend/planner_test/tests/testdata/join.yaml +++ b/src/frontend/planner_test/tests/testdata/join.yaml @@ -186,7 +186,7 @@ using (x); batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [Coalesce(i.x, i.x)] } + └─BatchProject { exprs: [Coalesce(i.x, i.x) as $expr23] } └─BatchHashJoin { type: FullOuter, predicate: i.x = i.x, output: all } ├─BatchExchange { order: [], dist: HashShard(i.x) } | └─BatchLookupJoin { type: Inner, predicate: i.x = i.x, output: [i.x] } @@ -197,7 +197,7 @@ stream_plan: | StreamMaterialize { columns: [x, i.t._row_id(hidden), i.t._row_id#1(hidden), i.x(hidden), i.x#1(hidden), i.t._row_id#2(hidden), i.t._row_id#3(hidden), i.x#2(hidden), i.x#3(hidden)], pk_columns: [i.t._row_id, i.t._row_id#1, i.x, i.x#1, i.t._row_id#2, i.t._row_id#3, i.x#2, i.x#3] } └─StreamExchange { dist: HashShard(i.t._row_id, i.t._row_id, i.x, i.x, i.t._row_id, i.t._row_id, i.x, i.x) } - └─StreamProject { exprs: [Coalesce(i.x, i.x), i.t._row_id, i.t._row_id, i.x, i.x, i.t._row_id, i.t._row_id, i.x, i.x] } + └─StreamProject { exprs: [Coalesce(i.x, i.x) as $expr46, i.t._row_id, i.t._row_id, i.x, i.x, i.t._row_id, i.t._row_id, i.x, i.x] } └─StreamHashJoin { type: FullOuter, predicate: i.x = i.x, output: [i.x, i.x, i.t._row_id, i.t._row_id, i.x, i.t._row_id, i.t._row_id, i.x] } ├─StreamHashJoin { type: Inner, predicate: i.x = i.x, output: [i.x, i.t._row_id, i.t._row_id, i.x] } | ├─StreamExchange { dist: HashShard(i.x) } @@ -383,7 +383,7 @@ create table b(x int); select x, a.x, b.x from a natural full join b; logical_plan: | - LogicalProject { exprs: [Coalesce(a.x, b.x), a.x, b.x] } + LogicalProject { exprs: [Coalesce(a.x, b.x) as $expr1, a.x, b.x] } └─LogicalJoin { type: FullOuter, on: (a.x = b.x), output: all } ├─LogicalScan { table: a, columns: [a.x, a._row_id] } └─LogicalScan { table: b, columns: [b.x, b._row_id] } @@ -407,21 +407,20 @@ create table c(x int, a int); select x, a.x, b.x, c.x from a natural full join b natural full join c; logical_plan: | - LogicalProject { exprs: [Coalesce(a.x, b.x, c.x), a.x, b.x, c.x] } + LogicalProject { exprs: [Coalesce(a.x, b.x, c.x) as $expr1, a.x, b.x, c.x] } └─LogicalJoin { type: FullOuter, on: (Coalesce(a.x, b.x) = c.x), output: all } ├─LogicalJoin { type: FullOuter, on: (a.x = b.x), output: all } | ├─LogicalScan { table: a, columns: [a.x, a.y, a._row_id] } | └─LogicalScan { table: b, columns: [b.x, b.z, b._row_id] } └─LogicalScan { table: c, columns: [c.x, c.a, c._row_id] } optimized_logical_plan: | - LogicalProject { exprs: [Coalesce(a.x, b.x, c.x), a.x, b.x, c.x] } - └─LogicalJoin { type: FullOuter, on: (Coalesce(a.x, b.x) = c.x), output: [a.x, b.x, c.x] } - ├─LogicalProject { exprs: [a.x, b.x, Coalesce(a.x, b.x)] } + LogicalProject { exprs: [Coalesce(a.x, b.x, c.x) as $expr28, a.x, b.x, c.x] } + └─LogicalJoin { type: FullOuter, on: ($expr27 = c.x), output: [a.x, b.x, c.x] } + ├─LogicalProject { exprs: [a.x, b.x, Coalesce(a.x, b.x) as $expr27] } | └─LogicalJoin { type: FullOuter, on: (a.x = b.x), output: all } | ├─LogicalScan { table: a, columns: [a.x] } | └─LogicalScan { table: b, columns: [b.x] } - └─LogicalProject { exprs: [c.x, c.x] } - 
└─LogicalScan { table: c, columns: [c.x] } + └─LogicalScan { table: c, columns: [c.x] } - name: Ensure that nested natural joins bind and disambiguate columns sql: | create table a(a int, y int); @@ -429,7 +428,7 @@ create table c(x int, a int); select a, x, a.a, c.a, b.x, c.x from a natural full join b natural full join c; logical_plan: | - LogicalProject { exprs: [Coalesce(a.a, c.a), Coalesce(b.x, c.x), a.a, c.a, b.x, c.x] } + LogicalProject { exprs: [Coalesce(a.a, c.a) as $expr1, Coalesce(b.x, c.x) as $expr2, a.a, c.a, b.x, c.x] } └─LogicalJoin { type: FullOuter, on: (a.a = c.a) AND (b.x = c.x), output: all } ├─LogicalJoin { type: FullOuter, on: true, output: all } | ├─LogicalScan { table: a, columns: [a.a, a.y, a._row_id] } @@ -470,7 +469,7 @@ create table t3(x int); select *, x, t1.x, t2.x, t3.x from t1 full join (t2 full join t3 using (x)) using (x); logical_plan: | - LogicalProject { exprs: [Coalesce(t1.x, t2.x, t3.x), Coalesce(t1.x, t2.x, t3.x), t1.x, t2.x, t3.x] } + LogicalProject { exprs: [Coalesce(t1.x, t2.x, t3.x) as $expr1, Coalesce(t1.x, t2.x, t3.x) as $expr2, t1.x, t2.x, t3.x] } └─LogicalJoin { type: FullOuter, on: (t1.x = Coalesce(t2.x, t3.x)), output: all } ├─LogicalScan { table: t1, columns: [t1.x, t1._row_id] } └─LogicalJoin { type: FullOuter, on: (t2.x = t3.x), output: all } @@ -482,17 +481,17 @@ create table b(x int); select 2 * x as Y, x + x as Z from a natural full join b where 2 * x < 10 order by x + x; logical_plan: | - LogicalProject { exprs: [(2:Int32 * Coalesce(a.x, b.x)), (Coalesce(a.x, b.x) + Coalesce(a.x, b.x))] } - └─LogicalProject { exprs: [(2:Int32 * Coalesce(a.x, b.x)), (Coalesce(a.x, b.x) + Coalesce(a.x, b.x)), (Coalesce(a.x, b.x) + Coalesce(a.x, b.x))] } + LogicalProject { exprs: [$expr1, $expr2] } + └─LogicalProject { exprs: [(2:Int32 * Coalesce(a.x, b.x)) as $expr1, (Coalesce(a.x, b.x) + Coalesce(a.x, b.x)) as $expr2, (Coalesce(a.x, b.x) + Coalesce(a.x, b.x)) as $expr3] } └─LogicalFilter { predicate: ((2:Int32 * Coalesce(a.x, b.x)) < 10:Int32) } └─LogicalJoin { type: FullOuter, on: (a.x = b.x), output: all } ├─LogicalScan { table: a, columns: [a.x, a._row_id] } └─LogicalScan { table: b, columns: [b.x, b._row_id] } batch_plan: | - BatchProject { exprs: [(2:Int32 * Coalesce(a.x, b.x)), (Coalesce(a.x, b.x) + Coalesce(a.x, b.x))] } - └─BatchExchange { order: [(Coalesce(a.x, b.x) + Coalesce(a.x, b.x)) ASC], dist: Single } - └─BatchSort { order: [(Coalesce(a.x, b.x) + Coalesce(a.x, b.x)) ASC] } - └─BatchProject { exprs: [(2:Int32 * Coalesce(a.x, b.x)), (Coalesce(a.x, b.x) + Coalesce(a.x, b.x)), (Coalesce(a.x, b.x) + Coalesce(a.x, b.x))] } + BatchProject { exprs: [$expr67, $expr68] } + └─BatchExchange { order: [$expr69 ASC], dist: Single } + └─BatchSort { order: [$expr69 ASC] } + └─BatchProject { exprs: [(2:Int32 * Coalesce(a.x, b.x)) as $expr67, (Coalesce(a.x, b.x) + Coalesce(a.x, b.x)) as $expr68, (Coalesce(a.x, b.x) + Coalesce(a.x, b.x)) as $expr69] } └─BatchFilter { predicate: ((2:Int32 * Coalesce(a.x, b.x)) < 10:Int32) } └─BatchHashJoin { type: FullOuter, predicate: a.x = b.x, output: all } ├─BatchExchange { order: [], dist: HashShard(a.x) } @@ -500,9 +499,9 @@ └─BatchExchange { order: [], dist: HashShard(b.x) } └─BatchScan { table: b, columns: [b.x], distribution: SomeShard } stream_plan: | - StreamMaterialize { columns: [y, z, (Coalesce(a.x, b.x) + Coalesce(a.x, b.x))(hidden), a._row_id(hidden), b._row_id(hidden), a.x(hidden), b.x(hidden)], pk_columns: [a._row_id, b._row_id, a.x, b.x], order_descs: [(Coalesce(a.x, b.x) + Coalesce(a.x, b.x)), 
a._row_id, b._row_id, a.x, b.x] } + StreamMaterialize { columns: [y, z, $expr138(hidden), a._row_id(hidden), b._row_id(hidden), a.x(hidden), b.x(hidden)], pk_columns: [a._row_id, b._row_id, a.x, b.x], order_descs: [$expr138, a._row_id, b._row_id, a.x, b.x] } └─StreamExchange { dist: HashShard(a._row_id, b._row_id, a.x, b.x) } - └─StreamProject { exprs: [(2:Int32 * Coalesce(a.x, b.x)), (Coalesce(a.x, b.x) + Coalesce(a.x, b.x)), (Coalesce(a.x, b.x) + Coalesce(a.x, b.x)), a._row_id, b._row_id, a.x, b.x] } + └─StreamProject { exprs: [(2:Int32 * Coalesce(a.x, b.x)) as $expr136, (Coalesce(a.x, b.x) + Coalesce(a.x, b.x)) as $expr137, (Coalesce(a.x, b.x) + Coalesce(a.x, b.x)) as $expr138, a._row_id, b._row_id, a.x, b.x] } └─StreamFilter { predicate: ((2:Int32 * Coalesce(a.x, b.x)) < 10:Int32) } └─StreamHashJoin { type: FullOuter, predicate: a.x = b.x, output: [a.x, b.x, a._row_id, b._row_id] } ├─StreamExchange { dist: HashShard(a.x) } @@ -559,10 +558,10 @@ create table t2(x int, y int); select * from t1, t2 where t1.x + t1.y = t2.x + t2.y; optimized_logical_plan: | - LogicalJoin { type: Inner, on: ((t1.x + t1.y) = (t2.x + t2.y)), output: [t1.x, t1.y, t2.x, t2.y] } - ├─LogicalProject { exprs: [t1.x, t1.y, (t1.x + t1.y)] } + LogicalJoin { type: Inner, on: ($expr17 = $expr18), output: [t1.x, t1.y, t2.x, t2.y] } + ├─LogicalProject { exprs: [t1.x, t1.y, (t1.x + t1.y) as $expr17] } | └─LogicalScan { table: t1, columns: [t1.x, t1.y] } - └─LogicalProject { exprs: [t2.x, t2.y, (t2.x + t2.y)] } + └─LogicalProject { exprs: [t2.x, t2.y, (t2.x + t2.y) as $expr18] } └─LogicalScan { table: t2, columns: [t2.x, t2.y] } - name: Use project to align return types sql: | @@ -570,11 +569,10 @@ create table t2(x int, y decimal); select * from t1, t2 where t1.x = t2.y; optimized_logical_plan: | - LogicalJoin { type: Inner, on: (t1.x::Decimal = t2.y), output: [t1.x, t1.y, t2.x, t2.y] } - ├─LogicalProject { exprs: [t1.x, t1.y, t1.x::Decimal] } + LogicalJoin { type: Inner, on: ($expr9 = t2.y), output: [t1.x, t1.y, t2.x, t2.y] } + ├─LogicalProject { exprs: [t1.x, t1.y, t1.x::Decimal as $expr9] } | └─LogicalScan { table: t1, columns: [t1.x, t1.y] } - └─LogicalProject { exprs: [t2.x, t2.y, t2.y] } - └─LogicalScan { table: t2, columns: [t2.x, t2.y] } + └─LogicalScan { table: t2, columns: [t2.x, t2.y] } - name: Lookup join with no eq keys after pulling up predicate will revert to hash join sql: | create table t1 (v1 int, v2 int); @@ -595,19 +593,17 @@ select * from t1 join t2 on v1 IS NOT DISTINCT FROM v2; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchHashJoin { type: Inner, predicate: t1.v1::Int64 IS NOT DISTINCT FROM t2.v2, output: [t1.v1, t2.v2] } - ├─BatchExchange { order: [], dist: HashShard(t1.v1::Int64) } - | └─BatchProject { exprs: [t1.v1, t1.v1::Int64] } + └─BatchHashJoin { type: Inner, predicate: $expr13 IS NOT DISTINCT FROM t2.v2, output: [t1.v1, t2.v2] } + ├─BatchExchange { order: [], dist: HashShard($expr13) } + | └─BatchProject { exprs: [t1.v1, t1.v1::Int64 as $expr13] } | └─BatchScan { table: t1, columns: [t1.v1], distribution: SomeShard } └─BatchExchange { order: [], dist: HashShard(t2.v2) } - └─BatchProject { exprs: [t2.v2, t2.v2] } - └─BatchScan { table: t2, columns: [t2.v2], distribution: SomeShard } + └─BatchScan { table: t2, columns: [t2.v2], distribution: SomeShard } stream_plan: | - StreamMaterialize { columns: [v1, v2, t1._row_id(hidden), t1.v1::Int64(hidden), t2._row_id(hidden), t2.v2(hidden)], pk_columns: [t1._row_id, t2._row_id, t1.v1::Int64, t2.v2] } - └─StreamHashJoin { type: 
Inner, predicate: t1.v1::Int64 IS NOT DISTINCT FROM t2.v2, output: [t1.v1, t2.v2, t1._row_id, t1.v1::Int64, t2._row_id, t2.v2] } - ├─StreamExchange { dist: HashShard(t1.v1::Int64) } - | └─StreamProject { exprs: [t1.v1, t1.v1::Int64, t1._row_id] } + StreamMaterialize { columns: [v1, v2, t1._row_id(hidden), $expr25(hidden), t2._row_id(hidden)], pk_columns: [t1._row_id, t2._row_id, $expr25, v2] } + └─StreamHashJoin { type: Inner, predicate: $expr25 IS NOT DISTINCT FROM t2.v2, output: [t1.v1, t2.v2, t1._row_id, $expr25, t2._row_id] } + ├─StreamExchange { dist: HashShard($expr25) } + | └─StreamProject { exprs: [t1.v1, t1.v1::Int64 as $expr25, t1._row_id] } | └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } └─StreamExchange { dist: HashShard(t2.v2) } - └─StreamProject { exprs: [t2.v2, t2.v2, t2._row_id] } - └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/mv_column_name.yaml b/src/frontend/planner_test/tests/testdata/mv_column_name.yaml index f45c81b9f046c..30fb7e5a15717 100644 --- a/src/frontend/planner_test/tests/testdata/mv_column_name.yaml +++ b/src/frontend/planner_test/tests/testdata/mv_column_name.yaml @@ -15,7 +15,7 @@ select a is null as is_null from t; stream_plan: | StreamMaterialize { columns: [is_null, t._row_id(hidden)], pk_columns: [t._row_id] } - └─StreamProject { exprs: [IsNull(t.a), t._row_id] } + └─StreamProject { exprs: [IsNull(t.a) as $expr24, t._row_id] } └─StreamTableScan { table: t, columns: [t.a, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: is_true with alias sql: | @@ -23,7 +23,7 @@ select a, a is true as is_true from t; stream_plan: | StreamMaterialize { columns: [a, is_true, t._row_id(hidden)], pk_columns: [t._row_id] } - └─StreamProject { exprs: [t.a, IsTrue(t.a), t._row_id] } + └─StreamProject { exprs: [t.a, IsTrue(t.a) as $expr24, t._row_id] } └─StreamTableScan { table: t, columns: [t.a, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: column name specified more than once sql: | @@ -57,6 +57,6 @@ └─StreamProject { exprs: [sum0(count), max(max(t.a))] } └─StreamGlobalSimpleAgg { aggs: [count, sum0(count), max(max(t.a))] } └─StreamExchange { dist: Single } - └─StreamHashAgg { group_key: [Vnode(t._row_id)], aggs: [count, count, max(t.a)] } - └─StreamProject { exprs: [t.a, t._row_id, Vnode(t._row_id)] } + └─StreamHashAgg { group_key: [$expr1], aggs: [count, count, max(t.a)] } + └─StreamProject { exprs: [t.a, t._row_id, Vnode(t._row_id) as $expr1] } └─StreamTableScan { table: t, columns: [t.a, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/nexmark.yaml b/src/frontend/planner_test/tests/testdata/nexmark.yaml index 15394b9ed0300..08756481b5706 100644 --- a/src/frontend/planner_test/tests/testdata/nexmark.yaml +++ b/src/frontend/planner_test/tests/testdata/nexmark.yaml @@ -33,14 +33,7 @@ "url" VARCHAR, "date_time" TIMESTAMP, "extra" VARCHAR - )with (appendonly = true); -- id: nexmark_q0 - before: - - create_tables - sql: | - CREATE MATERIALIZED VIEW nexmark_q0 - AS - SELECT auction, bidder, price, date_time FROM bid; + ) with (appendonly = true); - id: nexmark_q0 before: - create_tables @@ -73,17 +66,17 @@ FROM bid; batch_plan: | BatchExchange { 
order: [], dist: Single } - └─BatchProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price), bid.date_time] } + └─BatchProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price) as $expr23, bid.date_time] } └─BatchScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [auction, bidder, price, date_time, bid._row_id(hidden)], pk_columns: [bid._row_id] } - └─StreamProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price), bid.date_time, bid._row_id] } + └─StreamProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price) as $expr46, bid.date_time, bid._row_id] } └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: | Fragment 0 StreamMaterialize { columns: [auction, bidder, price, date_time, bid._row_id(hidden)], pk_columns: [bid._row_id] } materialized table: 4294967294 - StreamProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price), bid.date_time, bid._row_id] } + StreamProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price) as $expr46, bid.date_time, bid._row_id] } Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream BatchPlanNode @@ -162,8 +155,8 @@ Table 0 { columns: [auction_id, auction_seller], primary key: [$1 ASC, $0 ASC], value indices: [0, 1], distribution key: [1] } Table 1 { columns: [auction_seller, auction_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 2 { columns: [person_id, person_name, person_city, person_state], primary key: [$0 ASC, $0 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } - Table 3 { columns: [person_id, person_id_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 2 { columns: [person_id, person_name, person_city, person_state], primary key: [$0 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } + Table 3 { columns: [person_id, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4294967294 { columns: [name, city, state, id, auction.seller, person.id], primary key: [$3 ASC, $5 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [4] } - id: nexmark_q4 before: @@ -181,7 +174,7 @@ GROUP BY Q.category; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [auction.category, (sum(max(bid.price)) / count(max(bid.price)))] } + └─BatchProject { exprs: [auction.category, (sum(max(bid.price)) / count(max(bid.price))) as $expr23] } └─BatchHashAgg { group_key: [auction.category], aggs: [sum(max(bid.price)), count(max(bid.price))] } └─BatchExchange { order: [], dist: HashShard(auction.category) } └─BatchHashAgg { group_key: [auction.id, auction.category], aggs: [max(bid.price)] } @@ -194,7 +187,7 @@ └─BatchScan { table: bid, columns: [bid.auction, bid.price, bid.date_time], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [category, avg], pk_columns: [category] } - └─StreamProject { exprs: [auction.category, (sum(max(bid.price)) / count(max(bid.price)))] } + └─StreamProject { exprs: [auction.category, (sum(max(bid.price)) / count(max(bid.price))) as $expr47] } └─StreamHashAgg { group_key: [auction.category], aggs: [count, sum(max(bid.price)), 
count(max(bid.price))] } └─StreamExchange { dist: HashShard(auction.category) } └─StreamProject { exprs: [auction.id, auction.category, max(bid.price)] } @@ -210,7 +203,7 @@ Fragment 0 StreamMaterialize { columns: [category, avg], pk_columns: [category] } materialized table: 4294967294 - StreamProject { exprs: [auction.category, (sum(max(bid.price)) / count(max(bid.price)))] } + StreamProject { exprs: [auction.category, (sum(max(bid.price)) / count(max(bid.price))) as $expr47] } StreamHashAgg { group_key: [auction.category], aggs: [count, sum(max(bid.price)), count(max(bid.price))] } result table: 0, state tables: [] StreamExchange Hash([1]) from 1 @@ -239,8 +232,8 @@ Table 0 { columns: [auction_category, count, sum(max(bid_price)), count(max(bid_price))], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [0] } Table 1 { columns: [auction_id, auction_category, bid_price, bid__row_id, bid_auction], primary key: [$0 ASC, $1 ASC, $2 DESC, $3 ASC, $4 ASC], value indices: [0, 2, 3, 4], distribution key: [0] } Table 2 { columns: [auction_id, auction_category, count, max(bid_price)], primary key: [$0 ASC, $1 ASC], value indices: [2, 3], distribution key: [0] } - Table 3 { columns: [auction_id, auction_date_time, auction_expires, auction_category], primary key: [$0 ASC, $0 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } - Table 4 { columns: [auction_id, auction_id_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 3 { columns: [auction_id, auction_date_time, auction_expires, auction_category], primary key: [$0 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } + Table 4 { columns: [auction_id, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 5 { columns: [bid_auction, bid_price, bid_date_time, bid__row_id], primary key: [$0 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } Table 6 { columns: [bid_auction, bid__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 4294967294 { columns: [category, avg], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } @@ -286,14 +279,16 @@ | └─BatchHashAgg { group_key: [window_start, bid.auction], aggs: [count] } | └─BatchHopWindow { time_col: bid.date_time, slide: 00:00:02, size: 00:00:10, output: [bid.auction, window_start] } | └─BatchExchange { order: [], dist: HashShard(bid.auction) } - | └─BatchScan { table: bid, columns: [bid.auction, bid.date_time], distribution: SomeShard } + | └─BatchFilter { predicate: IsNotNull(bid.date_time) } + | └─BatchScan { table: bid, columns: [bid.auction, bid.date_time], distribution: SomeShard } └─BatchProject { exprs: [max(count), window_start] } └─BatchHashAgg { group_key: [window_start], aggs: [max(count)] } └─BatchExchange { order: [], dist: HashShard(window_start) } └─BatchHashAgg { group_key: [bid.auction, window_start], aggs: [count] } └─BatchHopWindow { time_col: bid.date_time, slide: 00:00:02, size: 00:00:10, output: [bid.auction, window_start] } └─BatchExchange { order: [], dist: HashShard(bid.auction) } - └─BatchScan { table: bid, columns: [bid.auction, bid.date_time], distribution: SomeShard } + └─BatchFilter { predicate: IsNotNull(bid.date_time) } + └─BatchScan { table: bid, columns: [bid.auction, bid.date_time], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [auction, num, window_start(hidden), window_start#1(hidden)], pk_columns: [window_start, auction, window_start#1] } └─StreamProject { exprs: 
[bid.auction, count, window_start, window_start] } @@ -304,7 +299,8 @@ | └─StreamAppendOnlyHashAgg { group_key: [window_start, bid.auction], aggs: [count, count] } | └─StreamExchange { dist: HashShard(bid.auction, window_start) } | └─StreamHopWindow { time_col: bid.date_time, slide: 00:00:02, size: 00:00:10, output: [bid.auction, window_start, bid._row_id] } - | └─StreamTableScan { table: bid, columns: [bid.auction, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } + | └─StreamFilter { predicate: IsNotNull(bid.date_time) } + | └─StreamTableScan { table: bid, columns: [bid.auction, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } └─StreamProject { exprs: [max(count), window_start] } └─StreamHashAgg { group_key: [window_start], aggs: [count, max(count)] } └─StreamExchange { dist: HashShard(window_start) } @@ -312,7 +308,8 @@ └─StreamAppendOnlyHashAgg { group_key: [bid.auction, window_start], aggs: [count, count] } └─StreamExchange { dist: HashShard(bid.auction, window_start) } └─StreamHopWindow { time_col: bid.date_time, slide: 00:00:02, size: 00:00:10, output: [bid.auction, window_start, bid._row_id] } - └─StreamTableScan { table: bid, columns: [bid.auction, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } + └─StreamFilter { predicate: IsNotNull(bid.date_time) } + └─StreamTableScan { table: bid, columns: [bid.auction, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: | Fragment 0 StreamMaterialize { columns: [auction, num, window_start(hidden), window_start#1(hidden)], pk_columns: [window_start, auction, window_start#1] } @@ -335,9 +332,10 @@ Fragment 2 StreamHopWindow { time_col: bid.date_time, slide: 00:00:02, size: 00:00:10, output: [bid.auction, window_start, bid._row_id] } - Chain { table: bid, columns: [bid.auction, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } - Upstream - BatchPlanNode + StreamFilter { predicate: IsNotNull(bid.date_time) } + Chain { table: bid, columns: [bid.auction, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } + Upstream + BatchPlanNode Fragment 3 StreamProject { exprs: [bid.auction, window_start, count] } @@ -347,14 +345,15 @@ Fragment 4 StreamHopWindow { time_col: bid.date_time, slide: 00:00:02, size: 00:00:10, output: [bid.auction, window_start, bid._row_id] } - Chain { table: bid, columns: [bid.auction, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } - Upstream - BatchPlanNode + StreamFilter { predicate: IsNotNull(bid.date_time) } + Chain { table: bid, columns: [bid.auction, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } + Upstream + BatchPlanNode - Table 0 { columns: [bid_auction, count, window_start], primary key: [$2 ASC, $2 ASC, $0 ASC], value indices: [0, 1, 2], distribution key: [2] } - Table 1 { columns: [window_start, window_start_0, bid_auction, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } - Table 2 { columns: [max(count), window_start], primary key: [$1 ASC, $1 ASC], value indices: [0, 1], distribution key: [1] } - Table 3 { columns: [window_start, window_start_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 0 { columns: [bid_auction, count, window_start], primary key: [$2 ASC, $0 ASC], value indices: [0, 1, 2], distribution key: [2] 
} + Table 1 { columns: [window_start, bid_auction, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 2 { columns: [max(count), window_start], primary key: [$1 ASC], value indices: [0, 1], distribution key: [1] } + Table 3 { columns: [window_start, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4 { columns: [window_start, bid_auction, count, count_0], primary key: [$0 ASC, $1 ASC], value indices: [2, 3], distribution key: [1, 0] } Table 5 { columns: [window_start, count, bid_auction], primary key: [$0 ASC, $1 DESC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } Table 6 { columns: [window_start, count, max(count)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } @@ -404,66 +403,63 @@ batch_plan: | BatchExchange { order: [], dist: Single } └─BatchProject { exprs: [bid.auction, bid.price, bid.bidder, bid.date_time] } - └─BatchFilter { predicate: (bid.date_time >= ((TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval) - '00:00:10':Interval)) AND (bid.date_time <= (TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval)) } + └─BatchFilter { predicate: (bid.date_time >= $expr50) AND (bid.date_time <= $expr49) } └─BatchHashJoin { type: Inner, predicate: bid.price = max(bid.price), output: all } ├─BatchExchange { order: [], dist: HashShard(bid.price) } - | └─BatchProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, bid.date_time] } - | └─BatchScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time], distribution: SomeShard } + | └─BatchScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time], distribution: SomeShard } └─BatchExchange { order: [], dist: HashShard(max(bid.price)) } - └─BatchProject { exprs: [max(bid.price), (TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval), ((TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval) - '00:00:10':Interval)] } - └─BatchHashAgg { group_key: [(TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval)], aggs: [max(bid.price)] } - └─BatchExchange { order: [], dist: HashShard((TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval)) } - └─BatchProject { exprs: [(TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval), bid.price] } + └─BatchProject { exprs: [max(bid.price), $expr49, ($expr49 - '00:00:10':Interval) as $expr50] } + └─BatchHashAgg { group_key: [$expr49], aggs: [max(bid.price)] } + └─BatchExchange { order: [], dist: HashShard($expr49) } + └─BatchProject { exprs: [(TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr49, bid.price] } └─BatchScan { table: bid, columns: [bid.price, bid.date_time], distribution: SomeShard } stream_plan: | - StreamMaterialize { columns: [auction, price, bidder, date_time, bid._row_id(hidden), (TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval)(hidden), max(bid.price)(hidden)], pk_columns: [bid._row_id, (TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval), price, max(bid.price)] } - └─StreamProject { exprs: [bid.auction, bid.price, bid.bidder, bid.date_time, bid._row_id, (TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval), max(bid.price)] } - └─StreamFilter { predicate: (bid.date_time >= ((TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval) - '00:00:10':Interval)) AND (bid.date_time <= (TumbleStart(bid.date_time, '00:00:10':Interval) + 
'00:00:10':Interval)) } + StreamMaterialize { columns: [auction, price, bidder, date_time, bid._row_id(hidden), $expr99(hidden), max(bid.price)(hidden)], pk_columns: [bid._row_id, $expr99, price, max(bid.price)] } + └─StreamProject { exprs: [bid.auction, bid.price, bid.bidder, bid.date_time, bid._row_id, $expr99, max(bid.price)] } + └─StreamFilter { predicate: (bid.date_time >= $expr101) AND (bid.date_time <= $expr99) } └─StreamHashJoin { type: Inner, predicate: bid.price = max(bid.price), output: all } ├─StreamExchange { dist: HashShard(bid.price) } - | └─StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, bid.date_time, bid._row_id] } - | └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } + | └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } └─StreamExchange { dist: HashShard(max(bid.price)) } - └─StreamProject { exprs: [max(bid.price), (TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval), ((TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval) - '00:00:10':Interval)] } - └─StreamAppendOnlyHashAgg { group_key: [(TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval)], aggs: [count, max(bid.price)] } - └─StreamExchange { dist: HashShard((TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval)) } - └─StreamProject { exprs: [(TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval), bid.price, bid._row_id] } + └─StreamProject { exprs: [max(bid.price), $expr99, ($expr99 - '00:00:10':Interval) as $expr101] } + └─StreamAppendOnlyHashAgg { group_key: [$expr99], aggs: [count, max(bid.price)] } + └─StreamExchange { dist: HashShard($expr99) } + └─StreamProject { exprs: [(TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr99, bid.price, bid._row_id] } └─StreamTableScan { table: bid, columns: [bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: | Fragment 0 - StreamMaterialize { columns: [auction, price, bidder, date_time, bid._row_id(hidden), (TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval)(hidden), max(bid.price)(hidden)], pk_columns: [bid._row_id, (TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval), price, max(bid.price)] } + StreamMaterialize { columns: [auction, price, bidder, date_time, bid._row_id(hidden), $expr99(hidden), max(bid.price)(hidden)], pk_columns: [bid._row_id, $expr99, price, max(bid.price)] } materialized table: 4294967294 - StreamProject { exprs: [bid.auction, bid.price, bid.bidder, bid.date_time, bid._row_id, (TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval), max(bid.price)] } - StreamFilter { predicate: (bid.date_time >= ((TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval) - '00:00:10':Interval)) AND (bid.date_time <= (TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval)) } + StreamProject { exprs: [bid.auction, bid.price, bid.bidder, bid.date_time, bid._row_id, $expr99, max(bid.price)] } + StreamFilter { predicate: (bid.date_time >= $expr101) AND (bid.date_time <= $expr99) } StreamHashJoin { type: Inner, predicate: bid.price = max(bid.price), output: all } left table: 0, right table 2, left degree table: 1, right degree table: 3, StreamExchange Hash([2]) 
from 1 StreamExchange Hash([0]) from 2 Fragment 1 - StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, bid.date_time, bid._row_id] } - Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } - Upstream - BatchPlanNode + Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } + Upstream + BatchPlanNode Fragment 2 - StreamProject { exprs: [max(bid.price), (TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval), ((TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval) - '00:00:10':Interval)] } - StreamAppendOnlyHashAgg { group_key: [(TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval)], aggs: [count, max(bid.price)] } + StreamProject { exprs: [max(bid.price), $expr99, ($expr99 - '00:00:10':Interval) as $expr101] } + StreamAppendOnlyHashAgg { group_key: [$expr99], aggs: [count, max(bid.price)] } result table: 4, state tables: [] StreamExchange Hash([0]) from 3 Fragment 3 - StreamProject { exprs: [(TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval), bid.price, bid._row_id] } + StreamProject { exprs: [(TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr99, bid.price, bid._row_id] } Chain { table: bid, columns: [bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream BatchPlanNode - Table 0 { columns: [bid_auction, bid_bidder, bid_price, bid_date_time, bid_date_time_0, bid__row_id], primary key: [$2 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [2] } + Table 0 { columns: [bid_auction, bid_bidder, bid_price, bid_date_time, bid__row_id], primary key: [$2 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [2] } Table 1 { columns: [bid_price, bid__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 2 { columns: [max(bid_price), (TumbleStart(bid_date_time, '00:00:10':Interval) + '00:00:10':Interval), ((TumbleStart(bid_date_time, '00:00:10':Interval) + '00:00:10':Interval) - '00:00:10':Interval)], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 3 { columns: [max(bid_price), (TumbleStart(bid_date_time, '00:00:10':Interval) + '00:00:10':Interval), _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 4 { columns: [(TumbleStart(bid_date_time, '00:00:10':Interval) + '00:00:10':Interval), count, max(bid_price)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } - Table 4294967294 { columns: [auction, price, bidder, date_time, bid._row_id, (TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval), max(bid.price)], primary key: [$4 ASC, $5 ASC, $1 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [1] } + Table 2 { columns: [max(bid_price), $expr99, $expr101], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 3 { columns: [max(bid_price), $expr99, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 4 { columns: [$expr99, count, max(bid_price)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } + Table 4294967294 { columns: [auction, price, bidder, date_time, bid._row_id, $expr99, max(bid.price)], primary key: [$4 ASC, $5 ASC, $1 ASC, $6 
ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [1] } - id: nexmark_q8 before: - create_tables @@ -502,62 +498,62 @@ AND P.endtime = A.endtime; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchHashJoin { type: Inner, predicate: person.id = auction.seller AND TumbleStart(person.date_time, '00:00:10':Interval) = TumbleStart(auction.date_time, '00:00:10':Interval) AND (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval) = (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval), output: [person.id, person.name, TumbleStart(person.date_time, '00:00:10':Interval)] } - ├─BatchExchange { order: [], dist: HashShard(person.id, TumbleStart(person.date_time, '00:00:10':Interval), (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval)) } - | └─BatchHashAgg { group_key: [person.id, person.name, TumbleStart(person.date_time, '00:00:10':Interval), (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval)], aggs: [] } - | └─BatchProject { exprs: [person.id, person.name, TumbleStart(person.date_time, '00:00:10':Interval), (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval)] } + └─BatchHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr95 = $expr93 AND $expr96 = $expr94, output: [person.id, person.name, $expr95] } + ├─BatchExchange { order: [], dist: HashShard(person.id, $expr95, $expr96) } + | └─BatchHashAgg { group_key: [person.id, person.name, $expr95, $expr96], aggs: [] } + | └─BatchProject { exprs: [person.id, person.name, TumbleStart(person.date_time, '00:00:10':Interval) as $expr95, (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr96] } | └─BatchScan { table: person, columns: [person.id, person.name, person.date_time], distribution: UpstreamHashShard(person.id) } - └─BatchHashAgg { group_key: [auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval), (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval)], aggs: [] } - └─BatchExchange { order: [], dist: HashShard(auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval), (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval)) } - └─BatchProject { exprs: [auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval), (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval)] } + └─BatchHashAgg { group_key: [auction.seller, $expr93, $expr94], aggs: [] } + └─BatchExchange { order: [], dist: HashShard(auction.seller, $expr93, $expr94) } + └─BatchProject { exprs: [auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr93, (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr94] } └─BatchScan { table: auction, columns: [auction.date_time, auction.seller], distribution: SomeShard } stream_plan: | - StreamMaterialize { columns: [id, name, starttime, (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval)(hidden), auction.seller(hidden), TumbleStart(auction.date_time, '00:00:10':Interval)(hidden), (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval)(hidden)], pk_columns: [id, name, starttime, (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval), auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval), (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval)] } - └─StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND 
TumbleStart(person.date_time, '00:00:10':Interval) = TumbleStart(auction.date_time, '00:00:10':Interval) AND (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval) = (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval), output: all } - ├─StreamExchange { dist: HashShard(person.id, TumbleStart(person.date_time, '00:00:10':Interval), (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval)) } - | └─StreamProject { exprs: [person.id, person.name, TumbleStart(person.date_time, '00:00:10':Interval), (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval)] } - | └─StreamHashAgg { group_key: [person.id, person.name, TumbleStart(person.date_time, '00:00:10':Interval), (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval)], aggs: [count] } - | └─StreamProject { exprs: [person.id, person.name, TumbleStart(person.date_time, '00:00:10':Interval), (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval)] } + StreamMaterialize { columns: [id, name, starttime, $expr192(hidden), auction.seller(hidden), $expr189(hidden), $expr190(hidden)], pk_columns: [id, name, starttime, $expr192, auction.seller, $expr189, $expr190] } + └─StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr191 = $expr189 AND $expr192 = $expr190, output: all } + ├─StreamExchange { dist: HashShard(person.id, $expr191, $expr192) } + | └─StreamProject { exprs: [person.id, person.name, $expr191, $expr192] } + | └─StreamHashAgg { group_key: [person.id, person.name, $expr191, $expr192], aggs: [count] } + | └─StreamProject { exprs: [person.id, person.name, TumbleStart(person.date_time, '00:00:10':Interval) as $expr191, (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr192] } | └─StreamTableScan { table: person, columns: [person.id, person.name, person.date_time], pk: [person.id], dist: UpstreamHashShard(person.id) } - └─StreamProject { exprs: [auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval), (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval)] } - └─StreamHashAgg { group_key: [auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval), (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval)], aggs: [count] } - └─StreamExchange { dist: HashShard(auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval), (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval)) } - └─StreamProject { exprs: [auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval), (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval), auction.id] } + └─StreamProject { exprs: [auction.seller, $expr189, $expr190] } + └─StreamHashAgg { group_key: [auction.seller, $expr189, $expr190], aggs: [count] } + └─StreamExchange { dist: HashShard(auction.seller, $expr189, $expr190) } + └─StreamProject { exprs: [auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr189, (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr190, auction.id] } └─StreamTableScan { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], dist: UpstreamHashShard(auction.id) } stream_dist_plan: | Fragment 0 - StreamMaterialize { columns: [id, name, starttime, (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval)(hidden), auction.seller(hidden), TumbleStart(auction.date_time, '00:00:10':Interval)(hidden), 
(TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval)(hidden)], pk_columns: [id, name, starttime, (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval), auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval), (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval)] } + StreamMaterialize { columns: [id, name, starttime, $expr192(hidden), auction.seller(hidden), $expr189(hidden), $expr190(hidden)], pk_columns: [id, name, starttime, $expr192, auction.seller, $expr189, $expr190] } materialized table: 4294967294 - StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND TumbleStart(person.date_time, '00:00:10':Interval) = TumbleStart(auction.date_time, '00:00:10':Interval) AND (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval) = (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval), output: all } + StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr191 = $expr189 AND $expr192 = $expr190, output: all } left table: 0, right table 2, left degree table: 1, right degree table: 3, StreamExchange Hash([0, 2, 3]) from 1 - StreamProject { exprs: [auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval), (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval)] } - StreamHashAgg { group_key: [auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval), (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval)], aggs: [count] } + StreamProject { exprs: [auction.seller, $expr189, $expr190] } + StreamHashAgg { group_key: [auction.seller, $expr189, $expr190], aggs: [count] } result table: 5, state tables: [] StreamExchange Hash([0, 1, 2]) from 2 Fragment 1 - StreamProject { exprs: [person.id, person.name, TumbleStart(person.date_time, '00:00:10':Interval), (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval)] } - StreamHashAgg { group_key: [person.id, person.name, TumbleStart(person.date_time, '00:00:10':Interval), (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval)], aggs: [count] } + StreamProject { exprs: [person.id, person.name, $expr191, $expr192] } + StreamHashAgg { group_key: [person.id, person.name, $expr191, $expr192], aggs: [count] } result table: 4, state tables: [] - StreamProject { exprs: [person.id, person.name, TumbleStart(person.date_time, '00:00:10':Interval), (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval)] } + StreamProject { exprs: [person.id, person.name, TumbleStart(person.date_time, '00:00:10':Interval) as $expr191, (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr192] } Chain { table: person, columns: [person.id, person.name, person.date_time], pk: [person.id], dist: UpstreamHashShard(person.id) } Upstream BatchPlanNode Fragment 2 - StreamProject { exprs: [auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval), (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval), auction.id] } + StreamProject { exprs: [auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr189, (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr190, auction.id] } Chain { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], dist: UpstreamHashShard(auction.id) } Upstream BatchPlanNode - Table 0 { columns: [person_id, person_name, TumbleStart(person_date_time, 
'00:00:10':Interval), (TumbleStart(person_date_time, '00:00:10':Interval) + '00:00:10':Interval)], primary key: [$0 ASC, $2 ASC, $3 ASC, $0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0, 2, 3] } - Table 1 { columns: [person_id, TumbleStart(person_date_time, '00:00:10':Interval), (TumbleStart(person_date_time, '00:00:10':Interval) + '00:00:10':Interval), person_id_0, person_name, TumbleStart(person_date_time, '00:00:10':Interval)_0, (TumbleStart(person_date_time, '00:00:10':Interval) + '00:00:10':Interval)_0, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [7], distribution key: [0, 1, 2] } - Table 2 { columns: [auction_seller, TumbleStart(auction_date_time, '00:00:10':Interval), (TumbleStart(auction_date_time, '00:00:10':Interval) + '00:00:10':Interval)], primary key: [$0 ASC, $1 ASC, $2 ASC, $0 ASC, $1 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0, 1, 2] } - Table 3 { columns: [auction_seller, TumbleStart(auction_date_time, '00:00:10':Interval), (TumbleStart(auction_date_time, '00:00:10':Interval) + '00:00:10':Interval), auction_seller_0, TumbleStart(auction_date_time, '00:00:10':Interval)_0, (TumbleStart(auction_date_time, '00:00:10':Interval) + '00:00:10':Interval)_0, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC], value indices: [6], distribution key: [0, 1, 2] } - Table 4 { columns: [person_id, person_name, TumbleStart(person_date_time, '00:00:10':Interval), (TumbleStart(person_date_time, '00:00:10':Interval) + '00:00:10':Interval), count], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } - Table 5 { columns: [auction_seller, TumbleStart(auction_date_time, '00:00:10':Interval), (TumbleStart(auction_date_time, '00:00:10':Interval) + '00:00:10':Interval), count], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0, 1, 2] } - Table 4294967294 { columns: [id, name, starttime, (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval), auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval), (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval)], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [0, 2, 3] } + Table 0 { columns: [person_id, person_name, $expr191, $expr192], primary key: [$0 ASC, $2 ASC, $3 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [0, 2, 3] } + Table 1 { columns: [person_id, $expr191, $expr192, person_name, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0, 1, 2] } + Table 2 { columns: [auction_seller, $expr189, $expr190], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0, 1, 2] } + Table 3 { columns: [auction_seller, $expr189, $expr190, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0, 1, 2] } + Table 4 { columns: [person_id, person_name, $expr191, $expr192, count], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } + Table 5 { columns: [auction_seller, $expr189, $expr190, count], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0, 1, 2] } + Table 4294967294 { columns: [id, name, starttime, $expr192, auction.seller, $expr189, $expr190], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], 
distribution key: [0, 2, 3] } - id: nexmark_q9 before: - create_tables @@ -631,8 +627,8 @@ BatchPlanNode Table 0 { columns: [auction_id, auction_item_name, auction_description, auction_initial_bid, auction_reserve, auction_date_time, auction_expires, auction_seller, auction_category, bid_auction, bid_bidder, bid_price, bid_channel, bid_url, bid_date_time, bid_extra, bid__row_id], primary key: [$0 ASC, $11 DESC, $14 ASC, $16 ASC, $9 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], distribution key: [0] } - Table 1 { columns: [auction_id, auction_item_name, auction_description, auction_initial_bid, auction_reserve, auction_date_time, auction_expires, auction_seller, auction_category], primary key: [$0 ASC, $0 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [0] } - Table 2 { columns: [auction_id, auction_id_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 1 { columns: [auction_id, auction_item_name, auction_description, auction_initial_bid, auction_reserve, auction_date_time, auction_expires, auction_seller, auction_category], primary key: [$0 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [0] } + Table 2 { columns: [auction_id, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 3 { columns: [bid_auction, bid_bidder, bid_price, bid_channel, bid_url, bid_date_time, bid_extra, bid__row_id], primary key: [$0 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [0] } Table 4 { columns: [bid_auction, bid__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 4294967294 { columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, auction, bidder, price, bid_date_time, bid._row_id], primary key: [$0 ASC, $13 ASC, $9 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [0] } @@ -643,17 +639,17 @@ SELECT auction, bidder, price, date_time, TO_CHAR(date_time, 'YYYY-MM-DD') as date, TO_CHAR(date_time, 'HH:MI') as time FROM bid; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar), ToChar(bid.date_time, 'HH:MI':Varchar)] } + └─BatchProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar) as $expr45, ToChar(bid.date_time, 'HH:MI':Varchar) as $expr46] } └─BatchScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [auction, bidder, price, date_time, date, time, bid._row_id(hidden)], pk_columns: [bid._row_id] } - └─StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar), ToChar(bid.date_time, 'HH:MI':Varchar), bid._row_id] } + └─StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar) as $expr91, ToChar(bid.date_time, 'HH:MI':Varchar) as $expr92, bid._row_id] } └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: | Fragment 0 StreamMaterialize { columns: [auction, bidder, price, date_time, date, time, bid._row_id(hidden)], pk_columns: [bid._row_id] } materialized table: 4294967294 - StreamProject { 
exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar), ToChar(bid.date_time, 'HH:MI':Varchar), bid._row_id] } + StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar) as $expr91, ToChar(bid.date_time, 'HH:MI':Varchar) as $expr92, bid._row_id] } Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream BatchPlanNode @@ -730,19 +726,19 @@ WHERE 0.908 * price > 1000000 AND 0.908 * price < 50000000; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price), Case(((Extract('HOUR':Varchar, bid.date_time) >= 8:Int32) AND (Extract('HOUR':Varchar, bid.date_time) <= 18:Int32)), 'dayTime':Varchar, ((Extract('HOUR':Varchar, bid.date_time) <= 6:Int32) OR (Extract('HOUR':Varchar, bid.date_time) >= 20:Int32)), 'nightTime':Varchar, 'otherTime':Varchar), bid.date_time, bid.extra] } + └─BatchProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price) as $expr45, Case(((Extract('HOUR':Varchar, bid.date_time) >= 8:Int32) AND (Extract('HOUR':Varchar, bid.date_time) <= 18:Int32)), 'dayTime':Varchar, ((Extract('HOUR':Varchar, bid.date_time) <= 6:Int32) OR (Extract('HOUR':Varchar, bid.date_time) >= 20:Int32)), 'nightTime':Varchar, 'otherTime':Varchar) as $expr46, bid.date_time, bid.extra] } └─BatchFilter { predicate: ((0.908:Decimal * bid.price) > 1000000:Int32) AND ((0.908:Decimal * bid.price) < 50000000:Int32) } └─BatchScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid.extra], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [auction, bidder, price, bidtimetype, date_time, extra, bid._row_id(hidden)], pk_columns: [bid._row_id] } - └─StreamProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price), Case(((Extract('HOUR':Varchar, bid.date_time) >= 8:Int32) AND (Extract('HOUR':Varchar, bid.date_time) <= 18:Int32)), 'dayTime':Varchar, ((Extract('HOUR':Varchar, bid.date_time) <= 6:Int32) OR (Extract('HOUR':Varchar, bid.date_time) >= 20:Int32)), 'nightTime':Varchar, 'otherTime':Varchar), bid.date_time, bid.extra, bid._row_id] } + └─StreamProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price) as $expr91, Case(((Extract('HOUR':Varchar, bid.date_time) >= 8:Int32) AND (Extract('HOUR':Varchar, bid.date_time) <= 18:Int32)), 'dayTime':Varchar, ((Extract('HOUR':Varchar, bid.date_time) <= 6:Int32) OR (Extract('HOUR':Varchar, bid.date_time) >= 20:Int32)), 'nightTime':Varchar, 'otherTime':Varchar) as $expr92, bid.date_time, bid.extra, bid._row_id] } └─StreamFilter { predicate: ((0.908:Decimal * bid.price) > 1000000:Int32) AND ((0.908:Decimal * bid.price) < 50000000:Int32) } └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid.extra, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: | Fragment 0 StreamMaterialize { columns: [auction, bidder, price, bidtimetype, date_time, extra, bid._row_id(hidden)], pk_columns: [bid._row_id] } materialized table: 4294967294 - StreamProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price), Case(((Extract('HOUR':Varchar, bid.date_time) >= 8:Int32) AND (Extract('HOUR':Varchar, bid.date_time) <= 18:Int32)), 'dayTime':Varchar, ((Extract('HOUR':Varchar, bid.date_time) <= 6:Int32) OR (Extract('HOUR':Varchar, bid.date_time) 
>= 20:Int32)), 'nightTime':Varchar, 'otherTime':Varchar), bid.date_time, bid.extra, bid._row_id] } + StreamProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price) as $expr91, Case(((Extract('HOUR':Varchar, bid.date_time) >= 8:Int32) AND (Extract('HOUR':Varchar, bid.date_time) <= 18:Int32)), 'dayTime':Varchar, ((Extract('HOUR':Varchar, bid.date_time) <= 6:Int32) OR (Extract('HOUR':Varchar, bid.date_time) >= 20:Int32)), 'nightTime':Varchar, 'otherTime':Varchar) as $expr92, bid.date_time, bid.extra, bid._row_id] } StreamFilter { predicate: ((0.908:Decimal * bid.price) > 1000000:Int32) AND ((0.908:Decimal * bid.price) < 50000000:Int32) } Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid.extra, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream @@ -771,49 +767,49 @@ GROUP BY to_char(date_time, 'yyyy-MM-dd'); batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchHashAgg { group_key: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar)], aggs: [sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } - └─BatchExchange { order: [], dist: HashShard(ToChar(bid.date_time, 'yyyy-MM-dd':Varchar)) } - └─BatchProject { exprs: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, bid.bidder, bid.bidder, bid.bidder, bid.auction, bid.auction, bid.auction, bid.auction, flag, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } - └─BatchHashAgg { group_key: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, bid.auction, flag], aggs: [count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } - └─BatchExchange { order: [], dist: HashShard(ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, 
bid.auction, flag) } - └─BatchExpand { column_subsets: [[ToChar(bid.date_time, 'yyyy-MM-dd':Varchar)], [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder], [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.auction]] } - └─BatchProject { exprs: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.price, bid.bidder, bid.auction] } + └─BatchHashAgg { group_key: [$expr23], aggs: [sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + └─BatchExchange { order: [], dist: HashShard($expr23) } + └─BatchProject { exprs: [$expr23, bid.bidder, bid.bidder, bid.bidder, bid.bidder, bid.auction, bid.auction, bid.auction, bid.auction, flag, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } + └─BatchHashAgg { group_key: [$expr23, bid.bidder, bid.auction, flag], aggs: [count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } + └─BatchExchange { order: [], dist: HashShard($expr23, bid.bidder, bid.auction, flag) } + └─BatchExpand { column_subsets: [[$expr23], [$expr23, bid.bidder], [$expr23, bid.auction]] } + └─BatchProject { exprs: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar) as $expr23, bid.price, bid.bidder, bid.auction] } └─BatchScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [day, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], pk_columns: [day] } - └─StreamProject { exprs: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count 
filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } - └─StreamHashAgg { group_key: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar)], aggs: [count, sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } - └─StreamExchange { dist: HashShard(ToChar(bid.date_time, 'yyyy-MM-dd':Varchar)) } - └─StreamProject { exprs: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, bid.bidder, bid.bidder, bid.bidder, bid.auction, bid.auction, bid.auction, bid.auction, flag, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } - └─StreamAppendOnlyHashAgg { group_key: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, bid.auction, flag], aggs: [count, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } - └─StreamExchange { dist: HashShard(ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, 
bid.auction, flag) } - └─StreamExpand { column_subsets: [[ToChar(bid.date_time, 'yyyy-MM-dd':Varchar)], [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder], [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.auction]] } - └─StreamProject { exprs: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.price, bid.bidder, bid.auction, bid._row_id] } + └─StreamProject { exprs: [$expr46, sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + └─StreamHashAgg { group_key: [$expr46], aggs: [count, sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + └─StreamExchange { dist: HashShard($expr46) } + └─StreamProject { exprs: [$expr46, bid.bidder, bid.bidder, bid.bidder, bid.bidder, bid.auction, bid.auction, bid.auction, bid.auction, flag, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } + └─StreamAppendOnlyHashAgg { group_key: [$expr46, bid.bidder, bid.auction, flag], aggs: [count, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count 
filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } + └─StreamExchange { dist: HashShard($expr46, bid.bidder, bid.auction, flag) } + └─StreamExpand { column_subsets: [[$expr46], [$expr46, bid.bidder], [$expr46, bid.auction]] } + └─StreamProject { exprs: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar) as $expr46, bid.price, bid.bidder, bid.auction, bid._row_id] } └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: | Fragment 0 StreamMaterialize { columns: [day, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], pk_columns: [day] } materialized table: 4294967294 - StreamProject { exprs: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } - StreamHashAgg { group_key: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar)], aggs: [count, sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + StreamProject { exprs: [$expr46, sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 
0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + StreamHashAgg { group_key: [$expr46], aggs: [count, sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } result table: 0, state tables: [] StreamExchange Hash([0]) from 1 Fragment 1 - StreamProject { exprs: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, bid.bidder, bid.bidder, bid.bidder, bid.auction, bid.auction, bid.auction, bid.auction, flag, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } - StreamAppendOnlyHashAgg { group_key: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, bid.auction, flag], aggs: [count, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } + StreamProject { exprs: [$expr46, bid.bidder, bid.bidder, bid.bidder, bid.bidder, bid.auction, bid.auction, bid.auction, bid.auction, flag, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND 
(bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } + StreamAppendOnlyHashAgg { group_key: [$expr46, bid.bidder, bid.auction, flag], aggs: [count, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } result table: 1, state tables: [] StreamExchange Hash([0, 2, 3, 10]) from 2 Fragment 2 - StreamExpand { column_subsets: [[ToChar(bid.date_time, 'yyyy-MM-dd':Varchar)], [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder], [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.auction]] } - StreamProject { exprs: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.price, bid.bidder, bid.auction, bid._row_id] } + StreamExpand { column_subsets: [[$expr46], [$expr46, bid.bidder], [$expr46, bid.auction]] } + StreamProject { exprs: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar) as $expr46, bid.price, bid.bidder, bid.auction, bid._row_id] } Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream BatchPlanNode - Table 0 { columns: [ToChar(bid_date_time, 'yyyy-MM-dd':Varchar), count, sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid_price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid_price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid_bidder) filter((flag = 1:Int64)), count(bid_bidder) filter((count filter((bid_price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid_bidder) filter((count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid_bidder) filter((count filter((bid_price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid_auction) filter((flag = 2:Int64)), count(bid_auction) filter((count filter((bid_price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid_auction) filter((count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid_auction) filter((count filter((bid_price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))], primary key: [$0 ASC], value indices: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [0] } - Table 1 { columns: [ToChar(bid_date_time, 'yyyy-MM-dd':Varchar), bid_bidder, bid_auction, flag, count, count_0, count filter((bid_price < 10000:Int32)), count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count filter((bid_price >= 1000000:Int32)), count filter((bid_price < 10000:Int32))_0, count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32))_0, count filter((bid_price >= 1000000:Int32))_0, count filter((bid_price < 10000:Int32))_1, count filter((bid_price >= 10000:Int32) AND 
(bid_price < 1000000:Int32))_1, count filter((bid_price >= 1000000:Int32))_1], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], distribution key: [0, 1, 2, 3] } + Table 0 { columns: [$expr46, count, sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid_price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid_price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid_bidder) filter((flag = 1:Int64)), count(bid_bidder) filter((count filter((bid_price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid_bidder) filter((count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid_bidder) filter((count filter((bid_price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid_auction) filter((flag = 2:Int64)), count(bid_auction) filter((count filter((bid_price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid_auction) filter((count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid_auction) filter((count filter((bid_price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))], primary key: [$0 ASC], value indices: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [0] } + Table 1 { columns: [$expr46, bid_bidder, bid_auction, flag, count, count_0, count filter((bid_price < 10000:Int32)), count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count filter((bid_price >= 1000000:Int32)), count filter((bid_price < 10000:Int32))_0, count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32))_0, count filter((bid_price >= 1000000:Int32))_0, count filter((bid_price < 10000:Int32))_1, count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32))_1, count filter((bid_price >= 1000000:Int32))_1], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], distribution key: [0, 1, 2, 3] } Table 4294967294 { columns: [day, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], primary key: [$0 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], distribution key: [0] } - id: nexmark_q16 before: @@ -839,50 +835,50 @@ GROUP BY channel, to_char(date_time, 'yyyy-MM-dd'); batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchHashAgg { group_key: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar)], aggs: [max(max(ToChar(bid.date_time, 'HH:mm':Varchar))) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND 
(flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } - └─BatchExchange { order: [], dist: HashShard(bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar)) } - └─BatchProject { exprs: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, bid.bidder, bid.bidder, bid.bidder, bid.auction, bid.auction, bid.auction, bid.auction, flag, max(ToChar(bid.date_time, 'HH:mm':Varchar)), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } - └─BatchHashAgg { group_key: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, bid.auction, flag], aggs: [max(ToChar(bid.date_time, 'HH:mm':Varchar)), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } - └─BatchExchange { order: [], dist: HashShard(bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, bid.auction, flag) } - └─BatchExpand { column_subsets: [[bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), ToChar(bid.date_time, 'HH:mm':Varchar)], [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder], [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.auction]] } - └─BatchProject { exprs: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), ToChar(bid.date_time, 'HH:mm':Varchar), bid.price, bid.bidder, bid.auction] } + └─BatchHashAgg { group_key: [bid.channel, $expr45], aggs: [max(max($expr46)) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + └─BatchExchange { order: [], dist: HashShard(bid.channel, 
$expr45) } + └─BatchProject { exprs: [bid.channel, $expr45, bid.bidder, bid.bidder, bid.bidder, bid.bidder, bid.auction, bid.auction, bid.auction, bid.auction, flag, max($expr46), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } + └─BatchHashAgg { group_key: [bid.channel, $expr45, bid.bidder, bid.auction, flag], aggs: [max($expr46), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } + └─BatchExchange { order: [], dist: HashShard(bid.channel, $expr45, bid.bidder, bid.auction, flag) } + └─BatchExpand { column_subsets: [[bid.channel, $expr45, $expr46], [bid.channel, $expr45, bid.bidder], [bid.channel, $expr45, bid.auction]] } + └─BatchProject { exprs: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar) as $expr45, ToChar(bid.date_time, 'HH:mm':Varchar) as $expr46, bid.price, bid.bidder, bid.auction] } └─BatchScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.date_time], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [channel, day, minute, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], pk_columns: [channel, day] } - └─StreamProject { exprs: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), max(max(ToChar(bid.date_time, 'HH:mm':Varchar))) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } - └─StreamHashAgg { group_key: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar)], aggs: [count, max(max(ToChar(bid.date_time, 'HH:mm':Varchar))) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 
10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } - └─StreamExchange { dist: HashShard(bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar)) } - └─StreamProject { exprs: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, bid.bidder, bid.bidder, bid.bidder, bid.auction, bid.auction, bid.auction, bid.auction, flag, max(ToChar(bid.date_time, 'HH:mm':Varchar)), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } - └─StreamAppendOnlyHashAgg { group_key: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, bid.auction, flag], aggs: [count, max(ToChar(bid.date_time, 'HH:mm':Varchar)), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } - └─StreamExchange { dist: HashShard(bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, bid.auction, flag) } - └─StreamExpand { column_subsets: [[bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), ToChar(bid.date_time, 'HH:mm':Varchar)], [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder], [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.auction]] } - └─StreamProject { exprs: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), ToChar(bid.date_time, 'HH:mm':Varchar), bid.price, bid.bidder, bid.auction, bid._row_id] } + └─StreamProject { exprs: [bid.channel, $expr91, max(max($expr92)) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 
10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + └─StreamHashAgg { group_key: [bid.channel, $expr91], aggs: [count, max(max($expr92)) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + └─StreamExchange { dist: HashShard(bid.channel, $expr91) } + └─StreamProject { exprs: [bid.channel, $expr91, bid.bidder, bid.bidder, bid.bidder, bid.bidder, bid.auction, bid.auction, bid.auction, bid.auction, flag, max($expr92), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } + └─StreamAppendOnlyHashAgg { group_key: [bid.channel, $expr91, bid.bidder, bid.auction, flag], aggs: [count, max($expr92), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } + └─StreamExchange { dist: HashShard(bid.channel, $expr91, bid.bidder, bid.auction, flag) } + └─StreamExpand { column_subsets: [[bid.channel, $expr91, $expr92], [bid.channel, $expr91, bid.bidder], [bid.channel, $expr91, bid.auction]] } + └─StreamProject { exprs: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar) as $expr91, ToChar(bid.date_time, 'HH:mm':Varchar) as $expr92, 
bid.price, bid.bidder, bid.auction, bid._row_id] } └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: | Fragment 0 StreamMaterialize { columns: [channel, day, minute, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], pk_columns: [channel, day] } materialized table: 4294967294 - StreamProject { exprs: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), max(max(ToChar(bid.date_time, 'HH:mm':Varchar))) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } - StreamHashAgg { group_key: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar)], aggs: [count, max(max(ToChar(bid.date_time, 'HH:mm':Varchar))) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + StreamProject { exprs: [bid.channel, $expr91, max(max($expr92)) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count 
filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + StreamHashAgg { group_key: [bid.channel, $expr91], aggs: [count, max(max($expr92)) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid.price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid.price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid.bidder) filter((flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.bidder) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid.auction) filter((flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid.auction) filter((count filter((bid.price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } result table: 1, state tables: [0] StreamExchange Hash([0, 1]) from 1 Fragment 1 - StreamProject { exprs: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, bid.bidder, bid.bidder, bid.bidder, bid.auction, bid.auction, bid.auction, bid.auction, flag, max(ToChar(bid.date_time, 'HH:mm':Varchar)), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } - StreamAppendOnlyHashAgg { group_key: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder, bid.auction, flag], aggs: [count, max(ToChar(bid.date_time, 'HH:mm':Varchar)), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } + StreamProject { exprs: [bid.channel, $expr91, bid.bidder, bid.bidder, bid.bidder, bid.bidder, bid.auction, bid.auction, bid.auction, bid.auction, flag, max($expr92), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 
1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } + StreamAppendOnlyHashAgg { group_key: [bid.channel, $expr91, bid.bidder, bid.auction, flag], aggs: [count, max($expr92), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32))] } result table: 2, state tables: [] StreamExchange Hash([0, 1, 4, 5, 14]) from 2 Fragment 2 - StreamExpand { column_subsets: [[bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), ToChar(bid.date_time, 'HH:mm':Varchar)], [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.bidder], [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), bid.auction]] } - StreamProject { exprs: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar), ToChar(bid.date_time, 'HH:mm':Varchar), bid.price, bid.bidder, bid.auction, bid._row_id] } + StreamExpand { column_subsets: [[bid.channel, $expr91, $expr92], [bid.channel, $expr91, bid.bidder], [bid.channel, $expr91, bid.auction]] } + StreamProject { exprs: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar) as $expr91, ToChar(bid.date_time, 'HH:mm':Varchar) as $expr92, bid.price, bid.bidder, bid.auction, bid._row_id] } Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream BatchPlanNode - Table 0 { columns: [bid_channel, ToChar(bid_date_time, 'yyyy-MM-dd':Varchar), max(ToChar(bid_date_time, 'HH:mm':Varchar)), bid_bidder, bid_auction, flag], primary key: [$0 ASC, $1 ASC, $2 DESC, $3 ASC, $4 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [0, 1] } - Table 1 { columns: [bid_channel, ToChar(bid_date_time, 'yyyy-MM-dd':Varchar), count, max(max(ToChar(bid_date_time, 'HH:mm':Varchar))) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid_price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid_price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid_bidder) filter((flag = 1:Int64)), count(bid_bidder) filter((count filter((bid_price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid_bidder) filter((count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid_bidder) filter((count filter((bid_price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid_auction) filter((flag = 2:Int64)), count(bid_auction) filter((count filter((bid_price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid_auction) filter((count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid_auction) filter((count filter((bid_price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))], primary key: [$0 ASC, $1 ASC], 
value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], distribution key: [0, 1] } - Table 2 { columns: [bid_channel, ToChar(bid_date_time, 'yyyy-MM-dd':Varchar), bid_bidder, bid_auction, flag, count, max(ToChar(bid_date_time, 'HH:mm':Varchar)), count_0, count filter((bid_price < 10000:Int32)), count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count filter((bid_price >= 1000000:Int32)), count filter((bid_price < 10000:Int32))_0, count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32))_0, count filter((bid_price >= 1000000:Int32))_0, count filter((bid_price < 10000:Int32))_1, count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32))_1, count filter((bid_price >= 1000000:Int32))_1], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC], value indices: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], distribution key: [0, 1, 2, 3, 4] } + Table 0 { columns: [bid_channel, $expr91, max($expr92), bid_bidder, bid_auction, flag], primary key: [$0 ASC, $1 ASC, $2 DESC, $3 ASC, $4 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [0, 1] } + Table 1 { columns: [bid_channel, $expr91, count, max(max($expr92)) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((bid_price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((bid_price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bid_bidder) filter((flag = 1:Int64)), count(bid_bidder) filter((count filter((bid_price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid_bidder) filter((count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid_bidder) filter((count filter((bid_price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bid_auction) filter((flag = 2:Int64)), count(bid_auction) filter((count filter((bid_price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid_auction) filter((count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(bid_auction) filter((count filter((bid_price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], distribution key: [0, 1] } + Table 2 { columns: [bid_channel, $expr91, bid_bidder, bid_auction, flag, count, max($expr92), count_0, count filter((bid_price < 10000:Int32)), count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count filter((bid_price >= 1000000:Int32)), count filter((bid_price < 10000:Int32))_0, count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32))_0, count filter((bid_price >= 1000000:Int32))_0, count filter((bid_price < 10000:Int32))_1, count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32))_1, count filter((bid_price >= 1000000:Int32))_1], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC], value indices: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], distribution key: [0, 1, 2, 3, 4] } Table 4294967294 { columns: [channel, day, minute, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], distribution key: [0, 1] } - id: nexmark_q17 before: @@ -903,34 +899,34 @@ GROUP BY auction, 
to_char(date_time, 'YYYY-MM-DD'); batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), (sum(bid.price) / count(bid.price)), sum(bid.price)] } - └─BatchHashAgg { group_key: [bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar)], aggs: [count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), sum(bid.price), count(bid.price), sum(bid.price)] } - └─BatchExchange { order: [], dist: HashShard(bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar)) } - └─BatchProject { exprs: [bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar), bid.price] } + └─BatchProject { exprs: [bid.auction, $expr45, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), (sum(bid.price) / count(bid.price)) as $expr46, sum(bid.price)] } + └─BatchHashAgg { group_key: [bid.auction, $expr45], aggs: [count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), sum(bid.price), count(bid.price), sum(bid.price)] } + └─BatchExchange { order: [], dist: HashShard(bid.auction, $expr45) } + └─BatchProject { exprs: [bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar) as $expr45, bid.price] } └─BatchScan { table: bid, columns: [bid.auction, bid.price, bid.date_time], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [auction, day, total_bids, rank1_bids, rank2_bids, rank3_bids, min_price, max_price, avg_price, sum_price], pk_columns: [auction, day] } - └─StreamProject { exprs: [bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), (sum(bid.price) / count(bid.price)), sum(bid.price)] } - └─StreamAppendOnlyHashAgg { group_key: [bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar)], aggs: [count, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), sum(bid.price), count(bid.price), sum(bid.price)] } - └─StreamExchange { dist: HashShard(bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar)) } - └─StreamProject { exprs: [bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar), bid.price, bid._row_id] } + └─StreamProject { exprs: [bid.auction, $expr91, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), (sum(bid.price) / count(bid.price)) as $expr93, sum(bid.price)] } + └─StreamAppendOnlyHashAgg { group_key: [bid.auction, $expr91], aggs: [count, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), 
max(bid.price), sum(bid.price), count(bid.price), sum(bid.price)] } + └─StreamExchange { dist: HashShard(bid.auction, $expr91) } + └─StreamProject { exprs: [bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar) as $expr91, bid.price, bid._row_id] } └─StreamTableScan { table: bid, columns: [bid.auction, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: | Fragment 0 StreamMaterialize { columns: [auction, day, total_bids, rank1_bids, rank2_bids, rank3_bids, min_price, max_price, avg_price, sum_price], pk_columns: [auction, day] } materialized table: 4294967294 - StreamProject { exprs: [bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), (sum(bid.price) / count(bid.price)), sum(bid.price)] } - StreamAppendOnlyHashAgg { group_key: [bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar)], aggs: [count, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), sum(bid.price), count(bid.price), sum(bid.price)] } + StreamProject { exprs: [bid.auction, $expr91, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), (sum(bid.price) / count(bid.price)) as $expr93, sum(bid.price)] } + StreamAppendOnlyHashAgg { group_key: [bid.auction, $expr91], aggs: [count, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), sum(bid.price), count(bid.price), sum(bid.price)] } result table: 0, state tables: [] StreamExchange Hash([0, 1]) from 1 Fragment 1 - StreamProject { exprs: [bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar), bid.price, bid._row_id] } + StreamProject { exprs: [bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar) as $expr91, bid.price, bid._row_id] } Chain { table: bid, columns: [bid.auction, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream BatchPlanNode - Table 0 { columns: [bid_auction, ToChar(bid_date_time, 'YYYY-MM-DD':Varchar), count, count_0, count filter((bid_price < 10000:Int32)), count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count filter((bid_price >= 1000000:Int32)), min(bid_price), max(bid_price), sum(bid_price), count(bid_price), sum(bid_price)_0], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], distribution key: [0, 1] } + Table 0 { columns: [bid_auction, $expr91, count, count_0, count filter((bid_price < 10000:Int32)), count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count filter((bid_price >= 1000000:Int32)), min(bid_price), max(bid_price), sum(bid_price), count(bid_price), sum(bid_price)_0], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], distribution key: [0, 1] } Table 4294967294 { columns: [auction, day, total_bids, rank1_bids, rank2_bids, rank3_bids, min_price, max_price, avg_price, sum_price], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], distribution key: [0, 1] } - id: 
nexmark_q18 before: @@ -956,7 +952,7 @@ StreamMaterialize { columns: [auction, bidder, price, channel, url, date_time, extra, bid._row_id(hidden)], pk_columns: [bid._row_id] } └─StreamExchange { dist: HashShard(bid._row_id) } └─StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, bid.extra, bid._row_id] } - └─StreamGroupTopN { order: "[bid.date_time DESC]", limit: 1, offset: 0, group_key: [1, 0] } + └─StreamAppendOnlyGroupTopN { order: "[bid.date_time DESC]", limit: 1, offset: 0, group_key: [1, 0] } └─StreamExchange { dist: HashShard(bid.bidder, bid.auction) } └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, bid.extra, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: | @@ -967,7 +963,7 @@ Fragment 1 StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, bid.extra, bid._row_id] } - StreamGroupTopN { order: "[bid.date_time DESC]", limit: 1, offset: 0, group_key: [1, 0] } + StreamAppendOnlyGroupTopN { order: "[bid.date_time DESC]", limit: 1, offset: 0, group_key: [1, 0] } state table: 0 StreamExchange Hash([1, 0]) from 2 @@ -1042,8 +1038,8 @@ Table 0 { columns: [bid_auction, bid_bidder, bid_price, bid_channel, bid_url, bid_date_time, bid__row_id], primary key: [$0 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [0] } Table 1 { columns: [bid_auction, bid__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 2 { columns: [auction_id, auction_item_name, auction_description, auction_initial_bid, auction_reserve, auction_date_time, auction_expires, auction_seller, auction_category], primary key: [$0 ASC, $0 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [0] } - Table 3 { columns: [auction_id, auction_id_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 2 { columns: [auction_id, auction_item_name, auction_description, auction_initial_bid, auction_reserve, auction_date_time, auction_expires, auction_seller, auction_category], primary key: [$0 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [0] } + Table 3 { columns: [auction_id, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4294967294 { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, bid._row_id, auction.id], primary key: [$14 ASC, $15 ASC, $0 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], distribution key: [0] } - id: nexmark_q21 before: @@ -1075,17 +1071,17 @@ SPLIT_PART(url, '/', 6) as dir3 FROM bid; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [bid.auction, bid.bidder, bid.price, bid.channel, SplitPart(bid.url, '/':Varchar, 4:Int32), SplitPart(bid.url, '/':Varchar, 5:Int32), SplitPart(bid.url, '/':Varchar, 6:Int32)] } + └─BatchProject { exprs: [bid.auction, bid.bidder, bid.price, bid.channel, SplitPart(bid.url, '/':Varchar, 4:Int32) as $expr67, SplitPart(bid.url, '/':Varchar, 5:Int32) as $expr68, SplitPart(bid.url, '/':Varchar, 6:Int32) as $expr69] } └─BatchScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [auction, bidder, price, channel, dir1, dir2, dir3, bid._row_id(hidden)], pk_columns: 
[bid._row_id] } - └─StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.channel, SplitPart(bid.url, '/':Varchar, 4:Int32), SplitPart(bid.url, '/':Varchar, 5:Int32), SplitPart(bid.url, '/':Varchar, 6:Int32), bid._row_id] } + └─StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.channel, SplitPart(bid.url, '/':Varchar, 4:Int32) as $expr136, SplitPart(bid.url, '/':Varchar, 5:Int32) as $expr137, SplitPart(bid.url, '/':Varchar, 6:Int32) as $expr138, bid._row_id] } └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: | Fragment 0 StreamMaterialize { columns: [auction, bidder, price, channel, dir1, dir2, dir3, bid._row_id(hidden)], pk_columns: [bid._row_id] } materialized table: 4294967294 - StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.channel, SplitPart(bid.url, '/':Varchar, 4:Int32), SplitPart(bid.url, '/':Varchar, 5:Int32), SplitPart(bid.url, '/':Varchar, 6:Int32), bid._row_id] } + StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.channel, SplitPart(bid.url, '/':Varchar, 4:Int32) as $expr136, SplitPart(bid.url, '/':Varchar, 5:Int32) as $expr137, SplitPart(bid.url, '/':Varchar, 6:Int32) as $expr138, bid._row_id] } Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream BatchPlanNode @@ -1150,10 +1146,10 @@ Upstream BatchPlanNode - Table 0 { columns: [auction_id, auction_item_name], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 1 { columns: [auction_id, auction_id_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 2 { columns: [bid_auction, max(bid_price)], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 3 { columns: [bid_auction, bid_auction_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 0 { columns: [auction_id, auction_item_name], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 1 { columns: [auction_id, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 2 { columns: [bid_auction, max(bid_price)], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 3 { columns: [bid_auction, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4 { columns: [bid_auction, count, max(bid_price)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } Table 4294967294 { columns: [auction_id, auction_item_name, current_highest_bid, bid.auction], primary key: [$0 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } - id: nexmark_q102 @@ -1175,7 +1171,7 @@ SELECT COUNT(*) / COUNT(DISTINCT auction) FROM bid ) batch_plan: | - BatchNestedLoopJoin { type: Inner, predicate: (count(bid.auction) >= (sum0(count) / count(bid.auction))), output: [auction.id, auction.item_name, count(bid.auction)] } + BatchNestedLoopJoin { type: Inner, predicate: (count(bid.auction) >= $expr23), output: [auction.id, auction.item_name, count(bid.auction)] } ├─BatchExchange { order: [], dist: Single } | └─BatchHashAgg { group_key: [auction.id, auction.item_name], aggs: [count(bid.auction), count(bid.auction)] } | └─BatchHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all } @@ -1183,7 +1179,7 @@ | | └─BatchScan 
{ table: auction, columns: [auction.id, auction.item_name], distribution: UpstreamHashShard(auction.id) } | └─BatchExchange { order: [], dist: HashShard(bid.auction) } | └─BatchScan { table: bid, columns: [bid.auction], distribution: SomeShard } - └─BatchProject { exprs: [(sum0(count) / count(bid.auction))] } + └─BatchProject { exprs: [(sum0(count) / count(bid.auction)) as $expr23] } └─BatchSimpleAgg { aggs: [sum0(count), count(bid.auction)] } └─BatchExchange { order: [], dist: Single } └─BatchHashAgg { group_key: [bid.auction], aggs: [count] } @@ -1192,7 +1188,7 @@ stream_plan: | StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], pk_columns: [auction_id, auction_item_name] } └─StreamProject { exprs: [auction.id, auction.item_name, count(bid.auction)] } - └─StreamDynamicFilter { predicate: (count(bid.auction) >= (sum0(count) / count(bid.auction))), output: [auction.id, auction.item_name, count(bid.auction), count(bid.auction)] } + └─StreamDynamicFilter { predicate: (count(bid.auction) >= $expr47), output: [auction.id, auction.item_name, count(bid.auction), count(bid.auction)] } ├─StreamProject { exprs: [auction.id, auction.item_name, count(bid.auction), count(bid.auction)] } | └─StreamHashAgg { group_key: [auction.id, auction.item_name], aggs: [count, count(bid.auction), count(bid.auction)] } | └─StreamHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all } @@ -1201,7 +1197,7 @@ | └─StreamExchange { dist: HashShard(bid.auction) } | └─StreamTableScan { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [(sum0(count) / count(bid.auction))] } + └─StreamProject { exprs: [(sum0(count) / count(bid.auction)) as $expr47] } └─StreamGlobalSimpleAgg { aggs: [count, sum0(count), count(bid.auction)] } └─StreamExchange { dist: Single } └─StreamProject { exprs: [bid.auction, count] } @@ -1213,7 +1209,7 @@ StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], pk_columns: [auction_id, auction_item_name] } materialized table: 4294967294 StreamProject { exprs: [auction.id, auction.item_name, count(bid.auction)] } - StreamDynamicFilter { predicate: (count(bid.auction) >= (sum0(count) / count(bid.auction))), output: [auction.id, auction.item_name, count(bid.auction), count(bid.auction)] } + StreamDynamicFilter { predicate: (count(bid.auction) >= $expr47), output: [auction.id, auction.item_name, count(bid.auction), count(bid.auction)] } left table: 0, right table 1 StreamProject { exprs: [auction.id, auction.item_name, count(bid.auction), count(bid.auction)] } StreamHashAgg { group_key: [auction.id, auction.item_name], aggs: [count, count(bid.auction), count(bid.auction)] } @@ -1235,7 +1231,7 @@ BatchPlanNode Fragment 3 - StreamProject { exprs: [(sum0(count) / count(bid.auction))] } + StreamProject { exprs: [(sum0(count) / count(bid.auction)) as $expr47] } StreamGlobalSimpleAgg { aggs: [count, sum0(count), count(bid.auction)] } result table: 7, state tables: [] StreamExchange Single from 4 @@ -1252,10 +1248,10 @@ BatchPlanNode Table 0 { columns: [auction_id, auction_item_name, count(bid_auction), count(bid_auction)_0], primary key: [$3 ASC, $0 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } - Table 1 { columns: [(sum0(count) / count(bid_auction))], primary key: [], value indices: [0], distribution key: [] } + Table 1 { columns: [$expr47], primary key: [], value indices: [0], distribution key: [] } Table 2 { columns: 
[auction_id, auction_item_name, count, count(bid_auction), count(bid_auction)_0], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4], distribution key: [0] } - Table 3 { columns: [auction_id, auction_item_name], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 4 { columns: [auction_id, auction_id_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 3 { columns: [auction_id, auction_item_name], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 4 { columns: [auction_id, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 5 { columns: [bid_auction, bid__row_id], primary key: [$0 ASC, $1 ASC], value indices: [0, 1], distribution key: [0] } Table 6 { columns: [bid_auction, bid__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 7 { columns: [count, sum0(count), count(bid_auction)], primary key: [], value indices: [0, 1, 2], distribution key: [] } @@ -1322,10 +1318,10 @@ Upstream BatchPlanNode - Table 0 { columns: [auction_id, auction_item_name], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 1 { columns: [auction_id, auction_id_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 2 { columns: [bid_auction], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 3 { columns: [bid_auction, bid_auction_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 0 { columns: [auction_id, auction_item_name], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 1 { columns: [auction_id, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 2 { columns: [bid_auction], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 3 { columns: [bid_auction, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4 { columns: [bid_auction, count, count_0], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } Table 4294967294 { columns: [auction_id, auction_item_name], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } - id: nexmark_q104 @@ -1389,10 +1385,10 @@ Upstream BatchPlanNode - Table 0 { columns: [auction_id, auction_item_name], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 1 { columns: [auction_id, auction_id_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 2 { columns: [bid_auction], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 3 { columns: [bid_auction, bid_auction_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 0 { columns: [auction_id, auction_item_name], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 1 { columns: [auction_id, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 2 { columns: [bid_auction], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 3 { columns: [bid_auction, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4 { columns: [bid_auction, count, count_0], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } Table 4294967294 { columns: [auction_id, auction_item_name], primary key: [$0 ASC], value indices: [0, 
1], distribution key: [0] } - id: nexmark_q105 @@ -1427,7 +1423,7 @@ └─StreamTopN { order: "[count(bid.auction) DESC]", limit: 1000, offset: 0 } └─StreamExchange { dist: Single } └─StreamGroupTopN { order: "[count(bid.auction) DESC]", limit: 1000, offset: 0, group_key: [3] } - └─StreamProject { exprs: [auction.id, auction.item_name, count(bid.auction), Vnode(auction.id)] } + └─StreamProject { exprs: [auction.id, auction.item_name, count(bid.auction), Vnode(auction.id) as $expr1] } └─StreamProject { exprs: [auction.id, auction.item_name, count(bid.auction)] } └─StreamHashAgg { group_key: [auction.id, auction.item_name], aggs: [count, count(bid.auction)] } └─StreamHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all } @@ -1447,7 +1443,7 @@ Fragment 1 StreamGroupTopN { order: "[count(bid.auction) DESC]", limit: 1000, offset: 0, group_key: [3] } state table: 1 - StreamProject { exprs: [auction.id, auction.item_name, count(bid.auction), Vnode(auction.id)] } + StreamProject { exprs: [auction.id, auction.item_name, count(bid.auction), Vnode(auction.id) as $expr1] } StreamProject { exprs: [auction.id, auction.item_name, count(bid.auction)] } StreamHashAgg { group_key: [auction.id, auction.item_name], aggs: [count, count(bid.auction)] } result table: 2, state tables: [] @@ -1466,11 +1462,11 @@ Upstream BatchPlanNode - Table 0 { columns: [auction_id, auction_item_name, count(bid_auction), Vnode(auction_id)], primary key: [$2 DESC, $0 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [] } - Table 1 { columns: [auction_id, auction_item_name, count(bid_auction), Vnode(auction_id)], primary key: [$3 ASC, $2 DESC, $0 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [0], vnode column idx: 3 } + Table 0 { columns: [auction_id, auction_item_name, count(bid_auction), $expr1], primary key: [$2 DESC, $0 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [] } + Table 1 { columns: [auction_id, auction_item_name, count(bid_auction), $expr1], primary key: [$3 ASC, $2 DESC, $0 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [0], vnode column idx: 3 } Table 2 { columns: [auction_id, auction_item_name, count, count(bid_auction)], primary key: [$0 ASC, $1 ASC], value indices: [2, 3], distribution key: [0] } - Table 3 { columns: [auction_id, auction_item_name], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 4 { columns: [auction_id, auction_id_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 3 { columns: [auction_id, auction_item_name], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 4 { columns: [auction_id, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 5 { columns: [bid_auction, bid__row_id], primary key: [$0 ASC, $1 ASC], value indices: [0, 1], distribution key: [0] } Table 6 { columns: [bid_auction, bid__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 4294967294 { columns: [auction_id, auction_item_name, bid_count], primary key: [$2 DESC, $0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [] } diff --git a/src/frontend/planner_test/tests/testdata/nexmark_source.yaml b/src/frontend/planner_test/tests/testdata/nexmark_source.yaml new file mode 100644 index 0000000000000..fb637bba603dc --- /dev/null +++ b/src/frontend/planner_test/tests/testdata/nexmark_source.yaml @@ -0,0 +1,1657 @@ +# This file is automatically generated. 
See `src/frontend/planner_test/README.md` for more information. +- id: create_sources + sql: | + create source auction ( + id INTEGER, + item_name VARCHAR, + description VARCHAR, + initial_bid INTEGER, + reserve INTEGER, + date_time TIMESTAMP, + expires TIMESTAMP, + seller INTEGER, + category INTEGER) + with ( + connector = 'nexmark', + nexmark.table.type = 'Auction' + ); + + create source bid ( + auction INTEGER, + bidder INTEGER, + price INTEGER, + channel VARCHAR, + url VARCHAR, + date_time TIMESTAMP, + extra VARCHAR) + with ( + connector = 'nexmark', + nexmark.table.type = 'Bid' + ); + + create source person ( + id INTEGER, + name VARCHAR, + email_address VARCHAR, + credit_card VARCHAR, + city VARCHAR, + state VARCHAR, + date_time TIMESTAMP) + with ( + connector = 'nexmark', + nexmark.table.type = 'Person' + ); +- id: nexmark_q0 + before: + - create_sources + sql: | + SELECT auction, bidder, price, date_time FROM bid; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [auction, bidder, price, date_time] } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction, bidder, price, date_time, _row_id(hidden)], pk_columns: [_row_id] } + └─StreamExchange { dist: HashShard(_row_id) } + └─StreamProject { exprs: [auction, bidder, price, date_time, _row_id] } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction, bidder, price, date_time, _row_id(hidden)], pk_columns: [_row_id] } + materialized table: 4294967294 + StreamExchange Hash([4]) from 1 + + Fragment 1 + StreamProject { exprs: [auction, bidder, price, date_time, _row_id] } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 0 + + Table 0 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [auction, bidder, price, date_time, _row_id], primary key: [$4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [4] } +- id: nexmark_q1 + before: + - create_sources + sql: | + SELECT + auction, + bidder, + 0.908 * price as price, + date_time + FROM bid; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [auction, bidder, (0.908:Decimal * price) as $expr24, date_time] } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction, bidder, price, date_time, _row_id(hidden)], pk_columns: [_row_id] } + └─StreamExchange { dist: HashShard(_row_id) } + └─StreamProject { exprs: [auction, bidder, (0.908:Decimal * price) as $expr48, date_time, _row_id] } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction, bidder, price, date_time, _row_id(hidden)], pk_columns: [_row_id] } + materialized table: 4294967294 + StreamExchange Hash([4]) from 1 + + Fragment 1 + StreamProject { exprs: [auction, bidder, (0.908:Decimal * price) as 
$expr48, date_time, _row_id] } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 0 + + Table 0 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [auction, bidder, price, date_time, _row_id], primary key: [$4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [4] } +- id: nexmark_q2 + before: + - create_sources + sql: SELECT auction, price FROM bid WHERE auction = 1007 OR auction = 1020 OR auction = 2001 OR auction = 2019 OR auction = 2087; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [auction, price] } + └─BatchFilter { predicate: (((((auction = 1007:Int32) OR (auction = 1020:Int32)) OR (auction = 2001:Int32)) OR (auction = 2019:Int32)) OR (auction = 2087:Int32)) } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction, price, _row_id(hidden)], pk_columns: [_row_id] } + └─StreamExchange { dist: HashShard(_row_id) } + └─StreamProject { exprs: [auction, price, _row_id] } + └─StreamFilter { predicate: (((((auction = 1007:Int32) OR (auction = 1020:Int32)) OR (auction = 2001:Int32)) OR (auction = 2019:Int32)) OR (auction = 2087:Int32)) } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction, price, _row_id(hidden)], pk_columns: [_row_id] } + materialized table: 4294967294 + StreamExchange Hash([2]) from 1 + + Fragment 1 + StreamProject { exprs: [auction, price, _row_id] } + StreamFilter { predicate: (((((auction = 1007:Int32) OR (auction = 1020:Int32)) OR (auction = 2001:Int32)) OR (auction = 2019:Int32)) OR (auction = 2087:Int32)) } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 0 + + Table 0 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [auction, price, _row_id], primary key: [$2 ASC], value indices: [0, 1, 2], distribution key: [2] } +- id: nexmark_q3 + before: + - create_sources + sql: | + SELECT + P.name, P.city, P.state, A.id + FROM + auction AS A INNER JOIN person AS P on A.seller = P.id + WHERE + A.category = 10 and (P.state = 'or' OR P.state = 'id' OR P.state = 'ca'); + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchHashJoin { type: Inner, predicate: seller = id, output: [name, city, state, id] } + ├─BatchExchange { order: [], dist: HashShard(seller) } + | └─BatchProject { exprs: [id, seller] } + | └─BatchFilter { predicate: (category = 10:Int32) } + | └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"], filter: (None, None) } + └─BatchExchange { order: [], dist: HashShard(id) } + └─BatchProject { exprs: [id, name, city, state] } + └─BatchFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) } + └─BatchSource { source: "person", columns: ["id", "name", "email_address", "credit_card", "city", "state", 
"date_time", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [name, city, state, id, _row_id(hidden), seller(hidden), _row_id#1(hidden), id#1(hidden)], pk_columns: [_row_id, _row_id#1, seller, id#1] } + └─StreamAppendOnlyHashJoin { type: Inner, predicate: seller = id, output: [name, city, state, id, _row_id, seller, _row_id, id] } + ├─StreamExchange { dist: HashShard(seller) } + | └─StreamProject { exprs: [id, seller, _row_id] } + | └─StreamFilter { predicate: (category = 10:Int32) } + | └─StreamRowIdGen { row_id_index: 9 } + | └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + └─StreamExchange { dist: HashShard(id) } + └─StreamProject { exprs: [id, name, city, state, _row_id] } + └─StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "person", columns: ["id", "name", "email_address", "credit_card", "city", "state", "date_time", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [name, city, state, id, _row_id(hidden), seller(hidden), _row_id#1(hidden), id#1(hidden)], pk_columns: [_row_id, _row_id#1, seller, id#1] } + materialized table: 4294967294 + StreamAppendOnlyHashJoin { type: Inner, predicate: seller = id, output: [name, city, state, id, _row_id, seller, _row_id, id] } + left table: 0, right table 2, left degree table: 1, right degree table: 3, + StreamExchange Hash([1]) from 1 + StreamExchange Hash([0]) from 2 + + Fragment 1 + StreamProject { exprs: [id, seller, _row_id] } + StreamFilter { predicate: (category = 10:Int32) } + StreamRowIdGen { row_id_index: 9 } + StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + source state table: 4 + + Fragment 2 + StreamProject { exprs: [id, name, city, state, _row_id] } + StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "person", columns: ["id", "name", "email_address", "credit_card", "city", "state", "date_time", "_row_id"] } + source state table: 5 + + Table 0 { columns: [id, seller, _row_id], primary key: [$1 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [1] } + Table 1 { columns: [seller, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 2 { columns: [id, name, city, state, _row_id], primary key: [$0 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0] } + Table 3 { columns: [id, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 4 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 5 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [name, city, state, id, _row_id, seller, _row_id#1, id#1], primary key: [$4 ASC, $6 ASC, $5 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [5] } +- id: nexmark_q4 + before: + - create_sources + sql: | + SELECT + Q.category, + AVG(Q.final) as avg + FROM ( + SELECT MAX(B.price) AS final, A.category + FROM auction A, bid B + WHERE A.id = B.auction AND B.date_time BETWEEN A.date_time AND A.expires 
+ GROUP BY A.id, A.category + ) Q + GROUP BY Q.category; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [category, (sum(max(price))::Decimal / count(max(price))) as $expr23] } + └─BatchHashAgg { group_key: [category], aggs: [sum(max(price)), count(max(price))] } + └─BatchExchange { order: [], dist: HashShard(category) } + └─BatchHashAgg { group_key: [id, category], aggs: [max(price)] } + └─BatchProject { exprs: [id, category, price] } + └─BatchFilter { predicate: (date_time >= date_time) AND (date_time <= expires) } + └─BatchHashJoin { type: Inner, predicate: id = auction, output: all } + ├─BatchExchange { order: [], dist: HashShard(id) } + | └─BatchProject { exprs: [id, date_time, expires, category] } + | └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"], filter: (None, None) } + └─BatchExchange { order: [], dist: HashShard(auction) } + └─BatchProject { exprs: [auction, price, date_time] } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [category, avg], pk_columns: [category] } + └─StreamProject { exprs: [category, (sum(max(price))::Decimal / count(max(price))) as $expr47] } + └─StreamHashAgg { group_key: [category], aggs: [count, sum(max(price)), count(max(price))] } + └─StreamExchange { dist: HashShard(category) } + └─StreamProject { exprs: [id, category, max(price)] } + └─StreamAppendOnlyHashAgg { group_key: [id, category], aggs: [count, max(price)] } + └─StreamProject { exprs: [id, category, price, _row_id, _row_id, auction] } + └─StreamFilter { predicate: (date_time >= date_time) AND (date_time <= expires) } + └─StreamAppendOnlyHashJoin { type: Inner, predicate: id = auction, output: all } + ├─StreamExchange { dist: HashShard(id) } + | └─StreamProject { exprs: [id, date_time, expires, category, _row_id] } + | └─StreamRowIdGen { row_id_index: 9 } + | └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + └─StreamExchange { dist: HashShard(auction) } + └─StreamProject { exprs: [auction, price, date_time, _row_id] } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [category, avg], pk_columns: [category] } + materialized table: 4294967294 + StreamProject { exprs: [category, (sum(max(price))::Decimal / count(max(price))) as $expr47] } + StreamHashAgg { group_key: [category], aggs: [count, sum(max(price)), count(max(price))] } + result table: 0, state tables: [] + StreamExchange Hash([1]) from 1 + + Fragment 1 + StreamProject { exprs: [id, category, max(price)] } + StreamAppendOnlyHashAgg { group_key: [id, category], aggs: [count, max(price)] } + result table: 1, state tables: [] + StreamProject { exprs: [id, category, price, _row_id, _row_id, auction] } + StreamFilter { predicate: (date_time >= date_time) AND (date_time <= expires) } + StreamAppendOnlyHashJoin { type: Inner, predicate: id = auction, output: all } + left table: 2, right table 4, left degree table: 3, right degree table: 5, + StreamExchange Hash([0]) from 2 + StreamExchange Hash([0]) from 3 + + Fragment 2 + StreamProject { exprs: [id, 
date_time, expires, category, _row_id] } + StreamRowIdGen { row_id_index: 9 } + StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + source state table: 6 + + Fragment 3 + StreamProject { exprs: [auction, price, date_time, _row_id] } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 7 + + Table 0 { columns: [category, count, sum(max(price)), count(max(price))], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [0] } + Table 1 { columns: [id, category, count, max(price)], primary key: [$0 ASC, $1 ASC], value indices: [2, 3], distribution key: [0] } + Table 2 { columns: [id, date_time, expires, category, _row_id], primary key: [$0 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0] } + Table 3 { columns: [id, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 4 { columns: [auction, price, date_time, _row_id], primary key: [$0 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } + Table 5 { columns: [auction, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 6 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 7 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [category, avg], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } +- id: nexmark_q5 + before: + - create_sources + sql: | + SELECT AuctionBids.auction, AuctionBids.num FROM ( + SELECT + bid.auction, + count(*) AS num, + window_start AS starttime + FROM + HOP(bid, date_time, INTERVAL '2' SECOND, INTERVAL '10' SECOND) + GROUP BY + window_start, + bid.auction + ) AS AuctionBids + JOIN ( + SELECT + max(CountBids.num) AS maxn, + CountBids.starttime_c + FROM ( + SELECT + count(*) AS num, + window_start AS starttime_c + FROM HOP(bid, date_time, INTERVAL '2' SECOND, INTERVAL '10' SECOND) + GROUP BY + bid.auction, + window_start + ) AS CountBids + GROUP BY + CountBids.starttime_c + ) AS MaxBids + ON AuctionBids.starttime = MaxBids.starttime_c AND AuctionBids.num >= MaxBids.maxn; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [auction, count] } + └─BatchFilter { predicate: (count >= max(count)) } + └─BatchHashJoin { type: Inner, predicate: window_start = window_start, output: all } + ├─BatchExchange { order: [], dist: HashShard(window_start) } + | └─BatchProject { exprs: [auction, count, window_start] } + | └─BatchHashAgg { group_key: [window_start, auction], aggs: [count] } + | └─BatchHopWindow { time_col: date_time, slide: 00:00:02, size: 00:00:10, output: [auction, window_start] } + | └─BatchExchange { order: [], dist: HashShard(auction) } + | └─BatchFilter { predicate: IsNotNull(date_time) } + | └─BatchProject { exprs: [auction, date_time] } + | └─BatchFilter { predicate: IsNotNull(date_time) } + | └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + └─BatchProject { exprs: [max(count), window_start] } + └─BatchHashAgg { group_key: [window_start], aggs: [max(count)] } + └─BatchExchange { order: [], dist: HashShard(window_start) } + └─BatchHashAgg { group_key: 
[auction, window_start], aggs: [count] } + └─BatchHopWindow { time_col: date_time, slide: 00:00:02, size: 00:00:10, output: [auction, window_start] } + └─BatchExchange { order: [], dist: HashShard(auction) } + └─BatchFilter { predicate: IsNotNull(date_time) } + └─BatchProject { exprs: [auction, date_time] } + └─BatchFilter { predicate: IsNotNull(date_time) } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction, num, window_start(hidden), window_start#1(hidden)], pk_columns: [window_start, auction, window_start#1] } + └─StreamProject { exprs: [auction, count, window_start, window_start] } + └─StreamFilter { predicate: (count >= max(count)) } + └─StreamHashJoin { type: Inner, predicate: window_start = window_start, output: all } + ├─StreamExchange { dist: HashShard(window_start) } + | └─StreamProject { exprs: [auction, count, window_start] } + | └─StreamAppendOnlyHashAgg { group_key: [window_start, auction], aggs: [count, count] } + | └─StreamExchange { dist: HashShard(auction, window_start) } + | └─StreamHopWindow { time_col: date_time, slide: 00:00:02, size: 00:00:10, output: [auction, window_start, _row_id] } + | └─StreamFilter { predicate: IsNotNull(date_time) } + | └─StreamProject { exprs: [auction, date_time, _row_id] } + | └─StreamShare { id = 919 } + | └─StreamProject { exprs: [auction, date_time, _row_id] } + | └─StreamFilter { predicate: IsNotNull(date_time) } + | └─StreamRowIdGen { row_id_index: 7 } + | └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + └─StreamProject { exprs: [max(count), window_start] } + └─StreamHashAgg { group_key: [window_start], aggs: [count, max(count)] } + └─StreamExchange { dist: HashShard(window_start) } + └─StreamProject { exprs: [auction, window_start, count] } + └─StreamAppendOnlyHashAgg { group_key: [auction, window_start], aggs: [count, count] } + └─StreamExchange { dist: HashShard(auction, window_start) } + └─StreamHopWindow { time_col: date_time, slide: 00:00:02, size: 00:00:10, output: [auction, window_start, _row_id] } + └─StreamFilter { predicate: IsNotNull(date_time) } + └─StreamProject { exprs: [auction, date_time, _row_id] } + └─StreamShare { id = 919 } + └─StreamProject { exprs: [auction, date_time, _row_id] } + └─StreamFilter { predicate: IsNotNull(date_time) } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction, num, window_start(hidden), window_start#1(hidden)], pk_columns: [window_start, auction, window_start#1] } + materialized table: 4294967294 + StreamProject { exprs: [auction, count, window_start, window_start] } + StreamFilter { predicate: (count >= max(count)) } + StreamHashJoin { type: Inner, predicate: window_start = window_start, output: all } + left table: 0, right table 2, left degree table: 1, right degree table: 3, + StreamExchange Hash([2]) from 1 + StreamProject { exprs: [max(count), window_start] } + StreamHashAgg { group_key: [window_start], aggs: [count, max(count)] } + result table: 7, state tables: [6] + StreamExchange Hash([1]) from 4 + + Fragment 1 + StreamProject { exprs: [auction, count, window_start] } + StreamAppendOnlyHashAgg { group_key: [window_start, auction], aggs: [count, count] } + result 
table: 4, state tables: [] + StreamExchange Hash([0, 1]) from 2 + + Fragment 2 + StreamHopWindow { time_col: date_time, slide: 00:00:02, size: 00:00:10, output: [auction, window_start, _row_id] } + StreamFilter { predicate: IsNotNull(date_time) } + StreamProject { exprs: [auction, date_time, _row_id] } + StreamExchange Hash([2]) from 3 + + Fragment 3 + StreamProject { exprs: [auction, date_time, _row_id] } + StreamFilter { predicate: IsNotNull(date_time) } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 5 + + Fragment 4 + StreamProject { exprs: [auction, window_start, count] } + StreamAppendOnlyHashAgg { group_key: [auction, window_start], aggs: [count, count] } + result table: 8, state tables: [] + StreamExchange Hash([0, 1]) from 5 + + Fragment 5 + StreamHopWindow { time_col: date_time, slide: 00:00:02, size: 00:00:10, output: [auction, window_start, _row_id] } + StreamFilter { predicate: IsNotNull(date_time) } + StreamProject { exprs: [auction, date_time, _row_id] } + StreamExchange Hash([2]) from 3 + + Table 0 { columns: [auction, count, window_start], primary key: [$2 ASC, $0 ASC], value indices: [0, 1, 2], distribution key: [2] } + Table 1 { columns: [window_start, auction, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 2 { columns: [max(count), window_start], primary key: [$1 ASC], value indices: [0, 1], distribution key: [1] } + Table 3 { columns: [window_start, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 4 { columns: [window_start, auction, count, count_0], primary key: [$0 ASC, $1 ASC], value indices: [2, 3], distribution key: [1, 0] } + Table 5 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 6 { columns: [window_start, count, auction], primary key: [$0 ASC, $1 DESC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 7 { columns: [window_start, count, max(count)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } + Table 8 { columns: [auction, window_start, count, count_0], primary key: [$0 ASC, $1 ASC], value indices: [2, 3], distribution key: [0, 1] } + Table 4294967294 { columns: [auction, num, window_start, window_start#1], primary key: [$2 ASC, $0 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [2] } +- id: nexmark_q6 + before: + - create_sources + sql: | + SELECT + Q.seller, + AVG(Q.final) OVER + (PARTITION BY Q.seller ORDER BY Q.date_time ROWS BETWEEN 10 PRECEDING AND CURRENT ROW) + as avg + FROM ( + SELECT MAX(B.price) AS final, A.seller, B.date_time + FROM auction AS A, bid AS B + WHERE A.id = B.auction and B.date_time between A.date_time and A.expires + GROUP BY A.id, A.seller + ) AS Q; + binder_error: |- + Feature is not yet implemented: aggregate function as over window function: avg + Tracking issue: https://github.com/risingwavelabs/risingwave/issues/4978 +- id: nexmark_q7 + before: + - create_sources + sql: | + SELECT + B.auction, + B.price, + B.bidder, + B.date_time + FROM + bid B + JOIN ( + SELECT + MAX(price) AS maxprice, + window_end as date_time + FROM + TUMBLE(bid, date_time, INTERVAL '10' SECOND) + GROUP BY + window_end + ) B1 ON B.price = B1.maxprice + WHERE + B.date_time BETWEEN B1.date_time - INTERVAL '10' SECOND + AND B1.date_time; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: 
[auction, price, bidder, date_time] } + └─BatchFilter { predicate: (date_time >= $expr55) AND (date_time <= $expr54) } + └─BatchHashJoin { type: Inner, predicate: price = max(price), output: all } + ├─BatchExchange { order: [], dist: HashShard(price) } + | └─BatchProject { exprs: [auction, bidder, price, date_time] } + | └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + └─BatchExchange { order: [], dist: HashShard(max(price)) } + └─BatchProject { exprs: [max(price), $expr54, ($expr54 - '00:00:10':Interval) as $expr55] } + └─BatchHashAgg { group_key: [$expr54], aggs: [max(price)] } + └─BatchExchange { order: [], dist: HashShard($expr54) } + └─BatchProject { exprs: [(TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr54, price] } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr108(hidden), max(price)(hidden)], pk_columns: [_row_id, $expr108, price, max(price)] } + └─StreamProject { exprs: [auction, price, bidder, date_time, _row_id, $expr108, max(price)] } + └─StreamFilter { predicate: (date_time >= $expr110) AND (date_time <= $expr108) } + └─StreamHashJoin { type: Inner, predicate: price = max(price), output: all } + ├─StreamExchange { dist: HashShard(price) } + | └─StreamProject { exprs: [auction, bidder, price, date_time, _row_id] } + | └─StreamShare { id = 440 } + | └─StreamProject { exprs: [auction, bidder, price, date_time, _row_id] } + | └─StreamRowIdGen { row_id_index: 7 } + | └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + └─StreamExchange { dist: HashShard(max(price)) } + └─StreamProject { exprs: [max(price), $expr108, ($expr108 - '00:00:10':Interval) as $expr110] } + └─StreamAppendOnlyHashAgg { group_key: [$expr108], aggs: [count, max(price)] } + └─StreamExchange { dist: HashShard($expr108) } + └─StreamProject { exprs: [(TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr108, price, _row_id] } + └─StreamShare { id = 440 } + └─StreamProject { exprs: [auction, bidder, price, date_time, _row_id] } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr108(hidden), max(price)(hidden)], pk_columns: [_row_id, $expr108, price, max(price)] } + materialized table: 4294967294 + StreamProject { exprs: [auction, price, bidder, date_time, _row_id, $expr108, max(price)] } + StreamFilter { predicate: (date_time >= $expr110) AND (date_time <= $expr108) } + StreamHashJoin { type: Inner, predicate: price = max(price), output: all } + left table: 0, right table 2, left degree table: 1, right degree table: 3, + StreamExchange Hash([2]) from 1 + StreamExchange Hash([0]) from 3 + + Fragment 1 + StreamProject { exprs: [auction, bidder, price, date_time, _row_id] } + StreamExchange Hash([4]) from 2 + + Fragment 2 + StreamProject { exprs: [auction, bidder, price, date_time, _row_id] } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source 
state table: 4 + + Fragment 3 + StreamProject { exprs: [max(price), $expr108, ($expr108 - '00:00:10':Interval) as $expr110] } + StreamAppendOnlyHashAgg { group_key: [$expr108], aggs: [count, max(price)] } + result table: 5, state tables: [] + StreamExchange Hash([0]) from 4 + + Fragment 4 + StreamProject { exprs: [(TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr108, price, _row_id] } + StreamExchange Hash([4]) from 2 + + Table 0 { columns: [auction, bidder, price, date_time, _row_id], primary key: [$2 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [2] } + Table 1 { columns: [price, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 2 { columns: [max(price), $expr108, $expr110], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 3 { columns: [max(price), $expr108, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 4 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 5 { columns: [$expr108, count, max(price)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } + Table 4294967294 { columns: [auction, price, bidder, date_time, _row_id, $expr108, max(price)], primary key: [$4 ASC, $5 ASC, $1 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [1] } +- id: nexmark_q8 + before: + - create_sources + sql: | + SELECT + P.id, + P.name, + P.starttime + FROM ( + SELECT + id, + name, + window_start AS starttime, + window_end AS endtime + FROM + TUMBLE(person, date_time, INTERVAL '10' SECOND) + GROUP BY + id, + name, + window_start, + window_end + ) P + JOIN ( + SELECT + seller, + window_start AS starttime, + window_end AS endtime + FROM + TUMBLE(auction, date_time, INTERVAL '10' SECOND) + GROUP BY + seller, + window_start, + window_end + ) A ON P.id = A.seller + AND P.starttime = A.starttime + AND P.endtime = A.endtime; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchHashJoin { type: Inner, predicate: id = seller AND $expr99 = $expr97 AND $expr100 = $expr98, output: [id, name, $expr99] } + ├─BatchExchange { order: [], dist: HashShard(id, $expr99, $expr100) } + | └─BatchHashAgg { group_key: [id, name, $expr99, $expr100], aggs: [] } + | └─BatchExchange { order: [], dist: HashShard(id, name, $expr99, $expr100) } + | └─BatchProject { exprs: [id, name, TumbleStart(date_time, '00:00:10':Interval) as $expr99, (TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr100] } + | └─BatchSource { source: "person", columns: ["id", "name", "email_address", "credit_card", "city", "state", "date_time", "_row_id"], filter: (None, None) } + └─BatchHashAgg { group_key: [seller, $expr97, $expr98], aggs: [] } + └─BatchExchange { order: [], dist: HashShard(seller, $expr97, $expr98) } + └─BatchProject { exprs: [seller, TumbleStart(date_time, '00:00:10':Interval) as $expr97, (TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr98] } + └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [id, name, starttime, $expr200(hidden), seller(hidden), $expr197(hidden), $expr198(hidden)], pk_columns: [id, name, starttime, $expr200, seller, $expr197, $expr198] } + └─StreamHashJoin { type: Inner, predicate: id = seller AND $expr199 
= $expr197 AND $expr200 = $expr198, output: all } + ├─StreamExchange { dist: HashShard(id, $expr199, $expr200) } + | └─StreamProject { exprs: [id, name, $expr199, $expr200] } + | └─StreamAppendOnlyHashAgg { group_key: [id, name, $expr199, $expr200], aggs: [count] } + | └─StreamExchange { dist: HashShard(id, name, $expr199, $expr200) } + | └─StreamProject { exprs: [id, name, TumbleStart(date_time, '00:00:10':Interval) as $expr199, (TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr200, _row_id] } + | └─StreamRowIdGen { row_id_index: 7 } + | └─StreamSource { source: "person", columns: ["id", "name", "email_address", "credit_card", "city", "state", "date_time", "_row_id"] } + └─StreamProject { exprs: [seller, $expr197, $expr198] } + └─StreamAppendOnlyHashAgg { group_key: [seller, $expr197, $expr198], aggs: [count] } + └─StreamExchange { dist: HashShard(seller, $expr197, $expr198) } + └─StreamProject { exprs: [seller, TumbleStart(date_time, '00:00:10':Interval) as $expr197, (TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr198, _row_id] } + └─StreamRowIdGen { row_id_index: 9 } + └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [id, name, starttime, $expr200(hidden), seller(hidden), $expr197(hidden), $expr198(hidden)], pk_columns: [id, name, starttime, $expr200, seller, $expr197, $expr198] } + materialized table: 4294967294 + StreamHashJoin { type: Inner, predicate: id = seller AND $expr199 = $expr197 AND $expr200 = $expr198, output: all } + left table: 0, right table 2, left degree table: 1, right degree table: 3, + StreamExchange Hash([0, 2, 3]) from 1 + StreamProject { exprs: [seller, $expr197, $expr198] } + StreamAppendOnlyHashAgg { group_key: [seller, $expr197, $expr198], aggs: [count] } + result table: 6, state tables: [] + StreamExchange Hash([0, 1, 2]) from 3 + + Fragment 1 + StreamProject { exprs: [id, name, $expr199, $expr200] } + StreamAppendOnlyHashAgg { group_key: [id, name, $expr199, $expr200], aggs: [count] } + result table: 4, state tables: [] + StreamExchange Hash([0, 1, 2, 3]) from 2 + + Fragment 2 + StreamProject { exprs: [id, name, TumbleStart(date_time, '00:00:10':Interval) as $expr199, (TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr200, _row_id] } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "person", columns: ["id", "name", "email_address", "credit_card", "city", "state", "date_time", "_row_id"] } + source state table: 5 + + Fragment 3 + StreamProject { exprs: [seller, TumbleStart(date_time, '00:00:10':Interval) as $expr197, (TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr198, _row_id] } + StreamRowIdGen { row_id_index: 9 } + StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + source state table: 7 + + Table 0 { columns: [id, name, $expr199, $expr200], primary key: [$0 ASC, $2 ASC, $3 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [0, 2, 3] } + Table 1 { columns: [id, $expr199, $expr200, name, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0, 1, 2] } + Table 2 { columns: [seller, $expr197, $expr198], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0, 1, 2] } + 
Table 3 { columns: [seller, $expr197, $expr198, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0, 1, 2] } + Table 4 { columns: [id, name, $expr199, $expr200, count], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0, 1, 2, 3] } + Table 5 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 6 { columns: [seller, $expr197, $expr198, count], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0, 1, 2] } + Table 7 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [id, name, starttime, $expr200, seller, $expr197, $expr198], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [0, 2, 3] } +- id: nexmark_q9 + before: + - create_sources + sql: | + SELECT + id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, + auction, bidder, price, bid_date_time + FROM ( + SELECT A.*, B.auction, B.bidder, B.price, B.date_time AS bid_date_time, + ROW_NUMBER() OVER (PARTITION BY A.id ORDER BY B.price DESC, B.date_time ASC) AS rownum + FROM auction A, bid B + WHERE A.id = B.auction AND B.date_time BETWEEN A.date_time AND A.expires + ) + WHERE rownum <= 1; + logical_plan: | + LogicalProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, auction, bidder, price, date_time] } + └─LogicalFilter { predicate: (ROW_NUMBER <= 1:Int32) } + └─LogicalProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, auction, bidder, price, date_time, ROW_NUMBER] } + └─LogicalOverAgg { window_function: ROW_NUMBER() OVER(PARTITION BY id ORDER BY price DESC NULLS FIRST, date_time ASC NULLS LAST) } + └─LogicalFilter { predicate: (id = auction) AND (date_time >= date_time) AND (date_time <= expires) } + └─LogicalJoin { type: Inner, on: true, output: all } + ├─LogicalSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id], time_range: [(Unbounded, Unbounded)] } + └─LogicalSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], time_range: [(Unbounded, Unbounded)] } + optimized_logical_plan: | + LogicalProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, auction, bidder, price, date_time] } + └─LogicalTopN { order: "[price DESC, date_time ASC]", limit: 1, offset: 0, group_key: [0] } + └─LogicalJoin { type: Inner, on: (id = auction) AND (date_time >= date_time) AND (date_time <= expires), output: all } + ├─LogicalSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id], time_range: [(Unbounded, Unbounded)] } + └─LogicalSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], time_range: [(Unbounded, Unbounded)] } + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, auction, bidder, price, date_time] } + └─BatchGroupTopN { order: "[price DESC, date_time ASC]", limit: 1, offset: 0, group_key: [0] } + └─BatchFilter { predicate: (date_time >= date_time) AND (date_time <= expires) } + 
└─BatchHashJoin { type: Inner, predicate: id = auction, output: all } + ├─BatchExchange { order: [], dist: HashShard(id) } + | └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"], filter: (None, None) } + └─BatchExchange { order: [], dist: HashShard(auction) } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, auction, bidder, price, bid_date_time, _row_id(hidden), _row_id#1(hidden)], pk_columns: [_row_id, _row_id#1, id, auction] } + └─StreamProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, auction, bidder, price, date_time, _row_id, _row_id] } + └─StreamAppendOnlyGroupTopN { order: "[price DESC, date_time ASC]", limit: 1, offset: 0, group_key: [0] } + └─StreamFilter { predicate: (date_time >= date_time) AND (date_time <= expires) } + └─StreamAppendOnlyHashJoin { type: Inner, predicate: id = auction, output: all } + ├─StreamExchange { dist: HashShard(id) } + | └─StreamRowIdGen { row_id_index: 9 } + | └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + └─StreamExchange { dist: HashShard(auction) } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, auction, bidder, price, bid_date_time, _row_id(hidden), _row_id#1(hidden)], pk_columns: [_row_id, _row_id#1, id, auction] } + materialized table: 4294967294 + StreamProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, auction, bidder, price, date_time, _row_id, _row_id] } + StreamAppendOnlyGroupTopN { order: "[price DESC, date_time ASC]", limit: 1, offset: 0, group_key: [0] } + state table: 0 + StreamFilter { predicate: (date_time >= date_time) AND (date_time <= expires) } + StreamAppendOnlyHashJoin { type: Inner, predicate: id = auction, output: all } + left table: 1, right table 3, left degree table: 2, right degree table: 4, + StreamExchange Hash([0]) from 1 + StreamExchange Hash([0]) from 2 + + Fragment 1 + StreamRowIdGen { row_id_index: 9 } + StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + source state table: 5 + + Fragment 2 + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 6 + + Table 0 { columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, auction, bidder, price, channel, url, date_time_0, extra, _row_id_0], primary key: [$0 ASC, $12 DESC, $15 ASC, $9 ASC, $17 ASC, $10 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], distribution key: [0] } + Table 1 { columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id], primary key: [$0 ASC, $9 
ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], distribution key: [0] } + Table 2 { columns: [id, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 3 { columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], primary key: [$0 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [0] } + Table 4 { columns: [auction, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 5 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 6 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, auction, bidder, price, bid_date_time, _row_id, _row_id#1], primary key: [$13 ASC, $14 ASC, $0 ASC, $9 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], distribution key: [0] } +- id: nexmark_q10 + before: + - create_sources + sql: | + SELECT auction, bidder, price, date_time, TO_CHAR(date_time, 'YYYY-MM-DD') as date, TO_CHAR(date_time, 'HH:MI') as time FROM bid; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [auction, bidder, price, date_time, ToChar(date_time, 'YYYY-MM-DD':Varchar) as $expr47, ToChar(date_time, 'HH:MI':Varchar) as $expr48] } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction, bidder, price, date_time, date, time, _row_id(hidden)], pk_columns: [_row_id] } + └─StreamExchange { dist: HashShard(_row_id) } + └─StreamProject { exprs: [auction, bidder, price, date_time, ToChar(date_time, 'YYYY-MM-DD':Varchar) as $expr95, ToChar(date_time, 'HH:MI':Varchar) as $expr96, _row_id] } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction, bidder, price, date_time, date, time, _row_id(hidden)], pk_columns: [_row_id] } + materialized table: 4294967294 + StreamExchange Hash([6]) from 1 + + Fragment 1 + StreamProject { exprs: [auction, bidder, price, date_time, ToChar(date_time, 'YYYY-MM-DD':Varchar) as $expr95, ToChar(date_time, 'HH:MI':Varchar) as $expr96, _row_id] } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 0 + + Table 0 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [auction, bidder, price, date_time, date, time, _row_id], primary key: [$6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [6] } +- id: nexmark_q11 + before: + - create_sources + sql: | + SELECT + B.bidder, + count(*) as bid_count, + SESSION_START(B.date_time, INTERVAL '10' SECOND) as starttime, + SESSION_END(B.date_time, INTERVAL '10' SECOND) as endtime + FROM bid B + GROUP BY B.bidder, SESSION(B.date_time, INTERVAL '10' SECOND); + binder_error: |- + Feature is not yet implemented: unsupported function: "session_start" + Tracking issue: https://github.com/risingwavelabs/risingwave/issues/112 +- id: nexmark_q12 + before: + - create_sources + sql: | + 
SELECT + B.bidder, + count(*) as bid_count, + TUMBLE_START(B.p_time, INTERVAL '10' SECOND) as starttime, + TUMBLE_END(B.p_time, INTERVAL '10' SECOND) as endtime + FROM (SELECT *, PROCTIME() as p_time FROM bid) B + GROUP BY B.bidder, TUMBLE(B.p_time, INTERVAL '10' SECOND); + binder_error: |- + Feature is not yet implemented: unsupported function: "proctime" + Tracking issue: https://github.com/risingwavelabs/risingwave/issues/112 +- id: nexmark_q13 + before: + - create_sources + sql: | + /* SELECT + B.auction, + B.bidder, + B.price, + B.date_time, + S.value + FROM (SELECT *, PROCTIME() as p_time FROM bid) B + JOIN side_input FOR SYSTEM_TIME AS OF B.p_time AS S + ON mod(B.auction, 10000) = S.key; */ + select 1; + stream_error: 'Bind error: An alias must be specified for the 1st expression (counting from 1) in result relation' +- id: nexmark_q14 + before: + - create_sources + sql: | + SELECT + auction, + bidder, + 0.908 * price as price, + CASE + WHEN + extract(hour from date_time) >= 8 AND + extract(hour from date_time) <= 18 + THEN 'dayTime' + WHEN + extract(hour from date_time) <= 6 OR + extract(hour from date_time) >= 20 + THEN 'nightTime' + ELSE 'otherTime' + END AS bidTimeType, + date_time, + extra + -- TODO: count_char is an UDF, add it back when we support similar functionality. + -- https://github.com/nexmark/nexmark/blob/master/nexmark-flink/src/main/java/com/github/nexmark/flink/udf/CountChar.java + -- count_char(extra, 'c') AS c_counts + FROM bid + WHERE 0.908 * price > 1000000 AND 0.908 * price < 50000000; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [auction, bidder, (0.908:Decimal * price) as $expr47, Case(((Extract('HOUR':Varchar, date_time) >= 8:Int32) AND (Extract('HOUR':Varchar, date_time) <= 18:Int32)), 'dayTime':Varchar, ((Extract('HOUR':Varchar, date_time) <= 6:Int32) OR (Extract('HOUR':Varchar, date_time) >= 20:Int32)), 'nightTime':Varchar, 'otherTime':Varchar) as $expr48, date_time, extra] } + └─BatchFilter { predicate: ((0.908:Decimal * price) > 1000000:Int32) AND ((0.908:Decimal * price) < 50000000:Int32) } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction, bidder, price, bidtimetype, date_time, extra, _row_id(hidden)], pk_columns: [_row_id] } + └─StreamExchange { dist: HashShard(_row_id) } + └─StreamProject { exprs: [auction, bidder, (0.908:Decimal * price) as $expr95, Case(((Extract('HOUR':Varchar, date_time) >= 8:Int32) AND (Extract('HOUR':Varchar, date_time) <= 18:Int32)), 'dayTime':Varchar, ((Extract('HOUR':Varchar, date_time) <= 6:Int32) OR (Extract('HOUR':Varchar, date_time) >= 20:Int32)), 'nightTime':Varchar, 'otherTime':Varchar) as $expr96, date_time, extra, _row_id] } + └─StreamFilter { predicate: ((0.908:Decimal * price) > 1000000:Int32) AND ((0.908:Decimal * price) < 50000000:Int32) } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction, bidder, price, bidtimetype, date_time, extra, _row_id(hidden)], pk_columns: [_row_id] } + materialized table: 4294967294 + StreamExchange Hash([6]) from 1 + + Fragment 1 + StreamProject { exprs: [auction, bidder, (0.908:Decimal * price) as $expr95, Case(((Extract('HOUR':Varchar, date_time) >= 8:Int32) AND (Extract('HOUR':Varchar, date_time) <= 
18:Int32)), 'dayTime':Varchar, ((Extract('HOUR':Varchar, date_time) <= 6:Int32) OR (Extract('HOUR':Varchar, date_time) >= 20:Int32)), 'nightTime':Varchar, 'otherTime':Varchar) as $expr96, date_time, extra, _row_id] } + StreamFilter { predicate: ((0.908:Decimal * price) > 1000000:Int32) AND ((0.908:Decimal * price) < 50000000:Int32) } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 0 + + Table 0 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [auction, bidder, price, bidtimetype, date_time, extra, _row_id], primary key: [$6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [6] } +- id: nexmark_q15 + before: + - create_sources + sql: | + SELECT + TO_CHAR(date_time, 'yyyy-MM-dd') as day, + count(*) AS total_bids, + count(*) filter (where price < 10000) AS rank1_bids, + count(*) filter (where price >= 10000 and price < 1000000) AS rank2_bids, + count(*) filter (where price >= 1000000) AS rank3_bids, + count(distinct bidder) AS total_bidders, + count(distinct bidder) filter (where price < 10000) AS rank1_bidders, + count(distinct bidder) filter (where price >= 10000 and price < 1000000) AS rank2_bidders, + count(distinct bidder) filter (where price >= 1000000) AS rank3_bidders, + count(distinct auction) AS total_auctions, + count(distinct auction) filter (where price < 10000) AS rank1_auctions, + count(distinct auction) filter (where price >= 10000 and price < 1000000) AS rank2_auctions, + count(distinct auction) filter (where price >= 1000000) AS rank3_auctions + FROM bid + GROUP BY to_char(date_time, 'yyyy-MM-dd'); + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchHashAgg { group_key: [$expr24], aggs: [sum0(count) filter((flag = 0:Int64)), sum0(count filter((price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 10000:Int32) AND (price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bidder) filter((flag = 1:Int64)), count(bidder) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(auction) filter((flag = 2:Int64)), count(auction) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + └─BatchExchange { order: [], dist: HashShard($expr24) } + └─BatchProject { exprs: [$expr24, bidder, bidder, bidder, bidder, auction, auction, auction, auction, flag, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32))] } + └─BatchHashAgg { group_key: [$expr24, bidder, auction, flag], aggs: [count, count 
filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32))] } + └─BatchExchange { order: [], dist: HashShard($expr24, bidder, auction, flag) } + └─BatchExpand { column_subsets: [[$expr24], [$expr24, bidder], [$expr24, auction]] } + └─BatchProject { exprs: [ToChar(date_time, 'yyyy-MM-dd':Varchar) as $expr24, price, bidder, auction] } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [day, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], pk_columns: [day] } + └─StreamProject { exprs: [$expr48, sum0(count) filter((flag = 0:Int64)), sum0(count filter((price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 10000:Int32) AND (price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bidder) filter((flag = 1:Int64)), count(bidder) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(auction) filter((flag = 2:Int64)), count(auction) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + └─StreamHashAgg { group_key: [$expr48], aggs: [count, sum0(count) filter((flag = 0:Int64)), sum0(count filter((price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 10000:Int32) AND (price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bidder) filter((flag = 1:Int64)), count(bidder) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(auction) filter((flag = 2:Int64)), count(auction) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + └─StreamExchange { dist: HashShard($expr48) } + └─StreamProject { exprs: [$expr48, bidder, bidder, bidder, bidder, auction, auction, auction, auction, flag, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), 
count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32))] } + └─StreamAppendOnlyHashAgg { group_key: [$expr48, bidder, auction, flag], aggs: [count, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32))] } + └─StreamExchange { dist: HashShard($expr48, bidder, auction, flag) } + └─StreamExpand { column_subsets: [[$expr48], [$expr48, bidder], [$expr48, auction]] } + └─StreamProject { exprs: [ToChar(date_time, 'yyyy-MM-dd':Varchar) as $expr48, price, bidder, auction, _row_id] } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [day, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], pk_columns: [day] } + materialized table: 4294967294 + StreamProject { exprs: [$expr48, sum0(count) filter((flag = 0:Int64)), sum0(count filter((price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 10000:Int32) AND (price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bidder) filter((flag = 1:Int64)), count(bidder) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(auction) filter((flag = 2:Int64)), count(auction) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + StreamHashAgg { group_key: [$expr48], aggs: [count, sum0(count) filter((flag = 0:Int64)), sum0(count filter((price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 10000:Int32) AND (price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bidder) filter((flag = 1:Int64)), count(bidder) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(auction) filter((flag = 2:Int64)), count(auction) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + result table: 0, state tables: [] + StreamExchange Hash([0]) 
from 1 + + Fragment 1 + StreamProject { exprs: [$expr48, bidder, bidder, bidder, bidder, auction, auction, auction, auction, flag, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32))] } + StreamAppendOnlyHashAgg { group_key: [$expr48, bidder, auction, flag], aggs: [count, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32))] } + result table: 1, state tables: [] + StreamExchange Hash([0, 2, 3, 10]) from 2 + + Fragment 2 + StreamExpand { column_subsets: [[$expr48], [$expr48, bidder], [$expr48, auction]] } + StreamProject { exprs: [ToChar(date_time, 'yyyy-MM-dd':Varchar) as $expr48, price, bidder, auction, _row_id] } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 2 + + Table 0 { columns: [$expr48, count, sum0(count) filter((flag = 0:Int64)), sum0(count filter((price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 10000:Int32) AND (price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bidder) filter((flag = 1:Int64)), count(bidder) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(auction) filter((flag = 2:Int64)), count(auction) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))], primary key: [$0 ASC], value indices: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [0] } + Table 1 { columns: [$expr48, bidder, auction, flag, count, count_0, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32))_0, count filter((price >= 10000:Int32) AND (price < 1000000:Int32))_0, count filter((price >= 1000000:Int32))_0, count filter((price < 10000:Int32))_1, count filter((price >= 10000:Int32) AND (price < 1000000:Int32))_1, count filter((price >= 1000000:Int32))_1], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], distribution key: [0, 1, 2, 3] } + Table 2 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [day, total_bids, rank1_bids, rank2_bids, rank3_bids, 
total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], primary key: [$0 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], distribution key: [0] } +- id: nexmark_q16 + before: + - create_sources + sql: | + SELECT + channel, + to_char(date_time, 'yyyy-MM-dd') AS day, + max(to_char(date_time, 'HH:mm')) AS minute, + count(*) AS total_bids, + count(*) filter (where price < 10000) AS rank1_bids, + count(*) filter (where price >= 10000 and price < 1000000) AS rank2_bids, + count(*) filter (where price >= 1000000) AS rank3_bids, + count(distinct bidder) AS total_bidders, + count(distinct bidder) filter (where price < 10000) AS rank1_bidders, + count(distinct bidder) filter (where price >= 10000 and price < 1000000) AS rank2_bidders, + count(distinct bidder) filter (where price >= 1000000) AS rank3_bidders, + count(distinct auction) AS total_auctions, + count(distinct auction) filter (where price < 10000) AS rank1_auctions, + count(distinct auction) filter (where price >= 10000 and price < 1000000) AS rank2_auctions, + count(distinct auction) filter (where price >= 1000000) AS rank3_auctions + FROM bid + GROUP BY channel, to_char(date_time, 'yyyy-MM-dd'); + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchHashAgg { group_key: [channel, $expr47], aggs: [max(max($expr48)) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 10000:Int32) AND (price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bidder) filter((flag = 1:Int64)), count(bidder) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(auction) filter((flag = 2:Int64)), count(auction) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + └─BatchExchange { order: [], dist: HashShard(channel, $expr47) } + └─BatchProject { exprs: [channel, $expr47, bidder, bidder, bidder, bidder, auction, auction, auction, auction, flag, max($expr48), count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32))] } + └─BatchHashAgg { group_key: [channel, $expr47, bidder, auction, flag], aggs: [max($expr48), count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32))] 
} + └─BatchExchange { order: [], dist: HashShard(channel, $expr47, bidder, auction, flag) } + └─BatchExpand { column_subsets: [[channel, $expr47, $expr48], [channel, $expr47, bidder], [channel, $expr47, auction]] } + └─BatchProject { exprs: [channel, ToChar(date_time, 'yyyy-MM-dd':Varchar) as $expr47, ToChar(date_time, 'HH:mm':Varchar) as $expr48, price, bidder, auction] } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [channel, day, minute, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], pk_columns: [channel, day] } + └─StreamProject { exprs: [channel, $expr95, max(max($expr96)) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 10000:Int32) AND (price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bidder) filter((flag = 1:Int64)), count(bidder) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(auction) filter((flag = 2:Int64)), count(auction) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + └─StreamHashAgg { group_key: [channel, $expr95], aggs: [count, max(max($expr96)) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 10000:Int32) AND (price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bidder) filter((flag = 1:Int64)), count(bidder) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(auction) filter((flag = 2:Int64)), count(auction) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + └─StreamExchange { dist: HashShard(channel, $expr95) } + └─StreamProject { exprs: [channel, $expr95, bidder, bidder, bidder, bidder, auction, auction, auction, auction, flag, max($expr96), count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price 
>= 1000000:Int32))] } + └─StreamAppendOnlyHashAgg { group_key: [channel, $expr95, bidder, auction, flag], aggs: [count, max($expr96), count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32))] } + └─StreamExchange { dist: HashShard(channel, $expr95, bidder, auction, flag) } + └─StreamExpand { column_subsets: [[channel, $expr95, $expr96], [channel, $expr95, bidder], [channel, $expr95, auction]] } + └─StreamProject { exprs: [channel, ToChar(date_time, 'yyyy-MM-dd':Varchar) as $expr95, ToChar(date_time, 'HH:mm':Varchar) as $expr96, price, bidder, auction, _row_id] } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [channel, day, minute, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], pk_columns: [channel, day] } + materialized table: 4294967294 + StreamProject { exprs: [channel, $expr95, max(max($expr96)) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 10000:Int32) AND (price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bidder) filter((flag = 1:Int64)), count(bidder) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(auction) filter((flag = 2:Int64)), count(auction) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))] } + StreamHashAgg { group_key: [channel, $expr95], aggs: [count, max(max($expr96)) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 10000:Int32) AND (price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bidder) filter((flag = 1:Int64)), count(bidder) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(auction) filter((flag = 2:Int64)), count(auction) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 1000000:Int32)) > 
0:Int64) AND (flag = 2:Int64))] } + result table: 1, state tables: [0] + StreamExchange Hash([0, 1]) from 1 + + Fragment 1 + StreamProject { exprs: [channel, $expr95, bidder, bidder, bidder, bidder, auction, auction, auction, auction, flag, max($expr96), count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32))] } + StreamAppendOnlyHashAgg { group_key: [channel, $expr95, bidder, auction, flag], aggs: [count, max($expr96), count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32))] } + result table: 2, state tables: [] + StreamExchange Hash([0, 1, 4, 5, 14]) from 2 + + Fragment 2 + StreamExpand { column_subsets: [[channel, $expr95, $expr96], [channel, $expr95, bidder], [channel, $expr95, auction]] } + StreamProject { exprs: [channel, ToChar(date_time, 'yyyy-MM-dd':Varchar) as $expr95, ToChar(date_time, 'HH:mm':Varchar) as $expr96, price, bidder, auction, _row_id] } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 3 + + Table 0 { columns: [channel, $expr95, max($expr96), bidder, auction, flag], primary key: [$0 ASC, $1 ASC, $2 DESC, $3 ASC, $4 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [0, 1] } + Table 1 { columns: [channel, $expr95, count, max(max($expr96)) filter((flag = 0:Int64)), sum0(count) filter((flag = 0:Int64)), sum0(count filter((price < 10000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 10000:Int32) AND (price < 1000000:Int32))) filter((flag = 0:Int64)), sum0(count filter((price >= 1000000:Int32))) filter((flag = 0:Int64)), count(bidder) filter((flag = 1:Int64)), count(bidder) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(bidder) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 1:Int64)), count(auction) filter((flag = 2:Int64)), count(auction) filter((count filter((price < 10000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 10000:Int32) AND (price < 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64)), count(auction) filter((count filter((price >= 1000000:Int32)) > 0:Int64) AND (flag = 2:Int64))], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], distribution key: [0, 1] } + Table 2 { columns: [channel, $expr95, bidder, auction, flag, count, max($expr96), count_0, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count filter((price < 10000:Int32))_0, count filter((price >= 10000:Int32) AND (price < 1000000:Int32))_0, 
count filter((price >= 1000000:Int32))_0, count filter((price < 10000:Int32))_1, count filter((price >= 10000:Int32) AND (price < 1000000:Int32))_1, count filter((price >= 1000000:Int32))_1], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC], value indices: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], distribution key: [0, 1, 2, 3, 4] } + Table 3 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [channel, day, minute, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], distribution key: [0, 1] } +- id: nexmark_q17 + before: + - create_sources + sql: | + SELECT + auction, + to_char(date_time, 'YYYY-MM-DD') AS day, + count(*) AS total_bids, + count(*) filter (where price < 10000) AS rank1_bids, + count(*) filter (where price >= 10000 and price < 1000000) AS rank2_bids, + count(*) filter (where price >= 1000000) AS rank3_bids, + min(price) AS min_price, + max(price) AS max_price, + avg(price) AS avg_price, + sum(price) AS sum_price + FROM bid + GROUP BY auction, to_char(date_time, 'YYYY-MM-DD'); + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [auction, $expr46, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), min(price), max(price), (sum(price)::Decimal / count(price)) as $expr47, sum(price)] } + └─BatchHashAgg { group_key: [auction, $expr46], aggs: [count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), min(price), max(price), sum(price), count(price), sum(price)] } + └─BatchExchange { order: [], dist: HashShard(auction, $expr46) } + └─BatchProject { exprs: [auction, ToChar(date_time, 'YYYY-MM-DD':Varchar) as $expr46, price] } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction, day, total_bids, rank1_bids, rank2_bids, rank3_bids, min_price, max_price, avg_price, sum_price], pk_columns: [auction, day] } + └─StreamProject { exprs: [auction, $expr93, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), min(price), max(price), (sum(price)::Decimal / count(price)) as $expr95, sum(price)] } + └─StreamAppendOnlyHashAgg { group_key: [auction, $expr93], aggs: [count, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), min(price), max(price), sum(price), count(price), sum(price)] } + └─StreamExchange { dist: HashShard(auction, $expr93) } + └─StreamProject { exprs: [auction, ToChar(date_time, 'YYYY-MM-DD':Varchar) as $expr93, price, _row_id] } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction, day, total_bids, rank1_bids, rank2_bids, rank3_bids, min_price, max_price, avg_price, sum_price], pk_columns: [auction, day] } + materialized table: 
4294967294 + StreamProject { exprs: [auction, $expr93, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), min(price), max(price), (sum(price)::Decimal / count(price)) as $expr95, sum(price)] } + StreamAppendOnlyHashAgg { group_key: [auction, $expr93], aggs: [count, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), min(price), max(price), sum(price), count(price), sum(price)] } + result table: 0, state tables: [] + StreamExchange Hash([0, 1]) from 1 + + Fragment 1 + StreamProject { exprs: [auction, ToChar(date_time, 'YYYY-MM-DD':Varchar) as $expr93, price, _row_id] } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 1 + + Table 0 { columns: [auction, $expr93, count, count_0, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), min(price), max(price), sum(price), count(price), sum(price)_0], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], distribution key: [0, 1] } + Table 1 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [auction, day, total_bids, rank1_bids, rank2_bids, rank3_bids, min_price, max_price, avg_price, sum_price], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], distribution key: [0, 1] } +- id: nexmark_q18 + before: + - create_sources + sql: | + SELECT auction, bidder, price, channel, url, date_time, extra + FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY bidder, auction ORDER BY date_time DESC) AS rank_number + FROM bid) + WHERE rank_number <= 1; + logical_plan: | + LogicalProject { exprs: [auction, bidder, price, channel, url, date_time, extra] } + └─LogicalFilter { predicate: (ROW_NUMBER <= 1:Int32) } + └─LogicalProject { exprs: [auction, bidder, price, channel, url, date_time, extra, ROW_NUMBER] } + └─LogicalOverAgg { window_function: ROW_NUMBER() OVER(PARTITION BY bidder, auction ORDER BY date_time DESC NULLS FIRST) } + └─LogicalSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], time_range: [(Unbounded, Unbounded)] } + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [auction, bidder, price, channel, url, date_time, extra] } + └─BatchGroupTopN { order: "[date_time DESC]", limit: 1, offset: 0, group_key: [1, 0] } + └─BatchExchange { order: [], dist: HashShard(bidder, auction) } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction, bidder, price, channel, url, date_time, extra, _row_id(hidden)], pk_columns: [_row_id] } + └─StreamExchange { dist: HashShard(_row_id) } + └─StreamProject { exprs: [auction, bidder, price, channel, url, date_time, extra, _row_id] } + └─StreamAppendOnlyGroupTopN { order: "[date_time DESC]", limit: 1, offset: 0, group_key: [1, 0] } + └─StreamExchange { dist: HashShard(bidder, auction) } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + 
stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction, bidder, price, channel, url, date_time, extra, _row_id(hidden)], pk_columns: [_row_id] } + materialized table: 4294967294 + StreamExchange Hash([7]) from 1 + + Fragment 1 + StreamProject { exprs: [auction, bidder, price, channel, url, date_time, extra, _row_id] } + StreamAppendOnlyGroupTopN { order: "[date_time DESC]", limit: 1, offset: 0, group_key: [1, 0] } + state table: 0 + StreamExchange Hash([1, 0]) from 2 + + Fragment 2 + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 1 + + Table 0 { columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], primary key: [$1 ASC, $0 ASC, $5 DESC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [1, 0] } + Table 1 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], primary key: [$7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [7] } +- id: nexmark_q19 + before: + - create_sources + sql: | + SELECT * FROM + (SELECT *, ROW_NUMBER() OVER (PARTITION BY auction ORDER BY price DESC) AS rank_number FROM bid) + WHERE rank_number <= 10; + logical_plan: | + LogicalProject { exprs: [auction, bidder, price, channel, url, date_time, extra, ROW_NUMBER] } + └─LogicalFilter { predicate: (ROW_NUMBER <= 10:Int32) } + └─LogicalProject { exprs: [auction, bidder, price, channel, url, date_time, extra, ROW_NUMBER] } + └─LogicalOverAgg { window_function: ROW_NUMBER() OVER(PARTITION BY auction ORDER BY price DESC NULLS FIRST) } + └─LogicalSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], time_range: [(Unbounded, Unbounded)] } + optimizer_error: | + internal error: OverAgg can not be transformed. 
Plan: + LogicalProject { exprs: [auction, bidder, price, channel, url, date_time, extra, ROW_NUMBER] } + └─LogicalFilter { predicate: (ROW_NUMBER <= 10:Int32) } + └─LogicalOverAgg { window_function: ROW_NUMBER() OVER(PARTITION BY auction ORDER BY price DESC NULLS FIRST) } + └─LogicalSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], time_range: [(Unbounded, Unbounded)] } +- id: nexmark_q20 + before: + - create_sources + sql: | + SELECT + auction, bidder, price, channel, url, B.date_time as date_timeB, + item_name, description, initial_bid, reserve, A.date_time as date_timeA, expires, seller, category + FROM + bid B INNER JOIN auction A on B.auction = A.id + WHERE A.category = 10; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchHashJoin { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category] } + ├─BatchExchange { order: [], dist: HashShard(auction) } + | └─BatchProject { exprs: [auction, bidder, price, channel, url, date_time] } + | └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + └─BatchExchange { order: [], dist: HashShard(id) } + └─BatchProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category] } + └─BatchFilter { predicate: (category = 10:Int32) } + └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden), id(hidden)], pk_columns: [_row_id, _row_id#1, auction, id] } + └─StreamAppendOnlyHashJoin { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, _row_id, id] } + ├─StreamExchange { dist: HashShard(auction) } + | └─StreamProject { exprs: [auction, bidder, price, channel, url, date_time, _row_id] } + | └─StreamRowIdGen { row_id_index: 7 } + | └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + └─StreamExchange { dist: HashShard(id) } + └─StreamProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id] } + └─StreamFilter { predicate: (category = 10:Int32) } + └─StreamRowIdGen { row_id_index: 9 } + └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden), id(hidden)], pk_columns: [_row_id, _row_id#1, auction, id] } + materialized table: 4294967294 + StreamAppendOnlyHashJoin { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, _row_id, id] } + left table: 0, right table 2, left 
degree table: 1, right degree table: 3, + StreamExchange Hash([0]) from 1 + StreamExchange Hash([0]) from 2 + + Fragment 1 + StreamProject { exprs: [auction, bidder, price, channel, url, date_time, _row_id] } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 4 + + Fragment 2 + StreamProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id] } + StreamFilter { predicate: (category = 10:Int32) } + StreamRowIdGen { row_id_index: 9 } + StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + source state table: 5 + + Table 0 { columns: [auction, bidder, price, channel, url, date_time, _row_id], primary key: [$0 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [0] } + Table 1 { columns: [auction, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 2 { columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id], primary key: [$0 ASC, $9 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], distribution key: [0] } + Table 3 { columns: [id, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 4 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 5 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1, id], primary key: [$14 ASC, $15 ASC, $0 ASC, $16 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], distribution key: [0] } +- id: nexmark_q21 + before: + - create_sources + sql: | + SELECT + auction, bidder, price, channel, + CASE + WHEN lower(channel) = 'apple' THEN '0' + WHEN lower(channel) = 'google' THEN '1' + WHEN lower(channel) = 'facebook' THEN '2' + WHEN lower(channel) = 'baidu' THEN '3' + ELSE REGEXP_EXTRACT(url, '(&|^)channel_id=([^&]*)', 2) + END + AS channel_id FROM bid + where REGEXP_EXTRACT(url, '(&|^)channel_id=([^&]*)', 2) is not null or + lower(channel) in ('apple', 'google', 'facebook', 'baidu'); + binder_error: |- + Feature is not yet implemented: unsupported function: "regexp_extract" + Tracking issue: https://github.com/risingwavelabs/risingwave/issues/112 +- id: nexmark_q22 + before: + - create_sources + sql: | + SELECT + auction, bidder, price, channel, + SPLIT_PART(url, '/', 4) as dir1, + SPLIT_PART(url, '/', 5) as dir2, + SPLIT_PART(url, '/', 6) as dir3 FROM bid; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [auction, bidder, price, channel, SplitPart(url, '/':Varchar, 4:Int32) as $expr70, SplitPart(url, '/':Varchar, 5:Int32) as $expr71, SplitPart(url, '/':Varchar, 6:Int32) as $expr72] } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction, bidder, price, channel, dir1, dir2, dir3, _row_id(hidden)], pk_columns: [_row_id] } + └─StreamExchange { dist: HashShard(_row_id) } + └─StreamProject { exprs: [auction, 
bidder, price, channel, SplitPart(url, '/':Varchar, 4:Int32) as $expr142, SplitPart(url, '/':Varchar, 5:Int32) as $expr143, SplitPart(url, '/':Varchar, 6:Int32) as $expr144, _row_id] } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction, bidder, price, channel, dir1, dir2, dir3, _row_id(hidden)], pk_columns: [_row_id] } + materialized table: 4294967294 + StreamExchange Hash([7]) from 1 + + Fragment 1 + StreamProject { exprs: [auction, bidder, price, channel, SplitPart(url, '/':Varchar, 4:Int32) as $expr142, SplitPart(url, '/':Varchar, 5:Int32) as $expr143, SplitPart(url, '/':Varchar, 6:Int32) as $expr144, _row_id] } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 0 + + Table 0 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [auction, bidder, price, channel, dir1, dir2, dir3, _row_id], primary key: [$7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [7] } +- id: nexmark_q101 + before: + - create_sources + sql: | + -- A self-made query that covers outer join. + -- + -- Monitor ongoing auctions and track the current highest bid for each one in real-time. If + -- the auction has no bids, the highest bid will be NULL. + SELECT + a.id AS auction_id, + a.item_name AS auction_item_name, + b.max_price AS current_highest_bid + FROM auction a + LEFT OUTER JOIN ( + SELECT + b1.auction, + MAX(b1.price) max_price + FROM bid b1 + GROUP BY b1.auction + ) b ON a.id = b.auction; + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchHashJoin { type: LeftOuter, predicate: id = auction, output: [id, item_name, max(price)] } + ├─BatchExchange { order: [], dist: HashShard(id) } + | └─BatchProject { exprs: [id, item_name] } + | └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"], filter: (None, None) } + └─BatchHashAgg { group_key: [auction], aggs: [max(price)] } + └─BatchExchange { order: [], dist: HashShard(auction) } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), auction(hidden)], pk_columns: [_row_id, auction, auction_id] } + └─StreamHashJoin { type: LeftOuter, predicate: id = auction, output: [id, item_name, max(price), _row_id, auction] } + ├─StreamExchange { dist: HashShard(id) } + | └─StreamProject { exprs: [id, item_name, _row_id] } + | └─StreamRowIdGen { row_id_index: 9 } + | └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + └─StreamProject { exprs: [auction, max(price)] } + └─StreamAppendOnlyHashAgg { group_key: [auction], aggs: [count, max(price)] } + └─StreamExchange { dist: HashShard(auction) } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: 
[auction_id, auction_item_name, current_highest_bid, _row_id(hidden), auction(hidden)], pk_columns: [_row_id, auction, auction_id] } + materialized table: 4294967294 + StreamHashJoin { type: LeftOuter, predicate: id = auction, output: [id, item_name, max(price), _row_id, auction] } + left table: 0, right table 2, left degree table: 1, right degree table: 3, + StreamExchange Hash([0]) from 1 + StreamProject { exprs: [auction, max(price)] } + StreamAppendOnlyHashAgg { group_key: [auction], aggs: [count, max(price)] } + result table: 5, state tables: [] + StreamExchange Hash([0]) from 2 + + Fragment 1 + StreamProject { exprs: [id, item_name, _row_id] } + StreamRowIdGen { row_id_index: 9 } + StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + source state table: 4 + + Fragment 2 + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 6 + + Table 0 { columns: [id, item_name, _row_id], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 1 { columns: [id, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 2 { columns: [auction, max(price)], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 3 { columns: [auction, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 4 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 5 { columns: [auction, count, max(price)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } + Table 6 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [auction_id, auction_item_name, current_highest_bid, _row_id, auction], primary key: [$3 ASC, $4 ASC, $0 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0] } +- id: nexmark_q102 + before: + - create_sources + sql: | + -- A self-made query that covers dynamic filter. + -- + -- Show the auctions whose count of bids is greater than the overall average count of bids + -- per auction. 
+ SELECT + a.id AS auction_id, + a.item_name AS auction_item_name, + COUNT(b.auction) AS bid_count + FROM auction a + JOIN bid b ON a.id = b.auction + GROUP BY a.id, a.item_name + HAVING COUNT(b.auction) >= ( + SELECT COUNT(*) / COUNT(DISTINCT auction) FROM bid + ) + batch_plan: | + BatchNestedLoopJoin { type: Inner, predicate: (count(auction) >= $expr24), output: [id, item_name, count(auction)] } + ├─BatchExchange { order: [], dist: Single } + | └─BatchHashAgg { group_key: [id, item_name], aggs: [count(auction), count(auction)] } + | └─BatchHashJoin { type: Inner, predicate: id = auction, output: all } + | ├─BatchExchange { order: [], dist: HashShard(id) } + | | └─BatchProject { exprs: [id, item_name] } + | | └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"], filter: (None, None) } + | └─BatchExchange { order: [], dist: HashShard(auction) } + | └─BatchProject { exprs: [auction] } + | └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + └─BatchProject { exprs: [(sum0(count) / count(auction)) as $expr24] } + └─BatchSimpleAgg { aggs: [sum0(count), count(auction)] } + └─BatchExchange { order: [], dist: Single } + └─BatchHashAgg { group_key: [auction], aggs: [count] } + └─BatchExchange { order: [], dist: HashShard(auction) } + └─BatchProject { exprs: [auction] } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], pk_columns: [auction_id, auction_item_name] } + └─StreamProject { exprs: [id, item_name, count(auction)] } + └─StreamDynamicFilter { predicate: (count(auction) >= $expr49), output: [id, item_name, count(auction), count(auction)] } + ├─StreamProject { exprs: [id, item_name, count(auction), count(auction)] } + | └─StreamAppendOnlyHashAgg { group_key: [id, item_name], aggs: [count, count(auction), count(auction)] } + | └─StreamAppendOnlyHashJoin { type: Inner, predicate: id = auction, output: [id, item_name, auction, _row_id, _row_id] } + | ├─StreamExchange { dist: HashShard(id) } + | | └─StreamProject { exprs: [id, item_name, _row_id] } + | | └─StreamRowIdGen { row_id_index: 9 } + | | └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + | └─StreamExchange { dist: HashShard(auction) } + | └─StreamProject { exprs: [auction, _row_id] } + | └─StreamShare { id = 562 } + | └─StreamProject { exprs: [auction, _row_id] } + | └─StreamRowIdGen { row_id_index: 7 } + | └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + └─StreamExchange { dist: Broadcast } + └─StreamProject { exprs: [(sum0(count) / count(auction)) as $expr49] } + └─StreamGlobalSimpleAgg { aggs: [count, sum0(count), count(auction)] } + └─StreamExchange { dist: Single } + └─StreamProject { exprs: [auction, count] } + └─StreamAppendOnlyHashAgg { group_key: [auction], aggs: [count, count] } + └─StreamExchange { dist: HashShard(auction) } + └─StreamProject { exprs: [auction, _row_id] } + └─StreamShare { id = 562 } + └─StreamProject { exprs: [auction, _row_id] } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", 
"price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], pk_columns: [auction_id, auction_item_name] } + materialized table: 4294967294 + StreamProject { exprs: [id, item_name, count(auction)] } + StreamDynamicFilter { predicate: (count(auction) >= $expr49), output: [id, item_name, count(auction), count(auction)] } + left table: 0, right table 1 + StreamProject { exprs: [id, item_name, count(auction), count(auction)] } + StreamAppendOnlyHashAgg { group_key: [id, item_name], aggs: [count, count(auction), count(auction)] } + result table: 2, state tables: [] + StreamAppendOnlyHashJoin { type: Inner, predicate: id = auction, output: [id, item_name, auction, _row_id, _row_id] } + left table: 3, right table 5, left degree table: 4, right degree table: 6, + StreamExchange Hash([0]) from 1 + StreamExchange Hash([0]) from 2 + StreamExchange Broadcast from 4 + + Fragment 1 + StreamProject { exprs: [id, item_name, _row_id] } + StreamRowIdGen { row_id_index: 9 } + StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + source state table: 7 + + Fragment 2 + StreamProject { exprs: [auction, _row_id] } + StreamExchange Hash([1]) from 3 + + Fragment 3 + StreamProject { exprs: [auction, _row_id] } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 8 + + Fragment 4 + StreamProject { exprs: [(sum0(count) / count(auction)) as $expr49] } + StreamGlobalSimpleAgg { aggs: [count, sum0(count), count(auction)] } + result table: 9, state tables: [] + StreamExchange Single from 5 + + Fragment 5 + StreamProject { exprs: [auction, count] } + StreamAppendOnlyHashAgg { group_key: [auction], aggs: [count, count] } + result table: 10, state tables: [] + StreamExchange Hash([0]) from 6 + + Fragment 6 + StreamProject { exprs: [auction, _row_id] } + StreamExchange Hash([1]) from 3 + + Table 0 { columns: [id, item_name, count(auction), count(auction)_0], primary key: [$3 ASC, $0 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } + Table 1 { columns: [$expr49], primary key: [], value indices: [0], distribution key: [] } + Table 2 { columns: [id, item_name, count, count(auction), count(auction)_0], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4], distribution key: [0] } + Table 3 { columns: [id, item_name, _row_id], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 4 { columns: [id, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 5 { columns: [auction, _row_id], primary key: [$0 ASC, $1 ASC], value indices: [0, 1], distribution key: [0] } + Table 6 { columns: [auction, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 7 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 8 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 9 { columns: [count, sum0(count), count(auction)], primary key: [], value indices: [0, 1, 2], distribution key: [] } + Table 10 { columns: [auction, count, count_0], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } + Table 4294967294 { columns: 
[auction_id, auction_item_name, bid_count], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0] } +- id: nexmark_q103 + before: + - create_sources + sql: | + -- A self-made query that covers semi join. + -- + -- Show the auctions that have at least 20 bids. + SELECT + a.id AS auction_id, + a.item_name AS auction_item_name + FROM auction a + WHERE a.id IN ( + SELECT b.auction FROM bid b + GROUP BY b.auction + HAVING COUNT(*) >= 20 + ); + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchHashJoin { type: LeftSemi, predicate: id = auction, output: all } + ├─BatchExchange { order: [], dist: HashShard(id) } + | └─BatchProject { exprs: [id, item_name] } + | └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"], filter: (None, None) } + └─BatchProject { exprs: [auction] } + └─BatchFilter { predicate: (count >= 20:Int32) } + └─BatchHashAgg { group_key: [auction], aggs: [count] } + └─BatchExchange { order: [], dist: HashShard(auction) } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], pk_columns: [_row_id, auction_id] } + └─StreamHashJoin { type: LeftSemi, predicate: id = auction, output: all } + ├─StreamExchange { dist: HashShard(id) } + | └─StreamProject { exprs: [id, item_name, _row_id] } + | └─StreamRowIdGen { row_id_index: 9 } + | └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + └─StreamProject { exprs: [auction] } + └─StreamFilter { predicate: (count >= 20:Int32) } + └─StreamProject { exprs: [auction, count] } + └─StreamAppendOnlyHashAgg { group_key: [auction], aggs: [count, count] } + └─StreamExchange { dist: HashShard(auction) } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], pk_columns: [_row_id, auction_id] } + materialized table: 4294967294 + StreamHashJoin { type: LeftSemi, predicate: id = auction, output: all } + left table: 0, right table 2, left degree table: 1, right degree table: 3, + StreamExchange Hash([0]) from 1 + StreamProject { exprs: [auction] } + StreamFilter { predicate: (count >= 20:Int32) } + StreamProject { exprs: [auction, count] } + StreamAppendOnlyHashAgg { group_key: [auction], aggs: [count, count] } + result table: 5, state tables: [] + StreamExchange Hash([0]) from 2 + + Fragment 1 + StreamProject { exprs: [id, item_name, _row_id] } + StreamRowIdGen { row_id_index: 9 } + StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + source state table: 4 + + Fragment 2 + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 6 + + Table 0 { columns: [id, item_name, _row_id], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 1 { columns: [id, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], 
distribution key: [0] } + Table 2 { columns: [auction], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 3 { columns: [auction, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 4 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 5 { columns: [auction, count, count_0], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } + Table 6 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [auction_id, auction_item_name, _row_id], primary key: [$2 ASC, $0 ASC], value indices: [0, 1, 2], distribution key: [0] } +- id: nexmark_q104 + before: + - create_sources + sql: | + -- A self-made query that covers anti join. + -- + -- This is the same as q103, which shows the auctions that have at least 20 bids. + SELECT + a.id AS auction_id, + a.item_name AS auction_item_name + FROM auction a + WHERE a.id NOT IN ( + SELECT b.auction FROM bid b + GROUP BY b.auction + HAVING COUNT(*) < 20 + ); + batch_plan: | + BatchExchange { order: [], dist: Single } + └─BatchHashJoin { type: LeftAnti, predicate: id = auction, output: all } + ├─BatchExchange { order: [], dist: HashShard(id) } + | └─BatchProject { exprs: [id, item_name] } + | └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"], filter: (None, None) } + └─BatchProject { exprs: [auction] } + └─BatchFilter { predicate: (count < 20:Int32) } + └─BatchHashAgg { group_key: [auction], aggs: [count] } + └─BatchExchange { order: [], dist: HashShard(auction) } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], pk_columns: [_row_id, auction_id] } + └─StreamHashJoin { type: LeftAnti, predicate: id = auction, output: all } + ├─StreamExchange { dist: HashShard(id) } + | └─StreamProject { exprs: [id, item_name, _row_id] } + | └─StreamRowIdGen { row_id_index: 9 } + | └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + └─StreamProject { exprs: [auction] } + └─StreamFilter { predicate: (count < 20:Int32) } + └─StreamProject { exprs: [auction, count] } + └─StreamAppendOnlyHashAgg { group_key: [auction], aggs: [count, count] } + └─StreamExchange { dist: HashShard(auction) } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], pk_columns: [_row_id, auction_id] } + materialized table: 4294967294 + StreamHashJoin { type: LeftAnti, predicate: id = auction, output: all } + left table: 0, right table 2, left degree table: 1, right degree table: 3, + StreamExchange Hash([0]) from 1 + StreamProject { exprs: [auction] } + StreamFilter { predicate: (count < 20:Int32) } + StreamProject { exprs: [auction, count] } + StreamAppendOnlyHashAgg { group_key: [auction], aggs: [count, count] } + result table: 5, state tables: [] + StreamExchange Hash([0]) from 2 + + Fragment 1 + StreamProject { exprs: [id, item_name, _row_id] } + StreamRowIdGen { 
row_id_index: 9 } + StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + source state table: 4 + + Fragment 2 + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 6 + + Table 0 { columns: [id, item_name, _row_id], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 1 { columns: [id, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 2 { columns: [auction], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 3 { columns: [auction, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 4 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 5 { columns: [auction, count, count_0], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } + Table 6 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [auction_id, auction_item_name, _row_id], primary key: [$2 ASC, $0 ASC], value indices: [0, 1, 2], distribution key: [0] } +- id: nexmark_q105 + before: + - create_sources + sql: | + -- A self-made query that covers singleton top-n (and local-phase group top-n). + -- + -- Show the top 1000 auctions by the number of bids. + SELECT + a.id AS auction_id, + a.item_name AS auction_item_name, + COUNT(b.auction) AS bid_count + FROM auction a + JOIN bid b ON a.id = b.auction + GROUP BY a.id, a.item_name + ORDER BY bid_count DESC + LIMIT 1000; + batch_plan: | + BatchTopN { order: "[count(auction) DESC]", limit: 1000, offset: 0 } + └─BatchExchange { order: [], dist: Single } + └─BatchTopN { order: "[count(auction) DESC]", limit: 1000, offset: 0 } + └─BatchHashAgg { group_key: [id, item_name], aggs: [count(auction)] } + └─BatchHashJoin { type: Inner, predicate: id = auction, output: all } + ├─BatchExchange { order: [], dist: HashShard(id) } + | └─BatchProject { exprs: [id, item_name] } + | └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"], filter: (None, None) } + └─BatchExchange { order: [], dist: HashShard(auction) } + └─BatchProject { exprs: [auction] } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } + stream_plan: | + StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], pk_columns: [auction_id, auction_item_name], order_descs: [bid_count, auction_id, auction_item_name] } + └─StreamProject { exprs: [id, item_name, count(auction)] } + └─StreamTopN { order: "[count(auction) DESC]", limit: 1000, offset: 0 } + └─StreamExchange { dist: Single } + └─StreamGroupTopN { order: "[count(auction) DESC]", limit: 1000, offset: 0, group_key: [3] } + └─StreamProject { exprs: [id, item_name, count(auction), Vnode(id) as $expr1] } + └─StreamProject { exprs: [id, item_name, count(auction)] } + └─StreamAppendOnlyHashAgg { group_key: [id, item_name], aggs: [count, count(auction)] } + └─StreamAppendOnlyHashJoin { type: Inner, predicate: id = auction, output: [id, item_name, auction, _row_id, _row_id] } + ├─StreamExchange { dist: HashShard(id) } + | └─StreamProject { exprs: 
[id, item_name, _row_id] } + | └─StreamRowIdGen { row_id_index: 9 } + | └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + └─StreamExchange { dist: HashShard(auction) } + └─StreamProject { exprs: [auction, _row_id] } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + stream_dist_plan: | + Fragment 0 + StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], pk_columns: [auction_id, auction_item_name], order_descs: [bid_count, auction_id, auction_item_name] } + materialized table: 4294967294 + StreamProject { exprs: [id, item_name, count(auction)] } + StreamTopN { order: "[count(auction) DESC]", limit: 1000, offset: 0 } + state table: 0 + StreamExchange Single from 1 + + Fragment 1 + StreamGroupTopN { order: "[count(auction) DESC]", limit: 1000, offset: 0, group_key: [3] } + state table: 1 + StreamProject { exprs: [id, item_name, count(auction), Vnode(id) as $expr1] } + StreamProject { exprs: [id, item_name, count(auction)] } + StreamAppendOnlyHashAgg { group_key: [id, item_name], aggs: [count, count(auction)] } + result table: 2, state tables: [] + StreamAppendOnlyHashJoin { type: Inner, predicate: id = auction, output: [id, item_name, auction, _row_id, _row_id] } + left table: 3, right table 5, left degree table: 4, right degree table: 6, + StreamExchange Hash([0]) from 2 + StreamExchange Hash([0]) from 3 + + Fragment 2 + StreamProject { exprs: [id, item_name, _row_id] } + StreamRowIdGen { row_id_index: 9 } + StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + source state table: 7 + + Fragment 3 + StreamProject { exprs: [auction, _row_id] } + StreamRowIdGen { row_id_index: 7 } + StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } + source state table: 8 + + Table 0 { columns: [id, item_name, count(auction), $expr1], primary key: [$2 DESC, $0 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [] } + Table 1 { columns: [id, item_name, count(auction), $expr1], primary key: [$3 ASC, $2 DESC, $0 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [0], vnode column idx: 3 } + Table 2 { columns: [id, item_name, count, count(auction)], primary key: [$0 ASC, $1 ASC], value indices: [2, 3], distribution key: [0] } + Table 3 { columns: [id, item_name, _row_id], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 4 { columns: [id, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 5 { columns: [auction, _row_id], primary key: [$0 ASC, $1 ASC], value indices: [0, 1], distribution key: [0] } + Table 6 { columns: [auction, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 7 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 8 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } + Table 4294967294 { columns: [auction_id, auction_item_name, bid_count], primary key: [$2 DESC, $0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [] } diff --git a/src/frontend/planner_test/tests/testdata/order_by.yaml 
b/src/frontend/planner_test/tests/testdata/order_by.yaml index 4f460213c6791..984fbb2cf7b37 100644 --- a/src/frontend/planner_test/tests/testdata/order_by.yaml +++ b/src/frontend/planner_test/tests/testdata/order_by.yaml @@ -23,7 +23,7 @@ select v1, v1+1 from t order by v1; batch_plan: | BatchExchange { order: [t.v1 ASC], dist: Single } - └─BatchProject { exprs: [t.v1, (t.v1 + 1:Int32)] } + └─BatchProject { exprs: [t.v1, (t.v1 + 1:Int32) as $expr23] } └─BatchSort { order: [t.v1 ASC] } └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } - sql: | @@ -59,13 +59,13 @@ select * from t order by 1+1; batch_plan: | BatchProject { exprs: [t.v1, t.v2] } - └─BatchExchange { order: [(1:Int32 + 1:Int32) ASC], dist: Single } - └─BatchSort { order: [(1:Int32 + 1:Int32) ASC] } - └─BatchProject { exprs: [t.v1, t.v2, (1:Int32 + 1:Int32)] } + └─BatchExchange { order: [$expr23 ASC], dist: Single } + └─BatchSort { order: [$expr23 ASC] } + └─BatchProject { exprs: [t.v1, t.v2, (1:Int32 + 1:Int32) as $expr23] } └─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard } stream_plan: | - StreamMaterialize { columns: [v1, v2, (1:Int32 + 1:Int32)(hidden), t._row_id(hidden)], pk_columns: [t._row_id], order_descs: [(1:Int32 + 1:Int32), t._row_id] } - └─StreamProject { exprs: [t.v1, t.v2, (1:Int32 + 1:Int32), t._row_id] } + StreamMaterialize { columns: [v1, v2, $expr46(hidden), t._row_id(hidden)], pk_columns: [t._row_id], order_descs: [$expr46, t._row_id] } + └─StreamProject { exprs: [t.v1, t.v2, (1:Int32 + 1:Int32) as $expr46, t._row_id] } └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | create table t (v1 bigint, v2 double precision); @@ -85,7 +85,7 @@ └─StreamTopN { order: "[t.v1 DESC]", limit: 5, offset: 0 } └─StreamExchange { dist: Single } └─StreamGroupTopN { order: "[t.v1 DESC]", limit: 5, offset: 0, group_key: [3] } - └─StreamProject { exprs: [t.v1, t.v2, t._row_id, Vnode(t._row_id)] } + └─StreamProject { exprs: [t.v1, t.v2, t._row_id, Vnode(t._row_id) as $expr1] } └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | create table t (v1 bigint, v2 double precision); @@ -117,24 +117,24 @@ └─StreamTopN { order: "[t.v1 DESC]", limit: 5, offset: 7 } └─StreamExchange { dist: Single } └─StreamGroupTopN { order: "[t.v1 DESC]", limit: 12, offset: 0, group_key: [3] } - └─StreamProject { exprs: [t.v1, t.v2, t._row_id, Vnode(t._row_id)] } + └─StreamProject { exprs: [t.v1, t.v2, t._row_id, Vnode(t._row_id) as $expr1] } └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: order by expression that would be valid in select list sql: | create table t (x int, y int, z int); select x, y from t order by x + y, z; optimized_logical_plan: | - LogicalProject { exprs: [t.x, t.y, (t.x + t.y), t.z] } + LogicalProject { exprs: [t.x, t.y, (t.x + t.y) as $expr19, t.z] } └─LogicalScan { table: t, columns: [t.x, t.y, t.z] } batch_plan: | BatchProject { exprs: [t.x, t.y] } - └─BatchExchange { order: [(t.x + t.y) ASC, t.z ASC], dist: Single } - └─BatchSort { order: [(t.x + t.y) ASC, t.z ASC] } - └─BatchProject { exprs: [t.x, t.y, (t.x + t.y), t.z] } + └─BatchExchange { order: [$expr41 ASC, t.z ASC], dist: Single } + └─BatchSort { order: [$expr41 ASC, t.z ASC] } + └─BatchProject { exprs: [t.x, t.y, (t.x + t.y) as $expr41, t.z] } └─BatchScan { table: t, columns: [t.x, t.y, t.z], distribution: 
SomeShard } stream_plan: | - StreamMaterialize { columns: [x, y, (t.x + t.y)(hidden), t.z(hidden), t._row_id(hidden)], pk_columns: [t._row_id], order_descs: [(t.x + t.y), t.z, t._row_id] } - └─StreamProject { exprs: [t.x, t.y, (t.x + t.y), t.z, t._row_id] } + StreamMaterialize { columns: [x, y, $expr64(hidden), t.z(hidden), t._row_id(hidden)], pk_columns: [t._row_id], order_descs: [$expr64, t.z, t._row_id] } + └─StreamProject { exprs: [t.x, t.y, (t.x + t.y) as $expr64, t.z, t._row_id] } └─StreamTableScan { table: t, columns: [t.x, t.y, t.z, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: order by the number of an output column sql: | @@ -208,9 +208,9 @@ CREATE TABLE test (a INTEGER, b INTEGER); SELECT b % 2 AS f, SUM(a) FROM test GROUP BY b % 2 ORDER BY f; batch_plan: | - BatchExchange { order: [(test.b % 2:Int32) ASC], dist: Single } - └─BatchSortAgg { group_key: [(test.b % 2:Int32)], aggs: [sum(test.a)] } - └─BatchExchange { order: [(test.b % 2:Int32) ASC], dist: HashShard((test.b % 2:Int32)) } - └─BatchSort { order: [(test.b % 2:Int32) ASC] } - └─BatchProject { exprs: [(test.b % 2:Int32), test.a] } + BatchExchange { order: [$expr23 ASC], dist: Single } + └─BatchSortAgg { group_key: [$expr23], aggs: [sum(test.a)] } + └─BatchExchange { order: [$expr23 ASC], dist: HashShard($expr23) } + └─BatchSort { order: [$expr23 ASC] } + └─BatchProject { exprs: [(test.b % 2:Int32) as $expr23, test.a] } └─BatchScan { table: test, columns: [test.a, test.b], distribution: SomeShard } diff --git a/src/frontend/planner_test/tests/testdata/over_window_function.yaml b/src/frontend/planner_test/tests/testdata/over_window_function.yaml index 038325e3b2a26..e3775e9f77c4f 100644 --- a/src/frontend/planner_test/tests/testdata/over_window_function.yaml +++ b/src/frontend/planner_test/tests/testdata/over_window_function.yaml @@ -290,11 +290,11 @@ stream_plan: | StreamMaterialize { columns: [window_start, window_end, supplier_id, price, cnt], pk_columns: [window_start, window_end, supplier_id] } └─StreamGroupTopN { order: "[sum(bid.price) DESC]", limit: 3, offset: 0, group_key: [0, 1] } - └─StreamExchange { dist: HashShard(TumbleStart(bid.bidtime, '00:10:00':Interval), (TumbleStart(bid.bidtime, '00:10:00':Interval) + '00:10:00':Interval)) } - └─StreamProject { exprs: [TumbleStart(bid.bidtime, '00:10:00':Interval), (TumbleStart(bid.bidtime, '00:10:00':Interval) + '00:10:00':Interval), bid.supplier_id, sum(bid.price), count] } - └─StreamHashAgg { group_key: [TumbleStart(bid.bidtime, '00:10:00':Interval), (TumbleStart(bid.bidtime, '00:10:00':Interval) + '00:10:00':Interval), bid.supplier_id], aggs: [count, sum(bid.price), count] } - └─StreamExchange { dist: HashShard(TumbleStart(bid.bidtime, '00:10:00':Interval), (TumbleStart(bid.bidtime, '00:10:00':Interval) + '00:10:00':Interval), bid.supplier_id) } - └─StreamProject { exprs: [TumbleStart(bid.bidtime, '00:10:00':Interval), (TumbleStart(bid.bidtime, '00:10:00':Interval) + '00:10:00':Interval), bid.supplier_id, bid.price, bid._row_id] } + └─StreamExchange { dist: HashShard($expr47, $expr48) } + └─StreamProject { exprs: [$expr47, $expr48, bid.supplier_id, sum(bid.price), count] } + └─StreamHashAgg { group_key: [$expr47, $expr48, bid.supplier_id], aggs: [count, sum(bid.price), count] } + └─StreamExchange { dist: HashShard($expr47, $expr48, bid.supplier_id) } + └─StreamProject { exprs: [TumbleStart(bid.bidtime, '00:10:00':Interval) as $expr47, (TumbleStart(bid.bidtime, '00:10:00':Interval) + '00:10:00':Interval) as $expr48, bid.supplier_id, 
bid.price, bid._row_id] } └─StreamTableScan { table: bid, columns: [bid.bidtime, bid.price, bid.item, bid.supplier_id, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } - before: - create_bid @@ -310,10 +310,10 @@ stream_plan: | StreamMaterialize { columns: [window_start, window_end, supplier_id, price, bid._row_id(hidden)], pk_columns: [bid._row_id] } └─StreamExchange { dist: HashShard(bid._row_id) } - └─StreamProject { exprs: [TumbleStart(bid.bidtime, '00:10:00':Interval), (TumbleStart(bid.bidtime, '00:10:00':Interval) + '00:10:00':Interval), bid.supplier_id, bid.price, bid._row_id] } + └─StreamProject { exprs: [$expr45, $expr46, bid.supplier_id, bid.price, bid._row_id] } └─StreamGroupTopN { order: "[bid.price DESC]", limit: 3, offset: 0, group_key: [5, 6] } - └─StreamExchange { dist: HashShard(TumbleStart(bid.bidtime, '00:10:00':Interval), (TumbleStart(bid.bidtime, '00:10:00':Interval) + '00:10:00':Interval)) } - └─StreamProject { exprs: [bid.bidtime, bid.price, bid.item, bid.supplier_id, bid._row_id, TumbleStart(bid.bidtime, '00:10:00':Interval), (TumbleStart(bid.bidtime, '00:10:00':Interval) + '00:10:00':Interval)] } + └─StreamExchange { dist: HashShard($expr45, $expr46) } + └─StreamProject { exprs: [bid.bidtime, bid.price, bid.item, bid.supplier_id, bid._row_id, TumbleStart(bid.bidtime, '00:10:00':Interval) as $expr45, (TumbleStart(bid.bidtime, '00:10:00':Interval) + '00:10:00':Interval) as $expr46] } └─StreamTableScan { table: bid, columns: [bid.bidtime, bid.price, bid.item, bid.supplier_id, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } - name: Deduplication sql: | diff --git a/src/frontend/planner_test/tests/testdata/pg_catalog.yaml b/src/frontend/planner_test/tests/testdata/pg_catalog.yaml index b3f0c77e98e1a..06ba337441bfd 100644 --- a/src/frontend/planner_test/tests/testdata/pg_catalog.yaml +++ b/src/frontend/planner_test/tests/testdata/pg_catalog.yaml @@ -41,5 +41,4 @@ LogicalProject { exprs: [2:Int32] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } batch_plan: | - BatchProject { exprs: [2:Int32] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[2:Int32]] } diff --git a/src/frontend/planner_test/tests/testdata/predicate_pushdown.yaml b/src/frontend/planner_test/tests/testdata/predicate_pushdown.yaml index 8440a6b676cee..3ae0faf0d7e96 100644 --- a/src/frontend/planner_test/tests/testdata/predicate_pushdown.yaml +++ b/src/frontend/planner_test/tests/testdata/predicate_pushdown.yaml @@ -54,14 +54,15 @@ logical_plan: | LogicalProject { exprs: [t.v1, t.v2, t.v3, t.v4, t.ts, window_start, window_end] } └─LogicalFilter { predicate: (t.v1 = 10:Int32) AND (t.v2 = 20:Int32) AND (t.v3 = 30:Int32) AND (t.ts >= '1997-07-01':Date) AND (window_start >= '1997-07-02':Date) AND (window_end >= '1997-07-03':Date) AND (window_start >= (t.ts + '1 day':Interval)) AND (window_end > (t.ts + '4 days':Interval)) } - └─LogicalShare { id = 4 } + └─LogicalShare { id = 5 } └─LogicalProject { exprs: [t.v1, t.v2, t.v3, t.v4, t.ts, window_start, window_end] } └─LogicalHopWindow { time_col: t.ts, slide: 1 day, size: 3 days, output: all } - └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3, t.v4, t.ts, t._row_id] } + └─LogicalFilter { predicate: IsNotNull(t.ts) } + └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3, t.v4, t.ts, t._row_id] } optimized_logical_plan: | LogicalFilter { predicate: (window_start >= '1997-07-02':Date) AND (window_end >= '1997-07-03':Date) AND (window_start >= (t.ts + '1 day':Interval)) AND (window_end > (t.ts + 
'4 days':Interval)) } └─LogicalHopWindow { time_col: t.ts, slide: 1 day, size: 3 days, output: all } - └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3, t.v4, t.ts], predicate: (t.v1 = 10:Int32) AND (t.v2 = 20:Int32) AND (t.v3 = 30:Int32) AND (t.ts >= '1997-07-01':Date) } + └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3, t.v4, t.ts], predicate: (t.v1 = 10:Int32) AND (t.v2 = 20:Int32) AND (t.v3 = 30:Int32) AND (t.ts >= '1997-07-01':Date) AND IsNotNull(t.ts) } - name: filter hop transpose with non-trivial output-indices sql: | create table t(v1 int, v2 int, v3 int, v4 int, ts date); @@ -70,15 +71,16 @@ logical_plan: | LogicalProject { exprs: [window_end, t.v4, t.v2] } └─LogicalFilter { predicate: (window_end > '2022-01-01':Date) AND (t.v4 = 10:Int32) AND (t.v2 > 20:Int32) } - └─LogicalShare { id = 4 } + └─LogicalShare { id = 5 } └─LogicalProject { exprs: [window_end, t.v4, t.v2] } └─LogicalHopWindow { time_col: t.ts, slide: 1 day, size: 3 days, output: all } - └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3, t.v4, t.ts, t._row_id] } + └─LogicalFilter { predicate: IsNotNull(t.ts) } + └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3, t.v4, t.ts, t._row_id] } optimized_logical_plan: | LogicalProject { exprs: [window_end, t.v4, t.v2] } └─LogicalFilter { predicate: (window_end > '2022-01-01':Date) } └─LogicalHopWindow { time_col: t.ts, slide: 1 day, size: 3 days, output: [t.v2, t.v4, window_end] } - └─LogicalScan { table: t, columns: [t.v2, t.v4, t.ts], predicate: (t.v4 = 10:Int32) AND (t.v2 > 20:Int32) } + └─LogicalScan { table: t, columns: [t.v2, t.v4, t.ts], predicate: (t.v4 = 10:Int32) AND (t.v2 > 20:Int32) AND IsNotNull(t.ts) } - name: filter union transpose sql: | create table t1 (v1 int, v2 int, v3 int); @@ -102,13 +104,13 @@ create table t(v1 int, v2 int, v3 int, v4 int, ts date); with cte as (select v1,v2,v3,v4,v1+v2 as expr_a, v3*v4 as expr_b from t) select * from cte where expr_a>10 AND v1=10 AND v2=20 AND v3=30 AND expr_a > expr_b AND expr_a > v1; logical_plan: | - LogicalProject { exprs: [t.v1, t.v2, t.v3, t.v4, (t.v1 + t.v2), (t.v3 * t.v4)] } - └─LogicalFilter { predicate: ((t.v1 + t.v2) > 10:Int32) AND (t.v1 = 10:Int32) AND (t.v2 = 20:Int32) AND (t.v3 = 30:Int32) AND ((t.v1 + t.v2) > (t.v3 * t.v4)) AND ((t.v1 + t.v2) > t.v1) } + LogicalProject { exprs: [t.v1, t.v2, t.v3, t.v4, $expr1, $expr2] } + └─LogicalFilter { predicate: ($expr1 > 10:Int32) AND (t.v1 = 10:Int32) AND (t.v2 = 20:Int32) AND (t.v3 = 30:Int32) AND ($expr1 > $expr2) AND ($expr1 > t.v1) } └─LogicalShare { id = 3 } - └─LogicalProject { exprs: [t.v1, t.v2, t.v3, t.v4, (t.v1 + t.v2), (t.v3 * t.v4)] } + └─LogicalProject { exprs: [t.v1, t.v2, t.v3, t.v4, (t.v1 + t.v2) as $expr1, (t.v3 * t.v4) as $expr2] } └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3, t.v4, t.ts, t._row_id] } optimized_logical_plan: | - LogicalProject { exprs: [t.v1, t.v2, t.v3, t.v4, (t.v1 + t.v2), (t.v3 * t.v4)] } + LogicalProject { exprs: [t.v1, t.v2, t.v3, t.v4, (t.v1 + t.v2) as $expr39, (t.v3 * t.v4) as $expr40] } └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3, t.v4], predicate: ((t.v1 + t.v2) > 10:Int32) AND (t.v1 = 10:Int32) AND (t.v2 = 20:Int32) AND (t.v3 = 30:Int32) AND ((t.v1 + t.v2) > (t.v3 * t.v4)) AND ((t.v1 + t.v2) > t.v1) } - name: filter agg transpose sql: | @@ -264,10 +266,11 @@ StreamMaterialize { columns: [v1, v2, t1._row_id(hidden), t2._row_id(hidden)], pk_columns: [t1._row_id, t2._row_id, v1, v2] } └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v2, output: [t1.v1, t2.v2, t1._row_id, t2._row_id] } 
├─StreamExchange { dist: HashShard(t1.v1) } - | └─StreamDynamicFilter { predicate: (t1.v1 > (now + '01:00:00':Interval)), output: [t1.v1, t1._row_id] } + | └─StreamDynamicFilter { predicate: (t1.v1 > $expr12), output: [t1.v1, t1._row_id] } | ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - | └─StreamProject { exprs: [(now + '01:00:00':Interval)], watermark_columns: [(now + '01:00:00':Interval)] } - | └─StreamNow { output: [now] } + | └─StreamExchange { dist: Broadcast } + | └─StreamProject { exprs: [(now + '01:00:00':Interval) as $expr12], watermark_columns: [(now + '01:00:00':Interval)] } + | └─StreamNow { output: [now] } └─StreamExchange { dist: HashShard(t2.v2) } └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - name: now() in a complex cmp expr does not get pushed down @@ -280,7 +283,7 @@ └─LogicalJoin { type: Inner, on: (t1.v1 = t2.v2), output: all } ├─LogicalScan { table: t1, columns: [t1.v1] } └─LogicalScan { table: t2, columns: [t2.v2, t2.v3] } - stream_error: 'Expr error: Invalid parameter now: expressions containing now must be of the form `col [cmp] now() +- [literal]`' + stream_error: 'internal error: Conditions containing now must be of the form `input_expr cmp now() [+- const_expr]` or `now() [+- const_expr] cmp input_expr`, where `input_expr` references a column and contains no `now()`.' - name: now() in complex cmp expr pushed onto join ON clause results in dynamic filter sql: | create table t1(v1 timestamp with time zone); @@ -291,7 +294,7 @@ └─LogicalJoin { type: Inner, on: (t1.v1 = t2.v2), output: all } ├─LogicalScan { table: t1, columns: [t1.v1] } └─LogicalScan { table: t2, columns: [t2.v2, t2.v3] } - stream_error: 'Expr error: Invalid parameter now: expressions containing now must be of the form `col [cmp] now() +- [literal]`' + stream_error: 'internal error: Conditions containing now must be of the form `input_expr cmp now() [+- const_expr]` or `now() [+- const_expr] cmp input_expr`, where `input_expr` references a column and contains no `now()`.' 
- name: now() does not get pushed to scan, but others do sql: | create table t1(v1 timestamp with time zone, v2 int); @@ -301,11 +304,12 @@ └─LogicalScan { table: t1, columns: [t1.v1, t1.v2], predicate: (t1.v2 > 5:Int32) } stream_plan: | StreamMaterialize { columns: [v1, v2, t1._row_id(hidden)], pk_columns: [t1._row_id] } - └─StreamDynamicFilter { predicate: (t1.v1 > (now + '00:30:00':Interval)), output: [t1.v1, t1.v2, t1._row_id] } + └─StreamDynamicFilter { predicate: (t1.v1 > $expr12), output: [t1.v1, t1.v2, t1._row_id] } ├─StreamFilter { predicate: (t1.v2 > 5:Int32) } | └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamProject { exprs: [(now + '00:30:00':Interval)], watermark_columns: [(now + '00:30:00':Interval)] } - └─StreamNow { output: [now] } + └─StreamExchange { dist: Broadcast } + └─StreamProject { exprs: [(now + '00:30:00':Interval) as $expr12], watermark_columns: [(now + '00:30:00':Interval)] } + └─StreamNow { output: [now] } - name: eq-predicate derived condition other side pushdown in inner join sql: | create table t1(v1 int, v2 int); @@ -350,6 +354,7 @@ ├─StreamExchange { dist: HashShard(t1.v1) } | └─StreamDynamicFilter { predicate: (t1.v1 > now), output: [t1.v1, t1._row_id] } | ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - | └─StreamNow { output: [now] } + | └─StreamExchange { dist: Broadcast } + | └─StreamNow { output: [now] } └─StreamExchange { dist: HashShard(t2.v2) } └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/project.yaml b/src/frontend/planner_test/tests/testdata/project.yaml new file mode 100644 index 0000000000000..6de27ff36553a --- /dev/null +++ b/src/frontend/planner_test/tests/testdata/project.yaml @@ -0,0 +1,11 @@ +# This file is automatically generated. See `src/frontend/planner_test/README.md` for more information. 
+- sql: | + select 1 as k, 2 as v; + batch_plan: | + BatchValues { rows: [[1:Int32, 2:Int32]] } +- name: Project over union all of nested project over union all, now, and simple agg + sql: | + create table t(v int); + select 'abc', 1, 1.4 as k from (select 1 as k, 2 from (select 1, 2 union all select 3, 4) union all select * from (select 3, 4) union all select 100, 200 from (select now(), now() - interval '1 hour') union all select count(*)::int, sum(v)::int from t); + batch_plan: | + BatchValues { rows: [['abc':Varchar, 1:Int32, 1.4:Decimal], ['abc':Varchar, 1:Int32, 1.4:Decimal], ['abc':Varchar, 1:Int32, 1.4:Decimal], ['abc':Varchar, 1:Int32, 1.4:Decimal], ['abc':Varchar, 1:Int32, 1.4:Decimal]] } diff --git a/src/frontend/planner_test/tests/testdata/project_set.yaml b/src/frontend/planner_test/tests/testdata/project_set.yaml index 4adbeadc2bc68..5e4c0dd31e694 100644 --- a/src/frontend/planner_test/tests/testdata/project_set.yaml +++ b/src/frontend/planner_test/tests/testdata/project_set.yaml @@ -53,7 +53,7 @@ select -generate_series(x,x,x) from t; batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [Neg(Generate($0, $0, $0))] } + └─BatchProject { exprs: [Neg(Generate($0, $0, $0)) as $expr24] } └─BatchProjectSet { select_list: [$0, $1, Generate($0, $0, $0)] } └─BatchScan { table: t, columns: [t.x, t._row_id], distribution: UpstreamHashShard(t._row_id) } - name: table functions as parameters of usual functions @@ -98,7 +98,7 @@ └─StreamTopN { order: "[projected_row_id ASC]", limit: 1, offset: 0 } └─StreamExchange { dist: Single } └─StreamGroupTopN { order: "[projected_row_id ASC]", limit: 1, offset: 0, group_key: [3] } - └─StreamProject { exprs: [projected_row_id, Unnest($0), t._row_id, Vnode(t._row_id)] } + └─StreamProject { exprs: [projected_row_id, Unnest($0), t._row_id, Vnode(t._row_id) as $expr1] } └─StreamProjectSet { select_list: [Unnest($0), $1] } └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | diff --git a/src/frontend/planner_test/tests/testdata/share.yaml b/src/frontend/planner_test/tests/testdata/share.yaml index 24b5b46b4857f..1d5e5dfaebe24 100644 --- a/src/frontend/planner_test/tests/testdata/share.yaml +++ b/src/frontend/planner_test/tests/testdata/share.yaml @@ -1,20 +1,20 @@ # This file is automatically generated. See `src/frontend/planner_test/README.md` for more information. 
- id: create_sources sql: | - create source auction (id INTEGER, "item_name" VARCHAR, description VARCHAR, "initial_bid" INTEGER, reserve INTEGER, "date_time" TIMESTAMP, expires TIMESTAMP, seller INTEGER, category INTEGER) + create source auction (id BIGINT, "item_name" VARCHAR, description VARCHAR, "initial_bid" BIGINT, reserve BIGINT, "date_time" TIMESTAMP, expires TIMESTAMP, seller BIGINT, category BIGINT, "extra" VARCHAR) with ( connector = 'nexmark', nexmark.table.type = 'Auction', nexmark.split.num = '4', nexmark.min.event.gap.in.ns = '1000' - ) row format json; - create source bid (auction INTEGER, bidder INTEGER, price INTEGER, "date_time" TIMESTAMP) + ); + create source bid (auction BIGINT, bidder BIGINT, price BIGINT, "channel" VARCHAR, "url" VARCHAR, "date_time" TIMESTAMP, "extra" VARCHAR) with ( connector = 'nexmark', nexmark.table.type = 'Bid', nexmark.split.num = '4', nexmark.min.event.gap.in.ns = '1000' - ) row format json; + ); - id: self_join before: - create_sources @@ -30,13 +30,13 @@ | └─BatchFilter { predicate: (initial_bid = 1:Int32) } | └─BatchProject { exprs: [id, initial_bid] } | └─BatchFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) } - | └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"], filter: (None, None) } + | └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "extra", "_row_id"], filter: (None, None) } └─BatchExchange { order: [], dist: HashShard(id) } └─BatchProject { exprs: [id] } └─BatchFilter { predicate: (initial_bid = 2:Int32) } └─BatchProject { exprs: [id, initial_bid] } └─BatchFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) } - └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"], filter: (None, None) } + └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "extra", "_row_id"], filter: (None, None) } stream_plan: | StreamMaterialize { columns: [cnt], pk_columns: [] } └─StreamProject { exprs: [sum0(count)] } @@ -48,20 +48,20 @@ | └─StreamProject { exprs: [id, _row_id] } | └─StreamFilter { predicate: (initial_bid = 1:Int32) } | └─StreamProject { exprs: [id, initial_bid, _row_id] } - | └─StreamShare { id = 511 } + | └─StreamShare { id = 529 } | └─StreamProject { exprs: [id, initial_bid, _row_id] } | └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) } - | └─StreamRowIdGen { row_id_index: 9 } - | └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + | └─StreamRowIdGen { row_id_index: 10 } + | └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "extra", "_row_id"] } └─StreamExchange { dist: HashShard(id) } └─StreamProject { exprs: [id, _row_id] } └─StreamFilter { predicate: (initial_bid = 2:Int32) } └─StreamProject { exprs: [id, initial_bid, _row_id] } - └─StreamShare { id = 511 } + └─StreamShare { id = 529 } └─StreamProject { exprs: [id, initial_bid, _row_id] } └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) } - 
└─StreamRowIdGen { row_id_index: 9 } - └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } + └─StreamRowIdGen { row_id_index: 10 } + └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "extra", "_row_id"] } - id: nexmark_q5 before: - create_sources @@ -104,16 +104,20 @@ | └─BatchHashAgg { group_key: [window_start, auction], aggs: [count] } | └─BatchHopWindow { time_col: date_time, slide: 00:00:02, size: 00:00:10, output: [auction, window_start] } | └─BatchExchange { order: [], dist: HashShard(auction) } - | └─BatchProject { exprs: [auction, date_time] } - | └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "date_time", "_row_id"], filter: (None, None) } + | └─BatchFilter { predicate: IsNotNull(date_time) } + | └─BatchProject { exprs: [auction, date_time] } + | └─BatchFilter { predicate: IsNotNull(date_time) } + | └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } └─BatchProject { exprs: [max(count), window_start] } └─BatchHashAgg { group_key: [window_start], aggs: [max(count)] } └─BatchExchange { order: [], dist: HashShard(window_start) } └─BatchHashAgg { group_key: [auction, window_start], aggs: [count] } └─BatchHopWindow { time_col: date_time, slide: 00:00:02, size: 00:00:10, output: [auction, window_start] } └─BatchExchange { order: [], dist: HashShard(auction) } - └─BatchProject { exprs: [auction, date_time] } - └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "date_time", "_row_id"], filter: (None, None) } + └─BatchFilter { predicate: IsNotNull(date_time) } + └─BatchProject { exprs: [auction, date_time] } + └─BatchFilter { predicate: IsNotNull(date_time) } + └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } stream_plan: | StreamMaterialize { columns: [auction, num, window_start(hidden), window_start#1(hidden)], pk_columns: [window_start, auction, window_start#1] } └─StreamProject { exprs: [auction, count, window_start, window_start] } @@ -124,11 +128,13 @@ | └─StreamAppendOnlyHashAgg { group_key: [window_start, auction], aggs: [count, count] } | └─StreamExchange { dist: HashShard(auction, window_start) } | └─StreamHopWindow { time_col: date_time, slide: 00:00:02, size: 00:00:10, output: [auction, window_start, _row_id] } - | └─StreamProject { exprs: [auction, date_time, _row_id] } - | └─StreamShare { id = 731 } - | └─StreamProject { exprs: [auction, date_time, _row_id] } - | └─StreamRowIdGen { row_id_index: 4 } - | └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "date_time", "_row_id"] } + | └─StreamFilter { predicate: IsNotNull(date_time) } + | └─StreamProject { exprs: [auction, date_time, _row_id] } + | └─StreamShare { id = 919 } + | └─StreamProject { exprs: [auction, date_time, _row_id] } + | └─StreamFilter { predicate: IsNotNull(date_time) } + | └─StreamRowIdGen { row_id_index: 7 } + | └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } └─StreamProject { exprs: [max(count), window_start] } └─StreamHashAgg { group_key: [window_start], aggs: [count, max(count)] } └─StreamExchange { dist: HashShard(window_start) } @@ -136,8 +142,77 @@ 
└─StreamAppendOnlyHashAgg { group_key: [auction, window_start], aggs: [count, count] } └─StreamExchange { dist: HashShard(auction, window_start) } └─StreamHopWindow { time_col: date_time, slide: 00:00:02, size: 00:00:10, output: [auction, window_start, _row_id] } - └─StreamProject { exprs: [auction, date_time, _row_id] } - └─StreamShare { id = 731 } - └─StreamProject { exprs: [auction, date_time, _row_id] } - └─StreamRowIdGen { row_id_index: 4 } - └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "date_time", "_row_id"] } + └─StreamFilter { predicate: IsNotNull(date_time) } + └─StreamProject { exprs: [auction, date_time, _row_id] } + └─StreamShare { id = 919 } + └─StreamProject { exprs: [auction, date_time, _row_id] } + └─StreamFilter { predicate: IsNotNull(date_time) } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } +- sql: | + set rw_enable_share_plan=true; + create table t(a int, b int); + with cte as (select count(*) from t) select * from cte union all select * from cte; + stream_plan: | + StreamMaterialize { columns: [count, 0:Int32(hidden)], pk_columns: [0:Int32] } + └─StreamUnion { all: true } + ├─StreamExchange { dist: HashShard(0:Int32) } + | └─StreamProject { exprs: [sum0(count), 0:Int32] } + | └─StreamShare { id = 210 } + | └─StreamProject { exprs: [sum0(count)] } + | └─StreamGlobalSimpleAgg { aggs: [count, sum0(count)] } + | └─StreamExchange { dist: Single } + | └─StreamStatelessLocalSimpleAgg { aggs: [count, count] } + | └─StreamTableScan { table: t, columns: [t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(1:Int32) } + └─StreamProject { exprs: [sum0(count), 1:Int32] } + └─StreamShare { id = 210 } + └─StreamProject { exprs: [sum0(count)] } + └─StreamGlobalSimpleAgg { aggs: [count, sum0(count)] } + └─StreamExchange { dist: Single } + └─StreamStatelessLocalSimpleAgg { aggs: [count, count] } + └─StreamTableScan { table: t, columns: [t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } +- sql: | + set rw_enable_share_plan=false; + create table t(a int, b int); + with cte as (select count(*) from t) select * from cte union all select * from cte; + stream_plan: | + StreamMaterialize { columns: [count, 0:Int32(hidden)], pk_columns: [0:Int32] } + └─StreamUnion { all: true } + ├─StreamExchange { dist: HashShard(0:Int32) } + | └─StreamProject { exprs: [sum0(count), 0:Int32] } + | └─StreamGlobalSimpleAgg { aggs: [count, sum0(count)] } + | └─StreamExchange { dist: Single } + | └─StreamStatelessLocalSimpleAgg { aggs: [count, count] } + | └─StreamTableScan { table: t, columns: [t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(1:Int32) } + └─StreamProject { exprs: [sum0(count), 1:Int32] } + └─StreamGlobalSimpleAgg { aggs: [count, sum0(count)] } + └─StreamExchange { dist: Single } + └─StreamStatelessLocalSimpleAgg { aggs: [count, count] } + └─StreamTableScan { table: t, columns: [t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } +- id: force_share_source_for_self_join + before: + - create_sources + sql: | + set rw_enable_share_plan=false; + select count(*) cnt from auction A join auction B on A.id = B.id; + stream_plan: | + StreamMaterialize { columns: [cnt], pk_columns: [] } + └─StreamProject { exprs: [sum0(count)] } + └─StreamAppendOnlyGlobalSimpleAgg { aggs: [count, sum0(count)] } + └─StreamExchange { dist: Single } 
+ └─StreamStatelessLocalSimpleAgg { aggs: [count, count] } + └─StreamAppendOnlyHashJoin { type: Inner, predicate: id = id, output: [_row_id, id, _row_id, id] } + ├─StreamExchange { dist: HashShard(id) } + | └─StreamProject { exprs: [id, _row_id] } + | └─StreamShare { id = 152 } + | └─StreamProject { exprs: [id, _row_id] } + | └─StreamRowIdGen { row_id_index: 10 } + | └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "extra", "_row_id"] } + └─StreamExchange { dist: HashShard(id) } + └─StreamProject { exprs: [id, _row_id] } + └─StreamShare { id = 152 } + └─StreamProject { exprs: [id, _row_id] } + └─StreamRowIdGen { row_id_index: 10 } + └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "extra", "_row_id"] } diff --git a/src/frontend/planner_test/tests/testdata/shared_views.yaml b/src/frontend/planner_test/tests/testdata/shared_views.yaml new file mode 100644 index 0000000000000..206a89e9e22fb --- /dev/null +++ b/src/frontend/planner_test/tests/testdata/shared_views.yaml @@ -0,0 +1,43 @@ +# This file is automatically generated. See `src/frontend/planner_test/README.md` for more information. +- sql: | + create table t1 (x int, y int); + create view v1 as select x + y as z from t1 where y > 0; + create view v2 as select x * z as a, y * z as b from t1, v1 where z = x; + select * from v1, v2 where z = a; + logical_plan: | + LogicalProject { exprs: [$expr1, $expr2, $expr3] } + └─LogicalFilter { predicate: ($expr1 = $expr2) } + └─LogicalJoin { type: Inner, on: true, output: all } + ├─LogicalShare { id = 4 } + | └─LogicalProject { exprs: [(t1.x + t1.y) as $expr1] } + | └─LogicalFilter { predicate: (t1.y > 0:Int32) } + | └─LogicalScan { table: t1, columns: [t1.x, t1.y, t1._row_id] } + └─LogicalShare { id = 9 } + └─LogicalProject { exprs: [(t1.x * $expr1) as $expr2, (t1.y * $expr1) as $expr3] } + └─LogicalFilter { predicate: ($expr1 = t1.x) } + └─LogicalJoin { type: Inner, on: true, output: all } + ├─LogicalScan { table: t1, columns: [t1.x, t1.y, t1._row_id] } + └─LogicalShare { id = 4 } + └─LogicalProject { exprs: [(t1.x + t1.y) as $expr1] } + └─LogicalFilter { predicate: (t1.y > 0:Int32) } + └─LogicalScan { table: t1, columns: [t1.x, t1.y, t1._row_id] } + stream_plan: | + StreamMaterialize { columns: [z, a, b, t1._row_id(hidden), t1._row_id#1(hidden), t1._row_id#2(hidden), t1.x(hidden), $expr86(hidden)], pk_columns: [t1._row_id, t1._row_id#1, t1._row_id#2, t1.x, $expr86, z, a] } + └─StreamHashJoin { type: Inner, predicate: $expr86 = $expr87, output: [$expr86, $expr87, $expr88, t1._row_id, t1._row_id, t1._row_id, t1.x, $expr86] } + ├─StreamExchange { dist: HashShard($expr86) } + | └─StreamProject { exprs: [$expr86, t1._row_id] } + | └─StreamShare { id = 207 } + | └─StreamProject { exprs: [(t1.x + t1.y) as $expr86, t1._row_id] } + | └─StreamFilter { predicate: (t1.y > 0:Int32) } + | └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard($expr87) } + └─StreamProject { exprs: [(t1.x * $expr86) as $expr87, (t1.y * $expr86) as $expr88, t1._row_id, t1._row_id, t1.x, $expr86] } + └─StreamHashJoin { type: Inner, predicate: t1.x = $expr86, output: [t1.x, t1.y, $expr86, t1._row_id, t1._row_id] } + ├─StreamExchange { dist: HashShard(t1.x) } + | └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], 
pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard($expr86) } + └─StreamProject { exprs: [$expr86, t1._row_id] } + └─StreamShare { id = 207 } + └─StreamProject { exprs: [(t1.x + t1.y) as $expr86, t1._row_id] } + └─StreamFilter { predicate: (t1.y > 0:Int32) } + └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/stream_dist_agg.yaml b/src/frontend/planner_test/tests/testdata/stream_dist_agg.yaml index 940fb9e31724e..c5ffc5cb34f78 100644 --- a/src/frontend/planner_test/tests/testdata/stream_dist_agg.yaml +++ b/src/frontend/planner_test/tests/testdata/stream_dist_agg.yaml @@ -134,8 +134,8 @@ └─StreamProject { exprs: [max(max(t.v))] } └─StreamGlobalSimpleAgg { aggs: [count, max(max(t.v))] } └─StreamExchange { dist: Single } - └─StreamHashAgg { group_key: [Vnode(t._row_id)], aggs: [count, max(t.v)] } - └─StreamProject { exprs: [t.v, t._row_id, Vnode(t._row_id)] } + └─StreamHashAgg { group_key: [$expr1], aggs: [count, max(t.v)] } + └─StreamProject { exprs: [t.v, t._row_id, Vnode(t._row_id) as $expr1] } └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } stream_dist_plan: | Fragment 0 @@ -147,17 +147,17 @@ StreamExchange Single from 1 Fragment 1 - StreamHashAgg { group_key: [Vnode(t._row_id)], aggs: [count, max(t.v)] } + StreamHashAgg { group_key: [$expr1], aggs: [count, max(t.v)] } result table: 3, state tables: [2] - StreamProject { exprs: [t.v, t._row_id, Vnode(t._row_id)] } + StreamProject { exprs: [t.v, t._row_id, Vnode(t._row_id) as $expr1] } Chain { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } Upstream BatchPlanNode - Table 0 { columns: [max(t_v), Vnode(t__row_id)], primary key: [$0 DESC, $1 ASC], value indices: [0, 1], distribution key: [] } + Table 0 { columns: [max(t_v), $expr1], primary key: [$0 DESC, $1 ASC], value indices: [0, 1], distribution key: [] } Table 1 { columns: [count, max(max(t_v))], primary key: [], value indices: [0, 1], distribution key: [] } - Table 2 { columns: [Vnode(t__row_id), t_v, t__row_id], primary key: [$0 ASC, $1 DESC, $2 ASC], value indices: [1, 2], distribution key: [2], vnode column idx: 0 } - Table 3 { columns: [Vnode(t__row_id), count, max(t_v)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [], vnode column idx: 0 } + Table 2 { columns: [$expr1, t_v, t__row_id], primary key: [$0 ASC, $1 DESC, $2 ASC], value indices: [1, 2], distribution key: [2], vnode column idx: 0 } + Table 3 { columns: [$expr1, count, max(t_v)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [], vnode column idx: 0 } Table 4294967294 { columns: [a1], primary key: [], value indices: [0], distribution key: [] } - id: extreme_on_AO before: @@ -393,8 +393,8 @@ └─StreamProject { exprs: [max(max(t.v)), sum0(count(t.v))] } └─StreamGlobalSimpleAgg { aggs: [count, max(max(t.v)), sum0(count(t.v))] } └─StreamExchange { dist: Single } - └─StreamHashAgg { group_key: [Vnode(t._row_id)], aggs: [count, max(t.v), count(t.v)] } - └─StreamProject { exprs: [t.v, t._row_id, Vnode(t._row_id)] } + └─StreamHashAgg { group_key: [$expr1], aggs: [count, max(t.v), count(t.v)] } + └─StreamProject { exprs: [t.v, t._row_id, Vnode(t._row_id) as $expr1] } └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } stream_dist_plan: | Fragment 0 @@ -406,17 +406,17 @@ 
StreamExchange Single from 1 Fragment 1 - StreamHashAgg { group_key: [Vnode(t._row_id)], aggs: [count, max(t.v), count(t.v)] } + StreamHashAgg { group_key: [$expr1], aggs: [count, max(t.v), count(t.v)] } result table: 3, state tables: [2] - StreamProject { exprs: [t.v, t._row_id, Vnode(t._row_id)] } + StreamProject { exprs: [t.v, t._row_id, Vnode(t._row_id) as $expr1] } Chain { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } Upstream BatchPlanNode - Table 0 { columns: [max(t_v), Vnode(t__row_id)], primary key: [$0 DESC, $1 ASC], value indices: [0, 1], distribution key: [] } + Table 0 { columns: [max(t_v), $expr1], primary key: [$0 DESC, $1 ASC], value indices: [0, 1], distribution key: [] } Table 1 { columns: [count, max(max(t_v)), sum0(count(t_v))], primary key: [], value indices: [0, 1, 2], distribution key: [] } - Table 2 { columns: [Vnode(t__row_id), t_v, t__row_id], primary key: [$0 ASC, $1 DESC, $2 ASC], value indices: [1, 2], distribution key: [2], vnode column idx: 0 } - Table 3 { columns: [Vnode(t__row_id), count, max(t_v), count(t_v)], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [], vnode column idx: 0 } + Table 2 { columns: [$expr1, t_v, t__row_id], primary key: [$0 ASC, $1 DESC, $2 ASC], value indices: [1, 2], distribution key: [2], vnode column idx: 0 } + Table 3 { columns: [$expr1, count, max(t_v), count(t_v)], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [], vnode column idx: 0 } Table 4294967294 { columns: [a1, a2], primary key: [], value indices: [0, 1], distribution key: [] } - id: extreme_count_on_AO before: diff --git a/src/frontend/planner_test/tests/testdata/struct_field_access.yaml b/src/frontend/planner_test/tests/testdata/struct_field_access.yaml index 2a6bfeaa43bd5..da1fe76a6af81 100644 --- a/src/frontend/planner_test/tests/testdata/struct_field_access.yaml +++ b/src/frontend/planner_test/tests/testdata/struct_field_access.yaml @@ -7,39 +7,39 @@ create table t(c STRUCT); select (t.c).x from t logical_plan: | - LogicalProject { exprs: [Field(t.c, 0:Int32)] } + LogicalProject { exprs: [Field(t.c, 0:Int32) as $expr1] } └─LogicalScan { table: t, columns: [t.c, t._row_id] } - sql: | create table t(c STRUCT); select (c).x from t logical_plan: | - LogicalProject { exprs: [Field(t.c, 0:Int32)] } + LogicalProject { exprs: [Field(t.c, 0:Int32) as $expr1] } └─LogicalScan { table: t, columns: [t.c, t._row_id] } - sql: | create table t(c STRUCT); -- select (t).c.x from t select (t.c).x from t logical_plan: | - LogicalProject { exprs: [Field(t.c, 0:Int32)] } + LogicalProject { exprs: [Field(t.c, 0:Int32) as $expr1] } └─LogicalScan { table: t, columns: [t.c, t._row_id] } - sql: | create table t(c STRUCT); select (t.c).* from t logical_plan: | - LogicalProject { exprs: [Field(t.c, 0:Int32), Field(t.c, 1:Int32)] } + LogicalProject { exprs: [Field(t.c, 0:Int32) as $expr1, Field(t.c, 1:Int32) as $expr2] } └─LogicalScan { table: t, columns: [t.c, t._row_id] } - sql: | create table t(c STRUCT); select (c).* from t logical_plan: | - LogicalProject { exprs: [Field(t.c, 0:Int32), Field(t.c, 1:Int32)] } + LogicalProject { exprs: [Field(t.c, 0:Int32) as $expr1, Field(t.c, 1:Int32) as $expr2] } └─LogicalScan { table: t, columns: [t.c, t._row_id] } - sql: | create table t(c STRUCT); -- select (t).c.* from t select (t.c).* from t logical_plan: | - LogicalProject { exprs: [Field(t.c, 0:Int32), Field(t.c, 1:Int32)] } + LogicalProject { exprs: [Field(t.c, 0:Int32) as $expr1, Field(t.c, 1:Int32) as $expr2] } 
└─LogicalScan { table: t, columns: [t.c, t._row_id] } - sql: | create table t1(c STRUCT); @@ -70,7 +70,7 @@ create table s.t(a STRUCT); select (s.t.a).b from s.t; logical_plan: | - LogicalProject { exprs: [Field(t.a, 0:Int32)] } + LogicalProject { exprs: [Field(t.a, 0:Int32) as $expr1] } └─LogicalScan { table: t, columns: [t.a, t._row_id] } - sql: | create schema s; @@ -78,7 +78,7 @@ -- select (s.t).a.b from s.t; select (s.t.a).b from s.t; logical_plan: | - LogicalProject { exprs: [Field(t.a, 0:Int32)] } + LogicalProject { exprs: [Field(t.a, 0:Int32) as $expr1] } └─LogicalScan { table: t, columns: [t.a, t._row_id] } - sql: | create schema s; @@ -86,7 +86,7 @@ -- select ((s.t).a).b from s.t; select (s.t.a).b from s.t; logical_plan: | - LogicalProject { exprs: [Field(t.a, 0:Int32)] } + LogicalProject { exprs: [Field(t.a, 0:Int32) as $expr1] } └─LogicalScan { table: t, columns: [t.a, t._row_id] } - sql: | create schema s; @@ -94,7 +94,7 @@ -- select (s.t).a.* from s.t; select (s.t.a).* from s.t; logical_plan: | - LogicalProject { exprs: [Field(t.a, 0:Int32), Field(t.a, 1:Int32)] } + LogicalProject { exprs: [Field(t.a, 0:Int32) as $expr1, Field(t.a, 1:Int32) as $expr2] } └─LogicalScan { table: t, columns: [t.a, t._row_id] } - sql: | create schema s; @@ -102,31 +102,31 @@ -- select ((s.t).a).* from s.t; select (s.t.a).* from s.t; logical_plan: | - LogicalProject { exprs: [Field(t.a, 0:Int32), Field(t.a, 1:Int32)] } + LogicalProject { exprs: [Field(t.a, 0:Int32) as $expr1, Field(t.a, 1:Int32) as $expr2] } └─LogicalScan { table: t, columns: [t.a, t._row_id] } - sql: | create schema t; create table t.t(t STRUCT); select (t.t.t).t from t.t; logical_plan: | - LogicalProject { exprs: [Field(t.t, 0:Int32)] } + LogicalProject { exprs: [Field(t.t, 0:Int32) as $expr1] } └─LogicalScan { table: t, columns: [t.t, t._row_id] } - sql: | create schema t; create table t.t(t STRUCT>); select (t.t.t).t.t from t.t; logical_plan: | - LogicalProject { exprs: [Field(Field(t.t, 0:Int32), 0:Int32)] } + LogicalProject { exprs: [Field(Field(t.t, 0:Int32), 0:Int32) as $expr1] } └─LogicalScan { table: t, columns: [t.t, t._row_id] } - sql: | create table t(c STRUCT[]); select (c[1]).x from t logical_plan: | - LogicalProject { exprs: [Field(ArrayAccess(t.c, 1:Int32), 0:Int32)] } + LogicalProject { exprs: [Field(ArrayAccess(t.c, 1:Int32), 0:Int32) as $expr1] } └─LogicalScan { table: t, columns: [t.c, t._row_id] } - sql: | create table t(c STRUCT); select (case when true then c end).x from t logical_plan: | - LogicalProject { exprs: [Field(Case(true:Boolean, t.c), 0:Int32)] } + LogicalProject { exprs: [Field(Case(true:Boolean, t.c), 0:Int32) as $expr1] } └─LogicalScan { table: t, columns: [t.c, t._row_id] } diff --git a/src/frontend/planner_test/tests/testdata/struct_query.yaml b/src/frontend/planner_test/tests/testdata/struct_query.yaml index b78f32ff84c0f..8d90395ecceb9 100644 --- a/src/frontend/planner_test/tests/testdata/struct_query.yaml +++ b/src/frontend/planner_test/tests/testdata/struct_query.yaml @@ -34,7 +34,7 @@ -- select (t).country.city,(t).country,(country).city.address from t; select (t.country).city,t.country,(country).city.address from t; logical_plan: | - LogicalProject { exprs: [Field(t.country, 1:Int32), t.country, Field(Field(t.country, 1:Int32), 0:Int32)] } + LogicalProject { exprs: [Field(t.country, 1:Int32) as $expr1, t.country, Field(Field(t.country, 1:Int32), 0:Int32) as $expr2] } └─LogicalScan { table: t, columns: [t.id, t.country, t.zipcode, t.rate, t._row_id] } create_source: row_format: protobuf 
@@ -70,14 +70,14 @@ -- select (t).country.city.*,(t.country).*,(country).city.* from t; select (t.country).city.*,(t.country).*,(country).city.* from t; logical_plan: | - LogicalProject { exprs: [Field(Field(t.country, 1:Int32), 0:Int32), Field(Field(t.country, 1:Int32), 1:Int32), Field(t.country, 0:Int32), Field(t.country, 1:Int32), Field(t.country, 2:Int32), Field(Field(t.country, 1:Int32), 0:Int32), Field(Field(t.country, 1:Int32), 1:Int32)] } + LogicalProject { exprs: [Field(Field(t.country, 1:Int32), 0:Int32) as $expr1, Field(Field(t.country, 1:Int32), 1:Int32) as $expr2, Field(t.country, 0:Int32) as $expr3, Field(t.country, 1:Int32) as $expr4, Field(t.country, 2:Int32) as $expr5, Field(Field(t.country, 1:Int32), 0:Int32) as $expr6, Field(Field(t.country, 1:Int32), 1:Int32) as $expr7] } └─LogicalScan { table: t, columns: [t.id, t.country, t.zipcode, t.rate, t._row_id] } - sql: | create materialized view t as select * from s; -- select (t).country1.city.*,(t.country2).*,(country3).city.* from t; select (t.country1).city.*,(t.country2).*,(country3).city.* from t; logical_plan: | - LogicalProject { exprs: [Field(Field(t.country1, 1:Int32), 0:Int32), Field(Field(t.country1, 1:Int32), 1:Int32), Field(t.country2, 0:Int32), Field(t.country2, 1:Int32), Field(t.country2, 2:Int32), Field(Field(t.country3, 1:Int32), 0:Int32), Field(Field(t.country3, 1:Int32), 1:Int32)] } + LogicalProject { exprs: [Field(Field(t.country1, 1:Int32), 0:Int32) as $expr1, Field(Field(t.country1, 1:Int32), 1:Int32) as $expr2, Field(t.country2, 0:Int32) as $expr3, Field(t.country2, 1:Int32) as $expr4, Field(t.country2, 2:Int32) as $expr5, Field(Field(t.country3, 1:Int32), 0:Int32) as $expr6, Field(Field(t.country3, 1:Int32), 1:Int32) as $expr7] } └─LogicalScan { table: t, columns: [t.id, t.country1, t.country2, t.country3, t.zipcode, t.rate, t._row_id] } create_source: row_format: protobuf @@ -107,8 +107,8 @@ -- select (c).zipcode from (select (t).country.city as c from t); select (c).zipcode from (select (t.country).city as c from t); logical_plan: | - LogicalProject { exprs: [Field(Field(t.country, 1:Int32), 1:Int32)] } - └─LogicalProject { exprs: [Field(t.country, 1:Int32)] } + LogicalProject { exprs: [Field($expr1, 1:Int32) as $expr2] } + └─LogicalProject { exprs: [Field(t.country, 1:Int32) as $expr1] } └─LogicalScan { table: t, columns: [t.id, t.country, t.zipcode, t.rate, t._row_id] } create_source: row_format: protobuf @@ -136,10 +136,10 @@ -- select (c).zipcode from (select min((t).country.city) as c from t); select (c).zipcode from (select min((t.country).city) as c from t); logical_plan: | - LogicalProject { exprs: [Field(min(Field(t.country, 1:Int32)), 1:Int32)] } - └─LogicalProject { exprs: [min(Field(t.country, 1:Int32))] } - └─LogicalAgg { aggs: [min(Field(t.country, 1:Int32))] } - └─LogicalProject { exprs: [Field(t.country, 1:Int32)] } + LogicalProject { exprs: [Field(min($expr1), 1:Int32) as $expr2] } + └─LogicalProject { exprs: [min($expr1)] } + └─LogicalAgg { aggs: [min($expr1)] } + └─LogicalProject { exprs: [Field(t.country, 1:Int32) as $expr1] } └─LogicalScan { table: t, columns: [t.id, t.country, t.zipcode, t.rate, t._row_id] } create_source: row_format: protobuf @@ -166,9 +166,9 @@ create materialized view t as select * from s; select * from (select (country).city as c from t) as vv join t on (c).zipcode=(t.country).zipcode; logical_plan: | - LogicalProject { exprs: [Field(t.country, 1:Int32), t.id, t.country, t.zipcode, t.rate] } - └─LogicalJoin { type: Inner, on: (Field(Field(t.country, 
1:Int32), 1:Int32) = Field(t.country, 2:Int32)), output: all } - ├─LogicalProject { exprs: [Field(t.country, 1:Int32)] } + LogicalProject { exprs: [$expr1, t.id, t.country, t.zipcode, t.rate] } + └─LogicalJoin { type: Inner, on: (Field($expr1, 1:Int32) = Field(t.country, 2:Int32)), output: all } + ├─LogicalProject { exprs: [Field(t.country, 1:Int32) as $expr1] } | └─LogicalScan { table: t, columns: [t.id, t.country, t.zipcode, t.rate, t._row_id] } └─LogicalScan { table: t, columns: [t.id, t.country, t.zipcode, t.rate, t._row_id] } create_source: @@ -196,9 +196,9 @@ create materialized view t as select * from s; select min((t.country).city.address) + max((t.country).city.address) * count(zipcode) from t; logical_plan: | - LogicalProject { exprs: [(min(Field(Field(t.country, 1:Int32), 0:Int32)) + (max(Field(Field(t.country, 1:Int32), 0:Int32)) * count(t.zipcode)))] } - └─LogicalAgg { aggs: [min(Field(Field(t.country, 1:Int32), 0:Int32)), max(Field(Field(t.country, 1:Int32), 0:Int32)), count(t.zipcode)] } - └─LogicalProject { exprs: [Field(Field(t.country, 1:Int32), 0:Int32), t.zipcode] } + LogicalProject { exprs: [(min($expr1) + (max($expr1) * count(t.zipcode))) as $expr2] } + └─LogicalAgg { aggs: [min($expr1), max($expr1), count(t.zipcode)] } + └─LogicalProject { exprs: [Field(Field(t.country, 1:Int32), 0:Int32) as $expr1, t.zipcode] } └─LogicalScan { table: t, columns: [t.id, t.country, t.zipcode, t.rate, t._row_id] } create_source: row_format: protobuf @@ -225,9 +225,9 @@ create materialized view t as select * from s; select count(1), count((country).city.zipcode) from t where (country).city.address>1; logical_plan: | - LogicalProject { exprs: [count(1:Int32), count(Field(Field(t.country, 1:Int32), 1:Int32))] } - └─LogicalAgg { aggs: [count(1:Int32), count(Field(Field(t.country, 1:Int32), 1:Int32))] } - └─LogicalProject { exprs: [1:Int32, Field(Field(t.country, 1:Int32), 1:Int32)] } + LogicalProject { exprs: [count(1:Int32), count($expr1)] } + └─LogicalAgg { aggs: [count(1:Int32), count($expr1)] } + └─LogicalProject { exprs: [1:Int32, Field(Field(t.country, 1:Int32), 1:Int32) as $expr1] } └─LogicalFilter { predicate: (Field(Field(t.country, 1:Int32), 0:Int32) > 1:Int32) } └─LogicalScan { table: t, columns: [t.id, t.country, t.zipcode, t.rate, t._row_id] } create_source: @@ -398,5 +398,5 @@ sql: | select CASE WHEN false THEN ROW(0, INTERVAL '1') WHEN true THEN ROW(1.1, INTERVAL '1') ELSE ROW(1, INTERVAL '1') END; logical_plan: | - LogicalProject { exprs: [Case(false:Boolean, Row(0:Int32::Decimal, '00:00:01':Interval), true:Boolean, Row(1.1:Decimal, '00:00:01':Interval), Row(1:Int32::Decimal, '00:00:01':Interval))] } + LogicalProject { exprs: [Case(false:Boolean, Row(0:Int32::Decimal, '00:00:01':Interval), true:Boolean, Row(1.1:Decimal, '00:00:01':Interval), Row(1:Int32::Decimal, '00:00:01':Interval)) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } diff --git a/src/frontend/planner_test/tests/testdata/subquery.yaml b/src/frontend/planner_test/tests/testdata/subquery.yaml index 6551564c73e02..0bca90173b309 100644 --- a/src/frontend/planner_test/tests/testdata/subquery.yaml +++ b/src/frontend/planner_test/tests/testdata/subquery.yaml @@ -106,11 +106,11 @@ ) as t0 ); optimized_logical_plan: | - LogicalJoin { type: LeftSemi, on: IsNotDistinctFrom(ab.a, bc.b), output: all } + LogicalJoin { type: LeftSemi, on: IsNotDistinctFrom(ab.a, t.v1), output: all } ├─LogicalScan { table: ab, columns: [ab.a, ab.b] } └─LogicalJoin { type: Inner, on: true, output: all } - 
├─LogicalScan { table: bc, columns: [bc.b] } - └─LogicalScan { table: t, output_columns: [], required_columns: [t.v1], predicate: IsNotNull(t.v1) } + ├─LogicalScan { table: bc, columns: [] } + └─LogicalScan { table: t, columns: [t.v1], predicate: IsNotNull(t.v1) } - name: We cannot reference columns in left table if not lateral sql: | create table ab (a int, b int); @@ -160,8 +160,7 @@ └─BatchNestedLoopJoin { type: Inner, predicate: true, output: all } ├─BatchSimpleAgg { aggs: [] } | └─BatchExchange { order: [], dist: Single } - | └─BatchSimpleAgg { aggs: [] } - | └─BatchScan { table: t1, columns: [], distribution: SomeShard } + | └─BatchScan { table: t1, columns: [], distribution: SomeShard } └─BatchExchange { order: [], dist: Single } └─BatchScan { table: t2, columns: [], distribution: SomeShard } - sql: | @@ -178,7 +177,7 @@ AND pg_catalog.pg_table_is_visible(c.oid) ORDER BY 1,2; logical_plan: | - LogicalProject { exprs: [pg_namespace.nspname, pg_class.relname, Case((pg_class.relkind = 'r':Varchar), 'table':Varchar, (pg_class.relkind = 'v':Varchar), 'view':Varchar, (pg_class.relkind = 'm':Varchar), 'materialized view':Varchar, (pg_class.relkind = 'i':Varchar), 'index':Varchar, (pg_class.relkind = 'S':Varchar), 'sequence':Varchar, (pg_class.relkind = 's':Varchar), 'special':Varchar, (pg_class.relkind = 't':Varchar), 'TOAST table':Varchar, (pg_class.relkind = 'f':Varchar), 'foreign table':Varchar, (pg_class.relkind = 'p':Varchar), 'partitioned table':Varchar, (pg_class.relkind = 'I':Varchar), 'partitioned index':Varchar), pg_user.name] } + LogicalProject { exprs: [pg_namespace.nspname, pg_class.relname, Case((pg_class.relkind = 'r':Varchar), 'table':Varchar, (pg_class.relkind = 'v':Varchar), 'view':Varchar, (pg_class.relkind = 'm':Varchar), 'materialized view':Varchar, (pg_class.relkind = 'i':Varchar), 'index':Varchar, (pg_class.relkind = 'S':Varchar), 'sequence':Varchar, (pg_class.relkind = 's':Varchar), 'special':Varchar, (pg_class.relkind = 't':Varchar), 'TOAST table':Varchar, (pg_class.relkind = 'f':Varchar), 'foreign table':Varchar, (pg_class.relkind = 'p':Varchar), 'partitioned table':Varchar, (pg_class.relkind = 'I':Varchar), 'partitioned index':Varchar) as $expr1, pg_user.name] } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 1, max_one_row: true } ├─LogicalFilter { predicate: In(pg_class.relkind, 'r':Varchar, 'p':Varchar, 'v':Varchar, 'm':Varchar, 'S':Varchar, 'f':Varchar, '':Varchar) AND (pg_namespace.nspname <> 'pg_catalog':Varchar) AND IsNull(RegexpMatch(pg_namespace.nspname, '^pg_toast':Varchar)) AND (pg_namespace.nspname <> 'information_schema':Varchar) } | └─LogicalJoin { type: LeftOuter, on: (pg_namespace.oid = pg_class.relnamespace), output: all } @@ -189,7 +188,7 @@ └─LogicalScan { table: pg_user, columns: [pg_user.usesysid, pg_user.name, pg_user.usecreatedb, pg_user.usesuper, pg_user.passwd] } batch_plan: | BatchExchange { order: [pg_namespace.nspname ASC, pg_class.relname ASC], dist: Single } - └─BatchProject { exprs: [pg_namespace.nspname, pg_class.relname, Case((pg_class.relkind = 'r':Varchar), 'table':Varchar, (pg_class.relkind = 'v':Varchar), 'view':Varchar, (pg_class.relkind = 'm':Varchar), 'materialized view':Varchar, (pg_class.relkind = 'i':Varchar), 'index':Varchar, (pg_class.relkind = 'S':Varchar), 'sequence':Varchar, (pg_class.relkind = 's':Varchar), 'special':Varchar, (pg_class.relkind = 't':Varchar), 'TOAST table':Varchar, (pg_class.relkind = 'f':Varchar), 'foreign table':Varchar, (pg_class.relkind = 'p':Varchar), 'partitioned table':Varchar, 
(pg_class.relkind = 'I':Varchar), 'partitioned index':Varchar), pg_user.name] } + └─BatchProject { exprs: [pg_namespace.nspname, pg_class.relname, Case((pg_class.relkind = 'r':Varchar), 'table':Varchar, (pg_class.relkind = 'v':Varchar), 'view':Varchar, (pg_class.relkind = 'm':Varchar), 'materialized view':Varchar, (pg_class.relkind = 'i':Varchar), 'index':Varchar, (pg_class.relkind = 'S':Varchar), 'sequence':Varchar, (pg_class.relkind = 's':Varchar), 'special':Varchar, (pg_class.relkind = 't':Varchar), 'TOAST table':Varchar, (pg_class.relkind = 'f':Varchar), 'foreign table':Varchar, (pg_class.relkind = 'p':Varchar), 'partitioned table':Varchar, (pg_class.relkind = 'I':Varchar), 'partitioned index':Varchar) as $expr23, pg_user.name] } └─BatchSort { order: [pg_namespace.nspname ASC, pg_class.relname ASC] } └─BatchHashJoin { type: LeftOuter, predicate: pg_class.relowner = pg_user.usesysid, output: [pg_class.relname, pg_class.relkind, pg_namespace.nspname, pg_user.name] } ├─BatchExchange { order: [], dist: HashShard(pg_class.relowner) } @@ -211,30 +210,32 @@ LogicalProject { exprs: [auction.date_time, window_start, window_end] } └─LogicalApply { type: LeftSemi, on: true, correlated_id: 1 } ├─LogicalHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: all } - | └─LogicalScan { table: auction, columns: [auction.date_time, auction._row_id] } - └─LogicalProject { exprs: [CorrelatedInputRef { index: 0, correlated_id: 1 }] } - └─LogicalAgg { group_key: [CorrelatedInputRef { index: 0, correlated_id: 1 }], aggs: [] } - └─LogicalProject { exprs: [CorrelatedInputRef { index: 0, correlated_id: 1 }] } + | └─LogicalFilter { predicate: IsNotNull(auction.date_time) } + | └─LogicalScan { table: auction, columns: [auction.date_time, auction._row_id] } + └─LogicalProject { exprs: [CorrelatedInputRef { index: 0, correlated_id: 1 } as $expr2] } + └─LogicalAgg { group_key: [$expr1], aggs: [] } + └─LogicalProject { exprs: [CorrelatedInputRef { index: 0, correlated_id: 1 } as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } optimized_logical_plan: | LogicalJoin { type: LeftSemi, on: IsNotDistinctFrom(auction.date_time, auction.date_time), output: all } - ├─LogicalShare { id = 121 } + ├─LogicalShare { id = 125 } | └─LogicalHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: all } - | └─LogicalScan { table: auction, columns: [auction.date_time] } + | └─LogicalScan { table: auction, columns: [auction.date_time], predicate: IsNotNull(auction.date_time) } └─LogicalProject { exprs: [auction.date_time] } └─LogicalAgg { group_key: [auction.date_time, auction.date_time], aggs: [] } └─LogicalJoin { type: Inner, on: true, output: [auction.date_time, auction.date_time] } ├─LogicalAgg { group_key: [auction.date_time], aggs: [] } - | └─LogicalShare { id = 121 } + | └─LogicalShare { id = 125 } | └─LogicalHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: all } - | └─LogicalScan { table: auction, columns: [auction.date_time] } + | └─LogicalScan { table: auction, columns: [auction.date_time], predicate: IsNotNull(auction.date_time) } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } batch_plan: | BatchExchange { order: [], dist: Single } └─BatchHashJoin { type: LeftSemi, predicate: auction.date_time IS NOT DISTINCT FROM auction.date_time, output: all } ├─BatchHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: all } | └─BatchExchange { order: [], dist: HashShard(auction.date_time) 
} - | └─BatchScan { table: auction, columns: [auction.date_time], distribution: SomeShard } + | └─BatchFilter { predicate: IsNotNull(auction.date_time) } + | └─BatchScan { table: auction, columns: [auction.date_time], distribution: SomeShard } └─BatchExchange { order: [], dist: HashShard(auction.date_time) } └─BatchProject { exprs: [auction.date_time] } └─BatchHashAgg { group_key: [auction.date_time, auction.date_time], aggs: [] } @@ -244,7 +245,8 @@ | └─BatchHashAgg { group_key: [auction.date_time], aggs: [] } | └─BatchHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: all } | └─BatchExchange { order: [], dist: HashShard(auction.date_time) } - | └─BatchScan { table: auction, columns: [auction.date_time], distribution: SomeShard } + | └─BatchFilter { predicate: IsNotNull(auction.date_time) } + | └─BatchScan { table: auction, columns: [auction.date_time], distribution: SomeShard } └─BatchValues { rows: [[]] } stream_error: |- Feature is not yet implemented: Stream values executor is unimplemented! @@ -281,7 +283,7 @@ ├─LogicalAgg { aggs: [] } | └─LogicalProject { exprs: [] } | └─LogicalScan { table: b, columns: [b.b1, b.b2, b._row_id] } - └─LogicalProject { exprs: [CorrelatedInputRef { index: 0, correlated_id: 1 }] } + └─LogicalProject { exprs: [CorrelatedInputRef { index: 0, correlated_id: 1 } as $expr1] } └─LogicalScan { table: c, columns: [c.c1, c.c2, c._row_id] } optimized_logical_plan: | LogicalProject { exprs: [1:Int32] } @@ -322,21 +324,20 @@ └─LogicalFilter { predicate: (b.b1 = CorrelatedInputRef { index: 0, correlated_id: 1 }) } └─LogicalScan { table: b, columns: [b.b1, b.b2, b._row_id] } optimized_logical_plan: | - LogicalJoin { type: Inner, on: IsNotDistinctFrom(a.a1, a.a1) AND (a.a1 = min(b.b2)), output: [a.a1, a.a2] } + LogicalJoin { type: Inner, on: IsNotDistinctFrom(a.a1, a.a1) AND (a.a1 = min(b.b1)), output: [a.a1, a.a2] } ├─LogicalScan { table: a, columns: [a.a1, a.a2] } - └─LogicalAgg { group_key: [a.a1], aggs: [min(b.b2)] } - └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(a.a1, b.b1), output: [a.a1, b.b2] } + └─LogicalAgg { group_key: [a.a1], aggs: [min(b.b1)] } + └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(a.a1, a.a1), output: [a.a1, b.b1] } ├─LogicalAgg { group_key: [a.a1], aggs: [] } | └─LogicalScan { table: a, columns: [a.a1] } - └─LogicalJoin { type: Inner, on: (b.b2 = min(b.b1)), output: [b.b1, b.b2] } + └─LogicalJoin { type: Inner, on: (b.b2 = min(b.b1)), output: [a.a1, b.b1] } ├─LogicalScan { table: b, columns: [b.b1, b.b2] } - └─LogicalProject { exprs: [min(b.b1)] } - └─LogicalAgg { group_key: [a.a1], aggs: [min(b.b1)] } - └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(a.a1, b.b1), output: [a.a1, b.b1] } - ├─LogicalAgg { group_key: [a.a1], aggs: [] } - | └─LogicalScan { table: a, columns: [a.a1] } - └─LogicalProject { exprs: [b.b1, b.b1] } - └─LogicalScan { table: b, columns: [b.b1], predicate: IsNotNull(b.b1) } + └─LogicalAgg { group_key: [a.a1], aggs: [min(b.b1)] } + └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(a.a1, b.b1), output: [a.a1, b.b1] } + ├─LogicalAgg { group_key: [a.a1], aggs: [] } + | └─LogicalScan { table: a, columns: [a.a1] } + └─LogicalProject { exprs: [b.b1, b.b1] } + └─LogicalScan { table: b, columns: [b.b1], predicate: IsNotNull(b.b1) } - name: test subquery in join on condition sql: | create table a (v1 int, v2 int); diff --git a/src/frontend/planner_test/tests/testdata/subquery_expr.yaml b/src/frontend/planner_test/tests/testdata/subquery_expr.yaml index 
0823142650dba..415988e73635d 100644 --- a/src/frontend/planner_test/tests/testdata/subquery_expr.yaml +++ b/src/frontend/planner_test/tests/testdata/subquery_expr.yaml @@ -10,8 +10,7 @@ optimized_logical_plan: | LogicalJoin { type: LeftOuter, on: true, output: all } ├─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - └─LogicalProject { exprs: [1:Int32] } - └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } + └─LogicalValues { rows: [[1:Int32]], schema: Schema { fields: [1:Int32:Int32] } } - sql: | create table t(x int); select (select x from t), 1 from t; @@ -69,7 +68,7 @@ create table t(x int); select (select x from t) + 1 from t; logical_plan: | - LogicalProject { exprs: [(t.x + 1:Int32)] } + LogicalProject { exprs: [(t.x + 1:Int32) as $expr1] } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 1, max_one_row: true } ├─LogicalScan { table: t, columns: [t.x, t._row_id] } └─LogicalProject { exprs: [t.x] } @@ -92,10 +91,10 @@ create table t(x int); select x + (select x + (select x as v1 from t) as v2 from t) as v3 from t; logical_plan: | - LogicalProject { exprs: [(t.x + (t.x + t.x))] } + LogicalProject { exprs: [(t.x + $expr1) as $expr2] } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 1, max_one_row: true } ├─LogicalScan { table: t, columns: [t.x, t._row_id] } - └─LogicalProject { exprs: [(t.x + t.x)] } + └─LogicalProject { exprs: [(t.x + t.x) as $expr1] } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 2, max_one_row: true } ├─LogicalScan { table: t, columns: [t.x, t._row_id] } └─LogicalProject { exprs: [t.x] } @@ -184,25 +183,25 @@ select 1 where (not exists (values (1))) and (1>0 or exists (values (1))) logical_plan: | LogicalProject { exprs: [1:Int32] } - └─LogicalFilter { predicate: ((1:Int32 > 0:Int32) OR (count >= 1:Int32)) } + └─LogicalFilter { predicate: ((1:Int32 > 0:Int32) OR $expr1) } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 2, max_one_row: true } ├─LogicalApply { type: LeftAnti, on: true, correlated_id: 1 } | ├─LogicalValues { rows: [[]], schema: Schema { fields: [] } } | └─LogicalValues { rows: [[1:Int32]], schema: Schema { fields: [*VALUES*_0.column_0:Int32] } } - └─LogicalProject { exprs: [(count >= 1:Int32)] } + └─LogicalProject { exprs: [(count >= 1:Int32) as $expr1] } └─LogicalAgg { aggs: [count] } └─LogicalValues { rows: [[1:Int32]], schema: Schema { fields: [*VALUES*_1.column_0:Int32] } } - sql: | select a + 1, b::varchar, c from (values (1, 2, 3), (4, 5, 6)) t(a, b, c); logical_plan: | - LogicalProject { exprs: [(*VALUES*_0.column_0 + 1:Int32), *VALUES*_0.column_1::Varchar, *VALUES*_0.column_2] } + LogicalProject { exprs: [(*VALUES*_0.column_0 + 1:Int32) as $expr1, *VALUES*_0.column_1::Varchar as $expr2, *VALUES*_0.column_2] } └─LogicalValues { rows: [[1:Int32, 2:Int32, 3:Int32], [4:Int32, 5:Int32, 6:Int32]], schema: Schema { fields: [*VALUES*_0.column_0:Int32, *VALUES*_0.column_1:Int32, *VALUES*_0.column_2:Int32] } } - sql: | select sum(a), max(b + c + 10), string_agg(c::varchar || '~', ',') from (values (1, 2, 3), (4, 5, 6)) as t(a, b, c); logical_plan: | - LogicalProject { exprs: [sum(*VALUES*_0.column_0), max(((*VALUES*_0.column_1 + *VALUES*_0.column_2) + 10:Int32)), string_agg(ConcatOp(*VALUES*_0.column_2::Varchar, '~':Varchar), ',':Varchar)] } - └─LogicalAgg { aggs: [sum(*VALUES*_0.column_0), max(((*VALUES*_0.column_1 + *VALUES*_0.column_2) + 10:Int32)), string_agg(ConcatOp(*VALUES*_0.column_2::Varchar, '~':Varchar), ',':Varchar)] } - └─LogicalProject { exprs: [*VALUES*_0.column_0, 
((*VALUES*_0.column_1 + *VALUES*_0.column_2) + 10:Int32), ConcatOp(*VALUES*_0.column_2::Varchar, '~':Varchar), ',':Varchar] } + LogicalProject { exprs: [sum(*VALUES*_0.column_0), max($expr1), string_agg($expr2, ',':Varchar)] } + └─LogicalAgg { aggs: [sum(*VALUES*_0.column_0), max($expr1), string_agg($expr2, ',':Varchar)] } + └─LogicalProject { exprs: [*VALUES*_0.column_0, ((*VALUES*_0.column_1 + *VALUES*_0.column_2) + 10:Int32) as $expr1, ConcatOp(*VALUES*_0.column_2::Varchar, '~':Varchar) as $expr2, ',':Varchar] } └─LogicalValues { rows: [[1:Int32, 2:Int32, 3:Int32], [4:Int32, 5:Int32, 6:Int32]], schema: Schema { fields: [*VALUES*_0.column_0:Int32, *VALUES*_0.column_1:Int32, *VALUES*_0.column_2:Int32] } } - sql: | select 1 + (select 2 from t); diff --git a/src/frontend/planner_test/tests/testdata/subquery_expr_correlated.yaml b/src/frontend/planner_test/tests/testdata/subquery_expr_correlated.yaml index 2c7679cbec013..2bfef446c612e 100644 --- a/src/frontend/planner_test/tests/testdata/subquery_expr_correlated.yaml +++ b/src/frontend/planner_test/tests/testdata/subquery_expr_correlated.yaml @@ -5,19 +5,19 @@ select * from t1 where x > (select 1.5 * min(x) from t2 where t1.y=t2.y and t2.y = 1000) logical_plan: | LogicalProject { exprs: [t1.x, t1.y] } - └─LogicalFilter { predicate: (t1.x > (1.5:Decimal * min(t2.x))) } + └─LogicalFilter { predicate: (t1.x > $expr1) } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 1, max_one_row: true } ├─LogicalScan { table: t1, columns: [t1.x, t1.y, t1._row_id] } - └─LogicalProject { exprs: [(1.5:Decimal * min(t2.x))] } + └─LogicalProject { exprs: [(1.5:Decimal * min(t2.x)) as $expr1] } └─LogicalAgg { aggs: [min(t2.x)] } └─LogicalProject { exprs: [t2.x] } └─LogicalFilter { predicate: (CorrelatedInputRef { index: 1, correlated_id: 1 } = t2.y) AND (t2.y = 1000:Int32) } └─LogicalScan { table: t2, columns: [t2.x, t2.y, t2._row_id] } optimized_logical_plan: | - LogicalJoin { type: Inner, on: IsNotDistinctFrom(t1.y, t1.y) AND (t1.x::Decimal > (1.5:Decimal * min(t2.x))), output: [t1.x, t1.y] } - ├─LogicalProject { exprs: [t1.x, t1.y, t1.x::Decimal] } + LogicalJoin { type: Inner, on: IsNotDistinctFrom(t1.y, t1.y) AND ($expr29 > $expr30), output: [t1.x, t1.y] } + ├─LogicalProject { exprs: [t1.x, t1.y, t1.x::Decimal as $expr29] } | └─LogicalScan { table: t1, columns: [t1.x, t1.y] } - └─LogicalProject { exprs: [t1.y, (1.5:Decimal * min(t2.x))] } + └─LogicalProject { exprs: [t1.y, (1.5:Decimal * min(t2.x)) as $expr30] } └─LogicalAgg { group_key: [t1.y], aggs: [min(t2.x)] } └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(t1.y, t2.y), output: [t1.y, t2.x] } ├─LogicalAgg { group_key: [t1.y], aggs: [] } @@ -36,10 +36,10 @@ └─LogicalProject { exprs: [min(t2.x)] } └─LogicalAgg { aggs: [min(t2.x)] } └─LogicalProject { exprs: [t2.x] } - └─LogicalFilter { predicate: (t2.y = CorrelatedInputRef { index: 1, correlated_id: 1 }) } + └─LogicalFilter { predicate: (t2.y = $expr1) } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 2, max_one_row: true } ├─LogicalScan { table: t2, columns: [t2.x, t2.y, t2._row_id] } - └─LogicalProject { exprs: [CorrelatedInputRef { index: 1, correlated_id: 1 }] } + └─LogicalProject { exprs: [CorrelatedInputRef { index: 1, correlated_id: 1 } as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | create table t1(x int, y int); @@ -86,19 +86,19 @@ select * from t1 where x > (select 1.5 * min(x) from t2 where t1.y = t2.y); logical_plan: | LogicalProject { exprs: [t1.x, t1.y] } - └─LogicalFilter { 
predicate: (t1.x > (1.5:Decimal * min(t2.x))) } + └─LogicalFilter { predicate: (t1.x > $expr1) } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 1, max_one_row: true } ├─LogicalScan { table: t1, columns: [t1.x, t1.y, t1._row_id] } - └─LogicalProject { exprs: [(1.5:Decimal * min(t2.x))] } + └─LogicalProject { exprs: [(1.5:Decimal * min(t2.x)) as $expr1] } └─LogicalAgg { aggs: [min(t2.x)] } └─LogicalProject { exprs: [t2.x] } └─LogicalFilter { predicate: (CorrelatedInputRef { index: 1, correlated_id: 1 } = t2.y) } └─LogicalScan { table: t2, columns: [t2.x, t2.y, t2._row_id] } optimized_logical_plan: | - LogicalJoin { type: Inner, on: IsNotDistinctFrom(t1.y, t1.y) AND (t1.x::Decimal > (1.5:Decimal * min(t2.x))), output: [t1.x, t1.y] } - ├─LogicalProject { exprs: [t1.x, t1.y, t1.x::Decimal] } + LogicalJoin { type: Inner, on: IsNotDistinctFrom(t1.y, t1.y) AND ($expr29 > $expr30), output: [t1.x, t1.y] } + ├─LogicalProject { exprs: [t1.x, t1.y, t1.x::Decimal as $expr29] } | └─LogicalScan { table: t1, columns: [t1.x, t1.y] } - └─LogicalProject { exprs: [t1.y, (1.5:Decimal * min(t2.x))] } + └─LogicalProject { exprs: [t1.y, (1.5:Decimal * min(t2.x)) as $expr30] } └─LogicalAgg { group_key: [t1.y], aggs: [min(t2.x)] } └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(t1.y, t2.y), output: [t1.y, t2.x] } ├─LogicalAgg { group_key: [t1.y], aggs: [] } @@ -120,8 +120,8 @@ └─LogicalFilter { predicate: (CorrelatedInputRef { index: 1, correlated_id: 1 } = t2.y) } └─LogicalScan { table: t2, columns: [t2.x, t2.y, t2._row_id] } optimized_logical_plan: | - LogicalJoin { type: Inner, on: IsNotDistinctFrom(t1.y, t1.y) AND (t1.x::Int64 > count(1:Int32)), output: [t1.x, t1.y] } - ├─LogicalProject { exprs: [t1.x, t1.y, t1.x::Int64] } + LogicalJoin { type: Inner, on: IsNotDistinctFrom(t1.y, t1.y) AND ($expr9 > count(1:Int32)), output: [t1.x, t1.y] } + ├─LogicalProject { exprs: [t1.x, t1.y, t1.x::Int64 as $expr9] } | └─LogicalScan { table: t1, columns: [t1.x, t1.y] } └─LogicalAgg { group_key: [t1.y], aggs: [count(1:Int32)] } └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(t1.y, t2.y), output: [t1.y, 1:Int32] } @@ -135,19 +135,19 @@ select * from t1 where x > (select count(*) + count(*) from t2 where t1.y = t2.y); logical_plan: | LogicalProject { exprs: [t1.x, t1.y] } - └─LogicalFilter { predicate: (t1.x > (count + count)) } + └─LogicalFilter { predicate: (t1.x > $expr1) } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 1, max_one_row: true } ├─LogicalScan { table: t1, columns: [t1.x, t1.y, t1._row_id] } - └─LogicalProject { exprs: [(count + count)] } + └─LogicalProject { exprs: [(count + count) as $expr1] } └─LogicalAgg { aggs: [count, count] } └─LogicalProject { exprs: [] } └─LogicalFilter { predicate: (CorrelatedInputRef { index: 1, correlated_id: 1 } = t2.y) } └─LogicalScan { table: t2, columns: [t2.x, t2.y, t2._row_id] } optimized_logical_plan: | - LogicalJoin { type: Inner, on: IsNotDistinctFrom(t1.y, t1.y) AND (t1.x::Int64 > (count(1:Int32) + count(1:Int32))), output: [t1.x, t1.y] } - ├─LogicalProject { exprs: [t1.x, t1.y, t1.x::Int64] } + LogicalJoin { type: Inner, on: IsNotDistinctFrom(t1.y, t1.y) AND ($expr29 > $expr30), output: [t1.x, t1.y] } + ├─LogicalProject { exprs: [t1.x, t1.y, t1.x::Int64 as $expr29] } | └─LogicalScan { table: t1, columns: [t1.x, t1.y] } - └─LogicalProject { exprs: [t1.y, (count(1:Int32) + count(1:Int32))] } + └─LogicalProject { exprs: [t1.y, (count(1:Int32) + count(1:Int32)) as $expr30] } └─LogicalAgg { group_key: [t1.y], aggs: [count(1:Int32), 
count(1:Int32)] } └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(t1.y, t2.y), output: [t1.y, 1:Int32] } ├─LogicalAgg { group_key: [t1.y], aggs: [] } @@ -202,10 +202,9 @@ └─LogicalFilter { predicate: (CorrelatedInputRef { index: 0, correlated_id: 1 } > (t2.x + 1000:Int32)) } └─LogicalScan { table: t2, columns: [t2.x, t2.y, t2._row_id] } optimized_logical_plan: | - LogicalJoin { type: LeftSemi, on: (t1.y = t2.y) AND (t1.x > (t2.x + 1000:Int32)), output: [t1.x] } - ├─LogicalProject { exprs: [t1.x, t1.y, t1.x] } - | └─LogicalScan { table: t1, columns: [t1.x, t1.y] } - └─LogicalProject { exprs: [t2.y, (t2.x + 1000:Int32)] } + LogicalJoin { type: LeftSemi, on: (t1.y = t2.y) AND (t1.x > $expr10), output: [t1.x] } + ├─LogicalScan { table: t1, columns: [t1.x, t1.y] } + └─LogicalProject { exprs: [t2.y, (t2.x + 1000:Int32) as $expr10] } └─LogicalScan { table: t2, columns: [t2.x, t2.y] } - sql: | create table t1(x int, y int); @@ -348,7 +347,7 @@ └─LogicalProject { exprs: [1:Int32] } └─LogicalApply { type: LeftSemi, on: true, correlated_id: 2 } ├─LogicalScan { table: b, columns: [b.b1, b.b2, b._row_id] } - └─LogicalProject { exprs: [CorrelatedInputRef { index: 0, correlated_id: 2 }] } + └─LogicalProject { exprs: [CorrelatedInputRef { index: 0, correlated_id: 2 } as $expr1] } └─LogicalScan { table: c, columns: [c.c1, c.c2, c._row_id] } - name: correlated column with depth=2 in HAVING sql: | @@ -486,8 +485,8 @@ select count(*) from a where a.x=3 and a.y = (select count(*) from b where b.z = a.z and a.x = 3); optimized_logical_plan: | LogicalAgg { aggs: [count] } - └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(a.x, a.x) AND IsNotDistinctFrom(a.z, a.z) AND (a.y::Int64 = count(1:Int32)), output: [] } - ├─LogicalProject { exprs: [a.x, a.z, a.y::Int64] } + └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(a.x, a.x) AND IsNotDistinctFrom(a.z, a.z) AND ($expr9 = count(1:Int32)), output: [] } + ├─LogicalProject { exprs: [a.x, a.z, a.y::Int64 as $expr9] } | └─LogicalScan { table: a, columns: [a.x, a.y, a.z], predicate: (a.x = 3:Int32) } └─LogicalAgg { group_key: [a.x, a.z], aggs: [count(1:Int32)] } └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(a.x, a.x) AND IsNotDistinctFrom(a.z, a.z), output: [a.x, a.z, 1:Int32] } @@ -504,8 +503,8 @@ select count(*) from a where a.x=3 and a.y = (select count(*) from b where b.z = a.z); optimized_logical_plan: | LogicalAgg { aggs: [count] } - └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(a.z, a.z) AND (a.y::Int64 = count(1:Int32)), output: [] } - ├─LogicalProject { exprs: [a.z, a.y::Int64] } + └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(a.z, a.z) AND ($expr9 = count(1:Int32)), output: [] } + ├─LogicalProject { exprs: [a.z, a.y::Int64 as $expr9] } | └─LogicalScan { table: a, output_columns: [a.y, a.z], required_columns: [a.y, a.z, a.x], predicate: (a.x = 3:Int32) } └─LogicalAgg { group_key: [a.z], aggs: [count(1:Int32)] } └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(a.z, b.z), output: [a.z, 1:Int32] } @@ -533,8 +532,8 @@ select count(*) from a where a.y = (select count(distinct x) from b where b.z = a.z); optimized_logical_plan: | LogicalAgg { aggs: [count] } - └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(a.z, a.z) AND (a.y::Int64 = count(b.x)), output: [] } - ├─LogicalProject { exprs: [a.z, a.y::Int64] } + └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(a.z, a.z) AND ($expr9 = count(b.x)), output: [] } + ├─LogicalProject { exprs: [a.z, a.y::Int64 as $expr9] } | └─LogicalScan { table: a, columns: [a.y, a.z] } └─LogicalAgg { 
group_key: [a.z], aggs: [count(b.x)] } └─LogicalAgg { group_key: [a.z, b.x], aggs: [] } @@ -549,8 +548,8 @@ select count(*) from a where a.y = (select count(x) filter(where x < 100) from b where b.z = a.z); optimized_logical_plan: | LogicalAgg { aggs: [count] } - └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(a.z, a.z) AND (a.y::Int64 = count(b.x) filter((b.x < 100:Int32))), output: [] } - ├─LogicalProject { exprs: [a.z, a.y::Int64] } + └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(a.z, a.z) AND ($expr9 = count(b.x) filter((b.x < 100:Int32))), output: [] } + ├─LogicalProject { exprs: [a.z, a.y::Int64 as $expr9] } | └─LogicalScan { table: a, columns: [a.y, a.z] } └─LogicalAgg { group_key: [a.z], aggs: [count(b.x) filter((b.x < 100:Int32))] } └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(a.z, b.z), output: [a.z, b.x] } @@ -625,9 +624,9 @@ create table t3(x int, y int); select * from t1 where exists(select t2.x from t2 right join t3 on t2.x = t3.x and t1.y = t2.y and t1.y = t3.y); optimized_logical_plan: | - LogicalJoin { type: LeftSemi, on: IsNotDistinctFrom(t1.y, t1.y), output: all } + LogicalJoin { type: LeftSemi, on: IsNotDistinctFrom(t1.y, t2.y), output: all } ├─LogicalScan { table: t1, columns: [t1.x, t1.y] } - └─LogicalJoin { type: LeftOuter, on: (t2.x = t3.x) AND (t2.y = t3.y) AND IsNotDistinctFrom(t2.y, t1.y), output: [t1.y] } + └─LogicalJoin { type: LeftOuter, on: (t2.x = t3.x) AND (t2.y = t3.y) AND IsNotDistinctFrom(t2.y, t1.y), output: [t2.y] } ├─LogicalJoin { type: Inner, on: true, output: all } | ├─LogicalAgg { group_key: [t1.y], aggs: [] } | | └─LogicalScan { table: t1, columns: [t1.y] } @@ -659,9 +658,9 @@ optimized_logical_plan: | LogicalJoin { type: LeftSemi, on: IsNotDistinctFrom(t1.x, t2.x), output: all } ├─LogicalScan { table: t1, columns: [t1.x, t1.y] } - └─LogicalJoin { type: LeftSemi, on: (t2.y = (t3.y + t2.y)) AND IsNotDistinctFrom(t2.y, t2.y) AND IsNotDistinctFrom(t2.x, t3.x), output: [t2.x] } + └─LogicalJoin { type: LeftSemi, on: (t2.y = $expr24) AND IsNotDistinctFrom(t2.y, t2.y) AND IsNotDistinctFrom(t2.x, t3.x), output: [t2.x] } ├─LogicalScan { table: t2, columns: [t2.x, t2.y], predicate: IsNotNull(t2.x) } - └─LogicalProject { exprs: [t3.x, t2.y, (t3.y + t2.y)] } + └─LogicalProject { exprs: [t3.x, t2.y, (t3.y + t2.y) as $expr24] } └─LogicalJoin { type: Inner, on: true, output: [t3.x, t2.y, t3.y] } ├─LogicalAgg { group_key: [t2.y], aggs: [] } | └─LogicalScan { table: t2, columns: [t2.y] } @@ -738,3 +737,31 @@ ├─LogicalAgg { group_key: [strings.v1], aggs: [] } | └─LogicalScan { table: strings, columns: [strings.v1] } └─LogicalScan { table: strings, columns: [strings.v1] } +- name: Existential join on outer join with correlated condition + sql: | + create table t1(x int, y int); + create table t2(x int, y int); + create table t3(a varchar, z int); + select x from t1 where y in (select y from t3 full join t2 where t1.x = t2.x and z IS NOT DISTINCT FROM t2.x); + optimized_logical_plan: | + LogicalJoin { type: LeftSemi, on: (t1.y = t2.y) AND (t1.x = t2.x), output: [t1.x] } + ├─LogicalScan { table: t1, columns: [t1.x, t1.y] } + └─LogicalProject { exprs: [t2.y, t2.x] } + └─LogicalFilter { predicate: IsNotDistinctFrom(t3.z, t2.x) } + └─LogicalJoin { type: FullOuter, on: true, output: all } + ├─LogicalScan { table: t3, columns: [t3.z] } + └─LogicalScan { table: t2, columns: [t2.x, t2.y] } +- name: Correlated condition in RHS of right outer join + sql: | + create table t1(x int, y int); + create table t2(x int, y int); + create table t3(a varchar, z int); + 
select x from t1 where y in (select y from t3 right join t2 where t1.x = t2.x and z IS NOT DISTINCT FROM t2.x); + optimized_logical_plan: | + LogicalJoin { type: LeftSemi, on: (t1.y = t2.y) AND (t1.x = t2.x), output: [t1.x] } + ├─LogicalScan { table: t1, columns: [t1.x, t1.y] } + └─LogicalProject { exprs: [t2.y, t2.x] } + └─LogicalFilter { predicate: IsNotDistinctFrom(t3.z, t2.x) } + └─LogicalJoin { type: LeftOuter, on: true, output: [t3.z, t2.x, t2.y] } + ├─LogicalScan { table: t2, columns: [t2.x, t2.y] } + └─LogicalScan { table: t3, columns: [t3.z] } diff --git a/src/frontend/planner_test/tests/testdata/sysinfo_funcs.yaml b/src/frontend/planner_test/tests/testdata/sysinfo_funcs.yaml index c386aecc1a58b..c6b3a39fe411d 100644 --- a/src/frontend/planner_test/tests/testdata/sysinfo_funcs.yaml +++ b/src/frontend/planner_test/tests/testdata/sysinfo_funcs.yaml @@ -2,35 +2,33 @@ - sql: | select current_schema(); batch_plan: | - BatchProject { exprs: ['public':Varchar] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [['public':Varchar]] } - sql: | select current_schema; batch_plan: | - BatchProject { exprs: ['public':Varchar] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [['public':Varchar]] } - sql: | select session_user; batch_plan: | - BatchProject { exprs: ['root':Varchar] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [['root':Varchar]] } - sql: | select current_schemas(true); batch_plan: | - BatchProject { exprs: [ARRAY[pg_catalog, public]:List { datatype: Varchar }] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[ARRAY[pg_catalog, public]:List { datatype: Varchar }]] } - sql: | select current_schemas(false); batch_plan: | - BatchProject { exprs: [ARRAY[public]:List { datatype: Varchar }] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[ARRAY[public]:List { datatype: Varchar }]] } - sql: | select current_schemas(null); batch_plan: | - BatchProject { exprs: [null:List { datatype: Varchar }] } - └─BatchValues { rows: [[]] } + BatchValues { rows: [[null:List { datatype: Varchar }]] } - sql: | select current_schemas(true and false); binder_error: |- Feature is not yet implemented: Only boolean literals are supported in `current_schemas`. No tracking issue yet. Feel free to submit a feature request at https://github.com/risingwavelabs/risingwave/issues/new?labels=type%2Ffeature&template=feature_request.yml +- sql: | + select current_timestamp; + batch_plan: | + BatchValues { rows: [[Now('2021-04-01 00:00:00+00:00':Timestamptz)]] } diff --git a/src/frontend/planner_test/tests/testdata/temporal_filter.yaml b/src/frontend/planner_test/tests/testdata/temporal_filter.yaml new file mode 100644 index 0000000000000..e0af04db818b5 --- /dev/null +++ b/src/frontend/planner_test/tests/testdata/temporal_filter.yaml @@ -0,0 +1,58 @@ +# This file is automatically generated. See `src/frontend/planner_test/README.md` for more information. 
+- name: Temporal filter works on complex columns on LHS + sql: | + create table t1 (ts timestamp with time zone); + select * from t1 where ts + interval '1 hour' > now(); + stream_plan: | + StreamMaterialize { columns: [ts, t1._row_id(hidden)], pk_columns: [t1._row_id] } + └─StreamProject { exprs: [t1.ts, t1._row_id] } + └─StreamDynamicFilter { predicate: ($expr12 > now), output: [t1.ts, $expr12, t1._row_id] } + ├─StreamProject { exprs: [t1.ts, (t1.ts + '01:00:00':Interval) as $expr12, t1._row_id] } + | └─StreamTableScan { table: t1, columns: [t1.ts, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: Broadcast } + └─StreamNow { output: [now] } +- name: Temporal filter works on complex columns on LHS (part 2) + sql: | + create table t1 (ts timestamp with time zone, time_to_live interval); + select * from t1 where ts + time_to_live * 1.5 > now(); + stream_plan: | + StreamMaterialize { columns: [ts, time_to_live, t1._row_id(hidden)], pk_columns: [t1._row_id] } + └─StreamProject { exprs: [t1.ts, t1.time_to_live, t1._row_id] } + └─StreamDynamicFilter { predicate: ($expr12 > now), output: [t1.ts, t1.time_to_live, $expr12, t1._row_id] } + ├─StreamProject { exprs: [t1.ts, t1.time_to_live, (t1.ts + (t1.time_to_live * 1.5:Decimal)) as $expr12, t1._row_id] } + | └─StreamTableScan { table: t1, columns: [t1.ts, t1.time_to_live, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: Broadcast } + └─StreamNow { output: [now] } +- name: Temporal filter works on complex columns on LHS (part 2, flipped) + sql: | + create table t1 (ts timestamp with time zone, additional_time_to_live interval); + select * from t1 where now() - interval '15 minutes' < ts + additional_time_to_live * 1.5; + stream_plan: | + StreamMaterialize { columns: [ts, additional_time_to_live, t1._row_id(hidden)], pk_columns: [t1._row_id] } + └─StreamProject { exprs: [t1.ts, t1.additional_time_to_live, t1._row_id] } + └─StreamDynamicFilter { predicate: ($expr23 > $expr24), output: [t1.ts, t1.additional_time_to_live, $expr23, t1._row_id] } + ├─StreamProject { exprs: [t1.ts, t1.additional_time_to_live, (t1.ts + (t1.additional_time_to_live * 1.5:Decimal)) as $expr23, t1._row_id] } + | └─StreamTableScan { table: t1, columns: [t1.ts, t1.additional_time_to_live, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: Broadcast } + └─StreamProject { exprs: [(now - '00:15:00':Interval) as $expr24], watermark_columns: [(now - '00:15:00':Interval)] } + └─StreamNow { output: [now] } +- name: Temporal filter fails without `now()` in lower bound + sql: |- + create table t1 (ts timestamp with time zone); + select * from t1 where now() - interval '15 minutes' > ts; + stream_error: 'internal error: All `now()` exprs were valid, but the condition must have at least one now expr as a lower bound.' 
+- name: Temporal filter reorders now expressions correctly + sql: | + create table t1 (ts timestamp with time zone); + select * from t1 where ts < now() - interval '1 hour' and ts >= now() - interval '2 hour'; + stream_plan: | + StreamMaterialize { columns: [ts, t1._row_id(hidden)], pk_columns: [t1._row_id] } + └─StreamDynamicFilter { predicate: (t1.ts < $expr24), output: [t1.ts, t1._row_id] } + ├─StreamDynamicFilter { predicate: (t1.ts >= $expr23), output: [t1.ts, t1._row_id] } + | ├─StreamTableScan { table: t1, columns: [t1.ts, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + | └─StreamExchange { dist: Broadcast } + | └─StreamProject { exprs: [(now - '02:00:00':Interval) as $expr23], watermark_columns: [(now - '02:00:00':Interval)] } + | └─StreamNow { output: [now] } + └─StreamExchange { dist: Broadcast } + └─StreamProject { exprs: [(now - '01:00:00':Interval) as $expr24], watermark_columns: [(now - '01:00:00':Interval)] } + └─StreamNow { output: [now] } diff --git a/src/frontend/planner_test/tests/testdata/time_window.yaml b/src/frontend/planner_test/tests/testdata/time_window.yaml index 3c9eb5a11ac91..817d008bd656d 100644 --- a/src/frontend/planner_test/tests/testdata/time_window.yaml +++ b/src/frontend/planner_test/tests/testdata/time_window.yaml @@ -3,12 +3,12 @@ create table t1 (id int, created_at date); select * from tumble(t1, created_at, interval '3' day); logical_plan: | - LogicalProject { exprs: [t1.id, t1.created_at, TumbleStart(t1.created_at, '3 days':Interval), (TumbleStart(t1.created_at, '3 days':Interval) + '3 days':Interval)] } - └─LogicalProject { exprs: [t1.id, t1.created_at, t1._row_id, TumbleStart(t1.created_at, '3 days':Interval), (TumbleStart(t1.created_at, '3 days':Interval) + '3 days':Interval)] } + LogicalProject { exprs: [t1.id, t1.created_at, $expr1, $expr2] } + └─LogicalProject { exprs: [t1.id, t1.created_at, t1._row_id, TumbleStart(t1.created_at, '3 days':Interval) as $expr1, (TumbleStart(t1.created_at, '3 days':Interval) + '3 days':Interval) as $expr2] } └─LogicalScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id] } batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [t1.id, t1.created_at, TumbleStart(t1.created_at, '3 days':Interval), (TumbleStart(t1.created_at, '3 days':Interval) + '3 days':Interval)] } + └─BatchProject { exprs: [t1.id, t1.created_at, TumbleStart(t1.created_at, '3 days':Interval) as $expr47, (TumbleStart(t1.created_at, '3 days':Interval) + '3 days':Interval) as $expr48] } └─BatchScan { table: t1, columns: [t1.id, t1.created_at], distribution: SomeShard } - sql: | create materialized view t as select * from s; @@ -62,105 +62,120 @@ logical_plan: | LogicalProject { exprs: [t1.id, t1.created_at, window_start, window_end] } └─LogicalHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: all } - └─LogicalScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id] } + └─LogicalFilter { predicate: IsNotNull(t1.created_at) } + └─LogicalScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id] } stream_plan: | StreamMaterialize { columns: [id, created_at, window_start, window_end, t1._row_id(hidden)], pk_columns: [t1._row_id, window_start, window_end] } └─StreamHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: [t1.id, t1.created_at, window_start, window_end, t1._row_id] } - └─StreamTableScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamFilter { predicate: 
IsNotNull(t1.created_at) } + └─StreamTableScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - sql: | create table t1 (id int, created_at date); select id, created_at, window_start from hop(t1, created_at, interval '1' day, interval '3' day); logical_plan: | LogicalProject { exprs: [t1.id, t1.created_at, window_start] } └─LogicalHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: all } - └─LogicalScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id] } + └─LogicalFilter { predicate: IsNotNull(t1.created_at) } + └─LogicalScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id] } stream_plan: | StreamMaterialize { columns: [id, created_at, window_start, t1._row_id(hidden)], pk_columns: [t1._row_id, window_start] } └─StreamHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: [t1.id, t1.created_at, window_start, t1._row_id] } - └─StreamTableScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamFilter { predicate: IsNotNull(t1.created_at) } + └─StreamTableScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - sql: | create table t1 (id int, created_at date); select id, created_at, window_end from hop(t1, created_at, interval '1' day, interval '3' day); logical_plan: | LogicalProject { exprs: [t1.id, t1.created_at, window_end] } └─LogicalHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: all } - └─LogicalScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id] } + └─LogicalFilter { predicate: IsNotNull(t1.created_at) } + └─LogicalScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id] } stream_plan: | StreamMaterialize { columns: [id, created_at, window_end, t1._row_id(hidden)], pk_columns: [t1._row_id, window_end] } └─StreamHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: [t1.id, t1.created_at, window_end, t1._row_id] } - └─StreamTableScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamFilter { predicate: IsNotNull(t1.created_at) } + └─StreamTableScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - sql: | create table t1 (id int, created_at date); select id, created_at from hop(t1, created_at, interval '1' day, interval '3' day); logical_plan: | LogicalProject { exprs: [t1.id, t1.created_at] } └─LogicalHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: all } - └─LogicalScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id] } + └─LogicalFilter { predicate: IsNotNull(t1.created_at) } + └─LogicalScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id] } batch_plan: | BatchHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: [t1.id, t1.created_at] } └─BatchExchange { order: [], dist: Single } - └─BatchScan { table: t1, columns: [t1.id, t1.created_at], distribution: SomeShard } + └─BatchFilter { predicate: IsNotNull(t1.created_at) } + └─BatchScan { table: t1, columns: [t1.id, t1.created_at], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [id, created_at, window_start(hidden), t1._row_id(hidden)], pk_columns: [t1._row_id, window_start] } └─StreamHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: [t1.id, t1.created_at, window_start, t1._row_id] } - 
└─StreamTableScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamFilter { predicate: IsNotNull(t1.created_at) } + └─StreamTableScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - sql: | create table t1 (id int, created_at date); select t_hop.id, t_hop.created_at from hop(t1, created_at, interval '1' day, interval '3' day) as t_hop; logical_plan: | LogicalProject { exprs: [t1.id, t1.created_at] } └─LogicalHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: all } - └─LogicalScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id] } + └─LogicalFilter { predicate: IsNotNull(t1.created_at) } + └─LogicalScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id] } batch_plan: | BatchHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: [t1.id, t1.created_at] } └─BatchExchange { order: [], dist: Single } - └─BatchScan { table: t1, columns: [t1.id, t1.created_at], distribution: SomeShard } + └─BatchFilter { predicate: IsNotNull(t1.created_at) } + └─BatchScan { table: t1, columns: [t1.id, t1.created_at], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [id, created_at, window_start(hidden), t1._row_id(hidden)], pk_columns: [t1._row_id, window_start] } └─StreamHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: [t1.id, t1.created_at, window_start, t1._row_id] } - └─StreamTableScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamFilter { predicate: IsNotNull(t1.created_at) } + └─StreamTableScan { table: t1, columns: [t1.id, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - sql: | create table t (v1 varchar, v2 timestamp, v3 float); select v1, window_end, avg(v3) as avg from hop( t, v2, interval '1' minute, interval '10' minute) group by v1, window_end; logical_plan: | - LogicalProject { exprs: [t.v1, window_end, (sum(t.v3) / count(t.v3)::Float64)] } + LogicalProject { exprs: [t.v1, window_end, (sum(t.v3) / count(t.v3)::Float64) as $expr1] } └─LogicalAgg { group_key: [t.v1, window_end], aggs: [sum(t.v3), count(t.v3)] } └─LogicalProject { exprs: [t.v1, window_end, t.v3] } └─LogicalHopWindow { time_col: t.v2, slide: 00:01:00, size: 00:10:00, output: all } - └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id] } + └─LogicalFilter { predicate: IsNotNull(t.v2) } + └─LogicalScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id] } batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [t.v1, window_end, (sum(t.v3) / count(t.v3)::Float64)] } + └─BatchProject { exprs: [t.v1, window_end, (sum(t.v3) / count(t.v3)::Float64) as $expr23] } └─BatchHashAgg { group_key: [t.v1, window_end], aggs: [sum(t.v3), count(t.v3)] } └─BatchHopWindow { time_col: t.v2, slide: 00:01:00, size: 00:10:00, output: [t.v1, t.v3, window_end] } └─BatchExchange { order: [], dist: HashShard(t.v1) } - └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } + └─BatchFilter { predicate: IsNotNull(t.v2) } + └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [v1, window_end, avg], pk_columns: [v1, window_end] } - └─StreamProject { exprs: [t.v1, window_end, (sum(t.v3) / count(t.v3)::Float64)] } + └─StreamProject { exprs: [t.v1, window_end, (sum(t.v3) / count(t.v3)::Float64) 
as $expr47] } └─StreamHashAgg { group_key: [t.v1, window_end], aggs: [count, sum(t.v3), count(t.v3)] } └─StreamExchange { dist: HashShard(t.v1, window_end) } └─StreamHopWindow { time_col: t.v2, slide: 00:01:00, size: 00:10:00, output: [t.v1, t.v3, window_end, t._row_id] } - └─StreamTableScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamFilter { predicate: IsNotNull(t.v2) } + └─StreamTableScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | create table t1 (id int, v1 int, created_at date); with t2 as (select * from t1 where v1 >= 10) select * from tumble(t2, created_at, interval '3' day); logical_plan: | - LogicalProject { exprs: [t1.id, t1.v1, t1.created_at, TumbleStart(t1.created_at, '3 days':Interval), (TumbleStart(t1.created_at, '3 days':Interval) + '3 days':Interval)] } - └─LogicalProject { exprs: [t1.id, t1.v1, t1.created_at, TumbleStart(t1.created_at, '3 days':Interval), (TumbleStart(t1.created_at, '3 days':Interval) + '3 days':Interval)] } + LogicalProject { exprs: [t1.id, t1.v1, t1.created_at, $expr1, $expr2] } + └─LogicalProject { exprs: [t1.id, t1.v1, t1.created_at, TumbleStart(t1.created_at, '3 days':Interval) as $expr1, (TumbleStart(t1.created_at, '3 days':Interval) + '3 days':Interval) as $expr2] } └─LogicalShare { id = 4 } └─LogicalProject { exprs: [t1.id, t1.v1, t1.created_at] } └─LogicalFilter { predicate: (t1.v1 >= 10:Int32) } └─LogicalScan { table: t1, columns: [t1.id, t1.v1, t1.created_at, t1._row_id] } batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [t1.id, t1.v1, t1.created_at, TumbleStart(t1.created_at, '3 days':Interval), (TumbleStart(t1.created_at, '3 days':Interval) + '3 days':Interval)] } + └─BatchProject { exprs: [t1.id, t1.v1, t1.created_at, TumbleStart(t1.created_at, '3 days':Interval) as $expr47, (TumbleStart(t1.created_at, '3 days':Interval) + '3 days':Interval) as $expr48] } └─BatchFilter { predicate: (t1.v1 >= 10:Int32) } └─BatchScan { table: t1, columns: [t1.id, t1.v1, t1.created_at], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [id, v1, created_at, window_start, window_end, t1._row_id(hidden)], pk_columns: [t1._row_id] } - └─StreamProject { exprs: [t1.id, t1.v1, t1.created_at, TumbleStart(t1.created_at, '3 days':Interval), (TumbleStart(t1.created_at, '3 days':Interval) + '3 days':Interval), t1._row_id] } + └─StreamProject { exprs: [t1.id, t1.v1, t1.created_at, TumbleStart(t1.created_at, '3 days':Interval) as $expr95, (TumbleStart(t1.created_at, '3 days':Interval) + '3 days':Interval) as $expr96, t1._row_id] } └─StreamFilter { predicate: (t1.v1 >= 10:Int32) } └─StreamTableScan { table: t1, columns: [t1.id, t1.v1, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - sql: | @@ -170,27 +185,28 @@ logical_plan: | LogicalProject { exprs: [t1.id, t1.v1, t1.created_at, window_start, window_end] } └─LogicalHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: all } - └─LogicalShare { id = 4 } - └─LogicalProject { exprs: [t1.id, t1.v1, t1.created_at] } - └─LogicalFilter { predicate: (t1.v1 >= 10:Int32) } - └─LogicalScan { table: t1, columns: [t1.id, t1.v1, t1.created_at, t1._row_id] } + └─LogicalFilter { predicate: IsNotNull(t1.created_at) } + └─LogicalShare { id = 4 } + └─LogicalProject { exprs: [t1.id, t1.v1, t1.created_at] } + └─LogicalFilter { predicate: (t1.v1 >= 10:Int32) } + └─LogicalScan { table: t1, columns: 
[t1.id, t1.v1, t1.created_at, t1._row_id] } batch_plan: | BatchHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: all } └─BatchExchange { order: [], dist: Single } - └─BatchFilter { predicate: (t1.v1 >= 10:Int32) } + └─BatchFilter { predicate: IsNotNull(t1.created_at) AND (t1.v1 >= 10:Int32) } └─BatchScan { table: t1, columns: [t1.id, t1.v1, t1.created_at], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [id, v1, created_at, window_start, window_end, t1._row_id(hidden)], pk_columns: [t1._row_id, window_start, window_end] } └─StreamHopWindow { time_col: t1.created_at, slide: 1 day, size: 3 days, output: [t1.id, t1.v1, t1.created_at, window_start, window_end, t1._row_id] } - └─StreamFilter { predicate: (t1.v1 >= 10:Int32) } + └─StreamFilter { predicate: IsNotNull(t1.created_at) AND (t1.v1 >= 10:Int32) } └─StreamTableScan { table: t1, columns: [t1.id, t1.v1, t1.created_at, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - sql: | with t(ts) as (values ('2020-01-01 12:00:00'::timestamp)) select * from tumble(t, ts, interval '10' second) as z; logical_plan: | - LogicalProject { exprs: [*VALUES*_0.column_0, TumbleStart(*VALUES*_0.column_0, '00:00:10':Interval), (TumbleStart(*VALUES*_0.column_0, '00:00:10':Interval) + '00:00:10':Interval)] } - └─LogicalProject { exprs: [*VALUES*_0.column_0, TumbleStart(*VALUES*_0.column_0, '00:00:10':Interval), (TumbleStart(*VALUES*_0.column_0, '00:00:10':Interval) + '00:00:10':Interval)] } + LogicalProject { exprs: [*VALUES*_0.column_0, $expr1, $expr2] } + └─LogicalProject { exprs: [*VALUES*_0.column_0, TumbleStart(*VALUES*_0.column_0, '00:00:10':Interval) as $expr1, (TumbleStart(*VALUES*_0.column_0, '00:00:10':Interval) + '00:00:10':Interval) as $expr2] } └─LogicalShare { id = 2 } └─LogicalValues { rows: [['2020-01-01 12:00:00':Timestamp]], schema: Schema { fields: [*VALUES*_0.column_0:Timestamp] } } batch_plan: | - BatchProject { exprs: [*VALUES*_0.column_0, TumbleStart(*VALUES*_0.column_0, '00:00:10':Interval), (TumbleStart(*VALUES*_0.column_0, '00:00:10':Interval) + '00:00:10':Interval)] } + BatchProject { exprs: [*VALUES*_0.column_0, TumbleStart(*VALUES*_0.column_0, '00:00:10':Interval) as $expr47, (TumbleStart(*VALUES*_0.column_0, '00:00:10':Interval) + '00:00:10':Interval) as $expr48] } └─BatchValues { rows: [['2020-01-01 12:00:00':Timestamp]] } diff --git a/src/frontend/planner_test/tests/testdata/tpch.yaml b/src/frontend/planner_test/tests/testdata/tpch.yaml index 66ceb6e08d4f7..7594908c78507 100644 --- a/src/frontend/planner_test/tests/testdata/tpch.yaml +++ b/src/frontend/planner_test/tests/testdata/tpch.yaml @@ -119,50 +119,50 @@ l_returnflag, l_linestatus; logical_plan: | - LogicalProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax))), RoundDigit((sum(lineitem.l_quantity) / count(lineitem.l_quantity)), 4:Int32), RoundDigit((sum(lineitem.l_extendedprice) / count(lineitem.l_extendedprice)), 4:Int32), RoundDigit((sum(lineitem.l_discount) / count(lineitem.l_discount)), 4:Int32), count] } - └─LogicalAgg { group_key: [lineitem.l_returnflag, lineitem.l_linestatus], aggs: [sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * 
(1:Int32 + lineitem.l_tax))), sum(lineitem.l_quantity), count(lineitem.l_quantity), sum(lineitem.l_extendedprice), count(lineitem.l_extendedprice), sum(lineitem.l_discount), count(lineitem.l_discount), count] } - └─LogicalProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_quantity, lineitem.l_extendedprice, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax)), lineitem.l_discount] } + LogicalProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum($expr1), sum($expr2), RoundDigit((sum(lineitem.l_quantity) / count(lineitem.l_quantity)), 4:Int32) as $expr3, RoundDigit((sum(lineitem.l_extendedprice) / count(lineitem.l_extendedprice)), 4:Int32) as $expr4, RoundDigit((sum(lineitem.l_discount) / count(lineitem.l_discount)), 4:Int32) as $expr5, count] } + └─LogicalAgg { group_key: [lineitem.l_returnflag, lineitem.l_linestatus], aggs: [sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum($expr1), sum($expr2), sum(lineitem.l_quantity), count(lineitem.l_quantity), sum(lineitem.l_extendedprice), count(lineitem.l_extendedprice), sum(lineitem.l_discount), count(lineitem.l_discount), count] } + └─LogicalProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_quantity, lineitem.l_extendedprice, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr1, ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax)) as $expr2, lineitem.l_discount] } └─LogicalFilter { predicate: (lineitem.l_shipdate <= ('1998-12-01':Date - '71 days':Interval)) } └─LogicalScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate, lineitem.l_shipinstruct, lineitem.l_shipmode, lineitem.l_comment] } optimized_logical_plan: | - LogicalProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax))), RoundDigit((sum(lineitem.l_quantity) / count(lineitem.l_quantity)), 4:Int32), RoundDigit((sum(lineitem.l_extendedprice) / count(lineitem.l_extendedprice)), 4:Int32), RoundDigit((sum(lineitem.l_discount) / count(lineitem.l_discount)), 4:Int32), count] } - └─LogicalAgg { group_key: [lineitem.l_returnflag, lineitem.l_linestatus], aggs: [sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax))), sum(lineitem.l_quantity), count(lineitem.l_quantity), sum(lineitem.l_extendedprice), count(lineitem.l_extendedprice), sum(lineitem.l_discount), count(lineitem.l_discount), count] } - └─LogicalProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_quantity, lineitem.l_extendedprice, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax)), lineitem.l_discount] } + LogicalProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, 
sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum($expr91), sum($expr92), RoundDigit((sum(lineitem.l_quantity) / count(lineitem.l_quantity)), 4:Int32) as $expr93, RoundDigit((sum(lineitem.l_extendedprice) / count(lineitem.l_extendedprice)), 4:Int32) as $expr94, RoundDigit((sum(lineitem.l_discount) / count(lineitem.l_discount)), 4:Int32) as $expr95, count] } + └─LogicalAgg { group_key: [lineitem.l_returnflag, lineitem.l_linestatus], aggs: [sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum($expr91), sum($expr92), sum(lineitem.l_quantity), count(lineitem.l_quantity), sum(lineitem.l_extendedprice), count(lineitem.l_extendedprice), sum(lineitem.l_discount), count(lineitem.l_discount), count] } + └─LogicalProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_quantity, lineitem.l_extendedprice, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr91, ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax)) as $expr92, lineitem.l_discount] } └─LogicalScan { table: lineitem, output_columns: [lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus], required_columns: [lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate], predicate: (lineitem.l_shipdate <= ('1998-12-01':Date - '71 days':Interval)) } batch_plan: | BatchExchange { order: [lineitem.l_returnflag ASC, lineitem.l_linestatus ASC], dist: Single } - └─BatchProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax))), RoundDigit((sum(lineitem.l_quantity) / count(lineitem.l_quantity)), 4:Int32), RoundDigit((sum(lineitem.l_extendedprice) / count(lineitem.l_extendedprice)), 4:Int32), RoundDigit((sum(lineitem.l_discount) / count(lineitem.l_discount)), 4:Int32), count] } + └─BatchProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum($expr203), sum($expr204), RoundDigit((sum(lineitem.l_quantity) / count(lineitem.l_quantity)), 4:Int32) as $expr205, RoundDigit((sum(lineitem.l_extendedprice) / count(lineitem.l_extendedprice)), 4:Int32) as $expr206, RoundDigit((sum(lineitem.l_discount) / count(lineitem.l_discount)), 4:Int32) as $expr207, count] } └─BatchSort { order: [lineitem.l_returnflag ASC, lineitem.l_linestatus ASC] } - └─BatchHashAgg { group_key: [lineitem.l_returnflag, lineitem.l_linestatus], aggs: [sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax))), sum(lineitem.l_quantity), count(lineitem.l_quantity), sum(lineitem.l_extendedprice), count(lineitem.l_extendedprice), sum(lineitem.l_discount), count(lineitem.l_discount), count] } + └─BatchHashAgg { group_key: [lineitem.l_returnflag, lineitem.l_linestatus], aggs: [sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum($expr203), sum($expr204), sum(lineitem.l_quantity), count(lineitem.l_quantity), sum(lineitem.l_extendedprice), count(lineitem.l_extendedprice), sum(lineitem.l_discount), count(lineitem.l_discount), count] } └─BatchExchange { order: [], dist: 
HashShard(lineitem.l_returnflag, lineitem.l_linestatus) } - └─BatchProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_quantity, lineitem.l_extendedprice, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax)), lineitem.l_discount] } + └─BatchProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_quantity, lineitem.l_extendedprice, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr203, ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax)) as $expr204, lineitem.l_discount] } └─BatchFilter { predicate: (lineitem.l_shipdate <= ('1998-12-01':Date - '71 days':Interval)) } └─BatchScan { table: lineitem, columns: [lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [l_returnflag, l_linestatus, sum_qty, sum_base_price, sum_disc_price, sum_charge, avg_qty, avg_price, avg_disc, count_order], pk_columns: [l_returnflag, l_linestatus] } - └─StreamProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax))), RoundDigit((sum(lineitem.l_quantity) / count(lineitem.l_quantity)), 4:Int32), RoundDigit((sum(lineitem.l_extendedprice) / count(lineitem.l_extendedprice)), 4:Int32), RoundDigit((sum(lineitem.l_discount) / count(lineitem.l_discount)), 4:Int32), count] } - └─StreamHashAgg { group_key: [lineitem.l_returnflag, lineitem.l_linestatus], aggs: [count, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax))), sum(lineitem.l_quantity), count(lineitem.l_quantity), sum(lineitem.l_extendedprice), count(lineitem.l_extendedprice), sum(lineitem.l_discount), count(lineitem.l_discount), count] } + └─StreamProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum($expr320), sum($expr321), RoundDigit((sum(lineitem.l_quantity) / count(lineitem.l_quantity)), 4:Int32) as $expr325, RoundDigit((sum(lineitem.l_extendedprice) / count(lineitem.l_extendedprice)), 4:Int32) as $expr326, RoundDigit((sum(lineitem.l_discount) / count(lineitem.l_discount)), 4:Int32) as $expr327, count] } + └─StreamHashAgg { group_key: [lineitem.l_returnflag, lineitem.l_linestatus], aggs: [count, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum($expr320), sum($expr321), sum(lineitem.l_quantity), count(lineitem.l_quantity), sum(lineitem.l_extendedprice), count(lineitem.l_extendedprice), sum(lineitem.l_discount), count(lineitem.l_discount), count] } └─StreamExchange { dist: HashShard(lineitem.l_returnflag, lineitem.l_linestatus) } - └─StreamProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_quantity, lineitem.l_extendedprice, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax)), lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber] } + └─StreamProject { exprs: [lineitem.l_returnflag, 
lineitem.l_linestatus, lineitem.l_quantity, lineitem.l_extendedprice, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr320, ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax)) as $expr321, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber] } └─StreamFilter { predicate: (lineitem.l_shipdate <= ('1998-12-01':Date - '71 days':Interval)) } └─StreamTableScan { table: lineitem, columns: [lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_shipdate], pk: [lineitem.l_orderkey, lineitem.l_linenumber], dist: UpstreamHashShard(lineitem.l_orderkey, lineitem.l_linenumber) } stream_dist_plan: | Fragment 0 StreamMaterialize { columns: [l_returnflag, l_linestatus, sum_qty, sum_base_price, sum_disc_price, sum_charge, avg_qty, avg_price, avg_disc, count_order], pk_columns: [l_returnflag, l_linestatus] } materialized table: 4294967294 - StreamProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax))), RoundDigit((sum(lineitem.l_quantity) / count(lineitem.l_quantity)), 4:Int32), RoundDigit((sum(lineitem.l_extendedprice) / count(lineitem.l_extendedprice)), 4:Int32), RoundDigit((sum(lineitem.l_discount) / count(lineitem.l_discount)), 4:Int32), count] } - StreamHashAgg { group_key: [lineitem.l_returnflag, lineitem.l_linestatus], aggs: [count, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax))), sum(lineitem.l_quantity), count(lineitem.l_quantity), sum(lineitem.l_extendedprice), count(lineitem.l_extendedprice), sum(lineitem.l_discount), count(lineitem.l_discount), count] } + StreamProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum($expr320), sum($expr321), RoundDigit((sum(lineitem.l_quantity) / count(lineitem.l_quantity)), 4:Int32) as $expr325, RoundDigit((sum(lineitem.l_extendedprice) / count(lineitem.l_extendedprice)), 4:Int32) as $expr326, RoundDigit((sum(lineitem.l_discount) / count(lineitem.l_discount)), 4:Int32) as $expr327, count] } + StreamHashAgg { group_key: [lineitem.l_returnflag, lineitem.l_linestatus], aggs: [count, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum($expr320), sum($expr321), sum(lineitem.l_quantity), count(lineitem.l_quantity), sum(lineitem.l_extendedprice), count(lineitem.l_extendedprice), sum(lineitem.l_discount), count(lineitem.l_discount), count] } result table: 0, state tables: [] StreamExchange Hash([0, 1]) from 1 Fragment 1 - StreamProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_quantity, lineitem.l_extendedprice, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax)), lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber] } + StreamProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_quantity, lineitem.l_extendedprice, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr320, ((lineitem.l_extendedprice * (1:Int32 - 
lineitem.l_discount)) * (1:Int32 + lineitem.l_tax)) as $expr321, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber] } StreamFilter { predicate: (lineitem.l_shipdate <= ('1998-12-01':Date - '71 days':Interval)) } Chain { table: lineitem, columns: [lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_shipdate], pk: [lineitem.l_orderkey, lineitem.l_linenumber], dist: UpstreamHashShard(lineitem.l_orderkey, lineitem.l_linenumber) } Upstream BatchPlanNode - Table 0 { columns: [lineitem_l_returnflag, lineitem_l_linestatus, count, sum(lineitem_l_quantity), sum(lineitem_l_extendedprice), sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount))), sum(((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount)) * (1:Int32 + lineitem_l_tax))), sum(lineitem_l_quantity)_0, count(lineitem_l_quantity), sum(lineitem_l_extendedprice)_0, count(lineitem_l_extendedprice), sum(lineitem_l_discount), count(lineitem_l_discount), count_0], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [0, 1] } + Table 0 { columns: [lineitem_l_returnflag, lineitem_l_linestatus, count, sum(lineitem_l_quantity), sum(lineitem_l_extendedprice), sum($expr320), sum($expr321), sum(lineitem_l_quantity)_0, count(lineitem_l_quantity), sum(lineitem_l_extendedprice)_0, count(lineitem_l_extendedprice), sum(lineitem_l_discount), count(lineitem_l_discount), count_0], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [0, 1] } Table 4294967294 { columns: [l_returnflag, l_linestatus, sum_qty, sum_base_price, sum_disc_price, sum_charge, avg_qty, avg_price, avg_disc, count_order], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], distribution key: [0, 1] } - id: tpch_q2 before: @@ -301,7 +301,7 @@ └─StreamTopN { order: "[supplier.s_acctbal DESC, nation.n_name ASC, supplier.s_name ASC, part.p_partkey ASC]", limit: 100, offset: 0 } └─StreamExchange { dist: Single } └─StreamGroupTopN { order: "[supplier.s_acctbal DESC, nation.n_name ASC, supplier.s_name ASC, part.p_partkey ASC]", limit: 100, offset: 0, group_key: [18] } - └─StreamProject { exprs: [supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey, part.p_partkey, partsupp.ps_supplycost, min(partsupp.ps_supplycost), nation.n_regionkey, region.r_regionkey, Vnode(nation.n_regionkey)] } + └─StreamProject { exprs: [supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey, part.p_partkey, partsupp.ps_supplycost, min(partsupp.ps_supplycost), nation.n_regionkey, region.r_regionkey, Vnode(nation.n_regionkey) as $expr1] } └─StreamHashJoin { type: Inner, predicate: nation.n_regionkey = region.r_regionkey, output: [supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey, part.p_partkey, partsupp.ps_supplycost, min(partsupp.ps_supplycost), nation.n_regionkey, 
region.r_regionkey] } ├─StreamExchange { dist: HashShard(nation.n_regionkey) } | └─StreamHashJoin { type: Inner, predicate: part.p_partkey IS NOT DISTINCT FROM part.p_partkey AND partsupp.ps_supplycost = min(partsupp.ps_supplycost), output: [part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, nation.n_name, nation.n_regionkey, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey, partsupp.ps_supplycost, part.p_partkey, min(partsupp.ps_supplycost)] } @@ -363,7 +363,7 @@ Fragment 1 StreamGroupTopN { order: "[supplier.s_acctbal DESC, nation.n_name ASC, supplier.s_name ASC, part.p_partkey ASC]", limit: 100, offset: 0, group_key: [18] } state table: 1 - StreamProject { exprs: [supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey, part.p_partkey, partsupp.ps_supplycost, min(partsupp.ps_supplycost), nation.n_regionkey, region.r_regionkey, Vnode(nation.n_regionkey)] } + StreamProject { exprs: [supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey, part.p_partkey, partsupp.ps_supplycost, min(partsupp.ps_supplycost), nation.n_regionkey, region.r_regionkey, Vnode(nation.n_regionkey) as $expr1] } StreamHashJoin { type: Inner, predicate: nation.n_regionkey = region.r_regionkey, output: [supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey, part.p_partkey, partsupp.ps_supplycost, min(partsupp.ps_supplycost), nation.n_regionkey, region.r_regionkey] } left table: 2, right table 4, left degree table: 3, right degree table: 5, StreamExchange Hash([8]) from 2 @@ -479,47 +479,47 @@ Upstream BatchPlanNode - Table 0 { columns: [supplier_s_acctbal, supplier_s_name, nation_n_name, part_p_partkey, part_p_mfgr, supplier_s_address, supplier_s_phone, supplier_s_comment, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, part_p_partkey_0, partsupp_ps_supplycost, min(partsupp_ps_supplycost), nation_n_regionkey, region_r_regionkey, Vnode(nation_n_regionkey)], primary key: [$0 DESC, $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC, $15 ASC, $17 ASC, $16 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], distribution key: [] } - Table 1 { columns: [supplier_s_acctbal, supplier_s_name, nation_n_name, part_p_partkey, part_p_mfgr, supplier_s_address, supplier_s_phone, supplier_s_comment, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, part_p_partkey_0, partsupp_ps_supplycost, min(partsupp_ps_supplycost), nation_n_regionkey, region_r_regionkey, Vnode(nation_n_regionkey)], primary key: [$18 ASC, $0 DESC, $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC, $15 ASC, $17 ASC, $16 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], distribution key: [16], vnode column idx: 18 } + Table 0 { columns: 
[supplier_s_acctbal, supplier_s_name, nation_n_name, part_p_partkey, part_p_mfgr, supplier_s_address, supplier_s_phone, supplier_s_comment, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, part_p_partkey_0, partsupp_ps_supplycost, min(partsupp_ps_supplycost), nation_n_regionkey, region_r_regionkey, $expr1], primary key: [$0 DESC, $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC, $15 ASC, $17 ASC, $16 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], distribution key: [] } + Table 1 { columns: [supplier_s_acctbal, supplier_s_name, nation_n_name, part_p_partkey, part_p_mfgr, supplier_s_address, supplier_s_phone, supplier_s_comment, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, part_p_partkey_0, partsupp_ps_supplycost, min(partsupp_ps_supplycost), nation_n_regionkey, region_r_regionkey, $expr1], primary key: [$18 ASC, $0 DESC, $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC, $15 ASC, $17 ASC, $16 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], distribution key: [16], vnode column idx: 18 } Table 2 { columns: [part_p_partkey, part_p_mfgr, supplier_s_name, supplier_s_address, supplier_s_phone, supplier_s_acctbal, supplier_s_comment, nation_n_name, nation_n_regionkey, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, partsupp_ps_supplycost, part_p_partkey_0, min(partsupp_ps_supplycost)], primary key: [$8 ASC, $0 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $15 ASC, $14 ASC, $16 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], distribution key: [8] } Table 3 { columns: [nation_n_regionkey, part_p_partkey, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, part_p_partkey_0, partsupp_ps_supplycost, min(partsupp_ps_supplycost), _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC], value indices: [10], distribution key: [0] } - Table 4 { columns: [region_r_regionkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 5 { columns: [region_r_regionkey, region_r_regionkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 6 { columns: [part_p_partkey, part_p_mfgr, partsupp_ps_supplycost, supplier_s_name, supplier_s_address, supplier_s_phone, supplier_s_acctbal, supplier_s_comment, nation_n_name, nation_n_regionkey, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, supplier_s_nationkey, nation_n_nationkey], primary key: [$0 ASC, $2 ASC, $0 ASC, $10 ASC, $11 ASC, $12 ASC, $14 ASC, $13 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], distribution key: [0] } - Table 7 { columns: [part_p_partkey, partsupp_ps_supplycost, part_p_partkey_0, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC], value indices: [8], distribution key: [0] } - Table 8 { columns: [part_p_partkey, min(partsupp_ps_supplycost)], primary key: [$0 ASC, $1 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 9 { columns: [part_p_partkey, min(partsupp_ps_supplycost), part_p_partkey_0, _degree], primary key: [$0 ASC, $1 ASC, $2 
ASC], value indices: [3], distribution key: [0] } + Table 4 { columns: [region_r_regionkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 5 { columns: [region_r_regionkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 6 { columns: [part_p_partkey, part_p_mfgr, partsupp_ps_supplycost, supplier_s_name, supplier_s_address, supplier_s_phone, supplier_s_acctbal, supplier_s_comment, nation_n_name, nation_n_regionkey, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, supplier_s_nationkey, nation_n_nationkey], primary key: [$0 ASC, $2 ASC, $10 ASC, $11 ASC, $12 ASC, $14 ASC, $13 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], distribution key: [0] } + Table 7 { columns: [part_p_partkey, partsupp_ps_supplycost, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [7], distribution key: [0] } + Table 8 { columns: [part_p_partkey, min(partsupp_ps_supplycost)], primary key: [$0 ASC, $1 ASC], value indices: [0, 1], distribution key: [0] } + Table 9 { columns: [part_p_partkey, min(partsupp_ps_supplycost), _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 10 { columns: [part_p_partkey, part_p_mfgr, partsupp_ps_supplycost, supplier_s_name, supplier_s_address, supplier_s_nationkey, supplier_s_phone, supplier_s_acctbal, supplier_s_comment, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey], primary key: [$5 ASC, $0 ASC, $9 ASC, $10 ASC, $11 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], distribution key: [5] } Table 11 { columns: [supplier_s_nationkey, part_p_partkey, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC], value indices: [5], distribution key: [0] } - Table 12 { columns: [nation_n_nationkey, nation_n_name, nation_n_regionkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 13 { columns: [nation_n_nationkey, nation_n_nationkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 14 { columns: [part_p_partkey, part_p_mfgr, partsupp_ps_suppkey, partsupp_ps_supplycost, partsupp_ps_partkey], primary key: [$2 ASC, $0 ASC, $4 ASC, $2 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [2] } - Table 15 { columns: [partsupp_ps_suppkey, part_p_partkey, partsupp_ps_partkey, partsupp_ps_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } - Table 16 { columns: [supplier_s_suppkey, supplier_s_name, supplier_s_address, supplier_s_nationkey, supplier_s_phone, supplier_s_acctbal, supplier_s_comment], primary key: [$0 ASC, $0 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [0] } - Table 17 { columns: [supplier_s_suppkey, supplier_s_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 18 { columns: [part_p_partkey, part_p_mfgr], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 19 { columns: [part_p_partkey, part_p_partkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 20 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_supplycost], primary key: [$0 ASC, $0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: 
[0] } - Table 21 { columns: [partsupp_ps_partkey, partsupp_ps_partkey_0, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } + Table 12 { columns: [nation_n_nationkey, nation_n_name, nation_n_regionkey], primary key: [$0 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 13 { columns: [nation_n_nationkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 14 { columns: [part_p_partkey, part_p_mfgr, partsupp_ps_suppkey, partsupp_ps_supplycost, partsupp_ps_partkey], primary key: [$2 ASC, $0 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [2] } + Table 15 { columns: [partsupp_ps_suppkey, part_p_partkey, partsupp_ps_partkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } + Table 16 { columns: [supplier_s_suppkey, supplier_s_name, supplier_s_address, supplier_s_nationkey, supplier_s_phone, supplier_s_acctbal, supplier_s_comment], primary key: [$0 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [0] } + Table 17 { columns: [supplier_s_suppkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 18 { columns: [part_p_partkey, part_p_mfgr], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 19 { columns: [part_p_partkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 20 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_supplycost], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 21 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 22 { columns: [part_p_partkey, partsupp_ps_supplycost, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, region_r_regionkey, nation_n_regionkey], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [0] } Table 23 { columns: [part_p_partkey, count, min(partsupp_ps_supplycost)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } - Table 24 { columns: [part_p_partkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 25 { columns: [part_p_partkey, part_p_partkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 26 { columns: [partsupp_ps_partkey, partsupp_ps_supplycost, partsupp_ps_suppkey, supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, nation_n_regionkey, region_r_regionkey], primary key: [$0 ASC, $0 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $7 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [0] } - Table 27 { columns: [partsupp_ps_partkey, partsupp_ps_partkey_0, partsupp_ps_suppkey, supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, region_r_regionkey, nation_n_regionkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC], value indices: [8], distribution key: [0] } + Table 24 { columns: [part_p_partkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 25 { columns: [part_p_partkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 26 { columns: [partsupp_ps_partkey, partsupp_ps_supplycost, partsupp_ps_suppkey, supplier_s_suppkey, nation_n_nationkey, 
supplier_s_nationkey, nation_n_regionkey, region_r_regionkey], primary key: [$0 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $7 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [0] } + Table 27 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, region_r_regionkey, nation_n_regionkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [7], distribution key: [0] } Table 28 { columns: [part_p_partkey, count], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 29 { columns: [partsupp_ps_partkey, partsupp_ps_supplycost, nation_n_regionkey, partsupp_ps_suppkey, supplier_s_suppkey, supplier_s_nationkey, nation_n_nationkey], primary key: [$2 ASC, $0 ASC, $3 ASC, $4 ASC, $6 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [2] } Table 30 { columns: [nation_n_regionkey, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC], value indices: [6], distribution key: [0] } - Table 31 { columns: [region_r_regionkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 32 { columns: [region_r_regionkey, region_r_regionkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 31 { columns: [region_r_regionkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 32 { columns: [region_r_regionkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 33 { columns: [partsupp_ps_partkey, partsupp_ps_supplycost, supplier_s_nationkey, partsupp_ps_suppkey, supplier_s_suppkey], primary key: [$2 ASC, $0 ASC, $3 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [2] } Table 34 { columns: [supplier_s_nationkey, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } - Table 35 { columns: [nation_n_nationkey, nation_n_regionkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 36 { columns: [nation_n_nationkey, nation_n_nationkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 37 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_supplycost], primary key: [$1 ASC, $0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [1] } - Table 38 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, partsupp_ps_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } - Table 39 { columns: [supplier_s_suppkey, supplier_s_nationkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 40 { columns: [supplier_s_suppkey, supplier_s_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 35 { columns: [nation_n_nationkey, nation_n_regionkey], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 36 { columns: [nation_n_nationkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 37 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_supplycost], primary key: [$1 ASC, $0 ASC], value indices: [0, 1, 2], distribution key: [1] } + Table 38 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, _degree], primary 
key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 39 { columns: [supplier_s_suppkey, supplier_s_nationkey], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 40 { columns: [supplier_s_suppkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4294967294 { columns: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey, part.p_partkey, partsupp.ps_supplycost, min(partsupp.ps_supplycost), nation.n_regionkey, region.r_regionkey], primary key: [$0 DESC, $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC, $15 ASC, $17 ASC, $16 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], distribution key: [] } - id: tpch_q3 before: @@ -549,10 +549,10 @@ o_orderdate limit 10; logical_plan: | - LogicalTopN { order: "[sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0 } - └─LogicalProject { exprs: [lineitem.l_orderkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), orders.o_orderdate, orders.o_shippriority] } - └─LogicalAgg { group_key: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority], aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalProject { exprs: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + LogicalTopN { order: "[sum($expr1) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0 } + └─LogicalProject { exprs: [lineitem.l_orderkey, sum($expr1), orders.o_orderdate, orders.o_shippriority] } + └─LogicalAgg { group_key: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority], aggs: [sum($expr1)] } + └─LogicalProject { exprs: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr1] } └─LogicalFilter { predicate: (customer.c_mktsegment = 'FURNITURE':Varchar) AND (customer.c_custkey = orders.o_custkey) AND (lineitem.l_orderkey = orders.o_orderkey) AND (orders.o_orderdate < '1995-03-29':Date) AND (lineitem.l_shipdate > '1995-03-29':Date) } └─LogicalJoin { type: Inner, on: true, output: all } ├─LogicalJoin { type: Inner, on: true, output: all } @@ -560,23 +560,23 @@ | └─LogicalScan { table: orders, columns: [orders.o_orderkey, orders.o_custkey, orders.o_orderstatus, orders.o_totalprice, orders.o_orderdate, orders.o_orderpriority, orders.o_clerk, orders.o_shippriority, orders.o_comment] } └─LogicalScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate, lineitem.l_shipinstruct, lineitem.l_shipmode, lineitem.l_comment] } optimized_logical_plan: | - LogicalTopN { order: "[sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0 } - └─LogicalProject { exprs: [lineitem.l_orderkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), orders.o_orderdate, orders.o_shippriority] } - └─LogicalAgg { group_key: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority], aggs: [sum((lineitem.l_extendedprice * 
(1:Int32 - lineitem.l_discount)))] } - └─LogicalProject { exprs: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + LogicalTopN { order: "[sum($expr19) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0 } + └─LogicalProject { exprs: [lineitem.l_orderkey, sum($expr19), orders.o_orderdate, orders.o_shippriority] } + └─LogicalAgg { group_key: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority], aggs: [sum($expr19)] } + └─LogicalProject { exprs: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr19] } └─LogicalJoin { type: Inner, on: (lineitem.l_orderkey = orders.o_orderkey), output: [orders.o_orderdate, orders.o_shippriority, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount] } ├─LogicalJoin { type: Inner, on: (customer.c_custkey = orders.o_custkey), output: [orders.o_orderkey, orders.o_orderdate, orders.o_shippriority] } | ├─LogicalScan { table: customer, output_columns: [customer.c_custkey], required_columns: [customer.c_custkey, customer.c_mktsegment], predicate: (customer.c_mktsegment = 'FURNITURE':Varchar) } | └─LogicalScan { table: orders, columns: [orders.o_orderkey, orders.o_custkey, orders.o_orderdate, orders.o_shippriority], predicate: (orders.o_orderdate < '1995-03-29':Date) } └─LogicalScan { table: lineitem, output_columns: [lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount], required_columns: [lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate], predicate: (lineitem.l_shipdate > '1995-03-29':Date) } batch_plan: | - BatchTopN { order: "[sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0 } + BatchTopN { order: "[sum($expr41) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0 } └─BatchExchange { order: [], dist: Single } - └─BatchTopN { order: "[sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0 } - └─BatchProject { exprs: [lineitem.l_orderkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), orders.o_orderdate, orders.o_shippriority] } - └─BatchHashAgg { group_key: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority], aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + └─BatchTopN { order: "[sum($expr41) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0 } + └─BatchProject { exprs: [lineitem.l_orderkey, sum($expr41), orders.o_orderdate, orders.o_shippriority] } + └─BatchHashAgg { group_key: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority], aggs: [sum($expr41)] } └─BatchExchange { order: [], dist: HashShard(lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority) } - └─BatchProject { exprs: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + └─BatchProject { exprs: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr41] } └─BatchHashJoin { type: Inner, predicate: orders.o_orderkey = lineitem.l_orderkey, output: [orders.o_orderdate, orders.o_shippriority, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount] } ├─BatchExchange { order: [], dist: HashShard(orders.o_orderkey) } | └─BatchHashJoin { type: Inner, predicate: customer.c_custkey = 
orders.o_custkey, output: [orders.o_orderkey, orders.o_orderdate, orders.o_shippriority] } @@ -593,15 +593,15 @@ └─BatchScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [l_orderkey, revenue, o_orderdate, o_shippriority], pk_columns: [l_orderkey, o_orderdate, o_shippriority], order_descs: [revenue, o_orderdate, l_orderkey, o_shippriority] } - └─StreamProject { exprs: [lineitem.l_orderkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), orders.o_orderdate, orders.o_shippriority] } - └─StreamTopN { order: "[sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0 } + └─StreamProject { exprs: [lineitem.l_orderkey, sum($expr64), orders.o_orderdate, orders.o_shippriority] } + └─StreamTopN { order: "[sum($expr64) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0 } └─StreamExchange { dist: Single } - └─StreamGroupTopN { order: "[sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0, group_key: [4] } - └─StreamProject { exprs: [lineitem.l_orderkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), orders.o_orderdate, orders.o_shippriority, Vnode(lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority)] } - └─StreamProject { exprs: [lineitem.l_orderkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), orders.o_orderdate, orders.o_shippriority] } - └─StreamHashAgg { group_key: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority], aggs: [count, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + └─StreamGroupTopN { order: "[sum($expr64) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0, group_key: [4] } + └─StreamProject { exprs: [lineitem.l_orderkey, sum($expr64), orders.o_orderdate, orders.o_shippriority, Vnode(lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority) as $expr65] } + └─StreamProject { exprs: [lineitem.l_orderkey, sum($expr64), orders.o_orderdate, orders.o_shippriority] } + └─StreamHashAgg { group_key: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority], aggs: [count, sum($expr64)] } └─StreamExchange { dist: HashShard(lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority) } - └─StreamProject { exprs: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), customer.c_custkey, orders.o_orderkey, orders.o_custkey, lineitem.l_linenumber] } + └─StreamProject { exprs: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr64, customer.c_custkey, orders.o_orderkey, orders.o_custkey, lineitem.l_linenumber] } └─StreamHashJoin { type: Inner, predicate: orders.o_orderkey = lineitem.l_orderkey, output: [orders.o_orderdate, orders.o_shippriority, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, customer.c_custkey, orders.o_orderkey, orders.o_custkey, lineitem.l_linenumber] } ├─StreamExchange { dist: HashShard(orders.o_orderkey) } | └─StreamHashJoin { type: Inner, predicate: customer.c_custkey = orders.o_custkey, output: [orders.o_orderkey, orders.o_orderdate, orders.o_shippriority, customer.c_custkey, orders.o_custkey] } @@ -620,22 +620,22 @@ Fragment 0 StreamMaterialize { columns: [l_orderkey, revenue, o_orderdate, o_shippriority], 
pk_columns: [l_orderkey, o_orderdate, o_shippriority], order_descs: [revenue, o_orderdate, l_orderkey, o_shippriority] } materialized table: 4294967294 - StreamProject { exprs: [lineitem.l_orderkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), orders.o_orderdate, orders.o_shippriority] } - StreamTopN { order: "[sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0 } + StreamProject { exprs: [lineitem.l_orderkey, sum($expr64), orders.o_orderdate, orders.o_shippriority] } + StreamTopN { order: "[sum($expr64) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0 } state table: 0 StreamExchange Single from 1 Fragment 1 - StreamGroupTopN { order: "[sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0, group_key: [4] } + StreamGroupTopN { order: "[sum($expr64) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0, group_key: [4] } state table: 1 - StreamProject { exprs: [lineitem.l_orderkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), orders.o_orderdate, orders.o_shippriority, Vnode(lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority)] } - StreamProject { exprs: [lineitem.l_orderkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), orders.o_orderdate, orders.o_shippriority] } - StreamHashAgg { group_key: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority], aggs: [count, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + StreamProject { exprs: [lineitem.l_orderkey, sum($expr64), orders.o_orderdate, orders.o_shippriority, Vnode(lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority) as $expr65] } + StreamProject { exprs: [lineitem.l_orderkey, sum($expr64), orders.o_orderdate, orders.o_shippriority] } + StreamHashAgg { group_key: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority], aggs: [count, sum($expr64)] } result table: 2, state tables: [] StreamExchange Hash([0, 1, 2]) from 2 Fragment 2 - StreamProject { exprs: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), customer.c_custkey, orders.o_orderkey, orders.o_custkey, lineitem.l_linenumber] } + StreamProject { exprs: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr64, customer.c_custkey, orders.o_orderkey, orders.o_custkey, lineitem.l_linenumber] } StreamHashJoin { type: Inner, predicate: orders.o_orderkey = lineitem.l_orderkey, output: [orders.o_orderdate, orders.o_shippriority, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, customer.c_custkey, orders.o_orderkey, orders.o_custkey, lineitem.l_linenumber] } left table: 3, right table 5, left degree table: 4, right degree table: 6, StreamExchange Hash([0]) from 3 @@ -667,15 +667,15 @@ Upstream BatchPlanNode - Table 0 { columns: [lineitem_l_orderkey, sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount))), orders_o_orderdate, orders_o_shippriority, Vnode(lineitem_l_orderkey, orders_o_orderdate, orders_o_shippriority)], primary key: [$1 DESC, $2 ASC, $0 ASC, $3 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [] } - Table 1 { columns: [lineitem_l_orderkey, sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount))), orders_o_orderdate, orders_o_shippriority, Vnode(lineitem_l_orderkey, orders_o_orderdate, orders_o_shippriority)], primary key: 
[$4 ASC, $1 DESC, $2 ASC, $0 ASC, $3 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0, 2, 3], vnode column idx: 4 } - Table 2 { columns: [lineitem_l_orderkey, orders_o_orderdate, orders_o_shippriority, count, sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount)))], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3, 4], distribution key: [0, 1, 2] } - Table 3 { columns: [orders_o_orderkey, orders_o_orderdate, orders_o_shippriority, customer_c_custkey, orders_o_custkey], primary key: [$0 ASC, $3 ASC, $0 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0] } - Table 4 { columns: [orders_o_orderkey, customer_c_custkey, orders_o_orderkey_0, orders_o_custkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } - Table 5 { columns: [lineitem_l_orderkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_linenumber], primary key: [$0 ASC, $0 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } - Table 6 { columns: [lineitem_l_orderkey, lineitem_l_orderkey_0, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } - Table 7 { columns: [customer_c_custkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 8 { columns: [customer_c_custkey, customer_c_custkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 0 { columns: [lineitem_l_orderkey, sum($expr64), orders_o_orderdate, orders_o_shippriority, $expr65], primary key: [$1 DESC, $2 ASC, $0 ASC, $3 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [] } + Table 1 { columns: [lineitem_l_orderkey, sum($expr64), orders_o_orderdate, orders_o_shippriority, $expr65], primary key: [$4 ASC, $1 DESC, $2 ASC, $0 ASC, $3 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0, 2, 3], vnode column idx: 4 } + Table 2 { columns: [lineitem_l_orderkey, orders_o_orderdate, orders_o_shippriority, count, sum($expr64)], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3, 4], distribution key: [0, 1, 2] } + Table 3 { columns: [orders_o_orderkey, orders_o_orderdate, orders_o_shippriority, customer_c_custkey, orders_o_custkey], primary key: [$0 ASC, $3 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0] } + Table 4 { columns: [orders_o_orderkey, customer_c_custkey, orders_o_custkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } + Table 5 { columns: [lineitem_l_orderkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_linenumber], primary key: [$0 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } + Table 6 { columns: [lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 7 { columns: [customer_c_custkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 8 { columns: [customer_c_custkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 9 { columns: [orders_o_orderkey, orders_o_custkey, orders_o_orderdate, orders_o_shippriority], primary key: [$1 ASC, $0 ASC], value indices: [0, 1, 2, 3], distribution key: [1] } Table 10 { columns: [orders_o_custkey, orders_o_orderkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 4294967294 { columns: [l_orderkey, revenue, o_orderdate, o_shippriority], primary key: [$1 DESC, $2 ASC, $0 ASC, $3 ASC], value 
indices: [0, 1, 2, 3], distribution key: [] } @@ -777,10 +777,10 @@ BatchPlanNode Table 0 { columns: [orders_o_orderpriority, count, count_0], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } - Table 1 { columns: [orders_o_orderkey, orders_o_orderpriority], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 2 { columns: [orders_o_orderkey, orders_o_orderkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 3 { columns: [lineitem_l_orderkey, lineitem_l_linenumber], primary key: [$0 ASC, $0 ASC, $1 ASC], value indices: [0, 1], distribution key: [0] } - Table 4 { columns: [lineitem_l_orderkey, lineitem_l_orderkey_0, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } + Table 1 { columns: [orders_o_orderkey, orders_o_orderpriority], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 2 { columns: [orders_o_orderkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 3 { columns: [lineitem_l_orderkey, lineitem_l_linenumber], primary key: [$0 ASC, $1 ASC], value indices: [0, 1], distribution key: [0] } + Table 4 { columns: [lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 4294967294 { columns: [o_orderpriority, order_count], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } - id: tpch_q5 before: @@ -811,9 +811,9 @@ order by revenue desc; logical_plan: | - LogicalProject { exprs: [nation.n_name, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalAgg { group_key: [nation.n_name], aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalProject { exprs: [nation.n_name, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + LogicalProject { exprs: [nation.n_name, sum($expr1)] } + └─LogicalAgg { group_key: [nation.n_name], aggs: [sum($expr1)] } + └─LogicalProject { exprs: [nation.n_name, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr1] } └─LogicalFilter { predicate: (customer.c_custkey = orders.o_custkey) AND (lineitem.l_orderkey = orders.o_orderkey) AND (lineitem.l_suppkey = supplier.s_suppkey) AND (customer.c_nationkey = supplier.s_nationkey) AND (supplier.s_nationkey = nation.n_nationkey) AND (nation.n_regionkey = region.r_regionkey) AND (region.r_name = 'MIDDLE EAST':Varchar) AND (orders.o_orderdate >= '1994-01-01':Date) AND (orders.o_orderdate < ('1994-01-01':Date + '1 year':Interval)) } └─LogicalJoin { type: Inner, on: true, output: all } ├─LogicalJoin { type: Inner, on: true, output: all } @@ -827,8 +827,8 @@ | └─LogicalScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey, nation.n_comment] } └─LogicalScan { table: region, columns: [region.r_regionkey, region.r_name, region.r_comment] } optimized_logical_plan: | - LogicalAgg { group_key: [nation.n_name], aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalProject { exprs: [nation.n_name, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + LogicalAgg { group_key: [nation.n_name], aggs: [sum($expr19)] } + └─LogicalProject { exprs: [nation.n_name, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr19] } └─LogicalJoin { type: Inner, on: (nation.n_regionkey = region.r_regionkey), output: [lineitem.l_extendedprice, 
lineitem.l_discount, nation.n_name] } ├─LogicalJoin { type: Inner, on: (supplier.s_nationkey = nation.n_nationkey), output: [lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, nation.n_regionkey] } | ├─LogicalJoin { type: Inner, on: (lineitem.l_orderkey = orders.o_orderkey) AND (lineitem.l_suppkey = supplier.s_suppkey), output: [supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount] } @@ -841,11 +841,11 @@ | └─LogicalScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey] } └─LogicalScan { table: region, output_columns: [region.r_regionkey], required_columns: [region.r_regionkey, region.r_name], predicate: (region.r_name = 'MIDDLE EAST':Varchar) } batch_plan: | - BatchExchange { order: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) DESC], dist: Single } - └─BatchSort { order: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) DESC] } - └─BatchHashAgg { group_key: [nation.n_name], aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + BatchExchange { order: [sum($expr41) DESC], dist: Single } + └─BatchSort { order: [sum($expr41) DESC] } + └─BatchHashAgg { group_key: [nation.n_name], aggs: [sum($expr41)] } └─BatchExchange { order: [], dist: HashShard(nation.n_name) } - └─BatchProject { exprs: [nation.n_name, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + └─BatchProject { exprs: [nation.n_name, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr41] } └─BatchLookupJoin { type: Inner, predicate: nation.n_regionkey = region.r_regionkey AND (region.r_name = 'MIDDLE EAST':Varchar), output: [lineitem.l_extendedprice, lineitem.l_discount, nation.n_name] } └─BatchExchange { order: [], dist: UpstreamHashShard(nation.n_regionkey) } └─BatchLookupJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, nation.n_regionkey] } @@ -867,10 +867,10 @@ └─BatchScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [n_name, revenue], pk_columns: [n_name], order_descs: [revenue, n_name] } - └─StreamProject { exprs: [nation.n_name, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─StreamHashAgg { group_key: [nation.n_name], aggs: [count, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + └─StreamProject { exprs: [nation.n_name, sum($expr64)] } + └─StreamHashAgg { group_key: [nation.n_name], aggs: [count, sum($expr64)] } └─StreamExchange { dist: HashShard(nation.n_name) } - └─StreamProject { exprs: [nation.n_name, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), customer.c_custkey, orders.o_orderkey, orders.o_custkey, supplier.s_suppkey, customer.c_nationkey, supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, region.r_regionkey, nation.n_regionkey] } + └─StreamProject { exprs: [nation.n_name, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr64, customer.c_custkey, orders.o_orderkey, orders.o_custkey, supplier.s_suppkey, customer.c_nationkey, supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, region.r_regionkey, nation.n_regionkey] } └─StreamHashJoin { type: Inner, predicate: nation.n_regionkey = region.r_regionkey, output: [lineitem.l_extendedprice, 
lineitem.l_discount, nation.n_name, customer.c_custkey, orders.o_orderkey, orders.o_custkey, supplier.s_suppkey, customer.c_nationkey, supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, nation.n_regionkey, region.r_regionkey] } ├─StreamExchange { dist: HashShard(nation.n_regionkey) } | └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, nation.n_regionkey, customer.c_custkey, orders.o_orderkey, orders.o_custkey, supplier.s_suppkey, customer.c_nationkey, supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey] } @@ -900,13 +900,13 @@ Fragment 0 StreamMaterialize { columns: [n_name, revenue], pk_columns: [n_name], order_descs: [revenue, n_name] } materialized table: 4294967294 - StreamProject { exprs: [nation.n_name, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - StreamHashAgg { group_key: [nation.n_name], aggs: [count, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + StreamProject { exprs: [nation.n_name, sum($expr64)] } + StreamHashAgg { group_key: [nation.n_name], aggs: [count, sum($expr64)] } result table: 0, state tables: [] StreamExchange Hash([0]) from 1 Fragment 1 - StreamProject { exprs: [nation.n_name, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), customer.c_custkey, orders.o_orderkey, orders.o_custkey, supplier.s_suppkey, customer.c_nationkey, supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, region.r_regionkey, nation.n_regionkey] } + StreamProject { exprs: [nation.n_name, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr64, customer.c_custkey, orders.o_orderkey, orders.o_custkey, supplier.s_suppkey, customer.c_nationkey, supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, region.r_regionkey, nation.n_regionkey] } StreamHashJoin { type: Inner, predicate: nation.n_regionkey = region.r_regionkey, output: [lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, customer.c_custkey, orders.o_orderkey, orders.o_custkey, supplier.s_suppkey, customer.c_nationkey, supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, nation.n_regionkey, region.r_regionkey] } left table: 1, right table 3, left degree table: 2, right degree table: 4, StreamExchange Hash([3]) from 2 @@ -970,25 +970,25 @@ Upstream BatchPlanNode - Table 0 { columns: [nation_n_name, count, sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount)))], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } + Table 0 { columns: [nation_n_name, count, sum($expr64)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } Table 1 { columns: [lineitem_l_extendedprice, lineitem_l_discount, nation_n_name, nation_n_regionkey, customer_c_custkey, orders_o_orderkey, orders_o_custkey, supplier_s_suppkey, customer_c_nationkey, supplier_s_nationkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey], primary key: [$3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [3] } Table 2 { columns: [nation_n_regionkey, customer_c_custkey, orders_o_orderkey, orders_o_custkey, supplier_s_suppkey, 
customer_c_nationkey, supplier_s_nationkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC], value indices: [11], distribution key: [0] } - Table 3 { columns: [region_r_regionkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 4 { columns: [region_r_regionkey, region_r_regionkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 5 { columns: [supplier_s_nationkey, lineitem_l_extendedprice, lineitem_l_discount, customer_c_custkey, orders_o_orderkey, orders_o_custkey, supplier_s_suppkey, customer_c_nationkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey], primary key: [$0 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $0 ASC, $8 ASC, $9 ASC, $10 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], distribution key: [0] } - Table 6 { columns: [supplier_s_nationkey, customer_c_custkey, orders_o_orderkey, orders_o_custkey, supplier_s_suppkey, customer_c_nationkey, supplier_s_nationkey_0, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC], value indices: [10], distribution key: [0] } - Table 7 { columns: [nation_n_nationkey, nation_n_name, nation_n_regionkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 8 { columns: [nation_n_nationkey, nation_n_nationkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 9 { columns: [orders_o_orderkey, supplier_s_suppkey, supplier_s_nationkey, customer_c_custkey, orders_o_custkey, customer_c_nationkey], primary key: [$0 ASC, $1 ASC, $3 ASC, $0 ASC, $4 ASC, $1 ASC, $5 ASC, $2 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [0, 1] } - Table 10 { columns: [orders_o_orderkey, supplier_s_suppkey, customer_c_custkey, orders_o_orderkey_0, orders_o_custkey, supplier_s_suppkey_0, customer_c_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC], value indices: [8], distribution key: [0, 1] } - Table 11 { columns: [lineitem_l_orderkey, lineitem_l_suppkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_linenumber], primary key: [$0 ASC, $1 ASC, $0 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0, 1] } - Table 12 { columns: [lineitem_l_orderkey, lineitem_l_suppkey, lineitem_l_orderkey_0, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0, 1] } + Table 3 { columns: [region_r_regionkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 4 { columns: [region_r_regionkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 5 { columns: [supplier_s_nationkey, lineitem_l_extendedprice, lineitem_l_discount, customer_c_custkey, orders_o_orderkey, orders_o_custkey, supplier_s_suppkey, customer_c_nationkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey], primary key: [$0 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], distribution key: [0] } + Table 6 { columns: [supplier_s_nationkey, customer_c_custkey, orders_o_orderkey, orders_o_custkey, supplier_s_suppkey, customer_c_nationkey, lineitem_l_orderkey, 
lineitem_l_linenumber, lineitem_l_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC], value indices: [9], distribution key: [0] } + Table 7 { columns: [nation_n_nationkey, nation_n_name, nation_n_regionkey], primary key: [$0 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 8 { columns: [nation_n_nationkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 9 { columns: [orders_o_orderkey, supplier_s_suppkey, supplier_s_nationkey, customer_c_custkey, orders_o_custkey, customer_c_nationkey], primary key: [$0 ASC, $1 ASC, $3 ASC, $4 ASC, $5 ASC, $2 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [0, 1] } + Table 10 { columns: [orders_o_orderkey, supplier_s_suppkey, customer_c_custkey, orders_o_custkey, customer_c_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC], value indices: [6], distribution key: [0, 1] } + Table 11 { columns: [lineitem_l_orderkey, lineitem_l_suppkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_linenumber], primary key: [$0 ASC, $1 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0, 1] } + Table 12 { columns: [lineitem_l_orderkey, lineitem_l_suppkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0, 1] } Table 13 { columns: [customer_c_nationkey, orders_o_orderkey, customer_c_custkey, orders_o_custkey], primary key: [$0 ASC, $2 ASC, $1 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } Table 14 { columns: [customer_c_nationkey, customer_c_custkey, orders_o_orderkey, orders_o_custkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } Table 15 { columns: [supplier_s_suppkey, supplier_s_nationkey], primary key: [$1 ASC, $0 ASC], value indices: [0, 1], distribution key: [1] } Table 16 { columns: [supplier_s_nationkey, supplier_s_suppkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 17 { columns: [customer_c_custkey, customer_c_nationkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 18 { columns: [customer_c_custkey, customer_c_custkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 17 { columns: [customer_c_custkey, customer_c_nationkey], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 18 { columns: [customer_c_custkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 19 { columns: [orders_o_orderkey, orders_o_custkey], primary key: [$1 ASC, $0 ASC], value indices: [0, 1], distribution key: [1] } Table 20 { columns: [orders_o_custkey, orders_o_orderkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 4294967294 { columns: [n_name, revenue], primary key: [$1 DESC, $0 ASC], value indices: [0, 1], distribution key: [0] } @@ -1006,49 +1006,49 @@ and l_discount between 0.08 - 0.01 and 0.08 + 0.01 and l_quantity < 24; logical_plan: | - LogicalProject { exprs: [sum((lineitem.l_extendedprice * lineitem.l_discount))] } - └─LogicalAgg { aggs: [sum((lineitem.l_extendedprice * lineitem.l_discount))] } - └─LogicalProject { exprs: [(lineitem.l_extendedprice * lineitem.l_discount)] } + LogicalProject { exprs: [sum($expr1)] } + └─LogicalAgg { aggs: [sum($expr1)] } + └─LogicalProject { exprs: [(lineitem.l_extendedprice * 
lineitem.l_discount) as $expr1] } └─LogicalFilter { predicate: (lineitem.l_shipdate >= '1994-01-01':Date) AND (lineitem.l_shipdate < ('1994-01-01':Date + '1 year':Interval)) AND (lineitem.l_discount >= (0.08:Decimal - 0.01:Decimal)) AND (lineitem.l_discount <= (0.08:Decimal + 0.01:Decimal)) AND (lineitem.l_quantity < 24:Int32) } └─LogicalScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate, lineitem.l_shipinstruct, lineitem.l_shipmode, lineitem.l_comment] } optimized_logical_plan: | - LogicalAgg { aggs: [sum((lineitem.l_extendedprice * lineitem.l_discount))] } - └─LogicalProject { exprs: [(lineitem.l_extendedprice * lineitem.l_discount)] } + LogicalAgg { aggs: [sum($expr19)] } + └─LogicalProject { exprs: [(lineitem.l_extendedprice * lineitem.l_discount) as $expr19] } └─LogicalScan { table: lineitem, output_columns: [lineitem.l_extendedprice, lineitem.l_discount], required_columns: [lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_quantity, lineitem.l_shipdate], predicate: (lineitem.l_shipdate >= '1994-01-01':Date) AND (lineitem.l_shipdate < ('1994-01-01':Date + '1 year':Interval)) AND (lineitem.l_discount >= (0.08:Decimal - 0.01:Decimal)) AND (lineitem.l_discount <= (0.08:Decimal + 0.01:Decimal)) AND (lineitem.l_quantity < 24:Int32) } batch_plan: | - BatchSimpleAgg { aggs: [sum(sum((lineitem.l_extendedprice * lineitem.l_discount)))] } + BatchSimpleAgg { aggs: [sum(sum($expr42))] } └─BatchExchange { order: [], dist: Single } - └─BatchSimpleAgg { aggs: [sum((lineitem.l_extendedprice * lineitem.l_discount))] } - └─BatchProject { exprs: [(lineitem.l_extendedprice * lineitem.l_discount)] } + └─BatchSimpleAgg { aggs: [sum($expr42)] } + └─BatchProject { exprs: [(lineitem.l_extendedprice * lineitem.l_discount) as $expr42] } └─BatchFilter { predicate: (lineitem.l_shipdate >= '1994-01-01':Date) AND (lineitem.l_shipdate < ('1994-01-01':Date + '1 year':Interval)) AND (lineitem.l_discount >= (0.08:Decimal - 0.01:Decimal)) AND (lineitem.l_discount <= (0.08:Decimal + 0.01:Decimal)) AND (lineitem.l_quantity < 24:Int32) } └─BatchScan { table: lineitem, columns: [lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_quantity, lineitem.l_shipdate], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [revenue], pk_columns: [] } - └─StreamProject { exprs: [sum(sum((lineitem.l_extendedprice * lineitem.l_discount)))] } - └─StreamGlobalSimpleAgg { aggs: [count, sum(sum((lineitem.l_extendedprice * lineitem.l_discount)))] } + └─StreamProject { exprs: [sum(sum($expr66))] } + └─StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr66))] } └─StreamExchange { dist: Single } - └─StreamStatelessLocalSimpleAgg { aggs: [count, sum((lineitem.l_extendedprice * lineitem.l_discount))] } - └─StreamProject { exprs: [(lineitem.l_extendedprice * lineitem.l_discount), lineitem.l_orderkey, lineitem.l_linenumber] } + └─StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr66)] } + └─StreamProject { exprs: [(lineitem.l_extendedprice * lineitem.l_discount) as $expr66, lineitem.l_orderkey, lineitem.l_linenumber] } └─StreamFilter { predicate: (lineitem.l_shipdate >= '1994-01-01':Date) AND (lineitem.l_shipdate < ('1994-01-01':Date + '1 year':Interval)) AND (lineitem.l_discount >= (0.08:Decimal - 0.01:Decimal)) AND (lineitem.l_discount <= (0.08:Decimal + 
0.01:Decimal)) AND (lineitem.l_quantity < 24:Int32) } └─StreamTableScan { table: lineitem, columns: [lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_shipdate], pk: [lineitem.l_orderkey, lineitem.l_linenumber], dist: UpstreamHashShard(lineitem.l_orderkey, lineitem.l_linenumber) } stream_dist_plan: | Fragment 0 StreamMaterialize { columns: [revenue], pk_columns: [] } materialized table: 4294967294 - StreamProject { exprs: [sum(sum((lineitem.l_extendedprice * lineitem.l_discount)))] } - StreamGlobalSimpleAgg { aggs: [count, sum(sum((lineitem.l_extendedprice * lineitem.l_discount)))] } + StreamProject { exprs: [sum(sum($expr66))] } + StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr66))] } result table: 0, state tables: [] StreamExchange Single from 1 Fragment 1 - StreamStatelessLocalSimpleAgg { aggs: [count, sum((lineitem.l_extendedprice * lineitem.l_discount))] } - StreamProject { exprs: [(lineitem.l_extendedprice * lineitem.l_discount), lineitem.l_orderkey, lineitem.l_linenumber] } + StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr66)] } + StreamProject { exprs: [(lineitem.l_extendedprice * lineitem.l_discount) as $expr66, lineitem.l_orderkey, lineitem.l_linenumber] } StreamFilter { predicate: (lineitem.l_shipdate >= '1994-01-01':Date) AND (lineitem.l_shipdate < ('1994-01-01':Date + '1 year':Interval)) AND (lineitem.l_discount >= (0.08:Decimal - 0.01:Decimal)) AND (lineitem.l_discount <= (0.08:Decimal + 0.01:Decimal)) AND (lineitem.l_quantity < 24:Int32) } Chain { table: lineitem, columns: [lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_shipdate], pk: [lineitem.l_orderkey, lineitem.l_linenumber], dist: UpstreamHashShard(lineitem.l_orderkey, lineitem.l_linenumber) } Upstream BatchPlanNode - Table 0 { columns: [count, sum(sum((lineitem_l_extendedprice * lineitem_l_discount)))], primary key: [], value indices: [0, 1], distribution key: [] } + Table 0 { columns: [count, sum(sum($expr66))], primary key: [], value indices: [0, 1], distribution key: [] } Table 4294967294 { columns: [revenue], primary key: [], value indices: [0], distribution key: [] } - id: tpch_q7 before: @@ -1094,10 +1094,10 @@ cust_nation, l_year; logical_plan: | - LogicalProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalAgg { group_key: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate)], aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } - └─LogicalProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + LogicalProject { exprs: [nation.n_name, nation.n_name, $expr1, sum($expr2)] } + └─LogicalAgg { group_key: [nation.n_name, nation.n_name, $expr1], aggs: [sum($expr2)] } + └─LogicalProject { exprs: [nation.n_name, nation.n_name, $expr1, $expr2] } + └─LogicalProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate) as $expr1, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr2] } └─LogicalFilter { predicate: (supplier.s_suppkey = lineitem.l_suppkey) AND (orders.o_orderkey = 
lineitem.l_orderkey) AND (customer.c_custkey = orders.o_custkey) AND (supplier.s_nationkey = nation.n_nationkey) AND (customer.c_nationkey = nation.n_nationkey) AND (((nation.n_name = 'ROMANIA':Varchar) AND (nation.n_name = 'IRAN':Varchar)) OR ((nation.n_name = 'IRAN':Varchar) AND (nation.n_name = 'ROMANIA':Varchar))) AND (lineitem.l_shipdate >= '1983-01-01':Date) AND (lineitem.l_shipdate <= '2000-12-31':Date) } └─LogicalJoin { type: Inner, on: true, output: all } ├─LogicalJoin { type: Inner, on: true, output: all } @@ -1111,8 +1111,8 @@ | └─LogicalScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey, nation.n_comment] } └─LogicalScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey, nation.n_comment] } optimized_logical_plan: | - LogicalAgg { group_key: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate)], aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + LogicalAgg { group_key: [nation.n_name, nation.n_name, $expr39], aggs: [sum($expr40)] } + └─LogicalProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate) as $expr39, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr40] } └─LogicalJoin { type: Inner, on: (customer.c_nationkey = nation.n_nationkey) AND (((nation.n_name = 'ROMANIA':Varchar) AND (nation.n_name = 'IRAN':Varchar)) OR ((nation.n_name = 'IRAN':Varchar) AND (nation.n_name = 'ROMANIA':Varchar))), output: [lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, nation.n_name, nation.n_name] } ├─LogicalJoin { type: Inner, on: (customer.c_custkey = orders.o_custkey), output: [lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, nation.n_name, customer.c_nationkey] } | ├─LogicalJoin { type: Inner, on: (orders.o_orderkey = lineitem.l_orderkey), output: [lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, nation.n_name, orders.o_custkey] } @@ -1125,11 +1125,11 @@ | └─LogicalScan { table: customer, columns: [customer.c_custkey, customer.c_nationkey] } └─LogicalScan { table: nation, columns: [nation.n_nationkey, nation.n_name] } batch_plan: | - BatchExchange { order: [nation.n_name ASC, nation.n_name ASC, Extract('YEAR':Varchar, lineitem.l_shipdate) ASC], dist: Single } - └─BatchSort { order: [nation.n_name ASC, nation.n_name ASC, Extract('YEAR':Varchar, lineitem.l_shipdate) ASC] } - └─BatchHashAgg { group_key: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate)], aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─BatchExchange { order: [], dist: HashShard(nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate)) } - └─BatchProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + BatchExchange { order: [nation.n_name ASC, nation.n_name ASC, $expr85 ASC], dist: Single } + └─BatchSort { order: [nation.n_name ASC, nation.n_name ASC, $expr85 ASC] } + └─BatchHashAgg { group_key: [nation.n_name, nation.n_name, $expr85], aggs: [sum($expr86)] } + └─BatchExchange { order: [], dist: HashShard(nation.n_name, nation.n_name, $expr85) } + └─BatchProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate) as $expr85, 
(lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr86] } └─BatchLookupJoin { type: Inner, predicate: customer.c_nationkey = nation.n_nationkey AND (((nation.n_name = 'ROMANIA':Varchar) AND (nation.n_name = 'IRAN':Varchar)) OR ((nation.n_name = 'IRAN':Varchar) AND (nation.n_name = 'ROMANIA':Varchar))), output: [lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, nation.n_name, nation.n_name] } └─BatchExchange { order: [], dist: UpstreamHashShard(customer.c_nationkey) } └─BatchLookupJoin { type: Inner, predicate: orders.o_custkey = customer.c_custkey, output: [lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, nation.n_name, customer.c_nationkey] } @@ -1146,10 +1146,10 @@ └─BatchScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [supp_nation, cust_nation, l_year, revenue], pk_columns: [supp_nation, cust_nation, l_year] } - └─StreamProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─StreamHashAgg { group_key: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate)], aggs: [count, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─StreamExchange { dist: HashShard(nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate)) } - └─StreamProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), supplier.s_suppkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey] } + └─StreamProject { exprs: [nation.n_name, nation.n_name, $expr135, sum($expr136)] } + └─StreamHashAgg { group_key: [nation.n_name, nation.n_name, $expr135], aggs: [count, sum($expr136)] } + └─StreamExchange { dist: HashShard(nation.n_name, nation.n_name, $expr135) } + └─StreamProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate) as $expr135, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr136, supplier.s_suppkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey] } └─StreamFilter { predicate: (((nation.n_name = 'ROMANIA':Varchar) AND (nation.n_name = 'IRAN':Varchar)) OR ((nation.n_name = 'IRAN':Varchar) AND (nation.n_name = 'ROMANIA':Varchar))) } └─StreamHashJoin { type: Inner, predicate: customer.c_nationkey = nation.n_nationkey, output: all } ├─StreamExchange { dist: HashShard(customer.c_nationkey) } @@ -1177,13 +1177,13 @@ Fragment 0 StreamMaterialize { columns: [supp_nation, cust_nation, l_year, revenue], pk_columns: [supp_nation, cust_nation, l_year] } materialized table: 4294967294 - StreamProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - StreamHashAgg { group_key: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate)], aggs: [count, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + StreamProject { exprs: [nation.n_name, nation.n_name, 
$expr135, sum($expr136)] } + StreamHashAgg { group_key: [nation.n_name, nation.n_name, $expr135], aggs: [count, sum($expr136)] } result table: 0, state tables: [] StreamExchange Hash([0, 1, 2]) from 1 Fragment 1 - StreamProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), supplier.s_suppkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey] } + StreamProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate) as $expr135, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr136, supplier.s_suppkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey] } StreamFilter { predicate: (((nation.n_name = 'ROMANIA':Varchar) AND (nation.n_name = 'IRAN':Varchar)) OR ((nation.n_name = 'IRAN':Varchar) AND (nation.n_name = 'ROMANIA':Varchar))) } StreamHashJoin { type: Inner, predicate: customer.c_nationkey = nation.n_nationkey, output: all } left table: 1, right table 3, left degree table: 2, right degree table: 4, @@ -1245,25 +1245,25 @@ Upstream BatchPlanNode - Table 0 { columns: [nation_n_name, nation_n_name_0, Extract('YEAR':Varchar, lineitem_l_shipdate), count, sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount)))], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3, 4], distribution key: [0, 1, 2] } + Table 0 { columns: [nation_n_name, nation_n_name_0, $expr135, count, sum($expr136)], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3, 4], distribution key: [0, 1, 2] } Table 1 { columns: [lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_shipdate, nation_n_name, customer_c_nationkey, supplier_s_suppkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, orders_o_custkey, customer_c_custkey], primary key: [$4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $13 ASC, $12 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [4] } Table 2 { columns: [customer_c_nationkey, supplier_s_suppkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, customer_c_custkey, orders_o_custkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC], value indices: [10], distribution key: [0] } - Table 3 { columns: [nation_n_nationkey, nation_n_name], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 4 { columns: [nation_n_nationkey, nation_n_nationkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 3 { columns: [nation_n_nationkey, nation_n_name], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 4 { columns: [nation_n_nationkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 5 { columns: [lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_shipdate, nation_n_name, orders_o_custkey, supplier_s_suppkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey], primary key: [$4 
ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], distribution key: [4] } Table 6 { columns: [orders_o_custkey, supplier_s_suppkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC], value indices: [8], distribution key: [0] } - Table 7 { columns: [customer_c_custkey, customer_c_nationkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 8 { columns: [customer_c_custkey, customer_c_custkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 9 { columns: [lineitem_l_orderkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_shipdate, nation_n_name, supplier_s_suppkey, lineitem_l_linenumber, lineitem_l_suppkey, supplier_s_nationkey, nation_n_nationkey], primary key: [$0 ASC, $5 ASC, $0 ASC, $6 ASC, $7 ASC, $9 ASC, $8 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], distribution key: [0] } - Table 10 { columns: [lineitem_l_orderkey, supplier_s_suppkey, lineitem_l_orderkey_0, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [7], distribution key: [0] } - Table 11 { columns: [orders_o_orderkey, orders_o_custkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 12 { columns: [orders_o_orderkey, orders_o_orderkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 7 { columns: [customer_c_custkey, customer_c_nationkey], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 8 { columns: [customer_c_custkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 9 { columns: [lineitem_l_orderkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_shipdate, nation_n_name, supplier_s_suppkey, lineitem_l_linenumber, lineitem_l_suppkey, supplier_s_nationkey, nation_n_nationkey], primary key: [$0 ASC, $5 ASC, $6 ASC, $7 ASC, $9 ASC, $8 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], distribution key: [0] } + Table 10 { columns: [lineitem_l_orderkey, supplier_s_suppkey, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC], value indices: [6], distribution key: [0] } + Table 11 { columns: [orders_o_orderkey, orders_o_custkey], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 12 { columns: [orders_o_orderkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 13 { columns: [supplier_s_nationkey, lineitem_l_orderkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_shipdate, supplier_s_suppkey, lineitem_l_linenumber, lineitem_l_suppkey], primary key: [$0 ASC, $5 ASC, $1 ASC, $6 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [0] } Table 14 { columns: [supplier_s_nationkey, supplier_s_suppkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC], value indices: [5], distribution key: [0] } - Table 15 { columns: [nation_n_nationkey, nation_n_name], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 16 { 
columns: [nation_n_nationkey, nation_n_nationkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 17 { columns: [supplier_s_suppkey, supplier_s_nationkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 18 { columns: [supplier_s_suppkey, supplier_s_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 15 { columns: [nation_n_nationkey, nation_n_name], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 16 { columns: [nation_n_nationkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 17 { columns: [supplier_s_suppkey, supplier_s_nationkey], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 18 { columns: [supplier_s_suppkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 19 { columns: [lineitem_l_orderkey, lineitem_l_suppkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_shipdate, lineitem_l_linenumber], primary key: [$1 ASC, $0 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [1] } Table 20 { columns: [lineitem_l_suppkey, lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } Table 4294967294 { columns: [supp_nation, cust_nation, l_year, revenue], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [0, 1, 2, 3], distribution key: [0, 1, 2] } @@ -1309,10 +1309,10 @@ order by o_year; logical_plan: | - LogicalProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate), RoundDigit((sum(Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)) / sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))), 6:Int32)] } - └─LogicalAgg { group_key: [Extract('YEAR':Varchar, orders.o_orderdate)], aggs: [sum(Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate), Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } - └─LogicalProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), nation.n_name] } + LogicalProject { exprs: [$expr1, RoundDigit((sum($expr3) / sum($expr2)), 6:Int32) as $expr4] } + └─LogicalAgg { group_key: [$expr1], aggs: [sum($expr3), sum($expr2)] } + └─LogicalProject { exprs: [$expr1, Case((nation.n_name = 'IRAN':Varchar), $expr2, 0:Int32::Decimal) as $expr3, $expr2] } + └─LogicalProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate) as $expr1, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr2, nation.n_name] } └─LogicalFilter { predicate: (part.p_partkey = lineitem.l_partkey) AND (supplier.s_suppkey = lineitem.l_suppkey) AND (lineitem.l_orderkey = orders.o_orderkey) AND (orders.o_custkey = customer.c_custkey) AND (customer.c_nationkey = nation.n_nationkey) AND (nation.n_regionkey = region.r_regionkey) AND (region.r_name = 'ASIA':Varchar) AND (supplier.s_nationkey = nation.n_nationkey) AND (orders.o_orderdate >= '1995-01-01':Date) AND (orders.o_orderdate <= '1996-12-31':Date) AND (part.p_type = 'PROMO ANODIZED 
STEEL':Varchar) } └─LogicalJoin { type: Inner, on: true, output: all } ├─LogicalJoin { type: Inner, on: true, output: all } @@ -1330,9 +1330,9 @@ | └─LogicalScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey, nation.n_comment] } └─LogicalScan { table: region, columns: [region.r_regionkey, region.r_name, region.r_comment] } optimized_logical_plan: | - LogicalProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate), RoundDigit((sum(Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)) / sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))), 6:Int32)] } - └─LogicalAgg { group_key: [Extract('YEAR':Varchar, orders.o_orderdate)], aggs: [sum(Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate), Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + LogicalProject { exprs: [$expr76, RoundDigit((sum($expr77) / sum($expr78)), 6:Int32) as $expr79] } + └─LogicalAgg { group_key: [$expr76], aggs: [sum($expr77), sum($expr78)] } + └─LogicalProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate) as $expr76, Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal) as $expr77, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr78] } └─LogicalJoin { type: Inner, on: (nation.n_regionkey = region.r_regionkey), output: [lineitem.l_extendedprice, lineitem.l_discount, orders.o_orderdate, nation.n_name] } ├─LogicalJoin { type: Inner, on: (customer.c_nationkey = nation.n_nationkey), output: [lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, orders.o_orderdate, nation.n_regionkey] } | ├─LogicalJoin { type: Inner, on: (orders.o_custkey = customer.c_custkey), output: [lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, orders.o_orderdate, customer.c_nationkey] } @@ -1349,12 +1349,12 @@ | └─LogicalScan { table: nation, columns: [nation.n_nationkey, nation.n_regionkey] } └─LogicalScan { table: region, output_columns: [region.r_regionkey], required_columns: [region.r_regionkey, region.r_name], predicate: (region.r_name = 'ASIA':Varchar) } batch_plan: | - BatchExchange { order: [Extract('YEAR':Varchar, orders.o_orderdate) ASC], dist: Single } - └─BatchProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate), RoundDigit((sum(Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)) / sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))), 6:Int32)] } - └─BatchSort { order: [Extract('YEAR':Varchar, orders.o_orderdate) ASC] } - └─BatchHashAgg { group_key: [Extract('YEAR':Varchar, orders.o_orderdate)], aggs: [sum(Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─BatchExchange { order: [], dist: HashShard(Extract('YEAR':Varchar, orders.o_orderdate)) } - └─BatchProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate), Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal), 
(lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + BatchExchange { order: [$expr167 ASC], dist: Single } + └─BatchProject { exprs: [$expr167, RoundDigit((sum($expr168) / sum($expr169)), 6:Int32) as $expr170] } + └─BatchSort { order: [$expr167 ASC] } + └─BatchHashAgg { group_key: [$expr167], aggs: [sum($expr168), sum($expr169)] } + └─BatchExchange { order: [], dist: HashShard($expr167) } + └─BatchProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate) as $expr167, Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal) as $expr168, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr169] } └─BatchLookupJoin { type: Inner, predicate: nation.n_regionkey = region.r_regionkey AND (region.r_name = 'ASIA':Varchar), output: [lineitem.l_extendedprice, lineitem.l_discount, orders.o_orderdate, nation.n_name] } └─BatchExchange { order: [], dist: UpstreamHashShard(nation.n_regionkey) } └─BatchLookupJoin { type: Inner, predicate: customer.c_nationkey = nation.n_nationkey, output: [lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, orders.o_orderdate, nation.n_regionkey] } @@ -1376,10 +1376,10 @@ └─BatchScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [o_year, mkt_share], pk_columns: [o_year] } - └─StreamProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate), RoundDigit((sum(Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)) / sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))), 6:Int32)] } - └─StreamHashAgg { group_key: [Extract('YEAR':Varchar, orders.o_orderdate)], aggs: [count, sum(Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─StreamExchange { dist: HashShard(Extract('YEAR':Varchar, orders.o_orderdate)) } - └─StreamProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate), Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, region.r_regionkey, nation.n_regionkey] } + └─StreamProject { exprs: [$expr262, RoundDigit((sum($expr263) / sum($expr264)), 6:Int32) as $expr266] } + └─StreamHashAgg { group_key: [$expr262], aggs: [count, sum($expr263), sum($expr264)] } + └─StreamExchange { dist: HashShard($expr262) } + └─StreamProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate) as $expr262, Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal) as $expr263, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr264, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, region.r_regionkey, nation.n_regionkey] } └─StreamHashJoin { 
type: Inner, predicate: nation.n_regionkey = region.r_regionkey, output: [lineitem.l_extendedprice, lineitem.l_discount, orders.o_orderdate, nation.n_name, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, nation.n_regionkey, region.r_regionkey] } ├─StreamExchange { dist: HashShard(nation.n_regionkey) } | └─StreamHashJoin { type: Inner, predicate: customer.c_nationkey = nation.n_nationkey, output: [lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, orders.o_orderdate, nation.n_regionkey, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, customer.c_nationkey, nation.n_nationkey] } @@ -1418,13 +1418,13 @@ Fragment 0 StreamMaterialize { columns: [o_year, mkt_share], pk_columns: [o_year] } materialized table: 4294967294 - StreamProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate), RoundDigit((sum(Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)) / sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))), 6:Int32)] } - StreamHashAgg { group_key: [Extract('YEAR':Varchar, orders.o_orderdate)], aggs: [count, sum(Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + StreamProject { exprs: [$expr262, RoundDigit((sum($expr263) / sum($expr264)), 6:Int32) as $expr266] } + StreamHashAgg { group_key: [$expr262], aggs: [count, sum($expr263), sum($expr264)] } result table: 0, state tables: [] StreamExchange Hash([0]) from 1 Fragment 1 - StreamProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate), Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, region.r_regionkey, nation.n_regionkey] } + StreamProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate) as $expr262, Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal) as $expr263, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr264, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, region.r_regionkey, nation.n_regionkey] } StreamHashJoin { type: Inner, predicate: nation.n_regionkey = region.r_regionkey, output: [lineitem.l_extendedprice, lineitem.l_discount, orders.o_orderdate, nation.n_name, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, 
nation.n_regionkey, region.r_regionkey] } left table: 1, right table 3, left degree table: 2, right degree table: 4, StreamExchange Hash([4]) from 2 @@ -1511,33 +1511,33 @@ Upstream BatchPlanNode - Table 0 { columns: [Extract('YEAR':Varchar, orders_o_orderdate), count, sum(Case((nation_n_name = 'IRAN':Varchar), (lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount)), 0:Int32::Decimal)), sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount)))], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [0] } + Table 0 { columns: [$expr262, count, sum($expr263), sum($expr264)], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [0] } Table 1 { columns: [lineitem_l_extendedprice, lineitem_l_discount, nation_n_name, orders_o_orderdate, nation_n_regionkey, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, customer_c_custkey, orders_o_custkey, customer_c_nationkey, nation_n_nationkey_0], primary key: [$4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC, $15 ASC, $17 ASC, $16 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], distribution key: [4] } Table 2 { columns: [nation_n_regionkey, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, customer_c_custkey, orders_o_custkey, nation_n_nationkey_0, customer_c_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC], value indices: [14], distribution key: [0] } - Table 3 { columns: [region_r_regionkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 4 { columns: [region_r_regionkey, region_r_regionkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 3 { columns: [region_r_regionkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 4 { columns: [region_r_regionkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 5 { columns: [lineitem_l_extendedprice, lineitem_l_discount, nation_n_name, orders_o_orderdate, customer_c_nationkey, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, orders_o_custkey, customer_c_custkey], primary key: [$4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $15 ASC, $14 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], distribution key: [4] } Table 6 { columns: [customer_c_nationkey, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, customer_c_custkey, orders_o_custkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC], value indices: [12], distribution key: [0] } - Table 7 { columns: [nation_n_nationkey, nation_n_regionkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 8 { columns: [nation_n_nationkey, nation_n_nationkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution 
key: [0] } + Table 7 { columns: [nation_n_nationkey, nation_n_regionkey], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 8 { columns: [nation_n_nationkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 9 { columns: [lineitem_l_extendedprice, lineitem_l_discount, nation_n_name, orders_o_custkey, orders_o_orderdate, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey], primary key: [$3 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [3] } Table 10 { columns: [orders_o_custkey, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC], value indices: [10], distribution key: [0] } - Table 11 { columns: [customer_c_custkey, customer_c_nationkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 12 { columns: [customer_c_custkey, customer_c_custkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 13 { columns: [lineitem_l_orderkey, lineitem_l_extendedprice, lineitem_l_discount, nation_n_name, part_p_partkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, supplier_s_nationkey, nation_n_nationkey], primary key: [$0 ASC, $4 ASC, $0 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $10 ASC, $9 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], distribution key: [0] } - Table 14 { columns: [lineitem_l_orderkey, part_p_partkey, lineitem_l_orderkey_0, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC], value indices: [9], distribution key: [0] } - Table 15 { columns: [orders_o_orderkey, orders_o_custkey, orders_o_orderdate], primary key: [$0 ASC, $0 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 16 { columns: [orders_o_orderkey, orders_o_orderkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 11 { columns: [customer_c_custkey, customer_c_nationkey], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 12 { columns: [customer_c_custkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 13 { columns: [lineitem_l_orderkey, lineitem_l_extendedprice, lineitem_l_discount, nation_n_name, part_p_partkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, supplier_s_nationkey, nation_n_nationkey], primary key: [$0 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $10 ASC, $9 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], distribution key: [0] } + Table 14 { columns: [lineitem_l_orderkey, part_p_partkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC], value indices: [8], distribution key: [0] } + Table 15 { columns: [orders_o_orderkey, orders_o_custkey, orders_o_orderdate], primary 
key: [$0 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 16 { columns: [orders_o_orderkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 17 { columns: [lineitem_l_orderkey, lineitem_l_extendedprice, lineitem_l_discount, supplier_s_nationkey, part_p_partkey, lineitem_l_linenumber, lineitem_l_partkey, lineitem_l_suppkey, supplier_s_suppkey], primary key: [$3 ASC, $4 ASC, $0 ASC, $5 ASC, $6 ASC, $8 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [3] } Table 18 { columns: [supplier_s_nationkey, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [7], distribution key: [0] } - Table 19 { columns: [nation_n_nationkey, nation_n_name], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 20 { columns: [nation_n_nationkey, nation_n_nationkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 19 { columns: [nation_n_nationkey, nation_n_name], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 20 { columns: [nation_n_nationkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 21 { columns: [lineitem_l_orderkey, lineitem_l_suppkey, lineitem_l_extendedprice, lineitem_l_discount, part_p_partkey, lineitem_l_linenumber, lineitem_l_partkey], primary key: [$1 ASC, $4 ASC, $0 ASC, $5 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [1] } Table 22 { columns: [lineitem_l_suppkey, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC], value indices: [5], distribution key: [0] } - Table 23 { columns: [supplier_s_suppkey, supplier_s_nationkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 24 { columns: [supplier_s_suppkey, supplier_s_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 25 { columns: [part_p_partkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 26 { columns: [part_p_partkey, part_p_partkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 23 { columns: [supplier_s_suppkey, supplier_s_nationkey], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 24 { columns: [supplier_s_suppkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 25 { columns: [part_p_partkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 26 { columns: [part_p_partkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 27 { columns: [lineitem_l_orderkey, lineitem_l_partkey, lineitem_l_suppkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_linenumber], primary key: [$1 ASC, $0 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [1] } Table 28 { columns: [lineitem_l_partkey, lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } Table 4294967294 { columns: [o_year, mkt_share], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } @@ -1578,10 +1578,10 @@ nation, o_year desc; logical_plan: | - LogicalProject { exprs: [nation.n_name, 
Extract('YEAR':Varchar, orders.o_orderdate), RoundDigit(sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity))), 2:Int32)] } - └─LogicalAgg { group_key: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate)], aggs: [sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity)))] } - └─LogicalProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate), ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity))] } - └─LogicalProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate), ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity))] } + LogicalProject { exprs: [nation.n_name, $expr1, RoundDigit(sum($expr2), 2:Int32) as $expr3] } + └─LogicalAgg { group_key: [nation.n_name, $expr1], aggs: [sum($expr2)] } + └─LogicalProject { exprs: [nation.n_name, $expr1, $expr2] } + └─LogicalProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate) as $expr1, ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity)) as $expr2] } └─LogicalFilter { predicate: (supplier.s_suppkey = lineitem.l_suppkey) AND (partsupp.ps_suppkey = lineitem.l_suppkey) AND (partsupp.ps_partkey = lineitem.l_partkey) AND (part.p_partkey = lineitem.l_partkey) AND (orders.o_orderkey = lineitem.l_orderkey) AND (supplier.s_nationkey = nation.n_nationkey) AND Like(part.p_name, '%yellow%':Varchar) } └─LogicalJoin { type: Inner, on: true, output: all } ├─LogicalJoin { type: Inner, on: true, output: all } @@ -1595,9 +1595,9 @@ | └─LogicalScan { table: orders, columns: [orders.o_orderkey, orders.o_custkey, orders.o_orderstatus, orders.o_totalprice, orders.o_orderdate, orders.o_orderpriority, orders.o_clerk, orders.o_shippriority, orders.o_comment] } └─LogicalScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey, nation.n_comment] } optimized_logical_plan: | - LogicalProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate), RoundDigit(sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity))), 2:Int32)] } - └─LogicalAgg { group_key: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate)], aggs: [sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity)))] } - └─LogicalProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate), ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity))] } + LogicalProject { exprs: [nation.n_name, $expr57, RoundDigit(sum($expr58), 2:Int32) as $expr59] } + └─LogicalAgg { group_key: [nation.n_name, $expr57], aggs: [sum($expr58)] } + └─LogicalProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate) as $expr57, ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity)) as $expr58] } └─LogicalJoin { type: Inner, on: (orders.o_orderkey = lineitem.l_orderkey), output: [lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, partsupp.ps_supplycost, orders.o_orderdate, nation.n_name] } ├─LogicalJoin { type: Inner, on: (partsupp.ps_suppkey = lineitem.l_suppkey) AND (partsupp.ps_partkey = lineitem.l_partkey), output: [lineitem.l_orderkey, lineitem.l_quantity, 
lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, partsupp.ps_supplycost] } | ├─LogicalJoin { type: Inner, on: (supplier.s_nationkey = nation.n_nationkey), output: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name] } @@ -1610,12 +1610,12 @@ | └─LogicalScan { table: partsupp, columns: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_supplycost] } └─LogicalScan { table: orders, columns: [orders.o_orderkey, orders.o_orderdate] } batch_plan: | - BatchExchange { order: [nation.n_name ASC, Extract('YEAR':Varchar, orders.o_orderdate) DESC], dist: Single } - └─BatchProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate), RoundDigit(sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity))), 2:Int32)] } - └─BatchSort { order: [nation.n_name ASC, Extract('YEAR':Varchar, orders.o_orderdate) DESC] } - └─BatchHashAgg { group_key: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate)], aggs: [sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity)))] } - └─BatchExchange { order: [], dist: HashShard(nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate)) } - └─BatchProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate), ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity))] } + BatchExchange { order: [nation.n_name ASC, $expr125 DESC], dist: Single } + └─BatchProject { exprs: [nation.n_name, $expr125, RoundDigit(sum($expr126), 2:Int32) as $expr127] } + └─BatchSort { order: [nation.n_name ASC, $expr125 DESC] } + └─BatchHashAgg { group_key: [nation.n_name, $expr125], aggs: [sum($expr126)] } + └─BatchExchange { order: [], dist: HashShard(nation.n_name, $expr125) } + └─BatchProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate) as $expr125, ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity)) as $expr126] } └─BatchLookupJoin { type: Inner, predicate: lineitem.l_orderkey = orders.o_orderkey, output: [lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, partsupp.ps_supplycost, orders.o_orderdate, nation.n_name] } └─BatchExchange { order: [], dist: UpstreamHashShard(lineitem.l_orderkey) } └─BatchLookupJoin { type: Inner, predicate: lineitem.l_partkey = partsupp.ps_partkey AND lineitem.l_suppkey = partsupp.ps_suppkey, output: [lineitem.l_orderkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, partsupp.ps_supplycost] } @@ -1633,10 +1633,10 @@ └─BatchScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [nation, o_year, sum_profit], pk_columns: [nation, o_year] } - └─StreamProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate), RoundDigit(sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity))), 2:Int32)] } - └─StreamHashAgg { group_key: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate)], aggs: [count, sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity)))] } - └─StreamExchange { dist: HashShard(nation.n_name, 
Extract('YEAR':Varchar, orders.o_orderdate)) } - └─StreamProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate), ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity)), part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, partsupp.ps_partkey, partsupp.ps_suppkey, orders.o_orderkey] } + └─StreamProject { exprs: [nation.n_name, $expr196, RoundDigit(sum($expr197), 2:Int32) as $expr199] } + └─StreamHashAgg { group_key: [nation.n_name, $expr196], aggs: [count, sum($expr197)] } + └─StreamExchange { dist: HashShard(nation.n_name, $expr196) } + └─StreamProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate) as $expr196, ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity)) as $expr197, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, partsupp.ps_partkey, partsupp.ps_suppkey, orders.o_orderkey] } └─StreamHashJoin { type: Inner, predicate: lineitem.l_orderkey = orders.o_orderkey, output: [lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, partsupp.ps_supplycost, orders.o_orderdate, nation.n_name, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, partsupp.ps_partkey, partsupp.ps_suppkey, orders.o_orderkey] } ├─StreamExchange { dist: HashShard(lineitem.l_orderkey) } | └─StreamHashJoin { type: Inner, predicate: lineitem.l_suppkey = partsupp.ps_suppkey AND lineitem.l_partkey = partsupp.ps_partkey, output: [lineitem.l_orderkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, partsupp.ps_supplycost, part.p_partkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, partsupp.ps_partkey, partsupp.ps_suppkey] } @@ -1664,13 +1664,13 @@ Fragment 0 StreamMaterialize { columns: [nation, o_year, sum_profit], pk_columns: [nation, o_year] } materialized table: 4294967294 - StreamProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate), RoundDigit(sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity))), 2:Int32)] } - StreamHashAgg { group_key: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate)], aggs: [count, sum(((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity)))] } + StreamProject { exprs: [nation.n_name, $expr196, RoundDigit(sum($expr197), 2:Int32) as $expr199] } + StreamHashAgg { group_key: [nation.n_name, $expr196], aggs: [count, sum($expr197)] } result table: 0, state tables: [] StreamExchange Hash([0, 1]) from 1 Fragment 1 - StreamProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate), ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity)), part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, partsupp.ps_partkey, partsupp.ps_suppkey, orders.o_orderkey] } + StreamProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate) as $expr196, 
((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity)) as $expr197, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, partsupp.ps_partkey, partsupp.ps_suppkey, orders.o_orderkey] } StreamHashJoin { type: Inner, predicate: lineitem.l_orderkey = orders.o_orderkey, output: [lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, partsupp.ps_supplycost, orders.o_orderdate, nation.n_name, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, partsupp.ps_partkey, partsupp.ps_suppkey, orders.o_orderkey] } left table: 1, right table 3, left degree table: 2, right degree table: 4, StreamExchange Hash([0]) from 2 @@ -1732,25 +1732,25 @@ Upstream BatchPlanNode - Table 0 { columns: [nation_n_name, Extract('YEAR':Varchar, orders_o_orderdate), count, sum(((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount)) - (partsupp_ps_supplycost * lineitem_l_quantity)))], primary key: [$0 ASC, $1 ASC], value indices: [2, 3], distribution key: [0, 1] } - Table 1 { columns: [lineitem_l_orderkey, lineitem_l_quantity, lineitem_l_extendedprice, lineitem_l_discount, nation_n_name, partsupp_ps_supplycost, part_p_partkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, partsupp_ps_partkey, partsupp_ps_suppkey], primary key: [$0 ASC, $6 ASC, $0 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], distribution key: [0] } - Table 2 { columns: [lineitem_l_orderkey, part_p_partkey, lineitem_l_orderkey_0, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, partsupp_ps_partkey, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC], value indices: [11], distribution key: [0] } - Table 3 { columns: [orders_o_orderkey, orders_o_orderdate], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 4 { columns: [orders_o_orderkey, orders_o_orderkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 5 { columns: [lineitem_l_orderkey, lineitem_l_partkey, lineitem_l_suppkey, lineitem_l_quantity, lineitem_l_extendedprice, lineitem_l_discount, nation_n_name, part_p_partkey, lineitem_l_linenumber, supplier_s_suppkey, supplier_s_nationkey, nation_n_nationkey], primary key: [$2 ASC, $1 ASC, $7 ASC, $0 ASC, $8 ASC, $1 ASC, $9 ASC, $2 ASC, $11 ASC, $10 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], distribution key: [1, 2] } - Table 6 { columns: [lineitem_l_suppkey, lineitem_l_partkey, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey_0, supplier_s_suppkey, lineitem_l_suppkey_0, nation_n_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC], value indices: [10], distribution key: [1, 0] } - Table 7 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_supplycost], primary key: [$1 ASC, $0 ASC, $0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0, 1] } - Table 8 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, 
partsupp_ps_partkey_0, partsupp_ps_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [1, 0] } + Table 0 { columns: [nation_n_name, $expr196, count, sum($expr197)], primary key: [$0 ASC, $1 ASC], value indices: [2, 3], distribution key: [0, 1] } + Table 1 { columns: [lineitem_l_orderkey, lineitem_l_quantity, lineitem_l_extendedprice, lineitem_l_discount, nation_n_name, partsupp_ps_supplycost, part_p_partkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, partsupp_ps_partkey, partsupp_ps_suppkey], primary key: [$0 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], distribution key: [0] } + Table 2 { columns: [lineitem_l_orderkey, part_p_partkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, partsupp_ps_partkey, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC], value indices: [10], distribution key: [0] } + Table 3 { columns: [orders_o_orderkey, orders_o_orderdate], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 4 { columns: [orders_o_orderkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 5 { columns: [lineitem_l_orderkey, lineitem_l_partkey, lineitem_l_suppkey, lineitem_l_quantity, lineitem_l_extendedprice, lineitem_l_discount, nation_n_name, part_p_partkey, lineitem_l_linenumber, supplier_s_suppkey, supplier_s_nationkey, nation_n_nationkey], primary key: [$2 ASC, $1 ASC, $7 ASC, $0 ASC, $8 ASC, $9 ASC, $11 ASC, $10 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], distribution key: [1, 2] } + Table 6 { columns: [lineitem_l_suppkey, lineitem_l_partkey, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC], value indices: [8], distribution key: [1, 0] } + Table 7 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_supplycost], primary key: [$1 ASC, $0 ASC], value indices: [0, 1, 2], distribution key: [0, 1] } + Table 8 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [1, 0] } Table 9 { columns: [lineitem_l_orderkey, lineitem_l_partkey, lineitem_l_suppkey, lineitem_l_quantity, lineitem_l_extendedprice, lineitem_l_discount, supplier_s_nationkey, part_p_partkey, lineitem_l_linenumber, supplier_s_suppkey], primary key: [$6 ASC, $7 ASC, $0 ASC, $8 ASC, $1 ASC, $9 ASC, $2 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], distribution key: [6] } Table 10 { columns: [supplier_s_nationkey, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [7], distribution key: [0] } - Table 11 { columns: [nation_n_nationkey, nation_n_name], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 12 { columns: [nation_n_nationkey, nation_n_nationkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 11 { columns: [nation_n_nationkey, nation_n_name], primary key: [$0 
ASC], value indices: [0, 1], distribution key: [0] } + Table 12 { columns: [nation_n_nationkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 13 { columns: [lineitem_l_orderkey, lineitem_l_partkey, lineitem_l_suppkey, lineitem_l_quantity, lineitem_l_extendedprice, lineitem_l_discount, part_p_partkey, lineitem_l_linenumber], primary key: [$2 ASC, $6 ASC, $0 ASC, $7 ASC, $1 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [2] } Table 14 { columns: [lineitem_l_suppkey, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC], value indices: [5], distribution key: [0] } - Table 15 { columns: [supplier_s_suppkey, supplier_s_nationkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 16 { columns: [supplier_s_suppkey, supplier_s_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 17 { columns: [part_p_partkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 18 { columns: [part_p_partkey, part_p_partkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 15 { columns: [supplier_s_suppkey, supplier_s_nationkey], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 16 { columns: [supplier_s_suppkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 17 { columns: [part_p_partkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 18 { columns: [part_p_partkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 19 { columns: [lineitem_l_orderkey, lineitem_l_partkey, lineitem_l_suppkey, lineitem_l_quantity, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_linenumber], primary key: [$1 ASC, $0 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [1] } Table 20 { columns: [lineitem_l_partkey, lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } Table 4294967294 { columns: [nation, o_year, sum_profit], primary key: [$0 ASC, $1 DESC], value indices: [0, 1, 2], distribution key: [0, 1] } @@ -1791,10 +1791,10 @@ revenue desc limit 20; logical_plan: | - LogicalTopN { order: "[sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))) DESC]", limit: 20, offset: 0 } - └─LogicalProject { exprs: [customer.c_custkey, customer.c_name, sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } - └─LogicalAgg { group_key: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment], aggs: [sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount)))] } - └─LogicalProject { exprs: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment, (lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))] } + LogicalTopN { order: "[sum($expr1) DESC]", limit: 20, offset: 0 } + └─LogicalProject { exprs: [customer.c_custkey, customer.c_name, sum($expr1), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } + └─LogicalAgg { group_key: [customer.c_custkey, customer.c_name, 
customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment], aggs: [sum($expr1)] } + └─LogicalProject { exprs: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment, (lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount)) as $expr1] } └─LogicalFilter { predicate: (customer.c_custkey = orders.o_custkey) AND (lineitem.l_orderkey = orders.o_orderkey) AND (orders.o_orderdate >= '1994-01-01':Date) AND (orders.o_orderdate < ('1994-01-01':Date + '3 mons':Interval)) AND (lineitem.l_returnflag = 'R':Varchar) AND (customer.c_nationkey = nation.n_nationkey) } └─LogicalJoin { type: Inner, on: true, output: all } ├─LogicalJoin { type: Inner, on: true, output: all } @@ -1804,10 +1804,10 @@ | └─LogicalScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate, lineitem.l_shipinstruct, lineitem.l_shipmode, lineitem.l_comment] } └─LogicalScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey, nation.n_comment] } optimized_logical_plan: | - LogicalTopN { order: "[sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))) DESC]", limit: 20, offset: 0 } - └─LogicalProject { exprs: [customer.c_custkey, customer.c_name, sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } - └─LogicalAgg { group_key: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment], aggs: [sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount)))] } - └─LogicalProject { exprs: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment, (lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))] } + LogicalTopN { order: "[sum($expr19) DESC]", limit: 20, offset: 0 } + └─LogicalProject { exprs: [customer.c_custkey, customer.c_name, sum($expr19), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } + └─LogicalAgg { group_key: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment], aggs: [sum($expr19)] } + └─LogicalProject { exprs: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment, (lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount)) as $expr19] } └─LogicalJoin { type: Inner, on: (lineitem.l_orderkey = orders.o_orderkey), output: [customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name] } ├─LogicalJoin { type: Inner, on: (customer.c_nationkey = nation.n_nationkey), output: [customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, orders.o_orderkey, nation.n_name] } | ├─LogicalJoin { type: Inner, on: (customer.c_custkey = orders.o_custkey), output: [customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, 
customer.c_comment, orders.o_orderkey] } @@ -1816,13 +1816,13 @@ | └─LogicalScan { table: nation, columns: [nation.n_nationkey, nation.n_name] } └─LogicalScan { table: lineitem, output_columns: [lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount], required_columns: [lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_returnflag], predicate: (lineitem.l_returnflag = 'R':Varchar) } batch_plan: | - BatchTopN { order: "[sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))) DESC]", limit: 20, offset: 0 } + BatchTopN { order: "[sum($expr41) DESC]", limit: 20, offset: 0 } └─BatchExchange { order: [], dist: Single } - └─BatchTopN { order: "[sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))) DESC]", limit: 20, offset: 0 } - └─BatchProject { exprs: [customer.c_custkey, customer.c_name, sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } - └─BatchHashAgg { group_key: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment], aggs: [sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount)))] } + └─BatchTopN { order: "[sum($expr41) DESC]", limit: 20, offset: 0 } + └─BatchProject { exprs: [customer.c_custkey, customer.c_name, sum($expr41), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } + └─BatchHashAgg { group_key: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment], aggs: [sum($expr41)] } └─BatchExchange { order: [], dist: HashShard(customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment) } - └─BatchProject { exprs: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment, (lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))] } + └─BatchProject { exprs: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment, (lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount)) as $expr41] } └─BatchHashJoin { type: Inner, predicate: orders.o_orderkey = lineitem.l_orderkey, output: [customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name] } ├─BatchExchange { order: [], dist: HashShard(orders.o_orderkey) } | └─BatchLookupJoin { type: Inner, predicate: customer.c_nationkey = nation.n_nationkey, output: [customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, orders.o_orderkey, nation.n_name] } @@ -1840,15 +1840,15 @@ └─BatchScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_returnflag], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [c_custkey, c_name, revenue, c_acctbal, n_name, c_address, c_phone, c_comment], pk_columns: [c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment], order_descs: [revenue, c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment] } - └─StreamProject { exprs: [customer.c_custkey, customer.c_name, sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))), 
customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } - └─StreamTopN { order: "[sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))) DESC]", limit: 20, offset: 0 } + └─StreamProject { exprs: [customer.c_custkey, customer.c_name, sum($expr64), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } + └─StreamTopN { order: "[sum($expr64) DESC]", limit: 20, offset: 0 } └─StreamExchange { dist: Single } - └─StreamGroupTopN { order: "[sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))) DESC]", limit: 20, offset: 0, group_key: [8] } - └─StreamProject { exprs: [customer.c_custkey, customer.c_name, sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment, Vnode(customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment)] } - └─StreamProject { exprs: [customer.c_custkey, customer.c_name, sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } - └─StreamHashAgg { group_key: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment], aggs: [count, sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount)))] } + └─StreamGroupTopN { order: "[sum($expr64) DESC]", limit: 20, offset: 0, group_key: [8] } + └─StreamProject { exprs: [customer.c_custkey, customer.c_name, sum($expr64), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment, Vnode(customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment) as $expr65] } + └─StreamProject { exprs: [customer.c_custkey, customer.c_name, sum($expr64), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } + └─StreamHashAgg { group_key: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment], aggs: [count, sum($expr64)] } └─StreamExchange { dist: HashShard(customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment) } - └─StreamProject { exprs: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment, (lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount)), orders.o_orderkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, lineitem.l_orderkey, lineitem.l_linenumber] } + └─StreamProject { exprs: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment, (lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount)) as $expr64, orders.o_orderkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, lineitem.l_orderkey, lineitem.l_linenumber] } └─StreamHashJoin { type: Inner, predicate: orders.o_orderkey = lineitem.l_orderkey, output: [customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, orders.o_orderkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, lineitem.l_orderkey, lineitem.l_linenumber] } 
├─StreamExchange { dist: HashShard(orders.o_orderkey) } | └─StreamHashJoin { type: Inner, predicate: customer.c_nationkey = nation.n_nationkey, output: [customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, orders.o_orderkey, nation.n_name, orders.o_custkey, customer.c_nationkey, nation.n_nationkey] } @@ -1870,22 +1870,22 @@ Fragment 0 StreamMaterialize { columns: [c_custkey, c_name, revenue, c_acctbal, n_name, c_address, c_phone, c_comment], pk_columns: [c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment], order_descs: [revenue, c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment] } materialized table: 4294967294 - StreamProject { exprs: [customer.c_custkey, customer.c_name, sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } - StreamTopN { order: "[sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))) DESC]", limit: 20, offset: 0 } + StreamProject { exprs: [customer.c_custkey, customer.c_name, sum($expr64), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } + StreamTopN { order: "[sum($expr64) DESC]", limit: 20, offset: 0 } state table: 0 StreamExchange Single from 1 Fragment 1 - StreamGroupTopN { order: "[sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))) DESC]", limit: 20, offset: 0, group_key: [8] } + StreamGroupTopN { order: "[sum($expr64) DESC]", limit: 20, offset: 0, group_key: [8] } state table: 1 - StreamProject { exprs: [customer.c_custkey, customer.c_name, sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment, Vnode(customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment)] } - StreamProject { exprs: [customer.c_custkey, customer.c_name, sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount))), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } - StreamHashAgg { group_key: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment], aggs: [count, sum((lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount)))] } + StreamProject { exprs: [customer.c_custkey, customer.c_name, sum($expr64), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment, Vnode(customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment) as $expr65] } + StreamProject { exprs: [customer.c_custkey, customer.c_name, sum($expr64), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } + StreamHashAgg { group_key: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment], aggs: [count, sum($expr64)] } result table: 2, state tables: [] StreamExchange Hash([0, 1, 2, 3, 4, 5, 6]) from 2 Fragment 2 - StreamProject { exprs: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment, (lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount)), orders.o_orderkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, 
lineitem.l_orderkey, lineitem.l_linenumber] } + StreamProject { exprs: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment, (lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount)) as $expr64, orders.o_orderkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, lineitem.l_orderkey, lineitem.l_linenumber] } StreamHashJoin { type: Inner, predicate: orders.o_orderkey = lineitem.l_orderkey, output: [customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, orders.o_orderkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, lineitem.l_orderkey, lineitem.l_linenumber] } left table: 3, right table 5, left degree table: 4, right degree table: 6, StreamExchange Hash([6]) from 3 @@ -1927,19 +1927,19 @@ Upstream BatchPlanNode - Table 0 { columns: [customer_c_custkey, customer_c_name, sum((lineitem_l_extendedprice * (1_00:Decimal - lineitem_l_discount))), customer_c_acctbal, nation_n_name, customer_c_address, customer_c_phone, customer_c_comment, Vnode(customer_c_custkey, customer_c_name, customer_c_acctbal, customer_c_phone, nation_n_name, customer_c_address, customer_c_comment)], primary key: [$2 DESC, $0 ASC, $1 ASC, $3 ASC, $6 ASC, $4 ASC, $5 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [] } - Table 1 { columns: [customer_c_custkey, customer_c_name, sum((lineitem_l_extendedprice * (1_00:Decimal - lineitem_l_discount))), customer_c_acctbal, nation_n_name, customer_c_address, customer_c_phone, customer_c_comment, Vnode(customer_c_custkey, customer_c_name, customer_c_acctbal, customer_c_phone, nation_n_name, customer_c_address, customer_c_comment)], primary key: [$8 ASC, $2 DESC, $0 ASC, $1 ASC, $3 ASC, $6 ASC, $4 ASC, $5 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [0, 1, 3, 6, 4, 5, 7], vnode column idx: 8 } - Table 2 { columns: [customer_c_custkey, customer_c_name, customer_c_acctbal, customer_c_phone, nation_n_name, customer_c_address, customer_c_comment, count, sum((lineitem_l_extendedprice * (1_00:Decimal - lineitem_l_discount)))], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [7, 8], distribution key: [0, 1, 2, 3, 4, 5, 6] } - Table 3 { columns: [customer_c_custkey, customer_c_name, customer_c_address, customer_c_phone, customer_c_acctbal, customer_c_comment, orders_o_orderkey, nation_n_name, orders_o_custkey, customer_c_nationkey, nation_n_nationkey], primary key: [$6 ASC, $0 ASC, $6 ASC, $8 ASC, $10 ASC, $9 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], distribution key: [6] } - Table 4 { columns: [orders_o_orderkey, customer_c_custkey, orders_o_orderkey_0, orders_o_custkey, nation_n_nationkey, customer_c_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC], value indices: [6], distribution key: [0] } - Table 5 { columns: [lineitem_l_orderkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_linenumber], primary key: [$0 ASC, $0 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } - Table 6 { columns: [lineitem_l_orderkey, lineitem_l_orderkey_0, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } + Table 0 { columns: [customer_c_custkey, customer_c_name, sum($expr64), customer_c_acctbal, nation_n_name, customer_c_address, customer_c_phone, 
customer_c_comment, $expr65], primary key: [$2 DESC, $0 ASC, $1 ASC, $3 ASC, $6 ASC, $4 ASC, $5 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [] } + Table 1 { columns: [customer_c_custkey, customer_c_name, sum($expr64), customer_c_acctbal, nation_n_name, customer_c_address, customer_c_phone, customer_c_comment, $expr65], primary key: [$8 ASC, $2 DESC, $0 ASC, $1 ASC, $3 ASC, $6 ASC, $4 ASC, $5 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [0, 1, 3, 6, 4, 5, 7], vnode column idx: 8 } + Table 2 { columns: [customer_c_custkey, customer_c_name, customer_c_acctbal, customer_c_phone, nation_n_name, customer_c_address, customer_c_comment, count, sum($expr64)], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [7, 8], distribution key: [0, 1, 2, 3, 4, 5, 6] } + Table 3 { columns: [customer_c_custkey, customer_c_name, customer_c_address, customer_c_phone, customer_c_acctbal, customer_c_comment, orders_o_orderkey, nation_n_name, orders_o_custkey, customer_c_nationkey, nation_n_nationkey], primary key: [$6 ASC, $0 ASC, $8 ASC, $10 ASC, $9 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], distribution key: [6] } + Table 4 { columns: [orders_o_orderkey, customer_c_custkey, orders_o_custkey, nation_n_nationkey, customer_c_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC], value indices: [5], distribution key: [0] } + Table 5 { columns: [lineitem_l_orderkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_linenumber], primary key: [$0 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } + Table 6 { columns: [lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 7 { columns: [customer_c_custkey, customer_c_name, customer_c_address, customer_c_nationkey, customer_c_phone, customer_c_acctbal, customer_c_comment, orders_o_orderkey, orders_o_custkey], primary key: [$3 ASC, $0 ASC, $7 ASC, $8 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [3] } Table 8 { columns: [customer_c_nationkey, customer_c_custkey, orders_o_orderkey, orders_o_custkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } - Table 9 { columns: [nation_n_nationkey, nation_n_name], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 10 { columns: [nation_n_nationkey, nation_n_nationkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 11 { columns: [customer_c_custkey, customer_c_name, customer_c_address, customer_c_nationkey, customer_c_phone, customer_c_acctbal, customer_c_comment], primary key: [$0 ASC, $0 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [0] } - Table 12 { columns: [customer_c_custkey, customer_c_custkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 9 { columns: [nation_n_nationkey, nation_n_name], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 10 { columns: [nation_n_nationkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 11 { columns: [customer_c_custkey, customer_c_name, customer_c_address, customer_c_nationkey, customer_c_phone, customer_c_acctbal, customer_c_comment], primary key: [$0 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [0] } + Table 12 { columns: [customer_c_custkey, _degree], primary 
key: [$0 ASC], value indices: [1], distribution key: [0] } Table 13 { columns: [orders_o_orderkey, orders_o_custkey], primary key: [$1 ASC, $0 ASC], value indices: [0, 1], distribution key: [1] } Table 14 { columns: [orders_o_custkey, orders_o_orderkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 4294967294 { columns: [c_custkey, c_name, revenue, c_acctbal, n_name, c_address, c_phone, c_comment], primary key: [$2 DESC, $0 ASC, $1 ASC, $3 ASC, $6 ASC, $4 ASC, $5 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [] } @@ -1976,20 +1976,20 @@ order by value desc; logical_plan: | - LogicalProject { exprs: [partsupp.ps_partkey, sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } - └─LogicalFilter { predicate: (sum((partsupp.ps_supplycost * partsupp.ps_availqty)) > (sum((partsupp.ps_supplycost * partsupp.ps_availqty)) * 0.0001000000:Decimal)) } + LogicalProject { exprs: [partsupp.ps_partkey, sum($expr1)] } + └─LogicalFilter { predicate: (sum($expr1) > $expr3) } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 1, max_one_row: true } - ├─LogicalAgg { group_key: [partsupp.ps_partkey], aggs: [sum((partsupp.ps_supplycost * partsupp.ps_availqty)), sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } - | └─LogicalProject { exprs: [partsupp.ps_partkey, (partsupp.ps_supplycost * partsupp.ps_availqty)] } + ├─LogicalAgg { group_key: [partsupp.ps_partkey], aggs: [sum($expr1), sum($expr1)] } + | └─LogicalProject { exprs: [partsupp.ps_partkey, (partsupp.ps_supplycost * partsupp.ps_availqty) as $expr1] } | └─LogicalFilter { predicate: (partsupp.ps_suppkey = supplier.s_suppkey) AND (supplier.s_nationkey = nation.n_nationkey) AND (nation.n_name = 'ARGENTINA':Varchar) } | └─LogicalJoin { type: Inner, on: true, output: all } | ├─LogicalJoin { type: Inner, on: true, output: all } | | ├─LogicalScan { table: partsupp, columns: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty, partsupp.ps_supplycost, partsupp.ps_comment] } | | └─LogicalScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_acctbal, supplier.s_comment] } | └─LogicalScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey, nation.n_comment] } - └─LogicalProject { exprs: [(sum((partsupp.ps_supplycost * partsupp.ps_availqty)) * 0.0001000000:Decimal)] } - └─LogicalAgg { aggs: [sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } - └─LogicalProject { exprs: [(partsupp.ps_supplycost * partsupp.ps_availqty)] } + └─LogicalProject { exprs: [(sum($expr2) * 0.0001000000:Decimal) as $expr3] } + └─LogicalAgg { aggs: [sum($expr2)] } + └─LogicalProject { exprs: [(partsupp.ps_supplycost * partsupp.ps_availqty) as $expr2] } └─LogicalFilter { predicate: (partsupp.ps_suppkey = supplier.s_suppkey) AND (supplier.s_nationkey = nation.n_nationkey) AND (nation.n_name = 'ARGENTINA':Varchar) } └─LogicalJoin { type: Inner, on: true, output: all } ├─LogicalJoin { type: Inner, on: true, output: all } @@ -1997,39 +1997,39 @@ | └─LogicalScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_acctbal, supplier.s_comment] } └─LogicalScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey, nation.n_comment] } optimized_logical_plan: | - LogicalJoin { type: Inner, on: (sum((partsupp.ps_supplycost * partsupp.ps_availqty)) > (sum((partsupp.ps_supplycost * 
partsupp.ps_availqty)) * 0.0001000000:Decimal)), output: [partsupp.ps_partkey, sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } - ├─LogicalAgg { group_key: [partsupp.ps_partkey], aggs: [sum((partsupp.ps_supplycost * partsupp.ps_availqty)), sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } - | └─LogicalProject { exprs: [partsupp.ps_partkey, (partsupp.ps_supplycost * partsupp.ps_availqty)] } + LogicalJoin { type: Inner, on: (sum($expr55) > $expr57), output: [partsupp.ps_partkey, sum($expr55)] } + ├─LogicalAgg { group_key: [partsupp.ps_partkey], aggs: [sum($expr55), sum($expr55)] } + | └─LogicalProject { exprs: [partsupp.ps_partkey, (partsupp.ps_supplycost * partsupp.ps_availqty) as $expr55] } | └─LogicalJoin { type: Inner, on: (supplier.s_nationkey = nation.n_nationkey), output: [partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost] } | ├─LogicalJoin { type: Inner, on: (partsupp.ps_suppkey = supplier.s_suppkey), output: [partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey] } | | ├─LogicalScan { table: partsupp, columns: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty, partsupp.ps_supplycost] } | | └─LogicalScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_nationkey] } | └─LogicalScan { table: nation, output_columns: [nation.n_nationkey], required_columns: [nation.n_nationkey, nation.n_name], predicate: (nation.n_name = 'ARGENTINA':Varchar) } - └─LogicalProject { exprs: [(sum((partsupp.ps_supplycost * partsupp.ps_availqty)) * 0.0001000000:Decimal)] } - └─LogicalAgg { aggs: [sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } - └─LogicalProject { exprs: [(partsupp.ps_supplycost * partsupp.ps_availqty)] } + └─LogicalProject { exprs: [(sum($expr56) * 0.0001000000:Decimal) as $expr57] } + └─LogicalAgg { aggs: [sum($expr56)] } + └─LogicalProject { exprs: [(partsupp.ps_supplycost * partsupp.ps_availqty) as $expr56] } └─LogicalJoin { type: Inner, on: (supplier.s_nationkey = nation.n_nationkey), output: [partsupp.ps_availqty, partsupp.ps_supplycost] } ├─LogicalJoin { type: Inner, on: (partsupp.ps_suppkey = supplier.s_suppkey), output: [partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey] } | ├─LogicalScan { table: partsupp, columns: [partsupp.ps_suppkey, partsupp.ps_availqty, partsupp.ps_supplycost] } | └─LogicalScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_nationkey] } └─LogicalScan { table: nation, output_columns: [nation.n_nationkey], required_columns: [nation.n_nationkey, nation.n_name], predicate: (nation.n_name = 'ARGENTINA':Varchar) } batch_plan: | - BatchSort { order: [sum((partsupp.ps_supplycost * partsupp.ps_availqty)) DESC] } - └─BatchNestedLoopJoin { type: Inner, predicate: (sum((partsupp.ps_supplycost * partsupp.ps_availqty)) > (sum(sum((partsupp.ps_supplycost * partsupp.ps_availqty))) * 0.0001000000:Decimal)), output: [partsupp.ps_partkey, sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } + BatchSort { order: [sum($expr121) DESC] } + └─BatchNestedLoopJoin { type: Inner, predicate: (sum($expr121) > $expr123), output: [partsupp.ps_partkey, sum($expr121)] } ├─BatchExchange { order: [], dist: Single } - | └─BatchHashAgg { group_key: [partsupp.ps_partkey], aggs: [sum((partsupp.ps_supplycost * partsupp.ps_availqty)), sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } + | └─BatchHashAgg { group_key: [partsupp.ps_partkey], aggs: [sum($expr121), sum($expr121)] } | └─BatchExchange { order: [], dist: HashShard(partsupp.ps_partkey) } - | └─BatchProject { 
exprs: [partsupp.ps_partkey, (partsupp.ps_supplycost * partsupp.ps_availqty)] } + | └─BatchProject { exprs: [partsupp.ps_partkey, (partsupp.ps_supplycost * partsupp.ps_availqty) as $expr121] } | └─BatchLookupJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey AND (nation.n_name = 'ARGENTINA':Varchar), output: [partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost] } | └─BatchExchange { order: [], dist: UpstreamHashShard(supplier.s_nationkey) } | └─BatchLookupJoin { type: Inner, predicate: partsupp.ps_suppkey = supplier.s_suppkey, output: [partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey] } | └─BatchExchange { order: [], dist: UpstreamHashShard(partsupp.ps_suppkey) } | └─BatchScan { table: partsupp, columns: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty, partsupp.ps_supplycost], distribution: UpstreamHashShard(partsupp.ps_partkey, partsupp.ps_suppkey) } - └─BatchProject { exprs: [(sum(sum((partsupp.ps_supplycost * partsupp.ps_availqty))) * 0.0001000000:Decimal)] } - └─BatchSimpleAgg { aggs: [sum(sum((partsupp.ps_supplycost * partsupp.ps_availqty)))] } + └─BatchProject { exprs: [(sum(sum($expr122)) * 0.0001000000:Decimal) as $expr123] } + └─BatchSimpleAgg { aggs: [sum(sum($expr122))] } └─BatchExchange { order: [], dist: Single } - └─BatchSimpleAgg { aggs: [sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } - └─BatchProject { exprs: [(partsupp.ps_supplycost * partsupp.ps_availqty)] } + └─BatchSimpleAgg { aggs: [sum($expr122)] } + └─BatchProject { exprs: [(partsupp.ps_supplycost * partsupp.ps_availqty) as $expr122] } └─BatchLookupJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey AND (nation.n_name = 'ARGENTINA':Varchar), output: [partsupp.ps_availqty, partsupp.ps_supplycost] } └─BatchExchange { order: [], dist: UpstreamHashShard(supplier.s_nationkey) } └─BatchLookupJoin { type: Inner, predicate: partsupp.ps_suppkey = supplier.s_suppkey, output: [partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey] } @@ -2037,12 +2037,12 @@ └─BatchScan { table: partsupp, columns: [partsupp.ps_suppkey, partsupp.ps_availqty, partsupp.ps_supplycost], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [ps_partkey, value], pk_columns: [ps_partkey], order_descs: [value, ps_partkey] } - └─StreamProject { exprs: [partsupp.ps_partkey, sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } - └─StreamDynamicFilter { predicate: (sum((partsupp.ps_supplycost * partsupp.ps_availqty)) > (sum(sum((partsupp.ps_supplycost * partsupp.ps_availqty))) * 0.0001000000:Decimal)), output: [partsupp.ps_partkey, sum((partsupp.ps_supplycost * partsupp.ps_availqty)), sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } - ├─StreamProject { exprs: [partsupp.ps_partkey, sum((partsupp.ps_supplycost * partsupp.ps_availqty)), sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } - | └─StreamHashAgg { group_key: [partsupp.ps_partkey], aggs: [count, sum((partsupp.ps_supplycost * partsupp.ps_availqty)), sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } + └─StreamProject { exprs: [partsupp.ps_partkey, sum($expr190)] } + └─StreamDynamicFilter { predicate: (sum($expr190) > $expr193), output: [partsupp.ps_partkey, sum($expr190), sum($expr190)] } + ├─StreamProject { exprs: [partsupp.ps_partkey, sum($expr190), sum($expr190)] } + | └─StreamHashAgg { group_key: [partsupp.ps_partkey], aggs: [count, sum($expr190), sum($expr190)] } | └─StreamExchange { dist: HashShard(partsupp.ps_partkey) } - | 
└─StreamProject { exprs: [partsupp.ps_partkey, (partsupp.ps_supplycost * partsupp.ps_availqty), partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey] } + | └─StreamProject { exprs: [partsupp.ps_partkey, (partsupp.ps_supplycost * partsupp.ps_availqty) as $expr190, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey] } | └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, partsupp.ps_suppkey, supplier.s_suppkey, supplier.s_nationkey, nation.n_nationkey] } | ├─StreamExchange { dist: HashShard(supplier.s_nationkey) } | | └─StreamHashJoin { type: Inner, predicate: partsupp.ps_suppkey = supplier.s_suppkey, output: [partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey, partsupp.ps_suppkey, supplier.s_suppkey] } @@ -2055,11 +2055,11 @@ | └─StreamFilter { predicate: (nation.n_name = 'ARGENTINA':Varchar) } | └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [(sum(sum((partsupp.ps_supplycost * partsupp.ps_availqty))) * 0.0001000000:Decimal)] } - └─StreamGlobalSimpleAgg { aggs: [count, sum(sum((partsupp.ps_supplycost * partsupp.ps_availqty)))] } + └─StreamProject { exprs: [(sum(sum($expr191)) * 0.0001000000:Decimal) as $expr193] } + └─StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr191))] } └─StreamExchange { dist: Single } - └─StreamStatelessLocalSimpleAgg { aggs: [count, sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } - └─StreamProject { exprs: [(partsupp.ps_supplycost * partsupp.ps_availqty), partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey] } + └─StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr191)] } + └─StreamProject { exprs: [(partsupp.ps_supplycost * partsupp.ps_availqty) as $expr191, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey] } └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [partsupp.ps_availqty, partsupp.ps_supplycost, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, supplier.s_nationkey, nation.n_nationkey] } ├─StreamExchange { dist: HashShard(supplier.s_nationkey) } | └─StreamHashJoin { type: Inner, predicate: partsupp.ps_suppkey = supplier.s_suppkey, output: [partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey] } @@ -2075,17 +2075,17 @@ Fragment 0 StreamMaterialize { columns: [ps_partkey, value], pk_columns: [ps_partkey], order_descs: [value, ps_partkey] } materialized table: 4294967294 - StreamProject { exprs: [partsupp.ps_partkey, sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } - StreamDynamicFilter { predicate: (sum((partsupp.ps_supplycost * partsupp.ps_availqty)) > (sum(sum((partsupp.ps_supplycost * partsupp.ps_availqty))) * 0.0001000000:Decimal)), output: [partsupp.ps_partkey, sum((partsupp.ps_supplycost * partsupp.ps_availqty)), sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } + StreamProject { exprs: [partsupp.ps_partkey, sum($expr190)] } + StreamDynamicFilter { predicate: (sum($expr190) > $expr193), output: [partsupp.ps_partkey, sum($expr190), sum($expr190)] } left table: 0, right table 1 - StreamProject { exprs: [partsupp.ps_partkey, 
sum((partsupp.ps_supplycost * partsupp.ps_availqty)), sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } - StreamHashAgg { group_key: [partsupp.ps_partkey], aggs: [count, sum((partsupp.ps_supplycost * partsupp.ps_availqty)), sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } + StreamProject { exprs: [partsupp.ps_partkey, sum($expr190), sum($expr190)] } + StreamHashAgg { group_key: [partsupp.ps_partkey], aggs: [count, sum($expr190), sum($expr190)] } result table: 2, state tables: [] StreamExchange Hash([0]) from 1 StreamExchange Broadcast from 6 Fragment 1 - StreamProject { exprs: [partsupp.ps_partkey, (partsupp.ps_supplycost * partsupp.ps_availqty), partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey] } + StreamProject { exprs: [partsupp.ps_partkey, (partsupp.ps_supplycost * partsupp.ps_availqty) as $expr190, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey] } StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, partsupp.ps_suppkey, supplier.s_suppkey, supplier.s_nationkey, nation.n_nationkey] } left table: 3, right table 5, left degree table: 4, right degree table: 6, StreamExchange Hash([3]) from 2 @@ -2115,14 +2115,14 @@ BatchPlanNode Fragment 6 - StreamProject { exprs: [(sum(sum((partsupp.ps_supplycost * partsupp.ps_availqty))) * 0.0001000000:Decimal)] } - StreamGlobalSimpleAgg { aggs: [count, sum(sum((partsupp.ps_supplycost * partsupp.ps_availqty)))] } + StreamProject { exprs: [(sum(sum($expr191)) * 0.0001000000:Decimal) as $expr193] } + StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr191))] } result table: 11, state tables: [] StreamExchange Single from 7 Fragment 7 - StreamStatelessLocalSimpleAgg { aggs: [count, sum((partsupp.ps_supplycost * partsupp.ps_availqty))] } - StreamProject { exprs: [(partsupp.ps_supplycost * partsupp.ps_availqty), partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey] } + StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr191)] } + StreamProject { exprs: [(partsupp.ps_supplycost * partsupp.ps_availqty) as $expr191, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey] } StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [partsupp.ps_availqty, partsupp.ps_supplycost, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, supplier.s_nationkey, nation.n_nationkey] } left table: 12, right table 14, left degree table: 13, right degree table: 15, StreamExchange Hash([2]) from 8 @@ -2151,26 +2151,26 @@ Upstream BatchPlanNode - Table 0 { columns: [partsupp_ps_partkey, sum((partsupp_ps_supplycost * partsupp_ps_availqty)), sum((partsupp_ps_supplycost * partsupp_ps_availqty))_0], primary key: [$2 ASC, $0 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 1 { columns: [(sum(sum((partsupp_ps_supplycost * partsupp_ps_availqty))) * 0_0001000000:Decimal)], primary key: [], value indices: [0], distribution key: [] } - Table 2 { columns: [partsupp_ps_partkey, count, sum((partsupp_ps_supplycost * partsupp_ps_availqty)), sum((partsupp_ps_supplycost * partsupp_ps_availqty))_0], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [0] } + Table 0 { columns: [partsupp_ps_partkey, sum($expr190), sum($expr190)_0], primary key: [$2 ASC, $0 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 1 { columns: [$expr193], 
primary key: [], value indices: [0], distribution key: [] } + Table 2 { columns: [partsupp_ps_partkey, count, sum($expr190), sum($expr190)_0], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [0] } Table 3 { columns: [partsupp_ps_partkey, partsupp_ps_availqty, partsupp_ps_supplycost, supplier_s_nationkey, partsupp_ps_suppkey, supplier_s_suppkey], primary key: [$3 ASC, $0 ASC, $4 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [3] } Table 4 { columns: [supplier_s_nationkey, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } - Table 5 { columns: [nation_n_nationkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 6 { columns: [nation_n_nationkey, nation_n_nationkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 7 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_availqty, partsupp_ps_supplycost], primary key: [$1 ASC, $0 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [1] } - Table 8 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, partsupp_ps_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } - Table 9 { columns: [supplier_s_suppkey, supplier_s_nationkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 10 { columns: [supplier_s_suppkey, supplier_s_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 11 { columns: [count, sum(sum((partsupp_ps_supplycost * partsupp_ps_availqty)))], primary key: [], value indices: [0, 1], distribution key: [] } + Table 5 { columns: [nation_n_nationkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 6 { columns: [nation_n_nationkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 7 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_availqty, partsupp_ps_supplycost], primary key: [$1 ASC, $0 ASC], value indices: [0, 1, 2, 3], distribution key: [1] } + Table 8 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 9 { columns: [supplier_s_suppkey, supplier_s_nationkey], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 10 { columns: [supplier_s_suppkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 11 { columns: [count, sum(sum($expr191))], primary key: [], value indices: [0, 1], distribution key: [] } Table 12 { columns: [partsupp_ps_availqty, partsupp_ps_supplycost, supplier_s_nationkey, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey], primary key: [$2 ASC, $3 ASC, $4 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [2] } Table 13 { columns: [supplier_s_nationkey, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } - Table 14 { columns: [nation_n_nationkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 15 { columns: [nation_n_nationkey, nation_n_nationkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 16 { columns: [partsupp_ps_suppkey, partsupp_ps_availqty, partsupp_ps_supplycost, 
partsupp_ps_partkey], primary key: [$0 ASC, $3 ASC, $0 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } - Table 17 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, partsupp_ps_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } - Table 18 { columns: [supplier_s_suppkey, supplier_s_nationkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 19 { columns: [supplier_s_suppkey, supplier_s_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 14 { columns: [nation_n_nationkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 15 { columns: [nation_n_nationkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 16 { columns: [partsupp_ps_suppkey, partsupp_ps_availqty, partsupp_ps_supplycost, partsupp_ps_partkey], primary key: [$0 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } + Table 17 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 18 { columns: [supplier_s_suppkey, supplier_s_nationkey], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 19 { columns: [supplier_s_suppkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4294967294 { columns: [ps_partkey, value], primary key: [$1 DESC, $0 ASC], value indices: [0, 1], distribution key: [0] } - id: tpch_q12 before: @@ -2205,25 +2205,25 @@ order by l_shipmode; logical_plan: | - LogicalProject { exprs: [lineitem.l_shipmode, sum(Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32)), sum(Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32))] } - └─LogicalAgg { group_key: [lineitem.l_shipmode], aggs: [sum(Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32)), sum(Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32))] } - └─LogicalProject { exprs: [lineitem.l_shipmode, Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32), Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32)] } + LogicalProject { exprs: [lineitem.l_shipmode, sum($expr1), sum($expr2)] } + └─LogicalAgg { group_key: [lineitem.l_shipmode], aggs: [sum($expr1), sum($expr2)] } + └─LogicalProject { exprs: [lineitem.l_shipmode, Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32) as $expr1, Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32) as $expr2] } └─LogicalFilter { predicate: (orders.o_orderkey = lineitem.l_orderkey) AND In(lineitem.l_shipmode, 'FOB':Varchar, 'SHIP':Varchar) AND (lineitem.l_commitdate < lineitem.l_receiptdate) AND (lineitem.l_shipdate < lineitem.l_commitdate) AND (lineitem.l_receiptdate >= '1994-01-01':Date) AND (lineitem.l_receiptdate < ('1994-01-01':Date + '1 year':Interval)) } └─LogicalJoin { type: Inner, on: true, output: all } ├─LogicalScan { table: orders, columns: [orders.o_orderkey, orders.o_custkey, orders.o_orderstatus, orders.o_totalprice, 
orders.o_orderdate, orders.o_orderpriority, orders.o_clerk, orders.o_shippriority, orders.o_comment] } └─LogicalScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate, lineitem.l_shipinstruct, lineitem.l_shipmode, lineitem.l_comment] } optimized_logical_plan: | - LogicalAgg { group_key: [lineitem.l_shipmode], aggs: [sum(Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32)), sum(Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32))] } - └─LogicalProject { exprs: [lineitem.l_shipmode, Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32), Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32)] } + LogicalAgg { group_key: [lineitem.l_shipmode], aggs: [sum($expr37), sum($expr38)] } + └─LogicalProject { exprs: [lineitem.l_shipmode, Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32) as $expr37, Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32) as $expr38] } └─LogicalJoin { type: Inner, on: (orders.o_orderkey = lineitem.l_orderkey), output: [orders.o_orderpriority, lineitem.l_shipmode] } ├─LogicalScan { table: orders, columns: [orders.o_orderkey, orders.o_orderpriority] } └─LogicalScan { table: lineitem, output_columns: [lineitem.l_orderkey, lineitem.l_shipmode], required_columns: [lineitem.l_orderkey, lineitem.l_shipmode, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate], predicate: In(lineitem.l_shipmode, 'FOB':Varchar, 'SHIP':Varchar) AND (lineitem.l_commitdate < lineitem.l_receiptdate) AND (lineitem.l_shipdate < lineitem.l_commitdate) AND (lineitem.l_receiptdate >= '1994-01-01':Date) AND (lineitem.l_receiptdate < ('1994-01-01':Date + '1 year':Interval)) } batch_plan: | BatchExchange { order: [lineitem.l_shipmode ASC], dist: Single } └─BatchSort { order: [lineitem.l_shipmode ASC] } - └─BatchHashAgg { group_key: [lineitem.l_shipmode], aggs: [sum(Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32)), sum(Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32))] } + └─BatchHashAgg { group_key: [lineitem.l_shipmode], aggs: [sum($expr81), sum($expr82)] } └─BatchExchange { order: [], dist: HashShard(lineitem.l_shipmode) } - └─BatchProject { exprs: [lineitem.l_shipmode, Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32), Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32)] } + └─BatchProject { exprs: [lineitem.l_shipmode, Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32) as $expr81, Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32) as $expr82] } └─BatchHashJoin { type: Inner, predicate: orders.o_orderkey = 
lineitem.l_orderkey, output: [orders.o_orderpriority, lineitem.l_shipmode] } ├─BatchExchange { order: [], dist: HashShard(orders.o_orderkey) } | └─BatchScan { table: orders, columns: [orders.o_orderkey, orders.o_orderpriority], distribution: UpstreamHashShard(orders.o_orderkey) } @@ -2233,10 +2233,10 @@ └─BatchScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_shipmode, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [l_shipmode, high_line_count, low_line_count], pk_columns: [l_shipmode] } - └─StreamProject { exprs: [lineitem.l_shipmode, sum(Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32)), sum(Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32))] } - └─StreamHashAgg { group_key: [lineitem.l_shipmode], aggs: [count, sum(Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32)), sum(Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32))] } + └─StreamProject { exprs: [lineitem.l_shipmode, sum($expr127), sum($expr128)] } + └─StreamHashAgg { group_key: [lineitem.l_shipmode], aggs: [count, sum($expr127), sum($expr128)] } └─StreamExchange { dist: HashShard(lineitem.l_shipmode) } - └─StreamProject { exprs: [lineitem.l_shipmode, Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32), Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32), orders.o_orderkey, lineitem.l_orderkey, lineitem.l_linenumber] } + └─StreamProject { exprs: [lineitem.l_shipmode, Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32) as $expr127, Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32) as $expr128, orders.o_orderkey, lineitem.l_orderkey, lineitem.l_linenumber] } └─StreamHashJoin { type: Inner, predicate: orders.o_orderkey = lineitem.l_orderkey, output: [orders.o_orderpriority, lineitem.l_shipmode, orders.o_orderkey, lineitem.l_orderkey, lineitem.l_linenumber] } ├─StreamExchange { dist: HashShard(orders.o_orderkey) } | └─StreamTableScan { table: orders, columns: [orders.o_orderkey, orders.o_orderpriority], pk: [orders.o_orderkey], dist: UpstreamHashShard(orders.o_orderkey) } @@ -2248,13 +2248,13 @@ Fragment 0 StreamMaterialize { columns: [l_shipmode, high_line_count, low_line_count], pk_columns: [l_shipmode] } materialized table: 4294967294 - StreamProject { exprs: [lineitem.l_shipmode, sum(Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32)), sum(Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32))] } - StreamHashAgg { group_key: [lineitem.l_shipmode], aggs: [count, sum(Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32)), sum(Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32))] } + StreamProject { exprs: [lineitem.l_shipmode, sum($expr127), sum($expr128)] } + StreamHashAgg { group_key: [lineitem.l_shipmode], aggs: 
[count, sum($expr127), sum($expr128)] } result table: 0, state tables: [] StreamExchange Hash([0]) from 1 Fragment 1 - StreamProject { exprs: [lineitem.l_shipmode, Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32), Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32), orders.o_orderkey, lineitem.l_orderkey, lineitem.l_linenumber] } + StreamProject { exprs: [lineitem.l_shipmode, Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32) as $expr127, Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32) as $expr128, orders.o_orderkey, lineitem.l_orderkey, lineitem.l_linenumber] } StreamHashJoin { type: Inner, predicate: orders.o_orderkey = lineitem.l_orderkey, output: [orders.o_orderpriority, lineitem.l_shipmode, orders.o_orderkey, lineitem.l_orderkey, lineitem.l_linenumber] } left table: 1, right table 3, left degree table: 2, right degree table: 4, StreamExchange Hash([0]) from 2 @@ -2272,11 +2272,11 @@ Upstream BatchPlanNode - Table 0 { columns: [lineitem_l_shipmode, count, sum(Case(((orders_o_orderpriority = '1-URGENT':Varchar) OR (orders_o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32)), sum(Case(((orders_o_orderpriority <> '1-URGENT':Varchar) AND (orders_o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32))], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [0] } - Table 1 { columns: [orders_o_orderkey, orders_o_orderpriority], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 2 { columns: [orders_o_orderkey, orders_o_orderkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 3 { columns: [lineitem_l_orderkey, lineitem_l_shipmode, lineitem_l_linenumber], primary key: [$0 ASC, $0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 4 { columns: [lineitem_l_orderkey, lineitem_l_orderkey_0, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } + Table 0 { columns: [lineitem_l_shipmode, count, sum($expr127), sum($expr128)], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [0] } + Table 1 { columns: [orders_o_orderkey, orders_o_orderpriority], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 2 { columns: [orders_o_orderkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 3 { columns: [lineitem_l_orderkey, lineitem_l_shipmode, lineitem_l_linenumber], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 4 { columns: [lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 4294967294 { columns: [l_shipmode, high_line_count, low_line_count], primary key: [$0 ASC], value indices: [0, 1, 2], distribution key: [0] } - id: tpch_q13 before: @@ -2377,8 +2377,8 @@ Table 0 { columns: [count(orders_o_orderkey), count, count_0], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } Table 1 { columns: [customer_c_custkey, count, count(orders_o_orderkey)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } - Table 2 { columns: [customer_c_custkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - 
Table 3 { columns: [customer_c_custkey, customer_c_custkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 2 { columns: [customer_c_custkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 3 { columns: [customer_c_custkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4 { columns: [orders_o_orderkey, orders_o_custkey], primary key: [$1 ASC, $0 ASC], value indices: [0, 1], distribution key: [1] } Table 5 { columns: [orders_o_custkey, orders_o_orderkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 4294967294 { columns: [c_count, custdist], primary key: [$1 DESC, $0 DESC], value indices: [0, 1], distribution key: [0] } @@ -2400,26 +2400,26 @@ and l_shipdate >= date '1995-09-01' and l_shipdate < date '1995-09-01' + interval '1' month; logical_plan: | - LogicalProject { exprs: [((100.00:Decimal * sum(Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal))) / sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } - └─LogicalAgg { aggs: [sum(Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalProject { exprs: [Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + LogicalProject { exprs: [((100.00:Decimal * sum($expr1)) / sum($expr2)) as $expr3] } + └─LogicalAgg { aggs: [sum($expr1), sum($expr2)] } + └─LogicalProject { exprs: [Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal) as $expr1, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr2] } └─LogicalFilter { predicate: (lineitem.l_partkey = part.p_partkey) AND (lineitem.l_shipdate >= '1995-09-01':Date) AND (lineitem.l_shipdate < ('1995-09-01':Date + '1 mon':Interval)) } └─LogicalJoin { type: Inner, on: true, output: all } ├─LogicalScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate, lineitem.l_shipinstruct, lineitem.l_shipmode, lineitem.l_comment] } └─LogicalScan { table: part, columns: [part.p_partkey, part.p_name, part.p_mfgr, part.p_brand, part.p_type, part.p_size, part.p_container, part.p_retailprice, part.p_comment] } optimized_logical_plan: | - LogicalProject { exprs: [((100.00:Decimal * sum(Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal))) / sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } - └─LogicalAgg { aggs: [sum(Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalProject { exprs: [Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + LogicalProject { exprs: [((100.00:Decimal * sum($expr55)) / 
sum($expr56)) as $expr57] } + └─LogicalAgg { aggs: [sum($expr55), sum($expr56)] } + └─LogicalProject { exprs: [Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal) as $expr55, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr56] } └─LogicalJoin { type: Inner, on: (lineitem.l_partkey = part.p_partkey), output: [lineitem.l_extendedprice, lineitem.l_discount, part.p_type] } ├─LogicalScan { table: lineitem, output_columns: [lineitem.l_partkey, lineitem.l_extendedprice, lineitem.l_discount], required_columns: [lineitem.l_partkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate], predicate: (lineitem.l_shipdate >= '1995-09-01':Date) AND (lineitem.l_shipdate < ('1995-09-01':Date + '1 mon':Interval)) } └─LogicalScan { table: part, columns: [part.p_partkey, part.p_type] } batch_plan: | - BatchProject { exprs: [((100.00:Decimal * sum(sum(Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)))) / sum(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))))] } - └─BatchSimpleAgg { aggs: [sum(sum(Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal))), sum(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } + BatchProject { exprs: [((100.00:Decimal * sum(sum($expr121))) / sum(sum($expr122))) as $expr123] } + └─BatchSimpleAgg { aggs: [sum(sum($expr121)), sum(sum($expr122))] } └─BatchExchange { order: [], dist: Single } - └─BatchSimpleAgg { aggs: [sum(Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─BatchProject { exprs: [Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + └─BatchSimpleAgg { aggs: [sum($expr121), sum($expr122)] } + └─BatchProject { exprs: [Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal) as $expr121, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr122] } └─BatchLookupJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey, output: [lineitem.l_extendedprice, lineitem.l_discount, part.p_type] } └─BatchExchange { order: [], dist: UpstreamHashShard(lineitem.l_partkey) } └─BatchProject { exprs: [lineitem.l_partkey, lineitem.l_extendedprice, lineitem.l_discount] } @@ -2427,11 +2427,11 @@ └─BatchScan { table: lineitem, columns: [lineitem.l_partkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [promo_revenue], pk_columns: [] } - └─StreamProject { exprs: [((100.00:Decimal * sum(sum(Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)))) / sum(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))))] } - └─StreamGlobalSimpleAgg { aggs: [count, sum(sum(Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal))), sum(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } + └─StreamProject { exprs: [((100.00:Decimal * sum(sum($expr190))) / sum(sum($expr191))) as $expr193] } + └─StreamGlobalSimpleAgg { 
aggs: [count, sum(sum($expr190)), sum(sum($expr191))] } └─StreamExchange { dist: Single } - └─StreamStatelessLocalSimpleAgg { aggs: [count, sum(Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─StreamProject { exprs: [Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey] } + └─StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr190), sum($expr191)] } + └─StreamProject { exprs: [Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal) as $expr190, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr191, lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey] } └─StreamHashJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey, output: [lineitem.l_extendedprice, lineitem.l_discount, part.p_type, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, part.p_partkey] } ├─StreamExchange { dist: HashShard(lineitem.l_partkey) } | └─StreamProject { exprs: [lineitem.l_partkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber] } @@ -2443,14 +2443,14 @@ Fragment 0 StreamMaterialize { columns: [promo_revenue], pk_columns: [] } materialized table: 4294967294 - StreamProject { exprs: [((100.00:Decimal * sum(sum(Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)))) / sum(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))))] } - StreamGlobalSimpleAgg { aggs: [count, sum(sum(Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal))), sum(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } + StreamProject { exprs: [((100.00:Decimal * sum(sum($expr190))) / sum(sum($expr191))) as $expr193] } + StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr190)), sum(sum($expr191))] } result table: 0, state tables: [] StreamExchange Single from 1 Fragment 1 - StreamStatelessLocalSimpleAgg { aggs: [count, sum(Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal)), sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - StreamProject { exprs: [Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey] } + StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr190), sum($expr191)] } + StreamProject { exprs: [Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Int32::Decimal) as $expr190, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr191, lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey] } StreamHashJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey, output: [lineitem.l_extendedprice, lineitem.l_discount, part.p_type, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, part.p_partkey] } left table: 1, right table 3, left 
degree table: 2, right degree table: 4, StreamExchange Hash([0]) from 2 @@ -2468,11 +2468,11 @@ Upstream BatchPlanNode - Table 0 { columns: [count, sum(sum(Case(Like(part_p_type, 'PROMO%':Varchar), (lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount)), 0:Int32::Decimal))), sum(sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount))))], primary key: [], value indices: [0, 1, 2], distribution key: [] } + Table 0 { columns: [count, sum(sum($expr190)), sum(sum($expr191))], primary key: [], value indices: [0, 1, 2], distribution key: [] } Table 1 { columns: [lineitem_l_partkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_orderkey, lineitem_l_linenumber], primary key: [$0 ASC, $3 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0] } Table 2 { columns: [lineitem_l_partkey, lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } - Table 3 { columns: [part_p_partkey, part_p_type], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 4 { columns: [part_p_partkey, part_p_partkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 3 { columns: [part_p_partkey, part_p_type], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 4 { columns: [part_p_partkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4294967294 { columns: [promo_revenue], primary key: [], value indices: [0], distribution key: [] } - id: tpch_q15 before: @@ -2510,104 +2510,104 @@ order by s_suppkey; logical_plan: | - LogicalProject { exprs: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalFilter { predicate: (supplier.s_suppkey = lineitem.l_suppkey) AND (sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) = max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))) } + LogicalProject { exprs: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum($expr1)] } + └─LogicalFilter { predicate: (supplier.s_suppkey = lineitem.l_suppkey) AND (sum($expr1) = max(sum($expr1))) } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 1, max_one_row: true } ├─LogicalJoin { type: Inner, on: true, output: all } | ├─LogicalScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_acctbal, supplier.s_comment] } | └─LogicalShare { id = 7 } - | └─LogicalProject { exprs: [lineitem.l_suppkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - | └─LogicalAgg { group_key: [lineitem.l_suppkey], aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - | └─LogicalProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + | └─LogicalProject { exprs: [lineitem.l_suppkey, sum($expr1)] } + | └─LogicalAgg { group_key: [lineitem.l_suppkey], aggs: [sum($expr1)] } + | └─LogicalProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr1] } | └─LogicalFilter { predicate: (lineitem.l_shipdate >= '1993-01-01':Date) AND (lineitem.l_shipdate < ('1993-01-01':Date + '3 mons':Interval)) } | └─LogicalScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_linenumber, lineitem.l_quantity, 
lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate, lineitem.l_shipinstruct, lineitem.l_shipmode, lineitem.l_comment] } - └─LogicalProject { exprs: [max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } - └─LogicalAgg { aggs: [max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } - └─LogicalProject { exprs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + └─LogicalProject { exprs: [max(sum($expr1))] } + └─LogicalAgg { aggs: [max(sum($expr1))] } + └─LogicalProject { exprs: [sum($expr1)] } └─LogicalShare { id = 7 } - └─LogicalProject { exprs: [lineitem.l_suppkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalAgg { group_key: [lineitem.l_suppkey], aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + └─LogicalProject { exprs: [lineitem.l_suppkey, sum($expr1)] } + └─LogicalAgg { group_key: [lineitem.l_suppkey], aggs: [sum($expr1)] } + └─LogicalProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr1] } └─LogicalFilter { predicate: (lineitem.l_shipdate >= '1993-01-01':Date) AND (lineitem.l_shipdate < ('1993-01-01':Date + '3 mons':Interval)) } └─LogicalScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate, lineitem.l_shipinstruct, lineitem.l_shipmode, lineitem.l_comment] } optimized_logical_plan: | - LogicalJoin { type: Inner, on: (sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) = max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - ├─LogicalJoin { type: Inner, on: (supplier.s_suppkey = lineitem.l_suppkey), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + LogicalJoin { type: Inner, on: (sum($expr20) = max(sum($expr20))), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum($expr20)] } + ├─LogicalJoin { type: Inner, on: (supplier.s_suppkey = lineitem.l_suppkey), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum($expr20)] } | ├─LogicalScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone] } | └─LogicalShare { id = 165 } - | └─LogicalAgg { group_key: [lineitem.l_suppkey], aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - | └─LogicalProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + | └─LogicalAgg { group_key: [lineitem.l_suppkey], aggs: [sum($expr32)] } + | └─LogicalProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr32] } | └─LogicalScan { table: lineitem, output_columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount], required_columns: [lineitem.l_suppkey, 
lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate], predicate: (lineitem.l_shipdate >= '1993-01-01':Date) AND (lineitem.l_shipdate < ('1993-01-01':Date + '3 mons':Interval)) } - └─LogicalAgg { aggs: [max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } + └─LogicalAgg { aggs: [max(sum($expr20))] } └─LogicalShare { id = 165 } - └─LogicalAgg { group_key: [lineitem.l_suppkey], aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + └─LogicalAgg { group_key: [lineitem.l_suppkey], aggs: [sum($expr32)] } + └─LogicalProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr32] } └─LogicalScan { table: lineitem, output_columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount], required_columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate], predicate: (lineitem.l_shipdate >= '1993-01-01':Date) AND (lineitem.l_shipdate < ('1993-01-01':Date + '3 mons':Interval)) } batch_plan: | BatchExchange { order: [supplier.s_suppkey ASC], dist: Single } └─BatchSort { order: [supplier.s_suppkey ASC] } - └─BatchHashJoin { type: Inner, predicate: sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) = max(max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - ├─BatchExchange { order: [], dist: HashShard(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))) } - | └─BatchHashJoin { type: Inner, predicate: supplier.s_suppkey = lineitem.l_suppkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + └─BatchHashJoin { type: Inner, predicate: sum($expr73) = max(max(sum($expr72))), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum($expr73)] } + ├─BatchExchange { order: [], dist: HashShard(sum($expr73)) } + | └─BatchHashJoin { type: Inner, predicate: supplier.s_suppkey = lineitem.l_suppkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum($expr73)] } | ├─BatchExchange { order: [], dist: HashShard(supplier.s_suppkey) } | | └─BatchScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone], distribution: UpstreamHashShard(supplier.s_suppkey) } - | └─BatchHashAgg { group_key: [lineitem.l_suppkey], aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + | └─BatchHashAgg { group_key: [lineitem.l_suppkey], aggs: [sum($expr73)] } | └─BatchExchange { order: [], dist: HashShard(lineitem.l_suppkey) } - | └─BatchProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + | └─BatchProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr73] } | └─BatchFilter { predicate: (lineitem.l_shipdate >= '1993-01-01':Date) AND (lineitem.l_shipdate < ('1993-01-01':Date + '3 mons':Interval)) } | └─BatchScan { table: lineitem, columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate], distribution: SomeShard } - └─BatchExchange { order: [], dist: HashShard(max(max(sum((lineitem.l_extendedprice * 
(1:Int32 - lineitem.l_discount)))))) } - └─BatchSimpleAgg { aggs: [max(max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))))] } + └─BatchExchange { order: [], dist: HashShard(max(max(sum($expr72)))) } + └─BatchSimpleAgg { aggs: [max(max(sum($expr72)))] } └─BatchExchange { order: [], dist: Single } - └─BatchSimpleAgg { aggs: [max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } - └─BatchHashAgg { group_key: [lineitem.l_suppkey], aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + └─BatchSimpleAgg { aggs: [max(sum($expr72))] } + └─BatchHashAgg { group_key: [lineitem.l_suppkey], aggs: [sum($expr72)] } └─BatchExchange { order: [], dist: HashShard(lineitem.l_suppkey) } - └─BatchProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + └─BatchProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr72] } └─BatchFilter { predicate: (lineitem.l_shipdate >= '1993-01-01':Date) AND (lineitem.l_shipdate < ('1993-01-01':Date + '3 mons':Interval)) } └─BatchScan { table: lineitem, columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate], distribution: SomeShard } stream_plan: | - StreamMaterialize { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, lineitem.l_suppkey(hidden), max(max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))))(hidden)], pk_columns: [s_suppkey, lineitem.l_suppkey, total_revenue, max(max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))))] } - └─StreamHashJoin { type: Inner, predicate: sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) = max(max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))), output: all } - ├─StreamExchange { dist: HashShard(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))) } - | └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = lineitem.l_suppkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), lineitem.l_suppkey] } + StreamMaterialize { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, lineitem.l_suppkey(hidden), max(max(sum($expr111)))(hidden)], pk_columns: [s_suppkey, lineitem.l_suppkey, total_revenue, max(max(sum($expr111)))] } + └─StreamHashJoin { type: Inner, predicate: sum($expr111) = max(max(sum($expr111))), output: all } + ├─StreamExchange { dist: HashShard(sum($expr111)) } + | └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = lineitem.l_suppkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum($expr111), lineitem.l_suppkey] } | ├─StreamExchange { dist: HashShard(supplier.s_suppkey) } | | └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } - | └─StreamProject { exprs: [lineitem.l_suppkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - | └─StreamShare { id = 888 } - | └─StreamProject { exprs: [lineitem.l_suppkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - | └─StreamHashAgg { group_key: [lineitem.l_suppkey], aggs: [count, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + | └─StreamProject { exprs: [lineitem.l_suppkey, sum($expr111)] } + | └─StreamShare { id = 907 } + | 
└─StreamProject { exprs: [lineitem.l_suppkey, sum($expr111)] } + | └─StreamHashAgg { group_key: [lineitem.l_suppkey], aggs: [count, sum($expr111)] } | └─StreamExchange { dist: HashShard(lineitem.l_suppkey) } - | └─StreamProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), lineitem.l_orderkey, lineitem.l_linenumber] } + | └─StreamProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr111, lineitem.l_orderkey, lineitem.l_linenumber] } | └─StreamFilter { predicate: (lineitem.l_shipdate >= '1993-01-01':Date) AND (lineitem.l_shipdate < ('1993-01-01':Date + '3 mons':Interval)) } | └─StreamTableScan { table: lineitem, columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_shipdate], pk: [lineitem.l_orderkey, lineitem.l_linenumber], dist: UpstreamHashShard(lineitem.l_orderkey, lineitem.l_linenumber) } - └─StreamExchange { dist: HashShard(max(max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))))) } - └─StreamProject { exprs: [max(max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))))] } - └─StreamGlobalSimpleAgg { aggs: [count, max(max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))))] } + └─StreamExchange { dist: HashShard(max(max(sum($expr111)))) } + └─StreamProject { exprs: [max(max(sum($expr111)))] } + └─StreamGlobalSimpleAgg { aggs: [count, max(max(sum($expr111)))] } └─StreamExchange { dist: Single } - └─StreamHashAgg { group_key: [Vnode(lineitem.l_suppkey)], aggs: [count, max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } - └─StreamProject { exprs: [lineitem.l_suppkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), Vnode(lineitem.l_suppkey)] } - └─StreamProject { exprs: [lineitem.l_suppkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─StreamShare { id = 888 } - └─StreamProject { exprs: [lineitem.l_suppkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─StreamHashAgg { group_key: [lineitem.l_suppkey], aggs: [count, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + └─StreamHashAgg { group_key: [$expr112], aggs: [count, max(sum($expr111))] } + └─StreamProject { exprs: [lineitem.l_suppkey, sum($expr111), Vnode(lineitem.l_suppkey) as $expr112] } + └─StreamProject { exprs: [lineitem.l_suppkey, sum($expr111)] } + └─StreamShare { id = 907 } + └─StreamProject { exprs: [lineitem.l_suppkey, sum($expr111)] } + └─StreamHashAgg { group_key: [lineitem.l_suppkey], aggs: [count, sum($expr111)] } └─StreamExchange { dist: HashShard(lineitem.l_suppkey) } - └─StreamProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), lineitem.l_orderkey, lineitem.l_linenumber] } + └─StreamProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr111, lineitem.l_orderkey, lineitem.l_linenumber] } └─StreamFilter { predicate: (lineitem.l_shipdate >= '1993-01-01':Date) AND (lineitem.l_shipdate < ('1993-01-01':Date + '3 mons':Interval)) } └─StreamTableScan { table: lineitem, columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_shipdate], pk: [lineitem.l_orderkey, lineitem.l_linenumber], dist: UpstreamHashShard(lineitem.l_orderkey, lineitem.l_linenumber) } stream_dist_plan: | Fragment 0 - StreamMaterialize { columns: 
[s_suppkey, s_name, s_address, s_phone, total_revenue, lineitem.l_suppkey(hidden), max(max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))))(hidden)], pk_columns: [s_suppkey, lineitem.l_suppkey, total_revenue, max(max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))))] } + StreamMaterialize { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, lineitem.l_suppkey(hidden), max(max(sum($expr111)))(hidden)], pk_columns: [s_suppkey, lineitem.l_suppkey, total_revenue, max(max(sum($expr111)))] } materialized table: 4294967294 - StreamHashJoin { type: Inner, predicate: sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))) = max(max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))), output: all } + StreamHashJoin { type: Inner, predicate: sum($expr111) = max(max(sum($expr111))), output: all } left table: 0, right table 2, left degree table: 1, right degree table: 3, StreamExchange Hash([4]) from 1 StreamExchange Hash([0]) from 5 Fragment 1 - StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = lineitem.l_suppkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), lineitem.l_suppkey] } + StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = lineitem.l_suppkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum($expr111), lineitem.l_suppkey] } left table: 4, right table 6, left degree table: 5, right degree table: 7, StreamExchange Hash([0]) from 2 - StreamProject { exprs: [lineitem.l_suppkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + StreamProject { exprs: [lineitem.l_suppkey, sum($expr111)] } StreamExchange Hash([0]) from 3 Fragment 2 @@ -2616,45 +2616,45 @@ BatchPlanNode Fragment 3 - StreamProject { exprs: [lineitem.l_suppkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - StreamHashAgg { group_key: [lineitem.l_suppkey], aggs: [count, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + StreamProject { exprs: [lineitem.l_suppkey, sum($expr111)] } + StreamHashAgg { group_key: [lineitem.l_suppkey], aggs: [count, sum($expr111)] } result table: 8, state tables: [] StreamExchange Hash([0]) from 4 Fragment 4 - StreamProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), lineitem.l_orderkey, lineitem.l_linenumber] } + StreamProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr111, lineitem.l_orderkey, lineitem.l_linenumber] } StreamFilter { predicate: (lineitem.l_shipdate >= '1993-01-01':Date) AND (lineitem.l_shipdate < ('1993-01-01':Date + '3 mons':Interval)) } Chain { table: lineitem, columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_shipdate], pk: [lineitem.l_orderkey, lineitem.l_linenumber], dist: UpstreamHashShard(lineitem.l_orderkey, lineitem.l_linenumber) } Upstream BatchPlanNode Fragment 5 - StreamProject { exprs: [max(max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))))] } - StreamGlobalSimpleAgg { aggs: [count, max(max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))))] } + StreamProject { exprs: [max(max(sum($expr111)))] } + StreamGlobalSimpleAgg { aggs: [count, max(max(sum($expr111)))] } result table: 10, state tables: [9] StreamExchange Single from 6 Fragment 6 - StreamHashAgg { 
group_key: [Vnode(lineitem.l_suppkey)], aggs: [count, max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } + StreamHashAgg { group_key: [$expr112], aggs: [count, max(sum($expr111))] } result table: 12, state tables: [11] - StreamProject { exprs: [lineitem.l_suppkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))), Vnode(lineitem.l_suppkey)] } - StreamProject { exprs: [lineitem.l_suppkey, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } + StreamProject { exprs: [lineitem.l_suppkey, sum($expr111), Vnode(lineitem.l_suppkey) as $expr112] } + StreamProject { exprs: [lineitem.l_suppkey, sum($expr111)] } StreamExchange Hash([0]) from 3 - Table 0 { columns: [supplier_s_suppkey, supplier_s_name, supplier_s_address, supplier_s_phone, sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount))), lineitem_l_suppkey], primary key: [$4 ASC, $0 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [4] } - Table 1 { columns: [sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount))), supplier_s_suppkey, lineitem_l_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } - Table 2 { columns: [max(max(sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount)))))], primary key: [$0 ASC], value indices: [0], distribution key: [0] } - Table 3 { columns: [max(max(sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount))))), _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } - Table 4 { columns: [supplier_s_suppkey, supplier_s_name, supplier_s_address, supplier_s_phone], primary key: [$0 ASC, $0 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } - Table 5 { columns: [supplier_s_suppkey, supplier_s_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 6 { columns: [lineitem_l_suppkey, sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount)))], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 7 { columns: [lineitem_l_suppkey, lineitem_l_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 8 { columns: [lineitem_l_suppkey, count, sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount)))], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } - Table 9 { columns: [max(sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount)))), Vnode(lineitem_l_suppkey)], primary key: [$0 DESC, $1 ASC], value indices: [0, 1], distribution key: [] } - Table 10 { columns: [count, max(max(sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount)))))], primary key: [], value indices: [0, 1], distribution key: [] } - Table 11 { columns: [Vnode(lineitem_l_suppkey), sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount))), lineitem_l_suppkey], primary key: [$0 ASC, $1 DESC, $2 ASC], value indices: [1, 2], distribution key: [2], vnode column idx: 0 } - Table 12 { columns: [Vnode(lineitem_l_suppkey), count, max(sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount))))], primary key: [$0 ASC], value indices: [1, 2], distribution key: [], vnode column idx: 0 } - Table 4294967294 { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, lineitem.l_suppkey, max(max(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))))], primary key: [$0 ASC, $5 ASC, $4 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [4] } + Table 0 { columns: 
[supplier_s_suppkey, supplier_s_name, supplier_s_address, supplier_s_phone, sum($expr111), lineitem_l_suppkey], primary key: [$4 ASC, $0 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [4] } + Table 1 { columns: [sum($expr111), supplier_s_suppkey, lineitem_l_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } + Table 2 { columns: [max(max(sum($expr111)))], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 3 { columns: [max(max(sum($expr111))), _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 4 { columns: [supplier_s_suppkey, supplier_s_name, supplier_s_address, supplier_s_phone], primary key: [$0 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } + Table 5 { columns: [supplier_s_suppkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 6 { columns: [lineitem_l_suppkey, sum($expr111)], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 7 { columns: [lineitem_l_suppkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 8 { columns: [lineitem_l_suppkey, count, sum($expr111)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } + Table 9 { columns: [max(sum($expr111)), $expr112], primary key: [$0 DESC, $1 ASC], value indices: [0, 1], distribution key: [] } + Table 10 { columns: [count, max(max(sum($expr111)))], primary key: [], value indices: [0, 1], distribution key: [] } + Table 11 { columns: [$expr112, sum($expr111), lineitem_l_suppkey], primary key: [$0 ASC, $1 DESC, $2 ASC], value indices: [1, 2], distribution key: [2], vnode column idx: 0 } + Table 12 { columns: [$expr112, count, max(sum($expr111))], primary key: [$0 ASC], value indices: [1, 2], distribution key: [], vnode column idx: 0 } + Table 4294967294 { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, lineitem.l_suppkey, max(max(sum($expr111)))], primary key: [$0 ASC, $5 ASC, $4 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [4] } - id: tpch_q16 before: - create_tables @@ -2783,14 +2783,14 @@ Table 0 { columns: [part_p_brand, part_p_type, part_p_size, count, count(partsupp_ps_suppkey)], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3, 4], distribution key: [0, 1, 2] } Table 1 { columns: [part_p_brand, part_p_type, part_p_size, partsupp_ps_suppkey, count], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [3] } - Table 2 { columns: [partsupp_ps_suppkey, part_p_brand, part_p_type, part_p_size, partsupp_ps_partkey, part_p_partkey], primary key: [$0 ASC, $4 ASC, $0 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [0] } - Table 3 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, partsupp_ps_suppkey_0, part_p_partkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } - Table 4 { columns: [supplier_s_suppkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 5 { columns: [supplier_s_suppkey, supplier_s_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 6 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey], primary key: [$0 ASC, $0 ASC, $1 ASC], value indices: [0, 1], distribution key: [0] } - Table 7 { columns: [partsupp_ps_partkey, partsupp_ps_partkey_0, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], 
distribution key: [0] } - Table 8 { columns: [part_p_partkey, part_p_brand, part_p_type, part_p_size], primary key: [$0 ASC, $0 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } - Table 9 { columns: [part_p_partkey, part_p_partkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 2 { columns: [partsupp_ps_suppkey, part_p_brand, part_p_type, part_p_size, partsupp_ps_partkey, part_p_partkey], primary key: [$0 ASC, $4 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [0] } + Table 3 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, part_p_partkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } + Table 4 { columns: [supplier_s_suppkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 5 { columns: [supplier_s_suppkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 6 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey], primary key: [$0 ASC, $1 ASC], value indices: [0, 1], distribution key: [0] } + Table 7 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 8 { columns: [part_p_partkey, part_p_brand, part_p_type, part_p_size], primary key: [$0 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } + Table 9 { columns: [part_p_partkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4294967294 { columns: [p_brand, p_type, p_size, supplier_cnt], primary key: [$3 DESC, $0 ASC, $1 ASC, $2 ASC], value indices: [0, 1, 2, 3], distribution key: [0, 1, 2] } - id: tpch_q17 before: @@ -2814,45 +2814,45 @@ l_partkey = p_partkey ); logical_plan: | - LogicalProject { exprs: [RoundDigit((sum(lineitem.l_extendedprice) / 7.0:Decimal), 16:Int32)] } + LogicalProject { exprs: [RoundDigit((sum(lineitem.l_extendedprice) / 7.0:Decimal), 16:Int32) as $expr2] } └─LogicalAgg { aggs: [sum(lineitem.l_extendedprice)] } └─LogicalProject { exprs: [lineitem.l_extendedprice] } - └─LogicalFilter { predicate: (part.p_partkey = lineitem.l_partkey) AND (part.p_brand = 'Brand#13':Varchar) AND (part.p_container = 'JUMBO PKG':Varchar) AND (lineitem.l_quantity < (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity)))) } + └─LogicalFilter { predicate: (part.p_partkey = lineitem.l_partkey) AND (part.p_brand = 'Brand#13':Varchar) AND (part.p_container = 'JUMBO PKG':Varchar) AND (lineitem.l_quantity < $expr1) } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 1, max_one_row: true } ├─LogicalJoin { type: Inner, on: true, output: all } | ├─LogicalScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate, lineitem.l_shipinstruct, lineitem.l_shipmode, lineitem.l_comment] } | └─LogicalScan { table: part, columns: [part.p_partkey, part.p_name, part.p_mfgr, part.p_brand, part.p_type, part.p_size, part.p_container, part.p_retailprice, part.p_comment] } - └─LogicalProject { exprs: [(0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity)))] } + └─LogicalProject { exprs: [(0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity))) as $expr1] } └─LogicalAgg { aggs: [sum(lineitem.l_quantity), count(lineitem.l_quantity)] } 
└─LogicalProject { exprs: [lineitem.l_quantity] } └─LogicalFilter { predicate: (lineitem.l_partkey = CorrelatedInputRef { index: 16, correlated_id: 1 }) } └─LogicalScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate, lineitem.l_shipinstruct, lineitem.l_shipmode, lineitem.l_comment] } optimized_logical_plan: | - LogicalProject { exprs: [RoundDigit((sum(lineitem.l_extendedprice) / 7.0:Decimal), 16:Int32)] } + LogicalProject { exprs: [RoundDigit((sum(lineitem.l_extendedprice) / 7.0:Decimal), 16:Int32) as $expr41] } └─LogicalAgg { aggs: [sum(lineitem.l_extendedprice)] } - └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(part.p_partkey, part.p_partkey) AND (lineitem.l_quantity < (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity)))), output: [lineitem.l_extendedprice] } + └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(part.p_partkey, part.p_partkey) AND (lineitem.l_quantity < $expr40), output: [lineitem.l_extendedprice] } ├─LogicalJoin { type: Inner, on: (part.p_partkey = lineitem.l_partkey), output: [lineitem.l_quantity, lineitem.l_extendedprice, part.p_partkey] } | ├─LogicalScan { table: lineitem, columns: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice] } | └─LogicalScan { table: part, output_columns: [part.p_partkey], required_columns: [part.p_partkey, part.p_brand, part.p_container], predicate: (part.p_brand = 'Brand#13':Varchar) AND (part.p_container = 'JUMBO PKG':Varchar) } - └─LogicalProject { exprs: [part.p_partkey, (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity)))] } + └─LogicalProject { exprs: [part.p_partkey, (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity))) as $expr40] } └─LogicalAgg { group_key: [part.p_partkey], aggs: [sum(lineitem.l_quantity), count(lineitem.l_quantity)] } └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(part.p_partkey, lineitem.l_partkey), output: [part.p_partkey, lineitem.l_quantity] } ├─LogicalAgg { group_key: [part.p_partkey], aggs: [] } | └─LogicalScan { table: part, output_columns: [part.p_partkey], required_columns: [part.p_partkey, part.p_brand, part.p_container], predicate: (part.p_brand = 'Brand#13':Varchar) AND (part.p_container = 'JUMBO PKG':Varchar) } └─LogicalScan { table: lineitem, columns: [lineitem.l_partkey, lineitem.l_quantity], predicate: IsNotNull(lineitem.l_partkey) } batch_plan: | - BatchProject { exprs: [RoundDigit((sum(sum(lineitem.l_extendedprice)) / 7.0:Decimal), 16:Int32)] } + BatchProject { exprs: [RoundDigit((sum(sum(lineitem.l_extendedprice)) / 7.0:Decimal), 16:Int32) as $expr88] } └─BatchSimpleAgg { aggs: [sum(sum(lineitem.l_extendedprice))] } └─BatchExchange { order: [], dist: Single } └─BatchSimpleAgg { aggs: [sum(lineitem.l_extendedprice)] } └─BatchProject { exprs: [lineitem.l_extendedprice] } - └─BatchFilter { predicate: (lineitem.l_quantity < (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity)))) } + └─BatchFilter { predicate: (lineitem.l_quantity < $expr87) } └─BatchHashJoin { type: Inner, predicate: part.p_partkey IS NOT DISTINCT FROM part.p_partkey, output: all } ├─BatchExchange { order: [], dist: HashShard(part.p_partkey) } | └─BatchLookupJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey AND (part.p_brand = 'Brand#13':Varchar) AND 
(part.p_container = 'JUMBO PKG':Varchar), output: [lineitem.l_quantity, lineitem.l_extendedprice, part.p_partkey] } | └─BatchExchange { order: [], dist: UpstreamHashShard(lineitem.l_partkey) } | └─BatchScan { table: lineitem, columns: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice], distribution: SomeShard } - └─BatchProject { exprs: [part.p_partkey, (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity)))] } + └─BatchProject { exprs: [part.p_partkey, (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity))) as $expr87] } └─BatchHashAgg { group_key: [part.p_partkey], aggs: [sum(lineitem.l_quantity), count(lineitem.l_quantity)] } └─BatchHashJoin { type: LeftOuter, predicate: part.p_partkey IS NOT DISTINCT FROM lineitem.l_partkey, output: [part.p_partkey, lineitem.l_quantity] } ├─BatchExchange { order: [], dist: HashShard(part.p_partkey) } @@ -2865,12 +2865,12 @@ └─BatchScan { table: lineitem, columns: [lineitem.l_partkey, lineitem.l_quantity], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [avg_yearly], pk_columns: [] } - └─StreamProject { exprs: [RoundDigit((sum(sum(lineitem.l_extendedprice)) / 7.0:Decimal), 16:Int32)] } + └─StreamProject { exprs: [RoundDigit((sum(sum(lineitem.l_extendedprice)) / 7.0:Decimal), 16:Int32) as $expr139] } └─StreamGlobalSimpleAgg { aggs: [count, sum(sum(lineitem.l_extendedprice))] } └─StreamExchange { dist: Single } └─StreamStatelessLocalSimpleAgg { aggs: [count, sum(lineitem.l_extendedprice)] } └─StreamProject { exprs: [lineitem.l_extendedprice, lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey, part.p_partkey] } - └─StreamFilter { predicate: (lineitem.l_quantity < (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity)))) } + └─StreamFilter { predicate: (lineitem.l_quantity < $expr137) } └─StreamHashJoin { type: Inner, predicate: part.p_partkey IS NOT DISTINCT FROM part.p_partkey, output: all } ├─StreamExchange { dist: HashShard(part.p_partkey) } | └─StreamHashJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey, output: [lineitem.l_quantity, lineitem.l_extendedprice, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey] } @@ -2880,7 +2880,7 @@ | └─StreamProject { exprs: [part.p_partkey] } | └─StreamFilter { predicate: (part.p_brand = 'Brand#13':Varchar) AND (part.p_container = 'JUMBO PKG':Varchar) } | └─StreamTableScan { table: part, columns: [part.p_partkey, part.p_brand, part.p_container], pk: [part.p_partkey], dist: UpstreamHashShard(part.p_partkey) } - └─StreamProject { exprs: [part.p_partkey, (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity)))] } + └─StreamProject { exprs: [part.p_partkey, (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity))) as $expr137] } └─StreamHashAgg { group_key: [part.p_partkey], aggs: [count, sum(lineitem.l_quantity), count(lineitem.l_quantity)] } └─StreamHashJoin { type: LeftOuter, predicate: part.p_partkey IS NOT DISTINCT FROM lineitem.l_partkey, output: [part.p_partkey, lineitem.l_quantity, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey] } ├─StreamExchange { dist: HashShard(part.p_partkey) } @@ -2896,7 +2896,7 @@ Fragment 0 StreamMaterialize { columns: [avg_yearly], pk_columns: [] } materialized table: 4294967294 - StreamProject { exprs: [RoundDigit((sum(sum(lineitem.l_extendedprice)) / 7.0:Decimal), 16:Int32)] } + StreamProject { exprs: [RoundDigit((sum(sum(lineitem.l_extendedprice)) / 7.0:Decimal), 16:Int32) as 
$expr139] } StreamGlobalSimpleAgg { aggs: [count, sum(sum(lineitem.l_extendedprice))] } result table: 0, state tables: [] StreamExchange Single from 1 @@ -2904,11 +2904,11 @@ Fragment 1 StreamStatelessLocalSimpleAgg { aggs: [count, sum(lineitem.l_extendedprice)] } StreamProject { exprs: [lineitem.l_extendedprice, lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey, part.p_partkey] } - StreamFilter { predicate: (lineitem.l_quantity < (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity)))) } + StreamFilter { predicate: (lineitem.l_quantity < $expr137) } StreamHashJoin { type: Inner, predicate: part.p_partkey IS NOT DISTINCT FROM part.p_partkey, output: all } left table: 1, right table 3, left degree table: 2, right degree table: 4, StreamExchange Hash([2]) from 2 - StreamProject { exprs: [part.p_partkey, (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity)))] } + StreamProject { exprs: [part.p_partkey, (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity))) as $expr137] } StreamHashAgg { group_key: [part.p_partkey], aggs: [count, sum(lineitem.l_quantity), count(lineitem.l_quantity)] } result table: 9, state tables: [] StreamHashJoin { type: LeftOuter, predicate: part.p_partkey IS NOT DISTINCT FROM lineitem.l_partkey, output: [part.p_partkey, lineitem.l_quantity, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey] } @@ -2951,17 +2951,17 @@ BatchPlanNode Table 0 { columns: [count, sum(sum(lineitem_l_extendedprice))], primary key: [], value indices: [0, 1], distribution key: [] } - Table 1 { columns: [lineitem_l_quantity, lineitem_l_extendedprice, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey], primary key: [$2 ASC, $3 ASC, $4 ASC, $2 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [2] } - Table 2 { columns: [part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, part_p_partkey_0, lineitem_l_partkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC], value indices: [5], distribution key: [0] } - Table 3 { columns: [part_p_partkey, (0_2:Decimal * (sum(lineitem_l_quantity) / count(lineitem_l_quantity)))], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 4 { columns: [part_p_partkey, part_p_partkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 1 { columns: [lineitem_l_quantity, lineitem_l_extendedprice, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey], primary key: [$2 ASC, $3 ASC, $4 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [2] } + Table 2 { columns: [part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } + Table 3 { columns: [part_p_partkey, $expr137], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 4 { columns: [part_p_partkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 5 { columns: [lineitem_l_partkey, lineitem_l_quantity, lineitem_l_extendedprice, lineitem_l_orderkey, lineitem_l_linenumber], primary key: [$0 ASC, $3 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0] } Table 6 { columns: [lineitem_l_partkey, lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } - Table 7 { columns: [part_p_partkey], 
primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 8 { columns: [part_p_partkey, part_p_partkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 7 { columns: [part_p_partkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 8 { columns: [part_p_partkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 9 { columns: [part_p_partkey, count, sum(lineitem_l_quantity), count(lineitem_l_quantity)], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [0] } - Table 10 { columns: [part_p_partkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 11 { columns: [part_p_partkey, part_p_partkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 10 { columns: [part_p_partkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 11 { columns: [part_p_partkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 12 { columns: [lineitem_l_partkey, lineitem_l_quantity, lineitem_l_orderkey, lineitem_l_linenumber], primary key: [$0 ASC, $2 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } Table 13 { columns: [lineitem_l_partkey, lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } Table 14 { columns: [part_p_partkey, count], primary key: [$0 ASC], value indices: [1], distribution key: [0] } @@ -3060,7 +3060,7 @@ └─StreamTopN { order: "[orders.o_totalprice DESC, orders.o_orderdate ASC]", limit: 100, offset: 0 } └─StreamExchange { dist: Single } └─StreamGroupTopN { order: "[orders.o_totalprice DESC, orders.o_orderdate ASC]", limit: 100, offset: 0, group_key: [6] } - └─StreamProject { exprs: [customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice, sum(lineitem.l_quantity), Vnode(orders.o_orderkey)] } + └─StreamProject { exprs: [customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice, sum(lineitem.l_quantity), Vnode(orders.o_orderkey) as $expr1] } └─StreamProject { exprs: [customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice, sum(lineitem.l_quantity)] } └─StreamHashAgg { group_key: [customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice], aggs: [count, sum(lineitem.l_quantity)] } └─StreamHashJoin { type: LeftSemi, predicate: orders.o_orderkey = lineitem.l_orderkey, output: [customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice, lineitem.l_quantity, orders.o_custkey, lineitem.l_orderkey, lineitem.l_linenumber] } @@ -3091,7 +3091,7 @@ Fragment 1 StreamGroupTopN { order: "[orders.o_totalprice DESC, orders.o_orderdate ASC]", limit: 100, offset: 0, group_key: [6] } state table: 1 - StreamProject { exprs: [customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice, sum(lineitem.l_quantity), Vnode(orders.o_orderkey)] } + StreamProject { exprs: [customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice, sum(lineitem.l_quantity), Vnode(orders.o_orderkey) as $expr1] } StreamProject { exprs: [customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice, sum(lineitem.l_quantity)] } StreamHashAgg { group_key: [customer.c_name, 
customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice], aggs: [count, sum(lineitem.l_quantity)] } result table: 2, state tables: [] @@ -3134,19 +3134,19 @@ Upstream BatchPlanNode - Table 0 { columns: [customer_c_name, customer_c_custkey, orders_o_orderkey, orders_o_orderdate, orders_o_totalprice, sum(lineitem_l_quantity), Vnode(orders_o_orderkey)], primary key: [$4 DESC, $3 ASC, $0 ASC, $1 ASC, $2 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [] } - Table 1 { columns: [customer_c_name, customer_c_custkey, orders_o_orderkey, orders_o_orderdate, orders_o_totalprice, sum(lineitem_l_quantity), Vnode(orders_o_orderkey)], primary key: [$6 ASC, $4 DESC, $3 ASC, $0 ASC, $1 ASC, $2 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [2], vnode column idx: 6 } + Table 0 { columns: [customer_c_name, customer_c_custkey, orders_o_orderkey, orders_o_orderdate, orders_o_totalprice, sum(lineitem_l_quantity), $expr1], primary key: [$4 DESC, $3 ASC, $0 ASC, $1 ASC, $2 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [] } + Table 1 { columns: [customer_c_name, customer_c_custkey, orders_o_orderkey, orders_o_orderdate, orders_o_totalprice, sum(lineitem_l_quantity), $expr1], primary key: [$6 ASC, $4 DESC, $3 ASC, $0 ASC, $1 ASC, $2 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [2], vnode column idx: 6 } Table 2 { columns: [customer_c_name, customer_c_custkey, orders_o_orderkey, orders_o_orderdate, orders_o_totalprice, count, sum(lineitem_l_quantity)], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC], value indices: [5, 6], distribution key: [2] } - Table 3 { columns: [customer_c_custkey, customer_c_name, orders_o_orderkey, orders_o_totalprice, orders_o_orderdate, lineitem_l_quantity, orders_o_custkey, lineitem_l_orderkey, lineitem_l_linenumber], primary key: [$2 ASC, $0 ASC, $2 ASC, $6 ASC, $7 ASC, $8 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [2] } - Table 4 { columns: [orders_o_orderkey, customer_c_custkey, orders_o_orderkey_0, orders_o_custkey, lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC], value indices: [6], distribution key: [0] } - Table 5 { columns: [lineitem_l_orderkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 6 { columns: [lineitem_l_orderkey, lineitem_l_orderkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 7 { columns: [customer_c_custkey, customer_c_name, orders_o_orderkey, orders_o_totalprice, orders_o_orderdate, orders_o_custkey], primary key: [$2 ASC, $0 ASC, $2 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [2] } - Table 8 { columns: [orders_o_orderkey, customer_c_custkey, orders_o_orderkey_0, orders_o_custkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } - Table 9 { columns: [lineitem_l_orderkey, lineitem_l_quantity, lineitem_l_linenumber], primary key: [$0 ASC, $0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 10 { columns: [lineitem_l_orderkey, lineitem_l_orderkey_0, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } - Table 11 { columns: [customer_c_custkey, customer_c_name], primary key: [$0 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 12 { columns: [customer_c_custkey, customer_c_custkey_0, _degree], primary key: [$0 ASC, $1 ASC], 
value indices: [2], distribution key: [0] } + Table 3 { columns: [customer_c_custkey, customer_c_name, orders_o_orderkey, orders_o_totalprice, orders_o_orderdate, lineitem_l_quantity, orders_o_custkey, lineitem_l_orderkey, lineitem_l_linenumber], primary key: [$2 ASC, $0 ASC, $6 ASC, $7 ASC, $8 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [2] } + Table 4 { columns: [orders_o_orderkey, customer_c_custkey, orders_o_custkey, lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC], value indices: [5], distribution key: [0] } + Table 5 { columns: [lineitem_l_orderkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 6 { columns: [lineitem_l_orderkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 7 { columns: [customer_c_custkey, customer_c_name, orders_o_orderkey, orders_o_totalprice, orders_o_orderdate, orders_o_custkey], primary key: [$2 ASC, $0 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [2] } + Table 8 { columns: [orders_o_orderkey, customer_c_custkey, orders_o_custkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } + Table 9 { columns: [lineitem_l_orderkey, lineitem_l_quantity, lineitem_l_linenumber], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 10 { columns: [lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 11 { columns: [customer_c_custkey, customer_c_name], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 12 { columns: [customer_c_custkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 13 { columns: [orders_o_orderkey, orders_o_custkey, orders_o_totalprice, orders_o_orderdate], primary key: [$1 ASC, $0 ASC], value indices: [0, 1, 2, 3], distribution key: [1] } Table 14 { columns: [orders_o_custkey, orders_o_orderkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 15 { columns: [lineitem_l_orderkey, count, sum(lineitem_l_quantity)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } @@ -3191,24 +3191,24 @@ and l_shipinstruct = 'DELIVER IN PERSON' ); logical_plan: | - LogicalProject { exprs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalAgg { aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalProject { exprs: [(lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + LogicalProject { exprs: [sum($expr1)] } + └─LogicalAgg { aggs: [sum($expr1)] } + └─LogicalProject { exprs: [(lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr1] } └─LogicalFilter { predicate: (part.p_partkey = lineitem.l_partkey) AND (part.p_size >= 1:Int32) AND In(lineitem.l_shipmode, 'AIR':Varchar, 'AIR REG':Varchar) AND (lineitem.l_shipinstruct = 'DELIVER IN PERSON':Varchar) AND (((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND (lineitem.l_quantity >= 1:Int32)) AND (lineitem.l_quantity <= 11:Int32)) AND (part.p_size <= 5:Int32)) OR (((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND (lineitem.l_quantity >= 30:Int32)) AND (lineitem.l_quantity <= 40:Int32)) AND (part.p_size <= 
10:Int32))) OR (((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND (lineitem.l_quantity >= 10:Int32)) AND (lineitem.l_quantity <= 20:Int32)) AND (part.p_size <= 15:Int32))) } └─LogicalJoin { type: Inner, on: true, output: all } ├─LogicalScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_shipdate, lineitem.l_commitdate, lineitem.l_receiptdate, lineitem.l_shipinstruct, lineitem.l_shipmode, lineitem.l_comment] } └─LogicalScan { table: part, columns: [part.p_partkey, part.p_name, part.p_mfgr, part.p_brand, part.p_type, part.p_size, part.p_container, part.p_retailprice, part.p_comment] } optimized_logical_plan: | - LogicalAgg { aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─LogicalProject { exprs: [(lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + LogicalAgg { aggs: [sum($expr19)] } + └─LogicalProject { exprs: [(lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr19] } └─LogicalJoin { type: Inner, on: (part.p_partkey = lineitem.l_partkey) AND (((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND (lineitem.l_quantity >= 1:Int32)) AND (lineitem.l_quantity <= 11:Int32)) AND (part.p_size <= 5:Int32)) OR (((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND (lineitem.l_quantity >= 30:Int32)) AND (lineitem.l_quantity <= 40:Int32)) AND (part.p_size <= 10:Int32))) OR (((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND (lineitem.l_quantity >= 10:Int32)) AND (lineitem.l_quantity <= 20:Int32)) AND (part.p_size <= 15:Int32))), output: [lineitem.l_extendedprice, lineitem.l_discount] } ├─LogicalScan { table: lineitem, output_columns: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount], required_columns: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipinstruct, lineitem.l_shipmode], predicate: In(lineitem.l_shipmode, 'AIR':Varchar, 'AIR REG':Varchar) AND (lineitem.l_shipinstruct = 'DELIVER IN PERSON':Varchar) } └─LogicalScan { table: part, columns: [part.p_partkey, part.p_brand, part.p_size, part.p_container], predicate: (part.p_size >= 1:Int32) } batch_plan: | - BatchSimpleAgg { aggs: [sum(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } + BatchSimpleAgg { aggs: [sum(sum($expr41))] } └─BatchExchange { order: [], dist: Single } - └─BatchSimpleAgg { aggs: [sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─BatchProject { exprs: [(lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))] } + └─BatchSimpleAgg { aggs: [sum($expr41)] } + └─BatchProject { exprs: [(lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr41] } └─BatchLookupJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey AND (((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND (lineitem.l_quantity >= 1:Int32)) AND (lineitem.l_quantity <= 11:Int32)) AND 
(part.p_size <= 5:Int32)) OR (((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND (lineitem.l_quantity >= 30:Int32)) AND (lineitem.l_quantity <= 40:Int32)) AND (part.p_size <= 10:Int32))) OR (((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND (lineitem.l_quantity >= 10:Int32)) AND (lineitem.l_quantity <= 20:Int32)) AND (part.p_size <= 15:Int32))) AND (part.p_size >= 1:Int32), output: [lineitem.l_extendedprice, lineitem.l_discount] } └─BatchExchange { order: [], dist: UpstreamHashShard(lineitem.l_partkey) } └─BatchProject { exprs: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount] } @@ -3216,11 +3216,11 @@ └─BatchScan { table: lineitem, columns: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipinstruct, lineitem.l_shipmode], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [revenue], pk_columns: [] } - └─StreamProject { exprs: [sum(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } - └─StreamGlobalSimpleAgg { aggs: [count, sum(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } + └─StreamProject { exprs: [sum(sum($expr65))] } + └─StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr65))] } └─StreamExchange { dist: Single } - └─StreamStatelessLocalSimpleAgg { aggs: [count, sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)))] } - └─StreamProject { exprs: [(lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey] } + └─StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr65)] } + └─StreamProject { exprs: [(lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr65, lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey] } └─StreamFilter { predicate: (((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND (lineitem.l_quantity >= 1:Int32)) AND (lineitem.l_quantity <= 11:Int32)) AND (part.p_size <= 5:Int32)) OR (((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND (lineitem.l_quantity >= 30:Int32)) AND (lineitem.l_quantity <= 40:Int32)) AND (part.p_size <= 10:Int32))) OR (((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND (lineitem.l_quantity >= 10:Int32)) AND (lineitem.l_quantity <= 20:Int32)) AND (part.p_size <= 15:Int32))) } └─StreamHashJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey, output: all } ├─StreamExchange { dist: HashShard(lineitem.l_partkey) } @@ -3234,14 +3234,14 @@ Fragment 0 StreamMaterialize { columns: [revenue], pk_columns: [] } materialized table: 4294967294 - StreamProject { exprs: [sum(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } - StreamGlobalSimpleAgg { aggs: [count, sum(sum((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount))))] } + StreamProject { exprs: [sum(sum($expr65))] } + StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr65))] } result table: 0, state tables: [] StreamExchange Single from 1 Fragment 1 - StreamStatelessLocalSimpleAgg { aggs: [count, sum((lineitem.l_extendedprice * 
(1:Int32 - lineitem.l_discount)))] } - StreamProject { exprs: [(lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey] } + StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr65)] } + StreamProject { exprs: [(lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr65, lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey] } StreamFilter { predicate: (((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND (lineitem.l_quantity >= 1:Int32)) AND (lineitem.l_quantity <= 11:Int32)) AND (part.p_size <= 5:Int32)) OR (((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND (lineitem.l_quantity >= 30:Int32)) AND (lineitem.l_quantity <= 40:Int32)) AND (part.p_size <= 10:Int32))) OR (((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND (lineitem.l_quantity >= 10:Int32)) AND (lineitem.l_quantity <= 20:Int32)) AND (part.p_size <= 15:Int32))) } StreamHashJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey, output: all } left table: 1, right table 3, left degree table: 2, right degree table: 4, @@ -3261,11 +3261,11 @@ Upstream BatchPlanNode - Table 0 { columns: [count, sum(sum((lineitem_l_extendedprice * (1:Int32 - lineitem_l_discount))))], primary key: [], value indices: [0, 1], distribution key: [] } + Table 0 { columns: [count, sum(sum($expr65))], primary key: [], value indices: [0, 1], distribution key: [] } Table 1 { columns: [lineitem_l_partkey, lineitem_l_quantity, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_orderkey, lineitem_l_linenumber], primary key: [$0 ASC, $4 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [0] } Table 2 { columns: [lineitem_l_partkey, lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } - Table 3 { columns: [part_p_partkey, part_p_brand, part_p_size, part_p_container], primary key: [$0 ASC, $0 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } - Table 4 { columns: [part_p_partkey, part_p_partkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 3 { columns: [part_p_partkey, part_p_brand, part_p_size, part_p_container], primary key: [$0 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } + Table 4 { columns: [part_p_partkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4294967294 { columns: [revenue], primary key: [], value indices: [0], distribution key: [] } - id: tpch_q20 before: @@ -3316,14 +3316,14 @@ | ├─LogicalScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_acctbal, supplier.s_comment] } | └─LogicalScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey, nation.n_comment] } └─LogicalProject { exprs: [partsupp.ps_suppkey] } - └─LogicalFilter { predicate: (partsupp.ps_availqty > (0.5:Decimal * sum(lineitem.l_quantity))) } + └─LogicalFilter { predicate: (partsupp.ps_availqty > $expr1) } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 3, max_one_row: true } ├─LogicalApply { type: LeftSemi, on: (partsupp.ps_partkey = 
part.p_partkey), correlated_id: 2 } | ├─LogicalScan { table: partsupp, columns: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty, partsupp.ps_supplycost, partsupp.ps_comment] } | └─LogicalProject { exprs: [part.p_partkey] } | └─LogicalFilter { predicate: Like(part.p_name, 'forest%':Varchar) } | └─LogicalScan { table: part, columns: [part.p_partkey, part.p_name, part.p_mfgr, part.p_brand, part.p_type, part.p_size, part.p_container, part.p_retailprice, part.p_comment] } - └─LogicalProject { exprs: [(0.5:Decimal * sum(lineitem.l_quantity))] } + └─LogicalProject { exprs: [(0.5:Decimal * sum(lineitem.l_quantity)) as $expr1] } └─LogicalAgg { aggs: [sum(lineitem.l_quantity)] } └─LogicalProject { exprs: [lineitem.l_quantity] } └─LogicalFilter { predicate: (lineitem.l_partkey = CorrelatedInputRef { index: 0, correlated_id: 3 }) AND (lineitem.l_suppkey = CorrelatedInputRef { index: 1, correlated_id: 3 }) AND (lineitem.l_shipdate >= '1994-01-01':Date) AND (lineitem.l_shipdate < ('1994-01-01':Date + '1 year':Interval)) } @@ -3333,12 +3333,12 @@ ├─LogicalJoin { type: Inner, on: (supplier.s_nationkey = nation.n_nationkey), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address] } | ├─LogicalScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey] } | └─LogicalScan { table: nation, output_columns: [nation.n_nationkey], required_columns: [nation.n_nationkey, nation.n_name], predicate: (nation.n_name = 'KENYA':Varchar) } - └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(partsupp.ps_partkey, partsupp.ps_partkey) AND IsNotDistinctFrom(partsupp.ps_suppkey, partsupp.ps_suppkey) AND (partsupp.ps_availqty::Decimal > (0.5:Decimal * sum(lineitem.l_quantity))), output: [partsupp.ps_suppkey] } - ├─LogicalProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty::Decimal] } + └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(partsupp.ps_partkey, partsupp.ps_partkey) AND IsNotDistinctFrom(partsupp.ps_suppkey, partsupp.ps_suppkey) AND ($expr29 > $expr30), output: [partsupp.ps_suppkey] } + ├─LogicalProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty::Decimal as $expr29] } | └─LogicalJoin { type: LeftSemi, on: (partsupp.ps_partkey = part.p_partkey), output: all } | ├─LogicalScan { table: partsupp, columns: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty] } | └─LogicalScan { table: part, output_columns: [part.p_partkey], required_columns: [part.p_partkey, part.p_name], predicate: Like(part.p_name, 'forest%':Varchar) } - └─LogicalProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, (0.5:Decimal * sum(lineitem.l_quantity))] } + └─LogicalProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, (0.5:Decimal * sum(lineitem.l_quantity)) as $expr30] } └─LogicalAgg { group_key: [partsupp.ps_partkey, partsupp.ps_suppkey], aggs: [sum(lineitem.l_quantity)] } └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(partsupp.ps_partkey, lineitem.l_partkey) AND IsNotDistinctFrom(partsupp.ps_suppkey, lineitem.l_suppkey), output: [partsupp.ps_partkey, partsupp.ps_suppkey, lineitem.l_quantity] } ├─LogicalAgg { group_key: [partsupp.ps_partkey, partsupp.ps_suppkey], aggs: [] } @@ -3354,14 +3354,14 @@ | └─BatchScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey], distribution: UpstreamHashShard(supplier.s_suppkey) } └─BatchExchange { order: [], dist: HashShard(partsupp.ps_suppkey) } └─BatchProject { exprs: 
[partsupp.ps_suppkey] } - └─BatchFilter { predicate: (partsupp.ps_availqty::Decimal > (0.5:Decimal * sum(lineitem.l_quantity))) } + └─BatchFilter { predicate: ($expr67 > $expr66) } └─BatchHashJoin { type: Inner, predicate: partsupp.ps_partkey IS NOT DISTINCT FROM partsupp.ps_partkey AND partsupp.ps_suppkey IS NOT DISTINCT FROM partsupp.ps_suppkey, output: all } ├─BatchExchange { order: [], dist: HashShard(partsupp.ps_partkey, partsupp.ps_suppkey) } - | └─BatchProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty::Decimal] } + | └─BatchProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty::Decimal as $expr67] } | └─BatchLookupJoin { type: LeftSemi, predicate: partsupp.ps_partkey = part.p_partkey AND Like(part.p_name, 'forest%':Varchar), output: all } | └─BatchExchange { order: [], dist: UpstreamHashShard(partsupp.ps_partkey) } | └─BatchScan { table: partsupp, columns: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty], distribution: UpstreamHashShard(partsupp.ps_partkey, partsupp.ps_suppkey) } - └─BatchProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, (0.5:Decimal * sum(lineitem.l_quantity))] } + └─BatchProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, (0.5:Decimal * sum(lineitem.l_quantity)) as $expr66] } └─BatchHashAgg { group_key: [partsupp.ps_partkey, partsupp.ps_suppkey], aggs: [sum(lineitem.l_quantity)] } └─BatchHashJoin { type: LeftOuter, predicate: partsupp.ps_partkey IS NOT DISTINCT FROM lineitem.l_partkey AND partsupp.ps_suppkey IS NOT DISTINCT FROM lineitem.l_suppkey, output: [partsupp.ps_partkey, partsupp.ps_suppkey, lineitem.l_quantity] } ├─BatchExchange { order: [], dist: HashShard(partsupp.ps_partkey, partsupp.ps_suppkey) } @@ -3384,10 +3384,10 @@ | └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } └─StreamExchange { dist: HashShard(partsupp.ps_suppkey) } └─StreamProject { exprs: [partsupp.ps_suppkey, partsupp.ps_partkey, partsupp.ps_partkey, partsupp.ps_suppkey] } - └─StreamFilter { predicate: (partsupp.ps_availqty::Decimal > (0.5:Decimal * sum(lineitem.l_quantity))) } + └─StreamFilter { predicate: ($expr105 > $expr104) } └─StreamHashJoin { type: Inner, predicate: partsupp.ps_partkey IS NOT DISTINCT FROM partsupp.ps_partkey AND partsupp.ps_suppkey IS NOT DISTINCT FROM partsupp.ps_suppkey, output: all } ├─StreamExchange { dist: HashShard(partsupp.ps_partkey, partsupp.ps_suppkey) } - | └─StreamProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty::Decimal] } + | └─StreamProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty::Decimal as $expr105] } | └─StreamHashJoin { type: LeftSemi, predicate: partsupp.ps_partkey = part.p_partkey, output: all } | ├─StreamExchange { dist: HashShard(partsupp.ps_partkey) } | | └─StreamTableScan { table: partsupp, columns: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty], pk: [partsupp.ps_partkey, partsupp.ps_suppkey], dist: UpstreamHashShard(partsupp.ps_partkey, partsupp.ps_suppkey) } @@ -3395,7 +3395,7 @@ | └─StreamProject { exprs: [part.p_partkey] } | └─StreamFilter { predicate: Like(part.p_name, 'forest%':Varchar) } | └─StreamTableScan { table: part, columns: [part.p_partkey, part.p_name], pk: [part.p_partkey], dist: UpstreamHashShard(part.p_partkey) } - └─StreamProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, (0.5:Decimal * sum(lineitem.l_quantity))] } + └─StreamProject { 
exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, (0.5:Decimal * sum(lineitem.l_quantity)) as $expr104] } └─StreamHashAgg { group_key: [partsupp.ps_partkey, partsupp.ps_suppkey], aggs: [count, sum(lineitem.l_quantity)] } └─StreamHashJoin { type: LeftOuter, predicate: partsupp.ps_partkey IS NOT DISTINCT FROM lineitem.l_partkey AND partsupp.ps_suppkey IS NOT DISTINCT FROM lineitem.l_suppkey, output: [partsupp.ps_partkey, partsupp.ps_suppkey, lineitem.l_quantity, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, lineitem.l_suppkey] } ├─StreamExchange { dist: HashShard(partsupp.ps_partkey, partsupp.ps_suppkey) } @@ -3435,11 +3435,11 @@ Fragment 4 StreamProject { exprs: [partsupp.ps_suppkey, partsupp.ps_partkey, partsupp.ps_partkey, partsupp.ps_suppkey] } - StreamFilter { predicate: (partsupp.ps_availqty::Decimal > (0.5:Decimal * sum(lineitem.l_quantity))) } + StreamFilter { predicate: ($expr105 > $expr104) } StreamHashJoin { type: Inner, predicate: partsupp.ps_partkey IS NOT DISTINCT FROM partsupp.ps_partkey AND partsupp.ps_suppkey IS NOT DISTINCT FROM partsupp.ps_suppkey, output: all } left table: 8, right table 10, left degree table: 9, right degree table: 11, StreamExchange Hash([0, 1]) from 5 - StreamProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, (0.5:Decimal * sum(lineitem.l_quantity))] } + StreamProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, (0.5:Decimal * sum(lineitem.l_quantity)) as $expr104] } StreamHashAgg { group_key: [partsupp.ps_partkey, partsupp.ps_suppkey], aggs: [count, sum(lineitem.l_quantity)] } result table: 16, state tables: [] StreamHashJoin { type: LeftOuter, predicate: partsupp.ps_partkey IS NOT DISTINCT FROM lineitem.l_partkey AND partsupp.ps_suppkey IS NOT DISTINCT FROM lineitem.l_suppkey, output: [partsupp.ps_partkey, partsupp.ps_suppkey, lineitem.l_quantity, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, lineitem.l_suppkey] } @@ -3448,7 +3448,7 @@ StreamExchange Hash([0, 1]) from 9 Fragment 5 - StreamProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty::Decimal] } + StreamProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty::Decimal as $expr105] } StreamHashJoin { type: LeftSemi, predicate: partsupp.ps_partkey = part.p_partkey, output: all } left table: 12, right table 14, left degree table: 13, right degree table: 15, StreamExchange Hash([0]) from 6 @@ -3481,25 +3481,25 @@ Upstream BatchPlanNode - Table 0 { columns: [supplier_s_suppkey, supplier_s_name, supplier_s_address, supplier_s_nationkey, nation_n_nationkey], primary key: [$0 ASC, $0 ASC, $4 ASC, $3 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0] } - Table 1 { columns: [supplier_s_suppkey, supplier_s_suppkey_0, nation_n_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } - Table 2 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, partsupp_ps_partkey_0, partsupp_ps_suppkey_0], primary key: [$0 ASC, $1 ASC, $0 ASC, $2 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } - Table 3 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, partsupp_ps_suppkey_0, partsupp_ps_partkey_0, partsupp_ps_suppkey_1, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC], value indices: [5], distribution key: [0] } + Table 0 { columns: [supplier_s_suppkey, supplier_s_name, supplier_s_address, supplier_s_nationkey, nation_n_nationkey], primary key: [$0 ASC, $4 ASC, $3 ASC], value indices: [0, 1, 2, 
3, 4], distribution key: [0] } + Table 1 { columns: [supplier_s_suppkey, nation_n_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } + Table 2 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, partsupp_ps_partkey_0, partsupp_ps_suppkey_0], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } + Table 3 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, partsupp_ps_partkey_0, partsupp_ps_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } Table 4 { columns: [supplier_s_suppkey, supplier_s_name, supplier_s_address, supplier_s_nationkey], primary key: [$3 ASC, $0 ASC], value indices: [0, 1, 2, 3], distribution key: [3] } Table 5 { columns: [supplier_s_nationkey, supplier_s_suppkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 6 { columns: [nation_n_nationkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 7 { columns: [nation_n_nationkey, nation_n_nationkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 8 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_availqty::Decimal], primary key: [$0 ASC, $1 ASC, $0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0, 1] } - Table 9 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_partkey_0, partsupp_ps_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0, 1] } - Table 10 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, (0_5:Decimal * sum(lineitem_l_quantity))], primary key: [$0 ASC, $1 ASC, $0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0, 1] } - Table 11 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_partkey_0, partsupp_ps_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0, 1] } - Table 12 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_availqty], primary key: [$0 ASC, $0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 13 { columns: [partsupp_ps_partkey, partsupp_ps_partkey_0, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } - Table 14 { columns: [part_p_partkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 15 { columns: [part_p_partkey, part_p_partkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 6 { columns: [nation_n_nationkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 7 { columns: [nation_n_nationkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 8 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, $expr105], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0, 1] } + Table 9 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0, 1] } + Table 10 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, $expr104], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0, 1] } + Table 11 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0, 1] } + Table 12 { columns: 
[partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_availqty], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 13 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 14 { columns: [part_p_partkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 15 { columns: [part_p_partkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 16 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, count, sum(lineitem_l_quantity)], primary key: [$0 ASC, $1 ASC], value indices: [2, 3], distribution key: [0, 1] } - Table 17 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey], primary key: [$0 ASC, $1 ASC, $0 ASC, $1 ASC], value indices: [0, 1], distribution key: [0, 1] } - Table 18 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_partkey_0, partsupp_ps_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0, 1] } + Table 17 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey], primary key: [$0 ASC, $1 ASC], value indices: [0, 1], distribution key: [0, 1] } + Table 18 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0, 1] } Table 19 { columns: [lineitem_l_partkey, lineitem_l_suppkey, lineitem_l_quantity, lineitem_l_orderkey, lineitem_l_linenumber], primary key: [$0 ASC, $1 ASC, $3 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0, 1] } Table 20 { columns: [lineitem_l_partkey, lineitem_l_suppkey, lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0, 1] } Table 21 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, count], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0, 1] } @@ -3615,7 +3615,7 @@ └─StreamTopN { order: "[count DESC, supplier.s_name ASC]", limit: 100, offset: 0 } └─StreamExchange { dist: Single } └─StreamGroupTopN { order: "[count DESC, supplier.s_name ASC]", limit: 100, offset: 0, group_key: [2] } - └─StreamProject { exprs: [supplier.s_name, count, Vnode(supplier.s_name)] } + └─StreamProject { exprs: [supplier.s_name, count, Vnode(supplier.s_name) as $expr1] } └─StreamProject { exprs: [supplier.s_name, count] } └─StreamHashAgg { group_key: [supplier.s_name], aggs: [count, count] } └─StreamExchange { dist: HashShard(supplier.s_name) } @@ -3658,7 +3658,7 @@ Fragment 1 StreamGroupTopN { order: "[count DESC, supplier.s_name ASC]", limit: 100, offset: 0, group_key: [2] } state table: 1 - StreamProject { exprs: [supplier.s_name, count, Vnode(supplier.s_name)] } + StreamProject { exprs: [supplier.s_name, count, Vnode(supplier.s_name) as $expr1] } StreamProject { exprs: [supplier.s_name, count] } StreamHashAgg { group_key: [supplier.s_name], aggs: [count, count] } result table: 2, state tables: [] @@ -3726,27 +3726,27 @@ Upstream BatchPlanNode - Table 0 { columns: [supplier_s_name, count, Vnode(supplier_s_name)], primary key: [$1 DESC, $0 ASC], value indices: [0, 1, 2], distribution key: [] } - Table 1 { columns: [supplier_s_name, count, Vnode(supplier_s_name)], primary key: [$2 ASC, $1 DESC, $0 ASC], value indices: [0, 1, 2], distribution key: [0], vnode column idx: 2 } + Table 0 { columns: [supplier_s_name, count, $expr1], primary key: [$1 DESC, $0 ASC], value indices: [0, 1, 2], distribution key: [] } + Table 1 { 
columns: [supplier_s_name, count, $expr1], primary key: [$2 ASC, $1 DESC, $0 ASC], value indices: [0, 1, 2], distribution key: [0], vnode column idx: 2 } Table 2 { columns: [supplier_s_name, count, count_0], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } - Table 3 { columns: [supplier_s_name, lineitem_l_orderkey, lineitem_l_suppkey, supplier_s_suppkey, lineitem_l_linenumber, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey], primary key: [$1 ASC, $3 ASC, $1 ASC, $4 ASC, $2 ASC, $5 ASC, $6 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [1] } - Table 4 { columns: [lineitem_l_orderkey, supplier_s_suppkey, lineitem_l_orderkey_0, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC], value indices: [8], distribution key: [0] } - Table 5 { columns: [lineitem_l_orderkey, lineitem_l_suppkey, lineitem_l_linenumber], primary key: [$0 ASC, $0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 6 { columns: [lineitem_l_orderkey, lineitem_l_orderkey_0, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } - Table 7 { columns: [supplier_s_name, lineitem_l_orderkey, lineitem_l_suppkey, supplier_s_suppkey, lineitem_l_linenumber, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey], primary key: [$1 ASC, $3 ASC, $1 ASC, $4 ASC, $2 ASC, $5 ASC, $6 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [1] } - Table 8 { columns: [lineitem_l_orderkey, supplier_s_suppkey, lineitem_l_orderkey_0, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC], value indices: [8], distribution key: [0] } - Table 9 { columns: [lineitem_l_orderkey, lineitem_l_suppkey, lineitem_l_linenumber], primary key: [$0 ASC, $0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 10 { columns: [lineitem_l_orderkey, lineitem_l_orderkey_0, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } - Table 11 { columns: [supplier_s_name, lineitem_l_orderkey, lineitem_l_suppkey, supplier_s_suppkey, lineitem_l_linenumber, supplier_s_nationkey, nation_n_nationkey], primary key: [$1 ASC, $3 ASC, $1 ASC, $4 ASC, $2 ASC, $6 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [1] } - Table 12 { columns: [lineitem_l_orderkey, supplier_s_suppkey, lineitem_l_orderkey_0, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [7], distribution key: [0] } - Table 13 { columns: [orders_o_orderkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 14 { columns: [orders_o_orderkey, orders_o_orderkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 3 { columns: [supplier_s_name, lineitem_l_orderkey, lineitem_l_suppkey, supplier_s_suppkey, lineitem_l_linenumber, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey], primary key: [$1 ASC, $3 ASC, $4 ASC, $2 ASC, $5 ASC, $6 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [1] } + Table 4 { columns: [lineitem_l_orderkey, supplier_s_suppkey, 
lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [7], distribution key: [0] } + Table 5 { columns: [lineitem_l_orderkey, lineitem_l_suppkey, lineitem_l_linenumber], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 6 { columns: [lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 7 { columns: [supplier_s_name, lineitem_l_orderkey, lineitem_l_suppkey, supplier_s_suppkey, lineitem_l_linenumber, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey], primary key: [$1 ASC, $3 ASC, $4 ASC, $2 ASC, $5 ASC, $6 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7], distribution key: [1] } + Table 8 { columns: [lineitem_l_orderkey, supplier_s_suppkey, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [7], distribution key: [0] } + Table 9 { columns: [lineitem_l_orderkey, lineitem_l_suppkey, lineitem_l_linenumber], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 10 { columns: [lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 11 { columns: [supplier_s_name, lineitem_l_orderkey, lineitem_l_suppkey, supplier_s_suppkey, lineitem_l_linenumber, supplier_s_nationkey, nation_n_nationkey], primary key: [$1 ASC, $3 ASC, $4 ASC, $2 ASC, $6 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [1] } + Table 12 { columns: [lineitem_l_orderkey, supplier_s_suppkey, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC], value indices: [6], distribution key: [0] } + Table 13 { columns: [orders_o_orderkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 14 { columns: [orders_o_orderkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 15 { columns: [supplier_s_name, supplier_s_nationkey, lineitem_l_orderkey, lineitem_l_suppkey, supplier_s_suppkey, lineitem_l_linenumber], primary key: [$1 ASC, $4 ASC, $2 ASC, $5 ASC, $3 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [1] } Table 16 { columns: [supplier_s_nationkey, supplier_s_suppkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC], value indices: [5], distribution key: [0] } - Table 17 { columns: [nation_n_nationkey], primary key: [$0 ASC, $0 ASC], value indices: [0], distribution key: [0] } - Table 18 { columns: [nation_n_nationkey, nation_n_nationkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 19 { columns: [supplier_s_suppkey, supplier_s_name, supplier_s_nationkey], primary key: [$0 ASC, $0 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 20 { columns: [supplier_s_suppkey, supplier_s_suppkey_0, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 17 { columns: [nation_n_nationkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 18 { columns: [nation_n_nationkey, _degree], primary key: [$0 ASC], value indices: [1], 
distribution key: [0] } + Table 19 { columns: [supplier_s_suppkey, supplier_s_name, supplier_s_nationkey], primary key: [$0 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 20 { columns: [supplier_s_suppkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 21 { columns: [lineitem_l_orderkey, lineitem_l_suppkey, lineitem_l_linenumber], primary key: [$1 ASC, $0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [1] } Table 22 { columns: [lineitem_l_suppkey, lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } Table 4294967294 { columns: [s_name, numwait], primary key: [$1 DESC, $0 ASC], value indices: [0, 1], distribution key: [] } @@ -3792,39 +3792,39 @@ order by cntrycode; logical_plan: | - LogicalProject { exprs: [Substr(customer.c_phone, 1:Int32, 2:Int32), count, sum(customer.c_acctbal)] } - └─LogicalAgg { group_key: [Substr(customer.c_phone, 1:Int32, 2:Int32)], aggs: [count, sum(customer.c_acctbal)] } - └─LogicalProject { exprs: [Substr(customer.c_phone, 1:Int32, 2:Int32), customer.c_acctbal] } - └─LogicalProject { exprs: [Substr(customer.c_phone, 1:Int32, 2:Int32), customer.c_acctbal] } - └─LogicalFilter { predicate: In(Substr(customer.c_phone, 1:Int32, 2:Int32), '30':Varchar, '24':Varchar, '31':Varchar, '38':Varchar, '25':Varchar, '34':Varchar, '37':Varchar) AND (customer.c_acctbal > (sum(customer.c_acctbal) / count(customer.c_acctbal))) } + LogicalProject { exprs: [$expr2, count, sum(customer.c_acctbal)] } + └─LogicalAgg { group_key: [$expr2], aggs: [count, sum(customer.c_acctbal)] } + └─LogicalProject { exprs: [$expr2, customer.c_acctbal] } + └─LogicalProject { exprs: [Substr(customer.c_phone, 1:Int32, 2:Int32) as $expr2, customer.c_acctbal] } + └─LogicalFilter { predicate: In(Substr(customer.c_phone, 1:Int32, 2:Int32), '30':Varchar, '24':Varchar, '31':Varchar, '38':Varchar, '25':Varchar, '34':Varchar, '37':Varchar) AND (customer.c_acctbal > $expr1) } └─LogicalApply { type: LeftOuter, on: true, correlated_id: 2, max_one_row: true } ├─LogicalApply { type: LeftAnti, on: true, correlated_id: 1 } | ├─LogicalScan { table: customer, columns: [customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_mktsegment, customer.c_comment] } | └─LogicalProject { exprs: [orders.o_orderkey, orders.o_custkey, orders.o_orderstatus, orders.o_totalprice, orders.o_orderdate, orders.o_orderpriority, orders.o_clerk, orders.o_shippriority, orders.o_comment] } | └─LogicalFilter { predicate: (orders.o_custkey = CorrelatedInputRef { index: 0, correlated_id: 1 }) } | └─LogicalScan { table: orders, columns: [orders.o_orderkey, orders.o_custkey, orders.o_orderstatus, orders.o_totalprice, orders.o_orderdate, orders.o_orderpriority, orders.o_clerk, orders.o_shippriority, orders.o_comment] } - └─LogicalProject { exprs: [(sum(customer.c_acctbal) / count(customer.c_acctbal))] } + └─LogicalProject { exprs: [(sum(customer.c_acctbal) / count(customer.c_acctbal)) as $expr1] } └─LogicalAgg { aggs: [sum(customer.c_acctbal), count(customer.c_acctbal)] } └─LogicalProject { exprs: [customer.c_acctbal] } └─LogicalFilter { predicate: (customer.c_acctbal > 0.00:Decimal) AND In(Substr(customer.c_phone, 1:Int32, 2:Int32), '30':Varchar, '24':Varchar, '31':Varchar, '38':Varchar, '25':Varchar, '34':Varchar, '37':Varchar) } └─LogicalScan { table: customer, columns: [customer.c_custkey, customer.c_name, customer.c_address, 
customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_mktsegment, customer.c_comment] } optimized_logical_plan: | - LogicalAgg { group_key: [Substr(customer.c_phone, 1:Int32, 2:Int32)], aggs: [count, sum(customer.c_acctbal)] } - └─LogicalProject { exprs: [Substr(customer.c_phone, 1:Int32, 2:Int32), customer.c_acctbal] } - └─LogicalJoin { type: Inner, on: (customer.c_acctbal > (sum(customer.c_acctbal) / count(customer.c_acctbal))), output: [customer.c_phone, customer.c_acctbal] } + LogicalAgg { group_key: [$expr39], aggs: [count, sum(customer.c_acctbal)] } + └─LogicalProject { exprs: [Substr(customer.c_phone, 1:Int32, 2:Int32) as $expr39, customer.c_acctbal] } + └─LogicalJoin { type: Inner, on: (customer.c_acctbal > $expr38), output: [customer.c_phone, customer.c_acctbal] } ├─LogicalJoin { type: LeftAnti, on: (orders.o_custkey = customer.c_custkey), output: [customer.c_phone, customer.c_acctbal] } | ├─LogicalScan { table: customer, columns: [customer.c_custkey, customer.c_phone, customer.c_acctbal], predicate: In(Substr(customer.c_phone, 1:Int32, 2:Int32), '30':Varchar, '24':Varchar, '31':Varchar, '38':Varchar, '25':Varchar, '34':Varchar, '37':Varchar) } | └─LogicalScan { table: orders, columns: [orders.o_custkey] } - └─LogicalProject { exprs: [(sum(customer.c_acctbal) / count(customer.c_acctbal))] } + └─LogicalProject { exprs: [(sum(customer.c_acctbal) / count(customer.c_acctbal)) as $expr38] } └─LogicalAgg { aggs: [sum(customer.c_acctbal), count(customer.c_acctbal)] } └─LogicalScan { table: customer, output_columns: [customer.c_acctbal], required_columns: [customer.c_acctbal, customer.c_phone], predicate: (customer.c_acctbal > 0.00:Decimal) AND In(Substr(customer.c_phone, 1:Int32, 2:Int32), '30':Varchar, '24':Varchar, '31':Varchar, '38':Varchar, '25':Varchar, '34':Varchar, '37':Varchar) } batch_plan: | - BatchExchange { order: [Substr(customer.c_phone, 1:Int32, 2:Int32) ASC], dist: Single } - └─BatchSort { order: [Substr(customer.c_phone, 1:Int32, 2:Int32) ASC] } - └─BatchHashAgg { group_key: [Substr(customer.c_phone, 1:Int32, 2:Int32)], aggs: [count, sum(customer.c_acctbal)] } - └─BatchExchange { order: [], dist: HashShard(Substr(customer.c_phone, 1:Int32, 2:Int32)) } - └─BatchProject { exprs: [Substr(customer.c_phone, 1:Int32, 2:Int32), customer.c_acctbal] } - └─BatchNestedLoopJoin { type: Inner, predicate: (customer.c_acctbal > (sum(sum(customer.c_acctbal)) / sum0(count(customer.c_acctbal)))), output: [customer.c_phone, customer.c_acctbal] } + BatchExchange { order: [$expr84 ASC], dist: Single } + └─BatchSort { order: [$expr84 ASC] } + └─BatchHashAgg { group_key: [$expr84], aggs: [count, sum(customer.c_acctbal)] } + └─BatchExchange { order: [], dist: HashShard($expr84) } + └─BatchProject { exprs: [Substr(customer.c_phone, 1:Int32, 2:Int32) as $expr84, customer.c_acctbal] } + └─BatchNestedLoopJoin { type: Inner, predicate: (customer.c_acctbal > $expr83), output: [customer.c_phone, customer.c_acctbal] } ├─BatchExchange { order: [], dist: Single } | └─BatchHashJoin { type: LeftAnti, predicate: customer.c_custkey = orders.o_custkey, output: [customer.c_phone, customer.c_acctbal] } | ├─BatchExchange { order: [], dist: HashShard(customer.c_custkey) } @@ -3832,7 +3832,7 @@ | | └─BatchScan { table: customer, columns: [customer.c_custkey, customer.c_phone, customer.c_acctbal], distribution: UpstreamHashShard(customer.c_custkey) } | └─BatchExchange { order: [], dist: HashShard(orders.o_custkey) } | └─BatchScan { table: orders, columns: [orders.o_custkey], distribution: SomeShard 
} - └─BatchProject { exprs: [(sum(sum(customer.c_acctbal)) / sum0(count(customer.c_acctbal)))] } + └─BatchProject { exprs: [(sum(sum(customer.c_acctbal)) / sum0(count(customer.c_acctbal))) as $expr83] } └─BatchSimpleAgg { aggs: [sum(sum(customer.c_acctbal)), sum0(count(customer.c_acctbal))] } └─BatchExchange { order: [], dist: Single } └─BatchSimpleAgg { aggs: [sum(customer.c_acctbal), count(customer.c_acctbal)] } @@ -3841,11 +3841,11 @@ └─BatchScan { table: customer, columns: [customer.c_acctbal, customer.c_phone], distribution: SomeShard } stream_plan: | StreamMaterialize { columns: [cntrycode, numcust, totacctbal], pk_columns: [cntrycode] } - └─StreamProject { exprs: [Substr(customer.c_phone, 1:Int32, 2:Int32), count, sum(customer.c_acctbal)] } - └─StreamHashAgg { group_key: [Substr(customer.c_phone, 1:Int32, 2:Int32)], aggs: [count, count, sum(customer.c_acctbal)] } - └─StreamExchange { dist: HashShard(Substr(customer.c_phone, 1:Int32, 2:Int32)) } - └─StreamProject { exprs: [Substr(customer.c_phone, 1:Int32, 2:Int32), customer.c_acctbal, customer.c_custkey] } - └─StreamDynamicFilter { predicate: (customer.c_acctbal > (sum(sum(customer.c_acctbal)) / sum0(count(customer.c_acctbal)))), output: [customer.c_phone, customer.c_acctbal, customer.c_custkey] } + └─StreamProject { exprs: [$expr132, count, sum(customer.c_acctbal)] } + └─StreamHashAgg { group_key: [$expr132], aggs: [count, count, sum(customer.c_acctbal)] } + └─StreamExchange { dist: HashShard($expr132) } + └─StreamProject { exprs: [Substr(customer.c_phone, 1:Int32, 2:Int32) as $expr132, customer.c_acctbal, customer.c_custkey] } + └─StreamDynamicFilter { predicate: (customer.c_acctbal > $expr131), output: [customer.c_phone, customer.c_acctbal, customer.c_custkey] } ├─StreamHashJoin { type: LeftAnti, predicate: customer.c_custkey = orders.o_custkey, output: [customer.c_phone, customer.c_acctbal, customer.c_custkey] } | ├─StreamExchange { dist: HashShard(customer.c_custkey) } | | └─StreamFilter { predicate: In(Substr(customer.c_phone, 1:Int32, 2:Int32), '30':Varchar, '24':Varchar, '31':Varchar, '38':Varchar, '25':Varchar, '34':Varchar, '37':Varchar) } @@ -3853,7 +3853,7 @@ | └─StreamExchange { dist: HashShard(orders.o_custkey) } | └─StreamTableScan { table: orders, columns: [orders.o_custkey, orders.o_orderkey], pk: [orders.o_orderkey], dist: UpstreamHashShard(orders.o_orderkey) } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [(sum(sum(customer.c_acctbal)) / sum0(count(customer.c_acctbal)))] } + └─StreamProject { exprs: [(sum(sum(customer.c_acctbal)) / sum0(count(customer.c_acctbal))) as $expr131] } └─StreamGlobalSimpleAgg { aggs: [count, sum(sum(customer.c_acctbal)), sum0(count(customer.c_acctbal))] } └─StreamExchange { dist: Single } └─StreamStatelessLocalSimpleAgg { aggs: [count, sum(customer.c_acctbal), count(customer.c_acctbal)] } diff --git a/src/frontend/planner_test/tests/testdata/union.yaml b/src/frontend/planner_test/tests/testdata/union.yaml index 7053540b81b0f..1b221281526bd 100644 --- a/src/frontend/planner_test/tests/testdata/union.yaml +++ b/src/frontend/planner_test/tests/testdata/union.yaml @@ -207,69 +207,45 @@ select 1 union all select 1 optimized_logical_plan: | LogicalUnion { all: true } - ├─LogicalProject { exprs: [1:Int32] } - | └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - └─LogicalProject { exprs: [1:Int32] } - └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } + ├─LogicalValues { rows: [[1:Int32]], schema: Schema { fields: [1:Int32:Int32] } } + └─LogicalValues { 
rows: [[1:Int32]], schema: Schema { fields: [1:Int32:Int32] } } batch_plan: | BatchUnion { all: true } - ├─BatchProject { exprs: [1:Int32] } - | └─BatchValues { rows: [[]] } - └─BatchProject { exprs: [1:Int32] } - └─BatchValues { rows: [[]] } + ├─BatchValues { rows: [[1:Int32]] } + └─BatchValues { rows: [[1:Int32]] } - sql: | select 1 union all select 2 union all select 3 union all select 4 union all select 5 optimized_logical_plan: | LogicalUnion { all: true } - ├─LogicalProject { exprs: [1:Int32] } - | └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - ├─LogicalProject { exprs: [2:Int32] } - | └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - ├─LogicalProject { exprs: [3:Int32] } - | └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - ├─LogicalProject { exprs: [4:Int32] } - | └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - └─LogicalProject { exprs: [5:Int32] } - └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } + ├─LogicalValues { rows: [[1:Int32]], schema: Schema { fields: [1:Int32:Int32] } } + ├─LogicalValues { rows: [[2:Int32]], schema: Schema { fields: [2:Int32:Int32] } } + ├─LogicalValues { rows: [[3:Int32]], schema: Schema { fields: [3:Int32:Int32] } } + ├─LogicalValues { rows: [[4:Int32]], schema: Schema { fields: [4:Int32:Int32] } } + └─LogicalValues { rows: [[5:Int32]], schema: Schema { fields: [5:Int32:Int32] } } batch_plan: | BatchUnion { all: true } - ├─BatchProject { exprs: [1:Int32] } - | └─BatchValues { rows: [[]] } - ├─BatchProject { exprs: [2:Int32] } - | └─BatchValues { rows: [[]] } - ├─BatchProject { exprs: [3:Int32] } - | └─BatchValues { rows: [[]] } - ├─BatchProject { exprs: [4:Int32] } - | └─BatchValues { rows: [[]] } - └─BatchProject { exprs: [5:Int32] } - └─BatchValues { rows: [[]] } + ├─BatchValues { rows: [[1:Int32]] } + ├─BatchValues { rows: [[2:Int32]] } + ├─BatchValues { rows: [[3:Int32]] } + ├─BatchValues { rows: [[4:Int32]] } + └─BatchValues { rows: [[5:Int32]] } - sql: | select 1 union select 2 union select 3 union select 4 union select 5 optimized_logical_plan: | LogicalAgg { group_key: [1:Int32], aggs: [] } └─LogicalUnion { all: true } - ├─LogicalProject { exprs: [1:Int32] } - | └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - ├─LogicalProject { exprs: [2:Int32] } - | └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - ├─LogicalProject { exprs: [3:Int32] } - | └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - ├─LogicalProject { exprs: [4:Int32] } - | └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - └─LogicalProject { exprs: [5:Int32] } - └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } + ├─LogicalValues { rows: [[1:Int32]], schema: Schema { fields: [1:Int32:Int32] } } + ├─LogicalValues { rows: [[2:Int32]], schema: Schema { fields: [2:Int32:Int32] } } + ├─LogicalValues { rows: [[3:Int32]], schema: Schema { fields: [3:Int32:Int32] } } + ├─LogicalValues { rows: [[4:Int32]], schema: Schema { fields: [4:Int32:Int32] } } + └─LogicalValues { rows: [[5:Int32]], schema: Schema { fields: [5:Int32:Int32] } } batch_plan: | BatchExchange { order: [], dist: Single } └─BatchHashAgg { group_key: [1:Int32], aggs: [] } └─BatchExchange { order: [], dist: HashShard(1:Int32) } └─BatchUnion { all: true } - ├─BatchProject { exprs: [1:Int32] } - | └─BatchValues { rows: [[]] } - ├─BatchProject { exprs: [2:Int32] } - | └─BatchValues { rows: [[]] } - ├─BatchProject { exprs: [3:Int32] } - | └─BatchValues { rows: [[]] } - ├─BatchProject { exprs: 
[4:Int32] } - | └─BatchValues { rows: [[]] } - └─BatchProject { exprs: [5:Int32] } - └─BatchValues { rows: [[]] } + ├─BatchValues { rows: [[1:Int32]] } + ├─BatchValues { rows: [[2:Int32]] } + ├─BatchValues { rows: [[3:Int32]] } + ├─BatchValues { rows: [[4:Int32]] } + └─BatchValues { rows: [[5:Int32]] } diff --git a/src/frontend/planner_test/tests/testdata/update.yaml b/src/frontend/planner_test/tests/testdata/update.yaml index 4cdd8121c749d..17c3d7f54e329 100644 --- a/src/frontend/planner_test/tests/testdata/update.yaml +++ b/src/frontend/planner_test/tests/testdata/update.yaml @@ -49,13 +49,13 @@ create table t (v1 int, v2 int); update t set (v1, v2) = (v2 + 1, v1 - 1) where v1 != v2 returning *, v2+1, v1-1; logical_plan: | - LogicalProject { exprs: [t.v1, t.v2, (t.v2 + 1:Int32), (t.v1 - 1:Int32)] } + LogicalProject { exprs: [t.v1, t.v2, (t.v2 + 1:Int32) as $expr1, (t.v1 - 1:Int32) as $expr2] } └─LogicalUpdate { table: t, exprs: [($1 + 1:Int32), ($0 - 1:Int32), $2], returning: true } └─LogicalFilter { predicate: (t.v1 <> t.v2) } └─LogicalScan { table: t, columns: [t.v1, t.v2, t._row_id] } batch_plan: | BatchExchange { order: [], dist: Single } - └─BatchProject { exprs: [t.v1, t.v2, (t.v2 + 1:Int32), (t.v1 - 1:Int32)] } + └─BatchProject { exprs: [t.v1, t.v2, (t.v2 + 1:Int32) as $expr45, (t.v1 - 1:Int32) as $expr46] } └─BatchUpdate { table: t, exprs: [($1 + 1:Int32), ($0 - 1:Int32), $2], returning: true } └─BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: (t.v1 <> t.v2) } diff --git a/src/frontend/planner_test/tests/testdata/watermark.yaml b/src/frontend/planner_test/tests/testdata/watermark.yaml new file mode 100644 index 0000000000000..a0237485407cc --- /dev/null +++ b/src/frontend/planner_test/tests/testdata/watermark.yaml @@ -0,0 +1,15 @@ +# This file is automatically generated. See `src/frontend/planner_test/README.md` for more information. +- name: watermark on source + sql: | + create source t (v1 timestamp with time zone, watermark for v1 as v1 - INTERVAL '1' SECOND) ROW FORMAT JSON; + select t.v1 - INTERVAL '2' SECOND as v1 from t; + logical_plan: | + LogicalProject { exprs: [(v1 - '00:00:02':Interval) as $expr1] } + └─LogicalSource { source: t, columns: [v1, _row_id], time_range: [(Unbounded, Unbounded)] } + stream_plan: | + StreamMaterialize { columns: [v1, _row_id(hidden)], pk_columns: [_row_id] } + └─StreamExchange { dist: HashShard(_row_id) } + └─StreamProject { exprs: [(v1 - '00:00:02':Interval) as $expr25, _row_id], watermark_columns: [(v1 - '00:00:02':Interval)] } + └─StreamRowIdGen { row_id_index: 1 } + └─StreamWatermarkFilter { watermark_descs: [idx: 0, expr: (v1 - '00:00:01':Interval)] } + └─StreamSource { source: "t", columns: ["v1", "_row_id"] } diff --git a/src/frontend/src/binder/bind_context.rs b/src/frontend/src/binder/bind_context.rs index 355723b9e1289..838012c8ecac0 100644 --- a/src/frontend/src/binder/bind_context.rs +++ b/src/frontend/src/binder/bind_context.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/frontend/src/binder/create.rs b/src/frontend/src/binder/create.rs new file mode 100644 index 0000000000000..e23d2746e8f32 --- /dev/null +++ b/src/frontend/src/binder/create.rs @@ -0,0 +1,43 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use itertools::Itertools; +use risingwave_common::catalog::{ColumnCatalog, Field}; +use risingwave_common::error::Result; + +use crate::Binder; + +impl Binder { + pub fn bind_columns_to_context( + &mut self, + name: String, + column_catalogs: Vec, + ) -> Result<()> { + let columns = column_catalogs + .iter() + .map(|c| (c.is_hidden, Field::from(&c.column_desc))) + .collect_vec(); + self.bind_table_to_context(columns, name, None) + } + + pub fn get_column_binding_index( + &mut self, + table_name: String, + column_name: &String, + ) -> Result { + Ok(self + .context + .get_column_binding_index(&Some(table_name), column_name)?) + } +} diff --git a/src/frontend/src/binder/delete.rs b/src/frontend/src/binder/delete.rs index caded12e99fe0..d6811e5ff5115 100644 --- a/src/frontend/src/binder/delete.rs +++ b/src/frontend/src/binder/delete.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/binder/expr/binary_op.rs b/src/frontend/src/binder/expr/binary_op.rs index cba047d672eee..460381a1f1449 100644 --- a/src/frontend/src/binder/expr/binary_op.rs +++ b/src/frontend/src/binder/expr/binary_op.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/binder/expr/column.rs b/src/frontend/src/binder/expr/column.rs index 989e3f55c2721..f703bb4493247 100644 --- a/src/frontend/src/binder/expr/column.rs +++ b/src/frontend/src/binder/expr/column.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/binder/expr/function.rs b/src/frontend/src/binder/expr/function.rs index 90f9f2662573b..5a3a1ba48af60 100644 --- a/src/frontend/src/binder/expr/function.rs +++ b/src/frontend/src/binder/expr/function.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,16 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::collections::HashMap; use std::iter::once; use std::str::FromStr; +use std::sync::LazyLock; +use bk_tree::{metrics, BKTree}; use itertools::Itertools; use risingwave_common::array::ListValue; use risingwave_common::catalog::PG_CATALOG_SCHEMA_NAME; use risingwave_common::error::{ErrorCode, Result}; use risingwave_common::session_config::USER_NAME_WILD_CARD; use risingwave_common::types::{DataType, ScalarImpl}; -use risingwave_common::RW_VERSION; +use risingwave_common::{GIT_SHA, RW_VERSION}; use risingwave_expr::expr::AggKind; use risingwave_sqlparser::ast::{Function, FunctionArg, FunctionArgExpr, WindowSpec}; @@ -114,255 +117,7 @@ impl Binder { return Ok(UserDefinedFunction::new(func.clone(), inputs).into()); } - // normal function - let mut inputs = inputs; - let function_type = match function_name.as_str() { - // comparison - "booleq" => { - inputs = Self::rewrite_two_bool_inputs(inputs)?; - ExprType::Equal - } - "boolne" => { - inputs = Self::rewrite_two_bool_inputs(inputs)?; - ExprType::NotEqual - } - // conditional - "coalesce" => ExprType::Coalesce, - "nullif" => { - inputs = Self::rewrite_nullif_to_case_when(inputs)?; - ExprType::Case - } - // mathematical - "round" => { - if inputs.len() >= 2 { - ExprType::RoundDigit - } else { - ExprType::Round - } - } - "ceil" => ExprType::Ceil, - "floor" => ExprType::Floor, - "abs" => ExprType::Abs, - "mod" => ExprType::Modulus, - // temporal/chrono - "to_timestamp" if inputs.len() == 1 => ExprType::ToTimestamp, - "to_timestamp" if inputs.len() == 2 => ExprType::ToTimestamp1, - "date_trunc" => ExprType::DateTrunc, - // string - "substr" => ExprType::Substr, - "length" => ExprType::Length, - "upper" => ExprType::Upper, - "lower" => ExprType::Lower, - "trim" => ExprType::Trim, - "replace" => ExprType::Replace, - "overlay" => ExprType::Overlay, - "position" => ExprType::Position, - "ltrim" => ExprType::Ltrim, - "rtrim" => ExprType::Rtrim, - "md5" => ExprType::Md5, - "to_char" => ExprType::ToChar, - "concat" => { - inputs = Self::rewrite_concat_to_concat_ws(inputs)?; - ExprType::ConcatWs - } - "concat_ws" => ExprType::ConcatWs, - "split_part" => ExprType::SplitPart, - "char_length" => ExprType::CharLength, - "character_length" => ExprType::CharLength, - "repeat" => ExprType::Repeat, - "ascii" => ExprType::Ascii, - "octet_length" => ExprType::OctetLength, - "bit_length" => ExprType::BitLength, - "regexp_match" => ExprType::RegexpMatch, - // array - "array_cat" => ExprType::ArrayCat, - "array_append" => ExprType::ArrayAppend, - "array_prepend" => ExprType::ArrayPrepend, - // System information operations. - "pg_typeof" if inputs.len() == 1 => { - let input = &inputs[0]; - let v = match input.is_unknown() { - true => "unknown".into(), - false => input.return_type().to_string(), - }; - return Ok(ExprImpl::literal_varchar(v)); - } - "current_database" if inputs.is_empty() => { - return Ok(ExprImpl::literal_varchar(self.db_name.clone())); - } - "current_schema" if inputs.is_empty() => { - return Ok(self - .catalog - .first_valid_schema( - &self.db_name, - &self.search_path, - &self.auth_context.user_name, - ) - .map(|schema| ExprImpl::literal_varchar(schema.name())) - .unwrap_or_else(|_| ExprImpl::literal_null(DataType::Varchar))); - } - "current_schemas" => { - let no_match_err = ErrorCode::ExprError( - "No function matches the given name and argument types. 
You might need to add explicit type casts.".into() - ); - if inputs.len() != 1 { - return Err(no_match_err.into()); - } - let input = inputs - .pop() - .unwrap() - .enforce_bool_clause("current_schemas") - .map_err(|_| no_match_err)?; - - let ExprImpl::Literal(literal) = &input else { - return Err(ErrorCode::NotImplemented( - "Only boolean literals are supported in `current_schemas`.".to_string(), None.into() - ) - .into()); - }; - - let Some(bool) = literal.get_data().as_ref().map(|bool| bool.clone().into_bool()) else { - return Ok(ExprImpl::literal_null(DataType::List { - datatype: Box::new(DataType::Varchar), - })); - }; - - let paths = if bool { - self.search_path.path() - } else { - self.search_path.real_path() - }; - - let mut schema_names = vec![]; - for path in paths { - let mut schema_name = path; - if schema_name == USER_NAME_WILD_CARD { - schema_name = &self.auth_context.user_name; - } - - if self - .catalog - .get_schema_by_name(&self.db_name, schema_name) - .is_ok() - { - schema_names.push(Some(schema_name.into())); - } - } - - return Ok(ExprImpl::literal_list( - ListValue::new(schema_names), - DataType::Varchar, - )); - } - "session_user" if inputs.is_empty() => { - return Ok(ExprImpl::literal_varchar( - self.auth_context.user_name.clone(), - )); - } - "pg_get_userbyid" => { - return if inputs.len() == 1 { - let input = &inputs[0]; - let bound_query = self.bind_get_user_by_id_select(input)?; - Ok(ExprImpl::Subquery(Box::new(Subquery::new( - BoundQuery { - body: BoundSetExpr::Select(Box::new(bound_query)), - order: vec![], - limit: None, - offset: None, - with_ties: false, - extra_order_exprs: vec![], - }, - SubqueryKind::Scalar, - )))) - } else { - Err(ErrorCode::ExprError( - "Too many/few arguments for pg_catalog.pg_get_userbyid()".into(), - ) - .into()) - }; - } - "pg_get_expr" => { - return if inputs.len() == 2 || inputs.len() == 3 { - // TODO: implement pg_get_expr rather than just return empty as an workaround. - Ok(ExprImpl::literal_varchar("".into())) - } else { - Err(ErrorCode::ExprError( - "Too many/few arguments for pg_catalog.pg_get_expr()".into(), - ) - .into()) - }; - } - "format_type" => { - return if inputs.len() == 2 { - // TODO - // return null as an workaround for now - Ok(ExprImpl::literal_null(DataType::Varchar)) - } else { - Err( - ErrorCode::ExprError("Too many/few arguments for format_type()".into()) - .into(), - ) - }; - } - "pg_table_is_visible" => return Ok(ExprImpl::literal_bool(true)), - "pg_encoding_to_char" => return Ok(ExprImpl::literal_varchar("UTF8".into())), - "has_database_privilege" => return Ok(ExprImpl::literal_bool(true)), - "pg_backend_pid" if inputs.is_empty() => { - // FIXME: the session id is not global unique in multi-frontend env. - return Ok(ExprImpl::literal_int(self.session_id.0)); - } - "pg_cancel_backend" => { - return if inputs.len() == 1 { - // TODO: implement real cancel rather than just return false as an workaround. - Ok(ExprImpl::literal_bool(false)) - } else { - Err(ErrorCode::ExprError( - "Too many/few arguments for pg_cancel_backend()".into(), - ) - .into()) - }; - } - "pg_terminate_backend" => { - return if inputs.len() == 1 { - // TODO: implement real terminate rather than just return false as an - // workaround. - Ok(ExprImpl::literal_bool(false)) - } else { - Err(ErrorCode::ExprError( - "Too many/few arguments for pg_terminate_backend()".into(), - ) - .into()) - }; - } - // internal - "rw_vnode" => ExprType::Vnode, - // TODO: choose which pg version we should return. 
- "version" => { - return Ok(ExprImpl::literal_varchar(format!( - "PostgreSQL 13.9-RW-{}", - RW_VERSION - ))) - } - // non-deterministic - "now" => { - self.ensure_now_function_allowed()?; - if !self.in_create_mv { - inputs.push(ExprImpl::from(Literal::new( - Some(ScalarImpl::Int64((self.bind_timestamp_ms * 1000) as i64)), - DataType::Timestamptz, - ))); - } - ExprType::Now - } - _ => { - return Err(ErrorCode::NotImplemented( - format!("unsupported function: {:?}", function_name), - 112.into(), - ) - .into()); - } - }; - Ok(FunctionCall::new(function_type, inputs)?.into()) + self.bind_builtin_scalar_function(function_name.as_str(), inputs) } pub(super) fn bind_agg(&mut self, mut f: Function, kind: AggKind) -> Result { @@ -487,6 +242,318 @@ impl Binder { Ok(WindowFunction::new(window_function_type, partition_by, order_by, inputs)?.into()) } + fn bind_builtin_scalar_function( + &mut self, + function_name: &str, + inputs: Vec, + ) -> Result { + type Inputs = Vec; + + type Handle = Box Result + Sync + Send>; + + fn rewrite(r#type: ExprType, rewriter: fn(Inputs) -> Result) -> Handle { + Box::new(move |_binder, mut inputs| { + inputs = (rewriter)(inputs)?; + Ok(FunctionCall::new(r#type, inputs)?.into()) + }) + } + + fn raw_call(r#type: ExprType) -> Handle { + rewrite(r#type, Ok) + } + + fn guard_by_len(expected_len: usize, handle: Handle) -> Handle { + Box::new(move |binder, inputs| { + if inputs.len() == expected_len { + handle(binder, inputs) + } else { + Err(ErrorCode::ExprError("unexpected arguments number".into()).into()) + } + }) + } + + fn raw Result + Sync + Send + 'static>( + f: F, + ) -> Handle { + Box::new(f) + } + + fn dispatch_by_len(mapping: Vec<(usize, Handle)>) -> Handle { + Box::new(move |binder, inputs| { + for (len, handle) in &mapping { + if inputs.len() == *len { + return handle(binder, inputs); + } + } + Err(ErrorCode::ExprError("unexpected arguments number".into()).into()) + }) + } + + fn raw_literal(literal: ExprImpl) -> Handle { + Box::new(move |_binder, _inputs| Ok(literal.clone())) + } + fn now() -> Handle { + Box::new(move |binder, mut inputs| { + binder.ensure_now_function_allowed()?; + if !binder.in_create_mv { + inputs.push(ExprImpl::from(Literal::new( + Some(ScalarImpl::Int64((binder.bind_timestamp_ms * 1000) as i64)), + DataType::Timestamptz, + ))); + } + raw_call(ExprType::Now)(binder, inputs) + }) + } + + static HANDLES: LazyLock> = LazyLock::new(|| { + [ + ( + "booleq", + rewrite(ExprType::Equal, Binder::rewrite_two_bool_inputs), + ), + ( + "boolne", + rewrite(ExprType::NotEqual, Binder::rewrite_two_bool_inputs), + ), + ("coalesce", raw_call(ExprType::Coalesce)), + ( + "nullif", + rewrite(ExprType::Case, Binder::rewrite_nullif_to_case_when), + ), + ( + "round", + dispatch_by_len(vec![ + (2, raw_call(ExprType::RoundDigit)), + (1, raw_call(ExprType::Round)), + ]), + ), + ("pow", raw_call(ExprType::Pow)), + // "power" is the function name used in PG. 
+ ("power", raw_call(ExprType::Pow)), + ("ceil", raw_call(ExprType::Ceil)), + ("floor", raw_call(ExprType::Floor)), + ("abs", raw_call(ExprType::Abs)), + ("exp", raw_call(ExprType::Exp)), + ("mod", raw_call(ExprType::Modulus)), + ( + "to_timestamp", + dispatch_by_len(vec![ + (1, raw_call(ExprType::ToTimestamp)), + (2, raw_call(ExprType::ToTimestamp1)), + ]), + ), + ("date_trunc", raw_call(ExprType::DateTrunc)), + // string + ("substr", raw_call(ExprType::Substr)), + ("length", raw_call(ExprType::Length)), + ("upper", raw_call(ExprType::Upper)), + ("lower", raw_call(ExprType::Lower)), + ("trim", raw_call(ExprType::Trim)), + ("replace", raw_call(ExprType::Replace)), + ("overlay", raw_call(ExprType::Overlay)), + ("position", raw_call(ExprType::Position)), + ("ltrim", raw_call(ExprType::Ltrim)), + ("rtrim", raw_call(ExprType::Rtrim)), + ("md5", raw_call(ExprType::Md5)), + ("to_char", raw_call(ExprType::ToChar)), + ( + "concat", + rewrite(ExprType::ConcatWs, Binder::rewrite_concat_to_concat_ws), + ), + ("concat_ws", raw_call(ExprType::ConcatWs)), + ("split_part", raw_call(ExprType::SplitPart)), + ("char_length", raw_call(ExprType::CharLength)), + ("character_length", raw_call(ExprType::CharLength)), + ("repeat", raw_call(ExprType::Repeat)), + ("ascii", raw_call(ExprType::Ascii)), + ("octet_length", raw_call(ExprType::OctetLength)), + ("bit_length", raw_call(ExprType::BitLength)), + ("regexp_match", raw_call(ExprType::RegexpMatch)), + // array + ("array_cat", raw_call(ExprType::ArrayCat)), + ("array_append", raw_call(ExprType::ArrayAppend)), + ("array_prepend", raw_call(ExprType::ArrayPrepend)), + // System information operations. + ( + "pg_typeof", + raw(|_binder, inputs| { + let input = &inputs[0]; + let v = match input.is_unknown() { + true => "unknown".into(), + false => input.return_type().to_string(), + }; + Ok(ExprImpl::literal_varchar(v)) + }), + ), + ("current_database", guard_by_len(0, raw(|binder, _inputs| { + Ok(ExprImpl::literal_varchar(binder.db_name.clone())) + }))), + ("current_schema", guard_by_len(0, raw(|binder, _inputs| { + return Ok(binder + .catalog + .first_valid_schema( + &binder.db_name, + &binder.search_path, + &binder.auth_context.user_name, + ) + .map(|schema| ExprImpl::literal_varchar(schema.name())) + .unwrap_or_else(|_| ExprImpl::literal_null(DataType::Varchar))); + }))), + ("current_schemas", raw(|binder, mut inputs| { + let no_match_err = ErrorCode::ExprError( + "No function matches the given name and argument types. 
You might need to add explicit type casts.".into() + ); + if inputs.len() != 1 { + return Err(no_match_err.into()); + } + let input = inputs + .pop() + .unwrap() + .enforce_bool_clause("current_schemas") + .map_err(|_| no_match_err)?; + + let ExprImpl::Literal(literal) = &input else { + return Err(ErrorCode::NotImplemented( + "Only boolean literals are supported in `current_schemas`.".to_string(), None.into() + ) + .into()); + }; + + let Some(bool) = literal.get_data().as_ref().map(|bool| bool.clone().into_bool()) else { + return Ok(ExprImpl::literal_null(DataType::List { + datatype: Box::new(DataType::Varchar), + })); + }; + + let paths = if bool { + binder.search_path.path() + } else { + binder.search_path.real_path() + }; + + let mut schema_names = vec![]; + for path in paths { + let mut schema_name = path; + if schema_name == USER_NAME_WILD_CARD { + schema_name = &binder.auth_context.user_name; + } + + if binder + .catalog + .get_schema_by_name(&binder.db_name, schema_name) + .is_ok() + { + schema_names.push(Some(schema_name.into())); + } + } + + Ok(ExprImpl::literal_list( + ListValue::new(schema_names), + DataType::Varchar, + )) + })), + ("session_user", guard_by_len(0, raw(|binder, _inputs| { + Ok(ExprImpl::literal_varchar( + binder.auth_context.user_name.clone(), + )) + }))), + ("pg_get_userbyid", guard_by_len(1, raw(|binder, inputs|{ + let input = &inputs[0]; + let bound_query = binder.bind_get_user_by_id_select(input)?; + Ok(ExprImpl::Subquery(Box::new(Subquery::new( + BoundQuery { + body: BoundSetExpr::Select(Box::new(bound_query)), + order: vec![], + limit: None, + offset: None, + with_ties: false, + extra_order_exprs: vec![], + }, + SubqueryKind::Scalar, + )))) + } + ))), + ("pg_get_expr", raw(|_binder, inputs|{ + if inputs.len() == 2 || inputs.len() == 3 { + // TODO: implement pg_get_expr rather than just return empty as an workaround. + Ok(ExprImpl::literal_varchar("".into())) + } else { + Err(ErrorCode::ExprError( + "Too many/few arguments for pg_catalog.pg_get_expr()".into(), + ) + .into()) + } + })), + ("format_type", raw_call(ExprType::FormatType)), + ("pg_table_is_visible", raw_literal(ExprImpl::literal_bool(true))), + ("pg_encoding_to_char", raw_literal(ExprImpl::literal_varchar("UTF8".into()))), + ("has_database_privilege", raw_literal(ExprImpl::literal_bool(true))), + ("pg_backend_pid", raw(|binder, _inputs| { + // FIXME: the session id is not global unique in multi-frontend env. + Ok(ExprImpl::literal_int(binder.session_id.0)) + })), + ("pg_cancel_backend", guard_by_len(1, raw(|_binder, _inputs| { + // TODO: implement real cancel rather than just return false as an workaround. + Ok(ExprImpl::literal_bool(false)) + }))), + ("pg_terminate_backend", guard_by_len(1, raw(|_binder, _inputs|{ + // TODO: implement real terminate rather than just return false as an + // workaround. + Ok(ExprImpl::literal_bool(false)) + }))), + // internal + ("rw_vnode", raw_call(ExprType::Vnode)), + // TODO: choose which pg version we should return. + ("version", raw_literal(ExprImpl::literal_varchar(format!( + "PostgreSQL 13.9-RisingWave-{} ({})", + RW_VERSION, + GIT_SHA + )))), + // non-deterministic + ("now", now()), + ("current_timestamp", now()) + ] + .into_iter() + .collect() + }); + + static FUNCTIONS_BKTREE: LazyLock> = LazyLock::new(|| { + let mut tree = BKTree::new(metrics::Levenshtein); + + // TODO: Also hint other functinos, e,g, Agg or UDF. 
+ for k in HANDLES.keys() { + tree.add(*k); + } + + tree + }); + + match HANDLES.get(function_name) { + Some(handle) => handle(self, inputs), + None => Err({ + let allowed_distance = if function_name.len() > 3 { 2 } else { 1 }; + + let candidates = FUNCTIONS_BKTREE + .find(function_name, allowed_distance) + .map(|(_idx, c)| c); + + let mut candidates = candidates.peekable(); + + let err_msg = if candidates.peek().is_none() { + format!("unsupported function: \"{}\"", function_name) + } else { + format!( + "unsupported function \"{}\", do you mean \"{}\"?", + function_name, + candidates.join(" or ") + ) + }; + + ErrorCode::NotImplemented(err_msg, 112.into()).into() + }), + } + } + fn rewrite_concat_to_concat_ws(inputs: Vec) -> Result> { if inputs.is_empty() { Err(ErrorCode::BindError( diff --git a/src/frontend/src/binder/expr/mod.rs b/src/frontend/src/binder/expr/mod.rs index 996ba0c71752a..de782f7e02110 100644 --- a/src/frontend/src/binder/expr/mod.rs +++ b/src/frontend/src/binder/expr/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,12 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use itertools::{zip_eq, Itertools}; +use itertools::Itertools; use risingwave_common::catalog::{ColumnDesc, ColumnId}; use risingwave_common::error::{ErrorCode, Result}; use risingwave_common::types::DataType; +use risingwave_common::util::iter_util::zip_eq_fast; use risingwave_sqlparser::ast::{ - BinaryOperator, DataType as AstDataType, Expr, Function, ObjectName, Query, StructField, + Array, BinaryOperator, DataType as AstDataType, Expr, Function, ObjectName, Query, StructField, TrimWhereField, UnaryOperator, }; @@ -32,7 +33,7 @@ mod subquery; mod value; impl Binder { - pub(super) fn bind_expr(&mut self, expr: Expr) -> Result { + pub fn bind_expr(&mut self, expr: Expr) -> Result { match expr { // literal Expr::Value(v) => Ok(ExprImpl::Literal(Box::new(self.bind_value(v)?))), @@ -43,7 +44,7 @@ impl Binder { Expr::Row(exprs) => self.bind_row(exprs), // input ref Expr::Identifier(ident) => { - if ["session_user", "current_schema"] + if ["session_user", "current_schema", "current_timestamp"] .iter() .any(|e| ident.real_value().as_str() == *e) { @@ -64,7 +65,7 @@ impl Binder { Expr::UnaryOp { op, expr } => self.bind_unary_expr(op, *expr), Expr::BinaryOp { left, op, right } => self.bind_binary_op(*left, op, *right), Expr::Nested(expr) => self.bind_expr(*expr), - Expr::Array(exprs) => self.bind_array(exprs), + Expr::Array(Array { elem: exprs, .. }) => self.bind_array(exprs), Expr::ArrayIndex { obj, index } => self.bind_array_index(*obj, *index), Expr::Function(f) => self.bind_function(f), // subquery @@ -347,7 +348,7 @@ impl Binder { .collect::>()?; let else_result_expr = else_result.map(|expr| self.bind_expr(*expr)).transpose()?; - for (condition, result) in zip_eq(conditions, results_expr) { + for (condition, result) in zip_eq_fast(conditions, results_expr) { let condition = match operand { Some(ref t) => Expr::BinaryOp { left: t.clone(), @@ -425,7 +426,7 @@ impl Binder { } pub fn bind_cast_inner(&mut self, expr: Expr, data_type: DataType) -> Result { - if let Expr::Array(ref expr) = expr && matches!(&data_type, DataType::List{ .. } ) { + if let Expr::Array(Array {elem: ref expr, ..}) = expr && matches!(&data_type, DataType::List{ .. 
} ) { return self.bind_array_cast(expr.clone(), data_type); } let lhs = self.bind_expr(expr)?; @@ -509,6 +510,7 @@ pub fn bind_data_type(data_type: &AstDataType) -> Result { "float4" => DataType::Float32, "float8" => DataType::Float64, "timestamptz" => DataType::Timestamptz, + "jsonb" => DataType::Jsonb, _ => return Err(new_err().into()), } } diff --git a/src/frontend/src/binder/expr/order_by.rs b/src/frontend/src/binder/expr/order_by.rs index 326b1bb706587..f1c848efc24f2 100644 --- a/src/frontend/src/binder/expr/order_by.rs +++ b/src/frontend/src/binder/expr/order_by.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/binder/expr/subquery.rs b/src/frontend/src/binder/expr/subquery.rs index 299491e86c679..458e0b91c6bba 100644 --- a/src/frontend/src/binder/expr/subquery.rs +++ b/src/frontend/src/binder/expr/subquery.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/binder/expr/value.rs b/src/frontend/src/binder/expr/value.rs index fcd5f4c9218f3..514a848ee17fe 100644 --- a/src/frontend/src/binder/expr/value.rs +++ b/src/frontend/src/binder/expr/value.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/binder/insert.rs b/src/frontend/src/binder/insert.rs index eca4cfeb864c8..6b849bbd9b2bb 100644 --- a/src/frontend/src/binder/insert.rs +++ b/src/frontend/src/binder/insert.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,10 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashSet; + use itertools::Itertools; use risingwave_common::catalog::Schema; use risingwave_common::error::{ErrorCode, Result, RwError}; use risingwave_common::types::DataType; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_sqlparser::ast::{Ident, ObjectName, Query, SelectItem, SetExpr}; use super::{BoundQuery, BoundSetExpr}; @@ -47,8 +50,9 @@ pub struct BoundInsert { pub source: BoundQuery, - /// Used as part of an extra `Project` when the column types of `source` query does not match - /// `table_source`. This does not include a simple `VALUE`. See comments in code for details. + /// Used as part of an extra `Project` when the column types of the query does not match + /// those of the table. This does not include a simple `VALUE`. See comments in code for + /// details. 
pub cast_exprs: Vec, // used for the 'RETURNING" keyword to indicate the returning items and schema @@ -68,7 +72,6 @@ impl Binder { returning_items: Vec, ) -> Result { let (schema_name, table_name) = Self::resolve_schema_qualified_name(&self.db_name, name)?; - // let table_source = self.bind_table_source(schema_name.as_deref(), &table_name)?; self.bind_table(schema_name.as_deref(), &table_name, None)?; let table_catalog = self.resolve_dml_table(schema_name.as_deref(), &table_name, true)?; @@ -109,7 +112,7 @@ impl Binder { // internal implicit cast. // In other cases, the `source` query is handled on its own and assignment cast is done // afterwards. - let (source, cast_exprs) = match source { + let (source, cast_exprs, nulls_inserted) = match source { Query { with: None, body: SetExpr::Values(values), @@ -118,7 +121,8 @@ impl Binder { offset: None, fetch: None, } if order.is_empty() => { - let values = self.bind_values(values, Some(expected_types.clone()))?; + let (values, nulls_inserted) = + self.bind_values(values, Some(expected_types.clone()))?; let body = BoundSetExpr::Values(values.into()); ( BoundQuery { @@ -130,6 +134,7 @@ impl Binder { extra_order_exprs: vec![], }, vec![], + nulls_inserted, ) } query => { @@ -146,7 +151,7 @@ impl Binder { .collect(), )?, }; - (bound, cast_exprs) + (bound, cast_exprs, false) } }; @@ -166,19 +171,40 @@ impl Binder { )))); } + // create table t1 (v1 int, v2 int); insert into t1 (v2) values (5); + // We added the null values above. Above is equivalent to + // insert into t1 values (NULL, 5); + let target_table_col_indices = if !target_table_col_indices.is_empty() && nulls_inserted { + let provided_insert_cols: HashSet = + target_table_col_indices.iter().cloned().collect(); + + let mut result: Vec = target_table_col_indices.clone(); + for i in 0..columns_to_insert.len() { + if !provided_insert_cols.contains(&i) { + result.push(i); + } + } + result + } else { + target_table_col_indices + }; + let (returning_list, fields) = self.bind_returning_list(returning_items)?; - let returning = !returning_list.is_empty(); + let is_returning = !returning_list.is_empty(); // validate that query has a value for each target column, if target columns are used // create table t1 (v1 int, v2 int); // insert into t1 (v1, v2, v2) values (5, 6); // ...more target columns than values // insert into t1 (v1) values (5, 6); // ...less target columns than values - let (eq_len, msg) = match target_table_col_indices.len().cmp(&expected_types.len()) { - std::cmp::Ordering::Equal => (true, ""), - std::cmp::Ordering::Greater => (false, "INSERT has more target columns than values"), - std::cmp::Ordering::Less => (false, "INSERT has less target columns than values"), + let err_msg = match target_table_col_indices.len().cmp(&expected_types.len()) { + std::cmp::Ordering::Equal => None, + std::cmp::Ordering::Greater => Some("INSERT has more target columns than values"), + std::cmp::Ordering::Less => Some("INSERT has less target columns than values"), }; - if !eq_len && !target_table_col_indices.is_empty() { - return Err(RwError::from(ErrorCode::BindError(msg.to_string()))); + + if let Some(msg) = err_msg && !target_table_col_indices.is_empty() { + return Err(RwError::from(ErrorCode::BindError( + msg.to_string(), + ))); } // Check if column was used multiple times in query e.g. 
@@ -201,13 +227,12 @@ impl Binder { source, cast_exprs, returning_list, - returning_schema: if returning { + returning_schema: if is_returning { Some(Schema { fields }) } else { None }, }; - Ok(insert) } @@ -221,7 +246,7 @@ impl Binder { std::cmp::Ordering::Equal => { return exprs .into_iter() - .zip_eq(expected_types) + .zip_eq_fast(expected_types) .map(|(e, t)| e.cast_assign(t.clone())) .try_collect(); } diff --git a/src/frontend/src/binder/mod.rs b/src/frontend/src/binder/mod.rs index 4838e84472e14..b52c2b79d6d1a 100644 --- a/src/frontend/src/binder/mod.rs +++ b/src/frontend/src/binder/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; use std::sync::Arc; use risingwave_common::error::Result; @@ -19,6 +20,7 @@ use risingwave_common::session_config::SearchPath; use risingwave_sqlparser::ast::Statement; mod bind_context; +mod create; mod delete; mod expr; mod insert; @@ -49,6 +51,7 @@ pub use update::BoundUpdate; pub use values::BoundValues; use crate::catalog::catalog_service::CatalogReadGuard; +use crate::catalog::ViewId; use crate::session::{AuthContext, SessionImpl}; pub type ShareId = usize; @@ -84,6 +87,9 @@ pub struct Binder { search_path: SearchPath, /// Whether the Binder is binding an MV. in_create_mv: bool, + + /// `ShareId`s identifying shared views. + shared_views: HashMap, } impl Binder { @@ -107,6 +113,7 @@ impl Binder { next_share_id: 0, search_path: session.config().get_search_path(), in_create_mv, + shared_views: HashMap::new(), } } diff --git a/src/frontend/src/binder/query.rs b/src/frontend/src/binder/query.rs index 6a4104130127c..f1d2c4573a4e7 100644 --- a/src/frontend/src/binder/query.rs +++ b/src/frontend/src/binder/query.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/binder/relation/join.rs b/src/frontend/src/binder/relation/join.rs index fae0c972c1a72..78fff99289564 100644 --- a/src/frontend/src/binder/relation/join.rs +++ b/src/frontend/src/binder/relation/join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/binder/relation/mod.rs b/src/frontend/src/binder/relation/mod.rs index 9b7d982abdc4e..a9ef690f63584 100644 --- a/src/frontend/src/binder/relation/mod.rs +++ b/src/frontend/src/binder/relation/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
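The builtin scalar function binding introduced above in src/frontend/src/binder/expr/function.rs replaces the long match statement with a static table of handler closures and, when a name is unknown, consults a BKTree of known function names to suggest close matches. A minimal, self-contained sketch of that suggestion lookup, assuming only the bk-tree crate APIs that appear in the diff (BKTree, metrics::Levenshtein, add, find), could look like the following; the function and variable names are illustrative, not the binder's own.

use bk_tree::{metrics, BKTree};

// Build a BK-tree of known names once, then query it with a tolerance that
// scales with the length of the unknown name, mirroring the binder above.
fn suggest(unknown: &str, known: &[&'static str]) -> Option<String> {
    let mut tree = BKTree::new(metrics::Levenshtein);
    for name in known {
        tree.add(*name);
    }
    let tolerance = if unknown.len() > 3 { 2 } else { 1 };
    let mut candidates = tree
        .find(unknown, tolerance)
        .map(|(_distance, name)| *name)
        .peekable();
    candidates.peek()?;
    Some(candidates.collect::<Vec<_>>().join(" or "))
}

fn main() {
    let known = ["round", "floor", "ceil", "concat_ws", "substr"];
    // "floar" is within edit distance 1 of "floor".
    assert_eq!(suggest("floar", &known).as_deref(), Some("floor"));
    assert_eq!(suggest("xyz_fn", &known), None);
}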
diff --git a/src/frontend/src/binder/relation/share.rs b/src/frontend/src/binder/relation/share.rs index e435048ef836e..0ad7b66410ef5 100644 --- a/src/frontend/src/binder/relation/share.rs +++ b/src/frontend/src/binder/relation/share.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/binder/relation/subquery.rs b/src/frontend/src/binder/relation/subquery.rs index 44f7ec2f7926e..b237b324cee40 100644 --- a/src/frontend/src/binder/relation/subquery.rs +++ b/src/frontend/src/binder/relation/subquery.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/binder/relation/table_or_source.rs b/src/frontend/src/binder/relation/table_or_source.rs index 7649559ac7800..2e46b3725c99f 100644 --- a/src/frontend/src/binder/relation/table_or_source.rs +++ b/src/frontend/src/binder/relation/table_or_source.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ use risingwave_common::session_config::USER_NAME_WILD_CARD; use risingwave_sqlparser::ast::{Statement, TableAlias}; use risingwave_sqlparser::parser::Parser; +use super::BoundShare; use crate::binder::relation::BoundSubquery; use crate::binder::{Binder, Relation}; use crate::catalog::root_catalog::SchemaPath; @@ -231,8 +232,17 @@ impl Binder { )) })?; let columns = view_catalog.columns.clone(); + let share_id = match self.shared_views.get(&view_catalog.id) { + Some(share_id) => *share_id, + None => { + let share_id = self.next_share_id(); + self.shared_views.insert(view_catalog.id, share_id); + share_id + } + }; + let input = Relation::Subquery(Box::new(BoundSubquery { query })); Ok(( - Relation::Subquery(Box::new(BoundSubquery { query })), + Relation::Share(Box::new(BoundShare { share_id, input })), columns.iter().map(|c| (false, c.clone())).collect_vec(), )) } diff --git a/src/frontend/src/binder/relation/watermark.rs b/src/frontend/src/binder/relation/watermark.rs index 6bcfa09a31a04..b55c96d61e29e 100644 --- a/src/frontend/src/binder/relation/watermark.rs +++ b/src/frontend/src/binder/relation/watermark.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/binder/relation/window_table_function.rs b/src/frontend/src/binder/relation/window_table_function.rs index a06e4b8d11361..479ffe075dac8 100644 --- a/src/frontend/src/binder/relation/window_table_function.rs +++ b/src/frontend/src/binder/relation/window_table_function.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
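In src/frontend/src/binder/relation/table_or_source.rs above, binding a view now looks up (or allocates) a ShareId in the new shared_views map, so repeated references to the same view within one query are wrapped in a single Relation::Share and the planner can reuse one subplan. A rough sketch of that get-or-allocate step, with placeholder types standing in for the real Binder fields, is shown below.

use std::collections::HashMap;

// Placeholder aliases; the real Binder uses catalog::ViewId and binder::ShareId.
type ViewId = u32;
type ShareId = usize;

struct SharePool {
    next_id: ShareId,
    shared_views: HashMap<ViewId, ShareId>,
}

impl SharePool {
    /// Return the ShareId already assigned to this view, or allocate a new one.
    fn share_id_for_view(&mut self, view_id: ViewId) -> ShareId {
        if let Some(id) = self.shared_views.get(&view_id) {
            return *id;
        }
        let id = self.next_id;
        self.next_id += 1;
        self.shared_views.insert(view_id, id);
        id
    }
}

fn main() {
    let mut pool = SharePool { next_id: 0, shared_views: HashMap::new() };
    let a = pool.share_id_for_view(42);
    let b = pool.share_id_for_view(42); // same view, same ShareId
    let c = pool.share_id_for_view(7);
    assert_eq!(a, b);
    assert_ne!(a, c);
}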
diff --git a/src/frontend/src/binder/select.rs b/src/frontend/src/binder/select.rs index cda81c56573af..02fffc8d387a1 100644 --- a/src/frontend/src/binder/select.rs +++ b/src/frontend/src/binder/select.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ use itertools::Itertools; use risingwave_common::catalog::{Field, Schema, PG_CATALOG_SCHEMA_NAME}; use risingwave_common::error::{ErrorCode, Result, RwError}; use risingwave_common::types::DataType; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_sqlparser::ast::{DataType as AstDataType, Distinct, Expr, Select, SelectItem}; use super::bind_context::{Clause, ColumnBinding}; @@ -158,7 +159,7 @@ impl Binder { // Store field from `ExprImpl` to support binding `field_desc` in `subquery`. let fields = select_items .iter() - .zip_eq(aliases.iter()) + .zip_eq_fast(aliases.iter()) .map(|(s, a)| { let name = a.clone().unwrap_or_else(|| UNNAMED_COLUMN.to_string()); Ok(Field::with_name(s.return_type(), name)) @@ -272,7 +273,7 @@ impl Binder { let fields = returning_list .iter() - .zip_eq(aliases.iter()) + .zip_eq_fast(aliases.iter()) .map(|(s, a)| { let name = a.clone().unwrap_or_else(|| UNNAMED_COLUMN.to_string()); Ok::(Field::with_name(s.return_type(), name)) diff --git a/src/frontend/src/binder/set_expr.rs b/src/frontend/src/binder/set_expr.rs index 01ce3332b20ae..01a75f467df22 100644 --- a/src/frontend/src/binder/set_expr.rs +++ b/src/frontend/src/binder/set_expr.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -use itertools::Itertools; use risingwave_common::catalog::Schema; use risingwave_common::error::{ErrorCode, Result}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_sqlparser::ast::{SetExpr, SetOperator}; use crate::binder::{BindContext, Binder, BoundQuery, BoundSelect, BoundValues}; @@ -99,7 +99,7 @@ impl Binder { pub(super) fn bind_set_expr(&mut self, set_expr: SetExpr) -> Result { match set_expr { SetExpr::Select(s) => Ok(BoundSetExpr::Select(Box::new(self.bind_select(*s)?))), - SetExpr::Values(v) => Ok(BoundSetExpr::Values(Box::new(self.bind_values(v, None)?))), + SetExpr::Values(v) => Ok(BoundSetExpr::Values(Box::new(self.bind_values(v, None)?.0))), SetExpr::Query(q) => Ok(BoundSetExpr::Query(Box::new(self.bind_query(*q)?))), SetExpr::SetOperation { op, @@ -126,7 +126,7 @@ impl Binder { .schema() .fields .iter() - .zip_eq(right.schema().fields.iter()) + .zip_eq_fast(right.schema().fields.iter()) { if a.data_type != b.data_type { return Err(ErrorCode::InvalidInputSyntax(format!( diff --git a/src/frontend/src/binder/statement.rs b/src/frontend/src/binder/statement.rs index e26a02bda6698..887d4f0e94fb2 100644 --- a/src/frontend/src/binder/statement.rs +++ b/src/frontend/src/binder/statement.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/frontend/src/binder/struct_field.rs b/src/frontend/src/binder/struct_field.rs index 9ff5c19e74810..6f822af7d0f1e 100644 --- a/src/frontend/src/binder/struct_field.rs +++ b/src/frontend/src/binder/struct_field.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/binder/update.rs b/src/frontend/src/binder/update.rs index fbadc74f4481e..0567b687c35bd 100644 --- a/src/frontend/src/binder/update.rs +++ b/src/frontend/src/binder/update.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ use std::collections::HashMap; use itertools::Itertools; use risingwave_common::catalog::Schema; use risingwave_common::error::{ErrorCode, Result}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_sqlparser::ast::{Assignment, Expr, ObjectName, SelectItem}; use super::{Binder, Relation}; @@ -91,7 +92,7 @@ impl Binder { } // (col1, col2) = (expr1, expr2) (ids, Expr::Row(values)) if ids.len() == values.len() => { - id.into_iter().zip_eq(values.into_iter()).collect() + id.into_iter().zip_eq_fast(values.into_iter()).collect() } // (col1, col2) = _ => { diff --git a/src/frontend/src/binder/values.rs b/src/frontend/src/binder/values.rs index 2dc37e97a7931..b7b0ce4628827 100644 --- a/src/frontend/src/binder/values.rs +++ b/src/frontend/src/binder/values.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ use itertools::Itertools; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::error::{ErrorCode, Result}; use risingwave_common::types::DataType; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_sqlparser::ast::Values; use super::bind_context::Clause; @@ -67,11 +68,12 @@ fn values_column_name(values_id: usize, col_id: usize) -> String { impl Binder { /// Bind [`Values`] with given `expected_types`. If no types are expected, a compatible type for /// all rows will be used. + /// Returns true if null values were inserted pub(super) fn bind_values( &mut self, values: Values, expected_types: Option>, - ) -> Result { + ) -> Result<(BoundValues, bool)> { assert!(!values.0.is_empty()); self.context.clause = Some(Clause::Values); @@ -82,8 +84,31 @@ impl Binder { .collect::>>>()?; self.context.clause = None; + // Adding Null values in case user did not specify all columns. E.g. 
+ // create table t1 (v1 int, v2 int); insert into t1 (v2) values (5); + let vec_len = bound[0].len(); + let nulls_to_insert = if let Some(expected_types) = &expected_types && expected_types.len() > vec_len { + let nulls_to_insert = expected_types.len() - vec_len; + for row in &mut bound { + if vec_len != row.len() { + return Err(ErrorCode::BindError( + "VALUES lists must all be the same length".into(), + ) + .into()); + } + for i in 0..nulls_to_insert { + let t = expected_types[vec_len + i].clone(); + row.push(ExprImpl::literal_null(t)); + } + } + nulls_to_insert + } else { + 0 + }; + + // only check for this condition again if we did not insert any nulls let num_columns = bound[0].len(); - if bound.iter().any(|row| row.len() != num_columns) { + if nulls_to_insert == 0 && bound.iter().any(|row| row.len() != num_columns) { return Err( ErrorCode::BindError("VALUES lists must all be the same length".into()).into(), ); @@ -108,7 +133,7 @@ impl Binder { let schema = Schema::new( types .into_iter() - .zip_eq(0..num_columns) + .zip_eq_fast(0..num_columns) .map(|(ty, col_id)| Field::with_name(ty, values_column_name(values_id, col_id))) .collect(), ); @@ -132,14 +157,14 @@ impl Binder { ) .into()); } - Ok(bound_values) + Ok((bound_values, nulls_to_insert > 0)) } } #[cfg(test)] mod tests { - use itertools::zip_eq; + use risingwave_common::util::iter_util::zip_eq_fast; use risingwave_sqlparser::ast::{Expr, Value}; use super::*; @@ -161,14 +186,14 @@ mod tests { let schema = Schema::new( types .into_iter() - .zip_eq(0..n_cols) + .zip_eq_fast(0..n_cols) .map(|(ty, col_id)| Field::with_name(ty, values_column_name(0, col_id))) .collect(), ); - assert_eq!(res.schema, schema); - for vec in res.rows { - for (expr, ty) in zip_eq(vec, schema.data_types()) { + assert_eq!(res.0.schema, schema); + for vec in res.0.rows { + for (expr, ty) in zip_eq_fast(vec, schema.data_types()) { assert_eq!(expr.return_type(), ty); } } diff --git a/src/frontend/src/catalog/catalog_service.rs b/src/frontend/src/catalog/catalog_service.rs index 17e273f129fd5..37cffcfa816e9 100644 --- a/src/frontend/src/catalog/catalog_service.rs +++ b/src/frontend/src/catalog/catalog_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -78,6 +78,8 @@ pub trait CatalogWriter: Send + Sync { graph: StreamFragmentGraph, ) -> Result<()>; + async fn replace_table(&self, table: ProstTable, graph: StreamFragmentGraph) -> Result<()>; + async fn create_index( &self, index: ProstIndex, @@ -186,6 +188,11 @@ impl CatalogWriter for CatalogWriterImpl { self.wait_version(version).await } + async fn replace_table(&self, table: ProstTable, graph: StreamFragmentGraph) -> Result<()> { + let version = self.meta_client.replace_table(table, graph).await?; + self.wait_version(version).await + } + async fn create_source(&self, source: ProstSource) -> Result<()> { let (_id, version) = self.meta_client.create_source(source).await?; self.wait_version(version).await diff --git a/src/frontend/src/catalog/column_catalog.rs b/src/frontend/src/catalog/column_catalog.rs deleted file mode 100644 index 460f127d6c3c9..0000000000000 --- a/src/frontend/src/catalog/column_catalog.rs +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2023 Singularity Data -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::borrow::Cow; - -use risingwave_common::catalog::{ColumnDesc, ColumnId}; -use risingwave_common::types::DataType; -use risingwave_pb::plan_common::ColumnCatalog as ProstColumnCatalog; - -use super::row_id_column_desc; - -#[derive(Debug, Clone, PartialEq)] -pub struct ColumnCatalog { - pub column_desc: ColumnDesc, - pub is_hidden: bool, -} - -impl ColumnCatalog { - /// Get the column catalog's is hidden. - pub fn is_hidden(&self) -> bool { - self.is_hidden - } - - /// Get a reference to the column desc's data type. - pub fn data_type(&self) -> &DataType { - &self.column_desc.data_type - } - - /// Get the column desc's column id. - pub fn column_id(&self) -> ColumnId { - self.column_desc.column_id - } - - /// Get a reference to the column desc's name. - pub fn name(&self) -> &str { - self.column_desc.name.as_ref() - } - - /// Convert column catalog to proto - pub fn to_protobuf(&self) -> ProstColumnCatalog { - ProstColumnCatalog { - column_desc: Some(self.column_desc.to_protobuf()), - is_hidden: self.is_hidden, - } - } - - /// Creates a row ID column (for implicit primary key). - pub fn row_id_column() -> Self { - Self { - column_desc: row_id_column_desc(), - is_hidden: true, - } - } -} - -impl From for ColumnCatalog { - fn from(prost: ProstColumnCatalog) -> Self { - Self { - column_desc: prost.column_desc.unwrap().into(), - is_hidden: prost.is_hidden, - } - } -} - -impl ColumnCatalog { - pub fn name_with_hidden(&self) -> Cow<'_, str> { - if self.is_hidden { - Cow::Owned(format!("{}(hidden)", self.column_desc.name)) - } else { - Cow::Borrowed(&self.column_desc.name) - } - } -} diff --git a/src/frontend/src/catalog/database_catalog.rs b/src/frontend/src/catalog/database_catalog.rs index ec3ffd662981d..c63b2aeda6669 100644 --- a/src/frontend/src/catalog/database_catalog.rs +++ b/src/frontend/src/catalog/database_catalog.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/function_catalog.rs b/src/frontend/src/catalog/function_catalog.rs index 623731d7627e3..f8d9cab814f24 100644 --- a/src/frontend/src/catalog/function_catalog.rs +++ b/src/frontend/src/catalog/function_catalog.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/index_catalog.rs b/src/frontend/src/catalog/index_catalog.rs index 0f92f73c95e73..370f8e6f48431 100644 --- a/src/frontend/src/catalog/index_catalog.rs +++ b/src/frontend/src/catalog/index_catalog.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
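The bind_values change above (src/frontend/src/binder/values.rs) pads each VALUES row with typed NULL literals when the statement names fewer columns than the table has, so `insert into t1 (v2) values (5)` behaves like an insert that supplies NULL for the unnamed columns; insert.rs then appends the missing column indices so the padded values line up. A rough, self-contained sketch of the padding step, using a toy expression type rather than the frontend's ExprImpl, is given below.

#[derive(Debug, PartialEq)]
enum Expr {
    Literal(i64),
    // Carries the expected type name; stands in for ExprImpl::literal_null(ty).
    Null(&'static str),
}

/// Append typed NULLs to every row when fewer expressions than expected
/// columns were provided; rows of unequal length are rejected, as in the binder.
fn pad_rows(rows: &mut [Vec<Expr>], expected_types: &[&'static str]) -> Result<usize, String> {
    let provided = rows[0].len(); // the binder asserts that VALUES is non-empty
    if expected_types.len() <= provided {
        return Ok(0);
    }
    let nulls_to_insert = expected_types.len() - provided;
    for row in rows.iter_mut() {
        if row.len() != provided {
            return Err("VALUES lists must all be the same length".into());
        }
        for &ty in &expected_types[provided..] {
            row.push(Expr::Null(ty));
        }
    }
    Ok(nulls_to_insert)
}

fn main() {
    // insert into t1 (v2) values (5)  -- where t1 has two int columns
    let mut rows = vec![vec![Expr::Literal(5)]];
    let padded = pad_rows(&mut rows, &["int", "int"]).unwrap();
    assert_eq!(padded, 1);
    assert_eq!(rows[0], vec![Expr::Literal(5), Expr::Null("int")]);
}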
diff --git a/src/frontend/src/catalog/mod.rs b/src/frontend/src/catalog/mod.rs index 54ff528f70e99..dc4d9e9729683 100644 --- a/src/frontend/src/catalog/mod.rs +++ b/src/frontend/src/catalog/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,19 +18,17 @@ //! structs. It is accessed via [`catalog_service::CatalogReader`] and //! [`catalog_service::CatalogWriter`], which is held by [`crate::session::FrontendEnv`]. -use risingwave_common::catalog::{ColumnDesc, PG_CATALOG_SCHEMA_NAME}; +use risingwave_common::catalog::{is_row_id_column_name, PG_CATALOG_SCHEMA_NAME, ROWID_PREFIX}; use risingwave_common::error::{ErrorCode, Result, RwError}; -use risingwave_common::types::DataType; +use risingwave_connector::sink::catalog::SinkCatalog; use thiserror::Error; pub(crate) mod catalog_service; -pub(crate) mod column_catalog; pub(crate) mod database_catalog; pub(crate) mod function_catalog; pub(crate) mod index_catalog; pub(crate) mod root_catalog; pub(crate) mod schema_catalog; -pub(crate) mod sink_catalog; pub(crate) mod source_catalog; pub(crate) mod system_catalog; pub(crate) mod table_catalog; @@ -75,35 +73,6 @@ pub fn check_schema_writable(schema: &str) -> Result<()> { } } -const ROWID_PREFIX: &str = "_row_id"; - -pub fn row_id_column_name() -> String { - ROWID_PREFIX.to_string() -} - -pub fn is_row_id_column_name(name: &str) -> bool { - name.starts_with(ROWID_PREFIX) -} - -/// The column ID preserved for the row ID column. -pub const ROW_ID_COLUMN_ID: ColumnId = ColumnId::new(0); - -/// The column ID offset for user-defined columns. -/// -/// All IDs of user-defined columns must be greater or equal to this value. -pub const USER_COLUMN_ID_OFFSET: i32 = ROW_ID_COLUMN_ID.next().get_id(); - -/// Creates a row ID column (for implicit primary key). It'll always have the ID `0` for now. -pub fn row_id_column_desc() -> ColumnDesc { - ColumnDesc { - data_type: DataType::Int64, - column_id: ROW_ID_COLUMN_ID, - name: row_id_column_name(), - field_descs: vec![], - type_name: "".to_string(), - } -} - pub type CatalogResult = std::result::Result; #[derive(Error, Debug)] @@ -129,3 +98,9 @@ pub trait RelationCatalog { /// Returns the owner of the relation. fn owner(&self) -> UserId; } + +impl RelationCatalog for SinkCatalog { + fn owner(&self) -> UserId { + self.owner.user_id + } +} diff --git a/src/frontend/src/catalog/root_catalog.rs b/src/frontend/src/catalog/root_catalog.rs index dced588d42fba..5077b6e4a2b6e 100644 --- a/src/frontend/src/catalog/root_catalog.rs +++ b/src/frontend/src/catalog/root_catalog.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ use itertools::Itertools; use risingwave_common::catalog::{CatalogVersion, FunctionId, IndexId, TableId}; use risingwave_common::session_config::{SearchPath, USER_NAME_WILD_CARD}; use risingwave_common::types::DataType; +use risingwave_connector::sink::catalog::SinkCatalog; use risingwave_pb::catalog::{ Database as ProstDatabase, Function as ProstFunction, Index as ProstIndex, Schema as ProstSchema, Sink as ProstSink, Source as ProstSource, Table as ProstTable, @@ -32,7 +33,6 @@ use super::view_catalog::ViewCatalog; use super::{CatalogError, CatalogResult, SinkId, SourceId, ViewId}; use crate::catalog::database_catalog::DatabaseCatalog; use crate::catalog::schema_catalog::SchemaCatalog; -use crate::catalog::sink_catalog::SinkCatalog; use crate::catalog::system_catalog::SystemCatalog; use crate::catalog::table_catalog::TableCatalog; use crate::catalog::{DatabaseId, IndexCatalog, SchemaId}; diff --git a/src/frontend/src/catalog/schema_catalog.rs b/src/frontend/src/catalog/schema_catalog.rs index 83decc969f43d..1e1862965b84a 100644 --- a/src/frontend/src/catalog/schema_catalog.rs +++ b/src/frontend/src/catalog/schema_catalog.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ use std::sync::Arc; use risingwave_common::catalog::{valid_table_name, FunctionId, IndexId, TableId}; use risingwave_common::types::DataType; +use risingwave_connector::sink::catalog::SinkCatalog; use risingwave_pb::catalog::{ Function as ProstFunction, Index as ProstIndex, Schema as ProstSchema, Sink as ProstSink, Source as ProstSource, Table as ProstTable, View as ProstView, @@ -27,7 +28,6 @@ use super::source_catalog::SourceCatalog; use super::ViewId; use crate::catalog::function_catalog::FunctionCatalog; use crate::catalog::index_catalog::IndexCatalog; -use crate::catalog::sink_catalog::SinkCatalog; use crate::catalog::system_catalog::SystemCatalog; use crate::catalog::table_catalog::TableCatalog; use crate::catalog::view_catalog::ViewCatalog; diff --git a/src/frontend/src/catalog/sink_catalog.rs b/src/frontend/src/catalog/sink_catalog.rs deleted file mode 100644 index 5129286eb92b2..0000000000000 --- a/src/frontend/src/catalog/sink_catalog.rs +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2023 Singularity Data -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use risingwave_pb::catalog::Sink as ProstSink; - -use super::{RelationCatalog, SinkId}; -use crate::user::UserId; -use crate::WithOptions; - -#[derive(Clone, Debug)] -pub struct SinkCatalog { - pub id: SinkId, - pub name: String, - - pub properties: WithOptions, - pub owner: UserId, -} - -impl From<&ProstSink> for SinkCatalog { - fn from(sink: &ProstSink) -> Self { - SinkCatalog { - id: sink.id, - name: sink.name.clone(), - properties: WithOptions::new(sink.properties.clone()), - owner: sink.owner, - } - } -} - -impl RelationCatalog for SinkCatalog { - fn owner(&self) -> UserId { - self.owner - } -} diff --git a/src/frontend/src/catalog/source_catalog.rs b/src/frontend/src/catalog/source_catalog.rs index c98cd722c1c88..167c494b3ac72 100644 --- a/src/frontend/src/catalog/source_catalog.rs +++ b/src/frontend/src/catalog/source_catalog.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,17 +14,15 @@ use std::collections::HashMap; -use risingwave_pb::catalog::{Source as ProstSource, StreamSourceInfo}; +use risingwave_common::catalog::ColumnCatalog; +use risingwave_pb::catalog::{Source as ProstSource, StreamSourceInfo, WatermarkDesc}; -use super::column_catalog::ColumnCatalog; use super::{ColumnId, RelationCatalog, SourceId}; use crate::user::UserId; use crate::WithOptions; -/// This struct `SourceCatalog` is used in frontend and compared with `ProstSource` it only maintain -/// information which will be used during optimization. -/// -/// It can be either a table source or a stream source. Use `self.kind()` to distinguish them. +/// This struct `SourceCatalog` is used in frontend. +/// Compared with `ProstSource`, it only maintains information used during optimization. #[derive(Clone, Debug)] pub struct SourceCatalog { pub id: SourceId, @@ -36,6 +34,7 @@ pub struct SourceCatalog { pub info: StreamSourceInfo, pub row_id_index: Option, pub properties: HashMap, + pub watermark_descs: Vec, } impl From<&ProstSource> for SourceCatalog { @@ -58,6 +57,7 @@ impl From<&ProstSource> for SourceCatalog { let append_only = row_id_index.is_some(); let owner = prost.owner; + let watermark_descs = prost.get_watermark_descs().clone(); Self { id, @@ -69,6 +69,7 @@ impl From<&ProstSource> for SourceCatalog { info: prost.info.clone().unwrap(), row_id_index, properties: with_options.into_inner(), + watermark_descs, } } } diff --git a/src/frontend/src/catalog/system_catalog/information_schema/columns.rs b/src/frontend/src/catalog/system_catalog/information_schema/columns.rs index 4b5c1a2d746a9..f89e4fedbed6c 100644 --- a/src/frontend/src/catalog/system_catalog/information_schema/columns.rs +++ b/src/frontend/src/catalog/system_catalog/information_schema/columns.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/frontend/src/catalog/system_catalog/information_schema/mod.rs b/src/frontend/src/catalog/system_catalog/information_schema/mod.rs index 81f9050088979..224d5bc15defa 100644 --- a/src/frontend/src/catalog/system_catalog/information_schema/mod.rs +++ b/src/frontend/src/catalog/system_catalog/information_schema/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/information_schema/tables.rs b/src/frontend/src/catalog/system_catalog/information_schema/tables.rs index 131b809e9f5e1..e8a27540964b4 100644 --- a/src/frontend/src/catalog/system_catalog/information_schema/tables.rs +++ b/src/frontend/src/catalog/system_catalog/information_schema/tables.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/mod.rs b/src/frontend/src/catalog/system_catalog/mod.rs index 94f1de9294d6a..8bdd8c5fbebe2 100644 --- a/src/frontend/src/catalog/system_catalog/mod.rs +++ b/src/frontend/src/catalog/system_catalog/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ use std::sync::{Arc, LazyLock}; use async_trait::async_trait; use paste::paste; use risingwave_common::catalog::{ - ColumnDesc, SysCatalogReader, TableDesc, TableId, DEFAULT_SUPER_USER_ID, + ColumnCatalog, ColumnDesc, SysCatalogReader, TableDesc, TableId, DEFAULT_SUPER_USER_ID, INFORMATION_SCHEMA_SCHEMA_NAME, PG_CATALOG_SCHEMA_NAME, RW_CATALOG_SCHEMA_NAME, }; use risingwave_common::error::Result; @@ -30,7 +30,6 @@ use risingwave_common::row::OwnedRow; use risingwave_common::types::DataType; use crate::catalog::catalog_service::CatalogReader; -use crate::catalog::column_catalog::ColumnCatalog; use crate::catalog::system_catalog::information_schema::*; use crate::catalog::system_catalog::pg_catalog::*; use crate::catalog::system_catalog::rw_catalog::*; @@ -202,7 +201,10 @@ prepare_sys_catalog! { { PG_CATALOG, PG_SHDESCRIPTION, vec![0], read_shdescription_info }, { PG_CATALOG, PG_TABLESPACE, vec![0], read_tablespace_info }, { PG_CATALOG, PG_STAT_ACTIVITY, vec![0], read_stat_activity }, + { PG_CATALOG, PG_ENUM, vec![0], read_enum_info }, + { PG_CATALOG, PG_CONVERSION, vec![0], read_conversion_info }, { INFORMATION_SCHEMA, COLUMNS, vec![], read_columns_info }, { INFORMATION_SCHEMA, TABLES, vec![], read_tables_info }, { RW_CATALOG, RW_META_SNAPSHOT, vec![], read_meta_snapshot await }, + { RW_CATALOG, RW_DDL_PROGRESS, vec![], read_ddl_progress await }, } diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs index 3c624a9603b57..cc16b50ec2c95 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -18,8 +18,10 @@ pub mod pg_attribute; pub mod pg_cast; pub mod pg_class; pub mod pg_collation; +pub mod pg_conversion; pub mod pg_database; pub mod pg_description; +pub mod pg_enum; pub mod pg_index; pub mod pg_keywords; pub mod pg_matviews; @@ -44,8 +46,10 @@ pub use pg_attribute::*; pub use pg_cast::*; pub use pg_class::*; pub use pg_collation::*; +pub use pg_conversion::*; pub use pg_database::*; pub use pg_description::*; +pub use pg_enum::*; pub use pg_index::*; pub use pg_keywords::*; pub use pg_matviews::*; @@ -65,6 +69,7 @@ use risingwave_common::error::Result; use risingwave_common::row::OwnedRow; use risingwave_common::types::{NaiveDateTimeWrapper, ScalarImpl}; use risingwave_common::util::epoch::Epoch; +use risingwave_common::util::iter_util::ZipEqDebug; use risingwave_pb::user::grant_privilege::{Action, Object}; use risingwave_pb::user::UserInfo; use serde_json::json; @@ -222,6 +227,23 @@ impl SysCatalogReaderImpl { Ok(meta_snapshots) } + pub(super) async fn read_ddl_progress(&self) -> Result> { + let ddl_grogress = self + .meta_client + .list_ddl_progress() + .await? + .into_iter() + .map(|s| { + OwnedRow::new(vec![ + Some(ScalarImpl::Int64(s.id as i64)), + Some(ScalarImpl::Utf8(s.statement.into())), + Some(ScalarImpl::Utf8(s.progress.into())), + ]) + }) + .collect_vec(); + Ok(ddl_grogress) + } + // FIXME(noel): Tracked by pub(super) fn read_opclass_info(&self) -> Result> { Ok(vec![]) @@ -250,6 +272,10 @@ impl SysCatalogReaderImpl { Ok(vec![]) } + pub(crate) fn read_enum_info(&self) -> Result> { + Ok(vec![]) + } + pub(super) fn read_roles_info(&self) -> Result> { let reader = self.user_info_reader.read_guard(); let users = reader.get_all_users(); @@ -276,7 +302,7 @@ impl SysCatalogReaderImpl { let schema_infos = reader.get_all_schema_info(&self.auth_context.database)?; Ok(schemas - .zip_eq(schema_infos.iter()) + .zip_eq_debug(schema_infos.iter()) .flat_map(|(schema, schema_info)| { // !!! If we need to add more class types, remember to update // Catalog::get_id_by_class_name_inner accordingly. @@ -595,6 +621,10 @@ impl SysCatalogReaderImpl { Ok(PG_TABLESPACE_DATA_ROWS.clone()) } + pub(crate) fn read_conversion_info(&self) -> Result> { + Ok(vec![]) + } + pub(super) fn read_stat_activity(&self) -> Result> { Ok(vec![]) } diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_am.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_am.rs index a0df2fd39fa4a..03fab1031d4e5 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_am.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_am.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_attrdef.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_attrdef.rs index 6b4676d2ab8d3..6dbf42f9305cf 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_attrdef.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_attrdef.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
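A note on the new `rw_ddl_progress` reader added above: the `OwnedRow` built for each in-flight DDL statement has to keep the same field order as the `RW_DDL_PROGRESS_COLUMNS` definition that appears later in this diff (`ddl_id`, `ddl_statement`, `progress`). A minimal sketch of that mapping is below; the `DdlProgress` struct is only a stand-in for whatever `list_ddl_progress` actually returns, so its exact shape is an assumption.

```rust
// Sketch only: one progress entry becomes one row of `rw_ddl_progress`.
// The field order mirrors RW_DDL_PROGRESS_COLUMNS (ddl_id, ddl_statement, progress).
// `DdlProgress` is a stand-in type; the real response of `list_ddl_progress`
// is not shown in this diff.
use risingwave_common::row::OwnedRow;
use risingwave_common::types::ScalarImpl;

struct DdlProgress {
    id: u64,
    statement: String,
    progress: String,
}

fn ddl_progress_to_row(s: DdlProgress) -> OwnedRow {
    OwnedRow::new(vec![
        Some(ScalarImpl::Int64(s.id as i64)),       // ddl_id: Int64
        Some(ScalarImpl::Utf8(s.statement.into())), // ddl_statement: Varchar
        Some(ScalarImpl::Utf8(s.progress.into())),  // progress: Varchar
    ])
}
```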
diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_attribute.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_attribute.rs index ca0f3151f1c41..21cf9d513fcdd 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_attribute.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_attribute.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs index 373cbe8ea026c..762584c99e968 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_class.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_class.rs index d4debe0f20a98..7961ab41c9fbf 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_class.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_class.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_collation.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_collation.rs index 6121035d1772e..bf9aef2afa0d5 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_collation.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_collation.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_conversion.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_conversion.rs new file mode 100644 index 0000000000000..d8d73f934d442 --- /dev/null +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_conversion.rs @@ -0,0 +1,31 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_common::types::DataType; + +use crate::catalog::system_catalog::SystemCatalogColumnsDef; + +/// The catalog `pg_conversion` describes encoding conversion functions. 
+/// Reference: [`https://www.postgresql.org/docs/current/catalog-pg-conversion.html`] +pub const PG_CONVERSION_TABLE_NAME: &str = "pg_conversion"; +pub const PG_CONVERSION_COLUMNS: &[SystemCatalogColumnsDef<'_>] = &[ + (DataType::Int32, "oid"), + (DataType::Varchar, "conname"), + (DataType::Int32, "connamespace"), + (DataType::Int32, "conowner"), + (DataType::Int16, "conforencoding"), + (DataType::Int16, "contoencoding"), + (DataType::Int32, "conproc"), + (DataType::Boolean, "condefault"), +]; diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_database.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_database.rs index 39340a6454f8b..ecc1e2e900d16 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_database.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_database.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_description.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_description.rs index 4582a85d29aed..120a05e12f385 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_description.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_description.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_enum.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_enum.rs new file mode 100644 index 0000000000000..4c12c051dd4a6 --- /dev/null +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_enum.rs @@ -0,0 +1,28 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_common::types::DataType; + +use crate::catalog::system_catalog::SystemCatalogColumnsDef; + +/// The `pg_enum` catalog contains entries showing the values and labels for each enum type. +/// The internal representation of a given enum value is actually the OID of its associated row in +/// `pg_enum`. 
Reference: [`https://www.postgresql.org/docs/current/catalog-pg-enum.html`] +pub const PG_ENUM_TABLE_NAME: &str = "pg_enum"; +pub const PG_ENUM_COLUMNS: &[SystemCatalogColumnsDef<'_>] = &[ + (DataType::Int32, "oid"), + (DataType::Int32, "enumtypid"), + (DataType::Float32, "enumsortorder"), + (DataType::Varchar, "enumlabel"), +]; diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs index db88391be7614..ff2cce4aeeea2 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_keywords.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_keywords.rs index 64a60a2b20c18..17e22243a939e 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_keywords.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_keywords.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_matviews.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_matviews.rs index 09c65796103e4..3f7f700afdd43 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_matviews.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_matviews.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_namespace.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_namespace.rs index edb444681c5b5..ea87470d287f5 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_namespace.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_namespace.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_opclass.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_opclass.rs index b2ce311094e62..600efa8322a4c 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_opclass.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_opclass.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_operator.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_operator.rs index d043bec05f0e2..a1e48fe1b23c3 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_operator.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_operator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_roles.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_roles.rs index 5763befd80924..48a770a0b2336 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_roles.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_roles.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs index fba412d97b611..6e335d4c44869 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_shdescription.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_shdescription.rs index 49a3863e61ab9..43c8fa75956ab 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_shdescription.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_shdescription.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_stat_activity.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_stat_activity.rs index c27b6b5ba6b43..68a261df628d8 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_stat_activity.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_stat_activity.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_tablespace.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_tablespace.rs index 9075f39b8c569..87f97622515f0 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_tablespace.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_tablespace.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_type.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_type.rs index a015ff7e6f6a6..cd2dbca3b6090 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_type.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_type.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_user.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_user.rs index 432ed8da70f11..9d88a4ceacf61 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_user.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_user.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_views.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_views.rs index 2c5d886a20899..ab792e9da2867 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_views.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_views.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/mod.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/mod.rs index d03ec3e4942ee..8806018bde043 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/mod.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,5 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod rw_ddl_progress; mod rw_meta_snapshot; + +pub use rw_ddl_progress::*; pub use rw_meta_snapshot::*; diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_ddl_progress.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_ddl_progress.rs new file mode 100644 index 0000000000000..dc13832e5f8b8 --- /dev/null +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_ddl_progress.rs @@ -0,0 +1,25 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use risingwave_common::types::DataType; + +use crate::catalog::system_catalog::SystemCatalogColumnsDef; + +pub const RW_DDL_PROGRESS_TABLE_NAME: &str = "rw_ddl_progress"; + +pub const RW_DDL_PROGRESS_COLUMNS: &[SystemCatalogColumnsDef<'_>] = &[ + (DataType::Int64, "ddl_id"), + (DataType::Varchar, "ddl_statement"), + (DataType::Varchar, "progress"), +]; diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs index 2895bc2e9f967..02cafe61854f8 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/catalog/table_catalog.rs b/src/frontend/src/catalog/table_catalog.rs index ed397ec940c50..298923635cad7 100644 --- a/src/frontend/src/catalog/table_catalog.rs +++ b/src/frontend/src/catalog/table_catalog.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,16 +14,18 @@ use std::collections::{HashMap, HashSet}; +use fixedbitset::FixedBitSet; use itertools::Itertools; -use risingwave_common::catalog::{TableDesc, TableId}; +use risingwave_common::catalog::{ColumnCatalog, TableDesc, TableId, TableVersionId}; use risingwave_common::constants::hummock::TABLE_OPTION_DUMMY_RETENTION_SECOND; use risingwave_common::error::{ErrorCode, RwError}; +use risingwave_connector::sink::catalog::desc::SinkDesc; +use risingwave_connector::sink::catalog::{SinkId, SinkType}; use risingwave_pb::catalog::table::{ OptionalAssociatedSourceId, TableType as ProstTableType, TableVersion as ProstTableVersion, }; use risingwave_pb::catalog::{ColumnIndex as ProstColumnIndex, Table as ProstTable}; -use super::column_catalog::ColumnCatalog; use super::{ColumnId, DatabaseId, FragmentId, RelationCatalog, SchemaId}; use crate::optimizer::property::FieldOrder; use crate::user::UserId; @@ -122,6 +124,9 @@ pub struct TableCatalog { /// Per-table catalog version, used by schema change. `None` for internal tables and tests. pub version: Option, + + /// the column indices which could receive watermarks. + pub watermark_columns: FixedBitSet, } #[derive(Copy, Clone, Debug, PartialEq)] @@ -167,7 +172,7 @@ impl TableType { /// The version of a table, used by schema change. See [`ProstTableVersion`]. #[derive(Clone, Debug, PartialEq)] pub struct TableVersion { - pub version_id: u64, + pub version_id: TableVersionId, pub next_column_id: ColumnId, } @@ -175,8 +180,10 @@ impl TableVersion { /// Create an initial version for a table, with the given max column id. 
#[cfg(test)] pub fn new_initial_for_test(max_column_id: ColumnId) -> Self { + use risingwave_common::catalog::INITIAL_TABLE_VERSION_ID; + Self { - version_id: 0, + version_id: INITIAL_TABLE_VERSION_ID, next_column_id: max_column_id.next(), } } @@ -292,6 +299,7 @@ impl TableCatalog { .unwrap_or(TABLE_OPTION_DUMMY_RETENTION_SECOND), value_indices: self.value_indices.clone(), read_prefix_len_hint: self.read_prefix_len_hint, + watermark_columns: self.watermark_columns.clone(), } } @@ -307,8 +315,8 @@ impl TableCatalog { pub fn to_internal_table_prost(&self) -> ProstTable { use risingwave_common::catalog::{DatabaseId, SchemaId}; self.to_prost( - SchemaId::placeholder() as u32, - DatabaseId::placeholder() as u32, + SchemaId::placeholder().schema_id, + DatabaseId::placeholder().database_id, ) } @@ -356,6 +364,21 @@ impl TableCatalog { handle_pk_conflict: self.handle_pk_conflict, read_prefix_len_hint: self.read_prefix_len_hint as u32, version: self.version.as_ref().map(TableVersion::to_prost), + watermark_indices: self.watermark_columns.ones().map(|x| x as _).collect_vec(), + } + } + + pub fn to_sink_desc(&self, properties: WithOptions, sink_type: SinkType) -> SinkDesc { + SinkDesc { + id: SinkId::placeholder(), + name: self.name.clone(), + columns: self.columns.clone(), + pk: self.pk.iter().map(|x| x.to_order_pair()).collect(), + stream_key: self.stream_key.clone(), + distribution_key: self.distribution_key.clone(), + definition: self.definition.clone(), + properties: properties.into_inner(), + sink_type, } } } @@ -382,6 +405,10 @@ impl From for TableCatalog { } let pk = tb.pk.iter().map(FieldOrder::from_protobuf).collect(); + let mut watermark_columns = FixedBitSet::with_capacity(columns.len()); + for idx in tb.watermark_indices { + watermark_columns.insert(idx as _); + } Self { id: id.into(), @@ -407,6 +434,7 @@ impl From for TableCatalog { handle_pk_conflict: tb.handle_pk_conflict, read_prefix_len_hint: tb.read_prefix_len_hint as usize, version: tb.version.map(TableVersion::from_prost), + watermark_columns, } } } @@ -427,7 +455,9 @@ impl RelationCatalog for TableCatalog { mod tests { use std::collections::HashMap; - use risingwave_common::catalog::{ColumnDesc, ColumnId, TableId}; + use risingwave_common::catalog::{ + row_id_column_desc, ColumnCatalog, ColumnDesc, ColumnId, TableId, + }; use risingwave_common::constants::hummock::PROPERTIES_RETENTION_SECOND_KEY; use risingwave_common::test_prelude::*; use risingwave_common::types::*; @@ -437,8 +467,6 @@ mod tests { }; use super::*; - use crate::catalog::column_catalog::ColumnCatalog; - use crate::catalog::row_id_column_desc; use crate::catalog::table_catalog::{TableCatalog, TableType}; use crate::optimizer::property::{Direction, FieldOrder}; use crate::WithOptions; @@ -504,6 +532,7 @@ mod tests { version: 0, next_column_id: 2, }), + watermark_indices: vec![], } .into(); @@ -565,6 +594,7 @@ mod tests { handle_pk_conflict: false, read_prefix_len_hint: 0, version: Some(TableVersion::new_initial_for_test(ColumnId::new(1))), + watermark_columns: FixedBitSet::with_capacity(2), } ); assert_eq!(table, TableCatalog::from(table.to_prost(0, 0))); diff --git a/src/frontend/src/catalog/view_catalog.rs b/src/frontend/src/catalog/view_catalog.rs index 27e9a28c92725..71401657592c4 100644 --- a/src/frontend/src/catalog/view_catalog.rs +++ b/src/frontend/src/catalog/view_catalog.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this 
file except in compliance with the License. diff --git a/src/frontend/src/expr/agg_call.rs b/src/frontend/src/expr/agg_call.rs index 1396d19456e22..a028053a7e110 100644 --- a/src/frontend/src/expr/agg_call.rs +++ b/src/frontend/src/expr/agg_call.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/expr/correlated_input_ref.rs b/src/frontend/src/expr/correlated_input_ref.rs index cbeccfb6124d7..ba6318846180e 100644 --- a/src/frontend/src/expr/correlated_input_ref.rs +++ b/src/frontend/src/expr/correlated_input_ref.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/expr/expr_mutator.rs b/src/frontend/src/expr/expr_mutator.rs index b7fac163dcc7e..126cc1042db72 100644 --- a/src/frontend/src/expr/expr_mutator.rs +++ b/src/frontend/src/expr/expr_mutator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/expr/expr_rewriter.rs b/src/frontend/src/expr/expr_rewriter.rs index 3da8747b7923e..ab9720fd80c58 100644 --- a/src/frontend/src/expr/expr_rewriter.rs +++ b/src/frontend/src/expr/expr_rewriter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/expr/expr_visitor.rs b/src/frontend/src/expr/expr_visitor.rs index 4e493f207c88a..b2b1bdf330d46 100644 --- a/src/frontend/src/expr/expr_visitor.rs +++ b/src/frontend/src/expr/expr_visitor.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/expr/function_call.rs b/src/frontend/src/expr/function_call.rs index e8c4cbed4fcad..fa3d6a98d33ac 100644 --- a/src/frontend/src/expr/function_call.rs +++ b/src/frontend/src/expr/function_call.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
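Looking back at the `TableCatalog` changes earlier in this diff: `watermark_columns` is kept as a `FixedBitSet` over column positions on the frontend side and serialized as a plain list of indices (`watermark_indices`) in the protobuf `Table`. A minimal sketch of that round-trip, using only what the diff itself shows:

```rust
// Round-trip between the frontend representation (FixedBitSet over column
// positions) and the protobuf representation (Vec<u32> of watermark indices).
use fixedbitset::FixedBitSet;

fn to_watermark_indices(watermark_columns: &FixedBitSet) -> Vec<u32> {
    watermark_columns.ones().map(|idx| idx as u32).collect()
}

fn from_watermark_indices(num_columns: usize, watermark_indices: &[u32]) -> FixedBitSet {
    let mut watermark_columns = FixedBitSet::with_capacity(num_columns);
    for idx in watermark_indices {
        watermark_columns.insert(*idx as usize);
    }
    watermark_columns
}
```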
@@ -16,6 +16,7 @@ use itertools::Itertools; use risingwave_common::catalog::Schema; use risingwave_common::error::{ErrorCode, Result}; use risingwave_common::types::DataType; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::vector_op::cast::literal_parsing; use super::{cast_ok, infer_some_all, infer_type, CastContext, Expr, ExprImpl, Literal}; @@ -172,7 +173,7 @@ impl FunctionCall { std::cmp::Ordering::Equal => { let inputs = inputs .into_iter() - .zip_eq(fields.to_vec()) + .zip_eq_fast(fields.to_vec()) .map(|(e, t)| Self::new_cast(e, t, allows)) .collect::>>()?; let return_type = DataType::new_struct( @@ -268,6 +269,23 @@ impl FunctionCall { pub fn inputs_mut(&mut self) -> &mut [ExprImpl] { self.inputs.as_mut() } + + pub(super) fn from_expr_proto( + function_call: &risingwave_pb::expr::FunctionCall, + expr_type: ExprType, + ret_type: DataType, + ) -> Result { + let inputs: Vec<_> = function_call + .get_children() + .iter() + .map(ExprImpl::from_expr_proto) + .try_collect()?; + Ok(Self { + func_type: expr_type, + return_type: ret_type, + inputs, + }) + } } impl Expr for FunctionCall { diff --git a/src/frontend/src/expr/input_ref.rs b/src/frontend/src/expr/input_ref.rs index 359538b1076c7..5b9cd79319d90 100644 --- a/src/frontend/src/expr/input_ref.rs +++ b/src/frontend/src/expr/input_ref.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -121,6 +121,16 @@ impl InputRef { r#type: Some(self.data_type.to_protobuf()), } } + + pub(super) fn from_expr_proto( + input_ref: &risingwave_pb::expr::InputRefExpr, + ret_type: DataType, + ) -> risingwave_common::error::Result { + Ok(Self { + index: input_ref.get_column_idx() as usize, + data_type: ret_type, + }) + } } impl Expr for InputRef { diff --git a/src/frontend/src/expr/literal.rs b/src/frontend/src/expr/literal.rs index bb631a6cbd44c..d695805ad8287 100644 --- a/src/frontend/src/expr/literal.rs +++ b/src/frontend/src/expr/literal.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
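Several hunks in this diff (for example the struct-cast path of `FunctionCall` above, and the schema readers earlier) replace itertools' `zip_eq` with `zip_eq_fast` / `zip_eq_debug` from `risingwave_common::util::iter_util`. That module is not part of this diff, so the sketch below is only a guess at the general shape of such an extension trait, i.e. an equal-length zip whose length check is active only in debug builds; it is not the actual implementation.

```rust
// Hedged sketch of a `zip_eq_debug`-style helper: zip two iterators and keep
// the "same length" assertion only in debug builds. Illustrative only, not
// the real `iter_util` code.
pub trait ZipEqDebugExt: Iterator + Sized {
    fn zip_eq_debug<U>(self, other: U) -> std::iter::Zip<Self, U::IntoIter>
    where
        Self: ExactSizeIterator,
        U: IntoIterator,
        U::IntoIter: ExactSizeIterator,
    {
        let other = other.into_iter();
        debug_assert_eq!(self.len(), other.len(), "iterators have different lengths");
        self.zip(other)
    }
}

impl<I: Iterator> ZipEqDebugExt for I {}
```

Presumably the point of the swap is to keep the length check (or at least its cost) out of release builds while still catching mismatched iterators during development.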
@@ -15,7 +15,8 @@ use risingwave_common::array::list_array::display_for_explain; use risingwave_common::types::to_text::ToText; use risingwave_common::types::{literal_type_match, DataType, Datum}; -use risingwave_common::util::value_encoding::serialize_datum; +use risingwave_common::util::value_encoding::{deserialize_datum, serialize_datum}; +use risingwave_pb::data::Datum as ProstDatum; use risingwave_pb::expr::expr_node::RexNode; use super::Expr; @@ -51,6 +52,7 @@ impl std::fmt::Debug for Literal { | DataType::Timestamptz | DataType::Time | DataType::Interval + | DataType::Jsonb | DataType::Struct(_) => write!( f, "'{}'", @@ -77,6 +79,16 @@ impl Literal { pub fn get_data(&self) -> &Datum { &self.data } + + pub(super) fn from_expr_proto( + proto: &risingwave_pb::expr::ExprNode, + ) -> risingwave_common::error::Result { + let data_type = proto.get_return_type()?; + Ok(Self { + data: value_encoding_to_literal(&proto.rex_node, &data_type.into())?, + data_type: data_type.into(), + }) + } } impl Expr for Literal { @@ -89,20 +101,32 @@ impl Expr for Literal { ExprNode { expr_type: self.get_expr_type() as i32, return_type: Some(self.return_type().to_protobuf()), - rex_node: literal_to_value_encoding(self.get_data()), + rex_node: Some(literal_to_value_encoding(self.get_data())), } } } /// Convert a literal value (datum) into protobuf. -fn literal_to_value_encoding(d: &Datum) -> Option { - if d.is_none() { - return None; - } - use risingwave_pb::data::Datum as ProstDatum; - +fn literal_to_value_encoding(d: &Datum) -> RexNode { let body = serialize_datum(d.as_ref()); - Some(RexNode::Constant(ProstDatum { body })) + RexNode::Constant(ProstDatum { body }) +} + +/// Convert protobuf into a literal value (datum). +fn value_encoding_to_literal( + proto: &Option, + ty: &DataType, +) -> risingwave_common::error::Result { + if let Some(rex_node) = proto { + if let RexNode::Constant(prost_datum) = rex_node { + let datum = deserialize_datum(prost_datum.body.as_ref(), ty)?; + Ok(datum) + } else { + unreachable!() + } + } else { + Ok(None) + } } #[cfg(test)] @@ -123,7 +147,7 @@ mod tests { ]); let data = Some(ScalarImpl::Struct(value.clone())); let node = literal_to_value_encoding(&data); - if let RexNode::Constant(prost) = node.as_ref().unwrap() { + if let RexNode::Constant(prost) = node { let data2 = deserialize_datum( prost.get_body().as_slice(), &DataType::new_struct( @@ -146,7 +170,7 @@ mod tests { ]); let data = Some(ScalarImpl::List(value.clone())); let node = literal_to_value_encoding(&data); - if let RexNode::Constant(prost) = node.as_ref().unwrap() { + if let RexNode::Constant(prost) = node { let data2 = deserialize_datum( prost.get_body().as_slice(), &DataType::List { diff --git a/src/frontend/src/expr/mod.rs b/src/frontend/src/expr/mod.rs index 1e746849c67b8..427c1565fa0eb 100644 --- a/src/frontend/src/expr/mod.rs +++ b/src/frontend/src/expr/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
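The literal changes above make the value encoding total in both directions: `literal_to_value_encoding` now always produces a `RexNode::Constant` (a NULL datum simply serializes to an empty body), and the new `value_encoding_to_literal` reverses it when a `Literal` is rebuilt from an `ExprNode`. Below is a minimal round-trip sketch using the same helpers; the `Int32` datum is just an illustrative value.

```rust
// Minimal round-trip for the value-encoded literal representation used above.
use risingwave_common::types::{DataType, Datum, ScalarImpl};
use risingwave_common::util::value_encoding::{deserialize_datum, serialize_datum};
use risingwave_pb::data::Datum as ProstDatum;
use risingwave_pb::expr::expr_node::RexNode;

fn literal_roundtrip() -> risingwave_common::error::Result<()> {
    let datum: Datum = Some(ScalarImpl::Int32(42));

    // Encode: every literal, NULL included, becomes a `RexNode::Constant`.
    let body = serialize_datum(datum.as_ref());
    let node = RexNode::Constant(ProstDatum { body });

    // Decode: the target type is needed to interpret the encoded bytes.
    if let RexNode::Constant(prost) = node {
        let decoded = deserialize_datum(prost.body.as_ref(), &DataType::Int32)?;
        assert_eq!(decoded, datum);
    }
    Ok(())
}
```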
@@ -19,6 +19,7 @@ use risingwave_common::array::ListValue; use risingwave_common::error::Result; use risingwave_common::types::{DataType, Datum, Scalar}; use risingwave_expr::expr::{build_from_prost, AggKind}; +use risingwave_pb::expr::expr_node::RexNode; use risingwave_pb::expr::{ExprNode, ProjectSetSelectItem}; mod agg_call; @@ -526,17 +527,17 @@ impl ExprImpl { } } - pub fn as_comparison_cond(&self) -> Option<(InputRef, ExprType, InputRef)> { - fn reverse_comparison(comparison: ExprType) -> ExprType { - match comparison { - ExprType::LessThan => ExprType::GreaterThan, - ExprType::LessThanOrEqual => ExprType::GreaterThanOrEqual, - ExprType::GreaterThan => ExprType::LessThan, - ExprType::GreaterThanOrEqual => ExprType::LessThanOrEqual, - _ => unreachable!(), - } + fn reverse_comparison(comparison: ExprType) -> ExprType { + match comparison { + ExprType::LessThan => ExprType::GreaterThan, + ExprType::LessThanOrEqual => ExprType::GreaterThanOrEqual, + ExprType::GreaterThan => ExprType::LessThan, + ExprType::GreaterThanOrEqual => ExprType::LessThanOrEqual, + _ => unreachable!(), } + } + pub fn as_comparison_cond(&self) -> Option<(InputRef, ExprType, InputRef)> { if let ExprImpl::FunctionCall(function_call) = self { match function_call.get_expr_type() { ty @ (ExprType::LessThan @@ -548,7 +549,7 @@ impl ExprImpl { if x.index < y.index { Some((*x, ty, *y)) } else { - Some((*y, reverse_comparison(ty), *x)) + Some((*y, Self::reverse_comparison(ty), *x)) } } else { None @@ -561,6 +562,61 @@ impl ExprImpl { } } + // Accepts expressions of the form `input_expr cmp now() [+- const_expr]` or + // `now() [+- const_expr] cmp input_expr`, where `input_expr` contains an + // `InputRef` and contains no `now()`. + // + // Canonicalizes to the first ordering and returns (input_expr, cmp, now_expr) + pub fn as_now_comparison_cond(&self) -> Option<(ExprImpl, ExprType, ExprImpl)> { + if let ExprImpl::FunctionCall(function_call) = self { + match function_call.get_expr_type() { + ty @ (ExprType::LessThan + | ExprType::LessThanOrEqual + | ExprType::GreaterThan + | ExprType::GreaterThanOrEqual) => { + let (_, op1, op2) = function_call.clone().decompose_as_binary(); + if op1.count_nows() == 0 + && op1.has_input_ref() + && op2.count_nows() > 0 + && op2.is_now_offset() + { + Some((op1, ty, op2)) + } else if op2.count_nows() == 0 + && op2.has_input_ref() + && op1.count_nows() > 0 + && op1.is_now_offset() + { + Some((op2, Self::reverse_comparison(ty), op1)) + } else { + None + } + } + _ => None, + } + } else { + None + } + } + + // Checks if expr is of the form `now() [+- const_expr]` + fn is_now_offset(&self) -> bool { + if let ExprImpl::FunctionCall(f) = self { + match f.get_expr_type() { + ExprType::Now => true, + ExprType::Add | ExprType::Subtract => { + let (_, lhs, rhs) = f.clone().decompose_as_binary(); + lhs.as_function_call() + .map(|f| f.get_expr_type() == ExprType::Now) + .unwrap_or(false) + && rhs.is_const() + } + _ => false, + } + } else { + false + } + } + pub fn as_eq_const(&self) -> Option<(InputRef, ExprImpl)> { if let ExprImpl::FunctionCall(function_call) = self && function_call.get_expr_type() == ExprType::Equal{ @@ -671,6 +727,24 @@ impl ExprImpl { }), } } + + pub fn from_expr_proto(proto: &ExprNode) -> Result { + let rex_node = proto.get_rex_node()?; + let ret_type = proto.get_return_type()?.into(); + let expr_type = proto.get_expr_type()?; + Ok(match rex_node { + RexNode::InputRef(input_ref) => { + Self::InputRef(Box::new(InputRef::from_expr_proto(input_ref, ret_type)?)) + } + RexNode::Constant(_) 
=> Self::Literal(Box::new(Literal::from_expr_proto(proto)?)), + RexNode::Udf(udf) => Self::UserDefinedFunction(Box::new( + UserDefinedFunction::from_expr_proto(udf, ret_type)?, + )), + RexNode::FuncCall(function_call) => Self::FunctionCall(Box::new( + FunctionCall::from_expr_proto(function_call, expr_type, ret_type)?, + )), + }) + } } impl Expr for ExprImpl { diff --git a/src/frontend/src/expr/order_by_expr.rs b/src/frontend/src/expr/order_by_expr.rs index 3f214f4dc29ad..23be49e6b5599 100644 --- a/src/frontend/src/expr/order_by_expr.rs +++ b/src/frontend/src/expr/order_by_expr.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/expr/session_timezone.rs b/src/frontend/src/expr/session_timezone.rs index 7fcd6098ab80c..97b65e22e2b90 100644 --- a/src/frontend/src/expr/session_timezone.rs +++ b/src/frontend/src/expr/session_timezone.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ pub use risingwave_pb::expr::expr_node::Type as ExprType; pub use crate::expr::expr_rewriter::ExprRewriter; pub use crate::expr::function_call::FunctionCall; -use crate::expr::{Expr, ExprImpl}; +use crate::expr::{Expr, ExprImpl, Literal}; /// `SessionTimezone` will be used to resolve session /// timezone-dependent casts, comparisons or arithmetic. @@ -135,7 +135,90 @@ impl SessionTimezone { None } // TODO: handle tstz-related arithmetic with timezone - ExprType::Add | ExprType::Subtract => None, + // We first translate to timestamp to handle years, months and days, + // then we translate back to timestamptz handle hours and milliseconds + // + // For performance concern, we assume that most the intervals are const-evaled. + // + // We impl the following expression tree: + // + // [+/-] + // / \ + // timestamptz [-] + // / / \ + // [+/-] interval date_trunc + // / \ / \ + // timestamp date_trunc 'day' interval + // / / \ + // timestamptz 'day' interval + // + // + // Const-evaled expr tree: + // + // [+/-] + // / \ + // timestamptz interval_non_date_part + // / + // [+/-] + // / \ + // timestamp interval_date_part + // / + // timestamptz + ExprType::Subtract | ExprType::Add => { + assert_eq!(inputs.len(), 2); + let canonical_match = matches!(inputs[0].return_type(), DataType::Timestamptz) + && matches!(inputs[1].return_type(), DataType::Interval); + let inverse_match = matches!(inputs[1].return_type(), DataType::Timestamptz) + && matches!(inputs[0].return_type(), DataType::Interval); + assert!(!(inverse_match && func_type == ExprType::Subtract)); // This should never have been parsed. 
+ if canonical_match || inverse_match { + let (orig_timestamptz, interval) = + if func_type == ExprType::Add && inverse_match { + (inputs[1].clone(), inputs[0].clone()) + } else { + (inputs[0].clone(), inputs[1].clone()) + }; + let interval_date_part: ExprImpl = FunctionCall::new_unchecked( + ExprType::DateTrunc, + vec![ + Literal::new(Some("day".into()), DataType::Varchar).into(), + interval.clone(), + ], + DataType::Interval, + ) + .into(); + let interval_non_date_part = FunctionCall::new_unchecked( + ExprType::Subtract, + vec![interval, interval_date_part.clone()], + DataType::Interval, + ) + .into(); + let timestamp = self + .with_timezone(ExprType::Cast, &vec![orig_timestamptz], DataType::Timestamp) + .unwrap(); + let timestamp_op_date_part = FunctionCall::new_unchecked( + func_type, + vec![timestamp, interval_date_part], + DataType::Timestamp, + ) + .into(); + let timestamptz = self + .with_timezone( + ExprType::Cast, + &vec![timestamp_op_date_part], + DataType::Timestamptz, + ) + .unwrap(); + let timestamptz_op_non_date_part = FunctionCall::new_unchecked( + func_type, + vec![timestamptz, interval_non_date_part], + DataType::Timestamptz, + ) + .into(); + return Some(timestamptz_op_non_date_part); + } + None + } _ => None, } } diff --git a/src/frontend/src/expr/subquery.rs b/src/frontend/src/expr/subquery.rs index 0d934acb6447a..84fbd7d55c979 100644 --- a/src/frontend/src/expr/subquery.rs +++ b/src/frontend/src/expr/subquery.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/expr/table_function.rs b/src/frontend/src/expr/table_function.rs index 59e6992d787a8..f85a3989bd8c9 100644 --- a/src/frontend/src/expr/table_function.rs +++ b/src/frontend/src/expr/table_function.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/expr/type_inference/cast.rs b/src/frontend/src/expr/type_inference/cast.rs index 79c22281381aa..549f5b275b825 100644 --- a/src/frontend/src/expr/type_inference/cast.rs +++ b/src/frontend/src/expr/type_inference/cast.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ use itertools::Itertools as _; use risingwave_common::error::{ErrorCode, Result}; use risingwave_common::types::{DataType, DataTypeName}; +use risingwave_common::util::iter_util::ZipEqFast; pub use risingwave_expr::sig::cast::*; use crate::expr::{Expr as _, ExprImpl}; @@ -153,7 +154,7 @@ fn cast_ok_struct(source: &DataType, target: &DataType, allows: CastContext) -> // ... 
and all fields are castable lty.fields .iter() - .zip_eq(rty.fields.iter()) + .zip_eq_fast(rty.fields.iter()) .all(|(src, dst)| src == dst || cast_ok(src, dst, allows)) } // The automatic casts to string types are treated as assignment casts, while the automatic diff --git a/src/frontend/src/expr/type_inference/func.rs b/src/frontend/src/expr/type_inference/func.rs index e9178c1e39558..d952788be5930 100644 --- a/src/frontend/src/expr/type_inference/func.rs +++ b/src/frontend/src/expr/type_inference/func.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ use num_integer::Integer as _; use risingwave_common::error::{ErrorCode, Result}; use risingwave_common::types::struct_type::StructType; use risingwave_common::types::{DataType, DataTypeName, ScalarImpl}; +use risingwave_common::util::iter_util::ZipEqFast; pub use risingwave_expr::sig::func::*; use super::{align_types, cast_ok_base, CastContext}; @@ -43,7 +44,7 @@ pub fn infer_type(func_type: ExprType, inputs: &mut Vec) -> Result { // If only one side is nested type, these two types can never be casted. - return Err(ErrorCode::BindError(format!( + Err(ErrorCode::BindError(format!( "cannot infer type because unmatched types: left={:?} right={:?}", l, r )) - .into()); + .into()) } (NestedType::Type(l), NestedType::Type(r)) => { // If both sides are concrete types, try cast in either direction. @@ -662,7 +663,7 @@ fn top_matches<'a>( let mut n_exact = 0; let mut n_preferred = 0; let mut castable = true; - for (formal, actual) in sig.inputs_type.iter().zip_eq(inputs) { + for (formal, actual) in sig.inputs_type.iter().zip_eq_fast(inputs) { let Some(actual) = actual else { continue }; if formal == actual { n_exact += 1; @@ -756,7 +757,7 @@ fn narrow_category<'a>( .filter(|sig| { sig.inputs_type .iter() - .zip_eq(&categories) + .zip_eq_fast(&categories) .all(|(formal, category)| { // category.is_none() means the actual argument is non-null and skipped category // selection. diff --git a/src/frontend/src/expr/type_inference/mod.rs b/src/frontend/src/expr/type_inference/mod.rs index 46e4d8c06c5b7..8135787ea7cdd 100644 --- a/src/frontend/src/expr/type_inference/mod.rs +++ b/src/frontend/src/expr/type_inference/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/expr/user_defined_function.rs b/src/frontend/src/expr/user_defined_function.rs index 7600478be1249..b9e457861b837 100644 --- a/src/frontend/src/expr/user_defined_function.rs +++ b/src/frontend/src/expr/user_defined_function.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
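Circling back to the `SessionTimezone` rewrite of `timestamptz +/- interval` a few files up: the interval is split into a date part (`date_trunc('day', interval)`) that is applied after casting to a plain timestamp in the session time zone, and a residual sub-day part that is applied after casting back to `timestamptz`. As the comment tree there describes, days and larger units follow local calendar arithmetic while hours and smaller units stay absolute, matching PostgreSQL's documented behavior across DST transitions. As an illustrative check (the `US/Pacific` session time zone and the literal values here are assumptions for the example only): `TIMESTAMPTZ '2023-03-11 12:00:00-08' + INTERVAL '1 day 2 hours'` first adds the one-day part in local time, landing on `2023-03-12 12:00:00-07` because DST begins that morning, and then adds the remaining two hours as absolute time, giving `2023-03-12 14:00:00-07`; adding the whole interval as a flat 26 hours would instead yield `2023-03-12 15:00:00-07`, which is exactly the discrepancy the split avoids.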
@@ -14,6 +14,8 @@ use std::sync::Arc; +use itertools::Itertools; +use risingwave_common::catalog::FunctionId; use risingwave_common::types::DataType; use super::{Expr, ExprImpl}; @@ -29,6 +31,36 @@ impl UserDefinedFunction { pub fn new(catalog: Arc, args: Vec) -> Self { Self { args, catalog } } + + pub(super) fn from_expr_proto( + udf: &risingwave_pb::expr::UserDefinedFunction, + ret_type: DataType, + ) -> risingwave_common::error::Result { + let args: Vec<_> = udf + .get_children() + .iter() + .map(ExprImpl::from_expr_proto) + .try_collect()?; + + // function catalog + let arg_types = udf.get_arg_types().iter().map_into().collect_vec(); + let catalog = FunctionCatalog { + // FIXME(yuhao): function id is not in udf proto. + id: FunctionId::placeholder(), + name: udf.get_name().clone(), + // FIXME(yuhao): owner is not in udf proto. + owner: u32::MAX - 1, + arg_types, + return_type: ret_type, + language: udf.get_language().clone(), + path: udf.get_path().clone(), + }; + + Ok(Self { + args, + catalog: Arc::new(catalog), + }) + } } impl Expr for UserDefinedFunction { diff --git a/src/frontend/src/expr/utils.rs b/src/frontend/src/expr/utils.rs index ec9c34d176c4f..4a73f27af1546 100644 --- a/src/frontend/src/expr/utils.rs +++ b/src/frontend/src/expr/utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/expr/window_function.rs b/src/frontend/src/expr/window_function.rs index 85bc2d0c86c96..7303a15557ede 100644 --- a/src/frontend/src/expr/window_function.rs +++ b/src/frontend/src/expr/window_function.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/alter_system.rs b/src/frontend/src/handler/alter_system.rs new file mode 100644 index 0000000000000..8f6fed17186e8 --- /dev/null +++ b/src/frontend/src/handler/alter_system.rs @@ -0,0 +1,39 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use pgwire::pg_response::StatementType; +use risingwave_common::error::Result; +use risingwave_sqlparser::ast::{Ident, SetVariableValue, Value}; + +use super::{HandlerArgs, RwPgResponse}; + +pub async fn handle_alter_system( + handler_args: HandlerArgs, + param: Ident, + value: SetVariableValue, +) -> Result { + let value = match value { + SetVariableValue::Literal(Value::DoubleQuotedString(s)) + | SetVariableValue::Literal(Value::SingleQuotedString(s)) => Some(s), + SetVariableValue::Default => None, + _ => Some(value.to_string()), + }; + handler_args + .session + .env() + .meta_client() + .set_system_param(param.to_string(), value) + .await?; + Ok(RwPgResponse::empty_result(StatementType::ALTER_SYSTEM)) +} diff --git a/src/frontend/src/handler/alter_table.rs b/src/frontend/src/handler/alter_table.rs index 9f74ab692ad98..2b38a6e61af94 100644 --- a/src/frontend/src/handler/alter_table.rs +++ b/src/frontend/src/handler/alter_table.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,7 +15,9 @@ use anyhow::Context; use pgwire::pg_response::{PgResponse, StatementType}; use risingwave_common::error::{ErrorCode, Result}; +use risingwave_pb::catalog::Table; use risingwave_pb::stream_plan::stream_fragment_graph::Parallelism; +use risingwave_pb::stream_plan::StreamFragmentGraph; use risingwave_sqlparser::ast::{ColumnDef, ObjectName, Statement}; use risingwave_sqlparser::parser::Parser; @@ -93,11 +95,14 @@ pub async fn handle_add_column( panic!("unexpected statement type: {:?}", definition); }; - let (graph, source, table) = { + let (graph, table) = { let context = OptimizerContext::from_handler_args(handler_args); let (plan, source, table) = gen_create_table_plan(context, table_name, columns, constraints, col_id_gen)?; + // We should already have rejected the case where the table has a connector. + assert!(source.is_none()); + // TODO: avoid this backward conversion. if TableCatalog::from(&table).pk_column_ids() != original_catalog.pk_column_ids() { Err(ErrorCode::InvalidInputSyntax( @@ -105,12 +110,21 @@ pub async fn handle_add_column( ))? } - let mut graph = build_graph(plan); - graph.parallelism = session - .config() - .get_streaming_parallelism() - .map(|parallelism| Parallelism { parallelism }); - (graph, source, table) + let graph = StreamFragmentGraph { + parallelism: session + .config() + .get_streaming_parallelism() + .map(|parallelism| Parallelism { parallelism }), + ..build_graph(plan) + }; + + // Fill the original table ID. + let table = Table { + id: original_catalog.id().table_id(), + ..table + }; + + (graph, table) }; // TODO: for test purpose only, we drop the original table and create a new one. 
This is wrong @@ -118,10 +132,13 @@ pub async fn handle_add_column( if cfg!(debug_assertions) { let catalog_writer = session.env().catalog_writer(); + // TODO: call replace_table RPC + // catalog_writer.replace_table(table, graph).await?; + catalog_writer .drop_table(None, original_catalog.id()) .await?; - catalog_writer.create_table(source, table, graph).await?; + catalog_writer.create_table(None, table, graph).await?; Ok(PgResponse::empty_result_with_notice( StatementType::ALTER_TABLE, @@ -139,11 +156,12 @@ pub async fn handle_add_column( mod tests { use std::collections::HashMap; - use risingwave_common::catalog::{DEFAULT_DATABASE_NAME, DEFAULT_SCHEMA_NAME}; + use risingwave_common::catalog::{ + row_id_column_name, DEFAULT_DATABASE_NAME, DEFAULT_SCHEMA_NAME, + }; use risingwave_common::types::DataType; use crate::catalog::root_catalog::SchemaPath; - use crate::catalog::row_id_column_name; use crate::test_utils::LocalFrontend; #[tokio::test] diff --git a/src/frontend/src/handler/alter_user.rs b/src/frontend/src/handler/alter_user.rs index 477b46ac43489..ee3330d4d942b 100644 --- a/src/frontend/src/handler/alter_user.rs +++ b/src/frontend/src/handler/alter_user.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/create_database.rs b/src/frontend/src/handler/create_database.rs index 2d925de94e6af..408d7f49bd963 100644 --- a/src/frontend/src/handler/create_database.rs +++ b/src/frontend/src/handler/create_database.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/create_function.rs b/src/frontend/src/handler/create_function.rs index 45190941fa7a7..7160218bc64f3 100644 --- a/src/frontend/src/handler/create_function.rs +++ b/src/frontend/src/handler/create_function.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/create_index.rs b/src/frontend/src/handler/create_index.rs index d9fd991501492..e746068ce25e5 100644 --- a/src/frontend/src/handler/create_index.rs +++ b/src/frontend/src/handler/create_index.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
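The `handle_add_column` changes above lean on Rust's struct update syntax twice: once to override the fragment graph's parallelism and once to stamp the freshly generated table definition with the original catalog entry's id. A minimal sketch of that pattern with hypothetical stand-in types (not the real `Table` proto):

    #[derive(Debug, Clone, Default)]
    struct Table {
        id: u32,
        name: String,
        columns: Vec<String>,
    }

    fn with_original_id(new_table: Table, original_id: u32) -> Table {
        // Keep everything generated for the new definition, but reuse the id
        // of the catalog entry being altered, as the ALTER TABLE path does.
        Table {
            id: original_id,
            ..new_table
        }
    }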
@@ -20,6 +20,7 @@ use itertools::Itertools; use pgwire::pg_response::{PgResponse, StatementType}; use risingwave_common::catalog::{IndexId, TableDesc, TableId}; use risingwave_common::error::{ErrorCode, Result, RwError}; +use risingwave_common::util::sort_util::{OrderPair, OrderType}; use risingwave_pb::catalog::{Index as ProstIndex, Table as ProstTable}; use risingwave_pb::stream_plan::stream_fragment_graph::Parallelism; use risingwave_pb::user::grant_privilege::{Action, Object}; @@ -34,6 +35,7 @@ use crate::handler::HandlerArgs; use crate::optimizer::plan_node::{LogicalProject, LogicalScan, StreamMaterialize}; use crate::optimizer::property::{Distribution, FieldOrder, Order, RequiredDist}; use crate::optimizer::{OptimizerContext, OptimizerContextRef, PlanRef, PlanRoot}; +use crate::scheduler::streaming_manager::CreatingStreamingJobInfo; use crate::session::SessionImpl; use crate::stream_fragmenter::build_graph; @@ -83,6 +85,14 @@ pub(crate) fn gen_create_index_plan( .map(|(x, y)| (y.name.clone(), x)) .collect::>(); + let to_order_pair = |(ident, order): &(Ident, OrderType)| { + let x = ident.real_value(); + table_desc_map + .get(&x) + .map(|x| OrderPair::new(*x, *order)) + .ok_or_else(|| ErrorCode::ItemNotFound(x).into()) + }; + let to_column_indices = |ident: &Ident| { let x = ident.real_value(); table_desc_map @@ -93,7 +103,7 @@ pub(crate) fn gen_create_index_plan( let mut index_columns = columns .iter() - .map(to_column_indices) + .map(to_order_pair) .try_collect::<_, Vec<_>, RwError>()?; let mut include_columns = if include.is_empty() { @@ -121,7 +131,7 @@ pub(crate) fn gen_create_index_plan( let mut set = HashSet::new(); index_columns = index_columns .into_iter() - .filter(|x| set.insert(*x)) + .filter(|x| set.insert(x.column_idx)) .collect_vec(); // Remove include columns are already in index columns @@ -133,7 +143,12 @@ pub(crate) fn gen_create_index_plan( // Remove duplicate columns of distributed by columns let distributed_by_columns = distributed_by_columns.into_iter().unique().collect_vec(); // Distributed by columns should be a prefix of index columns - if !index_columns.starts_with(&distributed_by_columns) { + if !index_columns + .iter() + .map(|x| x.column_idx) + .collect_vec() + .starts_with(&distributed_by_columns) + { return Err(ErrorCode::InvalidInputSyntax( "Distributed by columns should be a prefix of index columns".to_string(), ) @@ -189,6 +204,9 @@ pub(crate) fn gen_create_index_plan( .map(InputRef::to_expr_proto) .collect_vec(), original_columns: index_columns + .iter() + .map(|x| x.column_idx) + .collect_vec() .iter() .chain(include_columns.iter()) .map(|index| *index as i32) @@ -251,7 +269,7 @@ fn assemble_materialize( table_desc: Rc, context: OptimizerContextRef, index_name: String, - index_columns: &[usize], + index_columns: &[OrderPair], include_columns: &[usize], distributed_by_columns_len: usize, ) -> Result { @@ -259,6 +277,8 @@ fn assemble_materialize( // LogicalProject(index_columns, include_columns) // LogicalScan(table_desc) + let definition = context.normalized_sql().to_owned(); + let logical_scan = LogicalScan::create( table_name, false, @@ -269,6 +289,9 @@ fn assemble_materialize( ); let exprs = index_columns + .iter() + .map(|x| x.column_idx) + .collect_vec() .iter() .chain(include_columns.iter()) .map(|&i| { @@ -283,6 +306,9 @@ fn assemble_materialize( project_required_cols.toggle_range(0..logical_project.schema().len()); let out_names: Vec = index_columns + .iter() + .map(|x| x.column_idx) + .collect_vec() .iter() .chain(include_columns.iter()) 
.map(|&i| table_desc.columns.get(i).unwrap().name.clone()) @@ -294,27 +320,25 @@ fn assemble_materialize( (0..distributed_by_columns_len).collect(), )), Order::new( - (0..index_columns.len()) - .into_iter() - .map(FieldOrder::ascending) + index_columns + .iter() + .enumerate() + .map(|(i, order_pair)| match order_pair.order_type { + OrderType::Ascending => FieldOrder::ascending(i), + OrderType::Descending => FieldOrder::descending(i), + }) .collect(), ), project_required_cols, out_names, ) - .gen_index_plan(index_name) + .gen_index_plan(index_name, definition) } -fn check_columns(columns: Vec) -> Result> { +fn check_columns(columns: Vec) -> Result> { columns .into_iter() .map(|column| { - if column.asc.is_some() { - return Err( - ErrorCode::NotImplemented("asc not supported".into(), None.into()).into(), - ); - } - if column.nulls_first.is_some() { return Err(ErrorCode::NotImplemented( "nulls_first not supported".into(), @@ -326,7 +350,16 @@ fn check_columns(columns: Vec) -> Result> { use risingwave_sqlparser::ast::Expr; if let Expr::Identifier(ident) = column.expr { - Ok::<_, RwError>(ident) + Ok::<(_, _), RwError>(( + ident, + column.asc.map_or(OrderType::Ascending, |x| { + if x { + OrderType::Ascending + } else { + OrderType::Descending + } + }), + )) } else { Err(ErrorCode::NotImplemented( "only identifier is supported for create index".into(), @@ -387,6 +420,17 @@ pub async fn handle_create_index( serde_json::to_string_pretty(&graph).unwrap() ); + let _job_guard = + session + .env() + .creating_streaming_job_tracker() + .guard(CreatingStreamingJobInfo::new( + session.session_id(), + index.database_id, + index.schema_id, + index.name.clone(), + )); + let catalog_writer = session.env().catalog_writer(); catalog_writer .create_index(index, index_table, graph) diff --git a/src/frontend/src/handler/create_mv.rs b/src/frontend/src/handler/create_mv.rs index 052597b70e2f2..ed3b2e762ad2c 100644 --- a/src/frontend/src/handler/create_mv.rs +++ b/src/frontend/src/handler/create_mv.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
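The index-ordering support above maps the optional ASC/DESC flag of each indexed column onto an order type, with an omitted direction defaulting to ascending (the `map_or` in `check_columns`). A small self-contained sketch of that mapping, using a local stand-in for `risingwave_common`'s `OrderType`:

    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    enum OrderType {
        Ascending,
        Descending,
    }

    // `asc` mirrors sqlparser's `OrderByExpr::asc`: `None` when no direction is written.
    fn order_type_from_asc(asc: Option<bool>) -> OrderType {
        match asc {
            None | Some(true) => OrderType::Ascending,
            Some(false) => OrderType::Descending,
        }
    }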
@@ -25,6 +25,7 @@ use crate::binder::{Binder, BoundQuery, BoundSetExpr}; use crate::handler::HandlerArgs; use crate::optimizer::{OptimizerContext, OptimizerContextRef, PlanRef}; use crate::planner::Planner; +use crate::scheduler::streaming_manager::CreatingStreamingJobInfo; use crate::session::SessionImpl; use crate::stream_fragmenter::build_graph; @@ -145,6 +146,17 @@ pub async fn handle_create_mv( (table, graph) }; + let _job_guard = + session + .env() + .creating_streaming_job_tracker() + .guard(CreatingStreamingJobInfo::new( + session.session_id(), + table.database_id, + table.schema_id, + table.name.clone(), + )); + let catalog_writer = session.env().catalog_writer(); catalog_writer .create_materialized_view(table, graph) @@ -180,11 +192,12 @@ pub mod tests { use std::collections::HashMap; use pgwire::pg_response::StatementType::CREATE_MATERIALIZED_VIEW; - use risingwave_common::catalog::{DEFAULT_DATABASE_NAME, DEFAULT_SCHEMA_NAME}; + use risingwave_common::catalog::{ + row_id_column_name, DEFAULT_DATABASE_NAME, DEFAULT_SCHEMA_NAME, + }; use risingwave_common::types::DataType; use crate::catalog::root_catalog::SchemaPath; - use crate::catalog::row_id_column_name; use crate::test_utils::{create_proto_file, LocalFrontend, PROTO_FILE_DATA}; #[tokio::test] diff --git a/src/frontend/src/handler/create_schema.rs b/src/frontend/src/handler/create_schema.rs index 37c5310dae77b..c2de94cfa5878 100644 --- a/src/frontend/src/handler/create_schema.rs +++ b/src/frontend/src/handler/create_schema.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index ac52015f66aec..23a7102670efd 100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,8 +13,9 @@ // limitations under the License. 
use pgwire::pg_response::{PgResponse, StatementType}; +use risingwave_common::catalog::{DatabaseId, SchemaId, UserId}; use risingwave_common::error::Result; -use risingwave_pb::catalog::{Sink as ProstSink, Table}; +use risingwave_connector::sink::catalog::SinkCatalog; use risingwave_pb::stream_plan::stream_fragment_graph::Parallelism; use risingwave_sqlparser::ast::{ CreateSink, CreateSinkStatement, ObjectName, Query, Select, SelectItem, SetExpr, TableFactor, @@ -26,28 +27,11 @@ use super::RwPgResponse; use crate::binder::Binder; use crate::handler::HandlerArgs; use crate::optimizer::{OptimizerContext, OptimizerContextRef, PlanRef}; +use crate::scheduler::streaming_manager::CreatingStreamingJobInfo; use crate::session::SessionImpl; use crate::stream_fragmenter::build_graph; use crate::Planner; -fn into_sink_prost(table: Table) -> ProstSink { - ProstSink { - id: 0, - schema_id: table.schema_id, - database_id: table.database_id, - name: table.name, - columns: table.columns, - pk: table.pk, - dependent_relations: table.dependent_relations, - distribution_key: table.distribution_key, - stream_key: table.stream_key, - append_only: table.append_only, - properties: table.properties, - owner: table.owner, - definition: table.definition, - } -} - pub fn gen_sink_query_from_name(from_name: ObjectName) -> Result { let table_factor = TableFactor::Table { name: from_name, @@ -77,7 +61,7 @@ pub fn gen_sink_plan( session: &SessionImpl, context: OptimizerContextRef, stmt: CreateSinkStatement, -) -> Result<(PlanRef, ProstSink)> { +) -> Result<(PlanRef, SinkCatalog)> { let db_name = session.database(); let (sink_schema_name, sink_table_name) = Binder::resolve_schema_qualified_name(db_name, stmt.sink_name.clone())?; @@ -109,11 +93,13 @@ pub fn gen_sink_plan( let sink_plan = plan_root.gen_sink_plan(sink_table_name, definition, properties)?; - let sink_catalog_prost = sink_plan - .sink_catalog() - .to_prost(sink_schema_id, sink_database_id); - - let sink_prost = into_sink_prost(sink_catalog_prost); + let sink_desc = sink_plan.sink_desc().clone(); + let sink_catalog = sink_desc.into_catalog( + SchemaId::new(sink_schema_id), + DatabaseId::new(sink_database_id), + UserId::new(session.user_id()), + vec![], + ); let sink_plan: PlanRef = sink_plan.into(); @@ -125,7 +111,7 @@ pub fn gen_sink_plan( ctx.trace(sink_plan.explain_to_string().unwrap()); } - Ok((sink_plan, sink_prost)) + Ok((sink_plan, sink_catalog)) } pub async fn handle_create_sink( @@ -147,8 +133,19 @@ pub async fn handle_create_sink( (sink, graph) }; + let _job_guard = + session + .env() + .creating_streaming_job_tracker() + .guard(CreatingStreamingJobInfo::new( + session.session_id(), + sink.database_id.database_id, + sink.schema_id.schema_id, + sink.name.clone(), + )); + let catalog_writer = session.env().catalog_writer(); - catalog_writer.create_sink(sink, graph).await?; + catalog_writer.create_sink(sink.to_proto(), graph).await?; Ok(PgResponse::empty_result(StatementType::CREATE_SINK)) } diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index 1da2227eef860..d1dbca29fbc2a 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
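Both `handle_create_mv` and `handle_create_sink` now hold a `_job_guard` for the duration of the catalog RPC. The real tracker lives in `crate::scheduler::streaming_manager`; the sketch below only illustrates the assumed RAII shape with hypothetical `Tracker`/`JobGuard` types: the job is registered when the guard is created and removed when the guard is dropped, including on early `?` returns.

    use std::collections::HashSet;
    use std::sync::Mutex;

    #[derive(Default)]
    struct Tracker {
        creating: Mutex<HashSet<u64>>, // job ids currently being created
    }

    struct JobGuard<'a> {
        tracker: &'a Tracker,
        job_id: u64,
    }

    impl Tracker {
        fn guard(&self, job_id: u64) -> JobGuard<'_> {
            self.creating.lock().unwrap().insert(job_id);
            JobGuard { tracker: self, job_id }
        }
    }

    impl Drop for JobGuard<'_> {
        fn drop(&mut self) {
            // Runs on success and on error paths alike.
            self.tracker.creating.lock().unwrap().remove(&self.job_id);
        }
    }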
@@ -16,26 +16,34 @@ use std::collections::HashMap; use itertools::Itertools; use pgwire::pg_response::{PgResponse, StatementType}; -use risingwave_common::catalog::ColumnDesc; +use risingwave_common::catalog::{ + columns_extend, is_column_ids_dedup, ColumnCatalog, ColumnDesc, ROW_ID_COLUMN_ID, +}; use risingwave_common::error::ErrorCode::{self, ProtocolError}; use risingwave_common::error::{Result, RwError}; use risingwave_common::types::DataType; -use risingwave_connector::parser::{AvroParser, ProtobufParser}; +use risingwave_connector::parser::{ + AvroParserConfig, DebeziumAvroParserConfig, ProtobufParserConfig, +}; use risingwave_connector::source::KAFKA_CONNECTOR; use risingwave_pb::catalog::{ - ColumnIndex as ProstColumnIndex, Source as ProstSource, StreamSourceInfo, + ColumnIndex as ProstColumnIndex, Source as ProstSource, StreamSourceInfo, WatermarkDesc, }; use risingwave_pb::plan_common::RowFormatType; -use risingwave_sqlparser::ast::{AvroSchema, CreateSourceStatement, ProtobufSchema, SourceSchema}; +use risingwave_sqlparser::ast::{ + AvroSchema, CreateSourceStatement, DebeziumAvroSchema, ProtobufSchema, SourceSchema, + SourceWatermark, +}; use super::create_table::bind_sql_table_constraints; use super::RwPgResponse; use crate::binder::Binder; -use crate::catalog::column_catalog::ColumnCatalog; -use crate::catalog::{ColumnId, ROW_ID_COLUMN_ID}; +use crate::catalog::ColumnId; +use crate::expr::Expr; use crate::handler::create_table::{bind_sql_columns, ColumnIdGenerator}; use crate::handler::HandlerArgs; use crate::optimizer::plan_node::KAFKA_TIMESTAMP_COLUMN_NAME; +use crate::session::SessionImpl; pub(crate) const UPSTREAM_SOURCE_KEY: &str = "connector"; @@ -44,10 +52,10 @@ async fn extract_avro_table_schema( schema: &AvroSchema, with_properties: HashMap, ) -> Result> { - let parser = AvroParser::new( + let parser = AvroParserConfig::new( + &with_properties, schema.row_schema_location.0.as_str(), schema.use_schema_registry, - with_properties, ) .await?; let vec_column_desc = parser.map_to_columns()?; @@ -60,16 +68,33 @@ async fn extract_avro_table_schema( .collect_vec()) } +async fn extract_debezium_avro_table_schema( + schema: &DebeziumAvroSchema, + with_properties: HashMap, +) -> Result> { + let parser = + DebeziumAvroParserConfig::new(&with_properties, schema.row_schema_location.0.as_str()) + .await?; + let vec_column_desc = parser.map_to_columns()?; + Ok(vec_column_desc + .into_iter() + .map(|col| ColumnCatalog { + column_desc: col.into(), + is_hidden: false, + }) + .collect_vec()) +} + /// Map a protobuf schema to a relational schema. 
async fn extract_protobuf_table_schema( schema: &ProtobufSchema, with_properties: HashMap, ) -> Result> { - let parser = ProtobufParser::new( + let parser = ProtobufParserConfig::new( + &with_properties, &schema.row_schema_location.0, &schema.message_name.0, schema.use_schema_registry, - with_properties, ) .await?; let column_descs = parser.map_to_columns()?; @@ -136,7 +161,8 @@ pub(crate) async fn resolve_source_schema( ))); } - columns.extend( + columns_extend( + columns, extract_protobuf_table_schema(protobuf_schema, with_properties.clone()).await?, ); @@ -165,7 +191,10 @@ pub(crate) async fn resolve_source_schema( ))); } - columns.extend(extract_avro_table_schema(avro_schema, with_properties.clone()).await?); + columns_extend( + columns, + extract_avro_table_schema(avro_schema, with_properties.clone()).await?, + ); StreamSourceInfo { row_format: RowFormatType::Avro as i32, @@ -232,6 +261,52 @@ pub(crate) async fn resolve_source_schema( csv_has_header: csv_info.has_header, ..Default::default() }, + + SourceSchema::Native => StreamSourceInfo { + row_format: RowFormatType::Native as i32, + ..Default::default() + }, + + SourceSchema::DebeziumAvro(avro_schema) => { + if row_id_index.is_some() { + return Err(RwError::from(ProtocolError( + "Primary key must be specified when creating table with row format + debezium_avro." + .to_string(), + ))); + } + + if columns.len() != pk_column_ids.len() { + return Err(RwError::from(ProtocolError( + "User can only specify primary key columns when creating table with row + format debezium_avro." + .to_string(), + ))); + } + + let mut full_columns = + extract_debezium_avro_table_schema(avro_schema, with_properties.clone()).await?; + + for pk_column in columns.iter() { + let index = full_columns + .iter() + .position(|c| c.column_desc.name == pk_column.column_desc.name) + .ok_or_else(|| { + RwError::from(ProtocolError(format!( + "pk column {} not exists", + pk_column.column_desc.name + ))) + })?; + let _ = full_columns.remove(index); + } + + columns_extend(columns, full_columns); + StreamSourceInfo { + row_format: RowFormatType::DebeziumAvro as i32, + row_schema_location: avro_schema.row_schema_location.0.clone(), + ..Default::default() + } + } }; Ok(source_info) @@ -255,10 +330,44 @@ fn check_and_add_timestamp_column( } } +fn bind_source_watermark( + session: &SessionImpl, + name: String, + source_watermarks: Vec, + column_catalogs: &[ColumnCatalog], +) -> Result> { + let mut binder = Binder::new(session); + binder.bind_columns_to_context(name.clone(), column_catalogs.to_vec())?; + + let watermark_descs = source_watermarks + .into_iter() + .map(|source_watermark| { + let col_name = source_watermark.column.real_value(); + let watermark_idx = binder.get_column_binding_index(name.clone(), &col_name)?; + + let expr = binder.bind_expr(source_watermark.expr)?.to_expr_proto(); + + Ok::<_, RwError>(WatermarkDesc { + watermark_idx: watermark_idx as u32, + expr: Some(expr), + }) + }) + .try_collect()?; + Ok(watermark_descs) +} + pub async fn handle_create_source( handler_args: HandlerArgs, stmt: CreateSourceStatement, ) -> Result { + let session = handler_args.session.clone(); + + session.check_relation_name_duplicated(stmt.source_name.clone())?; + + let db_name = session.database(); + let (schema_name, name) = Binder::resolve_schema_qualified_name(db_name, stmt.source_name)?; + let (database_id, schema_id) = session.get_database_and_schema_id_for_create(schema_name)?; + let with_properties = handler_args.with_options.inner().clone(); let mut col_id_gen = 
ColumnIdGenerator::new_initial(); @@ -268,8 +377,11 @@ pub async fn handle_create_source( check_and_add_timestamp_column(&with_properties, &mut column_descs, &mut col_id_gen); - let (mut columns, pk_column_ids, row_id_index) = - bind_sql_table_constraints(column_descs, pk_column_id_from_columns, stmt.constraints)?; + let (mut columns, pk_column_ids, row_id_index) = bind_sql_table_constraints( + column_descs.clone(), + pk_column_id_from_columns, + stmt.constraints, + )?; if row_id_index.is_none() { return Err(ErrorCode::InvalidInputSyntax( "Source does not support PRIMARY KEY constraint, please use \"CREATE TABLE\" instead" @@ -288,16 +400,15 @@ pub async fn handle_create_source( ) .await?; - let row_id_index = row_id_index.map(|index| ProstColumnIndex { index: index as _ }); - let pk_column_ids = pk_column_ids.into_iter().map(Into::into).collect(); - - let session = handler_args.session.clone(); + debug_assert!(is_column_ids_dedup(&columns)); - session.check_relation_name_duplicated(stmt.source_name.clone())?; + let watermark_descs = + bind_source_watermark(&session, name.clone(), stmt.source_watermarks, &columns)?; + // TODO(yuhao): allow multiple watermark on source. + assert!(watermark_descs.len() <= 1); - let db_name = session.database(); - let (schema_name, name) = Binder::resolve_schema_qualified_name(db_name, stmt.source_name)?; - let (database_id, schema_id) = session.get_database_and_schema_id_for_create(schema_name)?; + let row_id_index = row_id_index.map(|index| ProstColumnIndex { index: index as _ }); + let pk_column_ids = pk_column_ids.into_iter().map(Into::into).collect(); let columns = columns.into_iter().map(|c| c.to_protobuf()).collect_vec(); @@ -312,6 +423,7 @@ pub async fn handle_create_source( properties: with_properties, info: Some(source_info), owner: session.user_id(), + watermark_descs, }; let catalog_writer = session.env().catalog_writer(); @@ -324,11 +436,12 @@ pub async fn handle_create_source( pub mod tests { use std::collections::HashMap; - use risingwave_common::catalog::{DEFAULT_DATABASE_NAME, DEFAULT_SCHEMA_NAME}; + use risingwave_common::catalog::{ + row_id_column_name, DEFAULT_DATABASE_NAME, DEFAULT_SCHEMA_NAME, + }; use risingwave_common::types::DataType; use crate::catalog::root_catalog::SchemaPath; - use crate::catalog::row_id_column_name; use crate::test_utils::{create_proto_file, LocalFrontend, PROTO_FILE_DATA}; #[tokio::test] diff --git a/src/frontend/src/handler/create_table.rs b/src/frontend/src/handler/create_table.rs index 603433203c251..4b2e27b47237a 100644 --- a/src/frontend/src/handler/create_table.rs +++ b/src/frontend/src/handler/create_table.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
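The `bind_source_watermark` helper above turns each source watermark clause into a `WatermarkDesc` carrying the watermark column's binding index and the bound expression as a proto, and the handler currently asserts at most one watermark per source. A rough stand-alone sketch of the index-resolution half; the lookup is a hypothetical simplification of what the Binder does, and the bound expression is omitted:

    struct WatermarkSpec {
        column: String,
    }

    fn resolve_watermark_idx(columns: &[String], spec: &WatermarkSpec) -> Result<u32, String> {
        // The real code asks the Binder for the column's binding index;
        // a plain name -> position lookup stands in for it here.
        columns
            .iter()
            .position(|c| c == &spec.column)
            .map(|i| i as u32)
            .ok_or_else(|| format!("watermark column {} not found", spec.column))
    }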
@@ -18,7 +18,9 @@ use std::rc::Rc; use fixedbitset::FixedBitSet; use itertools::Itertools; use pgwire::pg_response::{PgResponse, StatementType}; -use risingwave_common::catalog::ColumnDesc; +use risingwave_common::catalog::{ + ColumnCatalog, ColumnDesc, TableVersionId, INITIAL_TABLE_VERSION_ID, USER_COLUMN_ID_OFFSET, +}; use risingwave_common::error::{ErrorCode, Result}; use risingwave_pb::catalog::{ ColumnIndex as ProstColumnIndex, Source as ProstSource, StreamSourceInfo, Table as ProstTable, @@ -31,9 +33,8 @@ use risingwave_sqlparser::ast::{ use super::create_source::resolve_source_schema; use super::RwPgResponse; use crate::binder::{bind_data_type, bind_struct_field}; -use crate::catalog::column_catalog::ColumnCatalog; use crate::catalog::table_catalog::TableVersion; -use crate::catalog::{check_valid_column_name, ColumnId, USER_COLUMN_ID_OFFSET}; +use crate::catalog::{check_valid_column_name, ColumnId}; use crate::handler::create_source::UPSTREAM_SOURCE_KEY; use crate::handler::HandlerArgs; use crate::optimizer::plan_node::LogicalSource; @@ -42,14 +43,6 @@ use crate::optimizer::{OptimizerContext, OptimizerContextRef, PlanRef, PlanRoot} use crate::stream_fragmenter::build_graph; use crate::{Binder, TableCatalog, WithOptions}; -#[derive(PartialEq, Clone, Debug)] -pub enum DmlFlag { - /// used for `create table` - All, - /// used for `create table with (append_only = true)` - AppendOnly, -} - /// Column ID generator for a new table or a new version of an existing table to alter. #[derive(Debug)] pub struct ColumnIdGenerator { @@ -68,7 +61,7 @@ pub struct ColumnIdGenerator { /// /// For a new table, this is 0. For altering an existing table, this is the **next** version ID /// of the `version_id` field in the original table catalog. - pub version_id: u64, + pub version_id: TableVersionId, } impl ColumnIdGenerator { @@ -94,7 +87,7 @@ impl ColumnIdGenerator { Self { existing: HashMap::new(), next_column_id: ColumnId::from(USER_COLUMN_ID_OFFSET), - version_id: 0, + version_id: INITIAL_TABLE_VERSION_ID, } } @@ -401,6 +394,7 @@ fn gen_table_plan_inner( properties: context.with_options().inner().clone(), info: Some(source_info), owner: session.user_id(), + watermark_descs: vec![], }); let source_catalog = source.as_ref().map(|source| Rc::new((source).into())); @@ -434,20 +428,21 @@ fn gen_table_plan_inner( out_names, ); - // Handle pk conflict in materialize executor only when the table is not append-only. 
- let (handle_pk_conflict, dml_flag) = if context.with_options().append_only() { - (false, DmlFlag::AppendOnly) - } else { - (true, DmlFlag::All) - }; + let append_only = context.with_options().append_only(); + + if append_only && row_id_index.is_none() { + return Err(ErrorCode::InvalidInputSyntax( + "PRIMARY KEY constraint can not be appiled on a append only table.".to_owned(), + ) + .into()); + } let materialize = plan_root.gen_table_plan( name, columns, definition, - handle_pk_conflict, row_id_index, - dml_flag, + append_only, version, )?; @@ -541,12 +536,13 @@ pub fn check_create_table_with_source( mod tests { use std::collections::HashMap; - use risingwave_common::catalog::{Field, DEFAULT_DATABASE_NAME, DEFAULT_SCHEMA_NAME}; + use risingwave_common::catalog::{ + row_id_column_name, Field, DEFAULT_DATABASE_NAME, DEFAULT_SCHEMA_NAME, + }; use risingwave_common::types::DataType; use super::*; use crate::catalog::root_catalog::SchemaPath; - use crate::catalog::row_id_column_name; use crate::test_utils::LocalFrontend; #[test] diff --git a/src/frontend/src/handler/create_table_as.rs b/src/frontend/src/handler/create_table_as.rs index 4054d40989d55..fb53d539b34b5 100644 --- a/src/frontend/src/handler/create_table_as.rs +++ b/src/frontend/src/handler/create_table_as.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ use risingwave_pb::stream_plan::stream_fragment_graph::Parallelism; use risingwave_sqlparser::ast::{ColumnDef, ObjectName, Query, Statement}; use super::{HandlerArgs, RwPgResponse}; -use crate::binder::{BoundSetExpr, BoundStatement}; +use crate::binder::BoundStatement; use crate::handler::create_table::{gen_create_table_plan_without_bind, ColumnIdGenerator}; use crate::handler::query::handle_query; use crate::{build_graph, Binder, OptimizerContext}; @@ -55,16 +55,6 @@ pub async fn handle_create_as( let mut binder = Binder::new(&session); let bound = binder.bind(Statement::Query(query.clone()))?; if let BoundStatement::Query(query) = bound { - // Check if all expressions have an alias - if let BoundSetExpr::Select(select) = &query.body { - if select.aliases.iter().any(Option::is_none) { - return Err(ErrorCode::BindError( - "An alias must be specified for an expression".to_string(), - ) - .into()); - } - } - let mut col_id_gen = ColumnIdGenerator::new_initial(); // Create ColumnCatelog by Field @@ -89,6 +79,7 @@ pub async fn handle_create_as( .into()); } + // Override column name if it specified in creaet statement. columns.iter().enumerate().for_each(|(idx, column)| { column_descs[idx].name = column.name.real_value(); }); @@ -129,5 +120,5 @@ pub async fn handle_create_as( returning: vec![], }; - handle_query(handler_args, insert, false).await + handle_query(handler_args, insert, vec![]).await } diff --git a/src/frontend/src/handler/create_user.rs b/src/frontend/src/handler/create_user.rs index 25ad2af7c6b8f..04df58e48e3eb 100644 --- a/src/frontend/src/handler/create_user.rs +++ b/src/frontend/src/handler/create_user.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
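`ColumnIdGenerator` (shown in the create_table diff above) now tracks a `TableVersionId` alongside the next column id. Its full definition is not part of this hunk; the sketch below captures the assumed id-stability contract with simplified local types: a column that existed in the previous table version keeps its id, new columns receive fresh ids, and the version id distinguishes successive schema versions.

    use std::collections::HashMap;

    struct SimpleColumnIdGen {
        existing: HashMap<String, i32>, // ids from the previous table version
        next_column_id: i32,
        version_id: u64,
    }

    impl SimpleColumnIdGen {
        fn generate(&mut self, name: &str) -> i32 {
            if let Some(&id) = self.existing.get(name) {
                // Existing column: keep the id stable across ALTER.
                id
            } else {
                // New column: allocate the next free id.
                let id = self.next_column_id;
                self.next_column_id += 1;
                id
            }
        }
    }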
@@ -15,12 +15,13 @@ use pgwire::pg_response::{PgResponse, StatementType}; use risingwave_common::error::ErrorCode::PermissionDenied; use risingwave_common::error::Result; -use risingwave_pb::user::UserInfo; +use risingwave_pb::user::grant_privilege::{Action, ActionWithGrantOption, Object}; +use risingwave_pb::user::{GrantPrivilege, UserInfo}; use risingwave_sqlparser::ast::{CreateUserStatement, UserOption, UserOptions}; use super::RwPgResponse; use crate::binder::Binder; -use crate::catalog::CatalogError; +use crate::catalog::{CatalogError, DatabaseId}; use crate::handler::HandlerArgs; use crate::user::user_authentication::encrypted_password; @@ -28,6 +29,7 @@ fn make_prost_user_info( user_name: String, options: &UserOptions, session_user: &UserInfo, + database_id: DatabaseId, ) -> Result { if !session_user.is_super { let require_super = options @@ -45,10 +47,22 @@ fn make_prost_user_info( } } + // Since we don't have concept of PUBLIC group yet, here we simply grant new user with CONNECT + // action of session database. + let grant_privileges = vec![GrantPrivilege { + action_with_opts: vec![ActionWithGrantOption { + action: Action::Connect as i32, + with_grant_option: true, + granted_by: session_user.id, + }], + object: Some(Object::DatabaseId(database_id)), + }]; + let mut user_info = UserInfo { name: user_name, // the LOGIN option is implied if it is not explicitly specified. can_login: true, + grant_privileges, ..Default::default() }; @@ -85,6 +99,13 @@ pub async fn handle_create_user( stmt: CreateUserStatement, ) -> Result { let session = handler_args.session; + let database_id = { + let catalog_reader = session.env().catalog_reader().read_guard(); + catalog_reader + .get_database_by_name(session.database()) + .expect("session database should exist") + .id() + }; let user_info = { let user_name = Binder::resolve_user_name(stmt.user_name)?; let user_reader = session.env().user_info_reader().read_guard(); @@ -96,7 +117,7 @@ pub async fn handle_create_user( .get_user_by_name(session.user_name()) .ok_or_else(|| CatalogError::NotFound("user", session.user_name().to_string()))?; - make_prost_user_info(user_name, &stmt.with_options, session_user)? + make_prost_user_info(user_name, &stmt.with_options, session_user, database_id)? }; let user_info_writer = session.env().user_info_writer(); diff --git a/src/frontend/src/handler/create_view.rs b/src/frontend/src/handler/create_view.rs index aeba5e2b26cd0..9884ffb7db480 100644 --- a/src/frontend/src/handler/create_view.rs +++ b/src/frontend/src/handler/create_view.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
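Because there is no PUBLIC role yet, `make_prost_user_info` above seeds every new user with a CONNECT privilege on the session's database, granted by the session user. The sketch below uses local stand-ins for the proto messages named in the diff (`GrantPrivilege`, `ActionWithGrantOption`, `Object`); the field shapes are taken from the diff, not from the `.proto` definitions.

    struct ActionWithGrantOption {
        action: i32,
        with_grant_option: bool,
        granted_by: u32,
    }

    struct GrantPrivilege {
        action_with_opts: Vec<ActionWithGrantOption>,
        object: Option<Object>,
    }

    enum Object {
        DatabaseId(u32),
    }

    fn default_connect_grant(database_id: u32, granted_by: u32, connect_action: i32) -> GrantPrivilege {
        GrantPrivilege {
            action_with_opts: vec![ActionWithGrantOption {
                action: connect_action,
                with_grant_option: true,
                granted_by,
            }],
            object: Some(Object::DatabaseId(database_id)),
        }
    }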
@@ -16,9 +16,9 @@ use std::collections::HashSet; -use itertools::Itertools; use pgwire::pg_response::{PgResponse, StatementType}; use risingwave_common::error::Result; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::catalog::View as ProstView; use risingwave_sqlparser::ast::{Ident, ObjectName, Query, Statement}; @@ -71,7 +71,7 @@ pub async fn handle_create_view( schema .fields() .iter() - .zip_eq(columns) + .zip_eq_fast(columns) .map(|(f, c)| { let mut field = f.clone(); field.name = c.real_value(); diff --git a/src/frontend/src/handler/describe.rs b/src/frontend/src/handler/describe.rs index c2c1ca1d496f2..25497bc08c215 100644 --- a/src/frontend/src/handler/describe.rs +++ b/src/frontend/src/handler/describe.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -35,11 +35,43 @@ pub fn handle_describe(handler_args: HandlerArgs, table_name: ObjectName) -> Res let mut binder = Binder::new(&session); let relation = binder.bind_relation_by_name(table_name.clone(), None)?; // For Source, it doesn't have table catalog so use get source to get column descs. - let (columns, indices): (Vec, Vec>) = { - let (catalogs, indices) = match relation { - Relation::Source(s) => (s.catalog.columns, vec![]), - Relation::BaseTable(t) => (t.table_catalog.columns, t.table_indexes), - Relation::SystemTable(t) => (t.sys_table_catalog.columns, vec![]), + let (columns, pk_columns, indices): (Vec, Vec, Vec>) = { + let (column_catalogs, pk_column_catalogs, indices) = match relation { + Relation::Source(s) => { + let pk_column_catalogs = s + .catalog + .pk_col_ids + .iter() + .map(|&column_id| { + s.catalog + .columns + .iter() + .filter(|x| x.column_id() == column_id) + .exactly_one() + .unwrap() + .clone() + }) + .collect_vec(); + (s.catalog.columns, pk_column_catalogs, vec![]) + } + Relation::BaseTable(t) => { + let pk_column_catalogs = t + .table_catalog + .pk() + .iter() + .map(|idx| t.table_catalog.columns[idx.index].clone()) + .collect_vec(); + (t.table_catalog.columns, pk_column_catalogs, t.table_indexes) + } + Relation::SystemTable(t) => { + let pk_column_catalogs = t + .sys_table_catalog + .pk + .iter() + .map(|idx| t.sys_table_catalog.columns[*idx].clone()) + .collect_vec(); + (t.sys_table_catalog.columns, pk_column_catalogs, vec![]) + } _ => { return Err( CatalogError::NotFound("table or source", table_name.to_string()).into(), @@ -47,11 +79,15 @@ pub fn handle_describe(handler_args: HandlerArgs, table_name: ObjectName) -> Res } }; ( - catalogs + column_catalogs .iter() .filter(|c| !c.is_hidden) .map(|c| c.column_desc.clone()) .collect(), + pk_column_catalogs + .iter() + .map(|c| c.column_desc.clone()) + .collect(), indices, ) }; @@ -59,6 +95,20 @@ pub fn handle_describe(handler_args: HandlerArgs, table_name: ObjectName) -> Res // Convert all column descs to rows let mut rows = col_descs_to_rows(columns); + // Convert primary key to rows + if !pk_columns.is_empty() { + rows.push(Row::new(vec![ + Some("primary key".into()), + Some( + format!( + "{}", + display_comma_separated(&pk_columns.into_iter().map(|x| x.name).collect_vec()), + ) + .into(), + ), + ])); + } + // Convert all indexes to rows rows.extend(indices.iter().map(|index| { let index_table = index.index_table.clone(); @@ -149,7 +199,7 @@ mod tests { async fn test_describe_handler() { let frontend = LocalFrontend::new(Default::default()).await; frontend 
- .run_sql("create table t (v1 int, v2 int);") + .run_sql("create table t (v1 int, v2 int, v3 int primary key, v4 int);") .await .unwrap(); @@ -180,7 +230,10 @@ mod tests { let expected_columns: HashMap = maplit::hashmap! { "v1".into() => "Int32".into(), "v2".into() => "Int32".into(), - "idx1".into() => "index(v1, v2) distributed by(v1, v2)".into(), + "v3".into() => "Int32".into(), + "v4".into() => "Int32".into(), + "primary key".into() => "v3".into(), + "idx1".into() => "index(v1, v2, v3) include(v4) distributed by(v1, v2)".into(), }; assert_eq!(columns, expected_columns); diff --git a/src/frontend/src/handler/drop_database.rs b/src/frontend/src/handler/drop_database.rs index 2a7a1103e869a..a2417d5059232 100644 --- a/src/frontend/src/handler/drop_database.rs +++ b/src/frontend/src/handler/drop_database.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/drop_function.rs b/src/frontend/src/handler/drop_function.rs index 99849a339ceb9..4ae58f447d624 100644 --- a/src/frontend/src/handler/drop_function.rs +++ b/src/frontend/src/handler/drop_function.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/drop_index.rs b/src/frontend/src/handler/drop_index.rs index 9ba36cbc4f1a2..ff49347671b55 100644 --- a/src/frontend/src/handler/drop_index.rs +++ b/src/frontend/src/handler/drop_index.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/drop_mv.rs b/src/frontend/src/handler/drop_mv.rs index 5ff6391e74ec0..34f10f64ebeca 100644 --- a/src/frontend/src/handler/drop_mv.rs +++ b/src/frontend/src/handler/drop_mv.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/drop_schema.rs b/src/frontend/src/handler/drop_schema.rs index f262e809ddd16..b789e2f9fedeb 100644 --- a/src/frontend/src/handler/drop_schema.rs +++ b/src/frontend/src/handler/drop_schema.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/drop_sink.rs b/src/frontend/src/handler/drop_sink.rs index adb5cfa70321b..81af96449991a 100644 --- a/src/frontend/src/handler/drop_sink.rs +++ b/src/frontend/src/handler/drop_sink.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
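The DESCRIBE changes above add a `primary key` row whose value is the comma-separated list of primary-key column names, which is why the updated test expects `primary key` to map to `v3`. A simplified sketch of producing that row's text without the pgwire row types, using `join` in place of `display_comma_separated`:

    fn primary_key_row(pk_names: &[&str]) -> Option<(String, String)> {
        if pk_names.is_empty() {
            // No primary-key columns: the row is omitted entirely.
            None
        } else {
            Some(("primary key".to_owned(), pk_names.join(", ")))
        }
    }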
@@ -56,7 +56,7 @@ pub async fn handle_drop_sink( }; let catalog_writer = session.env().catalog_writer(); - catalog_writer.drop_sink(sink_id).await?; + catalog_writer.drop_sink(sink_id.sink_id).await?; Ok(PgResponse::empty_result(StatementType::DROP_SINK)) } diff --git a/src/frontend/src/handler/drop_source.rs b/src/frontend/src/handler/drop_source.rs index 22a757da498a0..4383e451e4a02 100644 --- a/src/frontend/src/handler/drop_source.rs +++ b/src/frontend/src/handler/drop_source.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/drop_table.rs b/src/frontend/src/handler/drop_table.rs index f699e972074c1..3f019775490a6 100644 --- a/src/frontend/src/handler/drop_table.rs +++ b/src/frontend/src/handler/drop_table.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/drop_user.rs b/src/frontend/src/handler/drop_user.rs index e0671fde1ff4b..aac3aea064e42 100644 --- a/src/frontend/src/handler/drop_user.rs +++ b/src/frontend/src/handler/drop_user.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/drop_view.rs b/src/frontend/src/handler/drop_view.rs index 216e69549b7bc..48c0a9973bc07 100644 --- a/src/frontend/src/handler/drop_view.rs +++ b/src/frontend/src/handler/drop_view.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/explain.rs b/src/frontend/src/handler/explain.rs index 9dd799e5ee29a..c41c69f5239ec 100644 --- a/src/frontend/src/handler/explain.rs +++ b/src/frontend/src/handler/explain.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,7 +23,8 @@ use super::create_index::gen_create_index_plan; use super::create_mv::gen_create_mv_plan; use super::create_sink::gen_sink_plan; use super::create_table::{ - check_create_table_with_source, gen_create_table_plan, ColumnIdGenerator, + check_create_table_with_source, gen_create_table_plan, gen_create_table_plan_with_source, + ColumnIdGenerator, }; use super::query::gen_batch_query_plan; use super::RwPgResponse; @@ -34,7 +35,7 @@ use crate::scheduler::BatchPlanFragmenter; use crate::stream_fragmenter::build_graph; use crate::utils::explain_stream_graph; -pub fn handle_explain( +pub async fn handle_explain( handler_args: HandlerArgs, stmt: Statement, options: ExplainOptions, @@ -48,140 +49,145 @@ pub fn handle_explain( let session = context.session_ctx().clone(); - let plan = match stmt { - Statement::CreateView { - or_replace: false, - materialized: true, - query, - name, - columns, - .. 
- } => gen_create_mv_plan(&session, context.into(), *query, name, columns)?.0, + let mut plan_fragmenter = None; + let mut rows = { + let plan = match stmt { + Statement::CreateView { + or_replace: false, + materialized: true, + query, + name, + columns, + .. + } => gen_create_mv_plan(&session, context.into(), *query, name, columns)?.0, - Statement::CreateSink { stmt } => gen_sink_plan(&session, context.into(), stmt)?.0, + Statement::CreateSink { stmt } => gen_sink_plan(&session, context.into(), stmt)?.0, - Statement::CreateTable { - name, - columns, - constraints, - source_schema, - .. - } => match check_create_table_with_source(&handler_args.with_options, source_schema)? { - Some(_) => { - return Err(ErrorCode::NotImplemented( - "explain create table with a connector".to_string(), - None.into(), - ) - .into()) - } - None => { - gen_create_table_plan( - context, - name, - columns, - constraints, - ColumnIdGenerator::new_initial(), - )? - .0 - } - }, + Statement::CreateTable { + name, + columns, + constraints, + source_schema, + .. + } => match check_create_table_with_source(&handler_args.with_options, source_schema)? { + Some(s) => { + gen_create_table_plan_with_source( + context, + name, + columns, + constraints, + s, + ColumnIdGenerator::new_initial(), + ) + .await? + .0 + } + None => { + gen_create_table_plan( + context, + name, + columns, + constraints, + ColumnIdGenerator::new_initial(), + )? + .0 + } + }, - Statement::CreateIndex { - name, - table_name, - columns, - include, - distributed_by, - .. - } => { - gen_create_index_plan( - &session, - context.into(), + Statement::CreateIndex { name, table_name, columns, include, distributed_by, - )? - .0 - } - - Statement::CreateSource { .. } => { - return Err(ErrorCode::NotImplemented( - "explain create source".to_string(), - 4776.into(), - ) - .into()); - } + .. + } => { + gen_create_index_plan( + &session, + context.into(), + name, + table_name, + columns, + include, + distributed_by, + )? + .0 + } - stmt => gen_batch_query_plan(&session, context.into(), stmt)?.0, - }; + stmt => gen_batch_query_plan(&session, context.into(), stmt)?.0, + }; - let ctx = plan.plan_base().ctx.clone(); - let explain_trace = ctx.is_explain_trace(); - let explain_verbose = ctx.is_explain_verbose(); + let ctx = plan.plan_base().ctx.clone(); + let explain_trace = ctx.is_explain_trace(); + let explain_verbose = ctx.is_explain_verbose(); - let mut rows = if explain_trace { - let trace = ctx.take_trace(); - trace - .iter() - .flat_map(|s| s.lines()) - .map(|s| Row::new(vec![Some(s.to_string().into())])) - .collect::>() - } else { - vec![] - }; + let mut rows = if explain_trace { + let trace = ctx.take_trace(); + trace + .iter() + .flat_map(|s| s.lines()) + .map(|s| Row::new(vec![Some(s.to_string().into())])) + .collect::>() + } else { + vec![] + }; - match options.explain_type { - ExplainType::DistSql => match plan.convention() { - Convention::Logical => unreachable!(), - Convention::Batch => { - let plan_fragmenter = BatchPlanFragmenter::new( - session.env().worker_node_manager_ref(), - session.env().catalog_reader().clone(), - ); - let query = plan_fragmenter.split(plan)?; - let stage_graph_json = serde_json::to_string_pretty(&query.stage_graph).unwrap(); - rows.extend( - vec![stage_graph_json] - .iter() - .flat_map(|s| s.lines()) - .map(|s| Row::new(vec![Some(s.to_string().into())])), - ); - } - Convention::Stream => { - let graph = build_graph(plan); - rows.extend( - explain_stream_graph(&graph, explain_verbose)? 
- .lines() - .map(|s| Row::new(vec![Some(s.to_string().into())])), - ); - } - }, - ExplainType::Physical => { - // if explain trace is open, the plan has been in the rows - if !explain_trace { - let output = plan.explain_to_string()?; - rows.extend( - output - .lines() - .map(|s| Row::new(vec![Some(s.to_string().into())])), - ); + match options.explain_type { + ExplainType::DistSql => match plan.convention() { + Convention::Logical => unreachable!(), + Convention::Batch => { + plan_fragmenter = Some(BatchPlanFragmenter::new( + session.env().worker_node_manager_ref(), + session.env().catalog_reader().clone(), + plan, + )?); + } + Convention::Stream => { + let graph = build_graph(plan); + rows.extend( + explain_stream_graph(&graph, explain_verbose)? + .lines() + .map(|s| Row::new(vec![Some(s.to_string().into())])), + ); + } + }, + ExplainType::Physical => { + // if explain trace is open, the plan has been in the rows + if !explain_trace { + let output = plan.explain_to_string()?; + rows.extend( + output + .lines() + .map(|s| Row::new(vec![Some(s.to_string().into())])), + ); + } } - } - ExplainType::Logical => { - // if explain trace is open, the plan has been in the rows - if !explain_trace { - let output = plan.ctx().take_logical().ok_or_else(|| { - ErrorCode::InternalError("Logical plan not found for query".into()) - })?; - rows.extend( - output - .lines() - .map(|s| Row::new(vec![Some(s.to_string().into())])), - ); + ExplainType::Logical => { + // if explain trace is open, the plan has been in the rows + if !explain_trace { + let output = plan.ctx().take_logical().ok_or_else(|| { + ErrorCode::InternalError("Logical plan not found for query".into()) + })?; + rows.extend( + output + .lines() + .map(|s| Row::new(vec![Some(s.to_string().into())])), + ); + } } } + rows + }; + + if let Some(plan_fragmenter) = plan_fragmenter { + let query = plan_fragmenter.generate_complete_query().await?; + let stage_graph_json = serde_json::to_string_pretty(&query.stage_graph).unwrap(); + rows.extend( + vec![stage_graph_json] + .iter() + .flat_map(|s| s.lines()) + .map(|s| Row::new(vec![Some(s.to_string().into())])), + ); } Ok(PgResponse::new_for_stream( diff --git a/src/frontend/src/handler/flush.rs b/src/frontend/src/handler/flush.rs index 2bd67c6bb8b29..5ef8b51ca80b8 100644 --- a/src/frontend/src/handler/flush.rs +++ b/src/frontend/src/handler/flush.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/handle_privilege.rs b/src/frontend/src/handler/handle_privilege.rs index 0313fe83a2ab7..b677f28b11cbe 100644 --- a/src/frontend/src/handler/handle_privilege.rs +++ b/src/frontend/src/handler/handle_privilege.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
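The restructuring of `handle_explain` above follows the same pattern as `handle_query`: the `BatchPlanFragmenter` is constructed while the `Rc`-based plan is still in scope, and the async `generate_complete_query().await` runs only after that scope ends, so no non-`Send` value is held across an await point. A minimal sketch of the pattern with placeholder `Plan`/`Fragmenter` types standing in for the real planner API:

    use std::rc::Rc;

    struct Plan(Rc<String>); // Rc makes the plan non-Send

    struct Fragmenter {
        serialized: String, // owns only Send data
    }

    impl Fragmenter {
        fn new(plan: Plan) -> Self {
            Fragmenter {
                serialized: (*plan.0).clone(),
            }
        }

        async fn generate_complete_query(self) -> String {
            self.serialized
        }
    }

    async fn handle() -> String {
        let fragmenter = {
            let plan = Plan(Rc::new("plan".to_owned()));
            // `plan` (and its Rc) dies at the end of this block, before any await.
            Fragmenter::new(plan)
        };
        fragmenter.generate_complete_query().await
    }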
@@ -98,10 +98,11 @@ fn make_prost_privilege( grant_objs.push(ProstObject::AllTablesSchemaId(schema.id())); } } - _ => { - return Err(ErrorCode::BindError( - "GRANT statement does not support this object type".to_string(), - ) + o => { + return Err(ErrorCode::BindError(format!( + "GRANT statement does not support object type: {:?}", + o + )) .into()); } }; @@ -241,10 +242,16 @@ mod tests { .await .unwrap(); - let database_id = { + let (session_database_id, database_id) = { let catalog_reader = session.env().catalog_reader(); let reader = catalog_reader.read_guard(); - reader.get_database_by_name("db1").unwrap().id() + ( + reader + .get_database_by_name(session.database()) + .unwrap() + .id(), + reader.get_database_by_name("db1").unwrap().id(), + ) }; { @@ -253,21 +260,31 @@ mod tests { let user_info = reader.get_user_by_name("user1").unwrap(); assert_eq!( user_info.grant_privileges, - vec![ProstPrivilege { - action_with_opts: vec![ - ActionWithGrantOption { + vec![ + ProstPrivilege { + action_with_opts: vec![ActionWithGrantOption { action: Action::Connect as i32, with_grant_option: true, - granted_by: DEFAULT_SUPER_USER_ID, - }, - ActionWithGrantOption { - action: Action::Create as i32, - with_grant_option: true, - granted_by: DEFAULT_SUPER_USER_ID, - } - ], - object: Some(ProstObject::DatabaseId(database_id)), - }] + granted_by: session.user_id(), + }], + object: Some(ProstObject::DatabaseId(session_database_id)), + }, + ProstPrivilege { + action_with_opts: vec![ + ActionWithGrantOption { + action: Action::Connect as i32, + with_grant_option: true, + granted_by: DEFAULT_SUPER_USER_ID, + }, + ActionWithGrantOption { + action: Action::Create as i32, + with_grant_option: true, + granted_by: DEFAULT_SUPER_USER_ID, + } + ], + object: Some(ProstObject::DatabaseId(database_id)), + } + ] ); } @@ -282,6 +299,7 @@ mod tests { assert!(user_info .grant_privileges .iter() + .filter(|gp| gp.object == Some(ProstObject::DatabaseId(database_id))) .all(|p| p.action_with_opts.iter().all(|ao| !ao.with_grant_option))); } @@ -293,7 +311,18 @@ mod tests { let user_reader = session.env().user_info_reader(); let reader = user_reader.read_guard(); let user_info = reader.get_user_by_name("user1").unwrap(); - assert!(user_info.grant_privileges.is_empty()); + assert_eq!( + user_info.grant_privileges, + vec![ProstPrivilege { + action_with_opts: vec![ActionWithGrantOption { + action: Action::Connect as i32, + with_grant_option: true, + granted_by: session.user_id(), + }], + object: Some(ProstObject::DatabaseId(session_database_id)), + }] + ); } + frontend.run_sql("DROP USER user1").await.unwrap(); } } diff --git a/src/frontend/src/handler/mod.rs b/src/frontend/src/handler/mod.rs index 56747b2a710ef..5a95c796e4471 100644 --- a/src/frontend/src/handler/mod.rs +++ b/src/frontend/src/handler/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -21,7 +21,7 @@ use futures::{Stream, StreamExt}; use pgwire::pg_response::StatementType::{ABORT, BEGIN, COMMIT, ROLLBACK, START_TRANSACTION}; use pgwire::pg_response::{PgResponse, RowSetResult}; use pgwire::pg_server::BoxedError; -use pgwire::types::Row; +use pgwire::types::{Format, Row}; use risingwave_common::error::{ErrorCode, Result}; use risingwave_sqlparser::ast::*; @@ -30,6 +30,7 @@ use crate::scheduler::{DistributedQueryStream, LocalQueryStream}; use crate::session::SessionImpl; use crate::utils::WithOptions; +mod alter_system; mod alter_table; pub mod alter_user; mod create_database; @@ -42,7 +43,7 @@ pub mod create_source; pub mod create_table; pub mod create_table_as; pub mod create_user; -mod create_view; +pub mod create_view; mod describe; mod drop_database; pub mod drop_function; @@ -151,7 +152,7 @@ pub async fn handle( session: Arc, stmt: Statement, sql: &str, - format: bool, + formats: Vec, ) -> Result { session.clear_cancel_query_flag(); let handler_args = HandlerArgs::new(session, &stmt, sql)?; @@ -160,7 +161,7 @@ pub async fn handle( statement, analyze, options, - } => explain::handle_explain(handler_args, *statement, options, analyze), + } => explain::handle_explain(handler_args, *statement, options, analyze).await, Statement::CreateSource { stmt } => { create_source::handle_create_source(handler_args, stmt).await } @@ -307,7 +308,7 @@ pub async fn handle( Statement::Query(_) | Statement::Insert { .. } | Statement::Delete { .. } - | Statement::Update { .. } => query::handle_query(handler_args, stmt, format).await, + | Statement::Update { .. } => query::handle_query(handler_args, stmt, formats).await, Statement::CreateView { materialized, name, @@ -316,6 +317,7 @@ pub async fn handle( with_options: _, // It is put in OptimizerContext or_replace, // not supported + emit_mode, } => { if or_replace { return Err(ErrorCode::NotImplemented( @@ -324,6 +326,13 @@ pub async fn handle( ) .into()); } + if emit_mode == Some(EmitMode::OnWindowClose) { + return Err(ErrorCode::NotImplemented( + "CREATE MATERIALIZED VIEW EMIT ON WINDOW CLOSE".to_string(), + None.into(), + ) + .into()); + } if materialized { create_mv::handle_create_mv(handler_args, name, *query, columns).await } else { @@ -336,7 +345,7 @@ pub async fn handle( variable, value, } => variable::handle_set(handler_args, variable, value), - Statement::ShowVariable { variable } => variable::handle_show(handler_args, variable), + Statement::ShowVariable { variable } => variable::handle_show(handler_args, variable).await, Statement::CreateIndex { name, table_name, @@ -367,6 +376,9 @@ pub async fn handle( name, operation: AlterTableOperation::AddColumn { column_def }, } => alter_table::handle_add_column(handler_args, name, column_def).await, + Statement::AlterSystem { param, value } => { + alter_system::handle_alter_system(handler_args, param, value).await + } // Ignore `StartTransaction` and `BEGIN`,`Abort`,`Rollback`,`Commit`temporarily.Its not // final implementation. // 1. Fully support transaction is too hard and gives few benefits to us. diff --git a/src/frontend/src/handler/privilege.rs b/src/frontend/src/handler/privilege.rs index 604137ab0a47d..e144990898f1f 100644 --- a/src/frontend/src/handler/privilege.rs +++ b/src/frontend/src/handler/privilege.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
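`handle` above (and `handle_query` further below) now take `formats: Vec<Format>` instead of a single boolean. This presumably follows the PostgreSQL extended-protocol convention for result-column format codes: no codes means all text, one code applies to every column, and otherwise there is one code per column. A sketch of resolving a column's format under that assumption, with a local stand-in for `pgwire::types::Format`:

    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    enum Format {
        Text,
        Binary,
    }

    fn format_for_column(formats: &[Format], column_idx: usize) -> Format {
        match formats {
            [] => Format::Text,        // no codes: everything is text
            [only] => *only,           // one code: applies to all columns
            _ => formats[column_idx],  // otherwise: per-column codes
        }
    }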
diff --git a/src/frontend/src/handler/query.rs b/src/frontend/src/handler/query.rs index 76d02f4d6e1b7..361ec520859c0 100644 --- a/src/frontend/src/handler/query.rs +++ b/src/frontend/src/handler/query.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ use futures::StreamExt; use itertools::Itertools; use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::{PgResponse, StatementType}; +use pgwire::types::Format; use postgres_types::FromSql; use risingwave_common::catalog::Schema; use risingwave_common::error::{ErrorCode, Result, RwError}; @@ -93,7 +94,7 @@ pub fn gen_batch_query_plan( pub async fn handle_query( handler_args: HandlerArgs, stmt: Statement, - format: bool, + formats: Vec, ) -> Result { let stmt_type = to_statement_type(&stmt)?; let session = handler_args.session.clone(); @@ -102,7 +103,7 @@ pub async fn handle_query( let mut notice = String::new(); // Subblock to make sure PlanRef (an Rc) is dropped before `await` below. - let (query, query_mode, output_schema) = { + let (plan_fragmenter, query_mode, output_schema) = { let context = OptimizerContext::from_handler_args(handler_args); let (plan, query_mode, schema) = gen_batch_query_plan(&session, context.into(), stmt)?; @@ -115,11 +116,12 @@ pub async fn handle_query( let plan_fragmenter = BatchPlanFragmenter::new( session.env().worker_node_manager_ref(), session.env().catalog_reader().clone(), - ); - let query = plan_fragmenter.split(plan)?; + plan, + )?; context.append_notice(&mut notice); - (query, query_mode, schema) + (plan_fragmenter, query_mode, schema) }; + let query = plan_fragmenter.generate_complete_query().await?; tracing::trace!("Generated query after plan fragmenter: {:?}", &query); let pg_descs = output_schema @@ -133,6 +135,9 @@ pub async fn handle_query( .map(|f| f.data_type()) .collect_vec(); + // Used in counting row count. + let first_field_format = formats.first().copied().unwrap_or(Format::Text); + let mut row_stream = { let query_epoch = session.config().get_query_epoch(); let query_snapshot = if let Some(query_epoch) = query_epoch { @@ -149,7 +154,7 @@ pub async fn handle_query( QueryMode::Local => PgResponseStream::LocalQuery(DataChunkToRowSetAdapter::new( local_execute(session.clone(), query, query_snapshot).await?, column_types, - format, + formats, session.clone(), )), // Local mode do not support cancel tasks. @@ -157,7 +162,7 @@ pub async fn handle_query( PgResponseStream::DistributedQuery(DataChunkToRowSetAdapter::new( distribute_execute(session.clone(), query, query_snapshot).await?, column_types, - format, + formats, session.clone(), )) } @@ -169,16 +174,23 @@ pub async fn handle_query( | StatementType::INSERT_RETURNING | StatementType::DELETE_RETURNING | StatementType::UPDATE_RETURNING => None, + StatementType::INSERT | StatementType::DELETE | StatementType::UPDATE => { - let first_row_set = row_stream - .next() - .await - .expect("compute node should return affected rows in output") - .map_err(|err| RwError::from(ErrorCode::InternalError(format!("{}", err))))?; + let first_row_set = row_stream.next().await; + let first_row_set = match first_row_set { + None => { + return Err(RwError::from(ErrorCode::InternalError( + "no affected rows in output".to_string(), + ))) + } + Some(row) => { + row.map_err(|err| RwError::from(ErrorCode::InternalError(format!("{}", err))))? 
+ } + }; let affected_rows_str = first_row_set[0].values()[0] .as_ref() .expect("compute node should return affected rows in output"); - if format { + if let Format::Binary = first_field_format { Some( i64::from_sql(&postgres_types::Type::INT8, affected_rows_str) .unwrap() @@ -197,30 +209,34 @@ pub async fn handle_query( _ => unreachable!(), }; - // Implicitly flush the writes. - // FIXME(bugen): the DMLs with `RETURNING` clause is done only after the `row_stream` is fully - // consumed, so implicitly flushing here doesn't work. - if session.config().get_implicit_flush() { - flush_for_write(&session, stmt_type).await?; - } + // We need to do some post work after the query is finished and before the `Complete` response + // it sent. This is achieved by the `callback` in `PgResponse`. + let callback = async move { + // Implicitly flush the writes. + if session.config().get_implicit_flush() && stmt_type.is_dml() { + do_flush(&session).await?; + } - // update some metrics - if query_mode == QueryMode::Local { - session - .env() - .frontend_metrics - .latency_local_execution - .observe(query_start_time.elapsed().as_secs_f64()); + // update some metrics + if query_mode == QueryMode::Local { + session + .env() + .frontend_metrics + .latency_local_execution + .observe(query_start_time.elapsed().as_secs_f64()); - session - .env() - .frontend_metrics - .query_counter_local_execution - .inc(); - } + session + .env() + .frontend_metrics + .query_counter_local_execution + .inc(); + } + + Ok(()) + }; - Ok(PgResponse::new_for_stream_with_notice( - stmt_type, rows_count, row_stream, pg_descs, notice, + Ok(PgResponse::new_for_stream_extra( + stmt_type, rows_count, row_stream, pg_descs, notice, callback, )) } @@ -289,10 +305,3 @@ pub async fn local_execute( Ok(execution.stream_rows()) } - -pub async fn flush_for_write(session: &SessionImpl, stmt_type: StatementType) -> Result<()> { - if stmt_type.is_dml() { - do_flush(session).await?; - } - Ok(()) -} diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs index d83499662b6d8..5d87b3cc02a58 100644 --- a/src/frontend/src/handler/show.rs +++ b/src/frontend/src/handler/show.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index 86a309766107c..2fc8d00af2b5e 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
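The query handler above now reads the affected-row count from the first row of the result stream and decodes it according to the format of the first output field: a decimal string in text mode, or PostgreSQL's binary `INT8` encoding (a big-endian 8-byte integer) in binary mode. A minimal sketch of that decoding, with hypothetical names and without the pgwire/postgres-types machinery:

```rust
// Hypothetical helper, not RisingWave's API: decode an affected-row count that
// arrives either as a text decimal or as PostgreSQL's binary INT8 (big-endian i64).
fn decode_affected_rows(raw: &[u8], binary: bool) -> Result<i64, String> {
    if binary {
        let bytes: [u8; 8] = raw.try_into().map_err(|_| "expected 8 bytes".to_string())?;
        Ok(i64::from_be_bytes(bytes))
    } else {
        std::str::from_utf8(raw)
            .map_err(|e| e.to_string())?
            .parse::<i64>()
            .map_err(|e| e.to_string())
    }
}

fn main() {
    assert_eq!(decode_affected_rows(b"42", false).unwrap(), 42);
    assert_eq!(decode_affected_rows(&7_i64.to_be_bytes(), true).unwrap(), 7);
}
```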
@@ -22,13 +22,14 @@ use itertools::Itertools; use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::RowSetResult; use pgwire::pg_server::BoxedError; -use pgwire::types::Row; +use pgwire::types::{Format, FormatIterator, Row}; use pin_project_lite::pin_project; use risingwave_common::array::DataChunk; use risingwave_common::catalog::{ColumnDesc, Field}; -use risingwave_common::error::Result as RwResult; +use risingwave_common::error::{ErrorCode, Result as RwResult}; use risingwave_common::row::Row as _; use risingwave_common::types::{DataType, ScalarRefImpl}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::vector_op::timestamptz::timestamptz_to_string; use crate::session::SessionImpl; @@ -47,7 +48,7 @@ pin_project! { #[pin] chunk_stream: VS, column_types: Vec, - format: bool, + formats: Vec, session_data: StaticSessionData, } } @@ -64,7 +65,7 @@ where pub fn new( chunk_stream: VS, column_types: Vec, - format: bool, + formats: Vec, session: Arc, ) -> Self { let session_data = StaticSessionData { @@ -73,7 +74,7 @@ where Self { chunk_stream, column_types, - format, + formats, session_data, } } @@ -92,7 +93,7 @@ where Poll::Ready(chunk) => match chunk { Some(chunk_result) => match chunk_result { Ok(chunk) => Poll::Ready(Some( - to_pg_rows(this.column_types, chunk, *this.format, this.session_data) + to_pg_rows(this.column_types, chunk, this.formats, this.session_data) .map_err(|err| err.into()), )), Err(err) => Poll::Ready(Some(Err(err))), @@ -107,19 +108,20 @@ where fn pg_value_format( data_type: &DataType, d: ScalarRefImpl<'_>, - format: bool, + format: Format, session_data: &StaticSessionData, ) -> RwResult { // format == false means TEXT format // format == true means BINARY format - if !format { - if *data_type == DataType::Timestamptz { - Ok(timestamptz_to_string_with_session_data(d, session_data)) - } else { - Ok(d.text_format(data_type).into()) + match format { + Format::Text => { + if *data_type == DataType::Timestamptz { + Ok(timestamptz_to_string_with_session_data(d, session_data)) + } else { + Ok(d.text_format(data_type).into()) + } } - } else { - d.binary_format(data_type) + Format::Binary => d.binary_format(data_type), } } @@ -140,16 +142,21 @@ fn timestamptz_to_string_with_session_data( fn to_pg_rows( column_types: &[DataType], chunk: DataChunk, - format: bool, + formats: &[Format], session_data: &StaticSessionData, ) -> RwResult> { + assert_eq!(chunk.dimension(), column_types.len()); + chunk .rows() .map(|r| { + let format_iter = FormatIterator::new(formats, chunk.dimension()) + .map_err(ErrorCode::InternalError)?; let row = r .iter() - .zip_eq(column_types) - .map(|(data, t)| match data { + .zip_eq_fast(column_types) + .zip_eq_fast(format_iter) + .map(|((data, t), format)| match data { Some(data) => Some(pg_value_format(t, data, format, session_data)).transpose(), None => Ok(None), }) @@ -190,6 +197,8 @@ pub fn to_pg_field(f: &Field) -> PgFieldDescriptor { #[cfg(test)] mod tests { + use bytes::BytesMut; + use postgres_types::{ToSql, Type}; use risingwave_common::array::*; use super::*; @@ -222,7 +231,7 @@ mod tests { DataType::Varchar, ], chunk, - false, + &[], &static_session, ); let expected: Vec>> = vec![ @@ -250,6 +259,50 @@ mod tests { assert_eq!(vec, expected); } + #[test] + fn test_to_pg_rows_mix_format() { + let chunk = DataChunk::from_pretty( + "i I f T + 1 6 6.01 aaa + ", + ); + let static_session = StaticSessionData { + timezone: "UTC".into(), + }; + let rows = to_pg_rows( + &[ + DataType::Int32, + DataType::Int64, + 
DataType::Float32, + DataType::Varchar, + ], + chunk, + &[Format::Binary, Format::Binary, Format::Binary, Format::Text], + &static_session, + ); + let mut raw_params = vec![BytesMut::new(); 3]; + 1_i32.to_sql(&Type::ANY, &mut raw_params[0]).unwrap(); + 6_i64.to_sql(&Type::ANY, &mut raw_params[1]).unwrap(); + 6.01_f32.to_sql(&Type::ANY, &mut raw_params[2]).unwrap(); + let raw_params = raw_params + .into_iter() + .map(|b| b.freeze()) + .collect::>(); + let expected: Vec>> = vec![vec![ + Some(raw_params[0].clone()), + Some(raw_params[1].clone()), + Some(raw_params[2].clone()), + Some("aaa".into()), + ]]; + let vec = rows + .unwrap() + .into_iter() + .map(|r| r.values().iter().cloned().collect_vec()) + .collect_vec(); + + assert_eq!(vec, expected); + } + #[test] fn test_value_format() { use {DataType as T, ScalarRefImpl as S}; @@ -258,29 +311,43 @@ mod tests { }; let f = |t, d, f| pg_value_format(t, d, f, &static_session).unwrap(); - assert_eq!(&f(&T::Float32, S::Float32(1_f32.into()), false), "1"); - assert_eq!(&f(&T::Float32, S::Float32(f32::NAN.into()), false), "NaN"); - assert_eq!(&f(&T::Float64, S::Float64(f64::NAN.into()), false), "NaN"); + assert_eq!(&f(&T::Float32, S::Float32(1_f32.into()), Format::Text), "1"); + assert_eq!( + &f(&T::Float32, S::Float32(f32::NAN.into()), Format::Text), + "NaN" + ); + assert_eq!( + &f(&T::Float64, S::Float64(f64::NAN.into()), Format::Text), + "NaN" + ); assert_eq!( - &f(&T::Float32, S::Float32(f32::INFINITY.into()), false), + &f(&T::Float32, S::Float32(f32::INFINITY.into()), Format::Text), "Infinity" ); assert_eq!( - &f(&T::Float32, S::Float32(f32::NEG_INFINITY.into()), false), + &f( + &T::Float32, + S::Float32(f32::NEG_INFINITY.into()), + Format::Text + ), "-Infinity" ); assert_eq!( - &f(&T::Float64, S::Float64(f64::INFINITY.into()), false), + &f(&T::Float64, S::Float64(f64::INFINITY.into()), Format::Text), "Infinity" ); assert_eq!( - &f(&T::Float64, S::Float64(f64::NEG_INFINITY.into()), false), + &f( + &T::Float64, + S::Float64(f64::NEG_INFINITY.into()), + Format::Text + ), "-Infinity" ); - assert_eq!(&f(&T::Boolean, S::Bool(true), false), "t"); - assert_eq!(&f(&T::Boolean, S::Bool(false), false), "f"); + assert_eq!(&f(&T::Boolean, S::Bool(true), Format::Text), "t"); + assert_eq!(&f(&T::Boolean, S::Bool(false), Format::Text), "f"); assert_eq!( - &f(&T::Timestamptz, S::Int64(-1), false), + &f(&T::Timestamptz, S::Int64(-1), Format::Text), "1969-12-31 23:59:59.999999+00:00" ); } diff --git a/src/frontend/src/handler/variable.rs b/src/frontend/src/handler/variable.rs index 80871a73d6ae0..ae5d8e232f7da 100644 --- a/src/frontend/src/handler/variable.rs +++ b/src/frontend/src/handler/variable.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -48,10 +48,17 @@ pub fn handle_set( Ok(PgResponse::empty_result(StatementType::SET_OPTION)) } -pub(super) fn handle_show(handler_args: HandlerArgs, variable: Vec) -> Result { - let config_reader = handler_args.session.config(); +pub(super) async fn handle_show( + handler_args: HandlerArgs, + variable: Vec, +) -> Result { // TODO: Verify that the name used in `show` command is indeed always case-insensitive. let name = variable.iter().map(|e| e.real_value()).join(" "); + if name.eq_ignore_ascii_case("PARAMETERS") { + return handle_show_system_params(handler_args).await; + } + // Show session config. 
+ let config_reader = handler_args.session.config(); if name.eq_ignore_ascii_case("ALL") { return handle_show_all(handler_args.clone()); } @@ -69,7 +76,7 @@ pub(super) fn handle_show(handler_args: HandlerArgs, variable: Vec) -> Re )) } -pub(super) fn handle_show_all(handler_args: HandlerArgs) -> Result { +fn handle_show_all(handler_args: HandlerArgs) -> Result { let config_reader = handler_args.session.config(); let all_variables = config_reader.get_all(); @@ -108,3 +115,35 @@ pub(super) fn handle_show_all(handler_args: HandlerArgs) -> Result ], )) } + +async fn handle_show_system_params(handler_args: HandlerArgs) -> Result { + let params = handler_args + .session + .env() + .meta_client() + .get_system_params() + .await?; + let rows = params + .to_kv() + .into_iter() + .map(|(k, v)| Row::new(vec![Some(k.into()), Some(v.into())])) + .collect_vec(); + + Ok(RwPgResponse::new_for_stream( + StatementType::SHOW_COMMAND, + None, + rows.into(), + vec![ + PgFieldDescriptor::new( + "Name".to_string(), + DataType::VARCHAR.to_oid(), + DataType::VARCHAR.type_len(), + ), + PgFieldDescriptor::new( + "Value".to_string(), + DataType::VARCHAR.to_oid(), + DataType::VARCHAR.type_len(), + ), + ], + )) +} diff --git a/src/frontend/src/health_service.rs b/src/frontend/src/health_service.rs index c5164a9633a75..5ff1f4a85f8c6 100644 --- a/src/frontend/src/health_service.rs +++ b/src/frontend/src/health_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/lib.rs b/src/frontend/src/lib.rs index 48406ac6cfa08..9954f6d003775 100644 --- a/src/frontend/src/lib.rs +++ b/src/frontend/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -47,6 +47,7 @@ pub use planner::Planner; mod scheduler; pub mod session; mod stream_fragmenter; +use risingwave_common_proc_macro::OverrideConfig; pub use stream_fragmenter::build_graph; mod utils; pub use utils::{explain_stream_graph, WithOptions}; @@ -65,43 +66,71 @@ use clap::Parser; use pgwire::pg_server::pg_serve; use session::SessionManagerImpl; +/// Command-line arguments for frontend-node. #[derive(Parser, Clone, Debug)] pub struct FrontendOpts { - // TODO: rename to listen_address and separate out the port. - #[clap(long, default_value = "127.0.0.1:4566")] - pub host: String, - - // Optional, we will use listen_address if not specified. - #[clap(long)] - pub client_address: Option, + // TODO: rename to listen_addr and separate out the port. + /// The address that this service listens to. + /// Usually the localhost + desired port. + #[clap( + long, + alias = "host", + env = "RW_LISTEN_ADDR", + default_value = "127.0.0.1:4566" + )] + pub listen_addr: String, + + /// The address for contacting this instance of the service. + /// This would be synonymous with the service's "public address" + /// or "identifying address". + /// Optional, we will use listen_addr if not specified. + #[clap(long, env = "RW_ADVERTISE_ADDR", alias = "client-address")] + pub advertise_addr: Option, // TODO: This is currently unused. 
- #[clap(long)] + #[clap(long, env = "RW_PORT")] pub port: Option, - #[clap(long, default_value = "http://127.0.0.1:5690")] + /// The address via which we will attempt to connect to a leader meta node. + #[clap(long, env = "RW_META_ADDR", default_value = "http://127.0.0.1:5690")] pub meta_addr: String, - #[clap(long, default_value = "127.0.0.1:2222")] + #[clap( + long, + env = "RW_PROMETHEUS_LISTENER_ADDR", + default_value = "127.0.0.1:2222" + )] pub prometheus_listener_addr: String, - #[clap(long, default_value = "127.0.0.1:6786")] + #[clap( + long, + env = "RW_HEALTH_CHECK_LISTENER_ADDR", + default_value = "127.0.0.1:6786" + )] pub health_check_listener_addr: String, - /// Used for control the metrics level, similar to log level. - /// 0 = close metrics - /// >0 = open metrics - #[clap(long, default_value = "0")] - pub metrics_level: u32, - /// The path of `risingwave.toml` configuration file. /// /// If empty, default configuration values will be used. /// /// Note that internal system parameters should be defined in the configuration file at /// [`risingwave_common::config`] instead of command line arguments. - #[clap(long, default_value = "")] + #[clap(long, env = "RW_CONFIG_PATH", default_value = "")] pub config_path: String, + + #[clap(flatten)] + override_opts: OverrideConfigOpts, +} + +/// Command-line arguments for frontend-node that overrides the config file. +#[derive(Parser, Clone, Debug, OverrideConfig)] +struct OverrideConfigOpts { + /// Used for control the metrics level, similar to log level. + /// 0 = close metrics + /// >0 = open metrics + #[clap(long, env = "RW_METRICS_LEVEL")] + #[override_opts(path = server.metrics_level)] + pub metrics_level: Option, } impl Default for FrontendOpts { @@ -120,8 +149,9 @@ pub fn start(opts: FrontendOpts) -> Pin + Send>> { // WARNING: don't change the function signature. Making it `async fn` will cause // slow compile in release mode. Box::pin(async move { - let session_mgr = Arc::new(SessionManagerImpl::new(&opts).await.unwrap()); - pg_serve(&opts.host, session_mgr, Some(TlsConfig::new_default())) + let listen_addr = opts.listen_addr.clone(); + let session_mgr = Arc::new(SessionManagerImpl::new(opts).await.unwrap()); + pg_serve(&listen_addr, session_mgr, Some(TlsConfig::new_default())) .await .unwrap(); }) diff --git a/src/frontend/src/meta_client.rs b/src/frontend/src/meta_client.rs index 138b27a3b26c6..a7ac0de32f44a 100644 --- a/src/frontend/src/meta_client.rs +++ b/src/frontend/src/meta_client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,10 +15,12 @@ use std::collections::HashMap; use risingwave_pb::backup_service::MetaSnapshotMetadata; +use risingwave_pb::ddl_service::DdlProgress; use risingwave_pb::hummock::HummockSnapshot; use risingwave_pb::meta::list_table_fragments_response::TableFragmentInfo; +use risingwave_pb::meta::CreatingJobInfo; use risingwave_rpc_client::error::Result; -use risingwave_rpc_client::{HummockMetaClient, MetaClient}; +use risingwave_rpc_client::{HummockMetaClient, MetaClient, SystemParamsReader}; /// A wrapper around the `MetaClient` that only provides a minor set of meta rpc. 
/// Most of the rpc to meta are delegated by other separate structs like `CatalogWriter`, @@ -33,6 +35,8 @@ pub trait FrontendMetaClient: Send + Sync { async fn flush(&self, checkpoint: bool) -> Result; + async fn cancel_creating_jobs(&self, infos: Vec) -> Result<()>; + async fn list_table_fragments( &self, table_ids: &[u32], @@ -43,6 +47,12 @@ pub trait FrontendMetaClient: Send + Sync { async fn unpin_snapshot_before(&self, epoch: u64) -> Result<()>; async fn list_meta_snapshots(&self) -> Result>; + + async fn get_system_params(&self) -> Result; + + async fn set_system_param(&self, param: String, value: Option) -> Result<()>; + + async fn list_ddl_progress(&self) -> Result>; } pub struct FrontendMetaClientImpl(pub MetaClient); @@ -61,6 +71,10 @@ impl FrontendMetaClient for FrontendMetaClientImpl { self.0.flush(checkpoint).await } + async fn cancel_creating_jobs(&self, infos: Vec) -> Result<()> { + self.0.cancel_creating_jobs(infos).await + } + async fn list_table_fragments( &self, table_ids: &[u32], @@ -80,4 +94,17 @@ impl FrontendMetaClient for FrontendMetaClientImpl { let manifest = self.0.get_meta_snapshot_manifest().await?; Ok(manifest.snapshot_metadata) } + + async fn get_system_params(&self) -> Result { + self.0.get_system_params().await + } + + async fn set_system_param(&self, param: String, value: Option) -> Result<()> { + self.0.set_system_param(param, value).await + } + + async fn list_ddl_progress(&self) -> Result> { + let ddl_progress = self.0.get_ddl_progress().await?; + Ok(ddl_progress) + } } diff --git a/src/frontend/src/monitor/mod.rs b/src/frontend/src/monitor/mod.rs index d3bea48c110a1..1b938ddae600f 100644 --- a/src/frontend/src/monitor/mod.rs +++ b/src/frontend/src/monitor/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/monitor/stats.rs b/src/frontend/src/monitor/stats.rs index 2f02bebb2d281..35dd16cde7302 100644 --- a/src/frontend/src/monitor/stats.rs +++ b/src/frontend/src/monitor/stats.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/observer/mod.rs b/src/frontend/src/observer/mod.rs index 554b7bcb73bcd..7e38b6f060d02 100644 --- a/src/frontend/src/observer/mod.rs +++ b/src/frontend/src/observer/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/observer/observer_manager.rs b/src/frontend/src/observer/observer_manager.rs index 3e148050954f5..525064609ed7a 100644 --- a/src/frontend/src/observer/observer_manager.rs +++ b/src/frontend/src/observer/observer_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
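The `SHOW PARAMETERS` support added in this patch is split across two places: `handle_show_system_params` in the variable handler fetches the parameters through the frontend's meta client, and the new `FrontendMetaClient::get_system_params` method performs the RPC. The sketch below shows the shape of that flow with a synchronous, simplified stand-in trait; the trait, the fake parameter value, and the row representation are all illustrative, not the real frontend API.

```rust
// Simplified, synchronous stand-in for the frontend's meta-client trait.
trait SystemParamsSource {
    fn get_system_params(&self) -> Vec<(String, String)>;
}

struct FakeMeta;

impl SystemParamsSource for FakeMeta {
    fn get_system_params(&self) -> Vec<(String, String)> {
        // Example data only; real parameters come from the meta node.
        vec![("barrier_interval_ms".to_string(), "1000".to_string())]
    }
}

/// Render the system parameters as (Name, Value) rows, mirroring the two
/// VARCHAR columns the real handler describes in its `PgFieldDescriptor`s.
fn handle_show_system_params(meta: &dyn SystemParamsSource) -> Vec<Vec<Option<String>>> {
    meta.get_system_params()
        .into_iter()
        .map(|(k, v)| vec![Some(k), Some(v)])
        .collect()
}

fn main() {
    let rows = handle_show_system_params(&FakeMeta);
    assert_eq!(
        rows,
        vec![vec![
            Some("barrier_interval_ms".to_string()),
            Some("1000".to_string())
        ]]
    );
}
```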
@@ -16,11 +16,11 @@ use std::sync::Arc; use parking_lot::RwLock; use risingwave_common::catalog::CatalogVersion; -use risingwave_common::util::compress::decompress_data; +use risingwave_common::hash::ParallelUnitMapping; use risingwave_common_service::observer_manager::{ObserverState, SubscribeFrontend}; use risingwave_pb::common::WorkerNode; use risingwave_pb::meta::subscribe_response::{Info, Operation}; -use risingwave_pb::meta::SubscribeResponse; +use risingwave_pb::meta::{FragmentParallelUnitMapping, SubscribeResponse}; use tokio::sync::watch::Sender; use crate::catalog::root_catalog::Catalog; @@ -121,12 +121,15 @@ impl ObserverState for FrontendObserverNode { snapshot .parallel_unit_mappings .iter() - .map(|mapping| { - ( - mapping.fragment_id, - decompress_data(&mapping.original_indices, &mapping.data), - ) - }) + .map( + |FragmentParallelUnitMapping { + fragment_id, + mapping, + }| { + let mapping = ParallelUnitMapping::from_protobuf(mapping.as_ref().unwrap()); + (*fragment_id, mapping) + }, + ) .collect(), ); self.hummock_snapshot_manager @@ -267,32 +270,30 @@ impl FrontendObserverNode { return; }; match info { - Info::ParallelUnitMapping(parallel_unit_mapping) => match resp.operation() { - Operation::Add => { - let fragment_id = parallel_unit_mapping.fragment_id; - let mapping = decompress_data( - ¶llel_unit_mapping.original_indices, - ¶llel_unit_mapping.data, - ); - self.worker_node_manager - .insert_fragment_mapping(fragment_id, mapping); - } - Operation::Delete => { - let fragment_id = parallel_unit_mapping.fragment_id; - self.worker_node_manager - .remove_fragment_mapping(&fragment_id); - } - Operation::Update => { - let fragment_id = parallel_unit_mapping.fragment_id; - let mapping = decompress_data( - ¶llel_unit_mapping.original_indices, - ¶llel_unit_mapping.data, - ); - self.worker_node_manager - .update_fragment_mapping(fragment_id, mapping); + Info::ParallelUnitMapping(parallel_unit_mapping) => { + let fragment_id = parallel_unit_mapping.fragment_id; + let mapping = || { + ParallelUnitMapping::from_protobuf( + parallel_unit_mapping.mapping.as_ref().unwrap(), + ) + }; + + match resp.operation() { + Operation::Add => { + self.worker_node_manager + .insert_fragment_mapping(fragment_id, mapping()); + } + Operation::Delete => { + self.worker_node_manager + .remove_fragment_mapping(&fragment_id); + } + Operation::Update => { + self.worker_node_manager + .update_fragment_mapping(fragment_id, mapping()); + } + _ => panic!("receive an unsupported notify {:?}", resp), } - _ => panic!("receive an unsupported notify {:?}", resp), - }, + } _ => unreachable!(), } } diff --git a/src/frontend/src/optimizer/delta_join_solver.rs b/src/frontend/src/optimizer/delta_join_solver.rs index 252edf45db911..f65ba78ada901 100644 --- a/src/frontend/src/optimizer/delta_join_solver.rs +++ b/src/frontend/src/optimizer/delta_join_solver.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
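In the observer hunk above, the fragment-to-parallel-unit mapping is no longer shipped as compressed index arrays; it is decoded from the protobuf `FragmentParallelUnitMapping` via `ParallelUnitMapping::from_protobuf`, and the Add/Update/Delete arms share a closure so the decode only runs when an arm actually needs the mapping. A tiny sketch of that deferred-decode pattern, with toy types in place of the real mapping:

```rust
// Toy stand-ins for the notification operation and the decoded mapping.
#[derive(Debug)]
enum Operation {
    Add,
    Update,
    Delete,
}

fn apply(op: Operation, raw_mapping: &[u32]) {
    // Defer the (potentially costly) decode behind a closure so that the
    // Delete arm never pays for it.
    let mapping = || raw_mapping.to_vec();

    match op {
        Operation::Add => println!("insert mapping {:?}", mapping()),
        Operation::Update => println!("update mapping {:?}", mapping()),
        Operation::Delete => println!("remove mapping"),
    }
}

fn main() {
    apply(Operation::Add, &[0, 1, 2]);
    apply(Operation::Delete, &[0, 1, 2]);
}
```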
diff --git a/src/frontend/src/optimizer/heuristic_optimizer.rs b/src/frontend/src/optimizer/heuristic_optimizer.rs index e06f98070e9ce..0ddbe0f96abe4 100644 --- a/src/frontend/src/optimizer/heuristic_optimizer.rs +++ b/src/frontend/src/optimizer/heuristic_optimizer.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/mod.rs b/src/frontend/src/optimizer/mod.rs index 96936f8c3e6ec..181d82a7decbc 100644 --- a/src/frontend/src/optimizer/mod.rs +++ b/src/frontend/src/optimizer/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,15 +23,17 @@ pub use plan_rewriter::PlanRewriter; mod plan_visitor; pub use plan_visitor::PlanVisitor; mod optimizer_context; +mod plan_expr_rewriter; mod rule; - use fixedbitset::FixedBitSet; use itertools::Itertools as _; pub use optimizer_context::*; +use plan_expr_rewriter::ConstEvalRewriter; use plan_rewriter::ShareSourceRewriter; use property::Order; -use risingwave_common::catalog::{Field, Schema}; +use risingwave_common::catalog::{ColumnCatalog, Field, Schema}; use risingwave_common::error::{ErrorCode, Result}; +use risingwave_common::util::iter_util::ZipEqDebug; use self::heuristic_optimizer::{ApplyOrder, HeuristicOptimizer}; use self::plan_node::{ @@ -46,11 +48,10 @@ use self::plan_visitor::{ }; use self::property::RequiredDist; use self::rule::*; -use crate::catalog::column_catalog::ColumnCatalog; use crate::catalog::table_catalog::{TableType, TableVersion}; -use crate::handler::create_table::DmlFlag; use crate::optimizer::plan_node::{ BatchExchange, ColumnPruningContext, PlanNodeType, PlanTreeNode, PredicatePushdownContext, + RewriteExprsRecursive, }; use crate::optimizer::property::Distribution; use crate::utils::Condition; @@ -119,7 +120,7 @@ impl PlanRoot { .out_fields .ones() .map(|i| self.plan.schema().fields()[i].clone()) - .zip_eq(&self.out_names) + .zip_eq_debug(&self.out_names) .map(|(field, name)| Field { name: name.clone(), ..field @@ -194,6 +195,10 @@ impl PlanRoot { /// Apply logical optimization to the plan. pub fn gen_optimized_logical_plan(&self) -> Result { + self.gen_optimized_logical_plan_inner(false) + } + + fn gen_optimized_logical_plan_inner(&self, for_stream: bool) -> Result { let mut plan = self.plan.clone(); let ctx = plan.ctx(); let explain_trace = ctx.is_explain_trace(); @@ -203,9 +208,21 @@ impl PlanRoot { ctx.trace(plan.explain_to_string().unwrap()); } + // If share plan is disable, we need to remove all the share operator generated by the + // binder, e.g. CTE and View. However, we still need to share source to ensure self + // source join can return correct result. + if !ctx.session_ctx().config().get_enable_share_plan() { + plan = self.optimize_by_rules( + plan, + "DAG To Tree".to_string(), + vec![DagToTreeRule::create()], + ApplyOrder::TopDown, + ); + } + // Replace source to share source. - // Perform share source at the beginning so that we can benefit from predicate pushdown and - // column pruning for the share operator. + // Perform share source at the beginning so that we can benefit from predicate pushdown + // and column pruning for the share operator. 
plan = ShareSourceRewriter::share_source(plan); if explain_trace { ctx.trace("Share Source:"); @@ -316,6 +333,16 @@ impl PlanRoot { ctx.trace(plan.explain_to_string().unwrap()); } + // If for stream, push down predicates with now into a left-semi join + if for_stream { + plan = self.optimize_by_rules( + plan, + "Push down filter with now into a left semijoin".to_string(), + vec![FilterWithNowToJoinRule::create()], + ApplyOrder::TopDown, + ); + } + // Push down the calculation of inputs of join's condition. plan = self.optimize_by_rules( plan, @@ -380,8 +407,9 @@ impl PlanRoot { // merge should be applied before eliminate ProjectMergeRule::create(), ProjectEliminateRule::create(), + TrivialProjectToValuesRule::create(), // project-join merge should be applied after merge - // and eliminate + // eliminate and to values ProjectJoinMergeRule::create(), AggProjectMergeRule::create(), ], @@ -395,6 +423,7 @@ impl PlanRoot { OverAggToTopNRule::create(), ProjectMergeRule::create(), ProjectEliminateRule::create(), + TrivialProjectToValuesRule::create(), ], ApplyOrder::TopDown, ); @@ -435,9 +464,26 @@ impl PlanRoot { ApplyOrder::TopDown, ); + plan = self.optimize_by_rules( + plan, + "Agg on Index".to_string(), + vec![TopNOnIndexRule::create()], + ApplyOrder::TopDown, + ); + // Convert to physical plan node plan = plan.to_batch_with_order_required(&self.required_order)?; + // TODO: SessionTimezone substitution + // Const eval of exprs at the last minute + // plan = const_eval_exprs(plan)?; + + // let ctx = plan.ctx(); + // if ctx.is_explain_trace() { + // ctx.trace("Const eval exprs:"); + // ctx.trace(plan.explain_to_string().unwrap()); + // } + #[cfg(debug_assertions)] InputRefValidator.validate(plan.clone()); assert!(*plan.distribution() == Distribution::Single, "{}", plan); @@ -542,14 +588,18 @@ impl PlanRoot { Ok(plan) } + pub fn gen_optimized_logical_plan_for_stream(&self) -> Result { + self.gen_optimized_logical_plan_inner(true) + } + /// Generate create index or create materialize view plan. fn gen_stream_plan(&mut self) -> Result { let ctx = self.plan.ctx(); let explain_trace = ctx.is_explain_trace(); - let plan = match self.plan.convention() { + let mut plan = match self.plan.convention() { Convention::Logical => { - let plan = self.gen_optimized_logical_plan()?; + let plan = self.gen_optimized_logical_plan_for_stream()?; let (plan, out_col_change) = plan.logical_rewrite_for_stream(&mut Default::default())?; @@ -575,14 +625,24 @@ impl PlanRoot { ctx.trace(plan.explain_to_string().unwrap()); } - // TODO: enable delta join - // // Rewrite joins with index to delta join - // plan = self.optimize_by_rules( - // plan, - // "To IndexDeltaJoin".to_string(), - // vec![IndexDeltaJoinRule::create()], - // ApplyOrder::BottomUp, - // ); + if ctx.session_ctx().config().get_streaming_enable_delta_join() { + // TODO: make it a logical optimization. 
+ // Rewrite joins with index to delta join + plan = self.optimize_by_rules( + plan, + "To IndexDeltaJoin".to_string(), + vec![IndexDeltaJoinRule::create()], + ApplyOrder::BottomUp, + ); + } + + // Const eval of exprs at the last minute + // plan = const_eval_exprs(plan)?; + + // if ctx.is_explain_trace() { + // ctx.trace("Const eval exprs:"); + // ctx.trace(plan.explain_to_string().unwrap()); + // } #[cfg(debug_assertions)] InputRefValidator.validate(plan.clone()); @@ -597,9 +657,8 @@ impl PlanRoot { table_name: String, columns: Vec, definition: String, - handle_pk_conflict: bool, row_id_index: Option, - dml_flag: DmlFlag, + append_only: bool, version: Option, ) -> Result { let mut stream_plan = self.gen_stream_plan()?; @@ -607,7 +666,7 @@ impl PlanRoot { // Add DML node. stream_plan = StreamDml::new( stream_plan, - dml_flag == DmlFlag::AppendOnly, + append_only, columns.iter().map(|c| c.column_desc.clone()).collect(), ) .into(); @@ -623,7 +682,7 @@ impl PlanRoot { self.required_order.clone(), columns, definition, - handle_pk_conflict, + !append_only, row_id_index, version, ) @@ -650,7 +709,11 @@ impl PlanRoot { } /// Optimize and generate a create index plan. - pub fn gen_index_plan(&mut self, index_name: String) -> Result { + pub fn gen_index_plan( + &mut self, + index_name: String, + definition: String, + ) -> Result { let stream_plan = self.gen_stream_plan()?; StreamMaterialize::create( @@ -660,7 +723,7 @@ impl PlanRoot { self.required_order.clone(), self.out_fields.clone(), self.out_names.clone(), - "".into(), // TODO: fill definition here for `SHOW CREATE` + definition, TableType::Index, ) } @@ -685,7 +748,7 @@ impl PlanRoot { // Note: we first plan it like a materialized view, and then rewrite it into a sink. TableType::MaterializedView, ) - .map(|plan| plan.rewrite_into_sink(properties)) + .and_then(|plan| plan.rewrite_into_sink(properties)) } /// Set the plan root's required dist. @@ -694,6 +757,17 @@ impl PlanRoot { } } +#[allow(dead_code)] +fn const_eval_exprs(plan: PlanRef) -> Result { + let mut const_eval_rewriter = ConstEvalRewriter { error: None }; + + let plan = plan.rewrite_exprs_recursive(&mut const_eval_rewriter); + if let Some(error) = const_eval_rewriter.error { + return Err(error); + } + Ok(plan) +} + #[cfg(test)] mod tests { use risingwave_common::catalog::Field; diff --git a/src/frontend/src/optimizer/optimizer_context.rs b/src/frontend/src/optimizer/optimizer_context.rs index ed5b50145d060..e4c9ed1101dca 100644 --- a/src/frontend/src/optimizer/optimizer_context.rs +++ b/src/frontend/src/optimizer/optimizer_context.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -46,6 +46,8 @@ pub struct OptimizerContext { with_options: WithOptions, /// Store the Session Timezone and whether it was used. session_timezone: RefCell, + /// Store expr display id. 
+ next_expr_display_id: RefCell, } pub type OptimizerContextRef = Rc; @@ -73,6 +75,7 @@ impl OptimizerContext { next_correlated_id: RefCell::new(0), with_options: handler_args.with_options, session_timezone, + next_expr_display_id: RefCell::new(0), } } @@ -91,6 +94,7 @@ impl OptimizerContext { next_correlated_id: RefCell::new(0), with_options: Default::default(), session_timezone: RefCell::new(SessionTimezone::new("UTC".into())), + next_expr_display_id: RefCell::new(0), } .into() } @@ -100,6 +104,11 @@ impl OptimizerContext { PlanNodeId(*self.next_plan_node_id.borrow()) } + pub fn next_expr_display_id(&self) -> usize { + *self.next_expr_display_id.borrow_mut() += 1; + *self.next_expr_display_id.borrow() + } + pub fn next_correlated_id(&self) -> CorrelatedId { *self.next_correlated_id.borrow_mut() += 1; *self.next_correlated_id.borrow() diff --git a/src/frontend/src/optimizer/plan_expr_rewriter/const_eval_rewriter.rs b/src/frontend/src/optimizer/plan_expr_rewriter/const_eval_rewriter.rs new file mode 100644 index 0000000000000..7fdfa1857713d --- /dev/null +++ b/src/frontend/src/optimizer/plan_expr_rewriter/const_eval_rewriter.rs @@ -0,0 +1,50 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_common::error::RwError; + +use crate::expr::{Expr, ExprImpl, ExprRewriter, Literal}; + +pub(crate) struct ConstEvalRewriter { + pub(crate) error: Option, +} +impl ExprRewriter for ConstEvalRewriter { + fn rewrite_expr(&mut self, expr: ExprImpl) -> ExprImpl { + if self.error.is_some() { + return expr; + } + if expr.is_const() { + let data_type = expr.return_type(); + match expr.eval_row_const() { + Ok(datum) => Literal::new(datum, data_type).into(), + Err(e) => { + self.error = Some(e); + expr + } + } + } else { + match expr { + ExprImpl::InputRef(inner) => self.rewrite_input_ref(*inner), + ExprImpl::Literal(inner) => self.rewrite_literal(*inner), + ExprImpl::FunctionCall(inner) => self.rewrite_function_call(*inner), + ExprImpl::AggCall(inner) => self.rewrite_agg_call(*inner), + ExprImpl::Subquery(inner) => self.rewrite_subquery(*inner), + ExprImpl::CorrelatedInputRef(inner) => self.rewrite_correlated_input_ref(*inner), + ExprImpl::TableFunction(inner) => self.rewrite_table_function(*inner), + ExprImpl::WindowFunction(inner) => self.rewrite_window_function(*inner), + ExprImpl::UserDefinedFunction(inner) => self.rewrite_user_defined_function(*inner), + } + } + } +} diff --git a/src/frontend/src/optimizer/plan_expr_rewriter/mod.rs b/src/frontend/src/optimizer/plan_expr_rewriter/mod.rs new file mode 100644 index 0000000000000..4c2df9169315f --- /dev/null +++ b/src/frontend/src/optimizer/plan_expr_rewriter/mod.rs @@ -0,0 +1,16 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod const_eval_rewriter; +pub(crate) use const_eval_rewriter::ConstEvalRewriter; diff --git a/src/frontend/src/optimizer/plan_node/batch_delete.rs b/src/frontend/src/optimizer/plan_node/batch_delete.rs index 1264da1193e00..7e697c1fb6879 100644 --- a/src/frontend/src/optimizer/plan_node/batch_delete.rs +++ b/src/frontend/src/optimizer/plan_node/batch_delete.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,12 +14,14 @@ use std::fmt; +use risingwave_common::catalog::INITIAL_TABLE_VERSION_ID; use risingwave_common::error::Result; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::DeleteNode; use super::{ - LogicalDelete, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch, + ExprRewritable, LogicalDelete, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, + ToDistributedBatch, }; use crate::optimizer::plan_node::ToLocalBatch; use crate::optimizer::property::{Distribution, Order, RequiredDist}; @@ -74,6 +76,7 @@ impl ToBatchProst for BatchDelete { fn to_batch_prost_body(&self) -> NodeBody { NodeBody::Delete(DeleteNode { table_id: self.logical.table_id().table_id(), + table_version_id: INITIAL_TABLE_VERSION_ID, // TODO: use correct version id returning: self.logical.has_returning(), }) } @@ -86,3 +89,5 @@ impl ToLocalBatch for BatchDelete { Ok(self.clone_with_input(new_input).into()) } } + +impl ExprRewritable for BatchDelete {} diff --git a/src/frontend/src/optimizer/plan_node/batch_exchange.rs b/src/frontend/src/optimizer/plan_node/batch_exchange.rs index dbc401f0113c3..aabd57b898979 100644 --- a/src/frontend/src/optimizer/plan_node/batch_exchange.rs +++ b/src/frontend/src/optimizer/plan_node/batch_exchange.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
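The new `ConstEvalRewriter` above folds any fully-constant expression into a literal during a plan-wide expression rewrite, recording the first evaluation error instead of failing mid-rewrite (the call sites in `optimizer/mod.rs` are still commented out). The following is a minimal sketch of the same idea on a toy expression type, not the frontend's `ExprImpl`:

```rust
// Toy expression tree; the real rewriter works on `ExprImpl` and many more variants.
#[derive(Debug, Clone, PartialEq)]
enum Expr {
    Literal(i64),
    ColumnRef(usize),
    Add(Box<Expr>, Box<Expr>),
}

/// Fold constant subtrees bottom-up; leave anything that still references a column.
fn const_eval(expr: Expr) -> Expr {
    match expr {
        Expr::Add(lhs, rhs) => {
            let (lhs, rhs) = (const_eval(*lhs), const_eval(*rhs));
            match (&lhs, &rhs) {
                (Expr::Literal(a), Expr::Literal(b)) => match a.checked_add(*b) {
                    Some(v) => Expr::Literal(v),
                    // On evaluation failure keep the original shape, mirroring how
                    // the rewriter records the error and returns the expression.
                    None => Expr::Add(Box::new(lhs), Box::new(rhs)),
                },
                _ => Expr::Add(Box::new(lhs), Box::new(rhs)),
            }
        }
        other => other,
    }
}

fn main() {
    let e = Expr::Add(
        Box::new(Expr::Add(Box::new(Expr::Literal(1)), Box::new(Expr::Literal(2)))),
        Box::new(Expr::ColumnRef(0)),
    );
    // The constant subtree 1 + 2 folds to 3; the column reference is untouched.
    assert_eq!(
        const_eval(e),
        Expr::Add(Box::new(Expr::Literal(3)), Box::new(Expr::ColumnRef(0)))
    );
}
```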
@@ -18,7 +18,9 @@ use risingwave_common::error::Result; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::{ExchangeNode, MergeSortExchangeNode}; -use super::{PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch}; +use super::{ + ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch, +}; use crate::optimizer::plan_node::ToLocalBatch; use crate::optimizer::property::{Distribution, DistributionDisplay, Order, OrderDisplay}; @@ -98,3 +100,5 @@ impl ToLocalBatch for BatchExchange { unreachable!() } } + +impl ExprRewritable for BatchExchange {} diff --git a/src/frontend/src/optimizer/plan_node/batch_expand.rs b/src/frontend/src/optimizer/plan_node/batch_expand.rs index 5c54f5f80f93b..7f6232a0bf921 100644 --- a/src/frontend/src/optimizer/plan_node/batch_expand.rs +++ b/src/frontend/src/optimizer/plan_node/batch_expand.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ use risingwave_pb::batch_plan::expand_node::Subset; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::ExpandNode; +use super::ExprRewritable; use crate::optimizer::plan_node::{ LogicalExpand, PlanBase, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch, ToLocalBatch, }; @@ -99,3 +100,5 @@ impl ToLocalBatch for BatchExpand { Ok(self.clone_with_input(new_input).into()) } } + +impl ExprRewritable for BatchExpand {} diff --git a/src/frontend/src/optimizer/plan_node/batch_filter.rs b/src/frontend/src/optimizer/plan_node/batch_filter.rs index 517f8aae858c6..c92634c2713e1 100644 --- a/src/frontend/src/optimizer/plan_node/batch_filter.rs +++ b/src/frontend/src/optimizer/plan_node/batch_filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,8 +19,10 @@ use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::FilterNode; use super::generic::GenericPlanRef; -use super::{LogicalFilter, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch}; -use crate::expr::{Expr, ExprImpl}; +use super::{ + ExprRewritable, LogicalFilter, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch, +}; +use crate::expr::{Expr, ExprImpl, ExprRewriter}; use crate::optimizer::plan_node::{PlanBase, ToLocalBatch}; use crate::utils::Condition; @@ -93,3 +95,20 @@ impl ToLocalBatch for BatchFilter { Ok(self.clone_with_input(new_input).into()) } } + +impl ExprRewritable for BatchFilter { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_filter() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/batch_group_topn.rs b/src/frontend/src/optimizer/plan_node/batch_group_topn.rs index 97af7cc93088f..ac9ad13c5c99a 100644 --- a/src/frontend/src/optimizer/plan_node/batch_group_topn.rs +++ b/src/frontend/src/optimizer/plan_node/batch_group_topn.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
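A large share of this patch is the `ExprRewritable` trait being implemented across the batch plan nodes: nodes without embedded expressions take an empty impl, while nodes such as `BatchFilter` report `has_rewritable_expr() == true` and rebuild themselves from their rewritten logical counterpart. A condensed sketch of that pattern with toy plan nodes (the real trait returns a `PlanRef` and the rewriter works on `ExprImpl`):

```rust
/// Toy expression rewriter; the real one is `crate::expr::ExprRewriter`.
trait ExprRewriter {
    fn rewrite(&mut self, expr: String) -> String;
}

/// Toy version of the plan-node trait: the defaults are a no-op for nodes
/// that carry no expressions.
trait ExprRewritable: Clone {
    fn has_rewritable_expr(&self) -> bool {
        false
    }
    fn rewrite_exprs(&self, _r: &mut dyn ExprRewriter) -> Self {
        self.clone()
    }
}

#[derive(Clone)]
struct Exchange; // no expressions, keeps the empty default impl
impl ExprRewritable for Exchange {}

#[derive(Clone)]
struct Filter {
    predicate: String,
}

impl ExprRewritable for Filter {
    fn has_rewritable_expr(&self) -> bool {
        true
    }
    fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> Self {
        Filter {
            predicate: r.rewrite(self.predicate.clone()),
        }
    }
}

struct UpperCase;
impl ExprRewriter for UpperCase {
    fn rewrite(&mut self, expr: String) -> String {
        expr.to_uppercase()
    }
}

fn main() {
    let filter = Filter { predicate: "a > 1".to_string() };
    assert_eq!(filter.rewrite_exprs(&mut UpperCase).predicate, "A > 1");
    assert!(!Exchange.has_rewritable_expr());
}
```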
@@ -18,7 +18,10 @@ use risingwave_common::error::Result; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::GroupTopNNode; -use super::{LogicalTopN, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch}; +use super::{ + ExprRewritable, LogicalTopN, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, + ToDistributedBatch, +}; use crate::optimizer::plan_node::ToLocalBatch; use crate::optimizer::property::{Order, RequiredDist}; @@ -95,3 +98,5 @@ impl ToLocalBatch for BatchGroupTopN { Ok(self.clone_with_input(input).into()) } } + +impl ExprRewritable for BatchGroupTopN {} diff --git a/src/frontend/src/optimizer/plan_node/batch_hash_agg.rs b/src/frontend/src/optimizer/plan_node/batch_hash_agg.rs index 2a88ed78fd6cf..55cebf0190072 100644 --- a/src/frontend/src/optimizer/plan_node/batch_hash_agg.rs +++ b/src/frontend/src/optimizer/plan_node/batch_hash_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,8 +19,12 @@ use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::HashAggNode; use super::generic::{GenericPlanRef, PlanAggCall}; -use super::{LogicalAgg, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch}; -use crate::optimizer::plan_node::ToLocalBatch; +use super::{ + ExprRewritable, LogicalAgg, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, + ToDistributedBatch, +}; +use crate::expr::ExprRewriter; +use crate::optimizer::plan_node::{BatchExchange, ToLocalBatch}; use crate::optimizer::property::{Distribution, Order, RequiredDist}; #[derive(Debug, Clone)] @@ -75,7 +79,40 @@ impl ToDistributedBatch for BatchHashAgg { &Order::any(), &RequiredDist::shard_by_key(self.input().schema().len(), self.group_key()), )?; - Ok(self.clone_with_input(new_input).into()) + if self.logical.can_two_phase_agg() && self.logical.two_phase_agg_forced() { + // partial agg + let partial_agg: PlanRef = self.clone_with_input(new_input).into(); + + // insert exchange + let exchange = BatchExchange::new( + partial_agg, + Order::any(), + Distribution::HashShard((0..self.group_key().len()).collect()), + ) + .into(); + + // insert total agg + let total_agg_types = self + .logical + .agg_calls() + .iter() + .enumerate() + .map(|(partial_output_idx, agg_call)| { + agg_call.partial_to_total_agg_call( + partial_output_idx + self.group_key().len(), + false, + ) + }) + .collect(); + let total_agg_logical = LogicalAgg::new( + total_agg_types, + (0..self.group_key().len()).collect(), + exchange, + ); + Ok(BatchHashAgg::new(total_agg_logical).into()) + } else { + Ok(self.clone_with_input(new_input).into()) + } } } @@ -107,3 +144,20 @@ impl ToLocalBatch for BatchHashAgg { Ok(self.clone_with_input(new_input).into()) } } + +impl ExprRewritable for BatchHashAgg { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_agg() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/batch_hash_join.rs b/src/frontend/src/optimizer/plan_node/batch_hash_join.rs index fffbaeb215a1e..77fdef9ffefeb 100644 --- a/src/frontend/src/optimizer/plan_node/batch_hash_join.rs +++ b/src/frontend/src/optimizer/plan_node/batch_hash_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 
RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,10 +22,10 @@ use risingwave_pb::plan_common::JoinType; use super::generic::GenericPlanRef; use super::{ - EqJoinPredicate, LogicalJoin, PlanBase, PlanRef, PlanTreeNodeBinary, ToBatchProst, - ToDistributedBatch, + EqJoinPredicate, ExprRewritable, LogicalJoin, PlanBase, PlanRef, PlanTreeNodeBinary, + ToBatchProst, ToDistributedBatch, }; -use crate::expr::Expr; +use crate::expr::{Expr, ExprRewriter}; use crate::optimizer::plan_node::utils::IndicesDisplay; use crate::optimizer::plan_node::{EqJoinPredicateDisplay, ToLocalBatch}; use crate::optimizer::property::{Distribution, Order, RequiredDist}; @@ -262,3 +262,21 @@ impl ToLocalBatch for BatchHashJoin { Ok(self.clone_with_left_right(left, right).into()) } } + +impl ExprRewritable for BatchHashJoin { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_join() + .unwrap() + .clone(), + self.eq_join_predicate.rewrite_exprs(r), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/batch_hop_window.rs b/src/frontend/src/optimizer/plan_node/batch_hop_window.rs index ad5d88a87c036..dfd460642fc02 100644 --- a/src/frontend/src/optimizer/plan_node/batch_hop_window.rs +++ b/src/frontend/src/optimizer/plan_node/batch_hop_window.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,7 +19,8 @@ use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::HopWindowNode; use super::{ - LogicalHopWindow, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch, + ExprRewritable, LogicalHopWindow, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, + ToDistributedBatch, }; use crate::optimizer::plan_node::ToLocalBatch; use crate::optimizer::property::{Order, RequiredDist}; @@ -117,3 +118,5 @@ impl ToLocalBatch for BatchHopWindow { Ok(self.clone_with_input(new_input).into()) } } + +impl ExprRewritable for BatchHopWindow {} diff --git a/src/frontend/src/optimizer/plan_node/batch_insert.rs b/src/frontend/src/optimizer/plan_node/batch_insert.rs index ac778c17415a3..7668aa91a2b5b 100644 --- a/src/frontend/src/optimizer/plan_node/batch_insert.rs +++ b/src/frontend/src/optimizer/plan_node/batch_insert.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
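`BatchHashAgg::to_distributed` above now expands into a two-phase plan when two-phase aggregation is both possible and forced: a partial aggregation on each shard, a hash exchange on the group key, and a total aggregation whose calls are derived with `partial_to_total_agg_call`. The sketch below shows the idea for COUNT, where the total phase is a SUM over partial counts; plain `HashMap`s stand in for the exchange and the agg operators.

```rust
use std::collections::HashMap;

/// Phase 1: each shard counts its own rows per key.
fn partial_agg(rows: &[&str]) -> HashMap<String, u64> {
    let mut counts = HashMap::new();
    for key in rows {
        *counts.entry((*key).to_string()).or_insert(0) += 1;
    }
    counts
}

/// Phase 2: after shuffling partials by key, merge them. For COUNT the
/// "partial to total" call is a SUM over the partial counts.
fn total_agg(partials: Vec<HashMap<String, u64>>) -> HashMap<String, u64> {
    let mut merged = HashMap::new();
    for partial in partials {
        for (key, count) in partial {
            *merged.entry(key).or_insert(0) += count;
        }
    }
    merged
}

fn main() {
    let shard_a = partial_agg(&["x", "y", "x"]);
    let shard_b = partial_agg(&["y", "y"]);
    let totals = total_agg(vec![shard_a, shard_b]);
    assert_eq!(totals["x"], 2);
    assert_eq!(totals["y"], 3);
}
```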
@@ -14,12 +14,15 @@ use std::fmt; +use risingwave_common::catalog::INITIAL_TABLE_VERSION_ID; use risingwave_common::error::Result; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::InsertNode; use risingwave_pb::catalog::ColumnIndex; -use super::{LogicalInsert, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch}; +use super::{ + ExprRewritable, LogicalInsert, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch, +}; use crate::optimizer::plan_node::{PlanBase, ToLocalBatch}; use crate::optimizer::property::{Distribution, Order, RequiredDist}; @@ -80,6 +83,7 @@ impl ToBatchProst for BatchInsert { .collect(); NodeBody::Insert(InsertNode { table_id: self.logical.table_id().table_id(), + table_version_id: INITIAL_TABLE_VERSION_ID, // TODO: use correct version id column_indices, row_id_index: self .logical @@ -97,3 +101,5 @@ impl ToLocalBatch for BatchInsert { Ok(self.clone_with_input(new_input).into()) } } + +impl ExprRewritable for BatchInsert {} diff --git a/src/frontend/src/optimizer/plan_node/batch_limit.rs b/src/frontend/src/optimizer/plan_node/batch_limit.rs index 4a1415807c119..4682f169b2358 100644 --- a/src/frontend/src/optimizer/plan_node/batch_limit.rs +++ b/src/frontend/src/optimizer/plan_node/batch_limit.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,7 +18,10 @@ use risingwave_common::error::Result; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::LimitNode; -use super::{LogicalLimit, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch}; +use super::{ + ExprRewritable, LogicalLimit, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, + ToDistributedBatch, +}; use crate::optimizer::plan_node::ToLocalBatch; use crate::optimizer::property::{Order, RequiredDist}; @@ -46,8 +49,15 @@ impl BatchLimit { let new_offset = 0; let logical_partial_limit = LogicalLimit::new(input, new_limit, new_offset); let batch_partial_limit = Self::new(logical_partial_limit); - let ensure_single_dist = RequiredDist::single() - .enforce_if_not_satisfies(batch_partial_limit.into(), &Order::any())?; + let any_order = Order::any(); + let ensure_single_dist = RequiredDist::single().enforce_if_not_satisfies( + batch_partial_limit.into(), + if self.order().field_order.is_empty() { + &any_order + } else { + self.order() + }, + )?; let batch_global_limit = self.clone_with_input(ensure_single_dist); Ok(batch_global_limit.into()) } @@ -94,3 +104,5 @@ impl ToLocalBatch for BatchLimit { self.two_phase_limit(self.input().to_local()?) } } + +impl ExprRewritable for BatchLimit {} diff --git a/src/frontend/src/optimizer/plan_node/batch_lookup_join.rs b/src/frontend/src/optimizer/plan_node/batch_lookup_join.rs index 48b43e1f02189..3a2b7a124cf99 100644 --- a/src/frontend/src/optimizer/plan_node/batch_lookup_join.rs +++ b/src/frontend/src/optimizer/plan_node/batch_lookup_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
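The `BatchLimit` change above keeps the two-phase shape (a partial limit per partition, then a single-node final limit) but now asks the enforcing exchange to preserve the query order when there is one, instead of always using `Order::any()`. A small numeric sketch of why the partial phase may take `limit + offset` rows per partition while the offset is applied only in the final phase (names and the sort-based merge are illustrative):

```rust
/// Two-phase LIMIT over ordered data. The sort stands in for an
/// order-preserving single-node merge.
fn two_phase_limit(partitions: Vec<Vec<i64>>, limit: usize, offset: usize) -> Vec<i64> {
    // Phase 1: each partition only needs its first `limit + offset` rows.
    let partial: Vec<i64> = partitions
        .into_iter()
        .flat_map(|p| p.into_iter().take(limit + offset))
        .collect();

    // Phase 2: merge on a single node, then apply the real offset and limit.
    let mut merged = partial;
    merged.sort_unstable();
    merged.into_iter().skip(offset).take(limit).collect()
}

fn main() {
    let parts = vec![vec![1, 4, 7], vec![2, 5, 8], vec![3, 6, 9]];
    // LIMIT 3 OFFSET 1 over the ordered union 1..=9 is [2, 3, 4].
    assert_eq!(two_phase_limit(parts, 3, 1), vec![2, 3, 4]);
}
```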
@@ -20,7 +20,8 @@ use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::{DistributedLookupJoinNode, LocalLookupJoinNode}; use super::generic::GenericPlanRef; -use crate::expr::Expr; +use super::ExprRewritable; +use crate::expr::{Expr, ExprRewriter}; use crate::optimizer::plan_node::utils::IndicesDisplay; use crate::optimizer::plan_node::{ EqJoinPredicate, EqJoinPredicateDisplay, LogicalJoin, PlanBase, PlanTreeNodeBinary, @@ -177,7 +178,11 @@ impl ToDistributedBatch for BatchLookupJoin { let input = self.input().to_distributed_with_required( &Order::any(), &RequiredDist::PhysicalDist(Distribution::UpstreamHashShard( - self.eq_join_predicate.left_eq_indexes(), + self.eq_join_predicate + .left_eq_indexes() + .into_iter() + .take(self.lookup_prefix_len) + .collect(), self.right_table_desc.table_id, )), )?; @@ -281,3 +286,24 @@ impl ToLocalBatch for BatchLookupJoin { Ok(self.clone_with_distributed_lookup(input, false).into()) } } + +impl ExprRewritable for BatchLookupJoin { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self { + base: self.base.clone_with_new_plan_id(), + logical: self + .logical + .rewrite_exprs(r) + .as_logical_join() + .unwrap() + .clone(), + eq_join_predicate: self.eq_join_predicate.rewrite_exprs(r), + ..Self::clone(self) + } + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/batch_nested_loop_join.rs b/src/frontend/src/optimizer/plan_node/batch_nested_loop_join.rs index d15ed007482e0..490cc65d733c8 100644 --- a/src/frontend/src/optimizer/plan_node/batch_nested_loop_join.rs +++ b/src/frontend/src/optimizer/plan_node/batch_nested_loop_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,8 +20,11 @@ use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::NestedLoopJoinNode; use super::generic::GenericPlanRef; -use super::{LogicalJoin, PlanBase, PlanRef, PlanTreeNodeBinary, ToBatchProst, ToDistributedBatch}; -use crate::expr::{Expr, ExprImpl}; +use super::{ + ExprRewritable, LogicalJoin, PlanBase, PlanRef, PlanTreeNodeBinary, ToBatchProst, + ToDistributedBatch, +}; +use crate::expr::{Expr, ExprImpl, ExprRewriter}; use crate::optimizer::plan_node::utils::IndicesDisplay; use crate::optimizer::plan_node::ToLocalBatch; use crate::optimizer::property::{Distribution, Order, RequiredDist}; @@ -155,3 +158,20 @@ impl ToLocalBatch for BatchNestedLoopJoin { Ok(self.clone_with_left_right(left, right).into()) } } + +impl ExprRewritable for BatchNestedLoopJoin { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_join() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/batch_project.rs b/src/frontend/src/optimizer/plan_node/batch_project.rs index 58d69dbd41706..538575bd2f903 100644 --- a/src/frontend/src/optimizer/plan_node/batch_project.rs +++ b/src/frontend/src/optimizer/plan_node/batch_project.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -21,9 +21,10 @@ use risingwave_pb::expr::ExprNode; use super::generic::GenericPlanRef; use super::{ - LogicalProject, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch, + ExprRewritable, LogicalProject, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, + ToDistributedBatch, }; -use crate::expr::{Expr, ExprImpl}; +use crate::expr::{Expr, ExprImpl, ExprRewriter}; use crate::optimizer::plan_node::ToLocalBatch; /// `BatchProject` implements [`super::LogicalProject`] to evaluate specified expressions on input @@ -105,3 +106,20 @@ impl ToLocalBatch for BatchProject { Ok(self.clone_with_input(new_input).into()) } } + +impl ExprRewritable for BatchProject { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_project() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/batch_project_set.rs b/src/frontend/src/optimizer/plan_node/batch_project_set.rs index aae2ffd9331d5..9380ce37f7e25 100644 --- a/src/frontend/src/optimizer/plan_node/batch_project_set.rs +++ b/src/frontend/src/optimizer/plan_node/batch_project_set.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,6 +19,8 @@ use risingwave_common::error::Result; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::ProjectSetNode; +use super::ExprRewritable; +use crate::expr::ExprRewriter; use crate::optimizer::plan_node::{ LogicalProjectSet, PlanBase, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch, ToLocalBatch, }; @@ -93,3 +95,20 @@ impl ToLocalBatch for BatchProjectSet { Ok(self.clone_with_input(new_input).into()) } } + +impl ExprRewritable for BatchProjectSet { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_project_set() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs b/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs index f0aa09edf7c88..3e45fc760b231 100644 --- a/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs +++ b/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -20,11 +20,13 @@ use risingwave_common::error::Result; use risingwave_common::types::ScalarImpl; use risingwave_common::util::scan_range::{is_full_range, ScanRange}; use risingwave_pb::batch_plan::plan_node::NodeBody; +use risingwave_pb::batch_plan::row_seq_scan_node::ChunkSize; use risingwave_pb::batch_plan::{RowSeqScanNode, SysRowSeqScanNode}; use risingwave_pb::plan_common::ColumnDesc as ProstColumnDesc; -use super::{PlanBase, PlanRef, ToBatchProst, ToDistributedBatch}; +use super::{ExprRewritable, PlanBase, PlanRef, ToBatchProst, ToDistributedBatch}; use crate::catalog::ColumnId; +use crate::expr::ExprRewriter; use crate::optimizer::plan_node::{LogicalScan, ToLocalBatch}; use crate::optimizer::property::{Distribution, DistributionDisplay, Order}; @@ -239,6 +241,10 @@ impl ToBatchProst for BatchSeqScan { // To be filled by the scheduler. vnode_bitmap: None, ordered: !self.order().is_any(), + chunk_size: self + .logical + .chunk_size() + .map(|chunk_size| ChunkSize { chunk_size }), }) } } @@ -262,3 +268,21 @@ impl ToLocalBatch for BatchSeqScan { Ok(Self::new_inner(self.logical.clone(), dist, self.scan_ranges.clone()).into()) } } + +impl ExprRewritable for BatchSeqScan { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_scan() + .unwrap() + .clone(), + self.scan_ranges.clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/batch_simple_agg.rs b/src/frontend/src/optimizer/plan_node/batch_simple_agg.rs index 4a356bb6ee48e..afcb5b62a72ab 100644 --- a/src/frontend/src/optimizer/plan_node/batch_simple_agg.rs +++ b/src/frontend/src/optimizer/plan_node/batch_simple_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,7 +19,11 @@ use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::SortAggNode; use super::generic::{GenericPlanRef, PlanAggCall}; -use super::{LogicalAgg, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch}; +use super::{ + ExprRewritable, LogicalAgg, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, + ToDistributedBatch, +}; +use crate::expr::ExprRewriter; use crate::optimizer::plan_node::{BatchExchange, ToLocalBatch}; use crate::optimizer::property::{Distribution, Order, RequiredDist}; @@ -73,7 +77,7 @@ impl ToDistributedBatch for BatchSimpleAgg { // TODO: distinct agg cannot use 2-phase agg yet. 
if dist_input.distribution().satisfies(&RequiredDist::AnyShard) - && self.logical.can_agg_two_phase() + && self.logical.can_two_phase_agg() { // partial agg let partial_agg = self.clone_with_input(dist_input).into(); @@ -128,3 +132,20 @@ impl ToLocalBatch for BatchSimpleAgg { Ok(self.clone_with_input(new_input).into()) } } + +impl ExprRewritable for BatchSimpleAgg { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_agg() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/batch_sort.rs b/src/frontend/src/optimizer/plan_node/batch_sort.rs index 0c4b2d481c2f2..ca1d02c6f7d23 100644 --- a/src/frontend/src/optimizer/plan_node/batch_sort.rs +++ b/src/frontend/src/optimizer/plan_node/batch_sort.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,7 +18,9 @@ use risingwave_common::error::Result; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::SortNode; -use super::{PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch}; +use super::{ + ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch, +}; use crate::optimizer::plan_node::ToLocalBatch; use crate::optimizer::property::{Order, OrderDisplay}; @@ -84,3 +86,5 @@ impl ToLocalBatch for BatchSort { Ok(self.clone_with_input(new_input).into()) } } + +impl ExprRewritable for BatchSort {} diff --git a/src/frontend/src/optimizer/plan_node/batch_sort_agg.rs b/src/frontend/src/optimizer/plan_node/batch_sort_agg.rs index 477aab7d7f07f..1e3056dff4427 100644 --- a/src/frontend/src/optimizer/plan_node/batch_sort_agg.rs +++ b/src/frontend/src/optimizer/plan_node/batch_sort_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -21,8 +21,11 @@ use risingwave_pb::batch_plan::SortAggNode; use risingwave_pb::expr::ExprNode; use super::generic::{GenericPlanRef, PlanAggCall}; -use super::{LogicalAgg, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch}; -use crate::expr::{Expr, ExprImpl, InputRef}; +use super::{ + ExprRewritable, LogicalAgg, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, + ToDistributedBatch, +}; +use crate::expr::{Expr, ExprImpl, ExprRewriter, InputRef}; use crate::optimizer::plan_node::ToLocalBatch; use crate::optimizer::property::{Distribution, Order, RequiredDist}; @@ -143,3 +146,20 @@ impl ToLocalBatch for BatchSortAgg { Ok(self.clone_with_input(new_input).into()) } } + +impl ExprRewritable for BatchSortAgg { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_agg() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/batch_source.rs b/src/frontend/src/optimizer/plan_node/batch_source.rs index 968340b31af6f..b4f7780f274e7 100644 --- a/src/frontend/src/optimizer/plan_node/batch_source.rs +++ b/src/frontend/src/optimizer/plan_node/batch_source.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,7 +19,10 @@ use risingwave_common::error::Result; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::SourceNode; -use super::{LogicalSource, PlanBase, PlanRef, ToBatchProst, ToDistributedBatch, ToLocalBatch}; +use super::{ + ExprRewritable, LogicalSource, PlanBase, PlanRef, ToBatchProst, ToDistributedBatch, + ToLocalBatch, +}; use crate::optimizer::property::{Distribution, Order}; /// [`BatchSource`] represents a table/connector source at the very beginning of the graph. @@ -105,3 +108,4 @@ impl ToBatchProst for BatchSource { }) } } +impl ExprRewritable for BatchSource {} diff --git a/src/frontend/src/optimizer/plan_node/batch_table_function.rs b/src/frontend/src/optimizer/plan_node/batch_table_function.rs index daf2e0edbf3d4..355a6c60db0f6 100644 --- a/src/frontend/src/optimizer/plan_node/batch_table_function.rs +++ b/src/frontend/src/optimizer/plan_node/batch_table_function.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -18,7 +18,10 @@ use risingwave_common::error::Result; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::TableFunctionNode; -use super::{PlanBase, PlanRef, PlanTreeNodeLeaf, ToBatchProst, ToDistributedBatch}; +use super::{ + ExprRewritable, PlanBase, PlanRef, PlanTreeNodeLeaf, ToBatchProst, ToDistributedBatch, +}; +use crate::expr::ExprRewriter; use crate::optimizer::plan_node::logical_table_function::LogicalTableFunction; use crate::optimizer::plan_node::ToLocalBatch; use crate::optimizer::property::{Distribution, Order}; @@ -78,3 +81,20 @@ impl ToLocalBatch for BatchTableFunction { Ok(Self::with_dist(self.logical().clone(), Distribution::Single).into()) } } + +impl ExprRewritable for BatchTableFunction { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_table_function() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/batch_topn.rs b/src/frontend/src/optimizer/plan_node/batch_topn.rs index 1bb79ea16d52d..2dc5645efbca0 100644 --- a/src/frontend/src/optimizer/plan_node/batch_topn.rs +++ b/src/frontend/src/optimizer/plan_node/batch_topn.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,7 +18,10 @@ use risingwave_common::error::Result; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::TopNNode; -use super::{LogicalTopN, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch}; +use super::{ + ExprRewritable, LogicalTopN, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, + ToDistributedBatch, +}; use crate::optimizer::plan_node::ToLocalBatch; use crate::optimizer::property::{Order, RequiredDist}; @@ -102,3 +105,5 @@ impl ToLocalBatch for BatchTopN { self.two_phase_topn(self.input().to_local()?) } } + +impl ExprRewritable for BatchTopN {} diff --git a/src/frontend/src/optimizer/plan_node/batch_union.rs b/src/frontend/src/optimizer/plan_node/batch_union.rs index 6af61030a4428..4ac6e87218ada 100644 --- a/src/frontend/src/optimizer/plan_node/batch_union.rs +++ b/src/frontend/src/optimizer/plan_node/batch_union.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -18,7 +18,7 @@ use risingwave_common::error::Result; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::UnionNode; -use super::{PlanRef, ToBatchProst, ToDistributedBatch}; +use super::{ExprRewritable, PlanRef, ToBatchProst, ToDistributedBatch}; use crate::optimizer::plan_node::{LogicalUnion, PlanBase, PlanTreeNode, ToLocalBatch}; use crate::optimizer::property::{Distribution, Order, RequiredDist}; @@ -100,3 +100,5 @@ impl ToLocalBatch for BatchUnion { Ok(self.clone_with_inputs(&new_inputs?)) } } + +impl ExprRewritable for BatchUnion {} diff --git a/src/frontend/src/optimizer/plan_node/batch_update.rs b/src/frontend/src/optimizer/plan_node/batch_update.rs index 847ae1cdb85c0..94bc258e69de3 100644 --- a/src/frontend/src/optimizer/plan_node/batch_update.rs +++ b/src/frontend/src/optimizer/plan_node/batch_update.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,15 +14,17 @@ use std::fmt; +use risingwave_common::catalog::INITIAL_TABLE_VERSION_ID; use risingwave_common::error::Result; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::UpdateNode; use super::generic::GenericPlanRef; use super::{ - LogicalUpdate, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, ToDistributedBatch, + ExprRewritable, LogicalUpdate, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchProst, + ToDistributedBatch, }; -use crate::expr::Expr; +use crate::expr::{Expr, ExprRewriter}; use crate::optimizer::plan_node::ToLocalBatch; use crate::optimizer::property::{Distribution, Order, RequiredDist}; @@ -89,6 +91,7 @@ impl ToBatchProst for BatchUpdate { NodeBody::Update(UpdateNode { exprs, table_id: self.logical.table_id().table_id(), + table_version_id: INITIAL_TABLE_VERSION_ID, // TODO: use correct version id returning: self.logical.has_returning(), }) } @@ -101,3 +104,20 @@ impl ToLocalBatch for BatchUpdate { Ok(self.clone_with_input(new_input).into()) } } + +impl ExprRewritable for BatchUpdate { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_update() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/batch_values.rs b/src/frontend/src/optimizer/plan_node/batch_values.rs index cda867d649407..21b719d3821eb 100644 --- a/src/frontend/src/optimizer/plan_node/batch_values.rs +++ b/src/frontend/src/optimizer/plan_node/batch_values.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -20,8 +20,11 @@ use risingwave_pb::batch_plan::values_node::ExprTuple; use risingwave_pb::batch_plan::ValuesNode; use super::generic::GenericPlanRef; -use super::{LogicalValues, PlanBase, PlanRef, PlanTreeNodeLeaf, ToBatchProst, ToDistributedBatch}; -use crate::expr::{Expr, ExprImpl}; +use super::{ + ExprRewritable, LogicalValues, PlanBase, PlanRef, PlanTreeNodeLeaf, ToBatchProst, + ToDistributedBatch, +}; +use crate::expr::{Expr, ExprImpl, ExprRewriter}; use crate::optimizer::plan_node::ToLocalBatch; use crate::optimizer::property::{Distribution, Order}; @@ -104,3 +107,20 @@ impl ToLocalBatch for BatchValues { Ok(Self::with_dist(self.logical().clone(), Distribution::Single).into()) } } + +impl ExprRewritable for BatchValues { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_values() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/col_pruning.rs b/src/frontend/src/optimizer/plan_node/col_pruning.rs index e0facab36a68b..b706ec7ac387e 100644 --- a/src/frontend/src/optimizer/plan_node/col_pruning.rs +++ b/src/frontend/src/optimizer/plan_node/col_pruning.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_node/convert.rs b/src/frontend/src/optimizer/plan_node/convert.rs index e17cbef286c71..fca2bf20f2c78 100644 --- a/src/frontend/src/optimizer/plan_node/convert.rs +++ b/src/frontend/src/optimizer/plan_node/convert.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_node/eq_join_predicate.rs b/src/frontend/src/optimizer/plan_node/eq_join_predicate.rs index e12be72edfff4..b684cc70c2024 100644 --- a/src/frontend/src/optimizer/plan_node/eq_join_predicate.rs +++ b/src/frontend/src/optimizer/plan_node/eq_join_predicate.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,7 +16,7 @@ use std::fmt; use risingwave_common::catalog::Schema; -use crate::expr::{ExprType, FunctionCall, InputRef, InputRefDisplay}; +use crate::expr::{ExprRewriter, ExprType, FunctionCall, InputRef, InputRefDisplay}; use crate::utils::{ColIndexMapping, Condition, ConditionDisplay}; /// The join predicate used in optimizer @@ -242,6 +242,12 @@ impl EqJoinPredicate { Self::new(self.other_cond, new_eq_keys, self.left_cols_num) } + + pub fn rewrite_exprs(&self, rewriter: &mut (impl ExprRewriter + ?Sized)) -> Self { + let mut new = self.clone(); + new.other_cond = new.other_cond.rewrite_expr(rewriter); + new + } } pub struct EqJoinPredicateDisplay<'a> { diff --git a/src/frontend/src/optimizer/plan_node/expr_rewritable.rs b/src/frontend/src/optimizer/plan_node/expr_rewritable.rs new file mode 100644 index 0000000000000..2957ac89fa1ac --- /dev/null +++ b/src/frontend/src/optimizer/plan_node/expr_rewritable.rs @@ -0,0 +1,47 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::ops::Deref; + +use super::*; +use crate::expr::ExprRewriter; + +/// Rewrites expressions in a `PlanRef`. Due to `Share` operator, +/// the `ExprRewriter` needs to be idempotent i.e. applying it more than once +/// to the same `ExprImpl` will be a noop on subsequent applications. +/// `rewrite_exprs` should only return a plan with the given node modified. +/// To rewrite recursively, call `rewrite_exprs_recursive` on [`RewriteExprsRecursive`]. +pub trait ExprRewritable { + fn has_rewritable_expr(&self) -> bool { + false + } + + fn rewrite_exprs(&self, _r: &mut dyn ExprRewriter) -> PlanRef { + unimplemented!() + } +} + +impl ExprRewritable for PlanRef { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + if self.deref().has_rewritable_expr() { + self.deref().rewrite_exprs(r) + } else { + self.clone() + } + } +} diff --git a/src/frontend/src/optimizer/plan_node/generic/agg.rs b/src/frontend/src/optimizer/plan_node/generic/agg.rs index 6d98797d425b0..7c6815107e3fe 100644 --- a/src/frontend/src/optimizer/plan_node/generic/agg.rs +++ b/src/frontend/src/optimizer/plan_node/generic/agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
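The `ExprRewritable` doc comment above requires every `ExprRewriter` to be idempotent because a `Share` node gives one sub-plan several parents, so the same expression can be handed to the rewriter more than once. A minimal standalone illustration of that property, using simplified stand-ins for the crate's `ExprImpl` and `ExprRewriter` (the types and names below are illustrative only, not the real frontend API):

// Simplified stand-ins for the frontend's `ExprImpl` / `ExprRewriter`;
// the real types live in `crate::expr` and are not reproduced here.
#[derive(Clone, Debug, PartialEq)]
enum Expr {
    InputRef(usize),
    Literal(i64),
    Add(Box<Expr>, Box<Expr>),
}

trait Rewriter {
    fn rewrite(&mut self, e: Expr) -> Expr;
}

/// Folds `Add(Literal, Literal)` into a single literal. Running it again on
/// its own output changes nothing, which is the idempotence the `Share`
/// operator relies on.
struct ConstFold;

impl Rewriter for ConstFold {
    fn rewrite(&mut self, e: Expr) -> Expr {
        match e {
            Expr::Add(l, r) => {
                let (l, r) = (self.rewrite(*l), self.rewrite(*r));
                if let (Expr::Literal(a), Expr::Literal(b)) = (&l, &r) {
                    Expr::Literal(a + b)
                } else {
                    Expr::Add(Box::new(l), Box::new(r))
                }
            }
            other => other,
        }
    }
}

fn main() {
    // `$0 + (1 + 2)` folds to `$0 + 3`; folding it a second time is a no-op.
    let e = Expr::Add(
        Box::new(Expr::InputRef(0)),
        Box::new(Expr::Add(Box::new(Expr::Literal(1)), Box::new(Expr::Literal(2)))),
    );
    let mut r = ConstFold;
    let once = r.rewrite(e);
    let twice = r.rewrite(once.clone());
    assert_eq!(once, twice); // idempotent: a second pass changes nothing
    println!("{:?}", once);  // Add(InputRef(0), Literal(3))
}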
-use std::collections::{BTreeMap, BTreeSet}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::fmt; use itertools::Itertools; @@ -46,6 +46,14 @@ pub struct Agg { pub input: PlanRef, } +impl Agg { + pub(crate) fn rewrite_exprs(&mut self, r: &mut dyn ExprRewriter) { + self.agg_calls.iter_mut().for_each(|call| { + call.filter = call.filter.clone().rewrite_expr(r); + }); + } +} + impl GenericPlanNode for Agg { fn schema(&self) -> Schema { let fields = self @@ -66,7 +74,7 @@ impl GenericPlanNode for Agg { } fn logical_pk(&self) -> Option> { - Some((0..self.group_key.len()).into_iter().collect_vec()) + Some((0..self.group_key.len()).collect_vec()) } fn ctx(&self) -> OptimizerContextRef { @@ -288,9 +296,14 @@ impl Agg { AggCallState::ResultValue } } - AggKind::Sum | AggKind::Sum0 | AggKind::Count | AggKind::Avg => { - AggCallState::ResultValue - } + AggKind::Sum + | AggKind::Sum0 + | AggKind::Count + | AggKind::Avg + | AggKind::StddevPop + | AggKind::StddevSamp + | AggKind::VarPop + | AggKind::VarSamp => AggCallState::ResultValue, AggKind::ApproxCountDistinct => { if !in_append_only { // FIXME: now the approx count distinct on a non-append-only stream does not @@ -353,6 +366,66 @@ impl Agg { internal_table_catalog_builder.build(tb_dist) } + /// Infer dedup tables for distinct agg calls, partitioned by distinct columns. + /// Since distinct agg calls only dedup on the first argument, the key of the result map is + /// `usize`, i.e. the distinct column index. + /// + /// Dedup table schema: + /// group key | distinct key | count for AGG1(distinct x) | count for AGG2(distinct x) | ... + pub fn infer_distinct_dedup_tables( + &self, + me: &impl GenericPlanRef, + vnode_col_idx: Option, + ) -> HashMap { + let in_dist_key = self.input.distribution().dist_column_indices().to_vec(); + let in_fields = self.input.schema().fields(); + + self.agg_calls + .iter() + .enumerate() + .filter(|(_, call)| call.distinct) // only distinct agg calls need dedup table + .into_group_map_by(|(_, call)| call.inputs[0].index) // one table per distinct column + .into_iter() + .map(|(distinct_col, indices_and_calls)| { + let mut table_builder = + TableCatalogBuilder::new(me.ctx().with_options().internal_table_subset()); + + let key_cols = self + .group_key + .iter() + .copied() + .chain(std::iter::once(distinct_col)) + .collect_vec(); + for &idx in &key_cols { + let table_col_idx = table_builder.add_column(&in_fields[idx]); + table_builder.add_order_column(table_col_idx, OrderType::Ascending); + } + + // Agg calls with same distinct column share the same dedup table, but they may have + // different filter conditions, so the count of occurrence of one distinct key may + // differ among different calls. We add one column for each call in the dedup table. 
+ for (call_index, _) in indices_and_calls { + table_builder.add_column(&Field { + data_type: DataType::Int64, + name: format!("count_for_agg_call_{}", call_index), + sub_fields: vec![], + type_name: String::default(), + }); + } + table_builder + .set_value_indices((key_cols.len()..table_builder.columns().len()).collect()); + + let mapping = ColIndexMapping::with_included_columns(&key_cols, in_fields.len()); + if let Some(idx) = vnode_col_idx.and_then(|idx| mapping.try_map(idx)) { + table_builder.set_vnode_col_idx(idx); + } + let dist_key = mapping.rewrite_dist_key(&in_dist_key).unwrap_or_default(); + let table = table_builder.build(dist_key); + (distinct_col, table) + }) + .collect() + } + pub fn decompose(self) -> (Vec, Vec, PlanRef) { (self.agg_calls, self.group_key, self.input) } @@ -572,6 +645,9 @@ impl PlanAggCall { AggKind::ArrayAgg => { panic!("2-phase ArrayAgg is not supported yet") } + AggKind::StddevPop | AggKind::StddevSamp | AggKind::VarPop | AggKind::VarSamp => { + panic!("Stddev/Var aggregation should have been rewritten to Sum, Count and Case") + } }; PlanAggCall { agg_kind: total_agg_kind, diff --git a/src/frontend/src/optimizer/plan_node/generic/dynamic_filter.rs b/src/frontend/src/optimizer/plan_node/generic/dynamic_filter.rs index 03b591df449c3..741334a48678b 100644 --- a/src/frontend/src/optimizer/plan_node/generic/dynamic_filter.rs +++ b/src/frontend/src/optimizer/plan_node/generic/dynamic_filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_node/generic/expand.rs b/src/frontend/src/optimizer/plan_node/generic/expand.rs index b151e1fac7e19..98da72eed6eb2 100644 --- a/src/frontend/src/optimizer/plan_node/generic/expand.rs +++ b/src/frontend/src/optimizer/plan_node/generic/expand.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_node/generic/filter.rs b/src/frontend/src/optimizer/plan_node/generic/filter.rs index fdb8b793310dc..2a606432cb50d 100644 --- a/src/frontend/src/optimizer/plan_node/generic/filter.rs +++ b/src/frontend/src/optimizer/plan_node/generic/filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
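The dedup-table comment above boils down to: one table per distinct column, keyed by the group key plus the distinct key, with one occurrence counter per agg call that shares the column; the calls need separate counters because their filters admit different subsets of rows. A self-contained sketch of that bookkeeping, under the assumption that a call's aggregate consumes a row only when its own counter rises from 0 to 1 (plain i64 values, inserts only, retractions omitted):

use std::collections::HashMap;

// Conceptual model of one dedup table: for each (group key, distinct key) we
// keep one occurrence counter per distinct agg call sharing the column. The
// real table is a state table built by `TableCatalogBuilder`; this only shows
// how the counters gate what the aggregates see.
fn main() {
    // `count(distinct x)` and `sum(distinct x) filter (where x > 10)` over (g, x) rows.
    let rows = [(1i64, 5i64), (1, 5), (1, 20), (2, 20), (2, 20)];
    let mut dedup: HashMap<(i64, i64), [i64; 2]> = HashMap::new(); // (g, x) -> per-call counters
    let (mut count_distinct, mut sum_distinct_filtered) = (HashMap::new(), HashMap::new());

    for (g, x) in rows {
        let counters = dedup.entry((g, x)).or_insert([0, 0]);
        // call 0: no filter, so every row bumps its counter
        counters[0] += 1;
        if counters[0] == 1 {
            *count_distinct.entry(g).or_insert(0i64) += 1;
        }
        // call 1: only rows passing its filter bump its own counter
        if x > 10 {
            counters[1] += 1;
            if counters[1] == 1 {
                *sum_distinct_filtered.entry(g).or_insert(0i64) += x;
            }
        }
    }
    assert_eq!(count_distinct[&1], 2);          // group 1 saw distinct x in {5, 20}
    assert_eq!(sum_distinct_filtered[&1], 20);  // only x = 20 passed the filter
    assert_eq!(count_distinct[&2], 1);
}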
@@ -15,6 +15,7 @@ use risingwave_common::catalog::Schema; use super::{GenericPlanNode, GenericPlanRef}; +use crate::expr::ExprRewriter; use crate::optimizer::optimizer_context::OptimizerContextRef; use crate::utils::Condition; @@ -41,3 +42,9 @@ impl GenericPlanNode for Filter { self.input.ctx() } } + +impl Filter { + pub(crate) fn rewrite_exprs(&mut self, r: &mut dyn ExprRewriter) { + self.predicate = self.predicate.clone().rewrite_expr(r); + } +} diff --git a/src/frontend/src/optimizer/plan_node/generic/hop_window.rs b/src/frontend/src/optimizer/plan_node/generic/hop_window.rs index f21d035b01631..28f0b57eadfa1 100644 --- a/src/frontend/src/optimizer/plan_node/generic/hop_window.rs +++ b/src/frontend/src/optimizer/plan_node/generic/hop_window.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_node/generic/join.rs b/src/frontend/src/optimizer/plan_node/generic/join.rs index 9e4b394b55bfb..c7b2245c6f285 100644 --- a/src/frontend/src/optimizer/plan_node/generic/join.rs +++ b/src/frontend/src/optimizer/plan_node/generic/join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ use risingwave_common::catalog::Schema; use risingwave_pb::plan_common::JoinType; use super::{EqJoinPredicate, GenericPlanNode, GenericPlanRef}; +use crate::expr::ExprRewriter; use crate::optimizer::optimizer_context::OptimizerContextRef; use crate::utils::{ColIndexMapping, Condition}; @@ -34,6 +35,12 @@ pub struct Join { pub output_indices: Vec, } +impl Join { + pub(crate) fn rewrite_exprs(&mut self, r: &mut dyn ExprRewriter) { + self.on = self.on.clone().rewrite_expr(r); + } +} + impl GenericPlanNode for Join { fn schema(&self) -> Schema { let left_schema = self.left.schema(); diff --git a/src/frontend/src/optimizer/plan_node/generic/mod.rs b/src/frontend/src/optimizer/plan_node/generic/mod.rs index 8cf462826e53f..19f0d9a497e6c 100644 --- a/src/frontend/src/optimizer/plan_node/generic/mod.rs +++ b/src/frontend/src/optimizer/plan_node/generic/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_node/generic/project.rs b/src/frontend/src/optimizer/plan_node/generic/project.rs index e16618f43cd62..fe917c951fe0b 100644 --- a/src/frontend/src/optimizer/plan_node/generic/project.rs +++ b/src/frontend/src/optimizer/plan_node/generic/project.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -14,13 +14,15 @@ use std::collections::HashMap; use std::fmt; +use std::fmt::Formatter; use fixedbitset::FixedBitSet; use itertools::Itertools; use risingwave_common::catalog::{Field, Schema}; +use risingwave_common::util::iter_util::ZipEqFast; use super::{GenericPlanNode, GenericPlanRef}; -use crate::expr::{assert_input_ref, Expr, ExprDisplay, ExprImpl, InputRef}; +use crate::expr::{assert_input_ref, Expr, ExprDisplay, ExprImpl, ExprRewriter, InputRef}; use crate::optimizer::optimizer_context::OptimizerContextRef; use crate::utils::ColIndexMapping; @@ -50,26 +52,44 @@ pub struct Project { _private: (), } +impl Project { + pub(crate) fn rewrite_exprs(&mut self, r: &mut dyn ExprRewriter) { + self.exprs = self + .exprs + .iter() + .map(|e| r.rewrite_expr(e.clone())) + .collect(); + } +} + impl GenericPlanNode for Project { fn schema(&self) -> Schema { let o2i = self.o2i_col_mapping(); let exprs = &self.exprs; let input_schema = self.input.schema(); + let ctx = self.ctx(); let fields = exprs .iter() .enumerate() - .map(|(id, expr)| { + .map(|(i, expr)| { // Get field info from o2i. - let (name, sub_fields, type_name) = match o2i.try_map(id) { + let (name, sub_fields, type_name) = match o2i.try_map(i) { Some(input_idx) => { let field = input_schema.fields()[input_idx].clone(); (field.name, field.sub_fields, field.type_name) } - None => ( - format!("{:?}", ExprDisplay { expr, input_schema }), - vec![], - String::new(), - ), + None => match expr { + ExprImpl::InputRef(_) | ExprImpl::Literal(_) => ( + format!("{:?}", ExprDisplay { expr, input_schema }), + vec![], + String::new(), + ), + _ => ( + format!("$expr{}", ctx.next_expr_display_id()), + vec![], + String::new(), + ), + }, }; Field::with_struct(expr.return_type(), name, sub_fields, type_name) }) @@ -151,16 +171,30 @@ impl Project { (self.exprs, self.input) } - pub fn fmt_with_name(&self, f: &mut fmt::Formatter<'_>, name: &str) -> fmt::Result { + pub fn fmt_with_name( + &self, + f: &mut fmt::Formatter<'_>, + name: &str, + schema: &Schema, + ) -> fmt::Result { let mut builder = f.debug_struct(name); builder.field( "exprs", &self .exprs .iter() - .map(|expr| ExprDisplay { - expr, - input_schema: self.input.schema(), + .zip_eq_fast(schema.fields().iter()) + .map(|(expr, field)| AliasedExpr { + expr: ExprDisplay { + expr, + input_schema: self.input.schema(), + }, + alias: { + match expr { + ExprImpl::InputRef(_) | ExprImpl::Literal(_) => None, + _ => Some(field.name.clone()), + } + }, }) .collect_vec(), ); @@ -197,7 +231,7 @@ impl Project { && self .exprs .iter() - .zip_eq(self.input.schema().fields()) + .zip_eq_fast(self.input.schema().fields()) .enumerate() .all(|(i, (expr, field))| { matches!(expr, ExprImpl::InputRef(input_ref) if **input_ref == InputRef::new(i, field.data_type())) @@ -248,3 +282,18 @@ impl ProjectBuilder { Project::new(self.exprs, input) } } + +/// Auxiliary struct for displaying `expr AS alias` +pub struct AliasedExpr<'a> { + pub expr: ExprDisplay<'a>, + pub alias: Option, +} + +impl fmt::Debug for AliasedExpr<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match &self.alias { + Some(alias) => write!(f, "{:?} as {}", self.expr, alias), + None => write!(f, "{:?}", self.expr), + } + } +} diff --git a/src/frontend/src/optimizer/plan_node/generic/project_set.rs b/src/frontend/src/optimizer/plan_node/generic/project_set.rs index 2bd4d69759c00..bafbb29cf8d5a 100644 --- a/src/frontend/src/optimizer/plan_node/generic/project_set.rs +++ b/src/frontend/src/optimizer/plan_node/generic/project_set.rs @@ -1,4 +1,4 @@ 
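With the `Project` schema change above, an output expression that is not a bare column reference or literal gets a generated `$expr<N>` field name, and `fmt_with_name` renders it as `expr as alias` through `AliasedExpr`. A small sketch of that display convention, with plain strings standing in for the real `ExprDisplay` (illustrative only):

use std::fmt;

// Minimal re-creation of the `expr AS alias` display idea: derived expressions
// get an " as <alias>" suffix, bare column references and literals do not.
struct AliasedExpr<'a> {
    expr: &'a str,          // stand-in for `ExprDisplay`
    alias: Option<&'a str>, // e.g. "$expr1", generated from the optimizer context
}

impl fmt::Debug for AliasedExpr<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.alias {
            Some(alias) => write!(f, "{} as {}", self.expr, alias),
            None => write!(f, "{}", self.expr),
        }
    }
}

fn main() {
    let exprs = [
        AliasedExpr { expr: "(a + b)", alias: Some("$expr1") },
        AliasedExpr { expr: "c", alias: None },
    ];
    // Prints: [(a + b) as $expr1, c]
    println!("{:?}", exprs);
}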
-// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ use risingwave_common::catalog::{Field, Schema}; use risingwave_common::types::DataType; use super::{GenericPlanNode, GenericPlanRef}; -use crate::expr::{Expr, ExprDisplay, ExprImpl}; +use crate::expr::{Expr, ExprDisplay, ExprImpl, ExprRewriter}; use crate::optimizer::optimizer_context::OptimizerContextRef; use crate::utils::ColIndexMapping; @@ -34,6 +34,16 @@ pub struct ProjectSet { pub input: PlanRef, } +impl ProjectSet { + pub(crate) fn rewrite_exprs(&mut self, r: &mut dyn ExprRewriter) { + self.select_list = self + .select_list + .iter() + .map(|e| r.rewrite_expr(e.clone())) + .collect(); + } +} + impl GenericPlanNode for ProjectSet { fn schema(&self) -> Schema { let input_schema = self.input.schema(); diff --git a/src/frontend/src/optimizer/plan_node/generic/scan.rs b/src/frontend/src/optimizer/plan_node/generic/scan.rs index 1e0a72bba783b..a66e8af16b0e8 100644 --- a/src/frontend/src/optimizer/plan_node/generic/scan.rs +++ b/src/frontend/src/optimizer/plan_node/generic/scan.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ use risingwave_common::catalog::{ColumnDesc, Field, Schema, TableDesc}; use super::GenericPlanNode; use crate::catalog::{ColumnId, IndexCatalog}; +use crate::expr::ExprRewriter; use crate::optimizer::optimizer_context::OptimizerContextRef; use crate::utils::Condition; @@ -36,6 +37,14 @@ pub struct Scan { pub indexes: Vec>, /// The pushed down predicates. It refers to column indexes of the table. pub predicate: Condition, + /// Help RowSeqScan executor use a better chunk size + pub chunk_size: Option, +} + +impl Scan { + pub(crate) fn rewrite_exprs(&mut self, r: &mut dyn ExprRewriter) { + self.predicate = self.predicate.clone().rewrite_expr(r); + } } impl GenericPlanNode for Scan { diff --git a/src/frontend/src/optimizer/plan_node/generic/share.rs b/src/frontend/src/optimizer/plan_node/generic/share.rs index ffc4f6b66421a..e67495d51a99a 100644 --- a/src/frontend/src/optimizer/plan_node/generic/share.rs +++ b/src/frontend/src/optimizer/plan_node/generic/share.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_node/generic/source.rs b/src/frontend/src/optimizer/plan_node/generic/source.rs index d6506ccd8c063..3c1787617822b 100644 --- a/src/frontend/src/optimizer/plan_node/generic/source.rs +++ b/src/frontend/src/optimizer/plan_node/generic/source.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/frontend/src/optimizer/plan_node/generic/top_n.rs b/src/frontend/src/optimizer/plan_node/generic/top_n.rs index 82577b31a59cc..6f9710985f646 100644 --- a/src/frontend/src/optimizer/plan_node/generic/top_n.rs +++ b/src/frontend/src/optimizer/plan_node/generic/top_n.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_node/generic/union.rs b/src/frontend/src/optimizer/plan_node/generic/union.rs index 57bb33f429fc4..a3e28f935cbde 100644 --- a/src/frontend/src/optimizer/plan_node/generic/union.rs +++ b/src/frontend/src/optimizer/plan_node/generic/union.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_node/logical_agg.rs b/src/frontend/src/optimizer/plan_node/logical_agg.rs index 33af0c57018f9..dc2a4881419e1 100644 --- a/src/frontend/src/optimizer/plan_node/logical_agg.rs +++ b/src/frontend/src/optimizer/plan_node/logical_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,12 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; use std::{fmt, iter}; use fixedbitset::FixedBitSet; use itertools::Itertools; use risingwave_common::error::{ErrorCode, Result, TrackingIssue}; -use risingwave_common::types::DataType; +use risingwave_common::types::{DataType, Datum, OrderedF64, ScalarImpl}; use risingwave_expr::expr::AggKind; use super::generic::{ @@ -25,9 +26,9 @@ use super::generic::{ ProjectBuilder, }; use super::{ - BatchHashAgg, BatchSimpleAgg, ColPrunable, PlanBase, PlanRef, PlanTreeNodeUnary, - PredicatePushdown, StreamGlobalSimpleAgg, StreamHashAgg, StreamLocalSimpleAgg, StreamProject, - ToBatch, ToStream, + BatchHashAgg, BatchSimpleAgg, ColPrunable, ExprRewritable, PlanBase, PlanRef, + PlanTreeNodeUnary, PredicatePushdown, StreamGlobalSimpleAgg, StreamHashAgg, + StreamLocalSimpleAgg, StreamProject, ToBatch, ToStream, }; use crate::catalog::table_catalog::TableCatalog; use crate::expr::{ @@ -69,6 +70,15 @@ impl LogicalAgg { self.core.infer_stream_agg_state(&self.base, vnode_col_idx) } + /// Infer dedup tables for distinct agg calls. + pub fn infer_distinct_dedup_tables( + &self, + vnode_col_idx: Option, + ) -> HashMap { + self.core + .infer_distinct_dedup_tables(&self.base, vnode_col_idx) + } + /// Generate plan for stateless 2-phase streaming agg. /// Should only be used iff input is distributed. Input must be converted to stream form. fn gen_stateless_two_phase_streaming_agg_plan(&self, stream_input: PlanRef) -> Result { @@ -150,6 +160,9 @@ impl LogicalAgg { } else { let exchange = RequiredDist::shard_by_key(input_col_num, self.group_key()) .enforce_if_not_satisfies(local_agg.into(), &Order::any())?; + // Local phase should have reordered the group keys into their required order. + // we can just follow it. 
+ let group_key = (0..self.group_key().len()).collect(); let global_agg = StreamHashAgg::new( LogicalAgg::new( self.agg_calls() @@ -162,7 +175,7 @@ impl LogicalAgg { ) }) .collect(), - self.group_key().to_vec(), + group_key, exchange, ), None, @@ -172,10 +185,10 @@ impl LogicalAgg { } fn gen_dist_stream_agg_plan(&self, stream_input: PlanRef) -> Result { - // having group key, is not simple agg. we will just use shuffle agg - // TODO(stonepage): in some situation the 2-phase agg is better. maybe some switch or - // hints for it. - if !self.group_key().is_empty() { + // Shuffle agg if group key is present. + // If we are forced to use two phase aggregation, + // we should not do shuffle aggregation. + if !self.group_key().is_empty() && !self.two_phase_agg_forced() { return Ok(StreamHashAgg::new( self.clone_with_input( RequiredDist::shard_by_key(stream_input.schema().len(), self.group_key()) @@ -186,7 +199,7 @@ impl LogicalAgg { .into()); } - // now only simple agg + // now only simple agg (either single or two phase). let input_dist = stream_input.distribution().clone(); let input_append_only = stream_input.append_only(); @@ -199,7 +212,7 @@ impl LogicalAgg { // some agg function can not rewrite to 2-phase agg // we can only generate stand alone plan for the simple agg - let all_agg_calls_can_use_two_phase = self.can_agg_two_phase(); + let all_agg_calls_can_use_two_phase = self.can_two_phase_agg(); if !all_agg_calls_can_use_two_phase { return gen_single_plan(stream_input); } @@ -210,7 +223,10 @@ impl LogicalAgg { matches!(c.agg_kind, AggKind::Sum | AggKind::Count) || (matches!(c.agg_kind, AggKind::Min | AggKind::Max) && input_append_only) }); - if all_local_are_stateless && input_dist.satisfies(&RequiredDist::AnyShard) { + if all_local_are_stateless + && input_dist.satisfies(&RequiredDist::AnyShard) + && self.group_key().is_empty() + { return self.gen_stateless_two_phase_streaming_agg_plan(stream_input); } @@ -232,15 +248,34 @@ impl LogicalAgg { .any(|call| matches!(call.agg_kind, AggKind::StringAgg | AggKind::ArrayAgg)) } - pub(crate) fn can_agg_two_phase(&self) -> bool { - self.agg_calls().iter().all(|call| { - matches!( - call.agg_kind, - AggKind::Min | AggKind::Max | AggKind::Sum | AggKind::Count - ) && !call.distinct - // QUESTION: why do we need `&& call.order_by_fields.is_empty()` ? - // && call.order_by_fields.is_empty() - }) && !self.is_agg_result_affected_by_order() + pub(crate) fn two_phase_agg_forced(&self) -> bool { + self.base + .ctx() + .session_ctx() + .config() + .get_force_two_phase_agg() + } + + fn two_phase_agg_enabled(&self) -> bool { + self.base + .ctx() + .session_ctx() + .config() + .get_enable_two_phase_agg() + } + + pub(crate) fn can_two_phase_agg(&self) -> bool { + !self.agg_calls().is_empty() + && self.agg_calls().iter().all(|call| { + matches!( + call.agg_kind, + AggKind::Min | AggKind::Max | AggKind::Sum | AggKind::Count + ) && !call.distinct + // QUESTION: why do we need `&& call.order_by_fields.is_empty()` ? 
+ // && call.order_by_fields.is_empty() + }) + && !self.is_agg_result_affected_by_order() + && self.two_phase_agg_enabled() } // Check if the output of the aggregation needs to be sorted and return ordering req by group @@ -444,16 +479,21 @@ impl LogicalAggBuilder { agg_call: AggCall, ) -> std::result::Result { let return_type = agg_call.return_type(); - let (agg_kind, inputs, distinct, mut order_by, filter) = agg_call.decompose(); + let (agg_kind, inputs, mut distinct, mut order_by, filter) = agg_call.decompose(); match &agg_kind { - AggKind::Min - | AggKind::Max - | AggKind::Sum + AggKind::Min | AggKind::Max => { + distinct = false; + order_by = OrderBy::any(); + } + AggKind::Sum | AggKind::Count | AggKind::Avg - | AggKind::ApproxCountDistinct => { - // this order by is unnecessary. - order_by = OrderBy::new(vec![]); + | AggKind::ApproxCountDistinct + | AggKind::StddevSamp + | AggKind::StddevPop + | AggKind::VarPop + | AggKind::VarSamp => { + order_by = OrderBy::any(); } _ => { // To be conservative, we just treat newly added AggKind in the future as not @@ -497,58 +537,253 @@ impl LogicalAggBuilder { ) })?; - if agg_kind == AggKind::Avg { - assert_eq!(inputs.len(), 1); + match agg_kind { + AggKind::Avg => { + assert_eq!(inputs.len(), 1); + + let left_return_type = + AggCall::infer_return_type(&AggKind::Sum, &[inputs[0].return_type()]).unwrap(); + + // Rewrite avg to cast(sum as avg_return_type) / count. + self.agg_calls.push(PlanAggCall { + agg_kind: AggKind::Sum, + return_type: left_return_type.clone(), + inputs: inputs.clone(), + distinct, + order_by_fields: order_by_fields.clone(), + filter: filter.clone(), + }); + let left = ExprImpl::from(InputRef::new( + self.group_key.len() + self.agg_calls.len() - 1, + left_return_type, + )) + .cast_implicit(return_type) + .unwrap(); + + let right_return_type = + AggCall::infer_return_type(&AggKind::Count, &[inputs[0].return_type()]) + .unwrap(); + + self.agg_calls.push(PlanAggCall { + agg_kind: AggKind::Count, + return_type: right_return_type.clone(), + inputs, + distinct, + order_by_fields, + filter, + }); - let left_return_type = - AggCall::infer_return_type(&AggKind::Sum, &[inputs[0].return_type()]).unwrap(); + let right = InputRef::new( + self.group_key.len() + self.agg_calls.len() - 1, + right_return_type, + ); - // Rewrite avg to cast(sum as avg_return_type) / count. - self.agg_calls.push(PlanAggCall { - agg_kind: AggKind::Sum, - return_type: left_return_type.clone(), - inputs: inputs.clone(), - distinct, - order_by_fields: order_by_fields.clone(), - filter: filter.clone(), - }); - let left = ExprImpl::from(InputRef::new( - self.group_key.len() + self.agg_calls.len() - 1, - left_return_type, - )) - .cast_implicit(return_type) - .unwrap(); + Ok(ExprImpl::from( + FunctionCall::new(ExprType::Divide, vec![left, right.into()]).unwrap(), + )) + } - let right_return_type = - AggCall::infer_return_type(&AggKind::Count, &[inputs[0].return_type()]).unwrap(); + // We compute `var_samp` as + // (sum(sq) - sum * sum / count) / (count - 1) + // and `var_pop` as + // (sum(sq) - sum * sum / count) / count + // Since we don't have the square function, we use the plain Multiply for squaring, + // which is in a sense more general than the pow function, especially when calculating + // covariances in the future. 
Also we don't have the sqrt function for rooting, so we + // use pow(x, 0.5) to simulate + AggKind::StddevPop | AggKind::StddevSamp | AggKind::VarPop | AggKind::VarSamp => { + let input = inputs.iter().exactly_one().unwrap(); + + // first, we compute sum of squared as sum_sq + let squared_input_expr = ExprImpl::from( + FunctionCall::new( + ExprType::Multiply, + vec![ExprImpl::from(input.clone()), ExprImpl::from(input.clone())], + ) + .unwrap(), + ); + + let squared_input_proj_index = self + .input_proj_builder + .add_expr(&squared_input_expr) + .unwrap(); + + let sum_of_squares_return_type = + AggCall::infer_return_type(&AggKind::Sum, &[squared_input_expr.return_type()]) + .unwrap(); + + self.agg_calls.push(PlanAggCall { + agg_kind: AggKind::Sum, + return_type: sum_of_squares_return_type.clone(), + inputs: vec![InputRef::new( + squared_input_proj_index, + squared_input_expr.return_type(), + )], + distinct, + order_by_fields: order_by_fields.clone(), + filter: filter.clone(), + }); - self.agg_calls.push(PlanAggCall { - agg_kind: AggKind::Count, - return_type: right_return_type.clone(), - inputs, - distinct, - order_by_fields, - filter, - }); + let sum_of_squares_expr = ExprImpl::from(InputRef::new( + self.group_key.len() + self.agg_calls.len() - 1, + sum_of_squares_return_type, + )) + .cast_implicit(return_type.clone()) + .unwrap(); + + // after that, we compute sum + let sum_return_type = + AggCall::infer_return_type(&AggKind::Sum, &[input.return_type()]).unwrap(); + + self.agg_calls.push(PlanAggCall { + agg_kind: AggKind::Sum, + return_type: sum_return_type.clone(), + inputs: inputs.clone(), + distinct, + order_by_fields: order_by_fields.clone(), + filter: filter.clone(), + }); - let right = InputRef::new( - self.group_key.len() + self.agg_calls.len() - 1, - right_return_type, - ); + let sum_expr = ExprImpl::from(InputRef::new( + self.group_key.len() + self.agg_calls.len() - 1, + sum_return_type, + )) + .cast_implicit(return_type.clone()) + .unwrap(); + + // then, we compute count + let count_return_type = + AggCall::infer_return_type(&AggKind::Count, &[input.return_type()]).unwrap(); + + self.agg_calls.push(PlanAggCall { + agg_kind: AggKind::Count, + return_type: count_return_type.clone(), + inputs, + distinct, + order_by_fields, + filter, + }); - Ok(ExprImpl::from( - FunctionCall::new(ExprType::Divide, vec![left, right.into()]).unwrap(), - )) - } else { - self.agg_calls.push(PlanAggCall { - agg_kind, - return_type: return_type.clone(), - inputs, - distinct, - order_by_fields, - filter, - }); - Ok(InputRef::new(self.group_key.len() + self.agg_calls.len() - 1, return_type).into()) + let count_expr = ExprImpl::from(InputRef::new( + self.group_key.len() + self.agg_calls.len() - 1, + count_return_type, + )); + + // we start with variance + + // sum * sum + let square_of_sum_expr = ExprImpl::from( + FunctionCall::new(ExprType::Multiply, vec![sum_expr.clone(), sum_expr]) + .unwrap(), + ); + + // sum_sq - sum * sum / count + let numerator_expr = ExprImpl::from( + FunctionCall::new( + ExprType::Subtract, + vec![ + sum_of_squares_expr, + ExprImpl::from( + FunctionCall::new( + ExprType::Divide, + vec![square_of_sum_expr, count_expr.clone()], + ) + .unwrap(), + ), + ], + ) + .unwrap(), + ); + + // count or count - 1 + let denominator_expr = match agg_kind { + AggKind::StddevPop | AggKind::VarPop => count_expr.clone(), + AggKind::StddevSamp | AggKind::VarSamp => ExprImpl::from( + FunctionCall::new( + ExprType::Subtract, + vec![ + count_expr.clone(), + ExprImpl::from(Literal::new( + 
Datum::from(ScalarImpl::Int64(1)), + DataType::Int64, + )), + ], + ) + .unwrap(), + ), + _ => unreachable!(), + }; + + let mut target_expr = ExprImpl::from( + FunctionCall::new(ExprType::Divide, vec![numerator_expr, denominator_expr]) + .unwrap(), + ); + + // stddev = sqrt(variance) + if matches!(agg_kind, AggKind::StddevPop | AggKind::StddevSamp) { + target_expr = ExprImpl::from( + FunctionCall::new( + ExprType::Pow, + vec![ + target_expr.clone(), + // TODO: The decimal implementation now still relies on float64, so + // float64 is still used here + ExprImpl::from(Literal::new( + Datum::from(ScalarImpl::Float64(OrderedF64::from(0.5))), + DataType::Float64, + )), + ], + ) + .unwrap(), + ); + } + + match agg_kind { + AggKind::VarPop | AggKind::StddevPop => Ok(target_expr), + AggKind::StddevSamp | AggKind::VarSamp => { + let less_than_expr = ExprImpl::from( + FunctionCall::new( + ExprType::LessThanOrEqual, + vec![ + count_expr, + ExprImpl::from(Literal::new( + Datum::from(ScalarImpl::Int64(1)), + DataType::Int64, + )), + ], + ) + .unwrap(), + ); + let null_expr = ExprImpl::from(Literal::new(None, return_type)); + + let case_expr = ExprImpl::from( + FunctionCall::new( + ExprType::Case, + vec![less_than_expr, null_expr, target_expr], + ) + .unwrap(), + ); + + Ok(case_expr) + } + _ => unreachable!(), + } + } + + _ => { + self.agg_calls.push(PlanAggCall { + agg_kind, + return_type: return_type.clone(), + inputs, + distinct, + order_by_fields, + filter, + }); + Ok( + InputRef::new(self.group_key.len() + self.agg_calls.len() - 1, return_type) + .into(), + ) + } } } } @@ -788,6 +1023,22 @@ impl fmt::Display for LogicalAgg { } } +impl ExprRewritable for LogicalAgg { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + let mut core = self.core.clone(); + core.rewrite_exprs(r); + Self { + base: self.base.clone_with_new_plan_id(), + core, + } + .into() + } +} + impl ColPrunable for LogicalAgg { fn prune_col(&self, required_cols: &[usize], ctx: &mut ColumnPruningContext) -> PlanRef { let group_key_required_cols = FixedBitSet::from_iter(self.group_key().iter().copied()); @@ -948,7 +1199,7 @@ impl ToStream for LogicalAgg { // LogicalAgg. // Please note that the index of group key need not be changed. - let mut output_indices = (0..self.schema().len()).into_iter().collect_vec(); + let mut output_indices = (0..self.schema().len()).collect_vec(); output_indices .iter_mut() .skip(self.group_key().len()) diff --git a/src/frontend/src/optimizer/plan_node/logical_apply.rs b/src/frontend/src/optimizer/plan_node/logical_apply.rs index 610ac069b8d47..4ec4aac229f99 100644 --- a/src/frontend/src/optimizer/plan_node/logical_apply.rs +++ b/src/frontend/src/optimizer/plan_node/logical_apply.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
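The comments in this rewrite describe the whole variance family in terms of Sum, Count, and arithmetic: var_pop = (sum(x*x) - sum(x)*sum(x)/count) / count, var_samp divides by count - 1 instead, stddev takes pow(variance, 0.5), and the sample variants return NULL when count <= 1. A standalone sanity check of that algebra over plain f64 (the planner actually works on SQL numeric types with casts, so this is only an illustration of the formula, not the engine's arithmetic):

// Verifies the sum/count formulation used by the rewrite against the textbook
// definition of variance, over f64 for simplicity.
fn var_from_sums(sum_sq: f64, sum: f64, n: f64, sample: bool) -> Option<f64> {
    if sample && n <= 1.0 {
        return None; // matches the NULL case emitted for var_samp / stddev_samp
    }
    let denom = if sample { n - 1.0 } else { n };
    Some((sum_sq - sum * sum / n) / denom)
}

fn main() {
    let xs = [2.0_f64, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
    let n = xs.len() as f64;
    let sum: f64 = xs.iter().sum();
    let sum_sq: f64 = xs.iter().map(|x| x * x).sum();

    let mean = sum / n;
    let var_pop_direct = xs.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n;

    let var_pop = var_from_sums(sum_sq, sum, n, false).unwrap();
    let stddev_pop = var_pop.powf(0.5); // the plan uses pow(x, 0.5) in place of sqrt

    assert!((var_pop - var_pop_direct).abs() < 1e-9);
    assert!((stddev_pop - 2.0).abs() < 1e-9); // for this data set, stddev_pop is exactly 2
    assert_eq!(var_from_sums(9.0, 3.0, 1.0, true), None); // sample variance over one row is NULL
}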
@@ -25,8 +25,8 @@ use super::{ }; use crate::expr::{CorrelatedId, Expr, ExprImpl, ExprRewriter, InputRef}; use crate::optimizer::plan_node::{ - ColumnPruningContext, LogicalFilter, PredicatePushdownContext, RewriteStreamContext, - ToStreamContext, + ColumnPruningContext, ExprRewritable, LogicalFilter, PredicatePushdownContext, + RewriteStreamContext, ToStreamContext, }; use crate::optimizer::property::FunctionalDependencySet; use crate::utils::{ColIndexMapping, Condition, ConditionDisplay}; @@ -295,6 +295,19 @@ impl ColPrunable for LogicalApply { } } +impl ExprRewritable for LogicalApply { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + let mut new = self.clone(); + new.on = new.on.rewrite_expr(r); + new.base = new.base.clone_with_new_plan_id(); + new.into() + } +} + impl PredicatePushdown for LogicalApply { fn predicate_pushdown( &self, diff --git a/src/frontend/src/optimizer/plan_node/logical_delete.rs b/src/frontend/src/optimizer/plan_node/logical_delete.rs index 698dded990e94..d69cc6c867c04 100644 --- a/src/frontend/src/optimizer/plan_node/logical_delete.rs +++ b/src/frontend/src/optimizer/plan_node/logical_delete.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,8 +19,8 @@ use risingwave_common::error::Result; use risingwave_common::types::DataType; use super::{ - gen_filter_and_pushdown, BatchDelete, ColPrunable, PlanBase, PlanRef, PlanTreeNodeUnary, - PredicatePushdown, ToBatch, ToStream, + gen_filter_and_pushdown, BatchDelete, ColPrunable, ExprRewritable, PlanBase, PlanRef, + PlanTreeNodeUnary, PredicatePushdown, ToBatch, ToStream, }; use crate::catalog::TableId; use crate::optimizer::plan_node::{ @@ -126,6 +126,8 @@ impl ColPrunable for LogicalDelete { } } +impl ExprRewritable for LogicalDelete {} + impl PredicatePushdown for LogicalDelete { fn predicate_pushdown( &self, diff --git a/src/frontend/src/optimizer/plan_node/logical_expand.rs b/src/frontend/src/optimizer/plan_node/logical_expand.rs index 4ef6b09c83cea..c314acdac6293 100644 --- a/src/frontend/src/optimizer/plan_node/logical_expand.rs +++ b/src/frontend/src/optimizer/plan_node/logical_expand.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -20,7 +20,7 @@ use risingwave_common::error::Result; use super::generic::GenericPlanNode; use super::{ - gen_filter_and_pushdown, generic, BatchExpand, ColPrunable, PlanBase, PlanRef, + gen_filter_and_pushdown, generic, BatchExpand, ColPrunable, ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, PredicatePushdown, StreamExpand, ToBatch, ToStream, }; use crate::optimizer::plan_node::{ @@ -157,6 +157,8 @@ impl ColPrunable for LogicalExpand { } } +impl ExprRewritable for LogicalExpand {} + impl PredicatePushdown for LogicalExpand { fn predicate_pushdown( &self, diff --git a/src/frontend/src/optimizer/plan_node/logical_filter.rs b/src/frontend/src/optimizer/plan_node/logical_filter.rs index fc3164e4d2741..aecb974010e17 100644 --- a/src/frontend/src/optimizer/plan_node/logical_filter.rs +++ b/src/frontend/src/optimizer/plan_node/logical_filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,22 +16,18 @@ use std::fmt; use fixedbitset::FixedBitSet; use itertools::Itertools; +use risingwave_common::bail; use risingwave_common::error::Result; -use risingwave_common::types::DataType; -use risingwave_common::{must_match, try_match_expand}; -use risingwave_expr::ExprError; -use risingwave_pb::expr::expr_node::Type; use super::generic::{self, GenericPlanNode}; use super::{ - ColPrunable, CollectInputRef, LogicalProject, PlanBase, PlanRef, PlanTreeNodeUnary, - PredicatePushdown, ToBatch, ToStream, + ColPrunable, CollectInputRef, ExprRewritable, LogicalProject, PlanBase, PlanRef, + PlanTreeNodeUnary, PredicatePushdown, ToBatch, ToStream, }; -use crate::expr::{assert_input_ref, ExprImpl, InputRef}; -use crate::optimizer::plan_node::stream_now::StreamNow; +use crate::expr::{assert_input_ref, ExprImpl, ExprRewriter}; use crate::optimizer::plan_node::{ BatchFilter, ColumnPruningContext, PredicatePushdownContext, RewriteStreamContext, - StreamDynamicFilter, StreamFilter, StreamProject, ToStreamContext, + StreamFilter, ToStreamContext, }; use crate::utils::{ColIndexMapping, Condition, ConditionDisplay}; @@ -179,6 +175,22 @@ impl ColPrunable for LogicalFilter { } } +impl ExprRewritable for LogicalFilter { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + let mut core = self.core.clone(); + core.rewrite_exprs(r); + Self { + base: self.base.clone_with_new_plan_id(), + core, + } + .into() + } +} + impl PredicatePushdown for LogicalFilter { fn predicate_pushdown( &self, @@ -198,18 +210,6 @@ impl ToBatch for LogicalFilter { } } -/// Apply filters by selectivity and then applicabiliy of watermark - equality condition -/// first, then conditions of the form T > now() - Y (the timestamp needs to be greater -/// than a watermark), then conditions similar to T < now() - Y -fn convert_comparator_to_priority(comparator: Type) -> i32 { - match comparator { - Type::Equal => 0, - Type::GreaterThan | Type::GreaterThanOrEqual => 1, - Type::LessThan | Type::LessThanOrEqual => 2, - _ => -1, - } -} - impl ToStream for LogicalFilter { fn to_stream(&self, ctx: &mut ToStreamContext) -> Result { let new_input = self.input().to_stream(ctx)?; @@ -220,104 +220,23 @@ impl ToStream for LogicalFilter { .iter() .any(|cond| cond.count_nows() > 0); if has_now { - let mut conjunctions = predicate.conjunctions.clone(); - // Check if the now expr is valid - for conjunction in 
&conjunctions { - if conjunction.count_nows() > 0 { - let comparator_expr = try_match_expand!(conjunction, ExprImpl::FunctionCall)?; - if convert_comparator_to_priority(comparator_expr.get_expr_type()) < 0 { - // TODO: We should avoid using `ExprError` in frontend, same 2 below. - return Err(ExprError::InvalidParam { - name: "now", - reason: String::from("now expression must be placed in a comparison"), - } - .into()); - } - try_match_expand!(&comparator_expr.inputs()[0], ExprImpl::InputRef)?; - let now_expr = - try_match_expand!(&comparator_expr.inputs()[1], ExprImpl::FunctionCall)?; - match now_expr.get_expr_type() { - Type::Now => { - // Do nothing. - } - Type::Add | Type::Subtract => { - if try_match_expand!(&now_expr.inputs()[0], ExprImpl::FunctionCall)? - .get_expr_type() - != Type::Now - || !matches!( - &now_expr.inputs()[1], - ExprImpl::Literal(_) | ExprImpl::FunctionCall(_) - ) - || now_expr.inputs()[1].has_input_ref() - { - return Err(ExprError::InvalidParam { - name: "now", - reason: String::from("expressions containing now must be of the form `col [cmp] now() +- [literal]`"), - } - .into()); - } - } - _ => { - return Err(ExprError::InvalidParam { - name: "now", - reason: String::from("now delta expression must be a trivial add/subtract expression"), - } - .into()); - } - } - } - } - - let mut now_conds = conjunctions - .drain_filter(|cond| cond.count_nows() > 0) - .map(|cond| { - must_match!(cond, ExprImpl::FunctionCall(function_call) => { - (convert_comparator_to_priority(function_call.get_expr_type()), function_call) - }) - }) - .collect_vec(); - now_conds.sort_by_key(|(comparator_priority, _)| *comparator_priority); - // We do simple logical filters first because it can reduce size of dynamic filter's - // cache. - - let mut cur_streaming = if conjunctions.is_empty() { - new_input - } else { - let simple_logical = LogicalFilter::new(self.input(), Condition { conjunctions }); - PlanRef::from(StreamFilter::new( - simple_logical.clone_with_input(new_input), - )) - }; - // Rewrite each now condition. Replace `NowExpr` with `StreamNow` and replace - // `LogicalFilter` with `DynamicFilter`. - for (_, now_cond) in now_conds { - let left_index = must_match!(now_cond.inputs()[0], ExprImpl::InputRef(box ref input_ref) => input_ref.index()); - let rht = must_match!(now_cond.inputs()[1], ExprImpl::FunctionCall(box ref function_call) => { - match function_call.get_expr_type() { - Type::Now => PlanRef::from(StreamNow::new(self.ctx())), - Type::Add | Type::Subtract => { - let mut now_delta_expr = function_call.clone(); - now_delta_expr.inputs_mut()[0] = ExprImpl::from(InputRef::new(0, DataType::Timestamptz)); - // We cannot call `LogicalProject::to_stream()` here, because its input is already a stream. - StreamProject::new(LogicalProject::new(StreamNow::new(self.ctx()).into(), vec![ExprImpl::from(now_delta_expr)])).into() - }, - // We can panic here because we have checked above - _ => panic!(), - } - }); - cur_streaming = StreamDynamicFilter::new( - left_index, - now_cond.get_expr_type(), - cur_streaming, - rht, - ) - .into(); + if predicate + .conjunctions + .iter() + .any(|expr| expr.count_nows() > 0 && expr.as_now_comparison_cond().is_none()) + { + bail!( + "Conditions containing now must be of the form `input_expr cmp now() [+- const_expr]` or \ + `now() [+- const_expr] cmp input_expr`, where `input_expr` references a column \ + and contains no `now()`." 
+ ); } - Ok(cur_streaming) - } else { - let new_logical = self.clone_with_input(new_input); - Ok(StreamFilter::new(new_logical).into()) + bail!( + "All `now()` exprs were valid, but the condition must have at least one now expr as a lower bound." + ); } + let new_logical = self.clone_with_input(new_input); + Ok(StreamFilter::new(new_logical).into()) } fn logical_rewrite_for_stream( diff --git a/src/frontend/src/optimizer/plan_node/logical_hop_window.rs b/src/frontend/src/optimizer/plan_node/logical_hop_window.rs index 5b13b08409d91..14fef7889bb4a 100644 --- a/src/frontend/src/optimizer/plan_node/logical_hop_window.rs +++ b/src/frontend/src/optimizer/plan_node/logical_hop_window.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,10 +22,10 @@ use risingwave_common::types::{DataType, IntervalUnit}; use super::generic::GenericPlanNode; use super::{ - gen_filter_and_pushdown, generic, BatchHopWindow, ColPrunable, PlanBase, PlanRef, - PlanTreeNodeUnary, PredicatePushdown, StreamHopWindow, ToBatch, ToStream, + gen_filter_and_pushdown, generic, BatchHopWindow, ColPrunable, ExprRewritable, LogicalFilter, + PlanBase, PlanRef, PlanTreeNodeUnary, PredicatePushdown, StreamHopWindow, ToBatch, ToStream, }; -use crate::expr::InputRef; +use crate::expr::{ExprType, FunctionCall, InputRef}; use crate::optimizer::plan_node::{ ColumnPruningContext, PredicatePushdownContext, RewriteStreamContext, ToStreamContext, }; @@ -40,6 +40,8 @@ pub struct LogicalHopWindow { } impl LogicalHopWindow { + /// just used in optimizer and the function will not check if the `time_col`'s value is NULL + /// compared with `LogicalHopWindow::create` fn new( input: PlanRef, time_col: InputRef, @@ -48,8 +50,8 @@ impl LogicalHopWindow { output_indices: Option>, ) -> Self { // if output_indices is not specified, use default output_indices - let output_indices = output_indices - .unwrap_or_else(|| (0..input.schema().len() + 2).into_iter().collect_vec()); + let output_indices = + output_indices.unwrap_or_else(|| (0..input.schema().len() + 2).collect_vec()); let output_type = DataType::window_of(&time_col.data_type).unwrap(); let original_schema: Schema = input .schema() @@ -117,13 +119,20 @@ impl LogicalHopWindow { self.core.into_parts() } - /// the function will check if the cond is bool expression + /// used for binder and planner. The function will add a filter operator to ignore records with + /// NULL time value. pub fn create( input: PlanRef, time_col: InputRef, window_slide: IntervalUnit, window_size: IntervalUnit, ) -> PlanRef { + let input = LogicalFilter::create_with_expr( + input, + FunctionCall::new(ExprType::IsNotNull, vec![time_col.clone().into()]) + .unwrap() + .into(), + ); Self::new(input, time_col, window_slide, window_size, None).into() } @@ -327,6 +336,8 @@ impl ColPrunable for LogicalHopWindow { } } +impl ExprRewritable for LogicalHopWindow {} + impl PredicatePushdown for LogicalHopWindow { /// Keep predicate on time window parameters (`window_start`, `window_end`), /// the rest may be pushed-down. 
diff --git a/src/frontend/src/optimizer/plan_node/logical_insert.rs b/src/frontend/src/optimizer/plan_node/logical_insert.rs index d1cbe68993fbd..e18f7b7518049 100644 --- a/src/frontend/src/optimizer/plan_node/logical_insert.rs +++ b/src/frontend/src/optimizer/plan_node/logical_insert.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,8 +19,8 @@ use risingwave_common::error::Result; use risingwave_common::types::DataType; use super::{ - gen_filter_and_pushdown, BatchInsert, ColPrunable, PlanBase, PlanRef, PlanTreeNodeUnary, - PredicatePushdown, ToBatch, ToStream, + gen_filter_and_pushdown, BatchInsert, ColPrunable, ExprRewritable, PlanBase, PlanRef, + PlanTreeNodeUnary, PredicatePushdown, ToBatch, ToStream, }; use crate::catalog::TableId; use crate::optimizer::plan_node::{ @@ -36,7 +36,7 @@ use crate::utils::{ColIndexMapping, Condition}; #[derive(Debug, Clone)] pub struct LogicalInsert { pub base: PlanBase, - table_source_name: String, // explain-only + table_name: String, // explain-only table_id: TableId, input: PlanRef, column_indices: Vec<usize>, // columns in which to insert @@ -48,7 +48,7 @@ impl LogicalInsert { /// Create a [`LogicalInsert`] node. Used internally by optimizer. pub fn new( input: PlanRef, - table_source_name: String, + table_name: String, table_id: TableId, column_indices: Vec<usize>, row_id_index: Option<usize>, @@ -64,7 +64,7 @@ impl LogicalInsert { let base = PlanBase::new_logical(ctx, schema, vec![], functional_dependency); Self { base, - table_source_name, + table_name, table_id, input, column_indices, @@ -76,7 +76,7 @@ impl LogicalInsert { /// Create a [`LogicalInsert`] node. Used by planner. pub fn create( input: PlanRef, - table_source_name: String, + table_name: String, table_id: TableId, column_indices: Vec<usize>, row_id_index: Option<usize>, @@ -84,7 +84,7 @@ impl LogicalInsert { ) -> Result<Self> { Ok(Self::new( input, - table_source_name, + table_name, table_id, column_indices, row_id_index, @@ -97,7 +97,7 @@ impl LogicalInsert { f, "{} {{ table: {}{} }}", name, - self.table_source_name, + self.table_name, if self.returning { ", returning: true" } else { @@ -135,7 +135,7 @@ impl PlanTreeNodeUnary for LogicalInsert { fn clone_with_input(&self, input: PlanRef) -> Self { Self::new( input, - self.table_source_name.clone(), + self.table_name.clone(), self.table_id, self.column_indices.clone(), self.row_id_index, @@ -160,6 +160,8 @@ impl ColPrunable for LogicalInsert { } } +impl ExprRewritable for LogicalInsert {} + impl PredicatePushdown for LogicalInsert { fn predicate_pushdown( &self, diff --git a/src/frontend/src/optimizer/plan_node/logical_join.rs b/src/frontend/src/optimizer/plan_node/logical_join.rs index d001f4a496122..f55bc0f96d6b8 100644 --- a/src/frontend/src/optimizer/plan_node/logical_join.rs +++ b/src/frontend/src/optimizer/plan_node/logical_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.
@@ -24,8 +24,9 @@ use risingwave_pb::plan_common::JoinType; use super::generic::GenericPlanNode; use super::{ - generic, BatchProject, ColPrunable, CollectInputRef, LogicalProject, PlanBase, PlanRef, - PlanTreeNodeBinary, PredicatePushdown, StreamHashJoin, StreamProject, ToBatch, ToStream, + generic, BatchProject, ColPrunable, CollectInputRef, ExprRewritable, LogicalProject, PlanBase, + PlanRef, PlanTreeNodeBinary, PredicatePushdown, StreamHashJoin, StreamProject, ToBatch, + ToStream, }; use crate::expr::{Expr, ExprImpl, ExprRewriter, ExprType, InputRef}; use crate::optimizer::plan_node::generic::GenericPlanRef; @@ -536,9 +537,7 @@ impl LogicalJoin { .map(|x| x.as_input_ref().unwrap().index) .collect_vec() } else { - (0..logical_scan.output_col_idx().len()) - .into_iter() - .collect_vec() + (0..logical_scan.output_col_idx().len()).collect_vec() }; let left_schema_len = logical_join.left().schema().len(); @@ -786,6 +785,22 @@ impl ColPrunable for LogicalJoin { } } +impl ExprRewritable for LogicalJoin { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + let mut core = self.core.clone(); + core.rewrite_exprs(r); + Self { + base: self.base.clone_with_new_plan_id(), + core, + } + .into() + } +} + fn is_pure_fn_except_for_input_ref(expr: &ExprImpl) -> bool { match expr { ExprImpl::Literal(_) => true, @@ -1083,11 +1098,11 @@ impl LogicalJoin { ctx: &mut ToStreamContext, ) -> Result> { // If there is exactly one predicate, it is a comparison (<, <=, >, >=), and the - // join is a `Inner` join, we can convert the scalar subquery into a + // join is a `Inner` or `LeftSemi` join, we can convert the scalar subquery into a // `StreamDynamicFilter` - // Check if `Inner` subquery (no `IN` or `EXISTS` keywords) - if self.join_type() != JoinType::Inner { + // Check if `Inner`/`LeftSemi` + if !matches!(self.join_type(), JoinType::Inner | JoinType::LeftSemi) { return Ok(None); } diff --git a/src/frontend/src/optimizer/plan_node/logical_limit.rs b/src/frontend/src/optimizer/plan_node/logical_limit.rs index c1814f65f1105..3be0c575d02bd 100644 --- a/src/frontend/src/optimizer/plan_node/logical_limit.rs +++ b/src/frontend/src/optimizer/plan_node/logical_limit.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -17,8 +17,8 @@ use std::fmt; use risingwave_common::error::{ErrorCode, Result, RwError}; use super::{ - gen_filter_and_pushdown, BatchLimit, ColPrunable, PlanBase, PlanRef, PlanTreeNodeUnary, - PredicatePushdown, ToBatch, ToStream, + gen_filter_and_pushdown, BatchLimit, ColPrunable, ExprRewritable, PlanBase, PlanRef, + PlanTreeNodeUnary, PredicatePushdown, ToBatch, ToStream, }; use crate::optimizer::plan_node::{ ColumnPruningContext, PredicatePushdownContext, RewriteStreamContext, ToStreamContext, @@ -99,6 +99,8 @@ impl ColPrunable for LogicalLimit { } } +impl ExprRewritable for LogicalLimit {} + impl PredicatePushdown for LogicalLimit { fn predicate_pushdown( &self, diff --git a/src/frontend/src/optimizer/plan_node/logical_multi_join.rs b/src/frontend/src/optimizer/plan_node/logical_multi_join.rs index f43a895c786c9..cb1847e9a31ba 100644 --- a/src/frontend/src/optimizer/plan_node/logical_multi_join.rs +++ b/src/frontend/src/optimizer/plan_node/logical_multi_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,8 +20,9 @@ use risingwave_common::error::{ErrorCode, Result, RwError}; use risingwave_pb::plan_common::JoinType; use super::{ - ColPrunable, LogicalFilter, LogicalJoin, LogicalProject, PlanBase, PlanNodeType, PlanRef, - PlanTreeNodeBinary, PlanTreeNodeUnary, PredicatePushdown, ToBatch, ToStream, + ColPrunable, ExprRewritable, LogicalFilter, LogicalJoin, LogicalProject, PlanBase, + PlanNodeType, PlanRef, PlanTreeNodeBinary, PlanTreeNodeUnary, PredicatePushdown, ToBatch, + ToStream, }; use crate::expr::{ExprImpl, ExprRewriter}; use crate::optimizer::plan_node::{ @@ -360,7 +361,6 @@ impl LogicalMultiJoin { .cloned() .flat_map(|input_idx| { (0..self.inputs[input_idx].schema().len()) - .into_iter() .map(move |col_idx| self.inner_i2o_mappings[input_idx].map(col_idx)) }) .enumerate() @@ -523,6 +523,15 @@ impl ColPrunable for LogicalMultiJoin { } } +impl ExprRewritable for LogicalMultiJoin { + fn rewrite_exprs(&self, _r: &mut dyn ExprRewriter) -> PlanRef { + panic!( + "Method not available for `LogicalMultiJoin` which is a placeholder node with \ + a temporary lifetime. It only facilitates join reordering during logical planning." + ) + } +} + impl PredicatePushdown for LogicalMultiJoin { fn predicate_pushdown( &self, diff --git a/src/frontend/src/optimizer/plan_node/logical_now.rs b/src/frontend/src/optimizer/plan_node/logical_now.rs new file mode 100644 index 0000000000000..89c2df4775ef0 --- /dev/null +++ b/src/frontend/src/optimizer/plan_node/logical_now.rs @@ -0,0 +1,109 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::fmt; + +use itertools::Itertools; +use risingwave_common::bail; +use risingwave_common::catalog::{Field, Schema}; +use risingwave_common::error::Result; +use risingwave_common::types::DataType; + +use super::utils::IndicesDisplay; +use super::{ + ColPrunable, ColumnPruningContext, ExprRewritable, LogicalFilter, PlanBase, PlanRef, + PredicatePushdown, RewriteStreamContext, StreamNow, ToBatch, ToStream, ToStreamContext, +}; +use crate::optimizer::property::FunctionalDependencySet; +use crate::utils::ColIndexMapping; +use crate::OptimizerContextRef; + +#[derive(Clone, Debug)] +pub struct LogicalNow { + pub base: PlanBase, +} + +impl LogicalNow { + pub fn new(ctx: OptimizerContextRef) -> Self { + let schema = Schema::new(vec![Field { + data_type: DataType::Timestamptz, + name: String::from("now"), + sub_fields: vec![], + type_name: String::default(), + }]); + let base = PlanBase::new_logical(ctx, schema, vec![], FunctionalDependencySet::default()); + Self { base } + } +} + +impl fmt::Display for LogicalNow { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let verbose = self.base.ctx.is_explain_verbose(); + let mut builder = f.debug_struct("LogicalNow"); + + if verbose { + // For now, output all columns from the left side. Make it explicit here. + builder.field( + "output", + &IndicesDisplay { + indices: &(0..self.schema().fields.len()).collect_vec(), + input_schema: self.schema(), + }, + ); + } + + builder.finish() + } +} + +impl_plan_tree_node_for_leaf! { LogicalNow } + +impl ExprRewritable for LogicalNow {} + +impl PredicatePushdown for LogicalNow { + fn predicate_pushdown( + &self, + predicate: crate::utils::Condition, + _ctx: &mut super::PredicatePushdownContext, + ) -> crate::PlanRef { + LogicalFilter::create(self.clone().into(), predicate) + } +} + +impl ToStream for LogicalNow { + fn logical_rewrite_for_stream( + &self, + _ctx: &mut RewriteStreamContext, + ) -> Result<(PlanRef, ColIndexMapping)> { + Ok((self.clone().into(), ColIndexMapping::new(vec![Some(0)]))) + } + + /// `to_stream` is equivalent to `to_stream_with_dist_required(RequiredDist::Any)` + fn to_stream(&self, _ctx: &mut ToStreamContext) -> Result<PlanRef> { + Ok(StreamNow::new(self.clone(), self.ctx()).into()) + } +} + +impl ToBatch for LogicalNow { + fn to_batch(&self) -> Result<PlanRef> { + bail!("`LogicalNow` can only be converted to stream") + } +} + +/// The trait for column pruning, only logical plan node will use it, though all plan node impl it. +impl ColPrunable for LogicalNow { + fn prune_col(&self, _required_cols: &[usize], _ctx: &mut ColumnPruningContext) -> PlanRef { + self.clone().into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/logical_over_agg.rs b/src/frontend/src/optimizer/plan_node/logical_over_agg.rs index 3af433c6f0dce..75e88e37ec2a1 100644 --- a/src/frontend/src/optimizer/plan_node/logical_over_agg.rs +++ b/src/frontend/src/optimizer/plan_node/logical_over_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.
@@ -22,8 +22,8 @@ use risingwave_common::types::DataType; use super::generic::{PlanAggOrderByField, PlanAggOrderByFieldDisplay}; use super::{ - gen_filter_and_pushdown, ColPrunable, LogicalProject, PlanBase, PlanRef, PlanTreeNodeUnary, - PredicatePushdown, ToBatch, ToStream, + gen_filter_and_pushdown, ColPrunable, ExprRewritable, LogicalProject, PlanBase, PlanRef, + PlanTreeNodeUnary, PredicatePushdown, ToBatch, ToStream, }; use crate::expr::{Expr, ExprImpl, InputRef, InputRefDisplay, WindowFunction, WindowFunctionType}; use crate::optimizer::plan_node::{ @@ -264,6 +264,8 @@ impl ColPrunable for LogicalOverAgg { } } +impl ExprRewritable for LogicalOverAgg {} + impl PredicatePushdown for LogicalOverAgg { fn predicate_pushdown( &self, diff --git a/src/frontend/src/optimizer/plan_node/logical_project.rs b/src/frontend/src/optimizer/plan_node/logical_project.rs index a6183994cd6db..34f92e75e3be0 100644 --- a/src/frontend/src/optimizer/plan_node/logical_project.rs +++ b/src/frontend/src/optimizer/plan_node/logical_project.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,10 +20,11 @@ use risingwave_common::error::Result; use super::generic::{self, GenericPlanNode, Project}; use super::{ - gen_filter_and_pushdown, BatchProject, ColPrunable, PlanBase, PlanRef, PlanTreeNodeUnary, - PredicatePushdown, StreamProject, ToBatch, ToStream, + gen_filter_and_pushdown, BatchProject, ColPrunable, ExprRewritable, PlanBase, PlanRef, + PlanTreeNodeUnary, PredicatePushdown, StreamProject, ToBatch, ToStream, }; use crate::expr::{ExprImpl, ExprRewriter, ExprVisitor, InputRef}; +use crate::optimizer::plan_node::generic::GenericPlanRef; use crate::optimizer::plan_node::{ CollectInputRef, ColumnPruningContext, PredicatePushdownContext, RewriteStreamContext, ToStreamContext, @@ -111,7 +112,7 @@ impl LogicalProject { } pub(super) fn fmt_with_name(&self, f: &mut fmt::Formatter<'_>, name: &str) -> fmt::Result { - self.core.fmt_with_name(f, name) + self.core.fmt_with_name(f, name, self.base.schema()) } pub fn is_identity(&self) -> bool { @@ -204,6 +205,22 @@ impl ColPrunable for LogicalProject { } } +impl ExprRewritable for LogicalProject { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + let mut core = self.core.clone(); + core.rewrite_exprs(r); + Self { + base: self.base.clone_with_new_plan_id(), + core, + } + .into() + } +} + impl PredicatePushdown for LogicalProject { fn predicate_pushdown( &self, diff --git a/src/frontend/src/optimizer/plan_node/logical_project_set.rs b/src/frontend/src/optimizer/plan_node/logical_project_set.rs index bc56312a6b7a5..2954922c77ed1 100644 --- a/src/frontend/src/optimizer/plan_node/logical_project_set.rs +++ b/src/frontend/src/optimizer/plan_node/logical_project_set.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -17,8 +17,8 @@ use std::fmt; use risingwave_common::error::Result; use super::{ - generic, BatchProjectSet, ColPrunable, LogicalFilter, LogicalProject, PlanBase, PlanRef, - PlanTreeNodeUnary, PredicatePushdown, StreamProjectSet, ToBatch, ToStream, + generic, BatchProjectSet, ColPrunable, ExprRewritable, LogicalFilter, LogicalProject, PlanBase, + PlanRef, PlanTreeNodeUnary, PredicatePushdown, StreamProjectSet, ToBatch, ToStream, }; use crate::expr::{Expr, ExprImpl, ExprRewriter, FunctionCall, InputRef, TableFunction}; use crate::optimizer::plan_node::generic::GenericPlanNode; @@ -257,6 +257,22 @@ impl ColPrunable for LogicalProjectSet { } } +impl ExprRewritable for LogicalProjectSet { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + let mut core = self.core.clone(); + core.rewrite_exprs(r); + Self { + base: self.base.clone_with_new_plan_id(), + core, + } + .into() + } +} + impl PredicatePushdown for LogicalProjectSet { fn predicate_pushdown( &self, diff --git a/src/frontend/src/optimizer/plan_node/logical_scan.rs b/src/frontend/src/optimizer/plan_node/logical_scan.rs index 6d851b3212c42..bfe1b19303407 100644 --- a/src/frontend/src/optimizer/plan_node/logical_scan.rs +++ b/src/frontend/src/optimizer/plan_node/logical_scan.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,12 +20,12 @@ use fixedbitset::FixedBitSet; use itertools::Itertools; use risingwave_common::catalog::{ColumnDesc, Field, Schema, TableDesc}; use risingwave_common::error::{ErrorCode, Result, RwError}; -use risingwave_common::util::sort_util::OrderType; +use risingwave_common::util::sort_util::{OrderPair, OrderType}; use super::generic::{GenericPlanNode, GenericPlanRef}; use super::{ - generic, BatchFilter, BatchProject, ColPrunable, PlanBase, PlanRef, PredicatePushdown, - StreamTableScan, ToBatch, ToStream, + generic, BatchFilter, BatchProject, ColPrunable, ExprRewritable, PlanBase, PlanRef, + PredicatePushdown, StreamTableScan, ToBatch, ToStream, }; use crate::catalog::{ColumnId, IndexCatalog}; use crate::expr::{ @@ -86,6 +86,7 @@ impl LogicalScan { table_desc, indexes, predicate, + chunk_size: None, }; let schema = core.schema(); @@ -116,7 +117,7 @@ impl LogicalScan { Self::new( table_name, is_sys_table, - (0..table_desc.columns.len()).into_iter().collect(), + (0..table_desc.columns.len()).collect(), table_desc, indexes, ctx, @@ -256,6 +257,11 @@ impl LogicalScan { .collect() } + pub fn watermark_columns(&self) -> FixedBitSet { + let watermark_columns = &self.table_desc().watermark_columns; + self.i2o_col_mapping().rewrite_bitset(watermark_columns) + } + pub fn to_index_scan( &self, index_name: &str, @@ -300,6 +306,20 @@ impl LogicalScan { ) } + /// used by optimizer (currently `top_n_on_index_rule`) to help reduce useless `chunk_size` at + /// executor + pub fn set_chunk_size(&mut self, chunk_size: u32) { + self.core.chunk_size = Some(chunk_size); + } + + pub fn chunk_size(&self) -> Option<u32> { + self.core.chunk_size + } + + pub fn primary_key(&self) -> Vec<OrderPair> { + self.core.table_desc.pk.clone() + } + /// a vec of `InputRef` corresponding to `output_col_idx`, which can represent a pulled project.
fn output_idx_to_input_ref(&self) -> Vec { let output_idx = self @@ -450,6 +470,22 @@ impl ColPrunable for LogicalScan { } } +impl ExprRewritable for LogicalScan { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + let mut core = self.core.clone(); + core.rewrite_exprs(r); + Self { + base: self.base.clone_with_new_plan_id(), + core, + } + .into() + } +} + impl PredicatePushdown for LogicalScan { fn predicate_pushdown( &self, diff --git a/src/frontend/src/optimizer/plan_node/logical_share.rs b/src/frontend/src/optimizer/plan_node/logical_share.rs index dfea313066e26..49caa4bd1a4db 100644 --- a/src/frontend/src/optimizer/plan_node/logical_share.rs +++ b/src/frontend/src/optimizer/plan_node/logical_share.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,7 +20,8 @@ use risingwave_common::error::Result; use super::generic::{self, GenericPlanNode}; use super::{ - ColPrunable, PlanBase, PlanRef, PlanTreeNodeUnary, PredicatePushdown, ToBatch, ToStream, + ColPrunable, ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, PredicatePushdown, ToBatch, + ToStream, }; use crate::optimizer::plan_node::generic::GenericPlanRef; use crate::optimizer::plan_node::{ @@ -118,6 +119,8 @@ impl ColPrunable for LogicalShare { } } +impl ExprRewritable for LogicalShare {} + impl PredicatePushdown for LogicalShare { fn predicate_pushdown( &self, diff --git a/src/frontend/src/optimizer/plan_node/logical_source.rs b/src/frontend/src/optimizer/plan_node/logical_source.rs index 680e1bba82f33..1486ae0ea72d4 100644 --- a/src/frontend/src/optimizer/plan_node/logical_source.rs +++ b/src/frontend/src/optimizer/plan_node/logical_source.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,9 +23,10 @@ use risingwave_common::error::Result; use risingwave_connector::source::DataType; use super::generic::{GenericPlanNode, GenericPlanRef}; +use super::stream_watermark_filter::StreamWatermarkFilter; use super::{ - generic, BatchSource, ColPrunable, LogicalFilter, LogicalProject, PlanBase, PlanRef, - PredicatePushdown, StreamRowIdGen, StreamSource, ToBatch, ToStream, + generic, BatchSource, ColPrunable, ExprRewritable, LogicalFilter, LogicalProject, PlanBase, + PlanRef, PredicatePushdown, StreamRowIdGen, StreamSource, ToBatch, ToStream, }; use crate::catalog::source_catalog::SourceCatalog; use crate::catalog::ColumnId; @@ -161,6 +162,8 @@ impl ColPrunable for LogicalSource { } } +impl ExprRewritable for LogicalSource {} + /// A util function to extract kafka offset timestamp range. /// /// Currently we only support limiting kafka offset timestamp range using literals, e.g. 
we only @@ -354,7 +357,10 @@ impl ToBatch for LogicalSource { impl ToStream for LogicalSource { fn to_stream(&self, _ctx: &mut ToStreamContext) -> Result { let mut plan: PlanRef = StreamSource::new(self.clone()).into(); - if let Some(row_id_index) = self.core.row_id_index && self.core.gen_row_id{ + if let Some(catalog) = self.source_catalog() && !catalog.watermark_descs.is_empty(){ + plan = StreamWatermarkFilter::new(plan, catalog.watermark_descs.clone()).into(); + } + if let Some(row_id_index) = self.core.row_id_index && self.core.gen_row_id { plan = StreamRowIdGen::new(plan, row_id_index).into(); } Ok(plan) diff --git a/src/frontend/src/optimizer/plan_node/logical_table_function.rs b/src/frontend/src/optimizer/plan_node/logical_table_function.rs index f5f900272c9a8..8f763cdcecc00 100644 --- a/src/frontend/src/optimizer/plan_node/logical_table_function.rs +++ b/src/frontend/src/optimizer/plan_node/logical_table_function.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,8 +17,11 @@ use std::fmt; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::error::{ErrorCode, Result}; -use super::{ColPrunable, LogicalFilter, PlanBase, PlanRef, PredicatePushdown, ToBatch, ToStream}; -use crate::expr::{Expr, TableFunction}; +use super::{ + ColPrunable, ExprRewritable, LogicalFilter, PlanBase, PlanRef, PredicatePushdown, ToBatch, + ToStream, +}; +use crate::expr::{Expr, ExprRewriter, TableFunction}; use crate::optimizer::optimizer_context::OptimizerContextRef; use crate::optimizer::plan_node::{ BatchTableFunction, ColumnPruningContext, PredicatePushdownContext, RewriteStreamContext, @@ -68,6 +71,24 @@ impl ColPrunable for LogicalTableFunction { } } +impl ExprRewritable for LogicalTableFunction { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + let mut new = self.clone(); + new.table_function.args = new + .table_function + .args + .into_iter() + .map(|e| r.rewrite_expr(e)) + .collect(); + new.base = self.base.clone_with_new_plan_id(); + new.into() + } +} + impl PredicatePushdown for LogicalTableFunction { fn predicate_pushdown( &self, diff --git a/src/frontend/src/optimizer/plan_node/logical_topn.rs b/src/frontend/src/optimizer/plan_node/logical_topn.rs index 38dba4e90f641..3ddf04e476097 100644 --- a/src/frontend/src/optimizer/plan_node/logical_topn.rs +++ b/src/frontend/src/optimizer/plan_node/logical_topn.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -20,8 +20,9 @@ use risingwave_common::error::{ErrorCode, Result, RwError}; use super::generic::GenericPlanNode; use super::{ - gen_filter_and_pushdown, generic, BatchGroupTopN, ColPrunable, PlanBase, PlanRef, - PlanTreeNodeUnary, PredicatePushdown, StreamGroupTopN, StreamProject, ToBatch, ToStream, + gen_filter_and_pushdown, generic, BatchGroupTopN, ColPrunable, ExprRewritable, PlanBase, + PlanRef, PlanTreeNodeUnary, PredicatePushdown, StreamGroupTopN, StreamProject, ToBatch, + ToStream, }; use crate::expr::{ExprType, FunctionCall, InputRef}; use crate::optimizer::plan_node::{ @@ -353,6 +354,8 @@ impl ColPrunable for LogicalTopN { } } +impl ExprRewritable for LogicalTopN {} + impl PredicatePushdown for LogicalTopN { fn predicate_pushdown( &self, diff --git a/src/frontend/src/optimizer/plan_node/logical_union.rs b/src/frontend/src/optimizer/plan_node/logical_union.rs index 2d8d357a8cb41..17cb5575be68f 100644 --- a/src/frontend/src/optimizer/plan_node/logical_union.rs +++ b/src/frontend/src/optimizer/plan_node/logical_union.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ use itertools::Itertools; use risingwave_common::error::Result; use risingwave_common::types::{DataType, Scalar}; -use super::{ColPrunable, PlanBase, PlanRef, PredicatePushdown, ToBatch, ToStream}; +use super::{ColPrunable, ExprRewritable, PlanBase, PlanRef, PredicatePushdown, ToBatch, ToStream}; use crate::expr::{ExprImpl, InputRef, Literal}; use crate::optimizer::plan_node::generic::{GenericPlanNode, GenericPlanRef}; use crate::optimizer::plan_node::stream_union::StreamUnion; @@ -109,6 +109,8 @@ impl ColPrunable for LogicalUnion { } } +impl ExprRewritable for LogicalUnion {} + impl PredicatePushdown for LogicalUnion { fn predicate_pushdown( &self, diff --git a/src/frontend/src/optimizer/plan_node/logical_update.rs b/src/frontend/src/optimizer/plan_node/logical_update.rs index 698098150a586..932d9a02fea9d 100644 --- a/src/frontend/src/optimizer/plan_node/logical_update.rs +++ b/src/frontend/src/optimizer/plan_node/logical_update.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,11 +19,11 @@ use risingwave_common::error::Result; use risingwave_common::types::DataType; use super::{ - gen_filter_and_pushdown, BatchUpdate, ColPrunable, PlanBase, PlanRef, PlanTreeNodeUnary, - PredicatePushdown, ToBatch, ToStream, + gen_filter_and_pushdown, BatchUpdate, ColPrunable, ExprRewritable, PlanBase, PlanRef, + PlanTreeNodeUnary, PredicatePushdown, ToBatch, ToStream, }; use crate::catalog::TableId; -use crate::expr::ExprImpl; +use crate::expr::{ExprImpl, ExprRewriter}; use crate::optimizer::plan_node::{ ColumnPruningContext, PredicatePushdownContext, RewriteStreamContext, ToStreamContext, }; @@ -48,7 +48,7 @@ impl LogicalUpdate { /// Create a [`LogicalUpdate`] node. Used internally by optimizer. 
pub fn new( input: PlanRef, - table_source_name: String, + table_name: String, table_id: TableId, exprs: Vec<ExprImpl>, returning: bool, @@ -63,7 +63,7 @@ impl LogicalUpdate { let base = PlanBase::new_logical(ctx, schema, vec![], fd_set); Self { base, - table_name: table_source_name, + table_name, table_id, input, exprs, @@ -74,18 +74,12 @@ impl LogicalUpdate { /// Create a [`LogicalUpdate`] node. Used by planner. pub fn create( input: PlanRef, - table_source_name: String, + table_name: String, table_id: TableId, exprs: Vec<ExprImpl>, returning: bool, ) -> Result<Self> { - Ok(Self::new( - input, - table_source_name, - table_id, - exprs, - returning, - )) + Ok(Self::new(input, table_name, table_id, exprs, returning)) } pub(super) fn fmt_with_name(&self, f: &mut fmt::Formatter<'_>, name: &str) -> fmt::Result { @@ -141,6 +135,19 @@ impl fmt::Display for LogicalUpdate { } } +impl ExprRewritable for LogicalUpdate { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + let mut new = self.clone(); + new.exprs = new.exprs.into_iter().map(|e| r.rewrite_expr(e)).collect(); + new.base = new.base.clone_with_new_plan_id(); + new.into() + } +} + impl ColPrunable for LogicalUpdate { fn prune_col(&self, _required_cols: &[usize], ctx: &mut ColumnPruningContext) -> PlanRef { let required_cols: Vec<_> = (0..self.input.schema().len()).collect(); diff --git a/src/frontend/src/optimizer/plan_node/logical_values.rs b/src/frontend/src/optimizer/plan_node/logical_values.rs index a2ef0269632f1..8795cefda3ad8 100644 --- a/src/frontend/src/optimizer/plan_node/logical_values.rs +++ b/src/frontend/src/optimizer/plan_node/logical_values.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.
@@ -19,10 +19,10 @@ use risingwave_common::catalog::Schema; use risingwave_common::error::{ErrorCode, Result, RwError}; use super::{ - BatchValues, ColPrunable, LogicalFilter, PlanBase, PlanRef, PredicatePushdown, ToBatch, - ToStream, + BatchValues, ColPrunable, ExprRewritable, LogicalFilter, PlanBase, PlanRef, PredicatePushdown, + ToBatch, ToStream, }; -use crate::expr::{Expr, ExprImpl}; +use crate::expr::{Expr, ExprImpl, ExprRewriter}; use crate::optimizer::optimizer_context::OptimizerContextRef; use crate::optimizer::plan_node::{ ColumnPruningContext, PredicatePushdownContext, RewriteStreamContext, ToStreamContext, @@ -76,6 +76,29 @@ impl fmt::Display for LogicalValues { } } +impl ExprRewritable for LogicalValues { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + let mut new = self.clone(); + new.rows = new + .rows + .iter() + .map(|exprs| { + exprs + .iter() + .map(|e| r.rewrite_expr(e.clone())) + .collect::<Vec<_>>() + }) + .collect::<Vec<_>>() + .into(); + new.base = new.base.clone_with_new_plan_id(); + new.into() + } +} + impl ColPrunable for LogicalValues { fn prune_col(&self, required_cols: &[usize], _ctx: &mut ColumnPruningContext) -> PlanRef { let rows = self diff --git a/src/frontend/src/optimizer/plan_node/mod.rs b/src/frontend/src/optimizer/plan_node/mod.rs index bd08752a1a567..a4869ee2b00bd 100644 --- a/src/frontend/src/optimizer/plan_node/mod.rs +++ b/src/frontend/src/optimizer/plan_node/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -60,6 +60,7 @@ pub trait PlanNode: + Display + Downcast + ColPrunable + + ExprRewritable + ToBatch + ToStream + ToDistributedBatch @@ -85,6 +86,22 @@ pub enum Convention { Stream, } +pub(crate) trait RewriteExprsRecursive { + fn rewrite_exprs_recursive(&self, r: &mut impl ExprRewriter) -> PlanRef; +} + +impl RewriteExprsRecursive for PlanRef { + fn rewrite_exprs_recursive(&self, r: &mut impl ExprRewriter) -> PlanRef { + let new = self.rewrite_exprs(r); + let inputs: Vec<PlanRef> = new + .inputs() + .iter() + .map(|plan_ref| plan_ref.rewrite_exprs_recursive(r)) + .collect(); + new.clone_with_inputs(&inputs[..]) + } +} + impl ColPrunable for PlanRef { fn prune_col(&self, required_cols: &[usize], ctx: &mut ColumnPruningContext) -> PlanRef { if let Some(logical_share) = self.as_logical_share() { @@ -225,6 +242,11 @@ impl PlanTreeNode for PlanRef { // We can't clone `LogicalShare`, but only can replace input instead. logical_share.replace_input(inputs[0].clone()); self.clone() + } else if let Some(stream_share) = self.clone().as_stream_share() { + assert_eq!(inputs.len(), 1); + // We can't clone `StreamShare`, but only can replace input instead. + stream_share.replace_input(inputs[0].clone()); + self.clone() } else { // Dispatch to dyn PlanNode instead of PlanRef.
let dyn_t = self.deref(); @@ -407,6 +429,8 @@ mod plan_tree_node; pub use plan_tree_node::*; mod col_pruning; pub use col_pruning::*; +mod expr_rewritable; +pub use expr_rewritable::*; mod convert; pub use convert::*; mod eq_join_predicate; @@ -456,6 +480,7 @@ mod logical_insert; mod logical_join; mod logical_limit; mod logical_multi_join; +mod logical_now; mod logical_over_agg; mod logical_project; mod logical_project_set; @@ -489,6 +514,7 @@ mod stream_sink; mod stream_source; mod stream_table_scan; mod stream_topn; +mod stream_watermark_filter; mod stream_share; mod stream_union; @@ -528,6 +554,7 @@ pub use logical_insert::LogicalInsert; pub use logical_join::LogicalJoin; pub use logical_limit::LogicalLimit; pub use logical_multi_join::{LogicalMultiJoin, LogicalMultiJoinBuilder}; +pub use logical_now::LogicalNow; pub use logical_over_agg::{LogicalOverAgg, PlanWindowFunction}; pub use logical_project::LogicalProject; pub use logical_project_set::LogicalProjectSet; @@ -563,8 +590,9 @@ pub use stream_source::StreamSource; pub use stream_table_scan::StreamTableScan; pub use stream_topn::StreamTopN; pub use stream_union::StreamUnion; +pub use stream_watermark_filter::StreamWatermarkFilter; -use crate::expr::{ExprImpl, InputRef, Literal}; +use crate::expr::{ExprImpl, ExprRewriter, InputRef, Literal}; use crate::optimizer::optimizer_context::OptimizerContextRef; use crate::stream_fragmenter::BuildFragmentGraphState; use crate::utils::{ColIndexMapping, Condition}; @@ -606,6 +634,7 @@ macro_rules! for_all_plan_nodes { , { Logical, Union } , { Logical, OverAgg } , { Logical, Share } + , { Logical, Now } // , { Logical, Sort } we don't need a LogicalSort, just require the Order , { Batch, SimpleAgg } , { Batch, HashAgg } @@ -655,6 +684,7 @@ macro_rules! for_all_plan_nodes { , { Stream, Dml } , { Stream, Now } , { Stream, Share } + , { Stream, WatermarkFilter } } }; } @@ -685,6 +715,7 @@ macro_rules! for_logical_plan_nodes { , { Logical, Union } , { Logical, OverAgg } , { Logical, Share } + , { Logical, Now } // , { Logical, Sort} not sure if we will support Order by clause in subquery/view/MV // if we don't support that, we don't need LogicalSort, just require the Order at the top of query } @@ -753,6 +784,7 @@ macro_rules! for_stream_plan_nodes { , { Stream, Dml } , { Stream, Now } , { Stream, Share } + , { Stream, WatermarkFilter } } }; } diff --git a/src/frontend/src/optimizer/plan_node/plan_base.rs b/src/frontend/src/optimizer/plan_node/plan_base.rs index 6bd99e9892f6b..725c8f214cfdf 100644 --- a/src/frontend/src/optimizer/plan_node/plan_base.rs +++ b/src/frontend/src/optimizer/plan_node/plan_base.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -76,7 +76,7 @@ impl PlanBase { functional_dependency: FunctionalDependencySet, ) -> Self { let id = ctx.next_plan_node_id(); - let watermark_cols = FixedBitSet::with_capacity(schema.len()); + let watermark_columns = FixedBitSet::with_capacity(schema.len()); Self { id, ctx, @@ -87,7 +87,7 @@ impl PlanBase { // Logical plan node won't touch `append_only` field append_only: true, functional_dependency, - watermark_columns: watermark_cols, + watermark_columns, } } @@ -123,7 +123,7 @@ impl PlanBase { ) -> Self { let id = ctx.next_plan_node_id(); let functional_dependency = FunctionalDependencySet::new(schema.len()); - let watermark_cols = FixedBitSet::with_capacity(schema.len()); + let watermark_columns = FixedBitSet::with_capacity(schema.len()); Self { id, ctx, @@ -134,7 +134,7 @@ impl PlanBase { // Batch plan node won't touch `append_only` field append_only: true, functional_dependency, - watermark_columns: watermark_cols, + watermark_columns, } } @@ -149,6 +149,12 @@ impl PlanBase { plan_node.watermark_columns().clone(), ) } + + pub fn clone_with_new_plan_id(&self) -> Self { + let mut new = self.clone(); + new.id = self.ctx.next_plan_node_id(); + new + } } macro_rules! impl_base_delegate { diff --git a/src/frontend/src/optimizer/plan_node/plan_tree_node.rs b/src/frontend/src/optimizer/plan_node/plan_tree_node.rs index 5ea69dfdbfbf0..e1435a6b7b20d 100644 --- a/src/frontend/src/optimizer/plan_node/plan_tree_node.rs +++ b/src/frontend/src/optimizer/plan_node/plan_tree_node.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_node/plan_tree_node_v2.rs b/src/frontend/src/optimizer/plan_node/plan_tree_node_v2.rs index 6f2bf156e309e..e598c7dd61caa 100644 --- a/src/frontend/src/optimizer/plan_node/plan_tree_node_v2.rs +++ b/src/frontend/src/optimizer/plan_node/plan_tree_node_v2.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_node/predicate_pushdown.rs b/src/frontend/src/optimizer/plan_node/predicate_pushdown.rs index f799481e4ab74..75f3f2a745036 100644 --- a/src/frontend/src/optimizer/plan_node/predicate_pushdown.rs +++ b/src/frontend/src/optimizer/plan_node/predicate_pushdown.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_node/stream.rs b/src/frontend/src/optimizer/plan_node/stream.rs index 963e59726f1b5..afa2bd78607ed 100644 --- a/src/frontend/src/optimizer/plan_node/stream.rs +++ b/src/frontend/src/optimizer/plan_node/stream.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ use pb::stream_node as pb_node; use risingwave_common::catalog::{ColumnDesc, Field, Schema}; use risingwave_common::types::DataType; use risingwave_common::util::sort_util::OrderType; +use risingwave_connector::sink::catalog::desc::SinkDesc; use risingwave_pb::catalog::ColumnIndex; use risingwave_pb::stream_plan as pb; use smallvec::SmallVec; @@ -220,7 +221,7 @@ impl HashJoin { pub fn infer_internal_and_degree_table_catalog( input: &impl StreamPlanRef, join_key_indices: Vec<usize>, - ) -> (TableCatalog, TableCatalog) { + ) -> (TableCatalog, TableCatalog, Vec<usize>) { let schema = input.schema(); let internal_table_dist_keys = input.distribution().dist_column_indices().to_vec(); @@ -242,8 +243,17 @@ impl HashJoin { let join_key_len = join_key_indices.len(); let mut pk_indices = join_key_indices; - // TODO(yuhao): dedup the dist key and pk. - pk_indices.extend(input.logical_pk()); + // dedup the pk in dist key.. + let mut deduped_input_pk_indices = vec![]; + for input_pk_idx in input.logical_pk() { + if !pk_indices.contains(input_pk_idx) + && !deduped_input_pk_indices.contains(input_pk_idx) + { + deduped_input_pk_indices.push(*input_pk_idx); + } + } + + pk_indices.extend(deduped_input_pk_indices.clone()); // Build internal table + let mut internal_table_catalog_builder = @@ -276,6 +286,7 @@ impl HashJoin { ( internal_table_catalog_builder.build(internal_table_dist_keys), degree_table_catalog_builder.build(degree_table_dist_keys), + deduped_input_pk_indices, ) } } @@ -335,7 +346,7 @@ impl_plan_tree_node_v2_for_stream_unary_node_with_core_delegating!(Project, core #[derive(Debug, Clone)] pub struct Sink { pub input: PlanRef, - pub sink_desc: TableCatalog, + pub sink_desc: SinkDesc, } impl_plan_tree_node_v2_for_stream_unary_node!(Sink, input); /// [`Source`] represents a table/connector source at the very beginning of the graph.
@@ -483,6 +494,7 @@ pub fn to_stream_prost_body( .iter() .map(ColumnDesc::to_protobuf) .collect(), + table_desc: Some(left_table_desc.to_protobuf()), }), right_info: Some(ArrangementInfo { arrange_key_orders: right_table_desc.arrange_key_orders_prost(), @@ -492,6 +504,7 @@ pub fn to_stream_prost_body( .iter() .map(ColumnDesc::to_protobuf) .collect(), + table_desc: Some(right_table_desc.to_protobuf()), }), output_indices: me.core.output_indices.iter().map(|&x| x as u32).collect(), }) @@ -525,6 +538,7 @@ pub fn to_stream_prost_body( let me = &me.core; let result_table = me.infer_result_table(base, None); let agg_states = me.infer_stream_agg_state(base, None); + let distinct_dedup_tables = me.infer_distinct_dedup_tables(base, None); ProstNode::GlobalSimpleAgg(SimpleAggNode { agg_calls: me @@ -548,6 +562,10 @@ pub fn to_stream_prost_body( .with_id(state.gen_table_id_wrapped()) .to_internal_table_prost(), ), + distinct_dedup_tables: distinct_dedup_tables + .into_iter() + .map(|(key_idx, table)| (key_idx as u32, table.to_internal_table_prost())) + .collect(), }) } Node::GroupTopN(me) => { @@ -569,6 +587,7 @@ pub fn to_stream_prost_body( Node::HashAgg(me) => { let result_table = me.core.infer_result_table(base, me.vnode_col_idx); let agg_states = me.core.infer_stream_agg_state(base, me.vnode_col_idx); + let distinct_dedup_tables = me.core.infer_distinct_dedup_tables(base, me.vnode_col_idx); ProstNode::HashAgg(HashAggNode { group_key: me.core.group_key.iter().map(|&idx| idx as u32).collect(), @@ -589,6 +608,10 @@ pub fn to_stream_prost_body( .with_id(state.gen_table_id_wrapped()) .to_internal_table_prost(), ), + distinct_dedup_tables: distinct_dedup_tables + .into_iter() + .map(|(key_idx, table)| (key_idx as u32, table.to_internal_table_prost())) + .collect(), }) } Node::HashJoin(me) => { @@ -597,16 +620,27 @@ pub fn to_stream_prost_body( let left_key_indices_prost = left_key_indices.iter().map(|&idx| idx as i32).collect(); let right_key_indices_prost = right_key_indices.iter().map(|&idx| idx as i32).collect(); - let (left_table, left_degree_table) = HashJoin::infer_internal_and_degree_table_catalog( - &me.core.left.0, - left_key_indices, - ); - let (right_table, right_degree_table) = + let (left_table, left_degree_table, left_deduped_input_pk_indices) = + HashJoin::infer_internal_and_degree_table_catalog( + &me.core.left.0, + left_key_indices, + ); + let (right_table, right_degree_table, right_deduped_input_pk_indices) = HashJoin::infer_internal_and_degree_table_catalog( &me.core.right.0, right_key_indices, ); + let left_deduped_input_pk_indices = left_deduped_input_pk_indices + .iter() + .map(|idx| *idx as u32) + .collect(); + + let right_deduped_input_pk_indices = right_deduped_input_pk_indices + .iter() + .map(|idx| *idx as u32) + .collect(); + let (left_table, left_degree_table) = ( left_table.with_id(state.gen_table_id_wrapped()), left_degree_table.with_id(state.gen_table_id_wrapped()), @@ -632,6 +666,8 @@ pub fn to_stream_prost_body( right_table: Some(right_table.to_internal_table_prost()), left_degree_table: Some(left_degree_table.to_internal_table_prost()), right_degree_table: Some(right_degree_table.to_internal_table_prost()), + left_deduped_input_pk_indices, + right_deduped_input_pk_indices, output_indices: me.core.output_indices.iter().map(|&x| x as u32).collect(), is_append_only: me.is_append_only, }) @@ -662,6 +698,7 @@ pub fn to_stream_prost_body( agg_call_states: vec![], result_table: None, is_append_only: me.input.0.append_only, + distinct_dedup_tables: Default::default(), }) } 
Node::Materialize(me) => { @@ -706,15 +743,7 @@ pub fn to_stream_prost_body( .collect(), }), Node::Sink(me) => ProstNode::Sink(SinkNode { - table_id: me.sink_desc.id().into(), - properties: me.sink_desc.properties.inner().clone(), - fields: me - .sink_desc - .columns() - .iter() - .map(|c| Field::from(c.column_desc.clone()).to_prost()) - .collect(), - sink_pk: me.sink_desc.pk().iter().map(|c| c.index as u32).collect(), + sink_desc: Some(me.sink_desc.to_proto()), }), Node::Source(me) => { let me = &me.core.catalog; diff --git a/src/frontend/src/optimizer/plan_node/stream_delta_join.rs b/src/frontend/src/optimizer/plan_node/stream_delta_join.rs index 907dd66baed60..272e6115ddfa1 100644 --- a/src/frontend/src/optimizer/plan_node/stream_delta_join.rs +++ b/src/frontend/src/optimizer/plan_node/stream_delta_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,10 +21,12 @@ use risingwave_pb::stream_plan::stream_node::NodeBody; use risingwave_pb::stream_plan::{ArrangementInfo, DeltaIndexJoinNode}; use super::generic::GenericPlanRef; -use super::{LogicalJoin, PlanBase, PlanRef, PlanTreeNodeBinary, StreamHashJoin, StreamNode}; -use crate::expr::Expr; +use super::{ExprRewritable, LogicalJoin, PlanBase, PlanRef, PlanTreeNodeBinary, StreamNode}; +use crate::expr::{Expr, ExprRewriter}; +use crate::optimizer::plan_node::stream::StreamPlanRef; use crate::optimizer::plan_node::utils::IndicesDisplay; use crate::optimizer::plan_node::{EqJoinPredicate, EqJoinPredicateDisplay}; +use crate::optimizer::property::Distribution; use crate::stream_fragmenter::BuildFragmentGraphState; /// [`StreamDeltaJoin`] implements [`super::LogicalJoin`] with delta join. It requires its two @@ -50,11 +52,9 @@ impl StreamDeltaJoin { if eq_join_predicate.has_non_eq() { todo!("non-eq condition not supported for delta join"); } - let dist = StreamHashJoin::derive_dist( - logical.left().distribution(), - logical.right().distribution(), - &logical, - ); + + // FIXME: delta join could have arbitrary distribution. + let dist = Distribution::SomeShard; let watermark_columns = { let from_left = logical @@ -154,10 +154,23 @@ impl StreamNode for StreamDeltaJoin { fn to_stream_prost_body(&self, _state: &mut BuildFragmentGraphState) -> NodeBody { let left = self.left(); let right = self.right(); - let left_table = left.as_stream_index_scan().unwrap(); - let right_table = right.as_stream_index_scan().unwrap(); - let left_table_desc = left_table.logical().table_desc(); - let right_table_desc = right_table.logical().table_desc(); + + let left_table = if let Some(stream_index_scan) = left.as_stream_index_scan() { + stream_index_scan.logical() + } else if let Some(stream_table_scan) = left.as_stream_table_scan() { + stream_table_scan.logical() + } else { + unreachable!(); + }; + let left_table_desc = left_table.table_desc(); + let right_table = if let Some(stream_index_scan) = right.as_stream_index_scan() { + stream_index_scan.logical() + } else if let Some(stream_table_scan) = right.as_stream_table_scan() { + stream_table_scan.logical() + } else { + unreachable!(); + }; + let right_table_desc = right_table.table_desc(); // TODO: add a separate delta join node in proto, or move fragmenter to frontend so that we // don't need an intermediate representation. 
@@ -188,22 +201,26 @@ impl StreamNode for StreamDeltaJoin { left_table_id: left_table_desc.table_id.table_id(), right_table_id: right_table_desc.table_id.table_id(), left_info: Some(ArrangementInfo { + // TODO: remove it arrange_key_orders: left_table_desc.arrange_key_orders_prost(), + // TODO: remove it column_descs: left_table - .logical() .column_descs() .iter() .map(ColumnDesc::to_protobuf) .collect(), + table_desc: Some(left_table_desc.to_protobuf()), }), right_info: Some(ArrangementInfo { + // TODO: remove it arrange_key_orders: right_table_desc.arrange_key_orders_prost(), + // TODO: remove it column_descs: right_table - .logical() .column_descs() .iter() .map(ColumnDesc::to_protobuf) .collect(), + table_desc: Some(right_table_desc.to_protobuf()), }), output_indices: self .logical @@ -214,3 +231,21 @@ impl StreamNode for StreamDeltaJoin { }) } } + +impl ExprRewritable for StreamDeltaJoin { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_join() + .unwrap() + .clone(), + self.eq_join_predicate.rewrite_exprs(r), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/stream_derive.rs b/src/frontend/src/optimizer/plan_node/stream_derive.rs index c00fc72507552..387ede5f1b7dd 100644 --- a/src/frontend/src/optimizer/plan_node/stream_derive.rs +++ b/src/frontend/src/optimizer/plan_node/stream_derive.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_node/stream_dml.rs b/src/frontend/src/optimizer/plan_node/stream_dml.rs index b1d99ff4a3d9e..2c03e713145f4 100644 --- a/src/frontend/src/optimizer/plan_node/stream_dml.rs +++ b/src/frontend/src/optimizer/plan_node/stream_dml.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,10 +15,10 @@ use std::fmt; use fixedbitset::FixedBitSet; -use risingwave_common::catalog::ColumnDesc; +use risingwave_common::catalog::{ColumnDesc, INITIAL_TABLE_VERSION_ID}; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; -use super::{PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; use crate::stream_fragmenter::BuildFragmentGraphState; #[derive(Clone, Debug)] @@ -37,7 +37,7 @@ impl StreamDml { input.functional_dependency().clone(), input.distribution().clone(), append_only, - FixedBitSet::with_capacity(input.schema().len()), + FixedBitSet::with_capacity(input.schema().len()), // no watermark if dml is allowed ); Self { @@ -84,7 +84,10 @@ impl StreamNode for StreamDml { ProstStreamNode::Dml(DmlNode { // Meta will fill this table id. 
table_id: 0, + table_version_id: INITIAL_TABLE_VERSION_ID, // TODO: use correct table version id column_descs: self.column_descs.iter().map(Into::into).collect(), }) } } + +impl ExprRewritable for StreamDml {} diff --git a/src/frontend/src/optimizer/plan_node/stream_dynamic_filter.rs b/src/frontend/src/optimizer/plan_node/stream_dynamic_filter.rs index 09644694e22d0..186e699087b7e 100644 --- a/src/frontend/src/optimizer/plan_node/stream_dynamic_filter.rs +++ b/src/frontend/src/optimizer/plan_node/stream_dynamic_filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,8 +21,8 @@ pub use risingwave_pb::expr::expr_node::Type as ExprType; use risingwave_pb::stream_plan::stream_node::NodeBody; use risingwave_pb::stream_plan::DynamicFilterNode; -use super::generic; use super::utils::IndicesDisplay; +use super::{generic, ExprRewritable}; use crate::expr::Expr; use crate::optimizer::plan_node::generic::GenericPlanRef; use crate::optimizer::plan_node::{PlanBase, PlanTreeNodeBinary, StreamNode}; @@ -40,17 +40,17 @@ impl StreamDynamicFilter { pub fn new(left_index: usize, comparator: ExprType, left: PlanRef, right: PlanRef) -> Self { assert_eq!(right.schema().len(), 1); - let watermark_cols = { - let mut watermark_cols = FixedBitSet::with_capacity(left.schema().len()); + let watermark_columns = { + let mut watermark_columns = FixedBitSet::with_capacity(left.schema().len()); if right.watermark_columns()[0] { match comparator { ExprType::GreaterThan | ExprType::GreaterThanOrEqual => { - watermark_cols.set(left_index, true) + watermark_columns.set(left_index, true) } _ => {} } } - watermark_cols + watermark_columns }; // TODO: derive from input @@ -62,8 +62,7 @@ impl StreamDynamicFilter { left.distribution().clone(), false, /* we can have a new abstraction for append only and monotonically increasing * in the future */ - // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 - watermark_cols, + watermark_columns, ); let core = generic::DynamicFilter { comparator, @@ -77,10 +76,6 @@ impl StreamDynamicFilter { pub fn left_index(&self) -> usize { self.core.left_index } - - pub fn comparator(&self) -> &ExprType { - &self.core.comparator - } } impl fmt::Display for StreamDynamicFilter { @@ -136,12 +131,11 @@ impl_plan_tree_node_for_binary! 
{ StreamDynamicFilter } impl StreamNode for StreamDynamicFilter { fn to_stream_prost_body(&self, state: &mut BuildFragmentGraphState) -> NodeBody { use generic::dynamic_filter::*; - let condition = self.core.predicate().as_expr_unless_true().map(|x| { - self.base - .ctx() - .expr_with_session_timezone(x) - .to_expr_proto() - }); + let condition = self + .core + .predicate() + .as_expr_unless_true() + .map(|x| x.to_expr_proto()); let left_index = self.core.left_index; let left_table = infer_left_internal_table_catalog(&self.base, left_index) .with_id(state.gen_table_id_wrapped()); @@ -156,3 +150,5 @@ impl StreamNode for StreamDynamicFilter { }) } } + +impl ExprRewritable for StreamDynamicFilter {} diff --git a/src/frontend/src/optimizer/plan_node/stream_exchange.rs b/src/frontend/src/optimizer/plan_node/stream_exchange.rs index 3f40f5dc68597..e8f5d36cf53ed 100644 --- a/src/frontend/src/optimizer/plan_node/stream_exchange.rs +++ b/src/frontend/src/optimizer/plan_node/stream_exchange.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ use std::fmt; use risingwave_pb::stream_plan::stream_node::NodeBody; use risingwave_pb::stream_plan::{DispatchStrategy, DispatcherType, ExchangeNode}; -use super::{PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; use crate::optimizer::property::{Distribution, DistributionDisplay}; use crate::stream_fragmenter::BuildFragmentGraphState; @@ -91,3 +91,5 @@ impl StreamNode for StreamExchange { }) } } + +impl ExprRewritable for StreamExchange {} diff --git a/src/frontend/src/optimizer/plan_node/stream_expand.rs b/src/frontend/src/optimizer/plan_node/stream_expand.rs index 5baed436205ce..c4193ea15b623 100644 --- a/src/frontend/src/optimizer/plan_node/stream_expand.rs +++ b/src/frontend/src/optimizer/plan_node/stream_expand.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -19,7 +19,7 @@ use risingwave_pb::stream_plan::expand_node::Subset; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; use risingwave_pb::stream_plan::ExpandNode; -use super::{LogicalExpand, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use super::{ExprRewritable, LogicalExpand, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; use crate::optimizer::property::Distribution; use crate::stream_fragmenter::BuildFragmentGraphState; @@ -31,7 +31,10 @@ pub struct StreamExpand { impl StreamExpand { pub fn new(logical: LogicalExpand) -> Self { - let dist = match logical.input().distribution() { + let input = logical.input(); + let schema = logical.schema().clone(); + + let dist = match input.distribution() { Distribution::Single => Distribution::Single, Distribution::SomeShard | Distribution::HashShard(_) @@ -39,15 +42,22 @@ impl StreamExpand { Distribution::Broadcast => unreachable!(), }; + let mut watermark_columns = FixedBitSet::with_capacity(schema.len()); + watermark_columns.extend( + input + .watermark_columns() + .ones() + .map(|idx| idx + input.schema().len()), + ); + let base = PlanBase::new_stream( logical.base.ctx.clone(), - logical.schema().clone(), + schema, logical.base.logical_pk.to_vec(), logical.functional_dependency().clone(), dist, - logical.input().append_only(), - // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 - FixedBitSet::with_capacity(logical.schema().len()), + input.append_only(), + watermark_columns, ); StreamExpand { base, logical } } @@ -91,3 +101,5 @@ fn subset_to_protobuf(subset: &[usize]) -> Subset { let column_indices = subset.iter().map(|key| *key as u32).collect(); Subset { column_indices } } + +impl ExprRewritable for StreamExpand {} diff --git a/src/frontend/src/optimizer/plan_node/stream_filter.rs b/src/frontend/src/optimizer/plan_node/stream_filter.rs index 8db56048bb84d..6599c69344576 100644 --- a/src/frontend/src/optimizer/plan_node/stream_filter.rs +++ b/src/frontend/src/optimizer/plan_node/stream_filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; use risingwave_pb::stream_plan::FilterNode; use super::generic::GenericPlanRef; -use super::{LogicalFilter, PlanRef, PlanTreeNodeUnary, StreamNode}; -use crate::expr::{Expr, ExprImpl}; +use super::{ExprRewritable, LogicalFilter, PlanRef, PlanTreeNodeUnary, StreamNode}; +use crate::expr::{Expr, ExprImpl, ExprRewriter}; use crate::optimizer::plan_node::PlanBase; use crate::stream_fragmenter::BuildFragmentGraphState; use crate::utils::Condition; @@ -85,3 +85,20 @@ impl StreamNode for StreamFilter { }) } } + +impl ExprRewritable for StreamFilter { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_filter() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/stream_global_simple_agg.rs b/src/frontend/src/optimizer/plan_node/stream_global_simple_agg.rs index 687c684b4078f..8846868b0ebc6 100644 --- a/src/frontend/src/optimizer/plan_node/stream_global_simple_agg.rs +++ b/src/frontend/src/optimizer/plan_node/stream_global_simple_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,7 +18,8 @@ use fixedbitset::FixedBitSet; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; use super::generic::PlanAggCall; -use super::{LogicalAgg, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use super::{ExprRewritable, LogicalAgg, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use crate::expr::ExprRewriter; use crate::optimizer::plan_node::generic::GenericPlanRef; use crate::optimizer::property::Distribution; use crate::stream_fragmenter::BuildFragmentGraphState; @@ -33,6 +34,7 @@ impl StreamGlobalSimpleAgg { pub fn new(logical: LogicalAgg) -> Self { let ctx = logical.base.ctx.clone(); let pk_indices = logical.base.logical_pk.to_vec(); + let schema = logical.schema().clone(); let input = logical.input(); let input_dist = input.distribution(); let dist = match input_dist { @@ -40,16 +42,19 @@ impl StreamGlobalSimpleAgg { _ => panic!(), }; + // Empty because watermark column(s) must be in group key and global simple agg have no + // group key. + let watermark_columns = FixedBitSet::with_capacity(schema.len()); + // Simple agg executor might change the append-only behavior of the stream. 
let base = PlanBase::new_stream( ctx, - logical.schema().clone(), + schema, pk_indices, logical.functional_dependency().clone(), dist, false, - // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 - FixedBitSet::with_capacity(logical.schema().len()), + watermark_columns, ); StreamGlobalSimpleAgg { base, logical } } @@ -88,6 +93,7 @@ impl StreamNode for StreamGlobalSimpleAgg { use risingwave_pb::stream_plan::*; let result_table = self.logical.infer_result_table(None); let agg_states = self.logical.infer_stream_agg_state(None); + let distinct_dedup_tables = self.logical.infer_distinct_dedup_tables(None); ProstStreamNode::GlobalSimpleAgg(SimpleAggNode { agg_calls: self @@ -112,6 +118,34 @@ impl StreamNode for StreamGlobalSimpleAgg { .with_id(state.gen_table_id_wrapped()) .to_internal_table_prost(), ), + distinct_dedup_tables: distinct_dedup_tables + .into_iter() + .map(|(key_idx, table)| { + ( + key_idx as u32, + table + .with_id(state.gen_table_id_wrapped()) + .to_internal_table_prost(), + ) + }) + .collect(), }) } } + +impl ExprRewritable for StreamGlobalSimpleAgg { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_agg() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/stream_group_topn.rs b/src/frontend/src/optimizer/plan_node/stream_group_topn.rs index 0f2e313d12492..f043eefd5b46a 100644 --- a/src/frontend/src/optimizer/plan_node/stream_group_topn.rs +++ b/src/frontend/src/optimizer/plan_node/stream_group_topn.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -17,7 +17,7 @@ use std::fmt; use fixedbitset::FixedBitSet; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; -use super::{LogicalTopN, PlanBase, PlanTreeNodeUnary, StreamNode}; +use super::{ExprRewritable, LogicalTopN, PlanBase, PlanTreeNodeUnary, StreamNode}; use crate::optimizer::property::{Order, OrderDisplay}; use crate::stream_fragmenter::BuildFragmentGraphState; use crate::PlanRef; @@ -36,15 +36,28 @@ impl StreamGroupTopN { assert!(!logical.group_key().is_empty()); assert!(logical.limit() > 0); let input = logical.input(); + let schema = input.schema().clone(); + + let watermark_columns = if input.append_only() { + input.watermark_columns().clone() + } else { + let mut watermark_columns = FixedBitSet::with_capacity(schema.len()); + for &idx in logical.group_key() { + if input.watermark_columns().contains(idx) { + watermark_columns.insert(idx); + } + } + watermark_columns + }; + let base = PlanBase::new_stream( input.ctx(), - input.schema().clone(), + schema, input.logical_pk().to_vec(), input.functional_dependency().clone(), input.distribution().clone(), false, - // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 - FixedBitSet::with_capacity(logical.schema().len()), + watermark_columns, ); StreamGroupTopN { base, @@ -81,6 +94,7 @@ impl StreamNode for StreamGroupTopN { .logical .infer_internal_table_catalog(self.vnode_col_idx) .with_id(state.gen_table_id_wrapped()); + assert!(!self.group_key().is_empty()); let group_topn_node = GroupTopNNode { limit: self.limit(), offset: self.offset(), @@ -89,14 +103,21 @@ impl StreamNode for StreamGroupTopN { table: Some(table.to_internal_table_prost()), order_by: self.topn_order().to_protobuf(), }; - - ProstStreamNode::GroupTopN(group_topn_node) + if self.input().append_only() { + ProstStreamNode::AppendOnlyGroupTopN(group_topn_node) + } else { + ProstStreamNode::GroupTopN(group_topn_node) + } } } impl fmt::Display for StreamGroupTopN { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut builder = f.debug_struct("StreamGroupTopN"); + let mut builder = f.debug_struct(if self.input().append_only() { + "StreamAppendOnlyGroupTopN" + } else { + "StreamGroupTopN" + }); let input = self.input(); let input_schema = input.schema(); builder.field( @@ -131,3 +152,5 @@ impl PlanTreeNodeUnary for StreamGroupTopN { Self::new(self.logical.clone_with_input(input), self.vnode_col_idx) } } + +impl ExprRewritable for StreamGroupTopN {} diff --git a/src/frontend/src/optimizer/plan_node/stream_hash_agg.rs b/src/frontend/src/optimizer/plan_node/stream_hash_agg.rs index f959ed427c4ef..b8184bb627428 100644 --- a/src/frontend/src/optimizer/plan_node/stream_hash_agg.rs +++ b/src/frontend/src/optimizer/plan_node/stream_hash_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -18,7 +18,8 @@ use fixedbitset::FixedBitSet; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; use super::generic::PlanAggCall; -use super::{LogicalAgg, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use super::{ExprRewritable, LogicalAgg, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use crate::expr::ExprRewriter; use crate::optimizer::plan_node::generic::GenericPlanRef; use crate::optimizer::property::Distribution; use crate::stream_fragmenter::BuildFragmentGraphState; @@ -36,6 +37,7 @@ impl StreamHashAgg { pub fn new(logical: LogicalAgg, vnode_col_idx: Option) -> Self { let ctx = logical.base.ctx.clone(); let pk_indices = logical.base.logical_pk.to_vec(); + let schema = logical.schema().clone(); let input = logical.input(); let input_dist = input.distribution(); let dist = match input_dist { @@ -44,16 +46,24 @@ impl StreamHashAgg { .rewrite_provided_distribution(input_dist), d => d.clone(), }; + + let mut watermark_columns = FixedBitSet::with_capacity(schema.len()); + // Watermark column(s) must be in group key. + for (idx, input_idx) in logical.group_key().iter().enumerate() { + if input.watermark_columns().contains(*input_idx) { + watermark_columns.insert(idx); + } + } + // Hash agg executor might change the append-only behavior of the stream. let base = PlanBase::new_stream( ctx, - logical.schema().clone(), + schema, pk_indices, logical.functional_dependency().clone(), dist, false, - // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 - FixedBitSet::with_capacity(logical.schema().len()), + watermark_columns, ); StreamHashAgg { base, @@ -97,6 +107,7 @@ impl StreamNode for StreamHashAgg { use risingwave_pb::stream_plan::*; let result_table = self.logical.infer_result_table(self.vnode_col_idx); let agg_states = self.logical.infer_stream_agg_state(self.vnode_col_idx); + let distinct_dedup_tables = self.logical.infer_distinct_dedup_tables(self.vnode_col_idx); ProstStreamNode::HashAgg(HashAggNode { group_key: self.group_key().iter().map(|idx| *idx as u32).collect(), @@ -116,6 +127,35 @@ impl StreamNode for StreamHashAgg { .with_id(state.gen_table_id_wrapped()) .to_internal_table_prost(), ), + distinct_dedup_tables: distinct_dedup_tables + .into_iter() + .map(|(key_idx, table)| { + ( + key_idx as u32, + table + .with_id(state.gen_table_id_wrapped()) + .to_internal_table_prost(), + ) + }) + .collect(), }) } } + +impl ExprRewritable for StreamHashAgg { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_agg() + .unwrap() + .clone(), + self.vnode_col_idx, + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/stream_hash_join.rs b/src/frontend/src/optimizer/plan_node/stream_hash_join.rs index 4c3734576356a..3c6e9b72d80d8 100644 --- a/src/frontend/src/optimizer/plan_node/stream_hash_join.rs +++ b/src/frontend/src/optimizer/plan_node/stream_hash_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -21,8 +21,10 @@ use risingwave_pb::plan_common::JoinType; use risingwave_pb::stream_plan::stream_node::NodeBody; use risingwave_pb::stream_plan::HashJoinNode; -use super::{LogicalJoin, PlanBase, PlanRef, PlanTreeNodeBinary, StreamDeltaJoin, StreamNode}; -use crate::expr::Expr; +use super::{ + ExprRewritable, LogicalJoin, PlanBase, PlanRef, PlanTreeNodeBinary, StreamDeltaJoin, StreamNode, +}; +use crate::expr::{Expr, ExprRewriter}; use crate::optimizer::plan_node::generic::GenericPlanRef; use crate::optimizer::plan_node::utils::IndicesDisplay; use crate::optimizer::plan_node::{EqJoinPredicate, EqJoinPredicateDisplay}; @@ -220,14 +222,26 @@ impl StreamNode for StreamHashJoin { .collect_vec(); use super::stream::HashJoin; - let (left_table, left_degree_table) = HashJoin::infer_internal_and_degree_table_catalog( - self.left().plan_base(), - left_key_indices, - ); - let (right_table, right_degree_table) = HashJoin::infer_internal_and_degree_table_catalog( - self.right().plan_base(), - right_key_indices, - ); + let (left_table, left_degree_table, left_deduped_input_pk_indices) = + HashJoin::infer_internal_and_degree_table_catalog( + self.left().plan_base(), + left_key_indices, + ); + let (right_table, right_degree_table, right_deduped_input_pk_indices) = + HashJoin::infer_internal_and_degree_table_catalog( + self.right().plan_base(), + right_key_indices, + ); + + let left_deduped_input_pk_indices = left_deduped_input_pk_indices + .iter() + .map(|idx| *idx as u32) + .collect_vec(); + + let right_deduped_input_pk_indices = right_deduped_input_pk_indices + .iter() + .map(|idx| *idx as u32) + .collect_vec(); let (left_table, left_degree_table) = ( left_table.with_id(state.gen_table_id_wrapped()), @@ -259,6 +273,8 @@ impl StreamNode for StreamHashJoin { right_table: Some(right_table.to_internal_table_prost()), left_degree_table: Some(left_degree_table.to_internal_table_prost()), right_degree_table: Some(right_degree_table.to_internal_table_prost()), + left_deduped_input_pk_indices, + right_deduped_input_pk_indices, output_indices: self .logical .output_indices() @@ -269,3 +285,21 @@ impl StreamNode for StreamHashJoin { }) } } + +impl ExprRewritable for StreamHashJoin { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_join() + .unwrap() + .clone(), + self.eq_join_predicate.rewrite_exprs(r), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/stream_hop_window.rs b/src/frontend/src/optimizer/plan_node/stream_hop_window.rs index 8f987e25dc12e..afe318cb66a77 100644 --- a/src/frontend/src/optimizer/plan_node/stream_hop_window.rs +++ b/src/frontend/src/optimizer/plan_node/stream_hop_window.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,11 +14,10 @@ use std::fmt; -use fixedbitset::FixedBitSet; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; use risingwave_pb::stream_plan::HopWindowNode; -use super::{LogicalHopWindow, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use super::{ExprRewritable, LogicalHopWindow, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; use crate::stream_fragmenter::BuildFragmentGraphState; /// [`StreamHopWindow`] represents a hop window table function. 
@@ -33,19 +32,26 @@ impl StreamHopWindow { let ctx = logical.base.ctx.clone(); let pk_indices = logical.base.logical_pk.to_vec(); let input = logical.input(); + let schema = logical.schema().clone(); let i2o = logical.i2o_col_mapping(); let dist = i2o.rewrite_provided_distribution(input.distribution()); + let mut watermark_columns = i2o.rewrite_bitset(input.watermark_columns()); + if watermark_columns.contains(logical.core.time_col.index) { + // Watermark on `time_col` indicates watermark on both `window_start` and `window_end`. + watermark_columns.insert(schema.len() - 2); // window_start + watermark_columns.insert(schema.len() - 1); // window_end + } + let base = PlanBase::new_stream( ctx, - logical.schema().clone(), + schema, pk_indices, logical.functional_dependency().clone(), dist, logical.input().append_only(), - // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 - FixedBitSet::with_capacity(logical.schema().len()), + watermark_columns, ); Self { base, logical } } @@ -85,3 +91,5 @@ impl StreamNode for StreamHopWindow { }) } } + +impl ExprRewritable for StreamHopWindow {} diff --git a/src/frontend/src/optimizer/plan_node/stream_index_scan.rs b/src/frontend/src/optimizer/plan_node/stream_index_scan.rs index e67bf2bd1bc4b..10d725f0a8b9c 100644 --- a/src/frontend/src/optimizer/plan_node/stream_index_scan.rs +++ b/src/frontend/src/optimizer/plan_node/stream_index_scan.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,13 +14,13 @@ use std::fmt; -use fixedbitset::FixedBitSet; use itertools::Itertools; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; -use risingwave_pb::stream_plan::StreamNode as ProstStreamPlan; +use risingwave_pb::stream_plan::{ChainType, StreamNode as ProstStreamPlan}; -use super::{LogicalScan, PlanBase, PlanNodeId, StreamNode}; +use super::{ExprRewritable, LogicalScan, PlanBase, PlanNodeId, PlanRef, StreamNode}; use crate::catalog::ColumnId; +use crate::expr::ExprRewriter; use crate::optimizer::plan_node::utils::IndicesDisplay; use crate::optimizer::property::{Distribution, DistributionDisplay}; use crate::stream_fragmenter::BuildFragmentGraphState; @@ -34,12 +34,25 @@ pub struct StreamIndexScan { pub base: PlanBase, logical: LogicalScan, batch_plan_id: PlanNodeId, + chain_type: ChainType, } impl StreamIndexScan { - pub fn new(logical: LogicalScan) -> Self { + pub fn new(logical: LogicalScan, chain_type: ChainType) -> Self { let ctx = logical.base.ctx.clone(); + let distribution = { + let distribution_key = logical + .distribution_key() + .expect("distribution key of stream chain must exist in output columns"); + if distribution_key.is_empty() { + Distribution::Single + } else { + // See also `BatchSeqScan::clone_with_dist`. 
+ Distribution::UpstreamHashShard(distribution_key, logical.table_desc().table_id) + } + }; + let batch_plan_id = ctx.next_plan_node_id(); // TODO: derive from input let base = PlanBase::new_stream( @@ -47,15 +60,15 @@ impl StreamIndexScan { logical.schema().clone(), logical.base.logical_pk.clone(), logical.functional_dependency().clone(), - Distribution::HashShard(logical.distribution_key().unwrap()), + distribution, false, // TODO: determine the `append-only` field of table scan - // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 - FixedBitSet::with_capacity(logical.schema().len()), + logical.watermark_columns(), ); Self { base, logical, batch_plan_id, + chain_type, } } @@ -75,17 +88,14 @@ impl fmt::Display for StreamIndexScan { let verbose = self.base.ctx.is_explain_verbose(); let mut builder = f.debug_struct("StreamIndexScan"); - builder.field("index", &self.logical.table_name()).field( - "columns", - &format_args!( - "[{}]", - match verbose { - false => self.logical.column_names(), - true => self.logical.column_names_with_table_prefix(), - } - .join(", ") - ), - ); + let v = match verbose { + false => self.logical.column_names(), + true => self.logical.column_names_with_table_prefix(), + } + .join(", "); + builder + .field("index", &format_args!("{}", self.logical.table_name())) + .field("columns", &format_args!("[{}]", v)); if verbose { builder.field( @@ -137,6 +147,21 @@ impl StreamIndexScan { // The merge node should be empty ProstStreamPlan { node_body: Some(ProstStreamNode::Merge(Default::default())), + identity: "Upstream".into(), + fields: self + .logical + .table_desc() + .columns + .iter() + .map(|c| risingwave_common::catalog::Field::from(c).to_prost()) + .collect(), + stream_key: self + .logical + .table_desc() + .stream_key + .iter() + .map(|i| *i as _) + .collect(), ..Default::default() }, ProstStreamPlan { @@ -151,8 +176,7 @@ impl StreamIndexScan { ], node_body: Some(ProstStreamNode::Chain(ChainNode { table_id: self.logical.table_desc().table_id.table_id, - same_worker_node: true, - chain_type: ChainType::Chain as i32, + chain_type: self.chain_type as i32, // The fields from upstream upstream_fields: self .logical @@ -181,3 +205,21 @@ impl StreamIndexScan { } } } + +impl ExprRewritable for StreamIndexScan { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_scan() + .unwrap() + .clone(), + self.chain_type, + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/stream_local_simple_agg.rs b/src/frontend/src/optimizer/plan_node/stream_local_simple_agg.rs index 3560a6c1f0cac..25fbe75d01733 100644 --- a/src/frontend/src/optimizer/plan_node/stream_local_simple_agg.rs +++ b/src/frontend/src/optimizer/plan_node/stream_local_simple_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -19,7 +19,8 @@ use itertools::Itertools; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; use super::generic::PlanAggCall; -use super::{LogicalAgg, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use super::{ExprRewritable, LogicalAgg, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use crate::expr::ExprRewriter; use crate::optimizer::plan_node::generic::GenericPlanRef; use crate::optimizer::property::RequiredDist; use crate::stream_fragmenter::BuildFragmentGraphState; @@ -40,19 +41,27 @@ impl StreamLocalSimpleAgg { pub fn new(logical: LogicalAgg) -> Self { let ctx = logical.base.ctx.clone(); let pk_indices = logical.base.logical_pk.to_vec(); + let schema = logical.schema().clone(); let input = logical.input(); let input_dist = input.distribution(); debug_assert!(input_dist.satisfies(&RequiredDist::AnyShard)); + let mut watermark_columns = FixedBitSet::with_capacity(schema.len()); + // Watermark column(s) must be in group key. + for (idx, input_idx) in logical.group_key().iter().enumerate() { + if input.watermark_columns().contains(*input_idx) { + watermark_columns.insert(idx); + } + } + let base = PlanBase::new_stream( ctx, - logical.schema().clone(), + schema, pk_indices, logical.functional_dependency().clone(), input_dist.clone(), input.append_only(), - // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 - FixedBitSet::with_capacity(logical.schema().len()), + watermark_columns, ); StreamLocalSimpleAgg { base, logical } } @@ -98,6 +107,24 @@ impl StreamNode for StreamLocalSimpleAgg { agg_call_states: vec![], result_table: None, is_append_only: self.input().append_only(), + distinct_dedup_tables: Default::default(), }) } } + +impl ExprRewritable for StreamLocalSimpleAgg { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_agg() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/stream_materialize.rs b/src/frontend/src/optimizer/plan_node/stream_materialize.rs index cbda7a74e7602..92d6c5c45405a 100644 --- a/src/frontend/src/optimizer/plan_node/stream_materialize.rs +++ b/src/frontend/src/optimizer/plan_node/stream_materialize.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -15,17 +15,21 @@ use std::assert_matches::assert_matches; use std::collections::HashSet; use std::fmt; +use std::io::{Error, ErrorKind}; use fixedbitset::FixedBitSet; use itertools::Itertools; -use risingwave_common::catalog::{ColumnDesc, TableId}; +use risingwave_common::catalog::{ColumnCatalog, ColumnDesc, TableId, USER_COLUMN_ID_OFFSET}; use risingwave_common::error::{ErrorCode, Result}; +use risingwave_connector::sink::catalog::SinkType; +use risingwave_connector::sink::{ + SINK_FORMAT_APPEND_ONLY, SINK_FORMAT_OPTION, SINK_USER_FORCE_APPEND_ONLY_OPTION, +}; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; -use super::{PlanRef, PlanTreeNodeUnary, StreamNode, StreamSink}; -use crate::catalog::column_catalog::ColumnCatalog; +use super::{ExprRewritable, PlanRef, PlanTreeNodeUnary, StreamNode, StreamSink}; use crate::catalog::table_catalog::{TableCatalog, TableType, TableVersion}; -use crate::catalog::{FragmentId, USER_COLUMN_ID_OFFSET}; +use crate::catalog::FragmentId; use crate::optimizer::plan_node::{PlanBase, PlanNode}; use crate::optimizer::property::{Direction, Distribution, FieldOrder, Order, RequiredDist}; use crate::stream_fragmenter::BuildFragmentGraphState; @@ -201,6 +205,7 @@ impl StreamMaterialize { ) -> Result { let input = rewritten_input; + let watermark_columns = input.watermark_columns().clone(); // Note(congyi): avoid pk duplication let pk_indices = input.logical_pk().iter().copied().unique().collect_vec(); let schema = input.schema(); @@ -265,6 +270,7 @@ impl StreamMaterialize { handle_pk_conflict, read_prefix_len_hint, version, + watermark_columns, }) } @@ -279,14 +285,41 @@ impl StreamMaterialize { } /// Rewrite this plan node into [`StreamSink`] with the given `properties`. - pub fn rewrite_into_sink(self, properties: WithOptions) -> StreamSink { - let Self { - base, - input, - mut table, - } = self; - table.properties = properties; - StreamSink::with_base(input, table, base) + pub fn rewrite_into_sink(self, properties: WithOptions) -> Result { + let frontend_derived_append_only = self.table.append_only; + let user_defined_append_only = + properties.value_eq_ignore_case(SINK_FORMAT_OPTION, SINK_FORMAT_APPEND_ONLY); + let user_force_append_only = + properties.value_eq_ignore_case(SINK_USER_FORCE_APPEND_ONLY_OPTION, "true"); + + let sink_type = match ( + frontend_derived_append_only, + user_defined_append_only, + user_force_append_only, + ) { + (true, true, _) => SinkType::AppendOnly, + (false, true, true) => SinkType::ForceAppendOnly, + (_, false, false) => SinkType::Upsert, + (false, true, false) => { + return Err(ErrorCode::SinkError(Box::new(Error::new( + ErrorKind::InvalidInput, + "The sink cannot be append-only. Please add \"force_append_only='true'\" in WITH options to force the sink to be append-only. 
Notice that this will cause the sink executor to drop any UPDATE or DELETE message.", + ))) + .into()); + } + (_, false, true) => { + return Err(ErrorCode::SinkError(Box::new(Error::new( + ErrorKind::InvalidInput, + "Cannot force the sink to be append-only without \"format='append_only'\"in WITH options", + ))) + .into()); + } + }; + + Ok(StreamSink::new( + self.input, + self.table.to_sink_desc(properties, sink_type), + )) } } @@ -358,3 +391,5 @@ impl StreamNode for StreamMaterialize { }) } } + +impl ExprRewritable for StreamMaterialize {} diff --git a/src/frontend/src/optimizer/plan_node/stream_now.rs b/src/frontend/src/optimizer/plan_node/stream_now.rs index 6e44e2279863f..9d7ec3999d876 100644 --- a/src/frontend/src/optimizer/plan_node/stream_now.rs +++ b/src/frontend/src/optimizer/plan_node/stream_now.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ use risingwave_pb::stream_plan::NowNode; use super::generic::GenericPlanRef; use super::stream::StreamPlanRef; use super::utils::{IndicesDisplay, TableCatalogBuilder}; -use super::{PlanBase, StreamNode}; +use super::{ExprRewritable, LogicalNow, PlanBase, StreamNode}; use crate::optimizer::property::{Distribution, FunctionalDependencySet}; use crate::stream_fragmenter::BuildFragmentGraphState; use crate::OptimizerContextRef; @@ -35,15 +35,15 @@ pub struct StreamNow { } impl StreamNow { - pub fn new(ctx: OptimizerContextRef) -> Self { + pub fn new(_logical: LogicalNow, ctx: OptimizerContextRef) -> Self { let schema = Schema::new(vec![Field { data_type: DataType::Timestamptz, name: String::from("now"), sub_fields: vec![], type_name: String::default(), }]); - let mut watermark_cols = FixedBitSet::with_capacity(1); - watermark_cols.set(0, true); + let mut watermark_columns = FixedBitSet::with_capacity(1); + watermark_columns.set(0, true); let base = PlanBase::new_stream( ctx, schema, @@ -51,7 +51,7 @@ impl StreamNow { FunctionalDependencySet::default(), Distribution::Single, false, - watermark_cols, + watermark_columns, ); Self { base } } @@ -97,3 +97,5 @@ impl StreamNode for StreamNow { }) } } + +impl ExprRewritable for StreamNow {} diff --git a/src/frontend/src/optimizer/plan_node/stream_project.rs b/src/frontend/src/optimizer/plan_node/stream_project.rs index 715b6f45dc265..8658db4889fde 100644 --- a/src/frontend/src/optimizer/plan_node/stream_project.rs +++ b/src/frontend/src/optimizer/plan_node/stream_project.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,12 +16,14 @@ use std::fmt; use fixedbitset::FixedBitSet; use itertools::Itertools; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; use risingwave_pb::stream_plan::ProjectNode; use super::generic::GenericPlanRef; -use super::{LogicalProject, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; -use crate::expr::{try_derive_watermark, Expr, ExprDisplay, ExprImpl}; +use super::{ExprRewritable, LogicalProject, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use crate::expr::{try_derive_watermark, Expr, ExprDisplay, ExprImpl, ExprRewriter}; +use crate::optimizer::plan_node::generic::AliasedExpr; use crate::stream_fragmenter::BuildFragmentGraphState; /// `StreamProject` implements [`super::LogicalProject`] to evaluate specified expressions on input @@ -45,7 +47,16 @@ impl fmt::Display for StreamProject { &self .exprs() .iter() - .map(|expr| ExprDisplay { expr, input_schema }) + .zip_eq_fast(self.base.schema().fields().iter()) + .map(|(expr, field)| AliasedExpr { + expr: ExprDisplay { expr, input_schema }, + alias: { + match expr { + ExprImpl::InputRef(_) | ExprImpl::Literal(_) => None, + _ => Some(field.name.clone()), + } + }, + }) .collect_vec(), ); if !self.watermark_derivations.is_empty() { @@ -70,17 +81,18 @@ impl StreamProject { let ctx = logical.base.ctx.clone(); let input = logical.input(); let pk_indices = logical.base.logical_pk.to_vec(); + let schema = logical.schema().clone(); let distribution = logical .i2o_col_mapping() .rewrite_provided_distribution(input.distribution()); let mut watermark_derivations = vec![]; - let mut watermark_cols = FixedBitSet::with_capacity(logical.schema().len()); + let mut watermark_columns = FixedBitSet::with_capacity(schema.len()); for (expr_idx, expr) in logical.exprs().iter().enumerate() { if let Some(input_idx) = try_derive_watermark(expr) { if input.watermark_columns().contains(input_idx) { watermark_derivations.push((input_idx, expr_idx)); - watermark_cols.insert(expr_idx); + watermark_columns.insert(expr_idx); } } } @@ -88,13 +100,12 @@ impl StreamProject { // input's `append_only`. let base = PlanBase::new_stream( ctx, - logical.schema().clone(), + schema, pk_indices, logical.functional_dependency().clone(), distribution, logical.input().append_only(), - // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 - watermark_cols, + watermark_columns, ); StreamProject { base, @@ -150,3 +161,20 @@ impl StreamNode for StreamProject { }) } } + +impl ExprRewritable for StreamProject { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_project() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/stream_project_set.rs b/src/frontend/src/optimizer/plan_node/stream_project_set.rs index 84baafa0ec4c0..e23eb10128664 100644 --- a/src/frontend/src/optimizer/plan_node/stream_project_set.rs +++ b/src/frontend/src/optimizer/plan_node/stream_project_set.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -19,7 +19,8 @@ use itertools::Itertools; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; use risingwave_pb::stream_plan::ProjectSetNode; -use super::{LogicalProjectSet, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use super::{ExprRewritable, LogicalProjectSet, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use crate::expr::{try_derive_watermark, ExprRewriter}; use crate::stream_fragmenter::BuildFragmentGraphState; #[derive(Debug, Clone)] @@ -33,20 +34,31 @@ impl StreamProjectSet { let ctx = logical.base.ctx.clone(); let input = logical.input(); let pk_indices = logical.base.logical_pk.to_vec(); + let schema = logical.schema().clone(); let distribution = logical .i2o_col_mapping() .rewrite_provided_distribution(input.distribution()); + + let mut watermark_columns = FixedBitSet::with_capacity(schema.len()); + for (expr_idx, expr) in logical.select_list().iter().enumerate() { + if let Some(input_idx) = try_derive_watermark(expr) { + if input.watermark_columns().contains(input_idx) { + // The first column of ProjectSet is `projected_row_id`. + watermark_columns.insert(expr_idx + 1); + } + } + } + // ProjectSet executor won't change the append-only behavior of the stream, so it depends on // input's `append_only`. let base = PlanBase::new_stream( ctx, - logical.schema().clone(), + schema, pk_indices, logical.functional_dependency().clone(), distribution, logical.input().append_only(), - // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 - FixedBitSet::with_capacity(logical.schema().len()), + watermark_columns, ); StreamProjectSet { base, logical } } @@ -82,3 +94,20 @@ impl StreamNode for StreamProjectSet { }) } } + +impl ExprRewritable for StreamProjectSet { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_project_set() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/stream_row_id_gen.rs b/src/frontend/src/optimizer/plan_node/stream_row_id_gen.rs index e03e324e1a7b7..689007ae92e1d 100644 --- a/src/frontend/src/optimizer/plan_node/stream_row_id_gen.rs +++ b/src/frontend/src/optimizer/plan_node/stream_row_id_gen.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,7 +16,7 @@ use std::fmt; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; -use super::{PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; use crate::stream_fragmenter::BuildFragmentGraphState; #[derive(Clone, Debug)] @@ -35,7 +35,6 @@ impl StreamRowIdGen { input.functional_dependency().clone(), input.distribution().clone(), input.append_only(), - // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 input.watermark_columns().clone(), ); Self { @@ -77,3 +76,5 @@ impl StreamNode for StreamRowIdGen { }) } } + +impl ExprRewritable for StreamRowIdGen {} diff --git a/src/frontend/src/optimizer/plan_node/stream_share.rs b/src/frontend/src/optimizer/plan_node/stream_share.rs index 53b2eb831efdd..66361b3b89174 100644 --- a/src/frontend/src/optimizer/plan_node/stream_share.rs +++ b/src/frontend/src/optimizer/plan_node/stream_share.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ use risingwave_pb::stream_plan::{ DispatchStrategy, DispatcherType, ExchangeNode, StreamNode as ProstStreamPlan, }; -use super::{PlanRef, PlanTreeNodeUnary, StreamNode}; +use super::{ExprRewritable, PlanRef, PlanTreeNodeUnary, StreamNode}; use crate::optimizer::plan_node::{LogicalShare, PlanBase, PlanTreeNode}; use crate::optimizer::property::Distribution; use crate::stream_fragmenter::BuildFragmentGraphState; @@ -46,7 +46,6 @@ impl StreamShare { logical.functional_dependency().clone(), dist, logical.input().append_only(), - // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 logical.input().watermark_columns().clone(), ); StreamShare { base, logical } @@ -69,6 +68,12 @@ impl PlanTreeNodeUnary for StreamShare { } } +impl StreamShare { + pub fn replace_input(&self, plan: PlanRef) { + self.logical.replace_input(plan); + } +} + impl_plan_tree_node_for_unary! { StreamShare } impl StreamNode for StreamShare { @@ -138,3 +143,5 @@ impl StreamShare { } } } + +impl ExprRewritable for StreamShare {} diff --git a/src/frontend/src/optimizer/plan_node/stream_sink.rs b/src/frontend/src/optimizer/plan_node/stream_sink.rs index 4321a7f9ff274..9818f515a5ec6 100644 --- a/src/frontend/src/optimizer/plan_node/stream_sink.rs +++ b/src/frontend/src/optimizer/plan_node/stream_sink.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,40 +14,34 @@ use std::fmt; -use risingwave_common::catalog::Field; +use risingwave_connector::sink::catalog::desc::SinkDesc; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; -use super::{PlanBase, PlanRef, StreamNode}; +use super::{ExprRewritable, PlanBase, PlanRef, StreamNode}; use crate::optimizer::plan_node::PlanTreeNodeUnary; use crate::stream_fragmenter::BuildFragmentGraphState; -use crate::TableCatalog; /// [`StreamSink`] represents a table/connector sink at the very end of the graph. #[derive(Debug, Clone)] pub struct StreamSink { pub base: PlanBase, input: PlanRef, - // TODO(yuhao): Maybe use a real `SinkCatalog` here. 
@st1page - sink_catalog: TableCatalog, + sink_desc: SinkDesc, } impl StreamSink { #[must_use] - pub fn new(input: PlanRef, sink_catalog: TableCatalog) -> Self { + pub fn new(input: PlanRef, sink_desc: SinkDesc) -> Self { let base = PlanBase::derive_stream_plan_base(&input); - Self::with_base(input, sink_catalog, base) - } - - pub fn with_base(input: PlanRef, sink_catalog: TableCatalog, base: PlanBase) -> Self { Self { base, input, - sink_catalog, + sink_desc, } } - pub fn sink_catalog(&self) -> &TableCatalog { - &self.sink_catalog + pub fn sink_desc(&self) -> &SinkDesc { + &self.sink_desc } } @@ -57,7 +51,7 @@ impl PlanTreeNodeUnary for StreamSink { } fn clone_with_input(&self, input: PlanRef) -> Self { - Self::new(input, self.sink_catalog.clone()) + Self::new(input, self.sink_desc.clone()) // TODO(nanderstabel): Add assertions (assert_eq!) } } @@ -76,20 +70,9 @@ impl StreamNode for StreamSink { use risingwave_pb::stream_plan::*; ProstStreamNode::Sink(SinkNode { - table_id: self.sink_catalog.id().into(), - fields: self - .sink_catalog - .columns() - .iter() - .map(|c| Field::from(c.column_desc.clone()).to_prost()) - .collect(), - sink_pk: self - .sink_catalog - .pk() - .iter() - .map(|c| c.index as u32) - .collect(), - properties: self.sink_catalog.properties.inner().clone(), + sink_desc: Some(self.sink_desc.to_proto()), }) } } + +impl ExprRewritable for StreamSink {} diff --git a/src/frontend/src/optimizer/plan_node/stream_source.rs b/src/frontend/src/optimizer/plan_node/stream_source.rs index a44ab36b20d89..5151186525689 100644 --- a/src/frontend/src/optimizer/plan_node/stream_source.rs +++ b/src/frontend/src/optimizer/plan_node/stream_source.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -20,7 +20,7 @@ use risingwave_pb::catalog::ColumnIndex; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; use risingwave_pb::stream_plan::{SourceNode, StreamSource as ProstStreamSource}; -use super::{LogicalSource, PlanBase, StreamNode}; +use super::{ExprRewritable, LogicalSource, PlanBase, StreamNode}; use crate::optimizer::property::Distribution; use crate::stream_fragmenter::BuildFragmentGraphState; @@ -33,6 +33,14 @@ pub struct StreamSource { impl StreamSource { pub fn new(logical: LogicalSource) -> Self { + let mut watermark_columns = FixedBitSet::with_capacity(logical.schema().len()); + if let Some(catalog) = logical.source_catalog() { + catalog + .watermark_descs + .iter() + .for_each(|desc| watermark_columns.insert(desc.watermark_idx as usize)) + } + let base = PlanBase::new_stream( logical.ctx(), logical.schema().clone(), @@ -44,7 +52,7 @@ impl StreamSource { .catalog .as_ref() .map_or(true, |s| s.append_only), - FixedBitSet::with_capacity(logical.schema().len()), + watermark_columns, ); Self { base, logical } } @@ -103,3 +111,5 @@ impl StreamNode for StreamSource { ProstStreamNode::Source(SourceNode { source_inner }) } } + +impl ExprRewritable for StreamSource {} diff --git a/src/frontend/src/optimizer/plan_node/stream_table_scan.rs b/src/frontend/src/optimizer/plan_node/stream_table_scan.rs index 89f40488b81a2..d53d51fa110b3 100644 --- a/src/frontend/src/optimizer/plan_node/stream_table_scan.rs +++ b/src/frontend/src/optimizer/plan_node/stream_table_scan.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,14 +16,16 @@ use std::collections::HashMap; use std::fmt; use std::rc::Rc; -use fixedbitset::FixedBitSet; use itertools::Itertools; use risingwave_common::catalog::{Field, TableDesc}; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; -use risingwave_pb::stream_plan::StreamNode as ProstStreamPlan; +use risingwave_pb::stream_plan::{ChainType, StreamNode as ProstStreamPlan}; -use super::{LogicalScan, PlanBase, PlanNodeId, StreamIndexScan, StreamNode}; +use super::{ + ExprRewritable, LogicalScan, PlanBase, PlanNodeId, PlanRef, StreamIndexScan, StreamNode, +}; use crate::catalog::ColumnId; +use crate::expr::ExprRewriter; use crate::optimizer::plan_node::utils::IndicesDisplay; use crate::optimizer::property::{Distribution, DistributionDisplay}; use crate::stream_fragmenter::BuildFragmentGraphState; @@ -36,23 +38,33 @@ pub struct StreamTableScan { pub base: PlanBase, logical: LogicalScan, batch_plan_id: PlanNodeId, + chain_type: ChainType, } impl StreamTableScan { pub fn new(logical: LogicalScan) -> Self { + Self::new_with_chain_type(logical, ChainType::Backfill) + } + + pub fn new_with_chain_type(logical: LogicalScan, chain_type: ChainType) -> Self { let ctx = logical.base.ctx.clone(); let batch_plan_id = ctx.next_plan_node_id(); let distribution = { - let distribution_key = logical - .distribution_key() - .expect("distribution key of stream chain must exist in output columns"); - if distribution_key.is_empty() { - Distribution::Single - } else { - // See also `BatchSeqScan::clone_with_dist`. - Distribution::UpstreamHashShard(distribution_key, logical.table_desc().table_id) + match logical.distribution_key() { + Some(distribution_key) => { + if distribution_key.is_empty() { + Distribution::Single + } else { + // See also `BatchSeqScan::clone_with_dist`. 
+ Distribution::UpstreamHashShard( + distribution_key, + logical.table_desc().table_id, + ) + } + } + None => Distribution::SomeShard, } }; let base = PlanBase::new_stream( @@ -62,13 +74,13 @@ impl StreamTableScan { logical.functional_dependency().clone(), distribution, logical.table_desc().append_only, - // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 - FixedBitSet::with_capacity(logical.schema().len()), + logical.watermark_columns(), ); Self { base, logical, batch_plan_id, + chain_type, } } @@ -85,12 +97,17 @@ impl StreamTableScan { index_name: &str, index_table_desc: Rc, primary_to_secondary_mapping: &HashMap, + chain_type: ChainType, ) -> StreamIndexScan { - StreamIndexScan::new(self.logical.to_index_scan( - index_name, - index_table_desc, - primary_to_secondary_mapping, - )) + StreamIndexScan::new( + self.logical + .to_index_scan(index_name, index_table_desc, primary_to_secondary_mapping), + chain_type, + ) + } + + pub fn chain_type(&self) -> ChainType { + self.chain_type } } @@ -189,8 +206,7 @@ impl StreamTableScan { ], node_body: Some(ProstStreamNode::Chain(ChainNode { table_id: self.logical.table_desc().table_id.table_id, - same_worker_node: false, - chain_type: ChainType::Backfill as i32, + chain_type: self.chain_type as i32, // The fields from upstream upstream_fields: self .logical @@ -223,3 +239,20 @@ impl StreamTableScan { } } } + +impl ExprRewritable for StreamTableScan { + fn has_rewritable_expr(&self) -> bool { + true + } + + fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { + Self::new( + self.logical + .rewrite_exprs(r) + .as_logical_scan() + .unwrap() + .clone(), + ) + .into() + } +} diff --git a/src/frontend/src/optimizer/plan_node/stream_topn.rs b/src/frontend/src/optimizer/plan_node/stream_topn.rs index afb296b5a7f71..a69be61f66a20 100644 --- a/src/frontend/src/optimizer/plan_node/stream_topn.rs +++ b/src/frontend/src/optimizer/plan_node/stream_topn.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -14,9 +14,10 @@ use std::fmt; +use fixedbitset::FixedBitSet; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; -use super::{LogicalTopN, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use super::{ExprRewritable, LogicalTopN, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; use crate::optimizer::property::{Distribution, Order}; use crate::stream_fragmenter::BuildFragmentGraphState; @@ -32,19 +33,22 @@ impl StreamTopN { assert!(logical.group_key().is_empty()); assert!(logical.limit() > 0); let ctx = logical.base.ctx.clone(); + let input = logical.input(); + let schema = input.schema().clone(); let dist = match logical.input().distribution() { Distribution::Single => Distribution::Single, _ => panic!(), }; + let watermark_columns = FixedBitSet::with_capacity(schema.len()); let base = PlanBase::new_stream( ctx, - logical.schema().clone(), - logical.input().logical_pk().to_vec(), + schema, + input.logical_pk().to_vec(), logical.functional_dependency().clone(), dist, false, - logical.input().watermark_columns().clone(), + watermark_columns, ); StreamTopN { base, logical } } @@ -110,3 +114,4 @@ impl StreamNode for StreamTopN { } } } +impl ExprRewritable for StreamTopN {} diff --git a/src/frontend/src/optimizer/plan_node/stream_union.rs b/src/frontend/src/optimizer/plan_node/stream_union.rs index ccc52ca69d632..4eccf732f1904 100644 --- a/src/frontend/src/optimizer/plan_node/stream_union.rs +++ b/src/frontend/src/optimizer/plan_node/stream_union.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,12 +13,13 @@ // limitations under the License. use std::fmt; +use std::ops::BitAnd; use fixedbitset::FixedBitSet; use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; use risingwave_pb::stream_plan::UnionNode; -use super::PlanRef; +use super::{ExprRewritable, PlanRef}; use crate::optimizer::plan_node::stream::StreamPlanRef; use crate::optimizer::plan_node::{LogicalUnion, PlanBase, PlanTreeNode, StreamNode}; use crate::stream_fragmenter::BuildFragmentGraphState; @@ -34,22 +35,26 @@ impl StreamUnion { pub fn new(logical: LogicalUnion) -> Self { let ctx = logical.base.ctx.clone(); let pk_indices = logical.base.logical_pk.to_vec(); + let schema = logical.schema().clone(); let inputs = logical.inputs(); let dist = inputs[0].distribution().clone(); assert!(logical .inputs() .iter() .all(|input| *input.distribution() == dist)); + let watermark_columns = inputs.iter().fold( + FixedBitSet::with_capacity(schema.len()), + |acc_watermark_columns, input| acc_watermark_columns.bitand(input.watermark_columns()), + ); let base = PlanBase::new_stream( ctx, - logical.schema().clone(), + schema, pk_indices, logical.functional_dependency().clone(), dist, logical.inputs().iter().all(|x| x.append_only()), - // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 - FixedBitSet::with_capacity(logical.schema().len()), + watermark_columns, ); StreamUnion { base, logical } } @@ -83,3 +88,5 @@ impl StreamNode for StreamUnion { ProstStreamNode::Union(UnionNode {}) } } + +impl ExprRewritable for StreamUnion {} diff --git a/src/frontend/src/optimizer/plan_node/stream_watermark_filter.rs b/src/frontend/src/optimizer/plan_node/stream_watermark_filter.rs new file mode 100644 index 0000000000000..e67ebb293eda5 --- /dev/null +++ b/src/frontend/src/optimizer/plan_node/stream_watermark_filter.rs @@ 
-0,0 +1,159 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::fmt; + +use itertools::Itertools; +use risingwave_common::catalog::{Field, Schema}; +use risingwave_common::error::RwError; +use risingwave_common::types::DataType; +use risingwave_common::util::sort_util::OrderType; +use risingwave_pb::catalog::WatermarkDesc; +use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode; + +use super::utils::TableCatalogBuilder; +use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode}; +use crate::expr::{ExprDisplay, ExprImpl}; +use crate::stream_fragmenter::BuildFragmentGraphState; +use crate::{TableCatalog, WithOptions}; + +#[derive(Clone, Debug)] +pub struct StreamWatermarkFilter { + pub base: PlanBase, + input: PlanRef, + watermark_descs: Vec, +} + +impl StreamWatermarkFilter { + pub fn new(input: PlanRef, watermark_descs: Vec) -> Self { + let base = PlanBase::new_stream( + input.ctx(), + input.schema().clone(), + input.logical_pk().to_vec(), + input.functional_dependency().clone(), + input.distribution().clone(), + input.append_only(), + // TODO: https://github.com/risingwavelabs/risingwave/issues/7205 + input.watermark_columns().clone(), + ); + Self::with_base(base, input, watermark_descs) + } + + fn with_base(base: PlanBase, input: PlanRef, watermark_descs: Vec) -> Self { + Self { + base, + input, + watermark_descs, + } + } +} + +impl fmt::Display for StreamWatermarkFilter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + struct DisplayWatermarkDesc<'a> { + watermark_idx: u32, + expr: ExprImpl, + input_schema: &'a Schema, + } + + impl fmt::Debug for DisplayWatermarkDesc<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let expr_display = ExprDisplay { + expr: &self.expr, + input_schema: self.input_schema, + }; + write!(f, "idx: {}, expr: {}", self.watermark_idx, expr_display) + } + } + + let mut builder = f.debug_struct("StreamWatermarkFilter"); + let input_schema = self.input.schema(); + + let display_watermark_descs: Vec<_> = self + .watermark_descs + .iter() + .map(|desc| { + Ok::<_, RwError>(DisplayWatermarkDesc { + watermark_idx: desc.watermark_idx, + expr: ExprImpl::from_expr_proto(desc.get_expr()?)?, + input_schema, + }) + }) + .try_collect() + .map_err(|_| fmt::Error)?; + builder.field("watermark_descs", &display_watermark_descs); + builder.finish() + } +} + +impl PlanTreeNodeUnary for StreamWatermarkFilter { + fn input(&self) -> PlanRef { + self.input.clone() + } + + fn clone_with_input(&self, input: PlanRef) -> Self { + Self::new(input, self.watermark_descs.clone()) + } +} + +impl_plan_tree_node_for_unary! 
{StreamWatermarkFilter} + +pub fn infer_internal_table_catalog(watermark_type: DataType) -> TableCatalog { + let mut builder = TableCatalogBuilder::new(WithOptions::new(HashMap::default())); + + let key = Field { + data_type: DataType::Int16, + name: "vnode".to_string(), + sub_fields: vec![], + type_name: "".to_string(), + }; + let value = Field { + data_type: watermark_type, + name: "offset".to_string(), + sub_fields: vec![], + type_name: "".to_string(), + }; + + let ordered_col_idx = builder.add_column(&key); + builder.add_column(&value); + builder.add_order_column(ordered_col_idx, OrderType::Ascending); + + builder.set_vnode_col_idx(0); + builder.set_value_indices(vec![1]); + + builder.build(vec![0]) +} + +impl StreamNode for StreamWatermarkFilter { + fn to_stream_prost_body(&self, state: &mut BuildFragmentGraphState) -> ProstStreamNode { + use risingwave_pb::stream_plan::*; + + // TODO(yuhao): allow multiple watermark on source. + let [watermark_desc]: [_; 1] = self.watermark_descs.clone().try_into().unwrap(); + let watermark_type = (&watermark_desc.expr.unwrap().return_type.unwrap()).into(); + + let table = infer_internal_table_catalog(watermark_type); + + ProstStreamNode::WatermarkFilter(WatermarkFilterNode { + watermark_descs: self.watermark_descs.clone(), + tables: vec![table + .with_id(state.gen_table_id_wrapped()) + .to_internal_table_prost()], + }) + } +} + +// TODO(yuhao): may impl a `ExprRewritable` after store `ExplImpl` in catalog. +impl ExprRewritable for StreamWatermarkFilter {} diff --git a/src/frontend/src/optimizer/plan_node/to_prost.rs b/src/frontend/src/optimizer/plan_node/to_prost.rs index 62d7af8dac78b..eb08c91a5e1f4 100644 --- a/src/frontend/src/optimizer/plan_node/to_prost.rs +++ b/src/frontend/src/optimizer/plan_node/to_prost.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_node/utils.rs b/src/frontend/src/optimizer/plan_node/utils.rs index 564fc87d56218..f40989bda3f5f 100644 --- a/src/frontend/src/optimizer/plan_node/utils.rs +++ b/src/frontend/src/optimizer/plan_node/utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,11 +15,11 @@ use std::collections::HashMap; use std::{fmt, vec}; +use fixedbitset::FixedBitSet; use itertools::Itertools; -use risingwave_common::catalog::{ColumnDesc, Field, Schema}; +use risingwave_common::catalog::{ColumnCatalog, ColumnDesc, Field, Schema}; use risingwave_common::util::sort_util::OrderType; -use crate::catalog::column_catalog::ColumnCatalog; use crate::catalog::table_catalog::TableType; use crate::catalog::{FragmentId, TableCatalog, TableId}; use crate::optimizer::property::{Direction, FieldOrder}; @@ -35,6 +35,7 @@ pub struct TableCatalogBuilder { vnode_col_idx: Option, column_names: HashMap, read_prefix_len_hint: usize, + watermark_columns: Option, } /// For DRY, mainly used for construct internal table catalog in stateful streaming executors. 
@@ -92,6 +93,11 @@ impl TableCatalogBuilder { self.value_indices = Some(value_indices); } + #[allow(dead_code)] + pub fn set_watermark_columns(&mut self, watermark_columns: FixedBitSet) { + self.watermark_columns = Some(watermark_columns); + } + /// Check the column name whether exist before. if true, record occurrence and change the name /// to avoid duplicate. fn avoid_duplicate_col_name(&mut self, column_desc: &mut ColumnDesc) { @@ -113,6 +119,10 @@ impl TableCatalogBuilder { /// Consume builder and create `TableCatalog` (for proto). pub fn build(self, distribution_key: Vec) -> TableCatalog { assert!(self.read_prefix_len_hint <= self.pk.len()); + let watermark_columns = match self.watermark_columns { + Some(w) => w, + None => FixedBitSet::with_capacity(self.columns.len()), + }; TableCatalog { id: TableId::placeholder(), associated_source_id: None, @@ -138,6 +148,7 @@ impl TableCatalogBuilder { handle_pk_conflict: false, read_prefix_len_hint: self.read_prefix_len_hint, version: None, // the internal table is not versioned and can't be schema changed + watermark_columns, } } diff --git a/src/frontend/src/optimizer/plan_rewriter/mod.rs b/src/frontend/src/optimizer/plan_rewriter/mod.rs index a03c000b2eec9..fb61a36f34716 100644 --- a/src/frontend/src/optimizer/plan_rewriter/mod.rs +++ b/src/frontend/src/optimizer/plan_rewriter/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_rewriter/share_source_rewriter.rs b/src/frontend/src/optimizer/plan_rewriter/share_source_rewriter.rs index a9f84db3c76fc..d466c9a2683c9 100644 --- a/src/frontend/src/optimizer/plan_rewriter/share_source_rewriter.rs +++ b/src/frontend/src/optimizer/plan_rewriter/share_source_rewriter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_visitor/input_ref_validator.rs b/src/frontend/src/optimizer/plan_visitor/input_ref_validator.rs index e4c3fe809c14e..240360e5eae81 100644 --- a/src/frontend/src/optimizer/plan_visitor/input_ref_validator.rs +++ b/src/frontend/src/optimizer/plan_visitor/input_ref_validator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_visitor/max_one_row_visitor.rs b/src/frontend/src/optimizer/plan_visitor/max_one_row_visitor.rs index df7d4a31db4af..4851cb302fde1 100644 --- a/src/frontend/src/optimizer/plan_visitor/max_one_row_visitor.rs +++ b/src/frontend/src/optimizer/plan_visitor/max_one_row_visitor.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,10 +16,11 @@ use std::collections::HashSet; use crate::optimizer::plan_node::{ LogicalAgg, LogicalApply, LogicalExpand, LogicalFilter, LogicalHopWindow, LogicalLimit, - LogicalProjectSet, LogicalTopN, LogicalUnion, LogicalValues, PlanTreeNodeBinary, - PlanTreeNodeUnary, + LogicalNow, LogicalProject, LogicalProjectSet, LogicalTopN, LogicalUnion, LogicalValues, + PlanTreeNodeBinary, PlanTreeNodeUnary, }; use crate::optimizer::plan_visitor::PlanVisitor; +use crate::optimizer::PlanTreeNode; pub struct MaxOneRowVisitor; @@ -41,6 +42,14 @@ impl PlanVisitor for MaxOneRowVisitor { plan.limit() <= 1 || self.visit(plan.input()) } + fn visit_logical_now(&mut self, _plan: &LogicalNow) -> bool { + true + } + + fn visit_logical_project(&mut self, plan: &LogicalProject) -> bool { + self.visit(plan.input()) + } + fn visit_logical_top_n(&mut self, plan: &LogicalTopN) -> bool { (plan.limit() <= 1 && !plan.with_ties()) || self.visit(plan.input()) } @@ -88,3 +97,50 @@ impl PlanVisitor for HasMaxOneRowApply { plan.max_one_row() | self.visit(plan.left()) | self.visit(plan.right()) } } + +pub struct CountRows; + +impl PlanVisitor> for CountRows { + fn merge(_a: Option, _b: Option) -> Option { + // Impossible to determine count e.g. after a join + None + } + + fn visit_logical_agg(&mut self, plan: &LogicalAgg) -> Option { + if plan.group_key().is_empty() { + Some(1) + } else { + None + } + } + + fn visit_logical_values(&mut self, plan: &LogicalValues) -> Option { + Some(plan.rows().len()) + } + + fn visit_logical_project(&mut self, plan: &LogicalProject) -> Option { + self.visit(plan.input()) + } + + fn visit_logical_union(&mut self, plan: &LogicalUnion) -> Option { + if !plan.all() { + // We cannot deal with deduplication + return None; + } + plan.inputs() + .iter() + .fold(Some(0), |init, i| match (init, self.visit(i.clone())) { + (None, _) => None, + (_, None) => None, + (Some(a), Some(b)) => Some(a + b), + }) + } + + fn visit_logical_filter(&mut self, _plan: &LogicalFilter) -> Option { + None + } + + fn visit_logical_now(&mut self, _plan: &LogicalNow) -> Option { + Some(1) + } +} diff --git a/src/frontend/src/optimizer/plan_visitor/mod.rs b/src/frontend/src/optimizer/plan_visitor/mod.rs index d78bf41edcd2a..99ab0dfc2b1b4 100644 --- a/src/frontend/src/optimizer/plan_visitor/mod.rs +++ b/src/frontend/src/optimizer/plan_visitor/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/plan_visitor/plan_correlated_id_finder.rs b/src/frontend/src/optimizer/plan_visitor/plan_correlated_id_finder.rs index c51c727d73ab5..3929da18900dc 100644 --- a/src/frontend/src/optimizer/plan_visitor/plan_correlated_id_finder.rs +++ b/src/frontend/src/optimizer/plan_visitor/plan_correlated_id_finder.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
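Note on the hunk above: the `CountRows` visitor only reports a count for plan fragments whose cardinality is statically known. A no-group-key `LogicalAgg` is exactly one row, `LogicalValues` is its literal row count, `LogicalNow` is one row, `LogicalProject` passes its input through, and a `UNION ALL` sums its inputs; any other node, including anything reached through the default `merge` (e.g. a join) and an explicit `LogicalFilter`, yields `None`. A minimal caller sketch, using only items that appear in these hunks (the wrapper function itself is hypothetical):

    use crate::optimizer::plan_visitor::CountRows;
    use crate::optimizer::{PlanRef, PlanVisitor};

    fn statically_known_row_count(plan: PlanRef) -> Option<usize> {
        // `CountRows` is a stateless unit struct; `visit` walks the plan and
        // returns `Some(n)` only when every node on the way supports counting.
        let mut counter = CountRows;
        counter.visit(plan)
    }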
diff --git a/src/frontend/src/optimizer/plan_visitor/share_parent_counter.rs b/src/frontend/src/optimizer/plan_visitor/share_parent_counter.rs index a2143473f94da..c74142375f845 100644 --- a/src/frontend/src/optimizer/plan_visitor/share_parent_counter.rs +++ b/src/frontend/src/optimizer/plan_visitor/share_parent_counter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/property/distribution.rs b/src/frontend/src/optimizer/property/distribution.rs index 734ad70ab05d0..cdc45c4d12328 100644 --- a/src/frontend/src/optimizer/property/distribution.rs +++ b/src/frontend/src/optimizer/property/distribution.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -50,17 +50,18 @@ use fixedbitset::FixedBitSet; use itertools::Itertools; use risingwave_common::catalog::{FieldDisplay, Schema, TableId}; use risingwave_common::error::Result; -use risingwave_common::hash::{ParallelUnitId, VnodeMapping}; +use risingwave_common::hash::{ParallelUnitId, ParallelUnitMapping}; use risingwave_pb::batch_plan::exchange_info::{ ConsistentHashInfo, Distribution as DistributionProst, DistributionMode, HashInfo, }; use risingwave_pb::batch_plan::ExchangeInfo; use super::super::plan_node::*; +use crate::catalog::catalog_service::CatalogReader; use crate::optimizer::plan_node::stream::StreamPlanRef; use crate::optimizer::property::Order; use crate::optimizer::PlanRef; -use crate::scheduler::BatchPlanFragmenter; +use crate::scheduler::worker_node_manager::WorkerNodeManagerRef; /// the distribution property provided by a operator. 
#[derive(Debug, Clone, PartialEq)] @@ -108,7 +109,12 @@ pub enum RequiredDist { } impl Distribution { - pub fn to_prost(&self, output_count: u32, fragmenter: &BatchPlanFragmenter) -> ExchangeInfo { + pub fn to_prost( + &self, + output_count: u32, + catalog_reader: &CatalogReader, + worker_node_manager: &WorkerNodeManagerRef, + ) -> ExchangeInfo { ExchangeInfo { mode: match self { Distribution::Single => DistributionMode::Single, @@ -139,19 +145,18 @@ impl Distribution { "hash key should not be empty, use `Single` instead" ); - let vnode_mapping = Self::get_vnode_mapping(fragmenter, table_id) - .expect("vnode_mapping of UpstreamHashShard should not be none"); + let vnode_mapping = + Self::get_vnode_mapping(catalog_reader, worker_node_manager, table_id) + .expect("vnode_mapping of UpstreamHashShard should not be none"); let pu2id_map: HashMap = vnode_mapping - .iter() - .sorted() - .dedup() + .iter_unique() .enumerate() - .map(|(i, &pu)| (pu, i as u32)) + .map(|(i, pu)| (pu, i as u32)) .collect(); Some(DistributionProst::ConsistentHashInfo(ConsistentHashInfo { - vmap: vnode_mapping.iter().map(|x| pu2id_map[x]).collect_vec(), + vmap: vnode_mapping.iter().map(|x| pu2id_map[&x]).collect_vec(), key: key.iter().map(|num| *num as u32).collect(), })) } @@ -196,18 +201,14 @@ impl Distribution { #[inline(always)] fn get_vnode_mapping( - fragmenter: &BatchPlanFragmenter, + catalog_reader: &CatalogReader, + worker_node_manager: &WorkerNodeManagerRef, table_id: &TableId, - ) -> Option { - fragmenter - .catalog_reader() + ) -> Option { + catalog_reader .read_guard() .get_table_by_id(table_id) - .map(|table| { - fragmenter - .worker_node_manager() - .get_fragment_mapping(&table.fragment_id) - }) + .map(|table| worker_node_manager.get_fragment_mapping(&table.fragment_id)) .ok() .flatten() } diff --git a/src/frontend/src/optimizer/property/func_dep.rs b/src/frontend/src/optimizer/property/func_dep.rs index 54279acf289c8..f292ad611dda0 100644 --- a/src/frontend/src/optimizer/property/func_dep.rs +++ b/src/frontend/src/optimizer/property/func_dep.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -297,7 +297,7 @@ impl FunctionalDependencySet { assert!( self.is_key_inner(&key), "{:?} is not a key!", - key.ones().into_iter().collect_vec() + key.ones().collect_vec() ); let mut new_key = key.clone(); for i in key.ones() { diff --git a/src/frontend/src/optimizer/property/mod.rs b/src/frontend/src/optimizer/property/mod.rs index 3c2976c37f9dd..8d2cdc7c2f001 100644 --- a/src/frontend/src/optimizer/property/mod.rs +++ b/src/frontend/src/optimizer/property/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/property/order.rs b/src/frontend/src/optimizer/property/order.rs index 48a0098801541..a6922fb609784 100644 --- a/src/frontend/src/optimizer/property/order.rs +++ b/src/frontend/src/optimizer/property/order.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
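In the `ConsistentHashInfo` construction above, `iter_unique()` enumerates each distinct parallel unit of the vnode mapping once, `pu2id_map` gives those units dense indices, and `vmap` stores the dense index for every vnode. A self-contained sketch of that remapping over bare integers; this is illustrative only, the real code works on `ParallelUnitMapping` and its enumeration order may differ (the old code sorted before deduplicating):

    use std::collections::HashMap;

    /// Remap raw parallel-unit ids to dense indices (first occurrence wins here).
    fn compact_vmap(vnode_to_pu: &[u32]) -> (HashMap<u32, u32>, Vec<u32>) {
        let mut pu2id: HashMap<u32, u32> = HashMap::new();
        for &pu in vnode_to_pu {
            let next = pu2id.len() as u32;
            pu2id.entry(pu).or_insert(next);
        }
        let vmap = vnode_to_pu.iter().map(|pu| pu2id[pu]).collect();
        (pu2id, vmap)
    }

    // compact_vmap(&[5, 7, 5, 9, 7]) == ({5: 0, 7: 1, 9: 2}, [0, 1, 0, 2, 1])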
diff --git a/src/frontend/src/optimizer/rule/agg_dedup_group_key_rule.rs b/src/frontend/src/optimizer/rule/agg_dedup_group_key_rule.rs index d00339ecdb790..5c3d64221d91d 100644 --- a/src/frontend/src/optimizer/rule/agg_dedup_group_key_rule.rs +++ b/src/frontend/src/optimizer/rule/agg_dedup_group_key_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -68,7 +68,7 @@ impl Rule for AggDedupGroupKeyRule { new_agg.into(), group_key_mapping .into_iter() - .chain((deduped_group_key_num..deduped_group_key_num + agg_call_num).into_iter()), + .chain(deduped_group_key_num..deduped_group_key_num + agg_call_num), ); Some(proj.into()) } diff --git a/src/frontend/src/optimizer/rule/agg_project_merge_rule.rs b/src/frontend/src/optimizer/rule/agg_project_merge_rule.rs index c7665f52b14a7..012d2e460a82d 100644 --- a/src/frontend/src/optimizer/rule/agg_project_merge_rule.rs +++ b/src/frontend/src/optimizer/rule/agg_project_merge_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/rule/apply_agg_transpose_rule.rs b/src/frontend/src/optimizer/rule/apply_agg_transpose_rule.rs index 29acd58a09a4b..a695b0a7e15db 100644 --- a/src/frontend/src/optimizer/rule/apply_agg_transpose_rule.rs +++ b/src/frontend/src/optimizer/rule/apply_agg_transpose_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/rule/apply_filter_transpose_rule.rs b/src/frontend/src/optimizer/rule/apply_filter_transpose_rule.rs index 73d66c5d7564d..006d8a19346c7 100644 --- a/src/frontend/src/optimizer/rule/apply_filter_transpose_rule.rs +++ b/src/frontend/src/optimizer/rule/apply_filter_transpose_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/rule/apply_join_transpose_rule.rs b/src/frontend/src/optimizer/rule/apply_join_transpose_rule.rs index 09ddf7e300281..5af1a6a3e8910 100644 --- a/src/frontend/src/optimizer/rule/apply_join_transpose_rule.rs +++ b/src/frontend/src/optimizer/rule/apply_join_transpose_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -23,9 +23,7 @@ use crate::expr::{ CollectInputRef, CorrelatedId, CorrelatedInputRef, Expr, ExprImpl, ExprRewriter, ExprType, ExprVisitor, FunctionCall, InputRef, }; -use crate::optimizer::plan_node::{ - LogicalApply, LogicalFilter, LogicalJoin, LogicalProject, PlanTreeNodeBinary, -}; +use crate::optimizer::plan_node::{LogicalApply, LogicalFilter, LogicalJoin, PlanTreeNodeBinary}; use crate::optimizer::plan_visitor::{ExprCorrelatedIdFinder, PlanCorrelatedIdFinder}; use crate::optimizer::PlanRef; use crate::utils::{ColIndexMapping, Condition}; @@ -157,36 +155,38 @@ impl Rule for ApplyJoinTransposeRule { JoinType::Unspecified => unreachable!(), }; - if push_left && push_right { - Some(self.push_apply_both_side( + let out = if push_left && push_right { + self.push_apply_both_side( apply_left, join, apply_on, apply_join_type, correlated_id, correlated_indices, - )) + ) } else if push_left { - Some(self.push_apply_left_side( + self.push_apply_left_side( apply_left, join, apply_on, apply_join_type, correlated_id, correlated_indices, - )) + ) } else if push_right { - Some(self.push_apply_right_side( + self.push_apply_right_side( apply_left, join, apply_on, apply_join_type, correlated_id, correlated_indices, - )) + ) } else { unreachable!(); - } + }; + assert_eq!(out.schema(), plan.schema()); + Some(out) } } @@ -365,19 +365,41 @@ impl ApplyJoinTransposeRule { correlated_indices, false, ); + let output_indices: Vec<_> = { + let (apply_left_len, join_right_len) = match apply_join_type { + JoinType::LeftSemi | JoinType::LeftAnti => (apply_left_len, 0), + JoinType::RightSemi | JoinType::RightAnti => (0, join.right().schema().len()), + _ => (apply_left_len, join.right().schema().len()), + }; + + let left_iter = join_left_len..join_left_len + apply_left_len; + let right_iter = (0..join_left_len).chain( + join_left_len + apply_left_len..join_left_len + apply_left_len + join_right_len, + ); + + match join.join_type() { + JoinType::LeftSemi | JoinType::LeftAnti => left_iter.collect(), + JoinType::RightSemi | JoinType::RightAnti => right_iter.collect(), + _ => left_iter.chain(right_iter).collect(), + } + }; + let mut output_indices_mapping = + ColIndexMapping::new(output_indices.iter().map(|x| Some(*x)).collect()); let new_join = LogicalJoin::new( join.left().clone(), new_join_right.clone(), join.join_type(), new_join_condition, - ); + ) + .clone_with_output_indices(output_indices); // Leave other condition for predicate push down to deal with LogicalFilter::create( new_join.into(), Condition { conjunctions: other_condition, - }, + } + .rewrite_expr(&mut output_indices_mapping), ) } @@ -511,78 +533,46 @@ impl ApplyJoinTransposeRule { correlated_indices, false, ); + + let output_indices: Vec<_> = { + let (apply_left_len, join_right_len) = match apply_join_type { + JoinType::LeftSemi | JoinType::LeftAnti => (apply_left_len, 0), + JoinType::RightSemi | JoinType::RightAnti => (0, join.right().schema().len()), + _ => (apply_left_len, join.right().schema().len()), + }; + + let left_iter = 0..join_left_len + apply_left_len; + let right_iter = join_left_len + apply_left_len * 2 + ..join_left_len + apply_left_len * 2 + join_right_len; + + match join.join_type() { + JoinType::LeftSemi | JoinType::LeftAnti => left_iter.collect(), + JoinType::RightSemi | JoinType::RightAnti => right_iter.collect(), + _ => left_iter.chain(right_iter).collect(), + } + }; let new_join = LogicalJoin::new( new_join_left.clone(), new_join_right.clone(), join.join_type(), new_join_condition, - ); + ) + 
.clone_with_output_indices(output_indices.clone()); match join.join_type() { JoinType::LeftSemi | JoinType::LeftAnti | JoinType::RightSemi | JoinType::RightAnti => { new_join.into() } JoinType::Inner | JoinType::LeftOuter | JoinType::RightOuter | JoinType::FullOuter => { - // Use project to provide a natural join - let mut project_exprs: Vec = vec![]; - - let d_offset = if join.join_type() == JoinType::RightOuter { - new_join_left.schema().len() - } else { - 0 - }; - - project_exprs.extend( - apply_left - .schema() - .fields - .iter() - .enumerate() - .map(|(i, field)| { - ExprImpl::InputRef(Box::new(InputRef::new( - i + d_offset, - field.data_type.clone(), - ))) - }) - .collect_vec(), - ); - - project_exprs.extend( - new_join_left - .schema() - .fields - .iter() - .enumerate() - .skip(apply_left_len) - .map(|(i, field)| { - ExprImpl::InputRef(Box::new(InputRef::new(i, field.data_type.clone()))) - }) - .collect_vec(), - ); - project_exprs.extend( - new_join_right - .schema() - .fields - .iter() - .enumerate() - .skip(apply_left_len) - .map(|(i, field)| { - ExprImpl::InputRef(Box::new(InputRef::new( - i + new_join_left.schema().len(), - field.data_type.clone(), - ))) - }) - .collect_vec(), - ); - - let new_project = LogicalProject::create(new_join.into(), project_exprs); - + let mut output_indices_mapping = + ColIndexMapping::new(output_indices.iter().map(|x| Some(*x)).collect()); // Leave other condition for predicate push down to deal with LogicalFilter::create( - new_project, + new_join.into(), Condition { conjunctions: other_condition, - }, + } + .rewrite_expr(&mut output_indices_mapping), ) } JoinType::Unspecified => unreachable!(), diff --git a/src/frontend/src/optimizer/rule/apply_project_transpose_rule.rs b/src/frontend/src/optimizer/rule/apply_project_transpose_rule.rs index f95c8d8078712..a5a158f3f2e01 100644 --- a/src/frontend/src/optimizer/rule/apply_project_transpose_rule.rs +++ b/src/frontend/src/optimizer/rule/apply_project_transpose_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/rule/apply_scan_rule.rs b/src/frontend/src/optimizer/rule/apply_scan_rule.rs index 81cf6acaf254a..c69c7f80fa7d3 100644 --- a/src/frontend/src/optimizer/rule/apply_scan_rule.rs +++ b/src/frontend/src/optimizer/rule/apply_scan_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/rule/apply_share_eliminate_rule.rs b/src/frontend/src/optimizer/rule/apply_share_eliminate_rule.rs index 0ce71992306ce..ec7fc5b509b77 100644 --- a/src/frontend/src/optimizer/rule/apply_share_eliminate_rule.rs +++ b/src/frontend/src/optimizer/rule/apply_share_eliminate_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
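After this rewrite, both join-transpose branches pin the join's column order with `clone_with_output_indices` and then push the leftover predicate through a `ColIndexMapping`, so that the `InputRef`s in the remaining filter agree with the reordered output instead of the old `LogicalProject`-based layout. Conceptually the mapping is just a positional remap of column references; a tiny standalone sketch, with indices invented for illustration (the real `ColIndexMapping` is applied through the `ExprRewriter` trait as in the hunks above):

    /// What a column-index remap does to an `InputRef`-style column reference.
    fn remap(col: usize, map: &[Option<usize>]) -> usize {
        map[col].expect("column was dropped by the mapping")
    }

    fn main() {
        // If the new output places old columns at positions [2, 0, 1], a predicate
        // that used to reference column 0 must reference column 2 afterwards.
        let map = [Some(2), Some(0), Some(1)];
        assert_eq!(remap(0, &map), 2);
    }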
diff --git a/src/frontend/src/optimizer/rule/apply_to_join_rule.rs b/src/frontend/src/optimizer/rule/apply_to_join_rule.rs index 334730bfb38b8..68ba7d77de27a 100644 --- a/src/frontend/src/optimizer/rule/apply_to_join_rule.rs +++ b/src/frontend/src/optimizer/rule/apply_to_join_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/rule/dag_to_tree_rule.rs b/src/frontend/src/optimizer/rule/dag_to_tree_rule.rs index fac195c045641..8fd773471da51 100644 --- a/src/frontend/src/optimizer/rule/dag_to_tree_rule.rs +++ b/src/frontend/src/optimizer/rule/dag_to_tree_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/rule/distinct_agg_rule.rs b/src/frontend/src/optimizer/rule/distinct_agg_rule.rs index 20e348b86ab1d..8dc0af95e87cb 100644 --- a/src/frontend/src/optimizer/rule/distinct_agg_rule.rs +++ b/src/frontend/src/optimizer/rule/distinct_agg_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -92,7 +92,6 @@ impl DistinctAggRule { column_subsets.push(subset); } - let mut num_of_subsets_for_distinct_agg = 0; distinct_aggs.iter().for_each(|agg_call| { let subset = { let mut subset = FixedBitSet::from_iter(group_keys.iter().cloned()); @@ -106,14 +105,19 @@ impl DistinctAggRule { flag_values.push(flag_value); hash_map.insert(subset.clone(), flag_value); column_subsets.push(subset); - num_of_subsets_for_distinct_agg += 1; } }); - if num_of_subsets_for_distinct_agg <= 1 { + let n_different_distinct = distinct_aggs + .iter() + .unique_by(|agg_call| agg_call.input_indices()) + .count(); + assert_ne!(n_different_distinct, 0); // since `distinct_aggs` is not empty here + if n_different_distinct == 1 { // no need to have expand if there is only one distinct aggregates. return Some((input, flag_values, false)); } + let expand = LogicalExpand::create(input, column_subsets); // manual version of column pruning for expand. let project = Self::build_project(input_schema_len, expand, group_keys, agg_calls); @@ -268,7 +272,11 @@ impl DistinctAggRule { | AggKind::Avg | AggKind::StringAgg | AggKind::ArrayAgg - | AggKind::FirstValue => (), + | AggKind::FirstValue + | AggKind::StddevPop + | AggKind::StddevSamp + | AggKind::VarPop + | AggKind::VarSamp => (), AggKind::Count => { agg_call.agg_kind = AggKind::Sum0; } diff --git a/src/frontend/src/optimizer/rule/index_delta_join_rule.rs b/src/frontend/src/optimizer/rule/index_delta_join_rule.rs index ddd1d906fca69..ed9677f68ebb5 100644 --- a/src/frontend/src/optimizer/rule/index_delta_join_rule.rs +++ b/src/frontend/src/optimizer/rule/index_delta_join_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,10 +16,10 @@ use std::rc::Rc; use itertools::Itertools; use risingwave_pb::plan_common::JoinType; +use risingwave_pb::stream_plan::ChainType; use super::super::plan_node::*; use super::{BoxedRule, Rule}; -use crate::optimizer::property::{Distribution, Order, RequiredDist}; /// Use index scan and delta joins for supported queries. pub struct IndexDeltaJoinRule {} @@ -51,11 +51,11 @@ impl Rule for IndexDeltaJoinRule { let left_indices = join.eq_join_predicate().left_eq_indexes(); let right_indices = join.eq_join_predicate().right_eq_indexes(); - fn match_indexes(join_indices: &[usize], table_scan: &StreamTableScan) -> Option { - if table_scan.logical().indexes().is_empty() { - return None; - } - + fn match_indexes( + join_indices: &[usize], + table_scan: &StreamTableScan, + chain_type: ChainType, + ) -> Option { for index in table_scan.logical().indexes() { // Only full covering index can be used in delta join if !index.full_covering() { @@ -97,31 +97,46 @@ impl Rule for IndexDeltaJoinRule { index.index_table.name.as_str(), index.index_table.table_desc().into(), p2s_mapping, + chain_type, ) .into(), ); } - None - } + // Primary table is also an index. + let primary_table = table_scan.logical(); + if let Some(primary_table_distribution_key) = primary_table.distribution_key() + && primary_table_distribution_key == join_indices { + // Check join key is prefix of primary table order key + let primary_table_order_key_prefix = primary_table.table_desc().pk.iter() + .map(|x| x.column_idx) + .take(primary_table_distribution_key.len()) + .collect_vec(); - if let Some(left) = match_indexes(&left_indices, input_left) { - if let Some(right) = match_indexes(&right_indices, input_right) { - // We already ensured that index and join use the same distribution, so we directly - // replace the children with stream index scan without inserting any exchanges. + if primary_table_order_key_prefix != join_indices { + return None; + } - fn upstream_hash_shard_to_hash_shard(plan: PlanRef) -> PlanRef { - if let Distribution::UpstreamHashShard(key, _) = plan.distribution() { - RequiredDist::hash_shard(key) - .enforce_if_not_satisfies(plan, &Order::any()) - .unwrap() - } else { - plan - } + if chain_type != table_scan.chain_type() { + Some( + StreamTableScan::new_with_chain_type(table_scan.logical().clone(), chain_type).into() + ) + } else { + Some(table_scan.clone().into()) } - let left = upstream_hash_shard_to_hash_shard(left); - let right = upstream_hash_shard_to_hash_shard(right); + } else { + None + } + } + // Delta join only needs to backfill one stream flow and others should be upstream only + // chain. Here we choose the left one to backfill and right one to upstream only + // chain. + if let Some(left) = match_indexes(&left_indices, input_left, ChainType::Backfill) { + if let Some(right) = match_indexes(&right_indices, input_right, ChainType::UpstreamOnly) + { + // We already ensured that index and join use the same distribution, so we directly + // replace the children with stream index scan without inserting any exchanges. 
Some( join.to_delta_join() .clone_with_left_right(left, right) @@ -137,7 +152,6 @@ impl Rule for IndexDeltaJoinRule { } impl IndexDeltaJoinRule { - #[expect(dead_code)] pub fn create() -> BoxedRule { Box::new(Self {}) } diff --git a/src/frontend/src/optimizer/rule/index_selection_rule.rs b/src/frontend/src/optimizer/rule/index_selection_rule.rs index e8ae78b0541f9..5510251dff8c0 100644 --- a/src/frontend/src/optimizer/rule/index_selection_rule.rs +++ b/src/frontend/src/optimizer/rule/index_selection_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -56,6 +56,7 @@ use risingwave_common::catalog::Schema; use risingwave_common::types::{ DataType, Decimal, IntervalUnit, NaiveDateTimeWrapper, NaiveDateWrapper, NaiveTimeWrapper, }; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::plan_common::JoinType; use super::{BoxedRule, Rule}; @@ -203,7 +204,7 @@ impl IndexSelectionRule { let conjunctions = index .primary_table_pk_ref_to_index_table() .iter() - .zip_eq(index.primary_table.pk.iter()) + .zip_eq_fast(index.primary_table.pk.iter()) .map(|(x, y)| { Self::create_null_safe_equal_expr( x.index, @@ -711,6 +712,7 @@ impl<'a> TableScanIoEstimator<'a> { DataType::Interval => size_of::(), DataType::Varchar => 20, DataType::Bytea => 20, + DataType::Jsonb => 20, DataType::Struct { .. } => 20, DataType::List { .. } => 20, } diff --git a/src/frontend/src/optimizer/rule/join_commute_rule.rs b/src/frontend/src/optimizer/rule/join_commute_rule.rs index 6f788e0ba5b4b..f3195994f7e12 100644 --- a/src/frontend/src/optimizer/rule/join_commute_rule.rs +++ b/src/frontend/src/optimizer/rule/join_commute_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/rule/max_one_row_eliminate_rule.rs b/src/frontend/src/optimizer/rule/max_one_row_eliminate_rule.rs index 2ac35b4ab27af..6f14314c9be2a 100644 --- a/src/frontend/src/optimizer/rule/max_one_row_eliminate_rule.rs +++ b/src/frontend/src/optimizer/rule/max_one_row_eliminate_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/rule/merge_multijoin_rule.rs b/src/frontend/src/optimizer/rule/merge_multijoin_rule.rs index 5eddc1f0f619d..c496a906400ae 100644 --- a/src/frontend/src/optimizer/rule/merge_multijoin_rule.rs +++ b/src/frontend/src/optimizer/rule/merge_multijoin_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -37,9 +37,9 @@ impl MergeMultiJoinRule { #[cfg(test)] mod tests { - use itertools::Itertools; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::types::DataType; + use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::expr::expr_node::Type; use risingwave_pb::plan_common::JoinType; @@ -115,12 +115,11 @@ mod tests { let multijoin_builder = LogicalMultiJoinBuilder::new(join_1.into()); let multi_join = multijoin_builder.build(); - for (input, schema) in - multi_join - .inputs() - .iter() - .zip_eq(vec![mid.schema(), left.schema(), right.schema()]) - { + for (input, schema) in multi_join.inputs().iter().zip_eq_fast(vec![ + mid.schema(), + left.schema(), + right.schema(), + ]) { assert_eq!(input.schema(), schema); } diff --git a/src/frontend/src/optimizer/rule/mod.rs b/src/frontend/src/optimizer/rule/mod.rs index fdc34b8eb664c..de39488b55368 100644 --- a/src/frontend/src/optimizer/rule/mod.rs +++ b/src/frontend/src/optimizer/rule/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -81,6 +81,12 @@ mod dag_to_tree_rule; pub use dag_to_tree_rule::*; mod apply_share_eliminate_rule; pub use apply_share_eliminate_rule::*; +mod top_n_on_index_rule; +pub use top_n_on_index_rule::*; +mod stream; +pub use stream::filter_with_now_to_join_rule::*; +mod trivial_project_to_values_rule; +pub use trivial_project_to_values_rule::*; #[macro_export] macro_rules! for_all_rules { @@ -112,6 +118,9 @@ macro_rules! for_all_rules { ,{UnionMergeRule} ,{DagToTreeRule} ,{AggDedupGroupKeyRule} + ,{FilterWithNowToJoinRule} + ,{TopNOnIndexRule} + ,{TrivialProjectToValuesRule} } }; } diff --git a/src/frontend/src/optimizer/rule/over_agg_to_topn_rule.rs b/src/frontend/src/optimizer/rule/over_agg_to_topn_rule.rs index cb4c04fa5af5b..31675c9db4597 100644 --- a/src/frontend/src/optimizer/rule/over_agg_to_topn_rule.rs +++ b/src/frontend/src/optimizer/rule/over_agg_to_topn_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/rule/project_eliminate_rule.rs b/src/frontend/src/optimizer/rule/project_eliminate_rule.rs index f3dfe6156b9d8..f2fb1906ad63e 100644 --- a/src/frontend/src/optimizer/rule/project_eliminate_rule.rs +++ b/src/frontend/src/optimizer/rule/project_eliminate_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/rule/project_join_merge_rule.rs b/src/frontend/src/optimizer/rule/project_join_merge_rule.rs index 79d35147e696c..9c4b823eb7f98 100644 --- a/src/frontend/src/optimizer/rule/project_join_merge_rule.rs +++ b/src/frontend/src/optimizer/rule/project_join_merge_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
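The `rule/mod.rs` hunk above is the complete wiring for the three new rules: declare the module, re-export its contents, and add the rule name to `for_all_rules!`, which is expanded elsewhere to generate per-rule boilerplate. Any further rule follows the same pattern, sketched here with a hypothetical `MyNewRule` that is not part of this change:

    // In src/frontend/src/optimizer/rule/mod.rs:
    mod my_new_rule;
    pub use my_new_rule::*;

    // ...and inside the `for_all_rules!` macro body, alongside the existing entries:
    //     ,{MyNewRule}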
diff --git a/src/frontend/src/optimizer/rule/project_merge_rule.rs b/src/frontend/src/optimizer/rule/project_merge_rule.rs index 061d08afdfb92..a8127022c0cbf 100644 --- a/src/frontend/src/optimizer/rule/project_merge_rule.rs +++ b/src/frontend/src/optimizer/rule/project_merge_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/rule/pull_up_correlated_predicate_rule.rs b/src/frontend/src/optimizer/rule/pull_up_correlated_predicate_rule.rs index 1cc79fdf07a10..b8b9ff4ab4c4f 100644 --- a/src/frontend/src/optimizer/rule/pull_up_correlated_predicate_rule.rs +++ b/src/frontend/src/optimizer/rule/pull_up_correlated_predicate_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/rule/push_calculation_of_join_rule.rs b/src/frontend/src/optimizer/rule/push_calculation_of_join_rule.rs index 78143b28efdce..7042d26a953d9 100644 --- a/src/frontend/src/optimizer/rule/push_calculation_of_join_rule.rs +++ b/src/frontend/src/optimizer/rule/push_calculation_of_join_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ use fixedbitset::FixedBitSet; use itertools::Itertools; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::expr::expr_node::Type; use super::BoxedRule; @@ -36,11 +37,26 @@ impl Rule for PushCalculationOfJoinRule { let (left_exprs, right_exprs, indices_and_ty_of_func_calls) = Self::find_comparison_exprs(left_col_num, right_col_num, &exprs); + // Store only the expressions that need a new column in the projection + let left_exprs_non_input_ref: Vec<_> = left_exprs + .iter() + .filter(|e| e.as_input_ref().is_none()) + .cloned() + .collect(); + let right_exprs_non_input_ref: Vec<_> = right_exprs + .iter() + .filter(|e| e.as_input_ref().is_none()) + .cloned() + .collect(); + // used to shift indices of input_refs pointing the right side of `join` with // `left_exprs.len`. let mut col_index_mapping = { let map = (0..left_col_num) - .chain((left_col_num..left_col_num + right_col_num).map(|i| i + left_exprs.len())) + .chain( + (left_col_num..left_col_num + right_col_num) + .map(|i| i + left_exprs_non_input_ref.len()), + ) .map(Some) .collect_vec(); ColIndexMapping::new(map) @@ -56,21 +72,35 @@ impl Rule for PushCalculationOfJoinRule { // `left_index` and `right_index` will scan through `left_exprs` and `right_exprs` // respectively. let mut left_index = left_col_num; - let mut right_index = left_col_num + left_exprs.len() + right_col_num; + let mut right_index = left_col_num + left_exprs_non_input_ref.len() + right_col_num; + let mut right_exprs_mapping = { + let map = (0..right_col_num) + .map(|i| i + left_col_num + left_exprs_non_input_ref.len()) + .map(Some) + .collect_vec(); + ColIndexMapping::new(map) + }; // replace chosen function calls. 
for (((index_of_func_call, ty), left_expr), right_expr) in indices_and_ty_of_func_calls .into_iter() - .zip_eq(&left_exprs) - .zip_eq(&right_exprs) + .zip_eq_fast(&left_exprs) + .zip_eq_fast(&right_exprs) { - let left_input = InputRef::new(left_index, left_expr.return_type()); - let right_input = InputRef::new(right_index, right_expr.return_type()); - exprs[index_of_func_call] = - FunctionCall::new(ty, vec![left_input.into(), right_input.into()]) - .unwrap() - .into(); - left_index += 1; - right_index += 1; + let left_input = if left_expr.as_input_ref().is_some() { + left_expr.clone() + } else { + left_index += 1; + InputRef::new(left_index - 1, left_expr.return_type()).into() + }; + let right_input = if right_expr.as_input_ref().is_some() { + right_exprs_mapping.rewrite_expr(right_expr.clone()) + } else { + right_index += 1; + InputRef::new(right_index - 1, right_expr.return_type()).into() + }; + exprs[index_of_func_call] = FunctionCall::new(ty, vec![left_input, right_input]) + .unwrap() + .into(); } on = Condition { conjunctions: exprs, @@ -88,10 +118,12 @@ impl Rule for PushCalculationOfJoinRule { exprs.extend(appended_exprs); LogicalProject::create(input, exprs) }; - if !left_exprs.is_empty() { - // avoid unnecessary `project`s. - left = new_input(left, left_exprs); - right = new_input(right, right_exprs); + // avoid unnecessary `project`s. + if !left_exprs_non_input_ref.is_empty() { + left = new_input(left, left_exprs_non_input_ref); + } + if !right_exprs_non_input_ref.is_empty() { + right = new_input(right, right_exprs_non_input_ref); } Some(LogicalJoin::with_output_indices(left, right, join_type, on, output_indices).into()) diff --git a/src/frontend/src/optimizer/rule/reorder_multijoin_rule.rs b/src/frontend/src/optimizer/rule/reorder_multijoin_rule.rs index 2fbf23919167d..cdda30f46a7e8 100644 --- a/src/frontend/src/optimizer/rule/reorder_multijoin_rule.rs +++ b/src/frontend/src/optimizer/rule/reorder_multijoin_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -37,9 +37,10 @@ impl ReorderMultiJoinRule { #[cfg(test)] mod tests { - use itertools::Itertools; + use risingwave_common::catalog::{Field, Schema}; use risingwave_common::types::DataType; + use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::expr::expr_node::Type; use risingwave_pb::plan_common::JoinType; @@ -122,7 +123,7 @@ mod tests { ); let multijoin_builder = LogicalMultiJoinBuilder::new(join_1.into()); let multi_join = multijoin_builder.build(); - for (input, schema) in multi_join.inputs().iter().zip_eq(vec![ + for (input, schema) in multi_join.inputs().iter().zip_eq_fast(vec![ relation_a.schema(), relation_c.schema(), relation_b.schema(), diff --git a/src/frontend/src/optimizer/rule/stream/filter_with_now_to_join_rule.rs b/src/frontend/src/optimizer/rule/stream/filter_with_now_to_join_rule.rs new file mode 100644 index 0000000000000..69d6a71f2c7a9 --- /dev/null +++ b/src/frontend/src/optimizer/rule/stream/filter_with_now_to_join_rule.rs @@ -0,0 +1,136 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_common::types::DataType; +use risingwave_pb::expr::expr_node::Type; +use risingwave_pb::plan_common::JoinType; + +use crate::expr::{try_derive_watermark, ExprRewriter, FunctionCall, InputRef}; +use crate::optimizer::plan_node::generic::GenericPlanRef; +use crate::optimizer::plan_node::{LogicalFilter, LogicalJoin, LogicalNow}; +use crate::optimizer::rule::{BoxedRule, Rule}; +use crate::optimizer::PlanRef; +use crate::utils::Condition; + +/// Convert `LogicalFilter` with now in predicate to left-semi `LogicalJoin` +/// Only applies to stream. +pub struct FilterWithNowToJoinRule {} +impl Rule for FilterWithNowToJoinRule { + fn apply(&self, plan: PlanRef) -> Option { + let filter: &LogicalFilter = plan.as_logical_filter()?; + + let lhs_len = filter.base.schema().len(); + + let mut now_filters = vec![]; + let mut remainder = vec![]; + + let mut rewriter = NowAsInputRef::new(lhs_len); + + // If the `now` is not a valid dynamic filter expression, we will not push it down. + filter.predicate().conjunctions.iter().for_each(|expr| { + if let Some((input_expr, cmp, now_expr)) = expr.as_now_comparison_cond() { + let now_expr = rewriter.rewrite_expr(now_expr); + + // as a sanity check, ensure that this expression will derive a watermark + // on the output of the now executor + debug_assert_eq!(try_derive_watermark(&now_expr), Some(lhs_len)); + + now_filters.push(FunctionCall::new(cmp, vec![input_expr, now_expr]).unwrap()); + } else { + remainder.push(expr.clone()); + } + }); + + // We want to put `input_expr >/>= now_expr` before `input_expr BoxedRule { + Box::new(FilterWithNowToJoinRule {}) + } +} + +fn rank_cmp(cmp: Type) -> u8 { + match cmp { + Type::GreaterThan | Type::GreaterThanOrEqual => 0, + Type::LessThan | Type::LessThanOrEqual => 1, + _ => 2, + } +} + +struct NowAsInputRef { + index: usize, +} +impl ExprRewriter for NowAsInputRef { + fn rewrite_function_call(&mut self, func_call: FunctionCall) -> crate::expr::ExprImpl { + let (func_type, inputs, ret) = func_call.decompose(); + let inputs = inputs + .into_iter() + .map(|expr| self.rewrite_expr(expr)) + .collect(); + match func_type { + Type::Now => InputRef { + index: self.index, + data_type: DataType::Timestamptz, + } + .into(), + _ => FunctionCall::new_unchecked(func_type, inputs, ret).into(), + } + } +} + +impl NowAsInputRef { + fn new(lhs_len: usize) -> Self { + Self { index: lhs_len } + } +} diff --git a/src/frontend/src/optimizer/rule/stream/mod.rs b/src/frontend/src/optimizer/rule/stream/mod.rs new file mode 100644 index 0000000000000..39cf0c8d1185f --- /dev/null +++ b/src/frontend/src/optimizer/rule/stream/mod.rs @@ -0,0 +1,15 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub(crate) mod filter_with_now_to_join_rule; diff --git a/src/frontend/src/optimizer/rule/top_n_on_index_rule.rs b/src/frontend/src/optimizer/rule/top_n_on_index_rule.rs new file mode 100644 index 0000000000000..f8aa3ef5290d9 --- /dev/null +++ b/src/frontend/src/optimizer/rule/top_n_on_index_rule.rs @@ -0,0 +1,157 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +use std::collections::BTreeMap; + +use risingwave_common::util::sort_util::OrderType; + +use super::{BoxedRule, Rule}; +use crate::optimizer::plan_node::{LogicalLimit, LogicalScan, LogicalTopN, PlanTreeNodeUnary}; +use crate::optimizer::property::{Direction, FieldOrder, Order}; +use crate::optimizer::PlanRef; + +pub struct TopNOnIndexRule {} + +impl Rule for TopNOnIndexRule { + fn apply(&self, plan: PlanRef) -> Option { + let logical_top_n: &LogicalTopN = plan.as_logical_top_n()?; + let logical_scan: LogicalScan = logical_top_n.input().as_logical_scan()?.to_owned(); + if !logical_scan.predicate().always_true() { + return None; + } + let order = logical_top_n.topn_order(); + if order.field_order.is_empty() { + return None; + } + let output_col_map = logical_scan + .output_col_idx() + .iter() + .cloned() + .enumerate() + .map(|(id, col)| (col, id)) + .collect::>(); + if let Some(p) = self.try_on_pk(logical_top_n, logical_scan.clone(), order, &output_col_map) + { + Some(p) + } else { + self.try_on_index(logical_top_n, logical_scan, order, &output_col_map) + } + } +} + +impl TopNOnIndexRule { + pub fn create() -> BoxedRule { + Box::new(TopNOnIndexRule {}) + } + + fn try_on_index( + &self, + logical_top_n: &LogicalTopN, + logical_scan: LogicalScan, + order: &Order, + output_col_map: &BTreeMap, + ) -> Option { + let unmatched_idx = output_col_map.len(); + let index = logical_scan.indexes().iter().find(|idx| { + let s2p_mapping = idx.secondary_to_primary_mapping(); + Order { + field_order: idx + .index_table + .pk() + .iter() + .map(|idx_item| FieldOrder { + index: *output_col_map + .get( + s2p_mapping + .get(&idx_item.index) + .expect("should be in s2p mapping"), + ) + .unwrap_or(&unmatched_idx), + direct: idx_item.direct, + }) + .collect(), + } + .satisfies(order) + })?; + + let p2s_mapping = index.primary_to_secondary_mapping(); + + let mut index_scan = if logical_scan + .required_col_idx() + .iter() + 
.all(|x| p2s_mapping.contains_key(x)) + { + Some(logical_scan.to_index_scan( + &index.name, + index.index_table.table_desc().into(), + p2s_mapping, + )) + } else { + None + }?; + + index_scan.set_chunk_size( + ((u32::MAX as u64).min(logical_top_n.limit() + logical_top_n.offset())) as u32, + ); + + let logical_limit = LogicalLimit::create( + index_scan.into(), + logical_top_n.limit(), + logical_top_n.offset(), + ); + Some(logical_limit) + } + + fn try_on_pk( + &self, + logical_top_n: &LogicalTopN, + mut logical_scan: LogicalScan, + order: &Order, + output_col_map: &BTreeMap, + ) -> Option { + let unmatched_idx = output_col_map.len(); + let primary_key = logical_scan.primary_key(); + let primary_key_order = Order { + field_order: primary_key + .into_iter() + .map(|op| FieldOrder { + index: *output_col_map.get(&op.column_idx).unwrap_or(&unmatched_idx), + direct: if op.order_type == OrderType::Ascending { + Direction::Asc + } else { + Direction::Desc + }, + }) + .collect::>(), + }; + if primary_key_order.satisfies(order) { + logical_scan.set_chunk_size( + ((u32::MAX as u64).min(logical_top_n.limit() + logical_top_n.offset())) as u32, + ); + let logical_limit = LogicalLimit::create( + logical_scan.into(), + logical_top_n.limit(), + logical_top_n.offset(), + ); + Some(logical_limit) + } else { + None + } + } +} diff --git a/src/frontend/src/optimizer/rule/translate_apply_rule.rs b/src/frontend/src/optimizer/rule/translate_apply_rule.rs index 3417ffbf32734..5de1fe84bbbbc 100644 --- a/src/frontend/src/optimizer/rule/translate_apply_rule.rs +++ b/src/frontend/src/optimizer/rule/translate_apply_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ use risingwave_pb::plan_common::JoinType; use super::{BoxedRule, Rule}; use crate::expr::{ExprImpl, ExprType, FunctionCall, InputRef}; +use crate::optimizer::plan_node::generic::GenericPlanRef; use crate::optimizer::plan_node::{ LogicalAgg, LogicalApply, LogicalJoin, LogicalProject, LogicalScan, LogicalShare, PlanTreeNodeBinary, PlanTreeNodeUnary, @@ -95,8 +96,13 @@ impl Rule for TranslateApplyRule { // the domain. Distinct + Project + The Left of Apply // Use Share - let logical_share = LogicalShare::new(left); - left = logical_share.into(); + left = if left.ctx().session_ctx().config().get_enable_share_plan() { + let logical_share = LogicalShare::new(left); + logical_share.into() + } else { + left + }; + let distinct = LogicalAgg::new( vec![], correlated_indices.clone().into_iter().collect_vec(), diff --git a/src/frontend/src/optimizer/rule/trivial_project_to_values_rule.rs b/src/frontend/src/optimizer/rule/trivial_project_to_values_rule.rs new file mode 100644 index 0000000000000..d57088401450b --- /dev/null +++ b/src/frontend/src/optimizer/rule/trivial_project_to_values_rule.rs @@ -0,0 +1,44 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use super::{BoxedRule, Rule}; +use crate::optimizer::plan_node::{LogicalValues, PlanTreeNodeUnary}; +use crate::optimizer::plan_visitor::CountRows; +use crate::optimizer::{PlanRef, PlanVisitor}; + +pub struct TrivialProjectToValuesRule {} +impl Rule for TrivialProjectToValuesRule { + fn apply(&self, plan: PlanRef) -> Option { + let project = plan.as_logical_project()?; + if project.exprs().iter().all(|e| e.is_const()) { + let mut count_rows = CountRows; + count_rows.visit(project.input()).map(|count| { + LogicalValues::new( + vec![project.exprs().clone(); count], + project.schema().clone(), + project.ctx(), + ) + .into() + }) + } else { + None + } + } +} + +impl TrivialProjectToValuesRule { + pub fn create() -> BoxedRule { + Box::new(TrivialProjectToValuesRule {}) + } +} diff --git a/src/frontend/src/optimizer/rule/union_merge_rule.rs b/src/frontend/src/optimizer/rule/union_merge_rule.rs index 62e8dcc6bdfbf..169c9b72c530f 100644 --- a/src/frontend/src/optimizer/rule/union_merge_rule.rs +++ b/src/frontend/src/optimizer/rule/union_merge_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/optimizer/rule/union_to_distinct_rule.rs b/src/frontend/src/optimizer/rule/union_to_distinct_rule.rs index 40f3ba15bb541..e408d51e1db10 100644 --- a/src/frontend/src/optimizer/rule/union_to_distinct_rule.rs +++ b/src/frontend/src/optimizer/rule/union_to_distinct_rule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/planner/delete.rs b/src/frontend/src/planner/delete.rs index a4d6b9f95ec35..9814d364e628a 100644 --- a/src/frontend/src/planner/delete.rs +++ b/src/frontend/src/planner/delete.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/planner/insert.rs b/src/frontend/src/planner/insert.rs index 6c5ad7770fa8d..c7b94d34195e8 100644 --- a/src/frontend/src/planner/insert.rs +++ b/src/frontend/src/planner/insert.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/planner/mod.rs b/src/frontend/src/planner/mod.rs index 76b427581ad31..54cc5ae7f9130 100644 --- a/src/frontend/src/planner/mod.rs +++ b/src/frontend/src/planner/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
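Taken together with `CountRows`, the new `TrivialProjectToValuesRule` fires when every projected expression is constant and the input's row count is statically known, and it replicates the constant row that many times. A hypothetical before/after plan sketch (notation approximate):

    LogicalProject { exprs: [42:Int32] }            -- all expressions constant
      LogicalAgg { group_key: [], aggs: [count] }   -- CountRows = Some(1)

            is rewritten to

    LogicalValues { rows: [[42:Int32]], schema: [Int32] }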
diff --git a/src/frontend/src/planner/query.rs b/src/frontend/src/planner/query.rs index 3e248bed0301c..7a24f3a90e624 100644 --- a/src/frontend/src/planner/query.rs +++ b/src/frontend/src/planner/query.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/planner/relation.rs b/src/frontend/src/planner/relation.rs index 8473e8db2884f..3146fd20faba7 100644 --- a/src/frontend/src/planner/relation.rs +++ b/src/frontend/src/planner/relation.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/planner/select.rs b/src/frontend/src/planner/select.rs index 5f89cf206d280..dd15678c61a56 100644 --- a/src/frontend/src/planner/select.rs +++ b/src/frontend/src/planner/select.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ use itertools::Itertools; use risingwave_common::catalog::Schema; use risingwave_common::error::{ErrorCode, Result}; use risingwave_common::types::DataType; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::ExprError; use risingwave_pb::plan_common::JoinType; @@ -349,7 +350,7 @@ impl Planner { for (subquery, correlated_indices) in rewriter .subqueries .into_iter() - .zip_eq(rewriter.correlated_indices_collection) + .zip_eq_fast(rewriter.correlated_indices_collection) { let mut right = self.plan_query(subquery.query)?.into_subplan(); diff --git a/src/frontend/src/planner/set_expr.rs b/src/frontend/src/planner/set_expr.rs index c735b563d2f4f..cb32b5930b708 100644 --- a/src/frontend/src/planner/set_expr.rs +++ b/src/frontend/src/planner/set_expr.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/planner/set_operation.rs b/src/frontend/src/planner/set_operation.rs index 78ff6a384af95..85fc0a98d2fec 100644 --- a/src/frontend/src/planner/set_operation.rs +++ b/src/frontend/src/planner/set_operation.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/planner/statement.rs b/src/frontend/src/planner/statement.rs index f3f1041ab902a..5eb176b60c4f8 100644 --- a/src/frontend/src/planner/statement.rs +++ b/src/frontend/src/planner/statement.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
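The planner and rule files in this range also migrate from `itertools::zip_eq` to the in-house `ZipEqFast` helper; presumably it is a drop-in replacement with a cheaper equal-length check, and usage is identical apart from the imported trait. A small sketch with invented vectors:

    use risingwave_common::util::iter_util::ZipEqFast;

    fn zip_example() {
        let subqueries = vec!["q1", "q2"];
        let correlated = vec![vec![0usize], vec![1, 2]];
        // Same call shape as `zip_eq`, but through the `ZipEqFast` extension trait.
        for (q, cols) in subqueries.iter().zip_eq_fast(correlated.iter()) {
            println!("{q}: {cols:?}");
        }
    }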
diff --git a/src/frontend/src/planner/update.rs b/src/frontend/src/planner/update.rs index 1b1828acc9d9c..2dfca750aee99 100644 --- a/src/frontend/src/planner/update.rs +++ b/src/frontend/src/planner/update.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/planner/values.rs b/src/frontend/src/planner/values.rs index 176a9d13d5034..fb33167132868 100644 --- a/src/frontend/src/planner/values.rs +++ b/src/frontend/src/planner/values.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/scheduler/distributed/mod.rs b/src/frontend/src/scheduler/distributed/mod.rs index e00676fd12429..6cfcef8f1879c 100644 --- a/src/frontend/src/scheduler/distributed/mod.rs +++ b/src/frontend/src/scheduler/distributed/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/scheduler/distributed/query.rs b/src/frontend/src/scheduler/distributed/query.rs index 91c9d7448a057..ab27821327de2 100644 --- a/src/frontend/src/scheduler/distributed/query.rs +++ b/src/frontend/src/scheduler/distributed/query.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // Licensed under the Apache License, Version 2.0 (the "License"); // // you may not use this file except in compliance with the License. @@ -14,10 +14,14 @@ use std::collections::HashMap; use std::default::Default; +use std::fmt::{Debug, Formatter}; use std::mem; use std::sync::Arc; use anyhow::anyhow; +use futures::executor::block_on; +use petgraph::dot::{Config, Dot}; +use petgraph::Graph; use pgwire::pg_server::SessionId; use risingwave_common::array::DataChunk; use risingwave_pb::batch_plan::{TaskId as TaskIdProst, TaskOutputId as TaskOutputIdProst}; @@ -151,6 +155,8 @@ impl QueryExecution { query_execution_info, }; + tracing::trace!("Starting query: {:?}", self.query.query_id); + // Not trace the error here, it will be processed in scheduler. 
tokio::spawn(async move { runner.run(pinned_snapshot).await }); @@ -164,6 +170,7 @@ impl QueryExecution { self.query.query_id ); + tracing::trace!("Query {:?} started.", self.query.query_id); Ok(root_stage) } _ => { @@ -222,6 +229,33 @@ impl QueryExecution { } } +impl Debug for QueryRunner { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let mut graph = Graph::::new(); + let mut stage_id_to_node_id = HashMap::new(); + for stage in &self.stage_executions { + let node_id = graph.add_node(format!("{} {}", stage.0, block_on(stage.1.state()))); + stage_id_to_node_id.insert(stage.0, node_id); + } + + for stage in &self.stage_executions { + let stage_id = stage.0; + if let Some(child_stages) = self.query.stage_graph.get_child_stages(stage_id) { + for child_stage in child_stages { + graph.add_edge( + *stage_id_to_node_id.get(stage_id).unwrap(), + *stage_id_to_node_id.get(child_stage).unwrap(), + "".to_string(), + ); + } + } + } + + // Visit https://dreampuf.github.io/GraphvizOnline/ to display the result + writeln!(f, "{}", Dot::with_config(&graph, &[Config::EdgeNoLabel])) + } +} + impl QueryRunner { async fn run(mut self, pinned_snapshot: PinnedHummockSnapshot) { // Start leaf stages. @@ -238,6 +272,8 @@ impl QueryRunner { let has_lookup_join_stage = self.query.has_lookup_join_stage(); // To convince the compiler that `pinned_snapshot` will only be dropped once. let mut pinned_snapshot_to_drop = Some(pinned_snapshot); + + let mut finished_stage_cnt = 0usize; while let Some(msg_inner) = self.msg_receiver.recv().await { match msg_inner { Stage(Scheduled(stage_id)) => { @@ -280,19 +316,29 @@ impl QueryRunner { self.query.query_id, id, reason ); - self.handle_cancel_or_failed_stage(reason).await; + self.clean_all_stages(Some(reason)).await; // One stage failed, not necessary to execute schedule stages. break; } + Stage(StageEvent::Completed(_)) => { + finished_stage_cnt += 1; + assert!(finished_stage_cnt <= self.stage_executions.len()); + if finished_stage_cnt == self.stage_executions.len() { + tracing::trace!( + "Query {:?} completed, starting to clean stage tasks.", + &self.query.query_id + ); + // Now all stages completed, we should remove all + self.clean_all_stages(None).await; + break; + } + } QueryMessage::CancelQuery => { - self.handle_cancel_or_failed_stage(SchedulerError::QueryCancelError) + self.clean_all_stages(Some(SchedulerError::QueryCancelError)) .await; // One stage failed, not necessary to execute schedule stages. break; } - rest => { - unimplemented!("unsupported message \"{:?}\" for QueryRunner.run", rest); - } } } } @@ -346,30 +392,33 @@ impl QueryRunner { /// Handle ctrl-c query or failed execution. Should stop all executions and send error to query /// result fetcher. - async fn handle_cancel_or_failed_stage(mut self, reason: SchedulerError) { - let err_str = reason.to_string(); - // Consume sender here and send error to root stage. - let root_stage_sender = mem::take(&mut self.root_stage_sender); - // It's possible we receive stage failed event message multi times and the - // sender has been consumed in first failed event. - if let Some(sender) = root_stage_sender { - if let Err(e) = sender.send(Err(reason)) { - warn!("Query execution dropped: {:?}", e); - } else { - debug!( - "Root stage failure event for {:?} sent.", - self.query.query_id - ); + async fn clean_all_stages(&mut self, error: Option) { + let error_msg = error.as_ref().map(|e| e.to_string()); + if let Some(reason) = error { + // Consume sender here and send error to root stage. 
+ let root_stage_sender = mem::take(&mut self.root_stage_sender); + // It's possible we receive stage failed event message multi times and the + // sender has been consumed in first failed event. + if let Some(sender) = root_stage_sender { + if let Err(e) = sender.send(Err(reason)) { + warn!("Query execution dropped: {:?}", e); + } else { + debug!( + "Root stage failure event for {:?} sent.", + self.query.query_id + ); + } } - } - // If root stage has been taken (None), then root stage is responsible for send error to - // Query Result Fetcher. + // If root stage has been taken (None), then root stage is responsible for send error to + // Query Result Fetcher. + } + tracing::trace!("Cleaning stages in query [{:?}]", self.query.query_id); // Stop all running stages. for stage_execution in self.stage_executions.values() { // The stop is return immediately so no need to spawn tasks. - stage_execution.stop(err_str.clone()).await; + stage_execution.stop(error_msg.clone()).await; } } } @@ -380,8 +429,10 @@ pub(crate) mod tests { use std::rc::Rc; use std::sync::{Arc, RwLock}; + use fixedbitset::FixedBitSet; use risingwave_common::catalog::{ColumnDesc, TableDesc}; use risingwave_common::constants::hummock::TABLE_OPTION_DUMMY_RETENTION_SECOND; + use risingwave_common::hash::ParallelUnitMapping; use risingwave_common::types::DataType; use risingwave_pb::common::{HostAddress, ParallelUnit, WorkerNode, WorkerType}; use risingwave_pb::plan_common::JoinType; @@ -481,6 +532,7 @@ pub(crate) mod tests { retention_seconds: TABLE_OPTION_DUMMY_RETENTION_SECOND, value_indices: vec![0, 1, 2], read_prefix_len_hint: 0, + watermark_columns: FixedBitSet::with_capacity(3), }), vec![], ctx, @@ -545,7 +597,7 @@ pub(crate) mod tests { ), ) .into(); - let batch_exchange_node3: PlanRef = BatchExchange::new( + let batch_exchange_node: PlanRef = BatchExchange::new( hash_join_node.clone(), Order::default(), Distribution::Single, @@ -584,13 +636,18 @@ pub(crate) mod tests { }; let workers = vec![worker1, worker2, worker3]; let worker_node_manager = Arc::new(WorkerNodeManager::mock(workers)); - worker_node_manager.insert_fragment_mapping(0, vec![]); + worker_node_manager.insert_fragment_mapping(0, ParallelUnitMapping::new_single(0)); let catalog = Arc::new(parking_lot::RwLock::new(Catalog::default())); catalog.write().insert_table_id_mapping(table_id, 0); let catalog_reader = CatalogReader::new(catalog); // Break the plan node into fragments. - let fragmenter = BatchPlanFragmenter::new(worker_node_manager, catalog_reader); - fragmenter.split(batch_exchange_node3.clone()).unwrap() + let fragmenter = BatchPlanFragmenter::new( + worker_node_manager, + catalog_reader, + batch_exchange_node.clone(), + ) + .unwrap(); + fragmenter.generate_complete_query().await.unwrap() } fn generate_parallel_units(start_id: u32, node_id: u32) -> Vec { diff --git a/src/frontend/src/scheduler/distributed/query_manager.rs b/src/frontend/src/scheduler/distributed/query_manager.rs index 711b6d3ead70d..5efa36fbad71f 100644 --- a/src/frontend/src/scheduler/distributed/query_manager.rs +++ b/src/frontend/src/scheduler/distributed/query_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
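The Debug impl added to QueryRunner above builds a petgraph Graph out of the stage executions and prints it in Graphviz DOT format, so the stage DAG of a stuck query can be pasted into a viewer. A self-contained sketch of the same petgraph pattern (the stage labels are made up for illustration):

use petgraph::dot::{Config, Dot};
use petgraph::Graph;

fn main() {
    let mut graph = Graph::<String, String>::new();
    let root = graph.add_node("stage 0 (Running)".to_string());
    let child = graph.add_node("stage 1 (Completed)".to_string());
    // One edge per parent/child relation in the query's stage graph.
    graph.add_edge(root, child, "".to_string());

    // The output can be rendered with any Graphviz viewer,
    // e.g. https://dreampuf.github.io/GraphvizOnline/.
    println!("{}", Dot::with_config(&graph, &[Config::EdgeNoLabel]));
}
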
diff --git a/src/frontend/src/scheduler/distributed/stage.rs b/src/frontend/src/scheduler/distributed/stage.rs index 3ab9311504643..676bae1e709bd 100644 --- a/src/frontend/src/scheduler/distributed/stage.rs +++ b/src/frontend/src/scheduler/distributed/stage.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::assert_matches::assert_matches; use std::cell::RefCell; use std::collections::HashMap; use std::mem; @@ -20,6 +21,7 @@ use std::sync::Arc; use anyhow::anyhow; use arc_swap::ArcSwap; +use futures::stream::Fuse; use futures::{stream, StreamExt}; use futures_async_stream::for_await; use itertools::Itertools; @@ -27,8 +29,9 @@ use rand::seq::SliceRandom; use risingwave_batch::executor::ExecutorBuilder; use risingwave_batch::task::TaskId as TaskIdBatch; use risingwave_common::array::DataChunk; -use risingwave_common::hash::VnodeMapping; +use risingwave_common::hash::ParallelUnitMapping; use risingwave_common::util::addr::HostAddr; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::select_all; use risingwave_connector::source::SplitMetaData; use risingwave_pb::batch_plan::plan_node::NodeBody; @@ -76,9 +79,11 @@ enum StageState { Failed, } +#[derive(Debug)] enum StageMessage { - /// Contains the reason why need to stop (e.g. Execution failure). - Stop(String), + /// Contains the reason why need to stop (e.g. Execution failure). The message is `None` if + /// it's normal stop. + Stop(Option), } #[derive(Debug)] @@ -90,6 +95,7 @@ pub enum StageEvent { id: StageId, reason: SchedulerError, }, + /// All tasks in stage finished. Completed(StageId), } @@ -167,7 +173,7 @@ impl StageExecution { catalog_reader: CatalogReader, ctx: ExecutionContextRef, ) -> Self { - let tasks = (0..stage.parallelism) + let tasks = (0..stage.parallelism.unwrap()) .map(|task_id| (task_id, TaskStatusHolder::new(task_id))) .collect(); Self { @@ -189,7 +195,7 @@ impl StageExecution { let mut s = self.state.write().await; let cur_state = mem::replace(&mut *s, StageState::Failed); match cur_state { - StageState::Pending { msg_sender } => { + Pending { msg_sender } => { let runner = StageRunner { epoch: self.epoch.clone(), stage: self.stage.clone(), @@ -213,6 +219,11 @@ impl StageExecution { *s = StageState::Started; spawn(async move { runner.run(receiver).await }); + tracing::trace!( + "Stage {:?}-{:?} started.", + self.stage.query_id.id, + self.stage.id + ) } _ => { unreachable!("Only expect to schedule stage once"); @@ -220,20 +231,25 @@ impl StageExecution { } } - pub async fn stop(&self, err_str: String) { + pub async fn stop(&self, error: Option) { // Send message to tell Stage Runner stop. if let Some(shutdown_tx) = self.shutdown_tx.write().await.take() { // It's possible that the stage has not been scheduled, so the channel sender is // None. - if shutdown_tx.send(StageMessage::Stop(err_str)).is_err() { + if shutdown_tx.send(StageMessage::Stop(error)).is_err() { // The stage runner handle has already closed. so do no-op. + tracing::trace!( + "Failed to send stop message stage: {:?}-{:?}", + self.stage.query_id, + self.stage.id + ); } } } pub async fn is_scheduled(&self) -> bool { let s = self.state.read().await; - matches!(*s, StageState::Running { .. 
}) + matches!(*s, StageState::Running { .. } | StageState::Completed) } pub async fn is_pending(&self) -> bool { @@ -241,6 +257,17 @@ impl StageExecution { matches!(*s, StageState::Pending { .. }) } + pub async fn state(&self) -> &'static str { + let s = self.state.read().await; + match *s { + Pending { .. } => "Pending", + StageState::Started => "Started", + StageState::Running => "Running", + StageState::Completed => "Completed", + StageState::Failed => "Failed", + } + } + pub fn get_task_status_unchecked(&self, task_id: TaskId) -> Arc { self.tasks[&task_id].get_status() } @@ -315,7 +342,7 @@ impl StageRunner { for (i, (parallel_unit_id, worker)) in parallel_unit_ids .into_iter() - .zip_eq(workers.into_iter()) + .zip_eq_fast(workers.into_iter()) .enumerate() { let task_id = TaskIdProst { @@ -328,7 +355,7 @@ impl StageRunner { futures.push(self.schedule_task(task_id, plan_fragment, Some(worker))); } } else if let Some(source_info) = self.stage.source_info.as_ref() { - for (id, split) in source_info.split_info().iter().enumerate() { + for (id, split) in source_info.split_info().unwrap().iter().enumerate() { let task_id = TaskIdProst { query_id: self.stage.query_id.id.clone(), stage_id: self.stage.id, @@ -340,7 +367,7 @@ impl StageRunner { } } else { - for id in 0..self.stage.parallelism { + for id in 0..self.stage.parallelism.unwrap() { let task_id = TaskIdProst { query_id: self.stage.query_id.id.clone(), stage_id: self.stage.id, @@ -360,87 +387,106 @@ impl StageRunner { } // Merge different task streams into a single stream. - let mut all_streams = select_all(buffered_streams); + let mut all_streams = select_all(buffered_streams).take_until(shutdown_rx); // Process the stream until finished. let mut running_task_cnt = 0; let mut finished_task_cnt = 0; let mut sent_signal_to_next = false; - let mut shutdown_rx = shutdown_rx; - // This loop will stops once receive a stop message, otherwise keep processing status - // message. - loop { - tokio::select! { - biased; - _ = &mut shutdown_rx => { - // Received shutdown signal from query runner, should send abort RPC to all CNs. - // change state to aborted. Note that the task cancel can only happen after schedule all these tasks to CN. - // This can be an optimization for future: How to stop before schedule tasks. - self.abort_all_running_tasks().await?; - break; + + while let Some(status_res_inner) = all_streams.next().await { + // The status can be Running, Finished, Failed etc. This stream contains status from + // different tasks. + let status = status_res_inner.map_err(SchedulerError::from)?; + // Note: For Task execution failure, it now becomes a Rpc Error and will return here. + // Do not process this as task status like Running/Finished/ etc. + + use risingwave_pb::task_service::task_info::TaskStatus as TaskStatusProst; + match TaskStatusProst::from_i32(status.task_info.as_ref().unwrap().task_status).unwrap() + { + TaskStatusProst::Running => { + running_task_cnt += 1; + // The task running count should always less or equal than the registered tasks + // number. + assert!(running_task_cnt <= self.tasks.keys().len()); + // All tasks in this stage have been scheduled. Notify query runner to schedule + // next stage. 
+ if running_task_cnt == self.tasks.keys().len() { + self.notify_stage_scheduled(QueryMessage::Stage(StageEvent::Scheduled( + self.stage.id, + ))) + .await; + sent_signal_to_next = true; + } } - status_res = all_streams.next() => { - if let Some(stauts_res_inner) = status_res { - // The status can be Running, Finished, Failed etc. This stream contains status from - // different tasks. - let status = stauts_res_inner.map_err(SchedulerError::from)?; - // Note: For Task execution failure, it now becomes a Rpc Error and will return here. - // Do not process this as task status like Running/Finished/ etc. - - use risingwave_pb::task_service::task_info::TaskStatus as TaskStatusProst; - match TaskStatusProst::from_i32(status.task_info.as_ref().unwrap().task_status).unwrap() { - TaskStatusProst::Running => { - running_task_cnt += 1; - // The task running count should always less or equal than the registered tasks - // number. - assert!(running_task_cnt <= self.tasks.keys().len()); - // All tasks in this stage have been scheduled. Notify query runner to schedule next - // stage. - if running_task_cnt == self.tasks.keys().len() { - self.notify_schedule_next_stage().await; - sent_signal_to_next = true; - } - } - - TaskStatusProst::Finished => { - finished_task_cnt += 1; - assert!(finished_task_cnt <= self.tasks.keys().len()); - if finished_task_cnt == self.tasks.keys().len() { - assert!(sent_signal_to_next); - // All tasks finished without failure, just break this loop and return Ok. - break; - } - } - - TaskStatusProst::Aborted => { - // Unspecified means some channel has send error. - // Aborted means some other tasks failed, so return Ok. - break; - } - - TaskStatusProst::Unspecified => { - // Unspecified means some channel has send error or there is a limit operator in parent stage. - warn!("received Unspecified task status may due to task execution got channel sender error"); - } - - status => { - // The remain possible variant is Failed, but now they won't be pushed from CN. - unimplemented!("Unexpected task status {:?}", status); - } - } - } else { - // After processing all stream status, we must have sent signal (Either Scheduled or - // Failed) to Query Runner. If this is not true, query runner will stuck cuz it do not receive any signals. - if !sent_signal_to_next { - // For now, this kind of situation may come from recovery test: CN may get killed before reporting status, so sent signal flag is not set yet. - // In this case, batch query is expected to fail. Client in simulation test should retry this query (w/o kill nodes). - return Err(TaskExecutionError("compute node lose connection before response".to_string())); - } - break; + + TaskStatusProst::Finished => { + finished_task_cnt += 1; + assert!(finished_task_cnt <= self.tasks.keys().len()); + assert!(running_task_cnt >= finished_task_cnt); + if finished_task_cnt == self.tasks.keys().len() { + // All tasks finished without failure, we should not break + // this loop + self.notify_stage_completed().await; + sent_signal_to_next = true; + break; } } + + status => { + // The remain possible variant is Failed, but now they won't be pushed from CN. 
+ unreachable!("Unexpected task status {:?}", status); + } + } + } + + tracing::trace!( + "Stage [{:?}-{:?}], running task count: {}, finished task count: {}", + self.stage.query_id, + self.stage.id, + running_task_cnt, + finished_task_cnt + ); + + if let Some(shutdown) = all_streams.take_future() { + // After processing all stream status, we must have sent signal (Either Scheduled or + // Failed) to Query Runner. If this is not true, query runner will stuck cuz it do + // not receive any signals. + if !sent_signal_to_next { + // For now, this kind of situation may come from recovery test: CN may get + // killed before reporting status, so sent signal flag is not set yet. + // In this case, batch query is expected to fail. Client in simulation test + // should retry this query (w/o kill nodes). + return Err(TaskExecutionError( + "Compute node lost connection before finishing responding".to_string(), + )); } + tracing::trace!( + "Stage [{:?}-{:?}] waiting for stopping signal.", + self.stage.query_id, + self.stage.id + ); + // Waiting for shutdown signal. + shutdown.await.expect("Sender should not exited."); } + + // Received shutdown signal from query runner, should send abort RPC to all CNs. + // change state to aborted. Note that the task cancel can only happen after schedule + // all these tasks to CN. This can be an optimization for future: + // How to stop before schedule tasks. + tracing::trace!( + "Stopping stage: {:?}-{:?}, task_num: {}", + self.stage.query_id, + self.stage.id, + self.tasks.len() + ); + self.abort_all_scheduled_tasks().await?; + + tracing::trace!( + "Stage runner [{:?}-{:?}] existed. ", + self.stage.query_id, + self.stage.id + ); Ok(()) } @@ -461,7 +507,7 @@ impl StageRunner { // Notify QueryRunner to poll chunk from result_rx. let (result_tx, result_rx) = tokio::sync::mpsc::channel(100); - self.send_event(QueryMessage::Stage(StageEvent::ScheduledRoot(result_rx))) + self.notify_stage_scheduled(QueryMessage::Stage(StageEvent::ScheduledRoot(result_rx))) .await; let executor = ExecutorBuilder::new( @@ -486,7 +532,7 @@ impl StageRunner { warn!("Root executor has been dropped before receive any events so the send is failed"); } // Different from below, return this function and report error. - return Err(SchedulerError::TaskExecutionError(err_str)); + return Err(TaskExecutionError(err_str)); } else { // Same for below. if let Err(_e) = result_tx.send(chunk.map_err(|e| e.into())).await { @@ -496,19 +542,30 @@ impl StageRunner { } if let Some(err) = terminated_chunk_stream.take_result() { - let stage_message = err.expect("Sender should always exist!"); + let stage_message = err.expect("The sender should always exist!"); // Terminated by other tasks execution error, so no need to return error here. match stage_message { - StageMessage::Stop(err_str) => { + StageMessage::Stop(Some(err_str)) => { // Tell Query Result Fetcher to stop polling and attach failure reason as str. if let Err(_e) = result_tx.send(Err(TaskExecutionError(err_str))).await { warn!("Send task execution failed"); } } + StageMessage::Stop(None) => { + unreachable!() + } } + } else { + self.notify_stage_completed().await; } + tracing::trace!( + "Stage runner [{:?}-{:?}] existed. ", + self.stage.query_id, + self.stage.id + ); + Ok(()) } @@ -517,7 +574,7 @@ impl StageRunner { shutdown_rx: oneshot::Receiver, ) -> SchedulerResult<()> { // If root, we execute it locally. 
- if self.stage.id != 0 { + if !self.is_root_stage() { self.schedule_tasks(shutdown_rx).await?; } else { self.schedule_tasks_for_root(shutdown_rx).await?; @@ -526,7 +583,7 @@ impl StageRunner { } #[inline(always)] - fn get_vnode_mapping(&self, table_id: &TableId) -> Option { + fn get_vnode_mapping(&self, table_id: &TableId) -> Option { self.catalog_reader .read_guard() .get_table_by_id(table_id) @@ -563,11 +620,8 @@ impl StageRunner { .unwrap() .table_id, )) - .unwrap_or_default() - .iter() - .copied() - .sorted() - .dedup() + .unwrap() + .iter_unique() .collect_vec(); let pu = id2pu_vec[task_id as usize]; @@ -582,9 +636,8 @@ impl StageRunner { }; let worker_node = match vnode_mapping { - Some(parallel_unit_ids) => { - let parallel_unit_ids = - parallel_unit_ids.into_iter().sorted().dedup().collect_vec(); + Some(mapping) => { + let parallel_unit_ids = mapping.iter_unique().collect_vec(); let candidates = self .worker_node_manager .get_workers_by_parallel_unit_ids(¶llel_unit_ids)?; @@ -613,40 +666,56 @@ impl StageRunner { } /// Write message into channel to notify query runner current stage have been scheduled. - async fn notify_schedule_next_stage(&self) { - // If all tasks of this stage is scheduled, tell the query manager to schedule next. + async fn notify_stage_scheduled(&self, msg: QueryMessage) { + self.notify_stage_state_changed( + |old_state| { + assert_matches!(old_state, StageState::Started); + StageState::Running + }, + msg, + ) + .await + } + + /// Notify query execution that this stage completed. + async fn notify_stage_completed(&self) { + self.notify_stage_state_changed( + |old_state| { + assert_matches!(old_state, StageState::Running); + StageState::Completed + }, + QueryMessage::Stage(StageEvent::Completed(self.stage.id)), + ) + .await + } + + async fn notify_stage_state_changed(&self, new_state: F, msg: QueryMessage) + where + F: FnOnce(StageState) -> StageState, + { { - // Changing state let mut s = self.state.write().await; - let state = mem::replace(&mut *s, StageState::Failed); - match state { - StageState::Started => { - *s = StageState::Running; - } - _ => unreachable!( - "The state can not be {:?} for query-{:?}-{:?} to do notify ", - state, self.stage.query_id.id, self.stage.id - ), - } + let old_state = mem::replace(&mut *s, StageState::Failed); + *s = new_state(old_state); } - self.send_event(QueryMessage::Stage(StageEvent::Scheduled(self.stage.id))) - .await; + + self.send_event(msg).await; } /// Abort all registered tasks. Note that here we do not care which part of tasks has already /// failed or completed, cuz the abort task will not fail if the task has already die. /// See PR (#4560). - async fn abort_all_running_tasks(&self) -> SchedulerResult<()> { + async fn abort_all_scheduled_tasks(&self) -> SchedulerResult<()> { // Set state to failed. - { - let mut state = self.state.write().await; - // Ignore if already finished. - if let &StageState::Completed = &*state { - return Ok(()); - } - // FIXME: Be careful for state jump back. - *state = StageState::Failed - } + // { + // let mut state = self.state.write().await; + // // Ignore if already finished. + // if let &StageState::Completed = &*state { + // return Ok(()); + // } + // // FIXME: Be careful for state jump back. + // *state = StageState::Failed + // } for (task, task_status) in self.tasks.iter() { // 1. Collect task info and client. 
@@ -662,7 +731,7 @@ impl StageRunner { let query_id = self.stage.query_id.id.clone(); let stage_id = self.stage.id; let task_id = *task; - tokio::spawn(async move { + spawn(async move { if let Err(e) = client .abort(AbortTaskRequest { task_id: Some(risingwave_pb::batch_plan::TaskId { @@ -688,7 +757,7 @@ impl StageRunner { task_id: TaskIdProst, plan_fragment: PlanFragment, worker: Option, - ) -> SchedulerResult> { + ) -> SchedulerResult>> { let worker_node_addr = worker .unwrap_or(self.worker_node_manager.next_random()?) .host @@ -704,7 +773,8 @@ impl StageRunner { let stream_status = compute_client .create_task(task_id, plan_fragment, self.epoch.clone()) .await - .map_err(|e| anyhow!(e))?; + .map_err(|e| anyhow!(e))? + .fuse(); self.tasks[&t_id].inner.store(Arc::new(TaskStatus { _task_id: t_id, @@ -724,7 +794,7 @@ impl StageRunner { let plan_node_prost = self.convert_plan_node(&self.stage.root, task_id, partition, identity_id); - let exchange_info = self.stage.exchange_info.clone(); + let exchange_info = self.stage.exchange_info.clone().unwrap(); PlanFragment { root: Some(plan_node_prost), @@ -832,6 +902,10 @@ impl StageRunner { } } } + + fn is_root_stage(&self) -> bool { + self.stage.id == 0 + } } impl TaskStatus { diff --git a/src/frontend/src/scheduler/error.rs b/src/frontend/src/scheduler/error.rs index bde9a524a46c7..de08881be829a 100644 --- a/src/frontend/src/scheduler/error.rs +++ b/src/frontend/src/scheduler/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/scheduler/hummock_snapshot_manager.rs b/src/frontend/src/scheduler/hummock_snapshot_manager.rs index bfdff9b0d1cea..9f5782dff1569 100644 --- a/src/frontend/src/scheduler/hummock_snapshot_manager.rs +++ b/src/frontend/src/scheduler/hummock_snapshot_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/scheduler/local.rs b/src/frontend/src/scheduler/local.rs index 8ffe116dc74ea..cb7afd3b89365 100644 --- a/src/frontend/src/scheduler/local.rs +++ b/src/frontend/src/scheduler/local.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
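The reworked status loop in stage.rs above drops the manual tokio::select! in favour of StreamExt::take_until: the merged task-status stream ends either when all statuses are consumed or when the shutdown receiver fires, and take_future() hands back the unresolved shutdown future so it can still be awaited afterwards. A minimal sketch of that combinator in isolation (the channel and values are illustrative only):

use futures::stream::{self, StreamExt};
use tokio::sync::oneshot;

#[tokio::main]
async fn main() {
    let (shutdown_tx, shutdown_rx) = oneshot::channel::<()>();
    // The stream stops as soon as `shutdown_rx` resolves; otherwise it runs to completion.
    let mut statuses = stream::iter(1..=3).take_until(shutdown_rx);

    while let Some(status) = statuses.next().await {
        println!("task status update: {status}");
    }

    // If the stream finished on its own, the untriggered shutdown future is still
    // available and can be awaited separately, as the stage runner does.
    if let Some(shutdown) = statuses.take_future() {
        shutdown_tx.send(()).unwrap();
        shutdown.await.unwrap();
    }
}
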
@@ -16,16 +16,17 @@ use std::collections::HashMap; use std::sync::Arc; +use anyhow::Context; use futures::executor::block_on; use futures::StreamExt; use futures_async_stream::try_stream; -use itertools::Itertools; use pgwire::pg_server::BoxedError; use risingwave_batch::executor::{BoxedDataChunkStream, ExecutorBuilder}; use risingwave_batch::task::TaskId; use risingwave_common::array::DataChunk; use risingwave_common::bail; use risingwave_common::error::RwError; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::stream_cancel::{cancellable_stream, Tripwire}; use risingwave_connector::source::SplitMetaData; use risingwave_pb::batch_plan::exchange_info::DistributionMode; @@ -154,7 +155,7 @@ impl LocalQueryExecution { fn create_plan_fragment(&self) -> SchedulerResult { let root_stage_id = self.query.root_stage_id(); let root_stage = self.query.stage_graph.stages.get(&root_stage_id).unwrap(); - assert_eq!(root_stage.parallelism, 1); + assert_eq!(root_stage.parallelism.unwrap(), 1); let second_stage_id = self.query.stage_graph.get_child_stages(&root_stage_id); let plan_node_prost = match second_stage_id { None => { @@ -235,7 +236,7 @@ impl LocalQueryExecution { let workers = self.front_env.worker_node_manager().get_workers_by_parallel_unit_ids(¶llel_unit_ids)?; for (idx, (worker_node, partition)) in - (workers.into_iter().zip_eq(vnode_bitmaps.into_iter())).enumerate() + (workers.into_iter().zip_eq_fast(vnode_bitmaps.into_iter())).enumerate() { let second_stage_plan_node = self.convert_plan_node( &second_stage.root, @@ -268,7 +269,7 @@ impl LocalQueryExecution { sources.push(exchange_source); } } else if let Some(source_info) = &second_stage.source_info { - for (id,split) in source_info.split_info().iter().enumerate() { + for (id,split) in source_info.split_info().unwrap().iter().enumerate() { let second_stage_plan_node = self.convert_plan_node( &second_stage.root, &mut None, @@ -318,7 +319,7 @@ impl LocalQueryExecution { epoch: Some(self.snapshot.get_batch_query_epoch()), }; - let workers = if second_stage.parallelism == 1 { + let workers = if second_stage.parallelism.unwrap() == 1 { vec![self.front_env.worker_node_manager().next_random()?] } else { self.front_env.worker_node_manager().list_worker_nodes() @@ -404,19 +405,20 @@ impl LocalQueryExecution { .inner_side_table_desc .as_ref() .expect("no side table desc"); - node.inner_side_vnode_mapping = self + let table = self .front_env .catalog_reader() .read_guard() .get_table_by_id(&side_table_desc.table_id.into()) - .map(|table| { - self.front_env - .worker_node_manager() - .get_fragment_mapping(&table.fragment_id) - }) - .ok() - .flatten() - .unwrap_or_default(); + .context("side table not found")?; + let mapping = self + .front_env + .worker_node_manager() + .get_fragment_mapping(&table.fragment_id) + .context("fragment mapping not found")?; + + // TODO: should we use `pb::ParallelUnitMapping` here? + node.inner_side_vnode_mapping = mapping.to_expanded(); node.worker_nodes = self.front_env.worker_node_manager().list_worker_nodes(); } diff --git a/src/frontend/src/scheduler/mod.rs b/src/frontend/src/scheduler/mod.rs index efe548392b366..0fbf1a2b2f368 100644 --- a/src/frontend/src/scheduler/mod.rs +++ b/src/frontend/src/scheduler/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
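In the lookup-join part of local.rs above, the old unwrap_or_default() fallbacks are replaced by hard errors built with anyhow::Context, which attaches a message to a missing Option value and turns it into an error. A stripped-down sketch of that pattern (the catalog map and lookup function here are hypothetical):

use std::collections::HashMap;

use anyhow::{Context, Result};

fn fragment_mapping(catalog: &HashMap<u32, Vec<u32>>, fragment_id: u32) -> Result<Vec<u32>> {
    // `Option::context` converts a missing entry into an error instead of a silent default.
    let mapping = catalog
        .get(&fragment_id)
        .context("fragment mapping not found")?;
    Ok(mapping.clone())
}

fn main() {
    let catalog = HashMap::from([(1u32, vec![0u32, 1, 2])]);
    assert!(fragment_mapping(&catalog, 1).is_ok());
    assert!(fragment_mapping(&catalog, 42).is_err());
}
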
@@ -34,6 +34,7 @@ pub use local::*; use crate::scheduler::task_context::FrontendBatchTaskContext; mod error; +pub mod streaming_manager; mod task_context; pub mod worker_node_manager; diff --git a/src/frontend/src/scheduler/plan_fragmenter.rs b/src/frontend/src/scheduler/plan_fragmenter.rs index a190f9b2e0dfb..265a88ae558cb 100644 --- a/src/frontend/src/scheduler/plan_fragmenter.rs +++ b/src/frontend/src/scheduler/plan_fragmenter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,13 +17,13 @@ use std::fmt::{Debug, Formatter}; use std::sync::Arc; use anyhow::anyhow; +use async_recursion::async_recursion; use enum_as_inner::EnumAsInner; -use futures::executor::block_on; use itertools::Itertools; use risingwave_common::buffer::{Bitmap, BitmapBuilder}; use risingwave_common::catalog::TableDesc; use risingwave_common::error::RwError; -use risingwave_common::hash::{ParallelUnitId, VirtualNode, VnodeMapping}; +use risingwave_common::hash::{ParallelUnitId, ParallelUnitMapping, VirtualNode}; use risingwave_common::util::scan_range::ScanRange; use risingwave_connector::source::{ConnectorProperties, SplitEnumeratorImpl, SplitImpl}; use risingwave_pb::batch_plan::plan_node::NodeBody; @@ -116,10 +116,12 @@ impl ExecutionPlanNode { /// `BatchPlanFragmenter` splits a query plan into fragments. pub struct BatchPlanFragmenter { query_id: QueryId, - stage_graph_builder: StageGraphBuilder, next_stage_id: StageId, worker_node_manager: WorkerNodeManagerRef, catalog_reader: CatalogReader, + + stage_graph_builder: Option, + stage_graph: Option, } impl Default for QueryId { @@ -131,14 +133,36 @@ impl Default for QueryId { } impl BatchPlanFragmenter { - pub fn new(worker_node_manager: WorkerNodeManagerRef, catalog_reader: CatalogReader) -> Self { - Self { + pub fn new( + worker_node_manager: WorkerNodeManagerRef, + catalog_reader: CatalogReader, + batch_node: PlanRef, + ) -> SchedulerResult { + let mut plan_fragmenter = Self { query_id: Default::default(), - stage_graph_builder: StageGraphBuilder::new(), + stage_graph_builder: Some(StageGraphBuilder::new()), next_stage_id: 0, worker_node_manager, catalog_reader, - } + stage_graph: None, + }; + plan_fragmenter.split_into_stage(batch_node)?; + Ok(plan_fragmenter) + } + + /// Split the plan node into each stages, based on exchange node. 
+ fn split_into_stage(&mut self, batch_node: PlanRef) -> SchedulerResult<()> { + let root_stage = self.new_stage( + batch_node, + Some(Distribution::Single.to_prost(1, &self.catalog_reader, &self.worker_node_manager)), + )?; + self.stage_graph = Some( + self.stage_graph_builder + .take() + .unwrap() + .build(root_stage.id), + ); + Ok(()) } } @@ -200,19 +224,57 @@ impl Query { } } +#[derive(Debug, Clone)] +pub struct SourceFetchInfo { + pub connector: ConnectorProperties, + pub timebound: (Option, Option), +} + #[derive(Clone, Debug)] -pub struct SourceScanInfo { +pub enum SourceScanInfo { /// Split Info - split_info: Vec, + Incomplete(SourceFetchInfo), + Complete(Vec), } impl SourceScanInfo { - pub fn new(split_info: Vec) -> Self { - Self { split_info } + pub fn new(fetch_info: SourceFetchInfo) -> Self { + Self::Incomplete(fetch_info) + } + + pub async fn complete(self) -> SchedulerResult { + let fetch_info = match self { + SourceScanInfo::Incomplete(fetch_info) => fetch_info, + SourceScanInfo::Complete(_) => { + unreachable!("Never call complete when SourceScanInfo is already complete") + } + }; + let mut enumerator = SplitEnumeratorImpl::create(fetch_info.connector).await?; + let kafka_enumerator = match enumerator { + SplitEnumeratorImpl::Kafka(ref mut kafka_enumerator) => kafka_enumerator, + _ => { + return Err(SchedulerError::Internal(anyhow!( + "Unsupported to query directly from this source" + ))) + } + }; + let split_info = kafka_enumerator + .list_splits_batch(fetch_info.timebound.0, fetch_info.timebound.1) + .await? + .into_iter() + .map(SplitImpl::Kafka) + .collect_vec(); + + Ok(SourceScanInfo::Complete(split_info)) } - pub fn split_info(&self) -> &Vec { - &self.split_info + pub fn split_info(&self) -> SchedulerResult<&Vec> { + match self { + Self::Incomplete(_) => Err(SchedulerError::Internal(anyhow!( + "Should not get split info from incomplete source scan info" + ))), + Self::Complete(split_info) => Ok(split_info), + } } } @@ -270,16 +332,20 @@ pub enum PartitionInfo { } /// Fragment part of `Query`. +#[derive(Clone)] pub struct QueryStage { pub query_id: QueryId, pub id: StageId, pub root: Arc, - pub exchange_info: ExchangeInfo, - pub parallelism: u32, + pub exchange_info: Option, + pub parallelism: Option, /// Indicates whether this stage contains a table scan node and the table's information if so. pub table_scan_info: Option, pub source_info: Option, pub has_lookup_join: bool, + + /// Used to generage exchange information when complete source scan information. 
+ children_exhchange_distribution: Option>, } impl QueryStage { @@ -295,6 +361,48 @@ impl QueryStage { pub fn has_lookup_join(&self) -> bool { self.has_lookup_join } + + pub fn clone_with_exchange_info(&self, exchange_info: Option) -> Self { + if let Some(exchange_info) = exchange_info { + return Self { + query_id: self.query_id.clone(), + id: self.id, + root: self.root.clone(), + exchange_info: Some(exchange_info), + parallelism: self.parallelism, + table_scan_info: self.table_scan_info.clone(), + source_info: self.source_info.clone(), + has_lookup_join: self.has_lookup_join, + children_exhchange_distribution: self.children_exhchange_distribution.clone(), + }; + } + self.clone() + } + + pub fn clone_with_exchange_info_and_complete_source_info( + &self, + exchange_info: Option, + source_info: SourceScanInfo, + ) -> Self { + assert!(matches!(source_info, SourceScanInfo::Complete(_))); + let exchange_info = if let Some(exchange_info) = exchange_info { + Some(exchange_info) + } else { + self.exchange_info.clone() + }; + + Self { + query_id: self.query_id.clone(), + id: self.id, + root: self.root.clone(), + exchange_info, + parallelism: Some(source_info.split_info().unwrap().len() as u32), + table_scan_info: self.table_scan_info.clone(), + source_info: Some(source_info), + has_lookup_join: self.has_lookup_join, + children_exhchange_distribution: None, + } + } } impl Debug for QueryStage { @@ -327,22 +435,24 @@ struct QueryStageBuilder { query_id: QueryId, id: StageId, root: Option>, - parallelism: u32, - exchange_info: ExchangeInfo, + parallelism: Option, + exchange_info: Option, children_stages: Vec, /// See also [`QueryStage::table_scan_info`]. table_scan_info: Option, source_info: Option, has_lookup_join: bool, + + children_exhchange_distribution: HashMap, } impl QueryStageBuilder { fn new( id: StageId, query_id: QueryId, - parallelism: u32, - exchange_info: ExchangeInfo, + parallelism: Option, + exchange_info: Option, table_scan_info: Option, source_info: Option, has_lookup_join: bool, @@ -357,10 +467,16 @@ impl QueryStageBuilder { table_scan_info, source_info, has_lookup_join, + children_exhchange_distribution: HashMap::new(), } } fn finish(self, stage_graph_builder: &mut StageGraphBuilder) -> QueryStageRef { + let children_exhchange_distribution = if self.parallelism.is_none() { + Some(self.children_exhchange_distribution) + } else { + None + }; let stage = Arc::new(QueryStage { query_id: self.query_id, id: self.id, @@ -370,6 +486,7 @@ impl QueryStageBuilder { table_scan_info: self.table_scan_info, source_info: self.source_info, has_lookup_join: self.has_lookup_join, + children_exhchange_distribution, }); stage_graph_builder.add_node(stage.clone()); @@ -418,6 +535,95 @@ impl StageGraph { ret.into_iter().rev() } + + async fn complete( + self, + catalog_reader: &CatalogReader, + worker_node_manager: &WorkerNodeManagerRef, + ) -> SchedulerResult { + let mut complete_stages = HashMap::new(); + self.complete_stage( + self.stages.get(&self.root_stage_id).unwrap().clone(), + None, + &mut complete_stages, + catalog_reader, + worker_node_manager, + ) + .await?; + Ok(StageGraph { + root_stage_id: self.root_stage_id, + stages: complete_stages, + child_edges: self.child_edges, + parent_edges: self.parent_edges, + }) + } + + #[async_recursion] + async fn complete_stage( + &self, + stage: QueryStageRef, + exchange_info: Option, + complete_stages: &mut HashMap, + catalog_reader: &CatalogReader, + worker_node_manager: &WorkerNodeManagerRef, + ) -> SchedulerResult<()> { + let parallelism = if 
stage.parallelism.is_some() { + // If the stage has parallelism, it means it's a complete stage. + complete_stages.insert( + stage.id, + Arc::new(stage.clone_with_exchange_info(exchange_info)), + ); + None + } else { + assert!(matches!( + stage.source_info, + Some(SourceScanInfo::Incomplete(_)) + )); + let complete_source_info = stage + .source_info + .as_ref() + .unwrap() + .clone() + .complete() + .await?; + + let complete_stage = Arc::new(stage.clone_with_exchange_info_and_complete_source_info( + exchange_info, + complete_source_info, + )); + let parallelism = complete_stage.parallelism; + complete_stages.insert(stage.id, complete_stage); + parallelism + }; + + for child_stage_id in self.child_edges.get(&stage.id).unwrap_or(&HashSet::new()) { + let exchange_info = if let Some(parallelism) = parallelism { + let exchange_distribution = stage + .children_exhchange_distribution + .as_ref() + .unwrap() + .get(child_stage_id) + .expect("Exchange distribution is not consistent with the stage graph"); + Some(exchange_distribution.to_prost( + parallelism, + catalog_reader, + worker_node_manager, + )) + } else { + None + }; + self.complete_stage( + self.stages.get(child_stage_id).unwrap().clone(), + exchange_info, + complete_stages, + catalog_reader, + worker_node_manager, + ) + .await?; + } + + Ok(()) + } } struct StageGraphBuilder { @@ -466,20 +672,26 @@ impl StageGraphBuilder { } impl BatchPlanFragmenter { - /// Split the plan node into each stages, based on exchange node. - pub fn split(mut self, batch_node: PlanRef) -> SchedulerResult { - let root_stage = self.new_stage(batch_node, Distribution::Single.to_prost(1, &self))?; - let stage_graph = self.stage_graph_builder.build(root_stage.id); + /// After split, the `stage_graph` in the framenter may has the stage with incomplete source + /// info, we need to fetch the source info to complete the stage in this function. + /// Why separate this two step(`split()` and `generate_complete_query()`)? + /// The step of fetching source info is a async operation so that we can't do it in the split + /// step. 
+ pub async fn generate_complete_query(self) -> SchedulerResult { + let stage_graph = self.stage_graph.unwrap(); + let new_stage_graph = stage_graph + .complete(&self.catalog_reader, &self.worker_node_manager) + .await?; Ok(Query { - stage_graph, query_id: self.query_id, + stage_graph: new_stage_graph, }) } fn new_stage( &mut self, root: PlanRef, - exchange_info: ExchangeInfo, + exchange_info: Option, ) -> SchedulerResult { let next_stage_id = self.next_stage_id; self.next_stage_id += 1; @@ -518,8 +730,10 @@ impl BatchPlanFragmenter { } else { // System table } - } else { - // No table scan + } else if source_info.is_some() { + return Err(SchedulerError::Internal(anyhow!( + "The stage has single distribution, but contains a source operator" + ))); } 1 } @@ -535,18 +749,23 @@ impl BatchPlanFragmenter { { has_lookup_join = true; lookup_join_parallelism - } else if let Some(source_info) = &source_info { - source_info.split_info().len() + } else if source_info.is_some() { + 0 } else { self.worker_node_manager.worker_node_count() } } }; + let parallelism = if parallelism == 0 { + None + } else { + Some(parallelism as u32) + }; let mut builder = QueryStageBuilder::new( next_stage_id, self.query_id.clone(), - parallelism as u32, + parallelism, exchange_info, table_scan_info, source_info, @@ -555,7 +774,7 @@ impl BatchPlanFragmenter { self.visit_node(root, &mut builder, None)?; - Ok(builder.finish(&mut self.stage_graph_builder)) + Ok(builder.finish(self.stage_graph_builder.as_mut().unwrap())) } fn visit_node( @@ -592,9 +811,22 @@ impl BatchPlanFragmenter { parent_exec_node: Option<&mut ExecutionPlanNode>, ) -> SchedulerResult<()> { let mut execution_plan_node = ExecutionPlanNode::from(node.clone()); - let child_exchange_info = node.distribution().to_prost(builder.parallelism, self); + let child_exchange_info = if let Some(parallelism) = builder.parallelism { + Some(node.distribution().to_prost( + parallelism, + &self.catalog_reader, + &self.worker_node_manager, + )) + } else { + None + }; let child_stage = self.new_stage(node.inputs()[0].clone(), child_exchange_info)?; execution_plan_node.source_stage_id = Some(child_stage.id); + if builder.parallelism.is_none() { + builder + .children_exhchange_distribution + .insert(child_stage.id, node.distribution().clone()); + } if let Some(parent) = parent_exec_node { parent.children.push(Arc::new(execution_plan_node)); @@ -620,25 +852,11 @@ impl BatchPlanFragmenter { let source_catalog = source_node.logical().source_catalog(); if let Some(source_catalog) = source_catalog { let property = ConnectorProperties::extract(source_catalog.properties.clone())?; - let mut enumerator = block_on(SplitEnumeratorImpl::create(property))?; - let kafka_enumerator = match enumerator { - SplitEnumeratorImpl::Kafka(ref mut kafka_enumerator) => kafka_enumerator, - _ => { - return Err(SchedulerError::Internal(anyhow!( - "Unsupported to query directly from this source" - ))) - } - }; let timestamp_bound = source_node.logical().kafka_timestamp_range_value(); - // println!("Timestamp bound: {:?}", timestamp_bound); - let split_info = block_on( - kafka_enumerator.list_splits_batch(timestamp_bound.0, timestamp_bound.1), - )? 
- .into_iter() - .map(SplitImpl::Kafka) - .collect_vec(); - // println!("Split info: {:?}", split_info); - return Ok(Some(SourceScanInfo::new(split_info))); + return Ok(Some(SourceScanInfo::new(SourceFetchInfo { + connector: property, + timebound: timestamp_bound, + }))); } } @@ -735,31 +953,19 @@ impl BatchPlanFragmenter { } } -// TODO: let frontend store owner_mapping directly? -fn vnode_mapping_to_owner_mapping(vnode_mapping: VnodeMapping) -> HashMap { - let mut m: HashMap = HashMap::new(); - let num_vnodes = vnode_mapping.len(); - for (i, parallel_unit_id) in vnode_mapping.into_iter().enumerate() { - let bitmap = m - .entry(parallel_unit_id) - .or_insert_with(|| BitmapBuilder::zeroed(num_vnodes)); - bitmap.set(i, true); - } - m.into_iter().map(|(k, v)| (k, v.finish())).collect() -} - /// Try to derive the partition to read from the scan range. /// It can be derived if the value of the distribution key is already known. fn derive_partitions( scan_ranges: &[ScanRange], table_desc: &TableDesc, - vnode_mapping: &VnodeMapping, + vnode_mapping: &ParallelUnitMapping, ) -> HashMap { let num_vnodes = vnode_mapping.len(); let mut partitions: HashMap)> = HashMap::new(); if scan_ranges.is_empty() { - return vnode_mapping_to_owner_mapping(vnode_mapping.clone()) + return vnode_mapping + .to_bitmaps() .into_iter() .map(|(k, vnode_bitmap)| { ( @@ -781,9 +987,8 @@ fn derive_partitions( match vnode { None => { // put this scan_range to all partitions - vnode_mapping_to_owner_mapping(vnode_mapping.clone()) - .into_iter() - .for_each(|(parallel_unit_id, vnode_bitmap)| { + vnode_mapping.to_bitmaps().into_iter().for_each( + |(parallel_unit_id, vnode_bitmap)| { let (bitmap, scan_ranges) = partitions .entry(parallel_unit_id) .or_insert_with(|| (BitmapBuilder::zeroed(num_vnodes), vec![])); @@ -792,11 +997,12 @@ fn derive_partitions( .enumerate() .for_each(|(vnode, b)| bitmap.set(vnode, b)); scan_ranges.push(scan_range.to_protobuf()); - }); + }, + ); } // scan a single partition Some(vnode) => { - let parallel_unit_id = vnode_mapping[vnode.to_index()]; + let parallel_unit_id = vnode_mapping[vnode]; let (bitmap, scan_ranges) = partitions .entry(parallel_unit_id) .or_insert_with(|| (BitmapBuilder::zeroed(num_vnodes), vec![])); @@ -873,12 +1079,12 @@ mod tests { assert_eq!(root_exchange.root.node_type(), PlanNodeType::BatchExchange); assert_eq!(root_exchange.root.source_stage_id, Some(1)); assert!(matches!(root_exchange.root.node, NodeBody::Exchange(_))); - assert_eq!(root_exchange.parallelism, 1); + assert_eq!(root_exchange.parallelism, Some(1)); assert!(!root_exchange.has_table_scan()); let join_node = query.stage_graph.stages.get(&1).unwrap(); assert_eq!(join_node.root.node_type(), PlanNodeType::BatchHashJoin); - assert_eq!(join_node.parallelism, 3); + assert_eq!(join_node.parallelism, Some(3)); assert!(matches!(join_node.root.node, NodeBody::HashJoin(_))); assert_eq!(join_node.root.source_stage_id, None); diff --git a/src/frontend/src/scheduler/streaming_manager.rs b/src/frontend/src/scheduler/streaming_manager.rs new file mode 100644 index 0000000000000..04e030aabcb99 --- /dev/null +++ b/src/frontend/src/scheduler/streaming_manager.rs @@ -0,0 +1,133 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::fmt::{Debug, Formatter}; +use std::sync::Arc; + +use itertools::Itertools; +use parking_lot::RwLock; +use pgwire::pg_server::SessionId; +use risingwave_pb::meta::CreatingJobInfo; +use uuid::Uuid; + +use crate::catalog::{DatabaseId, SchemaId}; +use crate::meta_client::FrontendMetaClient; + +#[derive(Clone, Debug, Hash, Eq, PartialEq)] +pub struct TaskId { + pub id: String, +} + +impl std::fmt::Display for TaskId { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "TaskId:{}", self.id) + } +} + +impl Default for TaskId { + fn default() -> Self { + Self { + id: Uuid::new_v4().to_string(), + } + } +} + +pub type StreamingJobTrackerRef = Arc; + +pub struct StreamingJobTracker { + creating_streaming_job: RwLock>, + meta_client: Arc, +} + +impl StreamingJobTracker { + pub fn new(meta_client: Arc) -> Self { + Self { + creating_streaming_job: RwLock::new(HashMap::default()), + meta_client, + } + } +} + +#[derive(Clone, Default)] +pub struct CreatingStreamingJobInfo { + /// Identified by process_id, secret_key. + session_id: SessionId, + info: CreatingJobInfo, +} + +impl CreatingStreamingJobInfo { + pub fn new( + session_id: SessionId, + database_id: DatabaseId, + schema_id: SchemaId, + name: String, + ) -> Self { + Self { + session_id, + info: CreatingJobInfo { + database_id, + schema_id, + name, + }, + } + } +} + +pub struct StreamingJobGuard<'a> { + task_id: TaskId, + tracker: &'a StreamingJobTracker, +} + +impl<'a> Drop for StreamingJobGuard<'a> { + fn drop(&mut self) { + self.tracker.delete_job(&self.task_id); + } +} + +impl StreamingJobTracker { + pub fn guard(&self, task_info: CreatingStreamingJobInfo) -> StreamingJobGuard<'_> { + let task_id = TaskId::default(); + self.add_job(task_id.clone(), task_info); + StreamingJobGuard { + task_id, + tracker: self, + } + } + + fn add_job(&self, task_id: TaskId, info: CreatingStreamingJobInfo) { + self.creating_streaming_job.write().insert(task_id, info); + } + + fn delete_job(&self, task_id: &TaskId) { + self.creating_streaming_job.write().remove(task_id); + } + + pub fn abort_jobs(&self, session_id: SessionId) { + let jobs = self + .creating_streaming_job + .read() + .values() + .filter(|job| job.session_id == session_id) + .cloned() + .collect_vec(); + + let client = self.meta_client.clone(); + tokio::spawn(async move { + client + .cancel_creating_jobs(jobs.into_iter().map(|job| job.info).collect_vec()) + .await + }); + } +} diff --git a/src/frontend/src/scheduler/task_context.rs b/src/frontend/src/scheduler/task_context.rs index 29b8ef7d60123..41da6e2d563ac 100644 --- a/src/frontend/src/scheduler/task_context.rs +++ b/src/frontend/src/scheduler/task_context.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
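The StreamingJobGuard above is an RAII guard: guard() registers the creating job in the tracker and Drop unregisters it, so the bookkeeping stays correct on early returns and panics alike. A stripped-down sketch of the same guard pattern over a plain set:

use std::collections::HashSet;
use std::sync::Mutex;

#[derive(Default)]
struct Tracker {
    running: Mutex<HashSet<u64>>,
}

struct JobGuard<'a> {
    id: u64,
    tracker: &'a Tracker,
}

impl Tracker {
    fn guard(&self, id: u64) -> JobGuard<'_> {
        self.running.lock().unwrap().insert(id);
        JobGuard { id, tracker: self }
    }
}

impl Drop for JobGuard<'_> {
    fn drop(&mut self) {
        // Unregister automatically, even if the job creation path returns early.
        self.tracker.running.lock().unwrap().remove(&self.id);
    }
}

fn main() {
    let tracker = Tracker::default();
    {
        let _guard = tracker.guard(7);
        assert!(tracker.running.lock().unwrap().contains(&7));
    }
    // Dropping the guard removed the entry again.
    assert!(tracker.running.lock().unwrap().is_empty());
}
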
@@ -20,8 +20,8 @@ use risingwave_common::catalog::SysCatalogReaderRef; use risingwave_common::config::BatchConfig; use risingwave_common::error::Result; use risingwave_common::util::addr::{is_local_address, HostAddr}; +use risingwave_connector::source::monitor::SourceMetrics; use risingwave_rpc_client::ComputeClientPoolRef; -use risingwave_source::monitor::SourceMetrics; use crate::catalog::system_catalog::SysCatalogReaderImpl; use crate::session::{AuthContext, FrontendEnv}; @@ -86,7 +86,7 @@ impl BatchTaskContext for FrontendBatchTaskContext { todo!() } - fn get_mem_usage(&self) -> usize { + fn mem_usage(&self) -> usize { todo!() } } diff --git a/src/frontend/src/scheduler/worker_node_manager.rs b/src/frontend/src/scheduler/worker_node_manager.rs index dab01b7116569..913e83678a64d 100644 --- a/src/frontend/src/scheduler/worker_node_manager.rs +++ b/src/frontend/src/scheduler/worker_node_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ use std::sync::{Arc, RwLock}; use rand::seq::SliceRandom; use risingwave_common::bail; -use risingwave_common::hash::{ParallelUnitId, VnodeMapping}; +use risingwave_common::hash::{ParallelUnitId, ParallelUnitMapping}; use risingwave_common::util::worker_util::get_pu_to_worker_mapping; use risingwave_pb::common::WorkerNode; @@ -33,7 +33,7 @@ pub struct WorkerNodeManager { struct WorkerNodeManagerInner { worker_nodes: Vec, /// fragment vnode mapping info. - fragment_vnode_mapping: HashMap, + fragment_vnode_mapping: HashMap, } pub type WorkerNodeManagerRef = Arc; @@ -76,7 +76,11 @@ impl WorkerNodeManager { .retain(|x| *x != node); } - pub fn refresh(&self, nodes: Vec, mapping: HashMap) { + pub fn refresh( + &self, + nodes: Vec, + mapping: HashMap, + ) { let mut write_guard = self.inner.write().unwrap(); write_guard.worker_nodes = nodes; write_guard.fragment_vnode_mapping = mapping; @@ -126,7 +130,7 @@ impl WorkerNodeManager { Ok(workers) } - pub fn get_fragment_mapping(&self, fragment_id: &FragmentId) -> Option { + pub fn get_fragment_mapping(&self, fragment_id: &FragmentId) -> Option { self.inner .read() .unwrap() @@ -135,7 +139,11 @@ impl WorkerNodeManager { .cloned() } - pub fn insert_fragment_mapping(&self, fragment_id: FragmentId, vnode_mapping: VnodeMapping) { + pub fn insert_fragment_mapping( + &self, + fragment_id: FragmentId, + vnode_mapping: ParallelUnitMapping, + ) { self.inner .write() .unwrap() @@ -144,7 +152,11 @@ impl WorkerNodeManager { .unwrap(); } - pub fn update_fragment_mapping(&self, fragment_id: FragmentId, vnode_mapping: VnodeMapping) { + pub fn update_fragment_mapping( + &self, + fragment_id: FragmentId, + vnode_mapping: ParallelUnitMapping, + ) { self.inner .write() .unwrap() diff --git a/src/frontend/src/session.rs b/src/frontend/src/session.rs index 15c498232a9bf..38ef2a77ff5d6 100644 --- a/src/frontend/src/session.rs +++ b/src/frontend/src/session.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -23,6 +23,7 @@ use parking_lot::{RwLock, RwLockReadGuard}; use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::PgResponse; use pgwire::pg_server::{BoxedError, Session, SessionId, SessionManager, UserAuthenticator}; +use pgwire::types::Format; use rand::RngCore; use risingwave_common::array::DataChunk; use risingwave_common::catalog::DEFAULT_SCHEMA_NAME; @@ -37,19 +38,21 @@ use risingwave_common::session_config::ConfigMap; use risingwave_common::types::DataType; use risingwave_common::util::addr::HostAddr; use risingwave_common::util::stream_cancel::{stream_tripwire, Trigger, Tripwire}; +use risingwave_common::{GIT_SHA, RW_VERSION}; use risingwave_common_service::observer_manager::ObserverManager; use risingwave_common_service::MetricsManager; +use risingwave_connector::source::monitor::SourceMetrics; use risingwave_pb::common::WorkerType; use risingwave_pb::health::health_server::HealthServer; use risingwave_pb::user::auth_info::EncryptionType; use risingwave_pb::user::grant_privilege::{Action, Object}; use risingwave_rpc_client::{ComputeClientPool, ComputeClientPoolRef, MetaClient}; -use risingwave_source::monitor::SourceMetrics; use risingwave_sqlparser::ast::{ObjectName, ShowObject, Statement}; use risingwave_sqlparser::parser::Parser; use tokio::sync::oneshot::Sender; use tokio::sync::watch; use tokio::task::JoinHandle; +use tracing::info; use crate::binder::Binder; use crate::catalog::catalog_service::{CatalogReader, CatalogWriter, CatalogWriterImpl}; @@ -64,6 +67,7 @@ use crate::monitor::FrontendMetrics; use crate::observer::FrontendObserverNode; use crate::optimizer::OptimizerContext; use crate::planner::Planner; +use crate::scheduler::streaming_manager::{StreamingJobTracker, StreamingJobTrackerRef}; use crate::scheduler::worker_node_manager::{WorkerNodeManager, WorkerNodeManagerRef}; use crate::scheduler::SchedulerError::QueryCancelError; use crate::scheduler::{HummockSnapshotManager, HummockSnapshotManagerRef, QueryManager}; @@ -99,6 +103,10 @@ pub struct FrontendEnv { source_metrics: Arc, batch_config: BatchConfig, + + /// Track creating streaming jobs, used to cancel creating streaming job when cancel request + /// received. 
+ creating_streaming_job_tracker: StreamingJobTrackerRef, } type SessionMapRef = Arc>>>; @@ -125,6 +133,7 @@ impl FrontendEnv { ); let server_addr = HostAddr::try_from("127.0.0.1:4565").unwrap(); let client_pool = Arc::new(ComputeClientPool::default()); + let creating_streaming_tracker = StreamingJobTracker::new(meta_client.clone()); Self { meta_client, catalog_writer, @@ -140,32 +149,37 @@ impl FrontendEnv { frontend_metrics: Arc::new(FrontendMetrics::for_test()), batch_config: BatchConfig::default(), source_metrics: Arc::new(SourceMetrics::default()), + creating_streaming_job_tracker: Arc::new(creating_streaming_tracker), } } pub async fn init( - opts: &FrontendOpts, + opts: FrontendOpts, ) -> Result<(Self, JoinHandle<()>, JoinHandle<()>, Sender<()>)> { - let config = load_config(&opts.config_path); - tracing::info!( - "Starting frontend node with\nfrontend config {:?}", - config.server + let config = load_config(&opts.config_path, Some(opts.override_opts)); + info!("Starting frontend node"); + info!("> config: {:?}", config); + info!( + "> debug assertions: {}", + if cfg!(debug_assertions) { "on" } else { "off" } ); + info!("> version: {} ({})", RW_VERSION, GIT_SHA); + let batch_config = config.batch; let frontend_address: HostAddr = opts - .client_address + .advertise_addr .as_ref() .unwrap_or_else(|| { - tracing::warn!("Client address is not specified, defaulting to host address"); - &opts.host + tracing::warn!("advertise addr is not specified, defaulting to listen_addr"); + &opts.listen_addr }) .parse() .unwrap(); - tracing::info!("Client address is {}", frontend_address); + info!("advertise addr is {}", frontend_address); // Register in meta by calling `AddWorkerNode` RPC. - let meta_client = MetaClient::register_new( + let (meta_client, _) = MetaClient::register_new( opts.meta_addr.clone().as_str(), WorkerType::Frontend, &frontend_address, @@ -232,7 +246,7 @@ impl FrontendEnv { let frontend_metrics = Arc::new(FrontendMetrics::new(registry.clone())); let source_metrics = Arc::new(SourceMetrics::new(registry.clone())); - if opts.metrics_level > 0 { + if config.server.metrics_level > 0 { MetricsManager::boot_metrics_service(opts.prometheus_listener_addr.clone(), registry); } @@ -245,11 +259,14 @@ impl FrontendEnv { .await .unwrap(); }); - tracing::info!( + info!( "Health Check RPC Listener is set up on {}", opts.health_check_listener_addr.clone() ); + let creating_streaming_job_tracker = + Arc::new(StreamingJobTracker::new(frontend_meta_client.clone())); + Ok(( Self { catalog_reader, @@ -266,6 +283,7 @@ impl FrontendEnv { sessions_map: Arc::new(Mutex::new(HashMap::new())), batch_config, source_metrics, + creating_streaming_job_tracker, }, observer_join_handle, heartbeat_join_handle, @@ -332,6 +350,10 @@ impl FrontendEnv { pub fn source_metrics(&self) -> Arc { self.source_metrics.clone() } + + pub fn creating_streaming_job_tracker(&self) -> &StreamingJobTrackerRef { + &self.creating_streaming_job_tracker + } } pub struct AuthContext { @@ -497,15 +519,19 @@ impl SessionImpl { pub fn cancel_current_query(&self) { let mut flag_guard = self.current_query_cancel_flag.lock().unwrap(); if let Some(trigger) = flag_guard.take() { - tracing::info!("Trying to cancel query in local mode."); + info!("Trying to cancel query in local mode."); // Current running query is in local mode trigger.abort(); - tracing::info!("Cancel query request sent."); + info!("Cancel query request sent."); } else { - tracing::info!("Trying to cancel query in distributed mode."); + info!("Trying to cancel query in 
distributed mode."); self.env.query_manager().cancel_queries_in_session(self.id) } } + + pub fn cancel_current_creating_job(&self) { + self.env.creating_streaming_job_tracker.abort_jobs(self.id); + } } pub struct SessionManagerImpl { @@ -608,13 +634,22 @@ impl SessionManager for SessionManagerImpl { } } - /// Used when cancel request happened, returned corresponding session ref. + /// Used when cancel request happened. fn cancel_queries_in_session(&self, session_id: SessionId) { let guard = self.env.sessions_map.lock().unwrap(); if let Some(session) = guard.get(&session_id) { session.cancel_current_query() } else { - tracing::info!("Current session finished, ignoring cancel query request") + info!("Current session finished, ignoring cancel query request") + } + } + + fn cancel_creating_jobs_in_session(&self, session_id: SessionId) { + let guard = self.env.sessions_map.lock().unwrap(); + if let Some(session) = guard.get(&session_id) { + session.cancel_current_creating_job() + } else { + info!("Current session finished, ignoring cancel creating request") } } @@ -624,7 +659,7 @@ impl SessionManager for SessionManagerImpl { } impl SessionManagerImpl { - pub async fn new(opts: &FrontendOpts) -> Result { + pub async fn new(opts: FrontendOpts) -> Result { let (env, join_handle, heartbeat_join_handle, heartbeat_shutdown_sender) = FrontendEnv::init(opts).await?; Ok(Self { @@ -652,11 +687,7 @@ impl Session for SessionImpl { async fn run_statement( self: Arc, sql: &str, - - // format: indicate the query PgResponse format (Only meaningful for SELECT queries). - // false: TEXT - // true: BINARY - format: bool, + formats: Vec, ) -> std::result::Result, BoxedError> { // Parse sql. let mut stmts = Parser::parse_sql(sql) @@ -674,7 +705,7 @@ impl Session for SessionImpl { } let stmt = stmts.swap_remove(0); let rsp = { - let mut handle_fut = Box::pin(handle(self, stmt, sql, format)); + let mut handle_fut = Box::pin(handle(self, stmt, sql, formats)); if cfg!(debug_assertions) { // Report the SQL in the log periodically if the query is slow. const SLOW_QUERY_LOG_PERIOD: Duration = Duration::from_secs(60); @@ -696,6 +727,36 @@ impl Session for SessionImpl { Ok(rsp) } + /// A copy of run_statement but exclude the parser part so each run must be at most one + /// statement. The str sql use the to_string of AST. Consider Reuse later. + async fn run_one_query( + self: Arc, + stmt: Statement, + format: Format, + ) -> std::result::Result, BoxedError> { + let sql_str = stmt.to_string(); + let rsp = { + let mut handle_fut = Box::pin(handle(self, stmt, &sql_str, vec![format])); + if cfg!(debug_assertions) { + // Report the SQL in the log periodically if the query is slow. 
+ const SLOW_QUERY_LOG_PERIOD: Duration = Duration::from_secs(60); + loop { + match tokio::time::timeout(SLOW_QUERY_LOG_PERIOD, &mut handle_fut).await { + Ok(result) => break result, + Err(_) => tracing::warn!( + sql_str, + "slow query has been running for another {SLOW_QUERY_LOG_PERIOD:?}" + ), + } + } + } else { + handle_fut.await + } + } + .inspect_err(|e| tracing::error!("failed to handle sql:\n{}:\n{}", sql_str, e))?; + Ok(rsp) + } + async fn infer_return_type( self: Arc, sql: &str, diff --git a/src/frontend/src/stream_fragmenter/graph/fragment_graph.rs b/src/frontend/src/stream_fragmenter/graph/fragment_graph.rs index deacee672b509..bd24e6a8b5e0b 100644 --- a/src/frontend/src/stream_fragmenter/graph/fragment_graph.rs +++ b/src/frontend/src/stream_fragmenter/graph/fragment_graph.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -53,9 +53,6 @@ pub struct StreamFragmentEdge { /// Dispatch strategy for the fragment. pub dispatch_strategy: DispatchStrategy, - /// Whether the two linked nodes should be placed on the same worker node - pub same_worker_node: bool, - /// A unique identifier of this edge. Generally it should be exchange node's operator id. When /// rewriting fragments into delta joins or when inserting 1-to-1 exchange, there will be /// virtual links generated. @@ -139,7 +136,6 @@ impl StreamFragmentGraph { upstream_id, downstream_id, dispatch_strategy: Some(edge.dispatch_strategy), - same_worker_node: edge.same_worker_node, link_id: edge.link_id, }; diff --git a/src/frontend/src/stream_fragmenter/graph/mod.rs b/src/frontend/src/stream_fragmenter/graph/mod.rs index f8de5723aa7e0..a5a02377d40c9 100644 --- a/src/frontend/src/stream_fragmenter/graph/mod.rs +++ b/src/frontend/src/stream_fragmenter/graph/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/stream_fragmenter/mod.rs b/src/frontend/src/stream_fragmenter/mod.rs index 4e3051857024a..85151bef07925 100644 --- a/src/frontend/src/stream_fragmenter/mod.rs +++ b/src/frontend/src/stream_fragmenter/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -253,13 +253,15 @@ fn build_fragment( current_fragment.is_singleton = node.is_singleton; } - NodeBody::Now(_) => current_fragment.fragment_type_mask |= FragmentTypeFlag::Now as u32, + NodeBody::Now(_) => { + current_fragment.fragment_type_mask |= FragmentTypeFlag::Now as u32; + current_fragment.is_singleton = true; + } _ => {} }; // handle join logic - // TODO: frontend won't generate delta index join now, so this branch will never hit. if let NodeBody::DeltaIndexJoin(delta_index_join) = stream_node.node_body.as_mut().unwrap() { if delta_index_join.get_join_type()? 
== JoinType::Inner && delta_index_join.condition.is_none() @@ -293,7 +295,6 @@ fn build_fragment( current_fragment.fragment_id, StreamFragmentEdge { dispatch_strategy: exchange_node_strategy, - same_worker_node: false, link_id: child_node.operator_id, }, ); diff --git a/src/frontend/src/stream_fragmenter/rewrite/delta_join.rs b/src/frontend/src/stream_fragmenter/rewrite/delta_join.rs index 0b567fae34875..fec3424c2ac00 100644 --- a/src/frontend/src/stream_fragmenter/rewrite/delta_join.rs +++ b/src/frontend/src/stream_fragmenter/rewrite/delta_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,32 +15,24 @@ use std::rc::Rc; use itertools::Itertools; -use risingwave_common::catalog::{ColumnDesc, Field}; use risingwave_common::error::Result; -use risingwave_common::try_match_expand; -use risingwave_common::util::sort_util::OrderType; -use risingwave_pb::plan_common::{Field as ProstField, OrderType as ProstOrderType}; +use risingwave_pb::plan_common::Field as ProstField; use risingwave_pb::stream_plan::lookup_node::ArrangementTableId; use risingwave_pb::stream_plan::stream_node::NodeBody; use risingwave_pb::stream_plan::{ - ArrangementInfo, DispatchStrategy, DispatcherType, ExchangeNode, LookupNode, LookupUnionNode, - StreamNode, + DispatchStrategy, DispatcherType, ExchangeNode, LookupNode, LookupUnionNode, StreamNode, }; use super::super::{BuildFragmentGraphState, StreamFragment, StreamFragmentEdge}; -use crate::catalog::TableCatalog; -use crate::optimizer::plan_node::utils::TableCatalogBuilder; use crate::stream_fragmenter::build_and_add_fragment; -use crate::WithOptions; -/// All exchanges inside delta join is one-to-one exchange. -fn build_exchange_for_delta_join( +fn build_no_shuffle_exchange_for_delta_join( state: &mut BuildFragmentGraphState, upstream: &StreamNode, ) -> StreamNode { StreamNode { operator_id: state.gen_operator_id() as u64, - identity: "Exchange (Lookup and Merge)".into(), + identity: "NO SHUFFLE Exchange (Lookup and Merge)".into(), fields: upstream.fields.clone(), stream_key: upstream.stream_key.clone(), node_body: Some(NodeBody::Exchange(ExchangeNode { @@ -51,6 +43,24 @@ fn build_exchange_for_delta_join( } } +fn build_consistent_hash_shuffle_exchange_for_delta_join( + state: &mut BuildFragmentGraphState, + upstream: &StreamNode, + column_indices: Vec, +) -> StreamNode { + StreamNode { + operator_id: state.gen_operator_id() as u64, + identity: "HASH Exchange (Lookup and Merge)".into(), + fields: upstream.fields.clone(), + stream_key: upstream.stream_key.clone(), + node_body: Some(NodeBody::Exchange(ExchangeNode { + strategy: Some(dispatch_consistent_hash_shuffle(column_indices)), + })), + input: vec![], + append_only: upstream.append_only, + } +} + fn dispatch_no_shuffle() -> DispatchStrategy { DispatchStrategy { r#type: DispatcherType::NoShuffle.into(), @@ -58,6 +68,14 @@ fn dispatch_no_shuffle() -> DispatchStrategy { } } +fn dispatch_consistent_hash_shuffle(column_indices: Vec) -> DispatchStrategy { + // Actually Hash shuffle is consistent hash shuffle now. 
+ DispatchStrategy { + r#type: DispatcherType::Hash.into(), + column_indices, + } +} + fn build_lookup_for_delta_join( state: &mut BuildFragmentGraphState, (exchange_node_arrangement, exchange_node_stream): (&StreamNode, &StreamNode), @@ -94,10 +112,26 @@ fn build_delta_join_inner( let arrange_0 = arrange_0_frag.node.as_ref().unwrap(); let arrange_1 = arrange_1_frag.node.as_ref().unwrap(); - let exchange_a0l0 = build_exchange_for_delta_join(state, arrange_0); - let exchange_a0l1 = build_exchange_for_delta_join(state, arrange_0); - let exchange_a1l0 = build_exchange_for_delta_join(state, arrange_1); - let exchange_a1l1 = build_exchange_for_delta_join(state, arrange_1); + let exchange_a0l0 = build_no_shuffle_exchange_for_delta_join(state, arrange_0); + let exchange_a0l1 = build_consistent_hash_shuffle_exchange_for_delta_join( + state, + arrange_0, + delta_join_node + .left_key + .iter() + .map(|x| *x as u32) + .collect_vec(), + ); + let exchange_a1l0 = build_consistent_hash_shuffle_exchange_for_delta_join( + state, + arrange_1, + delta_join_node + .right_key + .iter() + .map(|x| *x as u32) + .collect_vec(), + ); + let exchange_a1l1 = build_no_shuffle_exchange_for_delta_join(state, arrange_1); let i0_length = arrange_0.fields.len(); let i1_length = arrange_1.fields.len(); @@ -129,19 +163,6 @@ fn build_delta_join_inner( }, column_mapping: lookup_0_column_reordering, arrangement_table_info: delta_join_node.left_info.clone(), - arrangement_table: Some( - infer_internal_table_catalog( - delta_join_node.left_info.as_ref(), - // Use Arrange node's dist key. - try_match_expand!(arrange_0.get_node_body().unwrap(), NodeBody::Arrange)? - .distribution_key - .clone() - .iter() - .map(|x| *x as usize) - .collect(), - ) - .to_internal_table_prost(), - ), }, ); let lookup_1_column_reordering = { @@ -171,70 +192,75 @@ fn build_delta_join_inner( }, column_mapping: lookup_1_column_reordering, arrangement_table_info: delta_join_node.right_info.clone(), - arrangement_table: Some( - infer_internal_table_catalog( - delta_join_node.right_info.as_ref(), - // Use Arrange node's dist key. - try_match_expand!(arrange_1.get_node_body().unwrap(), NodeBody::Arrange)? - .distribution_key - .clone() - .iter() - .map(|x| *x as usize) - .collect(), - ) - .to_internal_table_prost(), - ), }, ); let lookup_0_frag = build_and_add_fragment(state, lookup_0)?; let lookup_1_frag = build_and_add_fragment(state, lookup_1)?; + // Place index(arrange) together with corresponding lookup operator, so that we can lookup on + // the same node. state.fragment_graph.add_edge( arrange_0_frag.fragment_id, lookup_0_frag.fragment_id, StreamFragmentEdge { dispatch_strategy: dispatch_no_shuffle(), - same_worker_node: true, link_id: exchange_a0l0.operator_id, }, ); + // Use consistent hash shuffle to distribute the index(arrange) to another lookup operator, so + // that we can find the correct node to lookup. state.fragment_graph.add_edge( arrange_0_frag.fragment_id, lookup_1_frag.fragment_id, StreamFragmentEdge { - dispatch_strategy: dispatch_no_shuffle(), - // stream input doesn't need to be on the same worker node as lookup - same_worker_node: false, + dispatch_strategy: dispatch_consistent_hash_shuffle( + delta_join_node + .left_key + .iter() + .map(|x| *x as u32) + .collect_vec(), + ), link_id: exchange_a0l1.operator_id, }, ); + // Use consistent hash shuffle to distribute the index(arrange) to another lookup operator, so + // that we can find the correct node to lookup. 
state.fragment_graph.add_edge( arrange_1_frag.fragment_id, lookup_0_frag.fragment_id, StreamFragmentEdge { - dispatch_strategy: dispatch_no_shuffle(), - // stream input doesn't need to be on the same worker node as lookup - same_worker_node: false, + dispatch_strategy: dispatch_consistent_hash_shuffle( + delta_join_node + .right_key + .iter() + .map(|x| *x as u32) + .collect_vec(), + ), link_id: exchange_a1l0.operator_id, }, ); + // Place index(arrange) together with corresponding lookup operator, so that we can lookup on + // the same node. state.fragment_graph.add_edge( arrange_1_frag.fragment_id, lookup_1_frag.fragment_id, StreamFragmentEdge { dispatch_strategy: dispatch_no_shuffle(), - same_worker_node: true, link_id: exchange_a1l1.operator_id, }, ); - let exchange_l0m = build_exchange_for_delta_join(state, node); - let exchange_l1m = build_exchange_for_delta_join(state, node); + let exchange_l0m = + build_consistent_hash_shuffle_exchange_for_delta_join(state, node, node.stream_key.clone()); + let exchange_l1m = + build_consistent_hash_shuffle_exchange_for_delta_join(state, node, node.stream_key.clone()); + // LookupUnion's inputs might have different distribution and we need to unify them by using + // hash shuffle. let union = StreamNode { operator_id: state.gen_operator_id() as u64, identity: "Union".into(), @@ -249,8 +275,7 @@ fn build_delta_join_inner( lookup_0_frag.fragment_id, current_fragment.fragment_id, StreamFragmentEdge { - dispatch_strategy: dispatch_no_shuffle(), - same_worker_node: false, + dispatch_strategy: dispatch_consistent_hash_shuffle(node.stream_key.clone()), link_id: exchange_l0m.operator_id, }, ); @@ -259,8 +284,7 @@ fn build_delta_join_inner( lookup_1_frag.fragment_id, current_fragment.fragment_id, StreamFragmentEdge { - dispatch_strategy: dispatch_no_shuffle(), - same_worker_node: false, + dispatch_strategy: dispatch_consistent_hash_shuffle(node.stream_key.clone()), link_id: exchange_l1m.operator_id, }, ); @@ -291,7 +315,8 @@ pub(crate) fn build_delta_join_without_arrange( } panic!("exchange other than no_shuffle not allowed between delta join and arrange"); } else { - unimplemented!() + // pass + node } } @@ -312,25 +337,3 @@ pub(crate) fn build_delta_join_without_arrange( Ok(union) } - -fn infer_internal_table_catalog( - arrangement_info: Option<&ArrangementInfo>, - distribution_key: Vec, -) -> TableCatalog { - let arrangement_info = arrangement_info.unwrap(); - // FIXME(st1page) - let mut internal_table_catalog_builder = TableCatalogBuilder::new(WithOptions::default()); - for column_desc in &arrangement_info.column_descs { - internal_table_catalog_builder.add_column(&Field::from(&ColumnDesc::from(column_desc))); - } - - for order in &arrangement_info.arrange_key_orders { - internal_table_catalog_builder.add_order_column( - order.index as usize, - OrderType::from_prost(&ProstOrderType::from_i32(order.order_type).unwrap()), - ); - } - - // TODO: give the real look-up keys - internal_table_catalog_builder.build(distribution_key) -} diff --git a/src/frontend/src/stream_fragmenter/rewrite/mod.rs b/src/frontend/src/stream_fragmenter/rewrite/mod.rs index 3d14e31824f72..7eb36f15492aa 100644 --- a/src/frontend/src/stream_fragmenter/rewrite/mod.rs +++ b/src/frontend/src/stream_fragmenter/rewrite/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
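Note on the delta-join rewrite above: the cross-side exchanges switch from no-shuffle to `dispatch_consistent_hash_shuffle` on the join keys, and the comment points out that the `Hash` dispatcher type is in fact a consistent-hash shuffle: rows are hashed on the distribution-key columns, the hash selects a virtual node, and the vnode-to-parallel-unit mapping decides which lookup instance receives the row, so a lookup always lands on the node holding the matching arrangement partition. The following is only a schematic of that routing idea, not the actual dispatcher code; the vnode count and hasher are placeholders:

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

const VIRTUAL_NODE_COUNT: u64 = 256; // illustrative; the real vnode count is fixed elsewhere

/// Schematic routing for a Hash (consistent-hash) dispatch strategy:
/// hash the distribution-key columns of a row, map the hash to a virtual
/// node, then look up the parallel unit that owns that vnode.
fn route(key_columns: &[&str], vnode_to_parallel_unit: &[u32]) -> u32 {
    let mut hasher = DefaultHasher::new();
    for col in key_columns {
        col.hash(&mut hasher);
    }
    let vnode = (hasher.finish() % VIRTUAL_NODE_COUNT) as usize;
    vnode_to_parallel_unit[vnode]
}

fn main() {
    // Toy mapping: vnodes round-robined over 4 parallel units.
    let mapping: Vec<u32> = (0..VIRTUAL_NODE_COUNT as u32).map(|v| v % 4).collect();
    let target = route(&["key_value"], &mapping);
    // Rows with equal key columns always land on the same parallel unit,
    // which is what lets one lookup side find the arrangement built by the other.
    assert_eq!(target, route(&["key_value"], &mapping));
    println!("routed to parallel unit {target}");
}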
diff --git a/src/frontend/src/test_utils.rs b/src/frontend/src/test_utils.rs index b729df2605a80..db227c01b7520 100644 --- a/src/frontend/src/test_utils.rs +++ b/src/frontend/src/test_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -34,12 +34,15 @@ use risingwave_pb::catalog::{ Schema as ProstSchema, Sink as ProstSink, Source as ProstSource, Table as ProstTable, View as ProstView, }; +use risingwave_pb::ddl_service::DdlProgress; use risingwave_pb::hummock::HummockSnapshot; use risingwave_pb::meta::list_table_fragments_response::TableFragmentInfo; +use risingwave_pb::meta::{CreatingJobInfo, SystemParams}; use risingwave_pb::stream_plan::StreamFragmentGraph; use risingwave_pb::user::update_user_request::UpdateField; use risingwave_pb::user::{GrantPrivilege, UserInfo}; use risingwave_rpc_client::error::Result as RpcResult; +use risingwave_rpc_client::SystemParamsReader; use tempfile::{Builder, NamedTempFile}; use crate::catalog::catalog_service::CatalogWriter; @@ -74,6 +77,10 @@ impl SessionManager for LocalFrontend { todo!() } + fn cancel_creating_jobs_in_session(&self, _session_id: SessionId) { + todo!() + } + fn end_session(&self, _session: &Self::Session) { todo!() } @@ -91,7 +98,7 @@ impl LocalFrontend { sql: impl Into, ) -> std::result::Result> { let sql = sql.into(); - self.session_ref().run_statement(sql.as_str(), false).await + self.session_ref().run_statement(sql.as_str(), vec![]).await } pub async fn run_user_sql( @@ -103,7 +110,7 @@ impl LocalFrontend { ) -> std::result::Result> { let sql = sql.into(); self.session_user_ref(database, user_name, user_id) - .run_statement(sql.as_str(), false) + .run_statement(sql.as_str(), vec![]) .await } @@ -227,8 +234,11 @@ impl CatalogWriter for MockCatalogWriter { Ok(()) } - async fn create_view(&self, _view: ProstView) -> Result<()> { - todo!() + async fn create_view(&self, mut view: ProstView) -> Result<()> { + view.id = self.gen_id(); + self.catalog.write().create_view(&view); + self.add_table_or_source_id(view.id, view.schema_id, view.database_id); + Ok(()) } async fn create_table( @@ -246,6 +256,11 @@ impl CatalogWriter for MockCatalogWriter { Ok(()) } + async fn replace_table(&self, table: ProstTable, _graph: StreamFragmentGraph) -> Result<()> { + self.catalog.write().update_table(&table); + Ok(()) + } + async fn create_source(&self, source: ProstSource) -> Result<()> { self.create_source_inner(source).map(|_| ()) } @@ -654,6 +669,10 @@ impl FrontendMetaClient for MockFrontendMetaClient { }) } + async fn cancel_creating_jobs(&self, _infos: Vec) -> RpcResult<()> { + Ok(()) + } + async fn list_table_fragments( &self, _table_ids: &[u32], @@ -672,6 +691,18 @@ impl FrontendMetaClient for MockFrontendMetaClient { async fn list_meta_snapshots(&self) -> RpcResult> { Ok(vec![]) } + + async fn get_system_params(&self) -> RpcResult { + Ok(SystemParams::default().into()) + } + + async fn set_system_param(&self, _param: String, _value: Option) -> RpcResult<()> { + Ok(()) + } + + async fn list_ddl_progress(&self) -> RpcResult> { + Ok(vec![]) + } } #[cfg(test)] diff --git a/src/frontend/src/user/mod.rs b/src/frontend/src/user/mod.rs index 9c698617582c5..c0e23f2a69d92 100644 --- a/src/frontend/src/user/mod.rs +++ b/src/frontend/src/user/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, 
Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/user/user_authentication.rs b/src/frontend/src/user/user_authentication.rs index 013be5b3b65bf..6e39b3eb61cf9 100644 --- a/src/frontend/src/user/user_authentication.rs +++ b/src/frontend/src/user/user_authentication.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/user/user_manager.rs b/src/frontend/src/user/user_manager.rs index a5a6407a57fda..3620eef51114a 100644 --- a/src/frontend/src/user/user_manager.rs +++ b/src/frontend/src/user/user_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/user/user_privilege.rs b/src/frontend/src/user/user_privilege.rs index c03ed8133888a..14ade81f0b0d8 100644 --- a/src/frontend/src/user/user_privilege.rs +++ b/src/frontend/src/user/user_privilege.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/user/user_service.rs b/src/frontend/src/user/user_service.rs index 245a81b96587e..50ec2f60cf199 100644 --- a/src/frontend/src/user/user_service.rs +++ b/src/frontend/src/user/user_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/utils/column_index_mapping.rs b/src/frontend/src/utils/column_index_mapping.rs index 883014ba0cc0f..73c19a9b754e3 100644 --- a/src/frontend/src/utils/column_index_mapping.rs +++ b/src/frontend/src/utils/column_index_mapping.rs @@ -1,5 +1,5 @@ use std::cmp::max; -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/utils/condition.rs b/src/frontend/src/utils/condition.rs index bfbc986d70b26..25705fa43016b 100644 --- a/src/frontend/src/utils/condition.rs +++ b/src/frontend/src/utils/condition.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/utils/connected_components.rs b/src/frontend/src/utils/connected_components.rs index 6d4506494ba60..5a61a38bebc73 100644 --- a/src/frontend/src/utils/connected_components.rs +++ b/src/frontend/src/utils/connected_components.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
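Note on the session API change earlier in this diff: `Session::run_statement` now takes `formats: Vec<Format>` instead of a single `format: bool`, which is why the test utilities above pass `vec![]` rather than `false`. This follows the usual PostgreSQL extended-protocol convention for result-format codes: an empty list means all text, a single entry applies to every column, and otherwise the codes are per column. A small sketch of that resolution rule, using a stand-in `Format` enum rather than the real `pgwire::types::Format`, and assuming the session layer resolves formats along these lines:

// Illustrative stand-in for `pgwire::types::Format`.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Format {
    Text,
    Binary,
}

/// Resolve the format of one output column following the PostgreSQL
/// extended-protocol convention: empty list = all text, one entry = all
/// columns, otherwise one entry per column.
fn format_for_column(formats: &[Format], column_idx: usize) -> Format {
    match formats {
        [] => Format::Text,
        [only] => *only,
        per_column => per_column[column_idx],
    }
}

fn main() {
    assert_eq!(format_for_column(&[], 3), Format::Text);
    assert_eq!(format_for_column(&[Format::Binary], 3), Format::Binary);
    assert_eq!(
        format_for_column(&[Format::Text, Format::Binary], 1),
        Format::Binary
    );
}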
diff --git a/src/frontend/src/utils/mod.rs b/src/frontend/src/utils/mod.rs index 9448f1b0a9184..002dc86c32f9c 100644 --- a/src/frontend/src/utils/mod.rs +++ b/src/frontend/src/utils/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/utils/rewrite_index.rs b/src/frontend/src/utils/rewrite_index.rs index 1dd47da2d31b9..2107cb55d5219 100644 --- a/src/frontend/src/utils/rewrite_index.rs +++ b/src/frontend/src/utils/rewrite_index.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/frontend/src/utils/stream_graph_formatter.rs b/src/frontend/src/utils/stream_graph_formatter.rs index f3a1c1a3c5b43..017c76593a740 100644 --- a/src/frontend/src/utils/stream_graph_formatter.rs +++ b/src/frontend/src/utils/stream_graph_formatter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -180,10 +180,6 @@ impl StreamGraphFormatter { }) .join(", ") )), - stream_node::NodeBody::AppendOnlyTopN(node) => Some(format!( - "state table: {}", - self.add_table(node.get_table().unwrap()) - )), stream_node::NodeBody::HashJoin(node) => Some(format!( "left table: {}, right table {},{}{}", self.add_table(node.get_left_table().unwrap()), @@ -201,9 +197,9 @@ impl StreamGraphFormatter { "state table: {}", self.add_table(node.get_table().unwrap()) )), - stream_node::NodeBody::Lookup(node) => Some(format!( - "arrange table: {}", - self.add_table(node.get_arrangement_table().unwrap()) + stream_node::NodeBody::AppendOnlyTopN(node) => Some(format!( + "state table: {}", + self.add_table(node.get_table().unwrap()) )), stream_node::NodeBody::Arrange(node) => Some(format!( "arrange table: {}", @@ -218,6 +214,10 @@ impl StreamGraphFormatter { "state table: {}", self.add_table(node.get_table().unwrap()) )), + stream_node::NodeBody::AppendOnlyGroupTopN(node) => Some(format!( + "state table: {}", + self.add_table(node.get_table().unwrap()) + )), stream_node::NodeBody::Now(node) => Some(format!( "state table: {}", self.add_table(node.get_state_table().unwrap()) diff --git a/src/frontend/src/utils/with_options.rs b/src/frontend/src/utils/with_options.rs index 80e616f1bd191..ab4d012b86bf4 100644 --- a/src/frontend/src/utils/with_options.rs +++ b/src/frontend/src/utils/with_options.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -68,12 +68,7 @@ impl WithOptions { /// Parse the append only property from the options. pub fn append_only(&self) -> bool { - if let Some(val) = self.inner.get(options::APPEND_ONLY) { - if val.eq_ignore_ascii_case("true") { - return true; - } - } - false + self.value_eq_ignore_case(options::APPEND_ONLY, "true") } /// Get a subset of the options from the given keys. 
@@ -96,6 +91,15 @@ impl WithOptions { pub fn internal_table_subset(&self) -> Self { self.subset([options::RETENTION_SECONDS]) } + + pub fn value_eq_ignore_case(&self, key: &str, val: &str) -> bool { + if let Some(inner_val) = self.inner.get(key) { + if inner_val.eq_ignore_ascii_case(val) { + return true; + } + } + false + } } impl TryFrom<&[SqlOption]> for WithOptions { diff --git a/src/java_binding/Cargo.toml b/src/java_binding/Cargo.toml index be6c9fdf1b4d3..61f4cf3b757d0 100644 --- a/src/java_binding/Cargo.toml +++ b/src/java_binding/Cargo.toml @@ -3,12 +3,19 @@ name = "risingwave_java_binding" version = "0.1.0" edition = "2021" +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] bytes = "1" futures = { version = "0.3", default-features = false, features = ["alloc"] } -itertools = "0.10" jni = "0.20.0" +prost = "0.11" risingwave_common = { path = "../common" } +risingwave_hummock_sdk = { path = "../storage/hummock_sdk" } risingwave_object_store = { path = "../object_store" } risingwave_pb = { path = "../prost" } risingwave_storage = { path = "../storage" } diff --git a/src/java_binding/java/com_risingwave_java_binding_Binding.h b/src/java_binding/java/com_risingwave_java_binding_Binding.h index d6d961dc83b49..91be8c9b22393 100644 --- a/src/java_binding/java/com_risingwave_java_binding_Binding.h +++ b/src/java_binding/java/com_risingwave_java_binding_Binding.h @@ -10,10 +10,10 @@ extern "C" { /* * Class: com_risingwave_java_binding_Binding * Method: iteratorNew - * Signature: ()J + * Signature: ([B)J */ JNIEXPORT jlong JNICALL Java_com_risingwave_java_binding_Binding_iteratorNew - (JNIEnv *, jclass); + (JNIEnv *, jclass, jbyteArray); /* * Class: com_risingwave_java_binding_Binding @@ -47,6 +47,22 @@ JNIEXPORT jbyteArray JNICALL Java_com_risingwave_java_binding_Binding_rowGetKey JNIEXPORT jboolean JNICALL Java_com_risingwave_java_binding_Binding_rowIsNull (JNIEnv *, jclass, jlong, jint); +/* + * Class: com_risingwave_java_binding_Binding + * Method: rowGetInt16Value + * Signature: (JI)S + */ +JNIEXPORT jshort JNICALL Java_com_risingwave_java_binding_Binding_rowGetInt16Value + (JNIEnv *, jclass, jlong, jint); + +/* + * Class: com_risingwave_java_binding_Binding + * Method: rowGetInt32Value + * Signature: (JI)I + */ +JNIEXPORT jint JNICALL Java_com_risingwave_java_binding_Binding_rowGetInt32Value + (JNIEnv *, jclass, jlong, jint); + /* * Class: com_risingwave_java_binding_Binding * Method: rowGetInt64Value @@ -55,6 +71,30 @@ JNIEXPORT jboolean JNICALL Java_com_risingwave_java_binding_Binding_rowIsNull JNIEXPORT jlong JNICALL Java_com_risingwave_java_binding_Binding_rowGetInt64Value (JNIEnv *, jclass, jlong, jint); +/* + * Class: com_risingwave_java_binding_Binding + * Method: rowGetFloatValue + * Signature: (JI)F + */ +JNIEXPORT jfloat JNICALL Java_com_risingwave_java_binding_Binding_rowGetFloatValue + (JNIEnv *, jclass, jlong, jint); + +/* + * Class: com_risingwave_java_binding_Binding + * Method: rowGetDoubleValue + * Signature: (JI)D + */ +JNIEXPORT jdouble JNICALL Java_com_risingwave_java_binding_Binding_rowGetDoubleValue + (JNIEnv *, jclass, jlong, jint); + +/* + * Class: com_risingwave_java_binding_Binding + * Method: rowGetBooleanValue + * Signature: (JI)Z + */ +JNIEXPORT jboolean JNICALL Java_com_risingwave_java_binding_Binding_rowGetBooleanValue + (JNIEnv *, jclass, jlong, jint); + /* * Class: com_risingwave_java_binding_Binding * Method: rowGetStringValue diff --git 
a/src/java_binding/java/java-binding/pom.xml b/src/java_binding/java/java-binding/pom.xml index 97846dc6e5d45..072ea8276f811 100644 --- a/src/java_binding/java/java-binding/pom.xml +++ b/src/java_binding/java/java-binding/pom.xml @@ -20,10 +20,14 @@ + + com.risingwave.java + proto + junit junit - 4.11 + 4.13.1 test diff --git a/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/Demo.java b/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/Demo.java index 1c00b19179d12..6a8bd798ed29f 100644 --- a/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/Demo.java +++ b/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/Demo.java @@ -2,25 +2,73 @@ import com.risingwave.java.binding.Iterator; import com.risingwave.java.binding.KeyedRow; +import com.risingwave.java.binding.rpc.MetaClient; +import com.risingwave.proto.Catalog.Table; +import com.risingwave.proto.Hummock.HummockVersion; +import com.risingwave.proto.JavaBinding.KeyRange; +import com.risingwave.proto.JavaBinding.KeyRange.Bound; +import com.risingwave.proto.JavaBinding.ReadPlan; +import java.time.Duration; import java.util.Arrays; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.ScheduledThreadPoolExecutor; /** Hello world! */ public class Demo { public static void main(String[] args) { - try (Iterator iter = new Iterator()) { - while (true) { - try (KeyedRow row = iter.next()) { - if (row == null) { - break; + String objectStore = System.getenv("OBJECT_STORE"); + String dbName = System.getenv("DB_NAME"); + String tableName = System.getenv("TABLE_NAME"); + String metaAddr = System.getenv("META_ADDR"); + String dataDir = System.getenv("DATA_DIR"); + + ScheduledThreadPoolExecutor scheduledThreadPool = new ScheduledThreadPoolExecutor(2); + + KeyRange keyRange = + KeyRange.newBuilder() + .setRightBound(Bound.UNBOUNDED) + .setLeftBound(Bound.UNBOUNDED) + .build(); + try (MetaClient metaClient = new MetaClient(metaAddr, scheduledThreadPool)) { + ScheduledFuture heartbeatFuture = + metaClient.startHeartbeatLoop(Duration.ofMillis(1000)); + HummockVersion version = metaClient.pinVersion(); + Table tableCatalog = metaClient.getTable(dbName, tableName); + ReadPlan readPlan = + ReadPlan.newBuilder() + .setDataDir(dataDir) + .setObjectStoreUrl(objectStore) + .setKeyRange(keyRange) + .setTableId(tableCatalog.getId()) + .setEpoch(version.getMaxCommittedEpoch()) + .setVersion(version) + .setTableCatalog(tableCatalog) + .build(); + + try (Iterator iter = new Iterator(readPlan)) { + while (true) { + try (KeyedRow row = iter.next()) { + if (row == null) { + break; + } + System.out.printf( + "key %s, smallint: %s, int: %s, bigint: %s, float: %s, double: %s, bool: %s, varchar: %s, is null: %s%n", + Arrays.toString(row.getKey()), + row.getShort(0), + row.getInt(1), + row.getLong(2), + row.getFloat(3), + row.getDouble(4), + row.getBoolean(5), + row.getString(6), + row.isNull(7)); } - System.out.printf( - "key %s, id: %d, name: %s, is null: %s%n", - Arrays.toString(row.getKey()), - row.getLong(0), - row.getString(1), - row.isNull(2)); } } + + heartbeatFuture.cancel(false); } + + scheduledThreadPool.shutdown(); } } diff --git a/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/Binding.java b/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/Binding.java index a8354f395e5d2..5e469d09f8513 100644 --- a/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/Binding.java +++ 
b/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/Binding.java @@ -7,7 +7,7 @@ public class Binding { // iterator method // Return a pointer to the iterator - static native long iteratorNew(); + static native long iteratorNew(byte[] readPlan); // return a pointer to the next row static native long iteratorNext(long pointer); @@ -21,8 +21,18 @@ public class Binding { static native boolean rowIsNull(long pointer, int index); + static native short rowGetInt16Value(long pointer, int index); + + static native int rowGetInt32Value(long pointer, int index); + static native long rowGetInt64Value(long pointer, int index); + static native float rowGetFloatValue(long pointer, int index); + + static native double rowGetDoubleValue(long pointer, int index); + + static native boolean rowGetBooleanValue(long pointer, int index); + static native String rowGetStringValue(long pointer, int index); // Since the underlying rust does not have garbage collection, we will have to manually call diff --git a/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/Iterator.java b/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/Iterator.java index a7139634e6ab1..5c8d4ebf74efa 100644 --- a/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/Iterator.java +++ b/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/Iterator.java @@ -1,11 +1,13 @@ package com.risingwave.java.binding; +import com.risingwave.proto.JavaBinding.ReadPlan; + public class Iterator implements AutoCloseable { - final long pointer; - boolean isClosed; + private final long pointer; + private boolean isClosed; - public Iterator() { - this.pointer = Binding.iteratorNew(); + public Iterator(ReadPlan readPlan) { + this.pointer = Binding.iteratorNew(readPlan.toByteArray()); this.isClosed = false; } diff --git a/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/KeyedRow.java b/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/KeyedRow.java index 7a584745d320f..21fbb94da68e5 100644 --- a/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/KeyedRow.java +++ b/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/KeyedRow.java @@ -1,8 +1,8 @@ package com.risingwave.java.binding; public class KeyedRow implements AutoCloseable { - final long pointer; - boolean isClosed; + private final long pointer; + private boolean isClosed; KeyedRow(long pointer) { this.pointer = pointer; @@ -17,10 +17,30 @@ public boolean isNull(int index) { return Binding.rowIsNull(pointer, index); } + public short getShort(int index) { + return Binding.rowGetInt16Value(pointer, index); + } + + public int getInt(int index) { + return Binding.rowGetInt32Value(pointer, index); + } + public long getLong(int index) { return Binding.rowGetInt64Value(pointer, index); } + public float getFloat(int index) { + return Binding.rowGetFloatValue(pointer, index); + } + + public double getDouble(int index) { + return Binding.rowGetDoubleValue(pointer, index); + } + + public boolean getBoolean(int index) { + return Binding.rowGetBooleanValue(pointer, index); + } + public String getString(int index) { return Binding.rowGetStringValue(pointer, index); } diff --git a/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/rpc/MetaClient.java b/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/rpc/MetaClient.java new file mode 100644 
index 0000000000000..ee45d8dd51eaf --- /dev/null +++ b/src/java_binding/java/java-binding/src/main/java/com/risingwave/java/binding/rpc/MetaClient.java @@ -0,0 +1,126 @@ +package com.risingwave.java.binding.rpc; + +import com.risingwave.proto.Catalog.Table; +import com.risingwave.proto.ClusterServiceGrpc; +import com.risingwave.proto.ClusterServiceGrpc.ClusterServiceBlockingStub; +import com.risingwave.proto.Common.HostAddress; +import com.risingwave.proto.Common.WorkerType; +import com.risingwave.proto.DdlServiceGrpc; +import com.risingwave.proto.DdlServiceGrpc.DdlServiceBlockingStub; +import com.risingwave.proto.DdlServiceOuterClass.GetTableRequest; +import com.risingwave.proto.DdlServiceOuterClass.GetTableResponse; +import com.risingwave.proto.HeartbeatServiceGrpc; +import com.risingwave.proto.HeartbeatServiceGrpc.HeartbeatServiceBlockingStub; +import com.risingwave.proto.Hummock.HummockVersion; +import com.risingwave.proto.Hummock.PinVersionRequest; +import com.risingwave.proto.Hummock.PinVersionResponse; +import com.risingwave.proto.HummockManagerServiceGrpc; +import com.risingwave.proto.HummockManagerServiceGrpc.HummockManagerServiceBlockingStub; +import com.risingwave.proto.Meta.AddWorkerNodeRequest; +import com.risingwave.proto.Meta.AddWorkerNodeResponse; +import com.risingwave.proto.Meta.HeartbeatRequest; +import io.grpc.Grpc; +import io.grpc.InsecureChannelCredentials; +import io.grpc.ManagedChannel; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.logging.Logger; + +public class MetaClient implements AutoCloseable { + private final int workerId; + + private final ManagedChannel channel; + + // Scheduler for background tasks. + private final ScheduledExecutorService scheduler; + + // RPC stubs. + private final ClusterServiceBlockingStub clusterStub; + private final DdlServiceBlockingStub ddlStub; + private final HeartbeatServiceBlockingStub heartbeatStub; + private final HummockManagerServiceBlockingStub hummockStub; + + private boolean isClosed; + + // A heart beat task that sends a heartbeat to the meta service when run. 
+ private class HeartbeatTask implements Runnable { + Duration timeout; + + HeartbeatTask(Duration timeout) { + this.timeout = timeout; + } + + @Override + public void run() { + HeartbeatRequest req = HeartbeatRequest.newBuilder().setNodeId(workerId).build(); + + try { + heartbeatStub + .withDeadlineAfter(timeout.toMillis(), TimeUnit.MILLISECONDS) + .heartbeat(req); + } catch (Exception e) { + Logger.getGlobal().warning(String.format("Failed to send heartbeat: %s", e)); + } + } + } + + public MetaClient(String metaAddr, ScheduledExecutorService scheduler) { + this.channel = + Grpc.newChannelBuilder(metaAddr, InsecureChannelCredentials.create()).build(); + this.scheduler = scheduler; + + this.clusterStub = ClusterServiceGrpc.newBlockingStub(channel); + this.ddlStub = DdlServiceGrpc.newBlockingStub(channel); + this.hummockStub = HummockManagerServiceGrpc.newBlockingStub(channel); + this.heartbeatStub = HeartbeatServiceGrpc.newBlockingStub(channel); + + this.isClosed = false; + + AddWorkerNodeRequest req = + AddWorkerNodeRequest.newBuilder() + .setWorkerType(WorkerType.RISE_CTL) + .setHost( + HostAddress.newBuilder().setHost("127.0.0.1").setPort(8880).build()) + .setWorkerNodeParallelism(0) + .build(); + AddWorkerNodeResponse resp = clusterStub.addWorkerNode(req); + + this.workerId = resp.getNode().getId(); + } + + public HummockVersion pinVersion() { + PinVersionRequest req = PinVersionRequest.newBuilder().setContextId(workerId).build(); + PinVersionResponse resp = hummockStub.pinVersion(req); + return resp.getPinnedVersion(); + } + + public Table getTable(String databaseName, String tableName) { + GetTableRequest req = + GetTableRequest.newBuilder() + .setDatabaseName(databaseName) + .setTableName(tableName) + .build(); + GetTableResponse resp = ddlStub.getTable(req); + if (resp.hasTable()) { + return resp.getTable(); + } else { + return null; + } + } + + public ScheduledFuture startHeartbeatLoop(Duration interval) { + Runnable heartbeatTask = new HeartbeatTask(interval.multipliedBy(3)); + return scheduler.scheduleWithFixedDelay( + heartbeatTask, interval.toMillis(), interval.toMillis(), TimeUnit.MILLISECONDS); + } + + @Override + public void close() { + if (!isClosed) { + isClosed = true; + this.channel.shutdown(); + } + } +} diff --git a/src/java_binding/java/pom.xml b/src/java_binding/java/pom.xml index fb6be06c033fe..ca682bf49928b 100644 --- a/src/java_binding/java/pom.xml +++ b/src/java_binding/java/pom.xml @@ -75,7 +75,7 @@ com.puppycrawl.tools checkstyle - 8.14 + 8.29 diff --git a/src/java_binding/java/tools/maven/checkstyle.xml b/src/java_binding/java/tools/maven/checkstyle.xml index 8d95428a297aa..33649434a3266 100644 --- a/src/java_binding/java/tools/maven/checkstyle.xml +++ b/src/java_binding/java/tools/maven/checkstyle.xml @@ -235,14 +235,8 @@ This file is based on the checkstyle file of Apache Beam. - - - - - - diff --git a/src/java_binding/make-java-binding.toml b/src/java_binding/make-java-binding.toml index 409f7bfbd3e24..3240d4a59301a 100644 --- a/src/java_binding/make-java-binding.toml +++ b/src/java_binding/make-java-binding.toml @@ -21,7 +21,7 @@ script = ''' #!/usr/bin/env bash set -ex cd src/java_binding/java -mvn clean package +mvn clean install ''' [tasks.run-java-binding-demo] @@ -33,9 +33,7 @@ dependencies = [ script = ''' #!/usr/bin/env bash set -ex -RISINGWAVE_ROOT=. 
+RISINGWAVE_ROOT=$(git rev-parse --show-toplevel) JAVA_BINDING_ROOT=${RISINGWAVE_ROOT}/src/java_binding -java -cp ${JAVA_BINDING_ROOT}/java/java-binding/target/java-binding-1.0-SNAPSHOT.jar \ - -Djava.library.path=${RISINGWAVE_ROOT}/target/debug \ - com.risingwave.java.Demo +source ${JAVA_BINDING_ROOT}/run_demo.sh ''' \ No newline at end of file diff --git a/src/java_binding/run_demo.sh b/src/java_binding/run_demo.sh new file mode 100644 index 0000000000000..51cd4bc70a036 --- /dev/null +++ b/src/java_binding/run_demo.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +# This script must be executed from `cargo make run-java-binding-demo`. + +set -e + +TABLE_NAME=java_binding_demo +DB_NAME=dev +# Below variables are determined by risedev. +# See the `java-binding-demo` section in risedev.yml. +OBJECT_STORE=minio://hummockadmin:hummockadmin@127.0.0.1:9301/hummock001 +META_ADDR=127.0.0.1:5690 +DATA_DIR=hummock_001 + +${RISINGWAVE_ROOT}/risedev d java-binding-demo + +psql -d ${DB_NAME} -h localhost -p 4566 -U root << EOF +DROP TABLE IF EXISTS ${TABLE_NAME}; +CREATE TABLE ${TABLE_NAME} (v1 smallint, v2 int, v3 bigint, v4 float4, v5 float8, v6 bool, v7 varchar, may_null bigint); +INSERT INTO ${TABLE_NAME} values (1, 1, 1, 1.0, 1.0, false, 'aaa', 1), (2, 2, 2, 2.0, 2.0, true, 'bbb', NULL); +FLUSH; +EOF + +cd ${JAVA_BINDING_ROOT}/java + +TABLE_NAME=${TABLE_NAME} \ +DB_NAME=${DB_NAME} \ +OBJECT_STORE=${OBJECT_STORE} \ +META_ADDR=${META_ADDR} \ +DATA_DIR=${DATA_DIR} \ +mvn exec:exec \ + -pl java-binding \ + -Dexec.executable=java \ + -Dexec.args=" \ + -cp %classpath:java-binding/target*.jar:proto/target/*.jar \ + -Djava.library.path=${RISINGWAVE_ROOT}/target/debug com.risingwave.java.Demo" + +psql -d dev -h localhost -p 4566 -U root << EOF +DROP TABLE ${TABLE_NAME}; +EOF + +cd - +${RISINGWAVE_ROOT}/risedev k > /dev/null diff --git a/src/java_binding/src/iterator.rs b/src/java_binding/src/iterator.rs index a4c3c793c34b1..e65d0e3e2796f 100644 --- a/src/java_binding/src/iterator.rs +++ b/src/java_binding/src/iterator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,34 +12,28 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::ops::Bound::Unbounded; use std::pin::Pin; use std::sync::Arc; use bytes::Bytes; use futures::TryStreamExt; -use itertools::Itertools; -use risingwave_common::array::DataChunk; -use risingwave_common::catalog::TableId; use risingwave_common::row::{OwnedRow, RowDeserializer}; -use risingwave_common::types::{DataType, ScalarImpl, ScalarRef}; +use risingwave_common::types::ScalarImpl; +use risingwave_hummock_sdk::key::{TableKey, TableKeyRange}; use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; -use risingwave_object_store::object::{InMemObjectStore, ObjectStore, ObjectStoreImpl}; -use risingwave_pb::hummock::HummockVersion; +use risingwave_object_store::object::parse_remote_object_store; +use risingwave_pb::java_binding::key_range::Bound; +use risingwave_pb::java_binding::{KeyRange, ReadPlan}; use risingwave_storage::error::{StorageError, StorageResult}; use risingwave_storage::hummock::local_version::pinned_version::PinnedVersion; -use risingwave_storage::hummock::shared_buffer::shared_buffer_batch::SharedBufferBatch; use risingwave_storage::hummock::store::state_store::HummockStorageIterator; use risingwave_storage::hummock::store::version::HummockVersionReader; use risingwave_storage::hummock::{SstableStore, TieredCache}; use risingwave_storage::monitor::HummockStateStoreMetrics; -use risingwave_storage::storage_value::StorageValue; use risingwave_storage::store::{ReadOptions, StreamTypeOfIter}; -use tokio::runtime::Runtime; use tokio::sync::mpsc::unbounded_channel; pub struct Iterator { - runtime: Runtime, row_serializer: RowDeserializer, stream: Pin>>, } @@ -55,7 +49,21 @@ impl KeyedRow { } pub fn is_null(&self, idx: usize) -> bool { - self.row[idx].is_some() + self.row[idx].is_none() + } + + pub fn get_int16(&self, idx: usize) -> i16 { + match self.row[idx].as_ref().unwrap() { + ScalarImpl::Int16(num) => *num, + _ => unreachable!("type is not int16 at index: {}", idx), + } + } + + pub fn get_int32(&self, idx: usize) -> i32 { + match self.row[idx].as_ref().unwrap() { + ScalarImpl::Int32(num) => *num, + _ => unreachable!("type is not int32 at index: {}", idx), + } } pub fn get_int64(&self, idx: usize) -> i64 { @@ -65,68 +73,48 @@ impl KeyedRow { } } - pub fn get_utf8(&self, idx: usize) -> &str { + pub fn get_f32(&self, idx: usize) -> f32 { match self.row[idx].as_ref().unwrap() { - ScalarImpl::Utf8(s) => s.as_ref(), - _ => unreachable!("type is not utf8 at index: {}", idx), + ScalarImpl::Float32(num) => num.into_inner(), + _ => unreachable!("type is not float32 at index: {}", idx), } } -} -const TEST_EPOCH: u64 = 1000; -const TEST_TABLE_ID: TableId = TableId { table_id: 2333 }; + pub fn get_f64(&self, idx: usize) -> f64 { + match self.row[idx].as_ref().unwrap() { + ScalarImpl::Float64(num) => num.into_inner(), + _ => unreachable!("type is not float64 at index: {}", idx), + } + } -fn gen_mock_schema() -> Vec { - vec![DataType::Int64, DataType::Varchar, DataType::Int64] -} + pub fn get_bool(&self, idx: usize) -> bool { + match self.row[idx].as_ref().unwrap() { + ScalarImpl::Bool(num) => *num, + _ => unreachable!("type is not boolean at index: {}", idx), + } + } -fn gen_mock_imm_lists() -> Vec { - let rows = vec![ - OwnedRow::new(vec![ - Some(ScalarImpl::Int64(100)), - Some(ScalarImpl::Utf8("value_of_100".to_owned_scalar())), - None, - ]), - OwnedRow::new(vec![ - Some(ScalarImpl::Int64(101)), - Some(ScalarImpl::Utf8("value_of_101".to_owned_scalar())), - Some(ScalarImpl::Int64(2333)), - ]), - ]; - let data_chunk = DataChunk::from_rows(&rows, &gen_mock_schema()); 
- let row_data = data_chunk.serialize(); - let kv_pairs = row_data - .into_iter() - .enumerate() - .map(|(i, row)| { - ( - Bytes::from(format!("key{:?}", i)), - StorageValue::new_put(row), - ) - }) - .collect_vec(); - - let sorted_items = SharedBufferBatch::build_shared_buffer_item_batches(kv_pairs); - let size = SharedBufferBatch::measure_batch_size(&sorted_items); - let imm = SharedBufferBatch::build_shared_buffer_batch( - TEST_EPOCH, - sorted_items, - size, - vec![], - TEST_TABLE_ID, - None, - ); - - vec![imm] + pub fn get_utf8(&self, idx: usize) -> &str { + match self.row[idx].as_ref().unwrap() { + ScalarImpl::Utf8(s) => s.as_ref(), + _ => unreachable!("type is not utf8 at index: {}", idx), + } + } } impl Iterator { - pub fn new() -> StorageResult { + pub async fn new(read_plan: ReadPlan) -> StorageResult { + let object_store = Arc::new( + parse_remote_object_store( + &read_plan.object_store_url, + Arc::new(ObjectStoreMetrics::unused()), + "Hummock", + ) + .await, + ); let sstable_store = Arc::new(SstableStore::new( - Arc::new(ObjectStoreImpl::InMem( - InMemObjectStore::new().monitored(Arc::new(ObjectStoreMetrics::unused())), - )), - "random".to_string(), + object_store, + read_plan.data_dir, 1 << 10, 1 << 10, TieredCache::none(), @@ -134,57 +122,66 @@ impl Iterator { let reader = HummockVersionReader::new(sstable_store, Arc::new(HummockStateStoreMetrics::unused())); - let runtime = tokio::runtime::Runtime::new().unwrap(); - - let stream = runtime.block_on(async { + let stream = { let stream = reader .iter( - (Unbounded, Unbounded), - TEST_EPOCH, + table_key_range_from_prost(read_plan.key_range.unwrap()), + read_plan.epoch, ReadOptions { prefix_hint: None, ignore_range_tombstone: false, retention_seconds: None, - table_id: TEST_TABLE_ID, + table_id: read_plan.table_id.into(), read_version_from_backup: false, }, ( - gen_mock_imm_lists(), vec![], - PinnedVersion::new( - HummockVersion { - id: 0, - levels: Default::default(), - max_committed_epoch: 0, - safe_epoch: 0, - }, - unbounded_channel().0, - ), + vec![], + PinnedVersion::new(read_plan.version.unwrap(), unbounded_channel().0), ), ) .await?; Ok::>>, StorageError>( Box::pin(stream), ) - })?; + }?; Ok(Self { - runtime, - row_serializer: RowDeserializer::new(gen_mock_schema()), + row_serializer: RowDeserializer::new( + read_plan + .table_catalog + .unwrap() + .columns + .into_iter() + .map(|c| (&c.column_desc.unwrap().column_type.unwrap()).into()) + .collect(), + ), stream, }) } - pub fn next(&mut self) -> StorageResult> { - self.runtime.block_on(async { - let item = self.stream.try_next().await?; - Ok(match item { - Some((key, value)) => Some(KeyedRow { - key: key.user_key.table_key.0, - row: self.row_serializer.deserialize(value)?, - }), - None => None, - }) + pub async fn next(&mut self) -> StorageResult> { + let item = self.stream.try_next().await?; + Ok(match item { + Some((key, value)) => Some(KeyedRow { + key: key.user_key.table_key.0, + row: self.row_serializer.deserialize(value)?, + }), + None => None, }) } } + +fn table_key_range_from_prost(r: KeyRange) -> TableKeyRange { + let map_bound = |b, v| match b { + Bound::Unbounded => std::ops::Bound::Unbounded, + Bound::Included => std::ops::Bound::Included(TableKey(v)), + Bound::Excluded => std::ops::Bound::Excluded(TableKey(v)), + _ => unreachable!(), + }; + let left_bound = r.left_bound(); + let right_bound = r.right_bound(); + let left = map_bound(left_bound, r.left); + let right = map_bound(right_bound, r.right); + (left, right) +} diff --git a/src/java_binding/src/lib.rs 
b/src/java_binding/src/lib.rs index d5530bc4b007a..4d81bf5237841 100644 --- a/src/java_binding/src/lib.rs +++ b/src/java_binding/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ #![feature(error_generic_member_access)] #![feature(provide_any)] +#![feature(once_cell)] mod iterator; @@ -21,44 +22,87 @@ use std::backtrace::Backtrace; use std::marker::PhantomData; use std::ops::Deref; use std::panic::catch_unwind; +use std::slice::from_raw_parts; +use std::sync::LazyLock; use iterator::{Iterator, KeyedRow}; -use jni::objects::{JClass, JObject, JString}; -use jni::sys::{jboolean, jbyteArray, jint, jlong}; +use jni::objects::{AutoArray, JClass, JObject, JString, ReleaseMode}; +use jni::sys::{jboolean, jbyte, jbyteArray, jdouble, jfloat, jint, jlong, jshort}; use jni::JNIEnv; +use prost::{DecodeError, Message}; use risingwave_storage::error::StorageError; use thiserror::Error; +use tokio::runtime::Runtime; + +static RUNTIME: LazyLock = LazyLock::new(|| tokio::runtime::Runtime::new().unwrap()); #[derive(Error, Debug)] enum BindingError { #[error("JniError {error}")] - JniError { + Jni { #[from] error: jni::errors::Error, backtrace: Backtrace, }, #[error("StorageError {error}")] - StorageError { + Storage { #[from] error: StorageError, backtrace: Backtrace, }, + + #[error("DecodeError {error}")] + Decode { + #[from] + error: DecodeError, + backtrace: Backtrace, + }, } type Result = std::result::Result; +/// Wrapper around [`jbyteArray`] that adds a lifetime and provides utilities to manipulate the +/// underlying array. It matches C's representation of a raw pointer, so it can be used in any of +/// the extern function argument positions that would take a [`jbyteArray`]. +// Note: use `JObject` internally to conveniently derive `Default` so that it can be returned +// instead of `jbyteArray` in `execute_and_catch`. #[repr(transparent)] #[derive(Default)] -pub struct ByteArray<'a>(JObject<'a>); +pub struct JByteArray<'a>(JObject<'a>); -impl<'a> From for ByteArray<'a> { +impl<'a> From for JByteArray<'a> { #[allow(clippy::not_unsafe_ptr_arg_deref)] fn from(inner: jbyteArray) -> Self { unsafe { Self(JObject::from_raw(inner)) } } } +impl<'a> JByteArray<'a> { + fn to_guarded_slice(&self, env: JNIEnv<'a>) -> Result> { + let array = env.get_byte_array_elements(self.0.into_raw(), ReleaseMode::NoCopyBack)?; + let slice = unsafe { from_raw_parts(array.as_ptr() as *mut u8, array.size()? as usize) }; + Ok(SliceGuard { + _array: array, + slice, + }) + } +} + +/// Wrapper around `&[u8]` derived from `jbyteArray` to prevent it from being auto-released. 
+pub struct SliceGuard<'a> { + _array: AutoArray<'a, jbyte>, + slice: &'a [u8], +} + +impl<'a> Deref for SliceGuard<'a> { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + self.slice + } +} + #[repr(transparent)] pub struct Pointer<'a, T> { pointer: jlong, @@ -141,7 +185,7 @@ where Ok(Ok(ret)) => ret, Ok(Err(e)) => { match e { - BindingError::JniError { + BindingError::Jni { error: jni::errors::Error::JavaException, backtrace, } => { @@ -164,10 +208,15 @@ where } #[no_mangle] -pub extern "system" fn Java_com_risingwave_java_binding_Binding_iteratorNew( - env: EnvParam<'_>, +pub extern "system" fn Java_com_risingwave_java_binding_Binding_iteratorNew<'a>( + env: EnvParam<'a>, + read_plan: JByteArray<'a>, ) -> Pointer<'static, Iterator> { - execute_and_catch(env, move || Ok(Iterator::new()?.into())) + execute_and_catch(env, move || { + let read_plan = Message::decode(read_plan.to_guarded_slice(*env)?.deref())?; + let iter = RUNTIME.block_on(Iterator::new(read_plan))?; + Ok(iter.into()) + }) } #[no_mangle] @@ -175,9 +224,11 @@ pub extern "system" fn Java_com_risingwave_java_binding_Binding_iteratorNext<'a> env: EnvParam<'a>, mut pointer: Pointer<'a, Iterator>, ) -> Pointer<'static, KeyedRow> { - execute_and_catch(env, move || match pointer.as_mut().next()? { - None => Ok(Pointer::null()), - Some(row) => Ok(row.into()), + execute_and_catch(env, move || { + match RUNTIME.block_on(pointer.as_mut().next())? { + None => Ok(Pointer::null()), + Some(row) => Ok(row.into()), + } }) } @@ -193,9 +244,9 @@ pub extern "system" fn Java_com_risingwave_java_binding_Binding_iteratorClose( pub extern "system" fn Java_com_risingwave_java_binding_Binding_rowGetKey<'a>( env: EnvParam<'a>, pointer: Pointer<'a, KeyedRow>, -) -> ByteArray<'a> { +) -> JByteArray<'a> { execute_and_catch(env, move || { - Ok(ByteArray::from( + Ok(JByteArray::from( env.byte_array_from_slice(pointer.as_ref().key())?, )) }) @@ -207,10 +258,27 @@ pub extern "system" fn Java_com_risingwave_java_binding_Binding_rowIsNull<'a>( pointer: Pointer<'a, KeyedRow>, idx: jint, ) -> jboolean { - execute_and_catch( - env, - move || Ok(pointer.as_ref().is_null(idx as usize) as u8), - ) + execute_and_catch(env, move || { + Ok(pointer.as_ref().is_null(idx as usize) as jboolean) + }) +} + +#[no_mangle] +pub extern "system" fn Java_com_risingwave_java_binding_Binding_rowGetInt16Value<'a>( + env: EnvParam<'a>, + pointer: Pointer<'a, KeyedRow>, + idx: jint, +) -> jshort { + execute_and_catch(env, move || Ok(pointer.as_ref().get_int16(idx as usize))) +} + +#[no_mangle] +pub extern "system" fn Java_com_risingwave_java_binding_Binding_rowGetInt32Value<'a>( + env: EnvParam<'a>, + pointer: Pointer<'a, KeyedRow>, + idx: jint, +) -> jint { + execute_and_catch(env, move || Ok(pointer.as_ref().get_int32(idx as usize))) } #[no_mangle] @@ -222,6 +290,35 @@ pub extern "system" fn Java_com_risingwave_java_binding_Binding_rowGetInt64Value execute_and_catch(env, move || Ok(pointer.as_ref().get_int64(idx as usize))) } +#[no_mangle] +pub extern "system" fn Java_com_risingwave_java_binding_Binding_rowGetFloatValue<'a>( + env: EnvParam<'a>, + pointer: Pointer<'a, KeyedRow>, + idx: jint, +) -> jfloat { + execute_and_catch(env, move || Ok(pointer.as_ref().get_f32(idx as usize))) +} + +#[no_mangle] +pub extern "system" fn Java_com_risingwave_java_binding_Binding_rowGetDoubleValue<'a>( + env: EnvParam<'a>, + pointer: Pointer<'a, KeyedRow>, + idx: jint, +) -> jdouble { + execute_and_catch(env, move || Ok(pointer.as_ref().get_f64(idx as usize))) +} + +#[no_mangle] +pub 
extern "system" fn Java_com_risingwave_java_binding_Binding_rowGetBooleanValue<'a>( + env: EnvParam<'a>, + pointer: Pointer<'a, KeyedRow>, + idx: jint, +) -> jboolean { + execute_and_catch(env, move || { + Ok(pointer.as_ref().get_bool(idx as usize) as jboolean) + }) +} + #[no_mangle] pub extern "system" fn Java_com_risingwave_java_binding_Binding_rowGetStringValue<'a>( env: EnvParam<'a>, diff --git a/src/meta/Cargo.toml b/src/meta/Cargo.toml index 6e6f5e444bc85..04ace1b013dcc 100644 --- a/src/meta/Cargo.toml +++ b/src/meta/Cargo.toml @@ -7,19 +7,23 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" arc-swap = "1" assert_matches = "1" -async-stream = "0.3" async-trait = "0.1" -byteorder = "1" bytes = { version = "1", features = ["serde"] } -chrono = { version = "0.4", default-features = false, features = ["clock", "std"] } clap = { version = "3", features = ["derive", "env"] } -crc32fast = "1" -derivative = "2" +crepe = "0.1" +easy-ext = "1" either = "1" +enum-as-inner = "0.5" etcd-client = { version = "0.2", package = "madsim-etcd-client" } fail = "0.5" function_name = "0.3.0" @@ -32,7 +36,6 @@ mime_guess = "2" num-integer = "0.1" num-traits = "0.2" parking_lot = { version = "0.12", features = ["arc_lock"] } -paste = "1" prometheus = "0.13" prometheus-http-query = "0.6" prost = "0.11" @@ -40,6 +43,7 @@ rand = "0.8" reqwest = "0.11" risingwave_backup = { path = "../storage/backup" } risingwave_common = { path = "../common" } +risingwave_common_proc_macro = { path = "../common/proc_macro" } risingwave_common_service = { path = "../common/common_service" } risingwave_connector = { path = "../connector" } risingwave_hummock_sdk = { path = "../storage/hummock_sdk" } @@ -48,10 +52,7 @@ risingwave_pb = { path = "../prost" } risingwave_rpc_client = { path = "../rpc_client" } scopeguard = "1.1.0" serde = { version = "1", features = ["derive"] } -serde_derive = "1" serde_json = "1" -serial_test = "0.9" -smallvec = "1" sync-point = { path = "../utils/sync-point" } thiserror = "1" tokio = { version = "0.2", package = "madsim-tokio", features = [ @@ -64,13 +65,11 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ ] } tokio-retry = "0.3" tokio-stream = { version = "0.1", features = ["net"] } -toml = "0.5" tonic = { version = "0.2", package = "madsim-tonic" } tower = { version = "0.4", features = ["util", "load-shed"] } tracing = "0.1" url = "2" uuid = { version = "1", features = ["v4"] } -xxhash-rust = { version = "0.8.5", features = ["xxh64"] } [target.'cfg(not(madsim))'.dependencies] axum = "0.6" diff --git a/src/meta/src/backup_restore/backup_manager.rs b/src/meta/src/backup_restore/backup_manager.rs index b4bd288c77280..edcd57b73bfce 100644 --- a/src/meta/src/backup_restore/backup_manager.rs +++ b/src/meta/src/backup_restore/backup_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -189,6 +189,14 @@ impl BackupManager { /// Deletes existent backups from backup storage. 
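
The JNI entry points above are synchronous, so the patch drives the async storage code through a single shared Tokio runtime via `RUNTIME.block_on(...)`. A minimal sketch of that bridge, assuming tokio with the multi-thread runtime feature and a toolchain where `LazyLock` is available (the crate enables `feature(once_cell)` for this):

use std::sync::LazyLock;

use tokio::runtime::Runtime;

// Shared runtime, built once on first use.
static RUNTIME: LazyLock<Runtime> = LazyLock::new(|| Runtime::new().unwrap());

// A synchronous entry point (like the JNI exports) that drives an async body to completion.
fn blocking_entry(x: u64) -> u64 {
    RUNTIME.block_on(async move { x + 1 })
}

fn main() {
    assert_eq!(blocking_entry(41), 42);
}
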
pub async fn delete_backups(&self, ids: &[MetaSnapshotId]) -> MetaResult<()> { self.backup_store.delete(ids).await?; + self.env + .notification_manager() + .notify_hummock_without_version( + Operation::Update, + Info::MetaBackupManifestId(MetaBackupManifestId { + id: self.backup_store.manifest().manifest_id, + }), + ); Ok(()) } diff --git a/src/meta/src/backup_restore/error.rs b/src/meta/src/backup_restore/error.rs index 8ee9547c77bdd..e5b085eb1bdef 100644 --- a/src/meta/src/backup_restore/error.rs +++ b/src/meta/src/backup_restore/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/backup_restore/meta_snapshot_builder.rs b/src/meta/src/backup_restore/meta_snapshot_builder.rs index 8dc9179895eeb..539b770793b9c 100644 --- a/src/meta/src/backup_restore/meta_snapshot_builder.rs +++ b/src/meta/src/backup_restore/meta_snapshot_builder.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,13 +20,16 @@ use risingwave_backup::error::BackupResult; use risingwave_backup::meta_snapshot::{ClusterMetadata, MetaSnapshot}; use risingwave_backup::MetaSnapshotId; use risingwave_hummock_sdk::compaction_group::hummock_version_ext::HummockVersionUpdateExt; -use risingwave_pb::catalog::{Database, Index, Schema, Sink, Source, Table, View}; +use risingwave_pb::catalog::{Database, Function, Index, Schema, Sink, Source, Table, View}; use risingwave_pb::hummock::{HummockVersion, HummockVersionDelta, HummockVersionStats}; use risingwave_pb::user::UserInfo; +use crate::manager::model::get_system_params_at_snapshot; use crate::model::MetadataModel; use crate::storage::{MetaStore, Snapshot, DEFAULT_COLUMN_FAMILY}; +const VERSION: u32 = 1; + pub struct MetaSnapshotBuilder { snapshot: MetaSnapshot, meta_store: Arc, @@ -41,6 +44,7 @@ impl MetaSnapshotBuilder { } pub async fn build(&mut self, id: MetaSnapshotId) -> BackupResult<()> { + self.snapshot.format_version = VERSION; self.snapshot.id = id; // Caveat: snapshot impl of etcd meta store doesn't prevent it from expiration. // So expired snapshot read may return error. If that happens, @@ -69,13 +73,11 @@ impl MetaSnapshotBuilder { .next() .ok_or_else(|| anyhow!("hummock version stats not found in meta store"))?; let compaction_groups = - crate::hummock::compaction_group::CompactionGroup::list_at_snapshot::( - &meta_store_snapshot, - ) - .await? - .iter() - .map(MetadataModel::to_protobuf) - .collect(); + crate::hummock::model::CompactionGroup::list_at_snapshot::(&meta_store_snapshot) + .await? + .iter() + .map(MetadataModel::to_protobuf) + .collect(); let table_fragments = crate::model::TableFragments::list_at_snapshot::(&meta_store_snapshot) .await? @@ -91,6 +93,10 @@ impl MetaSnapshotBuilder { let sink = Sink::list_at_snapshot::(&meta_store_snapshot).await?; let source = Source::list_at_snapshot::(&meta_store_snapshot).await?; let view = View::list_at_snapshot::(&meta_store_snapshot).await?; + let function = Function::list_at_snapshot::(&meta_store_snapshot).await?; + let system_param = get_system_params_at_snapshot::(&meta_store_snapshot) + .await? 
+ .ok_or_else(|| anyhow!("system params not found in meta store"))?; self.snapshot.metadata = ClusterMetadata { default_cf, @@ -106,6 +112,8 @@ impl MetaSnapshotBuilder { view, table_fragments, user_info, + function, + system_param, }; Ok(()) } @@ -139,8 +147,10 @@ mod tests { use risingwave_pb::hummock::{HummockVersion, HummockVersionStats}; use crate::backup_restore::meta_snapshot_builder::MetaSnapshotBuilder; + use crate::manager::model::SystemParamsModel; use crate::model::MetadataModel; use crate::storage::{MemStore, MetaStore, DEFAULT_COLUMN_FAMILY}; + use crate::MetaOpts; #[tokio::test] async fn test_snapshot_builder() { @@ -175,6 +185,15 @@ mod tests { .insert(meta_store.deref()) .await .unwrap(); + let err = builder.build(1).await.unwrap_err(); + let err = assert_matches!(err, BackupError::Other(e) => e); + assert_eq!("system params not found in meta store", err.to_error_str()); + + MetaOpts::test(true) + .init_system_params() + .insert(meta_store.deref()) + .await + .unwrap(); let mut builder = MetaSnapshotBuilder::new(meta_store.clone()); builder.build(1).await.unwrap(); diff --git a/src/meta/src/backup_restore/metrics.rs b/src/meta/src/backup_restore/metrics.rs index d558321b88718..c133e4b5fb426 100644 --- a/src/meta/src/backup_restore/metrics.rs +++ b/src/meta/src/backup_restore/metrics.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/backup_restore/mod.rs b/src/meta/src/backup_restore/mod.rs index 7b0ad1932e874..4115929f01729 100644 --- a/src/meta/src/backup_restore/mod.rs +++ b/src/meta/src/backup_restore/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/backup_restore/restore.rs b/src/meta/src/backup_restore/restore.rs index 36644a51b637e..cf01331c34b48 100644 --- a/src/meta/src/backup_restore/restore.rs +++ b/src/meta/src/backup_restore/restore.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,12 +17,14 @@ use itertools::Itertools; use risingwave_backup::error::{BackupError, BackupResult}; use risingwave_backup::meta_snapshot::MetaSnapshot; use risingwave_backup::storage::MetaSnapshotStorageRef; +use risingwave_common::config::MetaBackend; use crate::backup_restore::utils::{get_backup_store, get_meta_store, MetaStoreBackendImpl}; -use crate::hummock::compaction_group::CompactionGroup; +use crate::dispatch_meta_store; +use crate::hummock::model::CompactionGroup; +use crate::manager::model::SystemParamsModel; use crate::model::{MetadataModel, TableFragments}; use crate::storage::{MetaStore, DEFAULT_COLUMN_FAMILY}; -use crate::{dispatch_meta_store, Backend}; /// Command-line arguments for restore. #[derive(Parser, Debug, Clone)] @@ -32,8 +34,8 @@ pub struct RestoreOpts { #[clap(long)] pub meta_snapshot_id: u64, /// Type of meta store to restore. - #[clap(long, arg_enum, default_value_t = Backend::Etcd)] - pub meta_store_type: Backend, + #[clap(long, arg_enum, default_value_t = MetaBackend::Etcd)] + pub meta_store_type: MetaBackend, /// Etcd endpoints. 
#[clap(long, default_value_t = String::from(""))] pub etcd_endpoints: String, @@ -52,6 +54,9 @@ pub struct RestoreOpts { /// Directory of storage to fetch meta snapshot from. #[clap(long, default_value_t = String::from("backup"))] pub storage_directory: String, + /// Print the target snapshot, but won't restore to meta store. + #[clap(long)] + pub dry_run: bool, } async fn restore_metadata_model( @@ -67,6 +72,19 @@ async fn restore_metadata_model( Ok(()) } +async fn restore_system_param_model( + meta_store: &S, + metadata: &[T], +) -> BackupResult<()> { + if T::get(meta_store).await?.is_some() { + return Err(BackupError::NonemptyMetaStorage); + } + for d in metadata { + d.insert(meta_store).await?; + } + Ok(()) +} + async fn restore_default_cf( meta_store: &S, snapshot: &MetaSnapshot, @@ -114,6 +132,8 @@ async fn restore_metadata(meta_store: S, snapshot: MetaSnapshot) - restore_metadata_model(&meta_store, &snapshot.metadata.sink).await?; restore_metadata_model(&meta_store, &snapshot.metadata.view).await?; restore_metadata_model(&meta_store, &snapshot.metadata.source).await?; + restore_metadata_model(&meta_store, &snapshot.metadata.function).await?; + restore_system_param_model(&meta_store, &[snapshot.metadata.system_param]).await?; Ok(()) } @@ -145,13 +165,18 @@ async fn restore_impl( target_id ))); } + let mut target_snapshot = backup_store.get(target_id).await?; + tracing::info!( + "snapshot {} before rewrite:\n{}", + target_id, + target_snapshot + ); let newest_id = snapshot_list .into_iter() .map(|m| m.id) .max() .expect("should exist"); - let newest_snapshot = backup_store.get(newest_id).await?; - let mut target_snapshot = backup_store.get(target_id).await?; + assert!(newest_id >= target_id); // Always use newest snapshot's `default_cf` during restoring, in order not to corrupt shared // data of snapshots. Otherwise, for example if we restore a older SST id generator, an // existent SST in object store is at risk of being overwrote by the restored cluster. @@ -159,17 +184,27 @@ async fn restore_impl( // - Value is monotonically non-decreasing. // - Value is memcomparable. // - Keys of newest_snapshot is a superset of that of target_snapshot. - assert!(newest_snapshot.id >= target_snapshot.id); - for (k, v) in &target_snapshot.metadata.default_cf { - let newest_v = newest_snapshot - .metadata - .default_cf - .get(k) - .unwrap_or_else(|| panic!("violate superset requirement. key {:x?}", k)); - assert!(newest_v >= v, "violate monotonicity requirement"); + if newest_id > target_id { + let newest_snapshot = backup_store.get(newest_id).await?; + for (k, v) in &target_snapshot.metadata.default_cf { + let newest_v = newest_snapshot + .metadata + .default_cf + .get(k) + .unwrap_or_else(|| panic!("violate superset requirement. 
key {:x?}", k)); + assert!(newest_v >= v, "violate monotonicity requirement"); + } + target_snapshot.metadata.default_cf = newest_snapshot.metadata.default_cf; + tracing::info!( + "snapshot {} after rewrite by snapshot {}:\n{}", + target_id, + target_snapshot, + newest_id + ); + } + if opts.dry_run { + return Ok(()); } - target_snapshot.metadata.default_cf = newest_snapshot.metadata.default_cf; - dispatch_meta_store!(meta_store.clone(), store, { restore_metadata(store.clone(), target_snapshot.clone()).await?; }); @@ -181,10 +216,10 @@ pub async fn restore(opts: RestoreOpts) -> BackupResult<()> { let result = restore_impl(opts, None, None).await; match &result { Ok(_) => { - tracing::info!("restore succeeded"); + tracing::info!("command succeeded"); } Err(e) => { - tracing::warn!("restore failed: {}", e); + tracing::warn!("command failed: {}", e); } } result @@ -195,13 +230,16 @@ mod tests { use std::collections::HashMap; use clap::Parser; + use itertools::Itertools; use risingwave_backup::meta_snapshot::{ClusterMetadata, MetaSnapshot}; use risingwave_pb::hummock::HummockVersion; + use risingwave_pb::meta::SystemParams; use crate::backup_restore::restore::restore_impl; use crate::backup_restore::utils::{get_backup_store, get_meta_store, MetaStoreBackendImpl}; use crate::backup_restore::RestoreOpts; use crate::dispatch_meta_store; + use crate::manager::model::SystemParamsModel; use crate::model::MetadataModel; use crate::storage::{MetaStore, DEFAULT_COLUMN_FAMILY}; @@ -217,6 +255,20 @@ mod tests { ]) } + fn get_system_params() -> SystemParams { + SystemParams { + barrier_interval_ms: Some(101), + checkpoint_frequency: Some(102), + sstable_size_mb: Some(103), + block_size_kb: Some(104), + bloom_false_positive: Some(0.1), + state_store: Some("state_store".to_string()), + data_directory: Some("data_directory".to_string()), + backup_storage_url: Some("backup_storage_url".to_string()), + backup_storage_directory: Some("backup_storage_directory".to_string()), + } + } + #[tokio::test] async fn test_restore_basic() { let opts = get_restore_opts(); @@ -227,6 +279,7 @@ mod tests { hummock_version.insert(&store).await.unwrap(); }); let empty_meta_store = get_meta_store(opts.clone()).await.unwrap(); + let system_param = get_system_params(); let snapshot = MetaSnapshot { id: opts.meta_snapshot_id, metadata: ClusterMetadata { @@ -234,8 +287,10 @@ mod tests { id: 123, ..Default::default() }, + system_param: system_param.clone(), ..Default::default() }, + ..Default::default() }; // target snapshot not found @@ -259,11 +314,23 @@ mod tests { restore_impl( opts.clone(), - Some(empty_meta_store), + Some(empty_meta_store.clone()), Some(backup_store.clone()), ) .await .unwrap(); + + dispatch_meta_store!(empty_meta_store, store, { + let restored_hummock_version = HummockVersion::list(&store) + .await + .unwrap() + .into_iter() + .next() + .unwrap(); + assert_eq!(restored_hummock_version.id, 123); + let restored_system_param = SystemParams::get(&store).await.unwrap().unwrap(); + assert_eq!(restored_system_param, system_param); + }); } #[tokio::test] @@ -274,8 +341,10 @@ mod tests { id: opts.meta_snapshot_id, metadata: ClusterMetadata { default_cf: HashMap::from([(vec![1u8, 2u8], memcomparable::to_vec(&10).unwrap())]), + system_param: get_system_params(), ..Default::default() }, + ..Default::default() }; backup_store.create(&snapshot).await.unwrap(); @@ -302,7 +371,13 @@ mod tests { .await .unwrap(); dispatch_meta_store!(empty_meta_store, store, { - let mut kvs = store.list_cf(DEFAULT_COLUMN_FAMILY).await.unwrap(); 
+ let mut kvs = store + .list_cf(DEFAULT_COLUMN_FAMILY) + .await + .unwrap() + .into_iter() + .map(|(_, v)| v) + .collect_vec(); kvs.sort(); assert_eq!( kvs, @@ -323,8 +398,10 @@ mod tests { id: opts.meta_snapshot_id, metadata: ClusterMetadata { default_cf: HashMap::from([(vec![1u8, 2u8], memcomparable::to_vec(&10).unwrap())]), + system_param: get_system_params(), ..Default::default() }, + ..Default::default() }; backup_store.create(&snapshot).await.unwrap(); @@ -352,8 +429,10 @@ mod tests { id: opts.meta_snapshot_id, metadata: ClusterMetadata { default_cf: HashMap::from([(vec![1u8, 2u8], memcomparable::to_vec(&10).unwrap())]), + system_param: get_system_params(), ..Default::default() }, + ..Default::default() }; backup_store.create(&snapshot).await.unwrap(); @@ -371,4 +450,49 @@ mod tests { .await .unwrap(); } + + #[tokio::test] + async fn test_dry_run() { + let mut opts = get_restore_opts(); + assert!(!opts.dry_run); + opts.dry_run = true; + let backup_store = get_backup_store(opts.clone()).await.unwrap(); + let empty_meta_store = get_meta_store(opts.clone()).await.unwrap(); + let system_param = get_system_params(); + let snapshot = MetaSnapshot { + id: opts.meta_snapshot_id, + metadata: ClusterMetadata { + default_cf: HashMap::from([ + ( + "some_key_1".as_bytes().to_vec(), + memcomparable::to_vec(&10).unwrap(), + ), + ( + "some_key_2".as_bytes().to_vec(), + memcomparable::to_vec(&"some_value_2".to_string()).unwrap(), + ), + ]), + hummock_version: HummockVersion { + id: 123, + ..Default::default() + }, + system_param: system_param.clone(), + ..Default::default() + }, + ..Default::default() + }; + backup_store.create(&snapshot).await.unwrap(); + restore_impl( + opts.clone(), + Some(empty_meta_store.clone()), + Some(backup_store.clone()), + ) + .await + .unwrap(); + + dispatch_meta_store!(empty_meta_store, store, { + assert!(HummockVersion::list(&store).await.unwrap().is_empty()); + assert!(SystemParams::get(&store).await.unwrap().is_none()); + }); + } } diff --git a/src/meta/src/backup_restore/utils.rs b/src/meta/src/backup_restore/utils.rs index e734300d632b8..10d5c1b31be8d 100644 --- a/src/meta/src/backup_restore/utils.rs +++ b/src/meta/src/backup_restore/utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,12 +18,13 @@ use std::time::Duration; use etcd_client::ConnectOptions; use risingwave_backup::error::BackupResult; use risingwave_backup::storage::{MetaSnapshotStorageRef, ObjectStoreMetaSnapshotStorage}; +use risingwave_common::config::MetaBackend; use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; use risingwave_object_store::object::parse_remote_object_store; use crate::backup_restore::RestoreOpts; use crate::storage::{EtcdMetaStore, MemStore, WrappedEtcdClient as EtcdClient}; -use crate::{Backend, MetaStoreBackend}; +use crate::MetaStoreBackend; #[derive(Clone)] pub enum MetaStoreBackendImpl { @@ -44,7 +45,7 @@ macro_rules! dispatch_meta_store { // Code is copied from src/meta/src/rpc/server.rs. TODO #6482: extract method. 
pub async fn get_meta_store(opts: RestoreOpts) -> BackupResult { let meta_store_backend = match opts.meta_store_type { - Backend::Etcd => MetaStoreBackend::Etcd { + MetaBackend::Etcd => MetaStoreBackend::Etcd { endpoints: opts .etcd_endpoints .split(',') @@ -55,7 +56,7 @@ pub async fn get_meta_store(opts: RestoreOpts) -> BackupResult None, }, }, - Backend::Mem => MetaStoreBackend::Mem, + MetaBackend::Mem => MetaStoreBackend::Mem, }; match meta_store_backend { MetaStoreBackend::Etcd { @@ -80,7 +81,6 @@ pub async fn get_backup_store(opts: RestoreOpts) -> BackupResult>, + upstream_mview_actors: HashMap>, dispatchers: HashMap>, init_split_assignment: SplitAssignment, + definition: String, }, + /// `CancelStreamingJob` command generates a `Stop` barrier including the actors of the given + /// table fragment. + /// + /// The collecting and cleaning part works exactly the same as `DropStreamingJobs` command. + CancelStreamingJob(TableFragments), /// `Reschedule` command generates a `Update` barrier by the [`Reschedule`] of each fragment. /// Mainly used for scaling and migration. @@ -134,6 +141,9 @@ impl Command { table_fragments, .. } => CommandChanges::CreateTable(table_fragments.table_id()), Command::DropStreamingJobs(table_ids) => CommandChanges::DropTables(table_ids.clone()), + Command::CancelStreamingJob(table_fragments) => { + CommandChanges::DropTables(std::iter::once(table_fragments.table_id()).collect()) + } Command::RescheduleFragment(reschedules) => { let to_add = reschedules .values() @@ -267,6 +277,11 @@ where })) } + Command::CancelStreamingJob(table_fragments) => { + let actors = table_fragments.actor_ids(); + Some(Mutation::Stop(StopMutation { actors })) + } + Command::RescheduleFragment(reschedules) => { let mut dispatcher_update = HashMap::new(); for (_fragment_id, reschedule) in reschedules.iter() { @@ -290,7 +305,8 @@ where dispatcher_id, hash_mapping: reschedule .upstream_dispatcher_mapping - .clone(), + .as_ref() + .map(|m| m.to_protobuf()), added_downstream_actor_id: reschedule.added_actors.clone(), removed_downstream_actor_id: reschedule .removed_actors @@ -339,6 +355,7 @@ where MergeUpdate { actor_id, upstream_fragment_id: fragment_id, + new_upstream_fragment_id: None, added_upstream_actor_id: reschedule.added_actors.clone(), removed_upstream_actor_id: reschedule .removed_actors @@ -409,6 +426,38 @@ where } } + /// For `CancelStreamingJob`, returns the actors of the `Chain` nodes. For other commands, + /// returns an empty set. + pub fn actors_to_cancel(&self) -> HashSet { + match &self.command { + Command::CancelStreamingJob(table_fragments) => table_fragments.chain_actor_ids(), + _ => Default::default(), + } + } + + /// Clean up actors in CNs if needed, used by drop, cancel and reschedule commands. + async fn clean_up( + &self, + actors_to_clean: impl IntoIterator)>, + ) -> MetaResult<()> { + let futures = actors_to_clean.into_iter().map(|(node_id, actors)| { + let node = self.info.node_map.get(&node_id).unwrap(); + let request_id = Uuid::new_v4().to_string(); + + async move { + let client = self.client_pool.get(node).await?; + let request = DropActorsRequest { + request_id, + actor_ids: actors.to_owned(), + }; + client.drop_actors(request).await + } + }); + + try_join_all(futures).await?; + Ok(()) + } + /// Do some stuffs after barriers are collected and the new storage version is committed, for /// the given command. 
pub async fn post_collect(&self) -> MetaResult<()> { @@ -446,36 +495,33 @@ where Command::DropStreamingJobs(table_ids) => { // Tell compute nodes to drop actors. let node_actors = self.fragment_manager.table_node_actors(table_ids).await?; - let futures = node_actors.iter().map(|(node_id, actors)| { - let node = self.info.node_map.get(node_id).unwrap(); - let request_id = Uuid::new_v4().to_string(); - - async move { - let client = self.client_pool.get(node).await?; - let request = DropActorsRequest { - request_id, - actor_ids: actors.to_owned(), - }; - client.drop_actors(request).await - } - }); - - try_join_all(futures).await?; - + self.clean_up(node_actors).await?; // Drop fragment info in meta store. self.fragment_manager .drop_table_fragments_vec(table_ids) .await?; } + Command::CancelStreamingJob(table_fragments) => { + let node_actors = table_fragments.worker_actor_ids(); + self.clean_up(node_actors).await?; + // Drop fragment info in meta store. + self.fragment_manager + .drop_table_fragments_vec(&HashSet::from_iter(std::iter::once( + table_fragments.table_id(), + ))) + .await?; + } + Command::CreateStreamingJob { table_fragments, dispatchers, - table_mview_map, + upstream_mview_actors, init_split_assignment, + .. } => { - let mut dependent_table_actors = Vec::with_capacity(table_mview_map.len()); - for (table_id, actors) in table_mview_map { + let mut dependent_table_actors = Vec::with_capacity(upstream_mview_actors.len()); + for (table_id, actors) in upstream_mview_actors { let downstream_actors = dispatchers .iter() .filter(|(upstream_actor_id, _)| actors.contains(upstream_actor_id)) @@ -530,23 +576,7 @@ where } } } - - let drop_actor_futures = - node_dropped_actors.into_iter().map(|(node_id, actors)| { - let node = self.info.node_map.get(&node_id).unwrap(); - let request_id = Uuid::new_v4().to_string(); - - async move { - let client = self.client_pool.get(node).await?; - let request = DropActorsRequest { - request_id, - actor_ids: actors.to_owned(), - }; - client.drop_actors(request).await - } - }); - - try_join_all(drop_actor_futures).await?; + self.clean_up(node_dropped_actors).await?; // Update fragment info after rescheduling in meta store. self.fragment_manager diff --git a/src/meta/src/barrier/info.rs b/src/meta/src/barrier/info.rs index d6ce33e0b1e37..0f19171246727 100644 --- a/src/meta/src/barrier/info.rs +++ b/src/meta/src/barrier/info.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/barrier/mod.rs b/src/meta/src/barrier/mod.rs index bf21a8c063239..b570fb91f8186 100644 --- a/src/meta/src/barrier/mod.rs +++ b/src/meta/src/barrier/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
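
The new `clean_up` helper above fans a `DropActorsRequest` out to every affected compute node and fails fast via `try_join_all`. A simplified sketch of that fan-out, assuming the `futures` and `tokio` crates; the per-node request is a stub rather than the real gRPC client call:

use futures::future::try_join_all;

// Stub for a per-node drop request; the real code goes through the compute node client pool.
async fn drop_actors_on_node(node_id: u32, actors: Vec<u32>) -> Result<usize, String> {
    if actors.is_empty() {
        return Err(format!("node {node_id}: empty drop request"));
    }
    Ok(actors.len())
}

#[tokio::main]
async fn main() -> Result<(), String> {
    let plan = vec![(1u32, vec![10, 11]), (2u32, vec![20])];
    let futures = plan
        .into_iter()
        .map(|(node_id, actors)| drop_actors_on_node(node_id, actors));
    // Fail fast if any node reports an error, mirroring `clean_up` above.
    try_join_all(futures).await?;
    Ok(())
}
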
@@ -28,6 +28,7 @@ use risingwave_common::util::epoch::INVALID_EPOCH; use risingwave_hummock_sdk::{ExtendedSstableInfo, HummockSstableId}; use risingwave_pb::common::worker_node::State::Running; use risingwave_pb::common::WorkerType; +use risingwave_pb::ddl_service::DdlProgress; use risingwave_pb::meta::subscribe_response::{Info, Operation}; use risingwave_pb::meta::table_fragments::actor_status::ActorState; use risingwave_pb::stream_plan::Barrier; @@ -148,6 +149,8 @@ pub struct GlobalBarrierManager { metrics: Arc, pub(crate) env: MetaSrvEnv, + + tracker: Mutex>, } /// Controls the concurrent execution of commands. @@ -215,6 +218,17 @@ where Ok(!self.finished_commands.is_empty()) } + fn cancel_command(&mut self, cancelled_command: TrackingCommand) { + if let Some(index) = self + .command_ctx_queue + .iter() + .position(|x| x.command_ctx.prev_epoch == cancelled_command.context.prev_epoch) + { + self.command_ctx_queue.remove(index); + self.remove_changes(cancelled_command.context.command.changes()); + } + } + /// Before resolving the actors to be sent or collected, we should first record the newly /// created table and added actors into checkpoint control, so that `can_actor_send_or_collect` /// will return `true`. @@ -249,10 +263,6 @@ where fn post_resolve(&mut self, command: &Command) { match command.changes() { CommandChanges::DropTables(tables) => { - assert!( - self.creating_tables.is_disjoint(&tables), - "conflict table in concurrent checkpoint" - ); assert!( self.dropping_tables.is_disjoint(&tables), "duplicated table in concurrent checkpoint" @@ -274,7 +284,8 @@ where /// Barrier can be sent to and collected from an actor if: /// 1. The actor is Running and not being dropped or removed in rescheduling. - /// 2. The actor is Inactive and belongs to a creating MV or adding in rescheduling. + /// 2. The actor is Inactive and belongs to a creating MV or adding in rescheduling and not + /// belongs to a canceling command. fn can_actor_send_or_collect( &self, s: ActorState, @@ -287,7 +298,7 @@ where self.creating_tables.contains(&table_id) || self.adding_actors.contains(&actor_id); match s { - ActorState::Inactive => adding, + ActorState::Inactive => adding && !removing, ActorState::Running => !removing, ActorState::Unspecified => unreachable!(), } @@ -484,7 +495,7 @@ where ); let snapshot_manager = SnapshotManager::new(hummock_manager.clone()).into(); - + let tracker = CreateMviewProgressTracker::new(); Self { interval, enable_recovery, @@ -499,6 +510,7 @@ where source_manager, metrics, env, + tracker: Mutex::new(tracker), } } @@ -525,7 +537,6 @@ where /// Start an infinite loop to take scheduled barriers and send them. async fn run(&self, mut shutdown_rx: Receiver<()>) { - let mut tracker = CreateMviewProgressTracker::new(); let mut state = BarrierManagerState::create(self.env.meta_store()).await; if self.enable_recovery { // handle init, here we simply trigger a recovery process to achieve the consistency. 
We @@ -576,7 +587,6 @@ where prev_epoch, result, &mut state, - &mut tracker, &mut checkpoint_control, ) .await; @@ -748,9 +758,9 @@ where }); let result = try_join_all(collect_futures).await; - barrier_complete_tx + let _ = barrier_complete_tx .send((prev_epoch, result.map_err(Into::into))) - .unwrap(); + .inspect_err(|err| tracing::warn!("failed to complete barrier: {err}")); } /// Changes the state to `Complete`, and try to commit all epoch that state is `Complete` in @@ -760,7 +770,6 @@ where prev_epoch: u64, result: MetaResult>, state: &mut BarrierManagerState, - tracker: &mut CreateMviewProgressTracker, checkpoint_control: &mut CheckpointControl, ) { if let Err(err) = result { @@ -769,7 +778,7 @@ where fail_point!("inject_barrier_err_success"); let fail_node = checkpoint_control.barrier_failed(); tracing::warn!("Failed to complete epoch {}: {:?}", prev_epoch, err); - self.do_recovery(err, fail_node, state, tracker, checkpoint_control) + self.do_recovery(err, fail_node, state, checkpoint_control) .await; return; } @@ -779,10 +788,7 @@ where let (mut index, mut err_msg) = (0, None); for (i, node) in complete_nodes.iter_mut().enumerate() { assert!(matches!(node.state, Completed(_))); - if let Err(err) = self - .complete_barrier(node, tracker, checkpoint_control) - .await - { + if let Err(err) = self.complete_barrier(node, checkpoint_control).await { index = i; err_msg = Some(err); break; @@ -793,7 +799,7 @@ where let fail_nodes = complete_nodes .drain(index..) .chain(checkpoint_control.barrier_failed().into_iter()); - self.do_recovery(err, fail_nodes, state, tracker, checkpoint_control) + self.do_recovery(err, fail_nodes, state, checkpoint_control) .await; } } @@ -803,7 +809,6 @@ where err: MetaError, fail_nodes: impl IntoIterator>, state: &mut BarrierManagerState, - tracker: &mut CreateMviewProgressTracker, checkpoint_control: &mut CheckpointControl, ) { checkpoint_control.clear_changes(); @@ -821,7 +826,12 @@ where if self.enable_recovery { // If failed, enter recovery mode. self.set_status(BarrierManagerStatus::Recovering).await; + let mut tracker = self.tracker.lock().await; *tracker = CreateMviewProgressTracker::new(); + self.snapshot_manager + .unpin_all() + .await + .expect("unpin meta's snapshots"); let new_epoch = self.recovery(state.in_flight_prev_epoch).await; state.in_flight_prev_epoch = new_epoch; state @@ -838,7 +848,6 @@ where async fn complete_barrier( &self, node: &mut EpochNode, - tracker: &mut CreateMviewProgressTracker, checkpoint_control: &mut CheckpointControl, ) -> MetaResult<()> { let prev_epoch = node.command_ctx.prev_epoch.0; @@ -847,7 +856,7 @@ where // We must ensure all epochs are committed in ascending order, // because the storage engine will query from new to old in the order in which // the L0 layer files are generated. - // See https://github.com/singularity-data/risingwave/issues/1251 + // See https://github.com/risingwave-labs/risingwave/issues/1251 let checkpoint = node.command_ctx.checkpoint; let (sst_to_worker, synced_ssts) = collect_synced_ssts(resps); // hummock_manager commit epoch. @@ -888,17 +897,31 @@ where notifier.notify_collected(); }); + // Save `cancelled_command` for Create MVs. + let actors_to_cancel = node.command_ctx.actors_to_cancel(); + let cancelled_command = if !actors_to_cancel.is_empty() { + let mut tracker = self.tracker.lock().await; + tracker.find_cancelled_command(actors_to_cancel) + } else { + None + }; + // Save `finished_commands` for Create MVs. 
let finished_commands = { let mut commands = vec![]; - if let Some(command) = tracker.add(TrackingCommand { - context: node.command_ctx.clone(), - notifiers, - }) { + let version_stats = self.hummock_manager.get_version_stats().await; + let mut tracker = self.tracker.lock().await; + if let Some(command) = tracker.add( + TrackingCommand { + context: node.command_ctx.clone(), + notifiers, + }, + &version_stats, + ) { commands.push(command); } for progress in resps.iter().flat_map(|r| &r.create_mview_progress) { - if let Some(command) = tracker.update(progress) { + if let Some(command) = tracker.update(progress, &version_stats) { commands.push(command); } } @@ -917,6 +940,10 @@ where self.scheduled_barriers.force_checkpoint_in_next_barrier(); } + if let Some(command) = cancelled_command { + checkpoint_control.cancel_command(command); + } + node.timer.take().unwrap().observe_duration(); node.wait_commit_timer.take().unwrap().observe_duration(); @@ -951,6 +978,10 @@ where info } + + pub async fn get_ddl_progress(&self) -> Vec { + self.tracker.lock().await.gen_ddl_progress() + } } pub type BarrierManagerRef = Arc>; diff --git a/src/meta/src/barrier/notifier.rs b/src/meta/src/barrier/notifier.rs index 75b57984163a7..640886079854c 100644 --- a/src/meta/src/barrier/notifier.rs +++ b/src/meta/src/barrier/notifier.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/barrier/progress.rs b/src/meta/src/barrier/progress.rs index 54cbf821e91c7..5d07f073b8310 100644 --- a/src/meta/src/barrier/progress.rs +++ b/src/meta/src/barrier/progress.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,23 +13,29 @@ // limitations under the License. use std::collections::hash_map::Entry; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; +use itertools::Itertools; +use risingwave_common::catalog::TableId; use risingwave_common::util::epoch::Epoch; +use risingwave_pb::ddl_service::DdlProgress; +use risingwave_pb::hummock::HummockVersionStats; use risingwave_pb::stream_service::barrier_complete_response::CreateMviewProgress; use super::command::CommandContext; use super::notifier::Notifier; +use crate::barrier::Command; use crate::model::ActorId; use crate::storage::MetaStore; type CreateMviewEpoch = Epoch; +type ConsumedRows = u64; #[derive(Clone, Copy)] enum ChainState { ConsumingSnapshot, - ConsumingUpstream(Epoch), + ConsumingUpstream(Epoch, ConsumedRows), Done, } @@ -38,11 +44,32 @@ struct Progress { states: HashMap, done_count: usize, + + /// From 0 to 1. + progress: f64, + + /// Creating mv id. + creating_mv_id: TableId, + + /// Upstream mv count. Keep track of how many times each upstream MV appears. + upstream_mv_count: HashMap, + + /// Upstream mvs total key count. + upstream_total_key_count: u64, + + /// DDL definition + definition: String, } impl Progress { /// Create a [`Progress`] for some creating mview, with all `actors` containing the chain nodes. 
- fn new(actors: impl IntoIterator) -> Self { + fn new( + actors: impl IntoIterator, + creating_mv_id: TableId, + upstream_mv_count: HashMap, + upstream_total_key_count: u64, + definition: String, + ) -> Self { let states = actors .into_iter() .map(|a| (a, ChainState::ConsumingSnapshot)) @@ -52,13 +79,19 @@ impl Progress { Self { states, done_count: 0, + progress: 0.0, + creating_mv_id, + upstream_mv_count, + upstream_total_key_count, + definition, } } /// Update the progress of `actor`. - fn update(&mut self, actor: ActorId, new_state: ChainState) { + fn update(&mut self, actor: ActorId, new_state: ChainState, upstream_total_key_count: u64) { + self.upstream_total_key_count = upstream_total_key_count; match self.states.get_mut(&actor).unwrap() { - state @ (ChainState::ConsumingSnapshot | ChainState::ConsumingUpstream(_)) => { + state @ (ChainState::ConsumingSnapshot | ChainState::ConsumingUpstream(_, _)) => { if matches!(new_state, ChainState::Done) { self.done_count += 1; } @@ -66,6 +99,7 @@ impl Progress { } ChainState::Done => panic!("should not report done multiple times"), } + self.calculate_progress(); } /// Returns whether all chains are done. @@ -78,6 +112,35 @@ impl Progress { fn actors(&self) -> impl Iterator + '_ { self.states.keys().cloned() } + + /// `progress` = `done_ratio` + (1 - `done_ratio`) * (`consumed_rows` / `remaining_rows`). + fn calculate_progress(&mut self) -> f64 { + if self.states.is_empty() { + return 1.0; + } + let done_ratio: f64 = (self.done_count) as f64 / self.states.len() as f64; + let mut remaining_rows = self.upstream_total_key_count as f64 * (1_f64 - done_ratio); + if remaining_rows == 0.0 { + remaining_rows = 1.0; + } + let consumed_rows: u64 = self + .states + .values() + .map(|x| match x { + ChainState::ConsumingUpstream(_, rows) => *rows, + _ => 0, + }) + .sum(); + let calculate_progress = + done_ratio + (1_f64 - done_ratio) * consumed_rows as f64 / remaining_rows; + if self.progress < calculate_progress { + self.progress = calculate_progress; + if self.progress > 1.0 { + self.progress = 1.0; + } + } + self.progress + } } /// The command tracking by the [`CreateMviewProgressTracker`]. @@ -107,10 +170,46 @@ impl CreateMviewProgressTracker { } } + pub fn gen_ddl_progress(&self) -> Vec { + self.progress_map + .values() + .map(|(x, _)| DdlProgress { + id: x.creating_mv_id.table_id as u64, + statement: x.definition.clone(), + progress: format!("{:.2}%", x.progress * 100.0), + }) + .collect() + } + + /// Try to find the target create-streaming-job command from track. + /// + /// Return the target command as it should be cancelled based on the input actors. + pub fn find_cancelled_command( + &mut self, + actors_to_cancel: HashSet, + ) -> Option> { + let epochs = actors_to_cancel + .into_iter() + .map(|actor_id| self.actor_map.get(&actor_id)) + .collect_vec(); + assert!(epochs.iter().all_equal()); + // If the target command found in progress map, return and remove it. Note that the command + // should have finished if not found. + if let Some(Some(epoch)) = epochs.first() { + Some(self.progress_map.remove(epoch).unwrap().1) + } else { + None + } + } + /// Add a new create-mview DDL command to track. /// /// If the actors to track is empty, return the given command as it can be finished immediately. 
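
`calculate_progress` above combines the fraction of finished chain actors with the rows consumed by the still-running ones: progress = done_ratio + (1 - done_ratio) * consumed_rows / remaining_rows, capped at 1.0. A small standalone sketch of that formula; the numbers in `main` are made up for illustration:

fn estimate_progress(done: usize, total: usize, consumed_rows: u64, upstream_rows: u64) -> f64 {
    if total == 0 {
        return 1.0;
    }
    let done_ratio = done as f64 / total as f64;
    // Rows still to be consumed by the unfinished actors; avoid dividing by zero.
    let mut remaining = upstream_rows as f64 * (1.0 - done_ratio);
    if remaining == 0.0 {
        remaining = 1.0;
    }
    (done_ratio + (1.0 - done_ratio) * consumed_rows as f64 / remaining).min(1.0)
}

fn main() {
    // 1 of 4 chain actors done, 500 of the remaining ~750 upstream rows consumed.
    let p = estimate_progress(1, 4, 500, 1000);
    assert!((p - 0.75).abs() < 1e-9);
}
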
- pub fn add(&mut self, command: TrackingCommand) -> Option> { + pub fn add( + &mut self, + command: TrackingCommand, + version_stats: &HummockVersionStats, + ) -> Option> { let actors = command.context.actors_to_track(); if actors.is_empty() { // The command can be finished immediately. @@ -122,7 +221,54 @@ impl CreateMviewProgressTracker { self.actor_map.insert(actor, ddl_epoch); } - let progress = Progress::new(actors); + let (creating_mv_id, upstream_mv_count, upstream_total_key_count, definition) = + if let Command::CreateStreamingJob { + table_fragments, + dispatchers, + upstream_mview_actors, + definition, + .. + } = &command.context.command + { + // Keep track of how many times each upstream MV appears. + let mut upstream_mv_count = HashMap::new(); + for (table_id, actors) in upstream_mview_actors { + assert!(!actors.is_empty()); + let dispatch_count: usize = dispatchers + .iter() + .filter(|(upstream_actor_id, _)| actors.contains(upstream_actor_id)) + .map(|(_, v)| v.len()) + .sum(); + upstream_mv_count.insert(*table_id, dispatch_count / actors.len()); + } + + let upstream_total_key_count: u64 = upstream_mv_count + .iter() + .map(|(upstream_mv, count)| { + *count as u64 + * version_stats + .table_stats + .get(&upstream_mv.table_id) + .map_or(0, |stat| stat.total_key_count as u64) + }) + .sum(); + ( + table_fragments.table_id(), + upstream_mv_count, + upstream_total_key_count, + definition.to_string(), + ) + } else { + unreachable!("Must be CreateStreamingJob."); + }; + + let progress = Progress::new( + actors, + creating_mv_id, + upstream_mv_count, + upstream_total_key_count, + definition, + ); let old = self.progress_map.insert(ddl_epoch, (progress, command)); assert!(old.is_none()); None @@ -131,7 +277,11 @@ impl CreateMviewProgressTracker { /// Update the progress of `actor` according to the Prost struct. /// /// If all actors in this MV have finished, returns the command. 
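
When a create-streaming-job command is registered above, the tracker estimates the upstream workload by weighting each upstream table's total key count by how many times the new job consumes it. A minimal sketch of that weighting, with plain integers standing in for `TableId` and the Hummock version stats:

use std::collections::HashMap;

// Sum of (per-table consumption count) * (per-table total key count).
fn upstream_total_key_count(
    upstream_mv_count: &HashMap<u32, usize>,
    table_key_counts: &HashMap<u32, u64>,
) -> u64 {
    upstream_mv_count
        .iter()
        .map(|(table_id, count)| {
            *count as u64 * table_key_counts.get(table_id).copied().unwrap_or(0)
        })
        .sum()
}

fn main() {
    let counts = HashMap::from([(1u32, 2usize), (2u32, 1usize)]);
    let stats = HashMap::from([(1u32, 1000u64), (2u32, 500u64)]);
    assert_eq!(upstream_total_key_count(&counts, &stats), 2500);
}
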
- pub fn update(&mut self, progress: &CreateMviewProgress) -> Option> { + pub fn update( + &mut self, + progress: &CreateMviewProgress, + version_stats: &HummockVersionStats, + ) -> Option> { let actor = progress.chain_actor_id; let Some(epoch) = self.actor_map.get(&actor).copied() else { panic!("no tracked progress for actor {}, is it already finished?", actor); @@ -140,13 +290,26 @@ impl CreateMviewProgressTracker { let new_state = if progress.done { ChainState::Done } else { - ChainState::ConsumingUpstream(progress.consumed_epoch.into()) + ChainState::ConsumingUpstream(progress.consumed_epoch.into(), progress.consumed_rows) }; match self.progress_map.entry(epoch) { Entry::Occupied(mut o) => { let progress = &mut o.get_mut().0; - progress.update(actor, new_state); + + let upstream_total_key_count: u64 = progress + .upstream_mv_count + .iter() + .map(|(upstream_mv, count)| { + *count as u64 + * version_stats + .table_stats + .get(&upstream_mv.table_id) + .map_or(0, |stat| stat.total_key_count as u64) + }) + .sum(); + + progress.update(actor, new_state, upstream_total_key_count); if progress.is_done() { tracing::debug!("all actors done for creating mview with epoch {}!", epoch); @@ -162,7 +325,7 @@ impl CreateMviewProgressTracker { } Entry::Vacant(_) => { tracing::warn!( - "update the progress of an inexistent create-mview DDL: {progress:?}" + "update the progress of an non-existent creating streaming job: {progress:?}, which could be cancelled" ); None } diff --git a/src/meta/src/barrier/recovery.rs b/src/meta/src/barrier/recovery.rs index e93d2043af988..0b99ea6f0fd28 100644 --- a/src/meta/src/barrier/recovery.rs +++ b/src/meta/src/barrier/recovery.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -44,9 +44,9 @@ where S: MetaStore, { // Retry base interval in milliseconds. - const RECOVERY_RETRY_BASE_INTERVAL: u64 = 100; + const RECOVERY_RETRY_BASE_INTERVAL: u64 = 20; // Retry max interval. - const RECOVERY_RETRY_MAX_INTERVAL: Duration = Duration::from_secs(10); + const RECOVERY_RETRY_MAX_INTERVAL: Duration = Duration::from_secs(5); #[inline(always)] /// Initialize a retry strategy for operation in recovery. @@ -88,17 +88,14 @@ where // unregister compaction group for dirty table fragments. let _ = self.hummock_manager - .unregister_table_ids( - &to_drop_streaming_ids - .iter() - .map(|t| t.table_id) - .collect_vec(), + .unregister_table_fragments_vec( + &to_drop_table_fragments ) .await.inspect_err(|e| tracing::warn!( - "Failed to unregister compaction group for {:#?}.\nThey will be cleaned up on node restart.\n{:#?}", - to_drop_streaming_ids, - e) + "Failed to unregister compaction group for {:#?}. They will be cleaned up on node restart. {:#?}", + to_drop_table_fragments, + e) ); // clean up source connector dirty changes. diff --git a/src/meta/src/barrier/schedule.rs b/src/meta/src/barrier/schedule.rs index e629f7426a4c1..5dc62ed57618a 100644 --- a/src/meta/src/barrier/schedule.rs +++ b/src/meta/src/barrier/schedule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ use std::sync::Arc; use std::time::Instant; use anyhow::anyhow; +use risingwave_common::catalog::TableId; use risingwave_pb::hummock::HummockSnapshot; use tokio::sync::{oneshot, watch, RwLock}; @@ -96,6 +97,24 @@ impl BarrierScheduler { } } + /// Try to cancel scheduled cmd for create streaming job, return true if cancelled. + pub async fn try_cancel_scheduled_create(&self, table_id: TableId) -> bool { + let mut queue = self.inner.queue.write().await; + if let Some(idx) = queue.iter().position(|scheduled| { + if let Command::CreateStreamingJob {table_fragments, ..} = &scheduled.command + && table_fragments.table_id() == table_id { + true + } else { + false + } + }) { + queue.remove(idx).unwrap(); + true + } else { + false + } + } + /// Attach `new_notifiers` to the very first scheduled barrier. If there's no one scheduled, a /// default barrier will be created. If `new_checkpoint` is true, the barrier will become a /// checkpoint. diff --git a/src/meta/src/barrier/snapshot.rs b/src/meta/src/barrier/snapshot.rs index b1cad655acb9e..bf3ba08b8a63a 100644 --- a/src/meta/src/barrier/snapshot.rs +++ b/src/meta/src/barrier/snapshot.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -82,4 +82,11 @@ impl SnapshotManager { Ok(()) } + + pub async fn unpin_all(&self) -> MetaResult<()> { + let mut snapshots = self.snapshots.lock().await; + self.hummock_manager.unpin_snapshot(META_NODE_ID).await?; + snapshots.clear(); + Ok(()) + } } diff --git a/src/meta/src/dashboard/mod.rs b/src/meta/src/dashboard/mod.rs index 566dd5317ddac..8a3d5498d587a 100644 --- a/src/meta/src/dashboard/mod.rs +++ b/src/meta/src/dashboard/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/dashboard/prometheus.rs b/src/meta/src/dashboard/prometheus.rs index 50abe1d6fff3f..b5e40d6f7160e 100644 --- a/src/meta/src/dashboard/prometheus.rs +++ b/src/meta/src/dashboard/prometheus.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/dashboard/proxy.rs b/src/meta/src/dashboard/proxy.rs index c4a1c4859c24c..4cfc25b05378a 100644 --- a/src/meta/src/dashboard/proxy.rs +++ b/src/meta/src/dashboard/proxy.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/error.rs b/src/meta/src/error.rs index 150dcd7afa627..f2c1eba0845c2 100644 --- a/src/meta/src/error.rs +++ b/src/meta/src/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
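
`try_cancel_scheduled_create` above scans the scheduled-barrier queue for a matching create-streaming-job command and removes it if found. The same scan-and-remove pattern on a plain `VecDeque`, with `u32` ids standing in for the scheduled commands:

use std::collections::VecDeque;

fn try_cancel(queue: &mut VecDeque<u32>, target: u32) -> bool {
    match queue.iter().position(|id| *id == target) {
        Some(idx) => queue.remove(idx).is_some(),
        None => false,
    }
}

fn main() {
    let mut queue: VecDeque<u32> = [1, 2, 3].into_iter().collect();
    assert!(try_cancel(&mut queue, 2));
    // A second cancel for the same id finds nothing.
    assert!(!try_cancel(&mut queue, 2));
    assert_eq!(queue.len(), 2);
}
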
@@ -58,6 +58,12 @@ enum MetaErrorInner { #[error("Election failed: {0}")] Election(etcd_client::Error), + #[error("Cancelled: {0}")] + Cancelled(String), + + #[error("SystemParams error: {0}")] + SystemParams(String), + #[error(transparent)] Internal(anyhow::Error), } @@ -116,9 +122,17 @@ impl MetaError { MetaErrorInner::Duplicated(relation, name.into()).into() } + pub fn system_param(s: T) -> Self { + MetaErrorInner::SystemParams(s.to_string()).into() + } + pub fn unavailable(s: String) -> Self { MetaErrorInner::Unavailable(s).into() } + + pub fn cancelled(s: String) -> Self { + MetaErrorInner::Cancelled(s).into() + } } impl From for MetaError { @@ -160,6 +174,7 @@ impl From for tonic::Status { MetaErrorInner::CatalogIdNotFound(_, _) => tonic::Status::not_found(err.to_string()), MetaErrorInner::Duplicated(_, _) => tonic::Status::already_exists(err.to_string()), MetaErrorInner::Unavailable(_) => tonic::Status::unavailable(err.to_string()), + MetaErrorInner::Cancelled(_) => tonic::Status::cancelled(err.to_string()), _ => tonic::Status::internal(err.to_string()), } } diff --git a/src/meta/src/hummock/compaction/compaction_config.rs b/src/meta/src/hummock/compaction/compaction_config.rs index d87899b39e152..8bd2b29e54b29 100644 --- a/src/meta/src/hummock/compaction/compaction_config.rs +++ b/src/meta/src/hummock/compaction/compaction_config.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ const DEFAULT_TARGET_FILE_SIZE_BASE: u64 = 32 * 1024 * 1024; // 32MB const DEFAULT_MAX_SUB_COMPACTION: u32 = 4; const MAX_LEVEL: u64 = 6; const DEFAULT_LEVEL_MULTIPLIER: u64 = 5; +const DEFAULT_MAX_SPACE_RECLAIM_BYTES: u64 = 512 * 1024 * 1024; // 512MB; pub struct CompactionConfigBuilder { config: CompactionConfig, @@ -59,6 +60,7 @@ impl CompactionConfigBuilder { | CompactionFilterFlag::TTL) .into(), max_sub_compaction: DEFAULT_MAX_SUB_COMPACTION, + max_space_reclaim_bytes: DEFAULT_MAX_SPACE_RECLAIM_BYTES, }, } } @@ -102,4 +104,5 @@ builder_field! { compression_algorithm: Vec, compaction_filter_mask: u32, max_sub_compaction: u32, + max_space_reclaim_bytes: u64, } diff --git a/src/meta/src/hummock/compaction/level_selector.rs b/src/meta/src/hummock/compaction/level_selector.rs index 66d1d3d54d5b6..71885539f015c 100644 --- a/src/meta/src/hummock/compaction/level_selector.rs +++ b/src/meta/src/hummock/compaction/level_selector.rs @@ -1,4 +1,5 @@ -// Copyright 2023 Singularity Data +use std::collections::HashMap; +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,31 +17,36 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
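
The new `MetaError` variants above follow the usual thiserror pattern: a formatting attribute per variant plus a small helper constructor, with the whole error later mapped onto a gRPC status code. A stripped-down sketch assuming only the `thiserror` crate (the `tonic` status mapping is omitted):

use thiserror::Error;

#[derive(Error, Debug)]
enum DemoError {
    #[error("Cancelled: {0}")]
    Cancelled(String),
    #[error("SystemParams error: {0}")]
    SystemParams(String),
}

impl DemoError {
    // Helper constructor mirroring `MetaError::cancelled`.
    fn cancelled(s: impl ToString) -> Self {
        DemoError::Cancelled(s.to_string())
    }
}

fn main() {
    let cancelled = DemoError::cancelled("create mv foo");
    assert_eq!(cancelled.to_string(), "Cancelled: create mv foo");
    let sys = DemoError::SystemParams("missing barrier_interval_ms".into());
    assert_eq!(sys.to_string(), "SystemParams error: missing barrier_interval_ms");
}
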
- use std::sync::Arc; use risingwave_hummock_sdk::HummockCompactionTaskId; use risingwave_pb::hummock::hummock_version::Levels; -use risingwave_pb::hummock::CompactionConfig; +use risingwave_pb::hummock::{compact_task, CompactionConfig}; -use crate::hummock::compaction::compaction_config::CompactionConfigBuilder; -use crate::hummock::compaction::min_overlap_compaction_picker::MinOverlappingPicker; +use super::picker::{ + SpaceReclaimCompactionPicker, SpaceReclaimPickerState, TtlPickerState, + TtlReclaimCompactionPicker, +}; +use super::{ + create_compaction_task, LevelCompactionPicker, ManualCompactionOption, ManualCompactionPicker, + TierCompactionPicker, +}; use crate::hummock::compaction::overlap_strategy::OverlapStrategy; use crate::hummock::compaction::{ - create_overlap_strategy, CompactionInput, CompactionPicker, CompactionTask, - LevelCompactionPicker, LocalPickerStatistic, LocalSelectorStatistic, TierCompactionPicker, + create_overlap_strategy, CompactionPicker, CompactionTask, LocalPickerStatistic, + LocalSelectorStatistic, MinOverlappingPicker, }; use crate::hummock::level_handler::LevelHandler; +use crate::hummock::model::CompactionGroup; use crate::rpc::metrics::MetaMetrics; const SCORE_BASE: u64 = 100; pub trait LevelSelector: Sync + Send { - fn need_compaction(&self, levels: &Levels, level_handlers: &[LevelHandler]) -> bool; - fn pick_compaction( - &self, + &mut self, task_id: HummockCompactionTaskId, + group: &CompactionGroup, levels: &Levels, level_handlers: &mut [LevelHandler], selector_stats: &mut LocalSelectorStatistic, @@ -49,6 +55,8 @@ pub trait LevelSelector: Sync + Send { fn report_statistic_metrics(&self, _metrics: &MetaMetrics) {} fn name(&self) -> &'static str; + + fn task_type(&self) -> compact_task::TaskType; } #[derive(Default)] @@ -63,63 +71,39 @@ pub struct SelectContext { pub score_levels: Vec<(u64, usize, usize)>, } -pub struct LevelSelectorCore { +pub struct DynamicLevelSelectorCore { config: Arc, - overlap_strategy: Arc, -} - -pub struct DynamicLevelSelector { - inner: LevelSelectorCore, } -impl Default for DynamicLevelSelector { - fn default() -> Self { - let config = Arc::new(CompactionConfigBuilder::new().build()); - let overlap_strategy = create_overlap_strategy(config.compaction_mode()); - DynamicLevelSelector::new(config, overlap_strategy) - } -} - -impl DynamicLevelSelector { - pub fn new(config: Arc, overlap_strategy: Arc) -> Self { - Self { - inner: LevelSelectorCore::new(config, overlap_strategy), - } - } -} +#[derive(Default)] +pub struct DynamicLevelSelector {} -impl LevelSelectorCore { - pub fn new(config: Arc, overlap_strategy: Arc) -> Self { - Self { - config, - overlap_strategy, - } +impl DynamicLevelSelectorCore { + pub fn new(config: Arc) -> Self { + Self { config } } pub fn get_config(&self) -> &CompactionConfig { self.config.as_ref() } - pub fn get_overlap_strategy(&self) -> Arc { - self.overlap_strategy.clone() - } - fn create_compaction_picker( &self, select_level: usize, target_level: usize, + overlap_strategy: Arc, ) -> Box { if select_level == 0 { if target_level == 0 { Box::new(TierCompactionPicker::new( self.config.clone(), - self.overlap_strategy.clone(), + overlap_strategy, )) } else { Box::new(LevelCompactionPicker::new( target_level, self.config.clone(), - self.overlap_strategy.clone(), + overlap_strategy, )) } } else { @@ -128,7 +112,7 @@ impl LevelSelectorCore { select_level, target_level, self.config.max_bytes_for_level_base, - self.overlap_strategy.clone(), + overlap_strategy, )) } } @@ -252,61 +236,40 @@ impl 
LevelSelectorCore { ctx.score_levels.sort_by(|a, b| b.0.cmp(&a.0)); ctx } - - pub fn create_compaction_task( - &self, - input: CompactionInput, - base_level: usize, - ) -> CompactionTask { - let target_file_size = if input.target_level == 0 { - self.config.target_file_size_base - } else { - assert!(input.target_level >= base_level); - let step = (input.target_level - base_level) / 2; - self.config.target_file_size_base << step - }; - let compression_algorithm = if input.target_level == 0 { - self.config.compression_algorithm[0].clone() - } else { - let idx = input.target_level - base_level + 1; - self.config.compression_algorithm[idx].clone() - }; - CompactionTask { - input, - compression_algorithm, - target_file_size, - } - } } impl LevelSelector for DynamicLevelSelector { - fn need_compaction(&self, levels: &Levels, level_handlers: &[LevelHandler]) -> bool { - let ctx = self.inner.get_priority_levels(levels, level_handlers); - ctx.score_levels - .first() - .map(|(score, _, _)| *score > SCORE_BASE) - .unwrap_or(false) - } - fn pick_compaction( - &self, + &mut self, task_id: HummockCompactionTaskId, + compaction_group: &CompactionGroup, levels: &Levels, level_handlers: &mut [LevelHandler], selector_stats: &mut LocalSelectorStatistic, ) -> Option { - let ctx = self.inner.get_priority_levels(levels, level_handlers); + let dynamic_level_core = + DynamicLevelSelectorCore::new(compaction_group.compaction_config.clone()); + let overlap_strategy = + create_overlap_strategy(compaction_group.compaction_config.compaction_mode()); + let ctx = dynamic_level_core.get_priority_levels(levels, level_handlers); for (score, select_level, target_level) in ctx.score_levels { if score <= SCORE_BASE { return None; } - let picker = self - .inner - .create_compaction_picker(select_level, target_level); + let mut picker = dynamic_level_core.create_compaction_picker( + select_level, + target_level, + overlap_strategy.clone(), + ); let mut stats = LocalPickerStatistic::default(); if let Some(ret) = picker.pick_compaction(levels, level_handlers, &mut stats) { ret.add_pending_task(task_id, level_handlers); - return Some(self.inner.create_compaction_task(ret, ctx.base_level)); + return Some(create_compaction_task( + dynamic_level_core.get_config(), + ret, + ctx.base_level, + self.task_type(), + )); } selector_stats .skip_picker @@ -318,6 +281,160 @@ impl LevelSelector for DynamicLevelSelector { fn name(&self) -> &'static str { "DynamicLevelSelector" } + + fn task_type(&self) -> compact_task::TaskType { + compact_task::TaskType::Dynamic + } +} + +pub struct ManualCompactionSelector { + option: ManualCompactionOption, +} + +impl ManualCompactionSelector { + pub fn new(option: ManualCompactionOption) -> Self { + Self { option } + } +} + +impl LevelSelector for ManualCompactionSelector { + fn pick_compaction( + &mut self, + task_id: HummockCompactionTaskId, + group: &CompactionGroup, + levels: &Levels, + level_handlers: &mut [LevelHandler], + _selector_stats: &mut LocalSelectorStatistic, + ) -> Option { + let dynamic_level_core = DynamicLevelSelectorCore::new(group.compaction_config.clone()); + let overlap_strategy = create_overlap_strategy(group.compaction_config.compaction_mode()); + let ctx = dynamic_level_core.calculate_level_base_size(levels); + let (mut picker, base_level) = { + let target_level = if self.option.level == 0 { + ctx.base_level + } else if self.option.level == group.compaction_config.max_level as usize { + self.option.level + } else { + self.option.level + 1 + }; + if self.option.level > 0 && 
self.option.level < ctx.base_level { + return None; + } + ( + ManualCompactionPicker::new(overlap_strategy, self.option.clone(), target_level), + ctx.base_level, + ) + }; + + let compaction_input = + picker.pick_compaction(levels, level_handlers, &mut LocalPickerStatistic::default())?; + compaction_input.add_pending_task(task_id, level_handlers); + + Some(create_compaction_task( + group.compaction_config.as_ref(), + compaction_input, + base_level, + self.task_type(), + )) + } + + fn name(&self) -> &'static str { + "ManualCompactionSelector" + } + + fn task_type(&self) -> compact_task::TaskType { + compact_task::TaskType::Manual + } +} + +#[derive(Default)] +pub struct SpaceReclaimCompactionSelector { + state: HashMap, +} + +impl LevelSelector for SpaceReclaimCompactionSelector { + fn pick_compaction( + &mut self, + task_id: HummockCompactionTaskId, + group: &CompactionGroup, + levels: &Levels, + level_handlers: &mut [LevelHandler], + _selector_stats: &mut LocalSelectorStatistic, + ) -> Option { + let dynamic_level_core = DynamicLevelSelectorCore::new(group.compaction_config.clone()); + let mut picker = SpaceReclaimCompactionPicker::new( + group.compaction_config.max_space_reclaim_bytes, + levels.member_table_ids.iter().cloned().collect(), + ); + let ctx = dynamic_level_core.calculate_level_base_size(levels); + let state = self + .state + .entry(group.group_id) + .or_insert_with(SpaceReclaimPickerState::default); + let compaction_input = picker.pick_compaction(levels, level_handlers, state)?; + compaction_input.add_pending_task(task_id, level_handlers); + + Some(create_compaction_task( + dynamic_level_core.get_config(), + compaction_input, + ctx.base_level, + self.task_type(), + )) + } + + fn name(&self) -> &'static str { + "SpaceReclaimCompaction" + } + + fn task_type(&self) -> compact_task::TaskType { + compact_task::TaskType::SpaceReclaim + } +} + +#[derive(Default)] +pub struct TtlCompactionSelector { + state: HashMap, +} + +impl LevelSelector for TtlCompactionSelector { + fn pick_compaction( + &mut self, + task_id: HummockCompactionTaskId, + group: &CompactionGroup, + levels: &Levels, + level_handlers: &mut [LevelHandler], + _selector_stats: &mut LocalSelectorStatistic, + ) -> Option { + let dynamic_level_core = DynamicLevelSelectorCore::new(group.compaction_config.clone()); + let ctx = dynamic_level_core.calculate_level_base_size(levels); + let picker = + TtlReclaimCompactionPicker::new(group.compaction_config.max_space_reclaim_bytes); + let state = self + .state + .entry(group.group_id) + .or_insert_with(TtlPickerState::default); + let compaction_input = picker.pick_compaction(levels, level_handlers, state)?; + compaction_input.add_pending_task(task_id, level_handlers); + + Some(create_compaction_task( + group.compaction_config.as_ref(), + compaction_input, + ctx.base_level, + self.task_type(), + )) + } + + fn name(&self) -> &'static str { + "TtlCompaction" + } + + fn task_type(&self) -> compact_task::TaskType { + compact_task::TaskType::Ttl + } +} + +pub fn default_level_selector() -> Box { + Box::::default() } #[cfg(test)] @@ -331,7 +448,6 @@ pub mod tests { use super::*; use crate::hummock::compaction::compaction_config::CompactionConfigBuilder; - use crate::hummock::compaction::overlap_strategy::RangeOverlapStrategy; use crate::hummock::test_utils::iterator_test_key_of_epoch; pub fn push_table_level0_overlapping(levels: &mut Levels, sst: SstableInfo) { @@ -393,6 +509,30 @@ pub mod tests { } } + pub fn generate_table_with_table_ids( + id: u64, + table_prefix: u64, + left: usize, + 
right: usize, + epoch: u64, + table_ids: Vec, + ) -> SstableInfo { + SstableInfo { + id, + key_range: Some(KeyRange { + left: iterator_test_key_of_epoch(table_prefix, left, epoch), + right: iterator_test_key_of_epoch(table_prefix, right, epoch), + right_exclusive: false, + }), + file_size: (right - left + 1) as u64, + table_ids, + meta_offset: 0, + stale_key_count: 0, + total_key_count: 0, + divide_version: 0, + } + } + pub fn generate_tables( ids: Range, keys: Range, @@ -486,8 +626,7 @@ pub mod tests { .level0_tier_compact_file_number(2) .compaction_mode(CompactionMode::Range as i32) .build(); - let selector = - LevelSelectorCore::new(Arc::new(config), Arc::new(RangeOverlapStrategy::default())); + let selector = DynamicLevelSelectorCore::new(Arc::new(config)); let levels = vec![ generate_level(1, vec![]), generate_level(2, generate_tables(0..5, 0..1000, 3, 10)), @@ -497,6 +636,7 @@ pub mod tests { let mut levels = Levels { levels, l0: Some(generate_l0_nonoverlapping_sublevels(vec![])), + ..Default::default() }; let ctx = selector.calculate_level_base_size(&levels); assert_eq!(ctx.base_level, 2); @@ -558,6 +698,7 @@ pub mod tests { .level0_tier_compact_file_number(4) .compaction_mode(CompactionMode::Range as i32) .build(); + let group_config = CompactionGroup::new(1, config.clone()); let levels = vec![ generate_level(1, vec![]), generate_level(2, generate_tables(0..5, 0..1000, 3, 10)), @@ -572,16 +713,20 @@ pub mod tests { 3, 10, ))), + ..Default::default() }; - let selector = DynamicLevelSelector::new( - Arc::new(config.clone()), - Arc::new(RangeOverlapStrategy::default()), - ); - let mut levels_handlers = (0..5).into_iter().map(LevelHandler::new).collect_vec(); + let mut selector = DynamicLevelSelector::default(); + let mut levels_handlers = (0..5).map(LevelHandler::new).collect_vec(); let mut local_stats = LocalSelectorStatistic::default(); let compaction = selector - .pick_compaction(1, &levels, &mut levels_handlers, &mut local_stats) + .pick_compaction( + 1, + &group_config, + &levels, + &mut levels_handlers, + &mut local_stats, + ) .unwrap(); // trivial move. 
assert_compaction_task(&compaction, &levels_handlers); @@ -594,17 +739,21 @@ pub mod tests { .max_bytes_for_level_base(100) .compaction_filter_mask(compaction_filter_flag.into()) .build(); - let selector = DynamicLevelSelector::new( - Arc::new(config.clone()), - Arc::new(RangeOverlapStrategy::default()), - ); + let group_config = CompactionGroup::new(1, config.clone()); + let mut selector = DynamicLevelSelector::default(); levels.l0.as_mut().unwrap().sub_levels.clear(); levels.l0.as_mut().unwrap().total_file_size = 0; push_tables_level0_nonoverlapping(&mut levels, generate_tables(15..25, 0..600, 3, 20)); - let mut levels_handlers = (0..5).into_iter().map(LevelHandler::new).collect_vec(); + let mut levels_handlers = (0..5).map(LevelHandler::new).collect_vec(); let compaction = selector - .pick_compaction(1, &levels, &mut levels_handlers, &mut local_stats) + .pick_compaction( + 1, + &group_config, + &levels, + &mut levels_handlers, + &mut local_stats, + ) .unwrap(); assert_compaction_task(&compaction, &levels_handlers); assert_eq!(compaction.input.input_levels[0].level_idx, 0); @@ -616,7 +765,13 @@ pub mod tests { levels.l0.as_mut().unwrap().sub_levels.clear(); levels.levels[1].table_infos = generate_tables(20..30, 0..1000, 3, 10); let compaction = selector - .pick_compaction(2, &levels, &mut levels_handlers, &mut local_stats) + .pick_compaction( + 2, + &group_config, + &levels, + &mut levels_handlers, + &mut local_stats, + ) .unwrap(); assert_compaction_task(&compaction, &levels_handlers); assert_eq!(compaction.input.input_levels[0].level_idx, 3); @@ -630,8 +785,13 @@ pub mod tests { assert_eq!(compaction.compression_algorithm.as_str(), "Lz4",); // no compaction need to be scheduled because we do not calculate the size of pending files // to score. - let compaction = - selector.pick_compaction(2, &levels, &mut levels_handlers, &mut local_stats); + let compaction = selector.pick_compaction( + 2, + &group_config, + &levels, + &mut levels_handlers, + &mut local_stats, + ); assert!(compaction.is_none()); } } diff --git a/src/meta/src/hummock/compaction/mod.rs b/src/meta/src/hummock/compaction/mod.rs index 1a970180fa4e9..2c015e44760a7 100644 --- a/src/meta/src/hummock/compaction/mod.rs +++ b/src/meta/src/hummock/compaction/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
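A rough sketch of the reworked selector calling convention exercised in the tests above: selectors no longer own a CompactionConfig or overlap strategy; the per-group config arrives through the CompactionGroup argument, and callers that only want the default behaviour can use default_level_selector(). The levels and level_handlers bindings are assumed to come from the current hummock version:

    // Sketch: driving a selector against one compaction group.
    let group = CompactionGroup::new(1, CompactionConfigBuilder::new().build());
    let mut selector = default_level_selector(); // boxed DynamicLevelSelector
    let mut local_stats = LocalSelectorStatistic::default();
    if let Some(task) =
        selector.pick_compaction(1, &group, &levels, &mut level_handlers, &mut local_stats)
    {
        // every CompactionTask now carries its flavour
        assert!(matches!(task.compaction_task_type, compact_task::TaskType::Dynamic));
    }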
@@ -14,33 +14,34 @@ pub mod compaction_config; mod level_selector; -mod manual_compaction_picker; -mod min_overlap_compaction_picker; mod overlap_strategy; -mod prost_type; -mod tier_compaction_picker; use risingwave_hummock_sdk::prost_key_range::KeyRangeExt; -use risingwave_pb::hummock::compact_task::TaskStatus; -pub use tier_compaction_picker::TierCompactionPicker; -mod base_level_compaction_picker; +use risingwave_pb::hummock::compact_task::{self, TaskStatus}; + +mod picker; use std::collections::{HashMap, HashSet}; use std::fmt::{Debug, Formatter}; use std::sync::Arc; -pub use base_level_compaction_picker::LevelCompactionPicker; +use picker::{ + LevelCompactionPicker, ManualCompactionPicker, MinOverlappingPicker, TierCompactionPicker, +}; use risingwave_hummock_sdk::{CompactionGroupId, HummockCompactionTaskId, HummockEpoch}; use risingwave_pb::hummock::compaction_config::CompactionMode; use risingwave_pb::hummock::hummock_version::Levels; use risingwave_pb::hummock::{CompactTask, CompactionConfig, InputLevel, KeyRange, LevelType}; -use crate::hummock::compaction::level_selector::{DynamicLevelSelector, LevelSelector}; -use crate::hummock::compaction::manual_compaction_picker::ManualCompactionSelector; +pub use crate::hummock::compaction::level_selector::{ + default_level_selector, DynamicLevelSelector, LevelSelector, ManualCompactionSelector, + SpaceReclaimCompactionSelector, TtlCompactionSelector, +}; use crate::hummock::compaction::overlap_strategy::{OverlapStrategy, RangeOverlapStrategy}; use crate::hummock::level_handler::LevelHandler; +use crate::hummock::model::CompactionGroup; use crate::rpc::metrics::MetaMetrics; pub struct CompactStatus { - compaction_group_id: CompactionGroupId, + pub(crate) compaction_group_id: CompactionGroupId, pub(crate) level_handlers: Vec, } @@ -91,6 +92,7 @@ pub struct CompactionTask { pub input: CompactionInput, pub compression_algorithm: String, pub target_file_size: u64, + pub compaction_task_type: compact_task::TaskType, } pub fn create_overlap_strategy(compaction_mode: CompactionMode) -> Arc { @@ -116,27 +118,15 @@ impl CompactStatus { &mut self, levels: &Levels, task_id: HummockCompactionTaskId, - compaction_group_id: CompactionGroupId, - manual_compaction_option: Option, - compaction_config: CompactionConfig, + group: &CompactionGroup, stats: &mut LocalSelectorStatistic, + selector: &mut Box, ) -> Option { // When we compact the files, we must make the result of compaction meet the following // conditions, for any user key, the epoch of it in the file existing in the lower // layer must be larger. - - let ret = if let Some(manual_compaction_option) = manual_compaction_option { - self.manual_pick_compaction( - levels, - task_id, - manual_compaction_option, - compaction_config, - stats, - )? - } else { - self.pick_compaction(levels, task_id, compaction_config, stats)? - }; - + let ret = + selector.pick_compaction(task_id, group, levels, &mut self.level_handlers, stats)?; let target_level_id = ret.input.target_level; let compression_algorithm = match ret.compression_algorithm.as_str() { @@ -156,7 +146,7 @@ impl CompactStatus { // level. 
gc_delete_keys: target_level_id == self.level_handlers.len() - 1, task_status: TaskStatus::Pending as i32, - compaction_group_id, + compaction_group_id: group.group_id, existing_table_ids: vec![], compression_algorithm, target_file_size: ret.target_file_size, @@ -164,6 +154,7 @@ impl CompactStatus { table_options: HashMap::default(), current_epoch_time: 0, target_sub_level_id: ret.input.target_sub_level_id, + task_type: ret.compaction_task_type as i32, }; Some(compact_task) } @@ -191,36 +182,6 @@ impl CompactStatus { false } - fn pick_compaction( - &mut self, - levels: &Levels, - task_id: HummockCompactionTaskId, - compaction_config: CompactionConfig, - stats: &mut LocalSelectorStatistic, - ) -> Option { - self.create_level_selector(compaction_config) - .pick_compaction(task_id, levels, &mut self.level_handlers, stats) - } - - fn manual_pick_compaction( - &mut self, - levels: &Levels, - task_id: HummockCompactionTaskId, - manual_compaction_option: ManualCompactionOption, - compaction_config: CompactionConfig, - stats: &mut LocalSelectorStatistic, - ) -> Option { - // manual_compaction no need to select level - // level determined by option - let overlap_strategy = create_overlap_strategy(compaction_config.compaction_mode()); - ManualCompactionSelector::new( - Arc::new(compaction_config), - overlap_strategy, - manual_compaction_option, - ) - .pick_compaction(task_id, levels, &mut self.level_handlers, stats) - } - /// Declares a task as either succeeded, failed or canceled. pub fn report_compact_task(&mut self, compact_task: &CompactTask) { for level in &compact_task.input_ssts { @@ -244,21 +205,9 @@ impl CompactStatus { pub fn compaction_group_id(&self) -> CompactionGroupId { self.compaction_group_id } - - /// Creates a level selector. - /// - /// The method should be lightweight because we recreate a level selector everytime so that the - /// latest compaction config is applied to it. - fn create_level_selector(&self, compaction_config: CompactionConfig) -> Box { - let overlap_strategy = create_overlap_strategy(compaction_config.compaction_mode()); - Box::new(DynamicLevelSelector::new( - Arc::new(compaction_config), - overlap_strategy, - )) - } } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub struct ManualCompactionOption { /// Filters out SSTs to pick. Has no effect if empty. 
pub sst_ids: Vec, @@ -336,9 +285,37 @@ impl LocalSelectorStatistic { pub trait CompactionPicker { fn pick_compaction( - &self, + &mut self, levels: &Levels, level_handlers: &[LevelHandler], stats: &mut LocalPickerStatistic, ) -> Option; } + +pub fn create_compaction_task( + compaction_config: &CompactionConfig, + input: CompactionInput, + base_level: usize, + compaction_task_type: compact_task::TaskType, +) -> CompactionTask { + let target_file_size = if input.target_level == 0 { + compaction_config.target_file_size_base + } else { + assert!(input.target_level >= base_level); + let step = (input.target_level - base_level) / 2; + compaction_config.target_file_size_base << step + }; + let compression_algorithm = if input.target_level == 0 { + compaction_config.compression_algorithm[0].clone() + } else { + let idx = input.target_level - base_level + 1; + compaction_config.compression_algorithm[idx].clone() + }; + + CompactionTask { + input, + compression_algorithm, + target_file_size, + compaction_task_type, + } +} diff --git a/src/meta/src/hummock/compaction/overlap_strategy.rs b/src/meta/src/hummock/compaction/overlap_strategy.rs index 137e00f0e60a5..c3af574c81e1f 100644 --- a/src/meta/src/hummock/compaction/overlap_strategy.rs +++ b/src/meta/src/hummock/compaction/overlap_strategy.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/hummock/compaction/base_level_compaction_picker.rs b/src/meta/src/hummock/compaction/picker/base_level_compaction_picker.rs similarity index 96% rename from src/meta/src/hummock/compaction/base_level_compaction_picker.rs rename to src/meta/src/hummock/compaction/picker/base_level_compaction_picker.rs index 88b1f2f535653..9a2430ef83bc4 100644 --- a/src/meta/src/hummock/compaction/base_level_compaction_picker.rs +++ b/src/meta/src/hummock/compaction/picker/base_level_compaction_picker.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
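The create_compaction_task helper extracted above keeps the previous sizing rule: the target file size doubles for every two levels past the base level, and the compression algorithm is chosen per level, with index 0 reserved for L0 tasks. Worked through with the 32 MB default target_file_size_base:

    // target_level == 0              -> target_file_size_base      = 32 MB
    // target_level == base_level     -> 32 MB << 0                 = 32 MB
    // target_level == base_level + 2 -> 32 MB << 1                 = 64 MB
    // target_level == base_level + 4 -> 32 MB << 2                 = 128 MB
    // compression: compression_algorithm[target_level - base_level + 1]
    //              (compression_algorithm[0] is used for L0 tasks)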
@@ -20,9 +20,10 @@ use risingwave_pb::hummock::{ CompactionConfig, InputLevel, Level, LevelType, OverlappingLevel, SstableInfo, }; -use crate::hummock::compaction::min_overlap_compaction_picker::MinOverlappingPicker; use crate::hummock::compaction::overlap_strategy::OverlapStrategy; -use crate::hummock::compaction::{CompactionInput, CompactionPicker, LocalPickerStatistic}; +use crate::hummock::compaction::{ + CompactionInput, CompactionPicker, LocalPickerStatistic, MinOverlappingPicker, +}; use crate::hummock::level_handler::LevelHandler; fn cal_file_size(table_infos: &[SstableInfo]) -> u64 { @@ -37,7 +38,7 @@ pub struct LevelCompactionPicker { impl CompactionPicker for LevelCompactionPicker { fn pick_compaction( - &self, + &mut self, levels: &Levels, level_handlers: &[LevelHandler], stats: &mut LocalPickerStatistic, @@ -228,7 +229,7 @@ pub mod tests { #[test] fn test_compact_l0_to_l1() { - let picker = create_compaction_picker_for_test(); + let mut picker = create_compaction_picker_for_test(); let l0 = generate_level( 0, vec![ @@ -250,6 +251,7 @@ pub mod tests { generate_table(0, 1, 301, 400, 1), ], )], + ..Default::default() }; let mut local_stats = LocalPickerStatistic::default(); let mut levels_handler = vec![LevelHandler::new(0), LevelHandler::new(1)]; @@ -314,7 +316,7 @@ pub mod tests { .compaction_mode(CompactionMode::Range as i32) .build(), ); - let picker = + let mut picker = LevelCompactionPicker::new(1, config, Arc::new(RangeOverlapStrategy::default())); let levels = vec![Level { @@ -334,6 +336,7 @@ pub mod tests { sub_levels: vec![], total_file_size: 0, }), + ..Default::default() }; push_tables_level0_nonoverlapping(&mut levels, vec![generate_table(1, 1, 50, 60, 2)]); push_tables_level0_nonoverlapping( @@ -375,7 +378,7 @@ pub mod tests { fn test_l0_to_l1_compact_conflict() { // When picking L0->L1, L0's selecting_key_range should not be overlapped with L0's // compacting_key_range. - let picker = create_compaction_picker_for_test(); + let mut picker = create_compaction_picker_for_test(); let levels = vec![Level { level_idx: 1, level_type: LevelType::Nonoverlapping as i32, @@ -389,6 +392,7 @@ pub mod tests { sub_levels: vec![], total_file_size: 0, }), + ..Default::default() }; push_tables_level0_nonoverlapping( &mut levels, @@ -406,7 +410,7 @@ pub mod tests { ret.add_pending_task(0, &mut levels_handler); push_tables_level0_nonoverlapping(&mut levels, vec![generate_table(3, 1, 250, 300, 3)]); - let picker = + let mut picker = TierCompactionPicker::new(picker.config.clone(), picker.overlap_strategy.clone()); assert!(picker .pick_compaction(&levels, &levels_handler, &mut local_stats) @@ -417,7 +421,7 @@ pub mod tests { fn test_compact_to_l1_concurrently() { // When picking L0->L1, L0's selecting_key_range should not be overlapped with any L1 files // under compaction. - let picker = create_compaction_picker_for_test(); + let mut picker = create_compaction_picker_for_test(); let mut levels = Levels { levels: vec![Level { @@ -430,6 +434,7 @@ pub mod tests { l0: Some(generate_l0_nonoverlapping_sublevels(vec![generate_table( 1, 1, 160, 280, 2, )])), + ..Default::default() }; let mut levels_handler = vec![LevelHandler::new(0), LevelHandler::new(1)]; @@ -472,7 +477,7 @@ pub mod tests { fn test_compacting_key_range_overlap_intra_l0() { // When picking L0->L0, L0's selecting_key_range should not be overlapped with L0's // compacting_key_range. 
- let picker = create_compaction_picker_for_test(); + let mut picker = create_compaction_picker_for_test(); let mut levels = Levels { levels: vec![Level { @@ -486,6 +491,7 @@ pub mod tests { generate_table(1, 1, 100, 210, 2), generate_table(2, 1, 200, 250, 2), ])), + ..Default::default() }; let mut levels_handler = vec![LevelHandler::new(0), LevelHandler::new(1)]; @@ -508,7 +514,7 @@ pub mod tests { .level0_tier_compact_file_number(2) .max_compaction_bytes(1000) .build(); - let picker = LevelCompactionPicker::new( + let mut picker = LevelCompactionPicker::new( 1, Arc::new(config), Arc::new(RangeOverlapStrategy::default()), @@ -527,6 +533,7 @@ pub mod tests { sub_level_id: 0, }], l0: Some(generate_l0_nonoverlapping_sublevels(vec![])), + ..Default::default() }; push_tables_level0_nonoverlapping( &mut levels, @@ -567,7 +574,7 @@ pub mod tests { #[test] fn test_skip_compact_write_amplification_limit() { - let picker = create_compaction_picker_for_test(); + let mut picker = create_compaction_picker_for_test(); let mut levels = Levels { levels: vec![Level { level_idx: 1, @@ -581,6 +588,7 @@ pub mod tests { sub_level_id: 0, }], l0: Some(generate_l0_nonoverlapping_sublevels(vec![])), + ..Default::default() }; push_tables_level0_nonoverlapping( &mut levels, @@ -615,6 +623,7 @@ pub mod tests { let levels = Levels { l0: Some(l0), levels: vec![generate_level(1, vec![generate_table(3, 1, 0, 100000, 1)])], + ..Default::default() }; let levels_handler = vec![LevelHandler::new(0), LevelHandler::new(1)]; @@ -626,7 +635,7 @@ pub mod tests { ); // Only include sub-level 0 results will violate MAX_WRITE_AMPLIFICATION. // So all sub-levels are included to make write amplification < MAX_WRITE_AMPLIFICATION. - let picker = + let mut picker = LevelCompactionPicker::new(1, config, Arc::new(RangeOverlapStrategy::default())); let ret = picker .pick_compaction(&levels, &levels_handler, &mut local_stats) @@ -651,7 +660,7 @@ pub mod tests { .max_compaction_bytes(50000) .build(), ); - let picker = + let mut picker = LevelCompactionPicker::new(1, config, Arc::new(RangeOverlapStrategy::default())); let ret = picker .pick_compaction(&levels, &levels_handler, &mut local_stats) @@ -688,6 +697,7 @@ pub mod tests { let levels = Levels { l0: Some(l0), levels: vec![generate_level(1, vec![generate_table(3, 1, 0, 100000, 1)])], + ..Default::default() }; let mut levels_handler = vec![LevelHandler::new(0), LevelHandler::new(1)]; let mut local_stats = LocalPickerStatistic::default(); @@ -717,7 +727,7 @@ pub mod tests { // Only include sub-level 0 results will violate MAX_WRITE_AMPLIFICATION. // But stopped by pending sub-level when trying to include more sub-levels. - let picker = LevelCompactionPicker::new( + let mut picker = LevelCompactionPicker::new( 1, config.clone(), Arc::new(RangeOverlapStrategy::default()), @@ -732,7 +742,7 @@ pub mod tests { } // No more pending sub-level so we can get a task now. 
- let picker = + let mut picker = LevelCompactionPicker::new(1, config, Arc::new(RangeOverlapStrategy::default())); picker .pick_compaction(&levels, &levels_handler, &mut local_stats) diff --git a/src/meta/src/hummock/compaction/manual_compaction_picker.rs b/src/meta/src/hummock/compaction/picker/manual_compaction_picker.rs similarity index 90% rename from src/meta/src/hummock/compaction/manual_compaction_picker.rs rename to src/meta/src/hummock/compaction/picker/manual_compaction_picker.rs index 9b58c315c41f6..a511dc30f6be6 100644 --- a/src/meta/src/hummock/compaction/manual_compaction_picker.rs +++ b/src/meta/src/hummock/compaction/picker/manual_compaction_picker.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,18 +17,14 @@ use std::sync::Arc; use itertools::Itertools; use risingwave_hummock_sdk::compaction_group::hummock_version_ext::HummockLevelsExt; -use risingwave_hummock_sdk::HummockCompactionTaskId; use risingwave_pb::hummock::hummock_version::Levels; -use risingwave_pb::hummock::{ - CompactionConfig, InputLevel, Level, LevelType, OverlappingLevel, SstableInfo, -}; +use risingwave_pb::hummock::{InputLevel, Level, LevelType, OverlappingLevel, SstableInfo}; -use super::overlap_strategy::OverlapInfo; -use crate::hummock::compaction::level_selector::{LevelSelector, LevelSelectorCore}; -use crate::hummock::compaction::overlap_strategy::{OverlapStrategy, RangeOverlapInfo}; +use crate::hummock::compaction::overlap_strategy::{ + OverlapInfo, OverlapStrategy, RangeOverlapInfo, +}; use crate::hummock::compaction::{ - CompactionInput, CompactionPicker, CompactionTask, LocalPickerStatistic, - LocalSelectorStatistic, ManualCompactionOption, + CompactionInput, CompactionPicker, LocalPickerStatistic, ManualCompactionOption, }; use crate::hummock::level_handler::LevelHandler; @@ -130,6 +126,7 @@ impl ManualCompactionPicker { if level_handlers[0].is_level_pending_compact(level) { return None; } + // Pick this sub_level. 
max_sub_level_idx = idx; } @@ -212,7 +209,7 @@ impl ManualCompactionPicker { impl CompactionPicker for ManualCompactionPicker { fn pick_compaction( - &self, + &mut self, levels: &Levels, level_handlers: &[LevelHandler], _stats: &mut LocalPickerStatistic, @@ -320,72 +317,11 @@ impl CompactionPicker for ManualCompactionPicker { } } -pub struct ManualCompactionSelector { - inner: LevelSelectorCore, - option: ManualCompactionOption, -} - -impl ManualCompactionSelector { - pub fn new( - config: Arc, - overlap_strategy: Arc, - option: ManualCompactionOption, - ) -> Self { - Self { - inner: LevelSelectorCore::new(config, overlap_strategy), - option, - } - } -} - -impl LevelSelector for ManualCompactionSelector { - fn need_compaction(&self, levels: &Levels, _: &[LevelHandler]) -> bool { - let ctx = self.inner.calculate_level_base_size(levels); - if self.option.level > 0 && self.option.level < ctx.base_level { - return false; - } - true - } - - fn pick_compaction( - &self, - task_id: HummockCompactionTaskId, - levels: &Levels, - level_handlers: &mut [LevelHandler], - _selector_stats: &mut LocalSelectorStatistic, - ) -> Option { - let ctx = self.inner.calculate_level_base_size(levels); - let target_level = if self.option.level == 0 { - ctx.base_level - } else if self.option.level == self.inner.get_config().max_level as usize { - self.option.level - } else { - self.option.level + 1 - }; - if self.option.level > 0 && self.option.level < ctx.base_level { - return None; - } - let picker = ManualCompactionPicker::new( - self.inner.get_overlap_strategy(), - self.option.clone(), - target_level, - ); - - let ret = - picker.pick_compaction(levels, level_handlers, &mut LocalPickerStatistic::default())?; - ret.add_pending_task(task_id, level_handlers); - Some(self.inner.create_compaction_task(ret, ctx.base_level)) - } - - fn name(&self) -> &'static str { - "ManualCompactionSelector" - } -} - #[cfg(test)] pub mod tests { use std::collections::HashSet; + use risingwave_pb::hummock::compact_task; pub use risingwave_pb::hummock::{KeyRange, Level, LevelType}; use super::*; @@ -394,7 +330,10 @@ pub mod tests { assert_compaction_task, generate_l0_nonoverlapping_sublevels, generate_l0_overlapping_sublevels, generate_level, generate_table, }; + use crate::hummock::compaction::level_selector::{LevelSelector, ManualCompactionSelector}; use crate::hummock::compaction::overlap_strategy::RangeOverlapStrategy; + use crate::hummock::compaction::LocalSelectorStatistic; + use crate::hummock::model::CompactionGroup; use crate::hummock::test_utils::iterator_test_key_of_epoch; fn clean_task_state(level_handler: &mut LevelHandler) { @@ -458,6 +397,7 @@ pub mod tests { let mut levels = Levels { levels, l0: Some(generate_l0_nonoverlapping_sublevels(vec![])), + ..Default::default() }; let mut levels_handler = vec![ LevelHandler::new(0), @@ -479,7 +419,7 @@ pub mod tests { }; let target_level = option.level + 1; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( Arc::new(RangeOverlapStrategy::default()), option, target_level, @@ -500,7 +440,7 @@ pub mod tests { // test all key range let option = ManualCompactionOption::default(); let target_level = option.level + 1; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( Arc::new(RangeOverlapStrategy::default()), option, target_level, @@ -532,7 +472,7 @@ pub mod tests { }; let target_level = option.level + 1; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( 
Arc::new(RangeOverlapStrategy::default()), option, target_level, @@ -572,7 +512,7 @@ pub mod tests { }; let target_level = option.level + 1; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( Arc::new(RangeOverlapStrategy::default()), option, target_level, @@ -643,6 +583,7 @@ pub mod tests { let levels = Levels { levels, l0: Some(l0), + ..Default::default() }; let levels_handler = vec![ @@ -670,6 +611,7 @@ pub mod tests { let levels = Levels { levels, l0: Some(l0), + ..Default::default() }; let levels_handler = vec![LevelHandler::new(0), LevelHandler::new(1)]; @@ -689,6 +631,7 @@ pub mod tests { let levels = Levels { levels, l0: Some(l0), + ..Default::default() }; let levels_handler = vec![LevelHandler::new(0), LevelHandler::new(1)]; let option = ManualCompactionOption { @@ -701,7 +644,7 @@ pub mod tests { }, internal_table_id: HashSet::default(), }; - let picker = + let mut picker = ManualCompactionPicker::new(Arc::new(RangeOverlapStrategy::default()), option, 0); assert!(picker .pick_compaction( @@ -727,7 +670,7 @@ pub mod tests { }, internal_table_id: HashSet::default(), }; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( Arc::new(RangeOverlapStrategy::default()), option.clone(), 0, @@ -738,7 +681,7 @@ pub mod tests { .is_none()); // pick_l0_to_base_level - let picker = + let mut picker = ManualCompactionPicker::new(Arc::new(RangeOverlapStrategy::default()), option, 1); let mut expected = vec![vec![5, 6], vec![7, 8], vec![9, 10]]; expected.reverse(); @@ -774,7 +717,7 @@ pub mod tests { }, internal_table_id: HashSet::default(), }; - let picker = + let mut picker = ManualCompactionPicker::new(Arc::new(RangeOverlapStrategy::default()), option, 1); let mut expected = vec![vec![5, 6], vec![7, 8]]; expected.reverse(); @@ -825,7 +768,7 @@ pub mod tests { }, internal_table_id: HashSet::default(), }; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( Arc::new(RangeOverlapStrategy::default()), option.clone(), // l0 to l0 will ignore target_level @@ -867,7 +810,7 @@ pub mod tests { // No matching internal table id. internal_table_id: HashSet::from([100]), }; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( Arc::new(RangeOverlapStrategy::default()), option, target_level, @@ -889,7 +832,7 @@ pub mod tests { // Include all sub level's table ids internal_table_id: HashSet::from([1, 2, 3]), }; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( Arc::new(RangeOverlapStrategy::default()), option, target_level, @@ -933,7 +876,7 @@ pub mod tests { // Only include bottom sub level's table id internal_table_id: HashSet::from([3]), }; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( Arc::new(RangeOverlapStrategy::default()), option, target_level, @@ -977,7 +920,7 @@ pub mod tests { // picked. internal_table_id: HashSet::from([1]), }; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( Arc::new(RangeOverlapStrategy::default()), option, target_level, @@ -1020,7 +963,7 @@ pub mod tests { // Only include bottom sub level's table id internal_table_id: HashSet::from([3]), }; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( Arc::new(RangeOverlapStrategy::default()), option, target_level, @@ -1053,7 +996,7 @@ pub mod tests { // No matching internal table id. 
internal_table_id: HashSet::from([100]), }; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( Arc::new(RangeOverlapStrategy::default()), option, target_level, @@ -1076,7 +1019,7 @@ pub mod tests { // Only include partial input level's table id internal_table_id: HashSet::from([1]), }; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( Arc::new(RangeOverlapStrategy::default()), option, target_level, @@ -1127,7 +1070,7 @@ pub mod tests { }, internal_table_id: HashSet::default(), }; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( Arc::new(RangeOverlapStrategy::default()), option.clone(), input_level + 1, @@ -1173,7 +1116,7 @@ pub mod tests { }, internal_table_id: HashSet::default(), }; - let picker = ManualCompactionPicker::new( + let mut picker = ManualCompactionPicker::new( Arc::new(RangeOverlapStrategy::default()), option.clone(), *input_level as _, @@ -1201,7 +1144,8 @@ pub mod tests { #[test] fn test_manual_compaction_selector_l0() { - let config = Arc::new(CompactionConfigBuilder::new().max_level(4).build()); + let config = CompactionConfigBuilder::new().max_level(4).build(); + let group_config = CompactionGroup::new(1, config); let l0 = generate_l0_nonoverlapping_sublevels(vec![ generate_table(0, 1, 0, 500, 1), generate_table(1, 1, 0, 500, 1), @@ -1227,8 +1171,9 @@ pub mod tests { let levels = Levels { levels, l0: Some(l0), + ..Default::default() }; - let mut levels_handler = (0..5).into_iter().map(LevelHandler::new).collect_vec(); + let mut levels_handler = (0..5).map(LevelHandler::new).collect_vec(); let mut local_stats = LocalSelectorStatistic::default(); // pick_l0_to_sub_level @@ -1243,13 +1188,15 @@ pub mod tests { internal_table_id: HashSet::default(), level: 0, }; - let selector = ManualCompactionSelector::new( - config.clone(), - Arc::new(RangeOverlapStrategy::default()), - option, - ); + let mut selector = ManualCompactionSelector::new(option); let task = selector - .pick_compaction(1, &levels, &mut levels_handler, &mut local_stats) + .pick_compaction( + 1, + &group_config, + &levels, + &mut levels_handler, + &mut local_stats, + ) .unwrap(); assert_compaction_task(&task, &levels_handler); assert_eq!(task.input.input_levels.len(), 2); @@ -1276,13 +1223,15 @@ pub mod tests { internal_table_id: HashSet::default(), level: 0, }; - let selector = ManualCompactionSelector::new( - config, - Arc::new(RangeOverlapStrategy::default()), - option, - ); + let mut selector = ManualCompactionSelector::new(option); let task = selector - .pick_compaction(2, &levels, &mut levels_handler, &mut local_stats) + .pick_compaction( + 2, + &group_config, + &levels, + &mut levels_handler, + &mut local_stats, + ) .unwrap(); assert_compaction_task(&task, &levels_handler); assert_eq!(task.input.input_levels.len(), 3); @@ -1296,7 +1245,8 @@ pub mod tests { /// tests `DynamicLevelSelector::manual_pick_compaction` #[test] fn test_manual_compaction_selector() { - let config = Arc::new(CompactionConfigBuilder::new().max_level(4).build()); + let config = CompactionConfigBuilder::new().max_level(4).build(); + let group_config = CompactionGroup::new(1, config); let l0 = generate_l0_nonoverlapping_sublevels(vec![]); assert_eq!(l0.sub_levels.len(), 0); let levels = vec![ @@ -1316,6 +1266,9 @@ pub mod tests { generate_table(2, 1, 0, 100, 1), generate_table(3, 1, 101, 200, 1), generate_table(4, 1, 222, 300, 1), + generate_table(5, 1, 333, 400, 1), + generate_table(6, 1, 444, 500, 1), + 
generate_table(7, 1, 555, 600, 1), ], total_file_size: 0, sub_level_id: 0, @@ -1325,8 +1278,9 @@ pub mod tests { let levels = Levels { levels, l0: Some(l0), + ..Default::default() }; - let mut levels_handler = (0..5).into_iter().map(LevelHandler::new).collect_vec(); + let mut levels_handler = (0..5).map(LevelHandler::new).collect_vec(); let mut local_stats = LocalSelectorStatistic::default(); // pick l3 -> l4 @@ -1341,13 +1295,15 @@ pub mod tests { internal_table_id: HashSet::default(), level: 3, }; - let selector = ManualCompactionSelector::new( - config.clone(), - Arc::new(RangeOverlapStrategy::default()), - option, - ); + let mut selector = ManualCompactionSelector::new(option); let task = selector - .pick_compaction(1, &levels, &mut levels_handler, &mut local_stats) + .pick_compaction( + 1, + &group_config, + &levels, + &mut levels_handler, + &mut local_stats, + ) .unwrap(); assert_compaction_task(&task, &levels_handler); assert_eq!(task.input.input_levels.len(), 2); @@ -1376,22 +1332,27 @@ pub mod tests { internal_table_id: HashSet::default(), level: 4, }; - let selector = ManualCompactionSelector::new( - config, - Arc::new(RangeOverlapStrategy::default()), - option, - ); - + let mut selector = ManualCompactionSelector::new(option); let task = selector - .pick_compaction(1, &levels, &mut levels_handler, &mut local_stats) + .pick_compaction( + 1, + &group_config, + &levels, + &mut levels_handler, + &mut local_stats, + ) .unwrap(); assert_compaction_task(&task, &levels_handler); assert_eq!(task.input.input_levels.len(), 2); assert_eq!(task.input.input_levels[0].level_idx, 4); - assert_eq!(task.input.input_levels[0].table_infos.len(), 3); + assert_eq!(task.input.input_levels[0].table_infos.len(), 6); assert_eq!(task.input.input_levels[1].level_idx, 4); assert_eq!(task.input.input_levels[1].table_infos.len(), 0); assert_eq!(task.input.target_level, 4); + assert!(matches!( + task.compaction_task_type, + compact_task::TaskType::Manual + )); } } } diff --git a/src/meta/src/hummock/compaction/min_overlap_compaction_picker.rs b/src/meta/src/hummock/compaction/picker/min_overlap_compaction_picker.rs similarity index 97% rename from src/meta/src/hummock/compaction/min_overlap_compaction_picker.rs rename to src/meta/src/hummock/compaction/picker/min_overlap_compaction_picker.rs index c8adb27222931..058f3f111e7d0 100644 --- a/src/meta/src/hummock/compaction/min_overlap_compaction_picker.rs +++ b/src/meta/src/hummock/compaction/picker/min_overlap_compaction_picker.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
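As a reference for the &mut self change applied to the pickers in the surrounding hunks: each picker implements the same CompactionPicker trait from compaction/mod.rs, and taking &mut self lets implementations keep internal state between calls. A hypothetical no-op implementor (NoopPicker is not part of this diff) would look roughly like:

    struct NoopPicker;

    impl CompactionPicker for NoopPicker {
        fn pick_compaction(
            &mut self,
            _levels: &Levels,
            _level_handlers: &[LevelHandler],
            _stats: &mut LocalPickerStatistic,
        ) -> Option<CompactionInput> {
            None // a real picker returns the selected input levels here
        }
    }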
@@ -18,9 +18,8 @@ use risingwave_hummock_sdk::compaction_group::hummock_version_ext::HummockLevels use risingwave_pb::hummock::hummock_version::Levels; use risingwave_pb::hummock::{InputLevel, LevelType, SstableInfo}; -use super::CompactionPicker; use crate::hummock::compaction::overlap_strategy::OverlapStrategy; -use crate::hummock::compaction::{CompactionInput, LocalPickerStatistic}; +use crate::hummock::compaction::{CompactionInput, CompactionPicker, LocalPickerStatistic}; use crate::hummock::level_handler::LevelHandler; pub struct MinOverlappingPicker { @@ -97,7 +96,7 @@ impl MinOverlappingPicker { impl CompactionPicker for MinOverlappingPicker { fn pick_compaction( - &self, + &mut self, levels: &Levels, level_handlers: &[LevelHandler], stats: &mut LocalPickerStatistic, @@ -143,7 +142,7 @@ pub mod tests { #[test] fn test_compact_l1() { - let picker = + let mut picker = MinOverlappingPicker::new(1, 2, 10000, Arc::new(RangeOverlapStrategy::default())); let levels = vec![ Level { @@ -174,6 +173,7 @@ pub mod tests { let levels = Levels { levels, l0: Some(generate_l0_nonoverlapping_sublevels(vec![])), + ..Default::default() }; let mut level_handlers = vec![ LevelHandler::new(0), @@ -219,7 +219,7 @@ pub mod tests { #[test] fn test_expand_l1_files() { - let picker = + let mut picker = MinOverlappingPicker::new(1, 2, 10000, Arc::new(RangeOverlapStrategy::default())); let levels = vec![ Level { @@ -247,6 +247,7 @@ pub mod tests { let levels = Levels { levels, l0: Some(generate_l0_nonoverlapping_sublevels(vec![])), + ..Default::default() }; let levels_handler = vec![ LevelHandler::new(0), diff --git a/src/meta/src/hummock/compaction/picker/mod.rs b/src/meta/src/hummock/compaction/picker/mod.rs new file mode 100644 index 0000000000000..cb68a097363b8 --- /dev/null +++ b/src/meta/src/hummock/compaction/picker/mod.rs @@ -0,0 +1,27 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
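The space-reclaim and TTL pickers declared in this new picker module (their files follow below) share one scan pattern: walk the bottom level from a per-group cursor, skip SSTs that are pending compaction (and, for space reclaim, SSTs whose tables are all still live), and stop once the selected size exceeds the configured budget. A condensed sketch of that loop, where all_tables_live stands in for SpaceReclaimCompactionPicker::filter and levels, state, level_handler and max_space_reclaim_bytes are assumed in scope:

    // Condensed from SpaceReclaimCompactionPicker::pick_compaction (added below).
    let bottom = levels.levels.last().unwrap();
    if state.last_select_index >= bottom.table_infos.len() {
        state.last_select_index = 0; // wrap the cursor for the next round
    }
    let mut picked = vec![];
    let mut picked_size = 0;
    for sst in &bottom.table_infos[state.last_select_index..] {
        state.last_select_index += 1;
        if level_handler.is_pending_compact(&sst.id) || all_tables_live(sst) {
            continue; // already compacting, or nothing to reclaim from this SST
        }
        picked.push(sst.clone());
        picked_size += sst.file_size;
        if picked_size > max_space_reclaim_bytes {
            break; // budget reached
        }
    }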
+ +mod base_level_compaction_picker; +mod manual_compaction_picker; +mod min_overlap_compaction_picker; +mod space_reclaim_compaction_picker; +mod tier_compaction_picker; +mod ttl_reclaim_compaction_picker; + +pub use base_level_compaction_picker::LevelCompactionPicker; +pub use manual_compaction_picker::ManualCompactionPicker; +pub use min_overlap_compaction_picker::MinOverlappingPicker; +pub use space_reclaim_compaction_picker::{SpaceReclaimCompactionPicker, SpaceReclaimPickerState}; +pub use tier_compaction_picker::TierCompactionPicker; +pub use ttl_reclaim_compaction_picker::{TtlPickerState, TtlReclaimCompactionPicker}; diff --git a/src/meta/src/hummock/compaction/picker/space_reclaim_compaction_picker.rs b/src/meta/src/hummock/compaction/picker/space_reclaim_compaction_picker.rs new file mode 100644 index 0000000000000..bdb4937ce4da4 --- /dev/null +++ b/src/meta/src/hummock/compaction/picker/space_reclaim_compaction_picker.rs @@ -0,0 +1,312 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashSet; + +use risingwave_pb::hummock::hummock_version::Levels; +use risingwave_pb::hummock::{InputLevel, SstableInfo}; + +use crate::hummock::compaction::CompactionInput; +use crate::hummock::level_handler::LevelHandler; + +pub struct SpaceReclaimCompactionPicker { + // config + pub max_space_reclaim_bytes: u64, + pub all_table_ids: HashSet, +} + +#[derive(Default)] +pub struct SpaceReclaimPickerState { + pub last_select_index: usize, +} + +impl SpaceReclaimCompactionPicker { + pub fn new(max_space_reclaim_bytes: u64, all_table_ids: HashSet) -> Self { + Self { + max_space_reclaim_bytes, + all_table_ids, + } + } + + fn filter(&self, sst: &SstableInfo) -> bool { + let table_id_in_sst = sst.table_ids.iter().cloned().collect::>(); + // it means all the table exist , so we not need to pick this sst + table_id_in_sst + .iter() + .all(|id| self.all_table_ids.contains(id)) + } +} + +impl SpaceReclaimCompactionPicker { + pub fn pick_compaction( + &mut self, + levels: &Levels, + level_handlers: &[LevelHandler], + state: &mut SpaceReclaimPickerState, + ) -> Option { + assert!(!levels.levels.is_empty()); + let reclaimed_level = levels.levels.last().unwrap(); + let mut select_input_ssts = vec![]; + let level_handler = &level_handlers[reclaimed_level.level_idx as usize]; + + if state.last_select_index >= reclaimed_level.table_infos.len() { + state.last_select_index = 0; + } + + let start_indedx = state.last_select_index; + let mut select_file_size = 0; + + for sst in &reclaimed_level.table_infos[start_indedx..] 
{ + state.last_select_index += 1; + if level_handler.is_pending_compact(&sst.id) || self.filter(sst) { + continue; + } + + select_input_ssts.push(sst.clone()); + select_file_size += sst.file_size; + if select_file_size > self.max_space_reclaim_bytes { + break; + } + } + + if select_input_ssts.is_empty() { + return None; + } + + Some(CompactionInput { + input_levels: vec![ + InputLevel { + level_idx: reclaimed_level.level_idx, + level_type: reclaimed_level.level_type, + table_infos: select_input_ssts, + }, + InputLevel { + level_idx: reclaimed_level.level_idx, + level_type: reclaimed_level.level_type, + table_infos: vec![], + }, + ], + target_level: reclaimed_level.level_idx as usize, + target_sub_level_id: 0, + }) + } +} + +#[cfg(test)] +mod test { + + use itertools::Itertools; + use risingwave_pb::hummock::compact_task; + pub use risingwave_pb::hummock::{KeyRange, Level, LevelType}; + + use super::*; + use crate::hummock::compaction::compaction_config::CompactionConfigBuilder; + use crate::hummock::compaction::level_selector::tests::{ + assert_compaction_task, generate_l0_nonoverlapping_sublevels, generate_level, + generate_table_with_table_ids, + }; + use crate::hummock::compaction::level_selector::SpaceReclaimCompactionSelector; + use crate::hummock::compaction::{LevelSelector, LocalSelectorStatistic}; + use crate::hummock::model::CompactionGroup; + + #[test] + fn test_space_reclaim_compaction_selector() { + let max_space_reclaim_bytes = 400; + let config = CompactionConfigBuilder::new() + .max_level(4) + .max_space_reclaim_bytes(max_space_reclaim_bytes) + .build(); + let group_config = CompactionGroup::new(1, config); + + let l0 = generate_l0_nonoverlapping_sublevels(vec![]); + assert_eq!(l0.sub_levels.len(), 0); + let levels = vec![ + generate_level(1, vec![]), + generate_level(2, vec![]), + generate_level( + 3, + vec![ + generate_table_with_table_ids(0, 1, 150, 151, 1, vec![0]), + generate_table_with_table_ids(1, 1, 250, 251, 1, vec![1]), + ], + ), + Level { + level_idx: 4, + level_type: LevelType::Nonoverlapping as i32, + table_infos: vec![ + generate_table_with_table_ids(2, 1, 0, 100, 1, vec![2]), + generate_table_with_table_ids(3, 1, 101, 200, 1, vec![3]), + generate_table_with_table_ids(4, 1, 222, 300, 1, vec![4]), + generate_table_with_table_ids(5, 1, 333, 400, 1, vec![5]), + generate_table_with_table_ids(6, 1, 444, 500, 1, vec![6]), + generate_table_with_table_ids(7, 1, 555, 600, 1, vec![7]), + generate_table_with_table_ids(8, 1, 666, 700, 1, vec![8]), + generate_table_with_table_ids(9, 1, 777, 800, 1, vec![9]), + generate_table_with_table_ids(10, 1, 888, 900, 1, vec![10]), + ], + total_file_size: 0, + sub_level_id: 0, + }, + ]; + assert_eq!(levels.len(), 4); + let mut levels = Levels { + levels, + l0: Some(l0), + ..Default::default() + }; + let mut levels_handler = (0..5).map(LevelHandler::new).collect_vec(); + let mut local_stats = LocalSelectorStatistic::default(); + + let mut selector = SpaceReclaimCompactionSelector::default(); + { + // pick space reclaim + let task = selector + .pick_compaction( + 1, + &group_config, + &levels, + &mut levels_handler, + &mut local_stats, + ) + .unwrap(); + assert_compaction_task(&task, &levels_handler); + assert_eq!(task.input.input_levels.len(), 2); + assert_eq!(task.input.input_levels[0].level_idx, 4); + assert_eq!(task.input.input_levels[0].table_infos.len(), 5); + + let mut start_id = 2; + for sst in &task.input.input_levels[0].table_infos { + assert_eq!(start_id, sst.id); + start_id += 1; + } + + 
assert_eq!(task.input.input_levels[1].level_idx, 4); + assert_eq!(task.input.input_levels[1].table_infos.len(), 0); + assert_eq!(task.input.target_level, 4); + assert!(matches!( + task.compaction_task_type, + compact_task::TaskType::SpaceReclaim + )); + + // in this case, no files is pending, so it limit by max_space_reclaim_bytes + let select_file_size: u64 = task.input.input_levels[0] + .table_infos + .iter() + .map(|sst| sst.file_size) + .sum(); + assert!(select_file_size > max_space_reclaim_bytes); + } + + { + for level_handler in &mut levels_handler { + for pending_task_id in &level_handler.pending_tasks_ids() { + level_handler.remove_task(*pending_task_id); + } + } + + // pick space reclaim + let task = selector + .pick_compaction( + 1, + &group_config, + &levels, + &mut levels_handler, + &mut local_stats, + ) + .unwrap(); + assert_compaction_task(&task, &levels_handler); + assert_eq!(task.input.input_levels.len(), 2); + assert_eq!(task.input.input_levels[0].level_idx, 4); + + // test select index, picker will select file from last_select_index + let all_file_count = levels.get_levels().last().unwrap().get_table_infos().len(); + assert_eq!( + task.input.input_levels[0].table_infos.len(), + all_file_count - 5 + ); + + let mut start_id = 7; + for sst in &task.input.input_levels[0].table_infos { + assert_eq!(start_id, sst.id); + start_id += 1; + } + + assert_eq!(task.input.input_levels[1].level_idx, 4); + assert_eq!(task.input.input_levels[1].table_infos.len(), 0); + assert_eq!(task.input.target_level, 4); + assert!(matches!( + task.compaction_task_type, + compact_task::TaskType::SpaceReclaim + )); + } + + { + for level_handler in &mut levels_handler { + for pending_task_id in &level_handler.pending_tasks_ids() { + level_handler.remove_task(*pending_task_id); + } + } + + levels.member_table_ids = vec![2, 3, 4, 5, 6, 7, 8, 9, 10]; + // pick space reclaim + let task = selector.pick_compaction( + 1, + &group_config, + &levels, + &mut levels_handler, + &mut local_stats, + ); + assert!(task.is_none()); + } + + { + for level_handler in &mut levels_handler { + for pending_task_id in &level_handler.pending_tasks_ids() { + level_handler.remove_task(*pending_task_id); + } + } + + levels.member_table_ids = vec![2, 3, 4, 5, 6, 7, 8, 9]; + // pick space reclaim + let task = selector + .pick_compaction( + 1, + &group_config, + &levels, + &mut levels_handler, + &mut local_stats, + ) + .unwrap(); + assert_compaction_task(&task, &levels_handler); + assert_eq!(task.input.input_levels.len(), 2); + assert_eq!(task.input.input_levels[0].level_idx, 4); + + assert_eq!(task.input.input_levels[0].table_infos.len(), 1); + + let mut start_id = 10; + for sst in &task.input.input_levels[0].table_infos { + assert_eq!(start_id, sst.id); + start_id += 1; + } + + assert_eq!(task.input.input_levels[1].level_idx, 4); + assert_eq!(task.input.input_levels[1].table_infos.len(), 0); + assert_eq!(task.input.target_level, 4); + assert!(matches!( + task.compaction_task_type, + compact_task::TaskType::SpaceReclaim + )); + } + } +} diff --git a/src/meta/src/hummock/compaction/tier_compaction_picker.rs b/src/meta/src/hummock/compaction/picker/tier_compaction_picker.rs similarity index 83% rename from src/meta/src/hummock/compaction/tier_compaction_picker.rs rename to src/meta/src/hummock/compaction/picker/tier_compaction_picker.rs index f272a5da630d4..4e463e52d5ede 100644 --- a/src/meta/src/hummock/compaction/tier_compaction_picker.rs +++ b/src/meta/src/hummock/compaction/picker/tier_compaction_picker.rs @@ -1,4 +1,4 @@ -// 
Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,9 +17,10 @@ use std::sync::Arc; use risingwave_pb::hummock::hummock_version::Levels; use risingwave_pb::hummock::{CompactionConfig, InputLevel, LevelType, OverlappingLevel}; -use crate::hummock::compaction::min_overlap_compaction_picker::MinOverlappingPicker; use crate::hummock::compaction::overlap_strategy::OverlapStrategy; -use crate::hummock::compaction::{CompactionInput, CompactionPicker, LocalPickerStatistic}; +use crate::hummock::compaction::{ + CompactionInput, CompactionPicker, LocalPickerStatistic, MinOverlappingPicker, +}; use crate::hummock::level_handler::LevelHandler; pub struct TierCompactionPicker { @@ -67,7 +68,7 @@ impl TierCompactionPicker { let max_compaction_bytes = std::cmp::min( self.config.max_compaction_bytes, - self.config.max_bytes_for_level_base * 2, + self.config.sub_level_max_compaction_bytes, ); let mut compaction_bytes = level.total_file_size; @@ -76,7 +77,7 @@ impl TierCompactionPicker { let mut waiting_enough_files = true; for other in &l0.sub_levels[idx + 1..] { - if compaction_bytes >= max_compaction_bytes { + if compaction_bytes > max_compaction_bytes { waiting_enough_files = false; break; } @@ -121,12 +122,11 @@ impl TierCompactionPicker { // compact task never be trigger. if level.level_type == non_overlapping_type && is_write_amp_large - && waiting_enough_files + && select_level_inputs.len() < self.config.level0_tier_compact_file_number as usize { stats.skip_by_write_amp_limit += 1; continue; } - select_level_inputs.reverse(); return Some(CompactionInput { @@ -194,7 +194,7 @@ impl TierCompactionPicker { impl CompactionPicker for TierCompactionPicker { fn pick_compaction( - &self, + &mut self, levels: &Levels, level_handlers: &[LevelHandler], stats: &mut LocalPickerStatistic, @@ -216,8 +216,9 @@ impl CompactionPicker for TierCompactionPicker { pub mod tests { use std::sync::Arc; + use risingwave_hummock_sdk::compaction_group::hummock_version_ext::new_sub_level; use risingwave_pb::hummock::hummock_version::Levels; - use risingwave_pb::hummock::LevelType; + use risingwave_pb::hummock::{LevelType, OverlappingLevel}; use crate::hummock::compaction::compaction_config::CompactionConfigBuilder; use crate::hummock::compaction::level_selector::tests::{ @@ -243,7 +244,8 @@ pub mod tests { .level0_tier_compact_file_number(2) .build(), ); - let picker = TierCompactionPicker::new(config, Arc::new(RangeOverlapStrategy::default())); + let mut picker = + TierCompactionPicker::new(config, Arc::new(RangeOverlapStrategy::default())); // Cannot trivial move because there is only 1 sub-level. 
let l0 = generate_l0_overlapping_sublevels(vec![vec![ @@ -253,6 +255,7 @@ pub mod tests { let mut levels = Levels { l0: Some(l0), levels: vec![], + ..Default::default() }; levels.l0.as_mut().unwrap().sub_levels[0].level_type = LevelType::Nonoverlapping as i32; let mut local_stats = LocalPickerStatistic::default(); @@ -272,6 +275,7 @@ pub mod tests { let mut levels = Levels { l0: Some(l0), levels: vec![], + ..Default::default() }; let ret = picker .pick_compaction(&levels, &levels_handler, &mut local_stats) @@ -320,6 +324,7 @@ pub mod tests { let levels = Levels { l0: Some(l0), levels: vec![], + ..Default::default() }; let levels_handler = vec![LevelHandler::new(0)]; let config = Arc::new( @@ -327,7 +332,8 @@ pub mod tests { .level0_tier_compact_file_number(2) .build(), ); - let picker = TierCompactionPicker::new(config, Arc::new(RangeOverlapStrategy::default())); + let mut picker = + TierCompactionPicker::new(config, Arc::new(RangeOverlapStrategy::default())); let mut local_stats = LocalPickerStatistic::default(); let ret = picker .pick_compaction(&levels, &levels_handler, &mut local_stats) @@ -344,6 +350,7 @@ pub mod tests { let empty_level = Levels { l0: Some(generate_l0_overlapping_sublevels(vec![])), levels: vec![], + ..Default::default() }; assert!(picker .pick_compaction(&empty_level, &levels_handler, &mut local_stats) @@ -355,21 +362,22 @@ pub mod tests { let l0 = generate_l0_overlapping_sublevels(vec![ vec![ generate_table(4, 1, 10, 90, 1), - generate_table(5, 1, 210, 220, 1), + generate_table(5, 1, 200, 220, 1), ], - vec![generate_table(6, 1, 0, 100, 1)], - vec![generate_table(7, 1, 0, 100, 1)], + vec![generate_table(6, 1, 1, 100, 1)], + vec![generate_table(7, 1, 1, 100, 1)], ]); let mut levels = Levels { l0: Some(l0), levels: vec![], + ..Default::default() }; levels.l0.as_mut().unwrap().sub_levels[0].level_type = LevelType::Nonoverlapping as i32; let levels_handler = vec![LevelHandler::new(0), LevelHandler::new(1)]; let config = Arc::new( CompactionConfigBuilder::new() .level0_tier_compact_file_number(2) - .sub_level_max_compaction_bytes(1) + .sub_level_max_compaction_bytes(100) .max_compaction_bytes(500000) .build(), ); @@ -377,24 +385,54 @@ pub mod tests { let mut local_stats = LocalPickerStatistic::default(); // sub-level 0 is excluded because it's nonoverlapping and violating // sub_level_max_compaction_bytes. - let picker = - TierCompactionPicker::new(config.clone(), Arc::new(RangeOverlapStrategy::default())); + let mut picker = + TierCompactionPicker::new(config, Arc::new(RangeOverlapStrategy::default())); let ret = picker .pick_compaction(&levels, &levels_handler, &mut local_stats) .unwrap(); assert_eq!(ret.input_levels.len(), 2); assert_eq!(ret.target_level, 0); assert_eq!(ret.target_sub_level_id, 1); + } - // sub-level 0 is included because it's overlapping even if violating - // sub_level_max_compaction_bytes. 
- levels.l0.as_mut().unwrap().sub_levels[0].level_type = LevelType::Overlapping as i32; - let picker = TierCompactionPicker::new(config, Arc::new(RangeOverlapStrategy::default())); - let ret = picker - .pick_compaction(&levels, &levels_handler, &mut local_stats) - .unwrap(); - assert_eq!(ret.input_levels.len(), 3); - assert_eq!(ret.target_level, 0); - assert_eq!(ret.target_sub_level_id, 0); + #[test] + fn test_write_amp_bug_skip() { + let l1 = new_sub_level( + 1, + LevelType::Nonoverlapping, + vec![ + generate_table(3, 1, 1, 50, 1), + generate_table(4, 1, 51, 100, 1), + ], + ); + let l2 = new_sub_level( + 2, + LevelType::Nonoverlapping, + vec![ + generate_table(3, 1, 1, 50, 1), + generate_table(4, 1, 51, 200, 1), + ], + ); + let levels = Levels { + l0: Some(OverlappingLevel { + total_file_size: l1.total_file_size + l2.total_file_size, + sub_levels: vec![l1, l2], + }), + levels: vec![], + ..Default::default() + }; + let config = Arc::new( + CompactionConfigBuilder::new() + .level0_tier_compact_file_number(4) + .sub_level_max_compaction_bytes(100) + .max_compaction_bytes(500000) + .build(), + ); + let levels_handler = vec![LevelHandler::new(0), LevelHandler::new(1)]; + let mut local_stats = LocalPickerStatistic::default(); + let mut picker = + TierCompactionPicker::new(config, Arc::new(RangeOverlapStrategy::default())); + let ret = picker.pick_compaction(&levels, &levels_handler, &mut local_stats); + assert!(ret.is_none()); } } diff --git a/src/meta/src/hummock/compaction/picker/ttl_reclaim_compaction_picker.rs b/src/meta/src/hummock/compaction/picker/ttl_reclaim_compaction_picker.rs new file mode 100644 index 0000000000000..d04a934a7cdaa --- /dev/null +++ b/src/meta/src/hummock/compaction/picker/ttl_reclaim_compaction_picker.rs @@ -0,0 +1,231 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_pb::hummock::hummock_version::Levels; +use risingwave_pb::hummock::InputLevel; + +use crate::hummock::compaction::CompactionInput; +use crate::hummock::level_handler::LevelHandler; + +#[derive(Default)] +pub struct TtlPickerState { + last_select_index: usize, +} + +pub struct TtlReclaimCompactionPicker { + max_ttl_reclaim_bytes: u64, + // todo: filter table option +} + +impl TtlReclaimCompactionPicker { + pub fn new(max_ttl_reclaim_bytes: u64) -> Self { + Self { + max_ttl_reclaim_bytes, + } + } +} + +impl TtlReclaimCompactionPicker { + pub fn pick_compaction( + &self, + levels: &Levels, + level_handlers: &[LevelHandler], + state: &mut TtlPickerState, + ) -> Option { + assert!(!levels.levels.is_empty()); + let reclaimed_level = levels.levels.last().unwrap(); + let mut select_input_ssts = vec![]; + let level_handler = &level_handlers[reclaimed_level.level_idx as usize]; + + if state.last_select_index >= reclaimed_level.table_infos.len() { + state.last_select_index = 0; + } + + let start_indedx = state.last_select_index; + let mut select_file_size = 0; + + for sst in &reclaimed_level.table_infos[start_indedx..] 
{ + state.last_select_index += 1; + if level_handler.is_pending_compact(&sst.id) { + continue; + } + + select_input_ssts.push(sst.clone()); + select_file_size += sst.file_size; + if select_file_size > self.max_ttl_reclaim_bytes { + break; + } + } + + if select_input_ssts.is_empty() { + return None; + } + + Some(CompactionInput { + input_levels: vec![ + InputLevel { + level_idx: reclaimed_level.level_idx, + level_type: reclaimed_level.level_type, + table_infos: select_input_ssts, + }, + InputLevel { + level_idx: reclaimed_level.level_idx, + level_type: reclaimed_level.level_type, + table_infos: vec![], + }, + ], + target_level: reclaimed_level.level_idx as usize, + target_sub_level_id: 0, + }) + } +} + +#[cfg(test)] +mod test { + + use itertools::Itertools; + use risingwave_pb::hummock::compact_task; + pub use risingwave_pb::hummock::{KeyRange, Level, LevelType}; + + use super::*; + use crate::hummock::compaction::compaction_config::CompactionConfigBuilder; + use crate::hummock::compaction::level_selector::tests::{ + assert_compaction_task, generate_l0_nonoverlapping_sublevels, generate_level, + generate_table, + }; + use crate::hummock::compaction::level_selector::{LevelSelector, TtlCompactionSelector}; + use crate::hummock::compaction::LocalSelectorStatistic; + use crate::hummock::model::CompactionGroup; + + #[test] + fn test_ttl_reclaim_compaction_selector() { + let config = CompactionConfigBuilder::new() + .max_level(4) + .max_space_reclaim_bytes(400) + .build(); + let group_config = CompactionGroup::new(1, config); + let l0 = generate_l0_nonoverlapping_sublevels(vec![]); + assert_eq!(l0.sub_levels.len(), 0); + let levels = vec![ + generate_level(1, vec![]), + generate_level(2, vec![]), + generate_level( + 3, + vec![ + generate_table(0, 1, 150, 151, 1), + generate_table(1, 1, 250, 251, 1), + ], + ), + Level { + level_idx: 4, + level_type: LevelType::Nonoverlapping as i32, + table_infos: vec![ + generate_table(2, 1, 0, 100, 1), + generate_table(3, 1, 101, 200, 1), + generate_table(4, 1, 222, 300, 1), + generate_table(5, 1, 333, 400, 1), + generate_table(6, 1, 444, 500, 1), + generate_table(7, 1, 555, 600, 1), + generate_table(8, 1, 666, 700, 1), + generate_table(9, 1, 777, 800, 1), + generate_table(10, 1, 888, 900, 1), + ], + total_file_size: 0, + sub_level_id: 0, + }, + ]; + assert_eq!(levels.len(), 4); + let levels = Levels { + levels, + l0: Some(l0), + ..Default::default() + }; + let mut levels_handler = (0..5).map(LevelHandler::new).collect_vec(); + let mut local_stats = LocalSelectorStatistic::default(); + let mut selector = TtlCompactionSelector::default(); + { + // pick space reclaim + let task = selector + .pick_compaction( + 1, + &group_config, + &levels, + &mut levels_handler, + &mut local_stats, + ) + .unwrap(); + assert_compaction_task(&task, &levels_handler); + assert_eq!(task.input.input_levels.len(), 2); + assert_eq!(task.input.input_levels[0].level_idx, 4); + assert_eq!(task.input.input_levels[0].table_infos.len(), 5); + + let mut start_id = 2; + for sst in &task.input.input_levels[0].table_infos { + assert_eq!(start_id, sst.id); + start_id += 1; + } + + assert_eq!(task.input.input_levels[1].level_idx, 4); + assert_eq!(task.input.input_levels[1].table_infos.len(), 0); + assert_eq!(task.input.target_level, 4); + assert!(matches!( + task.compaction_task_type, + compact_task::TaskType::Ttl + )); + } + + { + for level_handler in &mut levels_handler { + for pending_task_id in &level_handler.pending_tasks_ids() { + level_handler.remove_task(*pending_task_id); + } + } + + // 
pick space reclaim + let task = selector + .pick_compaction( + 1, + &group_config, + &levels, + &mut levels_handler, + &mut local_stats, + ) + .unwrap(); + assert_compaction_task(&task, &levels_handler); + assert_eq!(task.input.input_levels.len(), 2); + assert_eq!(task.input.input_levels[0].level_idx, 4); + + // test select index, picker will select file from last_select_index + let all_file_count = levels.get_levels().last().unwrap().get_table_infos().len(); + assert_eq!( + task.input.input_levels[0].table_infos.len(), + all_file_count - 5 + ); + + let mut start_id = 7; + for sst in &task.input.input_levels[0].table_infos { + assert_eq!(start_id, sst.id); + start_id += 1; + } + + assert_eq!(task.input.input_levels[1].level_idx, 4); + assert_eq!(task.input.input_levels[1].table_infos.len(), 0); + assert_eq!(task.input.target_level, 4); + assert!(matches!( + task.compaction_task_type, + compact_task::TaskType::Ttl + )); + } + } +} diff --git a/src/meta/src/hummock/compaction_schedule_policy.rs b/src/meta/src/hummock/compaction_schedule_policy.rs index da095f0b996c1..f49f88e31b38b 100644 --- a/src/meta/src/hummock/compaction_schedule_policy.rs +++ b/src/meta/src/hummock/compaction_schedule_policy.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -369,12 +369,13 @@ mod tests { use risingwave_common::try_match_expand; use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; use risingwave_hummock_sdk::HummockContextId; - use risingwave_pb::hummock::compact_task::TaskStatus; + use risingwave_pb::hummock::compact_task::{self, TaskStatus}; use risingwave_pb::hummock::subscribe_compact_tasks_response::Task; use risingwave_pb::hummock::{CompactTask, CompactTaskAssignment, InputLevel, SstableInfo}; use tokio::sync::mpsc::error::TryRecvError; use crate::hummock::compaction::compaction_config::CompactionConfigBuilder; + use crate::hummock::compaction::default_level_selector; use crate::hummock::compaction_schedule_policy::{ CompactionSchedulePolicy, RoundRobinPolicy, ScoredPolicy, }; @@ -438,6 +439,7 @@ mod tests { table_options: HashMap::default(), current_epoch_time: 0, target_sub_level_id: 0, + task_type: compact_task::TaskType::Dynamic as i32, } } @@ -513,7 +515,10 @@ mod tests { )); let task = hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .unwrap(); diff --git a/src/meta/src/hummock/compaction_scheduler.rs b/src/meta/src/hummock/compaction_scheduler.rs index be7b02b6ce252..0beacea7168e6 100644 --- a/src/meta/src/hummock/compaction_scheduler.rs +++ b/src/meta/src/hummock/compaction_scheduler.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
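Editor's note: the `TtlReclaimCompactionPicker` introduced above is deliberately stateful. `TtlPickerState::last_select_index` is a cursor into the bottom level, so consecutive picks walk the level in order, wrap to the front once the end is reached, and stop a batch as soon as the accumulated file size exceeds `max_ttl_reclaim_bytes`. A simplified, self-contained sketch of that cursor behaviour over plain `(id, size)` pairs follows; the real picker additionally skips SSTs that are already pending compaction, and the names here are stand-ins rather than the crate's API.

// Illustrative sketch only.
struct PickerState {
    last_select_index: usize,
}

fn pick_batch(files: &[(u64, u64)], max_reclaim_bytes: u64, state: &mut PickerState) -> Vec<u64> {
    // Wrap the cursor once it has walked past the end of the level.
    if state.last_select_index >= files.len() {
        state.last_select_index = 0;
    }
    let start = state.last_select_index;
    let mut picked = Vec::new();
    let mut picked_bytes = 0u64;
    for (id, size) in &files[start..] {
        state.last_select_index += 1;
        picked.push(*id);
        picked_bytes += *size;
        if picked_bytes > max_reclaim_bytes {
            break;
        }
    }
    picked
}

// If the bottom level held nine files of roughly 100 bytes each and the budget were 400,
// the first call would return five files and the second call would resume at the cursor and
// return the remaining four; this is the same shape as the two assertions in the selector
// test above (5 SSTs first, then `all_file_count - 5`).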
-use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; use std::time::Duration; use parking_lot::Mutex; use risingwave_hummock_sdk::compact::compact_task_to_string; use risingwave_hummock_sdk::CompactionGroupId; -use risingwave_pb::hummock::compact_task::TaskStatus; +use risingwave_pb::hummock::compact_task::{self, TaskStatus}; use risingwave_pb::hummock::subscribe_compact_tasks_response::Task; use risingwave_pb::hummock::CompactTask; use tokio::sync::mpsc::error::SendError; @@ -28,6 +28,9 @@ use tokio::sync::oneshot::Receiver; use tokio::sync::Notify; use super::Compactor; +use crate::hummock::compaction::{ + DynamicLevelSelector, LevelSelector, SpaceReclaimCompactionSelector, TtlCompactionSelector, +}; use crate::hummock::error::Error; use crate::hummock::{CompactorManagerRef, HummockManagerRef}; use crate::manager::{LocalNotification, MetaSrvEnv}; @@ -36,10 +39,12 @@ use crate::storage::MetaStore; pub type CompactionSchedulerRef = Arc>; pub type CompactionRequestChannelRef = Arc; +type CompactionRequestChannelItem = (CompactionGroupId, compact_task::TaskType); + /// [`CompactionRequestChannel`] wrappers a mpsc channel and deduplicate requests from same /// compaction groups. pub struct CompactionRequestChannel { - request_tx: UnboundedSender, + request_tx: UnboundedSender, scheduled: Mutex>, } @@ -53,7 +58,7 @@ pub enum ScheduleStatus { } impl CompactionRequestChannel { - pub fn new(request_tx: UnboundedSender) -> Self { + pub fn new(request_tx: UnboundedSender) -> Self { Self { request_tx, scheduled: Default::default(), @@ -64,12 +69,13 @@ impl CompactionRequestChannel { pub fn try_sched_compaction( &self, compaction_group: CompactionGroupId, - ) -> Result> { + task_type: compact_task::TaskType, + ) -> Result> { let mut guard = self.scheduled.lock(); if guard.contains(&compaction_group) { return Ok(false); } - self.request_tx.send(compaction_group)?; + self.request_tx.send((compaction_group, task_type))?; guard.insert(compaction_group); Ok(true) } @@ -113,7 +119,8 @@ where } pub async fn start(&self, mut shutdown_rx: Receiver<()>) { - let (sched_tx, mut sched_rx) = tokio::sync::mpsc::unbounded_channel::(); + let (sched_tx, mut sched_rx) = + tokio::sync::mpsc::unbounded_channel::(); let sched_channel = Arc::new(CompactionRequestChannel::new(sched_tx)); self.hummock_manager.init_compaction_scheduler( @@ -126,11 +133,19 @@ where self.env.opts.periodic_compaction_interval_sec, )); min_trigger_interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); + + let mut min_space_reclaim_trigger_interval = tokio::time::interval(Duration::from_secs( + self.env.opts.periodic_space_reclaim_compaction_interval_sec, + )); + min_space_reclaim_trigger_interval + .set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); + let mut compaction_selectors = Self::init_selectors(); + loop { - let compaction_group: CompactionGroupId = tokio::select! { - compaction_group = sched_rx.recv() => { - match compaction_group { - Some(compaction_group) => compaction_group, + let (compaction_group, task_type) = tokio::select! { + recv = sched_rx.recv() => { + match recv { + Some((compaction_group, task_type)) => (compaction_group, task_type), None => { tracing::warn!("Compactor Scheduler: The Hummock manager has dropped the connection, it means it has either died or started a new session. Exiting."); @@ -138,6 +153,7 @@ where } } }, + _ = min_trigger_interval.tick() => { // Disable periodic trigger for compaction_deterministic_test. 
if self.env.opts.compaction_deterministic_test { @@ -145,12 +161,27 @@ where } // Periodically trigger compaction for all compaction groups. for cg_id in self.hummock_manager.compaction_group_ids().await { - if let Err(e) = sched_channel.try_sched_compaction(cg_id) { - tracing::warn!("Failed to schedule compaction for compaction group {}. {}", cg_id, e); + if let Err(e) = sched_channel.try_sched_compaction(cg_id, compact_task::TaskType::Dynamic) { + tracing::warn!("Failed to schedule base compaction for compaction group {}. {}", cg_id, e); } } continue; }, + + _ = min_space_reclaim_trigger_interval.tick() => { + // Disable periodic trigger for compaction_deterministic_test. + if self.env.opts.compaction_deterministic_test { + continue; + } + // Periodically trigger space_reclaim compaction for all compaction groups. + for cg_id in self.hummock_manager.compaction_group_ids().await { + if let Err(e) = sched_channel.try_sched_compaction(cg_id, compact_task::TaskType::SpaceReclaim) { + tracing::warn!("Failed to schedule base compaction for compaction group {}. {}", cg_id, e); + } + } + continue; + } + // Shutdown compactor scheduler _ = &mut shutdown_rx => { break; @@ -174,13 +205,30 @@ where } } }; - - // Pick a task and assign it to this compactor. - self.pick_and_assign(compaction_group, compactor, sched_channel.clone()) + let selector = compaction_selectors.get_mut(&task_type).unwrap(); + self.pick_and_assign(compaction_group, compactor, sched_channel.clone(), selector) .await; } } + fn init_selectors() -> HashMap> { + let mut compaction_selectors: HashMap> = + HashMap::default(); + compaction_selectors.insert( + compact_task::TaskType::Dynamic, + Box::::default(), + ); + compaction_selectors.insert( + compact_task::TaskType::SpaceReclaim, + Box::::default(), + ); + compaction_selectors.insert( + compact_task::TaskType::Ttl, + Box::::default(), + ); + compaction_selectors + } + /// Tries to pick a compaction task, schedule it to a compactor. /// /// Returns true if a task is successfully picked and sent. @@ -189,9 +237,10 @@ where compaction_group: CompactionGroupId, compactor: Arc, sched_channel: Arc, + selector: &mut Box, ) -> ScheduleStatus { let schedule_status = self - .pick_and_assign_impl(compaction_group, compactor, sched_channel) + .pick_and_assign_impl(compaction_group, compactor, sched_channel, selector) .await; let cancel_state = match &schedule_status { @@ -233,12 +282,14 @@ where compaction_group: CompactionGroupId, compactor: Arc, sched_channel: Arc, + selector: &mut Box, ) -> ScheduleStatus { // 1. Pick a compaction task. let compact_task = self .hummock_manager - .get_compact_task(compaction_group) + .get_compact_task(compaction_group, selector) .await; + let compact_task = match compact_task { Ok(Some(compact_task)) => compact_task, Ok(None) => { @@ -305,7 +356,9 @@ where } // 4. Reschedule it with best effort, in case there are more tasks. - if let Err(e) = sched_channel.try_sched_compaction(compaction_group) { + if let Err(e) = + sched_channel.try_sched_compaction(compaction_group, compact_task.task_type()) + { tracing::error!( "Failed to reschedule compaction group {} after sending new task {}. 
{:#?}", compaction_group, @@ -323,9 +376,11 @@ mod tests { use assert_matches::assert_matches; use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; - use risingwave_hummock_sdk::CompactionGroupId; - use crate::hummock::compaction_scheduler::{CompactionRequestChannel, ScheduleStatus}; + use crate::hummock::compaction::default_level_selector; + use crate::hummock::compaction_scheduler::{ + CompactionRequestChannel, CompactionRequestChannelItem, ScheduleStatus, + }; use crate::hummock::test_utils::{add_ssts, setup_compute_env}; use crate::hummock::CompactionScheduler; @@ -337,7 +392,8 @@ mod tests { let compaction_scheduler = CompactionScheduler::new(env, hummock_manager.clone(), compactor_manager.clone()); - let (request_tx, _request_rx) = tokio::sync::mpsc::unbounded_channel::(); + let (request_tx, _request_rx) = + tokio::sync::mpsc::unbounded_channel::(); let request_channel = Arc::new(CompactionRequestChannel::new(request_tx)); // Add a compactor with invalid context_id. @@ -352,7 +408,8 @@ mod tests { .pick_and_assign( StaticCompactionGroupId::StateDefault.into(), compactor, - request_channel.clone() + request_channel.clone(), + &mut default_level_selector(), ) .await ); @@ -365,7 +422,8 @@ mod tests { .pick_and_assign( StaticCompactionGroupId::StateDefault.into(), compactor, - request_channel.clone() + request_channel.clone(), + &mut default_level_selector(), ) .await, ScheduleStatus::AssignFailure(_) @@ -382,7 +440,8 @@ mod tests { .pick_and_assign( StaticCompactionGroupId::StateDefault.into(), compactor, - request_channel.clone() + request_channel.clone(), + &mut default_level_selector(), ) .await ); @@ -411,7 +470,8 @@ mod tests { .pick_and_assign( StaticCompactionGroupId::StateDefault.into(), compactor, - request_channel.clone() + request_channel.clone(), + &mut default_level_selector(), ) .await ); @@ -437,7 +497,8 @@ mod tests { compactor_manager.clone(), ); - let (request_tx, _request_rx) = tokio::sync::mpsc::unbounded_channel::(); + let (request_tx, _request_rx) = + tokio::sync::mpsc::unbounded_channel::(); let request_channel = Arc::new(CompactionRequestChannel::new(request_tx)); let _sst_infos = add_ssts(1, hummock_manager.as_ref(), context_id).await; @@ -453,7 +514,8 @@ mod tests { .pick_and_assign( StaticCompactionGroupId::StateDefault.into(), compactor, - request_channel.clone() + request_channel.clone(), + &mut default_level_selector(), ) .await ); @@ -468,7 +530,8 @@ mod tests { .pick_and_assign( StaticCompactionGroupId::StateDefault.into(), compactor, - request_channel.clone() + request_channel.clone(), + &mut default_level_selector(), ) .await, ScheduleStatus::AssignFailure(_) @@ -485,7 +548,8 @@ mod tests { .pick_and_assign( StaticCompactionGroupId::StateDefault.into(), compactor, - request_channel.clone() + request_channel.clone(), + &mut default_level_selector(), ) .await, ScheduleStatus::SendFailure(_) @@ -510,7 +574,8 @@ mod tests { .pick_and_assign( StaticCompactionGroupId::StateDefault.into(), compactor, - request_channel.clone() + request_channel.clone(), + &mut default_level_selector(), ) .await, ScheduleStatus::AssignFailure(_) @@ -538,7 +603,8 @@ mod tests { .pick_and_assign( StaticCompactionGroupId::StateDefault.into(), compactor, - request_channel.clone() + request_channel.clone(), + &mut default_level_selector(), ) .await, ScheduleStatus::Ok diff --git a/src/meta/src/hummock/compactor_manager.rs b/src/meta/src/hummock/compactor_manager.rs index 4b60a53021e5f..3425f0ae56db5 100644 --- a/src/meta/src/hummock/compactor_manager.rs +++ 
b/src/meta/src/hummock/compactor_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -205,6 +205,14 @@ impl CompactorManager { rx } + /// Used when meta exiting to support graceful shutdown. + pub fn abort_all_compactors(&self) { + let mut policy = self.policy.write(); + while let Some(compactor) = policy.next_compactor() { + policy.remove_compactor(compactor.context_id); + } + } + pub fn pause_compactor(&self, context_id: HummockContextId) { let mut policy = self.policy.write(); policy.pause_compactor(context_id); @@ -396,6 +404,7 @@ mod tests { use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; use risingwave_pb::hummock::CompactTaskProgress; + use crate::hummock::compaction::default_level_selector; use crate::hummock::test_utils::{add_ssts, setup_compute_env}; use crate::hummock::CompactorManager; @@ -410,7 +419,10 @@ mod tests { let _receiver = compactor_manager.add_compactor(context_id, 1); let _compactor = hummock_manager.get_idle_compactor().await.unwrap(); let task = hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .unwrap(); diff --git a/src/meta/src/hummock/error.rs b/src/meta/src/hummock/error.rs index f8098c8e53443..e4b482867a4f1 100644 --- a/src/meta/src/hummock/error.rs +++ b/src/meta/src/hummock/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,8 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use risingwave_hummock_sdk::compaction_group::StateTableId; -use risingwave_hummock_sdk::{CompactionGroupId, HummockContextId, HummockSstableId}; +use risingwave_hummock_sdk::{HummockContextId, HummockSstableId}; use thiserror::Error; use crate::model::MetadataModelError; @@ -31,10 +30,8 @@ pub enum Error { CompactorUnreachable(HummockContextId), #[error("compaction task {0} already assigned to compactor {1}")] CompactionTaskAlreadyAssigned(u64, HummockContextId), - #[error("compaction group {0} not found")] - InvalidCompactionGroup(CompactionGroupId), - #[error("compaction group member {0} not found")] - InvalidCompactionGroupMember(StateTableId), + #[error("compaction group error {0}")] + CompactionGroup(String), #[error("SST {0} is invalid")] InvalidSst(HummockSstableId), #[error(transparent)] diff --git a/src/meta/src/hummock/level_handler.rs b/src/meta/src/hummock/level_handler.rs index a3d096dc11d5b..42011463ad89e 100644 --- a/src/meta/src/hummock/level_handler.rs +++ b/src/meta/src/hummock/level_handler.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
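Editor's note: in the compaction_scheduler.rs changes above, every queued request now carries a `compact_task::TaskType`, `CompactionRequestChannel` still deduplicates per compaction group, and `init_selectors` builds one long-lived selector per task type that `pick_and_assign` receives by mutable reference. The following self-contained sketch shows that dispatch pattern with local stand-ins for the task-type enum and the `LevelSelector` trait; it is an illustration of the design, not code from the repository.

use std::collections::HashMap;

#[derive(PartialEq, Eq, Hash, Clone, Copy)]
enum TaskType {
    Dynamic,
    SpaceReclaim,
    Ttl,
}

trait Selector {
    fn name(&self) -> &'static str;
}

struct DynamicSelector;
struct SpaceReclaimSelector;
struct TtlSelector;

impl Selector for DynamicSelector {
    fn name(&self) -> &'static str { "dynamic" }
}
impl Selector for SpaceReclaimSelector {
    fn name(&self) -> &'static str { "space_reclaim" }
}
impl Selector for TtlSelector {
    fn name(&self) -> &'static str { "ttl" }
}

// Built once when the scheduler starts, mirroring `init_selectors` in the diff above.
fn init_selectors() -> HashMap<TaskType, Box<dyn Selector>> {
    let mut selectors: HashMap<TaskType, Box<dyn Selector>> = HashMap::new();
    selectors.insert(TaskType::Dynamic, Box::new(DynamicSelector));
    selectors.insert(TaskType::SpaceReclaim, Box::new(SpaceReclaimSelector));
    selectors.insert(TaskType::Ttl, Box::new(TtlSelector));
    selectors
}

fn main() {
    let mut selectors = init_selectors();
    // A dequeued request is a (compaction group id, task type) pair; the scheduler
    // looks up the matching selector and hands it to `pick_and_assign`.
    let (_group_id, task_type) = (2u64, TaskType::SpaceReclaim);
    let selector = selectors
        .get_mut(&task_type)
        .expect("a selector is registered for every task type");
    println!("dispatching to the {} selector", selector.name());
}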
diff --git a/src/meta/src/hummock/manager/compaction.rs b/src/meta/src/hummock/manager/compaction.rs index 51188c02c870e..3f3234c64c3c1 100644 --- a/src/meta/src/hummock/manager/compaction.rs +++ b/src/meta/src/hummock/manager/compaction.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,15 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use function_name::named; use itertools::Itertools; use risingwave_hummock_sdk::{CompactionGroupId, HummockCompactionTaskId, HummockContextId}; -use risingwave_pb::hummock::{CompactTaskAssignment, CompactionConfig}; +use risingwave_pb::hummock::{compact_task, CompactTaskAssignment}; -use crate::hummock::compaction::CompactStatus; -use crate::hummock::error::Result; +use crate::hummock::compaction::{CompactStatus, LevelSelector}; use crate::hummock::manager::read_lock; use crate::hummock::HummockManager; use crate::model::BTreeMapTransaction; @@ -34,6 +33,9 @@ pub struct Compaction { pub compaction_statuses: BTreeMap, pub deterministic_mode: bool, + + pub compaction_selectors: + HashMap>>, } impl Compaction { @@ -41,10 +43,10 @@ impl Compaction { pub fn cancel_assigned_tasks_for_context_ids( &mut self, context_ids: &[HummockContextId], - ) -> Result<( + ) -> ( BTreeMapTransaction<'_, CompactionGroupId, CompactStatus>, BTreeMapTransaction<'_, HummockCompactionTaskId, CompactTaskAssignment>, - )> { + ) { let mut compact_statuses = BTreeMapTransaction::new(&mut self.compaction_statuses); let mut compact_task_assignment = BTreeMapTransaction::new(&mut self.compact_task_assignment); @@ -84,7 +86,7 @@ impl Compaction { compact_task_assignment.remove(task_id); } } - Ok((compact_statuses, compact_task_assignment)) + (compact_statuses, compact_task_assignment) } } @@ -110,16 +112,6 @@ where .count() as u64 } - pub async fn get_compaction_config( - &self, - compaction_group_id: CompactionGroupId, - ) -> CompactionConfig { - self.compaction_group(compaction_group_id) - .await - .expect("compaction group exists") - .compaction_config() - } - #[named] pub async fn list_all_tasks_ids(&self) -> Vec { let compaction = read_lock!(self, compaction).await; @@ -179,9 +171,7 @@ mod tests { ); // irrelevant context id - let (compact_status, assignment) = compaction - .cancel_assigned_tasks_for_context_ids(&[22]) - .unwrap(); + let (compact_status, assignment) = compaction.cancel_assigned_tasks_for_context_ids(&[22]); compact_status.commit_memory(); assignment.commit_memory(); assert_eq!( @@ -203,9 +193,7 @@ mod tests { ); // target context id - let (compact_status, assignment) = compaction - .cancel_assigned_tasks_for_context_ids(&[11]) - .unwrap(); + let (compact_status, assignment) = compaction.cancel_assigned_tasks_for_context_ids(&[11]); compact_status.commit_memory(); assignment.commit_memory(); assert_eq!( diff --git a/src/meta/src/hummock/manager/compaction_group_manager.rs b/src/meta/src/hummock/manager/compaction_group_manager.rs index b86ad830e332e..811a9d5f2ac9e 100644 --- a/src/meta/src/hummock/manager/compaction_group_manager.rs +++ b/src/meta/src/hummock/manager/compaction_group_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); 
// you may not use this file except in compliance with the License. @@ -12,32 +12,43 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap}; use std::ops::DerefMut; +use std::sync::Arc; use function_name::named; use itertools::Itertools; -use risingwave_common::catalog::TableOption; +use risingwave_hummock_sdk::compaction_group::hummock_version_ext::{ + build_version_delta_after_version, get_compaction_group_ids, get_compaction_group_sst_ids, + get_member_table_ids, try_get_compaction_group_id_by_table_id, HummockVersionExt, + HummockVersionUpdateExt, +}; use risingwave_hummock_sdk::compaction_group::{StateTableId, StaticCompactionGroupId}; use risingwave_hummock_sdk::CompactionGroupId; +use risingwave_pb::hummock::group_delta::DeltaType; use risingwave_pb::hummock::rise_ctl_update_compaction_config_request::mutable_config::MutableConfig; -use risingwave_pb::hummock::CompactionConfig; -use tokio::sync::RwLock; +use risingwave_pb::hummock::{ + CompactionConfig, CompactionGroupInfo, GroupConstruct, GroupDelta, GroupDestroy, + GroupMetaChange, +}; +use tokio::sync::{OnceCell, RwLock}; -use super::versioning::Versioning; use super::write_lock; use crate::hummock::compaction::compaction_config::CompactionConfigBuilder; -use crate::hummock::compaction_group::CompactionGroup; use crate::hummock::error::{Error, Result}; -use crate::hummock::manager::HummockManager; -use crate::manager::{IdCategory, IdGeneratorManagerRef, MetaSrvEnv}; -use crate::model::{BTreeMapTransaction, MetadataModel, TableFragments, ValTransaction}; +use crate::hummock::manager::{drop_sst, read_lock, HummockManager}; +use crate::hummock::metrics_utils::remove_compaction_group_in_sst_stat; +use crate::hummock::model::CompactionGroup; +use crate::manager::{IdCategory, MetaSrvEnv}; +use crate::model::{ + BTreeMapEntryTransaction, BTreeMapTransaction, MetadataModel, TableFragments, ValTransaction, +}; use crate::storage::{MetaStore, Transaction}; impl HummockManager { pub(super) async fn build_compaction_group_manager( env: &MetaSrvEnv, - ) -> Result>> { + ) -> Result> { let config = CompactionConfigBuilder::new().build(); Self::build_compaction_group_manager_with_config(env, config).await } @@ -45,65 +56,34 @@ impl HummockManager { pub(super) async fn build_compaction_group_manager_with_config( env: &MetaSrvEnv, config: CompactionConfig, - ) -> Result>> { - let id_generator_ref = env.id_gen_manager_ref(); - let compaction_group_manager = RwLock::new(CompactionGroupManagerInner { - id_generator_ref, + ) -> Result> { + let compaction_group_manager = RwLock::new(CompactionGroupManager { compaction_groups: BTreeMap::new(), - index: BTreeMap::new(), + default_config: config, }); compaction_group_manager .write() .await - .init(&config, env.meta_store()) + .init(env.meta_store()) .await?; Ok(compaction_group_manager) } - pub async fn compaction_groups(&self) -> Vec { - self.compaction_group_manager - .read() - .await - .compaction_groups - .values() - .cloned() - .collect_vec() - } - - pub async fn compaction_groups_and_index( + /// Gets compaction group config for `compaction_group_id` if exists, or returns default. 
+ pub async fn get_compaction_group_config( &self, - ) -> ( - Vec, - BTreeMap, - ) { - let compaction_group_manager = self.compaction_group_manager.read().await; - ( - compaction_group_manager - .compaction_groups - .values() - .cloned() - .collect_vec(), - compaction_group_manager.index.clone(), - ) - } - - pub async fn compaction_group_ids(&self) -> Vec { + compaction_group_id: CompactionGroupId, + ) -> CompactionGroup { self.compaction_group_manager .read() .await - .compaction_groups - .values() - .map(|cg| cg.group_id) - .collect_vec() + .get_compaction_group_config(compaction_group_id) } - pub async fn compaction_group(&self, id: CompactionGroupId) -> Option { - self.compaction_group_manager - .read() - .await - .compaction_groups - .get(&id) - .cloned() + /// Should not be called inside [`HummockManager`], because it requests locks internally. + #[named] + pub async fn compaction_group_ids(&self) -> Vec { + get_compaction_group_ids(&read_lock!(self, versioning).await.current_version) } /// Registers `table_fragments` to compaction groups. @@ -116,7 +96,6 @@ impl HummockManager { .get("independent_compaction_group") .map(|s| s == "1") == Some(true); - let table_option = TableOption::build_table_option(table_properties); let mut pairs = vec![]; // materialized_view pairs.push(( @@ -126,7 +105,6 @@ impl HummockManager { } else { CompactionGroupId::from(StaticCompactionGroupId::MaterializedView) }, - table_option, )); // internal states for table_id in table_fragments.internal_table_ids() { @@ -138,134 +116,255 @@ impl HummockManager { } else { CompactionGroupId::from(StaticCompactionGroupId::StateDefault) }, - table_option, )); } - self.register_table_ids(&mut pairs).await + self.register_table_ids(&pairs).await?; + Ok(pairs.iter().map(|(table_id, ..)| *table_id).collect_vec()) } /// Unregisters `table_fragments` from compaction groups - pub async fn unregister_table_fragments(&self, table_fragments: &TableFragments) -> Result<()> { - self.unregister_table_ids(&table_fragments.all_table_ids().collect_vec()) - .await - } - - /// Unregisters stale members and groups - pub async fn purge_stale(&self, table_fragments_list: &[TableFragments]) -> Result<()> { - { - let mut guard = self.compaction_group_manager.write().await; - let registered_members = guard - .compaction_groups - .values() - .flat_map(|cg| cg.member_table_ids.iter()) - .cloned() - .collect_vec(); - let valid_ids = table_fragments_list - .iter() - .flat_map(|table_fragments| table_fragments.all_table_ids()) - .collect_vec(); - let to_unregister = registered_members - .into_iter() - .filter(|table_id| !valid_ids.contains(table_id)) - .collect_vec(); - guard - .unregister(None, &to_unregister, self.env.meta_store()) - .await?; - } - self.purge_stale_groups().await - } - - pub async fn internal_table_ids_by_compaction_group_id( + pub async fn unregister_table_fragments_vec( &self, - compaction_group_id: u64, - ) -> Result> { - let compaction_group_manager = self.compaction_group_manager.read().await; - let table_id_set = - compaction_group_manager.table_ids_by_compaction_group_id(compaction_group_id)?; - Ok(table_id_set) + table_fragments: &[TableFragments], + ) -> Result<()> { + self.unregister_table_ids( + &table_fragments + .iter() + .flat_map(|t| t.all_table_ids()) + .collect_vec(), + ) + .await } + /// Unregisters stale members and groups + /// The caller should ensure [`table_fragments_list`] remain unchanged during [`purge`]. + /// Currently [`purge`] is only called during meta service start ups. 
#[named] - pub async fn register_table_ids( - &self, - pairs: &mut [(StateTableId, CompactionGroupId, TableOption)], - ) -> Result> { - let mut versioning_guard = write_lock!(self, versioning).await; - let versioning = versioning_guard.deref_mut(); - self.compaction_group_manager - .write() - .await - .register(self, versioning, pairs, self.env.meta_store()) - .await + pub async fn purge(&self, table_fragments_list: &[TableFragments]) -> Result<()> { + let valid_ids = table_fragments_list + .iter() + .flat_map(|table_fragments| table_fragments.all_table_ids()) + .collect_vec(); + let registered_members = + get_member_table_ids(&read_lock!(self, versioning).await.current_version); + let to_unregister = registered_members + .into_iter() + .filter(|table_id| !valid_ids.contains(table_id)) + .collect_vec(); + // As we have released versioning lock, the version that `to_unregister` is calculated from + // may not be the same as the one used in unregister_table_ids. It is OK. + self.unregister_table_ids(&to_unregister).await?; + Ok(()) } + /// Prefer using [`register_table_fragments`]. + /// Use [`register_table_ids`] only when [`TableFragments`] is unavailable. #[named] - pub async fn register_new_group( + pub async fn register_table_ids( &self, - group_id: CompactionGroupId, - group_config: CompactionConfig, - tables: &[(StateTableId, TableOption)], + pairs: &[(StateTableId, CompactionGroupId)], ) -> Result<()> { + if pairs.is_empty() { + return Ok(()); + } let mut versioning_guard = write_lock!(self, versioning).await; let versioning = versioning_guard.deref_mut(); - self.compaction_group_manager - .write() - .await - .register_new_group( - self, - versioning, - group_id, - group_config, - tables, - self.env.meta_store(), - ) - .await - } + let current_version = &versioning.current_version; - #[named] - pub async fn remove_group_by_id(&self, group_id: CompactionGroupId) -> Result<()> { - let mut versioning_guard = write_lock!(self, versioning).await; - let versioning = versioning_guard.deref_mut(); - self.compaction_group_manager - .write() - .await - .remove_group_by_id(self, versioning, group_id, self.env.meta_store()) - .await + for (table_id, _) in pairs.iter() { + if let Some(old_group) = + try_get_compaction_group_id_by_table_id(current_version, *table_id) + { + return Err(Error::CompactionGroup(format!( + "table {} already in group {}", + *table_id, old_group + ))); + } + } + // All NewCompactionGroup pairs are mapped to one new compaction group. + let new_compaction_group_id: OnceCell = OnceCell::new(); + let mut new_version_delta = BTreeMapEntryTransaction::new_insert( + &mut versioning.hummock_version_deltas, + current_version.id + 1, + build_version_delta_after_version(current_version), + ); + + for (table_id, raw_group_id) in pairs.iter() { + let mut group_id = *raw_group_id; + if group_id == StaticCompactionGroupId::NewCompactionGroup as u64 { + let mut is_group_init = false; + group_id = *new_compaction_group_id + .get_or_try_init(|| async { + self.env + .id_gen_manager() + .generate::<{ IdCategory::CompactionGroup }>() + .await + .map(|new_group_id| { + is_group_init = true; + new_group_id + }) + }) + .await?; + if is_group_init { + let group_deltas = &mut new_version_delta + .group_deltas + .entry(group_id) + .or_default() + .group_deltas; + // The config for inexistent group may have been created in + // compaction test. 
+ let config = self + .compaction_group_manager + .read() + .await + .get_compaction_group_config(group_id) + .compaction_config + .as_ref() + .clone(); + group_deltas.push(GroupDelta { + delta_type: Some(DeltaType::GroupConstruct(GroupConstruct { + group_config: Some(config), + group_id, + ..Default::default() + })), + }); + } + } + let group_deltas = &mut new_version_delta + .group_deltas + .entry(group_id) + .or_default() + .group_deltas; + group_deltas.push(GroupDelta { + delta_type: Some(DeltaType::GroupMetaChange(GroupMetaChange { + table_ids_add: vec![*table_id], + ..Default::default() + })), + }); + } + + let mut trx = Transaction::default(); + new_version_delta.apply_to_txn(&mut trx)?; + self.env.meta_store().txn(trx).await?; + let sst_split_info = versioning + .current_version + .apply_version_delta(&new_version_delta); + assert!(sst_split_info.is_empty()); + new_version_delta.commit(); + + self.notify_last_version_delta(versioning); + + Ok(()) } + /// Prefer using [`unregister_table_fragments_vec`]. + /// Only Use [`unregister_table_ids`] only when [`TableFragments`] is unavailable. #[named] pub async fn unregister_table_ids(&self, table_ids: &[StateTableId]) -> Result<()> { + if table_ids.is_empty() { + return Ok(()); + } let mut versioning_guard = write_lock!(self, versioning).await; let versioning = versioning_guard.deref_mut(); + let current_version = &versioning.current_version; + + let mut new_version_delta = BTreeMapEntryTransaction::new_insert( + &mut versioning.hummock_version_deltas, + current_version.id + 1, + build_version_delta_after_version(current_version), + ); + + let mut modified_groups: HashMap = + HashMap::new(); + // Remove member tables + for table_id in table_ids.iter().unique() { + let group_id = match try_get_compaction_group_id_by_table_id(current_version, *table_id) + { + Some(group_id) => group_id, + None => continue, + }; + let group_deltas = &mut new_version_delta + .group_deltas + .entry(group_id) + .or_default() + .group_deltas; + group_deltas.push(GroupDelta { + delta_type: Some(DeltaType::GroupMetaChange(GroupMetaChange { + table_ids_remove: vec![*table_id], + ..Default::default() + })), + }); + modified_groups + .entry(group_id) + .and_modify(|count| *count -= 1) + .or_insert( + current_version + .get_compaction_group_levels(group_id) + .member_table_ids + .len() as u64 + - 1, + ); + } + + // Remove empty group, GC SSTs and remove metric. + let mut branched_ssts = BTreeMapTransaction::new(&mut versioning.branched_ssts); + let groups_to_remove = modified_groups + .into_iter() + .filter_map(|(group_id, member_count)| { + if member_count == 0 && group_id > StaticCompactionGroupId::End as CompactionGroupId + { + return Some(group_id); + } + None + }) + .collect_vec(); + for group_id in &groups_to_remove { + // We don't bother to add IntraLevelDelta to remove SSTs from group, because the entire + // group is to be removed. + // However, we need to take care of SST GC for the removed group. 
+ for sst_id in get_compaction_group_sst_ids(current_version, *group_id) { + if drop_sst(&mut branched_ssts, *group_id, sst_id) { + new_version_delta.gc_sst_ids.push(sst_id); + } + } + let group_deltas = &mut new_version_delta + .group_deltas + .entry(*group_id) + .or_default() + .group_deltas; + group_deltas.push(GroupDelta { + delta_type: Some(DeltaType::GroupDestroy(GroupDestroy {})), + }); + } + + let mut trx = Transaction::default(); + new_version_delta.apply_to_txn(&mut trx)?; + self.env.meta_store().txn(trx).await?; + let sst_split_info = versioning + .current_version + .apply_version_delta(&new_version_delta); + assert!(sst_split_info.is_empty()); + new_version_delta.commit(); + branched_ssts.commit_memory(); + + for group_id in &groups_to_remove { + remove_compaction_group_in_sst_stat(&self.metrics, *group_id); + } + self.notify_last_version_delta(versioning); + + // Purge may cause write to meta store. If it hurts performance while holding versioning + // lock, consider to make it in batch. self.compaction_group_manager .write() .await - .unregister(Some((self, versioning)), table_ids, self.env.meta_store()) - .await - } - - #[named] - async fn purge_stale_groups(&self) -> Result<()> { - let mut versioning_guard = write_lock!(self, versioning).await; - let versioning = versioning_guard.deref_mut(); - let mut guard = self.compaction_group_manager.write().await; - guard - .purge_stale_groups(self, versioning, self.env.meta_store()) + .purge( + &get_compaction_group_ids(&versioning.current_version), + self.env.meta_store(), + ) .await - } - - pub async fn get_table_option( - &self, - id: CompactionGroupId, - table_id: u32, - ) -> Result { - let compaction_group_manager = self.compaction_group_manager.read().await; - compaction_group_manager.table_option_by_table_id(id, table_id) - } - - pub async fn all_table_ids(&self) -> HashSet { - let compaction_group_manager = self.compaction_group_manager.read().await; - compaction_group_manager.all_table_ids() + .inspect_err(|e| tracing::warn!("failed to purge stale compaction group config. {}", e)) + .ok(); + Ok(()) } pub async fn update_compaction_config( @@ -283,16 +382,42 @@ impl HummockManager { ) .await } + + /// Gets complete compaction group info. 
+ /// It is the aggregate of `HummockVersion` and `CompactionGroupConfig` + #[named] + pub async fn list_compaction_group(&self) -> Vec { + let mut versioning_guard = write_lock!(self, versioning).await; + let versioning = versioning_guard.deref_mut(); + let current_version = &versioning.current_version; + let mut compaction_groups = vec![]; + for levels in current_version.levels.values() { + let config = self + .compaction_group_manager + .read() + .await + .get_compaction_group_config(levels.group_id) + .compaction_config; + let group = CompactionGroupInfo { + id: levels.group_id, + parent_id: levels.parent_group_id, + member_table_ids: levels.member_table_ids.clone(), + compaction_config: Some(config.as_ref().clone()), + }; + compaction_groups.push(group); + } + compaction_groups + } } -pub(super) struct CompactionGroupManagerInner { - id_generator_ref: IdGeneratorManagerRef, +#[derive(Default)] +pub(super) struct CompactionGroupManager { compaction_groups: BTreeMap, - index: BTreeMap, + default_config: CompactionConfig, } -impl CompactionGroupManagerInner { - async fn init(&mut self, config: &CompactionConfig, meta_store: &S) -> Result<()> { +impl CompactionGroupManager { + async fn init(&mut self, meta_store: &S) -> Result<()> { let loaded_compaction_groups: BTreeMap = CompactionGroup::list(meta_store) .await? @@ -301,381 +426,91 @@ impl CompactionGroupManagerInner { .collect(); if !loaded_compaction_groups.is_empty() { self.compaction_groups = loaded_compaction_groups; - } else { - let compaction_groups = &mut self.compaction_groups; - let mut new_compaction_groups = BTreeMapTransaction::new(compaction_groups); - let static_compaction_groups = vec![ - CompactionGroup::new(StaticCompactionGroupId::StateDefault.into(), config.clone()), - CompactionGroup::new( - StaticCompactionGroupId::MaterializedView.into(), - config.clone(), - ), - ]; - for static_compaction_group in static_compaction_groups { - new_compaction_groups - .insert(static_compaction_group.group_id(), static_compaction_group); - } - let mut trx = Transaction::default(); - new_compaction_groups.apply_to_txn(&mut trx)?; - meta_store.txn(trx).await?; - new_compaction_groups.commit(); - } - - // Build in-memory index - for (id, compaction_group) in &self.compaction_groups { - for member in &compaction_group.member_table_ids { - assert!(self.index.insert(*member, *id).is_none()); - } } - Ok(()) } - fn gen_compaction_group_snapshot( - compaction_groups: &BTreeMapTransaction<'_, CompactionGroupId, CompactionGroup>, - compaction_group_id_set: Vec, - ) -> HashMap { - compaction_group_id_set - .into_iter() - .filter_map(|group_id| { - compaction_groups - .get(&group_id) - .map(|group| (group_id, group.clone())) + fn get_compaction_group_config( + &self, + compaction_group_id: CompactionGroupId, + ) -> CompactionGroup { + self.compaction_groups + .get(&compaction_group_id) + .cloned() + .unwrap_or_else(|| { + CompactionGroup::new(compaction_group_id, self.default_config.clone()) }) - .collect() - } - - async fn register( - &mut self, - hummock_manager: &HummockManager, - versioning: &mut Versioning, - pairs: &mut [(StateTableId, CompactionGroupId, TableOption)], - meta_store: &S, - ) -> Result> { - for (table_id, new_compaction_group_id, _) in pairs.iter() { - if let Some(old_compaction_group_id) = self.index.get(table_id) { - if old_compaction_group_id != new_compaction_group_id { - return Err(Error::InvalidCompactionGroupMember(*table_id)); - } - } - } - let mut compaction_group_id_set = 
self.compaction_groups.keys().cloned().collect_vec(); - let old_id_cnt = compaction_group_id_set.len(); - let mut compaction_groups = BTreeMapTransaction::new(&mut self.compaction_groups); - for (table_id, compaction_group_id, table_option) in pairs.iter_mut() { - let mut compaction_group = - if *compaction_group_id == StaticCompactionGroupId::NewCompactionGroup as u64 { - *compaction_group_id = self - .id_generator_ref - .generate::<{ IdCategory::CompactionGroup }>() - .await?; - compaction_group_id_set.push(*compaction_group_id); - compaction_groups.insert( - *compaction_group_id, - CompactionGroup::new( - *compaction_group_id, - CompactionConfigBuilder::new().build(), - ), - ); - compaction_groups.get_mut(*compaction_group_id).unwrap() - } else { - compaction_groups - .get_mut(*compaction_group_id) - .ok_or(Error::InvalidCompactionGroup(*compaction_group_id))? - }; - compaction_group.member_table_ids.insert(*table_id); - compaction_group - .table_id_to_options - .insert(*table_id, *table_option); - } - let mut trx = Transaction::default(); - compaction_groups.apply_to_txn(&mut trx)?; - let mut trx_wrapper = Some(trx); - if compaction_group_id_set.len() > old_id_cnt { - hummock_manager - .sync_group( - None, - versioning, - &Self::gen_compaction_group_snapshot( - &compaction_groups, - compaction_group_id_set, - ), - &mut trx_wrapper, - ) - .await?; - } - if let Some(trx) = trx_wrapper.take() { - meta_store.txn(trx).await?; - } - compaction_groups.commit(); - - // Update in-memory index - for (table_id, compaction_group_id, _) in pairs.iter() { - self.index.insert(*table_id, *compaction_group_id); - } - Ok(pairs.iter().map(|(table_id, ..)| *table_id).collect_vec()) } - async fn unregister( + async fn update_compaction_config( &mut self, - empty_group_vacuum_context: Option<(&HummockManager, &mut Versioning)>, - table_ids: &[StateTableId], + compaction_group_ids: &[CompactionGroupId], + config_to_update: &[MutableConfig], meta_store: &S, ) -> Result<()> { - let compaction_group_id_set = self.compaction_groups.keys().cloned().collect_vec(); - let mut remove_some_group = false; let mut compaction_groups = BTreeMapTransaction::new(&mut self.compaction_groups); - for table_id in table_ids { - let compaction_group_id = self - .index - .get(table_id) - .cloned() - .ok_or(Error::InvalidCompactionGroupMember(*table_id))?; - let mut compaction_group = compaction_groups - .get_mut(compaction_group_id) - .ok_or(Error::InvalidCompactionGroup(compaction_group_id))?; - compaction_group.member_table_ids.remove(table_id); - compaction_group.table_id_to_options.remove(table_id); - if empty_group_vacuum_context.is_some() - && compaction_group_id > StaticCompactionGroupId::End as CompactionGroupId - && compaction_group.member_table_ids.is_empty() - { - remove_some_group = true; - compaction_groups.remove(compaction_group_id); + for compaction_group_id in compaction_group_ids { + if !compaction_groups.contains_key(compaction_group_id) { + compaction_groups.insert( + *compaction_group_id, + CompactionGroup::new(*compaction_group_id, self.default_config.clone()), + ); } + let group = compaction_groups.get(compaction_group_id).unwrap(); + let mut config = group.compaction_config.as_ref().clone(); + update_compaction_config(&mut config, config_to_update); + let mut new_group = group.clone(); + new_group.compaction_config = Arc::new(config); + compaction_groups.insert(*compaction_group_id, new_group); } let mut trx = Transaction::default(); compaction_groups.apply_to_txn(&mut trx)?; - let mut trx_wrapper = 
Some(trx); - if remove_some_group { - let (hummock_manager, versioning) = empty_group_vacuum_context.unwrap(); - hummock_manager - .sync_group( - None, - versioning, - &Self::gen_compaction_group_snapshot( - &compaction_groups, - compaction_group_id_set, - ), - &mut trx_wrapper, - ) - .await?; - } - if let Some(trx) = trx_wrapper.take() { - meta_store.txn(trx).await?; - } + meta_store.txn(trx).await?; compaction_groups.commit(); - - // Update in-memory index - for table_id in table_ids { - self.index.remove(table_id); - } Ok(()) } - async fn register_new_group( + /// Initializes the config for a group. + /// Should only be used by compaction test. + pub async fn init_compaction_config_for_replay( &mut self, - hummock_manager: &HummockManager, - versioning: &mut Versioning, group_id: CompactionGroupId, - group_config: CompactionConfig, - tables: &[(StateTableId, TableOption)], + config: CompactionConfig, meta_store: &S, ) -> Result<()> { - let mut compaction_group_id_set = self.compaction_groups.keys().cloned().collect_vec(); - let old_id_cnt = compaction_group_id_set.len(); - let is_exist = self.compaction_groups.contains_key(&group_id); - let mut compaction_groups = BTreeMapTransaction::new(&mut self.compaction_groups); - let compaction_group_id = if is_exist { - group_id - } else { - let id = if group_id == StaticCompactionGroupId::NewCompactionGroup as u64 { - self.id_generator_ref - .generate::<{ IdCategory::CompactionGroup }>() - .await? - } else { - group_id - }; - compaction_group_id_set.push(id); - compaction_groups.insert( - id, - CompactionGroup::new( - id, - CompactionConfigBuilder::with_config(group_config).build(), - ), - ); - id - }; - - let mut compaction_group = compaction_groups.get_mut(compaction_group_id).unwrap(); - for (table_id, table_option) in tables.iter() { - compaction_group.member_table_ids.insert(*table_id); - compaction_group - .table_id_to_options - .insert(*table_id, *table_option); - } - - let mut trx = Transaction::default(); - compaction_groups.apply_to_txn(&mut trx)?; - let mut trx_wrapper = Some(trx); - if compaction_group_id_set.len() > old_id_cnt { - hummock_manager - .sync_group( - None, - versioning, - &Self::gen_compaction_group_snapshot( - &compaction_groups, - compaction_group_id_set, - ), - &mut trx_wrapper, - ) - .await?; - } - if let Some(trx) = trx_wrapper.take() { - meta_store.txn(trx).await?; - } - compaction_groups.commit(); - - // Update in-memory index - for (table_id, _) in tables.iter() { - self.index.insert(*table_id, compaction_group_id); - } - - let table_ids = tables - .iter() - .map(|(table_id, _)| table_id) - .cloned() - .collect_vec(); - tracing::info!( - "Compaction group {}, registered table ids {:?}", - compaction_group_id, - table_ids + let insert = BTreeMapEntryTransaction::new_insert( + &mut self.compaction_groups, + group_id, + CompactionGroup { + group_id, + compaction_config: Arc::new(config), + }, ); + let mut trx = Transaction::default(); + insert.apply_to_txn(&mut trx)?; + meta_store.txn(trx).await?; + insert.commit(); Ok(()) } - async fn remove_group_by_id( - &mut self, - hummock_manager: &HummockManager, - versioning: &mut Versioning, - group_id: CompactionGroupId, - meta_store: &S, - ) -> Result<()> { - let compaction_group_id_set = self.compaction_groups.keys().cloned().collect_vec(); - let mut compaction_groups = BTreeMapTransaction::new(&mut self.compaction_groups); - let removed_group = compaction_groups.remove(group_id); - if let Some(group) = removed_group { - let mut trx = Transaction::default(); - 
compaction_groups.apply_to_txn(&mut trx)?; - let mut trx_wrapper = Some(trx); - hummock_manager - .sync_group( - None, - versioning, - &Self::gen_compaction_group_snapshot( - &compaction_groups, - compaction_group_id_set, - ), - &mut trx_wrapper, - ) - .await?; - if let Some(trx) = trx_wrapper.take() { - meta_store.txn(trx).await?; - } - compaction_groups.commit(); - // Update index - for table_id in group.member_table_ids() { - self.index.remove(table_id); - } - } - Ok(()) - } - - async fn purge_stale_groups( + /// Removes stale group configs. + async fn purge( &mut self, - hummock_manager: &HummockManager, - versioning: &mut Versioning, + existing_groups: &[CompactionGroupId], meta_store: &S, ) -> Result<()> { - let mut remove_some_group = false; - let compaction_group_ids = self.compaction_groups.keys().cloned().collect_vec(); let mut compaction_groups = BTreeMapTransaction::new(&mut self.compaction_groups); - for compaction_group_id in &compaction_group_ids { - let compaction_group = compaction_groups - .get_mut(*compaction_group_id) - .ok_or(Error::InvalidCompactionGroup(*compaction_group_id))?; - if *compaction_group_id > StaticCompactionGroupId::End as CompactionGroupId - && compaction_group.member_table_ids.is_empty() - { - remove_some_group = true; - compaction_groups.remove(*compaction_group_id); - } - } - if remove_some_group { - let mut trx = Transaction::default(); - compaction_groups.apply_to_txn(&mut trx)?; - let mut trx_wrapper = Some(trx); - hummock_manager - .sync_group( - None, - versioning, - &Self::gen_compaction_group_snapshot(&compaction_groups, compaction_group_ids), - &mut trx_wrapper, - ) - .await?; - if let Some(trx) = trx_wrapper.take() { - meta_store.txn(trx).await?; - } - compaction_groups.commit(); - } - Ok(()) - } - - fn compaction_group(&self, compaction_group_id: u64) -> Result<&CompactionGroup> { - match self.compaction_groups.get(&compaction_group_id) { - Some(compaction_group) => Ok(compaction_group), - - None => Err(Error::InvalidCompactionGroup(compaction_group_id)), + let stale_group = compaction_groups + .tree_ref() + .keys() + .cloned() + .filter(|k| !existing_groups.contains(k)) + .collect_vec(); + if stale_group.is_empty() { + return Ok(()); } - } - - pub fn table_ids_by_compaction_group_id( - &self, - compaction_group_id: u64, - ) -> Result> { - let compaction_group = self.compaction_group(compaction_group_id)?; - Ok(compaction_group.member_table_ids.clone()) - } - - pub fn table_option_by_table_id( - &self, - compaction_group_id: u64, - table_id: u32, - ) -> Result { - let compaction_group = self.compaction_group(compaction_group_id)?; - match compaction_group.table_id_to_options().get(&table_id) { - Some(table_option) => Ok(*table_option), - - None => Ok(TableOption::default()), - } - } - - fn all_table_ids(&self) -> HashSet { - self.index.keys().cloned().collect() - } - - async fn update_compaction_config( - &mut self, - compaction_group_ids: &[CompactionGroupId], - config_to_update: &[MutableConfig], - meta_store: &S, - ) -> Result<()> { - let mut compaction_groups = BTreeMapTransaction::new(&mut self.compaction_groups); - for compaction_group_id in compaction_group_ids { - if let Some(mut group) = compaction_groups.get_mut(*compaction_group_id) { - let config = &mut group.compaction_config; - update_compaction_config(config, config_to_update); - } + for group in stale_group { + compaction_groups.remove(group); } let mut trx = Transaction::default(); compaction_groups.apply_to_txn(&mut trx)?; @@ -719,143 +554,81 @@ fn 
update_compaction_config(target: &mut CompactionConfig, items: &[MutableConfi #[cfg(test)] mod tests { use std::collections::{BTreeMap, HashMap}; - use std::ops::Deref; - use risingwave_common::catalog::{TableId, TableOption}; + use risingwave_common::catalog::TableId; use risingwave_common::constants::hummock::PROPERTIES_RETENTION_SECOND_KEY; - use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; + use risingwave_pb::hummock::rise_ctl_update_compaction_config_request::mutable_config::MutableConfig; use risingwave_pb::meta::table_fragments::Fragment; - use risingwave_pb::stream_plan::StreamEnvironment; - use crate::hummock::manager::compaction_group_manager::CompactionGroupManagerInner; - use crate::hummock::manager::versioning::Versioning; use crate::hummock::test_utils::setup_compute_env; use crate::hummock::HummockManager; use crate::model::TableFragments; - use crate::storage::MemStore; #[tokio::test] async fn test_inner() { - let (env, hummock_manager_ref, ..) = setup_compute_env(8080).await; + let (env, ..) = setup_compute_env(8080).await; let inner = HummockManager::build_compaction_group_manager(&env) .await .unwrap(); - - let registered_number = |inner: &CompactionGroupManagerInner| { - inner - .compaction_groups - .values() - .map(|cg| cg.member_table_ids.len()) - .sum::() - }; - - let table_option_number = |inner: &CompactionGroupManagerInner| { - inner - .compaction_groups - .values() - .map(|cg| cg.table_id_to_options().len()) - .sum::() - }; - - assert!(inner.read().await.index.is_empty()); - assert_eq!(registered_number(inner.read().await.deref()), 0); - - let table_properties = HashMap::from([( - String::from(PROPERTIES_RETENTION_SECOND_KEY), - String::from("300"), - )]); - let table_option = TableOption::build_table_option(&table_properties); - - // Test register - inner - .write() - .await - .register( - &hummock_manager_ref, - &mut Versioning::default(), - &mut [( - 1u32, - StaticCompactionGroupId::StateDefault.into(), - table_option, - )], - env.meta_store(), - ) - .await - .unwrap(); + assert!(inner.read().await.compaction_groups.is_empty()); inner .write() .await - .register( - &hummock_manager_ref, - &mut Versioning::default(), - &mut [( - 2u32, - StaticCompactionGroupId::MaterializedView.into(), - table_option, - )], - env.meta_store(), - ) + .update_compaction_config(&[100, 200], &[], env.meta_store()) .await .unwrap(); - assert_eq!(inner.read().await.index.len(), 2); - assert_eq!(registered_number(inner.read().await.deref()), 2); + assert_eq!(inner.read().await.compaction_groups.len(), 2); // Test init let inner = HummockManager::build_compaction_group_manager(&env) .await .unwrap(); - assert_eq!(inner.read().await.index.len(), 2); - assert_eq!(registered_number(inner.read().await.deref()), 2); - assert_eq!(table_option_number(inner.read().await.deref()), 2); + assert_eq!(inner.read().await.compaction_groups.len(), 2); - // Test unregister inner .write() .await - .unregister( - Some((&hummock_manager_ref, &mut Versioning::default())), - &[2u32], + .update_compaction_config( + &[100, 300], + &[MutableConfig::MaxSubCompaction(123)], env.meta_store(), ) .await .unwrap(); - assert_eq!(inner.read().await.index.len(), 1); - assert_eq!(registered_number(inner.read().await.deref()), 1); - assert_eq!(table_option_number(inner.read().await.deref()), 1); - - // Test init - let inner = HummockManager::build_compaction_group_manager(&env) - .await - .unwrap(); - assert_eq!(inner.read().await.index.len(), 1); - 
assert_eq!(registered_number(inner.read().await.deref()), 1); - assert_eq!(table_option_number(inner.read().await.deref()), 1); - - // Test table_option_by_table_id - { - let table_option = inner + assert_eq!(inner.read().await.compaction_groups.len(), 3); + assert_eq!( + inner .read() .await - .table_option_by_table_id(StaticCompactionGroupId::StateDefault.into(), 1u32) - .unwrap(); - assert_eq!(300, table_option.retention_seconds.unwrap()); - } - - { - // unregistered table_id - let table_option_default = inner + .get_compaction_group_config(100) + .compaction_config + .max_sub_compaction, + 123 + ); + assert_ne!( + inner .read() .await - .table_option_by_table_id(StaticCompactionGroupId::StateDefault.into(), 2u32); - assert!(table_option_default.is_ok()); - assert_eq!(None, table_option_default.unwrap().retention_seconds); - } + .get_compaction_group_config(200) + .compaction_config + .max_sub_compaction, + 123 + ); + assert_eq!( + inner + .read() + .await + .get_compaction_group_config(300) + .compaction_config + .max_sub_compaction, + 123 + ); } #[tokio::test] async fn test_manager() { let (_, compaction_group_manager, ..) = setup_compute_env(8080).await; - let table_fragment_1 = TableFragments::new( + let table_fragment_1 = TableFragments::for_test( TableId::new(10), BTreeMap::from([( 1, @@ -865,9 +638,8 @@ mod tests { ..Default::default() }, )]), - StreamEnvironment::default(), ); - let table_fragment_2 = TableFragments::new( + let table_fragment_2 = TableFragments::for_test( TableId::new(20), BTreeMap::from([( 2, @@ -877,19 +649,19 @@ mod tests { ..Default::default() }, )]), - StreamEnvironment::default(), ); // Test register_table_fragments let registered_number = || async { compaction_group_manager - .compaction_groups() + .list_compaction_group() .await .iter() .map(|cg| cg.member_table_ids.len()) .sum::() }; - let group_number = || async { compaction_group_manager.compaction_groups().await.len() }; + let group_number = + || async { compaction_group_manager.list_compaction_group().await.len() }; assert_eq!(registered_number().await, 0); let mut table_properties = HashMap::from([( String::from(PROPERTIES_RETENTION_SECOND_KEY), @@ -909,18 +681,18 @@ mod tests { // Test unregister_table_fragments compaction_group_manager - .unregister_table_fragments(&table_fragment_1) + .unregister_table_fragments_vec(&[table_fragment_1.clone()]) .await .unwrap(); assert_eq!(registered_number().await, 4); // Test purge_stale_members: table fragments compaction_group_manager - .purge_stale(&[table_fragment_2]) + .purge(&[table_fragment_2]) .await .unwrap(); assert_eq!(registered_number().await, 4); - compaction_group_manager.purge_stale(&[]).await.unwrap(); + compaction_group_manager.purge(&[]).await.unwrap(); assert_eq!(registered_number().await, 0); // Test `StaticCompactionGroupId::NewCompactionGroup` in `register_table_fragments` @@ -934,11 +706,11 @@ mod tests { .await .unwrap(); assert_eq!(registered_number().await, 4); - assert_eq!(group_number().await, 6); + assert_eq!(group_number().await, 3); // Test `StaticCompactionGroupId::NewCompactionGroup` in `unregister_table_fragments` compaction_group_manager - .unregister_table_fragments(&table_fragment_1) + .unregister_table_fragments_vec(&[table_fragment_1]) .await .unwrap(); assert_eq!(registered_number().await, 0); diff --git a/src/meta/src/hummock/manager/context.rs b/src/meta/src/hummock/manager/context.rs index 2b92292bb4cd6..714d80658a579 100644 --- a/src/meta/src/hummock/manager/context.rs +++ b/src/meta/src/hummock/manager/context.rs 
@@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ use crate::hummock::manager::{ commit_multi_var, read_lock, start_measure_real_process_timer, write_lock, }; use crate::hummock::HummockManager; +use crate::manager::META_NODE_ID; use crate::model::{BTreeMapTransaction, ValTransaction}; use crate::storage::{MetaStore, Transaction}; @@ -54,7 +55,7 @@ where let mut compaction_guard = write_lock!(self, compaction).await; let compaction = compaction_guard.deref_mut(); let (compact_statuses, compact_task_assignment) = - compaction.cancel_assigned_tasks_for_context_ids(context_ids.as_ref())?; + compaction.cancel_assigned_tasks_for_context_ids(context_ids.as_ref()); for context_id in context_ids.as_ref() { self.compactor_manager .purge_heartbeats_for_context(*context_id); @@ -187,4 +188,8 @@ where .await; Ok(()) } + + pub async fn release_meta_context(&self) -> Result<()> { + self.release_contexts([META_NODE_ID]).await + } } diff --git a/src/meta/src/hummock/manager/gc.rs b/src/meta/src/hummock/manager/gc.rs index 708ab0643a745..547eee0dce6bc 100644 --- a/src/meta/src/hummock/manager/gc.rs +++ b/src/meta/src/hummock/manager/gc.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ use risingwave_hummock_sdk::{HummockSstableId, HummockVersionId, INVALID_VERSION use crate::hummock::error::Result; use crate::hummock::manager::{commit_multi_var, read_lock, write_lock}; +use crate::hummock::metrics_utils::trigger_stale_ssts_stat; use crate::hummock::HummockManager; use crate::model::{BTreeMapTransaction, ValTransaction}; use crate::storage::{MetaStore, Transaction}; @@ -59,6 +60,7 @@ where HashSet::from_iter(versioning_guard.ssts_to_delete.values().cloned()); deltas_to_delete.retain(|id| !remain_deltas.contains(id)); versioning_guard.deltas_to_delete.extend(deltas_to_delete); + trigger_stale_ssts_stat(&self.metrics, versioning_guard.ssts_to_delete.len()); Ok(()) } @@ -108,12 +110,13 @@ where .iter() .filter(|sst_id| !tracked_sst_ids.contains(sst_id)) .collect_vec(); - tracing::info!("SST to delete in full GC: {:#?}", to_delete); - write_lock!(self, versioning).await.ssts_to_delete.extend( + let mut versioning_guard = write_lock!(self, versioning).await; + versioning_guard.ssts_to_delete.extend( to_delete .iter() .map(|sst_id| (**sst_id, INVALID_VERSION_ID)), ); + trigger_stale_ssts_stat(&self.metrics, versioning_guard.ssts_to_delete.len()); to_delete.len() } } diff --git a/src/meta/src/hummock/manager/mod.rs b/src/meta/src/hummock/manager/mod.rs index 97b5bcc44a84f..73faf60da8f9d 100644 --- a/src/meta/src/hummock/manager/mod.rs +++ b/src/meta/src/hummock/manager/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use core::panic; use std::borrow::{Borrow, BorrowMut}; use std::collections::{BTreeMap, HashMap, HashSet}; use std::ops::Bound::{Excluded, Included}; @@ -27,40 +28,41 @@ use risingwave_common::monitor::rwlock::MonitoredRwLock; use risingwave_common::util::epoch::{Epoch, INVALID_EPOCH}; use risingwave_hummock_sdk::compact::compact_task_to_string; use risingwave_hummock_sdk::compaction_group::hummock_version_ext::{ - add_new_sub_level, HummockLevelsExt, HummockVersionExt, HummockVersionUpdateExt, + add_new_sub_level, build_initial_compaction_group_levels, build_version_delta_after_version, + get_member_table_ids, try_get_compaction_group_id_by_table_id, HummockVersionExt, + HummockVersionUpdateExt, }; use risingwave_hummock_sdk::{ CompactionGroupId, ExtendedSstableInfo, HummockCompactionTaskId, HummockContextId, HummockEpoch, HummockSstableId, HummockVersionId, SstIdRange, FIRST_VERSION_ID, INVALID_VERSION_ID, }; -use risingwave_pb::hummock::compact_task::TaskStatus; +use risingwave_pb::hummock::compact_task::{self, TaskStatus}; use risingwave_pb::hummock::group_delta::DeltaType; -use risingwave_pb::hummock::hummock_version::Levels; use risingwave_pb::hummock::subscribe_compact_tasks_response::Task; -#[cfg(any(test, feature = "test"))] -use risingwave_pb::hummock::CompactionConfig; use risingwave_pb::hummock::{ - pin_version_response, CompactTask, CompactTaskAssignment, GroupConstruct, GroupDelta, - GroupDestroy, HummockPinnedSnapshot, HummockPinnedVersion, HummockSnapshot, HummockVersion, + version_update_payload, CompactTask, CompactTaskAssignment, CompactionConfig, GroupDelta, + HummockPinnedSnapshot, HummockPinnedVersion, HummockSnapshot, HummockVersion, HummockVersionDelta, HummockVersionDeltas, HummockVersionStats, IntraLevelDelta, LevelType, }; use risingwave_pb::meta::subscribe_response::{Info, Operation}; -use risingwave_pb::meta::MetaLeaderInfo; use tokio::sync::oneshot::Sender; -use tokio::sync::{Notify, RwLockReadGuard, RwLockWriteGuard}; +use tokio::sync::{Notify, RwLockWriteGuard}; use tokio::task::JoinHandle; -use crate::hummock::compaction::{CompactStatus, LocalSelectorStatistic, ManualCompactionOption}; -use crate::hummock::compaction_group::CompactionGroup; +use crate::hummock::compaction::{ + CompactStatus, LevelSelector, LocalSelectorStatistic, ManualCompactionOption, +}; use crate::hummock::compaction_scheduler::CompactionRequestChannelRef; use crate::hummock::error::{Error, Result}; use crate::hummock::metrics_utils::{ - remove_compaction_group_in_sst_stat, trigger_pin_unpin_snapshot_state, - trigger_pin_unpin_version_state, trigger_sst_stat, trigger_version_stat, + trigger_pin_unpin_snapshot_state, trigger_pin_unpin_version_state, trigger_sst_stat, + trigger_version_stat, }; use crate::hummock::CompactorManagerRef; -use crate::manager::{ClusterManagerRef, IdCategory, LocalNotification, MetaSrvEnv, META_NODE_ID}; +use crate::manager::{ + CatalogManagerRef, ClusterManagerRef, IdCategory, LocalNotification, MetaSrvEnv, META_NODE_ID, +}; use crate::model::{ BTreeMapEntryTransaction, BTreeMapTransaction, MetadataModel, ValTransaction, VarTransaction, }; @@ -90,11 +92,11 @@ type Snapshot = ArcSwap; pub struct HummockManager { env: MetaSrvEnv, cluster_manager: ClusterManagerRef, - + catalog_manager: CatalogManagerRef, // `CompactionGroupManager` manages `CompactionGroup`'s members. // Note that all hummock state store user should register to `CompactionGroupManager`. It // includes all state tables of streaming jobs except sink. 
- compaction_group_manager: tokio::sync::RwLock>, + compaction_group_manager: tokio::sync::RwLock, // When trying to locks compaction and versioning at the same time, compaction lock should // be requested before versioning lock. compaction: MonitoredRwLock, @@ -128,7 +130,7 @@ macro_rules! commit_multi_var { $val_txn.apply_to_txn(&mut trx)?; )* // Commit to state store - $hummock_mgr.commit_trx($hummock_mgr.env.meta_store(), trx, $context_id, $hummock_mgr.env.get_leader_info()) + $hummock_mgr.commit_trx($hummock_mgr.env.meta_store(), trx, $context_id) .await?; // Upon successful commit, commit the change to local in-mem state $( @@ -156,13 +158,13 @@ macro_rules! read_lock { }; } pub(crate) use read_lock; -use risingwave_hummock_sdk::compaction_group::StateTableId; +use risingwave_hummock_sdk::compaction_group::{StateTableId, StaticCompactionGroupId}; use risingwave_hummock_sdk::table_stats::{ add_prost_table_stats_map, purge_prost_table_stats, ProstTableStatsMap, }; use risingwave_pb::catalog::Table; -use risingwave_pb::hummock::pin_version_response::Payload; -use risingwave_pb::hummock::CompactionGroup as ProstCompactionGroup; +use risingwave_pb::hummock::version_update_payload::Payload; +use risingwave_pb::hummock::CompactionGroupInfo as ProstCompactionGroup; /// Acquire write lock of the lock with `lock_name`. /// The macro will use macro `function_name` to get the name of the function of method that calls @@ -191,8 +193,10 @@ macro_rules! start_measure_real_process_timer { } pub(crate) use start_measure_real_process_timer; -use self::compaction_group_manager::CompactionGroupManagerInner; +use super::compaction::ManualCompactionSelector; use super::Compactor; +use crate::hummock::compaction::compaction_config::CompactionConfigBuilder; +use crate::hummock::manager::compaction_group_manager::CompactionGroupManager; use crate::hummock::manager::worker::HummockManagerEventSender; static CANCEL_STATUS_SET: LazyLock> = LazyLock::new(|| { @@ -224,14 +228,16 @@ where cluster_manager: ClusterManagerRef, metrics: Arc, compactor_manager: CompactorManagerRef, + catalog_manager: CatalogManagerRef, ) -> Result> { let compaction_group_manager = Self::build_compaction_group_manager(&env).await?; - Self::with_compaction_group_manager( + Self::new_impl( env, cluster_manager, metrics, compactor_manager, compaction_group_manager, + catalog_manager, ) .await } @@ -243,25 +249,32 @@ where metrics: Arc, compactor_manager: CompactorManagerRef, config: CompactionConfig, - ) -> Result> { + ) -> HummockManagerRef { + use crate::manager::CatalogManager; let compaction_group_manager = - Self::build_compaction_group_manager_with_config(&env, config).await?; - Self::with_compaction_group_manager( + Self::build_compaction_group_manager_with_config(&env, config) + .await + .unwrap(); + let catalog_manager = Arc::new(CatalogManager::new(env.clone()).await.unwrap()); + Self::new_impl( env, cluster_manager, metrics, compactor_manager, compaction_group_manager, + catalog_manager, ) .await + .unwrap() } - async fn with_compaction_group_manager( + async fn new_impl( env: MetaSrvEnv, cluster_manager: ClusterManagerRef, metrics: Arc, compactor_manager: CompactorManagerRef, - compaction_group_manager: tokio::sync::RwLock>, + compaction_group_manager: tokio::sync::RwLock, + catalog_manager: CatalogManagerRef, ) -> Result> { let (tx, rx) = tokio::sync::mpsc::unbounded_channel(); let instance = HummockManager { @@ -276,6 +289,7 @@ where ), metrics, cluster_manager, + catalog_manager, compaction_group_manager, 
compaction_request_channel: parking_lot::RwLock::new(None), compaction_resume_notifier: parking_lot::RwLock::new(None), @@ -293,7 +307,7 @@ where instance.release_invalid_contexts().await?; instance.cancel_unassigned_compaction_task().await?; // Release snapshots pinned by meta on restarting. - instance.release_contexts([META_NODE_ID]).await?; + instance.release_meta_context().await?; Ok(instance) } @@ -386,19 +400,6 @@ where .map(|version_delta| (version_delta.id, version_delta)) .collect(); - if let Some((_, last_version_delta)) = hummock_version_deltas.last_key_value() { - for (compaction_group_id, group_deltas) in last_version_delta.get_group_deltas() { - if group_deltas.get_group_deltas().iter().any(|group_delta| { - matches!( - group_delta.delta_type.as_ref().unwrap(), - DeltaType::GroupDestroy(_) - ) - }) { - remove_compaction_group_in_sst_stat(&self.metrics, *compaction_group_id); - } - } - } - // Insert the initial version. let mut redo_state = if versions.is_empty() { let mut init_version = HummockVersion { @@ -408,12 +409,14 @@ where safe_epoch: INVALID_EPOCH, }; // Initialize independent levels via corresponding compaction groups' config. - for compaction_group in self.compaction_groups().await { + let default_compaction_config = CompactionConfigBuilder::new().build(); + for group_id in [ + StaticCompactionGroupId::StateDefault as CompactionGroupId, + StaticCompactionGroupId::MaterializedView as CompactionGroupId, + ] { init_version.levels.insert( - compaction_group.group_id(), - ::build_initial_levels( - &compaction_group.compaction_config(), - ), + group_id, + build_initial_compaction_group_levels(group_id, &default_compaction_config), ); } versioning_guard.version_stats = HummockVersionStats::default(); @@ -466,7 +469,7 @@ where let checkpoint_id = versioning_guard.checkpoint_version.id; versioning_guard.ssts_to_delete.clear(); - versioning_guard.extend_ssts_to_delete_from_deltas(..=checkpoint_id); + versioning_guard.extend_ssts_to_delete_from_deltas(..=checkpoint_id, &self.metrics); let preserved_deltas: HashSet = HashSet::from_iter(versioning_guard.ssts_to_delete.values().cloned()); versioning_guard.deltas_to_delete = versioning_guard @@ -491,7 +494,6 @@ where meta_store: &S, trx: Transaction, context_id: Option, - _info: MetaLeaderInfo, ) -> Result<()> { if let Some(context_id) = context_id { if context_id == META_NODE_ID { @@ -511,7 +513,7 @@ where pub async fn pin_version( &self, context_id: HummockContextId, - ) -> Result { + ) -> Result { let mut versioning_guard = write_lock!(self, versioning).await; let _timer = start_measure_real_process_timer!(self); let versioning = versioning_guard.deref_mut(); @@ -728,10 +730,12 @@ where pub async fn get_compact_task_impl( &self, compaction_group_id: CompactionGroupId, - manual_compaction_option: Option, + selector: &mut Box, ) -> Result> { let mut compaction_guard = write_lock!(self, compaction).await; let compaction = compaction_guard.deref_mut(); + let compaction_statuses = &mut compaction.compaction_statuses; + let start_time = Instant::now(); // StoredIdGenerator already implements ids pre-allocation by ID_PREALLOCATE_INTERVAL. 
let task_id = self @@ -739,31 +743,19 @@ where .id_gen_manager() .generate::<{ IdCategory::HummockCompactionTask }>() .await?; - let group_config = self - .compaction_group(compaction_group_id) - .await - .ok_or(Error::InvalidCompactionGroup(compaction_group_id))?; - let all_table_ids = self.all_table_ids().await; - if !compaction - .compaction_statuses - .contains_key(&compaction_group_id) - { - let mut compact_statuses = - BTreeMapTransaction::new(&mut compaction.compaction_statuses); - let new_compact_status = compact_statuses.new_entry_insert_txn( - compaction_group_id, - CompactStatus::new( - compaction_group_id, - group_config.compaction_config().max_level, - ), - ); - commit_multi_var!(self, None, Transaction::default(), new_compact_status)?; - } + + let group_config = self.get_compaction_group_config(compaction_group_id).await; + self.precheck_compaction_group( + compaction_group_id, + compaction_statuses, + &group_config.compaction_config, + ) + .await?; + let mut compact_status = match compaction.compaction_statuses.get_mut(&compaction_group_id) { Some(c) => VarTransaction::new(c), None => { - // sync_group has not been called for this group, which means no data even written. return Ok(None); } }; @@ -778,18 +770,19 @@ where (versioning_guard.current_version.clone(), watermark) }; if current_version.levels.get(&compaction_group_id).is_none() { - // sync_group has not been called for this group, which means no data even written. + // compaction group has been deleted. return Ok(None); } - let can_trivial_move = manual_compaction_option.is_none(); + + let can_trivial_move = matches!(selector.task_type(), compact_task::TaskType::Dynamic); + let mut stats = LocalSelectorStatistic::default(); let compact_task = compact_status.get_compact_task( current_version.get_compaction_group_levels(compaction_group_id), task_id as HummockCompactionTaskId, - compaction_group_id, - manual_compaction_option, - group_config.compaction_config(), + &group_config, &mut stats, + selector, ); stats.report_to_metrics(compaction_group_id, self.metrics.as_ref()); let mut compact_task = match compact_task { @@ -815,6 +808,7 @@ where start_time.elapsed() ); } else { + let all_table_ids = get_member_table_ids(¤t_version); // to get all relational table_id from sst_info let table_ids = compact_task .input_ssts @@ -834,15 +828,14 @@ where } } - // build table_options - compact_task.table_options = group_config - .table_id_to_options() + compact_task.table_options = self + .catalog_manager + .get_table_options(&compact_task.existing_table_ids) + .await .iter() - .filter(|id_to_option| compact_task.existing_table_ids.contains(id_to_option.0)) - .map(|id_to_option| (*id_to_option.0, id_to_option.1.into())) + .map(|(k, v)| (*k, v.into())) .collect(); compact_task.current_epoch_time = Epoch::now().0; - compact_task.compaction_filter_mask = group_config.compaction_config.compaction_filter_mask; commit_multi_var!(self, None, Transaction::default(), compact_status)?; @@ -893,15 +886,36 @@ where .await } + // need mutex protect + async fn precheck_compaction_group( + &self, + compaction_group_id: CompactionGroupId, + compaction_statuses: &mut BTreeMap, + compaction_config: &CompactionConfig, + ) -> Result<()> { + if !compaction_statuses.contains_key(&compaction_group_id) { + let mut compact_statuses = BTreeMapTransaction::new(compaction_statuses); + let new_compact_status = compact_statuses.new_entry_insert_txn( + compaction_group_id, + CompactStatus::new(compaction_group_id, compaction_config.max_level), + ); + 
commit_multi_var!(self, None, Transaction::default(), new_compact_status)?; + } + + Ok(()) + } + pub async fn get_compact_task( &self, compaction_group_id: CompactionGroupId, + selector: &mut Box, ) -> Result> { fail_point!("fp_get_compact_task", |_| Err(Error::MetaStore( anyhow::anyhow!("failpoint metastore error") ))); + while let Some(task) = self - .get_compact_task_impl(compaction_group_id, None) + .get_compact_task_impl(compaction_group_id, selector) .await? { if let TaskStatus::Pending = task.task_status() { @@ -909,6 +923,7 @@ where } assert!(CompactStatus::is_trivial_move_task(&task)); } + Ok(None) } @@ -917,7 +932,9 @@ where compaction_group_id: CompactionGroupId, manual_compaction_option: ManualCompactionOption, ) -> Result> { - self.get_compact_task_impl(compaction_group_id, Some(manual_compaction_option)) + let mut selector: Box = + Box::new(ManualCompactionSelector::new(manual_compaction_option)); + self.get_compact_task(compaction_group_id, &mut selector) .await } @@ -1052,15 +1069,8 @@ where let deterministic_mode = self.env.opts.compaction_deterministic_test; let compaction = compaction_guard.deref_mut(); let start_time = Instant::now(); - let compaction_groups: HashSet<_> = - HashSet::from_iter(self.compaction_group_ids().await.into_iter()); let original_keys = compaction.compaction_statuses.keys().cloned().collect_vec(); let mut compact_statuses = BTreeMapTransaction::new(&mut compaction.compaction_statuses); - for group_id in original_keys { - if !compaction_groups.contains(&group_id) { - compact_statuses.remove(group_id); - } - } let assigned_task_num = compaction.compact_task_assignment.len(); let mut compact_task_assignment = BTreeMapTransaction::new(&mut compaction.compact_task_assignment); @@ -1109,6 +1119,13 @@ where let mut versioning_guard = write_lock!(self, versioning).await; let versioning = versioning_guard.deref_mut(); let current_version = &mut versioning.current_version; + // purge stale compact_status + for group_id in original_keys { + if !current_version.levels.contains_key(&group_id) { + compact_statuses.remove(group_id); + compaction.compaction_selectors.remove(&group_id); + } + } let is_success = if let TaskStatus::Success = compact_task.task_status() { let is_expired = !current_version .get_levels() @@ -1155,21 +1172,7 @@ where trigger_version_stat(&self.metrics, current_version, &versioning.version_stats); if !deterministic_mode { - self.env - .notification_manager() - .notify_hummock_without_version( - Operation::Add, - Info::HummockVersionDeltas( - risingwave_pb::hummock::HummockVersionDeltas { - version_deltas: vec![versioning - .hummock_version_deltas - .last_key_value() - .unwrap() - .1 - .clone()], - }, - ), - ); + self.notify_last_version_delta(versioning); } } else { // The compaction task is cancelled or failed. @@ -1184,7 +1187,8 @@ where } let task_status = compact_task.task_status(); - let task_label = task_status.as_str_name(); + let task_status_label = task_status.as_str_name(); + let task_type_label = compact_task.task_type().as_str_name(); if let Some(context_id) = assignee_context_id { // A task heartbeat is removed IFF we report the task status of a task and it still has // a valid assignment, OR we remove the node context from our list of nodes, @@ -1202,6 +1206,7 @@ where original_task_num: assigned_task_num, }); } + // Update compaction task count. 
// // A corner case is that the compactor is deleted @@ -1214,18 +1219,31 @@ where .with_label_values(&[ &format!("{}:{}", host.host, host.port), &compact_task.compaction_group_id.to_string(), - task_label, + task_type_label, + task_status_label, ]) .inc(); } } else { - // Update compaction task count. The task will be marked as `unassigned`. + // There are two cases where assignee_context_id is not available + // 1. compactor does not exist + // 2. trivial_move + + let label = if CompactStatus::is_trivial_move_task(compact_task) { + // TODO: only support can_trivial_move in DynamicLevelCompcation, will check + // task_type next PR + "trivial-move" + } else { + "unassigned" + }; + self.metrics .compact_frequency .with_label_values(&[ - "unassigned", + label, &compact_task.compaction_group_id.to_string(), - task_label, + task_type_label, + task_status_label, ]) .inc(); } @@ -1245,8 +1263,13 @@ where compact_task.compaction_group_id, ); - if !deterministic_mode { - self.try_send_compaction_request(compact_task.compaction_group_id); + if !deterministic_mode + && matches!(compact_task.task_type(), compact_task::TaskType::Dynamic) + { + self.try_send_compaction_request( + compact_task.compaction_group_id, + compact_task.task_type(), + ); } #[cfg(test)] @@ -1258,219 +1281,6 @@ where Ok(true) } - async fn sync_group<'a>( - &'a self, - compaction: Option<&'a Compaction>, - versioning: &'a mut Versioning, - compaction_groups: &HashMap, - trx_extern_part: &mut Option, - ) -> Result> { - // We need 2 steps to sync groups: - // Insert new groups that are not in current `HummockVersion`; - // Delete old groups that still remain in current `HummockVersion`. - let old_version = versioning.current_version.clone(); - let old_version_groups = old_version - .get_levels() - .iter() - .map(|(group_id, _)| *group_id) - .collect_vec(); - let new_version_id = old_version.id + 1; - let mut new_version_delta = BTreeMapEntryTransaction::new_insert( - &mut versioning.hummock_version_deltas, - new_version_id, - HummockVersionDelta { - prev_id: old_version.id, - safe_epoch: old_version.safe_epoch, - trivial_move: false, - ..Default::default() - }, - ); - - let mut new_hummock_version = old_version; - new_version_delta.id = new_version_id; - new_hummock_version.id = new_version_id; - - if old_version_groups - .iter() - .all(|group_id| compaction_groups.contains_key(group_id)) - && compaction_groups - .keys() - .all(|group_id| new_hummock_version.levels.contains_key(group_id)) - { - return Ok(Some(( - new_version_delta.key, - new_version_delta.new_value, - new_hummock_version, - ))); - } - - let mut deleted_compaction_groups = vec![]; - let mut branched_ssts = BTreeMapTransaction::new(&mut versioning.branched_ssts); - for group_id in old_version_groups { - if !compaction_groups.contains_key(&group_id) { - let group_deltas = &mut new_version_delta - .group_deltas - .entry(group_id) - .or_default() - .group_deltas; - let levels = new_hummock_version.get_levels().get(&group_id).unwrap(); - deleted_compaction_groups.push(group_id); - let mut gc_sst_ids = vec![]; - if let Some(ref l0) = levels.l0 { - for sub_level in l0.get_sub_levels() { - group_deltas.push(GroupDelta { - delta_type: Some(DeltaType::IntraLevel(IntraLevelDelta { - level_idx: sub_level.level_idx, - l0_sub_level_id: sub_level.sub_level_id, - removed_table_ids: sub_level - .get_table_infos() - .iter() - .map(|info| { - let id = info.id; - if drop_sst(&mut branched_ssts, group_id, id) { - gc_sst_ids.push(id); - } - id - }) - .collect(), - ..Default::default() - 
})), - }); - } - } - for level in &levels.levels { - group_deltas.push(GroupDelta { - delta_type: Some(DeltaType::IntraLevel(IntraLevelDelta { - level_idx: level.level_idx, - removed_table_ids: level - .get_table_infos() - .iter() - .map(|info| { - let id = info.id; - if drop_sst(&mut branched_ssts, group_id, id) { - gc_sst_ids.push(id); - } - id - }) - .collect(), - ..Default::default() - })), - }); - } - group_deltas.push(GroupDelta { - delta_type: Some(DeltaType::GroupDestroy(GroupDestroy {})), - }); - new_version_delta.gc_sst_ids.append(&mut gc_sst_ids); - new_hummock_version.levels.remove(&group_id); - } - } - let mut new_groups = vec![]; - let mut tasks_to_cancel = vec![]; - // these `group_id`s must be unique - for ( - group_id, - CompactionGroup { - compaction_config, - parent_group_id, - member_table_ids, - .. - }, - ) in compaction_groups.iter() - { - if !new_hummock_version.levels.contains_key(group_id) { - new_hummock_version - .levels - .try_insert( - *group_id, - ::build_initial_levels(compaction_config), - ) - .unwrap(); - new_groups.push(*group_id); - let group_deltas = &mut new_version_delta - .group_deltas - .entry(*group_id) - .or_default() - .group_deltas; - group_deltas.push(GroupDelta { - delta_type: Some(DeltaType::GroupConstruct(GroupConstruct { - group_config: Some(compaction_config.clone()), - parent_group_id: *parent_group_id, - table_ids: Vec::from_iter(member_table_ids.iter().cloned()), - })), - }); - let split_id_vers = new_hummock_version.init_with_parent_group( - *parent_group_id, - *group_id, - member_table_ids, - ); - if !split_id_vers.is_empty() && let Some(parent_compact_status) = - compaction.and_then(|compaction| compaction.compaction_statuses.get(parent_group_id)) - { - for (sst_id, _, level_idx) in &split_id_vers { - if let Some(level_handler) = parent_compact_status - .level_handlers - .get(*level_idx as usize) - { - if let Some(task_id) = level_handler.pending_task_id_by_sst(sst_id) { - tasks_to_cancel.push(task_id); - } - } - } - } - for (id, divide_ver, _) in split_id_vers { - match branched_ssts.get_mut(id) { - Some(mut entry) => { - *entry.get_mut(parent_group_id).unwrap() += 1; - entry.insert(*group_id, divide_ver); - } - None => branched_ssts.insert( - id, - [(*parent_group_id, divide_ver), (*group_id, divide_ver)] - .into_iter() - .collect(), - ), - } - } - } - } - tasks_to_cancel.sort(); - tasks_to_cancel.dedup(); - - new_version_delta.max_committed_epoch = new_hummock_version.max_committed_epoch; - commit_multi_var!( - self, - None, - trx_extern_part.take().unwrap_or_default(), - new_version_delta - )?; - branched_ssts.commit_memory(); - versioning.current_version = new_hummock_version; - - // This function MUST NOT fail from now on. 
- - self.env - .notification_manager() - .notify_hummock_without_version( - Operation::Add, - Info::HummockVersionDeltas(risingwave_pb::hummock::HummockVersionDeltas { - version_deltas: vec![versioning - .hummock_version_deltas - .last_key_value() - .unwrap() - .1 - .clone()], - }), - ); - - for compaction_group_id in deleted_compaction_groups { - remove_compaction_group_in_sst_stat(&self.metrics, compaction_group_id); - } - let mut manager_cancel = self.compaction_tasks_to_cancel.lock(); - manager_cancel.append(&mut tasks_to_cancel); - - Ok(None) - } - /// Caller should ensure `epoch` > `max_committed_epoch` #[named] pub async fn commit_epoch( @@ -1486,12 +1296,6 @@ where if versioning_guard.disable_commit_epochs { return Ok(None); } - let (raw_compaction_groups, compaction_group_index) = - self.compaction_groups_and_index().await; - let compaction_groups: HashMap<_, _> = raw_compaction_groups - .into_iter() - .map(|group| (group.group_id(), group)) - .collect(); let versioning = versioning_guard.deref_mut(); self.commit_epoch_sanity_check( @@ -1508,21 +1312,15 @@ where add_prost_table_stats_map(&mut table_stats_change, &std::mem::take(&mut s.table_stats)); } - let old_version = versioning.current_version.clone(); - let new_version_id = old_version.id + 1; + let old_version = &versioning.current_version; let mut new_version_delta = BTreeMapEntryTransaction::new_insert( &mut versioning.hummock_version_deltas, - new_version_id, - HummockVersionDelta { - prev_id: old_version.id, - safe_epoch: old_version.safe_epoch, - trivial_move: false, - ..Default::default() - }, + old_version.id + 1, + build_version_delta_after_version(old_version), ); - let mut new_hummock_version = old_version; - new_version_delta.id = new_version_id; - new_hummock_version.id = new_version_id; + new_version_delta.max_committed_epoch = epoch; + let mut new_hummock_version = old_version.clone(); + new_hummock_version.id = new_version_delta.id; let mut branched_ssts = BTreeMapTransaction::new(&mut versioning.branched_ssts); let mut branch_sstables = vec![]; sstables.retain_mut(|local_sst_info| { @@ -1531,20 +1329,24 @@ where sst_info: sst, .. } = local_sst_info; - let is_sst_belong_to_group_declared = match compaction_groups.get(compaction_group_id) { + let is_sst_belong_to_group_declared = match old_version.levels.get(compaction_group_id) + { Some(compaction_group) => sst .table_ids .iter() - .all(|t| compaction_group.member_table_ids().contains(t)), + .all(|t| compaction_group.member_table_ids.contains(t)), None => false, }; if !is_sst_belong_to_group_declared { let mut group_table_ids: BTreeMap<_, Vec<_>> = BTreeMap::new(); for table_id in sst.get_table_ids() { - match compaction_group_index.get(table_id) { + match try_get_compaction_group_id_by_table_id( + &versioning.current_version, + *table_id, + ) { Some(compaction_group_id) => { group_table_ids - .entry(*compaction_group_id) + .entry(compaction_group_id) .or_default() .push(*table_id); } @@ -1634,7 +1436,6 @@ where } // Create a new_version, possibly merely to bump up the version id and max_committed_epoch. - new_version_delta.max_committed_epoch = epoch; new_hummock_version.max_committed_epoch = epoch; // Apply stats changes. 
@@ -1676,26 +1477,14 @@ where tracing::trace!("new committed epoch {}", epoch); - self.env - .notification_manager() - .notify_hummock_without_version( - Operation::Add, - Info::HummockVersionDeltas(risingwave_pb::hummock::HummockVersionDeltas { - version_deltas: vec![versioning - .hummock_version_deltas - .last_key_value() - .unwrap() - .1 - .clone()], - }), - ); + self.notify_last_version_delta(versioning); drop(versioning_guard); // Don't trigger compactions if we enable deterministic compaction if !self.env.opts.compaction_deterministic_test { // commit_epoch may contains SSTs from any compaction group for id in modified_compaction_groups { - self.try_send_compaction_request(id); + self.try_send_compaction_request(id, compact_task::TaskType::Dynamic); } } #[cfg(test)] @@ -1748,6 +1537,7 @@ where .hummock_version_deltas .range((Excluded(old_checkpoint_id), Included(new_checkpoint_id))) { + assert_eq!(version_delta.prev_id, checkpoint.id); checkpoint.apply_version_delta(version_delta); } new_checkpoint_id = checkpoint.id; @@ -1755,10 +1545,10 @@ where return Ok(0); } commit_multi_var!(self, None, Transaction::default(), checkpoint)?; - versioning.extend_ssts_to_delete_from_deltas(( - Excluded(old_checkpoint_id), - Included(new_checkpoint_id), - )); + versioning.extend_ssts_to_delete_from_deltas( + (Excluded(old_checkpoint_id), Included(new_checkpoint_id)), + &self.metrics, + ); #[cfg(test)] { drop(versioning_guard); @@ -1817,6 +1607,7 @@ where } /// Gets current version without pinning it. + /// Should not be called inside [`HummockManager`], because it requests locks internally. #[named] pub async fn get_current_version(&self) -> HummockVersion { read_lock!(self, versioning).await.current_version.clone() @@ -1848,11 +1639,6 @@ where Ok(HummockVersionDeltas { version_deltas }) } - #[named] - pub async fn get_read_guard(&self) -> RwLockReadGuard<'_, Versioning> { - read_lock!(self, versioning).await - } - pub async fn init_metadata_for_version_replay( &self, table_catalogs: Vec, @@ -1861,17 +1647,29 @@ where for table in &table_catalogs { table.insert(self.env.meta_store()).await?; } + for group in &compaction_groups { + assert!( + group.id == StaticCompactionGroupId::NewCompactionGroup as u64 + || (group.id >= StaticCompactionGroupId::StateDefault as u64 + && group.id <= StaticCompactionGroupId::MaterializedView as u64), + "compaction group id should be either NewCompactionGroup to create new one, or predefined static ones." 
+ ); + } - for group in compaction_groups { - let mut tables = vec![]; - for table_id in group.member_table_ids { - if let Some(option) = group.table_id_to_options.get(&table_id) { - tables.push((table_id as StateTableId, option.into())); - } + for group in &compaction_groups { + let mut pairs = vec![]; + for table_id in group.member_table_ids.clone() { + pairs.push((table_id as StateTableId, group.id)); } let group_config = group.compaction_config.clone().unwrap(); - self.register_new_group(group.id, group_config, &tables) - .await?; + self.compaction_group_manager + .write() + .await + .init_compaction_config_for_replay(group.id, group_config, self.env.meta_store()) + .await + .unwrap(); + self.register_table_ids(&pairs).await?; + tracing::info!("Registered table ids {:?}", pairs); } // Notify that tables have created @@ -1886,9 +1684,8 @@ where .await; } - let groups = self.compaction_groups().await; tracing::info!("Inited compaction groups:"); - for group in groups { + for group in compaction_groups { tracing::info!("{:?}", group); } Ok(()) @@ -1941,7 +1738,7 @@ where return Ok(()); } for compaction_group in compaction_groups { - self.try_send_compaction_request(compaction_group); + self.try_send_compaction_request(compaction_group, compact_task::TaskType::Dynamic); } Ok(()) } @@ -1990,9 +1787,13 @@ where } /// Sends a compaction request to compaction scheduler. - pub fn try_send_compaction_request(&self, compaction_group: CompactionGroupId) -> bool { + pub fn try_send_compaction_request( + &self, + compaction_group: CompactionGroupId, + task_type: compact_task::TaskType, + ) -> bool { if let Some(sender) = self.compaction_request_channel.read().as_ref() { - match sender.try_sched_compaction(compaction_group) { + match sender.try_sched_compaction(compaction_group, task_type) { Ok(_) => true, Err(e) => { tracing::error!( @@ -2122,6 +1923,22 @@ where pub fn cluster_manager(&self) -> &ClusterManagerRef { &self.cluster_manager } + + fn notify_last_version_delta(&self, versioning: &mut Versioning) { + self.env + .notification_manager() + .notify_hummock_without_version( + Operation::Add, + Info::HummockVersionDeltas(risingwave_pb::hummock::HummockVersionDeltas { + version_deltas: vec![versioning + .hummock_version_deltas + .last_key_value() + .unwrap() + .1 + .clone()], + }), + ); + } } fn drop_sst( @@ -2152,6 +1969,7 @@ fn gen_version_delta<'a>( deterministic_mode: bool, ) -> HummockVersionDelta { let mut version_delta = HummockVersionDelta { + id: old_version.id + 1, prev_id: old_version.id, max_committed_epoch: old_version.max_committed_epoch, trivial_move, @@ -2196,9 +2014,8 @@ fn gen_version_delta<'a>( group_deltas.push(group_delta); version_delta.gc_sst_ids.append(&mut gc_sst_ids); version_delta.safe_epoch = std::cmp::max(old_version.safe_epoch, compact_task.watermark); - version_delta.id = old_version.id + 1; // Don't persist version delta generated by compaction to meta store in deterministic mode. - // Because it will overwrite existing version delta that has same ID generated in the data + // Because it will override existing version delta that has same ID generated in the data // ingestion phase. 
if !deterministic_mode { txn.insert(version_delta.id, version_delta.clone()); diff --git a/src/meta/src/hummock/manager/tests.rs b/src/meta/src/hummock/manager/tests.rs index 91ee71ad815f5..5e52982907a8d 100644 --- a/src/meta/src/hummock/manager/tests.rs +++ b/src/meta/src/hummock/manager/tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,7 +19,9 @@ use std::sync::Arc; use itertools::Itertools; use risingwave_common::util::epoch::INVALID_EPOCH; use risingwave_hummock_sdk::compact::compact_task_to_string; -use risingwave_hummock_sdk::compaction_group::hummock_version_ext::HummockVersionExt; +use risingwave_hummock_sdk::compaction_group::hummock_version_ext::{ + get_compaction_group_ids, HummockVersionExt, +}; use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; use risingwave_hummock_sdk::table_stats::{to_prost_table_stats_map, TableStats, TableStatsMap}; // use risingwave_hummock_sdk::key_range::KeyRange; @@ -28,12 +30,12 @@ use risingwave_hummock_sdk::{ }; use risingwave_pb::common::{HostAddress, WorkerType}; use risingwave_pb::hummock::compact_task::TaskStatus; -use risingwave_pb::hummock::pin_version_response::Payload; +use risingwave_pb::hummock::version_update_payload::Payload; use risingwave_pb::hummock::{ HummockPinnedSnapshot, HummockPinnedVersion, HummockSnapshot, KeyRange, SstableInfo, }; -use crate::hummock::compaction::ManualCompactionOption; +use crate::hummock::compaction::{default_level_selector, ManualCompactionOption}; use crate::hummock::error::Error; use crate::hummock::test_utils::*; use crate::hummock::{start_compaction_scheduler, CompactionScheduler, HummockManagerRef}; @@ -113,7 +115,10 @@ async fn test_hummock_compaction_task() { // No compaction task available. assert!(hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .is_none()); @@ -137,7 +142,10 @@ async fn test_hummock_compaction_task() { // Get a compaction task. let mut compact_task = hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .unwrap(); @@ -175,7 +183,10 @@ async fn test_hummock_compaction_task() { let compactor = hummock_manager.get_idle_compactor().await.unwrap(); // Get a compaction task. 
let mut compact_task = hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .unwrap(); @@ -437,6 +448,8 @@ async fn test_hummock_manager_basic() { ); let mut epoch = 1; + let mut register_log_count = 0; + let mut commit_log_count = 0; let commit_one = |epoch: HummockEpoch, hummock_manager: HummockManagerRef| async move { let original_tables = generate_test_tables(epoch, get_sst_ids(&hummock_manager, 2).await); register_sstable_infos_to_compaction_group( @@ -455,14 +468,16 @@ async fn test_hummock_manager_basic() { }; commit_one(epoch, hummock_manager.clone()).await; + register_log_count += 1; + commit_log_count += 1; epoch += 1; - let sync_group_version_id = FIRST_VERSION_ID; + let init_version_id = FIRST_VERSION_ID; // increased version id assert_eq!( hummock_manager.get_current_version().await.id, - sync_group_version_id + 1 + init_version_id + commit_log_count + register_log_count ); // min pinned version id if no clients @@ -487,15 +502,19 @@ async fn test_hummock_manager_basic() { } Payload::PinnedVersion(version) => version, }; - assert_eq!(version.get_id(), sync_group_version_id + 1); + assert_eq!( + version.get_id(), + init_version_id + commit_log_count + register_log_count + ); assert_eq!( hummock_manager.get_min_pinned_version_id().await, - sync_group_version_id + 1 + init_version_id + commit_log_count + register_log_count ); } commit_one(epoch, hummock_manager.clone()).await; - // epoch += 1; + commit_log_count += 1; + register_log_count += 1; for _ in 0..2 { // should pin latest because deltas cannot contain INVALID_EPOCH @@ -505,11 +524,14 @@ async fn test_hummock_manager_basic() { } Payload::PinnedVersion(version) => version, }; - assert_eq!(version.get_id(), sync_group_version_id + 2); + assert_eq!( + version.get_id(), + init_version_id + commit_log_count + register_log_count + ); // pinned by context_id_1 assert_eq!( hummock_manager.get_min_pinned_version_id().await, - sync_group_version_id + 1 + init_version_id + commit_log_count + register_log_count - 2, ); } @@ -524,7 +546,7 @@ async fn test_hummock_manager_basic() { ); assert_eq!( hummock_manager.proceed_version_checkpoint().await.unwrap(), - sync_group_version_id + commit_log_count + register_log_count - 2 ); assert!(hummock_manager.get_ssts_to_delete().await.is_empty()); assert_eq!( @@ -532,7 +554,7 @@ async fn test_hummock_manager_basic() { .delete_version_deltas(usize::MAX) .await .unwrap(), - (sync_group_version_id as usize, 0) + ((commit_log_count + register_log_count - 2) as usize, 0) ); hummock_manager @@ -541,7 +563,7 @@ async fn test_hummock_manager_basic() { .unwrap(); assert_eq!( hummock_manager.get_min_pinned_version_id().await, - sync_group_version_id + 2 + init_version_id + commit_log_count + register_log_count ); assert!(hummock_manager.get_ssts_to_delete().await.is_empty()); assert_eq!( @@ -553,7 +575,7 @@ async fn test_hummock_manager_basic() { ); assert_eq!( hummock_manager.proceed_version_checkpoint().await.unwrap(), - 1 + 2 ); assert!(hummock_manager.get_ssts_to_delete().await.is_empty()); assert_eq!( @@ -561,7 +583,7 @@ async fn test_hummock_manager_basic() { .delete_version_deltas(usize::MAX) .await .unwrap(), - (1, 0) + (2, 0) ); hummock_manager @@ -701,7 +723,10 @@ async fn test_print_compact_task() { // Get a compaction task. 
let compact_task = hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .unwrap(); @@ -857,7 +882,7 @@ async fn test_trigger_compaction_deterministic() { let _ = add_test_tables(&hummock_manager, context_id).await; let cur_version = hummock_manager.get_current_version().await; - let compaction_groups = hummock_manager.compaction_group_ids().await; + let compaction_groups = get_compaction_group_ids(&cur_version); let ret = hummock_manager .trigger_compaction_deterministic(cur_version.id, compaction_groups) @@ -887,7 +912,10 @@ async fn test_hummock_compaction_task_heartbeat() { // No compaction task available. assert!(hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .is_none()); @@ -912,7 +940,10 @@ async fn test_hummock_compaction_task_heartbeat() { let compactor = hummock_manager.get_idle_compactor().await.unwrap(); // Get a compaction task. let mut compact_task = hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .unwrap(); @@ -958,7 +989,10 @@ async fn test_hummock_compaction_task_heartbeat() { let compactor = hummock_manager.get_idle_compactor().await.unwrap(); // Get a compaction task. let mut compact_task = hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .unwrap(); @@ -1005,7 +1039,10 @@ async fn test_hummock_compaction_task_heartbeat_removal_on_node_removal() { // No compaction task available. assert!(hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .is_none()); @@ -1030,7 +1067,10 @@ async fn test_hummock_compaction_task_heartbeat_removal_on_node_removal() { let compactor = hummock_manager.get_idle_compactor().await.unwrap(); // Get a compaction task. 
let compact_task = hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .unwrap(); @@ -1110,7 +1150,7 @@ async fn test_extend_ssts_to_delete() { // Checkpoint assert_eq!( hummock_manager.proceed_version_checkpoint().await.unwrap(), - 3 + 6 ); assert_eq!( hummock_manager @@ -1118,7 +1158,6 @@ async fn test_extend_ssts_to_delete() { .await, orphan_sst_num as usize ); - // Another 3 SSTs from useless delta logs after checkpoint assert_eq!( hummock_manager.get_ssts_to_delete().await.len(), orphan_sst_num as usize + 3 @@ -1165,7 +1204,6 @@ async fn test_version_stats() { table_stats: table_ids .iter() .map(|table_id| (*table_id, table_stats_change.clone())) - .into_iter() .collect(), }) .collect_vec(); @@ -1199,7 +1237,10 @@ async fn test_version_stats() { .add_compactor(worker_node.id, u64::MAX); let compactor = hummock_manager.get_idle_compactor().await.unwrap(); let mut compact_task = hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .unwrap(); diff --git a/src/meta/src/hummock/manager/versioning.rs b/src/meta/src/hummock/manager/versioning.rs index 5274aeabd0057..c2d0e9a63a7cc 100644 --- a/src/meta/src/hummock/manager/versioning.rs +++ b/src/meta/src/hummock/manager/versioning.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -29,8 +29,9 @@ use risingwave_pb::hummock::{ use crate::hummock::manager::worker::{HummockManagerEvent, HummockManagerEventSender}; use crate::hummock::manager::{read_lock, write_lock}; -use crate::hummock::metrics_utils::trigger_safepoint_stat; +use crate::hummock::metrics_utils::{trigger_safepoint_stat, trigger_stale_ssts_stat}; use crate::hummock::HummockManager; +use crate::rpc::metrics::MetaMetrics; use crate::storage::MetaStore; /// `HummockVersionSafePoint` prevents hummock versions GE than it from being GC. @@ -105,9 +106,18 @@ impl Versioning { min_pinned_version_id } + pub fn extend_ssts_to_delete_from_deltas( + &mut self, + delta_range: impl RangeBounds, + metric: &MetaMetrics, + ) { + self.extend_ssts_to_delete_from_deltas_impl(delta_range); + trigger_stale_ssts_stat(metric, self.ssts_to_delete.len()); + } + /// Extends `ssts_to_delete` according to given deltas. /// Possibly extends `deltas_to_delete`. - pub fn extend_ssts_to_delete_from_deltas( + fn extend_ssts_to_delete_from_deltas_impl( &mut self, delta_range: impl RangeBounds, ) { @@ -217,7 +227,7 @@ mod tests { }, ); assert_eq!(versioning.deltas_to_delete.len(), 0); - versioning.extend_ssts_to_delete_from_deltas(1..=2); + versioning.extend_ssts_to_delete_from_deltas_impl(1..=2); assert_eq!(versioning.deltas_to_delete.len(), 1); } diff --git a/src/meta/src/hummock/manager/worker.rs b/src/meta/src/hummock/manager/worker.rs index 77f0a9bb2d214..a4d1031320c04 100644 --- a/src/meta/src/hummock/manager/worker.rs +++ b/src/meta/src/hummock/manager/worker.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/meta/src/hummock/metrics_utils.rs b/src/meta/src/hummock/metrics_utils.rs index 93683871c31a1..0efac54535e5a 100644 --- a/src/meta/src/hummock/metrics_utils.rs +++ b/src/meta/src/hummock/metrics_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -40,6 +40,7 @@ pub fn trigger_version_stat( .set(current_version.encoded_len() as i64); metrics.safe_epoch.set(current_version.safe_epoch as i64); metrics.current_version_id.set(current_version.id as i64); + metrics.version_stats.reset(); for (table_id, stats) in &version_stats.table_stats { let table_id = format!("{}", table_id); metrics @@ -217,3 +218,7 @@ pub fn trigger_safepoint_stat(metrics: &MetaMetrics, safepoints: &[HummockVersio .set(HummockVersionId::MAX as _); } } + +pub fn trigger_stale_ssts_stat(metrics: &MetaMetrics, total_number: usize) { + metrics.stale_ssts_count.set(total_number as _); +} diff --git a/src/meta/src/hummock/mock_hummock_meta_client.rs b/src/meta/src/hummock/mock_hummock_meta_client.rs index a76330e413f37..c7b7c279aa05e 100644 --- a/src/meta/src/hummock/mock_hummock_meta_client.rs +++ b/src/meta/src/hummock/mock_hummock_meta_client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ use risingwave_hummock_sdk::{ use risingwave_pb::common::{HostAddress, WorkerType}; use risingwave_pb::hummock::subscribe_compact_tasks_response::Task; use risingwave_pb::hummock::{ - CompactTask, CompactTaskProgress, CompactionGroup, HummockSnapshot, HummockVersion, + compact_task, CompactTask, CompactTaskProgress, HummockSnapshot, HummockVersion, SubscribeCompactTasksResponse, VacuumTask, }; use risingwave_rpc_client::error::{Result, RpcError}; @@ -37,6 +37,9 @@ use risingwave_rpc_client::{CompactTaskItem, HummockMetaClient}; use tokio::task::JoinHandle; use tokio_stream::wrappers::UnboundedReceiverStream; +use crate::hummock::compaction::{ + default_level_selector, LevelSelector, SpaceReclaimCompactionSelector, +}; use crate::hummock::compaction_scheduler::CompactionRequestChannel; use crate::hummock::HummockManager; use crate::storage::MemStore; @@ -61,7 +64,10 @@ impl MockHummockMetaClient { pub async fn get_compact_task(&self) -> Option { self.hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap_or(None) } @@ -195,10 +201,19 @@ impl HummockMetaClient for MockHummockMetaClient { let hummock_manager_compact = self.hummock_manager.clone(); let (task_tx, task_rx) = tokio::sync::mpsc::unbounded_channel(); let handle = tokio::spawn(async move { - while let Some(group) = sched_rx.recv().await { + while let Some((group, task_type)) = sched_rx.recv().await { sched_channel.unschedule(group); + + let mut selector: Box = match task_type { + compact_task::TaskType::Dynamic => default_level_selector(), + compact_task::TaskType::SpaceReclaim => { + Box::::default() + } + + _ => panic!("Error type when mock_hummock_meta_client subscribe_compact_tasks"), + }; if let Some(task) = hummock_manager_compact - .get_compact_task(group) + .get_compact_task(group, &mut selector) .await .unwrap() { @@ -231,10 +246,6 @@ 
impl HummockMetaClient for MockHummockMetaClient { Ok(()) } - async fn get_compaction_groups(&self) -> Result> { - todo!() - } - async fn trigger_manual_compaction( &self, _compaction_group_id: u64, diff --git a/src/meta/src/hummock/mod.rs b/src/meta/src/hummock/mod.rs index 92736813a229a..8801012eb27ca 100644 --- a/src/meta/src/hummock/mod.rs +++ b/src/meta/src/hummock/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ // limitations under the License. pub mod compaction; -pub mod compaction_group; mod compaction_schedule_policy; mod compaction_scheduler; pub mod compactor_manager; @@ -25,7 +24,7 @@ mod level_handler; mod metrics_utils; #[cfg(any(test, feature = "test"))] pub mod mock_hummock_meta_client; -mod model; +pub mod model; #[cfg(any(test, feature = "test"))] pub mod test_utils; mod utils; diff --git a/src/meta/src/hummock/model/compact_task_assignment.rs b/src/meta/src/hummock/model/compact_task_assignment.rs index 6e924d43d40c1..a9a0f72429f1b 100644 --- a/src/meta/src/hummock/model/compact_task_assignment.rs +++ b/src/meta/src/hummock/model/compact_task_assignment.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,11 +16,9 @@ use prost::Message; use risingwave_hummock_sdk::HummockCompactionTaskId; use risingwave_pb::hummock::CompactTaskAssignment; +use crate::hummock::model::HUMMOCK_COMPACT_TASK_ASSIGNMENT; use crate::model::{MetadataModel, MetadataModelResult}; -/// `cf(compact_task_assignment)`: `CompactTaskId` -> `CompactTaskAssignment` -const HUMMOCK_COMPACT_TASK_ASSIGNMENT: &str = "cf/compact_task_assignment"; - /// `AssignedCompactTasks` tracks compact tasks assigned to context id. impl MetadataModel for CompactTaskAssignment { type KeyType = HummockCompactionTaskId; diff --git a/src/meta/src/hummock/compaction_group/mod.rs b/src/meta/src/hummock/model/compaction_group_config.rs similarity index 50% rename from src/meta/src/hummock/compaction_group/mod.rs rename to src/meta/src/hummock/model/compaction_group_config.rs index d7ba34c558e4f..8d16cc01d8229 100644 --- a/src/meta/src/hummock/compaction_group/mod.rs +++ b/src/meta/src/hummock/model/compaction_group_config.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,33 +13,26 @@ // limitations under the License. 
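The `compact_task_assignment.rs` hunk above (and the other model files later in this diff) replaces per-file column-family string constants with constants declared once in `hummock/model/mod.rs`, where the short `cf/hummock_N` names live and the old descriptive names stay reserved for backward compatibility. A minimal sketch of that layout:

// Declared once, e.g. in `model/mod.rs`; the value matches the constant
// introduced later in this diff.
const HUMMOCK_COMPACT_TASK_ASSIGNMENT: &str = "cf/hummock_5";

trait MetadataModel {
    fn cf_name() -> String;
}

struct CompactTaskAssignment;

impl MetadataModel for CompactTaskAssignment {
    // Each model refers to the shared constant instead of a local string,
    // so renaming a column family is a one-line change in `model/mod.rs`.
    fn cf_name() -> String {
        HUMMOCK_COMPACT_TASK_ASSIGNMENT.to_string()
    }
}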
use std::borrow::Borrow; -use std::collections::{HashMap, HashSet}; +use std::sync::Arc; -use itertools::Itertools; pub use risingwave_common::catalog::TableOption; -use risingwave_hummock_sdk::compaction_group::{StateTableId, StaticCompactionGroupId}; use risingwave_hummock_sdk::CompactionGroupId; use risingwave_pb::hummock::CompactionConfig; +use crate::hummock::model::HUMMOCK_COMPACTION_GROUP_CONFIG_CF_NAME; use crate::model::{MetadataModel, MetadataModelResult}; #[derive(Debug, Clone, PartialEq)] pub struct CompactionGroup { pub(crate) group_id: CompactionGroupId, - pub(crate) parent_group_id: CompactionGroupId, - pub(crate) member_table_ids: HashSet, - pub(crate) compaction_config: CompactionConfig, - pub(crate) table_id_to_options: HashMap, + pub(crate) compaction_config: Arc, } impl CompactionGroup { pub fn new(group_id: CompactionGroupId, compaction_config: CompactionConfig) -> Self { Self { group_id, - member_table_ids: Default::default(), - compaction_config, - table_id_to_options: HashMap::default(), - parent_group_id: StaticCompactionGroupId::NewCompactionGroup as CompactionGroupId, + compaction_config: Arc::new(compaction_config), } } @@ -47,35 +40,22 @@ impl CompactionGroup { self.group_id } - pub fn member_table_ids(&self) -> &HashSet { - &self.member_table_ids - } - - pub fn compaction_config(&self) -> CompactionConfig { + pub fn compaction_config(&self) -> Arc { self.compaction_config.clone() } - - pub fn table_id_to_options(&self) -> &HashMap { - &self.table_id_to_options - } } impl From<&risingwave_pb::hummock::CompactionGroup> for CompactionGroup { fn from(compaction_group: &risingwave_pb::hummock::CompactionGroup) -> Self { Self { group_id: compaction_group.id, - parent_group_id: compaction_group.parent_id, - member_table_ids: compaction_group.member_table_ids.iter().cloned().collect(), - compaction_config: compaction_group - .compaction_config - .as_ref() - .cloned() - .unwrap(), - table_id_to_options: compaction_group - .table_id_to_options - .iter() - .map(|id_to_table_option| (*id_to_table_option.0, id_to_table_option.1.into())) - .collect::>(), + compaction_config: Arc::new( + compaction_group + .compaction_config + .as_ref() + .cloned() + .unwrap(), + ), } } } @@ -84,30 +64,17 @@ impl From<&CompactionGroup> for risingwave_pb::hummock::CompactionGroup { fn from(compaction_group: &CompactionGroup) -> Self { Self { id: compaction_group.group_id, - parent_id: compaction_group.parent_group_id, - member_table_ids: compaction_group - .member_table_ids - .iter() - .cloned() - .collect_vec(), - compaction_config: Some(compaction_group.compaction_config.clone()), - table_id_to_options: compaction_group - .table_id_to_options - .iter() - .map(|id_to_table_option| (*id_to_table_option.0, id_to_table_option.1.into())) - .collect::>(), + compaction_config: Some(compaction_group.compaction_config.as_ref().clone()), } } } -const HUMMOCK_COMPACTION_GROUP_CF_NAME: &str = "cf/hummock_compaction_group"; - impl MetadataModel for CompactionGroup { type KeyType = CompactionGroupId; type ProstType = risingwave_pb::hummock::CompactionGroup; fn cf_name() -> String { - String::from(HUMMOCK_COMPACTION_GROUP_CF_NAME) + String::from(HUMMOCK_COMPACTION_GROUP_CONFIG_CF_NAME) } fn to_protobuf(&self) -> Self::ProstType { diff --git a/src/meta/src/hummock/compaction/prost_type.rs b/src/meta/src/hummock/model/compaction_status.rs similarity index 94% rename from src/meta/src/hummock/compaction/prost_type.rs rename to src/meta/src/hummock/model/compaction_status.rs index 20a9433e3431d..6ffa1553565f2 
100644 --- a/src/meta/src/hummock/compaction/prost_type.rs +++ b/src/meta/src/hummock/model/compaction_status.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,10 +18,9 @@ use itertools::Itertools; use risingwave_hummock_sdk::CompactionGroupId; use crate::hummock::compaction::CompactStatus; +use crate::hummock::model::HUMMOCK_COMPACTION_STATUS_CF_NAME; use crate::model::{MetadataModel, MetadataModelResult}; -const HUMMOCK_COMPACTION_STATUS_CF_NAME: &str = "cf/hummock_compaction_status"; - impl MetadataModel for CompactStatus { type KeyType = CompactionGroupId; type ProstType = risingwave_pb::hummock::CompactStatus; diff --git a/src/meta/src/hummock/model/mod.rs b/src/meta/src/hummock/model/mod.rs index 05d145401beb3..a2e5d1748f351 100644 --- a/src/meta/src/hummock/model/mod.rs +++ b/src/meta/src/hummock/model/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,13 +13,28 @@ // limitations under the License. mod compact_task_assignment; +mod compaction_group_config; +mod compaction_status; mod pinned_snapshot; mod pinned_version; mod version; mod version_delta; mod version_stats; +pub use compaction_group_config::CompactionGroup; +pub use compaction_status::*; pub use pinned_snapshot::*; pub use pinned_version::*; pub use version::*; pub use version_delta::*; + +/// Column family names for hummock. +/// Deprecated `cf_name` should be reserved for backward compatibility. +const HUMMOCK_VERSION_CF_NAME: &str = "cf/hummock_0"; +const HUMMOCK_VERSION_DELTA_CF_NAME: &str = "cf/hummock_1"; +const HUMMOCK_PINNED_VERSION_CF_NAME: &str = "cf/hummock_2"; +const HUMMOCK_PINNED_SNAPSHOT_CF_NAME: &str = "cf/hummock_3"; +const HUMMOCK_COMPACTION_STATUS_CF_NAME: &str = "cf/hummock_4"; +const HUMMOCK_COMPACT_TASK_ASSIGNMENT: &str = "cf/hummock_5"; +const HUMMOCK_COMPACTION_GROUP_CONFIG_CF_NAME: &str = "cf/hummock_6"; +const HUMMOCK_VERSION_STATS_CF_NAME: &str = "cf/hummock_7"; diff --git a/src/meta/src/hummock/model/pinned_snapshot.rs b/src/meta/src/hummock/model/pinned_snapshot.rs index d282909e1d680..8c153844355fa 100644 --- a/src/meta/src/hummock/model/pinned_snapshot.rs +++ b/src/meta/src/hummock/model/pinned_snapshot.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,12 +16,9 @@ use prost::Message; use risingwave_hummock_sdk::HummockContextId; use risingwave_pb::hummock::HummockPinnedSnapshot; +use crate::hummock::model::HUMMOCK_PINNED_SNAPSHOT_CF_NAME; use crate::model::{MetadataModel, MetadataModelResult}; -/// Column family name for hummock pinned snapshot -/// `cf(hummock_pinned_snapshot)`: `HummockContextId` -> `HummockPinnedSnapshot` -const HUMMOCK_PINNED_SNAPSHOT_CF_NAME: &str = "cf/hummock_pinned_snapshot"; - /// `HummockPinnedSnapshot` tracks pinned snapshots by given context id. 
impl MetadataModel for HummockPinnedSnapshot { type KeyType = HummockContextId; diff --git a/src/meta/src/hummock/model/pinned_version.rs b/src/meta/src/hummock/model/pinned_version.rs index 85cd0e3c2722a..020532166ca9e 100644 --- a/src/meta/src/hummock/model/pinned_version.rs +++ b/src/meta/src/hummock/model/pinned_version.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,12 +16,9 @@ use prost::Message; use risingwave_hummock_sdk::HummockContextId; use risingwave_pb::hummock::HummockPinnedVersion; +use crate::hummock::model::HUMMOCK_PINNED_VERSION_CF_NAME; use crate::model::{MetadataModel, MetadataModelResult}; -/// Column family name for hummock pinned version -/// `cf(hummock_pinned_version)`: `HummockContextId` -> `HummockPinnedVersion` -const HUMMOCK_PINNED_VERSION_CF_NAME: &str = "cf/hummock_pinned_version"; - /// `HummockPinnedVersion` tracks pinned versions by given context id. impl MetadataModel for HummockPinnedVersion { type KeyType = HummockContextId; diff --git a/src/meta/src/hummock/model/version.rs b/src/meta/src/hummock/model/version.rs index 21aa9656fc6e9..0ecd42ce5d39a 100644 --- a/src/meta/src/hummock/model/version.rs +++ b/src/meta/src/hummock/model/version.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,12 +16,9 @@ use prost::Message; use risingwave_hummock_sdk::HummockVersionId; use risingwave_pb::hummock::HummockVersion; +use crate::hummock::model::HUMMOCK_VERSION_CF_NAME; use crate::model::{MetadataModel, MetadataModelResult}; -/// Column family name for hummock version. -/// `cf(hummock_version)`: `HummockVersionId` -> `HummockVersion` -const HUMMOCK_VERSION_CF_NAME: &str = "cf/hummock_version"; - /// `HummockVersion` tracks `Sstables` in given version. impl MetadataModel for HummockVersion { type KeyType = HummockVersionId; diff --git a/src/meta/src/hummock/model/version_delta.rs b/src/meta/src/hummock/model/version_delta.rs index c0f472aa99be5..5b2f541a4e5a5 100644 --- a/src/meta/src/hummock/model/version_delta.rs +++ b/src/meta/src/hummock/model/version_delta.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,12 +16,9 @@ use prost::Message; use risingwave_hummock_sdk::HummockVersionId; use risingwave_pb::hummock::HummockVersionDelta; +use crate::hummock::model::HUMMOCK_VERSION_DELTA_CF_NAME; use crate::model::{MetadataModel, MetadataModelResult}; -/// Column family name for hummock version delta. -/// `cf(hummock_version_delta)`: `HummockVersionId` -> `HummockVersionDelta` -const HUMMOCK_VERSION_DELTA_CF_NAME: &str = "cf/hummock_version_delta"; - /// `HummockVersionDelta` tracks delta of `Sstables` in given version based on previous version. 
impl MetadataModel for HummockVersionDelta { type KeyType = HummockVersionId; diff --git a/src/meta/src/hummock/model/version_stats.rs b/src/meta/src/hummock/model/version_stats.rs index f0a38aa0ed481..7e3f71e5204d4 100644 --- a/src/meta/src/hummock/model/version_stats.rs +++ b/src/meta/src/hummock/model/version_stats.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,11 +16,9 @@ use prost::Message; use risingwave_hummock_sdk::HummockVersionId; use risingwave_pb::hummock::HummockVersionStats; +use crate::hummock::model::HUMMOCK_VERSION_STATS_CF_NAME; use crate::model::{MetadataModel, MetadataModelResult}; -/// `cf(hummock_table_stats)`: `HummockVersionId` -> `TableStatsMap` -const HUMMOCK_VERSION_STATS_CF_NAME: &str = "cf/hummock_version_stats"; - /// `HummockVersionStats` stores stats for hummock version. /// Currently it only persists one row for latest version. impl MetadataModel for HummockVersionStats { diff --git a/src/meta/src/hummock/test_utils.rs b/src/meta/src/hummock/test_utils.rs index ee2786f25cb5a..0a74ded9267b4 100644 --- a/src/meta/src/hummock/test_utils.rs +++ b/src/meta/src/hummock/test_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -32,7 +32,7 @@ use risingwave_pb::hummock::{ }; use crate::hummock::compaction::compaction_config::CompactionConfigBuilder; -use crate::hummock::compaction_group::TableOption; +use crate::hummock::compaction::default_level_selector; use crate::hummock::{CompactorManager, HummockManager, HummockManagerRef}; use crate::manager::{ClusterManager, ClusterManagerRef, MetaSrvEnv, META_NODE_ID}; use crate::rpc::metrics::MetaMetrics; @@ -90,8 +90,9 @@ where temp_compactor = true; } let compactor = hummock_manager.get_idle_compactor().await.unwrap(); + let mut selector = default_level_selector(); let mut compact_task = hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task(StaticCompactionGroupId::StateDefault.into(), &mut selector) .await .unwrap() .unwrap(); @@ -157,7 +158,7 @@ pub fn generate_test_tables(epoch: u64, sst_ids: Vec) -> Vec( { hummock_manager_ref .register_table_ids( - &mut table_ids + &table_ids .iter() - .map(|table_id| (*table_id, compaction_group_id, TableOption::default())) + .map(|table_id| (*table_id, compaction_group_id)) .collect_vec(), ) .await @@ -305,8 +306,7 @@ pub async fn setup_compute_env_with_config( compactor_manager, config, ) - .await - .unwrap(); + .await; let fake_host_address = HostAddress { host: "127.0.0.1".to_string(), port, diff --git a/src/meta/src/hummock/utils.rs b/src/meta/src/hummock/utils.rs index adc7063410a20..91ff13e4044f9 100644 --- a/src/meta/src/hummock/utils.rs +++ b/src/meta/src/hummock/utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
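The `model/compaction_group_config.rs` rename earlier in this diff trims the in-memory `CompactionGroup` down to its id plus an `Arc`-shared `CompactionConfig`, so cloning a group or handing its config to a compaction task copies a pointer rather than the whole config. A sketch of that shape (the config field shown is illustrative):

use std::sync::Arc;

#[derive(Debug, Clone, Default, PartialEq)]
struct CompactionConfig {
    // Illustrative field; the real config carries many tuning knobs.
    max_bytes_for_level_base: u64,
}

#[derive(Debug, Clone, PartialEq)]
struct CompactionGroup {
    group_id: u64,
    compaction_config: Arc<CompactionConfig>,
}

impl CompactionGroup {
    fn new(group_id: u64, compaction_config: CompactionConfig) -> Self {
        Self {
            group_id,
            compaction_config: Arc::new(compaction_config),
        }
    }

    // Returning the `Arc` bumps a reference count instead of cloning the config.
    fn compaction_config(&self) -> Arc<CompactionConfig> {
        self.compaction_config.clone()
    }
}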
diff --git a/src/meta/src/hummock/vacuum.rs b/src/meta/src/hummock/vacuum.rs index 08556638438e9..0fb7d73e5e2e9 100644 --- a/src/meta/src/hummock/vacuum.rs +++ b/src/meta/src/hummock/vacuum.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -377,7 +377,7 @@ mod tests { // Makes checkpoint and extends deltas_to_delete. Deletes deltas of v0->v1 and v2->v3. // Delta of v1->v2 cannot be deleted yet because it's used by ssts_to_delete. - assert_eq!(VacuumManager::vacuum_metadata(&vacuum).await.unwrap(), 2); + assert_eq!(VacuumManager::vacuum_metadata(&vacuum).await.unwrap(), 5); // No SST deletion is scheduled because no available worker. assert_eq!( VacuumManager::vacuum_sst_data(&vacuum).await.unwrap().len(), @@ -461,7 +461,7 @@ mod tests { panic!() } }; - // min_sst_retention_time_sec overwrite user provided value. + // min_sst_retention_time_sec override user provided value. assert_eq!( vacuum.env.opts.min_sst_retention_time_sec, full_scan_task.sst_retention_time_sec @@ -479,7 +479,7 @@ mod tests { panic!() } }; - // min_sst_retention_time_sec doesn't overwrite user provided value. + // min_sst_retention_time_sec doesn't override user provided value. assert_eq!( vacuum.env.opts.min_sst_retention_time_sec + 1, full_scan_task.sst_retention_time_sec diff --git a/src/meta/src/lib.rs b/src/meta/src/lib.rs index ffa6bb6461d7c..af4f59f4f9732 100644 --- a/src/meta/src/lib.rs +++ b/src/meta/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -48,98 +48,146 @@ mod stream; use std::time::Duration; -use clap::{ArgEnum, Parser}; +use clap::Parser; pub use error::{MetaError, MetaResult}; +use risingwave_common::{GIT_SHA, RW_VERSION}; +use risingwave_common_proc_macro::OverrideConfig; use crate::manager::MetaOpts; use crate::rpc::server::{rpc_serve, AddressInfo, MetaStoreBackend}; -#[derive(Copy, Clone, Debug, ArgEnum)] -pub enum Backend { - Mem, - Etcd, -} - #[derive(Debug, Clone, Parser)] pub struct MetaNodeOpts { // TODO: rename to listen_address and separate out the port. - #[clap(long, default_value = "127.0.0.1:5690")] + #[clap(long, env = "RW_LISTEN_ADDR", default_value = "127.0.0.1:5690")] listen_addr: String, - #[clap(long)] + /// Deprecated. But we keep it for backward compatibility. + #[clap(long, env = "RW_HOST")] host: Option, - #[clap(long)] - endpoint: Option, + /// The address for contacting this instance of the service. + /// This would be synonymous with the service's "public address" + /// or "identifying address". + /// It will serve as a unique identifier in cluster + /// membership and leader election. Must be specified for etcd backend. 
+ /// TODO: After host is removed, we require that this parameter must be provided when using + /// etcd + #[clap(long, env = "RW_ADVERTISE_ADDR")] + advertise_addr: Option, - #[clap(long)] + #[clap(long, env = "RW_DASHBOARD_HOST")] dashboard_host: Option, - #[clap(long)] + #[clap(long, env = "RW_PROMETHEUS_HOST")] prometheus_host: Option, - #[clap(long, arg_enum, default_value_t = Backend::Mem)] - backend: Backend, - - #[clap(long, default_value_t = String::from(""))] + #[clap(long, env = "RW_ETCD_ENDPOINTS", default_value_t = String::from(""))] etcd_endpoints: String, /// Enable authentication with etcd. By default disabled. - #[clap(long)] + #[clap(long, env = "RW_ETCD_AUTH")] etcd_auth: bool, /// Username of etcd, required when --etcd-auth is enabled. - /// Default value is read from the 'ETCD_USERNAME' environment variable. - #[clap(long, env = "ETCD_USERNAME", default_value = "")] + #[clap(long, env = "RW_ETCD_USERNAME", default_value = "")] etcd_username: String, /// Password of etcd, required when --etcd-auth is enabled. - /// Default value is read from the 'ETCD_PASSWORD' environment variable. - #[clap(long, env = "ETCD_PASSWORD", default_value = "")] + #[clap(long, env = "RW_ETCD_PASSWORD", default_value = "")] etcd_password: String, - #[clap(long)] + #[clap(long, env = "RW_DASHBOARD_UI_PATH")] dashboard_ui_path: Option, /// For dashboard service to fetch cluster info. - #[clap(long)] + #[clap(long, env = "RW_PROMETHEUS_ENDPOINT")] prometheus_endpoint: Option, + // TODO(zhidong): Make it required in v0.1.18 + /// State store url. + #[clap(long, env = "RW_STATE_STORE")] + state_store: Option, + /// Endpoint of the connector node, there will be a sidecar connector node /// colocated with Meta node in the cloud environment - #[clap(long, env = "META_CONNECTOR_RPC_ENDPOINT")] + #[clap(long, env = "RW_CONNECTOR_RPC_ENDPOINT")] pub connector_rpc_endpoint: Option, /// The path of `risingwave.toml` configuration file. /// /// If empty, default configuration values will be used. - /// - /// Note that internal system parameters should be defined in the configuration file at - /// [`risingwave_common::config`] instead of command line arguments. - #[clap(long, default_value = "")] + #[clap(long, env = "RW_CONFIG_PATH", default_value = "")] pub config_path: String, + + #[clap(flatten)] + pub override_opts: OverrideConfigOpts, +} + +/// Command-line arguments for compute-node that overrides the config file. +#[derive(Parser, Clone, Debug, OverrideConfig)] +pub struct OverrideConfigOpts { + #[clap(long, env = "RW_BACKEND", arg_enum)] + #[override_opts(path = meta.backend)] + backend: Option, + + /// Target size of the Sstable. + #[clap(long, env = "RW_SSTABLE_SIZE_MB")] + #[override_opts(path = storage.sstable_size_mb)] + sstable_size_mb: Option, + + /// Size of each block in bytes in SST. + #[clap(long, env = "RW_BLOCK_SIZE_KB")] + #[override_opts(path = storage.block_size_kb)] + block_size_kb: Option, + + /// False positive probability of bloom filter. + #[clap(long, env = "RW_BLOOM_FALSE_POSITIVE")] + #[override_opts(path = storage.bloom_false_positive)] + bloom_false_positive: Option, + + /// Remote directory for storing data and metadata objects. + #[clap(long, env = "RW_DATA_DIRECTORY")] + #[override_opts(path = storage.data_directory)] + data_directory: Option, + + /// Remote storage url for storing snapshots. + #[clap(long, env = "RW_BACKUP_STORAGE_URL")] + #[override_opts(path = backup.storage_url)] + backup_storage_url: Option, + + /// Remote directory for storing snapshots. 
+ #[clap(long, env = "RW_STORAGE_DIRECTORY")] + #[override_opts(path = backup.storage_directory)] + backup_storage_directory: Option, } use std::future::Future; +use std::net::SocketAddr; use std::pin::Pin; -use risingwave_common::config::load_config; +use risingwave_common::config::{load_config, MetaBackend, RwConfig}; +use tracing::info; /// Start meta node pub fn start(opts: MetaNodeOpts) -> Pin + Send>> { // WARNING: don't change the function signature. Making it `async fn` will cause // slow compile in release mode. Box::pin(async move { - let config = load_config(&opts.config_path); - tracing::info!("Starting meta node with config {:?}", config); - tracing::info!("Starting meta node with options {:?}", opts); - let meta_addr = opts.host.unwrap_or_else(|| opts.listen_addr.clone()); - let endpoint = opts.endpoint.unwrap_or_else(|| opts.listen_addr.clone()); - let listen_addr = opts.listen_addr.parse().unwrap(); + info!("Starting meta node"); + info!("> options: {:?}", opts); + let config = load_config(&opts.config_path, Some(opts.override_opts)); + info!("> config: {:?}", config); + info!("> version: {} ({})", RW_VERSION, GIT_SHA); + let listen_addr: SocketAddr = opts.listen_addr.parse().unwrap(); + let meta_addr = opts.host.unwrap_or_else(|| listen_addr.ip().to_string()); let dashboard_addr = opts.dashboard_host.map(|x| x.parse().unwrap()); let prometheus_addr = opts.prometheus_host.map(|x| x.parse().unwrap()); - let backend = match opts.backend { - Backend::Etcd => MetaStoreBackend::Etcd { + let advertise_addr = opts + .advertise_addr + .unwrap_or_else(|| format!("{}:{}", meta_addr, listen_addr.port())); + let backend = match config.meta.backend { + MetaBackend::Etcd => MetaStoreBackend::Etcd { endpoints: opts .etcd_endpoints .split(',') @@ -150,9 +198,11 @@ pub fn start(opts: MetaNodeOpts) -> Pin + Send>> { false => None, }, }, - Backend::Mem => MetaStoreBackend::Mem, + MetaBackend::Mem => MetaStoreBackend::Mem, }; + validate_config(&config); + let max_heartbeat_interval = Duration::from_secs(config.meta.max_heartbeat_interval_secs as u64); let barrier_interval = Duration::from_millis(config.streaming.barrier_interval_ms as u64); @@ -160,16 +210,15 @@ pub fn start(opts: MetaNodeOpts) -> Pin + Send>> { let in_flight_barrier_nums = config.streaming.in_flight_barrier_nums; let checkpoint_frequency = config.streaming.checkpoint_frequency; - tracing::info!("Meta server listening at {}", listen_addr); + info!("Meta server listening at {}", listen_addr); let add_info = AddressInfo { - endpoint, - addr: meta_addr, + advertise_addr, listen_addr, prometheus_addr, dashboard_addr, ui_path: opts.dashboard_ui_path, }; - let (join_handle, leader_lost_handle, _shutdown_send) = rpc_serve( + let (mut join_handle, leader_lost_handle, shutdown_send) = rpc_serve( add_info, backend, max_heartbeat_interval, @@ -193,18 +242,38 @@ pub fn start(opts: MetaNodeOpts) -> Pin + Send>> { connector_rpc_endpoint: opts.connector_rpc_endpoint, backup_storage_url: config.backup.storage_url, backup_storage_directory: config.backup.storage_directory, + sstable_size_mb: config.storage.sstable_size_mb, + block_size_kb: config.storage.block_size_kb, + bloom_false_positive: config.storage.bloom_false_positive, + state_store: opts.state_store, + data_directory: config.storage.data_directory, + periodic_space_reclaim_compaction_interval_sec: config + .meta + .periodic_space_reclaim_compaction_interval_sec, }, ) .await .unwrap(); - if let Some(leader_lost_handle) = leader_lost_handle { - tokio::select! 
{ - _ = join_handle => {}, - _ = leader_lost_handle => {}, + let res = tokio::select! { + _ = tokio::signal::ctrl_c() => { + tracing::info!("receive ctrl+c"); + shutdown_send.send(()).unwrap(); + join_handle.await } - } else { - join_handle.await.unwrap(); + res = &mut join_handle => res, + }; + res.unwrap(); + if let Some(leader_lost_handle) = leader_lost_handle { + leader_lost_handle.abort(); } }) } + +fn validate_config(config: &RwConfig) { + if config.meta.meta_leader_lease_secs <= 1 { + let error_msg = "meta leader lease secs should be larger than 1"; + tracing::error!(error_msg); + panic!("{}", error_msg); + } +} diff --git a/src/meta/src/manager/catalog/database.rs b/src/meta/src/manager/catalog/database.rs index fff22e10e5efa..014b8f641b58b 100644 --- a/src/meta/src/manager/catalog/database.rs +++ b/src/meta/src/manager/catalog/database.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashMap, HashSet}; use itertools::Itertools; +use risingwave_common::catalog::TableOption; use risingwave_pb::catalog::{Database, Function, Index, Schema, Sink, Source, Table, View}; use super::{DatabaseId, FunctionId, RelationId, SchemaId, SinkId, SourceId, ViewId}; @@ -67,7 +68,7 @@ pub struct DatabaseManager { pub(super) in_progress_creation_tracker: HashSet, // In-progress creating streaming job tracker: this is a temporary workaround to avoid clean up // creating streaming jobs. - pub(super) in_progress_creation_streaming_job: HashSet, + pub(super) in_progress_creation_streaming_job: HashMap, // In-progress creating tables, including internal tables. pub(super) in_progress_creating_tables: HashMap, } @@ -92,7 +93,12 @@ impl DatabaseManager { ); let schemas = BTreeMap::from_iter(schemas.into_iter().map(|schema| (schema.id, schema))); let sources = BTreeMap::from_iter(sources.into_iter().map(|source| (source.id, source))); - let sinks = BTreeMap::from_iter(sinks.into_iter().map(|sink| (sink.id, sink))); + let sinks = BTreeMap::from_iter(sinks.into_iter().map(|sink| { + for depend_relation_id in &sink.dependent_relations { + *relation_ref_count.entry(*depend_relation_id).or_default() += 1; + } + (sink.id, sink) + })); let indexes = BTreeMap::from_iter(indexes.into_iter().map(|index| (index.id, index))); let tables = BTreeMap::from_iter(tables.into_iter().map(|table| { for depend_relation_id in &table.dependent_relations { @@ -119,7 +125,7 @@ impl DatabaseManager { functions, relation_ref_count, in_progress_creation_tracker: HashSet::default(), - in_progress_creation_streaming_job: HashSet::default(), + in_progress_creation_streaming_job: HashMap::default(), in_progress_creating_tables: HashMap::default(), }) } @@ -179,6 +185,10 @@ impl DatabaseManager { } } + pub fn list_databases(&self) -> Vec { + self.databases.values().cloned().collect_vec() + } + pub fn list_creating_tables(&self) -> Vec
{ self.in_progress_creating_tables .values() @@ -190,6 +200,18 @@ impl DatabaseManager { self.tables.values().cloned().collect_vec() } + pub fn get_table_options(&self, table_ids: &[TableId]) -> HashMap { + self.tables + .iter() + .filter_map(|(id, table)| { + if table_ids.contains(id) { + return Some((*id, TableOption::build_table_option(&table.properties))); + } + None + }) + .collect() + } + pub fn list_table_ids(&self, schema_id: SchemaId) -> Vec { self.tables .values() @@ -271,8 +293,9 @@ impl DatabaseManager { self.in_progress_creation_tracker.insert(relation.clone()); } - pub fn mark_creating_streaming_job(&mut self, table_id: TableId) { - self.in_progress_creation_streaming_job.insert(table_id); + pub fn mark_creating_streaming_job(&mut self, table_id: TableId, key: RelationKey) { + self.in_progress_creation_streaming_job + .insert(table_id, key); } pub fn unmark_creating(&mut self, relation: &RelationKey) { @@ -283,8 +306,15 @@ impl DatabaseManager { self.in_progress_creation_streaming_job.remove(&table_id); } + pub fn find_creating_streaming_job_id(&self, key: &RelationKey) -> Option { + self.in_progress_creation_streaming_job + .iter() + .find(|(_, v)| *v == key) + .map(|(k, _)| *k) + } + pub fn all_creating_streaming_jobs(&self) -> impl Iterator + '_ { - self.in_progress_creation_streaming_job.iter().cloned() + self.in_progress_creation_streaming_job.keys().cloned() } pub fn mark_creating_tables(&mut self, tables: &[Table]) { diff --git a/src/meta/src/manager/catalog/fragment.rs b/src/meta/src/manager/catalog/fragment.rs index 4f453e4124a66..e374fec0eed8a 100644 --- a/src/meta/src/manager/catalog/fragment.rs +++ b/src/meta/src/manager/catalog/fragment.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,10 +22,11 @@ use risingwave_common::catalog::TableId; use risingwave_common::hash::ParallelUnitId; use risingwave_common::{bail, try_match_expand}; use risingwave_connector::source::SplitImpl; -use risingwave_pb::common::{Buffer, ParallelUnit, ParallelUnitMapping, WorkerNode}; +use risingwave_pb::common::{ParallelUnit, WorkerNode}; use risingwave_pb::meta::subscribe_response::{Info, Operation}; use risingwave_pb::meta::table_fragments::actor_status::ActorState; -use risingwave_pb::meta::table_fragments::{ActorStatus, State}; +use risingwave_pb::meta::table_fragments::{ActorStatus, Fragment, State}; +use risingwave_pb::meta::FragmentParallelUnitMapping; use risingwave_pb::stream_plan::stream_node::NodeBody; use risingwave_pb::stream_plan::{ Dispatcher, DispatcherType, FragmentTypeFlag, StreamActor, StreamNode, @@ -39,7 +40,7 @@ use crate::model::{ ActorId, BTreeMapTransaction, FragmentId, MetadataModel, TableFragments, ValTransaction, }; use crate::storage::{MetaStore, Transaction}; -use crate::stream::{actor_mapping_to_parallel_unit_mapping, SplitAssignment}; +use crate::stream::SplitAssignment; use crate::MetaResult; pub struct FragmentManagerCore { @@ -48,20 +49,18 @@ pub struct FragmentManagerCore { impl FragmentManagerCore { /// List all fragment vnode mapping info that not in `State::Initial`. 
- pub fn all_running_fragment_mappings(&self) -> impl Iterator + '_ { + pub fn all_running_fragment_mappings( + &self, + ) -> impl Iterator + '_ { self.table_fragments .values() .filter(|tf| tf.state() != State::Initial) .flat_map(|table_fragments| { table_fragments.fragments.values().map(|fragment| { - let parallel_unit_mapping = fragment - .vnode_mapping - .as_ref() - .expect("no data distribution found"); - ParallelUnitMapping { + let parallel_unit_mapping = fragment.vnode_mapping.clone().unwrap(); + FragmentParallelUnitMapping { fragment_id: fragment.fragment_id, - original_indices: parallel_unit_mapping.original_indices.clone(), - data: parallel_unit_mapping.data.clone(), + mapping: Some(parallel_unit_mapping), } }) }) @@ -83,19 +82,6 @@ pub struct ActorInfos { pub barrier_inject_actor_maps: HashMap>, } -pub struct FragmentVNodeInfo { - /// actor id => parallel unit - pub actor_parallel_unit_maps: BTreeMap, - - /// fragment vnode mapping info - pub vnode_mapping: Option, -} - -#[derive(Default)] -pub struct BuildGraphInfo { - pub table_mview_actor_ids: HashMap>, -} - pub type FragmentManagerRef = Arc>; impl FragmentManager @@ -167,9 +153,14 @@ where .vnode_mapping .clone() .expect("no data distribution found"); + let fragment_mapping = FragmentParallelUnitMapping { + fragment_id: fragment.fragment_id, + mapping: Some(mapping), + }; + self.env .notification_manager() - .notify_frontend(operation, Info::ParallelUnitMapping(mapping)) + .notify_frontend(operation, Info::ParallelUnitMapping(fragment_mapping)) .await; } } @@ -183,7 +174,7 @@ where Ok(map .get(table_id) .cloned() - .context(format!("table_fragment not exist: id={}", table_id))?) + .with_context(|| format!("table_fragment not exist: id={}", table_id))?) } pub async fn select_table_fragments_by_ids( @@ -196,7 +187,7 @@ where table_fragments.push( map.get(table_id) .cloned() - .context(format!("table_fragment not exist: id={}", table_id))?, + .with_context(|| format!("table_fragment not exist: id={}", table_id))?, ); } Ok(table_fragments) @@ -237,7 +228,7 @@ where let mut table_fragments = BTreeMapTransaction::new(map); let mut table_fragment = table_fragments .get_mut(*table_id) - .context(format!("table_fragment not exist: id={}", table_id))?; + .with_context(|| format!("table_fragment not exist: id={}", table_id))?; assert_eq!(table_fragment.state(), State::Initial); table_fragment.set_state(State::Creating); @@ -249,10 +240,12 @@ where let mut dependent_table = table_fragments .get_mut(dependent_table_id) - .context(format!( - "dependent table_fragment not exist: id={}", - dependent_table_id - ))?; + .with_context(|| { + format!( + "dependent table_fragment not exist: id={}", + dependent_table_id + ) + })?; for fragment in dependent_table.fragments.values_mut() { for actor in &mut fragment.actors { // Extend new dispatchers to table fragments. 
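Several hunks in `fragment.rs` swap `.context(format!(..))` for `.with_context(|| format!(..))`. With `context`, the message string is formatted even when the lookup succeeds; `with_context` takes a closure that only runs on the error path. A minimal sketch of the difference using `anyhow` directly (in the meta crate the resulting error is converted into `MetaError` at the `?`):

use anyhow::{Context, Result};

fn lookup(table_id: u32) -> Option<&'static str> {
    (table_id == 1).then_some("table_fragment_1")
}

fn get_fragment(table_id: u32) -> Result<&'static str> {
    lookup(table_id)
        // The closure, and thus the `format!`, only runs when `lookup` returns `None`.
        .with_context(|| format!("table_fragment not exist: id={}", table_id))
}

fn main() {
    assert!(get_fragment(1).is_ok());
    assert!(get_fragment(2).is_err());
}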
@@ -277,7 +270,7 @@ where let mut table_fragments = BTreeMapTransaction::new(map); let mut table_fragment = table_fragments .get_mut(table_id) - .context(format!("table_fragment not exist: id={}", table_id))?; + .with_context(|| format!("table_fragment not exist: id={}", table_id))?; assert_eq!(table_fragment.state(), State::Creating); table_fragment.set_state(State::Created); @@ -302,13 +295,14 @@ where if table_ids.contains(&dependent_table_id) { continue; } - let mut dependent_table = - table_fragments - .get_mut(dependent_table_id) - .context(format!( + let mut dependent_table = table_fragments + .get_mut(dependent_table_id) + .with_context(|| { + format!( "dependent table_fragment not exist: id={}", dependent_table_id - ))?; + ) + })?; dependent_table .fragments @@ -448,14 +442,6 @@ where actor_maps } - pub async fn all_chain_actor_ids(&self) -> HashSet { - let map = &self.core.read().await.table_fragments; - - map.values() - .flat_map(|table_fragment| table_fragment.chain_actor_ids()) - .collect::>() - } - pub async fn update_actor_splits_by_split_assignment( &self, split_assignment: &SplitAssignment, @@ -694,17 +680,17 @@ where } if let Some(actor_mapping) = upstream_dispatcher_mapping.as_ref() { - *vnode_mapping = actor_mapping_to_parallel_unit_mapping( - fragment_id, - &actor_to_parallel_unit, - actor_mapping, - ) + *vnode_mapping = actor_mapping + .to_parallel_unit(&actor_to_parallel_unit) + .to_protobuf(); } if !fragment.state_table_ids.is_empty() { - let mut mapping = vnode_mapping.clone(); - mapping.fragment_id = fragment.fragment_id; - fragment_mapping_to_notify.push(mapping); + let fragment_mapping = FragmentParallelUnitMapping { + fragment_id: fragment_id as FragmentId, + mapping: Some(vnode_mapping.clone()), + }; + fragment_mapping_to_notify.push(fragment_mapping); } } @@ -728,7 +714,9 @@ where for dispatcher in &mut upstream_actor.dispatcher { if dispatcher.dispatcher_id == dispatcher_id { if let DispatcherType::Hash = dispatcher.r#type() { - dispatcher.hash_mapping = upstream_dispatcher_mapping.clone(); + dispatcher.hash_mapping = upstream_dispatcher_mapping + .as_ref() + .map(|m| m.to_protobuf()); } update_actors( @@ -824,91 +812,70 @@ where .collect::>>() } + #[cfg(test)] pub async fn get_table_mview_actor_ids(&self, table_id: &TableId) -> MetaResult> { let map = &self.core.read().await.table_fragments; Ok(map .get(table_id) - .context(format!("table_fragment not exist: id={}", table_id))? + .with_context(|| format!("table_fragment not exist: id={}", table_id))? .mview_actor_ids()) } - // we will read three things at once, avoiding locking too much. - pub async fn get_build_graph_info( + /// Get the upstream `Materialize` fragments of the specified tables. + pub async fn get_upstream_mview_fragments( &self, - table_ids: &HashSet, - ) -> MetaResult { + upstream_table_ids: &HashSet, + ) -> MetaResult> { let map = &self.core.read().await.table_fragments; - let mut info: BuildGraphInfo = Default::default(); - - for table_id in table_ids { - info.table_mview_actor_ids.insert( - *table_id, - map.get(table_id) - .context(format!("table_fragment not exist: id={}", table_id))? - .mview_actor_ids(), - ); - } - Ok(info) - } - - pub async fn get_mview_vnode_bitmap_info( - &self, - table_ids: &HashSet, - ) -> MetaResult)>>> { - let map = &self.core.read().await.table_fragments; - let mut info: HashMap)>> = HashMap::new(); - - for table_id in table_ids { - info.insert( - *table_id, - map.get(table_id) - .context(format!("table_fragment not exist: id={}", table_id))? 
- .mview_vnode_bitmap_info(), - ); + let mut fragments = HashMap::new(); + + for &table_id in upstream_table_ids { + let table_fragments = map + .get(&table_id) + .with_context(|| format!("table_fragment not exist: id={}", table_id))?; + if let Some(fragment) = table_fragments.mview_fragment() { + fragments.insert(table_id, fragment); + } } - Ok(info) + Ok(fragments) } - pub async fn get_mview_fragment_vnode_info( + /// Get the downstream `Chain` fragments of the specified table. + pub async fn get_downstream_chain_fragments( &self, - table_ids: &HashSet, - ) -> MetaResult> { + table_id: TableId, + ) -> MetaResult> { let map = &self.core.read().await.table_fragments; - let mut info: HashMap = HashMap::new(); - for table_id in table_ids { - let table_fragment = map - .get(table_id) - .context(format!("table_fragment not exist: id={}", table_id))?; - info.insert( - *table_id, - FragmentVNodeInfo { - actor_parallel_unit_maps: table_fragment.mview_actor_parallel_units(), - vnode_mapping: table_fragment.mview_vnode_mapping(), - }, - ); - } + let table_fragments = map + .get(&table_id) + .with_context(|| format!("table_fragment not exist: id={}", table_id))?; - Ok(info) - } + let mview_fragment = table_fragments.mview_fragment().unwrap(); + let downstream_fragment_ids: HashSet<_> = mview_fragment.actors[0] + .dispatcher + .iter() + .map(|d| d.dispatcher_id as FragmentId) + .collect(); - pub async fn get_tables_worker_actors( - &self, - table_ids: &HashSet, - ) -> MetaResult>>> { - let map = &self.core.read().await.table_fragments; - let mut info: HashMap>> = HashMap::new(); + // Find the fragments based on the fragment ids. + let fragments = map + .values() + .flat_map(|table_fragments| { + table_fragments + .fragments + .values() + .filter(|fragment| downstream_fragment_ids.contains(&fragment.fragment_id)) + .inspect(|f| { + assert!((f.fragment_type_mask & FragmentTypeFlag::ChainNode as u32) != 0) + }) + }) + .cloned() + .collect_vec(); - for table_id in table_ids { - info.insert( - *table_id, - map.get(table_id) - .context(format!("table_fragment not exist: id={}", table_id))? - .worker_actor_ids(), - ); - } + assert_eq!(downstream_fragment_ids.len(), fragments.len()); - Ok(info) + Ok(fragments) } } diff --git a/src/meta/src/manager/catalog/mod.rs b/src/meta/src/manager/catalog/mod.rs index f63eb806f89e6..e9faf9c8441c8 100644 --- a/src/meta/src/manager/catalog/mod.rs +++ b/src/meta/src/manager/catalog/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -26,9 +26,9 @@ pub use database::*; pub use fragment::*; use itertools::Itertools; use risingwave_common::catalog::{ - valid_table_name, TableId as StreamingJobId, DEFAULT_DATABASE_NAME, DEFAULT_SCHEMA_NAME, - DEFAULT_SUPER_USER, DEFAULT_SUPER_USER_FOR_PG, DEFAULT_SUPER_USER_FOR_PG_ID, - DEFAULT_SUPER_USER_ID, SYSTEM_SCHEMAS, + valid_table_name, TableId as StreamingJobId, TableOption, DEFAULT_DATABASE_NAME, + DEFAULT_SCHEMA_NAME, DEFAULT_SUPER_USER, DEFAULT_SUPER_USER_FOR_PG, + DEFAULT_SUPER_USER_FOR_PG_ID, DEFAULT_SUPER_USER_ID, SYSTEM_SCHEMAS, }; use risingwave_common::{bail, ensure}; use risingwave_pb::catalog::table::OptionalAssociatedSourceId; @@ -82,6 +82,7 @@ macro_rules! 
commit_meta { }; } pub(crate) use commit_meta; +use risingwave_pb::meta::CreatingJobInfo; pub type CatalogManagerRef = Arc>; @@ -582,7 +583,7 @@ where bail!("table is in creating procedure"); } else { database_core.mark_creating(&key); - database_core.mark_creating_streaming_job(table.id); + database_core.mark_creating_streaming_job(table.id, key); for &dependent_relation_id in &table.dependent_relations { database_core.increase_ref_count(dependent_relation_id); } @@ -973,7 +974,7 @@ where } else { database_core.mark_creating(&source_key); database_core.mark_creating(&mview_key); - database_core.mark_creating_streaming_job(table.id); + database_core.mark_creating_streaming_job(table.id, mview_key); ensure!(table.dependent_relations.is_empty()); // source and table user_core.increase_ref_count(source.owner, 2); @@ -1221,7 +1222,7 @@ where bail!("index already in creating procedure"); } else { database_core.mark_creating(&key); - database_core.mark_creating_streaming_job(index_table.id); + database_core.mark_creating_streaming_job(index_table.id, key); for &dependent_relation_id in &index_table.dependent_relations { database_core.increase_ref_count(dependent_relation_id); } @@ -1312,7 +1313,7 @@ where bail!("sink already in creating procedure"); } else { database_core.mark_creating(&key); - database_core.mark_creating_streaming_job(sink.id); + database_core.mark_creating_streaming_job(sink.id, key); for &dependent_relation_id in &sink.dependent_relations { database_core.increase_ref_count(dependent_relation_id); } @@ -1323,12 +1324,13 @@ where pub async fn finish_create_sink_procedure( &self, + internal_tables: Vec
, sink: &Sink, ) -> MetaResult { let core = &mut *self.core.lock().await; let database_core = &mut core.database; let key = (sink.database_id, sink.schema_id, sink.name.clone()); - + let mut tables = BTreeMapTransaction::new(&mut database_core.tables); let mut sinks = BTreeMapTransaction::new(&mut database_core.sinks); if !sinks.contains_key(&sink.id) && database_core.in_progress_creation_tracker.contains(&key) @@ -1339,8 +1341,15 @@ where .remove(&sink.id); sinks.insert(sink.id, sink.clone()); + for table in &internal_tables { + tables.insert(table.id, table.clone()); + } + commit_meta!(self, sinks, tables)?; - commit_meta!(self, sinks)?; + for internal_table in internal_tables { + self.notify_frontend(Operation::Add, Info::Table(internal_table)) + .await; + } let version = self .notify_frontend(Operation::Add, Info::Sink(sink.to_owned())) @@ -1372,11 +1381,16 @@ where } } - pub async fn drop_sink(&self, sink_id: SinkId) -> MetaResult { + pub async fn drop_sink( + &self, + sink_id: SinkId, + internal_table_ids: Vec, + ) -> MetaResult { let core = &mut *self.core.lock().await; let database_core = &mut core.database; let user_core = &mut core.user; let mut sinks = BTreeMapTransaction::new(&mut database_core.sinks); + let mut tables = BTreeMapTransaction::new(&mut database_core.tables); let mut users = BTreeMapTransaction::new(&mut user_core.user_info); let sink = sinks.remove(sink_id); @@ -1386,11 +1400,28 @@ where None => { let dependent_relations = sink.dependent_relations.clone(); - let objects = &[Object::SinkId(sink.id)]; + let objects = &[Object::SinkId(sink.id)] + .into_iter() + .chain( + internal_table_ids + .iter() + .map(|table_id| Object::TableId(*table_id)) + .collect_vec(), + ) + .collect_vec(); + + let internal_tables = internal_table_ids + .iter() + .map(|internal_table_id| { + tables + .remove(*internal_table_id) + .expect("internal table should exist") + }) + .collect_vec(); let users_need_update = Self::update_user_privileges(&mut users, objects); - commit_meta!(self, sinks, users)?; + commit_meta!(self, sinks, tables, users)?; user_core.decrease_ref(sink.owner); @@ -1403,6 +1434,11 @@ where database_core.decrease_ref_count(dependent_relation_id); } + for internal_table in internal_tables { + self.notify_frontend(Operation::Delete, Info::Table(internal_table)) + .await; + } + let version = self .notify_frontend(Operation::Delete, Info::Sink(sink)) .await; @@ -1415,10 +1451,18 @@ where } } + pub async fn list_databases(&self) -> Vec { + self.core.lock().await.database.list_databases() + } + pub async fn list_tables(&self) -> Vec
{ self.core.lock().await.database.list_tables() } + pub async fn get_table_options(&self, table_ids: &[TableId]) -> HashMap { + self.core.lock().await.database.get_table_options(table_ids) + } + pub async fn list_table_ids(&self, schema_id: SchemaId) -> Vec { self.core.lock().await.database.list_table_ids(schema_id) } @@ -1442,6 +1486,23 @@ where Ok(all_streaming_jobs) } + pub async fn find_creating_streaming_job_ids( + &self, + infos: Vec, + ) -> Vec { + let guard = self.core.lock().await; + infos + .into_iter() + .flat_map(|info| { + guard.database.find_creating_streaming_job_id(&( + info.database_id, + info.schema_id, + info.name, + )) + }) + .collect_vec() + } + async fn notify_frontend(&self, operation: Operation, info: Info) -> NotificationVersion { self.env .notification_manager() @@ -1575,12 +1636,6 @@ where user.name ))); } - if !user.grant_privileges.is_empty() { - return Err(MetaError::permission_denied(format!( - "Cannot drop user {} with privileges", - id - ))); - } if user_core .user_grant_relation .get(&id) diff --git a/src/meta/src/manager/catalog/user.rs b/src/meta/src/manager/catalog/user.rs index 35ea1c9b3407e..3a02a15ab8ce4 100644 --- a/src/meta/src/manager/catalog/user.rs +++ b/src/meta/src/manager/catalog/user.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -71,6 +71,7 @@ impl UserManager { self.user_info.values().any(|x| x.name.eq(user)) } + #[allow(dead_code)] pub fn ensure_user_id(&self, user_id: UserId) -> MetaResult<()> { if self.user_info.contains_key(&user_id) { Ok(()) diff --git a/src/meta/src/manager/cluster.rs b/src/meta/src/manager/cluster.rs index b5d8a9b16bc47..6557d5d4b6450 100644 --- a/src/meta/src/manager/cluster.rs +++ b/src/meta/src/manager/cluster.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -102,6 +102,7 @@ where _ = monitor_interval.tick() => {}, // Shutdown monitor _ = &mut shutdown_rx => { + tracing::info!("Worker number monitor is stopped"); return; } } @@ -339,9 +340,10 @@ where core.list_active_parallel_units() } - pub async fn get_active_parallel_unit_count(&self) -> usize { + /// Get the cluster info used for scheduling a streaming job. + pub async fn get_streaming_cluster_info(&self) -> StreamingClusterInfo { let core = self.core.read().await; - core.get_active_parallel_unit_count() + core.get_streaming_cluster_info() } /// Generate `parallel_degree` parallel units. @@ -369,6 +371,16 @@ where } } +/// The cluster info used for scheduling a streaming job. +#[derive(Debug, Clone)] +pub struct StreamingClusterInfo { + /// All **active** compute nodes in the cluster. + pub worker_nodes: HashMap, + + /// All parallel units of the **active** compute nodes in the cluster. + pub parallel_units: HashMap, +} + pub struct ClusterManagerCore { /// Record for workers in the cluster. 
workers: HashMap, @@ -467,6 +479,26 @@ impl ClusterManagerCore { .collect() } + fn get_streaming_cluster_info(&self) -> StreamingClusterInfo { + let active_workers: HashMap<_, _> = self + .list_worker_node(WorkerType::ComputeNode, Some(State::Running)) + .into_iter() + .map(|w| (w.id, w)) + .collect(); + + let active_parallel_units = self + .parallel_units + .iter() + .filter(|p| active_workers.contains_key(&p.worker_node_id)) + .map(|p| (p.id, p.clone())) + .collect(); + + StreamingClusterInfo { + worker_nodes: active_workers, + parallel_units: active_parallel_units, + } + } + fn count_worker_node(&self) -> HashMap { const MONITORED_WORKER_TYPES: [WorkerType; 3] = [ WorkerType::Compactor, @@ -489,10 +521,6 @@ impl ClusterManagerCore { } ret } - - fn get_active_parallel_unit_count(&self) -> usize { - self.list_active_parallel_units().len() - } } #[cfg(test)] diff --git a/src/meta/src/manager/env.rs b/src/meta/src/manager/env.rs index 5cd96d6e7a441..631581b962663 100644 --- a/src/meta/src/manager/env.rs +++ b/src/meta/src/manager/env.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ use std::ops::Deref; use std::sync::Arc; use std::time::Duration; -use risingwave_pb::meta::MetaLeaderInfo; +use risingwave_pb::meta::SystemParams; use risingwave_rpc_client::{StreamClientPool, StreamClientPoolRef}; use crate::manager::{ @@ -49,8 +49,6 @@ where /// idle status manager. idle_manager: IdleManagerRef, - info: MetaLeaderInfo, - /// options read by all services pub opts: Arc, } @@ -97,6 +95,24 @@ pub struct MetaOpts { pub backup_storage_url: String, /// The storage directory for storing backups. pub backup_storage_directory: String, + + /// Target size of the Sstable. + pub sstable_size_mb: u32, + + /// Size of each block in bytes in SST. + pub block_size_kb: u32, + + /// False positive probability of bloom filter. + pub bloom_false_positive: f64, + + /// State store url. + pub state_store: Option, + + /// Remote directory for storing data and metadata objects. + pub data_directory: String, + + /// Schedule space_reclaim_compaction for all compaction groups with this interval. + pub periodic_space_reclaim_compaction_interval_sec: u64, } impl MetaOpts { @@ -119,6 +135,28 @@ impl MetaOpts { connector_rpc_endpoint: None, backup_storage_url: "memory".to_string(), backup_storage_directory: "backup".to_string(), + sstable_size_mb: 256, + block_size_kb: 64, + bloom_false_positive: 0.001, + state_store: None, + data_directory: "hummock_001".to_string(), + periodic_space_reclaim_compaction_interval_sec: 60, + } + } + + pub fn init_system_params(&self) -> SystemParams { + // For fields not provided from CLI, use default values. + // For deprecated fields, use `None`. 
+ SystemParams { + barrier_interval_ms: Some(self.barrier_interval.as_millis() as u32), + checkpoint_frequency: Some(self.checkpoint_frequency as u64), + sstable_size_mb: Some(self.sstable_size_mb), + bloom_false_positive: Some(self.bloom_false_positive), + block_size_kb: Some(self.block_size_kb), + state_store: Some(self.state_store.clone().unwrap_or_default()), + data_directory: Some(self.data_directory.clone()), + backup_storage_url: Some(self.backup_storage_url.clone()), + backup_storage_directory: Some(self.backup_storage_directory.clone()), } } } @@ -127,7 +165,7 @@ impl MetaSrvEnv where S: MetaStore, { - pub async fn new(opts: MetaOpts, meta_store: Arc, info: MetaLeaderInfo) -> Self { + pub async fn new(opts: MetaOpts, meta_store: Arc) -> Self { // change to sync after refactor `IdGeneratorManager::new` sync. let id_gen_manager = Arc::new(IdGeneratorManager::new(meta_store.clone()).await); let stream_client_pool = Arc::new(StreamClientPool::default()); @@ -140,7 +178,6 @@ where notification_manager, stream_client_pool, idle_manager, - info, opts: opts.into(), } } @@ -184,10 +221,6 @@ where pub fn stream_client_pool(&self) -> &StreamClientPool { self.stream_client_pool.deref() } - - pub fn get_leader_info(&self) -> MetaLeaderInfo { - self.info.clone() - } } #[cfg(any(test, feature = "test"))] @@ -199,7 +232,6 @@ impl MetaSrvEnv { pub async fn for_test_opts(opts: Arc) -> Self { // change to sync after refactor `IdGeneratorManager::new` sync. - let leader_info = MetaLeaderInfo::default(); let meta_store = Arc::new(MemStore::default()); let id_gen_manager = Arc::new(IdGeneratorManager::new(meta_store.clone()).await); let notification_manager = Arc::new(NotificationManager::new(meta_store.clone()).await); @@ -212,7 +244,6 @@ impl MetaSrvEnv { notification_manager, stream_client_pool, idle_manager, - info: leader_info, opts, } } diff --git a/src/meta/src/manager/id.rs b/src/meta/src/manager/id.rs index f42dffd22faf3..ddbe8f4578f53 100644 --- a/src/meta/src/manager/id.rs +++ b/src/meta/src/manager/id.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -134,11 +134,11 @@ pub mod IdCategory { pub const Backup: IdCategoryType = 7; pub const HummockSstableId: IdCategoryType = 8; pub const ParallelUnit: IdCategoryType = 9; - pub const Source: IdCategoryType = 10; + pub const _Source: IdCategoryType = 10; pub const HummockCompactionTask: IdCategoryType = 11; pub const User: IdCategoryType = 12; - pub const Sink: IdCategoryType = 13; - pub const Index: IdCategoryType = 14; + pub const _Sink: IdCategoryType = 13; + pub const _Index: IdCategoryType = 14; pub const CompactionGroup: IdCategoryType = 15; pub const Function: IdCategoryType = 16; } diff --git a/src/meta/src/manager/idle.rs b/src/meta/src/manager/idle.rs index 9746675236a8f..4659d52c3fd95 100644 --- a/src/meta/src/manager/idle.rs +++ b/src/meta/src/manager/idle.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
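In `env.rs` above, `MetaOpts::init_system_params` builds the initial `SystemParams`: every live parameter is wrapped in `Some(..)`, falling back to a default when the CLI did not provide a value, while deprecated fields are left as `None`. A reduced sketch with only a couple of illustrative fields:

#[derive(Debug, Clone, Default, PartialEq)]
struct SystemParams {
    barrier_interval_ms: Option<u32>,
    state_store: Option<String>,
    data_directory: Option<String>,
}

struct MetaOpts {
    barrier_interval_ms: u32,
    state_store: Option<String>,
    data_directory: String,
}

impl MetaOpts {
    fn init_system_params(&self) -> SystemParams {
        SystemParams {
            barrier_interval_ms: Some(self.barrier_interval_ms),
            // Optional on the CLI: an empty string stands for "not set yet".
            state_store: Some(self.state_store.clone().unwrap_or_default()),
            data_directory: Some(self.data_directory.clone()),
        }
    }
}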
diff --git a/src/meta/src/manager/mod.rs b/src/meta/src/manager/mod.rs index 9b85f9c66ec2f..5e9cd46b5b49f 100644 --- a/src/meta/src/manager/mod.rs +++ b/src/meta/src/manager/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,10 +19,16 @@ mod id; mod idle; mod notification; mod streaming_job; -pub use catalog::*; -pub use cluster::*; -pub use env::*; -pub use id::*; -pub use idle::*; -pub use notification::*; -pub use streaming_job::*; +mod system_param; + +pub(crate) use catalog::*; +pub use cluster::WorkerKey; +pub(crate) use cluster::*; +pub use env::MetaSrvEnv; +pub(crate) use env::*; +pub(crate) use id::*; +pub(crate) use idle::*; +pub(crate) use notification::*; +pub use notification::{LocalNotification, MessageStatus, NotificationManagerRef}; +pub(crate) use streaming_job::*; +pub(crate) use system_param::*; diff --git a/src/meta/src/manager/notification.rs b/src/meta/src/manager/notification.rs index 0ebbbbf7fd549..f3d9436b7f86e 100644 --- a/src/meta/src/manager/notification.rs +++ b/src/meta/src/manager/notification.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -108,6 +108,12 @@ where } } + pub async fn abort_all(&self) { + let mut guard = self.core.lock().await; + *guard = NotificationManagerCore::new(); + guard.exiting = true; + } + #[inline(always)] fn notify( &self, @@ -210,6 +216,10 @@ where sender: UnboundedSender, ) { let mut core_guard = self.core.lock().await; + if core_guard.exiting { + tracing::warn!("notification manager exiting."); + return; + } let senders = match subscribe_type { SubscribeType::Frontend => &mut core_guard.frontend_senders, SubscribeType::Hummock => &mut core_guard.hummock_senders, @@ -222,6 +232,10 @@ where pub async fn insert_local_sender(&self, sender: UnboundedSender) { let mut core_guard = self.core.lock().await; + if core_guard.exiting { + tracing::warn!("notification manager exiting."); + return; + } core_guard.local_senders.push(sender); } @@ -240,6 +254,7 @@ struct NotificationManagerCore { compactor_senders: HashMap>, /// The notification sender to local subscribers. local_senders: Vec>, + exiting: bool, } impl NotificationManagerCore { @@ -249,6 +264,7 @@ impl NotificationManagerCore { hummock_senders: HashMap::new(), compactor_senders: HashMap::new(), local_senders: vec![], + exiting: false, } } diff --git a/src/meta/src/manager/streaming_job.rs b/src/meta/src/manager/streaming_job.rs index 7ca560c613002..7fe1b84ac56bb 100644 --- a/src/meta/src/manager/streaming_job.rs +++ b/src/meta/src/manager/streaming_job.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ use crate::model::FragmentId; // This enum is used in order to re-use code in `DdlServiceImpl` for creating MaterializedView and // Sink. 
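The `notification.rs` hunk above adds an `exiting` flag to the manager core: `abort_all` resets the core and marks it exiting, and later subscription attempts are refused with a warning instead of registering a sender that will never be served. A condensed sketch of that guard:

use tokio::sync::{mpsc, Mutex};

#[derive(Default)]
struct Core {
    senders: Vec<mpsc::UnboundedSender<String>>,
    exiting: bool,
}

struct NotificationManager {
    core: Mutex<Core>,
}

impl NotificationManager {
    async fn abort_all(&self) {
        let mut core = self.core.lock().await;
        // Drop all existing senders, then refuse newcomers.
        *core = Core::default();
        core.exiting = true;
    }

    async fn insert_sender(&self, sender: mpsc::UnboundedSender<String>) {
        let mut core = self.core.lock().await;
        if core.exiting {
            tracing::warn!("notification manager exiting.");
            return;
        }
        core.senders.push(sender);
    }
}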
-#[derive(Debug)] +#[derive(Debug, Clone)] pub enum StreamingJob { MaterializedView(Table), Sink(Sink), @@ -124,7 +124,8 @@ impl StreamingJob { match self { Self::MaterializedView(table) => table.definition.clone(), Self::Table(_, table) => table.definition.clone(), - _ => "".to_owned(), + Self::Index(_, table) => table.definition.clone(), + Self::Sink(sink) => sink.definition.clone(), } } diff --git a/src/meta/src/manager/system_param/mod.rs b/src/meta/src/manager/system_param/mod.rs new file mode 100644 index 0000000000000..119c25d8079e6 --- /dev/null +++ b/src/meta/src/manager/system_param/mod.rs @@ -0,0 +1,84 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub mod model; +use std::ops::DerefMut; +use std::sync::Arc; + +use risingwave_common::system_param::set_system_param; +use risingwave_pb::meta::SystemParams; +use tokio::sync::RwLock; + +use self::model::SystemParamsModel; +use super::MetaSrvEnv; +use crate::model::{ValTransaction, VarTransaction}; +use crate::storage::{MetaStore, Transaction}; +use crate::{MetaError, MetaResult}; + +pub type SystemParamManagerRef = Arc>; + +pub struct SystemParamManager { + env: MetaSrvEnv, + params: RwLock, +} + +impl SystemParamManager { + /// Return error if `init_params` conflict with persisted system params. + pub async fn new(env: MetaSrvEnv, init_params: SystemParams) -> MetaResult { + let meta_store = env.meta_store_ref(); + let persisted = SystemParams::get(meta_store.as_ref()).await?; + + let params = if let Some(persisted) = persisted { + Self::validate_init_params(&persisted, &init_params); + persisted + } else { + SystemParams::insert(&init_params, meta_store.as_ref()).await?; + init_params + }; + + Ok(Self { + env, + params: RwLock::new(params), + }) + } + + pub async fn get_params(&self) -> SystemParams { + self.params.read().await.clone() + } + + pub async fn set_param(&self, name: &str, value: Option) -> MetaResult<()> { + let mut params_guard = self.params.write().await; + let params = params_guard.deref_mut(); + let mut mem_txn = VarTransaction::new(params); + + set_system_param(mem_txn.deref_mut(), name, value).map_err(MetaError::system_param)?; + + let mut store_txn = Transaction::default(); + mem_txn.apply_to_txn(&mut store_txn)?; + self.env.meta_store().txn(store_txn).await?; + + mem_txn.commit(); + + Ok(()) + } + + fn validate_init_params(persisted: &SystemParams, init: &SystemParams) { + // Only compare params from CLI and config file. 
+ // TODO: Currently all fields are from CLI/config, but after CLI becomes the only source of + // `init`, should only compare them + if persisted != init { + tracing::warn!("System parameters from CLI and config file differ from the persisted") + } + } +} diff --git a/src/meta/src/manager/system_param/model.rs b/src/meta/src/manager/system_param/model.rs new file mode 100644 index 0000000000000..d53eb0f88a011 --- /dev/null +++ b/src/meta/src/manager/system_param/model.rs @@ -0,0 +1,93 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use async_trait::async_trait; +use risingwave_common::system_param::{system_params_from_kv, system_params_to_kv}; +use risingwave_pb::meta::SystemParams; + +use crate::model::{MetadataModelError, MetadataModelResult, Transactional}; +use crate::storage::{MetaStore, Snapshot, Transaction}; + +const SYSTEM_PARAMS_CF_NAME: &str = "cf/system_params"; + +// A dummy trait to implement custom methods on `SystemParams`. +#[async_trait] +pub trait SystemParamsModel: Sized { + fn cf_name() -> String; + async fn get(store: &S) -> MetadataModelResult>; + async fn insert(&self, store: &S) -> MetadataModelResult<()>; +} + +#[async_trait] +impl SystemParamsModel for SystemParams { + fn cf_name() -> String { + SYSTEM_PARAMS_CF_NAME.to_string() + } + + /// All undeprecated fields are guaranteed to be `Some`. + /// Return error if there are missing or unrecognized fields. + async fn get(store: &S) -> MetadataModelResult> + where + S: MetaStore, + { + let kvs = store.list_cf(&Self::cf_name()).await?; + if kvs.is_empty() { + Ok(None) + } else { + Ok(Some( + system_params_from_kv(kvs).map_err(MetadataModelError::internal)?, + )) + } + } + + /// All undeprecated fields must be `Some`. + /// Return error if there are missing fields. + async fn insert(&self, store: &S) -> MetadataModelResult<()> + where + S: MetaStore, + { + let mut txn = Transaction::default(); + self.upsert_in_transaction(&mut txn)?; + Ok(store.txn(txn).await?) + } +} + +impl Transactional for SystemParams { + fn upsert_in_transaction(&self, trx: &mut Transaction) -> MetadataModelResult<()> { + for (k, v) in system_params_to_kv(self).map_err(MetadataModelError::internal)? 
{ + trx.put(Self::cf_name(), k.into_bytes(), v.into_bytes()); + } + Ok(()) + } + + fn delete_in_transaction(&self, _trx: &mut Transaction) -> MetadataModelResult<()> { + unreachable!() + } +} + +pub async fn get_system_params_at_snapshot( + snapshot: &S::Snapshot, +) -> MetadataModelResult> +where + S: MetaStore, +{ + let kvs = snapshot.list_cf(&SystemParams::cf_name()).await?; + if kvs.is_empty() { + Ok(None) + } else { + Ok(Some( + system_params_from_kv(kvs).map_err(MetadataModelError::internal)?, + )) + } +} diff --git a/src/meta/src/model/barrier.rs b/src/meta/src/model/barrier.rs index 94c6d9f3037d0..6e6d4020903cf 100644 --- a/src/meta/src/model/barrier.rs +++ b/src/meta/src/model/barrier.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/model/catalog.rs b/src/meta/src/model/catalog.rs index f70b25840268c..b96e3d5704fa1 100644 --- a/src/meta/src/model/catalog.rs +++ b/src/meta/src/model/catalog.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/model/cluster.rs b/src/meta/src/model/cluster.rs index 8ae64515db366..cabbe5b482ea0 100644 --- a/src/meta/src/model/cluster.rs +++ b/src/meta/src/model/cluster.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/model/error.rs b/src/meta/src/model/error.rs index f15b2b12d88f7..083efbe8f1c19 100644 --- a/src/meta/src/model/error.rs +++ b/src/meta/src/model/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use anyhow::anyhow; use risingwave_pb::ProstFieldNotFound; use thiserror::Error; @@ -45,3 +46,9 @@ impl From for tonic::Status { tonic::Status::new(tonic::Code::Internal, format!("{}", e)) } } + +impl MetadataModelError { + pub fn internal(msg: impl ToString) -> Self { + MetadataModelError::InternalError(anyhow!(msg.to_string())) + } +} diff --git a/src/meta/src/model/mod.rs b/src/meta/src/model/mod.rs index b638e92a4926b..cbd79b4f4cacf 100644 --- a/src/meta/src/model/mod.rs +++ b/src/meta/src/model/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
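The `SystemParamManager::set_param` introduced above stages the change in an in-memory transaction, persists it to the meta store, and only then commits the in-memory copy. The following sketch shows that copy–persist–commit ordering under simplified, hypothetical types (`Params`, `KvStore`, `ParamManager`); it is not the actual `VarTransaction` machinery.

```rust
use std::collections::HashMap;

use tokio::sync::RwLock;

#[derive(Clone)]
struct Params {
    checkpoint_frequency: u64,
}

// Hypothetical in-memory stand-in for the meta store.
struct KvStore {
    inner: RwLock<HashMap<String, String>>,
}

impl KvStore {
    async fn put(&self, k: &str, v: String) -> Result<(), String> {
        self.inner.write().await.insert(k.to_string(), v);
        Ok(())
    }
}

struct ParamManager {
    params: RwLock<Params>,
    store: KvStore,
}

impl ParamManager {
    async fn set_param(&self, name: &str, value: &str) -> Result<(), String> {
        let mut guard = self.params.write().await;
        // 1. Stage the change on a clone so a failed persist leaves memory untouched.
        let mut staged = guard.clone();
        match name {
            "checkpoint_frequency" => {
                staged.checkpoint_frequency = value.parse().map_err(|e| format!("{e}"))?
            }
            _ => return Err(format!("unrecognized parameter {name}")),
        }
        // 2. Persist first...
        self.store.put(name, value.to_string()).await?;
        // 3. ...then "commit" by swapping in the staged copy.
        *guard = staged;
        Ok(())
    }
}

#[tokio::main]
async fn main() {
    let mgr = ParamManager {
        params: RwLock::new(Params { checkpoint_frequency: 10 }),
        store: KvStore { inner: RwLock::new(HashMap::new()) },
    };
    mgr.set_param("checkpoint_frequency", "20").await.unwrap();
    assert_eq!(mgr.params.read().await.checkpoint_frequency, 20);
}
```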
@@ -84,8 +84,8 @@ pub trait MetadataModel: std::fmt::Debug + Sized { let bytes_vec = store.list_cf(&Self::cf_name()).await?; bytes_vec .iter() - .map(|bytes| { - Self::ProstType::decode(bytes.as_slice()) + .map(|(_k, v)| { + Self::ProstType::decode(v.as_slice()) .map(Self::from_protobuf) .map_err(Into::into) }) diff --git a/src/meta/src/model/notification.rs b/src/meta/src/model/notification.rs index 767ca4ecf7a53..8b0d6df4009a8 100644 --- a/src/meta/src/model/notification.rs +++ b/src/meta/src/model/notification.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/model/stream.rs b/src/meta/src/model/stream.rs index ea55b152d8840..ce385b207b81f 100644 --- a/src/meta/src/model/stream.rs +++ b/src/meta/src/model/stream.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque}; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use itertools::Itertools; use risingwave_common::catalog::TableId; use risingwave_common::hash::ParallelUnitId; use risingwave_connector::source::SplitImpl; -use risingwave_pb::common::{Buffer, ParallelUnit, ParallelUnitMapping}; +use risingwave_pb::common::{ParallelUnit, ParallelUnitMapping}; use risingwave_pb::meta::table_fragments::actor_status::ActorState; use risingwave_pb::meta::table_fragments::{ActorStatus, Fragment, State}; use risingwave_pb::meta::TableFragments as ProstTableFragments; @@ -61,7 +61,7 @@ pub struct TableFragments { pub(crate) env: StreamEnvironment, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct StreamEnvironment { /// The timezone used to interpret timestamps and dates for conversion pub(crate) timezone: Option, @@ -74,7 +74,7 @@ impl StreamEnvironment { } } - fn from_protobuf(prost: ProstStreamEnvironment) -> Self { + pub fn from_protobuf(prost: &ProstStreamEnvironment) -> Self { Self { timezone: if prost.get_timezone().is_empty() { None @@ -105,7 +105,7 @@ impl MetadataModel for TableFragments { } fn from_protobuf(prost: Self::ProstType) -> Self { - let env = StreamEnvironment::from_protobuf(prost.get_env().unwrap().clone()); + let env = StreamEnvironment::from_protobuf(prost.get_env().unwrap()); Self { table_id: TableId::new(prost.table_id), state: prost.state(), @@ -122,19 +122,44 @@ impl MetadataModel for TableFragments { } impl TableFragments { - /// Create a new `TableFragments` with state of `Initialized` with env. + /// Create a new `TableFragments` with state of `Initial`, with other fields empty. + pub fn for_test(table_id: TableId, fragments: BTreeMap) -> Self { + Self::new( + table_id, + fragments, + &BTreeMap::new(), + StreamEnvironment::default(), + ) + } + + /// Create a new `TableFragments` with state of `Initial`, with the status of actors set to + /// `Inactive` on the given parallel units. 
pub fn new( table_id: TableId, fragments: BTreeMap, - env: ProstStreamEnvironment, + actor_locations: &BTreeMap, + env: StreamEnvironment, ) -> Self { + let actor_status = actor_locations + .iter() + .map(|(&actor_id, parallel_unit)| { + ( + actor_id, + ActorStatus { + parallel_unit: Some(parallel_unit.clone()), + state: ActorState::Inactive as i32, + }, + ) + }) + .collect(); + Self { table_id, state: State::Initial, fragments, - actor_status: BTreeMap::default(), + actor_status, actor_splits: HashMap::default(), - env: StreamEnvironment::from_protobuf(env), + env, } } @@ -146,11 +171,6 @@ impl TableFragments { self.fragments.values().collect_vec() } - /// Set the actor locations. - pub fn set_actor_status(&mut self, actor_status: BTreeMap) { - self.actor_status = actor_status; - } - /// Returns the table id. pub fn table_id(&self) -> TableId { self.table_id @@ -179,13 +199,19 @@ impl TableFragments { /// Returns mview fragment vnode mapping. /// Note that: the sink fragment is also stored as `TableFragments`, it's possible that /// there's no fragment with `FragmentTypeFlag::Mview` exists. - pub fn mview_vnode_mapping(&self) -> Option { + pub fn mview_vnode_mapping(&self) -> Option<(FragmentId, ParallelUnitMapping)> { self.fragments .values() .find(|fragment| { (fragment.get_fragment_type_mask() & FragmentTypeFlag::Mview as u32) != 0 }) - .and_then(|fragment| fragment.vnode_mapping.clone()) + .map(|fragment| { + ( + fragment.fragment_id, + // vnode mapping is always `Some`, even for singletons. + fragment.vnode_mapping.clone().unwrap(), + ) + }) } /// Update state of all actors @@ -239,6 +265,16 @@ impl TableFragments { }) } + /// Returns the fragment with the `Mview` type flag. + pub fn mview_fragment(&self) -> Option { + self.fragments + .values() + .find(|fragment| { + (fragment.get_fragment_type_mask() & FragmentTypeFlag::Mview as u32) != 0 + }) + .cloned() + } + /// Returns actors that contains Chain node. pub fn chain_actor_ids(&self) -> HashSet { Self::filter_actor_ids(self, |fragment_type_mask| { @@ -248,25 +284,6 @@ impl TableFragments { .collect() } - /// Returns fragments that contains Chain node. - pub fn chain_fragment_ids(&self) -> HashSet { - self.fragments - .values() - .filter(|fragment| { - (fragment.get_fragment_type_mask() & FragmentTypeFlag::ChainNode as u32) != 0 - }) - .map(|f| f.fragment_id) - .collect() - } - - pub fn fetch_parallel_unit_by_actor(&self, actor_id: &ActorId) -> Option { - if let Some(status) = self.actor_status.get(actor_id) { - status.parallel_unit.clone() - } else { - None - } - } - /// Find the source node that contains an external stream source inside the stream node, if any. pub fn find_source_node_with_stream_source(stream_node: &StreamNode) -> Option<&SourceNode> { if let Some(NodeBody::Source(source)) = stream_node.node_body.as_ref() { @@ -352,15 +369,6 @@ impl TableFragments { map } - pub fn actor_to_worker(&self) -> HashMap { - let mut map = HashMap::default(); - for (&actor_id, actor_status) in &self.actor_status { - let node_id = actor_status.get_parallel_unit().unwrap().worker_node_id as WorkerId; - map.insert(actor_id, node_id); - } - map - } - pub fn update_vnode_mapping(&mut self, migrate_map: &HashMap) { for fragment in self.fragments.values_mut() { if fragment.vnode_mapping.is_some() { @@ -423,128 +431,6 @@ impl TableFragments { actor_map } - /// Returns fragment vnode mapping. 
- pub fn fragment_vnode_mapping(&self, fragment_id: FragmentId) -> Option { - if let Some(fragment) = self.fragments.get(&fragment_id) { - fragment.vnode_mapping.clone() - } else { - None - } - } - - /// Returns mview actor vnode bitmap infos. - pub fn mview_vnode_bitmap_info(&self) -> Vec<(ActorId, Option)> { - self.fragments - .values() - .filter(|fragment| { - (fragment.get_fragment_type_mask() & FragmentTypeFlag::Mview as u32) != 0 - }) - .flat_map(|fragment| { - fragment - .actors - .iter() - .map(|actor| (actor.actor_id, actor.vnode_bitmap.clone())) - }) - .collect_vec() - } - - pub fn mview_actor_parallel_units(&self) -> BTreeMap { - let sink_actor_ids = self.mview_actor_ids(); - sink_actor_ids - .iter() - .map(|actor_id| { - ( - *actor_id, - self.actor_status[actor_id] - .get_parallel_unit() - .unwrap() - .clone(), - ) - }) - .collect() - } - - /// Generate topological order of fragments. If `index(a) < index(b)` in vec, then a is the - /// downstream of b. - pub fn generate_topological_order(&self) -> Vec { - let mut actionable_fragment_id = VecDeque::new(); - - // If downstream_edges[x][y] exists, then there's an edge from x to y. - let mut downstream_edges: HashMap> = HashMap::new(); - - // Counts how many upstreams are there for a given fragment - let mut upstream_cnts: HashMap = HashMap::new(); - - let mut result = vec![]; - - let mut actor_to_fragment_mapping = HashMap::new(); - - // Firstly, record actor -> fragment mapping - for (fragment_id, fragment) in &self.fragments { - for actor in &fragment.actors { - let ret = actor_to_fragment_mapping.insert(actor.actor_id, *fragment_id); - assert!(ret.is_none(), "duplicated actor id found"); - } - } - - // Then, generate the DAG of fragments - for (fragment_id, fragment) in &self.fragments { - for upstream_actor in &fragment.actors { - for dispatcher in &upstream_actor.dispatcher { - for downstream_actor in &dispatcher.downstream_actor_id { - let downstream_fragment_id = - actor_to_fragment_mapping.get(downstream_actor).unwrap(); - - let did_not_have = downstream_edges - .entry(*fragment_id) - .or_default() - .insert(*downstream_fragment_id); - - if did_not_have { - *upstream_cnts.entry(*downstream_fragment_id).or_default() += 1; - } - } - } - } - } - - // Find actionable fragments - for fragment_id in self.fragments.keys() { - if upstream_cnts.get(fragment_id).is_none() { - actionable_fragment_id.push_back(*fragment_id); - } - } - - // After that, we can generate topological order - while let Some(fragment_id) = actionable_fragment_id.pop_front() { - result.push(fragment_id); - - // Find if we can process more fragments - if let Some(downstreams) = downstream_edges.get(&fragment_id) { - for downstream_id in downstreams.iter() { - let cnt = upstream_cnts - .get_mut(downstream_id) - .expect("the downstream should exist"); - - *cnt -= 1; - if *cnt == 0 { - upstream_cnts.remove(downstream_id); - actionable_fragment_id.push_back(*downstream_id); - } - } - } - } - - if !upstream_cnts.is_empty() { - // There are fragments that are not processed yet. - panic!("not a DAG"); - } - - assert_eq!(result.len(), self.fragments.len()); - - result - } - /// Returns the internal table ids without the mview table. 
pub fn internal_table_ids(&self) -> Vec { self.fragments diff --git a/src/meta/src/model/user.rs b/src/meta/src/model/user.rs index feab577ef26d6..b77f0aa16942f 100644 --- a/src/meta/src/model/user.rs +++ b/src/meta/src/model/user.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/rpc/election_client.rs b/src/meta/src/rpc/election_client.rs index 266ded6ece9d0..b54b53abd4e7b 100644 --- a/src/meta/src/rpc/election_client.rs +++ b/src/meta/src/rpc/election_client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,10 +13,10 @@ // limitations under the License. use std::borrow::BorrowMut; +use std::collections::HashSet; use std::time::Duration; use etcd_client::{Client, ConnectOptions, Error, GetOptions}; -use risingwave_pb::meta::MetaLeaderInfo; use tokio::sync::watch::Receiver; use tokio::sync::{oneshot, watch}; use tokio::time; @@ -28,33 +28,24 @@ const META_ELECTION_KEY: &str = "__meta_election_"; pub struct ElectionMember { pub id: String, - pub lease: i64, -} - -impl From for MetaLeaderInfo { - fn from(val: ElectionMember) -> Self { - let ElectionMember { id, lease } = val; - MetaLeaderInfo { - node_address: id, - lease_id: lease as u64, - } - } + pub is_leader: bool, } #[async_trait::async_trait] pub trait ElectionClient: Send + Sync + 'static { fn id(&self) -> MetaResult; - async fn run_once(&self, ttl: i64, stop: watch::Receiver<()>) -> MetaResult<()>; - fn subscribe(&self) -> watch::Receiver; + async fn run_once(&self, ttl: i64, stop: Receiver<()>) -> MetaResult<()>; + fn subscribe(&self) -> Receiver; async fn leader(&self) -> MetaResult>; async fn get_members(&self) -> MetaResult>; async fn is_leader(&self) -> bool; } pub struct EtcdElectionClient { - client: Client, id: String, is_leader_sender: watch::Sender, + endpoints: Vec, + options: Option, } #[async_trait::async_trait] @@ -64,7 +55,7 @@ impl ElectionClient for EtcdElectionClient { } async fn leader(&self) -> MetaResult> { - let mut election_client = self.client.election_client(); + let mut election_client = self.client().await?.election_client(); let leader = election_client.leader(META_ELECTION_KEY).await; let leader = match leader { @@ -76,18 +67,17 @@ impl ElectionClient for EtcdElectionClient { Ok(leader.and_then(|leader| { leader.kv().map(|leader_kv| ElectionMember { id: String::from_utf8_lossy(leader_kv.value()).to_string(), - lease: leader_kv.lease(), + is_leader: true, }) })) } async fn run_once(&self, ttl: i64, stop: watch::Receiver<()>) -> MetaResult<()> { - let mut lease_client = self.client.lease_client(); - let mut election_client = self.client.election_client(); + let client = self.client().await?; + let mut lease_client = client.lease_client(); + let mut election_client = client.election_client(); let mut stop = stop; - self.is_leader_sender.send_replace(false); - tracing::info!("client {} start election", self.id); // is restored leader from previous session? 
@@ -136,7 +126,7 @@ impl ElectionClient for EtcdElectionClient { let (keep_alive_fail_tx, mut keep_alive_fail_rx) = oneshot::channel(); - let mut lease_client = self.client.lease_client(); + let mut lease_client = client.lease_client(); let mut stop_ = stop.clone(); @@ -156,15 +146,25 @@ impl ElectionClient for EtcdElectionClient { let mut ticker = time::interval(Duration::from_secs(1)); + // timeout controller, when keep alive fails for more than a certain period of time + // before it is considered a complete failure + let mut timeout = time::interval(Duration::from_secs((ttl / 2) as u64)); + timeout.reset(); + loop { tokio::select! { biased; + _ = timeout.tick() => { + tracing::warn!("lease {} keep alive timeout", lease_id); + keep_alive_fail_tx.send(()).unwrap(); + break; + } + _ = ticker.tick() => { if let Err(err) = keeper.keep_alive().await { - tracing::error!("keep alive for lease {} failed {}", lease_id, err); - keep_alive_fail_tx.send(()).unwrap(); - break; + tracing::warn!("keep alive for lease {} failed {}", lease_id, err); + continue } match resp_stream.message().await { @@ -174,16 +174,23 @@ impl ElectionClient for EtcdElectionClient { keep_alive_fail_tx.send(()).unwrap(); break; } + + timeout.reset(); }, Ok(None) => { tracing::warn!("lease keeper for lease {} response stream closed unexpected", lease_id); - keep_alive_fail_tx.send(()).unwrap(); - break; + + // try to re-create lease keeper, with timeout as ttl / 2 + if let Ok(Ok((keeper_, resp_stream_))) = time::timeout(Duration::from_secs((ttl / 2) as u64), lease_client.keep_alive(lease_id)).await { + keeper = keeper_; + resp_stream = resp_stream_; + }; + + continue; } Err(e) => { tracing::error!("lease keeper failed {}", e.to_string()); - keep_alive_fail_tx.send(()).unwrap(); - break; + continue; } }; } @@ -200,6 +207,7 @@ impl ElectionClient for EtcdElectionClient { let _guard = scopeguard::guard(handle, |handle| handle.abort()); if !restored_leader { + self.is_leader_sender.send_replace(false); tracing::info!("no restored leader, campaigning"); tokio::select! { biased; @@ -216,10 +224,10 @@ impl ElectionClient for EtcdElectionClient { }; } - let mut observe_stream = election_client.observe(META_ELECTION_KEY).await?; - self.is_leader_sender.send_replace(true); + let mut observe_stream = election_client.observe(META_ELECTION_KEY).await?; + loop { tokio::select! 
{ biased; @@ -233,7 +241,15 @@ impl ElectionClient for EtcdElectionClient { }, resp = observe_stream.next() => { match resp { - None => unreachable!(), + None => { + tracing::warn!("observe stream closed unexpected, recreating"); + + // try to re-create observe stream, with timeout as ttl / 2 + if let Ok(Ok(stream)) = time::timeout(Duration::from_secs((ttl / 2) as u64), election_client.observe(META_ELECTION_KEY)).await { + observe_stream = stream; + tracing::info!("recreating observe stream"); + } + } Some(Ok(leader)) => { if let Some(kv) = leader.kv() && kv.value() != self.id.as_bytes() { tracing::warn!("leader has been changed to {}", String::from_utf8_lossy(kv.value()).to_string()); @@ -241,8 +257,8 @@ impl ElectionClient for EtcdElectionClient { } } Some(Err(e)) => { - tracing::error!("error {} received from leader observe stream", e.to_string()); - break; + tracing::warn!("error {} received from leader observe stream", e.to_string()); + continue } } } @@ -257,20 +273,35 @@ impl ElectionClient for EtcdElectionClient { } async fn get_members(&self) -> MetaResult> { - let mut client = self.client.kv_client(); + let mut client = self.client().await?.kv_client(); let keys = client .get(META_ELECTION_KEY, Some(GetOptions::new().with_prefix())) .await?; - // todo, sort by revision - Ok(keys + let member_ids: HashSet<_> = keys .kvs() .iter() - .map(|kv| ElectionMember { - id: String::from_utf8_lossy(kv.value()).to_string(), - lease: kv.lease(), - }) - .collect()) + .map(|kv| String::from_utf8_lossy(kv.value()).to_string()) + .collect(); + + let members = match self.leader().await? { + Some(leader) => member_ids + .into_iter() + .map(|id| { + let is_leader = id == leader.id; + ElectionMember { id, is_leader } + }) + .collect(), + None => member_ids + .into_iter() + .map(|id| ElectionMember { + id, + is_leader: false, + }) + .collect(), + }; + + Ok(members) } fn id(&self) -> MetaResult { @@ -283,19 +314,19 @@ impl ElectionClient for EtcdElectionClient { } impl EtcdElectionClient { - pub(crate) async fn new( - endpoints: Vec, - options: Option, - id: String, - ) -> MetaResult { - let client = Client::connect(&endpoints, options.clone()).await?; - + pub(crate) fn new(endpoints: Vec, options: Option, id: String) -> Self { let (sender, _) = watch::channel(false); - Ok(Self { - client, + Self { + endpoints, + options, id, is_leader_sender: sender, - }) + } + } + + async fn client(&self) -> MetaResult { + let client = Client::connect(self.endpoints.clone(), self.options.clone()).await?; + Ok(client) } } @@ -305,11 +336,12 @@ mod tests { use std::sync::Arc; use std::time::Duration; + use etcd_client::GetOptions; use itertools::Itertools; use tokio::sync::watch; use tokio::time; - use crate::rpc::election_client::{ElectionClient, EtcdElectionClient}; + use crate::rpc::election_client::{ElectionClient, EtcdElectionClient, META_ELECTION_KEY}; #[tokio::test] async fn test_election() { @@ -325,15 +357,11 @@ mod tests { let (stop_sender, stop_receiver) = watch::channel(()); clients.push(( stop_sender, - Arc::new( - EtcdElectionClient::new( - vec!["localhost:2388".to_string()], - None, - format!("client_{}", i).to_string(), - ) - .await - .unwrap(), - ), + Arc::new(EtcdElectionClient::new( + vec!["localhost:2388".to_string()], + None, + format!("client_{}", i).to_string(), + )), )); } @@ -396,11 +424,25 @@ mod tests { assert_eq!(election_leader.id, leader.1.id().unwrap()); - let lease_id = election_leader.lease; - let client = etcd_client::Client::connect(&vec!["localhost:2388"], None) .await .unwrap(); + + 
let kvs = client + .kv_client() + .get(META_ELECTION_KEY, Some(GetOptions::new().with_prefix())) + .await + .unwrap(); + + let leader_kv = kvs + .kvs() + .iter() + .find(|kv| kv.value() == election_leader.id.as_bytes()) + .cloned() + .unwrap(); + + let lease_id = leader_kv.lease(); + client.lease_client().revoke(lease_id).await.unwrap(); time::sleep(Duration::from_secs(10)).await; diff --git a/src/meta/src/rpc/follower_svc.rs b/src/meta/src/rpc/follower_svc.rs deleted file mode 100644 index 6801e45fd389a..0000000000000 --- a/src/meta/src/rpc/follower_svc.rs +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2023 Singularity Data -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; - -use risingwave_pb::health::health_server::HealthServer; -use risingwave_pb::leader::leader_service_server::LeaderServiceServer; -use risingwave_pb::meta::MetaLeaderInfo; -use tokio::sync::oneshot::Receiver as OneReceiver; -use tokio::sync::watch::Receiver as WatchReceiver; - -use super::intercept::MetricsMiddlewareLayer; -use super::server::AddressInfo; -use super::service::health_service::HealthServiceImpl; -use crate::rpc::metrics::MetaMetrics; -use crate::rpc::server::ElectionClientRef; -use crate::rpc::service::leader_service::LeaderServiceImpl; - -/// Starts all services needed for the meta follower node -pub async fn start_follower_srv( - mut svc_shutdown_rx: WatchReceiver<()>, - follower_shutdown_rx: OneReceiver<()>, - address_info: AddressInfo, - election_client: Option, -) { - let leader_srv = LeaderServiceImpl::new( - election_client, - MetaLeaderInfo { - node_address: address_info.listen_addr.to_string(), - lease_id: 0, - }, - ); - - let health_srv = HealthServiceImpl::new(); - tonic::transport::Server::builder() - .layer(MetricsMiddlewareLayer::new(Arc::new(MetaMetrics::new()))) - .add_service(LeaderServiceServer::new(leader_srv)) - .add_service(HealthServer::new(health_srv)) - .serve_with_shutdown(address_info.listen_addr, async move { - tokio::select! { - _ = tokio::signal::ctrl_c() => {}, - // shutdown service if all services should be shut down - res = svc_shutdown_rx.changed() => { - match res { - Ok(_) => tracing::info!("Shutting down services"), - Err(_) => tracing::error!("Service shutdown sender dropped") - } - }, - // shutdown service if follower becomes leader - res = follower_shutdown_rx => { - match res { - Ok(_) => tracing::info!("Shutting down follower services"), - Err(_) => tracing::error!("Follower service shutdown sender dropped") - } - }, - } - }) - .await - .unwrap(); -} diff --git a/src/meta/src/rpc/intercept.rs b/src/meta/src/rpc/intercept.rs index 23352ef3edf12..849454aab65ea 100644 --- a/src/meta/src/rpc/intercept.rs +++ b/src/meta/src/rpc/intercept.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
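The election-client changes above stop abandoning the leader session on every failed keep-alive; instead, transient failures are retried and the lease is only given up when no successful response arrives within roughly `ttl / 2`, tracked by a timeout interval that is reset on each success. The sketch below shows that select/reset structure with a fake `keep_alive()` in place of the etcd client, so it is runnable but only illustrative.

```rust
use std::time::Duration;

use tokio::time;

// Fake keep-alive: pretend every third round fails transiently.
async fn keep_alive(round: u32) -> Result<(), &'static str> {
    if round % 3 == 0 { Err("transient failure") } else { Ok(()) }
}

#[tokio::main]
async fn main() {
    let ttl = Duration::from_secs(2);
    let mut ticker = time::interval(Duration::from_millis(200));
    // The lease is only abandoned if no successful keep-alive happens within ttl / 2.
    let mut timeout = time::interval(ttl / 2);
    timeout.reset(); // skip the interval's immediate first tick

    let mut round = 0u32;
    loop {
        tokio::select! {
            biased;
            _ = timeout.tick() => {
                println!("keep alive timed out, giving up the lease");
                break;
            }
            _ = ticker.tick() => {
                round += 1;
                if round > 10 { break; } // stop the demo eventually
                match keep_alive(round).await {
                    // Success: push the deadline out again.
                    Ok(()) => timeout.reset(),
                    // Transient failure: log and keep looping instead of breaking.
                    Err(e) => println!("keep alive failed ({e}), retrying"),
                }
            }
        }
    }
}
```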
diff --git a/src/meta/src/rpc/leader_svc.rs b/src/meta/src/rpc/leader_svc.rs deleted file mode 100644 index 8bdc5783cfe3e..0000000000000 --- a/src/meta/src/rpc/leader_svc.rs +++ /dev/null @@ -1,336 +0,0 @@ -// Copyright 2023 Singularity Data -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; -use std::time::Duration; - -use risingwave_backup::storage::ObjectStoreMetaSnapshotStorage; -use risingwave_common::monitor::process_linux::monitor_process; -use risingwave_common_service::metrics_manager::MetricsManager; -use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; -use risingwave_object_store::object::parse_remote_object_store; -use risingwave_pb::backup_service::backup_service_server::BackupServiceServer; -use risingwave_pb::ddl_service::ddl_service_server::DdlServiceServer; -use risingwave_pb::health::health_server::HealthServer; -use risingwave_pb::hummock::hummock_manager_service_server::HummockManagerServiceServer; -use risingwave_pb::leader::leader_service_server::LeaderServiceServer; -use risingwave_pb::meta::cluster_service_server::ClusterServiceServer; -use risingwave_pb::meta::heartbeat_service_server::HeartbeatServiceServer; -use risingwave_pb::meta::notification_service_server::NotificationServiceServer; -use risingwave_pb::meta::scale_service_server::ScaleServiceServer; -use risingwave_pb::meta::stream_manager_service_server::StreamManagerServiceServer; -use risingwave_pb::meta::MetaLeaderInfo; -use risingwave_pb::user::user_service_server::UserServiceServer; -use tokio::sync::watch::Receiver as WatchReceiver; - -use super::intercept::MetricsMiddlewareLayer; -use super::service::health_service::HealthServiceImpl; -use super::service::notification_service::NotificationServiceImpl; -use super::service::scale_service::ScaleServiceImpl; -use super::DdlServiceImpl; -use crate::backup_restore::BackupManager; -use crate::barrier::{BarrierScheduler, GlobalBarrierManager}; -use crate::hummock::{CompactionScheduler, HummockManager}; -use crate::manager::{ - CatalogManager, ClusterManager, FragmentManager, IdleManager, MetaOpts, MetaSrvEnv, -}; -use crate::rpc::metrics::MetaMetrics; -use crate::rpc::server::{AddressInfo, ElectionClientRef}; -use crate::rpc::service::backup_service::BackupServiceImpl; -use crate::rpc::service::cluster_service::ClusterServiceImpl; -use crate::rpc::service::heartbeat_service::HeartbeatServiceImpl; -use crate::rpc::service::hummock_service::HummockServiceImpl; -use crate::rpc::service::leader_service::LeaderServiceImpl; -use crate::rpc::service::stream_service::StreamServiceImpl; -use crate::rpc::service::user_service::UserServiceImpl; -use crate::storage::MetaStore; -use crate::stream::{GlobalStreamManager, SourceManager}; -use crate::{hummock, MetaResult}; - -/// Starts all services needed for the meta leader node -/// Only call this function once, since initializing the services multiple times will result in an -/// inconsistent state -/// -/// ## Returns -/// Returns an error if the service initialization 
failed -pub async fn start_leader_srv( - meta_store: Arc, - address_info: AddressInfo, - max_heartbeat_interval: Duration, - opts: MetaOpts, - current_leader: MetaLeaderInfo, - election_client: Option, - mut svc_shutdown_rx: WatchReceiver<()>, -) -> MetaResult<()> { - tracing::info!("Defining leader services"); - let prometheus_endpoint = opts.prometheus_endpoint.clone(); - let env = MetaSrvEnv::::new(opts, meta_store.clone(), current_leader.clone()).await; - let fragment_manager = Arc::new(FragmentManager::new(env.clone()).await.unwrap()); - let meta_metrics = Arc::new(MetaMetrics::new()); - let registry = meta_metrics.registry(); - monitor_process(registry).unwrap(); - - let cluster_manager = Arc::new( - ClusterManager::new(env.clone(), max_heartbeat_interval) - .await - .unwrap(), - ); - let heartbeat_srv = HeartbeatServiceImpl::new(cluster_manager.clone()); - - let compactor_manager = Arc::new( - hummock::CompactorManager::with_meta(env.clone(), max_heartbeat_interval.as_secs()) - .await - .unwrap(), - ); - - let hummock_manager = hummock::HummockManager::new( - env.clone(), - cluster_manager.clone(), - meta_metrics.clone(), - compactor_manager.clone(), - ) - .await - .unwrap(); - - #[cfg(not(madsim))] - if let Some(ref dashboard_addr) = address_info.dashboard_addr { - let dashboard_service = crate::dashboard::DashboardService { - dashboard_addr: *dashboard_addr, - cluster_manager: cluster_manager.clone(), - fragment_manager: fragment_manager.clone(), - meta_store: env.meta_store_ref(), - prometheus_endpoint: prometheus_endpoint.clone(), - prometheus_client: prometheus_endpoint.as_ref().map(|x| { - use std::str::FromStr; - prometheus_http_query::Client::from_str(x).unwrap() - }), - }; - // TODO: join dashboard service back to local thread. - tokio::spawn(dashboard_service.serve(address_info.ui_path)); - } - - let catalog_manager = Arc::new(CatalogManager::new(env.clone()).await.unwrap()); - - let (barrier_scheduler, scheduled_barriers) = - BarrierScheduler::new_pair(hummock_manager.clone(), env.opts.checkpoint_frequency); - - let source_manager = Arc::new( - SourceManager::new( - env.opts.connector_rpc_endpoint.clone(), - barrier_scheduler.clone(), - catalog_manager.clone(), - fragment_manager.clone(), - ) - .await - .unwrap(), - ); - - let barrier_manager = Arc::new(GlobalBarrierManager::new( - scheduled_barriers, - env.clone(), - cluster_manager.clone(), - catalog_manager.clone(), - fragment_manager.clone(), - hummock_manager.clone(), - source_manager.clone(), - meta_metrics.clone(), - )); - - { - let source_manager = source_manager.clone(); - tokio::spawn(async move { - source_manager.run().await.unwrap(); - }); - } - - let stream_manager = Arc::new( - GlobalStreamManager::new( - env.clone(), - fragment_manager.clone(), - barrier_scheduler.clone(), - cluster_manager.clone(), - source_manager.clone(), - hummock_manager.clone(), - ) - .unwrap(), - ); - - hummock_manager - .purge_stale( - &fragment_manager - .list_table_fragments() - .await - .expect("list_table_fragments"), - ) - .await - .unwrap(); - - // Initialize services. 
- let backup_object_store = Arc::new( - parse_remote_object_store( - &env.opts.backup_storage_url, - Arc::new(ObjectStoreMetrics::unused()), - true, - "Meta Backup", - ) - .await, - ); - let backup_storage = Arc::new( - ObjectStoreMetaSnapshotStorage::new( - &env.opts.backup_storage_directory, - backup_object_store, - ) - .await?, - ); - let backup_manager = Arc::new(BackupManager::new( - env.clone(), - hummock_manager.clone(), - backup_storage, - meta_metrics.registry().clone(), - )); - let vacuum_manager = Arc::new(hummock::VacuumManager::new( - env.clone(), - hummock_manager.clone(), - backup_manager.clone(), - compactor_manager.clone(), - )); - - let ddl_srv = DdlServiceImpl::::new( - env.clone(), - catalog_manager.clone(), - stream_manager.clone(), - source_manager.clone(), - cluster_manager.clone(), - fragment_manager.clone(), - barrier_manager.clone(), - ); - - let user_srv = UserServiceImpl::::new(env.clone(), catalog_manager.clone()); - - let scale_srv = ScaleServiceImpl::::new( - barrier_scheduler.clone(), - fragment_manager.clone(), - cluster_manager.clone(), - source_manager, - catalog_manager.clone(), - stream_manager.clone(), - ); - - let cluster_srv = ClusterServiceImpl::::new(cluster_manager.clone()); - let stream_srv = StreamServiceImpl::::new( - env.clone(), - barrier_scheduler.clone(), - fragment_manager.clone(), - ); - let hummock_srv = HummockServiceImpl::new( - hummock_manager.clone(), - compactor_manager.clone(), - vacuum_manager.clone(), - fragment_manager.clone(), - ); - let notification_srv = NotificationServiceImpl::new( - env.clone(), - catalog_manager, - cluster_manager.clone(), - hummock_manager.clone(), - fragment_manager.clone(), - backup_manager.clone(), - ); - let health_srv = HealthServiceImpl::new(); - let backup_srv = BackupServiceImpl::new(backup_manager); - - if let Some(prometheus_addr) = address_info.prometheus_addr { - MetricsManager::boot_metrics_service( - prometheus_addr.to_string(), - meta_metrics.registry().clone(), - ) - } - - let compaction_scheduler = Arc::new(CompactionScheduler::new( - env.clone(), - hummock_manager.clone(), - compactor_manager.clone(), - )); - - // sub_tasks executed concurrently. 
Can be shutdown via shutdown_all - let mut sub_tasks = - hummock::start_hummock_workers(vacuum_manager, compaction_scheduler, &env.opts); - sub_tasks.push( - ClusterManager::start_worker_num_monitor( - cluster_manager.clone(), - Duration::from_secs(env.opts.node_num_monitor_interval_sec), - meta_metrics.clone(), - ) - .await, - ); - sub_tasks.push(HummockManager::start_compaction_heartbeat(hummock_manager).await); - - if cfg!(not(test)) { - sub_tasks.push( - ClusterManager::start_heartbeat_checker(cluster_manager, Duration::from_secs(1)).await, - ); - sub_tasks.push(GlobalBarrierManager::start(barrier_manager).await); - } - let (idle_send, idle_recv) = tokio::sync::oneshot::channel(); - sub_tasks.push( - IdleManager::start_idle_checker(env.idle_manager_ref(), Duration::from_secs(30), idle_send) - .await, - ); - - let shutdown_all = async move { - for (join_handle, shutdown_sender) in sub_tasks { - if let Err(_err) = shutdown_sender.send(()) { - // Maybe it is already shut down - continue; - } - if let Err(err) = join_handle.await { - tracing::warn!("Failed to join shutdown: {:?}", err); - } - } - }; - - let leader_srv = LeaderServiceImpl::new(election_client, current_leader); - - tonic::transport::Server::builder() - .layer(MetricsMiddlewareLayer::new(meta_metrics)) - .add_service(HeartbeatServiceServer::new(heartbeat_srv)) - .add_service(ClusterServiceServer::new(cluster_srv)) - .add_service(StreamManagerServiceServer::new(stream_srv)) - .add_service(HummockManagerServiceServer::new(hummock_srv)) - .add_service(NotificationServiceServer::new(notification_srv)) - .add_service(LeaderServiceServer::new(leader_srv)) - .add_service(DdlServiceServer::new(ddl_srv)) - .add_service(UserServiceServer::new(user_srv)) - .add_service(ScaleServiceServer::new(scale_srv)) - .add_service(HealthServer::new(health_srv)) - .add_service(BackupServiceServer::new(backup_srv)) - .serve_with_shutdown(address_info.listen_addr, async move { - tokio::select! { - _ = tokio::signal::ctrl_c() => {}, - res = svc_shutdown_rx.changed() => { - match res { - Ok(_) => tracing::info!("Shutting down services"), - Err(_) => tracing::error!("Service shutdown receiver dropped") - } - shutdown_all.await; - }, - _ = idle_recv => { - shutdown_all.await; - }, - } - }) - .await - .unwrap(); - Ok(()) -} diff --git a/src/meta/src/rpc/metrics.rs b/src/meta/src/rpc/metrics.rs index c6dbd49440bc1..ea722f6def714 100644 --- a/src/meta/src/rpc/metrics.rs +++ b/src/meta/src/rpc/metrics.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -66,6 +66,9 @@ pub struct MetaMetrics { pub min_safepoint_version_id: IntGauge, /// Hummock version stats pub version_stats: IntGaugeVec, + /// Total number of SSTs that is no longer referenced by versions but is not yet deleted from + /// storage. 
+ pub stale_ssts_count: IntGauge, /// Latency for hummock manager to acquire lock pub hummock_manager_lock_time: HistogramVec, @@ -163,7 +166,7 @@ impl MetaMetrics { let compact_frequency = register_int_counter_vec_with_registry!( "storage_level_compact_frequency", "num of compactions from each level to next level", - &["compactor", "group", "result"], + &["compactor", "group", "task_type", "result"], registry ) .unwrap(); @@ -223,6 +226,12 @@ impl MetaMetrics { ) .unwrap(); + let stale_ssts_count = register_int_gauge_with_registry!( + "storage_stale_ssts_count", + "total number of SSTs that is no longer referenced by versions but is not yet deleted from storage", + registry + ).unwrap(); + let hummock_manager_lock_time = register_histogram_vec_with_registry!( "hummock_manager_lock_time", "latency for hummock manager to acquire the rwlock", @@ -267,6 +276,7 @@ impl MetaMetrics { level_file_size, version_size, version_stats, + stale_ssts_count, current_version_id, checkpoint_version_id, min_pinned_version_id, diff --git a/src/meta/src/rpc/mod.rs b/src/meta/src/rpc/mod.rs index 1cbad958dc0d6..a5afb8ee6dc43 100644 --- a/src/meta/src/rpc/mod.rs +++ b/src/meta/src/rpc/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,9 +13,7 @@ // limitations under the License. mod election_client; -mod follower_svc; mod intercept; -mod leader_svc; pub mod metrics; pub mod server; mod service; diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index a61f60468f48b..9d2e1384a6ea9 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
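The metrics hunk above adds a `storage_stale_ssts_count` gauge registered against the shared registry. For reference, this is how such a gauge is registered and exported with the `prometheus` crate (the same macro family the diff uses); the metric name and help text here are illustrative only.

```rust
use prometheus::{register_int_gauge_with_registry, Encoder, Registry, TextEncoder};

fn main() {
    let registry = Registry::new();

    // Register the gauge against the shared registry, mirroring MetaMetrics::new.
    let stale_ssts_count = register_int_gauge_with_registry!(
        "storage_stale_ssts_count",
        "SSTs no longer referenced by any version but not yet deleted from storage",
        registry
    )
    .unwrap();

    // Updated by whoever tracks vacuum progress; here we just set a value.
    stale_ssts_count.set(42);

    // Render the registry the same way a /metrics endpoint would.
    let mut buf = Vec::new();
    TextEncoder::new()
        .encode(&registry.gather(), &mut buf)
        .unwrap();
    println!("{}", String::from_utf8(buf).unwrap());
}
```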
@@ -16,19 +16,55 @@ use std::net::SocketAddr; use std::sync::Arc; use std::time::Duration; +use either::Either; use etcd_client::ConnectOptions; -use risingwave_pb::meta::MetaLeaderInfo; -use tokio::sync::oneshot::channel as OneChannel; +use risingwave_backup::storage::ObjectStoreMetaSnapshotStorage; +use risingwave_common::monitor::process_linux::monitor_process; +use risingwave_common_service::metrics_manager::MetricsManager; +use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; +use risingwave_object_store::object::parse_remote_object_store; +use risingwave_pb::backup_service::backup_service_server::BackupServiceServer; +use risingwave_pb::ddl_service::ddl_service_server::DdlServiceServer; +use risingwave_pb::health::health_server::HealthServer; +use risingwave_pb::hummock::hummock_manager_service_server::HummockManagerServiceServer; +use risingwave_pb::meta::cluster_service_server::ClusterServiceServer; +use risingwave_pb::meta::heartbeat_service_server::HeartbeatServiceServer; +use risingwave_pb::meta::meta_member_service_server::MetaMemberServiceServer; +use risingwave_pb::meta::notification_service_server::NotificationServiceServer; +use risingwave_pb::meta::scale_service_server::ScaleServiceServer; +use risingwave_pb::meta::stream_manager_service_server::StreamManagerServiceServer; +use risingwave_pb::meta::system_params_service_server::SystemParamsServiceServer; +use risingwave_pb::user::user_service_server::UserServiceServer; +use tokio::sync::oneshot::{channel as OneChannel, Receiver as OneReceiver}; use tokio::sync::watch; -use tokio::sync::watch::Sender as WatchSender; +use tokio::sync::watch::{Receiver as WatchReceiver, Sender as WatchSender}; use tokio::task::JoinHandle; -use super::follower_svc::start_follower_srv; -use crate::manager::MetaOpts; +use super::intercept::MetricsMiddlewareLayer; +use super::service::health_service::HealthServiceImpl; +use super::service::notification_service::NotificationServiceImpl; +use super::service::scale_service::ScaleServiceImpl; +use super::DdlServiceImpl; +use crate::backup_restore::BackupManager; +use crate::barrier::{BarrierScheduler, GlobalBarrierManager}; +use crate::hummock::{CompactionScheduler, HummockManager}; +use crate::manager::{ + CatalogManager, ClusterManager, FragmentManager, IdleManager, MetaOpts, MetaSrvEnv, + SystemParamManager, +}; use crate::rpc::election_client::{ElectionClient, EtcdElectionClient}; -use crate::rpc::leader_svc::start_leader_srv; +use crate::rpc::metrics::MetaMetrics; +use crate::rpc::service::backup_service::BackupServiceImpl; +use crate::rpc::service::cluster_service::ClusterServiceImpl; +use crate::rpc::service::heartbeat_service::HeartbeatServiceImpl; +use crate::rpc::service::hummock_service::HummockServiceImpl; +use crate::rpc::service::meta_member_service::MetaMemberServiceImpl; +use crate::rpc::service::stream_service::StreamServiceImpl; +use crate::rpc::service::system_params_service::SystemParamsServiceImpl; +use crate::rpc::service::user_service::UserServiceImpl; use crate::storage::{EtcdMetaStore, MemStore, MetaStore, WrappedEtcdClient as EtcdClient}; -use crate::MetaResult; +use crate::stream::{GlobalStreamManager, SourceManager}; +use crate::{hummock, MetaResult}; #[derive(Debug)] pub enum MetaStoreBackend { @@ -41,8 +77,7 @@ pub enum MetaStoreBackend { #[derive(Clone)] pub struct AddressInfo { - pub endpoint: String, - pub addr: String, + pub advertise_addr: String, pub listen_addr: SocketAddr, pub prometheus_addr: Option, pub dashboard_addr: Option, @@ -52,8 +87,7 
@@ pub struct AddressInfo { impl Default for AddressInfo { fn default() -> Self { Self { - endpoint: "".to_string(), - addr: "127.0.0.1:0000".to_string(), + advertise_addr: "".to_string(), listen_addr: SocketAddr::V4("127.0.0.1:0000".parse().unwrap()), prometheus_addr: None, dashboard_addr: None, @@ -90,10 +124,11 @@ pub async fn rpc_serve( .map_err(|e| anyhow::anyhow!("failed to connect etcd {}", e))?; let meta_store = Arc::new(EtcdMetaStore::new(client)); - let election_client = Arc::new( - EtcdElectionClient::new(endpoints, Some(options), address_info.endpoint.clone()) - .await?, - ); + let election_client = Arc::new(EtcdElectionClient::new( + endpoints, + Some(options), + address_info.advertise_addr.clone(), + )); rpc_serve_with_store( meta_store, @@ -149,16 +184,28 @@ pub async fn rpc_serve_with_store( let join_handle = tokio::spawn(async move { if let Some(election_client) = election_client.clone() { - let mut state_watcher = election_client.subscribe(); - let svc_shutdown_rx_clone = svc_shutdown_rx.clone(); + let mut is_leader_watcher = election_client.subscribe(); + let mut svc_shutdown_rx_clone = svc_shutdown_rx.clone(); let (follower_shutdown_tx, follower_shutdown_rx) = OneChannel::<()>(); - let follower_handle: Option> = if !*state_watcher.borrow() { + + tokio::select! { + _ = svc_shutdown_rx_clone.changed() => return, + res = is_leader_watcher.changed() => { + if let Err(err) = res { + tracing::error!("leader watcher recv failed {}", err.to_string()); + } + } + } + let svc_shutdown_rx_clone = svc_shutdown_rx.clone(); + + // If not the leader, spawn a follower. + let follower_handle: Option> = if !*is_leader_watcher.borrow() { let address_info_clone = address_info.clone(); let election_client_ = election_client.clone(); Some(tokio::spawn(async move { let _ = tracing::span!(tracing::Level::INFO, "follower services").enter(); - start_follower_srv( + start_service_as_election_follower( svc_shutdown_rx_clone, follower_shutdown_rx, address_info_clone, @@ -170,9 +217,17 @@ pub async fn rpc_serve_with_store( None }; - while !*state_watcher.borrow_and_update() { - if let Err(e) = state_watcher.changed().await { - tracing::error!("state watcher recv failed {}", e.to_string()); + let mut svc_shutdown_rx_clone = svc_shutdown_rx.clone(); + while !*is_leader_watcher.borrow_and_update() { + tokio::select! 
{ + _ = svc_shutdown_rx_clone.changed() => { + return; + } + res = is_leader_watcher.changed() => { + if let Err(err) = res { + tracing::error!("leader watcher recv failed {}", err.to_string()); + } + } } } @@ -182,21 +237,11 @@ pub async fn rpc_serve_with_store( } }; - let current_leader = if let Some(election_client) = election_client.as_ref() { - election_client.leader().await.unwrap().unwrap().into() - } else { - MetaLeaderInfo { - node_address: address_info.listen_addr.clone().to_string(), - lease_id: 0, - } - }; - - start_leader_srv( + start_service_as_election_leader( meta_store, address_info, max_heartbeat_interval, opts, - current_leader, election_client, svc_shutdown_rx, ) @@ -206,3 +251,342 @@ pub async fn rpc_serve_with_store( Ok((join_handle, leader_lost_handle, svc_shutdown_tx)) } + +/// Starts all services needed for the meta follower node +pub async fn start_service_as_election_follower( + mut svc_shutdown_rx: WatchReceiver<()>, + follower_shutdown_rx: OneReceiver<()>, + address_info: AddressInfo, + election_client: Option, +) { + let meta_member_srv = MetaMemberServiceImpl::new(match election_client { + None => Either::Right(address_info.clone()), + Some(election_client) => Either::Left(election_client), + }); + + let health_srv = HealthServiceImpl::new(); + tonic::transport::Server::builder() + .layer(MetricsMiddlewareLayer::new(Arc::new(MetaMetrics::new()))) + .add_service(MetaMemberServiceServer::new(meta_member_srv)) + .add_service(HealthServer::new(health_srv)) + .serve_with_shutdown(address_info.listen_addr, async move { + tokio::select! { + // shutdown service if all services should be shut down + res = svc_shutdown_rx.changed() => { + match res { + Ok(_) => tracing::info!("Shutting down services"), + Err(_) => tracing::error!("Service shutdown sender dropped") + } + }, + // shutdown service if follower becomes leader + res = follower_shutdown_rx => { + match res { + Ok(_) => tracing::info!("Shutting down follower services"), + Err(_) => tracing::error!("Follower service shutdown sender dropped") + } + }, + } + }) + .await + .unwrap(); +} + +/// Starts all services needed for the meta leader node +/// Only call this function once, since initializing the services multiple times will result in an +/// inconsistent state +/// +/// ## Returns +/// Returns an error if the service initialization failed +pub async fn start_service_as_election_leader( + meta_store: Arc, + address_info: AddressInfo, + max_heartbeat_interval: Duration, + opts: MetaOpts, + election_client: Option, + mut svc_shutdown_rx: WatchReceiver<()>, +) -> MetaResult<()> { + tracing::info!("Defining leader services"); + let prometheus_endpoint = opts.prometheus_endpoint.clone(); + let init_system_params = opts.init_system_params(); + let env = MetaSrvEnv::::new(opts, meta_store.clone()).await; + let fragment_manager = Arc::new(FragmentManager::new(env.clone()).await.unwrap()); + let meta_metrics = Arc::new(MetaMetrics::new()); + let registry = meta_metrics.registry(); + monitor_process(registry).unwrap(); + + let cluster_manager = Arc::new( + ClusterManager::new(env.clone(), max_heartbeat_interval) + .await + .unwrap(), + ); + let heartbeat_srv = HeartbeatServiceImpl::new(cluster_manager.clone()); + + let compactor_manager = Arc::new( + hummock::CompactorManager::with_meta(env.clone(), max_heartbeat_interval.as_secs()) + .await + .unwrap(), + ); + + let catalog_manager = Arc::new(CatalogManager::new(env.clone()).await.unwrap()); + let hummock_manager = hummock::HummockManager::new( + env.clone(), + 
cluster_manager.clone(), + meta_metrics.clone(), + compactor_manager.clone(), + catalog_manager.clone(), + ) + .await + .unwrap(); + + let meta_member_srv = MetaMemberServiceImpl::new(match election_client { + None => Either::Right(address_info.clone()), + Some(election_client) => Either::Left(election_client), + }); + + #[cfg(not(madsim))] + if let Some(ref dashboard_addr) = address_info.dashboard_addr { + let dashboard_service = crate::dashboard::DashboardService { + dashboard_addr: *dashboard_addr, + cluster_manager: cluster_manager.clone(), + fragment_manager: fragment_manager.clone(), + meta_store: env.meta_store_ref(), + prometheus_endpoint: prometheus_endpoint.clone(), + prometheus_client: prometheus_endpoint.as_ref().map(|x| { + use std::str::FromStr; + prometheus_http_query::Client::from_str(x).unwrap() + }), + }; + // TODO: join dashboard service back to local thread. + tokio::spawn(dashboard_service.serve(address_info.ui_path)); + } + + let (barrier_scheduler, scheduled_barriers) = + BarrierScheduler::new_pair(hummock_manager.clone(), env.opts.checkpoint_frequency); + + let source_manager = Arc::new( + SourceManager::new( + env.opts.connector_rpc_endpoint.clone(), + barrier_scheduler.clone(), + catalog_manager.clone(), + fragment_manager.clone(), + ) + .await + .unwrap(), + ); + + let barrier_manager = Arc::new(GlobalBarrierManager::new( + scheduled_barriers, + env.clone(), + cluster_manager.clone(), + catalog_manager.clone(), + fragment_manager.clone(), + hummock_manager.clone(), + source_manager.clone(), + meta_metrics.clone(), + )); + + { + let source_manager = source_manager.clone(); + tokio::spawn(async move { + source_manager.run().await.unwrap(); + }); + } + + let stream_manager = Arc::new( + GlobalStreamManager::new( + env.clone(), + fragment_manager.clone(), + barrier_scheduler.clone(), + cluster_manager.clone(), + source_manager.clone(), + hummock_manager.clone(), + ) + .unwrap(), + ); + + hummock_manager + .purge( + &fragment_manager + .list_table_fragments() + .await + .expect("list_table_fragments"), + ) + .await + .unwrap(); + + // Initialize services. 
+ let backup_object_store = Arc::new( + parse_remote_object_store( + &env.opts.backup_storage_url, + Arc::new(ObjectStoreMetrics::unused()), + "Meta Backup", + ) + .await, + ); + let backup_storage = Arc::new( + ObjectStoreMetaSnapshotStorage::new( + &env.opts.backup_storage_directory, + backup_object_store, + ) + .await?, + ); + let backup_manager = Arc::new(BackupManager::new( + env.clone(), + hummock_manager.clone(), + backup_storage, + meta_metrics.registry().clone(), + )); + let vacuum_manager = Arc::new(hummock::VacuumManager::new( + env.clone(), + hummock_manager.clone(), + backup_manager.clone(), + compactor_manager.clone(), + )); + let system_params_manager = + Arc::new(SystemParamManager::new(env.clone(), init_system_params).await?); + + let ddl_srv = DdlServiceImpl::::new( + env.clone(), + catalog_manager.clone(), + stream_manager.clone(), + source_manager.clone(), + cluster_manager.clone(), + fragment_manager.clone(), + barrier_manager.clone(), + ); + + let user_srv = UserServiceImpl::::new(env.clone(), catalog_manager.clone()); + + let scale_srv = ScaleServiceImpl::::new( + barrier_scheduler.clone(), + fragment_manager.clone(), + cluster_manager.clone(), + source_manager, + catalog_manager.clone(), + stream_manager.clone(), + ); + + let cluster_srv = ClusterServiceImpl::::new(cluster_manager.clone()); + let stream_srv = StreamServiceImpl::::new( + env.clone(), + barrier_scheduler.clone(), + stream_manager.clone(), + catalog_manager.clone(), + fragment_manager.clone(), + ); + let hummock_srv = HummockServiceImpl::new( + hummock_manager.clone(), + compactor_manager.clone(), + vacuum_manager.clone(), + fragment_manager.clone(), + ); + let notification_srv = NotificationServiceImpl::new( + env.clone(), + catalog_manager, + cluster_manager.clone(), + hummock_manager.clone(), + fragment_manager.clone(), + backup_manager.clone(), + ); + let health_srv = HealthServiceImpl::new(); + let backup_srv = BackupServiceImpl::new(backup_manager); + let system_params_srv = SystemParamsServiceImpl::new(system_params_manager); + + if let Some(prometheus_addr) = address_info.prometheus_addr { + MetricsManager::boot_metrics_service( + prometheus_addr.to_string(), + meta_metrics.registry().clone(), + ) + } + + let compaction_scheduler = Arc::new(CompactionScheduler::new( + env.clone(), + hummock_manager.clone(), + compactor_manager.clone(), + )); + + // sub_tasks executed concurrently. 
Can be shutdown via shutdown_all + let mut sub_tasks = + hummock::start_hummock_workers(vacuum_manager, compaction_scheduler, &env.opts); + sub_tasks.push( + ClusterManager::start_worker_num_monitor( + cluster_manager.clone(), + Duration::from_secs(env.opts.node_num_monitor_interval_sec), + meta_metrics.clone(), + ) + .await, + ); + sub_tasks.push(HummockManager::start_compaction_heartbeat(hummock_manager).await); + + if cfg!(not(test)) { + sub_tasks.push( + ClusterManager::start_heartbeat_checker(cluster_manager, Duration::from_secs(1)).await, + ); + sub_tasks.push(GlobalBarrierManager::start(barrier_manager).await); + } + let (idle_send, idle_recv) = tokio::sync::oneshot::channel(); + sub_tasks.push( + IdleManager::start_idle_checker(env.idle_manager_ref(), Duration::from_secs(30), idle_send) + .await, + ); + + let (abort_sender, abort_recv) = tokio::sync::oneshot::channel(); + let notification_mgr = env.notification_manager_ref(); + let stream_abort_handler = tokio::spawn(async move { + abort_recv.await.unwrap(); + notification_mgr.abort_all().await; + compactor_manager.abort_all_compactors(); + }); + sub_tasks.push((stream_abort_handler, abort_sender)); + + let shutdown_all = async move { + for (join_handle, shutdown_sender) in sub_tasks { + if let Err(_err) = shutdown_sender.send(()) { + continue; + } + // The barrier manager can't be shutdown gracefully if it's under recovering, try to + // abort it using timeout. + match tokio::time::timeout(Duration::from_secs(1), join_handle).await { + Ok(Err(err)) => { + tracing::warn!("Failed to join shutdown: {:?}", err); + } + Err(e) => { + tracing::warn!("Join shutdown timeout: {:?}", e); + } + _ => {} + } + } + }; + + tonic::transport::Server::builder() + .layer(MetricsMiddlewareLayer::new(meta_metrics)) + .add_service(HeartbeatServiceServer::new(heartbeat_srv)) + .add_service(ClusterServiceServer::new(cluster_srv)) + .add_service(StreamManagerServiceServer::new(stream_srv)) + .add_service(HummockManagerServiceServer::new(hummock_srv)) + .add_service(NotificationServiceServer::new(notification_srv)) + .add_service(MetaMemberServiceServer::new(meta_member_srv)) + .add_service(DdlServiceServer::new(ddl_srv)) + .add_service(UserServiceServer::new(user_srv)) + .add_service(ScaleServiceServer::new(scale_srv)) + .add_service(HealthServer::new(health_srv)) + .add_service(BackupServiceServer::new(backup_srv)) + .add_service(SystemParamsServiceServer::new(system_params_srv)) + .serve_with_shutdown(address_info.listen_addr, async move { + tokio::select! { + res = svc_shutdown_rx.changed() => { + match res { + Ok(_) => tracing::info!("Shutting down services"), + Err(_) => tracing::error!("Service shutdown receiver dropped") + } + shutdown_all.await; + }, + _ = idle_recv => { + shutdown_all.await; + }, + } + }) + .await + .unwrap(); + Ok(()) +} diff --git a/src/meta/src/rpc/service/backup_service.rs b/src/meta/src/rpc/service/backup_service.rs index 10e053b0df82c..b7dfe55c8c99e 100644 --- a/src/meta/src/rpc/service/backup_service.rs +++ b/src/meta/src/rpc/service/backup_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
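Note (editorial sketch, not part of the patch): the `shutdown_all` future in the meta-server bootstrap above pairs every background sub-task with a oneshot sender and then joins the task with a timeout, so a task that cannot stop gracefully (e.g. the barrier manager while it is recovering) does not block process exit. The same pattern, reduced to a standalone example:

use std::time::Duration;
use tokio::sync::oneshot;
use tokio::task::JoinHandle;

async fn shutdown_all(sub_tasks: Vec<(JoinHandle<()>, oneshot::Sender<()>)>) {
    for (join_handle, shutdown_sender) in sub_tasks {
        // If the receiver is already gone, the task has finished on its own.
        if shutdown_sender.send(()).is_err() {
            continue;
        }
        // Bound the join so a stuck task cannot hang the shutdown sequence.
        match tokio::time::timeout(Duration::from_secs(1), join_handle).await {
            Ok(Err(err)) => eprintln!("failed to join shutdown: {err:?}"),
            Err(elapsed) => eprintln!("join shutdown timed out: {elapsed}"),
            Ok(Ok(())) => {}
        }
    }
}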
diff --git a/src/meta/src/rpc/service/cluster_service.rs b/src/meta/src/rpc/service/cluster_service.rs index cc1af240f0710..d725c428a1b5d 100644 --- a/src/meta/src/rpc/service/cluster_service.rs +++ b/src/meta/src/rpc/service/cluster_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/rpc/service/ddl_service.rs b/src/meta/src/rpc/service/ddl_service.rs index cbea605d858b1..d53c0986349b5 100644 --- a/src/meta/src/rpc/service/ddl_service.rs +++ b/src/meta/src/rpc/service/ddl_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,10 +15,10 @@ use itertools::Itertools; use risingwave_common::catalog::CatalogVersion; use risingwave_pb::catalog::table::OptionalAssociatedSourceId; +use risingwave_pb::catalog::Table; use risingwave_pb::ddl_service::ddl_service_server::DdlService; use risingwave_pb::ddl_service::drop_table_request::SourceId as ProstSourceId; use risingwave_pb::ddl_service::*; -use risingwave_pb::stream_plan::stream_fragment_graph::Parallelism; use risingwave_pb::stream_plan::stream_node::NodeBody; use risingwave_pb::stream_plan::StreamFragmentGraph as StreamFragmentGraphProto; use tonic::{Request, Response, Status}; @@ -28,11 +28,12 @@ use crate::manager::{ CatalogManagerRef, ClusterManagerRef, FragmentManagerRef, IdCategory, IdCategoryType, MetaSrvEnv, NotificationVersion, SourceId, StreamingJob, TableId, }; -use crate::model::TableFragments; +use crate::model::{StreamEnvironment, TableFragments}; use crate::storage::MetaStore; use crate::stream::{ - visit_fragment, ActorGraphBuilder, CreateStreamingJobContext, GlobalStreamManagerRef, - SourceManagerRef, StreamFragmentGraph, + visit_fragment, ActorGraphBuildResult, ActorGraphBuilder, CompleteStreamFragmentGraph, + CreateStreamingJobContext, GlobalStreamManagerRef, ReplaceTableContext, SourceManagerRef, + StreamFragmentGraph, }; use crate::{MetaError, MetaResult}; @@ -228,9 +229,16 @@ where ) -> Result, Status> { self.check_barrier_manager_status().await?; let sink_id = request.into_inner().sink_id; - + let table_fragment = self + .fragment_manager + .select_table_fragments_by_table_id(&sink_id.into()) + .await?; + let internal_tables = table_fragment.internal_table_ids(); // 1. Drop sink in catalog. - let version = self.catalog_manager.drop_sink(sink_id).await?; + let version = self + .catalog_manager + .drop_sink(sink_id, internal_tables) + .await?; // 2. drop streaming job of sink. 
self.stream_manager .drop_streaming_jobs(vec![sink_id.into()]) @@ -484,12 +492,54 @@ where async fn replace_table_plan( &self, - _request: Request, + request: Request, ) -> Result, Status> { + let req = request.into_inner(); + + let mut stream_job = StreamingJob::Table(None, req.table.unwrap()); + let fragment_graph = req.fragment_graph.unwrap(); + + let (_ctx, _table_fragments) = self + .prepare_replace_table(&mut stream_job, fragment_graph) + .await?; + Err(Status::unimplemented( "replace table plan is not implemented yet", )) } + + async fn get_table( + &self, + request: Request, + ) -> Result, Status> { + let req = request.into_inner(); + let database = self + .catalog_manager + .list_databases() + .await + .into_iter() + .find(|db| db.name == req.database_name); + if let Some(db) = database { + let table = self + .catalog_manager + .list_tables() + .await + .into_iter() + .find(|t| t.name == req.table_name && t.database_id == db.id); + Ok(Response::new(GetTableResponse { table })) + } else { + Ok(Response::new(GetTableResponse { table: None })) + } + } + + async fn get_ddl_progress( + &self, + _request: Request, + ) -> Result, Status> { + Ok(Response::new(GetDdlProgressResponse { + ddl_progress: self.barrier_manager.get_ddl_progress().await, + })) + } } impl DdlServiceImpl @@ -515,22 +565,22 @@ where ) -> MetaResult { self.check_barrier_manager_status().await?; - let (mut ctx, table_fragments) = - self.prepare_stream_job(stream_job, fragment_graph).await?; + let (ctx, table_fragments) = self.prepare_stream_job(stream_job, fragment_graph).await?; + let internal_tables = ctx.internal_tables(); let result = try { if let Some(source) = stream_job.source() { self.source_manager.register_source(source).await?; } self.stream_manager - .create_streaming_job(table_fragments, &mut ctx) + .create_streaming_job(table_fragments, ctx) .await?; }; match result { - Ok(_) => self.finish_stream_job(stream_job, &ctx).await, + Ok(_) => self.finish_stream_job(stream_job, internal_tables).await, Err(err) => { - self.cancel_stream_job(stream_job, &ctx).await?; + self.cancel_stream_job(stream_job, internal_tables).await?; Err(err) } } @@ -546,24 +596,19 @@ where let id = self.gen_unique_id::<{ IdCategory::Table }>().await?; stream_job.set_id(id); - // 2. Get the env for streaming jobs - let env = fragment_graph.get_env().unwrap().clone(); - let default_parallelism = - if let Some(Parallelism { parallelism }) = fragment_graph.parallelism { - parallelism as usize - } else { - self.cluster_manager.get_active_parallel_unit_count().await - } as u32; + // 2. Get the env for streaming jobs. + let env = StreamEnvironment::from_protobuf(fragment_graph.get_env().unwrap()); // 3. Build fragment graph. let fragment_graph = StreamFragmentGraph::new(fragment_graph, self.env.id_gen_manager_ref(), &*stream_job) .await?; + let default_parallelism = fragment_graph.default_parallelism(); let internal_tables = fragment_graph.internal_tables(); // 4. Set the graph-related fields and freeze the `stream_job`. stream_job.set_table_fragment_id(fragment_graph.table_fragment_id()); - let dependent_relations = fragment_graph.dependent_relations(); + let dependent_relations = fragment_graph.dependent_relations().clone(); stream_job.set_dependent_relations(dependent_relations.clone()); let stream_job = &*stream_job; @@ -573,29 +618,58 @@ where .start_create_stream_job_procedure(stream_job) .await?; - // 6. Build actor graph from the fragment graph. - // TODO: directly store the freezed `stream_job`. 
- let mut ctx = CreateStreamingJobContext { - streaming_definition: stream_job.mview_definition(), - table_properties: stream_job.properties(), - table_mview_map: self - .fragment_manager - .get_build_graph_info(dependent_relations) - .await? - .table_mview_actor_ids, - dependent_table_ids: dependent_relations.clone(), - internal_tables, - ..Default::default() - }; + // 6. Resolve the upstream fragments, extend the fragment graph to a complete graph that + // contains all information needed for building the actor graph. + let upstream_mview_fragments = self + .fragment_manager + .get_upstream_mview_fragments(&dependent_relations) + .await?; + let upstream_mview_actors = upstream_mview_fragments + .iter() + .map(|(&table_id, fragment)| { + ( + table_id, + fragment.actors.iter().map(|a| a.actor_id).collect_vec(), + ) + }) + .collect(); + + let complete_graph = + CompleteStreamFragmentGraph::with_upstreams(fragment_graph, upstream_mview_fragments)?; + + // 7. Build the actor graph. + let cluster_info = self.cluster_manager.get_streaming_cluster_info().await; + let actor_graph_builder = + ActorGraphBuilder::new(complete_graph, cluster_info, default_parallelism)?; + + let ActorGraphBuildResult { + graph, + building_locations, + existing_locations, + dispatchers, + merge_updates, + } = actor_graph_builder + .generate_graph(self.env.id_gen_manager_ref(), stream_job) + .await?; + assert!(merge_updates.is_empty()); - // TODO(bugen): we should merge this step with the `Scheduler`. - let actor_graph_builder = ActorGraphBuilder::new(fragment_graph, default_parallelism); + // 8. Build the table fragments structure that will be persisted in the stream manager, and + // the context that contains all information needed for building the actors on the compute + // nodes. + let table_fragments = + TableFragments::new(id.into(), graph, &building_locations.actor_locations, env); - let graph = actor_graph_builder - .generate_graph(self.env.id_gen_manager_ref(), &mut ctx) - .await?; + let ctx = CreateStreamingJobContext { + dispatchers, + upstream_mview_actors, + internal_tables, + building_locations, + existing_locations, + table_properties: stream_job.properties(), + definition: stream_job.mview_definition(), + }; - // 7. mark creating tables, including internal tables and the table of the stream job. + // 9. Mark creating tables, including internal tables and the table of the stream job. // Note(bugen): should we take `Sink` into account as well? let creating_tables = ctx .internal_tables() @@ -607,16 +681,17 @@ where .mark_creating_tables(&creating_tables) .await; - Ok((ctx, TableFragments::new(id.into(), graph, env))) + Ok((ctx, table_fragments)) } /// `cancel_stream_job` cancels a stream job and clean some states. async fn cancel_stream_job( &self, stream_job: &StreamingJob, - ctx: &CreateStreamingJobContext, + internal_tables: Vec
, ) -> MetaResult<()> { - let mut creating_internal_table_ids = ctx.internal_table_ids(); + let mut creating_internal_table_ids = + internal_tables.into_iter().map(|t| t.id).collect_vec(); // 1. cancel create procedure. match stream_job { StreamingJob::MaterializedView(table) => { @@ -661,26 +736,26 @@ where async fn finish_stream_job( &self, stream_job: &StreamingJob, - ctx: &CreateStreamingJobContext, + internal_tables: Vec
, ) -> MetaResult { // 1. finish procedure. - let mut creating_internal_table_ids = ctx.internal_table_ids(); + let mut creating_internal_table_ids = internal_tables.iter().map(|t| t.id).collect_vec(); let version = match stream_job { StreamingJob::MaterializedView(table) => { creating_internal_table_ids.push(table.id); self.catalog_manager - .finish_create_table_procedure(ctx.internal_tables(), table) + .finish_create_table_procedure(internal_tables, table) .await? } StreamingJob::Sink(sink) => { self.catalog_manager - .finish_create_sink_procedure(sink) + .finish_create_sink_procedure(internal_tables, sink) .await? } StreamingJob::Table(source, table) => { creating_internal_table_ids.push(table.id); if let Some(source) = source { - let internal_tables: [_; 1] = ctx.internal_tables().try_into().unwrap(); + let internal_tables: [_; 1] = internal_tables.try_into().unwrap(); self.catalog_manager .finish_create_table_procedure_with_source( source, @@ -689,7 +764,6 @@ where ) .await? } else { - let internal_tables = ctx.internal_tables(); assert!(internal_tables.is_empty()); // Though `internal_tables` is empty here, we pass it as a parameter to reuse // the method. @@ -751,6 +825,72 @@ where Ok(version) } + /// Prepares a table replacement and returns the context and table fragments. + async fn prepare_replace_table( + &self, + stream_job: &mut StreamingJob, + fragment_graph: StreamFragmentGraphProto, + ) -> MetaResult<(ReplaceTableContext, TableFragments)> { + let id = stream_job.id(); + + // 1. Get the env for streaming jobs. + let env = StreamEnvironment::from_protobuf(fragment_graph.get_env().unwrap()); + + // 2. Build fragment graph. + let fragment_graph = + StreamFragmentGraph::new(fragment_graph, self.env.id_gen_manager_ref(), &*stream_job) + .await?; + let default_parallelism = fragment_graph.default_parallelism(); + assert!(fragment_graph.internal_tables().is_empty()); + + // 3. Set the graph-related fields and freeze the `stream_job`. + stream_job.set_table_fragment_id(fragment_graph.table_fragment_id()); + let stream_job = &*stream_job; + + // TODO: 4. Mark current relation as "updating". + + // 5. Resolve the downstream fragments, extend the fragment graph to a complete graph that + // contains all information needed for building the actor graph. + let downstream_fragments = self + .fragment_manager + .get_downstream_chain_fragments(id.into()) + .await?; + + let complete_graph = + CompleteStreamFragmentGraph::with_downstreams(fragment_graph, downstream_fragments)?; + + // 6. Build the actor graph. + let cluster_info = self.cluster_manager.get_streaming_cluster_info().await; + let actor_graph_builder = + ActorGraphBuilder::new(complete_graph, cluster_info, default_parallelism)?; + + let ActorGraphBuildResult { + graph, + building_locations, + existing_locations, + dispatchers, + merge_updates, + } = actor_graph_builder + .generate_graph(self.env.id_gen_manager_ref(), stream_job) + .await?; + assert!(dispatchers.is_empty()); + + // 7. Build the table fragments structure that will be persisted in the stream manager, and + // the context that contains all information needed for building the actors on the compute + // nodes. 
+ let table_fragments = + TableFragments::new(id.into(), graph, &building_locations.actor_locations, env); + + let ctx = ReplaceTableContext { + merge_updates, + building_locations, + existing_locations, + table_properties: stream_job.properties(), + }; + + Ok((ctx, table_fragments)) + } + async fn gen_unique_id(&self) -> MetaResult { let id = self.env.id_gen_manager().generate::().await? as u32; Ok(id) diff --git a/src/meta/src/rpc/service/health_service.rs b/src/meta/src/rpc/service/health_service.rs index 75bb7900aa5c2..bdb01c1ef0760 100644 --- a/src/meta/src/rpc/service/health_service.rs +++ b/src/meta/src/rpc/service/health_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/rpc/service/heartbeat_service.rs b/src/meta/src/rpc/service/heartbeat_service.rs index 605c57878b7ad..d10d5994c577c 100644 --- a/src/meta/src/rpc/service/heartbeat_service.rs +++ b/src/meta/src/rpc/service/heartbeat_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/rpc/service/hummock_service.rs b/src/meta/src/rpc/service/hummock_service.rs index a335d1f40ca6b..4889d40e39e2a 100644 --- a/src/meta/src/rpc/service/hummock_service.rs +++ b/src/meta/src/rpc/service/hummock_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ use std::time::Duration; use itertools::Itertools; use risingwave_common::catalog::{TableId, NON_RESERVED_PG_CATALOG_TABLE_ID}; use risingwave_pb::hummock::hummock_manager_service_server::HummockManagerService; +use risingwave_pb::hummock::version_update_payload::Payload; use risingwave_pb::hummock::*; use tonic::{Request, Response, Status}; @@ -243,7 +244,8 @@ where .add_compactor(context_id, req.max_concurrent_task_number); // Trigger compaction on all compaction groups. 
for cg_id in self.hummock_manager.compaction_group_ids().await { - self.hummock_manager.try_send_compaction_request(cg_id); + self.hummock_manager + .try_send_compaction_request(cg_id, compact_task::TaskType::Dynamic); } self.hummock_manager .try_resume_compaction(CompactionResumeTrigger::CompactorAddition { context_id }); @@ -274,23 +276,6 @@ where Ok(Response::new(ReportVacuumTaskResponse { status: None })) } - async fn get_compaction_groups( - &self, - _request: Request, - ) -> Result, Status> { - let resp = GetCompactionGroupsResponse { - status: None, - compaction_groups: self - .hummock_manager - .compaction_groups() - .await - .iter() - .map(|cg| cg.into()) - .collect(), - }; - Ok(Response::new(resp)) - } - async fn trigger_manual_compaction( &self, request: Request, @@ -443,13 +428,7 @@ where &self, _request: Request, ) -> Result, Status> { - let compaction_groups = self - .hummock_manager - .compaction_groups() - .await - .iter() - .map(|cg| cg.into()) - .collect_vec(); + let compaction_groups = self.hummock_manager.list_compaction_group().await; Ok(Response::new(RiseCtlListCompactionGroupResponse { status: None, compaction_groups, @@ -503,4 +482,20 @@ where .set_compactor_config(request.context_id, request.config.unwrap().into()); Ok(Response::new(SetCompactorRuntimeConfigResponse {})) } + + async fn pin_version( + &self, + request: Request, + ) -> Result, Status> { + let req = request.into_inner(); + let payload = self.hummock_manager.pin_version(req.context_id).await?; + match payload { + Payload::PinnedVersion(version) => Ok(Response::new(PinVersionResponse { + pinned_version: Some(version), + })), + Payload::VersionDeltas(_) => { + unreachable!("pin_version should not return version delta") + } + } + } } diff --git a/src/meta/src/rpc/service/leader_service.rs b/src/meta/src/rpc/service/leader_service.rs deleted file mode 100644 index 956ae86ede771..0000000000000 --- a/src/meta/src/rpc/service/leader_service.rs +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2023 Singularity Data -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::borrow::Borrow; - -use risingwave_common::util::addr::{leader_info_to_host_addr, HostAddr}; -use risingwave_pb::common::HostAddress; -use risingwave_pb::leader::leader_service_server::LeaderService; -use risingwave_pb::leader::{ - LeaderRequest, LeaderResponse, Member, MembersRequest, MembersResponse, -}; -use risingwave_pb::meta::MetaLeaderInfo; -use tonic::{Request, Response, Status}; - -use crate::rpc::server::ElectionClientRef; - -#[derive(Clone)] -pub struct LeaderServiceImpl { - election_client: Option, - current_leader: MetaLeaderInfo, -} - -impl LeaderServiceImpl { - pub fn new(election_client: Option, current_leader: MetaLeaderInfo) -> Self { - LeaderServiceImpl { - election_client, - current_leader, - } - } -} - -#[async_trait::async_trait] -impl LeaderService for LeaderServiceImpl { - #[cfg_attr(coverage, no_coverage)] - async fn leader( - &self, - _request: Request, - ) -> Result, Status> { - let leader = match self.election_client.borrow() { - None => Ok(Some(self.current_leader.clone())), - Some(client) => client.leader().await.map(|member| member.map(Into::into)), - }?; - - let leader_address = leader - .map(leader_info_to_host_addr) - .map(|leader_addr| HostAddress { - host: leader_addr.host, - port: leader_addr.port.into(), - }); - - Ok(Response::new(LeaderResponse { - leader_addr: leader_address, - })) - } - - async fn members( - &self, - _request: Request, - ) -> Result, Status> { - let members = if let Some(election_client) = self.election_client.borrow() { - let mut members = vec![]; - for member in election_client.get_members().await? { - let host_addr = member.id.parse::()?; - members.push(Member { - member_addr: Some(HostAddress { - host: host_addr.host, - port: host_addr.port.into(), - }), - lease_id: member.lease, - }) - } - - members - } else { - let host_addr = self.current_leader.node_address.parse::()?; - vec![Member { - member_addr: Some(HostAddress { - host: host_addr.host, - port: host_addr.port.into(), - }), - lease_id: self.current_leader.lease_id as i64, - }] - }; - - Ok(Response::new(MembersResponse { members })) - } -} diff --git a/src/meta/src/rpc/service/meta_member_service.rs b/src/meta/src/rpc/service/meta_member_service.rs new file mode 100644 index 0000000000000..cbc66d40752aa --- /dev/null +++ b/src/meta/src/rpc/service/meta_member_service.rs @@ -0,0 +1,76 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::borrow::Borrow; + +use either::Either; +use risingwave_common::util::addr::HostAddr; +use risingwave_pb::common::HostAddress; +use risingwave_pb::meta::meta_member_service_server::MetaMemberService; +use risingwave_pb::meta::{MembersRequest, MembersResponse, MetaMember}; +use tonic::{Request, Response, Status}; + +use crate::rpc::server::{AddressInfo, ElectionClientRef}; + +#[derive(Clone)] +pub struct MetaMemberServiceImpl { + election_client_or_self: Either, +} + +impl MetaMemberServiceImpl { + pub fn new(election_client_or_self: Either) -> Self { + MetaMemberServiceImpl { + election_client_or_self, + } + } +} + +#[async_trait::async_trait] +impl MetaMemberService for MetaMemberServiceImpl { + #[cfg_attr(coverage, no_coverage)] + async fn members( + &self, + _request: Request, + ) -> Result, Status> { + let members = match self.election_client_or_self.borrow() { + Either::Left(election_client) => { + let mut members = vec![]; + for member in election_client.get_members().await? { + let host_addr = member.id.parse::()?; + members.push(MetaMember { + address: Some(HostAddress { + host: host_addr.host, + port: host_addr.port.into(), + }), + is_leader: member.is_leader, + }) + } + + members + } + Either::Right(self_as_leader) => { + let host_addr = self_as_leader.advertise_addr.parse::()?; + vec![MetaMember { + address: Some(HostAddress { + host: host_addr.host, + port: host_addr.port.into(), + }), + is_leader: true, + }] + } + }; + + Ok(Response::new(MembersResponse { members })) + } +} diff --git a/src/meta/src/rpc/service/mod.rs b/src/meta/src/rpc/service/mod.rs index a4f9c06c44608..dd775588405d3 100644 --- a/src/meta/src/rpc/service/mod.rs +++ b/src/meta/src/rpc/service/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,10 +18,11 @@ pub mod ddl_service; pub mod health_service; pub mod heartbeat_service; pub mod hummock_service; -pub mod leader_service; +pub mod meta_member_service; pub mod notification_service; pub mod scale_service; pub mod stream_service; +pub mod system_params_service; pub mod user_service; use std::pin::Pin; diff --git a/src/meta/src/rpc/service/notification_service.rs b/src/meta/src/rpc/service/notification_service.rs index fc80e6212d34f..4d26465ccad6b 100644 --- a/src/meta/src/rpc/service/notification_service.rs +++ b/src/meta/src/rpc/service/notification_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,10 +16,12 @@ use itertools::Itertools; use risingwave_pb::backup_service::MetaBackupManifestId; use risingwave_pb::catalog::Table; use risingwave_pb::common::worker_node::State::Running; -use risingwave_pb::common::{ParallelUnitMapping, WorkerNode, WorkerType}; +use risingwave_pb::common::{WorkerNode, WorkerType}; use risingwave_pb::meta::meta_snapshot::SnapshotVersion; use risingwave_pb::meta::notification_service_server::NotificationService; -use risingwave_pb::meta::{MetaSnapshot, SubscribeRequest, SubscribeType}; +use risingwave_pb::meta::{ + FragmentParallelUnitMapping, MetaSnapshot, SubscribeRequest, SubscribeType, +}; use risingwave_pb::user::UserInfo; use tokio::sync::mpsc; use tokio_stream::wrappers::UnboundedReceiverStream; @@ -82,7 +84,7 @@ where async fn get_parallel_unit_mapping_snapshot( &self, - ) -> (Vec, NotificationVersion) { + ) -> (Vec, NotificationVersion) { let fragment_guard = self.fragment_manager.get_fragment_read_guard().await; let parallel_unit_mappings = fragment_guard.all_running_fragment_mappings().collect_vec(); let notification_version = self.env.notification_manager().current_version().await; @@ -153,12 +155,7 @@ where async fn hummock_subscribe(&self) -> MetaSnapshot { let (tables, catalog_version) = self.get_tables_and_creating_tables_snapshot().await; - let hummock_version = self - .hummock_manager - .get_read_guard() - .await - .current_version - .clone(); + let hummock_version = self.hummock_manager.get_current_version().await; let meta_backup_manifest_id = self.backup_manager.manifest().manifest_id; MetaSnapshot { diff --git a/src/meta/src/rpc/service/scale_service.rs b/src/meta/src/rpc/service/scale_service.rs index 98c44de7ab882..27d5a24d835b9 100644 --- a/src/meta/src/rpc/service/scale_service.rs +++ b/src/meta/src/rpc/service/scale_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/rpc/service/stream_service.rs b/src/meta/src/rpc/service/stream_service.rs index 6cf7a479651f1..7e320843ed402 100644 --- a/src/meta/src/rpc/service/stream_service.rs +++ b/src/meta/src/rpc/service/stream_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -15,6 +15,7 @@ use std::collections::{HashMap, HashSet}; use itertools::Itertools; +use risingwave_common::catalog::TableId; use risingwave_pb::meta::list_table_fragments_response::{ ActorInfo, FragmentInfo, TableFragmentInfo, }; @@ -23,8 +24,9 @@ use risingwave_pb::meta::*; use tonic::{Request, Response, Status}; use crate::barrier::BarrierScheduler; -use crate::manager::{FragmentManagerRef, MetaSrvEnv}; +use crate::manager::{CatalogManagerRef, FragmentManagerRef, MetaSrvEnv}; use crate::storage::MetaStore; +use crate::stream::GlobalStreamManagerRef; pub type TonicResponse = Result, Status>; @@ -35,6 +37,8 @@ where { env: MetaSrvEnv, barrier_scheduler: BarrierScheduler, + stream_manager: GlobalStreamManagerRef, + catalog_manager: CatalogManagerRef, fragment_manager: FragmentManagerRef, } @@ -45,11 +49,15 @@ where pub fn new( env: MetaSrvEnv, barrier_scheduler: BarrierScheduler, + stream_manager: GlobalStreamManagerRef, + catalog_manager: CatalogManagerRef, fragment_manager: FragmentManagerRef, ) -> Self { StreamServiceImpl { env, barrier_scheduler, + stream_manager, + catalog_manager, fragment_manager, } } @@ -72,6 +80,23 @@ where })) } + async fn cancel_creating_jobs( + &self, + request: Request, + ) -> TonicResponse { + let req = request.into_inner(); + let table_ids = self + .catalog_manager + .find_creating_streaming_job_ids(req.infos) + .await; + if !table_ids.is_empty() { + self.stream_manager + .cancel_streaming_jobs(table_ids.into_iter().map(TableId::from).collect_vec()) + .await; + } + Ok(Response::new(CancelCreatingJobsResponse { status: None })) + } + #[cfg_attr(coverage, no_coverage)] async fn list_table_fragments( &self, diff --git a/src/meta/src/rpc/service/system_params_service.rs b/src/meta/src/rpc/service/system_params_service.rs new file mode 100644 index 0000000000000..d23fe71de101b --- /dev/null +++ b/src/meta/src/rpc/service/system_params_service.rs @@ -0,0 +1,63 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use async_trait::async_trait; +use risingwave_pb::meta::system_params_service_server::SystemParamsService; +use risingwave_pb::meta::{ + GetSystemParamsRequest, GetSystemParamsResponse, SetSystemParamRequest, SetSystemParamResponse, +}; +use tonic::{Request, Response, Status}; + +use crate::manager::SystemParamManagerRef; +use crate::storage::MetaStore; + +pub struct SystemParamsServiceImpl +where + S: MetaStore, +{ + system_params_manager: SystemParamManagerRef, +} + +impl SystemParamsServiceImpl { + pub fn new(system_params_manager: SystemParamManagerRef) -> Self { + Self { + system_params_manager, + } + } +} + +#[async_trait] +impl SystemParamsService for SystemParamsServiceImpl +where + S: MetaStore, +{ + async fn get_system_params( + &self, + _request: Request, + ) -> Result, Status> { + let params = Some(self.system_params_manager.get_params().await); + Ok(Response::new(GetSystemParamsResponse { params })) + } + + async fn set_system_param( + &self, + request: Request, + ) -> Result, Status> { + let req = request.into_inner(); + self.system_params_manager + .set_param(&req.param, req.value) + .await?; + Ok(Response::new(SetSystemParamResponse {})) + } +} diff --git a/src/meta/src/rpc/service/user_service.rs b/src/meta/src/rpc/service/user_service.rs index a994221d87268..be5e8f0851bea 100644 --- a/src/meta/src/rpc/service/user_service.rs +++ b/src/meta/src/rpc/service/user_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,8 +27,6 @@ use crate::manager::{CatalogManagerRef, IdCategory, MetaSrvEnv}; use crate::storage::MetaStore; use crate::MetaResult; -// TODO: Change user manager as a part of the catalog manager, to ensure that operations on Catalog -// and User are transactional. pub struct UserServiceImpl { env: MetaSrvEnv, diff --git a/src/meta/src/storage/etcd_meta_store.rs b/src/meta/src/storage/etcd_meta_store.rs index 5ae19d52501d1..1fd3f11d94bbb 100644 --- a/src/meta/src/storage/etcd_meta_store.rs +++ b/src/meta/src/storage/etcd_meta_store.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/storage/etcd_retry_client.rs b/src/meta/src/storage/etcd_retry_client.rs index 1f89ac8eae326..b558ab3db6616 100644 --- a/src/meta/src/storage/etcd_retry_client.rs +++ b/src/meta/src/storage/etcd_retry_client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/storage/mem_meta_store.rs b/src/meta/src/storage/mem_meta_store.rs index 9a8b8b0f3d43b..ab08d6a9c8df9 100644 --- a/src/meta/src/storage/mem_meta_store.rs +++ b/src/meta/src/storage/mem_meta_store.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/meta/src/storage/meta_store.rs b/src/meta/src/storage/meta_store.rs index 19f51f21ced7f..0cc5315fd826e 100644 --- a/src/meta/src/storage/meta_store.rs +++ b/src/meta/src/storage/meta_store.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -37,9 +37,8 @@ pub trait MetaStore: Clone + Sync + Send + 'static { async fn delete_cf(&self, cf: &str, key: &[u8]) -> MetaStoreResult<()>; async fn txn(&self, trx: Transaction) -> MetaStoreResult<()>; - async fn list_cf(&self, cf: &str) -> MetaStoreResult>> { - let kvs = self.snapshot().await.list_cf(cf).await?; - Ok(kvs.into_iter().map(|(_k, v)| v).collect()) + async fn list_cf(&self, cf: &str) -> MetaStoreResult, Vec)>> { + self.snapshot().await.list_cf(cf).await } async fn get_cf(&self, cf: &str, key: &[u8]) -> MetaStoreResult> { diff --git a/src/meta/src/storage/mod.rs b/src/meta/src/storage/mod.rs index d6765cb4471ae..9da50b146a3f1 100644 --- a/src/meta/src/storage/mod.rs +++ b/src/meta/src/storage/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/storage/tests.rs b/src/meta/src/storage/tests.rs index cb2830bcd4d1d..e62bef99951c0 100644 --- a/src/meta/src/storage/tests.rs +++ b/src/meta/src/storage/tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
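Note (editorial sketch, not part of the patch): `MetaStore::list_cf` above now returns `(key, value)` pairs instead of bare values, so callers that only care about the values, like the test adaptations that follow, simply map the tuples down. A minimal sketch of that adaptation, with `kvs` standing in for a `list_cf` result:

fn values_only(kvs: Vec<(Vec<u8>, Vec<u8>)>) -> Vec<Vec<u8>> {
    kvs.into_iter().map(|(_key, value)| value).collect()
}

fn main() {
    let kvs = vec![(b"k1".to_vec(), b"v1".to_vec()), (b"k2".to_vec(), b"v2".to_vec())];
    assert_eq!(values_only(kvs), vec![b"v1".to_vec(), b"v2".to_vec()]);
}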
@@ -28,7 +28,7 @@ trait MetaStoreTestExt: MetaStore { async fn put(&self, key: Key, value: Value) -> MetaStoreResult<()>; async fn get(&self, key: &[u8]) -> MetaStoreResult; async fn delete(&self, key: &[u8]) -> MetaStoreResult<()>; - async fn list(&self) -> MetaStoreResult>>; + async fn list(&self) -> MetaStoreResult, Vec)>>; } #[async_trait] @@ -45,7 +45,7 @@ impl MetaStoreTestExt for S { self.delete_cf(TEST_DEFAULT_CF, key).await } - async fn list(&self) -> MetaStoreResult>> { + async fn list(&self) -> MetaStoreResult, Vec)>> { self.list_cf(TEST_DEFAULT_CF).await } } @@ -103,7 +103,13 @@ async fn test_meta_store_basic(store: &S) -> MetaStoreResult<()> { .put(b"key_3".to_vec(), b"value_3_new".to_vec()) .await .is_ok()); - let mut values = store.list().await.unwrap(); + let mut values = store + .list() + .await + .unwrap() + .into_iter() + .map(|(_, v)| v) + .collect_vec(); values.sort(); let expected: Vec> = vec![ b"value_1".to_vec(), @@ -138,7 +144,13 @@ async fn test_meta_store_transaction(meta_store: &S) -> MetaStoreR let mut trx = Transaction::default(); trx.add_operations(ops); meta_store.txn(trx).await.unwrap(); - let result = meta_store.list_cf(cf).await.unwrap(); + let result = meta_store + .list_cf(cf) + .await + .unwrap() + .into_iter() + .map(|(_, v)| v) + .collect_vec(); let expected = kvs .iter() .take(2) diff --git a/src/meta/src/storage/transaction.rs b/src/meta/src/storage/transaction.rs index fba9d85aa31d1..54033b46ff422 100644 --- a/src/meta/src/storage/transaction.rs +++ b/src/meta/src/storage/transaction.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/storage/wrapped_etcd_client.rs b/src/meta/src/storage/wrapped_etcd_client.rs index dce4d7e0e73e5..0760ead7ca63c 100644 --- a/src/meta/src/storage/wrapped_etcd_client.rs +++ b/src/meta/src/storage/wrapped_etcd_client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/stream/mapping.rs b/src/meta/src/stream/mapping.rs deleted file mode 100644 index 9715f017b3910..0000000000000 --- a/src/meta/src/stream/mapping.rs +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright 2023 Singularity Data -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::HashMap; - -use itertools::Itertools; -use risingwave_common::buffer::{Bitmap, BitmapBuilder}; -use risingwave_common::hash::{ParallelUnitId, VirtualNode, VnodeMapping}; -use risingwave_common::util::compress::compress_data; -use risingwave_pb::common::{ParallelUnit, ParallelUnitMapping}; -use risingwave_pb::stream_plan::ActorMapping; - -use crate::model::{ActorId, FragmentId}; - -/// Build a vnode mapping according to parallel units where the fragment is scheduled. 
-/// For example, if `parallel_units` is `[0, 1, 2]`, and the total vnode count is 10, we'll -/// generate mapping like `[0, 0, 0, 0, 1, 1, 1, 2, 2, 2]`. -pub(crate) fn build_vnode_mapping(parallel_units: &[ParallelUnit]) -> VnodeMapping { - let mut vnode_mapping = Vec::with_capacity(VirtualNode::COUNT); - - let hash_shard_size = VirtualNode::COUNT / parallel_units.len(); - let mut one_more_count = VirtualNode::COUNT % parallel_units.len(); - let mut init_bound = 0; - - parallel_units.iter().for_each(|parallel_unit| { - let vnode_count = if one_more_count > 0 { - one_more_count -= 1; - hash_shard_size + 1 - } else { - hash_shard_size - }; - let parallel_unit_id = parallel_unit.id; - init_bound += vnode_count; - vnode_mapping.resize(init_bound, parallel_unit_id); - }); - - vnode_mapping -} - -pub(crate) fn vnode_mapping_to_bitmaps( - vnode_mapping: VnodeMapping, -) -> HashMap { - let mut vnode_bitmaps = HashMap::new(); - vnode_mapping - .iter() - .enumerate() - .for_each(|(vnode, parallel_unit)| { - vnode_bitmaps - .entry(*parallel_unit) - .or_insert_with(|| BitmapBuilder::zeroed(VirtualNode::COUNT)) - .set(vnode, true); - }); - vnode_bitmaps - .into_iter() - .map(|(u, b)| (u, b.finish())) - .collect() -} - -pub(crate) fn actor_mapping_from_bitmaps(bitmaps: &HashMap) -> ActorMapping { - let mut raw = vec![0 as ActorId; VirtualNode::COUNT]; - - for (actor_id, bitmap) in bitmaps { - for (idx, pos) in raw.iter_mut().enumerate() { - if bitmap.is_set(idx) { - *pos = *actor_id; - } - } - } - let (original_indices, data) = compress_data(&raw); - - ActorMapping { - original_indices, - data, - } -} - -pub(crate) fn parallel_unit_mapping_to_actor_mapping( - parallel_unit_mapping: &ParallelUnitMapping, - parallel_unit_to_actor_map: &HashMap, -) -> ActorMapping { - let ParallelUnitMapping { - original_indices, - data, - .. - } = parallel_unit_mapping; - - let actor_data = data - .iter() - .map(|parallel_unit_id| parallel_unit_to_actor_map[parallel_unit_id]) - .collect_vec(); - - ActorMapping { - original_indices: original_indices.clone(), - data: actor_data, - } -} - -pub fn actor_mapping_to_parallel_unit_mapping( - fragment_id: FragmentId, - actor_to_parallel_unit_map: &HashMap, - actor_mapping: &ActorMapping, -) -> ParallelUnitMapping { - let ActorMapping { - original_indices, - data, - } = actor_mapping; - - let parallel_unit_data = data - .iter() - .map(|actor_id| actor_to_parallel_unit_map[actor_id]) - .collect_vec(); - - ParallelUnitMapping { - fragment_id, - original_indices: original_indices.clone(), - data: parallel_unit_data, - } -} diff --git a/src/meta/src/stream/mod.rs b/src/meta/src/stream/mod.rs index 491b55065fe20..7497ae9de7b3c 100644 --- a/src/meta/src/stream/mod.rs +++ b/src/meta/src/stream/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,9 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-mod mapping; mod scale; -mod scheduler; mod source_manager; mod stream_graph; mod stream_manager; @@ -22,9 +20,7 @@ mod stream_manager; mod test_fragmenter; mod test_scale; -pub use mapping::*; pub use scale::*; -pub use scheduler::*; pub use source_manager::*; pub use stream_graph::*; pub use stream_manager::*; diff --git a/src/meta/src/stream/scale.rs b/src/meta/src/stream/scale.rs index 2c2796f428934..92c43d737b31a 100644 --- a/src/meta/src/stream/scale.rs +++ b/src/meta/src/stream/scale.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,7 +23,8 @@ use num_integer::Integer; use num_traits::abs; use risingwave_common::bail; use risingwave_common::buffer::{Bitmap, BitmapBuilder}; -use risingwave_common::hash::{ParallelUnitId, VirtualNode}; +use risingwave_common::hash::{ActorMapping, ParallelUnitId, VirtualNode}; +use risingwave_common::util::iter_util::ZipEqDebug; use risingwave_pb::common::{worker_node, ActorInfo, ParallelUnit, WorkerNode, WorkerType}; use risingwave_pb::meta::table_fragments::actor_status::ActorState; use risingwave_pb::meta::table_fragments::fragment::FragmentDistributionType; @@ -31,8 +32,7 @@ use risingwave_pb::meta::table_fragments::{self, ActorStatus, Fragment}; use risingwave_pb::stream_plan::barrier::Mutation; use risingwave_pb::stream_plan::stream_node::NodeBody; use risingwave_pb::stream_plan::{ - ActorMapping, DispatcherType, FragmentTypeFlag, PauseMutation, ResumeMutation, StreamActor, - StreamNode, + DispatcherType, FragmentTypeFlag, PauseMutation, ResumeMutation, StreamActor, StreamNode, }; use risingwave_pb::stream_service::{ BroadcastActorInfoTableRequest, BuildActorsRequest, HangingChannel, UpdateActorsRequest, @@ -43,7 +43,6 @@ use crate::barrier::{Command, Reschedule}; use crate::manager::{IdCategory, WorkerId}; use crate::model::{ActorId, DispatcherId, FragmentId, TableFragments}; use crate::storage::MetaStore; -use crate::stream::mapping::actor_mapping_from_bitmaps; use crate::stream::GlobalStreamManager; use crate::MetaResult; @@ -436,6 +435,10 @@ where // treatment because the upstream and downstream of NoShuffle are always 1-1 // correspondence, so we need to clone the reschedule plan to the downstream of all // cascading relations. + // + // Delta join will introduce a `NoShuffle` edge between index chain node and lookup node + // (index_mv --NoShuffle--> index_chain --NoShuffle--> lookup) which will break current + // `NoShuffle` scaling assumption. Currently we detect this case and forbid it to scale. if no_shuffle_source_fragment_ids.contains(fragment_id) { let mut queue: VecDeque<_> = fragment_dispatcher_map .get(fragment_id) @@ -451,6 +454,20 @@ where if let Some(downstream_fragments) = fragment_dispatcher_map.get(&downstream_id) { + // If `NoShuffle` used by other fragment type rather than `ChainNode`, bail. + for downstream_fragment_id in downstream_fragments.keys() { + let downstream_fragment = fragment_map + .get(downstream_fragment_id) + .ok_or_else(|| anyhow!("fragment {fragment_id} does not exist"))?; + if (downstream_fragment.get_fragment_type_mask() + & (FragmentTypeFlag::ChainNode as u32 + | FragmentTypeFlag::Mview as u32)) + == 0 + { + bail!("Rescheduling NoShuffle edge only supports ChainNode and Mview. Other usage for e.g. 
delta join is forbidden currently."); + } + } + queue.extend(downstream_fragments.keys().cloned()); } @@ -783,7 +800,7 @@ where for (actor_to_create, sample_actor) in actors_to_create .iter() - .zip_eq(repeat(fragment.actors.first().unwrap()).take(actors_to_create.len())) + .zip_eq_debug(repeat(fragment.actors.first().unwrap()).take(actors_to_create.len())) { let new_actor_id = actor_to_create.0; let new_parallel_unit_id = actor_to_create.1; @@ -993,19 +1010,15 @@ where if !in_degree_types.contains(&DispatcherType::Hash) { None } else if parallel_unit_to_actor_after_reschedule.len() == 1 { - Some(ActorMapping { - original_indices: vec![VirtualNode::COUNT as u64 - 1], - data: vec![ - *parallel_unit_to_actor_after_reschedule - .first_key_value() - .unwrap() - .1, - ], - }) + let actor_id = parallel_unit_to_actor_after_reschedule + .into_values() + .next() + .unwrap(); + Some(ActorMapping::new_single(actor_id)) } else { // Changes of the bitmap must occur in the case of HashDistribution - Some(actor_mapping_from_bitmaps( - fragment_actor_bitmap.get(&fragment_id).unwrap(), + Some(ActorMapping::from_bitmaps( + &fragment_actor_bitmap[&fragment_id], )) } } @@ -1440,7 +1453,7 @@ where fragment_actor_bitmap.get(&downstream_fragment_id) { // If downstream scale in/out - *mapping = actor_mapping_from_bitmaps(downstream_updated_bitmap) + *mapping = ActorMapping::from_bitmaps(downstream_updated_bitmap).to_protobuf(); } } } diff --git a/src/meta/src/stream/scheduler.rs b/src/meta/src/stream/scheduler.rs deleted file mode 100644 index 149c61d7f25ac..0000000000000 --- a/src/meta/src/stream/scheduler.rs +++ /dev/null @@ -1,410 +0,0 @@ -// Copyright 2023 Singularity Data -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::{BTreeMap, HashMap, LinkedList}; -use std::iter::empty; - -use anyhow::{anyhow, Context}; -use itertools::Itertools; -use rand::prelude::SliceRandom; -use risingwave_common::bail; -use risingwave_common::hash::VnodeMapping; -use risingwave_common::util::compress::compress_data; -use risingwave_pb::common::{ActorInfo, ParallelUnit, ParallelUnitMapping, WorkerNode}; -use risingwave_pb::meta::table_fragments::fragment::FragmentDistributionType; -use risingwave_pb::meta::table_fragments::Fragment; - -use crate::manager::{WorkerId, WorkerLocations}; -use crate::model::ActorId; -use crate::stream::{build_vnode_mapping, vnode_mapping_to_bitmaps}; -use crate::MetaResult; - -/// [`Scheduler`] defines schedule logic for mv actors. -pub struct Scheduler { - /// The parallel units of the cluster in a round-robin manner on each worker. - all_parallel_units: Vec, -} - -/// [`ScheduledLocations`] represents the location of scheduled result. -pub struct ScheduledLocations { - /// actor location map. - pub actor_locations: BTreeMap, - /// worker location map. 
- pub worker_locations: WorkerLocations, -} - -impl ScheduledLocations { - #[cfg_attr(not(test), expect(dead_code))] - pub fn new() -> Self { - Self::with_workers(empty()) - } - - pub fn with_workers(workers: impl IntoIterator) -> Self { - Self { - actor_locations: Default::default(), - worker_locations: workers.into_iter().map(|w| (w.id, w)).collect(), - } - } - - /// Returns all actors for every worker node. - pub fn worker_actors(&self) -> HashMap> { - let mut worker_actors = HashMap::new(); - self.actor_locations - .iter() - .for_each(|(actor_id, parallel_unit)| { - worker_actors - .entry(parallel_unit.worker_node_id) - .or_insert_with(Vec::new) - .push(*actor_id); - }); - - worker_actors - } - - /// Returns the `ActorInfo` map for every actor. - pub fn actor_info_map(&self) -> HashMap { - self.actor_locations - .iter() - .map(|(actor_id, parallel_unit)| { - ( - *actor_id, - ActorInfo { - actor_id: *actor_id, - host: self.worker_locations[¶llel_unit.worker_node_id] - .host - .clone(), - }, - ) - }) - .collect::>() - } - - /// Returns an iterator of `ActorInfo`. - pub fn actor_infos(&self) -> impl Iterator + '_ { - self.actor_locations - .iter() - .map(|(actor_id, parallel_unit)| ActorInfo { - actor_id: *actor_id, - host: self.worker_locations[¶llel_unit.worker_node_id] - .host - .clone(), - }) - } - - /// Find a placement location that is on the same worker node of given actor ids. - pub fn schedule_colocate_with(&self, actor_ids: &[ActorId]) -> MetaResult { - let mut result_location = None; - for actor_id in actor_ids { - let location = self - .actor_locations - .get(actor_id) - .ok_or_else(|| anyhow!("actor location not found: {}", actor_id))?; - match &result_location { - None => result_location = Some(location.clone()), - Some(result_location) if result_location != location => { - bail!( - "cannot satisfy placement rule: {} is at {:?}, while others are on {:?}", - actor_id, - location, - result_location - ); - } - _ => {} - } - } - Ok(result_location.unwrap()) - } -} - -impl Scheduler { - pub fn new(parallel_units: impl IntoIterator) -> Self { - // Group parallel units with worker node. - let mut parallel_units_map = BTreeMap::new(); - for p in parallel_units { - parallel_units_map - .entry(p.worker_node_id) - .or_insert_with(Vec::new) - .push(p); - } - let mut parallel_units: LinkedList<_> = parallel_units_map - .into_values() - .map(|v| v.into_iter()) - .collect(); - - // Visit the parallel units in a round-robin manner on each worker. - let mut round_robin = Vec::new(); - while !parallel_units.is_empty() { - parallel_units.drain_filter(|ps| { - if let Some(p) = ps.next() { - round_robin.push(p); - false - } else { - true - } - }); - } - - Self { - all_parallel_units: round_robin, - } - } - - /// Schedules input fragments to different parallel units (workers). - /// The schedule procedure is two-fold: - /// (1) For singleton fragments, we schedule each to one parallel unit randomly. - /// (2) For normal fragments, we schedule them to each worker node in a round-robin manner. 
- pub fn schedule( - &self, - fragment: &mut Fragment, - locations: &mut ScheduledLocations, - ) -> MetaResult<()> { - if fragment.actors.is_empty() { - bail!("fragment has no actor"); - } - - if fragment.distribution_type == FragmentDistributionType::Single as i32 { - // Singleton fragment - let [actor] = fragment.actors.as_slice() else { - panic!("singleton fragment should only have one actor") - }; - - let parallel_unit = - if actor.same_worker_node_as_upstream && !actor.upstream_actor_id.is_empty() { - // Schedule the fragment to the same parallel unit as upstream. - locations.schedule_colocate_with(&actor.upstream_actor_id)? - } else { - // Randomly choose one parallel unit to schedule from all parallel units. - self.all_parallel_units - .choose(&mut rand::thread_rng()) - .cloned() - .context("no parallel unit to schedule")? - }; - - // Build vnode mapping. However, we'll leave vnode field of actors unset for singletons. - let _vnode_mapping = - self.set_fragment_vnode_mapping(fragment, &[parallel_unit.clone()])?; - - // Record actor locations. - locations - .actor_locations - .insert(fragment.actors[0].actor_id, parallel_unit); - } else { - // Normal fragment - let parallel_units = if self.all_parallel_units.len() < fragment.actors.len() { - bail!( - "not enough parallel units to schedule, required {} got {}", - fragment.actors.len(), - self.all_parallel_units.len(), - ); - } else { - // By taking a prefix of all parallel units, we schedule the actors round-robin-ly. - // Then sort them by parallel unit id to make the actor ids continuous against the - // parallel unit id. - let mut parallel_units = self.all_parallel_units[..fragment.actors.len()].to_vec(); - parallel_units.sort_unstable_by_key(|p| p.id); - parallel_units - }; - - // Build vnode mapping according to the parallel units. - let vnode_mapping = self.set_fragment_vnode_mapping(fragment, ¶llel_units)?; - let vnode_bitmaps = vnode_mapping_to_bitmaps(vnode_mapping); - - // Record actor locations and set vnodes into the actors. - for (actor, parallel_unit) in fragment.actors.iter_mut().zip_eq(parallel_units) { - let parallel_unit = - if actor.same_worker_node_as_upstream && !actor.upstream_actor_id.is_empty() { - locations.schedule_colocate_with(&actor.upstream_actor_id)? - } else { - parallel_unit.clone() - }; - - actor.vnode_bitmap = - Some(vnode_bitmaps.get(¶llel_unit.id).unwrap().to_protobuf()); - locations - .actor_locations - .insert(actor.actor_id, parallel_unit); - } - } - - Ok(()) - } - - /// `set_fragment_vnode_mapping` works by following steps: - /// 1. Build a vnode mapping according to parallel units where the fragment is scheduled. - /// 2. Set the vnode mapping into the fragment. - /// 3. Record the relationship between state tables and vnode mappings. 
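Note (editorial sketch, not part of the patch): the removed scheduler relied on `build_vnode_mapping` (deleted from `mapping.rs` earlier in this patch), which splits the vnode range as evenly as possible over the scheduled parallel units, giving the first `count % n` units one extra vnode. A standalone recap of that behaviour:

/// Split `vnode_count` vnodes over the given parallel-unit ids as evenly as
/// possible; the first `vnode_count % n` units receive one extra vnode.
fn build_vnode_mapping(parallel_unit_ids: &[u32], vnode_count: usize) -> Vec<u32> {
    let base = vnode_count / parallel_unit_ids.len();
    let mut extra = vnode_count % parallel_unit_ids.len();
    let mut mapping = Vec::with_capacity(vnode_count);
    for &unit_id in parallel_unit_ids {
        let count = if extra > 0 { extra -= 1; base + 1 } else { base };
        mapping.extend(std::iter::repeat(unit_id).take(count));
    }
    mapping
}

// With 10 vnodes and parallel units [0, 1, 2] this yields
// [0, 0, 0, 0, 1, 1, 1, 2, 2, 2], matching the example in the removed doc comment.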
- fn set_fragment_vnode_mapping( - &self, - fragment: &mut Fragment, - parallel_units: &[ParallelUnit], - ) -> MetaResult { - let vnode_mapping = build_vnode_mapping(parallel_units); - let (original_indices, data) = compress_data(&vnode_mapping); - fragment.vnode_mapping = Some(ParallelUnitMapping { - original_indices, - data, - fragment_id: fragment.fragment_id, - }); - - Ok(vnode_mapping) - } -} - -#[cfg(test)] -mod test { - use std::sync::Arc; - use std::time::Duration; - - use itertools::Itertools; - use risingwave_common::buffer::Bitmap; - use risingwave_common::hash::VirtualNode; - use risingwave_pb::catalog::Table; - use risingwave_pb::common::{HostAddress, WorkerType}; - use risingwave_pb::meta::table_fragments::fragment::FragmentDistributionType; - use risingwave_pb::stream_plan::stream_node::NodeBody; - use risingwave_pb::stream_plan::{ - FragmentTypeFlag, MaterializeNode, StreamActor, StreamNode, TopNNode, - }; - - use super::*; - use crate::manager::{ClusterManager, MetaSrvEnv}; - - #[tokio::test] - async fn test_schedule() -> MetaResult<()> { - let env = MetaSrvEnv::for_test().await; - let cluster_manager = - Arc::new(ClusterManager::new(env.clone(), Duration::from_secs(3600)).await?); - - let node_count = 4; - let fake_parallelism = 4; - for i in 0..node_count { - let host = HostAddress { - host: "127.0.0.1".to_string(), - port: i as i32, - }; - cluster_manager - .add_worker_node(WorkerType::ComputeNode, host.clone(), fake_parallelism) - .await?; - cluster_manager.activate_worker_node(host).await?; - } - - let scheduler = Scheduler::new(cluster_manager.list_active_parallel_units().await); - let mut locations = ScheduledLocations::new(); - - let mut actor_id = 1u32; - let mut single_fragments = (1..6u32) - .map(|id| { - let fragment = Fragment { - fragment_id: id, - fragment_type_mask: FragmentTypeFlag::FragmentUnspecified as u32, - distribution_type: FragmentDistributionType::Single as i32, - actors: vec![StreamActor { - actor_id, - fragment_id: id, - nodes: Some(StreamNode { - node_body: Some(NodeBody::TopN(TopNNode { - table: Some(Table { - id: 0, - ..Default::default() - }), - ..Default::default() - })), - ..Default::default() - }), - dispatcher: vec![], - upstream_actor_id: vec![], - same_worker_node_as_upstream: false, - vnode_bitmap: None, - mview_definition: "".to_owned(), - }], - ..Default::default() - }; - actor_id += 1; - fragment - }) - .collect_vec(); - - let parallel_degree = fake_parallelism; - let mut normal_fragments = (6..8u32) - .map(|fragment_id| { - let actors = (actor_id..actor_id + node_count * parallel_degree as u32) - .map(|id| StreamActor { - actor_id: id, - fragment_id, - nodes: Some(StreamNode { - node_body: Some(NodeBody::Materialize(MaterializeNode { - table_id: fragment_id, - ..Default::default() - })), - ..Default::default() - }), - dispatcher: vec![], - upstream_actor_id: vec![], - same_worker_node_as_upstream: false, - vnode_bitmap: None, - mview_definition: "".to_owned(), - }) - .collect_vec(); - actor_id += node_count * parallel_degree as u32; - Fragment { - fragment_id, - fragment_type_mask: FragmentTypeFlag::FragmentUnspecified as u32, - distribution_type: FragmentDistributionType::Hash as i32, - actors, - ..Default::default() - } - }) - .collect_vec(); - - // Test round robin schedule for singleton fragments - for fragment in &mut single_fragments { - scheduler.schedule(fragment, &mut locations).unwrap(); - } - for fragment in single_fragments { - assert_ne!(fragment.vnode_mapping, None); - for actor in fragment.actors { - 
assert!(actor.vnode_bitmap.is_none()); - } - } - - // Test normal schedule for other fragments - for fragment in &mut normal_fragments { - scheduler.schedule(fragment, &mut locations).unwrap(); - } - assert_eq!( - locations - .actor_locations - .iter() - .filter(|(actor_id, _)| { - normal_fragments[1] - .actors - .iter() - .map(|actor| actor.actor_id) - .contains(actor_id) - }) - .count(), - node_count as usize * parallel_degree - ); - for fragment in normal_fragments { - assert_ne!(fragment.vnode_mapping, None,); - let mut vnode_sum = 0; - for actor in fragment.actors { - vnode_sum += Bitmap::from(actor.get_vnode_bitmap()?).count_ones(); - } - assert_eq!(vnode_sum, VirtualNode::COUNT); - } - - Ok(()) - } -} diff --git a/src/meta/src/stream/source_manager.rs b/src/meta/src/stream/source_manager.rs index 2ad43a3b39257..0469768b5fd16 100644 --- a/src/meta/src/stream/source_manager.rs +++ b/src/meta/src/stream/source_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/meta/src/stream/stream_graph.rs b/src/meta/src/stream/stream_graph.rs index 31e7374bd2603..aff8fceacfb52 100644 --- a/src/meta/src/stream/stream_graph.rs +++ b/src/meta/src/stream/stream_graph.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,1233 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::hash_map::HashMap; -use std::collections::{BTreeMap, HashSet, VecDeque}; -use std::ops::{Deref, Range}; -use std::sync::{Arc, LazyLock}; +mod actor; +mod fragment; +mod id; +mod schedule; +mod visit; -use assert_matches::assert_matches; -use itertools::Itertools; -use risingwave_common::bail; -use risingwave_common::catalog::{generate_internal_table_name_with_type, TableId}; -use risingwave_pb::catalog::Table; -use risingwave_pb::meta::table_fragments::fragment::FragmentDistributionType; -use risingwave_pb::meta::table_fragments::Fragment; -use risingwave_pb::stream_plan::stream_fragment_graph::{StreamFragment, StreamFragmentEdge}; -use risingwave_pb::stream_plan::stream_node::NodeBody; -use risingwave_pb::stream_plan::{ - agg_call_state, DispatchStrategy, Dispatcher, DispatcherType, MergeNode, StreamActor, - StreamFragmentGraph as StreamFragmentGraphProto, StreamNode, -}; - -use super::CreateStreamingJobContext; -use crate::manager::{ - IdCategory, IdCategoryType, IdGeneratorManager, IdGeneratorManagerRef, StreamingJob, -}; -use crate::model::FragmentId; -use crate::storage::MetaStore; -use crate::MetaResult; - -/// Id of an Actor, maybe local or global -#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)] -enum LocalActorId { - /// The global allocated id of a fragment. - Global(u32), - /// The local id of a fragment, need to be converted to global id if being used in the meta - /// service. - Local(u32), -} - -impl LocalActorId { - pub fn as_global_id(&self) -> u32 { - match self { - Self::Global(id) => *id, - _ => panic!("actor id is not global id"), - } - } - - pub fn as_local_id(&self) -> u32 { - match self { - Self::Local(id) => *id, - _ => panic!("actor id is not local id"), - } - } - - /// Convert local id to global id. 
Panics if the actor id is not local, or actor id >= - /// len. - pub fn to_global_id(self, offset: u32, len: u32) -> Self { - let id = self.as_local_id(); - assert!(id < len, "actor id {} is out of range (len: {})", id, len); - Self::Global(id + offset) - } -} - -/// A wrapper to distinguish global ID generated by the [`IdGeneratorManager`] and the local ID from -/// the frontend. -#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)] -struct GlobalId(u32); - -impl GlobalId { - pub fn as_global_id(&self) -> u32 { - self.0 - } -} - -/// Utility for converting local IDs into pre-allocated global IDs by adding an `offset`. -/// -/// This requires the local IDs exactly a permutation of the range `[0, len)`. -#[derive(Clone, Copy, Debug)] -struct GlobalIdGen { - offset: u32, - len: u32, -} - -impl GlobalIdGen { - /// Pre-allocate a range of IDs with the given `len` and return the generator. - pub async fn new(id_gen: &IdGeneratorManager, len: u64) -> MetaResult { - let offset = id_gen.generate_interval::(len).await?; - Ok(Self { - offset: offset as u32, - len: len as u32, - }) - } - - /// Convert local id to global id. Panics if `id >= len`. - pub fn to_global_id(self, local_id: u32) -> GlobalId { - assert!( - local_id < self.len, - "id {} is out of range (len: {})", - local_id, - self.len - ); - GlobalId(local_id + self.offset) - } -} - -type GlobalFragmentId = GlobalId<{ IdCategory::Fragment }>; -type GlobalFragmentIdGen = GlobalIdGen<{ IdCategory::Fragment }>; -type GlobalTableIdGen = GlobalIdGen<{ IdCategory::Table }>; - -/// A list of actors with order. -#[derive(Debug, Clone)] -struct OrderedActorLink(pub Vec); - -impl OrderedActorLink { - pub fn to_global_ids(&self, actor_id_offset: u32, actor_id_len: u32) -> Self { - Self( - self.0 - .iter() - .map(|x| x.to_global_id(actor_id_offset, actor_id_len)) - .collect(), - ) - } - - pub fn as_global_ids(&self) -> Vec { - Self::slice_as_global_ids(self.0.as_slice()) - } - - pub fn slice_as_global_ids(data: &[LocalActorId]) -> Vec { - data.iter().map(|x| x.as_global_id()).collect() - } -} - -struct StreamActorDownstream { - dispatch_strategy: DispatchStrategy, - dispatcher_id: u64, - - /// Downstream actors. - actors: OrderedActorLink, - - /// Whether to place the downstream actors on the same node - same_worker_node: bool, -} - -struct StreamActorUpstream { - /// Upstream actors - actors: OrderedActorLink, - /// associate fragment id - fragment_id: GlobalFragmentId, - /// Whether to place the upstream actors on the same node - same_worker_node: bool, -} - -/// [`StreamActorBuilder`] builds a stream actor in a stream DAG. -struct StreamActorBuilder { - /// actor id field - actor_id: LocalActorId, - - /// associated fragment id - fragment_id: GlobalFragmentId, - - /// associated stream node - nodes: Arc, - - /// downstream dispatchers (dispatcher, downstream actor, hash mapping) - downstreams: Vec, - - /// upstreams, exchange node operator_id -> upstream actor ids - upstreams: HashMap, - - /// Whether to place this actors on the same node as chain's upstream MVs. 
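The offset-based id allocation used by `GlobalIdGen` above can be shown with a stand-alone sketch. `IdAllocator` below is a hypothetical in-memory stand-in for the meta store's id generator; only the interval-plus-offset arithmetic mirrors the code in this diff.

```rust
use std::sync::atomic::{AtomicU32, Ordering};

/// Hypothetical in-memory stand-in for the meta store's id generator: it only
/// hands out contiguous, non-overlapping intervals of ids.
struct IdAllocator(AtomicU32);

impl IdAllocator {
    fn generate_interval(&self, len: u32) -> u32 {
        // Returns the start of a fresh interval of `len` ids.
        self.0.fetch_add(len, Ordering::Relaxed)
    }
}

/// Maps local ids `[0, len)` to pre-allocated global ids `[offset, offset + len)`.
#[derive(Clone, Copy)]
struct GlobalIdGen {
    offset: u32,
    len: u32,
}

impl GlobalIdGen {
    fn new(alloc: &IdAllocator, len: u32) -> Self {
        Self { offset: alloc.generate_interval(len), len }
    }

    /// Panics if the local id is out of the pre-allocated range, like the real code.
    fn to_global_id(self, local_id: u32) -> u32 {
        assert!(local_id < self.len, "id {} out of range (len {})", local_id, self.len);
        self.offset + local_id
    }
}

fn main() {
    let alloc = IdAllocator(AtomicU32::new(100));
    let fragment_ids = GlobalIdGen::new(&alloc, 3); // global ids 100..103
    let actor_ids = GlobalIdGen::new(&alloc, 5);    // global ids 103..108
    assert_eq!(fragment_ids.to_global_id(2), 102);
    assert_eq!(actor_ids.to_global_id(0), 103);
    println!("ok");
}
```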
- chain_same_worker_node: bool, - - /// whether this actor builder has been sealed - sealed: bool, -} - -impl StreamActorBuilder { - fn is_chain_same_worker_node(stream_node: &StreamNode) -> bool { - fn visit(stream_node: &StreamNode) -> bool { - if let Some(NodeBody::Chain(ref chain)) = stream_node.node_body { - return chain.same_worker_node; - } - stream_node.input.iter().any(visit) - } - visit(stream_node) - } - - pub fn new( - actor_id: LocalActorId, - fragment_id: GlobalFragmentId, - node: Arc, - ) -> Self { - Self { - actor_id, - fragment_id, - chain_same_worker_node: Self::is_chain_same_worker_node(&node), - nodes: node, - downstreams: vec![], - upstreams: HashMap::new(), - sealed: false, - } - } - - pub fn get_fragment_id(&self) -> GlobalFragmentId { - self.fragment_id - } - - /// Add a dispatcher to this actor. - pub fn add_dispatcher( - &mut self, - dispatch_strategy: DispatchStrategy, - dispatcher_id: u64, - downstream_actors: OrderedActorLink, - same_worker_node: bool, - ) { - assert!(!self.sealed); - - self.downstreams.push(StreamActorDownstream { - dispatch_strategy, - dispatcher_id, - actors: downstream_actors, - same_worker_node, - }); - } - - /// Build an actor from given information. At the same time, convert local actor id to global - /// actor id. - pub fn seal(&mut self, actor_id_offset: u32, actor_id_len: u32) { - assert!(!self.sealed); - - self.actor_id = self.actor_id.to_global_id(actor_id_offset, actor_id_len); - self.downstreams = std::mem::take(&mut self.downstreams) - .into_iter() - .map( - |StreamActorDownstream { - dispatch_strategy, - dispatcher_id, - actors: downstreams, - same_worker_node, - }| { - let downstreams = downstreams.to_global_ids(actor_id_offset, actor_id_len); - - if dispatch_strategy.r#type == DispatcherType::NoShuffle as i32 { - assert_eq!( - downstreams.0.len(), - 1, - "no shuffle should only have one actor downstream" - ); - assert!( - dispatch_strategy.column_indices.is_empty(), - "should leave `column_indices` empty" - ); - } - - StreamActorDownstream { - dispatch_strategy, - dispatcher_id, - actors: downstreams, - same_worker_node, - } - }, - ) - .collect(); - - self.upstreams = std::mem::take(&mut self.upstreams) - .into_iter() - .map( - |( - exchange_id, - StreamActorUpstream { - actors, - same_worker_node, - fragment_id, - }, - )| { - ( - exchange_id, - StreamActorUpstream { - actors: actors.to_global_ids(actor_id_offset, actor_id_len), - same_worker_node, - fragment_id, - }, - ) - }, - ) - .collect(); - self.sealed = true; - } - - /// Build an actor after seal. - pub fn build(&self) -> StreamActor { - assert!(self.sealed); - - let dispatcher = self - .downstreams - .iter() - .map( - |StreamActorDownstream { - dispatch_strategy, - dispatcher_id, - actors, - same_worker_node: _, - }| Dispatcher { - downstream_actor_id: actors.as_global_ids(), - r#type: dispatch_strategy.r#type, - column_indices: dispatch_strategy.column_indices.clone(), - // will be filled later by stream manager - hash_mapping: None, - dispatcher_id: *dispatcher_id, - }, - ) - .collect_vec(); - - StreamActor { - actor_id: self.actor_id.as_global_id(), - fragment_id: self.fragment_id.as_global_id(), - nodes: Some(self.nodes.deref().clone()), - dispatcher, - upstream_actor_id: self - .upstreams - .iter() - .flat_map(|(_, StreamActorUpstream { actors, .. 
})| actors.0.iter().copied()) - .map(|x| x.as_global_id()) - .collect(), // TODO: store each upstream separately - same_worker_node_as_upstream: self.chain_same_worker_node - || self.upstreams.values().any(|u| u.same_worker_node), - vnode_bitmap: None, - // To be filled by `StreamGraphBuilder::build` - mview_definition: "".to_owned(), - } - } -} - -/// [`StreamGraphBuilder`] build a stream graph. It injects some information to achieve -/// dependencies. See `build_inner` for more details. -#[derive(Default)] -struct StreamGraphBuilder { - actor_builders: BTreeMap, -} - -impl StreamGraphBuilder { - /// Insert new generated actor. - pub fn add_actor( - &mut self, - actor_id: LocalActorId, - fragment_id: GlobalFragmentId, - node: Arc, - ) { - self.actor_builders.insert( - actor_id, - StreamActorBuilder::new(actor_id, fragment_id, node), - ); - } - - /// Number of actors in the graph builder - pub fn actor_len(&self) -> usize { - self.actor_builders.len() - } - - /// Add dependency between two connected node in the graph. - pub fn add_link( - &mut self, - upstream_fragment_id: GlobalFragmentId, - upstream_actor_ids: &[LocalActorId], - downstream_actor_ids: &[LocalActorId], - edge: StreamFragmentEdge, - ) { - let exchange_operator_id = edge.link_id; - let same_worker_node = edge.same_worker_node; - let dispatch_strategy = edge.dispatch_strategy.unwrap(); - // We can't use the exchange operator id directly as the dispatch id, because an exchange - // could belong to more than one downstream in DAG. - // We can use downstream fragment id as an unique id for dispatcher. - // In this way we can ensure the dispatchers of `StreamActor` would have different id, - // even though they come from the same exchange operator. - let dispatch_id = edge.downstream_id as u64; - - if dispatch_strategy.get_type().unwrap() == DispatcherType::NoShuffle { - assert_eq!( - upstream_actor_ids.len(), - downstream_actor_ids.len(), - "mismatched length when processing no-shuffle exchange: {:?} -> {:?} on exchange {}", - upstream_actor_ids, - downstream_actor_ids, - exchange_operator_id - ); - - // update 1v1 relationship - upstream_actor_ids - .iter() - .zip_eq(downstream_actor_ids.iter()) - .for_each(|(upstream_id, downstream_id)| { - self.actor_builders - .get_mut(upstream_id) - .unwrap() - .add_dispatcher( - dispatch_strategy.clone(), - dispatch_id, - OrderedActorLink(vec![*downstream_id]), - same_worker_node, - ); - - let ret = self - .actor_builders - .get_mut(downstream_id) - .unwrap() - .upstreams - .insert( - exchange_operator_id, - StreamActorUpstream { - actors: OrderedActorLink(vec![*upstream_id]), - fragment_id: upstream_fragment_id, - same_worker_node, - }, - ); - - assert!( - ret.is_none(), - "duplicated exchange input {} for no-shuffle actors {:?} -> {:?}", - exchange_operator_id, - upstream_id, - downstream_id - ); - }); - - return; - } - - // otherwise, make m * n links between actors. - - assert!( - !same_worker_node, - "same_worker_node only applies to 1v1 dispatchers." - ); - - // update actors to have dispatchers, link upstream -> downstream. - upstream_actor_ids.iter().for_each(|upstream_id| { - self.actor_builders - .get_mut(upstream_id) - .unwrap() - .add_dispatcher( - dispatch_strategy.clone(), - dispatch_id, - OrderedActorLink(downstream_actor_ids.to_vec()), - same_worker_node, - ); - }); - - // update actors to have upstreams, link downstream <- upstream. 
- downstream_actor_ids.iter().for_each(|downstream_id| { - let ret = self - .actor_builders - .get_mut(downstream_id) - .unwrap() - .upstreams - .insert( - exchange_operator_id, - StreamActorUpstream { - actors: OrderedActorLink(upstream_actor_ids.to_vec()), - fragment_id: upstream_fragment_id, - same_worker_node, - }, - ); - assert!( - ret.is_none(), - "duplicated exchange input {} for actors {:?} -> {:?}", - exchange_operator_id, - upstream_actor_ids, - downstream_actor_ids - ); - }); - } - - /// Build final stream DAG with dependencies with current actor builders. - #[allow(clippy::type_complexity)] - pub fn build( - mut self, - ctx: &CreateStreamingJobContext, - actor_id_offset: u32, - actor_id_len: u32, - ) -> MetaResult>> { - let mut graph: HashMap> = HashMap::new(); - - for builder in self.actor_builders.values_mut() { - builder.seal(actor_id_offset, actor_id_len); - } - - for builder in self.actor_builders.values() { - let mut actor = builder.build(); - let mut upstream_actors = builder - .upstreams - .iter() - .map(|(id, StreamActorUpstream { actors, .. })| (*id, actors.clone())) - .collect(); - let mut upstream_fragments = builder - .upstreams - .iter() - .map(|(id, StreamActorUpstream { fragment_id, .. })| (*id, *fragment_id)) - .collect(); - let stream_node = self.build_inner( - actor.get_nodes()?, - &mut upstream_actors, - &mut upstream_fragments, - )?; - - actor.nodes = Some(stream_node); - actor.mview_definition = ctx.streaming_definition.clone(); - - graph - .entry(builder.get_fragment_id()) - .or_default() - .push(actor); - } - Ok(graph) - } - - /// Build stream actor inside, two works will be done: - /// 1. replace node's input with [`MergeNode`] if it is `ExchangeNode`, and swallow - /// mergeNode's input. - /// 2. ignore root node when it's `ExchangeNode`. - /// 3. replace node's `ExchangeNode` input with [`MergeNode`] and resolve its upstream actor - /// ids if it is a `ChainNode`. - fn build_inner( - &self, - stream_node: &StreamNode, - upstream_actor_id: &mut HashMap, - upstream_fragment_id: &mut HashMap, - ) -> MetaResult { - match stream_node.get_node_body()? { - NodeBody::Exchange(_) => { - panic!("ExchangeNode should be eliminated from the top of the plan node when converting fragments to actors: {:#?}", stream_node) - } - NodeBody::Chain(_) => Ok(self.resolve_chain_node(stream_node)?), - _ => { - let mut new_stream_node = stream_node.clone(); - - for (input, new_input) in stream_node - .input - .iter() - .zip_eq(new_stream_node.input.iter_mut()) - { - *new_input = match input.get_node_body()? { - NodeBody::Exchange(e) => { - assert!(!input.get_fields().is_empty()); - StreamNode { - input: vec![], - stream_key: input.stream_key.clone(), - node_body: Some(NodeBody::Merge(MergeNode { - upstream_actor_id: upstream_actor_id - .remove(&input.get_operator_id()) - .expect("failed to find upstream actor id for given exchange node").as_global_ids(), - upstream_fragment_id: upstream_fragment_id.get(&input.get_operator_id()).unwrap().as_global_id(), - upstream_dispatcher_type: e.get_strategy()?.r#type, - fields: input.get_fields().clone(), - })), - fields: input.get_fields().clone(), - operator_id: input.operator_id, - identity: "MergeExecutor".to_string(), - append_only: input.append_only, - } - } - NodeBody::Chain(_) => self.resolve_chain_node(input)?, - _ => self.build_inner(input, upstream_actor_id, upstream_fragment_id)?, - } - } - Ok(new_stream_node) - } - } - } - - /// Resolve the chain node, only rewrite the schema of input `MergeNode`. 
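A simplified model of the linking performed by `add_link` above, stripped of dispatcher ids and same-worker bookkeeping: `NoShuffle` edges pair upstream and downstream actors one-to-one, while every other dispatcher type produces m × n links. The `DispatcherType` enum and `link_actors` function below are illustrative stand-ins with plain `u32` actor ids, not the meta types.

```rust
/// Which upstream actor dispatches to which downstream actors, depending on the
/// dispatcher type of the edge. Illustration only.
#[allow(dead_code)]
enum DispatcherType {
    NoShuffle,
    Hash,
    Broadcast,
    Simple,
}

fn link_actors(
    upstream: &[u32],
    downstream: &[u32],
    ty: DispatcherType,
) -> Vec<(u32, Vec<u32>)> {
    match ty {
        DispatcherType::NoShuffle => {
            // One-to-one: lengths must match, and each pair is colocated in the real code.
            assert_eq!(upstream.len(), downstream.len(), "no-shuffle requires equal lengths");
            upstream
                .iter()
                .zip(downstream)
                .map(|(&u, &d)| (u, vec![d]))
                .collect()
        }
        // Hash / Broadcast / Simple: each upstream actor knows all downstream actors.
        _ => upstream.iter().map(|&u| (u, downstream.to_vec())).collect(),
    }
}

fn main() {
    let links = link_actors(&[1, 2], &[10, 11], DispatcherType::NoShuffle);
    assert_eq!(links, vec![(1, vec![10]), (2, vec![11])]);

    let links = link_actors(&[1, 2], &[10, 11, 12], DispatcherType::Hash);
    assert_eq!(links.len(), 2);
    assert!(links.iter().all(|(_, d)| d.len() == 3));
    println!("ok");
}
```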
- fn resolve_chain_node(&self, stream_node: &StreamNode) -> MetaResult { - let NodeBody::Chain(chain_node) = stream_node.get_node_body().unwrap() else { - unreachable!() - }; - let input = stream_node.get_input(); - assert_eq!(input.len(), 2); - - let merge_node = &input[0]; - assert_matches!(merge_node.node_body, Some(NodeBody::Merge(_))); - let batch_plan_node = &input[1]; - assert_matches!(batch_plan_node.node_body, Some(NodeBody::BatchPlan(_))); - - let chain_input = vec![ - StreamNode { - input: vec![], - stream_key: merge_node.stream_key.clone(), - node_body: Some(NodeBody::Merge(MergeNode { - upstream_actor_id: vec![], - upstream_fragment_id: 0, - upstream_dispatcher_type: DispatcherType::NoShuffle as _, - fields: chain_node.upstream_fields.clone(), - })), - fields: chain_node.upstream_fields.clone(), - operator_id: merge_node.operator_id, - identity: "MergeExecutor".to_string(), - append_only: stream_node.append_only, - }, - batch_plan_node.clone(), - ]; - - Ok(StreamNode { - input: chain_input, - stream_key: stream_node.stream_key.clone(), - node_body: Some(NodeBody::Chain(chain_node.clone())), - operator_id: stream_node.operator_id, - identity: "ChainExecutor".to_string(), - fields: chain_node.upstream_fields.clone(), - append_only: stream_node.append_only, - }) - } -} - -/// The mutable state when building actor graph. -#[derive(Default)] -struct BuildActorGraphState { - /// stream graph builder, to build streaming DAG. - stream_graph_builder: StreamGraphBuilder, - /// when converting fragment graph to actor graph, we need to know which actors belong to a - /// fragment. - fragment_actors: HashMap>, - /// local actor id - next_local_actor_id: u32, -} - -impl BuildActorGraphState { - fn gen_actor_ids(&mut self, parallel_degree: u32) -> Range { - let start_actor_id = self.next_local_actor_id; - self.next_local_actor_id += parallel_degree; - start_actor_id..start_actor_id + parallel_degree - } -} - -/// [`ActorGraphBuilder`] generates the proto for interconnected actors for a streaming pipeline. -pub struct ActorGraphBuilder { - /// Default parallelism. - default_parallelism: u32, - - fragment_graph: StreamFragmentGraph, -} - -impl ActorGraphBuilder { - pub fn new(fragment_graph: StreamFragmentGraph, default_parallelism: u32) -> Self { - Self { - default_parallelism, - fragment_graph, - } - } - - /// Build a stream graph by duplicating each fragment as parallel actors. - pub async fn generate_graph( - &self, - id_gen_manager: IdGeneratorManagerRef, - ctx: &mut CreateStreamingJobContext, - ) -> MetaResult> - where - S: MetaStore, - { - let stream_graph = { - let BuildActorGraphState { - stream_graph_builder, - next_local_actor_id, - .. - } = { - let mut state = BuildActorGraphState::default(); - - // Generate actors of the streaming plan - self.build_actor_graph(&mut state, &self.fragment_graph, ctx)?; - state - }; - - // generates global ids - let (actor_len, start_actor_id) = { - let actor_len = stream_graph_builder.actor_len() as u32; - assert_eq!(actor_len, next_local_actor_id); - let start_actor_id = id_gen_manager - .generate_interval::<{ IdCategory::Actor }>(actor_len as u64) - .await? as _; - - (actor_len, start_actor_id) - }; - - stream_graph_builder.build(&*ctx, start_actor_id, actor_len)? 
- }; - - // Serialize the graph - let stream_graph = stream_graph - .into_iter() - .map(|(fragment_id, actors)| { - let fragment = self.fragment_graph.seal_fragment(fragment_id, actors); - let fragment_id = fragment_id.as_global_id(); - (fragment_id, fragment) - }) - .collect(); - - Ok(stream_graph) - } - - /// Build actor graph from fragment graph using topological sort. Setup dispatcher in actor and - /// generate actors by their parallelism. - fn build_actor_graph( - &self, - state: &mut BuildActorGraphState, - fragment_graph: &StreamFragmentGraph, - ctx: &mut CreateStreamingJobContext, - ) -> MetaResult<()> { - // Use topological sort to build the graph from downstream to upstream. (The first fragment - // popped out from the heap will be the top-most node in plan, or the sink in stream graph.) - let mut actionable_fragment_id = VecDeque::new(); - let mut downstream_cnts = HashMap::new(); - - // Iterate all fragments - for fragment_id in fragment_graph.fragments.keys() { - // Count how many downstreams we have for a given fragment - let downstream_cnt = fragment_graph.get_downstreams(*fragment_id).len(); - if downstream_cnt == 0 { - actionable_fragment_id.push_back(*fragment_id); - } else { - downstream_cnts.insert(*fragment_id, downstream_cnt); - } - } - - while let Some(fragment_id) = actionable_fragment_id.pop_front() { - // Build the actors corresponding to the fragment - self.build_actor_graph_fragment(fragment_id, state, fragment_graph, ctx)?; - - // Find if we can process more fragments - for upstream_id in fragment_graph.get_upstreams(fragment_id).keys() { - let downstream_cnt = downstream_cnts - .get_mut(upstream_id) - .expect("the upstream should exist"); - *downstream_cnt -= 1; - if *downstream_cnt == 0 { - downstream_cnts.remove(upstream_id); - actionable_fragment_id.push_back(*upstream_id); - } - } - } - - if !downstream_cnts.is_empty() { - // There are fragments that are not processed yet. - bail!("graph is not a DAG"); - } - - Ok(()) - } - - fn build_actor_graph_fragment( - &self, - fragment_id: GlobalFragmentId, - state: &mut BuildActorGraphState, - fragment_graph: &StreamFragmentGraph, - ctx: &mut CreateStreamingJobContext, - ) -> MetaResult<()> { - let current_fragment = fragment_graph.get_fragment(fragment_id).unwrap().clone(); - let upstream_table_id = current_fragment - .upstream_table_ids - .iter() - .at_most_one() - .unwrap() - .map(TableId::from); - if let Some(upstream_table_id) = upstream_table_id { - ctx.chain_fragment_upstream_table_map - .insert(fragment_id.as_global_id(), upstream_table_id); - } - - let parallel_degree = if current_fragment.is_singleton { - 1 - } else if let Some(upstream_table_id) = upstream_table_id { - // set fragment parallelism to the parallelism of its dependent table. 
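The traversal in `build_actor_graph` above is a plain Kahn-style topological sort driven by downstream counts: a fragment becomes actionable once all of its downstreams are processed, and leftover counts mean the graph has a cycle. A self-contained sketch with hypothetical `u32` fragment ids (not the meta types) follows; the real code additionally builds the actors of each fragment as it is popped.

```rust
use std::collections::{HashMap, VecDeque};

/// Visit fragments from the most-downstream ones (no downstreams, e.g. the
/// materialize fragment) back to the sources. `edges` are (upstream, downstream)
/// fragment id pairs. Illustration only.
fn topo_order(fragments: &[u32], edges: &[(u32, u32)]) -> Result<Vec<u32>, String> {
    let mut downstream_cnt: HashMap<u32, usize> = fragments.iter().map(|&f| (f, 0)).collect();
    let mut upstreams: HashMap<u32, Vec<u32>> = HashMap::new();
    for &(up, down) in edges {
        *downstream_cnt.get_mut(&up).unwrap() += 1;
        upstreams.entry(down).or_default().push(up);
    }

    // Start with fragments that have no downstreams.
    let mut queue: VecDeque<u32> = downstream_cnt
        .iter()
        .filter(|&(_, &c)| c == 0)
        .map(|(&f, _)| f)
        .collect();

    let mut order = Vec::new();
    while let Some(f) = queue.pop_front() {
        order.push(f);
        // Finishing `f` may make its upstreams actionable.
        for &up in upstreams.get(&f).into_iter().flatten() {
            let cnt = downstream_cnt.get_mut(&up).unwrap();
            *cnt -= 1;
            if *cnt == 0 {
                queue.push_back(up);
            }
        }
    }

    if order.len() == fragments.len() {
        Ok(order)
    } else {
        Err("graph is not a DAG".to_owned())
    }
}

fn main() {
    // 1 -> 2 -> 3 (3 is the most-downstream fragment), so the visit order is 3, 2, 1.
    let order = topo_order(&[1, 2, 3], &[(1, 2), (2, 3)]).unwrap();
    assert_eq!(order, vec![3, 2, 1]);
    println!("{order:?}");
}
```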
- let upstream_actors = ctx - .table_mview_map - .get(&upstream_table_id) - .expect("upstream actor should exist"); - upstream_actors.len() as u32 - } else { - self.default_parallelism - }; - - let node = Arc::new(current_fragment.node.unwrap()); - let actor_ids = state - .gen_actor_ids(parallel_degree) - .into_iter() - .map(LocalActorId::Local) - .collect_vec(); - - for id in &actor_ids { - state - .stream_graph_builder - .add_actor(*id, fragment_id, node.clone()); - } - - for (downstream_fragment_id, dispatch_edge) in fragment_graph.get_downstreams(fragment_id) { - let downstream_actors = state - .fragment_actors - .get(downstream_fragment_id) - .expect("downstream fragment not processed yet"); - - let dispatch_strategy = dispatch_edge.dispatch_strategy.as_ref().unwrap(); - match dispatch_strategy.get_type()? { - DispatcherType::Hash - | DispatcherType::Simple - | DispatcherType::Broadcast - | DispatcherType::NoShuffle => { - state.stream_graph_builder.add_link( - fragment_id, - &actor_ids, - downstream_actors, - dispatch_edge.clone(), - ); - } - DispatcherType::Unspecified => unreachable!(), - } - } - - let ret = state.fragment_actors.insert(fragment_id, actor_ids); - assert!( - ret.is_none(), - "fragment {:?} already processed", - fragment_id - ); - - Ok(()) - } -} - -/// The fragment in the building phase, including the [`StreamFragment`] from the frontend and -/// several additional helper fields. -#[derive(Debug, Clone)] -pub struct BuildingFragment { - /// The fragment structure from the frontend, with the global fragment ID. - inner: StreamFragment, - - /// A clone of the internal tables in this fragment. - internal_tables: Vec
, - - /// The ID of the job if it's materialized in this fragment. - table_id: Option, -} - -impl BuildingFragment { - /// Create a new [`BuildingFragment`] from a [`StreamFragment`]. The global fragment ID and - /// global table IDs will be correctly filled with the given `id` and `table_id_gen`. - fn new( - id: GlobalFragmentId, - fragment: StreamFragment, - job: &StreamingJob, - table_id_gen: GlobalTableIdGen, - ) -> Self { - let mut fragment = StreamFragment { - fragment_id: id.as_global_id(), - ..fragment - }; - let internal_tables = Self::fill_internal_tables(&mut fragment, job, table_id_gen); - let table_id = Self::fill_job(&mut fragment, job).then(|| job.id()); - - Self { - inner: fragment, - internal_tables, - table_id, - } - } - - /// Fill the information of the internal tables in the fragment. - fn fill_internal_tables( - fragment: &mut StreamFragment, - job: &StreamingJob, - table_id_gen: GlobalTableIdGen, - ) -> Vec
{ - let fragment_id = fragment.fragment_id; - let mut internal_tables = Vec::new(); - - visit_internal_tables(fragment, |table, table_type_name| { - table.id = table_id_gen.to_global_id(table.id).as_global_id(); - table.schema_id = job.schema_id(); - table.database_id = job.database_id(); - table.name = generate_internal_table_name_with_type( - &job.name(), - fragment_id, - table.id, - table_type_name, - ); - table.fragment_id = fragment_id; - - // Record the internal table. - internal_tables.push(table.clone()); - }); - - internal_tables - } - - /// Fill the information of the job in the fragment. - fn fill_job(fragment: &mut StreamFragment, job: &StreamingJob) -> bool { - let table_id = job.id(); - let fragment_id = fragment.fragment_id; - let mut has_table = false; - - visit_fragment(fragment, |node_body| match node_body { - NodeBody::Materialize(materialize_node) => { - materialize_node.table_id = table_id; - - // Fill the ID of the `Table`. - let table = materialize_node.table.as_mut().unwrap(); - table.id = table_id; - table.database_id = job.database_id(); - table.schema_id = job.schema_id(); - table.fragment_id = fragment_id; - - has_table = true; - } - NodeBody::Sink(sink_node) => { - sink_node.table_id = table_id; - - has_table = true; - } - NodeBody::Dml(dml_node) => { - dml_node.table_id = table_id; - } - _ => {} - }); - - has_table - } -} - -impl Deref for BuildingFragment { - type Target = StreamFragment; - - fn deref(&self) -> &Self::Target { - &self.inner - } -} - -/// In-memory representation of a **Fragment** Graph, built from the [`StreamFragmentGraphProto`] -/// from the frontend. -#[derive(Default)] -pub struct StreamFragmentGraph { - /// stores all the fragments in the graph. - fragments: HashMap, - - /// stores edges between fragments: upstream => downstream. - downstreams: HashMap>, - - /// stores edges between fragments: downstream -> upstream. - upstreams: HashMap>, - - /// Dependent relations of this job. - dependent_relations: HashSet, -} - -impl StreamFragmentGraph { - /// Create a new [`StreamFragmentGraph`] from the given [`StreamFragmentGraphProto`], with all - /// global IDs correctly filled. - pub async fn new( - proto: StreamFragmentGraphProto, - id_gen: IdGeneratorManagerRef, - job: &StreamingJob, - ) -> MetaResult { - let fragment_id_gen = - GlobalFragmentIdGen::new(&id_gen, proto.fragments.len() as u64).await?; - let table_id_gen = GlobalTableIdGen::new(&id_gen, proto.table_ids_cnt as u64).await?; - - // Create nodes. - let fragments: HashMap<_, _> = proto - .fragments - .into_iter() - .map(|(id, fragment)| { - let id = fragment_id_gen.to_global_id(id); - let fragment = BuildingFragment::new(id, fragment, job, table_id_gen); - (id, fragment) - }) - .collect(); - - assert_eq!( - fragments - .values() - .map(|f| f.internal_tables.len() as u32) - .sum::(), - proto.table_ids_cnt - ); - - // Create edges. 
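The edge-building hunk that follows indexes every fragment edge twice, as upstream → downstream and downstream → upstream, and rejects duplicate pairs. A minimal sketch of that double bookkeeping, with plain `u32` ids and a `String` standing in for `StreamFragmentEdge`, is shown below.

```rust
use std::collections::HashMap;

/// Every edge is stored under both endpoints so that downstream and upstream
/// lookups are both O(1). Inserting the same (up, down) pair twice is an error.
/// Illustration only; the real code uses `try_insert` on the proto edge type.
#[derive(Default)]
struct EdgeIndex {
    downstreams: HashMap<u32, HashMap<u32, String>>,
    upstreams: HashMap<u32, HashMap<u32, String>>,
}

impl EdgeIndex {
    fn add_edge(&mut self, up: u32, down: u32, edge: String) -> Result<(), String> {
        let dup_down = self
            .downstreams
            .entry(up)
            .or_default()
            .insert(down, edge.clone())
            .is_some();
        let dup_up = self
            .upstreams
            .entry(down)
            .or_default()
            .insert(up, edge)
            .is_some();
        if dup_down || dup_up {
            return Err(format!("duplicated edge {up} -> {down}"));
        }
        Ok(())
    }
}

fn main() {
    let mut index = EdgeIndex::default();
    index.add_edge(1, 2, "hash".into()).unwrap();
    index.add_edge(1, 3, "no-shuffle".into()).unwrap();
    assert!(index.add_edge(1, 2, "hash".into()).is_err()); // duplicates are rejected
    assert_eq!(index.upstreams[&2].len(), 1);
    println!("ok");
}
```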
- let mut downstreams = HashMap::new(); - let mut upstreams = HashMap::new(); - - for edge in proto.edges { - let upstream_id = fragment_id_gen.to_global_id(edge.upstream_id); - let downstream_id = fragment_id_gen.to_global_id(edge.downstream_id); - - upstreams - .entry(downstream_id) - .or_insert_with(HashMap::new) - .try_insert( - upstream_id, - StreamFragmentEdge { - upstream_id: upstream_id.as_global_id(), - downstream_id: downstream_id.as_global_id(), - ..edge.clone() - }, - ) - .unwrap(); - downstreams - .entry(upstream_id) - .or_insert_with(HashMap::new) - .try_insert( - downstream_id, - StreamFragmentEdge { - upstream_id: upstream_id.as_global_id(), - downstream_id: downstream_id.as_global_id(), - ..edge - }, - ) - .unwrap(); - } - - // Note: Here we directly use the field `dependent_table_ids` in the proto (resolved in - // frontend), instead of visiting the graph ourselves. Note that for creating table with a - // connector, the source itself is NOT INCLUDED in this list. - let dependent_relations = proto - .dependent_table_ids - .iter() - .map(TableId::from) - .collect(); - - Ok(Self { - fragments, - downstreams, - upstreams, - dependent_relations, - }) - } - - /// Seal a [`StreamFragment`] from the graph into a [`Fragment`], which will be further used to - /// build actors, schedule, and persist into meta store. - fn seal_fragment(&self, id: GlobalFragmentId, actors: Vec) -> Fragment { - let BuildingFragment { - inner, - internal_tables, - table_id, - } = self.fragments.get(&id).unwrap().to_owned(); - - let distribution_type = if inner.is_singleton { - FragmentDistributionType::Single - } else { - FragmentDistributionType::Hash - } as i32; - - let state_table_ids = internal_tables - .iter() - .map(|t| t.id) - .chain(table_id) - .collect(); - - let upstream_fragment_ids = self - .get_upstreams(id) - .keys() - .map(|id| id.as_global_id()) - .collect(); - - Fragment { - fragment_id: inner.fragment_id, - fragment_type_mask: inner.fragment_type_mask, - distribution_type, - actors, - // Will be filled in `Scheduler::schedule` later. - vnode_mapping: None, - state_table_ids, - upstream_fragment_ids, - } - } - - /// Retrieve the internal tables map of the whole graph. - pub fn internal_tables(&self) -> HashMap { - let mut tables = HashMap::new(); - for fragment in self.fragments.values() { - for table in &fragment.internal_tables { - tables - .try_insert(table.id, table.clone()) - .unwrap_or_else(|_| panic!("duplicated table id `{}`", table.id)); - } - } - tables - } - - /// Returns the fragment id where the table is materialized. - pub fn table_fragment_id(&self) -> FragmentId { - self.fragments - .values() - .filter(|b| b.table_id.is_some()) - .map(|b| b.fragment_id) - .exactly_one() - .expect("require exactly 1 materialize/sink node when creating the streaming job") - } - - pub fn dependent_relations(&self) -> &HashSet { - &self.dependent_relations - } - - fn get_fragment(&self, fragment_id: GlobalFragmentId) -> Option<&StreamFragment> { - self.fragments.get(&fragment_id).map(|b| b.deref()) - } - - fn get_downstreams( - &self, - fragment_id: GlobalFragmentId, - ) -> &HashMap { - self.downstreams.get(&fragment_id).unwrap_or(&EMPTY_HASHMAP) - } - - fn get_upstreams( - &self, - fragment_id: GlobalFragmentId, - ) -> &HashMap { - self.upstreams.get(&fragment_id).unwrap_or(&EMPTY_HASHMAP) - } -} - -static EMPTY_HASHMAP: LazyLock> = - LazyLock::new(HashMap::new); - -/// A utility for visiting and mutating the [`NodeBody`] of the [`StreamNode`]s in a -/// [`StreamFragment`] recursively. 
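The visitor introduced below walks the node tree depth-first and hands every node body to a caller-supplied closure, which is how `fill_internal_tables` rewrites internal table ids in place. A stand-alone sketch of the same pattern, with `Node`/`Body` as simplified stand-ins for `StreamNode`/`NodeBody`, follows.

```rust
/// Depth-first, mutable visit of a node tree. Illustration only.
#[derive(Debug)]
enum Body {
    Materialize { table_id: u32 },
    TopN { table_id: u32 },
    Other,
}

#[derive(Debug)]
struct Node {
    body: Body,
    input: Vec<Node>,
}

fn visit_nodes<F: FnMut(&mut Body)>(node: &mut Node, f: &mut F) {
    f(&mut node.body);
    for input in &mut node.input {
        visit_nodes(input, f);
    }
}

fn main() {
    let mut root = Node {
        body: Body::Materialize { table_id: 0 },
        input: vec![
            Node { body: Body::TopN { table_id: 0 }, input: vec![] },
            Node { body: Body::Other, input: vec![] },
        ],
    };

    // Assign "global" table ids to every stateful node, in the spirit of
    // `fill_internal_tables`.
    let mut next_id = 100;
    visit_nodes(&mut root, &mut |body| match body {
        Body::Materialize { table_id } | Body::TopN { table_id } => {
            *table_id = next_id;
            next_id += 1;
        }
        Body::Other => {}
    });

    assert!(matches!(root.body, Body::Materialize { table_id: 100 }));
    assert!(matches!(root.input[0].body, Body::TopN { table_id: 101 }));
    println!("{root:?}");
}
```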
-pub fn visit_fragment(fragment: &mut StreamFragment, mut f: F) -where - F: FnMut(&mut NodeBody), -{ - fn visit_inner(stream_node: &mut StreamNode, f: &mut F) - where - F: FnMut(&mut NodeBody), - { - f(stream_node.node_body.as_mut().unwrap()); - for input in &mut stream_node.input { - visit_inner(input, f); - } - } - - visit_inner(fragment.node.as_mut().unwrap(), &mut f) -} - -/// Visit the internal tables of a [`StreamFragment`]. -fn visit_internal_tables(fragment: &mut StreamFragment, mut f: F) -where - F: FnMut(&mut Table, &'static str), -{ - macro_rules! always { - ($table:expr, $name:expr) => {{ - let table = $table - .as_mut() - .unwrap_or_else(|| panic!("internal table {} should always exist", $name)); - f(table, $name); - }}; - } - - #[allow(unused_macros)] - macro_rules! optional { - ($table:expr, $name:expr) => { - if let Some(table) = &mut $table { - f(table, $name); - } - }; - } - - visit_fragment(fragment, |body| { - match body { - // Join - NodeBody::HashJoin(node) => { - // TODO: make the degree table optional - always!(node.left_table, "HashJoinLeft"); - always!(node.left_degree_table, "HashJoinDegreeLeft"); - always!(node.right_table, "HashJoinRight"); - always!(node.right_degree_table, "HashJoinDegreeRight"); - } - NodeBody::DynamicFilter(node) => { - always!(node.left_table, "DynamicFilterLeft"); - always!(node.right_table, "DynamicFilterRight"); - } - - // Aggregation - NodeBody::HashAgg(node) => { - assert_eq!(node.agg_call_states.len(), node.agg_calls.len()); - always!(node.result_table, "HashAggResult"); - for state in &mut node.agg_call_states { - if let agg_call_state::Inner::MaterializedInputState(s) = - state.inner.as_mut().unwrap() - { - always!(s.table, "HashAgg"); - } - } - } - NodeBody::GlobalSimpleAgg(node) => { - assert_eq!(node.agg_call_states.len(), node.agg_calls.len()); - always!(node.result_table, "GlobalSimpleAggResult"); - for state in &mut node.agg_call_states { - if let agg_call_state::Inner::MaterializedInputState(s) = - state.inner.as_mut().unwrap() - { - always!(s.table, "GlobalSimpleAgg"); - } - } - } - - // Top-N - NodeBody::AppendOnlyTopN(node) => { - always!(node.table, "AppendOnlyTopN"); - } - NodeBody::TopN(node) => { - always!(node.table, "TopN"); - } - NodeBody::GroupTopN(node) => { - always!(node.table, "GroupTopN"); - } - - // Source - NodeBody::Source(node) => { - if let Some(source) = &mut node.source_inner { - always!(source.state_table, "Source"); - } - } - NodeBody::Now(node) => { - always!(node.state_table, "Now"); - } - - // Shared arrangement - NodeBody::Arrange(node) => { - always!(node.table, "Arrange"); - } - - // Note: add internal tables for new nodes here. - _ => {} - } - }) -} +pub use actor::{ActorGraphBuildResult, ActorGraphBuilder}; +pub use fragment::{CompleteStreamFragmentGraph, StreamFragmentGraph}; +pub use schedule::Locations; +pub use visit::visit_fragment; diff --git a/src/meta/src/stream/stream_graph/actor.rs b/src/meta/src/stream/stream_graph/actor.rs new file mode 100644 index 0000000000000..ebcfcbaee8bc8 --- /dev/null +++ b/src/meta/src/stream/stream_graph/actor.rs @@ -0,0 +1,839 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::{BTreeMap, HashMap}; +use std::num::NonZeroUsize; +use std::sync::Arc; + +use assert_matches::assert_matches; +use itertools::Itertools; +use risingwave_common::bail; +use risingwave_common::buffer::Bitmap; +use risingwave_common::hash::{ActorId, ActorMapping, ParallelUnitId}; +use risingwave_common::util::iter_util::ZipEqFast; +use risingwave_pb::meta::table_fragments::Fragment; +use risingwave_pb::stream_plan::stream_node::NodeBody; +use risingwave_pb::stream_plan::update_mutation::MergeUpdate; +use risingwave_pb::stream_plan::{Dispatcher, DispatcherType, MergeNode, StreamActor, StreamNode}; + +use super::id::GlobalFragmentIdsExt; +use super::Locations; +use crate::manager::{IdGeneratorManagerRef, StreamingClusterInfo, StreamingJob}; +use crate::model::{DispatcherId, FragmentId}; +use crate::storage::MetaStore; +use crate::stream::stream_graph::fragment::{ + CompleteStreamFragmentGraph, EdgeId, EitherFragment, StreamFragmentEdge, +}; +use crate::stream::stream_graph::id::{GlobalActorId, GlobalActorIdGen, GlobalFragmentId}; +use crate::stream::stream_graph::schedule; +use crate::stream::stream_graph::schedule::Distribution; +use crate::MetaResult; + +/// The upstream information of an actor during the building process. This will eventually be used +/// to create the `MergeNode`s as the leaf executor of each actor. +#[derive(Debug, Clone)] +struct ActorUpstream { + /// The ID of this edge. + edge_id: EdgeId, + + /// Upstream actors. + actors: Vec, + + /// The fragment ID of this upstream. + fragment_id: GlobalFragmentId, +} + +/// [`ActorBuilder`] builds a stream actor in a stream DAG. +#[derive(Debug)] +struct ActorBuilder { + /// The ID of this actor. + actor_id: GlobalActorId, + + /// The fragment ID of this actor. + fragment_id: GlobalFragmentId, + + /// The body of this actor, verbatim from the frontend. + /// + /// This cannot be directly used for execution, and it will be rewritten after we know all of + /// the upstreams and downstreams in the end. See `rewrite`. + nodes: Arc, + + /// The dispatchers to the downstream actors. + downstreams: HashMap, + + /// The upstream actors. + upstreams: HashMap, + + /// The virtual node bitmap, if this fragment is hash distributed. + vnode_bitmap: Option, +} + +impl ActorBuilder { + fn new( + actor_id: GlobalActorId, + fragment_id: GlobalFragmentId, + vnode_bitmap: Option, + node: Arc, + ) -> Self { + Self { + actor_id, + fragment_id, + nodes: node, + downstreams: HashMap::new(), + upstreams: HashMap::new(), + vnode_bitmap, + } + } + + fn fragment_id(&self) -> GlobalFragmentId { + self.fragment_id + } + + /// Add a dispatcher to this actor. + fn add_dispatcher(&mut self, dispatcher: Dispatcher) { + self.downstreams + .try_insert(dispatcher.dispatcher_id, dispatcher) + .unwrap(); + } + + /// Add an upstream to this actor. + fn add_upstream(&mut self, upstream: ActorUpstream) { + self.upstreams + .try_insert(upstream.edge_id, upstream) + .unwrap(); + } + + /// Rewrite the actor body. + /// + /// During this process, the following things will be done: + /// 1. 
Replace the logical `Exchange` in node's input with `Merge`, which can be executed on the + /// compute nodes. + /// 2. Fill the upstream mview info of the `Merge` node under the `Chain` node. + fn rewrite(&self) -> MetaResult { + self.rewrite_inner(&self.nodes, 0) + } + + fn rewrite_inner(&self, stream_node: &StreamNode, depth: usize) -> MetaResult { + match stream_node.get_node_body()? { + // Leaf node `Exchange`. + NodeBody::Exchange(exchange) => { + // The exchange node should always be the bottom of the plan node. If we find one + // when the depth is 0, it means that the plan node is not well-formed. + if depth == 0 { + bail!( + "there should be no ExchangeNode on the top of the plan node: {:#?}", + stream_node + ) + } + assert!(!stream_node.get_fields().is_empty()); + assert!(stream_node.input.is_empty()); + + // Index the upstreams by the an internal edge ID. + let upstreams = &self.upstreams[&EdgeId::Internal { + link_id: stream_node.get_operator_id(), + }]; + + Ok(StreamNode { + node_body: Some(NodeBody::Merge(MergeNode { + upstream_actor_id: upstreams.actors.as_global_ids(), + upstream_fragment_id: upstreams.fragment_id.as_global_id(), + upstream_dispatcher_type: exchange.get_strategy()?.r#type, + fields: stream_node.get_fields().clone(), + })), + identity: "MergeExecutor".to_string(), + ..stream_node.clone() + }) + } + + // "Leaf" node `Chain`. + NodeBody::Chain(chain_node) => { + let input = stream_node.get_input(); + assert_eq!(input.len(), 2); + + let merge_node = &input[0]; + assert_matches!(merge_node.node_body, Some(NodeBody::Merge(_))); + let batch_plan_node = &input[1]; + assert_matches!(batch_plan_node.node_body, Some(NodeBody::BatchPlan(_))); + + // Index the upstreams by the an external edge ID. + let upstreams = &self.upstreams[&EdgeId::UpstreamExternal { + upstream_table_id: chain_node.table_id, + downstream_fragment_id: self.fragment_id, + }]; + + // As we always use the `NoShuffle` exchange for MV on MV, there should be only one + // upstream. + let upstream_actor_id = upstreams.actors.as_global_ids(); + assert_eq!(upstream_actor_id.len(), 1); + + let chain_input = vec![ + // Fill the merge node with correct upstream info. + StreamNode { + input: vec![], + stream_key: merge_node.stream_key.clone(), + node_body: Some(NodeBody::Merge(MergeNode { + upstream_actor_id, + upstream_fragment_id: upstreams.fragment_id.as_global_id(), + upstream_dispatcher_type: DispatcherType::NoShuffle as _, + fields: chain_node.upstream_fields.clone(), + })), + fields: chain_node.upstream_fields.clone(), + operator_id: merge_node.operator_id, + identity: "MergeExecutor".to_string(), + append_only: stream_node.append_only, + }, + batch_plan_node.clone(), + ]; + + Ok(StreamNode { + input: chain_input, + identity: "ChainExecutor".to_string(), + fields: chain_node.upstream_fields.clone(), + ..stream_node.clone() + }) + } + + // For other nodes, visit the children recursively. + _ => { + let mut new_stream_node = stream_node.clone(); + for (input, new_input) in stream_node + .input + .iter() + .zip_eq_fast(&mut new_stream_node.input) + { + *new_input = self.rewrite_inner(input, depth + 1)?; + } + Ok(new_stream_node) + } + } + } + + /// Build an actor after all the upstreams and downstreams are processed. + fn build(self, job: &StreamingJob) -> MetaResult { + let rewritten_nodes = self.rewrite()?; + + // TODO: store each upstream separately + let upstream_actor_id = self + .upstreams + .into_values() + .flat_map(|ActorUpstream { actors, .. 
}| actors.as_global_ids()) + .collect(); + + Ok(StreamActor { + actor_id: self.actor_id.as_global_id(), + fragment_id: self.fragment_id.as_global_id(), + nodes: Some(rewritten_nodes), + dispatcher: self.downstreams.into_values().collect(), + upstream_actor_id, + vnode_bitmap: self.vnode_bitmap.map(|b| b.to_protobuf()), + mview_definition: job.mview_definition(), + }) + } +} + +/// The required changes to an existing external actor to build the graph of a streaming job. +/// +/// For example, when we're creating an mview on an existing mview, we need to add new downstreams +/// to the upstream actors, by adding new dispatchers. +#[derive(Default)] +struct ExternalChange { + /// The new downstreams to be added, indexed by the dispatcher ID. + new_downstreams: HashMap, + + /// The new upstreams to be added (replaced), indexed by the upstream fragment ID. + new_upstreams: HashMap, +} + +impl ExternalChange { + /// Add a dispatcher to the external actor. + fn add_dispatcher(&mut self, dispatcher: Dispatcher) { + self.new_downstreams + .try_insert(dispatcher.dispatcher_id, dispatcher) + .unwrap(); + } + + /// Add an upstream to the external actor. + fn add_upstream(&mut self, upstream: ActorUpstream) { + self.new_upstreams + .try_insert(upstream.fragment_id, upstream) + .unwrap(); + } +} + +/// The parallel unit location of actors. +type ActorLocations = BTreeMap; + +/// The actual mutable state of building an actor graph. +/// +/// When the fragments are visited in a topological order, actor builders will be added to this +/// state and the scheduled locations will be added. As the building process is run on the +/// **complete graph** which also contains the info of the existing (external) fragments, the info +/// of them will be also recorded. +#[derive(Default)] +struct ActorGraphBuildStateInner { + /// The builders of the actors to be built. + actor_builders: BTreeMap, + + /// The scheduled locations of the actors to be built. + building_locations: ActorLocations, + + /// The required changes to the external actors. See [`ExternalChange`]. + external_changes: BTreeMap, + + /// The actual locations of the external actors. + external_locations: ActorLocations, +} + +/// The information of a fragment, used for parameter passing for `Inner::add_link`. +struct FragmentLinkNode<'a> { + fragment_id: GlobalFragmentId, + actor_ids: &'a [GlobalActorId], + distribution: &'a Distribution, +} + +impl ActorGraphBuildStateInner { + /// Insert new generated actor and record its location. + /// + /// The `vnode_bitmap` should be `Some` for the actors of hash-distributed fragments. + fn add_actor( + &mut self, + actor_id: GlobalActorId, + fragment_id: GlobalFragmentId, + parallel_unit_id: ParallelUnitId, + vnode_bitmap: Option, + node: Arc, + ) { + self.actor_builders + .try_insert( + actor_id, + ActorBuilder::new(actor_id, fragment_id, vnode_bitmap, node), + ) + .unwrap(); + + self.building_locations + .try_insert(actor_id, parallel_unit_id) + .unwrap(); + } + + /// Record the location of an external actor. + fn record_external_location( + &mut self, + actor_id: GlobalActorId, + parallel_unit_id: ParallelUnitId, + ) { + self.external_locations + .try_insert(actor_id, parallel_unit_id) + .unwrap(); + } + + /// Create a new hash dispatcher. 
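The inner state above keeps the locations of actors being built and of pre-existing (external) actors in two separate maps, and lookups fall back from one to the other, which the linking code relies on when checking that no-shuffle pairs are colocated. A reduced sketch with plain `u32` ids (not the real `GlobalActorId`/`ParallelUnitId` types) follows.

```rust
use std::collections::BTreeMap;

/// Separate maps for actors being built and for external actors, with a
/// fallback lookup across both. Illustration only.
#[derive(Default)]
struct LocationState {
    building_locations: BTreeMap<u32, u32>, // actor id -> parallel unit id
    external_locations: BTreeMap<u32, u32>,
}

impl LocationState {
    fn add_building_actor(&mut self, actor: u32, parallel_unit: u32) {
        assert!(self.building_locations.insert(actor, parallel_unit).is_none());
    }

    fn record_external_actor(&mut self, actor: u32, parallel_unit: u32) {
        assert!(self.external_locations.insert(actor, parallel_unit).is_none());
    }

    /// Works for both kinds of actors, which is what the link-building code
    /// relies on when asserting colocation of no-shuffle pairs.
    fn get_location(&self, actor: u32) -> u32 {
        self.building_locations
            .get(&actor)
            .or_else(|| self.external_locations.get(&actor))
            .copied()
            .expect("unknown actor")
    }
}

fn main() {
    let mut state = LocationState::default();
    state.record_external_actor(1, 7); // e.g. an actor of an existing upstream mview
    state.add_building_actor(100, 7);  // a new actor scheduled onto the same unit
    assert_eq!(state.get_location(1), state.get_location(100));
    println!("ok");
}
```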
+ fn new_hash_dispatcher( + column_indices: &[u32], + downstream_fragment_id: GlobalFragmentId, + downstream_actors: &[GlobalActorId], + downstream_actor_mapping: ActorMapping, + ) -> Dispatcher { + Dispatcher { + r#type: DispatcherType::Hash as _, + column_indices: column_indices.to_vec(), + hash_mapping: Some(downstream_actor_mapping.to_protobuf()), + dispatcher_id: downstream_fragment_id.as_global_id() as u64, + downstream_actor_id: downstream_actors.as_global_ids(), + } + } + + /// Create a new dispatcher for non-hash types. + fn new_normal_dispatcher( + dispatcher_type: DispatcherType, + downstream_fragment_id: GlobalFragmentId, + downstream_actors: &[GlobalActorId], + ) -> Dispatcher { + assert_ne!(dispatcher_type, DispatcherType::Hash); + Dispatcher { + r#type: dispatcher_type as _, + column_indices: Vec::new(), + hash_mapping: None, + dispatcher_id: downstream_fragment_id.as_global_id() as u64, + downstream_actor_id: downstream_actors.as_global_ids(), + } + } + + /// Add the new dispatcher for an actor. + /// + /// - If the actor is to be built, the dispatcher will be added to the actor builder. + /// - If the actor is an external actor, the dispatcher will be added to the external changes. + fn add_dispatcher(&mut self, actor_id: GlobalActorId, dispatcher: Dispatcher) { + if let Some(actor_builder) = self.actor_builders.get_mut(&actor_id) { + actor_builder.add_dispatcher(dispatcher); + } else { + self.external_changes + .entry(actor_id) + .or_default() + .add_dispatcher(dispatcher); + } + } + + /// Add the new upstream for an actor. + /// + /// - If the actor is to be built, the upstream will be added to the actor builder. + /// - If the actor is an external actor, the upstream will be added to the external changes. + fn add_upstream(&mut self, actor_id: GlobalActorId, upstream: ActorUpstream) { + if let Some(actor_builder) = self.actor_builders.get_mut(&actor_id) { + actor_builder.add_upstream(upstream); + } else { + self.external_changes + .entry(actor_id) + .or_default() + .add_upstream(upstream); + } + } + + /// Get the location of an actor. Will look up the location map of both the actors to be built + /// and the external actors. + fn get_location(&self, actor_id: GlobalActorId) -> ParallelUnitId { + self.building_locations + .get(&actor_id) + .copied() + .or_else(|| self.external_locations.get(&actor_id).copied()) + .unwrap() + } + + /// Add a "link" between two fragments in the graph. + /// + /// The `edge` will be expanded into multiple (downstream - upstream) pairs for the actors in + /// the two fragments, based on the distribution and the dispatch strategy. They will be + /// finally transformed to `Dispatcher` and `Merge` nodes when building the actors. + /// + /// If there're existing (external) fragments, the info will be recorded in `external_changes`, + /// instead of the actor builders. + fn add_link<'a>( + &mut self, + upstream: FragmentLinkNode<'a>, + downstream: FragmentLinkNode<'a>, + edge: &'a StreamFragmentEdge, + ) { + let dt = edge.dispatch_strategy.r#type(); + + match dt { + // For `NoShuffle`, make n "1-1" links between the actors. + DispatcherType::NoShuffle => { + for (upstream_id, downstream_id) in upstream + .actor_ids + .iter() + .zip_eq_fast(downstream.actor_ids.iter()) + { + // Assert that the each actor pair is in the same location. 
+ let upstream_location = self.get_location(*upstream_id); + let downstream_location = self.get_location(*downstream_id); + assert_eq!(upstream_location, downstream_location); + + // Create a new dispatcher just between these two actors. + self.add_dispatcher( + *upstream_id, + Self::new_normal_dispatcher(dt, downstream.fragment_id, &[*downstream_id]), + ); + + // Also record the upstream for the downstream actor. + self.add_upstream( + *downstream_id, + ActorUpstream { + edge_id: edge.id, + actors: vec![*upstream_id], + fragment_id: upstream.fragment_id, + }, + ); + } + } + + // Otherwise, make m * n links between the actors. + DispatcherType::Hash | DispatcherType::Broadcast | DispatcherType::Simple => { + // Add dispatchers for the upstream actors. + let dispatcher = if let DispatcherType::Hash = dt { + // Transform the `ParallelUnitMapping` from the downstream distribution to the + // `ActorMapping`, used for the `HashDispatcher` for the upstream actors. + let downstream_locations: HashMap = downstream + .actor_ids + .iter() + .map(|&actor_id| (self.get_location(actor_id), actor_id.as_global_id())) + .collect(); + let actor_mapping = downstream + .distribution + .as_hash() + .unwrap() + .to_actor(&downstream_locations); + + Self::new_hash_dispatcher( + &edge.dispatch_strategy.column_indices, + downstream.fragment_id, + downstream.actor_ids, + actor_mapping, + ) + } else { + Self::new_normal_dispatcher(dt, downstream.fragment_id, downstream.actor_ids) + }; + for upstream_id in upstream.actor_ids { + self.add_dispatcher(*upstream_id, dispatcher.clone()); + } + + // Add upstreams for the downstream actors. + let actor_upstream = ActorUpstream { + edge_id: edge.id, + actors: upstream.actor_ids.to_vec(), + fragment_id: upstream.fragment_id, + }; + for downstream_id in downstream.actor_ids { + self.add_upstream(*downstream_id, actor_upstream.clone()); + } + } + + DispatcherType::Unspecified => unreachable!(), + } + } +} + +/// The mutable state of building an actor graph. See [`ActorGraphBuildStateInner`]. +struct ActorGraphBuildState { + /// The actual state. + inner: ActorGraphBuildStateInner, + + /// The actor IDs of each fragment. + fragment_actors: HashMap>, + + /// The next local actor id to use. + next_local_id: u32, + + /// The global actor id generator. + actor_id_gen: GlobalActorIdGen, +} + +impl ActorGraphBuildState { + /// Create an empty state with the given id generator. + fn new(actor_id_gen: GlobalActorIdGen) -> Self { + Self { + inner: Default::default(), + fragment_actors: Default::default(), + next_local_id: 0, + actor_id_gen, + } + } + + /// Get the next global actor id. + fn next_actor_id(&mut self) -> GlobalActorId { + let local_id = self.next_local_id; + self.next_local_id += 1; + + self.actor_id_gen.to_global_id(local_id) + } + + /// Finish the build and return the inner state. + fn finish(self) -> ActorGraphBuildStateInner { + // Assert that all the actors are built. + assert_eq!(self.actor_id_gen.len(), self.next_local_id); + + self.inner + } +} + +/// The result of a built actor graph. Will be further embedded into the `Context` for building +/// actors on the compute nodes. +pub struct ActorGraphBuildResult { + /// The graph of sealed fragments, including all actors. + pub graph: BTreeMap, + + /// The scheduled locations of the actors to be built. + pub building_locations: Locations, + + /// The actual locations of the external actors. + pub existing_locations: Locations, + + /// The new dispatchers to be added to the upstream mview actors. Used for MV on MV. 
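For hash dispatchers, the code above composes the downstream fragment's vnode → parallel-unit mapping with the parallel-unit → actor locations to obtain the vnode → actor mapping carried by the dispatcher. A reduced sketch of that composition, using 8 vnodes and plain `u32` ids instead of the real mapping types, is shown below.

```rust
use std::collections::HashMap;

/// Compose vnode -> parallel unit with parallel unit -> actor to get the
/// vnode -> actor mapping used for hashing rows. Illustration only.
fn to_actor_mapping(
    vnode_to_unit: &[u32],
    unit_to_actor: &HashMap<u32, u32>,
) -> Vec<u32> {
    vnode_to_unit
        .iter()
        .map(|unit| unit_to_actor[unit]) // every referenced unit hosts exactly one actor
        .collect()
}

fn main() {
    // Two parallel units (5 and 6) share the 8 vnodes evenly.
    let vnode_to_unit: [u32; 8] = [5, 5, 5, 5, 6, 6, 6, 6];
    // The downstream fragment placed actor 100 on unit 5 and actor 101 on unit 6.
    let unit_to_actor: HashMap<u32, u32> = HashMap::from([(5, 100), (6, 101)]);

    let vnode_to_actor = to_actor_mapping(&vnode_to_unit, &unit_to_actor);
    assert_eq!(vnode_to_actor, vec![100, 100, 100, 100, 101, 101, 101, 101]);
    println!("{vnode_to_actor:?}");
}
```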
+ pub dispatchers: HashMap>, + + /// The updates to be applied to the downstream chain actors. Used for schema change (replace + /// table plan). + pub merge_updates: Vec, +} + +/// [`ActorGraphBuilder`] builds the actor graph for the given complete fragment graph, based on the +/// current cluster info and the required parallelism. +pub struct ActorGraphBuilder { + /// The pre-scheduled distribution for each building fragment. + distributions: HashMap, + + /// The actual distribution for each existing fragment. + existing_distributions: HashMap, + + /// The complete fragment graph. + fragment_graph: CompleteStreamFragmentGraph, + + /// The cluster info for creating a streaming job. + cluster_info: StreamingClusterInfo, +} + +impl ActorGraphBuilder { + /// Create a new actor graph builder with the given "complete" graph. Returns an error if the + /// graph is failed to be scheduled. + pub fn new( + fragment_graph: CompleteStreamFragmentGraph, + cluster_info: StreamingClusterInfo, + default_parallelism: Option, + ) -> MetaResult { + let existing_distributions = fragment_graph.existing_distribution(); + + // Schedule the distribution of all building fragments. + let distributions = schedule::Scheduler::new( + cluster_info.parallel_units.values().cloned(), + default_parallelism, + )? + .schedule(&fragment_graph)?; + + Ok(Self { + distributions, + existing_distributions, + fragment_graph, + cluster_info, + }) + } + + /// Get the distribution of the given fragment. Will look up the distribution map of both the + /// building and existing fragments. + fn get_distribution(&self, fragment_id: GlobalFragmentId) -> &Distribution { + self.distributions + .get(&fragment_id) + .or_else(|| self.existing_distributions.get(&fragment_id)) + .unwrap() + } + + /// Convert the actor location map to the [`Locations`] struct. + fn build_locations(&self, actor_locations: ActorLocations) -> Locations { + let actor_locations = actor_locations + .into_iter() + .map(|(id, p)| { + ( + id.as_global_id(), + self.cluster_info.parallel_units[&p].clone(), + ) + }) + .collect(); + + let worker_locations = self.cluster_info.worker_nodes.clone(); + + Locations { + actor_locations, + worker_locations, + } + } + + /// Build a stream graph by duplicating each fragment as parallel actors. Returns + /// [`ActorGraphBuildResult`] that will be further used to build actors on the compute nodes. + pub async fn generate_graph( + self, + id_gen_manager: IdGeneratorManagerRef, + job: &StreamingJob, + ) -> MetaResult + where + S: MetaStore, + { + // Pre-generate IDs for all actors. + let actor_len = self + .distributions + .values() + .map(|d| d.parallelism()) + .sum::() as u64; + let id_gen = GlobalActorIdGen::new(&id_gen_manager, actor_len).await?; + + // Build the actor graph and get the final state. + let ActorGraphBuildStateInner { + actor_builders, + building_locations, + external_changes, + external_locations, + } = self.build_actor_graph(id_gen)?; + + // Serialize the graph into a map of sealed fragments. + let graph = { + let mut actors: HashMap> = HashMap::new(); + + // As all fragments are processed, we can now `build` the actors where the `Exchange` + // and `Chain` are rewritten. 
+ for builder in actor_builders.into_values() { + let fragment_id = builder.fragment_id(); + let actor = builder.build(job)?; + actors.entry(fragment_id).or_default().push(actor); + } + + actors + .into_iter() + .map(|(fragment_id, actors)| { + let distribution = self.distributions[&fragment_id].clone(); + let fragment = + self.fragment_graph + .seal_fragment(fragment_id, actors, distribution); + let fragment_id = fragment_id.as_global_id(); + (fragment_id, fragment) + }) + .collect() + }; + + // Convert the actor location map to the `Locations` struct. + let building_locations = self.build_locations(building_locations); + let existing_locations = self.build_locations(external_locations); + + // Extract the new dispatchers from the external changes. + let dispatchers = external_changes + .iter() + .map(|(actor_id, change)| { + ( + actor_id.as_global_id(), + change.new_downstreams.values().cloned().collect_vec(), + ) + }) + .filter(|(_, v)| !v.is_empty()) + .collect(); + + // Extract the updates for merge executors from the external changes. + let merge_updates = external_changes + .iter() + .flat_map(|(actor_id, change)| { + change + .new_upstreams + .values() + .map(move |upstream| { + let EdgeId::DownstreamExternal { original_upstream_fragment_id, .. } = upstream.edge_id else { + unreachable!("edge from internal to external must be `DownstreamExternal`") + }; + + MergeUpdate { + actor_id: actor_id.as_global_id(), + upstream_fragment_id: original_upstream_fragment_id.as_global_id(), + new_upstream_fragment_id: Some(upstream.fragment_id.as_global_id()), + added_upstream_actor_id: upstream.actors.as_global_ids(), + removed_upstream_actor_id: vec![], + } + }) + }) + .collect(); + + Ok(ActorGraphBuildResult { + graph, + building_locations, + existing_locations, + dispatchers, + merge_updates, + }) + } + + /// Build actor graph for each fragment, using topological order. + fn build_actor_graph(&self, id_gen: GlobalActorIdGen) -> MetaResult { + let mut state = ActorGraphBuildState::new(id_gen); + + // Use topological sort to build the graph from downstream to upstream. (The first fragment + // popped out from the heap will be the top-most node in plan, or the sink in stream graph.) + for fragment_id in self.fragment_graph.topo_order()? { + self.build_actor_graph_fragment(fragment_id, &mut state)?; + } + + Ok(state.finish()) + } + + /// Build actor graph for a specific fragment. + fn build_actor_graph_fragment( + &self, + fragment_id: GlobalFragmentId, + state: &mut ActorGraphBuildState, + ) -> MetaResult<()> { + let current_fragment = self.fragment_graph.get_fragment(fragment_id); + let distribution = self.get_distribution(fragment_id); + + // First, add or record the actors for the current fragment into the state. + let actor_ids = match current_fragment { + // For building fragments, we need to generate the actor builders. + EitherFragment::Building(current_fragment) => { + let node = Arc::new(current_fragment.node.clone().unwrap()); + let bitmaps = distribution.as_hash().map(|m| m.to_bitmaps()); + + distribution + .parallel_units() + .map(|parallel_unit_id| { + let actor_id = state.next_actor_id(); + let vnode_bitmap = bitmaps.as_ref().map(|m| &m[¶llel_unit_id]).cloned(); + + state.inner.add_actor( + actor_id, + fragment_id, + parallel_unit_id, + vnode_bitmap, + node.clone(), + ); + + actor_id + }) + .collect_vec() + } + + // For existing fragments, we only need to record the actor locations. 
+ EitherFragment::Existing(existing_fragment) => existing_fragment + .actors + .iter() + .map(|a| { + let actor_id = GlobalActorId::new(a.actor_id); + let parallel_unit_id = match &distribution { + Distribution::Singleton(parallel_unit_id) => *parallel_unit_id, + Distribution::Hash(mapping) => mapping + .get_matched(&Bitmap::from(a.get_vnode_bitmap().unwrap())) + .unwrap(), + }; + + state + .inner + .record_external_location(actor_id, parallel_unit_id); + + actor_id + }) + .collect_vec(), + }; + + // Then, add links between the current fragment and its downstream fragments. + for (downstream_fragment_id, edge) in self.fragment_graph.get_downstreams(fragment_id) { + let downstream_actors = state + .fragment_actors + .get(&downstream_fragment_id) + .expect("downstream fragment not processed yet"); + + let downstream_distribution = self.get_distribution(downstream_fragment_id); + + state.inner.add_link( + FragmentLinkNode { + fragment_id, + actor_ids: &actor_ids, + distribution, + }, + FragmentLinkNode { + fragment_id: downstream_fragment_id, + actor_ids: downstream_actors, + distribution: downstream_distribution, + }, + edge, + ); + } + + // Finally, record the actor IDs for the current fragment. + state + .fragment_actors + .try_insert(fragment_id, actor_ids) + .unwrap_or_else(|_| panic!("fragment {:?} is already processed", fragment_id)); + + Ok(()) + } +} diff --git a/src/meta/src/stream/stream_graph/fragment.rs b/src/meta/src/stream/stream_graph/fragment.rs new file mode 100644 index 0000000000000..187300fdceaf8 --- /dev/null +++ b/src/meta/src/stream/stream_graph/fragment.rs @@ -0,0 +1,691 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::{HashMap, HashSet}; +use std::num::NonZeroUsize; +use std::ops::Deref; +use std::sync::LazyLock; + +use anyhow::{anyhow, Context}; +use enum_as_inner::EnumAsInner; +use itertools::Itertools; +use risingwave_common::bail; +use risingwave_common::catalog::{generate_internal_table_name_with_type, TableId}; +use risingwave_pb::catalog::Table; +use risingwave_pb::meta::table_fragments::fragment::FragmentDistributionType; +use risingwave_pb::meta::table_fragments::Fragment; +use risingwave_pb::stream_plan::stream_fragment_graph::{ + Parallelism, StreamFragment, StreamFragmentEdge as StreamFragmentEdgeProto, +}; +use risingwave_pb::stream_plan::stream_node::NodeBody; +use risingwave_pb::stream_plan::{ + DispatchStrategy, DispatcherType, StreamActor, StreamFragmentGraph as StreamFragmentGraphProto, +}; + +use crate::manager::{IdGeneratorManagerRef, StreamingJob}; +use crate::model::FragmentId; +use crate::storage::MetaStore; +use crate::stream::stream_graph::id::{GlobalFragmentId, GlobalFragmentIdGen, GlobalTableIdGen}; +use crate::stream::stream_graph::schedule::Distribution; +use crate::stream::stream_graph::visit; +use crate::MetaResult; + +/// The fragment in the building phase, including the [`StreamFragment`] from the frontend and +/// several additional helper fields. 
+#[derive(Debug, Clone)]
+pub(super) struct BuildingFragment {
+    /// The fragment structure from the frontend, with the global fragment ID.
+    inner: StreamFragment,
+
+    /// A clone of the internal tables in this fragment.
+    internal_tables: Vec<Table>,
+
+    /// The ID of the job if it's materialized in this fragment.
+    table_id: Option<u32>,
+}
+
+impl BuildingFragment {
+    /// Create a new [`BuildingFragment`] from a [`StreamFragment`]. The global fragment ID and
+    /// global table IDs will be correctly filled with the given `id` and `table_id_gen`.
+    fn new(
+        id: GlobalFragmentId,
+        fragment: StreamFragment,
+        job: &StreamingJob,
+        table_id_gen: GlobalTableIdGen,
+    ) -> Self {
+        let mut fragment = StreamFragment {
+            fragment_id: id.as_global_id(),
+            ..fragment
+        };
+        let internal_tables = Self::fill_internal_tables(&mut fragment, job, table_id_gen);
+        let table_id = Self::fill_job(&mut fragment, job).then(|| job.id());
+
+        Self {
+            inner: fragment,
+            internal_tables,
+            table_id,
+        }
+    }
+
+    /// Fill the information of the internal tables in the fragment.
+    fn fill_internal_tables(
+        fragment: &mut StreamFragment,
+        job: &StreamingJob,
+        table_id_gen: GlobalTableIdGen,
+    ) -> Vec<Table>
{ + let fragment_id = fragment.fragment_id; + let mut internal_tables = Vec::new(); + + visit::visit_internal_tables(fragment, |table, table_type_name| { + table.id = table_id_gen.to_global_id(table.id).as_global_id(); + table.schema_id = job.schema_id(); + table.database_id = job.database_id(); + table.name = generate_internal_table_name_with_type( + &job.name(), + fragment_id, + table.id, + table_type_name, + ); + table.fragment_id = fragment_id; + + // Record the internal table. + internal_tables.push(table.clone()); + }); + + internal_tables + } + + /// Fill the information of the job in the fragment. + fn fill_job(fragment: &mut StreamFragment, job: &StreamingJob) -> bool { + let table_id = job.id(); + let fragment_id = fragment.fragment_id; + let mut has_table = false; + + visit::visit_fragment(fragment, |node_body| match node_body { + NodeBody::Materialize(materialize_node) => { + materialize_node.table_id = table_id; + + // Fill the ID of the `Table`. + let table = materialize_node.table.as_mut().unwrap(); + table.id = table_id; + table.database_id = job.database_id(); + table.schema_id = job.schema_id(); + table.fragment_id = fragment_id; + + has_table = true; + } + NodeBody::Sink(sink_node) => { + sink_node.sink_desc.as_mut().unwrap().id = table_id; + + has_table = true; + } + NodeBody::Dml(dml_node) => { + dml_node.table_id = table_id; + } + _ => {} + }); + + has_table + } +} + +impl Deref for BuildingFragment { + type Target = StreamFragment; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +/// The ID of an edge in the fragment graph. For different types of edges, the ID will be in +/// different variants. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumAsInner)] +pub(super) enum EdgeId { + /// The edge between two building (internal) fragments. + Internal { + /// The ID generated by the frontend, generally the operator ID of `Exchange`. + /// See [`StreamFragmentEdgeProto`]. + link_id: u64, + }, + + /// The edge between an upstream external fragment and downstream building fragment. Used for + /// MV on MV. + UpstreamExternal { + /// The ID of the upstream table or materialized view. + upstream_table_id: u32, + /// The ID of the downstream fragment. + downstream_fragment_id: GlobalFragmentId, + }, + + /// The edge between an upstream building fragment and downstream external fragment. Used for + /// schema change (replace table plan). + DownstreamExternal { + /// The ID of the original upstream fragment (`Materialize`). + original_upstream_fragment_id: GlobalFragmentId, + /// The ID of the downstream fragment. + downstream_fragment_id: GlobalFragmentId, + }, +} + +/// The edge in the fragment graph. +/// +/// The edge can be either internal or external. This is distinguished by the [`EdgeId`]. +#[derive(Debug, Clone)] +pub(super) struct StreamFragmentEdge { + /// The ID of the edge. + pub id: EdgeId, + + /// The strategy used for dispatching the data. + pub dispatch_strategy: DispatchStrategy, +} + +impl StreamFragmentEdge { + fn from_protobuf(edge: &StreamFragmentEdgeProto) -> Self { + Self { + // By creating an edge from the protobuf, we know that the edge is from the frontend and + // is internal. + id: EdgeId::Internal { + link_id: edge.link_id, + }, + dispatch_strategy: edge.get_dispatch_strategy().unwrap().clone(), + } + } +} + +/// In-memory representation of a **Fragment** Graph, built from the [`StreamFragmentGraphProto`] +/// from the frontend. +#[derive(Default)] +pub struct StreamFragmentGraph { + /// stores all the fragments in the graph. 
+ fragments: HashMap, + + /// stores edges between fragments: upstream => downstream. + downstreams: HashMap>, + + /// stores edges between fragments: downstream -> upstream. + upstreams: HashMap>, + + /// Dependent relations of this job. + dependent_relations: HashSet, + + /// The default parallelism of the job, specified by the `STREAMING_PARALLELISM` session + /// variable. If not specified, all active parallel units will be used. + default_parallelism: Option, +} + +impl StreamFragmentGraph { + /// Create a new [`StreamFragmentGraph`] from the given [`StreamFragmentGraphProto`], with all + /// global IDs correctly filled. + pub async fn new( + proto: StreamFragmentGraphProto, + id_gen: IdGeneratorManagerRef, + job: &StreamingJob, + ) -> MetaResult { + let fragment_id_gen = + GlobalFragmentIdGen::new(&id_gen, proto.fragments.len() as u64).await?; + let table_id_gen = GlobalTableIdGen::new(&id_gen, proto.table_ids_cnt as u64).await?; + + // Create nodes. + let fragments: HashMap<_, _> = proto + .fragments + .into_iter() + .map(|(id, fragment)| { + let id = fragment_id_gen.to_global_id(id); + let fragment = BuildingFragment::new(id, fragment, job, table_id_gen); + (id, fragment) + }) + .collect(); + + assert_eq!( + fragments + .values() + .map(|f| f.internal_tables.len() as u32) + .sum::(), + proto.table_ids_cnt + ); + + // Create edges. + let mut downstreams = HashMap::new(); + let mut upstreams = HashMap::new(); + + for edge in proto.edges { + let upstream_id = fragment_id_gen.to_global_id(edge.upstream_id); + let downstream_id = fragment_id_gen.to_global_id(edge.downstream_id); + let edge = StreamFragmentEdge::from_protobuf(&edge); + + upstreams + .entry(downstream_id) + .or_insert_with(HashMap::new) + .try_insert(upstream_id, edge.clone()) + .unwrap(); + downstreams + .entry(upstream_id) + .or_insert_with(HashMap::new) + .try_insert(downstream_id, edge) + .unwrap(); + } + + // Note: Here we directly use the field `dependent_table_ids` in the proto (resolved in + // frontend), instead of visiting the graph ourselves. Note that for creating table with a + // connector, the source itself is NOT INCLUDED in this list. + let dependent_relations = proto + .dependent_table_ids + .iter() + .map(TableId::from) + .collect(); + + let default_parallelism = if let Some(Parallelism { parallelism }) = proto.parallelism { + Some(NonZeroUsize::new(parallelism as usize).context("parallelism should not be 0")?) + } else { + None + }; + + Ok(Self { + fragments, + downstreams, + upstreams, + dependent_relations, + default_parallelism, + }) + } + + /// Retrieve the internal tables map of the whole graph. + pub fn internal_tables(&self) -> HashMap { + let mut tables = HashMap::new(); + for fragment in self.fragments.values() { + for table in &fragment.internal_tables { + tables + .try_insert(table.id, table.clone()) + .unwrap_or_else(|_| panic!("duplicated table id `{}`", table.id)); + } + } + tables + } + + /// Returns the fragment id where the table is materialized. + pub fn table_fragment_id(&self) -> FragmentId { + self.fragments + .values() + .filter(|b| b.table_id.is_some()) + .map(|b| b.fragment_id) + .exactly_one() + .expect("require exactly 1 materialize/sink node when creating the streaming job") + } + + /// Get the dependent relations of this job. + pub fn dependent_relations(&self) -> &HashSet { + &self.dependent_relations + } + + /// Get the default parallelism of the job. + pub fn default_parallelism(&self) -> Option { + self.default_parallelism + } + + /// Get downstreams of a fragment. 
+ fn get_downstreams( + &self, + fragment_id: GlobalFragmentId, + ) -> &HashMap { + self.downstreams.get(&fragment_id).unwrap_or(&EMPTY_HASHMAP) + } + + /// Get upstreams of a fragment. + fn get_upstreams( + &self, + fragment_id: GlobalFragmentId, + ) -> &HashMap { + self.upstreams.get(&fragment_id).unwrap_or(&EMPTY_HASHMAP) + } +} + +static EMPTY_HASHMAP: LazyLock> = + LazyLock::new(HashMap::new); + +/// A fragment that is either being built or already exists. Used for generalize the logic of +/// [`ActorGraphBuilder`]. +#[derive(Debug, Clone, EnumAsInner)] +pub(super) enum EitherFragment { + /// An internal fragment that is being built for the current streaming job. + Building(BuildingFragment), + + /// An existing fragment that is external but connected to the fragments being built. + Existing(Fragment), +} + +/// A wrapper of [`StreamFragmentGraph`] that contains the additional information of existing +/// fragments, which is connected to the graph's top-most or bottom-most fragments. +/// +/// For example, +/// - if we're going to build a mview on an existing mview, the upstream fragment containing the +/// `Materialize` node will be included in this structure. +/// - if we're going to replace the plan of a table with downstream mviews, the downstream fragments +/// containing the `Chain` nodes will be included in this structure. +pub struct CompleteStreamFragmentGraph { + /// The fragment graph of the streaming job being built. + building_graph: StreamFragmentGraph, + + /// The required information of existing fragments. + existing_fragments: HashMap, + + /// Extra edges between existing fragments and the building fragments. + extra_downstreams: HashMap>, + + /// Extra edges between existing fragments and the building fragments. + extra_upstreams: HashMap>, +} + +impl CompleteStreamFragmentGraph { + /// Create a new [`CompleteStreamFragmentGraph`] with empty existing fragments, i.e., there's no + /// upstream mviews. + #[cfg(test)] + pub fn for_test(graph: StreamFragmentGraph) -> Self { + Self { + building_graph: graph, + existing_fragments: Default::default(), + extra_downstreams: Default::default(), + extra_upstreams: Default::default(), + } + } + + /// Create a new [`CompleteStreamFragmentGraph`] for MV on MV, with the upstream existing + /// `Materialize` fragments. + pub fn with_upstreams( + graph: StreamFragmentGraph, + upstream_mview_fragments: HashMap, + ) -> MetaResult { + let mut extra_downstreams = HashMap::new(); + let mut extra_upstreams = HashMap::new(); + + // Build the extra edges between the upstream `Materialize` and the downstream `Chain` of + // the new materialized view. + for (&id, fragment) in &graph.fragments { + for &upstream_table_id in &fragment.upstream_table_ids { + let mview_fragment = upstream_mview_fragments + .get(&TableId::new(upstream_table_id)) + .context("upstream materialized view fragment not found")?; + let mview_id = GlobalFragmentId::new(mview_fragment.fragment_id); + + let edge = StreamFragmentEdge { + id: EdgeId::UpstreamExternal { + upstream_table_id, + downstream_fragment_id: id, + }, + // We always use `NoShuffle` for the exchange between the upstream `Materialize` + // and the downstream `Chain` of the new materialized view. 
+ dispatch_strategy: DispatchStrategy { + r#type: DispatcherType::NoShuffle as _, + ..Default::default() + }, + }; + + extra_downstreams + .entry(mview_id) + .or_insert_with(HashMap::new) + .try_insert(id, edge.clone()) + .unwrap(); + extra_upstreams + .entry(id) + .or_insert_with(HashMap::new) + .try_insert(mview_id, edge) + .unwrap(); + } + } + + let existing_fragments = upstream_mview_fragments + .into_values() + .map(|f| (GlobalFragmentId::new(f.fragment_id), f)) + .collect(); + + Ok(Self { + building_graph: graph, + existing_fragments, + extra_downstreams, + extra_upstreams, + }) + } + + /// Create a new [`CompleteStreamFragmentGraph`] for replacing an existing table, with the + /// downstream existing `Chain` fragments. + pub fn with_downstreams( + graph: StreamFragmentGraph, + downstream_fragments: Vec, + ) -> MetaResult { + let mut extra_downstreams = HashMap::new(); + let mut extra_upstreams = HashMap::new(); + + let original_table_fragment_id = GlobalFragmentId::new( + downstream_fragments + .iter() + .flat_map(|f| f.upstream_fragment_ids.iter().copied()) + .unique() + .exactly_one() + .map_err(|_| { + anyhow!("downstream fragments must have exactly one upstream fragment") + })?, + ); + + let table_fragment_id = GlobalFragmentId::new(graph.table_fragment_id()); + + // Build the extra edges between the `Materialize` and the downstream `Chain` of the + // existing materialized views. + for fragment in &downstream_fragments { + let id = GlobalFragmentId::new(fragment.fragment_id); + + let edge = StreamFragmentEdge { + id: EdgeId::DownstreamExternal { + original_upstream_fragment_id: original_table_fragment_id, + downstream_fragment_id: id, + }, + // We always use `NoShuffle` for the exchange between the upstream `Materialize` + // and the downstream `Chain` of the new materialized view. + dispatch_strategy: DispatchStrategy { + r#type: DispatcherType::NoShuffle as _, + ..Default::default() + }, + }; + + extra_downstreams + .entry(table_fragment_id) + .or_insert_with(HashMap::new) + .try_insert(id, edge.clone()) + .unwrap(); + extra_upstreams + .entry(id) + .or_insert_with(HashMap::new) + .try_insert(table_fragment_id, edge) + .unwrap(); + } + + let existing_fragments = downstream_fragments + .into_iter() + .map(|f| (GlobalFragmentId::new(f.fragment_id), f)) + .collect(); + + Ok(Self { + building_graph: graph, + existing_fragments, + extra_downstreams, + extra_upstreams, + }) + } +} + +impl CompleteStreamFragmentGraph { + /// Returns **all** fragment IDs in the complete graph, including the ones that are not in the + /// building graph. + pub(super) fn all_fragment_ids(&self) -> impl Iterator + '_ { + self.building_graph + .fragments + .keys() + .chain(self.existing_fragments.keys()) + .copied() + } + + /// Returns an iterator of **all** edges in the complete graph, including the external edges. + pub(super) fn all_edges( + &self, + ) -> impl Iterator + '_ { + self.building_graph + .downstreams + .iter() + .chain(self.extra_downstreams.iter()) + .flat_map(|(&from, tos)| tos.iter().map(move |(&to, edge)| (from, to, edge))) + } + + /// Returns the distribution of the existing fragments. + pub(super) fn existing_distribution(&self) -> HashMap { + self.existing_fragments + .iter() + .map(|(&id, f)| (id, Distribution::from_fragment(f))) + .collect() + } + + /// Generate topological order of **all** fragments in this graph, including the ones that are + /// not in the building graph. Returns error if the graph is not a DAG and topological sort can + /// not be done. 
+ /// + /// For MV on MV, the first fragment popped out from the heap will be the top-most node, or the + /// `Sink` / `Materialize` in stream graph. + pub(super) fn topo_order(&self) -> MetaResult> { + let mut topo = Vec::new(); + let mut downstream_cnts = HashMap::new(); + + // Iterate all fragments. + for fragment_id in self.all_fragment_ids() { + // Count how many downstreams we have for a given fragment. + let downstream_cnt = self.get_downstreams(fragment_id).count(); + if downstream_cnt == 0 { + topo.push(fragment_id); + } else { + downstream_cnts.insert(fragment_id, downstream_cnt); + } + } + + let mut i = 0; + while let Some(&fragment_id) = topo.get(i) { + i += 1; + // Find if we can process more fragments. + for (upstream_id, _) in self.get_upstreams(fragment_id) { + let downstream_cnt = downstream_cnts.get_mut(&upstream_id).unwrap(); + *downstream_cnt -= 1; + if *downstream_cnt == 0 { + downstream_cnts.remove(&upstream_id); + topo.push(upstream_id); + } + } + } + + if !downstream_cnts.is_empty() { + // There are fragments that are not processed yet. + bail!("graph is not a DAG"); + } + + Ok(topo) + } + + /// Seal a [`BuildingFragment`] from the graph into a [`Fragment`], which will be further used + /// to build actors on the compute nodes and persist into meta store. + pub(super) fn seal_fragment( + &self, + id: GlobalFragmentId, + actors: Vec, + distribution: Distribution, + ) -> Fragment { + let BuildingFragment { + inner, + internal_tables, + table_id, + } = self.get_fragment(id).into_building().unwrap(); + + let distribution_type = if inner.is_singleton { + FragmentDistributionType::Single + } else { + FragmentDistributionType::Hash + } as i32; + + let state_table_ids = internal_tables + .iter() + .map(|t| t.id) + .chain(table_id) + .collect(); + + let upstream_fragment_ids = self + .get_upstreams(id) + .map(|(id, _)| id.as_global_id()) + .collect(); + + Fragment { + fragment_id: inner.fragment_id, + fragment_type_mask: inner.fragment_type_mask, + distribution_type, + actors, + vnode_mapping: Some(distribution.into_mapping().to_protobuf()), + state_table_ids, + upstream_fragment_ids, + } + } + + /// Get a fragment from the complete graph, which can be either a building fragment or an + /// existing fragment. + pub(super) fn get_fragment(&self, fragment_id: GlobalFragmentId) -> EitherFragment { + if let Some(fragment) = self.existing_fragments.get(&fragment_id) { + EitherFragment::Existing(fragment.clone()) + } else { + EitherFragment::Building( + self.building_graph + .fragments + .get(&fragment_id) + .unwrap() + .clone(), + ) + } + } + + /// Get **all** downstreams of a fragment, including the ones that are not in the building + /// graph. + pub(super) fn get_downstreams( + &self, + fragment_id: GlobalFragmentId, + ) -> impl Iterator { + self.building_graph + .get_downstreams(fragment_id) + .iter() + .chain( + self.extra_downstreams + .get(&fragment_id) + .into_iter() + .flatten(), + ) + .map(|(&id, edge)| (id, edge)) + } + + /// Get **all** upstreams of a fragment, including the ones that are not in the building + /// graph. + pub(super) fn get_upstreams( + &self, + fragment_id: GlobalFragmentId, + ) -> impl Iterator { + self.building_graph + .get_upstreams(fragment_id) + .iter() + .chain(self.extra_upstreams.get(&fragment_id).into_iter().flatten()) + .map(|(&id, edge)| (id, edge)) + } + + /// Returns all building fragments in the graph. 
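An aside on the traversal above: `topo_order` is a Kahn-style topological sort driven by downstream counts, so the first fragments popped are the ones with no downstream at all (the top-most `Materialize`/`Sink`). A self-contained sketch of the same idea on plain `u32` IDs, with made-up adjacency maps rather than the graph types in this file, looks roughly like this:

use std::collections::HashMap;

/// Kahn-style order from downstream to upstream. `downstreams`/`upstreams` are
/// assumed to be exact transposes of each other and to cover every ID in `all_ids`.
fn topo_order(
    all_ids: &[u32],
    downstreams: &HashMap<u32, Vec<u32>>,
    upstreams: &HashMap<u32, Vec<u32>>,
) -> Option<Vec<u32>> {
    let mut topo = Vec::new();
    let mut downstream_cnts = HashMap::new();

    // Seed with fragments that have no downstream at all.
    for &id in all_ids {
        match downstreams.get(&id).map_or(0, |v| v.len()) {
            0 => topo.push(id),
            cnt => {
                downstream_cnts.insert(id, cnt);
            }
        }
    }

    // Emitting a fragment releases one downstream count on each of its upstreams.
    let mut i = 0;
    while let Some(&id) = topo.get(i) {
        i += 1;
        for &up in upstreams.get(&id).into_iter().flatten() {
            let cnt = downstream_cnts.get_mut(&up).unwrap();
            *cnt -= 1;
            if *cnt == 0 {
                downstream_cnts.remove(&up);
                topo.push(up);
            }
        }
    }

    // Any leftover count means a cycle, i.e. the graph is not a DAG.
    downstream_cnts.is_empty().then_some(topo)
}

fn main() {
    // Chain 3 -> 2 -> 1, where 1 is the `Materialize`: expect [1, 2, 3].
    let downstreams = HashMap::from([(3, vec![2]), (2, vec![1])]);
    let upstreams = HashMap::from([(1, vec![2]), (2, vec![3])]);
    assert_eq!(
        topo_order(&[1, 2, 3], &downstreams, &upstreams),
        Some(vec![1, 2, 3])
    );
}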
+ pub(super) fn building_fragments(&self) -> &HashMap { + &self.building_graph.fragments + } +} diff --git a/src/meta/src/stream/stream_graph/id.rs b/src/meta/src/stream/stream_graph/id.rs new file mode 100644 index 0000000000000..b468637142150 --- /dev/null +++ b/src/meta/src/stream/stream_graph/id.rs @@ -0,0 +1,90 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::manager::{IdCategory, IdCategoryType, IdGeneratorManager}; +use crate::storage::MetaStore; +use crate::MetaResult; + +/// A wrapper to distinguish global ID generated by the [`IdGeneratorManager`] and the local ID from +/// the frontend. +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)] +pub(super) struct GlobalId(u32); + +impl GlobalId { + pub const fn new(id: u32) -> Self { + Self(id) + } + + pub fn as_global_id(&self) -> u32 { + self.0 + } +} + +impl From for GlobalId { + fn from(id: u32) -> Self { + Self(id) + } +} + +/// Utility for converting local IDs into pre-allocated global IDs by adding an `offset`. +/// +/// This requires the local IDs exactly a permutation of the range `[0, len)`. +#[derive(Clone, Copy, Debug)] +pub(super) struct GlobalIdGen { + offset: u32, + len: u32, +} + +impl GlobalIdGen { + /// Pre-allocate a range of IDs with the given `len` and return the generator. + pub async fn new(id_gen: &IdGeneratorManager, len: u64) -> MetaResult { + let offset = id_gen.generate_interval::(len).await?; + Ok(Self { + offset: offset as u32, + len: len as u32, + }) + } + + /// Convert local id to global id. Panics if `id >= len`. + pub fn to_global_id(self, local_id: u32) -> GlobalId { + assert!( + local_id < self.len, + "id {} is out of range (len: {})", + local_id, + self.len + ); + GlobalId(local_id + self.offset) + } + + /// Returns the length of this ID generator. + pub fn len(&self) -> u32 { + self.len + } +} + +pub(super) type GlobalFragmentId = GlobalId<{ IdCategory::Fragment }>; +pub(super) type GlobalFragmentIdGen = GlobalIdGen<{ IdCategory::Fragment }>; + +pub(super) type GlobalTableIdGen = GlobalIdGen<{ IdCategory::Table }>; + +pub(super) type GlobalActorId = GlobalId<{ IdCategory::Actor }>; +pub(super) type GlobalActorIdGen = GlobalIdGen<{ IdCategory::Actor }>; + +/// Extension for converting a slice of [`GlobalActorId`] to a vector of global IDs. +#[easy_ext::ext(GlobalFragmentIdsExt)] +pub(super) impl> A { + fn as_global_ids(&self) -> Vec { + self.as_ref().iter().map(|x| x.as_global_id()).collect() + } +} diff --git a/src/meta/src/stream/stream_graph/schedule.rs b/src/meta/src/stream/stream_graph/schedule.rs new file mode 100644 index 0000000000000..631fafa17065d --- /dev/null +++ b/src/meta/src/stream/stream_graph/schedule.rs @@ -0,0 +1,492 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![allow( + clippy::collapsible_if, + clippy::explicit_iter_loop, + reason = "generated by crepe" +)] + +use std::collections::{BTreeMap, HashMap, LinkedList}; +use std::num::NonZeroUsize; + +use either::Either; +use enum_as_inner::EnumAsInner; +use itertools::Itertools; +use rand::seq::SliceRandom; +use rand::thread_rng; +use risingwave_common::bail; +use risingwave_common::hash::{ParallelUnitId, ParallelUnitMapping}; +use risingwave_pb::common::{ActorInfo, ParallelUnit}; +use risingwave_pb::meta::table_fragments::fragment::FragmentDistributionType; +use risingwave_pb::stream_plan::DispatcherType::{self, *}; + +use crate::manager::{WorkerId, WorkerLocations}; +use crate::model::ActorId; +use crate::stream::stream_graph::fragment::CompleteStreamFragmentGraph; +use crate::stream::stream_graph::id::GlobalFragmentId as Id; +use crate::MetaResult; + +type HashMappingId = usize; + +/// The internal distribution structure for processing in the scheduler. +/// +/// See [`Distribution`] for the public interface. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +enum DistId { + Singleton(ParallelUnitId), + Hash(HashMappingId), +} + +/// Facts as the input of the scheduler. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +enum Fact { + /// An edge in the stream graph. + Edge { + from: Id, + to: Id, + dt: DispatcherType, + }, + /// A distribution requirement for an external(existing) fragment. + ExternalReq { id: Id, dist: DistId }, + /// A singleton requirement for a building fragment. + /// Note that the physical parallel unit is not determined yet. + SingletonReq(Id), +} + +/// Results of all building fragments, as the output of the scheduler. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +enum Result { + /// This fragment is required to be distributed by the given [`DistId`]. + Required(DistId), + /// This fragment is singleton, and should be scheduled to the default parallel unit. + DefaultSingleton, + /// This fragment is hash-distributed, and should be scheduled by the default hash mapping. + DefaultHash, +} + +crepe::crepe! { + @input + struct Input(Fact); + + struct Edge(Id, Id, DispatcherType); + struct ExternalReq(Id, DistId); + struct SingletonReq(Id); + struct Fragment(Id); + struct Requirement(Id, DistId); + + @output + struct Success(Id, Result); + @output + #[derive(Debug)] + struct Failed(Id); + + // Extract facts. + Edge(from, to, dt) <- Input(f), let Fact::Edge { from, to, dt } = f; + ExternalReq(id, dist) <- Input(f), let Fact::ExternalReq { id, dist } = f; + SingletonReq(id) <- Input(f), let Fact::SingletonReq(id) = f; + + // Internal fragments. + Fragment(x) <- Edge(x, _, _), !ExternalReq(x, _); + Fragment(y) <- Edge(_, y, _), !ExternalReq(y, _); + + // Requirements from the facts. + Requirement(x, d) <- ExternalReq(x, d); + // Requirements of `NoShuffle` edges. + Requirement(x, d) <- Edge(x, y, NoShuffle), Requirement(y, d); + Requirement(y, d) <- Edge(x, y, NoShuffle), Requirement(x, d); + + // The downstream fragment of a `Simple` edge must be singleton. + SingletonReq(y) <- Edge(_, y, Simple); + + // Multiple requirements conflict. 
+ Failed(x) <- Requirement(x, d1), Requirement(x, d2), (d1 != d2); + // Singleton requirement conflicts with hash requirement. + Failed(x) <- SingletonReq(x), Requirement(x, d), let DistId::Hash(_) = d; + + // Take the required distribution as the result. + Success(x, Result::Required(d)) <- Fragment(x), Requirement(x, d), !Failed(x); + // Take the default singleton distribution as the result, if no other requirement. + Success(x, Result::DefaultSingleton) <- Fragment(x), SingletonReq(x), !Requirement(x, _); + // Take the default hash distribution as the result, if no other requirement. + Success(x, Result::DefaultHash) <- Fragment(x), !SingletonReq(x), !Requirement(x, _); +} + +/// The distribution of a fragment. +#[derive(Debug, Clone, EnumAsInner)] +pub(super) enum Distribution { + /// The fragment is singleton and is scheduled to the given parallel unit. + Singleton(ParallelUnitId), + + /// The fragment is hash-distributed and is scheduled by the given hash mapping. + Hash(ParallelUnitMapping), +} + +impl Distribution { + /// The parallelism required by the distribution. + pub fn parallelism(&self) -> usize { + self.parallel_units().count() + } + + /// All parallel units required by the distribution. + pub fn parallel_units(&self) -> impl Iterator + '_ { + match self { + Distribution::Singleton(p) => Either::Left(std::iter::once(*p)), + Distribution::Hash(mapping) => Either::Right(mapping.iter_unique()), + } + } + + /// Convert the distribution to a [`ParallelUnitMapping`]. + /// + /// - For singleton distribution, all of the virtual nodes are mapped to the same parallel unit. + /// - For hash distribution, the mapping is returned as is. + pub fn into_mapping(self) -> ParallelUnitMapping { + match self { + Distribution::Singleton(p) => ParallelUnitMapping::new_single(p), + Distribution::Hash(mapping) => mapping, + } + } + + /// Create a distribution from a persisted protobuf `Fragment`. + pub fn from_fragment(fragment: &risingwave_pb::meta::table_fragments::Fragment) -> Self { + let mapping = ParallelUnitMapping::from_protobuf(fragment.get_vnode_mapping().unwrap()); + + match fragment.get_distribution_type().unwrap() { + FragmentDistributionType::Unspecified => unreachable!(), + FragmentDistributionType::Single => { + let parallel_unit = mapping.to_single().unwrap(); + Distribution::Singleton(parallel_unit) + } + FragmentDistributionType::Hash => Distribution::Hash(mapping), + } + } +} + +/// [`Scheduler`] schedules the distribution of fragments in a stream graph. +pub(super) struct Scheduler { + /// The default hash mapping for hash-distributed fragments, if there's no requirement derived. + default_hash_mapping: ParallelUnitMapping, + + /// The default parallel unit for singleton fragments, if there's no requirement derived. + default_singleton_parallel_unit: ParallelUnitId, +} + +impl Scheduler { + /// Create a new [`Scheduler`] with the given parallel units and the default parallelism. + /// + /// Each hash-distributed fragment will be scheduled to at most `default_parallelism` parallel + /// units, in a round-robin fashion on all compute nodes. If the `default_parallelism` is + /// `None`, all parallel units will be used. + pub fn new( + parallel_units: impl IntoIterator, + default_parallelism: Option, + ) -> MetaResult { + // Group parallel units with worker node. 
+ let mut parallel_units_map = BTreeMap::new(); + for p in parallel_units { + parallel_units_map + .entry(p.worker_node_id) + .or_insert_with(Vec::new) + .push(p); + } + + // Use all parallel units if no default parallelism is specified. + let default_parallelism = default_parallelism.map_or_else( + || parallel_units_map.values().map(|p| p.len()).sum::(), + NonZeroUsize::get, + ); + + let mut parallel_units: LinkedList<_> = parallel_units_map + .into_values() + .map(|v| v.into_iter().sorted_by_key(|p| p.id)) + .collect(); + + // Visit the parallel units in a round-robin manner on each worker. + let mut round_robin = Vec::new(); + while !parallel_units.is_empty() { + parallel_units.drain_filter(|ps| { + if let Some(p) = ps.next() { + round_robin.push(p); + false + } else { + true + } + }); + } + round_robin.truncate(default_parallelism); + + if round_robin.len() < default_parallelism { + bail!( + "Not enough parallel units to schedule {} parallelism", + default_parallelism + ); + } + + // Sort all parallel units by ID to achieve better vnode locality. + round_robin.sort_unstable_by_key(|p| p.id); + + // Build the default hash mapping uniformly. + let default_hash_mapping = ParallelUnitMapping::build(&round_robin); + // Randomly choose a parallel unit as the default singleton parallel unit. + let default_singleton_parallel_unit = round_robin.choose(&mut thread_rng()).unwrap().id; + + Ok(Self { + default_hash_mapping, + default_singleton_parallel_unit, + }) + } + + /// Schedule the given complete graph and returns the distribution of each **building + /// fragment**. + pub fn schedule( + &self, + graph: &CompleteStreamFragmentGraph, + ) -> MetaResult> { + let existing_distribution = graph.existing_distribution(); + + // Build an index map for all hash mappings. + let all_hash_mappings = existing_distribution + .values() + .flat_map(|dist| dist.as_hash()) + .cloned() + .unique() + .collect_vec(); + let hash_mapping_id: HashMap<_, _> = all_hash_mappings + .iter() + .enumerate() + .map(|(i, m)| (m.clone(), i)) + .collect(); + + let mut facts = Vec::new(); + + // Singletons + for (&id, fragment) in graph.building_fragments() { + if fragment.is_singleton { + facts.push(Fact::SingletonReq(id)); + } + } + // External + for (id, req) in existing_distribution { + let dist = match req { + Distribution::Singleton(parallel_unit) => DistId::Singleton(parallel_unit), + Distribution::Hash(mapping) => DistId::Hash(hash_mapping_id[&mapping]), + }; + facts.push(Fact::ExternalReq { id, dist }); + } + // Edges + for (from, to, edge) in graph.all_edges() { + facts.push(Fact::Edge { + from, + to, + dt: edge.dispatch_strategy.r#type(), + }); + } + + // Run the algorithm. + let mut crepe = Crepe::new(); + crepe.extend(facts.into_iter().map(Input)); + let (success, failed) = crepe.run(); + if !failed.is_empty() { + bail!("Failed to schedule: {:?}", failed); + } + // Should not contain any existing fragments. + assert_eq!(success.len(), graph.building_fragments().len()); + + // Extract the results. 
+ let distributions = success + .into_iter() + .map(|Success(id, result)| { + let distribution = match result { + // Required + Result::Required(DistId::Singleton(parallel_unit)) => { + Distribution::Singleton(parallel_unit) + } + Result::Required(DistId::Hash(mapping)) => { + Distribution::Hash(all_hash_mappings[mapping].clone()) + } + + // Default + Result::DefaultSingleton => { + Distribution::Singleton(self.default_singleton_parallel_unit) + } + Result::DefaultHash => Distribution::Hash(self.default_hash_mapping.clone()), + }; + (id, distribution) + }) + .collect(); + + Ok(distributions) + } +} + +/// [`Locations`] represents the parallel unit and worker locations of the actors. +#[cfg_attr(test, derive(Default))] +pub struct Locations { + /// actor location map. + pub actor_locations: BTreeMap, + /// worker location map. + pub worker_locations: WorkerLocations, +} + +impl Locations { + /// Returns all actors for every worker node. + pub fn worker_actors(&self) -> HashMap> { + self.actor_locations + .iter() + .map(|(actor_id, parallel_unit)| (parallel_unit.worker_node_id, *actor_id)) + .into_group_map() + } + + /// Returns the `ActorInfo` map for every actor. + pub fn actor_info_map(&self) -> HashMap { + self.actor_infos() + .map(|info| (info.actor_id, info)) + .collect() + } + + /// Returns an iterator of `ActorInfo`. + pub fn actor_infos(&self) -> impl Iterator + '_ { + self.actor_locations + .iter() + .map(|(actor_id, parallel_unit)| ActorInfo { + actor_id: *actor_id, + host: self.worker_locations[¶llel_unit.worker_node_id] + .host + .clone(), + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn test_success(facts: impl IntoIterator, expected: HashMap) { + let mut crepe = Crepe::new(); + crepe.extend(facts.into_iter().map(Input)); + let (success, failed) = crepe.run(); + + assert!(failed.is_empty()); + + let success: HashMap<_, _> = success + .into_iter() + .map(|Success(id, result)| (id, result)) + .collect(); + + assert_eq!(success, expected); + } + + fn test_failed(facts: impl IntoIterator) { + let mut crepe = Crepe::new(); + crepe.extend(facts.into_iter().map(Input)); + let (_success, failed) = crepe.run(); + + assert!(!failed.is_empty()); + } + + // 1 -|-> 101 --> + // 103 --> 104 + // 2 -|-> 102 --> + #[test] + fn test_scheduling_mv_on_mv() { + #[rustfmt::skip] + let facts = [ + Fact::ExternalReq { id: 1.into(), dist: DistId::Hash(1) }, + Fact::ExternalReq { id: 2.into(), dist: DistId::Singleton(2) }, + Fact::Edge { from: 1.into(), to: 101.into(), dt: NoShuffle }, + Fact::Edge { from: 2.into(), to: 102.into(), dt: NoShuffle }, + Fact::Edge { from: 101.into(), to: 103.into(), dt: Hash }, + Fact::Edge { from: 102.into(), to: 103.into(), dt: Hash }, + Fact::Edge { from: 103.into(), to: 104.into(), dt: Simple }, + ]; + + let expected = maplit::hashmap! 
{ + 101.into() => Result::Required(DistId::Hash(1)), + 102.into() => Result::Required(DistId::Singleton(2)), + 103.into() => Result::DefaultHash, + 104.into() => Result::DefaultSingleton, + }; + + test_success(facts, expected); + } + + // 1 -|-> 101 --> 103 --> + // X 105 + // 2 -|-> 102 --> 104 --> + #[test] + fn test_delta_join() { + #[rustfmt::skip] + let facts = [ + Fact::ExternalReq { id: 1.into(), dist: DistId::Hash(1) }, + Fact::ExternalReq { id: 2.into(), dist: DistId::Hash(2) }, + Fact::Edge { from: 1.into(), to: 101.into(), dt: NoShuffle }, + Fact::Edge { from: 2.into(), to: 102.into(), dt: NoShuffle }, + Fact::Edge { from: 101.into(), to: 103.into(), dt: NoShuffle }, + Fact::Edge { from: 102.into(), to: 104.into(), dt: NoShuffle }, + Fact::Edge { from: 101.into(), to: 104.into(), dt: Hash }, + Fact::Edge { from: 102.into(), to: 103.into(), dt: Hash }, + Fact::Edge { from: 103.into(), to: 105.into(), dt: Hash }, + Fact::Edge { from: 104.into(), to: 105.into(), dt: Hash }, + ]; + + let expected = maplit::hashmap! { + 101.into() => Result::Required(DistId::Hash(1)), + 102.into() => Result::Required(DistId::Hash(2)), + 103.into() => Result::Required(DistId::Hash(1)), + 104.into() => Result::Required(DistId::Hash(2)), + 105.into() => Result::DefaultHash, + }; + + test_success(facts, expected); + } + + // 1 -|-> 101 --> + // 103 + // 102 --> + #[test] + fn test_singleton_leaf() { + #[rustfmt::skip] + let facts = [ + Fact::ExternalReq { id: 1.into(), dist: DistId::Hash(1) }, + Fact::Edge { from: 1.into(), to: 101.into(), dt: NoShuffle }, + Fact::SingletonReq(102.into()), // like `Now` + Fact::Edge { from: 101.into(), to: 103.into(), dt: Hash }, + Fact::Edge { from: 102.into(), to: 103.into(), dt: Broadcast }, + ]; + + let expected = maplit::hashmap! { + 101.into() => Result::Required(DistId::Hash(1)), + 102.into() => Result::DefaultSingleton, + 103.into() => Result::DefaultHash, + }; + + test_success(facts, expected); + } + + // 1 -|-> + // 101 + // 2 -|-> + #[test] + fn test_upstream_hash_shard_failed() { + #[rustfmt::skip] + let facts = [ + Fact::ExternalReq { id: 1.into(), dist: DistId::Hash(1) }, + Fact::ExternalReq { id: 2.into(), dist: DistId::Hash(2) }, + Fact::Edge { from: 1.into(), to: 101.into(), dt: NoShuffle }, + Fact::Edge { from: 2.into(), to: 101.into(), dt: NoShuffle }, + ]; + + test_failed(facts); + } +} diff --git a/src/meta/src/stream/stream_graph/visit.rs b/src/meta/src/stream/stream_graph/visit.rs new file mode 100644 index 0000000000000..d9f399f5f7006 --- /dev/null +++ b/src/meta/src/stream/stream_graph/visit.rs @@ -0,0 +1,155 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
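One more illustration before moving on to the visitor: the round-robin selection in `Scheduler::new` above picks one parallel unit per worker per pass until `default_parallelism` units are chosen, which keeps the default hash mapping spread across compute nodes. A rough standalone sketch of that selection, using plain integer IDs and `Vec`/`retain_mut` instead of the `LinkedList`/`drain_filter` in the real code, is:

use std::collections::BTreeMap;

/// Pick up to `parallelism` unit IDs, taking one unit from each worker per pass.
fn round_robin(units_per_worker: BTreeMap<u32, Vec<u32>>, parallelism: usize) -> Option<Vec<u32>> {
    let mut iters: Vec<_> = units_per_worker
        .into_values()
        .map(|units| units.into_iter())
        .collect();
    let mut picked = Vec::new();

    // Each pass takes one unit from every worker that still has some left.
    while !iters.is_empty() && picked.len() < parallelism {
        iters.retain_mut(|it| match it.next() {
            Some(unit) => {
                picked.push(unit);
                true
            }
            None => false,
        });
    }
    picked.truncate(parallelism);

    if picked.len() < parallelism {
        // Not enough parallel units in the cluster for the requested parallelism.
        return None;
    }
    // The real scheduler additionally sorts by unit ID for better vnode locality.
    picked.sort_unstable();
    Some(picked)
}

fn main() {
    // Two workers with three units each; parallelism 4 takes two from each worker.
    let units = BTreeMap::from([(1, vec![1, 2, 3]), (2, vec![4, 5, 6])]);
    assert_eq!(round_robin(units, 4), Some(vec![1, 2, 4, 5]));
}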
+ +use risingwave_pb::catalog::Table; +use risingwave_pb::stream_plan::stream_fragment_graph::StreamFragment; +use risingwave_pb::stream_plan::stream_node::NodeBody; +use risingwave_pb::stream_plan::{agg_call_state, StreamNode}; + +/// A utility for visiting and mutating the [`NodeBody`] of the [`StreamNode`]s in a +/// [`StreamFragment`] recursively. +pub fn visit_fragment(fragment: &mut StreamFragment, mut f: F) +where + F: FnMut(&mut NodeBody), +{ + fn visit_inner(stream_node: &mut StreamNode, f: &mut F) + where + F: FnMut(&mut NodeBody), + { + f(stream_node.node_body.as_mut().unwrap()); + for input in &mut stream_node.input { + visit_inner(input, f); + } + } + + visit_inner(fragment.node.as_mut().unwrap(), &mut f) +} + +/// Visit the internal tables of a [`StreamFragment`]. +pub(super) fn visit_internal_tables(fragment: &mut StreamFragment, mut f: F) +where + F: FnMut(&mut Table, &str), +{ + macro_rules! always { + ($table:expr, $name:expr) => {{ + let table = $table + .as_mut() + .unwrap_or_else(|| panic!("internal table {} should always exist", $name)); + f(table, $name); + }}; + } + + #[allow(unused_macros)] + macro_rules! optional { + ($table:expr, $name:expr) => { + if let Some(table) = &mut $table { + f(table, $name); + } + }; + } + + macro_rules! repeated { + ($tables:expr, $name:expr) => { + for table in &mut $tables { + f(table, $name); + } + }; + } + + visit_fragment(fragment, |body| { + match body { + // Join + NodeBody::HashJoin(node) => { + // TODO: make the degree table optional + always!(node.left_table, "HashJoinLeft"); + always!(node.left_degree_table, "HashJoinDegreeLeft"); + always!(node.right_table, "HashJoinRight"); + always!(node.right_degree_table, "HashJoinDegreeRight"); + } + NodeBody::DynamicFilter(node) => { + always!(node.left_table, "DynamicFilterLeft"); + always!(node.right_table, "DynamicFilterRight"); + } + + // Aggregation + NodeBody::HashAgg(node) => { + assert_eq!(node.agg_call_states.len(), node.agg_calls.len()); + always!(node.result_table, "HashAggResult"); + for state in &mut node.agg_call_states { + if let agg_call_state::Inner::MaterializedInputState(s) = + state.inner.as_mut().unwrap() + { + always!(s.table, "HashAgg"); + } + } + for (distinct_col, dedup_table) in &mut node.distinct_dedup_tables { + f(dedup_table, &format!("HashAggDedupForCol{}", distinct_col)); + } + } + NodeBody::GlobalSimpleAgg(node) => { + assert_eq!(node.agg_call_states.len(), node.agg_calls.len()); + always!(node.result_table, "GlobalSimpleAggResult"); + for state in &mut node.agg_call_states { + if let agg_call_state::Inner::MaterializedInputState(s) = + state.inner.as_mut().unwrap() + { + always!(s.table, "GlobalSimpleAgg"); + } + } + for (distinct_col, dedup_table) in &mut node.distinct_dedup_tables { + f( + dedup_table, + &format!("GlobalSimpleAggDedupForCol{}", distinct_col), + ); + } + } + + // Top-N + NodeBody::AppendOnlyTopN(node) => { + always!(node.table, "AppendOnlyTopN"); + } + NodeBody::TopN(node) => { + always!(node.table, "TopN"); + } + NodeBody::AppendOnlyGroupTopN(node) => { + always!(node.table, "AppendOnlyGroupTopN"); + } + NodeBody::GroupTopN(node) => { + always!(node.table, "GroupTopN"); + } + + // Source + NodeBody::Source(node) => { + if let Some(source) = &mut node.source_inner { + always!(source.state_table, "Source"); + } + } + NodeBody::Now(node) => { + always!(node.state_table, "Now"); + } + NodeBody::WatermarkFilter(node) => { + assert!(!node.tables.is_empty()); + repeated!(node.tables, "WatermarkFilter"); + } + + // Shared arrangement + 
NodeBody::Arrange(node) => { + always!(node.table, "Arrange"); + } + + // Note: add internal tables for new nodes here. + _ => {} + } + }) +} diff --git a/src/meta/src/stream/stream_manager.rs b/src/meta/src/stream/stream_manager.rs index 459687dc68cdd..fbdbdb5c32d7c 100644 --- a/src/meta/src/stream/stream_manager.rs +++ b/src/meta/src/stream/stream_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,73 +12,137 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; -use futures::future::BoxFuture; +use futures::future::{try_join_all, BoxFuture}; use itertools::Itertools; -use risingwave_common::bail; use risingwave_common::catalog::TableId; -use risingwave_common::hash::VirtualNode; use risingwave_pb::catalog::Table; -use risingwave_pb::common::{ActorInfo, Buffer, WorkerType}; -use risingwave_pb::meta::table_fragments::actor_status::ActorState; -use risingwave_pb::meta::table_fragments::fragment::FragmentDistributionType; -use risingwave_pb::meta::table_fragments::ActorStatus; -use risingwave_pb::stream_plan::stream_node::NodeBody; -use risingwave_pb::stream_plan::{ActorMapping, Dispatcher, DispatcherType, StreamNode}; +use risingwave_pb::common::ActorInfo; +use risingwave_pb::stream_plan::update_mutation::MergeUpdate; +use risingwave_pb::stream_plan::Dispatcher; use risingwave_pb::stream_service::{ - BroadcastActorInfoTableRequest, BuildActorsRequest, HangingChannel, UpdateActorsRequest, + BroadcastActorInfoTableRequest, BuildActorsRequest, DropActorsRequest, HangingChannel, + UpdateActorsRequest, }; +use tokio::sync::mpsc::Sender; +use tokio::sync::Mutex; use uuid::Uuid; -use super::ScheduledLocations; +use super::Locations; use crate::barrier::{BarrierScheduler, Command}; use crate::hummock::HummockManagerRef; -use crate::manager::{ - ClusterManagerRef, FragmentManagerRef, FragmentVNodeInfo, MetaSrvEnv, WorkerId, -}; -use crate::model::{ActorId, FragmentId, TableFragments}; +use crate::manager::{ClusterManagerRef, FragmentManagerRef, MetaSrvEnv}; +use crate::model::{ActorId, TableFragments}; use crate::storage::MetaStore; -use crate::stream::{parallel_unit_mapping_to_actor_mapping, Scheduler, SourceManagerRef}; -use crate::MetaResult; +use crate::stream::SourceManagerRef; +use crate::{MetaError, MetaResult}; pub type GlobalStreamManagerRef = Arc>; -/// [`CreateStreamingJobContext`] carries one-time infos. -#[derive(Default)] +/// [`CreateStreamingJobContext`] carries one-time infos for creating a streaming job. +/// +/// Note: for better readability, keep this struct complete and immutable once created. +#[cfg_attr(test, derive(Default))] pub struct CreateStreamingJobContext { /// New dispatchers to add from upstream actors to downstream actors. pub dispatchers: HashMap>, - /// Upstream mview actor ids grouped by worker node id. - pub upstream_worker_actors: HashMap>, + /// Upstream mview actor ids grouped by table id. - pub table_mview_map: HashMap>, - /// Dependent table ids - pub dependent_table_ids: HashSet, - /// Internal TableID to Table mapping + pub upstream_mview_actors: HashMap>, + + /// Internal tables in the streaming job. pub internal_tables: HashMap, - /// The upstream tables of all fragments containing chain nodes. 
- /// These fragments need to be colocated with their upstream tables. - /// - /// They are scheduled in `resolve_chain_node`. - pub chain_fragment_upstream_table_map: HashMap, - /// The SQL definition of this streaming job. Used for debugging only. - pub streaming_definition: String, + /// The locations of the actors to build in the streaming job. + pub building_locations: Locations, + + /// The locations of the existing actors, essentially the upstream mview actors to update. + pub existing_locations: Locations, + + /// The properties of the streaming job. + // TODO: directly store `StreamingJob here. pub table_properties: HashMap, + + /// DDL definition. + pub definition: String, } impl CreateStreamingJobContext { pub fn internal_tables(&self) -> Vec
{ self.internal_tables.values().cloned().collect() } +} - pub fn internal_table_ids(&self) -> Vec { - self.internal_tables.keys().copied().collect() +pub enum CreatingState { + Failed { reason: MetaError }, + Canceling, + Created, +} + +struct StreamingJobExecution { + id: TableId, + shutdown_tx: Option>, +} + +impl StreamingJobExecution { + fn new(id: TableId, shutdown_tx: Sender) -> Self { + Self { + id, + shutdown_tx: Some(shutdown_tx), + } } } +#[derive(Default)] +struct CreatingStreamingJobInfo { + streaming_jobs: Mutex>, +} + +impl CreatingStreamingJobInfo { + async fn add_job(&self, job: StreamingJobExecution) { + let mut jobs = self.streaming_jobs.lock().await; + jobs.insert(job.id, job); + } + + async fn delete_job(&self, job_id: TableId) { + let mut jobs = self.streaming_jobs.lock().await; + jobs.remove(&job_id); + } + + async fn cancel_jobs(&self, job_ids: Vec) { + let mut jobs = self.streaming_jobs.lock().await; + for job_id in job_ids { + if let Some(job) = jobs.get_mut(&job_id) && let Some(shutdown_tx) = job.shutdown_tx.take() { + let _ = shutdown_tx.send(CreatingState::Canceling).await.inspect_err(|_| { + tracing::warn!("failed to send canceling state"); + }); + } + } + } +} + +type CreatingStreamingJobInfoRef = Arc; + +/// [`ReplaceTableContext`] carries one-time infos for replacing the plan of an existing table. +/// +/// Note: for better readability, keep this struct complete and immutable once created. +pub struct ReplaceTableContext { + /// The updates to be applied to the downstream chain actors. Used for schema change. + pub merge_updates: Vec, + + /// The locations of the actors to build in the new table to replace. + pub building_locations: Locations, + + /// The locations of the existing actors, essentially the downstream chain actors to update. + pub existing_locations: Locations, + + /// The properties of the streaming job. + // TODO: directly store `StreamingJob here. + pub table_properties: HashMap, +} + /// `GlobalStreamManager` manages all the streams in the system. pub struct GlobalStreamManager { pub(crate) env: MetaSrvEnv, @@ -95,6 +159,9 @@ pub struct GlobalStreamManager { /// Maintains streaming sources from external system like kafka pub(crate) source_manager: SourceManagerRef, + /// Creating streaming job info. + creating_job_info: CreatingStreamingJobInfoRef, + hummock_manager: HummockManagerRef, } @@ -117,440 +184,159 @@ where cluster_manager, source_manager, hummock_manager, + creating_job_info: Arc::new(CreatingStreamingJobInfo::default()), }) } - async fn resolve_chain_node( - &self, - table_fragments: &mut TableFragments, - dependent_table_ids: &HashSet, - dispatchers: &mut HashMap>, - upstream_worker_actors: &mut HashMap>, - locations: &mut ScheduledLocations, - chain_fragment_upstream_table_map: &HashMap, + /// Create streaming job, it works as follows: + /// + /// 1. Broadcast the actor info based on the scheduling result in the context, build the hanging + /// channels in upstream worker nodes. + /// 2. (optional) Get the split information of the `StreamSource` via source manager and patch + /// actors. + /// 3. Notify related worker nodes to update and build the actors. + /// 4. Store related meta data. + pub async fn create_streaming_job( + self: &Arc, + table_fragments: TableFragments, + ctx: CreateStreamingJobContext, ) -> MetaResult<()> { - // The closure environment. Used to simulate recursive closure. - struct Env<'a> { - /// Records what's the corresponding parallel unit of each actor and mview vnode - /// mapping info of one table. 
- upstream_fragment_vnode_info: &'a HashMap, - /// Records each upstream mview actor's vnode bitmap info. - upstream_vnode_bitmap_info: &'a mut HashMap)>>, - /// Records what's the actors on each worker of one table. - tables_worker_actors: &'a HashMap>>, - /// Schedule information of all actors. - locations: &'a mut ScheduledLocations, - /// New dispatchers for this mview. - dispatchers: &'a mut HashMap>, - /// New vnode bitmaps for chain actors. - actor_vnode_bitmaps: &'a mut HashMap>, - /// Upstream Materialize actor ids grouped by worker id. - upstream_worker_actors: &'a mut HashMap>, - } + let table_id = table_fragments.table_id(); + let (sender, mut receiver) = tokio::sync::mpsc::channel(10); + let execution = StreamingJobExecution::new(table_id, sender.clone()); + self.creating_job_info.add_job(execution).await; - impl Env<'_> { - fn resolve_chain_node_inner( - &mut self, - stream_node: &mut StreamNode, - actor_id: ActorId, - upstream_actor_idx: usize, - is_singleton: bool, - upstream_fragment_id: FragmentId, - ) -> MetaResult<()> { - let Some(NodeBody::Chain(ref mut chain)) = stream_node.node_body else { - // If node is not chain node, recursively deal with input nodes - for input in &mut stream_node.input { - self.resolve_chain_node_inner(input, actor_id, upstream_actor_idx, is_singleton, upstream_fragment_id)?; - } - return Ok(()); - }; - - // get upstream table id - let table_id = TableId::new(chain.table_id); - // 1. use table id to get upstream vnode mapping info: [(actor_id, - // option(vnode_bitmap))] - let upstream_vnode_mapping_info = &self.upstream_vnode_bitmap_info[&table_id]; - - let (upstream_actor_id, upstream_vnode_bitmap) = { - if is_singleton { - // The upstream fragment should also be singleton. - upstream_vnode_mapping_info.iter().exactly_one().unwrap() - } else { - // Assign a upstream actor id to this chain node. - &upstream_vnode_mapping_info[upstream_actor_idx] + let stream_manager = self.clone(); + tokio::spawn(async move { + let mut revert_funcs = vec![]; + let res = stream_manager + .create_streaming_job_impl(&mut revert_funcs, table_fragments, ctx) + .await; + match res { + Ok(_) => { + let _ = sender + .send(CreatingState::Created) + .await + .inspect_err(|_| tracing::warn!("failed to notify created: {table_id}")); + } + Err(err) => { + for revert_func in revert_funcs.into_iter().rev() { + revert_func.await; } - }; - - // Here we force schedule the chain node to the same parallel unit as its upstream, - // so `same_worker_node_as_upstream` is not used here. If we want to - // support different parallel units, we need to keep the vnode bitmap and assign a - // new parallel unit with some other strategies. 
- let upstream_parallel_unit = self - .upstream_fragment_vnode_info - .get(&table_id) - .unwrap() - .actor_parallel_unit_maps - .get(upstream_actor_id) - .unwrap() - .clone(); - self.locations - .actor_locations - .insert(actor_id, upstream_parallel_unit); - self.actor_vnode_bitmaps - .insert(actor_id, upstream_vnode_bitmap.clone()); - - // fill upstream node-actor info for later use - let upstream_table_worker_actors = - self.tables_worker_actors.get(&table_id).unwrap(); - - let chain_upstream_worker_actors = upstream_table_worker_actors - .iter() - .flat_map(|(worker_id, actor_ids)| { - actor_ids.iter().map(|actor_id| (*worker_id, *actor_id)) - }) - .filter(|(_, actor_id)| upstream_actor_id == actor_id) - .into_group_map(); - for (worker_id, actor_ids) in chain_upstream_worker_actors { - self.upstream_worker_actors - .entry(worker_id) - .or_default() - .extend(actor_ids); + let _ = sender + .send(CreatingState::Failed { + reason: err.clone(), + }) + .await + .inspect_err(|_| { + tracing::warn!("failed to notify failed: {table_id}, err: {err}") + }); } - - // deal with merge and batch query node, setting upstream infos. - let batch_stream_node = &stream_node.input[1]; - assert!( - matches!(batch_stream_node.node_body, Some(NodeBody::BatchPlan(_))), - "chain's input[1] should always be batch query" - ); - - let merge_stream_node = &mut stream_node.input[0]; - let Some(NodeBody::Merge(ref mut merge)) = merge_stream_node.node_body else { - unreachable!("chain's input[0] should always be merge"); - }; - merge.upstream_actor_id.push(*upstream_actor_id); - merge.upstream_fragment_id = upstream_fragment_id; - - // finally, we should also build dispatcher infos here. - // - // Note: currently we ensure that the downstream chain operator has the same - // parallel unit and distribution as the upstream mview, so we can simply use - // `NoShuffle` dispatcher here. - self.dispatchers - .entry(*upstream_actor_id) - .or_default() - .push(Dispatcher { - r#type: DispatcherType::NoShuffle as _, - // Use chain actor id as dispatcher id to avoid collision in this - // Dispatch executor. - dispatcher_id: actor_id as _, - downstream_actor_id: vec![actor_id], - ..Default::default() - }); - - Ok(()) - } - } - - let upstream_fragment_vnode_info = &self - .fragment_manager - .get_mview_fragment_vnode_info(dependent_table_ids) - .await?; - - let upstream_vnode_bitmap_info = &mut self - .fragment_manager - .get_mview_vnode_bitmap_info(dependent_table_ids) - .await?; - - let tables_worker_actors = &self - .fragment_manager - .get_tables_worker_actors(dependent_table_ids) - .await?; - - let mut env = Env { - upstream_fragment_vnode_info, - upstream_vnode_bitmap_info, - tables_worker_actors, - locations, - dispatchers, - actor_vnode_bitmaps: &mut Default::default(), - upstream_worker_actors, - }; - - for fragment in table_fragments.fragments.values_mut() { - if !chain_fragment_upstream_table_map.contains_key(&fragment.fragment_id) { - continue; - } - - // setup fragment vnode mapping. - let upstream_table_id = chain_fragment_upstream_table_map - .get(&fragment.fragment_id) - .unwrap(); - - let upstream_fragment_vnode_info = env - .upstream_fragment_vnode_info - .get(upstream_table_id) - .unwrap(); - - let upstream_fragment_id = upstream_fragment_vnode_info - .vnode_mapping - .as_ref() - .unwrap() - .fragment_id; - - let is_singleton = - fragment.get_distribution_type()? 
== FragmentDistributionType::Single; - - let upstream_actor_ids = env - .upstream_vnode_bitmap_info - .get(upstream_table_id) - .map(|v| v.iter().map(|(actor_id, _)| *actor_id).collect_vec()) - .unwrap(); - - for (idx, actor) in &mut fragment.actors.iter_mut().enumerate() { - let stream_node = actor.nodes.as_mut().unwrap(); - env.resolve_chain_node_inner( - stream_node, - actor.actor_id, - idx, - is_singleton, - upstream_fragment_id, - )?; - - // setup actor vnode bitmap. - actor.vnode_bitmap = env.actor_vnode_bitmaps.remove(&actor.actor_id).unwrap(); - - // setup upstream actor id - actor.upstream_actor_id.push(upstream_actor_ids[idx]); } + }); - // Note: it's possible that there're some other normal `Merge` nodes in the fragment of - // `Chain` and their upstreams are already filled in `upstream_fragment_ids`, so we - // won't check it empty here and just push the one resolved in `Chain` to the end. - fragment - .upstream_fragment_ids - .push(upstream_fragment_id as FragmentId); - - let mut vnode_mapping = upstream_fragment_vnode_info.vnode_mapping.clone(); - // The upstream vnode_mapping is cloned here, - // so the fragment id in the mapping needs to be changed to the id of this fragment - if let Some(mapping) = vnode_mapping.as_mut() { - assert_ne!(mapping.fragment_id, fragment.fragment_id); - mapping.fragment_id = fragment.fragment_id; + let res = try { + while let Some(state) = receiver.recv().await { + match state { + CreatingState::Failed { reason } => { + return Err(reason); + } + CreatingState::Canceling => { + if let Ok(table_fragments) = self + .fragment_manager + .select_table_fragments_by_table_id(&table_id) + .await + { + // try to cancel buffered creating command. + if self + .barrier_scheduler + .try_cancel_scheduled_create(table_id) + .await + { + tracing::debug!( + "cancelling streaming job {table_id} in buffer queue." + ); + let node_actors = table_fragments.worker_actor_ids(); + let cluster_info = + self.cluster_manager.get_streaming_cluster_info().await; + let node_actors = node_actors + .into_iter() + .map(|(id, actor_ids)| { + ( + cluster_info.worker_nodes.get(&id).cloned().unwrap(), + actor_ids, + ) + }) + .collect_vec(); + let futures = node_actors.into_iter().map(|(node, actor_ids)| { + let request_id = Uuid::new_v4().to_string(); + async move { + let client = + self.env.stream_client_pool().get(&node).await?; + let request = DropActorsRequest { + request_id, + actor_ids, + }; + client.drop_actors(request).await + } + }); + try_join_all(futures).await?; + + self.fragment_manager + .drop_table_fragments_vec(&HashSet::from_iter(std::iter::once( + table_id, + ))) + .await?; + return Err(MetaError::cancelled("create".into())); + } + if !table_fragments.is_created() { + tracing::debug!( + "cancelling streaming job {table_id} by issue cancel command." + ); + self.barrier_scheduler + .run_command(Command::CancelStreamingJob(table_fragments)) + .await?; + return Err(MetaError::cancelled("create".into())); + } + } + } + CreatingState::Created => return Ok(()), + } } - fragment.vnode_mapping = vnode_mapping; - } - Ok(()) - } + }; - /// Create streaming job, it works as follows: - /// 1. schedule the actors to nodes in the cluster. - /// 2. broadcast the actor info table. - /// (optional) get the split information of the `StreamSource` via source manager and patch - /// actors . - /// 3. notify related nodes to update and build the actors. - /// 4. store related meta data. - /// - /// Note the `table_fragments` is required to be sorted in topology order. 
(Downstream first, - /// then upstream.) - pub async fn create_streaming_job( - &self, - table_fragments: TableFragments, - context: &mut CreateStreamingJobContext, - ) -> MetaResult<()> { - let mut revert_funcs = vec![]; - if let Err(e) = self - .create_streaming_job_impl(&mut revert_funcs, table_fragments, context) - .await - { - for revert_func in revert_funcs.into_iter().rev() { - revert_func.await; - } - return Err(e); - } - Ok(()) + self.creating_job_info.delete_job(table_id).await; + res } async fn create_streaming_job_impl( &self, revert_funcs: &mut Vec>, - mut table_fragments: TableFragments, + table_fragments: TableFragments, CreateStreamingJobContext { dispatchers, - upstream_worker_actors, - table_mview_map, - dependent_table_ids, + upstream_mview_actors, table_properties, - chain_fragment_upstream_table_map, + building_locations, + existing_locations, + definition, .. - }: &mut CreateStreamingJobContext, + }: CreateStreamingJobContext, ) -> MetaResult<()> { - // Schedule actors to parallel units. `locations` will record the parallel unit that an - // actor is scheduled to, and the worker node this parallel unit is on. - let mut locations = { - // List all running worker nodes and the parallel units. - // - // It's possible that the cluster configuration has been changed after we resolve the - // stream graph, so the scheduling is fallible and the client may need to retry. - // TODO: refactor to use a consistent snapshot of cluster configuration. - let workers = self - .cluster_manager - .list_worker_node( - WorkerType::ComputeNode, - Some(risingwave_pb::common::worker_node::State::Running), - ) - .await; - if workers.is_empty() { - bail!("no available compute node in the cluster"); - } - let parallel_units = self.cluster_manager.list_active_parallel_units().await; - - // Create empty locations and the scheduler. - let mut locations = ScheduledLocations::with_workers(workers); - let scheduler = Scheduler::new(parallel_units); - - // Schedule each fragment(actors) to nodes except chain, recorded in `locations`. - // Vnode mapping in fragment will be filled in as well. - let topological_order = table_fragments.generate_topological_order(); - for fragment_id in topological_order { - let fragment = table_fragments.fragments.get_mut(&fragment_id).unwrap(); - if !chain_fragment_upstream_table_map.contains_key(&fragment_id) { - scheduler.schedule(fragment, &mut locations)?; - } - } - - locations - }; - - // Resolve chain node infos, including: - // 1. insert upstream actor id in merge node - // 2. insert parallel unit id in batch query node - self.resolve_chain_node( - &mut table_fragments, - dependent_table_ids, - dispatchers, - upstream_worker_actors, - &mut locations, - chain_fragment_upstream_table_map, - ) - .await?; - let dispatchers = &*dispatchers; - let upstream_worker_actors = &*upstream_worker_actors; - - // Record vnode to parallel unit mapping for actors. - let actor_to_vnode_mapping = { - let mut mapping = HashMap::new(); - for fragment in table_fragments.fragments.values() { - for actor in &fragment.actors { - mapping - .try_insert(actor.actor_id, fragment.vnode_mapping.clone()) - .unwrap(); - } - } - mapping - }; - - // Fill hash dispatcher's mapping with scheduled locations. - for fragment in table_fragments.fragments.values_mut() { - // Filter out hash dispatchers in this fragment. 
- let dispatchers = fragment - .actors - .iter_mut() - .flat_map(|actor| actor.dispatcher.iter_mut()) - .filter(|d| d.get_type().unwrap() == DispatcherType::Hash); - - for dispatcher in dispatchers { - match dispatcher.downstream_actor_id.as_slice() { - [] => panic!("hash dispatcher should have at least one downstream actor"), - - // There exists some unoptimized situation where a hash dispatcher has ONLY ONE - // downstream actor, which makes it behave like a simple dispatcher. As a - // workaround, we specially compute the consistent hash mapping here. - // This arm could be removed after the optimizer has been fully implemented. - &[single_downstream_actor] => { - dispatcher.hash_mapping = Some(ActorMapping { - original_indices: vec![VirtualNode::COUNT as u64 - 1], - data: vec![single_downstream_actor], - }); - } - - // For normal cases, we can simply transform the mapping from downstream actors - // to current hash dispatchers. - downstream_actors @ &[first_downstream_actor, ..] => { - // All actors in the downstream fragment should have the same parallel unit - // mapping, find it with the first downstream actor. - let downstream_vnode_mapping = actor_to_vnode_mapping - .get(&first_downstream_actor) - .unwrap() - .as_ref() - .unwrap_or_else(|| { - panic!("no vnode mapping for actor {}", &first_downstream_actor); - }); - - // Mapping from the parallel unit to downstream actors. - let parallel_unit_actor_map = downstream_actors - .iter() - .map(|actor_id| { - ( - locations.actor_locations.get(actor_id).unwrap().id, - *actor_id, - ) - }) - .collect::>(); - - // Transform the mapping of parallel unit to the mapping of actor. - dispatcher.hash_mapping = Some(parallel_unit_mapping_to_actor_mapping( - downstream_vnode_mapping, - ¶llel_unit_actor_map, - )); - } - } - } - } - - // Mark the actors to be built as `ActorState::Inactive`. - let actor_status = locations - .actor_locations - .iter() - .map(|(&actor_id, parallel_unit)| { - ( - actor_id, - ActorStatus { - parallel_unit: Some(parallel_unit.clone()), - state: ActorState::Inactive as i32, - }, - ) - }) - .collect(); - table_fragments.set_actor_status(actor_status); - - let table_fragments = table_fragments; let actor_map = table_fragments.actor_map(); // Actors on each stream node will need to know where their upstream lies. `actor_info` // includes such information. It contains: // 1. actors in the current create-streaming-job request. // 2. all upstream actors. - let actor_infos_to_broadcast = { - let current = locations.actor_infos(); - let upstream = upstream_worker_actors - .iter() - .flat_map(|(worker_id, upstreams)| { - upstreams.iter().map(|up_id| ActorInfo { - actor_id: *up_id, - host: locations - .worker_locations - .get(worker_id) - .unwrap() - .host - .clone(), - }) - }); - current.chain(upstream).collect_vec() - }; + let actor_infos_to_broadcast = building_locations + .actor_infos() + .chain(existing_locations.actor_infos()) + .collect_vec(); - let actor_host_infos = locations.actor_info_map(); - let worker_actors = locations.worker_actors(); + let building_actor_infos = building_locations.actor_info_map(); + let building_worker_actors = building_locations.worker_actors(); + let existing_worker_actors = existing_locations.worker_actors(); // Hanging channels for each worker node. 
let mut hanging_channels = { @@ -561,17 +347,17 @@ where let down_infos = dispatchers .iter() .flat_map(|d| d.downstream_actor_id.iter()) - .map(|down_id| actor_host_infos[down_id].clone()) + .map(|down_id| building_actor_infos[down_id].clone()) .collect_vec(); (up_id, down_infos) }) .collect::>(); - upstream_worker_actors + existing_worker_actors .iter() - .map(|(node_id, up_ids)| { + .map(|(&worker_id, up_ids)| { ( - *node_id, + worker_id, up_ids .iter() .flat_map(|up_id| { @@ -595,8 +381,8 @@ where // The first stage does 2 things: broadcast actor info, and send local actor ids to // different WorkerNodes. Such that each WorkerNode knows the overall actor // allocation, but not actually builds it. We initialize all channels in this stage. - for (worker_id, actors) in &worker_actors { - let worker_node = locations.worker_locations.get(worker_id).unwrap(); + for (worker_id, actors) in &building_worker_actors { + let worker_node = building_locations.worker_locations.get(worker_id).unwrap(); let client = self.env.stream_client_pool().get(worker_node).await?; client @@ -607,7 +393,7 @@ where let stream_actors = actors .iter() - .map(|actor_id| actor_map.get(actor_id).cloned().unwrap()) + .map(|actor_id| actor_map[actor_id].clone()) .collect::>(); let request_id = Uuid::new_v4().to_string(); @@ -621,9 +407,9 @@ where .await?; } - // Build remaining hanging channels on compute nodes. + // Build **remaining** hanging channels on compute nodes. for (worker_id, hanging_channels) in hanging_channels { - let worker_node = locations.worker_locations.get(&worker_id).unwrap(); + let worker_node = building_locations.worker_locations.get(&worker_id).unwrap(); let client = self.env.stream_client_pool().get(worker_node).await?; let request_id = Uuid::new_v4().to_string(); @@ -640,18 +426,22 @@ where // Register to compaction group beforehand. let hummock_manager_ref = self.hummock_manager.clone(); let registered_table_ids = hummock_manager_ref - .register_table_fragments(&table_fragments, table_properties) + .register_table_fragments(&table_fragments, &table_properties) .await?; + debug_assert_eq!( + registered_table_ids.len(), + table_fragments.all_table_ids().count() + ); revert_funcs.push(Box::pin(async move { if let Err(e) = hummock_manager_ref.unregister_table_ids(®istered_table_ids).await { - tracing::warn!("Failed to unregister compaction group for {:#?}.\nThey will be cleaned up on node restart.\n{:#?}", registered_table_ids, e); + tracing::warn!("Failed to unregister compaction group for {:#?}. They will be cleaned up on node restart. {:#?}", registered_table_ids, e); } })); // In the second stage, each [`WorkerNode`] builds local actors and connect them with // channels. 
- for (worker_id, actors) in worker_actors { - let worker_node = locations.worker_locations.get(&worker_id).unwrap(); + for (worker_id, actors) in building_worker_actors { + let worker_node = building_locations.worker_locations.get(&worker_id).unwrap(); let client = self.env.stream_client_pool().get(worker_node).await?; let request_id = Uuid::new_v4().to_string(); @@ -671,15 +461,16 @@ where let table_id = table_fragments.table_id(); - let split_assignment = self.source_manager.pre_allocate_splits(&table_id).await?; + let init_split_assignment = self.source_manager.pre_allocate_splits(&table_id).await?; if let Err(err) = self .barrier_scheduler .run_command(Command::CreateStreamingJob { table_fragments, - table_mview_map: table_mview_map.clone(), - dispatchers: dispatchers.clone(), - init_split_assignment: split_assignment, + upstream_mview_actors, + dispatchers, + init_split_assignment, + definition: definition.to_string(), }) .await { @@ -719,22 +510,24 @@ where .await?; // Unregister from compaction group afterwards. - for table_fragments in table_fragments_vec { - if let Err(e) = self - .hummock_manager - .unregister_table_fragments(&table_fragments) - .await - { - tracing::warn!( - "Failed to unregister compaction group for {}. It will be unregistered eventually.\n{:#?}", - table_fragments.table_id(), + if let Err(e) = self + .hummock_manager + .unregister_table_fragments_vec(&table_fragments_vec) + .await + { + tracing::warn!( + "Failed to unregister compaction group for {:#?}. They will be cleaned up on node restart. {:#?}", + table_fragments_vec, e ); - } } Ok(()) } + + pub async fn cancel_streaming_jobs(&self, table_ids: Vec) { + self.creating_job_info.cancel_jobs(table_ids).await; + } } #[cfg(test)] @@ -745,9 +538,11 @@ mod tests { use std::time::Duration; use risingwave_common::catalog::TableId; + use risingwave_common::hash::ParallelUnitMapping; use risingwave_pb::common::{HostAddress, WorkerType}; use risingwave_pb::meta::table_fragments::fragment::FragmentDistributionType; use risingwave_pb::meta::table_fragments::Fragment; + use risingwave_pb::stream_plan::stream_node::NodeBody; use risingwave_pb::stream_plan::*; use risingwave_pb::stream_service::stream_service_server::{ StreamService, StreamServiceServer, @@ -768,8 +563,9 @@ mod tests { use crate::hummock::{CompactorManager, HummockManager}; use crate::manager::{ CatalogManager, CatalogManagerRef, ClusterManager, FragmentManager, MetaSrvEnv, + StreamingClusterInfo, }; - use crate::model::ActorId; + use crate::model::{ActorId, FragmentId}; use crate::rpc::metrics::MetaMetrics; use crate::storage::MemStore; use crate::stream::SourceManager; @@ -870,7 +666,7 @@ mod tests { } struct MockServices { - global_stream_manager: GlobalStreamManager, + global_stream_manager: GlobalStreamManagerRef, catalog_manager: CatalogManagerRef, fragment_manager: FragmentManagerRef, state: Arc, @@ -929,6 +725,7 @@ mod tests { cluster_manager.clone(), meta_metrics.clone(), compactor_manager.clone(), + catalog_manager.clone(), ) .await?; @@ -968,7 +765,7 @@ mod tests { let (join_handle_2, shutdown_tx_2) = GlobalBarrierManager::start(barrier_manager).await; Ok(Self { - global_stream_manager: stream_manager, + global_stream_manager: Arc::new(stream_manager), catalog_manager, fragment_manager, state, @@ -981,18 +778,52 @@ mod tests { async fn create_materialized_view( &self, - table_fragments: TableFragments, + table_id: TableId, + fragments: BTreeMap, ) -> MetaResult<()> { - let mut ctx = CreateStreamingJobContext::default(); + // Create fake 
locations where all actors are scheduled to the same parallel unit. + let locations = { + let StreamingClusterInfo { + worker_nodes, + parallel_units, + } = self + .global_stream_manager + .cluster_manager + .get_streaming_cluster_info() + .await; + + let actor_locations = fragments + .values() + .flat_map(|f| &f.actors) + .map(|a| (a.actor_id, parallel_units[&0].clone())) + .collect(); + + Locations { + actor_locations, + worker_locations: worker_nodes, + } + }; + let table = Table { - id: table_fragments.table_id().table_id(), + id: table_id.table_id(), ..Default::default() }; + let table_fragments = TableFragments::new( + table_id, + fragments, + &locations.actor_locations, + Default::default(), + ); + let ctx = CreateStreamingJobContext { + building_locations: locations, + ..Default::default() + }; + self.catalog_manager .start_create_table_procedure(&table) .await?; self.global_stream_manager - .create_streaming_job(table_fragments, &mut ctx) + .create_streaming_job(table_fragments, ctx) .await?; self.catalog_manager .finish_create_table_procedure(vec![], &table) @@ -1053,45 +884,17 @@ mod tests { fragment_type_mask: FragmentTypeFlag::Mview as u32, distribution_type: FragmentDistributionType::Hash as i32, actors: actors.clone(), + state_table_ids: vec![0], + vnode_mapping: Some(ParallelUnitMapping::new_single(0).to_protobuf()), ..Default::default() }, ); - let table_fragments = - TableFragments::new(table_id, fragments, StreamEnvironment::default()); - services.create_materialized_view(table_fragments).await?; - - for actor in actors { - let mut scheduled_actor = services - .state - .actor_streams - .lock() - .unwrap() - .get(&actor.get_actor_id()) - .cloned() - .unwrap(); - scheduled_actor.vnode_bitmap.take().unwrap(); - assert_eq!(scheduled_actor, actor); - assert!(services - .state - .actor_ids - .lock() - .unwrap() - .contains(&actor.get_actor_id())); - assert_eq!( - services - .state - .actor_infos - .lock() - .unwrap() - .get(&actor.get_actor_id()) - .cloned() - .unwrap(), - HostAddress { - host: "127.0.0.1".to_string(), - port: 12334, - } - ); - } + services + .create_materialized_view(table_id, fragments) + .await?; + + let actor_len = services.state.actor_streams.lock().unwrap().len(); + assert_eq!(actor_len, 4); // assert that actors are created let mview_actor_ids = services .fragment_manager @@ -1139,49 +942,19 @@ mod tests { fragment_type_mask: FragmentTypeFlag::Mview as u32, distribution_type: FragmentDistributionType::Hash as i32, actors: actors.clone(), + state_table_ids: vec![0], + vnode_mapping: Some(ParallelUnitMapping::new_single(0).to_protobuf()), ..Default::default() }, ); - let table_fragments = - TableFragments::new(table_id, fragments, StreamEnvironment::default()); services - .create_materialized_view(table_fragments) + .create_materialized_view(table_id, fragments) .await .unwrap(); - for actor in actors { - let mut scheduled_actor = services - .state - .actor_streams - .lock() - .unwrap() - .get(&actor.get_actor_id()) - .cloned() - .unwrap(); - scheduled_actor.vnode_bitmap.take().unwrap(); - assert_eq!(scheduled_actor, actor); - assert!(services - .state - .actor_ids - .lock() - .unwrap() - .contains(&actor.get_actor_id())); - assert_eq!( - services - .state - .actor_infos - .lock() - .unwrap() - .get(&actor.get_actor_id()) - .cloned() - .unwrap(), - HostAddress { - host: "127.0.0.1".to_string(), - port: 12335, - } - ); - } + let actor_len = services.state.actor_streams.lock().unwrap().len(); + assert_eq!(actor_len, 4); // assert that actors are created let 
mview_actor_ids = services .fragment_manager diff --git a/src/meta/src/stream/test_fragmenter.rs b/src/meta/src/stream/test_fragmenter.rs index 2fee9627de51c..61b3d30acc5f6 100644 --- a/src/meta/src/stream/test_fragmenter.rs +++ b/src/meta/src/stream/test_fragmenter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,11 +13,13 @@ // limitations under the License. use std::collections::{HashMap, HashSet}; +use std::num::NonZeroUsize; use std::vec; use itertools::Itertools; use risingwave_common::catalog::{DatabaseId, SchemaId, TableId}; use risingwave_pb::catalog::Table as ProstTable; +use risingwave_pb::common::{ParallelUnit, WorkerNode}; use risingwave_pb::data::data_type::TypeName; use risingwave_pb::data::DataType; use risingwave_pb::expr::agg_call::{Arg, Type}; @@ -33,10 +35,11 @@ use risingwave_pb::stream_plan::{ StreamFragmentGraph as StreamFragmentGraphProto, StreamNode, StreamSource, }; -use crate::manager::{MetaSrvEnv, StreamingJob}; +use crate::manager::{MetaSrvEnv, StreamingClusterInfo, StreamingJob}; use crate::model::TableFragments; -use crate::stream::stream_graph::ActorGraphBuilder; -use crate::stream::{CreateStreamingJobContext, StreamFragmentGraph}; +use crate::stream::{ + ActorGraphBuildResult, ActorGraphBuilder, CompleteStreamFragmentGraph, StreamFragmentGraph, +}; use crate::MetaResult; fn make_inputref(idx: i32) -> ExprNode { @@ -116,8 +119,8 @@ fn make_source_internal_table(id: u32) -> ProstTable { ]; ProstTable { id, - schema_id: SchemaId::placeholder() as u32, - database_id: DatabaseId::placeholder() as u32, + schema_id: SchemaId::placeholder().schema_id, + database_id: DatabaseId::placeholder().database_id, name: String::new(), columns, pk: vec![ColumnOrder { @@ -135,8 +138,8 @@ fn make_internal_table(id: u32, is_agg_value: bool) -> ProstTable { } ProstTable { id, - schema_id: SchemaId::placeholder() as u32, - database_id: DatabaseId::placeholder() as u32, + schema_id: SchemaId::placeholder().schema_id, + database_id: DatabaseId::placeholder().database_id, name: String::new(), columns, pk: vec![ColumnOrder { @@ -151,8 +154,8 @@ fn make_internal_table(id: u32, is_agg_value: bool) -> ProstTable { fn make_empty_table(id: u32) -> ProstTable { ProstTable { id, - schema_id: SchemaId::placeholder() as u32, - database_id: DatabaseId::placeholder() as u32, + schema_id: SchemaId::placeholder().schema_id, + database_id: DatabaseId::placeholder().database_id, name: String::new(), columns: vec![], pk: vec![], @@ -256,6 +259,7 @@ fn make_stream_fragments() -> Vec { is_append_only: false, agg_call_states: vec![make_agg_call_result_state(), make_agg_call_result_state()], result_table: Some(make_empty_table(1)), + ..Default::default() })), input: vec![filter_node], fields: vec![], // TODO: fill this later @@ -298,6 +302,7 @@ fn make_stream_fragments() -> Vec { is_append_only: false, agg_call_states: vec![make_agg_call_result_state(), make_agg_call_result_state()], result_table: Some(make_empty_table(2)), + ..Default::default() })), fields: vec![], // TODO: fill this later input: vec![exchange_node_1], @@ -371,7 +376,6 @@ fn make_fragment_edges() -> Vec { r#type: DispatcherType::Simple as i32, column_indices: vec![], }), - same_worker_node: false, link_id: 4, upstream_id: 1, downstream_id: 0, @@ -381,7 +385,6 @@ fn make_fragment_edges() -> Vec { r#type: DispatcherType::Hash as i32, column_indices: 
vec![0], }), - same_worker_node: false, link_id: 1, upstream_id: 2, downstream_id: 1, @@ -401,6 +404,32 @@ fn make_stream_graph() -> StreamFragmentGraphProto { } } +fn make_cluster_info() -> StreamingClusterInfo { + let parallel_units = (0..8) + .map(|id| { + ( + id, + ParallelUnit { + id, + worker_node_id: 0, + }, + ) + }) + .collect(); + let worker_nodes = std::iter::once(( + 0, + WorkerNode { + id: 0, + ..Default::default() + }, + )) + .collect(); + StreamingClusterInfo { + worker_nodes, + parallel_units, + } +} + #[tokio::test] async fn test_graph_builder() -> MetaResult<()> { let env = MetaSrvEnv::for_test().await; @@ -409,28 +438,26 @@ async fn test_graph_builder() -> MetaResult<()> { let graph = make_stream_graph(); let fragment_graph = StreamFragmentGraph::new(graph, env.id_gen_manager_ref(), &job).await?; - - let mut ctx = CreateStreamingJobContext { - internal_tables: fragment_graph.internal_tables(), - ..Default::default() - }; - - let actor_graph_builder = ActorGraphBuilder::new(fragment_graph, parallel_degree); - - let graph = actor_graph_builder - .generate_graph(env.id_gen_manager_ref(), &mut ctx) + let internal_tables = fragment_graph.internal_tables(); + + let actor_graph_builder = ActorGraphBuilder::new( + CompleteStreamFragmentGraph::for_test(fragment_graph), + make_cluster_info(), + Some(NonZeroUsize::new(parallel_degree).unwrap()), + )?; + let ActorGraphBuildResult { graph, .. } = actor_graph_builder + .generate_graph(env.id_gen_manager_ref(), &job) .await?; - let table_fragments = - TableFragments::new(TableId::default(), graph, StreamEnvironment::default()); + let table_fragments = TableFragments::for_test(TableId::default(), graph); let actors = table_fragments.actors(); let barrier_inject_actor_ids = table_fragments.barrier_inject_actor_ids(); let sink_actor_ids = table_fragments.mview_actor_ids(); - let internal_table_ids = ctx.internal_table_ids(); + assert_eq!(actors.len(), 9); assert_eq!(barrier_inject_actor_ids, vec![6, 7, 8, 9]); assert_eq!(sink_actor_ids, vec![1]); - assert_eq!(internal_table_ids.len(), 3); + assert_eq!(internal_tables.len(), 3); let fragment_upstreams: HashMap<_, _> = table_fragments .fragments @@ -465,6 +492,7 @@ async fn test_graph_builder() -> MetaResult<()> { expected_upstream.insert(9, vec![]); for actor in actors { + println!("actor_id = {}", actor.get_actor_id()); assert_eq!( expected_downstream.get(&actor.get_actor_id()).unwrap(), actor diff --git a/src/meta/src/stream/test_scale.rs b/src/meta/src/stream/test_scale.rs index 9058491e36e3d..d3a42a3c2432c 100644 --- a/src/meta/src/stream/test_scale.rs +++ b/src/meta/src/stream/test_scale.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -19,19 +19,12 @@ mod tests { use itertools::Itertools; use maplit::btreeset; use risingwave_common::buffer::Bitmap; - use risingwave_common::hash::{ParallelUnitId, VirtualNode}; - use risingwave_common::util::compress::decompress_data; + use risingwave_common::hash::{ActorMapping, ParallelUnitId, ParallelUnitMapping, VirtualNode}; use risingwave_pb::common::ParallelUnit; - use risingwave_pb::stream_plan::{ActorMapping, StreamActor}; + use risingwave_pb::stream_plan::StreamActor; use crate::model::ActorId; - use crate::stream::mapping::{ - actor_mapping_from_bitmaps, build_vnode_mapping, vnode_mapping_to_bitmaps, - }; use crate::stream::scale::rebalance_actor_vnode; - use crate::stream::{ - actor_mapping_to_parallel_unit_mapping, parallel_unit_mapping_to_actor_mapping, - }; fn simulated_parallel_unit_nums(min: Option, max: Option) -> Vec { let mut raw = vec![1, 3, 12, 42, VirtualNode::COUNT]; @@ -49,7 +42,7 @@ mod tests { fn build_fake_actors(info: &[(ActorId, ParallelUnitId)]) -> Vec { let parallel_units = generate_parallel_units(info); - let vnode_bitmaps = vnode_mapping_to_bitmaps(build_vnode_mapping(¶llel_units)); + let vnode_bitmaps = ParallelUnitMapping::build(¶llel_units).to_bitmaps(); info.iter() .map(|(actor_id, parallel_unit_id)| StreamActor { @@ -111,13 +104,13 @@ mod tests { .map(|i| (i as ActorId, i as ParallelUnitId)) .collect_vec(); let parallel_units = generate_parallel_units(&info); - let vnode_mapping = build_vnode_mapping(¶llel_units); + let vnode_mapping = ParallelUnitMapping::build(¶llel_units); assert_eq!(vnode_mapping.len(), VirtualNode::COUNT); let mut check: HashMap> = HashMap::new(); - for (idx, parallel_unit_id) in vnode_mapping.into_iter().enumerate() { - check.entry(parallel_unit_id).or_default().push(idx); + for (vnode, parallel_unit_id) in vnode_mapping.iter_with_vnode() { + check.entry(parallel_unit_id).or_default().push(vnode); } assert_eq!(check.len(), parallel_units_num); @@ -140,7 +133,7 @@ mod tests { .map(|i| (i as ActorId, i as ParallelUnitId)) .collect_vec(); let parallel_units = generate_parallel_units(&info); - let bitmaps = vnode_mapping_to_bitmaps(build_vnode_mapping(¶llel_units)); + let bitmaps = ParallelUnitMapping::build(¶llel_units).to_bitmaps(); check_bitmaps(&bitmaps); } } @@ -153,23 +146,16 @@ mod tests { let actor_to_parallel_unit_map = (0..parallel_unit_num) .map(|i| (i as ActorId, i as ParallelUnitId)) .collect(); - let parallel_unit_mapping = actor_mapping_to_parallel_unit_mapping( - 1, - &actor_to_parallel_unit_map, - &actor_mapping, - ); + let parallel_unit_mapping = actor_mapping.to_parallel_unit(&actor_to_parallel_unit_map); let parallel_unit_to_actor_map: HashMap<_, _> = actor_to_parallel_unit_map .into_iter() .map(|(k, v)| (v, k)) .collect(); - let new_actor_mapping = parallel_unit_mapping_to_actor_mapping( - ¶llel_unit_mapping, - ¶llel_unit_to_actor_map, - ); + let new_actor_mapping = parallel_unit_mapping.to_actor(¶llel_unit_to_actor_map); - assert!(actor_mapping.eq(&new_actor_mapping)) + assert_eq!(actor_mapping, new_actor_mapping) } } @@ -191,7 +177,7 @@ mod tests { }) .collect(); - (actor_mapping_from_bitmaps(&bitmaps), bitmaps) + (ActorMapping::from_bitmaps(&bitmaps), bitmaps) } #[test] @@ -200,17 +186,10 @@ mod tests { let (actor_mapping, bitmaps) = generate_actor_mapping(parallel_unit_num); check_bitmaps(&bitmaps); - let ActorMapping { - original_indices, - data, - } = actor_mapping; - - let raw = decompress_data(&original_indices, &data); - for (actor_id, bitmap) in &bitmaps { - for (idx, value) in 
raw.iter().enumerate() { - if bitmap.is_set(idx) { - assert_eq!(*value, *actor_id); + for (vnode, value) in actor_mapping.iter_with_vnode() { + if bitmap.is_set(vnode.to_index()) { + assert_eq!(value, *actor_id); } } } diff --git a/src/object_store/Cargo.toml b/src/object_store/Cargo.toml index af843582f24dc..ae646f7cb6467 100644 --- a/src/object_store/Cargo.toml +++ b/src/object_store/Cargo.toml @@ -12,18 +12,18 @@ repository = { workspace = true } async-trait = "0.1" async_stack_trace = { path = "../utils/async_stack_trace" } aws-config = { workspace = true } -aws-endpoint = { workspace = true } -aws-sdk-s3 = { workspace = true } +aws-sdk-s3 = { version = "0.2.15", package = "madsim-aws-sdk-s3" } aws-smithy-http = { workspace = true } aws-smithy-types = { workspace = true } -aws-types = { workspace = true } bytes = { version = "1", features = ["serde"] } crc32fast = "1.3.2" fail = "0.5" futures = { version = "0.3", default-features = false, features = ["alloc"] } hyper = "0.14" itertools = "0.10" +opendal = "0.26.2" prometheus = { version = "0.13", features = ["process"] } +random-string = "1.0" risingwave_common = { path = "../common" } spin = "0.9" tempfile = "3" @@ -32,6 +32,16 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "fs", ] } tracing = "0.1" +# This crate is excluded from hakari (see hakari.toml) after hdfs is introduced... +# +# [target.'cfg(not(madsim))'.dependencies] +# workspace-hack = { path = "../workspace-hack" } +# +# [package.metadata.cargo-machete] +# ignored = ["workspace-hack"] +# +# [package.metadata.cargo-udeps.ignore] +# normal = ["workspace-hack"] -[target.'cfg(not(madsim))'.dependencies] -workspace-hack = { path = "../workspace-hack" } +# [features] +# hdfs-backend = ["opendal/services-hdfs"] diff --git a/src/object_store/src/lib.rs b/src/object_store/src/lib.rs index 33bb2d3beb3d0..3ecef33bf6cfe 100644 --- a/src/object_store/src/lib.rs +++ b/src/object_store/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/object_store/src/object/disk.rs b/src/object_store/src/object/disk.rs index 5651fd0ce4c8d..3cf4f3895babd 100644 --- a/src/object_store/src/object/disk.rs +++ b/src/object_store/src/object/disk.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/object_store/src/object/error.rs b/src/object_store/src/object/error.rs index c1735759e80f3..a7d5dced17fcb 100644 --- a/src/object_store/src/object/error.rs +++ b/src/object_store/src/object/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ use std::marker::{Send, Sync}; use risingwave_common::error::BoxedError; use thiserror::Error; +use tokio::sync::oneshot::error::RecvError; #[derive(Error, Debug)] enum ObjectErrorInner { @@ -31,6 +32,8 @@ enum ObjectErrorInner { inner: io::Error, }, + #[error(transparent)] + Opendal(BoxedError), #[error("Internal error: {0}")] Internal(String), } @@ -68,6 +71,10 @@ impl ObjectError { ObjectErrorInner::Disk { msg, inner: err }.into() } + pub fn opendal(err: impl Into) -> Self { + ObjectErrorInner::Opendal(err.into()).into() + } + pub fn s3(err: impl Into) -> Self { ObjectErrorInner::S3(err.into()).into() } @@ -87,5 +94,22 @@ impl From for ObjectError { ObjectErrorInner::S3(e.into()).into() } } +impl From for ObjectError { + fn from(e: opendal::Error) -> Self { + ObjectErrorInner::Opendal(e.into()).into() + } +} + +impl From for ObjectError { + fn from(e: io::Error) -> Self { + ObjectErrorInner::Opendal(e.into()).into() + } +} + +impl From for ObjectError { + fn from(e: RecvError) -> Self { + ObjectErrorInner::Internal(e.to_string()).into() + } +} pub type ObjectResult = std::result::Result; diff --git a/src/object_store/src/object/mem.rs b/src/object_store/src/object/mem.rs index a7816c563cf84..676c24b8f036a 100644 --- a/src/object_store/src/object/mem.rs +++ b/src/object_store/src/object/mem.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/object_store/src/object/mod.rs b/src/object_store/src/object/mod.rs index 19da680bb87c3..383494d14e747 100644 --- a/src/object_store/src/object/mod.rs +++ b/src/object_store/src/object/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,6 +21,9 @@ use tokio::io::{AsyncRead, AsyncReadExt}; pub mod mem; pub use mem::*; +pub mod opendal_engine; +pub use opendal_engine::*; + pub mod s3; use async_stack_trace::StackTrace; pub use s3::*; @@ -184,6 +187,7 @@ pub trait ObjectStore: Send + Sync { pub enum ObjectStoreImpl { InMem(MonitoredObjectStore), Disk(MonitoredObjectStore), + Opendal(MonitoredObjectStore), S3(MonitoredObjectStore), S3Compatible(MonitoredObjectStore), Hybrid { @@ -232,6 +236,10 @@ macro_rules! object_store_impl_method_body { assert!(path.is_remote(), "get local path in pure disk object store: {:?}", $path); $dispatch_macro!(disk, $method_name, path.as_str() $(, $args)*) }, + ObjectStoreImpl::Opendal(opendal) => { + assert!(path.is_remote(), "get local path in pure opendal object store engine: {:?}", $path); + $dispatch_macro!(opendal, $method_name, path.as_str() $(, $args)*) + }, ObjectStoreImpl::S3(s3) => { assert!(path.is_remote(), "get local path in pure s3 object store: {:?}", $path); $dispatch_macro!(s3, $method_name, path.as_str() $(, $args)*) @@ -248,6 +256,7 @@ macro_rules! 
object_store_impl_method_body { ObjectStorePath::Local(_) => match local.as_ref() { ObjectStoreImpl::InMem(in_mem) => $dispatch_macro!(in_mem, $method_name, path.as_str() $(, $args)*), ObjectStoreImpl::Disk(disk) => $dispatch_macro!(disk, $method_name, path.as_str() $(, $args)*), + ObjectStoreImpl::Opendal(_) => unreachable!("Opendal object store cannot be used as local object store"), ObjectStoreImpl::S3(_) => unreachable!("S3 cannot be used as local object store"), ObjectStoreImpl::S3Compatible(_) => unreachable!("S3 compatible cannot be used as local object store"), ObjectStoreImpl::Hybrid {..} => unreachable!("local object store of hybrid object store cannot be hybrid") @@ -255,6 +264,7 @@ macro_rules! object_store_impl_method_body { ObjectStorePath::Remote(_) => match remote.as_ref() { ObjectStoreImpl::InMem(in_mem) => $dispatch_macro!(in_mem, $method_name, path.as_str() $(, $args)*), ObjectStoreImpl::Disk(disk) => $dispatch_macro!(disk, $method_name, path.as_str() $(, $args)*), + ObjectStoreImpl::Opendal(opendal) => $dispatch_macro!(opendal, $method_name, path.as_str() $(, $args)*), ObjectStoreImpl::S3(s3) => $dispatch_macro!(s3, $method_name, path.as_str() $(, $args)*), ObjectStoreImpl::S3Compatible(s3_compatible) => $dispatch_macro!(s3_compatible, $method_name, path.as_str() $(, $args)*), ObjectStoreImpl::Hybrid {..} => unreachable!("remote object store of hybrid object store cannot be hybrid") @@ -286,6 +296,10 @@ macro_rules! object_store_impl_method_body_slice { assert!(paths_loc.is_empty(), "get local path in pure disk object store: {:?}", $paths); $dispatch_macro!(disk, $method_name, &paths_rem $(, $args)*) }, + ObjectStoreImpl::Opendal(opendal) => { + assert!(paths_loc.is_empty(), "get local path in pure opendal object store: {:?}", $paths); + $dispatch_macro!(opendal, $method_name, &paths_rem $(, $args)*) + }, ObjectStoreImpl::S3(s3) => { assert!(paths_loc.is_empty(), "get local path in pure s3 object store: {:?}", $paths); $dispatch_macro!(s3, $method_name, &paths_rem $(, $args)*) @@ -302,6 +316,7 @@ macro_rules! object_store_impl_method_body_slice { match local.as_ref() { ObjectStoreImpl::InMem(in_mem) => $dispatch_macro!(in_mem, $method_name, &paths_loc $(, $args)*), ObjectStoreImpl::Disk(disk) => $dispatch_macro!(disk, $method_name, &paths_loc $(, $args)*), + ObjectStoreImpl::Opendal(_) => unreachable!("Opendal object store cannot be used as local object store"), ObjectStoreImpl::S3(_) => unreachable!("S3 cannot be used as local object store"), ObjectStoreImpl::S3Compatible(_) => unreachable!("S3 cannot be used as local object store"), ObjectStoreImpl::Hybrid {..} => unreachable!("local object store of hybrid object store cannot be hybrid") @@ -311,6 +326,7 @@ macro_rules! 
object_store_impl_method_body_slice { match remote.as_ref() { ObjectStoreImpl::InMem(in_mem) => $dispatch_macro!(in_mem, $method_name, &paths_rem $(, $args)*), ObjectStoreImpl::Disk(disk) => $dispatch_macro!(disk, $method_name, &paths_rem $(, $args)*), + ObjectStoreImpl::Opendal(opendal) => $dispatch_macro!(opendal, $method_name, &paths_rem $(, $args)*), ObjectStoreImpl::S3(s3) => $dispatch_macro!(s3, $method_name, &paths_rem $(, $args)*), ObjectStoreImpl::S3Compatible(s3) => $dispatch_macro!(s3, $method_name, &paths_rem $(, $args)*), ObjectStoreImpl::Hybrid {..} => unreachable!("remote object store of hybrid object store cannot be hybrid") @@ -381,6 +397,7 @@ impl ObjectStoreImpl { match self { ObjectStoreImpl::InMem(store) => store.inner.get_object_prefix(obj_id), ObjectStoreImpl::Disk(store) => store.inner.get_object_prefix(obj_id), + ObjectStoreImpl::Opendal(store) => store.inner.get_object_prefix(obj_id), ObjectStoreImpl::S3(store) => store.inner.get_object_prefix(obj_id), ObjectStoreImpl::S3Compatible(store) => store.inner.get_object_prefix(obj_id), ObjectStoreImpl::Hybrid { local, remote } => { @@ -791,7 +808,6 @@ impl MonitoredObjectStore { pub async fn parse_remote_object_store( url: &str, metrics: Arc, - object_store_use_batch_delete: bool, ident: &str, ) -> ObjectStoreImpl { match url { @@ -803,6 +819,25 @@ pub async fn parse_remote_object_store( .await .monitored(metrics), ), + #[cfg(feature = "hdfs-backend")] + hdfs if hdfs.starts_with("hdfs://") => { + let hdfs = hdfs.strip_prefix("hdfs://").unwrap(); + let (namenode, root) = hdfs.split_once('@').unwrap(); + ObjectStoreImpl::Opendal( + OpendalObjectStore::new_hdfs_engine(namenode.to_string(), root.to_string()) + .unwrap() + .monitored(metrics), + ) + } + oss if oss.starts_with("oss://") => { + let oss = oss.strip_prefix("oss://").unwrap(); + let (bucket, root) = oss.split_once('@').unwrap(); + ObjectStoreImpl::Opendal( + OpendalObjectStore::new_oss_engine(bucket.to_string(), root.to_string()) + .unwrap() + .monitored(metrics), + ) + } s3_compatible if s3_compatible.starts_with("s3-compatible://") => { ObjectStoreImpl::S3Compatible( S3ObjectStore::new_s3_compatible( @@ -811,7 +846,6 @@ pub async fn parse_remote_object_store( .unwrap() .to_string(), metrics.clone(), - object_store_use_batch_delete, ) .await .monitored(metrics), @@ -860,6 +894,16 @@ pub fn parse_local_object_store(url: &str, metrics: Arc) -> tracing::warn!("You're using Hummock in-memory local object store. This should never be used in benchmarks and production environment."); ObjectStoreImpl::InMem(InMemObjectStore::new().monitored(metrics)) } + #[cfg(feature = "hdfs-backend")] + hdfs if hdfs.starts_with("hdfs://") => { + let hdfs = hdfs.strip_prefix("hdfs://").unwrap(); + let (namenode, root) = hdfs.split_once('@').unwrap(); + ObjectStoreImpl::Opendal( + OpendalObjectStore::new_hdfs_engine(namenode.to_string(), root.to_string()) + .unwrap() + .monitored(metrics), + ) + } other => { unimplemented!( "{} Hummock only supports s3, minio, disk, and memory for now.", diff --git a/src/object_store/src/object/object_metrics.rs b/src/object_store/src/object/object_metrics.rs index 07bac8df31f5e..9379c3300fc70 100644 --- a/src/object_store/src/object/object_metrics.rs +++ b/src/object_store/src/object/object_metrics.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
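Note on the new backends wired into `parse_remote_object_store` above: the OpenDAL-backed engines are selected by URL scheme, with `hdfs://<namenode>@<root>` and `oss://<bucket>@<root>` split via `strip_prefix` plus `split_once('@')`. A minimal sketch of that parsing convention follows; the helper name, example hosts, and `Option`-based error handling are illustrative only and not part of the patch.

```rust
/// Illustrative only: mirrors the `strip_prefix` + `split_once('@')` convention
/// used by `parse_remote_object_store` for the OpenDAL-backed engines.
fn split_opendal_url(url: &str) -> Option<(&'static str, &str, &str)> {
    if let Some(rest) = url.strip_prefix("hdfs://") {
        // e.g. "hdfs://namenode-host:8020@/risingwave" -> ("hdfs", namenode, root)
        let (namenode, root) = rest.split_once('@')?;
        Some(("hdfs", namenode, root))
    } else if let Some(rest) = url.strip_prefix("oss://") {
        // e.g. "oss://my-bucket@/risingwave" -> ("oss", bucket, root)
        let (bucket, root) = rest.split_once('@')?;
        Some(("oss", bucket, root))
    } else {
        None
    }
}

fn main() {
    assert_eq!(
        split_opendal_url("oss://my-bucket@/risingwave"),
        Some(("oss", "my-bucket", "/risingwave"))
    );
    assert_eq!(split_opendal_url("s3://bucket"), None);
}
```

Note the `root` must be an absolute path for both engines, matching the `builder.root(&root)` calls in the new `hdfs.rs` and `oss.rs` modules below.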
diff --git a/src/object_store/src/object/opendal_engine/hdfs.rs b/src/object_store/src/object/opendal_engine/hdfs.rs new file mode 100644 index 0000000000000..6f3fda4095cdf --- /dev/null +++ b/src/object_store/src/object/opendal_engine/hdfs.rs @@ -0,0 +1,37 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use opendal::services::Hdfs; +use opendal::Operator; + +use super::{EngineType, OpendalObjectStore}; +use crate::object::ObjectResult; +impl OpendalObjectStore { + /// create opendal hdfs engine. + pub fn new_hdfs_engine(namenode: String, root: String) -> ObjectResult { + // Create hdfs backend builder. + let mut builder = Hdfs::default(); + // Set the name node for hdfs. + builder.name_node(&namenode); + // Set the root for hdfs, all operations will happen under this root. + // NOTE: the root must be absolute path. + builder.root(&root); + + let op: Operator = Operator::create(builder)?.finish(); + Ok(Self { + op, + engine_type: EngineType::Hdfs, + }) + } +} diff --git a/src/object_store/src/object/opendal_engine/mod.rs b/src/object_store/src/object/opendal_engine/mod.rs new file mode 100644 index 0000000000000..15c7b6b6f3ca5 --- /dev/null +++ b/src/object_store/src/object/opendal_engine/mod.rs @@ -0,0 +1,24 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub mod opendal_object_store; +pub use opendal_object_store::*; + +#[cfg(feature = "hdfs-backend")] +pub mod hdfs; +#[cfg(feature = "hdfs-backend")] +pub use hdfs::*; + +pub mod oss; +pub use oss::*; diff --git a/src/object_store/src/object/opendal_engine/opendal_object_store.rs b/src/object_store/src/object/opendal_engine/opendal_object_store.rs new file mode 100644 index 0000000000000..d3001e1200d34 --- /dev/null +++ b/src/object_store/src/object/opendal_engine/opendal_object_store.rs @@ -0,0 +1,371 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use bytes::{BufMut, Bytes, BytesMut}; +use fail::fail_point; +use futures::future::try_join_all; +use futures::StreamExt; +use itertools::Itertools; +use opendal::services::Memory; +use opendal::Operator; +use tokio::io::AsyncRead; + +use crate::object::{ + BlockLocation, BoxedStreamingUploader, ObjectError, ObjectMetadata, ObjectResult, ObjectStore, + StreamingUploader, +}; + +/// Opendal object storage. +#[derive(Clone)] +pub struct OpendalObjectStore { + pub(crate) op: Operator, + pub(crate) engine_type: EngineType, +} +#[derive(Clone)] +pub enum EngineType { + Memory, + Hdfs, + Oss, +} + +impl OpendalObjectStore { + /// create opendal memory engine, used for unit tests. + pub fn new_memory_engine() -> ObjectResult { + // Create memory backend builder. + let builder = Memory::default(); + + let op: Operator = Operator::create(builder)?.finish(); + Ok(Self { + op, + engine_type: EngineType::Memory, + }) + } +} + +#[async_trait::async_trait] +impl ObjectStore for OpendalObjectStore { + fn get_object_prefix(&self, _obj_id: u64) -> String { + String::default() + } + + async fn upload(&self, path: &str, obj: Bytes) -> ObjectResult<()> { + if obj.is_empty() { + Err(ObjectError::internal("upload empty object")) + } else { + self.op.object(path).write(obj).await?; + Ok(()) + } + } + + fn streaming_upload(&self, path: &str) -> ObjectResult { + Ok(Box::new(OpenDalStreamingUploader::new( + self.op.clone(), + path.to_string(), + ))) + } + + async fn read(&self, path: &str, block: Option) -> ObjectResult { + match block { + Some(block) => { + let range = block.offset as u64..(block.offset + block.size) as u64; + let res = Bytes::from(self.op.object(path).range_read(range).await?); + + if block.size != res.len() { + Err(ObjectError::internal("bad block offset and size")) + } else { + Ok(res) + } + } + None => Ok(Bytes::from(self.op.object(path).read().await?)), + } + } + + async fn readv(&self, path: &str, block_locs: &[BlockLocation]) -> ObjectResult> { + let futures = block_locs + .iter() + .map(|block_loc| self.read(path, Some(*block_loc))) + .collect_vec(); + try_join_all(futures).await + } + + /// Returns a stream reading the object specified in `path`. If given, the stream starts at the + /// byte with index `start_pos` (0-based). As far as possible, the stream only loads the amount + /// of data into memory that is read from the stream. + async fn streaming_read( + &self, + path: &str, + start_pos: Option, + ) -> ObjectResult> { + fail_point!("opendal_streaming_read_err", |_| Err( + ObjectError::internal("opendal streaming read error") + )); + + let reader = match start_pos { + Some(start_position) => { + self.op + .object(path) + .range_reader(start_position as u64..) + .await? + } + None => self.op.object(path).reader().await?, + }; + + Ok(Box::new(reader)) + } + + async fn metadata(&self, path: &str) -> ObjectResult { + let opendal_metadata = self.op.object(path).metadata().await?; + let key = path.to_string(); + let last_modified = match opendal_metadata.last_modified() { + Some(t) => t.unix_timestamp() as f64, + None => 0_f64, + }; + + let total_size = opendal_metadata.content_length() as usize; + let metadata = ObjectMetadata { + key, + last_modified, + total_size, + }; + Ok(metadata) + } + + async fn delete(&self, path: &str) -> ObjectResult<()> { + self.op.object(path).delete().await?; + Ok(()) + } + + /// Deletes the objects with the given paths permanently from the storage. If an object + /// specified in the request is not found, it will be considered as successfully deleted. 
+ async fn delete_objects(&self, paths: &[String]) -> ObjectResult<()> { + for path in paths { + self.op.object(path).delete().await?; + } + Ok(()) + } + + async fn list(&self, prefix: &str) -> ObjectResult> { + let mut object_lister = self.op.object(prefix).list().await?; + let mut metadata_list = vec![]; + while let Some(obj) = object_lister.next().await { + let object = obj?; + let key = prefix.to_string(); + let om = object.metadata().await?; + + let last_modified = match om.last_modified() { + Some(t) => t.unix_timestamp() as f64, + None => 0_f64, + }; + + let total_size = om.content_length() as usize; + let metadata = ObjectMetadata { + key, + last_modified, + total_size, + }; + metadata_list.push(metadata); + } + Ok(metadata_list) + } + + fn store_media_type(&self) -> &'static str { + match self.engine_type { + EngineType::Memory => "Memory", + EngineType::Hdfs => "Hdfs", + EngineType::Oss => "Oss", + } + } +} + +/// Store multiple parts in a map, and concatenate them on finish. +pub struct OpenDalStreamingUploader { + op: Operator, + path: String, + buffer: BytesMut, +} +impl OpenDalStreamingUploader { + pub fn new(op: Operator, path: String) -> Self { + Self { + op, + path, + buffer: BytesMut::new(), + } + } +} +#[async_trait::async_trait] +impl StreamingUploader for OpenDalStreamingUploader { + async fn write_bytes(&mut self, data: Bytes) -> ObjectResult<()> { + self.buffer.put(data); + Ok(()) + } + + async fn finish(mut self: Box) -> ObjectResult<()> { + self.op.object(&self.path).write(self.buffer).await?; + + Ok(()) + } + + fn get_memory_usage(&self) -> u64 { + self.buffer.capacity() as u64 + } +} + +#[cfg(test)] +mod tests { + use bytes::Bytes; + + use super::*; + + fn gen_test_payload() -> Vec { + let mut ret = Vec::new(); + for i in 0..100000 { + ret.extend(format!("{:05}", i).as_bytes()); + } + ret + } + #[tokio::test] + async fn test_memory_upload() { + let block = Bytes::from("123456"); + let store = OpendalObjectStore::new_memory_engine().unwrap(); + store.upload("/abc", block).await.unwrap(); + + // No such object. + store + .read("/ab", Some(BlockLocation { offset: 0, size: 3 })) + .await + .unwrap_err(); + + let bytes = store + .read("/abc", Some(BlockLocation { offset: 4, size: 2 })) + .await + .unwrap(); + assert_eq!(String::from_utf8(bytes.to_vec()).unwrap(), "56".to_string()); + + // Overflow. + store + .read( + "/abc", + Some(BlockLocation { + offset: 4, + size: 40, + }), + ) + .await + .unwrap_err(); + + store.delete("/abc").await.unwrap(); + + // No such object. + store + .read("/abc", Some(BlockLocation { offset: 0, size: 3 })) + .await + .unwrap_err(); + } + + #[tokio::test] + async fn test_memory_metadata() { + let block = Bytes::from("123456"); + let path = "/abc".to_string(); + let obj_store = OpendalObjectStore::new_memory_engine().unwrap(); + obj_store.upload("/abc", block).await.unwrap(); + + let metadata = obj_store.metadata("/abc").await.unwrap(); + assert_eq!(metadata.total_size, 6); + obj_store.delete(&path).await.unwrap(); + } + + // Currently OpenDAL does not support delete objects operation, but they are planning this + // feature. So let's not delete this unit test now. 
https://github.com/datafuselabs/opendal/issues/1279 + + // #[tokio::test] + // async fn test_memory_delete_objects() { + // let block1 = Bytes::from("123456"); + // let block2 = Bytes::from("987654"); + // let store = OpendalObjectStore::new_memory_engine().unwrap(); + // store.upload("/abc", block1).await.unwrap(); + // store.upload("/klm", block2).await.unwrap(); + + // assert_eq!(store.list("").await.unwrap().len(), 2); + + // let str_list = [ + // String::from("/abc"), + // String::from("/klm"), + // String::from("/xyz"), + // ]; + + // store.delete_objects(&str_list).await.unwrap(); + + // assert_eq!(store.list("").await.unwrap().len(), 0); + // } + + #[tokio::test] + async fn test_memory_read_multi_block() { + let store = OpendalObjectStore::new_memory_engine().unwrap(); + let payload = gen_test_payload(); + store + .upload("test.obj", Bytes::from(payload.clone())) + .await + .unwrap(); + let metadata = store.metadata("test.obj").await.unwrap(); + assert_eq!(payload.len(), metadata.total_size); + let test_loc = vec![(0, 1000), (10000, 1000), (20000, 1000)]; + let read_data = store + .readv( + "test.obj", + &test_loc + .iter() + .map(|(offset, size)| BlockLocation { + offset: *offset, + size: *size, + }) + .collect_vec(), + ) + .await + .unwrap(); + assert_eq!(test_loc.len(), read_data.len()); + for (i, (offset, size)) in test_loc.iter().enumerate() { + assert_eq!(&payload[*offset..(*offset + *size)], &read_data[i][..]); + } + store.delete("test.obj").await.unwrap(); + } + + #[tokio::test] + async fn test_memory_streaming_upload() { + let blocks = vec![Bytes::from("123"), Bytes::from("456"), Bytes::from("789")]; + let obj = Bytes::from("123456789"); + + let store = OpendalObjectStore::new_memory_engine().unwrap(); + let mut uploader = store.streaming_upload("/temp").unwrap(); + + for block in blocks { + uploader.write_bytes(block).await.unwrap(); + } + uploader.finish().await.unwrap(); + + // Read whole object. + let read_obj = store.read("/temp", None).await.unwrap(); + assert!(read_obj.eq(&obj)); + + // Read part of the object. + let read_obj = store + .read("/temp", Some(BlockLocation { offset: 4, size: 2 })) + .await + .unwrap(); + assert_eq!( + String::from_utf8(read_obj.to_vec()).unwrap(), + "56".to_string() + ); + store.delete("/temp").await.unwrap(); + } +} diff --git a/src/object_store/src/object/opendal_engine/oss.rs b/src/object_store/src/object/opendal_engine/oss.rs new file mode 100644 index 0000000000000..567a1051dd232 --- /dev/null +++ b/src/object_store/src/object/opendal_engine/oss.rs @@ -0,0 +1,47 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use opendal::services::Oss; +use opendal::Operator; + +use super::{EngineType, OpendalObjectStore}; +use crate::object::ObjectResult; +impl OpendalObjectStore { + /// create opendal oss engine. + pub fn new_oss_engine(bucket: String, root: String) -> ObjectResult { + // Create oss backend builder. 
+ let mut builder = Oss::default(); + + builder.bucket(&bucket); + + builder.root(&root); + + let endpoint = std::env::var("OSS_ENDPOINT") + .unwrap_or_else(|_| panic!("OSS_ENDPOINT not found from environment variables")); + let access_key_id = std::env::var("OSS_ACCESS_KEY_ID") + .unwrap_or_else(|_| panic!("OSS_ACCESS_KEY_ID not found from environment variables")); + let access_key_secret = std::env::var("OSS_ACCESS_KEY_SECRET").unwrap_or_else(|_| { + panic!("OSS_ACCESS_KEY_SECRET not found from environment variables") + }); + + builder.endpoint(&endpoint); + builder.access_key_id(&access_key_id); + builder.access_key_secret(&access_key_secret); + let op: Operator = Operator::create(builder)?.finish(); + Ok(Self { + op, + engine_type: EngineType::Oss, + }) + } +} diff --git a/src/object_store/src/object/s3.rs b/src/object_store/src/object/s3.rs index 81114f21fc04c..9f11db934ccc3 100644 --- a/src/object_store/src/object/s3.rs +++ b/src/object_store/src/object/s3.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ use futures::future::try_join_all; use futures::stream; use hyper::Body; use itertools::Itertools; +use random_string::generate; use tokio::io::AsyncRead; use tokio::task::JoinHandle; @@ -523,9 +524,8 @@ impl S3ObjectStore { .load() .await; let client = Client::new(&sdk_config); - Self::configure_bucket_lifecycle(&client, &bucket) - .await - .unwrap(); + Self::configure_bucket_lifecycle(&client, &bucket).await; + Self { client, bucket, @@ -535,11 +535,7 @@ impl S3ObjectStore { } } - pub async fn new_s3_compatible( - bucket: String, - metrics: Arc, - object_store_use_batch_delete: bool, - ) -> Self { + pub async fn new_s3_compatible(bucket: String, metrics: Arc) -> Self { // Retry 3 times if we get server-side errors or throttling errors // load from env let region = std::env::var("S3_COMPATIBLE_REGION").unwrap_or_else(|_| { @@ -568,9 +564,32 @@ impl S3ObjectStore { .await; let client = Client::new(&sdk_config); - Self::configure_bucket_lifecycle(&client, bucket.as_str()) + Self::configure_bucket_lifecycle(&client, bucket.as_str()).await; + + // check whether use batch delete + let charset = "1234567890"; + let test_path = "risingwave_check_batch_delete/".to_string() + &generate(10, charset); + client + .put_object() + .bucket(&bucket) + .body(aws_sdk_s3::types::ByteStream::from(Bytes::from( + "test batch delete", + ))) + .key(&test_path) + .send() .await .unwrap(); + let obj_ids = vec![ObjectIdentifier::builder().key(&test_path).build()]; + + let delete_builder = Delete::builder().set_objects(Some(obj_ids)); + let object_store_use_batch_delete = client + .delete_objects() + .bucket(&bucket) + .delete(delete_builder.build()) + .send() + .await + .is_ok(); + Self { client, bucket: bucket.to_string(), @@ -587,8 +606,13 @@ impl S3ObjectStore { let (secret_access_key, rest) = rest.split_once('@').unwrap(); let (address, bucket) = rest.split_once('/').unwrap(); - let loader = aws_config::ConfigLoader::default(); - let builder = aws_sdk_s3::config::Builder::from(&loader.load().await) + #[cfg(madsim)] + let builder = aws_sdk_s3::config::Builder::new(); + #[cfg(not(madsim))] + let builder = + aws_sdk_s3::config::Builder::from(&aws_config::ConfigLoader::default().load().await); + + let config = builder .region(Region::new("custom")) .endpoint_resolver(Endpoint::immutable( format!("http://{}", 
address).try_into().unwrap(), @@ -597,8 +621,8 @@ impl S3ObjectStore { access_key_id, secret_access_key, None, - )); - let config = builder.build(); + )) + .build(); let client = Client::from_conf(config); Self { @@ -660,7 +684,7 @@ impl S3ObjectStore { /// - /// - MinIO /// - - async fn configure_bucket_lifecycle(client: &Client, bucket: &str) -> ObjectResult<()> { + async fn configure_bucket_lifecycle(client: &Client, bucket: &str) { // Check if lifecycle is already configured to avoid overriding existing configuration. let mut configured_rules = vec![]; let get_config_result = client @@ -698,19 +722,23 @@ impl S3ObjectStore { let bucket_lifecycle_config = BucketLifecycleConfiguration::builder() .rules(bucket_lifecycle_rule) .build(); - client + if client .put_bucket_lifecycle_configuration() .bucket(bucket) .lifecycle_configuration(bucket_lifecycle_config) .send() - .await?; - tracing::info!( - "S3 bucket {:?} is configured to automatically purge abandoned MultipartUploads after {} days", - bucket, - S3_INCOMPLETE_MULTIPART_UPLOAD_RETENTION_DAYS, - ); + .await + .is_ok() + { + tracing::info!( + "S3 bucket {:?} is configured to automatically purge abandoned MultipartUploads after {} days", + bucket, + S3_INCOMPLETE_MULTIPART_UPLOAD_RETENTION_DAYS, + ); + } else { + tracing::warn!("Failed to configure life cycle rule for S3 bucket: {:?}. It is recommended to configure it manually to avoid unnecessary storage cost.", bucket); + } } - Ok(()) } } diff --git a/src/prost/Cargo.toml b/src/prost/Cargo.toml index c77526fe3d0dc..1b1c98d5ca282 100644 --- a/src/prost/Cargo.toml +++ b/src/prost/Cargo.toml @@ -8,13 +8,11 @@ license = { workspace = true } repository = { workspace = true } [dependencies] -bytes = "1" pbjson = "0.5" prost = "0.11" prost-helpers = { path = "helpers" } -prost-types = "0.11" serde = { version = "1", features = ["derive"] } -tonic = { version = "0.2.11", package = "madsim-tonic" } +tonic = { version = "0.2.14", package = "madsim-tonic" } [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../workspace-hack" } @@ -22,3 +20,9 @@ workspace-hack = { path = "../workspace-hack" } [build-dependencies] pbjson-build = "0.5" tonic-build = { version = "0.2.11", package = "madsim-tonic-build" } + +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] diff --git a/src/prost/build.rs b/src/prost/build.rs index d16b385fbf1f9..c19688daea986 100644 --- a/src/prost/build.rs +++ b/src/prost/build.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
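The `new_oss_engine` constructor added in `opendal_engine/oss.rs` above reads its endpoint and credentials from the environment and panics when any of them is missing. Below is a minimal in-crate usage sketch; only the three environment variable names come from the code above, while the bucket, root, endpoint and credential values are hypothetical.

// In-crate sketch: `new_oss_engine` panics if any of these variables is unset,
// so they must be exported (or set programmatically, as here) beforehand.
std::env::set_var("OSS_ENDPOINT", "https://oss-cn-hangzhou.aliyuncs.com");
std::env::set_var("OSS_ACCESS_KEY_ID", "my-access-key-id");
std::env::set_var("OSS_ACCESS_KEY_SECRET", "my-access-key-secret");

let store = OpendalObjectStore::new_oss_engine("my-bucket".to_string(), "risingwave".to_string())
    .expect("failed to create the OSS-backed OpenDAL object store");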
@@ -32,7 +32,7 @@ fn main() -> Result<(), Box> { "expr", "health", "hummock", - "leader", + "java_binding", "meta", "monitor_service", "plan_common", @@ -53,6 +53,7 @@ fn main() -> Result<(), Box> { tonic_build::configure() .file_descriptor_set_path(file_descriptor_set_path.as_path()) .compile_well_known_types(true) + .protoc_arg("--experimental_allow_proto3_optional") .type_attribute(".", "#[derive(prost_helpers::AnyPB)]") .out_dir(out_dir.as_path()) .compile(&protos, &[proto_dir.to_string()]) diff --git a/src/prost/helpers/Cargo.toml b/src/prost/helpers/Cargo.toml index 32392a34af23e..407e885742c7b 100644 --- a/src/prost/helpers/Cargo.toml +++ b/src/prost/helpers/Cargo.toml @@ -14,3 +14,9 @@ syn = "1" [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../../workspace-hack" } + +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] diff --git a/src/prost/helpers/src/generate.rs b/src/prost/helpers/src/generate.rs index b9edd4f6eff20..7fe949131ca6f 100644 --- a/src/prost/helpers/src/generate.rs +++ b/src/prost/helpers/src/generate.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/prost/helpers/src/lib.rs b/src/prost/helpers/src/lib.rs index e68131f96c1f5..99a95fdf88976 100644 --- a/src/prost/helpers/src/lib.rs +++ b/src/prost/helpers/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/prost/src/lib.rs b/src/prost/src/lib.rs index fd51501a7bbde..e8ad3c7f770d7 100644 --- a/src/prost/src/lib.rs +++ b/src/prost/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -75,12 +75,12 @@ pub mod monitor_service; #[cfg_attr(madsim, path = "sim/backup_service.rs")] pub mod backup_service; #[rustfmt::skip] +#[cfg_attr(madsim, path = "sim/java_binding.rs")] +pub mod java_binding; +#[rustfmt::skip] #[cfg_attr(madsim, path = "sim/health.rs")] pub mod health; #[rustfmt::skip] -#[cfg_attr(madsim, path = "sim/leader.rs")] -pub mod leader; -#[rustfmt::skip] #[path = "connector_service.serde.rs"] pub mod connector_service_serde; #[rustfmt::skip] @@ -137,6 +137,9 @@ pub mod monitor_service_serde; #[rustfmt::skip] #[path = "backup_service.serde.rs"] pub mod backup_service_serde; +#[rustfmt::skip] +#[path = "java_binding.serde.rs"] +pub mod java_binding_serde; #[derive(Clone, PartialEq, Eq, Debug)] pub struct ProstFieldNotFound(pub &'static str); diff --git a/src/risedevtool/Cargo.toml b/src/risedevtool/Cargo.toml index 54b0360324919..53bd018118619 100644 --- a/src/risedevtool/Cargo.toml +++ b/src/risedevtool/Cargo.toml @@ -7,9 +7,14 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" -bytes = "1" chrono = { version = "0.4", default-features = false, features = ["clock", "std"] } clap = { version = "3", features = ["derive"] } console = "0.15" @@ -37,6 +42,5 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "signal", "fs" ] } -toml = "0.5.9" workspace-hack = { path = "../workspace-hack" } yaml-rust = "0.4" diff --git a/src/risedevtool/connector.toml b/src/risedevtool/connector.toml index 8ece68133534b..9b36f0c244e8b 100644 --- a/src/risedevtool/connector.toml +++ b/src/risedevtool/connector.toml @@ -1,9 +1,10 @@ extend = "common.toml" [env] -RW_CONNECTOR_VERSION = "0.0.2" +RW_CONNECTOR_VERSION = "0.1.16" RW_CONNECTOR_DOWNLOAD_PATH = "${PREFIX_TMP}/risingwave-connector-${RW_CONNECTOR_VERSION}.tar.gz" -RW_CONNECTOR_RELEASE = "risingwave-connector-${RW_CONNECTOR_VERSION}.jar" +RW_CONNECTOR_RELEASE = "risingwave-connector-${RW_CONNECTOR_VERSION}.tar.gz" +RW_CONNECTOR_BIN_PREFIX = "${PREFIX_BIN}/connector-node" RW_CONNECTOR_DOWNLOAD_URL = "https://github.com/risingwavelabs/risingwave-connector-release/raw/main/risingwave-connector-${RW_CONNECTOR_VERSION}.tar.gz" @@ -15,12 +16,19 @@ description = "Download RisingWave Connector" script = ''' #!/usr/bin/env bash set -e -if [ -f "${PREFIX_BIN}/risingwave-connector.jar" ]; then +if [ -f "${RW_CONNECTOR_BIN_PREFIX}/start-service.sh" ]; then exit 0 fi -echo "RisingWave Connector not found, download ${RW_CONNECTOR_RELEASE}" -curl -fL -o "${RW_CONNECTOR_DOWNLOAD_PATH}" "${RW_CONNECTOR_DOWNLOAD_URL}" -tar -xvzf "${RW_CONNECTOR_DOWNLOAD_PATH}" -C "${PREFIX_TMP}" -mv "${PREFIX_TMP}/${RW_CONNECTOR_RELEASE}" "${PREFIX_BIN}/risingwave-connector.jar" -rm ${RW_CONNECTOR_DOWNLOAD_PATH} + +if [ -f "${RW_CONNECTOR_DOWNLOAD_PATH}" ]; then + mkdir -p "${PREFIX_BIN}/connector-node" + tar xf "${RW_CONNECTOR_DOWNLOAD_PATH}" -C "${PREFIX_BIN}/connector-node" + rm "${RW_CONNECTOR_DOWNLOAD_PATH}" +else + echo "RisingWave Connector not found, download ${RW_CONNECTOR_RELEASE}" + curl -fL -o "${RW_CONNECTOR_DOWNLOAD_PATH}" "${RW_CONNECTOR_DOWNLOAD_URL}" + mkdir -p "${PREFIX_BIN}/connector-node" + tar xf "${RW_CONNECTOR_DOWNLOAD_PATH}" -C "${PREFIX_BIN}/connector-node" + rm "${RW_CONNECTOR_DOWNLOAD_PATH}" +fi ''' diff --git a/src/risedevtool/src/bin/risedev-compose.rs b/src/risedevtool/src/bin/risedev-compose.rs index 
5d9a4bd09a637..fbce31d6bb376 100644 --- a/src/risedevtool/src/bin/risedev-compose.rs +++ b/src/risedevtool/src/bin/risedev-compose.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -203,6 +203,7 @@ fn main() -> Result<()> { ServiceConfig::ZooKeeper(_) => { return Err(anyhow!("not supported, please use redpanda instead")) } + ServiceConfig::OpenDal(_) => continue, ServiceConfig::AwsS3(_) => continue, ServiceConfig::RedPanda(c) => { if opts.deploy { diff --git a/src/risedevtool/src/bin/risedev-config.rs b/src/risedevtool/src/bin/risedev-config.rs index d9e5622c95f9b..7e157a21c4598 100644 --- a/src/risedevtool/src/bin/risedev-config.rs +++ b/src/risedevtool/src/bin/risedev-config.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ use std::fs::OpenOptions; use std::io::{BufRead, BufReader, BufWriter, Write}; -use anyhow::{anyhow, Context, Result}; +use anyhow::{Context, Result}; use clap::{ArgEnum, Parser, Subcommand}; use console::style; use dialoguer::MultiSelect; @@ -59,6 +59,7 @@ enum Commands { pub enum Components { #[clap(name = "minio")] Minio, + Hdfs, PrometheusAndGrafana, Etcd, Kafka, @@ -77,6 +78,7 @@ impl Components { pub fn title(&self) -> String { match self { Self::Minio => "[Component] Hummock: MinIO + MinIO-CLI", + Self::Hdfs => "[Component] Hummock: Hdfs Backend", Self::PrometheusAndGrafana => "[Component] Metrics: Prometheus + Grafana", Self::Etcd => "[Component] Etcd", Self::Kafka => "[Component] Kafka", @@ -97,6 +99,10 @@ impl Components { match self { Self::Minio => { " +Required by Hummock state store." + } + Self::Hdfs => { + " Required by Hummock state store." } Self::PrometheusAndGrafana => { @@ -169,6 +175,7 @@ Required if you want to create CDC source from external Databases. pub fn from_env(env: impl AsRef) -> Option { match env.as_ref() { "ENABLE_MINIO" => Some(Self::Minio), + "ENABLE_HDFS" => Some(Self::Hdfs), "ENABLE_PROMETHEUS_GRAFANA" => Some(Self::PrometheusAndGrafana), "ENABLE_ETCD" => Some(Self::Etcd), "ENABLE_KAFKA" => Some(Self::Kafka), @@ -188,6 +195,7 @@ Required if you want to create CDC source from external Databases. pub fn env(&self) -> String { match self { Self::Minio => "ENABLE_MINIO", + Self::Hdfs => "ENABLE_HDFS", Self::PrometheusAndGrafana => "ENABLE_PROMETHEUS_GRAFANA", Self::Etcd => "ENABLE_ETCD", Self::Kafka => "ENABLE_KAFKA", @@ -209,19 +217,8 @@ Required if you want to create CDC source from external Databases. } } -fn configure(chosen: &[Components]) -> Result> { +fn configure(chosen: &[Components]) -> Result>> { println!("=== Configure RiseDev ==="); - println!(); - println!("RiseDev includes several components. You can select the ones you need, so as to reduce build time."); - println!(); - println!( - "Use {} to navigate between up / down, use {} to go to next page,\nand use {} to select an item. 
Press {} to continue.", - style("arrow up / down").bold(), - style("arrow left / right").bold(), - style("space").bold(), - style("enter").bold() - ); - println!(); let all_components = all::().collect_vec(); @@ -229,8 +226,7 @@ fn configure(chosen: &[Components]) -> Result> { let items = all_components .iter() - .enumerate() - .map(|(idx, c)| { + .map(|c| { let title = c.title(); let desc = style( ("\n".to_string() + c.description().trim()) @@ -239,33 +235,32 @@ fn configure(chosen: &[Components]) -> Result> { ) .dim(); - let instruction = if (idx + 1) % ITEMS_PER_PAGE == 0 || idx == all_components.len() - 1 - { - format!( - "\n\n page {}/{}", - style(((idx + ITEMS_PER_PAGE - 1) / ITEMS_PER_PAGE).to_string()).bold(), - (all_components.len() + ITEMS_PER_PAGE - 1) / ITEMS_PER_PAGE, - ) - } else { - String::new() - }; - - (format!("{title}{desc}{instruction}",), chosen.contains(c)) + (format!("{title}{desc}",), chosen.contains(c)) }) .collect_vec(); - let chosen_indices: Vec = MultiSelect::new() + let Some(chosen_indices) = MultiSelect::new() + .with_prompt( + format!( + "RiseDev includes several components. You can select the ones you need, so as to reduce build time\n\n{}: navigate\n{}: confirm and save {}: quit without saving\n\nPick items with {}", + style("↑ / ↓ / ← / → ").reverse(), + style("Enter").reverse(), + style("Esc / q").reverse(), + style("Space").reverse(), + ) + ) .items_checked(&items) .max_length(ITEMS_PER_PAGE) - .interact_opt()? - .ok_or_else(|| anyhow!("no selection made"))?; + .interact_opt()? else { + return Ok(None); + }; let chosen = chosen_indices .into_iter() .map(|i| all_components[i]) .collect_vec(); - Ok(chosen) + Ok(Some(chosen)) } fn main() -> Result<()> { @@ -323,7 +318,14 @@ fn main() -> Result<()> { Some(Commands::Disable { component }) => { chosen.into_iter().filter(|x| x != component).collect() } - None => configure(&chosen)?, + None => match configure(&chosen)? { + Some(chosen) => chosen, + None => { + println!("Quit without saving"); + println!("========================="); + return Ok(()); + } + }, }; println!("=== Enabled Components ==="); diff --git a/src/risedevtool/src/bin/risedev-dev.rs b/src/risedevtool/src/bin/risedev-dev.rs index d578f34dd8dd6..dfa8ea5d5be33 100644 --- a/src/risedevtool/src/bin/risedev-dev.rs +++ b/src/risedevtool/src/bin/risedev-dev.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
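The new `Hdfs` entry in `Components` above plugs into the same environment-variable persistence that `./risedev configure` uses for the other components. A tiny in-crate sketch of the round trip, assuming the `Components` type from `risedev-config.rs`:

// Selections are persisted as env vars; the new HDFS component follows the same scheme.
assert!(matches!(
    Components::from_env("ENABLE_HDFS"),
    Some(Components::Hdfs)
));
assert_eq!(Components::Hdfs.env(), "ENABLE_HDFS");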
@@ -19,7 +19,7 @@ use std::path::Path; use std::sync::Arc; use std::time::{Duration, Instant}; -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; use console::style; use indicatif::ProgressBar; use risedev::util::{complete_spin, fail_spin}; @@ -27,8 +27,8 @@ use risedev::{ compute_risectl_env, preflight_check, AwsS3Config, CompactorService, ComputeNodeService, ConfigExpander, ConfigureTmuxTask, ConnectorNodeService, EnsureStopService, ExecuteContext, FrontendService, GrafanaService, JaegerService, KafkaService, MetaNodeService, MinioService, - PrometheusService, PubsubService, RedisService, ServiceConfig, Task, ZooKeeperService, - RISEDEV_SESSION_NAME, + OpendalConfig, PrometheusService, PubsubService, RedisService, ServiceConfig, Task, + ZooKeeperService, RISEDEV_SESSION_NAME, }; use tempfile::tempdir; use yaml_rust::YamlEmitter; @@ -112,6 +112,7 @@ fn task_main( ServiceConfig::Redis(c) => Some((c.port, c.id.clone())), ServiceConfig::ZooKeeper(c) => Some((c.port, c.id.clone())), ServiceConfig::AwsS3(_) => None, + ServiceConfig::OpenDal(_) => None, ServiceConfig::RedPanda(_) => None, ServiceConfig::ConnectorNode(c) => Some((c.port, c.id.clone())), }; @@ -276,6 +277,29 @@ fn task_main( ctx.pb .set_message(format!("using AWS s3 bucket {}", c.bucket)); } + ServiceConfig::OpenDal(c) => { + let mut ctx = + ExecuteContext::new(&mut logger, manager.new_progress(), status_dir.clone()); + + struct OpendalService(OpendalConfig); + impl Task for OpendalService { + fn execute( + &mut self, + _ctx: &mut ExecuteContext, + ) -> anyhow::Result<()> { + Ok(()) + } + + fn id(&self) -> String { + self.0.id.clone() + } + } + + ctx.service(&OpendalService(c.clone())); + ctx.complete_spin(); + ctx.pb + .set_message(format!("using Opendal, namenode = {}", c.namenode)); + } ServiceConfig::ZooKeeper(c) => { let mut ctx = ExecuteContext::new(&mut logger, manager.new_progress(), status_dir.clone()); @@ -352,7 +376,7 @@ fn main() -> Result<()> { if let Some(config_path) = &config_path { let target = Path::new(&env::var("PREFIX_CONFIG")?).join("risingwave.toml"); - std::fs::copy(config_path, target)?; + std::fs::copy(config_path, target).context("config file not found")?; } { diff --git a/src/risedevtool/src/bin/risedev-docslt.rs b/src/risedevtool/src/bin/risedev-docslt.rs index 8ee605e29bf5e..dcc7f6f273b83 100644 --- a/src/risedevtool/src/bin/risedev-docslt.rs +++ b/src/risedevtool/src/bin/risedev-docslt.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/compose.rs b/src/risedevtool/src/compose.rs index 94a01f0b43fda..73271c9c34718 100644 --- a/src/risedevtool/src/compose.rs +++ b/src/risedevtool/src/compose.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/compose_deploy.rs b/src/risedevtool/src/compose_deploy.rs index 987bc216e4c32..b89b538de55ac 100644 --- a/src/risedevtool/src/compose_deploy.rs +++ b/src/risedevtool/src/compose_deploy.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/risedevtool/src/config.rs b/src/risedevtool/src/config.rs index 87adfdc588541..cb29dc9bb804f 100644 --- a/src/risedevtool/src/config.rs +++ b/src/risedevtool/src/config.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -166,6 +166,7 @@ impl ConfigExpander { "prometheus" => ServiceConfig::Prometheus(serde_yaml::from_str(&out_str)?), "grafana" => ServiceConfig::Grafana(serde_yaml::from_str(&out_str)?), "jaeger" => ServiceConfig::Jaeger(serde_yaml::from_str(&out_str)?), + "opendal" => ServiceConfig::OpenDal(serde_yaml::from_str(&out_str)?), "aws-s3" => ServiceConfig::AwsS3(serde_yaml::from_str(&out_str)?), "kafka" => ServiceConfig::Kafka(serde_yaml::from_str(&out_str)?), "pubsub" => ServiceConfig::Pubsub(serde_yaml::from_str(&out_str)?), diff --git a/src/risedevtool/src/config/dollar_expander.rs b/src/risedevtool/src/config/dollar_expander.rs index b7fba7593e442..ece06be15ac40 100644 --- a/src/risedevtool/src/config/dollar_expander.rs +++ b/src/risedevtool/src/config/dollar_expander.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/config/id_expander.rs b/src/risedevtool/src/config/id_expander.rs index cb32b0b1aec2c..968c76b17789b 100644 --- a/src/risedevtool/src/config/id_expander.rs +++ b/src/risedevtool/src/config/id_expander.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/config/provide_expander.rs b/src/risedevtool/src/config/provide_expander.rs index 661dad0aeb848..9948c81b0336c 100644 --- a/src/risedevtool/src/config/provide_expander.rs +++ b/src/risedevtool/src/config/provide_expander.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/config/use_expander.rs b/src/risedevtool/src/config/use_expander.rs index c724af29d3634..bdf201eabaea1 100644 --- a/src/risedevtool/src/config/use_expander.rs +++ b/src/risedevtool/src/config/use_expander.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -41,7 +41,7 @@ impl UseExpander { Ok(Self { template }) } - /// Overwrites values in `default` with values from `provided`. + /// Overrides values in `default` with values from `provided`. fn merge(use_id: &str, default: &yaml::Hash, provided: &yaml::Hash) -> yaml::Hash { let mut result = yaml::Hash::new(); // put `use` as the first element to make the generated yaml more readable. 
diff --git a/src/risedevtool/src/config_gen.rs b/src/risedevtool/src/config_gen.rs index 0b3a2dc913c3d..b3df9f79acf72 100644 --- a/src/risedevtool/src/config_gen.rs +++ b/src/risedevtool/src/config_gen.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/config_gen/grafana_gen.rs b/src/risedevtool/src/config_gen/grafana_gen.rs index 34f5eaad4a393..7dfd740d5fbd2 100644 --- a/src/risedevtool/src/config_gen/grafana_gen.rs +++ b/src/risedevtool/src/config_gen/grafana_gen.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/config_gen/kafka_gen.rs b/src/risedevtool/src/config_gen/kafka_gen.rs index b5b4c98d49b3d..5755442a56141 100644 --- a/src/risedevtool/src/config_gen/kafka_gen.rs +++ b/src/risedevtool/src/config_gen/kafka_gen.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/config_gen/prometheus_gen.rs b/src/risedevtool/src/config_gen/prometheus_gen.rs index ec25f592bfd6d..6172ffcf1accb 100644 --- a/src/risedevtool/src/config_gen/prometheus_gen.rs +++ b/src/risedevtool/src/config_gen/prometheus_gen.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/config_gen/zookeeper_gen.rs b/src/risedevtool/src/config_gen/zookeeper_gen.rs index 550709cda8250..89b695ffd90de 100644 --- a/src/risedevtool/src/config_gen/zookeeper_gen.rs +++ b/src/risedevtool/src/config_gen/zookeeper_gen.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/lib.rs b/src/risedevtool/src/lib.rs index 6a43bc52ecf5e..c4fdb65647f0a 100644 --- a/src/risedevtool/src/lib.rs +++ b/src/risedevtool/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/preflight_check.rs b/src/risedevtool/src/preflight_check.rs index 30fd45519eb6b..47fb8235495fc 100644 --- a/src/risedevtool/src/preflight_check.rs +++ b/src/risedevtool/src/preflight_check.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/risedevtool/src/risectl_env.rs b/src/risedevtool/src/risectl_env.rs index 1736241cd2ec3..a65b3b23acfcb 100644 --- a/src/risedevtool/src/risectl_env.rs +++ b/src/risedevtool/src/risectl_env.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -31,6 +31,7 @@ pub fn compute_risectl_env(services: &Vec) -> Result { let mut cmd = Command::new("compute-node"); if add_storage_backend( "risectl", + c.provide_opendal.as_ref().unwrap(), c.provide_minio.as_ref().unwrap(), c.provide_aws_s3.as_ref().unwrap(), HummockInMemoryStrategy::Disallowed, diff --git a/src/risedevtool/src/service_config.rs b/src/risedevtool/src/service_config.rs index e8a77b5f644a1..4c3c613e487eb 100644 --- a/src/risedevtool/src/service_config.rs +++ b/src/risedevtool/src/service_config.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -33,6 +33,7 @@ pub struct ComputeNodeConfig { pub provide_minio: Option>, pub provide_meta_node: Option>, pub provide_compute_node: Option>, + pub provide_opendal: Option>, pub provide_aws_s3: Option>, pub provide_jaeger: Option>, pub provide_compactor: Option>, @@ -100,7 +101,9 @@ pub struct CompactorConfig { pub exporter_port: u16, pub provide_minio: Option>, + pub provide_opendal: Option>, pub provide_aws_s3: Option>, + pub provide_meta_node: Option>, pub user_managed: bool, pub max_concurrent_task_number: u64, @@ -216,6 +219,20 @@ pub struct AwsS3Config { pub s3_compatible: bool, } +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +#[serde(deny_unknown_fields)] +pub struct OpendalConfig { + #[serde(rename = "use")] + phantom_use: Option, + + pub id: String, + pub engine: String, + pub namenode: String, + pub bucket: String, + pub root: String, +} + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] #[serde(deny_unknown_fields)] @@ -312,6 +329,7 @@ pub enum ServiceConfig { Prometheus(PrometheusConfig), Grafana(GrafanaConfig), Jaeger(JaegerConfig), + OpenDal(OpendalConfig), AwsS3(AwsS3Config), Kafka(KafkaConfig), Pubsub(PubsubConfig), @@ -340,6 +358,7 @@ impl ServiceConfig { Self::Redis(c) => &c.id, Self::RedPanda(c) => &c.id, Self::ConnectorNode(c) => &c.id, + Self::OpenDal(c) => &c.id, } } } diff --git a/src/risedevtool/src/task.rs b/src/risedevtool/src/task.rs index a4315a6ecbd5d..1c0a1f95a4331 100644 --- a/src/risedevtool/src/task.rs +++ b/src/risedevtool/src/task.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/task/compactor_service.rs b/src/risedevtool/src/task/compactor_service.rs index 208e98cea1a8c..ef4d261bc2beb 100644 --- a/src/risedevtool/src/task/compactor_service.rs +++ b/src/risedevtool/src/task/compactor_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -58,14 +58,14 @@ impl CompactorService { "compactor cannot use in-memory hummock if remote object store is not provided" )); } - cmd.arg("--host") + cmd.arg("--listen-addr") .arg(format!("{}:{}", config.listen_address, config.port)) .arg("--prometheus-listener-addr") .arg(format!( "{}:{}", config.listen_address, config.exporter_port )) - .arg("--client-address") + .arg("--advertise-addr") .arg(format!("{}:{}", config.address, config.port)) .arg("--metrics-level") .arg("1") @@ -80,8 +80,10 @@ impl CompactorService { let provide_minio = config.provide_minio.as_ref().unwrap(); let provide_aws_s3 = config.provide_aws_s3.as_ref().unwrap(); + let provide_opendal = config.provide_opendal.as_ref().unwrap(); add_storage_backend( &config.id, + provide_opendal, provide_minio, provide_aws_s3, hummock_in_memory_strategy, diff --git a/src/risedevtool/src/task/compute_node_service.rs b/src/risedevtool/src/task/compute_node_service.rs index 4284d4c088842..a6ac5a8e83465 100644 --- a/src/risedevtool/src/task/compute_node_service.rs +++ b/src/risedevtool/src/task/compute_node_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -47,14 +47,14 @@ impl ComputeNodeService { config: &ComputeNodeConfig, hummock_in_memory_strategy: HummockInMemoryStrategy, ) -> Result<()> { - cmd.arg("--host") + cmd.arg("--listen-addr") .arg(format!("{}:{}", config.listen_address, config.port)) .arg("--prometheus-listener-addr") .arg(format!( "{}:{}", config.listen_address, config.exporter_port )) - .arg("--client-address") + .arg("--advertise-addr") .arg(format!("{}:{}", config.address, config.port)) .arg("--metrics-level") .arg("1") @@ -82,25 +82,29 @@ impl ComputeNodeService { } let provide_minio = config.provide_minio.as_ref().unwrap(); + let provide_opendal = config.provide_opendal.as_ref().unwrap(); let provide_aws_s3 = config.provide_aws_s3.as_ref().unwrap(); + let provide_compute_node = config.provide_compute_node.as_ref().unwrap(); let is_shared_backend = match ( config.enable_in_memory_kv_state_backend, provide_minio.as_slice(), provide_aws_s3.as_slice(), + provide_opendal.as_slice(), ) { - (true, [], []) => { + (true, [], [], []) => { cmd.arg("--state-store").arg("in-memory"); false } - (true, _, _) => { + (true, _, _, _) => { return Err(anyhow!( "When `enable_in_memory_kv_state_backend` is enabled, no minio and aws-s3 should be provided.", )); } - (false, provide_minio, provide_aws_s3) => add_storage_backend( + (_, provide_minio, provide_aws_s3, provide_opendal) => add_storage_backend( &config.id, + provide_opendal, provide_minio, provide_aws_s3, hummock_in_memory_strategy, diff --git a/src/risedevtool/src/task/configure_tmux_service.rs b/src/risedevtool/src/task/configure_tmux_service.rs index 47419ffc7871b..5fc4bcbf4094f 100644 --- a/src/risedevtool/src/task/configure_tmux_service.rs +++ b/src/risedevtool/src/task/configure_tmux_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
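The compute-node match above grows a fourth tuple element for OpenDAL services. The following is an illustrative restatement of that branch logic, not the actual risedev API: the in-memory state store is only chosen when the in-memory flag is set and no object-store service is provided.

// Illustrative sketch only; the function name and signature are hypothetical.
fn pick_state_store(in_memory: bool, minio: usize, aws_s3: usize, opendal: usize) -> &'static str {
    match (in_memory, minio, aws_s3, opendal) {
        // In-memory Hummock: only valid when nothing else is provided.
        (true, 0, 0, 0) => "in-memory",
        // Providing MinIO / AWS S3 / OpenDAL together with the in-memory flag is an error.
        (true, ..) => panic!("in-memory state backend cannot be combined with provided object stores"),
        // Otherwise a shared backend is picked by `add_storage_backend`.
        _ => "shared backend via add_storage_backend",
    }
}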
diff --git a/src/risedevtool/src/task/connector_service.rs b/src/risedevtool/src/task/connector_service.rs index ce8f5066b96cf..9cc589012a44b 100644 --- a/src/risedevtool/src/task/connector_service.rs +++ b/src/risedevtool/src/task/connector_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -32,7 +32,9 @@ impl ConnectorNodeService { fn connector_path(&self) -> Result { let prefix_bin = env::var("PREFIX_BIN")?; - Ok(Path::new(&prefix_bin).join("risingwave-connector.jar")) + Ok(Path::new(&prefix_bin) + .join("connector-node") + .join("start-service.sh")) } } @@ -44,13 +46,8 @@ impl Task for ConnectorNodeService { if !path.exists() { return Err(anyhow!("RisingWave connector binary not found in {:?}\nDid you enable risingwave connector feature in `./risedev configure`?", path)); } - - let mut cmd = Command::new("java"); - // the main class can be removed in the next version of cdc source - cmd.arg("-jar") - .arg(path) - .arg("--port") - .arg(self.config.port.to_string()); + let mut cmd = Command::new("sh"); + cmd.arg(path).arg("-p").arg(self.config.port.to_string()); ctx.run_command(ctx.tmux_run(cmd)?)?; ctx.pb.set_message("started"); diff --git a/src/risedevtool/src/task/ensure_stop_service.rs b/src/risedevtool/src/task/ensure_stop_service.rs index 44074a11fb0de..037304d264935 100644 --- a/src/risedevtool/src/task/ensure_stop_service.rs +++ b/src/risedevtool/src/task/ensure_stop_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/task/etcd_service.rs b/src/risedevtool/src/task/etcd_service.rs index 195b1b9f7613c..d442a45beb542 100644 --- a/src/risedevtool/src/task/etcd_service.rs +++ b/src/risedevtool/src/task/etcd_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/task/frontend_service.rs b/src/risedevtool/src/task/frontend_service.rs index 06252f2a66a19..c3e54cb393344 100644 --- a/src/risedevtool/src/task/frontend_service.rs +++ b/src/risedevtool/src/task/frontend_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -17,8 +17,10 @@ use std::path::Path; use std::process::Command; use anyhow::{anyhow, Result}; +use itertools::Itertools; use super::{ExecuteContext, Task}; +use crate::util::{get_program_args, get_program_env_cmd, get_program_name}; use crate::FrontendConfig; pub struct FrontendService { @@ -42,9 +44,9 @@ impl FrontendService { /// Apply command args according to config pub fn apply_command_args(cmd: &mut Command, config: &FrontendConfig) -> Result<()> { - cmd.arg("--host") + cmd.arg("--listen-addr") .arg(format!("{}:{}", config.listen_address, config.port)) - .arg("--client-address") + .arg("--advertise-addr") .arg(format!("{}:{}", config.address, config.port)) .arg("--prometheus-listener-addr") .arg(format!( @@ -65,14 +67,12 @@ impl FrontendService { "Cannot configure node: no meta node found in this configuration." )); } else { - let meta_node = provide_meta_node.last().unwrap(); - cmd.arg("--meta-addr") - .arg(format!("http://{}:{}", meta_node.address, meta_node.port)); - if provide_meta_node.len() > 1 { - eprintln!("WARN: more than 1 meta node instance is detected, only using the last one for meta node."); - // According to some heruistics, the last etcd node seems always to be elected as - // leader. Therefore we ensure compute node can start by using the last one. - } + cmd.arg("--meta-addr").arg( + provide_meta_node + .iter() + .map(|meta_node| format!("http://{}:{}", meta_node.address, meta_node.port)) + .join(","), + ); } Ok(()) @@ -98,6 +98,13 @@ impl Task for FrontendService { ctx.pb.set_message("started"); } else { ctx.pb.set_message("user managed"); + writeln!( + &mut ctx.log, + "Please use the following parameters to start the frontend:\n{}\n{} {}\n\n", + get_program_env_cmd(&cmd), + get_program_name(&cmd), + get_program_args(&cmd) + )?; } Ok(()) diff --git a/src/risedevtool/src/task/grafana_service.rs b/src/risedevtool/src/task/grafana_service.rs index 5f393096171af..4e212300cb3c6 100644 --- a/src/risedevtool/src/task/grafana_service.rs +++ b/src/risedevtool/src/task/grafana_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/task/jaeger_service.rs b/src/risedevtool/src/task/jaeger_service.rs index 921465bd62b1c..d953dabb39be6 100644 --- a/src/risedevtool/src/task/jaeger_service.rs +++ b/src/risedevtool/src/task/jaeger_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/task/kafka_service.rs b/src/risedevtool/src/task/kafka_service.rs index 9b1369358d231..62cb30916bd74 100644 --- a/src/risedevtool/src/task/kafka_service.rs +++ b/src/risedevtool/src/task/kafka_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
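Rather than using only the last configured meta node, the frontend above now joins every meta node into one comma-separated `--meta-addr` value, matching the multi-address support added to `MetaClient` later in this diff. A tiny sketch with hypothetical hosts:

use itertools::Itertools;

// Hypothetical meta endpoints; all of them are advertised so the client is not
// pinned to whichever node happens to be listed last.
let meta_nodes = [("192.168.1.1", 5690), ("192.168.1.2", 5690)];
let meta_addr = meta_nodes
    .iter()
    .map(|(address, port)| format!("http://{}:{}", address, port))
    .join(",");
assert_eq!(meta_addr, "http://192.168.1.1:5690,http://192.168.1.2:5690");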
diff --git a/src/risedevtool/src/task/meta_node_service.rs b/src/risedevtool/src/task/meta_node_service.rs index 87b56487ba62e..9bb6b3596ac65 100644 --- a/src/risedevtool/src/task/meta_node_service.rs +++ b/src/risedevtool/src/task/meta_node_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ use std::process::Command; use anyhow::{anyhow, Result}; use super::{ExecuteContext, Task}; +use crate::util::{get_program_args, get_program_env_cmd, get_program_name}; use crate::MetaNodeConfig; pub struct MetaNodeService { @@ -44,8 +45,8 @@ impl MetaNodeService { pub fn apply_command_args(cmd: &mut Command, config: &MetaNodeConfig) -> Result<()> { cmd.arg("--listen-addr") .arg(format!("{}:{}", config.listen_address, config.port)) - .arg("--host") - .arg(config.address.clone()) + .arg("--advertise-addr") + .arg(format!("{}:{}", config.address, config.port)) .arg("--dashboard-host") .arg(format!( "{}:{}", @@ -131,6 +132,13 @@ impl Task for MetaNodeService { ctx.pb.set_message("started"); } else { ctx.pb.set_message("user managed"); + writeln!( + &mut ctx.log, + "Please use the following parameters to start the meta:\n{}\n{} {}\n\n", + get_program_env_cmd(&cmd), + get_program_name(&cmd), + get_program_args(&cmd) + )?; } Ok(()) diff --git a/src/risedevtool/src/task/minio_service.rs b/src/risedevtool/src/task/minio_service.rs index d5a67d6bae095..f3d8c31a83774 100644 --- a/src/risedevtool/src/task/minio_service.rs +++ b/src/risedevtool/src/task/minio_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/task/prometheus_service.rs b/src/risedevtool/src/task/prometheus_service.rs index 218a2dc6e60e4..453cc137df74b 100644 --- a/src/risedevtool/src/task/prometheus_service.rs +++ b/src/risedevtool/src/task/prometheus_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/task/pubsub_service.rs b/src/risedevtool/src/task/pubsub_service.rs index 45d2b53671765..7708641ebdae6 100644 --- a/src/risedevtool/src/task/pubsub_service.rs +++ b/src/risedevtool/src/task/pubsub_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/task/redis_service.rs b/src/risedevtool/src/task/redis_service.rs index 2241963917535..cb52523c96968 100644 --- a/src/risedevtool/src/task/redis_service.rs +++ b/src/risedevtool/src/task/redis_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/risedevtool/src/task/task_configure_grpc_node.rs b/src/risedevtool/src/task/task_configure_grpc_node.rs index b17d19a809e18..7f587e6718770 100644 --- a/src/risedevtool/src/task/task_configure_grpc_node.rs +++ b/src/risedevtool/src/task/task_configure_grpc_node.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/task/task_configure_minio.rs b/src/risedevtool/src/task/task_configure_minio.rs index aa0f50c47afea..26bf855fcd41d 100644 --- a/src/risedevtool/src/task/task_configure_minio.rs +++ b/src/risedevtool/src/task/task_configure_minio.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/task/task_etcd_ready_check.rs b/src/risedevtool/src/task/task_etcd_ready_check.rs index 42c5c0ef32405..ffc62e3707022 100644 --- a/src/risedevtool/src/task/task_etcd_ready_check.rs +++ b/src/risedevtool/src/task/task_etcd_ready_check.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/task/task_kafka_ready_check.rs b/src/risedevtool/src/task/task_kafka_ready_check.rs index fe4a6638b5443..6362e063819e8 100644 --- a/src/risedevtool/src/task/task_kafka_ready_check.rs +++ b/src/risedevtool/src/task/task_kafka_ready_check.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/task/task_pubsub_emu_ready_check.rs b/src/risedevtool/src/task/task_pubsub_emu_ready_check.rs index 1dbe0e360a58a..fe2daabf58f37 100644 --- a/src/risedevtool/src/task/task_pubsub_emu_ready_check.rs +++ b/src/risedevtool/src/task/task_pubsub_emu_ready_check.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/task/task_redis_ready_check.rs b/src/risedevtool/src/task/task_redis_ready_check.rs index c3c35bc6b9e61..f922a0d80e479 100644 --- a/src/risedevtool/src/task/task_redis_ready_check.rs +++ b/src/risedevtool/src/task/task_redis_ready_check.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/task/utils.rs b/src/risedevtool/src/task/utils.rs index 5496659dc5763..2ea067cfa5988 100644 --- a/src/risedevtool/src/task/utils.rs +++ b/src/risedevtool/src/task/utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -15,8 +15,9 @@ use std::process::Command; use anyhow::{anyhow, Result}; +use itertools::Itertools; -use crate::{AwsS3Config, MetaNodeConfig, MinioConfig}; +use crate::{AwsS3Config, MetaNodeConfig, MinioConfig, OpendalConfig}; /// Add a meta node to the parameters. pub fn add_meta_node(provide_meta_node: &[MetaNodeConfig], cmd: &mut Command) -> Result<()> { @@ -27,16 +28,12 @@ pub fn add_meta_node(provide_meta_node: &[MetaNodeConfig], cmd: &mut Command) -> )); } meta_nodes => { - cmd.arg("--meta-address").arg(format!( - "http://{}:{}", - meta_nodes.last().unwrap().address, - meta_nodes.last().unwrap().port - )); - if meta_nodes.len() > 1 { - eprintln!("WARN: more than 1 meta node instance is detected, only using the last one for meta node."); - // According to some heruistics, the last etcd node seems always to be elected as - // leader. Therefore we ensure compute node can start by using the last one. - } + cmd.arg("--meta-address").arg( + meta_nodes + .iter() + .map(|meta_node| format!("http://{}:{}", meta_node.address, meta_node.port)) + .join(","), + ); } }; @@ -57,13 +54,14 @@ pub enum HummockInMemoryStrategy { /// Add a storage backend to the parameters. Returns whether this is a shared backend. pub fn add_storage_backend( id: &str, + provide_opendal: &[OpendalConfig], provide_minio: &[MinioConfig], provide_aws_s3: &[AwsS3Config], hummock_in_memory_strategy: HummockInMemoryStrategy, cmd: &mut Command, ) -> Result { - let is_shared_backend = match (provide_minio, provide_aws_s3) { - ([], []) => { + let is_shared_backend = match (provide_minio, provide_aws_s3, provide_opendal) { + ([], [], []) => { match hummock_in_memory_strategy { HummockInMemoryStrategy::Isolated => { cmd.arg("--state-store").arg("hummock+memory"); @@ -78,7 +76,7 @@ pub fn add_storage_backend( )), } } - ([minio], []) => { + ([minio], [], []) => { cmd.arg("--state-store").arg(format!( "hummock+minio://{hummock_user}:{hummock_password}@{minio_addr}:{minio_port}/{hummock_bucket}", hummock_user = minio.root_user, @@ -89,7 +87,7 @@ pub fn add_storage_backend( )); true } - ([], [aws_s3]) => { + ([], [aws_s3], []) => { // if s3-compatible is true, using some s3 compatible object store. match aws_s3.s3_compatible{ true => cmd.arg("--state-store") @@ -99,8 +97,23 @@ pub fn add_storage_backend( }; true } + ([], [], [opendal]) => { + if opendal.engine == "hdfs"{ + cmd.arg("--state-store") + .arg(format!("hummock+hdfs://{}@{}", opendal.namenode, opendal.root)); + true + } + else if opendal.engine == "oss"{ + cmd.arg("--state-store") + .arg(format!("hummock+oss://{}@{}", opendal.bucket, opendal.root)); + true + } + else{ + unimplemented!() + } + } - (other_minio, other_s3) => { + (other_minio, other_s3, _) => { return Err(anyhow!( "{} minio and {} s3 instance found in config, but only 1 is needed", other_minio.len(), diff --git a/src/risedevtool/src/task/zookeeper_service.rs b/src/risedevtool/src/task/zookeeper_service.rs index 1f41a57f076d8..5d5138186c9fb 100644 --- a/src/risedevtool/src/task/zookeeper_service.rs +++ b/src/risedevtool/src/task/zookeeper_service.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
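`add_storage_backend` above now accepts OpenDAL services and emits `hummock+hdfs://...` or `hummock+oss://...` state-store URLs. A small sketch of the resulting `--state-store` values, with hypothetical namenode, bucket and root values:

// The URL shapes mirror the `format!` calls in the new opendal arm of `add_storage_backend`.
let (namenode, root) = ("127.0.0.1:9000", "risingwave");
assert_eq!(
    format!("hummock+hdfs://{}@{}", namenode, root),
    "hummock+hdfs://127.0.0.1:9000@risingwave"
);

let (bucket, root) = ("my-oss-bucket", "risingwave");
assert_eq!(
    format!("hummock+oss://{}@{}", bucket, root),
    "hummock+oss://my-oss-bucket@risingwave"
);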
diff --git a/src/risedevtool/src/util.rs b/src/risedevtool/src/util.rs index 2ecf38ec79e01..c8df04f1da991 100644 --- a/src/risedevtool/src/util.rs +++ b/src/risedevtool/src/util.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/risedevtool/src/wait.rs b/src/risedevtool/src/wait.rs index 49d1c8651c446..f29d42b2ad7c9 100644 --- a/src/risedevtool/src/wait.rs +++ b/src/risedevtool/src/wait.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/rpc_client/Cargo.toml b/src/rpc_client/Cargo.toml index a166169fbdbef..b831703f4ee85 100644 --- a/src/rpc_client/Cargo.toml +++ b/src/rpc_client/Cargo.toml @@ -7,12 +7,18 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" async-trait = "0.1" -async_stack_trace = { path = "../utils/async_stack_trace" } +either = "1.8.1" futures = { version = "0.3", default-features = false, features = ["alloc"] } -paste = "1" +itertools = "0.10.5" rand = "0.8" risingwave_common = { path = "../common" } risingwave_hummock_sdk = { path = "../storage/hummock_sdk" } @@ -30,6 +36,7 @@ tokio-retry = "0.3" tokio-stream = "0.1" tonic = { version = "0.2", package = "madsim-tonic" } tracing = "0.1" +url = "2.3.1" [target.'cfg(not(madsim))'.dependencies] moka = { version = "0.9", features = ["future"] } diff --git a/src/rpc_client/src/compute_client.rs b/src/rpc_client/src/compute_client.rs index d2d0999af2669..d7cff5eb9bc93 100644 --- a/src/rpc_client/src/compute_client.rs +++ b/src/rpc_client/src/compute_client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/rpc_client/src/connector_client.rs b/src/rpc_client/src/connector_client.rs index 7ca10c793bcc9..44eb525a44ee3 100644 --- a/src/rpc_client/src/connector_client.rs +++ b/src/rpc_client/src/connector_client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/rpc_client/src/error.rs b/src/rpc_client/src/error.rs index ccd03efd0d524..24aaa799192ff 100644 --- a/src/rpc_client/src/error.rs +++ b/src/rpc_client/src/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/rpc_client/src/hummock_meta_client.rs b/src/rpc_client/src/hummock_meta_client.rs index fcb783b06f6a7..4859842af29e8 100644 --- a/src/rpc_client/src/hummock_meta_client.rs +++ b/src/rpc_client/src/hummock_meta_client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ use risingwave_hummock_sdk::{ HummockEpoch, HummockSstableId, HummockVersionId, LocalSstableInfo, SstIdRange, }; use risingwave_pb::hummock::{ - CompactTask, CompactTaskProgress, CompactionGroup, HummockSnapshot, HummockVersion, VacuumTask, + CompactTask, CompactTaskProgress, HummockSnapshot, HummockVersion, VacuumTask, }; use crate::error::Result; @@ -58,7 +58,6 @@ pub trait HummockMetaClient: Send + Sync + 'static { max_concurrent_task_number: u64, ) -> Result>; async fn report_vacuum_task(&self, vacuum_task: VacuumTask) -> Result<()>; - async fn get_compaction_groups(&self) -> Result>; async fn trigger_manual_compaction( &self, compaction_group_id: u64, diff --git a/src/rpc_client/src/lib.rs b/src/rpc_client/src/lib.rs index adccf3135bad5..265cde773c334 100644 --- a/src/rpc_client/src/lib.rs +++ b/src/rpc_client/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,6 +21,8 @@ #![feature(type_alias_impl_trait)] #![feature(associated_type_defaults)] #![feature(generators)] +#![feature(iterator_try_collect)] +#![feature(hash_drain_filter)] #[cfg(madsim)] use std::collections::HashMap; @@ -52,7 +54,7 @@ mod stream_client; pub use compute_client::{ComputeClient, ComputeClientPool, ComputeClientPoolRef}; pub use connector_client::ConnectorClient; pub use hummock_meta_client::{CompactTaskItem, HummockMetaClient}; -pub use meta_client::MetaClient; +pub use meta_client::{MetaClient, SystemParamsReader}; pub use stream_client::{StreamClient, StreamClientPool, StreamClientPoolRef}; #[async_trait] @@ -159,3 +161,21 @@ macro_rules! rpc_client_method_impl { )* } } + +#[macro_export] +macro_rules! meta_rpc_client_method_impl { + ($( { $client:tt, $fn_name:ident, $req:ty, $resp:ty }),*) => { + $( + pub async fn $fn_name(&self, request: $req) -> $crate::Result<$resp> { + let mut client = self.core.read().await.$client.to_owned(); + match client.$fn_name(request).await { + Ok(resp) => Ok(resp.into_inner()), + Err(e) => { + self.refresh_client_if_needed(e.code()).await; + Err(RpcError::GrpcStatus(e)) + } + } + } + )* + } +} diff --git a/src/rpc_client/src/meta_client.rs b/src/rpc_client/src/meta_client.rs index eacddcf65c099..bf8b571146d50 100644 --- a/src/rpc_client/src/meta_client.rs +++ b/src/rpc_client/src/meta_client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,14 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::collections::HashMap; +use std::borrow::Borrow; +use std::collections::{HashMap, HashSet}; use std::fmt::Debug; +use std::sync::Arc; use std::time::Duration; +use anyhow::anyhow; use async_trait::async_trait; +use either::Either; use futures::stream::BoxStream; +use itertools::Itertools; use risingwave_common::catalog::{CatalogVersion, FunctionId, IndexId, TableId}; use risingwave_common::config::MAX_CONNECTION_WINDOW_SIZE; +use risingwave_common::system_param::system_params_to_kv; use risingwave_common::util::addr::HostAddr; use risingwave_hummock_sdk::compact::CompactorRuntimeConfig; use risingwave_hummock_sdk::table_stats::to_prost_table_stats_map; @@ -34,7 +40,7 @@ use risingwave_pb::catalog::{ Schema as ProstSchema, Sink as ProstSink, Source as ProstSource, Table as ProstTable, View as ProstView, }; -use risingwave_pb::common::WorkerType; +use risingwave_pb::common::{HostAddress, WorkerType}; use risingwave_pb::ddl_service::ddl_service_client::DdlServiceClient; use risingwave_pb::ddl_service::drop_table_request::SourceId; use risingwave_pb::ddl_service::*; @@ -45,24 +51,30 @@ use risingwave_pb::meta::cluster_service_client::ClusterServiceClient; use risingwave_pb::meta::heartbeat_request::{extra_info, ExtraInfo}; use risingwave_pb::meta::heartbeat_service_client::HeartbeatServiceClient; use risingwave_pb::meta::list_table_fragments_response::TableFragmentInfo; +use risingwave_pb::meta::meta_member_service_client::MetaMemberServiceClient; use risingwave_pb::meta::notification_service_client::NotificationServiceClient; use risingwave_pb::meta::reschedule_request::Reschedule as ProstReschedule; use risingwave_pb::meta::scale_service_client::ScaleServiceClient; use risingwave_pb::meta::stream_manager_service_client::StreamManagerServiceClient; -use risingwave_pb::meta::*; +use risingwave_pb::meta::system_params_service_client::SystemParamsServiceClient; +use risingwave_pb::meta::{SystemParams as ProstSystemParams, *}; use risingwave_pb::stream_plan::StreamFragmentGraph; use risingwave_pb::user::update_user_request::UpdateField; use risingwave_pb::user::user_service_client::UserServiceClient; use risingwave_pb::user::*; +use tokio::sync::mpsc::Receiver; use tokio::sync::oneshot::Sender; +use tokio::sync::{mpsc, oneshot, RwLock}; use tokio::task::JoinHandle; +use tokio::time; use tokio_retry::strategy::{jitter, ExponentialBackoff}; use tonic::transport::{Channel, Endpoint}; -use tonic::Streaming; +use tonic::{Code, Streaming}; +use tracing::warn; -use crate::error::Result; +use crate::error::{Result, RpcError}; use crate::hummock_meta_client::{CompactTaskItem, HummockMetaClient}; -use crate::{rpc_client_method_impl, ExtraInfoSourceRef}; +use crate::{meta_rpc_client_method_impl, ExtraInfoSourceRef}; type DatabaseId = u32; type SchemaId = u32; @@ -77,6 +89,8 @@ pub struct MetaClient { } impl MetaClient { + const META_ADDRESS_LOAD_BALANCE_MODE_PREFIX: &'static str = "load-balance+"; + pub fn worker_id(&self) -> u32 { self.worker_id } @@ -107,32 +121,87 @@ impl MetaClient { .await } + pub(crate) fn parse_meta_addr(meta_addr: &str) -> Result { + if meta_addr.starts_with(Self::META_ADDRESS_LOAD_BALANCE_MODE_PREFIX) { + let addr = meta_addr + .strip_prefix(Self::META_ADDRESS_LOAD_BALANCE_MODE_PREFIX) + .unwrap(); + + let addr = addr.split(',').exactly_one().map_err(|_| { + RpcError::Internal(anyhow!( + "meta address {} in load-balance mode should be exactly one", + addr + )) + })?; + + let _url = url::Url::parse(addr).map_err(|e| { + RpcError::Internal(anyhow!("could not parse meta 
address {}, {}", addr, e)) + })?; + + Ok(MetaAddressStrategy::LoadBalance(addr.to_string())) + } else { + let addrs: Vec<_> = meta_addr.split(',').map(str::to_string).collect(); + + if addrs.is_empty() { + return Err(RpcError::Internal(anyhow!( + "empty meta addresses {:?}", + addrs + ))); + } + + for addr in &addrs { + let _url = url::Url::parse(addr).map_err(|e| { + RpcError::Internal(anyhow!("could not parse meta address {}, {}", addr, e)) + })?; + } + + Ok(MetaAddressStrategy::List(addrs)) + } + } + /// Register the current node to the cluster and set the corresponding worker id. pub async fn register_new( meta_addr: &str, worker_type: WorkerType, addr: &HostAddr, worker_node_parallelism: usize, - ) -> Result { - let grpc_meta_client = GrpcMetaClient::new(meta_addr).await?; - let request = AddWorkerNodeRequest { + ) -> Result<(Self, SystemParamsReader)> { + let addr_strategy = Self::parse_meta_addr(meta_addr)?; + + let grpc_meta_client = GrpcMetaClient::new(addr_strategy).await?; + + let add_worker_request = AddWorkerNodeRequest { worker_type: worker_type as i32, host: Some(addr.to_protobuf()), worker_node_parallelism: worker_node_parallelism as u64, }; - let retry_strategy = GrpcMetaClient::retry_strategy_for_request(); - let resp = tokio_retry::Retry::spawn(retry_strategy, || async { - let request = request.clone(); - grpc_meta_client.add_worker_node(request).await - }) - .await?; - let worker_node = resp.node.expect("AddWorkerNodeResponse::node is empty"); - Ok(Self { - worker_id: worker_node.id, - worker_type, - host_addr: addr.clone(), - inner: grpc_meta_client, - }) + let add_worker_resp = + tokio_retry::Retry::spawn(GrpcMetaClient::retry_strategy_for_request(), || async { + let request = add_worker_request.clone(); + grpc_meta_client.add_worker_node(request).await + }) + .await?; + let worker_node = add_worker_resp + .node + .expect("AddWorkerNodeResponse::node is empty"); + + let system_params_request = GetSystemParamsRequest {}; + let system_params_resp = + tokio_retry::Retry::spawn(GrpcMetaClient::retry_strategy_for_request(), || async { + let request = system_params_request.clone(); + grpc_meta_client.get_system_params(request).await + }) + .await?; + + Ok(( + Self { + worker_id: worker_node.id, + worker_type, + host_addr: addr.clone(), + inner: grpc_meta_client, + }, + system_params_resp.params.unwrap().into(), + )) } /// Activate the current node in cluster to confirm it's ready to serve. 
@@ -258,6 +327,20 @@ impl MetaClient { Ok((resp.table_id.into(), resp.version)) } + pub async fn replace_table( + &self, + table: ProstTable, + graph: StreamFragmentGraph, + ) -> Result { + let request = ReplaceTablePlanRequest { + table: Some(table), + fragment_graph: Some(graph), + }; + let resp = self.inner.replace_table_plan(request).await?; + // TODO: handle error in `resp.status` here + Ok(resp.version) + } + pub async fn create_view(&self, view: ProstView) -> Result<(u32, CatalogVersion)> { let request = CreateViewRequest { view: Some(view) }; let resp = self.inner.create_view(request).await?; @@ -492,6 +575,12 @@ impl MetaClient { Ok(resp.snapshot.unwrap()) } + pub async fn cancel_creating_jobs(&self, infos: Vec) -> Result<()> { + let request = CancelCreatingJobsRequest { infos }; + let _ = self.inner.cancel_creating_jobs(request).await?; + Ok(()) + } + pub async fn list_table_fragments( &self, table_ids: &[u32], @@ -548,7 +637,7 @@ impl MetaClient { pub async fn init_metadata_for_replay( &self, tables: Vec, - compaction_groups: Vec, + compaction_groups: Vec, ) -> Result<()> { let req = InitMetadataForReplayRequest { tables, @@ -635,7 +724,7 @@ impl MetaClient { Ok(resp.num_tasks as usize) } - pub async fn risectl_list_compaction_group(&self) -> Result> { + pub async fn risectl_list_compaction_group(&self) -> Result> { let req = RiseCtlListCompactionGroupRequest {}; let resp = self.inner.rise_ctl_list_compaction_group(req).await?; Ok(resp.compaction_groups) @@ -686,6 +775,24 @@ impl MetaClient { let resp = self.inner.get_meta_snapshot_manifest(req).await?; Ok(resp.manifest.expect("should exist")) } + + pub async fn get_system_params(&self) -> Result { + let req = GetSystemParamsRequest {}; + let resp = self.inner.get_system_params(req).await?; + Ok(resp.params.unwrap().into()) + } + + pub async fn set_system_param(&self, param: String, value: Option) -> Result<()> { + let req = SetSystemParamRequest { param, value }; + self.inner.set_system_param(req).await?; + Ok(()) + } + + pub async fn get_ddl_progress(&self) -> Result> { + let req = GetDdlProgressRequest {}; + let resp = self.inner.get_ddl_progress(req).await?; + Ok(resp.ddl_progress) + } } #[async_trait] @@ -816,12 +923,6 @@ impl HummockMetaClient for MetaClient { Ok(()) } - async fn get_compaction_groups(&self) -> Result> { - let req = GetCompactionGroupsRequest {}; - let resp = self.inner.get_compaction_groups(req).await?; - Ok(resp.compaction_groups) - } - async fn trigger_manual_compaction( &self, compaction_group_id: u64, @@ -831,8 +932,9 @@ impl HummockMetaClient for MetaClient { // TODO: support key_range parameter let req = TriggerManualCompactionRequest { compaction_group_id, - table_id, /* if table_id not exist, manual_compaction will include all the sst - * without check internal_table_id */ + table_id, + // if table_id does not exist, manual compaction will include all the SSTs + // without checking internal_table_id level, ..Default::default() }; @@ -851,12 +953,73 @@ impl HummockMetaClient for MetaClient { } } -/// Client to meta server. Cloning the instance is lightweight. -/// -/// It is a wrapper of tonic client. See [`rpc_client_method_impl`]. +/// A wrapper for [`risingwave_pb::meta::SystemParams`] for 2 purposes: +/// - Avoid misuse of deprecated fields by hiding their getters. +/// - Abstract fallback logic for fields that might not be provided by meta service due to backward +/// compatibility.
+pub struct SystemParamsReader { + prost: ProstSystemParams, +} + +impl From for SystemParamsReader { + fn from(prost: ProstSystemParams) -> Self { + Self { prost } + } +} + +impl SystemParamsReader { + pub fn barrier_interval_ms(&self) -> u32 { + self.prost.barrier_interval_ms.unwrap() + } + + pub fn checkpoint_frequency(&self) -> u64 { + self.prost.checkpoint_frequency.unwrap() + } + + pub fn sstable_size_mb(&self) -> u32 { + self.prost.sstable_size_mb.unwrap() + } + + pub fn block_size_kb(&self) -> u32 { + self.prost.block_size_kb.unwrap() + } + + pub fn bloom_false_positive(&self) -> f64 { + self.prost.bloom_false_positive.unwrap() + } + + // TODO(zhidong): Only read from system params in v0.1.18. + pub fn state_store(&self, from_local: String) -> String { + let from_prost = self.prost.state_store.as_ref().unwrap(); + if from_prost.is_empty() { + warn!("--state-store is not specified on meta node, reading from CLI instead"); + from_local + } else { + from_prost.clone() + } + } + + pub fn data_directory(&self) -> &str { + self.prost.data_directory.as_ref().unwrap() + } + + pub fn backup_storage_url(&self) -> &str { + self.prost.backup_storage_url.as_ref().unwrap() + } + + pub fn backup_storage_directory(&self) -> &str { + self.prost.backup_storage_directory.as_ref().unwrap() + } + + pub fn to_kv(&self) -> Vec<(String, String)> { + system_params_to_kv(&self.prost).unwrap() + } +} + #[derive(Debug, Clone)] -struct GrpcMetaClient { +struct GrpcMetaClientCore { cluster_client: ClusterServiceClient, + meta_member_client: MetaMemberServiceClient, heartbeat_client: HeartbeatServiceClient, ddl_client: DdlServiceClient, hummock_client: HummockManagerServiceClient, @@ -865,6 +1028,189 @@ struct GrpcMetaClient { user_client: UserServiceClient, scale_client: ScaleServiceClient, backup_client: BackupServiceClient, + system_params_client: SystemParamsServiceClient, +} + +impl GrpcMetaClientCore { + pub(crate) fn new(channel: Channel) -> Self { + let cluster_client = ClusterServiceClient::new(channel.clone()); + let meta_member_client = MetaMemberClient::new(channel.clone()); + let heartbeat_client = HeartbeatServiceClient::new(channel.clone()); + let ddl_client = DdlServiceClient::new(channel.clone()); + let hummock_client = HummockManagerServiceClient::new(channel.clone()); + let notification_client = NotificationServiceClient::new(channel.clone()); + let stream_client = StreamManagerServiceClient::new(channel.clone()); + let user_client = UserServiceClient::new(channel.clone()); + let scale_client = ScaleServiceClient::new(channel.clone()); + let backup_client = BackupServiceClient::new(channel.clone()); + let system_params_client = SystemParamsServiceClient::new(channel); + GrpcMetaClientCore { + cluster_client, + meta_member_client, + heartbeat_client, + ddl_client, + hummock_client, + notification_client, + stream_client, + user_client, + scale_client, + backup_client, + system_params_client, + } + } +} + +/// Client to meta server. Cloning the instance is lightweight. +/// +/// It is a wrapper of tonic client. See [`rpc_client_method_impl`]. 
+#[derive(Debug, Clone)] +struct GrpcMetaClient { + force_refresh_sender: mpsc::Sender>>, + core: Arc>, +} + +#[derive(Debug, Eq, PartialEq)] +pub enum MetaAddressStrategy { + LoadBalance(String), + List(Vec), +} + +type MetaMemberClient = MetaMemberServiceClient; + +struct MetaMemberGroup { + client_cache: HashMap, + members: HashSet, +} + +struct ElectionMemberManagement { + core_ref: Arc>, + members: Either, + current_leader: String, +} + +impl ElectionMemberManagement { + const ELECTION_MEMBER_REFRESH_PERIOD: Duration = Duration::from_secs(5); + + fn host_address_to_url(addr: HostAddress) -> String { + format!("http://{}:{}", addr.host, addr.port) + } + + async fn recreate_core(&self, channel: Channel) { + let mut core = self.core_ref.write().await; + *core = GrpcMetaClientCore::new(channel); + } + + async fn refresh_members(&mut self) -> Result<()> { + let leader_addr = match self.members.as_mut() { + Either::Left(client) => { + let resp = client.to_owned().members(MembersRequest {}).await?; + let resp = resp.into_inner(); + resp.members.into_iter().find(|member| member.is_leader) + } + Either::Right(member_group) => { + let mut fetched_members = None; + + for addr in &member_group.members { + let mut client = match member_group.client_cache.get(addr) { + Some(cached_client) => cached_client.to_owned(), + None => { + let endpoint = match GrpcMetaClient::addr_to_endpoint(addr.clone()) { + Ok(endpoint) => endpoint, + Err(e) => { + tracing::warn!( + "failed to create endpoint from {}, {}", + addr, + e + ); + continue; + } + }; + + let channel = match GrpcMetaClient::connect_to_endpoint(endpoint).await + { + Ok(channel) => channel, + Err(e) => { + tracing::warn!( + "failed to create rpc channel from {}, {}", + addr, + e + ); + continue; + } + }; + + let client: MetaMemberServiceClient = + MetaMemberServiceClient::new(channel); + member_group + .client_cache + .insert(addr.clone(), client.clone()); + client.to_owned() + } + }; + + let MembersResponse { members } = match client.members(MembersRequest {}).await + { + Ok(members) => members.into_inner(), + Err(e) => { + tracing::warn!( + "failed to fetch members from MetaMemberClient {}: {}", + addr, + e + ); + continue; + } + }; + + fetched_members = Some(members); + + break; + } + + let members = + fetched_members.ok_or_else(|| anyhow!("could not refresh members"))?; + + // find new leader + let mut leader = None; + let mut member_addrs = HashSet::new(); + for member in members { + if member.is_leader { + leader = Some(member.clone()); + } + + member_addrs.insert(Self::host_address_to_url(member.address.unwrap())); + } + + // drain old cache + let drained = member_group + .client_cache + .drain_filter(|addr, _| !member_addrs.borrow().contains(addr)); + + for (addr, _) in drained { + tracing::info!("dropping meta client from {}", addr); + } + + // update members + member_group.members = member_addrs; + + leader + } + }; + + if let Some(leader) = leader_addr { + let discovered_leader = Self::host_address_to_url(leader.address.unwrap()); + + if discovered_leader != self.current_leader { + tracing::info!("new meta leader {} discovered", discovered_leader); + let (channel, _) = + GrpcMetaClient::try_build_rpc_channel(vec![discovered_leader.clone()]).await?; + + self.recreate_core(channel).await; + self.current_leader = discovered_leader; + } + } + + Ok(()) + } } impl GrpcMetaClient { @@ -883,53 +1229,160 @@ impl GrpcMetaClient { // Max retry interval in ms for request to meta server. 
const REQUEST_RETRY_MAX_INTERVAL_MS: u64 = 5000; - /// Connect to the meta server `addr`. - pub async fn new(addr: &str) -> Result { - let endpoint = Endpoint::from_shared(addr.to_string())? - .initial_connection_window_size(MAX_CONNECTION_WINDOW_SIZE); + async fn start_meta_member_monitor( + &self, + init_leader_addr: String, + members: Either, + force_refresh_receiver: Receiver>>, + ) -> Result<()> { + let core_ref = self.core.clone(); + let current_leader = init_leader_addr; + + let enable_period_tick = matches!(members, Either::Right(_)); + + let member_management = ElectionMemberManagement { + core_ref, + members, + current_leader, + }; + + let mut force_refresh_receiver = force_refresh_receiver; + + tokio::spawn(async move { + let mut member_management = member_management; + let mut ticker = + time::interval(ElectionMemberManagement::ELECTION_MEMBER_REFRESH_PERIOD); + + loop { + let result_sender: Option>> = if enable_period_tick { + tokio::select! { + _ = ticker.tick() => None, + result_sender = force_refresh_receiver.recv() => result_sender, + } + } else { + force_refresh_receiver.recv().await + }; + + let tick_result = member_management.refresh_members().await; + if let Err(e) = tick_result.as_ref() { + tracing::warn!("refresh election client failed {}", e); + } + + if let Some(sender) = result_sender { + // ignore resp + let _resp = sender.send(tick_result); + } + } + }); + + Ok(()) + } + + async fn force_refresh_leader(&self) -> Result<()> { + let (sender, receiver) = oneshot::channel(); + + self.force_refresh_sender + .send(sender) + .await + .map_err(|e| anyhow!(e))?; + + receiver.await.map_err(|e| anyhow!(e))? + } + + /// Connect to the meta server from `addrs`. + pub async fn new(strategy: MetaAddressStrategy) -> Result { + let (channel, addr) = match &strategy { + MetaAddressStrategy::LoadBalance(addr) => { + Self::try_build_rpc_channel(vec![addr.clone()]).await + } + MetaAddressStrategy::List(addrs) => Self::try_build_rpc_channel(addrs.clone()).await, + }?; + let (force_refresh_sender, force_refresh_receiver) = mpsc::channel(1); + let client = GrpcMetaClient { + force_refresh_sender, + core: Arc::new(RwLock::new(GrpcMetaClientCore::new(channel))), + }; + + let meta_member_client = client.core.read().await.meta_member_client.clone(); + let members = match &strategy { + MetaAddressStrategy::LoadBalance(_) => Either::Left(meta_member_client), + MetaAddressStrategy::List(_) => { + let mut client_cache = HashMap::new(); + let mut members = HashSet::new(); + members.insert(addr.to_string()); + + client_cache.insert(addr.to_string(), meta_member_client); + + Either::Right(MetaMemberGroup { + client_cache, + members, + }) + } + }; + + client + .start_meta_member_monitor(addr, members, force_refresh_receiver) + .await?; + + if let Err(e) = client.force_refresh_leader().await { + tracing::warn!("force refresh leader failed {}, init leader may failed", e); + } + + Ok(client) + } + + fn addr_to_endpoint(addr: String) -> Result { + Endpoint::from_shared(addr) + .map(|endpoint| endpoint.initial_connection_window_size(MAX_CONNECTION_WINDOW_SIZE)) + .map_err(RpcError::TransportError) + } + + pub(crate) async fn try_build_rpc_channel(addrs: Vec) -> Result<(Channel, String)> { + let endpoints: Vec<_> = addrs + .into_iter() + .map(|addr| Self::addr_to_endpoint(addr.clone()).map(|endpoint| (endpoint, addr))) + .try_collect()?; + let retry_strategy = ExponentialBackoff::from_millis(Self::CONN_RETRY_BASE_INTERVAL_MS) .max_delay(Duration::from_millis(Self::CONN_RETRY_MAX_INTERVAL_MS)) 
.map(jitter); + let channel = tokio_retry::Retry::spawn(retry_strategy, || async { - let endpoint = endpoint.clone(); - endpoint - .http2_keep_alive_interval(Duration::from_secs( - Self::ENDPOINT_KEEP_ALIVE_INTERVAL_SEC, - )) - .keep_alive_timeout(Duration::from_secs(Self::ENDPOINT_KEEP_ALIVE_TIMEOUT_SEC)) - .connect_timeout(Duration::from_secs(5)) - .connect() - .await - .inspect_err(|e| { - tracing::warn!( - "Failed to connect to meta server {}, wait for online: {}", - addr, - e - ); - }) + let endpoints = endpoints.clone(); + + for (endpoint, addr) in endpoints { + match Self::connect_to_endpoint(endpoint).await { + Ok(channel) => { + tracing::info!("Connect to meta server {} successfully", addr); + return Ok((channel, addr)); + } + Err(e) => { + tracing::warn!( + "Failed to connect to meta server {}, trying again: {}", + addr, + e + ) + } + } + } + + Err(RpcError::Internal(anyhow!( + "Failed to connect to meta server" + ))) }) .await?; - let cluster_client = ClusterServiceClient::new(channel.clone()); - let heartbeat_client = HeartbeatServiceClient::new(channel.clone()); - let ddl_client = DdlServiceClient::new(channel.clone()); - let hummock_client = HummockManagerServiceClient::new(channel.clone()); - let notification_client = NotificationServiceClient::new(channel.clone()); - let stream_client = StreamManagerServiceClient::new(channel.clone()); - let user_client = UserServiceClient::new(channel.clone()); - let scale_client = ScaleServiceClient::new(channel.clone()); - let backup_client = BackupServiceClient::new(channel); - Ok(Self { - cluster_client, - heartbeat_client, - ddl_client, - hummock_client, - notification_client, - stream_client, - user_client, - scale_client, - backup_client, - }) + Ok(channel) + } + + async fn connect_to_endpoint(endpoint: Endpoint) -> Result { + endpoint + .http2_keep_alive_interval(Duration::from_secs(Self::ENDPOINT_KEEP_ALIVE_INTERVAL_SEC)) + .keep_alive_timeout(Duration::from_secs(Self::ENDPOINT_KEEP_ALIVE_TIMEOUT_SEC)) + .connect_timeout(Duration::from_secs(5)) + .connect() + .await + .map_err(RpcError::TransportError) } /// Return retry strategy for retrying meta requests. @@ -950,6 +1403,7 @@ macro_rules! for_all_meta_rpc { //(not used) ,{ cluster_client, list_all_nodes, ListAllNodesRequest, ListAllNodesResponse } ,{ heartbeat_client, heartbeat, HeartbeatRequest, HeartbeatResponse } ,{ stream_client, flush, FlushRequest, FlushResponse } + ,{ stream_client, cancel_creating_jobs, CancelCreatingJobsRequest, CancelCreatingJobsResponse } ,{ stream_client, list_table_fragments, ListTableFragmentsRequest, ListTableFragmentsResponse } ,{ ddl_client, create_table, CreateTableRequest, CreateTableResponse } ,{ ddl_client, create_materialized_view, CreateMaterializedViewRequest, CreateMaterializedViewResponse } @@ -969,7 +1423,9 @@ macro_rules! 
for_all_meta_rpc { ,{ ddl_client, drop_schema, DropSchemaRequest, DropSchemaResponse } ,{ ddl_client, drop_index, DropIndexRequest, DropIndexResponse } ,{ ddl_client, drop_function, DropFunctionRequest, DropFunctionResponse } + ,{ ddl_client, replace_table_plan, ReplaceTablePlanRequest, ReplaceTablePlanResponse } ,{ ddl_client, risectl_list_state_tables, RisectlListStateTablesRequest, RisectlListStateTablesResponse } + ,{ ddl_client, get_ddl_progress, GetDdlProgressRequest, GetDdlProgressResponse } ,{ hummock_client, unpin_version_before, UnpinVersionBeforeRequest, UnpinVersionBeforeResponse } ,{ hummock_client, get_current_version, GetCurrentVersionRequest, GetCurrentVersionResponse } ,{ hummock_client, replay_version_delta, ReplayVersionDeltaRequest, ReplayVersionDeltaResponse } @@ -987,7 +1443,6 @@ macro_rules! for_all_meta_rpc { ,{ hummock_client, subscribe_compact_tasks, SubscribeCompactTasksRequest, Streaming } ,{ hummock_client, report_compaction_task_progress, ReportCompactionTaskProgressRequest, ReportCompactionTaskProgressResponse } ,{ hummock_client, report_vacuum_task, ReportVacuumTaskRequest, ReportVacuumTaskResponse } - ,{ hummock_client, get_compaction_groups, GetCompactionGroupsRequest, GetCompactionGroupsResponse } ,{ hummock_client, trigger_manual_compaction, TriggerManualCompactionRequest, TriggerManualCompactionResponse } ,{ hummock_client, report_full_scan_task, ReportFullScanTaskRequest, ReportFullScanTaskResponse } ,{ hummock_client, trigger_full_gc, TriggerFullGcRequest, TriggerFullGcResponse } @@ -1011,10 +1466,69 @@ macro_rules! for_all_meta_rpc { ,{ backup_client, get_backup_job_status, GetBackupJobStatusRequest, GetBackupJobStatusResponse } ,{ backup_client, delete_meta_snapshot, DeleteMetaSnapshotRequest, DeleteMetaSnapshotResponse} ,{ backup_client, get_meta_snapshot_manifest, GetMetaSnapshotManifestRequest, GetMetaSnapshotManifestResponse} + ,{ system_params_client, get_system_params, GetSystemParamsRequest, GetSystemParamsResponse } + ,{ system_params_client, set_system_param, SetSystemParamRequest, SetSystemParamResponse } } }; } impl GrpcMetaClient { - for_all_meta_rpc! { rpc_client_method_impl } + async fn refresh_client_if_needed(&self, code: Code) { + if matches!( + code, + Code::Unknown | Code::Unimplemented | Code::Unavailable + ) { + tracing::debug!("matching tonic code {}", code); + let (result_sender, result_receiver) = oneshot::channel(); + if self.force_refresh_sender.try_send(result_sender).is_ok() { + if let Ok(Err(e)) = result_receiver.await { + tracing::warn!("force refresh meta client failed {}", e); + } + } else { + tracing::debug!("skipping the current refresh, somewhere else is already doing it") + } + } + } +} + +impl GrpcMetaClient { + for_all_meta_rpc! 
{ meta_rpc_client_method_impl } +} + +#[cfg(test)] +mod tests { + use crate::meta_client::MetaAddressStrategy; + use crate::MetaClient; + + #[test] + fn test_parse_meta_addr() { + let results = vec![ + ( + "load-balance+http://abc", + Some(MetaAddressStrategy::LoadBalance("http://abc".to_string())), + ), + ("load-balance+http://abc,http://def", None), + ("load-balance+http://abc:xxx", None), + ("", None), + ( + "http://abc,http://def", + Some(MetaAddressStrategy::List(vec![ + "http://abc".to_string(), + "http://def".to_string(), + ])), + ), + ("http://abc:xx,http://def", None), + ]; + for (addr, result) in results { + let parsed_result = MetaClient::parse_meta_addr(addr); + match result { + None => { + assert!(parsed_result.is_err()); + } + Some(strategy) => { + assert_eq!(strategy, parsed_result.unwrap()) + } + } + } + } } diff --git a/src/rpc_client/src/stream_client.rs b/src/rpc_client/src/stream_client.rs index 47200553c5c96..9a2438a05f606 100644 --- a/src/rpc_client/src/stream_client.rs +++ b/src/rpc_client/src/stream_client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/source/Cargo.toml b/src/source/Cargo.toml index a8977f26777e7..4845d779b2563 100644 --- a/src/source/Cargo.toml +++ b/src/source/Cargo.toml @@ -7,43 +7,25 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" -async-stream = "0.3" -async-trait = "0.1" -aws-config = { workspace = true } -aws-sdk-s3 = { workspace = true } -byteorder = "1" -bytes = "1" -chrono = { version = "0.4", default-features = false, features = ["clock", "std"] } -crc32fast = "1" -enum-as-inner = "0.5" -farmhash = "1" +easy-ext = "1" futures = { version = "0.3", default-features = false, features = ["alloc"] } futures-async-stream = "0.2" itertools = "0.10" -maplit = "1" -num-traits = "0.2" parking_lot = "0.12" -paste = "1" -prometheus = { version = "0.13", features = ["process"] } rand = "0.8" risingwave_common = { path = "../common" } risingwave_connector = { path = "../connector" } -risingwave_expr = { path = "../expr" } risingwave_pb = { path = "../prost" } -risingwave_storage = { path = "../storage" } -serde = { version = "1", features = ["derive"] } -smallvec = "1" -static_assertions = "1" -tempfile = "3" -thiserror = "1" tokio = { version = "0.2", package = "madsim-tokio", features = ["rt", "rt-multi-thread", "sync", "macros", "time", "signal", "fs"] } -tokio-stream = "0.1" -tonic = { version = "0.2", package = "madsim-tonic" } tracing = { version = "0.1" } -twox-hash = "1" -url = "2" [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../workspace-hack" } diff --git a/src/source/benches/json_parser.rs b/src/source/benches/json_parser.rs index 4818785c43061..92b286da7b1cb 100644 --- a/src/source/benches/json_parser.rs +++ b/src/source/benches/json_parser.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -17,8 +17,8 @@ use rand::distributions::Alphanumeric; use rand::prelude::*; use risingwave_common::catalog::ColumnId; use risingwave_common::types::{DataType, NaiveDateTimeWrapper, NaiveDateWrapper}; -use risingwave_connector::parser::{JsonParser, SourceParser, SourceStreamChunkBuilder}; -use risingwave_connector::SourceColumnDesc; +use risingwave_connector::parser::{JsonParser, SourceStreamChunkBuilder}; +use risingwave_connector::source::SourceColumnDesc; const NUM_RECORDS: usize = 1 << 18; // ~ 250,000 @@ -130,7 +130,7 @@ fn get_descs() -> Vec { fn bench_json_parser(c: &mut Criterion) { let descs = get_descs(); - let parser = JsonParser {}; + let parser = JsonParser::new(descs.clone()).unwrap(); let records = generate_all_json(); let rt = tokio::runtime::Builder::new_multi_thread() .enable_all() @@ -141,7 +141,7 @@ fn bench_json_parser(c: &mut Criterion) { let mut builder = SourceStreamChunkBuilder::with_capacity(descs.clone(), NUM_RECORDS); for record in &records { let writer = builder.row_writer(); - parser.parse(record, writer).await.unwrap(); + parser.parse_inner(record, writer).await.unwrap(); } }) }); diff --git a/src/source/src/common.rs b/src/source/src/common.rs index e7bf7b8e3dd66..038effe94bfd0 100644 --- a/src/source/src/common.rs +++ b/src/source/src/common.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -use itertools::Itertools; use risingwave_common::array::column::Column; use risingwave_common::array::DataChunk; use risingwave_common::error::Result; use risingwave_common::types::Datum; -use risingwave_connector::SourceColumnDesc; +use risingwave_common::util::iter_util::ZipEqFast; +use risingwave_connector::source::SourceColumnDesc; pub(crate) trait SourceChunkBuilder { fn build_columns<'a>( @@ -31,7 +31,7 @@ pub(crate) trait SourceChunkBuilder { .collect(); for row in rows { - for (datum, builder) in row.iter().zip_eq(&mut builders) { + for (datum, builder) in row.iter().zip_eq_fast(&mut builders) { builder.append_datum(datum); } } diff --git a/src/source/src/connector_source.rs b/src/source/src/connector_source.rs index f1460926aa9df..955cc9bd992e8 100644 --- a/src/source/src/connector_source.rs +++ b/src/source/src/connector_source.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
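The hunk below removes the `InnerConnectorSourceReader`/`ConnectorSourceReader` pair: parsing now lives in the connector crate's split readers, and `ConnectorSource` only carries a `SpecificParserConfig` plus the column schema. As a rough sketch of the resulting call shape (the hunk does not show the reader method's name, so `stream_reader` and the helper below are assumptions, and all arguments are placeholders):

```rust
use std::collections::HashMap;
use std::sync::Arc;

use risingwave_common::catalog::ColumnId;
use risingwave_common::error::Result;
use risingwave_connector::parser::SpecificParserConfig;
use risingwave_connector::source::monitor::SourceMetrics;
use risingwave_connector::source::{BoxSourceWithStateStream, SourceColumnDesc, SourceInfo};
use risingwave_source::connector_source::ConnectorSource;

// Hypothetical helper: build a connector source and open one reader over the
// default (None) split state.
async fn sketch_open_reader(
    properties: HashMap<String, String>,
    columns: Vec<SourceColumnDesc>,
    column_ids: Vec<ColumnId>,
    parser_config: SpecificParserConfig,
    metrics: Arc<SourceMetrics>,
    source_info: SourceInfo,
) -> Result<BoxSourceWithStateStream> {
    // Parser configuration is now supplied up front instead of being built per reader.
    let source = ConnectorSource::new(
        properties,
        columns,
        None, // connector node address
        16,   // message buffer size
        parser_config,
    )?;
    // `None` splits: start from the default connector state.
    source
        .stream_reader(None, column_ids, metrics, source_info)
        .await
}
```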
@@ -16,247 +16,47 @@ use std::collections::HashMap; use std::sync::Arc; use futures::future::try_join_all; -use futures::stream::BoxStream; use futures::StreamExt; -use futures_async_stream::try_stream; use itertools::Itertools; -use risingwave_common::catalog::{ColumnDesc, ColumnId, TableId}; -use risingwave_common::error::ErrorCode::{ConnectorError, ProtocolError}; -use risingwave_common::error::{internal_error, Result, RwError, ToRwResult}; -use risingwave_common::types::Datum; +use risingwave_common::catalog::ColumnId; +use risingwave_common::error::ErrorCode::ConnectorError; +use risingwave_common::error::{internal_error, Result}; use risingwave_common::util::select_all; -use risingwave_connector::parser::{ - SourceParserImpl, SourceStreamChunkBuilder, SpecificParserConfig, -}; +use risingwave_connector::parser::{CommonParserConfig, ParserConfig, SpecificParserConfig}; +use risingwave_connector::source::monitor::SourceMetrics; use risingwave_connector::source::{ - Column, ConnectorProperties, ConnectorState, SourceMessage, SourceMeta, SplitId, SplitMetaData, - SplitReaderImpl, -}; -use risingwave_connector::{ConnectorParams, SourceColumnDesc, SourceFormat, StreamChunkWithState}; -use risingwave_expr::vector_op::cast::i64_to_timestamptz; -use risingwave_pb::catalog::{ - ColumnIndex as ProstColumnIndex, StreamSourceInfo as ProstStreamSourceInfo, + BoxSourceWithStateStream, Column, ConnectorProperties, ConnectorState, SourceColumnDesc, + SourceInfo, SplitReaderImpl, }; -use risingwave_pb::plan_common::{ - ColumnCatalog as ProstColumnCatalog, RowFormatType as ProstRowFormatType, -}; - -use crate::fs_connector_source::FsConnectorSource; -use crate::monitor::SourceMetrics; - -pub const DEFAULT_CONNECTOR_MESSAGE_BUFFER_SIZE: usize = 16; - -#[derive(Clone, Debug)] -pub struct SourceContext { - pub actor_id: u32, - pub source_id: TableId, -} - -impl SourceContext { - pub fn new(actor_id: u32, source_id: TableId) -> Self { - SourceContext { - actor_id, - source_id, - } - } -} - -fn default_split_id() -> SplitId { - "None".into() -} - -struct InnerConnectorSourceReader { - reader: SplitReaderImpl, - // split should be None or only contains one value - split: ConnectorState, - - metrics: Arc, - context: SourceContext, -} - -/// [`ConnectorSource`] serves as a bridge between external components and streaming or -/// batch processing. [`ConnectorSource`] introduces schema at this level while -/// [`SplitReaderImpl`] simply loads raw content from message queue or file system. -/// Parallel means that multiple [`InnerConnectorSourceReader`] will run in parallel during the -/// `next`, so that 0 or more Splits reads can be handled at the Source level. 
-pub struct ConnectorSourceReader { - parser: Arc, - columns: Vec, - - // merge all streams of inner reader into one - // TODO: make this static dispatch instead of box - stream: BoxStream<'static, Result>>, -} - -impl InnerConnectorSourceReader { - async fn new( - prop: ConnectorProperties, - split: ConnectorState, - columns: Vec, - metrics: Arc, - context: SourceContext, - ) -> Result { - tracing::debug!( - "Spawning new connector source inner reader with config {:?}, split {:?}", - prop, - split - ); - - // Here is a workaround, we now provide the vec with only one element - let reader = SplitReaderImpl::create( - prop, - split.clone(), - Some( - columns - .iter() - .cloned() - .map(|col| Column { - name: col.name, - data_type: col.data_type, - }) - .collect_vec(), - ), - ) - .await - .to_rw_result()?; - - Ok(InnerConnectorSourceReader { - reader, - split, - metrics, - context, - }) - } - - #[try_stream(boxed, ok = Vec, error = RwError)] - async fn into_stream(self) { - let actor_id = self.context.actor_id.to_string(); - let source_id = self.context.source_id.to_string(); - let id = match &self.split { - Some(splits) => splits[0].id(), - None => default_split_id(), - }; - #[for_await] - for msgs in self.reader.into_stream() { - let msgs = msgs?; - self.metrics - .partition_input_count - .with_label_values(&[&actor_id, &source_id, &id]) - .inc_by(msgs.len() as u64); - let sum_bytes = msgs - .iter() - .map(|msg| match &msg.payload { - None => 0, - Some(payload) => payload.len() as u64, - }) - .sum(); - self.metrics - .partition_input_bytes - .with_label_values(&[&actor_id, &source_id, &id]) - .inc_by(sum_bytes); - yield msgs; - } - } -} - -impl ConnectorSourceReader { - #[try_stream(boxed, ok = StreamChunkWithState, error = RwError)] - pub async fn into_stream(self) { - #[for_await] - for batch in self.stream { - let batch = batch?; - let mut builder = - SourceStreamChunkBuilder::with_capacity(self.columns.clone(), batch.len()); - let mut split_offset_mapping: HashMap = HashMap::new(); - - for msg in batch { - if let Some(content) = msg.payload { - split_offset_mapping.insert(msg.split_id, msg.offset); - - let old_op_num = builder.op_num(); - - if let Err(e) = self - .parser - .parse(content.as_ref(), builder.row_writer()) - .await - { - tracing::warn!("message parsing failed {}, skipping", e.to_string()); - continue; - } - - let new_op_num = builder.op_num(); - - // new_op_num - old_op_num is the number of rows added to the builder - for _ in old_op_num..new_op_num { - // TODO: support more kinds of SourceMeta - if let SourceMeta::Kafka(kafka_meta) = msg.meta.clone() { - let f = |desc: &SourceColumnDesc| -> Option { - if !desc.is_meta { - return None; - } - match desc.name.as_str() { - "_rw_kafka_timestamp" => Some( - kafka_meta - .timestamp - .map(|ts| i64_to_timestamptz(ts).unwrap().into()), - ), - _ => unreachable!( - "kafka will not have this meta column: {}", - desc.name - ), - } - }; - builder.row_writer().fulfill_meta_column(f)?; - } - } - } - } - yield StreamChunkWithState { - chunk: builder.finish(), - split_offset_mapping: Some(split_offset_mapping), - }; - } - } -} #[derive(Clone, Debug)] pub struct ConnectorSource { pub config: ConnectorProperties, pub columns: Vec, - pub parser: Arc, + // pub parser: Arc, + pub parser_config: SpecificParserConfig, pub connector_message_buffer_size: usize, } impl ConnectorSource { - #[allow(clippy::too_many_arguments)] - pub async fn new( - format: SourceFormat, - row_schema_location: &str, - use_schema_registry: bool, - proto_message_name: 
String, + pub fn new( properties: HashMap, columns: Vec, connector_node_addr: Option, connector_message_buffer_size: usize, + parser_config: SpecificParserConfig, ) -> Result { - let mut config = ConnectorProperties::extract(properties.clone()) - .map_err(|e| ConnectorError(e.into()))?; + let mut config = + ConnectorProperties::extract(properties).map_err(|e| ConnectorError(e.into()))?; if let Some(addr) = connector_node_addr { // fixme: require source_id config.init_properties_for_cdc(0, addr, None) } - let parser = SourceParserImpl::create( - &format, - &properties, - row_schema_location, - use_schema_registry, - proto_message_name, - ) - .await?; + Ok(Self { config, columns, - parser, + parser_config, connector_message_buffer_size, }) } @@ -284,8 +84,8 @@ impl ConnectorSource { splits: ConnectorState, column_ids: Vec, metrics: Arc, - context: SourceContext, - ) -> Result { + source_info: SourceInfo, + ) -> Result { let config = self.config.clone(); let columns = self.get_target_columns(column_ids)?; let source_metrics = metrics.clone(); @@ -297,201 +97,44 @@ impl ConnectorSource { .collect::>(), None => vec![None], }; - let readers = - try_join_all(to_reader_splits.into_iter().map(|split| { - tracing::debug!("spawning connector split reader for split {:?}", split); - let props = config.clone(); - let columns = columns.clone(); - let metrics = source_metrics.clone(); - let context = context.clone(); - async move { - InnerConnectorSourceReader::new(props, split, columns, metrics, context).await - } - })) - .await?; - - let stream = select_all(readers.into_iter().map(|r| r.into_stream())).boxed(); - - Ok(ConnectorSourceReader { - parser: self.parser.clone(), - columns, - stream, - }) - } -} - -/// `SourceDescV2` describes a stream source. -#[derive(Debug)] -pub struct SourceDescV2 { - pub source: ConnectorSource, - pub format: SourceFormat, - pub columns: Vec, - pub metrics: Arc, - pub pk_column_ids: Vec, -} - -#[derive(Clone)] -pub struct SourceDescBuilderV2 { - columns: Vec, - metrics: Arc, - pk_column_ids: Vec, - row_id_index: Option, - properties: HashMap, - source_info: ProstStreamSourceInfo, - connector_params: ConnectorParams, - connector_message_buffer_size: usize, -} - -impl SourceDescBuilderV2 { - #[allow(clippy::too_many_arguments)] - pub fn new( - columns: Vec, - metrics: Arc, - pk_column_ids: Vec, - row_id_index: Option, - properties: HashMap, - source_info: ProstStreamSourceInfo, - connector_params: ConnectorParams, - connector_message_buffer_size: usize, - ) -> Self { - Self { - columns, - metrics, - pk_column_ids, - row_id_index, - properties, - source_info, - connector_params, - connector_message_buffer_size, - } - } - - pub async fn build(self) -> Result { - let format = match self.source_info.get_row_format()? 
{ - ProstRowFormatType::Json => SourceFormat::Json, - ProstRowFormatType::Protobuf => SourceFormat::Protobuf, - ProstRowFormatType::DebeziumJson => SourceFormat::DebeziumJson, - ProstRowFormatType::Avro => SourceFormat::Avro, - ProstRowFormatType::Maxwell => SourceFormat::Maxwell, - ProstRowFormatType::CanalJson => SourceFormat::CanalJson, - ProstRowFormatType::Csv => SourceFormat::Csv, - ProstRowFormatType::RowUnspecified => unreachable!(), - }; - - if format == SourceFormat::Protobuf && self.source_info.row_schema_location.is_empty() { - return Err(ProtocolError("protobuf file location not provided".to_string()).into()); - } - - let mut columns: Vec<_> = self - .columns - .iter() - .map(|c| SourceColumnDesc::from(&ColumnDesc::from(c.column_desc.as_ref().unwrap()))) - .collect(); - if let Some(row_id_index) = self.row_id_index.as_ref() { - columns[row_id_index.index as usize].is_row_id = true; - } - assert!( - !self.pk_column_ids.is_empty(), - "source should have at least one pk column" - ); - - let source = ConnectorSource::new( - format.clone(), - &self.source_info.row_schema_location, - self.source_info.use_schema_registry, - self.source_info.proto_message_name, - self.properties, - columns.clone(), - self.connector_params.connector_rpc_endpoint, - self.connector_message_buffer_size, - ) + let readers = try_join_all(to_reader_splits.into_iter().map(|state| { + tracing::debug!("spawning connector split reader for split {:?}", state); + let props = config.clone(); + let columns = columns.clone(); + let data_gen_columns = Some( + columns + .iter() + .cloned() + .map(|col| Column { + name: col.name, + data_type: col.data_type, + }) + .collect_vec(), + ); + let metrics = source_metrics.clone(); + + async move { + // InnerConnectorSourceReader::new(props, split, columns, metrics, + // source_info).await + let parser_config = ParserConfig { + specific: self.parser_config.clone(), + common: CommonParserConfig { + rw_columns: columns, + }, + }; + SplitReaderImpl::create( + props, + state, + parser_config, + metrics, + source_info, + data_gen_columns, + ) + .await + } + })) .await?; - Ok(SourceDescV2 { - source, - format, - columns, - metrics: self.metrics, - pk_column_ids: self.pk_column_ids, - }) - } - - pub fn metrics(&self) -> Arc { - self.metrics.clone() - } - - pub fn build_fs_stream_source(&self) -> Result { - let format = match self.source_info.get_row_format()? 
{ - ProstRowFormatType::Csv => SourceFormat::Csv, - _ => unreachable!(), - }; - - let mut columns: Vec<_> = self - .columns - .iter() - .map(|c| SourceColumnDesc::from(&ColumnDesc::from(c.column_desc.as_ref().unwrap()))) - .collect(); - - if let Some(row_id_index) = self.row_id_index.as_ref() { - columns[row_id_index.index as usize].is_row_id = true; - } - - let parser_config = SpecificParserConfig::new(&format, &self.source_info); - - FsConnectorSource::new( - format, - self.properties.clone(), - columns, - self.connector_params.connector_rpc_endpoint.clone(), - parser_config, - ) - } -} - -pub mod test_utils { - use std::collections::HashMap; - - use risingwave_common::catalog::{ColumnDesc, ColumnId, Schema}; - use risingwave_pb::catalog::{ColumnIndex, StreamSourceInfo}; - use risingwave_pb::plan_common::ColumnCatalog; - - use super::{SourceDescBuilderV2, DEFAULT_CONNECTOR_MESSAGE_BUFFER_SIZE}; - - pub fn create_source_desc_builder( - schema: &Schema, - pk_column_ids: Vec, - row_id_index: Option, - source_info: StreamSourceInfo, - properties: HashMap, - ) -> SourceDescBuilderV2 { - let row_id_index = row_id_index.map(|index| ColumnIndex { index }); - let columns = schema - .fields - .iter() - .enumerate() - .map(|(i, f)| ColumnCatalog { - column_desc: Some( - ColumnDesc { - data_type: f.data_type.clone(), - column_id: ColumnId::from(i as i32), // use column index as column id - name: f.name.clone(), - field_descs: vec![], - type_name: "".to_string(), - } - .to_protobuf(), - ), - is_hidden: false, - }) - .collect(); - SourceDescBuilderV2 { - columns, - metrics: Default::default(), - pk_column_ids, - row_id_index, - properties, - source_info, - connector_params: Default::default(), - connector_message_buffer_size: DEFAULT_CONNECTOR_MESSAGE_BUFFER_SIZE, - } + Ok(select_all(readers.into_iter().map(|r| r.into_stream())).boxed()) } } diff --git a/src/source/src/dml_manager.rs b/src/source/src/dml_manager.rs index a62b4b411ab5e..de00117321328 100644 --- a/src/source/src/dml_manager.rs +++ b/src/source/src/dml_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,29 +12,37 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::Ordering; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::sync::{Arc, Weak}; +use anyhow::Context; use parking_lot::RwLock; use risingwave_common::array::StreamChunk; -use risingwave_common::catalog::{ColumnDesc, TableId}; -use risingwave_common::error::ErrorCode::InternalError; +use risingwave_common::bail; +use risingwave_common::catalog::{ColumnDesc, TableId, TableVersionId}; use risingwave_common::error::Result; use tokio::sync::oneshot; -use crate::{TableSource, TableSourceRef}; +use crate::{TableDmlHandle, TableDmlHandleRef}; pub type DmlManagerRef = Arc; +#[derive(Debug)] +struct TableReader { + version_id: TableVersionId, + handle: Weak, +} + /// [`DmlManager`] manages the communication between batch data manipulation and streaming /// processing. -/// NOTE: `TableSource` is used here as an out-of-the-box solution. It should be renamed -/// as `BatchDml` later. We should further optimize its implementation (e.g. directly expose a -/// channel instead of offering a `write_chunk` interface). +/// NOTE: `TableDmlHandle` is used here as an out-of-the-box solution. 
We should further optimize +/// its implementation (e.g. directly expose a channel instead of offering a `write_chunk` +/// interface). #[derive(Default, Debug)] pub struct DmlManager { - table_readers: RwLock>>, + table_readers: RwLock>, } impl DmlManager { @@ -44,57 +52,206 @@ impl DmlManager { } } + /// Register a new DML reader for a table. If the reader for this version of the table already + /// exists, returns a reference to the existing reader. pub fn register_reader( &self, table_id: TableId, + table_version_id: TableVersionId, column_descs: &[ColumnDesc], - ) -> Result { + ) -> Result { let mut table_readers = self.table_readers.write(); // Clear invalid table readers. - table_readers.drain_filter(|_, weak_ref| weak_ref.strong_count() == 0); - - match table_readers.entry(table_id) { - Entry::Occupied(o) => o.get().upgrade().ok_or_else(|| { - InternalError(format!( - "fail to register reader for table with id {:?}", - table_id.table_id - )) - .into() - }), - Entry::Vacant(v) => { - let reader = Arc::new(TableSource::new(column_descs.to_vec())); - v.insert(Arc::downgrade(&reader)); - Ok(reader) - } + table_readers.drain_filter(|_, r| r.handle.strong_count() == 0); + + macro_rules! new_handle { + ($entry:ident) => {{ + let handle = Arc::new(TableDmlHandle::new(column_descs.to_vec())); + $entry.insert(TableReader { + version_id: table_version_id, + handle: Arc::downgrade(&handle), + }); + handle + }}; } + + let handle = match table_readers.entry(table_id) { + // Create a new reader. This happens when the first `DmlExecutor` of this table is + // activated on this compute node. + Entry::Vacant(v) => new_handle!(v), + + Entry::Occupied(mut o) => { + let TableReader { version_id, handle } = o.get(); + + match table_version_id.cmp(version_id) { + // This should never happen as the schema change is guaranteed to happen after a + // table is successfully created and all the readers are registered. + Ordering::Less => unreachable!("table version `{table_version_id}` expired"), + + // Register with the correct version. This happens when the following + // `DmlExecutor`s of this table is activated on this compute + // node. + Ordering::Equal => handle.upgrade().with_context(|| { + format!("fail to register reader for table with key `{table_id:?}`") + })?, + + // A new version of the table is activated, overwrite the old reader. + Ordering::Greater => new_handle!(o), + } + } + }; + + Ok(handle) } - pub fn write_chunk( + pub async fn write_chunk( &self, - table_id: &TableId, + table_id: TableId, + table_version_id: TableVersionId, chunk: StreamChunk, ) -> Result> { - let table_readers = self.table_readers.read(); - let writer = table_readers - .get(table_id) - .ok_or_else(|| { - InternalError(format!( - "no reader for dml in table with id {:?}", - table_id.table_id - )) - })? - .upgrade() - .ok_or_else(|| { - InternalError(format!( - "no reader for dml in table with id {:?}", - table_id.table_id - )) - })?; - writer.write_chunk(chunk) + let handle = { + let table_readers = self.table_readers.read(); + + match table_readers.get(&table_id) { + Some(TableReader { version_id, handle }) => { + match table_version_id.cmp(version_id) { + // A new version of the table is activated, but the DML request is still on + // the old version. + Ordering::Less => { + bail!("schema changed for table `{table_id:?}`, please retry later") + } + + // Write the chunk of correct version to the table. 
+ Ordering::Equal => handle.upgrade(), + + // This should never happen as the notification of the new version is + // guaranteed to happen after all new readers are activated. + Ordering::Greater => { + unreachable!("table version `{table_version_id} not registered") + } + } + } + None => None, + } + } + .with_context(|| format!("no reader for dml in table `{table_id:?}`"))?; + + handle.write_chunk(chunk).await } pub fn clear(&self) { self.table_readers.write().clear() } } + +#[cfg(test)] +mod tests { + use futures::FutureExt; + use risingwave_common::catalog::INITIAL_TABLE_VERSION_ID; + use risingwave_common::test_prelude::StreamChunkTestExt; + use risingwave_common::types::DataType; + + use super::*; + + #[easy_ext::ext(DmlManagerTestExt)] + impl DmlManager { + /// Write a chunk and assert that the chunk channel is not blocking. + pub fn write_chunk_ready( + &self, + table_id: TableId, + table_version_id: TableVersionId, + chunk: StreamChunk, + ) -> Result> { + self.write_chunk(table_id, table_version_id, chunk) + .now_or_never() + .unwrap() + } + } + + #[test] + fn test_register_and_drop() { + let dml_manager = DmlManager::new(); + let table_id = TableId::new(1); + let table_version_id = INITIAL_TABLE_VERSION_ID; + let column_descs = vec![ColumnDesc::unnamed(100.into(), DataType::Float64)]; + let chunk = || StreamChunk::from_pretty("F\n+ 1"); + + let h1 = dml_manager + .register_reader(table_id, table_version_id, &column_descs) + .unwrap(); + let h2 = dml_manager + .register_reader(table_id, table_version_id, &column_descs) + .unwrap(); + + // They should be the same handle. + assert!(Arc::ptr_eq(&h1, &h2)); + + // Start reading. + let r1 = h1.stream_reader(); + let r2 = h2.stream_reader(); + + // Should be able to write to the table. + dml_manager + .write_chunk_ready(table_id, table_version_id, chunk()) + .unwrap(); + + // After dropping one reader, the other one should still be able to write. + // This is to simulate the scale-in of DML executors. + drop(r1); + dml_manager + .write_chunk_ready(table_id, table_version_id, chunk()) + .unwrap(); + + // After dropping the last reader, no more writes are allowed. + // This is to simulate the dropping of the table. + drop(r2); + dml_manager + .write_chunk_ready(table_id, table_version_id, chunk()) + .unwrap_err(); + } + + #[test] + fn test_versioned() { + let dml_manager = DmlManager::new(); + let table_id = TableId::new(1); + + let old_version_id = INITIAL_TABLE_VERSION_ID; + let old_column_descs = vec![ColumnDesc::unnamed(100.into(), DataType::Float64)]; + let old_chunk = || StreamChunk::from_pretty("F\n+ 1"); + + let new_version_id = old_version_id + 1; + let new_column_descs = vec![ + ColumnDesc::unnamed(100.into(), DataType::Float64), + ColumnDesc::unnamed(101.into(), DataType::Float64), + ]; + let new_chunk = || StreamChunk::from_pretty("F F\n+ 1 2"); + + // Start reading. + let old_h = dml_manager + .register_reader(table_id, old_version_id, &old_column_descs) + .unwrap(); + let _old_r = old_h.stream_reader(); + + // Should be able to write to the table. + dml_manager + .write_chunk_ready(table_id, old_version_id, old_chunk()) + .unwrap(); + + // Start reading the new version. + let new_h = dml_manager + .register_reader(table_id, new_version_id, &new_column_descs) + .unwrap(); + let _new_r = new_h.stream_reader(); + + // Should not be able to write to the old version. + dml_manager + .write_chunk_ready(table_id, old_version_id, old_chunk()) + .unwrap_err(); + // Should be able to write to the new version. 
+ dml_manager + .write_chunk_ready(table_id, new_version_id, new_chunk()) + .unwrap(); + } +} diff --git a/src/source/src/fs_connector_source.rs b/src/source/src/fs_connector_source.rs index eb17e59ce017a..ff5b3dd7616d7 100644 --- a/src/source/src/fs_connector_source.rs +++ b/src/source/src/fs_connector_source.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,25 +19,22 @@ use risingwave_common::catalog::ColumnId; use risingwave_common::error::ErrorCode::ConnectorError; use risingwave_common::error::{internal_error, Result, RwError}; use risingwave_connector::parser::{CommonParserConfig, ParserConfig, SpecificParserConfig}; -use risingwave_connector::source::{ConnectorProperties, ConnectorState, SplitReaderV2Impl}; -use risingwave_connector::{SourceColumnDesc, SourceFormat}; - -use crate::connector_source::SourceContext; -use crate::monitor::SourceMetrics; +use risingwave_connector::source::monitor::SourceMetrics; +use risingwave_connector::source::{ + ConnectorProperties, ConnectorState, SourceColumnDesc, SourceInfo, SplitReaderImpl, +}; #[derive(Clone, Debug)] pub struct FsConnectorSource { pub config: ConnectorProperties, pub columns: Vec, pub properties: HashMap, - pub format: SourceFormat, pub parser_config: SpecificParserConfig, } impl FsConnectorSource { #[allow(clippy::too_many_arguments)] pub fn new( - format: SourceFormat, properties: HashMap, columns: Vec, connector_node_addr: Option, @@ -55,7 +52,6 @@ impl FsConnectorSource { config, columns, properties, - format, parser_config, }) } @@ -82,20 +78,19 @@ impl FsConnectorSource { &self, state: ConnectorState, column_ids: Vec, - _metrics: Arc, - _context: SourceContext, - ) -> Result { + metrics: Arc, + source_info: SourceInfo, + ) -> Result { let config = self.config.clone(); let columns = self.get_target_columns(column_ids)?; let parser_config = ParserConfig { specific: self.parser_config.clone(), common: CommonParserConfig { - props: self.properties.clone(), rw_columns: columns, }, }; - SplitReaderV2Impl::create(config, state, parser_config, None) + SplitReaderImpl::create(config, state, parser_config, metrics, source_info, None) .await .map_err(RwError::from) } diff --git a/src/source/src/lib.rs b/src/source/src/lib.rs index cd42d1bf40c22..d9b0725e55388 100644 --- a/src/source/src/lib.rs +++ b/src/source/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
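For illustration of the `dml_manager.rs` changes above (not part of the patch): DML writes are now versioned and asynchronous. A writer must present the same `TableVersionId` that the source executors registered with, and `write_chunk` awaits on the bounded channel introduced in the `table.rs` changes further down, so a slow source executor back-pressures the batch side. The helper below is a hypothetical sketch of that path, under those assumptions.

```rust
use risingwave_common::array::StreamChunk;
use risingwave_common::catalog::{TableId, TableVersionId};
use risingwave_common::error::Result;
use risingwave_source::dml_manager::DmlManager;

// Hypothetical write path for a batch DML statement.
async fn sketch_dml_write(
    dml_manager: &DmlManager,
    table_id: TableId,
    table_version_id: TableVersionId,
    chunk: StreamChunk,
) -> Result<usize> {
    // Errors if the table's readers have already moved to a newer schema version;
    // awaits when the per-table DML channel is full.
    let notifier = dml_manager
        .write_chunk(table_id, table_version_id, chunk)
        .await?;

    // Resolves once a source executor has taken the chunk; the payload is the
    // chunk's cardinality (e.g. to report the number of affected rows).
    let cardinality = notifier
        .await
        .expect("source executor dropped before taking the chunk");
    Ok(cardinality)
}
```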
@@ -21,6 +21,7 @@ #![feature(generators)] #![feature(hash_drain_filter)] #![feature(type_alias_impl_trait)] +#![feature(box_patterns)] pub use table::*; @@ -28,8 +29,8 @@ pub mod dml_manager; mod common; pub mod connector_source; -pub use connector_source::test_utils as connector_test_utils; +pub mod source_desc; +pub use source_desc::test_utils as connector_test_utils; pub mod fs_connector_source; -pub mod monitor; pub mod row_id; mod table; diff --git a/src/source/src/row_id.rs b/src/source/src/row_id.rs index 03c758008d9ea..f3cdc7f5bb62f 100644 --- a/src/source/src/row_id.rs +++ b/src/source/src/row_id.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/source/src/source_desc.rs b/src/source/src/source_desc.rs new file mode 100644 index 0000000000000..fe6fb766cc832 --- /dev/null +++ b/src/source/src/source_desc.rs @@ -0,0 +1,234 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::sync::Arc; + +use risingwave_common::catalog::ColumnDesc; +use risingwave_common::error::ErrorCode::ProtocolError; +use risingwave_common::error::Result; +use risingwave_connector::parser::SpecificParserConfig; +use risingwave_connector::source::monitor::SourceMetrics; +use risingwave_connector::source::{SourceColumnDesc, SourceFormat}; +use risingwave_connector::ConnectorParams; +use risingwave_pb::catalog::{ + ColumnIndex as ProstColumnIndex, StreamSourceInfo as ProstStreamSourceInfo, +}; +use risingwave_pb::plan_common::{ + ColumnCatalog as ProstColumnCatalog, RowFormatType as ProstRowFormatType, +}; + +use crate::connector_source::ConnectorSource; +use crate::fs_connector_source::FsConnectorSource; + +pub const DEFAULT_CONNECTOR_MESSAGE_BUFFER_SIZE: usize = 16; + +/// `SourceDesc` describes a stream source. +#[derive(Debug)] +pub struct SourceDesc { + pub source: ConnectorSource, + pub format: SourceFormat, + pub columns: Vec, + pub metrics: Arc, + pub pk_column_ids: Vec, +} + +/// `FsSourceDesc` describes a stream source. 
+#[derive(Debug)] +pub struct FsSourceDesc { + pub source: FsConnectorSource, + pub format: SourceFormat, + pub columns: Vec, + pub metrics: Arc, + pub pk_column_ids: Vec, +} + +#[derive(Clone)] +pub struct SourceDescBuilder { + columns: Vec, + metrics: Arc, + pk_column_ids: Vec, + row_id_index: Option, + properties: HashMap, + source_info: ProstStreamSourceInfo, + connector_params: ConnectorParams, + connector_message_buffer_size: usize, +} + +impl SourceDescBuilder { + #[allow(clippy::too_many_arguments)] + pub fn new( + columns: Vec, + metrics: Arc, + pk_column_ids: Vec, + row_id_index: Option, + properties: HashMap, + source_info: ProstStreamSourceInfo, + connector_params: ConnectorParams, + connector_message_buffer_size: usize, + ) -> Self { + Self { + columns, + metrics, + pk_column_ids, + row_id_index, + properties, + source_info, + connector_params, + connector_message_buffer_size, + } + } + + pub async fn build(self) -> Result { + let format = match self.source_info.get_row_format()? { + ProstRowFormatType::Json => SourceFormat::Json, + ProstRowFormatType::Protobuf => SourceFormat::Protobuf, + ProstRowFormatType::DebeziumJson => SourceFormat::DebeziumJson, + ProstRowFormatType::Avro => SourceFormat::Avro, + ProstRowFormatType::Maxwell => SourceFormat::Maxwell, + ProstRowFormatType::CanalJson => SourceFormat::CanalJson, + ProstRowFormatType::Native => SourceFormat::Native, + ProstRowFormatType::DebeziumAvro => SourceFormat::DebeziumAvro, + _ => unreachable!(), + }; + + if format == SourceFormat::Protobuf && self.source_info.row_schema_location.is_empty() { + return Err(ProtocolError("protobuf file location not provided".to_string()).into()); + } + + let mut columns: Vec<_> = self + .columns + .iter() + .map(|c| SourceColumnDesc::from(&ColumnDesc::from(c.column_desc.as_ref().unwrap()))) + .collect(); + if let Some(row_id_index) = self.row_id_index.as_ref() { + columns[row_id_index.index as usize].is_row_id = true; + } + assert!( + !self.pk_column_ids.is_empty(), + "source should have at least one pk column" + ); + + let parser_config = + SpecificParserConfig::new(format, &self.source_info, &self.properties).await?; + + let source = ConnectorSource::new( + self.properties, + columns.clone(), + self.connector_params.connector_rpc_endpoint, + self.connector_message_buffer_size, + parser_config, + )?; + + Ok(SourceDesc { + source, + format, + columns, + metrics: self.metrics, + pk_column_ids: self.pk_column_ids, + }) + } + + pub fn metrics(&self) -> Arc { + self.metrics.clone() + } + + pub async fn build_fs_source_desc(&self) -> Result { + let format = match self.source_info.get_row_format()?
{ + ProstRowFormatType::Csv => SourceFormat::Csv, + _ => unreachable!(), + }; + + let mut columns: Vec<_> = self + .columns + .iter() + .map(|c| SourceColumnDesc::from(&ColumnDesc::from(c.column_desc.as_ref().unwrap()))) + .collect(); + + if let Some(row_id_index) = self.row_id_index.as_ref() { + columns[row_id_index.index as usize].is_row_id = true; + } + + assert!( + !self.pk_column_ids.is_empty(), + "source should have at least one pk column" + ); + + let parser_config = + SpecificParserConfig::new(format, &self.source_info, &self.properties).await?; + + let source = FsConnectorSource::new( + self.properties.clone(), + columns.clone(), + self.connector_params.connector_rpc_endpoint.clone(), + parser_config, + )?; + + Ok(FsSourceDesc { + source, + format, + columns, + metrics: self.metrics.clone(), + pk_column_ids: self.pk_column_ids.clone(), + }) + } +} + +pub mod test_utils { + use std::collections::HashMap; + + use risingwave_common::catalog::{ColumnDesc, ColumnId, Schema}; + use risingwave_pb::catalog::{ColumnIndex, StreamSourceInfo}; + use risingwave_pb::plan_common::ColumnCatalog; + + use super::{SourceDescBuilder, DEFAULT_CONNECTOR_MESSAGE_BUFFER_SIZE}; + + pub fn create_source_desc_builder( + schema: &Schema, + pk_column_ids: Vec, + row_id_index: Option, + source_info: StreamSourceInfo, + properties: HashMap, + ) -> SourceDescBuilder { + let row_id_index = row_id_index.map(|index| ColumnIndex { index }); + let columns = schema + .fields + .iter() + .enumerate() + .map(|(i, f)| ColumnCatalog { + column_desc: Some( + ColumnDesc { + data_type: f.data_type.clone(), + column_id: ColumnId::from(i as i32), // use column index as column id + name: f.name.clone(), + field_descs: vec![], + type_name: "".to_string(), + } + .to_protobuf(), + ), + is_hidden: false, + }) + .collect(); + SourceDescBuilder { + columns, + metrics: Default::default(), + pk_column_ids, + row_id_index, + properties, + source_info, + connector_params: Default::default(), + connector_message_buffer_size: DEFAULT_CONNECTOR_MESSAGE_BUFFER_SIZE, + } + } +} diff --git a/src/source/src/table.rs b/src/source/src/table.rs index ac4c722d1a4a5..e479868fc9de1 100644 --- a/src/source/src/table.rs +++ b/src/source/src/table.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,44 +15,50 @@ use std::sync::Arc; use anyhow::Context; +use futures::FutureExt; use futures_async_stream::try_stream; -use parking_lot::{RwLock, RwLockUpgradableReadGuard}; +use parking_lot::RwLock; use rand::seq::IteratorRandom; use risingwave_common::array::StreamChunk; -use risingwave_common::catalog::{ColumnDesc, ColumnId}; +use risingwave_common::catalog::ColumnDesc; use risingwave_common::error::{Result, RwError}; -use risingwave_connector::StreamChunkWithState; +use risingwave_connector::source::StreamChunkWithState; use tokio::sync::mpsc::error::SendError; use tokio::sync::{mpsc, oneshot}; -pub type TableSourceRef = Arc; +pub type TableDmlHandleRef = Arc; #[derive(Debug)] -struct TableSourceCore { +struct TableDmlHandleCore { /// The senders of the changes channel. /// /// When a `StreamReader` is created, a channel will be created and the sender will be /// saved here. The insert statement will take one channel randomly. 
- changes_txs: Vec)>>, + changes_txs: Vec)>>, } -/// [`TableSource`] is a special internal source to handle table updates from user, +/// The buffer size of the channel between each [`TableDmlHandle`] and the source executors. +// TODO: decide a default value carefully and make this configurable. +const DML_CHUNK_BUFFER_SIZE: usize = 32; + +/// [`TableDmlHandle`] is a special internal source to handle table updates from user, /// including insert/delete/update statements via SQL interface. /// /// Changed rows will be send to the associated "materialize" streaming task, then be written to the -/// state store. Therefore, [`TableSource`] can be simply be treated as a channel without side +/// state store. Therefore, [`TableDmlHandle`] can be simply be treated as a channel without side /// effects. #[derive(Debug)] -pub struct TableSource { - core: RwLock, +pub struct TableDmlHandle { + core: RwLock, /// All columns in this table. + #[allow(dead_code)] column_descs: Vec, } -impl TableSource { +impl TableDmlHandle { pub fn new(column_descs: Vec) -> Self { - let core = TableSourceCore { + let core = TableDmlHandleCore { changes_txs: vec![], }; @@ -62,28 +68,36 @@ impl TableSource { } } + pub fn stream_reader(&self) -> TableStreamReader { + let mut core = self.core.write(); + let (tx, rx) = mpsc::channel(DML_CHUNK_BUFFER_SIZE); + core.changes_txs.push(tx); + + TableStreamReader { rx } + } + /// Asynchronously write stream chunk into table. Changes written here will be simply passed to /// the associated streaming task via channel, and then be materialized to storage there. /// /// Returns an oneshot channel which will be notified when the chunk is taken by some reader, /// and the `usize` represents the cardinality of this chunk. - pub fn write_chunk(&self, mut chunk: StreamChunk) -> Result> { + pub async fn write_chunk(&self, mut chunk: StreamChunk) -> Result> { loop { - let core = self.core.upgradable_read(); - // The `changes_txs` should not be empty normally, since we ensured that the channels - // between the `TableSource` and the `SourceExecutor`s are ready before we making the + // between the `TableDmlHandle` and the `SourceExecutor`s are ready before we making the // table catalog visible to the users. However, when we're recovering, it's possible // that the streaming executors are not ready when the frontend is able to schedule DML // tasks to the compute nodes, so this'll be temporarily unavailable, so we throw an // error instead of asserting here. // TODO: may reject DML when streaming executors are not recovered. - let (index, tx) = core + let tx = self + .core + .read() .changes_txs .iter() - .enumerate() .choose(&mut rand::thread_rng()) - .context("no available table reader in streaming source executors")?; + .context("no available table reader in streaming source executors")? + .clone(); #[cfg(debug_assertions)] risingwave_common::util::schema_check::schema_check( @@ -94,24 +108,31 @@ impl TableSource { let (notifier_tx, notifier_rx) = oneshot::channel(); - match tx.send((chunk, notifier_tx)) { + match tx.send((chunk, notifier_tx)).await { Ok(_) => return Ok(notifier_rx), // It's possible that the source executor is scaled in or migrated, so the channel // is closed. In this case, we should remove the closed channel and retry. 
Err(SendError((chunk_, _))) => { - tracing::info!("find one closed table source channel, remove it and retry"); - + tracing::info!("find one closed table source channel, retry"); chunk = chunk_; - RwLockUpgradableReadGuard::upgrade(core) - .changes_txs - .swap_remove(index); + + // Remove all closed channels. + self.core.write().changes_txs.retain(|tx| !tx.is_closed()); } } } } } +#[easy_ext::ext(TableDmlHandleTestExt)] +impl TableDmlHandle { + /// Write a chunk and assert that the chunk channel is not blocking. + fn write_chunk_ready(&self, chunk: StreamChunk) -> Result> { + self.write_chunk(chunk).now_or_never().unwrap() + } +} + /// [`TableStreamReader`] reads changes from a certain table continuously. /// This struct should be only used for associated materialize task, thus the reader should be /// created only once. Further streaming task relying on this table source should follow the @@ -119,73 +140,18 @@ impl TableSource { #[derive(Debug)] pub struct TableStreamReader { /// The receiver of the changes channel. - rx: mpsc::UnboundedReceiver<(StreamChunk, oneshot::Sender)>, - - /// Mappings from the source column to the column to be read. - column_indices: Vec, + rx: mpsc::Receiver<(StreamChunk, oneshot::Sender)>, } impl TableStreamReader { #[try_stream(boxed, ok = StreamChunkWithState, error = RwError)] pub async fn into_stream(mut self) { while let Some((chunk, notifier)) = self.rx.recv().await { - let (ops, columns, bitmap) = chunk.into_inner(); - - let selected_columns = self - .column_indices - .iter() - .map(|i| columns[*i].clone()) - .collect(); - let chunk = StreamChunk::new(ops, selected_columns, bitmap); - // Notify about that we've taken the chunk. _ = notifier.send(chunk.cardinality()); - yield chunk.into(); } } - - #[try_stream(boxed, ok = StreamChunk, error = RwError)] - pub async fn into_stream_v2(mut self) { - while let Some((chunk, notifier)) = self.rx.recv().await { - // Notify about that we've taken the chunk. - _ = notifier.send(chunk.cardinality()); - yield chunk; - } - } -} - -impl TableSource { - /// Create a new stream reader. - #[expect(clippy::unused_async)] - pub async fn stream_reader(&self, column_ids: Vec) -> Result { - let column_indices = column_ids - .into_iter() - .map(|id| { - self.column_descs - .iter() - .position(|c| c.column_id == id) - .expect("column id not exists") - }) - .collect(); - - let mut core = self.core.write(); - let (tx, rx) = mpsc::unbounded_channel(); - core.changes_txs.push(tx); - - Ok(TableStreamReader { rx, column_indices }) - } - - pub fn stream_reader_v2(&self) -> TableStreamReader { - let mut core = self.core.write(); - let (tx, rx) = mpsc::unbounded_channel(); - core.changes_txs.push(tx); - - TableStreamReader { - rx, - column_indices: Default::default(), - } - } } #[cfg(test)] @@ -196,25 +162,23 @@ mod tests { use futures::StreamExt; use itertools::Itertools; use risingwave_common::array::{Array, I64Array, Op}; + use risingwave_common::catalog::ColumnId; use risingwave_common::column_nonnull; use risingwave_common::types::DataType; use super::*; - fn new_source() -> TableSource { - TableSource::new(vec![ColumnDesc::unnamed( + fn new_source() -> TableDmlHandle { + TableDmlHandle::new(vec![ColumnDesc::unnamed( ColumnId::from(0), DataType::Int64, )]) } #[tokio::test] - async fn test_table_source() -> Result<()> { + async fn test_table_dml_handle() -> Result<()> { let source = Arc::new(new_source()); - let mut reader = source - .stream_reader(vec![ColumnId::from(0)]) - .await? 
- .into_stream(); + let mut reader = source.stream_reader().into_stream(); macro_rules! write_chunk { ($i:expr) => {{ @@ -224,7 +188,7 @@ mod tests { vec![column_nonnull!(I64Array, [$i])], None, ); - source.write_chunk(chunk).unwrap(); + source.write_chunk_ready(chunk).unwrap(); }}; } diff --git a/src/sqlparser/Cargo.toml b/src/sqlparser/Cargo.toml index 31eeb6ced5adb..b7e0578473288 100644 --- a/src/sqlparser/Cargo.toml +++ b/src/sqlparser/Cargo.toml @@ -17,16 +17,16 @@ path = "src/lib.rs" [features] default = ["std"] std = [] -# Enable JSON output in the `cli` example: -json_example = ["serde_json", "serde"] + +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] [dependencies] itertools = "0.10" serde = { version = "1.0", features = ["derive"], optional = true } -# serde_json is only used in examples/cli, but we have to put it outside -# of dev-dependencies because of -# https://github.com/rust-lang/cargo/issues/1596 -serde_json = { version = "1.0", optional = true } tracing = "0.1" [target.'cfg(not(madsim))'.dependencies] diff --git a/src/sqlparser/examples/parse.rs b/src/sqlparser/examples/parse.rs index 8393b8646bbc6..73587757de75f 100644 --- a/src/sqlparser/examples/parse.rs +++ b/src/sqlparser/examples/parse.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/sqlparser/fuzz/Cargo.toml b/src/sqlparser/fuzz/Cargo.toml index 72ab86ef6044d..d53162b86059e 100644 --- a/src/sqlparser/fuzz/Cargo.toml +++ b/src/sqlparser/fuzz/Cargo.toml @@ -4,9 +4,14 @@ version = "0.1.0" edition = "2018" publish = false +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] honggfuzz = "0.5.54" -sqlparser = { path = ".." } # Prevent this from interfering with workspaces [workspace] diff --git a/src/sqlparser/fuzz/fuzz_targets/fuzz_parse_sql.rs b/src/sqlparser/fuzz/fuzz_targets/fuzz_parse_sql.rs index 97dc8d81047c3..ba0ce5f0f5b55 100644 --- a/src/sqlparser/fuzz/fuzz_targets/fuzz_parse_sql.rs +++ b/src/sqlparser/fuzz/fuzz_targets/fuzz_parse_sql.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/sqlparser/sqlparser_bench/Cargo.toml b/src/sqlparser/sqlparser_bench/Cargo.toml index 512fd5f578645..73a752280d6d9 100644 --- a/src/sqlparser/sqlparser_bench/Cargo.toml +++ b/src/sqlparser/sqlparser_bench/Cargo.toml @@ -4,6 +4,12 @@ version = "0.1.0" authors = ["Dandandan "] edition = "2018" +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] risingwave_sqlparser = { path = "../" } diff --git a/src/sqlparser/src/ast/ddl.rs b/src/sqlparser/src/ast/ddl.rs index 587661b56894c..21f24bb67e4cb 100644 --- a/src/sqlparser/src/ast/ddl.rs +++ b/src/sqlparser/src/ast/ddl.rs @@ -175,6 +175,21 @@ impl fmt::Display for AlterColumnOperation { } } +/// The watermark on source. 
+/// `WATERMARK FOR AS ()` +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct SourceWatermark { + pub column: Ident, + pub expr: Expr, +} + +impl fmt::Display for SourceWatermark { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "WATERMARK FOR {} AS {}", self.column, self.expr,) + } +} + /// A table-level constraint, specified in a `CREATE TABLE` or an /// `ALTER TABLE ADD ` statement. #[derive(Debug, Clone, PartialEq, Eq, Hash)] diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs index 5457a77531975..b5526de729442 100644 --- a/src/sqlparser/src/ast/mod.rs +++ b/src/sqlparser/src/ast/mod.rs @@ -33,7 +33,7 @@ use serde::{Deserialize, Serialize}; pub use self::data_type::{DataType, StructField}; pub use self::ddl::{ AlterColumnOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef, - ReferentialAction, TableConstraint, + ReferentialAction, SourceWatermark, TableConstraint, }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ @@ -191,6 +191,28 @@ impl From> for ObjectName { } } +/// For array type `ARRAY[..]` or `[..]` +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Array { + /// The list of expressions between brackets + pub elem: Vec, + + /// `true` for `ARRAY[..]`, `false` for `[..]` + pub named: bool, +} + +impl fmt::Display for Array { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}[{}]", + if self.named { "ARRAY" } else { "" }, + display_comma_separated(&self.elem) + ) + } +} + /// An SQL expression of any type. /// /// The parser does not distinguish between expressions of different types @@ -344,12 +366,13 @@ pub enum Expr { Row(Vec), /// The `ARRAY` expr. Alternative syntax for `ARRAY` is by utilizing curly braces, /// e.g. {1, 2, 3}, - Array(Vec), + Array(Array), /// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]` ArrayIndex { obj: Box, index: Box }, } impl fmt::Display for Expr { + #[expect(clippy::disallowed_methods, reason = "use zip_eq")] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Expr::Identifier(s) => write!(f, "{}", s), @@ -396,20 +419,14 @@ impl fmt::Display for Expr { low, high ), - Expr::BinaryOp { left, op, right } => write!( - f, - "{} {} {}", - fmt_expr_with_paren(left), - op, - fmt_expr_with_paren(right) - ), + Expr::BinaryOp { left, op, right } => write!(f, "{} {} {}", left, op, right), Expr::SomeOp(expr) => write!(f, "SOME({})", expr), Expr::AllOp(expr) => write!(f, "ALL({})", expr), Expr::UnaryOp { op, expr } => { if op == &UnaryOperator::PGPostfixFactorial { write!(f, "{}{}", expr, op) } else { - write!(f, "{} {}", op, fmt_expr_with_paren(expr)) + write!(f, "{} {}", op, expr) } } Expr::Cast { expr, data_type } => write!(f, "CAST({} AS {})", expr, data_type), @@ -543,38 +560,11 @@ impl fmt::Display for Expr { write!(f, "{}[{}]", obj, index)?; Ok(()) } - Expr::Array(exprs) => write!( - f, - "ARRAY[{}]", - exprs - .iter() - .map(|v| v.to_string()) - .collect::>() - .as_slice() - .join(", ") - ), + Expr::Array(exprs) => write!(f, "{}", exprs), } } } -/// Wrap complex expressions with necessary parentheses. -/// For example, `a > b LIKE c` becomes `a > (b LIKE c)`. -fn fmt_expr_with_paren(e: &Expr) -> String { - use Expr as E; - match e { - E::BinaryOp { .. } - | E::UnaryOp { .. 
} - | E::IsNull(_) - | E::IsNotNull(_) - | E::IsFalse(_) - | E::IsTrue(_) - | E::IsNotTrue(_) - | E::IsNotFalse(_) => return format!("({})", e), - _ => {} - }; - format!("{}", e) -} - /// A window specification (i.e. `OVER (PARTITION BY .. ORDER BY .. etc.)`) #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -916,6 +906,7 @@ pub enum Statement { name: ObjectName, columns: Vec, query: Box, + emit_mode: Option, with_options: Vec, }, /// CREATE TABLE @@ -1085,6 +1076,11 @@ pub enum Statement { CreateUser(CreateUserStatement), /// ALTER USER AlterUser(AlterUserStatement), + /// ALTER SYSTEM SET configuration_parameter { TO | = } { value | 'value' | DEFAULT } + AlterSystem { + param: Ident, + value: SetVariableValue, + }, /// FLUSH the current barrier. /// /// Note: RisingWave specific statement. @@ -1246,6 +1242,7 @@ impl fmt::Display for Statement { query, materialized, with_options, + emit_mode, } => { write!( f, @@ -1254,6 +1251,9 @@ impl fmt::Display for Statement { materialized = if *materialized { "MATERIALIZED " } else { "" }, name = name )?; + if let Some(emit_mode) = emit_mode { + write!(f, " EMIT {}", emit_mode)?; + } if !with_options.is_empty() { write!(f, " WITH ({})", display_comma_separated(with_options))?; } @@ -1517,6 +1517,13 @@ impl fmt::Display for Statement { Statement::AlterUser(statement) => { write!(f, "ALTER USER {}", statement) } + Statement::AlterSystem{param, value} => { + f.write_str("ALTER SYSTEM SET ")?; + write!( + f, + "{param} = {value}", + ) + } Statement::Flush => { write!(f, "FLUSH") } @@ -1918,6 +1925,22 @@ impl fmt::Display for SqlOption { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum EmitMode { + Immediately, + OnWindowClose, +} + +impl fmt::Display for EmitMode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(match self { + EmitMode::Immediately => "IMMEDIATELY", + EmitMode::OnWindowClose => "ON WINDOW CLOSE", + }) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum TransactionMode { @@ -2173,6 +2196,7 @@ impl fmt::Display for CreateFunctionBody { pub enum SetVariableValue { Ident(Ident), Literal(Value), + Default, } impl fmt::Display for SetVariableValue { @@ -2181,6 +2205,7 @@ impl fmt::Display for SetVariableValue { match self { Ident(ident) => write!(f, "{}", ident), Literal(literal) => write!(f, "{}", literal), + Default => write!(f, "DEFAULT"), } } } @@ -2298,4 +2323,25 @@ mod tests { }; assert_eq!("v1[1][1]", format!("{}", array_index2)); } + + #[test] + /// issue: https://github.com/risingwavelabs/risingwave/issues/7635 + fn test_nested_op_display() { + let binary_op = Expr::BinaryOp { + left: Box::new(Expr::Value(Value::Boolean(true))), + op: BinaryOperator::Or, + right: Box::new(Expr::IsNotFalse(Box::new(Expr::Value(Value::Boolean( + true, + ))))), + }; + assert_eq!("true OR true IS NOT FALSE", format!("{}", binary_op)); + + let unary_op = Expr::UnaryOp { + op: UnaryOperator::Not, + expr: Box::new(Expr::IsNotFalse(Box::new(Expr::Value(Value::Boolean( + true, + ))))), + }; + assert_eq!("NOT true IS NOT FALSE", format!("{}", unary_op)); + } } diff --git a/src/sqlparser/src/ast/statement.rs b/src/sqlparser/src/ast/statement.rs index 1028d414ee969..5401054a0d22f 100644 --- a/src/sqlparser/src/ast/statement.rs +++ b/src/sqlparser/src/ast/statement.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// 
Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,11 +13,13 @@ // limitations under the License. use core::fmt; +use std::fmt::Write; use itertools::Itertools; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use super::ddl::SourceWatermark; use super::{Ident, ObjectType, Query}; use crate::ast::{ display_comma_separated, display_separated, ColumnDef, ObjectName, SqlOption, TableConstraint, @@ -68,6 +70,7 @@ macro_rules! impl_fmt_display { // with_properties: AstOption, // [Keyword::ROW, Keyword::FORMAT], // source_schema: SourceSchema, +// [Keyword::WATERMARK, Keyword::FOR] column [Keyword::AS] // }); #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -78,6 +81,7 @@ pub struct CreateSourceStatement { pub source_name: ObjectName, pub with_properties: WithProperties, pub source_schema: SourceSchema, + pub source_watermarks: Vec, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -91,6 +95,8 @@ pub enum SourceSchema { Maxwell, // Keyword::MAXWELL CanalJson, // Keyword::CANAL_JSON Csv(CsvInfo), // Keyword::CSV + Native, + DebeziumAvro(DebeziumAvroSchema), // Keyword::DEBEZIUM_AVRO } impl ParseTo for SourceSchema { @@ -112,6 +118,9 @@ impl ParseTo for SourceSchema { } else if p.parse_keywords(&[Keyword::CSV]) { impl_parse_to!(csv_info: CsvInfo, p); SourceSchema::Csv(csv_info) + } else if p.parse_keywords(&[Keyword::DEBEZIUM_AVRO]) { + impl_parse_to!(avro_schema: DebeziumAvroSchema, p); + SourceSchema::DebeziumAvro(avro_schema) } else { return Err(ParserError::ParserError( "expected JSON | PROTOBUF | DEBEZIUM_JSON | AVRO | MAXWELL | CANAL_JSON after ROW FORMAT".to_string(), @@ -130,7 +139,9 @@ impl fmt::Display for SourceSchema { SourceSchema::DebeziumJson => write!(f, "DEBEZIUM JSON"), SourceSchema::Avro(avro_schema) => write!(f, "AVRO {}", avro_schema), SourceSchema::CanalJson => write!(f, "CANAL JSON"), - SourceSchema::Csv(csv_ingo) => write!(f, "CSV {}", csv_ingo), + SourceSchema::Csv(csv_info) => write!(f, "CSV {}", csv_info), + SourceSchema::Native => write!(f, "NATIVE"), + SourceSchema::DebeziumAvro(avro_schema) => write!(f, "DEBEZIUM {}", avro_schema), } } } @@ -217,6 +228,51 @@ impl fmt::Display for AvroSchema { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct DebeziumAvroSchema { + pub row_schema_location: AstString, +} + +impl ParseTo for DebeziumAvroSchema { + fn parse_to(p: &mut Parser) -> Result { + impl_parse_to!( + [ + Keyword::ROW, + Keyword::SCHEMA, + Keyword::LOCATION, + Keyword::CONFLUENT, + Keyword::SCHEMA, + Keyword::REGISTRY + ], + p + ); + impl_parse_to!(row_schema_location: AstString, p); + Ok(Self { + row_schema_location, + }) + } +} + +impl fmt::Display for DebeziumAvroSchema { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut v: Vec = vec![]; + impl_fmt_display!( + [ + Keyword::ROW, + Keyword::SCHEMA, + Keyword::LOCATION, + Keyword::CONFLUENT, + Keyword::SCHEMA, + Keyword::REGISTRY + ], + v + ); + impl_fmt_display!(row_schema_location, v, self); + v.iter().join(" ").fmt(f) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct CsvInfo { @@ -264,21 +320,40 @@ impl ParseTo for CreateSourceStatement { impl_parse_to!(source_name: ObjectName, p); // parse columns - let (columns, constraints) = 
p.parse_columns()?; + let (columns, constraints, source_watermarks) = p.parse_columns_with_watermark()?; impl_parse_to!(with_properties: WithProperties, p); let option = with_properties .0 .iter() .find(|&opt| opt.name.real_value() == UPSTREAM_SOURCE_KEY); + let connector: String = option.map(|opt| opt.value.to_string()).unwrap_or_default(); // row format for cdc source must be debezium json - let source_schema = if let Some(opt) = option && opt.value.to_string().contains("-cdc") { + // row format for nexmark source must be native + // default row format for datagen source is native + let source_schema = if connector.contains("-cdc") { if p.peek_nth_any_of_keywords(0, &[Keyword::ROW]) && p.peek_nth_any_of_keywords(1, &[Keyword::FORMAT]) { return Err(ParserError::ParserError("Row format for cdc connectors should not be set here because it is limited to debezium json".to_string())); } SourceSchema::DebeziumJson + } else if connector.contains("nexmark") { + if p.peek_nth_any_of_keywords(0, &[Keyword::ROW]) + && p.peek_nth_any_of_keywords(1, &[Keyword::FORMAT]) + { + return Err(ParserError::ParserError("Row format for nexmark connectors should not be set here because it is limited to internal native format".to_string())); + } + SourceSchema::Native + } else if connector.contains("datagen") { + if p.peek_nth_any_of_keywords(0, &[Keyword::ROW]) + && p.peek_nth_any_of_keywords(1, &[Keyword::FORMAT]) + { + impl_parse_to!([Keyword::ROW, Keyword::FORMAT], p); + SourceSchema::parse_to(p)? + } else { + SourceSchema::Native + } } else { impl_parse_to!([Keyword::ROW, Keyword::FORMAT], p); SourceSchema::parse_to(p)? @@ -291,6 +366,7 @@ impl ParseTo for CreateSourceStatement { source_name, with_properties, source_schema, + source_watermarks, }) } } @@ -300,6 +376,36 @@ impl fmt::Display for CreateSourceStatement { let mut v: Vec = vec![]; impl_fmt_display!(if_not_exists => [Keyword::IF, Keyword::NOT, Keyword::EXISTS], v, self); impl_fmt_display!(source_name, v, self); + + // Items + let mut items = String::new(); + let has_items = !self.columns.is_empty() + || !self.constraints.is_empty() + || !self.source_watermarks.is_empty(); + has_items.then(|| write!(&mut items, "(")); + write!(&mut items, "{}", display_comma_separated(&self.columns))?; + if !self.columns.is_empty() + && (!self.constraints.is_empty() || !self.source_watermarks.is_empty()) + { + write!(&mut items, ", ")?; + } + write!(&mut items, "{}", display_comma_separated(&self.constraints))?; + if !self.columns.is_empty() + && !self.constraints.is_empty() + && !self.source_watermarks.is_empty() + { + write!(&mut items, ", ")?; + } + write!( + &mut items, + "{}", + display_comma_separated(&self.source_watermarks) + )?; + has_items.then(|| write!(&mut items, ")")); + if !items.is_empty() { + v.push(items); + } + impl_fmt_display!(with_properties, v, self); impl_fmt_display!([Keyword::ROW, Keyword::FORMAT], v); impl_fmt_display!(source_schema, v, self); diff --git a/src/sqlparser/src/keywords.rs b/src/sqlparser/src/keywords.rs index 4178b49fc4542..3a60e5e9eed98 100644 --- a/src/sqlparser/src/keywords.rs +++ b/src/sqlparser/src/keywords.rs @@ -173,6 +173,7 @@ define_keywords!( DATE, DAY, DEALLOCATE, + DEBEZIUM_AVRO, DEBEZIUM_JSON, DEC, DECIMAL, @@ -198,6 +199,7 @@ define_keywords!( EACH, ELEMENT, ELSE, + EMIT, ENCRYPTED, END, END_EXEC = "END-EXEC", @@ -250,6 +252,7 @@ define_keywords!( IF, IGNORE, ILIKE, + IMMEDIATELY, IMMUTABLE, IN, INCLUDE, @@ -306,6 +309,7 @@ define_keywords!( MONTH, MULTISET, NATIONAL, + NATIVE, NATURAL, NCHAR, NCLOB, @@ -526,6 
+530,7 @@ define_keywords!(
 VIEWS,
 VIRTUAL,
 VOLATILE,
+ WATERMARK,
 WHEN,
 WHENEVER,
 WHERE,
diff --git a/src/sqlparser/src/lib.rs b/src/sqlparser/src/lib.rs
index 3b47984488551..988b218fd7f66 100644
--- a/src/sqlparser/src/lib.rs
+++ b/src/sqlparser/src/lib.rs
@@ -33,7 +33,6 @@
 #![cfg_attr(not(feature = "std"), no_std)]
 #![feature(lint_reasons)]
 #![feature(let_chains)]
-#![expect(clippy::derive_partial_eq_without_eq)]
 #![expect(clippy::doc_markdown)]
 #![expect(clippy::upper_case_acronyms)]
diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs
index 998b7939b5f3d..b7563a34e8943 100644
--- a/src/sqlparser/src/parser.rs
+++ b/src/sqlparser/src/parser.rs
@@ -24,6 +24,7 @@ use core::fmt;
 use tracing::{debug, instrument};
+use crate::ast::ddl::SourceWatermark;
 use crate::ast::{ParseTo, *};
 use crate::keywords::{self, Keyword};
 use crate::tokenizer::*;
@@ -114,6 +115,8 @@ impl fmt::Display for ParserError {
 #[cfg(feature = "std")]
 impl std::error::Error for ParserError {}
+type ColumnsDefTuple = (Vec, Vec, Vec);
+
 pub struct Parser {
 tokens: Vec,
 /// The index of the first unprocessed token in `self.tokens`
@@ -121,12 +124,18 @@ pub struct Parser {
 /// Since we cannot distinguish `>>` and double `>`, so use `angle_brackets_num` to store the
 /// number of `<` to match `>` in sql like `struct>`.
 angle_brackets_num: i32,
+ /// Tracks whether we are currently inside a named `ARRAY[..]` expression: 0 means we are not
+ /// inside an array at all; a value greater than 0 is the current nesting depth.
+ array_depth: usize,
+ /// We cannot know up front whether the current array should keep the `ARRAY` keyword, so this
+ /// stack records, for each nesting depth, whether that level is named.
+ array_named_stack: Vec,
 }
 impl Parser {
- const BETWEEN_PREC: u8 = 20;
+ const BETWEEN_PREC: u8 = 21;
 const PLUS_MINUS_PREC: u8 = 30;
- const TIME_ZONE_PREC: u8 = 20;
+ const TIME_ZONE_PREC: u8 = 21;
 const UNARY_NOT_PREC: u8 = 15;
 /// Parse the specified tokens
@@ -135,6 +144,8 @@ impl Parser {
 tokens,
 index: 0,
 angle_brackets_num: 0,
+ array_depth: 0,
+ array_named_stack: Vec::new(),
 }
 }
@@ -236,6 +247,25 @@ impl Parser {
 Ok(Statement::Analyze { table_name })
 }
+ /// Returns the current array-expression nesting depth.
+ pub fn peek_array_depth(&self) -> usize {
+ self.array_depth
+ }
+
+ /// Called when entering an `ARRAY`-prefixed expression.
+ pub fn increase_array_depth(&mut self, num: usize) {
+ self.array_depth += num;
+ }
+
+ /// Called when leaving an `ARRAY`-prefixed expression.
+ pub fn decrease_array_depth(&mut self, num: usize) {
+ self.array_depth -= num;
+ }
+
+ pub fn is_in_array(&self) -> bool {
+ self.peek_array_depth() > 0
+ }
+
 /// Tries to parse a wildcard expression. If it is not a wildcard, parses an expression.
/// /// A wildcard expression either means: @@ -440,9 +470,10 @@ impl Parser { expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?), }), Keyword::ROW => self.parse_row_expr(), - Keyword::ARRAY => Ok(Expr::Array( - self.parse_token_wrapped_exprs(&Token::LBracket, &Token::RBracket)?, - )), + Keyword::ARRAY => { + self.expect_token(&Token::LBracket)?; + self.parse_array_expr(true) + } k if keywords::RESERVED_FOR_COLUMN_OR_TABLE_NAME.contains(&k) => { parser_err!(format!("syntax error at or near \"{w}\"")) } @@ -471,6 +502,9 @@ impl Parser { _ => Ok(Expr::Identifier(w.to_ident())), }, }, // End of Token::Word + + Token::LBracket if self.is_in_array() => self.parse_array_expr(false), + tok @ Token::Minus | tok @ Token::Plus => { let op = if tok == Token::Plus { UnaryOperator::Plus @@ -899,6 +933,49 @@ impl Parser { } } + /// Parses an array expression `[ex1, ex2, ..]` + /// if `named` is `true`, came from an expression like `ARRAY[ex1, ex2]` + pub fn parse_array_expr(&mut self, named: bool) -> Result { + self.increase_array_depth(1); + if self.array_named_stack.len() < self.peek_array_depth() { + self.array_named_stack.push(named); + } else if let Err(parse_err) = self.check_same_named_array(named) { + Err(parse_err)? + } + + if self.peek_token() == Token::RBracket { + let _ = self.next_token(); // consume ] + self.decrease_array_depth(1); + Ok(Expr::Array(Array { + elem: vec![], + named, + })) + } else { + let exprs = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RBracket)?; + if self.array_named_stack.len() > self.peek_array_depth() { + self.array_named_stack.pop(); + } + self.decrease_array_depth(1); + Ok(Expr::Array(Array { elem: exprs, named })) + } + } + + fn check_same_named_array(&mut self, current_named: bool) -> Result<(), ParserError> { + let previous_named = self.array_named_stack.last().unwrap(); + if current_named != *previous_named { + // for '[' + self.prev_token(); + if current_named { + // for keyword 'array' + self.prev_token(); + } + parser_err!(format!("syntax error at or near '{}'", self.peek_token()))? + } else { + Ok(()) + } + } + // This function parses date/time fields for interval qualifiers. pub fn parse_date_time_field(&mut self) -> Result { match self.next_token() { @@ -1580,6 +1657,11 @@ impl Parser { // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. let name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; + let emit_mode = if materialized { + self.parse_emit_mode()? + } else { + None + }; let with_options = self.parse_options(Keyword::WITH)?; self.expect_keyword(Keyword::AS)?; let query = Box::new(self.parse_query()?); @@ -1591,6 +1673,7 @@ impl Parser { materialized, or_replace, with_options, + emit_mode, }) } @@ -1830,22 +1913,37 @@ impl Parser { let option = with_options .iter() .find(|&opt| opt.name.real_value() == UPSTREAM_SOURCE_KEY); - let source_schema = if let Some(opt) = option { - // Table is created with an external connector. 
- if opt.value.to_string().contains("-cdc") { - // cdc connectors + let connector = option.map(|opt| opt.value.to_string()); + // row format for cdc source must be debezium json + // row format for nexmark source must be native + // default row format for datagen source is native + let source_schema = if let Some(connector) = connector { + if connector.contains("-cdc") { if self.peek_nth_any_of_keywords(0, &[Keyword::ROW]) && self.peek_nth_any_of_keywords(1, &[Keyword::FORMAT]) { return Err(ParserError::ParserError("Row format for cdc connectors should not be set here because it is limited to debezium json".to_string())); + } + Some(SourceSchema::DebeziumJson) + } else if connector.contains("nexmark") { + if self.peek_nth_any_of_keywords(0, &[Keyword::ROW]) + && self.peek_nth_any_of_keywords(1, &[Keyword::FORMAT]) + { + return Err(ParserError::ParserError("Row format for nexmark connectors should not be set here because it is limited to internal native format".to_string())); + } + Some(SourceSchema::Native) + } else if connector.contains("datagen") { + if self.peek_nth_any_of_keywords(0, &[Keyword::ROW]) + && self.peek_nth_any_of_keywords(1, &[Keyword::FORMAT]) + { + self.expect_keywords(&[Keyword::ROW, Keyword::FORMAT])?; + Some(SourceSchema::parse_to(self)?) } else { - Some(SourceSchema::DebeziumJson) + Some(SourceSchema::Native) } } else { - // non-cdc connectors - self - .expect_keywords(&[Keyword::ROW, Keyword::FORMAT]) - .map_err(|_| ParserError::ParserError("Please specify 'connector' in WITH clause to create a table with a connector".to_string()))?; + // other connectors + self.expect_keywords(&[Keyword::ROW, Keyword::FORMAT])?; Some(SourceSchema::parse_to(self)?) } } else { @@ -1874,15 +1972,36 @@ impl Parser { } pub fn parse_columns(&mut self) -> Result<(Vec, Vec), ParserError> { + let (column_refs, table_constraints, _) = self.parse_columns_inner(true)?; + Ok((column_refs, table_constraints)) + } + + pub fn parse_columns_with_watermark(&mut self) -> Result { + self.parse_columns_inner(true) + } + + fn parse_columns_inner( + &mut self, + with_watermark: bool, + ) -> Result { let mut columns = vec![]; let mut constraints = vec![]; + let mut watermarks = vec![]; if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { - return Ok((columns, constraints)); + return Ok((columns, constraints, watermarks)); } loop { if let Some(constraint) = self.parse_optional_table_constraint()? { constraints.push(constraint); + } else if with_watermark && let Some(watermark) = self.parse_optional_watermark()? { + watermarks.push(watermark); + if watermarks.len() > 1 { + // TODO(yuhao): allow multiple watermark on source. 
+ return Err(ParserError::ParserError( + "Only 1 watermark is allowed to be defined on source.".to_string(), + )); + } } else if let Token::Word(_) = self.peek_token() { columns.push(self.parse_column_def()?); } else { @@ -1897,7 +2016,7 @@ impl Parser { } } - Ok((columns, constraints)) + Ok((columns, constraints, watermarks)) } fn parse_column_def(&mut self) -> Result { @@ -2003,6 +2122,18 @@ impl Parser { } } + pub fn parse_optional_watermark(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::WATERMARK) { + self.expect_keyword(Keyword::FOR)?; + let column = self.parse_identifier_non_reserved()?; + self.expect_keyword(Keyword::AS)?; + let expr = self.parse_expr()?; + Ok(Some(SourceWatermark { column, expr })) + } else { + Ok(None) + } + } + pub fn parse_optional_table_constraint( &mut self, ) -> Result, ParserError> { @@ -2072,9 +2203,18 @@ impl Parser { pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { if self.parse_keyword(keyword) { self.expect_token(&Token::LParen)?; - let options = self.parse_comma_separated(Parser::parse_sql_option)?; - self.expect_token(&Token::RParen)?; - Ok(options) + let mut values = vec![]; + loop { + values.push(Parser::parse_sql_option(self)?); + let comma = self.consume_token(&Token::Comma); + if self.consume_token(&Token::RParen) { + // allow a trailing comma, even though it's not in standard + break; + } else if !comma { + return self.expected("',' or ')' after option definition", self.peek_token()); + } + } + Ok(values) } else { Ok(vec![]) } @@ -2087,11 +2227,32 @@ impl Parser { Ok(SqlOption { name, value }) } + pub fn parse_emit_mode(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::EMIT) { + match self.parse_one_of_keywords(&[Keyword::IMMEDIATELY, Keyword::ON]) { + Some(Keyword::IMMEDIATELY) => Ok(Some(EmitMode::Immediately)), + Some(Keyword::ON) => { + self.expect_keywords(&[Keyword::WINDOW, Keyword::CLOSE])?; + Ok(Some(EmitMode::OnWindowClose)) + } + Some(_) => unreachable!(), + None => self.expected( + "IMMEDIATELY or ON WINDOW CLOSE after EMIT", + self.peek_token(), + ), + } + } else { + Ok(None) + } + } + pub fn parse_alter(&mut self) -> Result { if self.parse_keyword(Keyword::TABLE) { self.parse_alter_table() } else if self.parse_keyword(Keyword::USER) { self.parse_alter_user() + } else if self.parse_keyword(Keyword::SYSTEM) { + self.parse_alter_system() } else { self.expected("TABLE or USER after ALTER", self.peek_token()) } @@ -2188,6 +2349,16 @@ impl Parser { }) } + pub fn parse_alter_system(&mut self) -> Result { + self.expect_keyword(Keyword::SET)?; + let param = self.parse_identifier()?; + if self.expect_keyword(Keyword::TO).is_err() && self.expect_token(&Token::Eq).is_err() { + return self.expected("TO or = after ALTER SYSTEM SET", self.peek_token()); + } + let value = self.parse_set_variable()?; + Ok(Statement::AlterSystem { param, value }) + } + /// Parse a copy statement pub fn parse_copy(&mut self) -> Result { let table_name = self.parse_object_name()?; @@ -2262,6 +2433,21 @@ impl Parser { } } + fn parse_set_variable(&mut self) -> Result { + let token = self.peek_token(); + match (self.parse_value(), token) { + (Ok(value), _) => Ok(SetVariableValue::Literal(value)), + (Err(_), Token::Word(ident)) => { + if ident.keyword == Keyword::DEFAULT { + Ok(SetVariableValue::Default) + } else { + Ok(SetVariableValue::Ident(ident.to_ident())) + } + } + (Err(_), unexpected) => self.expected("variable value", unexpected), + } + } + pub fn parse_number_value(&mut self) -> Result { match 
self.parse_value()? { Value::Number(v) => Ok(v), @@ -2988,12 +3174,7 @@ impl Parser { if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { let mut values = vec![]; loop { - let token = self.peek_token(); - let value = match (self.parse_value(), token) { - (Ok(value), _) => SetVariableValue::Literal(value), - (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), - (Err(_), unexpected) => self.expected("variable value", unexpected)?, - }; + let value = self.parse_set_variable()?; values.push(value); if self.consume_token(&Token::Comma) { continue; @@ -3362,9 +3543,11 @@ impl Parser { Keyword::CONNECT => Action::Connect, Keyword::CREATE => Action::Create, Keyword::DELETE => Action::Delete, + Keyword::EXECUTE => Action::Execute, Keyword::INSERT => Action::Insert { columns }, Keyword::REFERENCES => Action::References { columns }, Keyword::SELECT => Action::Select { columns }, + Keyword::TEMPORARY => Action::Temporary, Keyword::TRIGGER => Action::Trigger, Keyword::TRUNCATE => Action::Truncate, Keyword::UPDATE => Action::Update { columns }, @@ -3439,7 +3622,7 @@ impl Parser { } fn parse_grant_permission(&mut self) -> Result<(Keyword, Option>), ParserError> { - if let Some(kw) = self.parse_one_of_keywords(&[ + let kw = self.expect_one_of_keywords(&[ Keyword::CONNECT, Keyword::CREATE, Keyword::DELETE, @@ -3452,22 +3635,19 @@ impl Parser { Keyword::TRUNCATE, Keyword::UPDATE, Keyword::USAGE, - ]) { - let columns = match kw { - Keyword::INSERT | Keyword::REFERENCES | Keyword::SELECT | Keyword::UPDATE => { - let columns = self.parse_parenthesized_column_list(Optional)?; - if columns.is_empty() { - None - } else { - Some(columns) - } + ])?; + let columns = match kw { + Keyword::INSERT | Keyword::REFERENCES | Keyword::SELECT | Keyword::UPDATE => { + let columns = self.parse_parenthesized_column_list(Optional)?; + if columns.is_empty() { + None + } else { + Some(columns) } - _ => None, - }; - Ok((kw, columns)) - } else { - self.expected("a privilege keyword", self.peek_token())? - } + } + _ => None, + }; + Ok((kw, columns)) } /// Parse a REVOKE statement diff --git a/src/sqlparser/test_runner/Cargo.toml b/src/sqlparser/test_runner/Cargo.toml index 2b311665ec5b8..f33c8cf1b218f 100644 --- a/src/sqlparser/test_runner/Cargo.toml +++ b/src/sqlparser/test_runner/Cargo.toml @@ -3,13 +3,17 @@ name = "risingwave_sqlparser_test_runner" version = "0.1.0" edition = "2021" +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" -itertools = "0.10" risingwave_sqlparser = { path = "../" } serde = { version = "1", features = ["derive"] } serde_yaml = "0.9" -tempfile = "3" walkdir = "2" [target.'cfg(not(madsim))'.dependencies] diff --git a/src/sqlparser/test_runner/src/lib.rs b/src/sqlparser/test_runner/src/lib.rs index 64ef2d614efd1..4ffc2381c95cc 100644 --- a/src/sqlparser/test_runner/src/lib.rs +++ b/src/sqlparser/test_runner/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
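As an aside (not part of the diff itself), here is a minimal sketch of how the new parser surface added above can be exercised, assuming the crate's `Parser::parse_sql` entry point that the test suite below also relies on; the statement text and identifiers are illustrative:

```rust
use risingwave_sqlparser::ast::Statement;
use risingwave_sqlparser::parser::Parser;

fn main() {
    // New `EmitMode` parsed on materialized views.
    let stmts =
        Parser::parse_sql("CREATE MATERIALIZED VIEW mv EMIT ON WINDOW CLOSE AS SELECT 1").unwrap();
    if let Statement::CreateView { emit_mode, .. } = &stmts[0] {
        // Expected: Some(OnWindowClose).
        println!("emit_mode = {:?}", emit_mode);
    }

    // New `SetVariableValue::Default` via ALTER SYSTEM SET.
    let stmts = Parser::parse_sql("ALTER SYSTEM SET a = DEFAULT").unwrap();
    // Display round-trips to the canonical form: ALTER SYSTEM SET a = DEFAULT
    println!("{}", stmts[0]);
}
```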
diff --git a/src/sqlparser/test_runner/tests/test_runner.rs b/src/sqlparser/test_runner/tests/test_runner.rs index 4ab4235877155..42602a531347a 100644 --- a/src/sqlparser/test_runner/tests/test_runner.rs +++ b/src/sqlparser/test_runner/tests/test_runner.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/sqlparser/tests/sqlparser_common.rs b/src/sqlparser/tests/sqlparser_common.rs index d5f1f08bc013a..a935e8743ccff 100644 --- a/src/sqlparser/tests/sqlparser_common.rs +++ b/src/sqlparser/tests/sqlparser_common.rs @@ -457,7 +457,7 @@ fn parse_compound_expr_1() { use self::Expr::*; let sql = "a + b * c"; let ast = run_parser_method(sql, |parser| parser.parse_expr()).unwrap(); - assert_eq!("a + (b * c)", &ast.to_string()); + assert_eq!("a + b * c", &ast.to_string()); assert_eq!( BinaryOp { left: Box::new(Identifier(Ident::new("a"))), @@ -478,7 +478,7 @@ fn parse_compound_expr_2() { use self::Expr::*; let sql = "a * b + c"; let ast = run_parser_method(sql, |parser| parser.parse_expr()).unwrap(); - assert_eq!("(a * b) + c", &ast.to_string()); + assert_eq!("a * b + c", &ast.to_string()); assert_eq!( BinaryOp { left: Box::new(BinaryOp { @@ -498,7 +498,7 @@ fn parse_unary_math() { use self::Expr::*; let sql = "- a + - b"; let ast = run_parser_method(sql, |parser| parser.parse_expr()).unwrap(); - assert_eq!("(- a) + (- b)", &ast.to_string()); + assert_eq!("- a + - b", &ast.to_string()); assert_eq!( BinaryOp { left: Box::new(UnaryOp { @@ -565,7 +565,7 @@ fn parse_not_precedence() { // NOT has higher precedence than OR/AND, so the following must parse as (NOT true) OR true let sql = "NOT true OR true"; let ast = run_parser_method(sql, |parser| parser.parse_expr()).unwrap(); - assert_eq!("(NOT true) OR true", &ast.to_string()); + assert_eq!("NOT true OR true", &ast.to_string()); assert_matches!( ast, Expr::BinaryOp { @@ -578,7 +578,7 @@ fn parse_not_precedence() { // NULL) let sql = "NOT a IS NULL"; let ast = run_parser_method(sql, |parser| parser.parse_expr()).unwrap(); - assert_eq!("NOT (a IS NULL)", &ast.to_string()); + assert_eq!("NOT a IS NULL", &ast.to_string()); assert_matches!( ast, Expr::UnaryOp { @@ -605,7 +605,7 @@ fn parse_not_precedence() { // NOT has lower precedence than LIKE, so the following parses as NOT ('a' NOT LIKE 'b') let sql = "NOT 'a' NOT LIKE 'b'"; let ast = run_parser_method(sql, |parser| parser.parse_expr()).unwrap(); - assert_eq!("NOT ('a' NOT LIKE 'b')", &ast.to_string()); + assert_eq!("NOT 'a' NOT LIKE 'b'", &ast.to_string()); assert_eq!( ast, Expr::UnaryOp { @@ -2920,6 +2920,7 @@ fn parse_create_view() { or_replace, materialized, with_options, + emit_mode, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -2927,6 +2928,7 @@ fn parse_create_view() { assert!(!materialized); assert!(!or_replace); assert_eq!(with_options, vec![]); + assert_eq!(emit_mode, None); } _ => unreachable!(), } @@ -2966,13 +2968,15 @@ fn parse_create_view_with_columns() { with_options, query, materialized, + emit_mode, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![Ident::new("has"), Ident::new("cols")]); assert_eq!(with_options, vec![]); assert_eq!("SELECT 1, 2", query.to_string()); assert!(!materialized); - assert!(!or_replace) + assert!(!or_replace); + assert_eq!(emit_mode, None); } _ => unreachable!(), } @@ -2988,13 +2992,15 @@ fn 
parse_create_or_replace_view() { with_options, query, materialized, + emit_mode, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); assert_eq!(with_options, vec![]); assert_eq!("SELECT 1", query.to_string()); assert!(!materialized); - assert!(or_replace) + assert!(or_replace); + assert_eq!(emit_mode, None); } _ => unreachable!(), } @@ -3015,13 +3021,15 @@ fn parse_create_or_replace_materialized_view() { with_options, query, materialized, + emit_mode, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); assert_eq!(with_options, vec![]); assert_eq!("SELECT 1", query.to_string()); assert!(materialized); - assert!(or_replace) + assert!(or_replace); + assert_eq!(emit_mode, None); } _ => unreachable!(), } @@ -3038,6 +3046,58 @@ fn parse_create_materialized_view() { query, materialized, with_options, + emit_mode, + } => { + assert_eq!("myschema.myview", name.to_string()); + assert_eq!(Vec::::new(), columns); + assert_eq!("SELECT foo FROM bar", query.to_string()); + assert!(materialized); + assert_eq!(with_options, vec![]); + assert!(!or_replace); + assert_eq!(emit_mode, None); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_materialized_view_emit_immediately() { + let sql = "CREATE MATERIALIZED VIEW myschema.myview EMIT IMMEDIATELY AS SELECT foo FROM bar"; + match verified_stmt(sql) { + Statement::CreateView { + name, + or_replace, + columns, + query, + materialized, + with_options, + emit_mode, + } => { + assert_eq!("myschema.myview", name.to_string()); + assert_eq!(Vec::::new(), columns); + assert_eq!("SELECT foo FROM bar", query.to_string()); + assert!(materialized); + assert_eq!(with_options, vec![]); + assert!(!or_replace); + assert_eq!(emit_mode, Some(EmitMode::Immediately)); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_materialized_view_emit_on_window_close() { + let sql = + "CREATE MATERIALIZED VIEW myschema.myview EMIT ON WINDOW CLOSE AS SELECT foo FROM bar"; + match verified_stmt(sql) { + Statement::CreateView { + name, + or_replace, + columns, + query, + materialized, + with_options, + emit_mode, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -3045,6 +3105,7 @@ fn parse_create_materialized_view() { assert!(materialized); assert_eq!(with_options, vec![]); assert!(!or_replace); + assert_eq!(emit_mode, Some(EmitMode::OnWindowClose)); } _ => unreachable!(), } @@ -3502,7 +3563,7 @@ fn parse_create_index() { #[test] fn parse_grant() { - let sql = "GRANT SELECT, INSERT, UPDATE (shape, size), USAGE, DELETE, TRUNCATE, REFERENCES, TRIGGER ON abc, def TO xyz, m WITH GRANT OPTION GRANTED BY jj"; + let sql = "GRANT SELECT, INSERT, UPDATE (shape, size), EXECUTE, TEMPORARY, USAGE, DELETE, TRUNCATE, REFERENCES, TRIGGER ON abc, def TO xyz, m WITH GRANT OPTION GRANTED BY jj"; match verified_stmt(sql) { Statement::Grant { privileges, @@ -3520,6 +3581,8 @@ fn parse_grant() { Action::Update { columns: Some(vec![Ident::new("shape"), Ident::new("size")]) }, + Action::Execute, + Action::Temporary, Action::Usage, Action::Delete, Action::Truncate, diff --git a/src/sqlparser/tests/sqlparser_postgres.rs b/src/sqlparser/tests/sqlparser_postgres.rs index 8da5c731db92c..2cf28dc0a5065 100644 --- a/src/sqlparser/tests/sqlparser_postgres.rs +++ b/src/sqlparser/tests/sqlparser_postgres.rs @@ -422,7 +422,7 @@ fn parse_set() { Statement::SetVariable { local: false, variable: "a".into(), - value: vec![SetVariableValue::Ident("DEFAULT".into())], + value: vec![SetVariableValue::Default], } ); @@ -883,3 +883,169 
@@ fn parse_drop_function() { } ); } + +#[test] +fn parse_array() { + let sql = "SELECT ARRAY[ARRAY[1, 2], ARRAY[3, 4]]"; + assert_eq!( + verified_stmt(sql), + Statement::Query(Box::new(Query { + with: None, + body: SetExpr::Select(Box::new(Select { + distinct: Distinct::All, + projection: vec![SelectItem::UnnamedExpr(Expr::Array(Array { + elem: vec![ + Expr::Array(Array { + elem: vec![ + Expr::Value(Value::Number(String::from("1"))), + Expr::Value(Value::Number(String::from("2"))) + ], + named: true + }), + Expr::Array(Array { + elem: vec![ + Expr::Value(Value::Number(String::from("3"))), + Expr::Value(Value::Number(String::from("4"))), + ], + named: true + }), + ], + named: true + }))], + from: vec![], + lateral_views: vec![], + selection: None, + group_by: vec![], + having: None + })), + order_by: vec![], + limit: None, + offset: None, + fetch: None + })) + ); + + let sql = "SELECT ARRAY[[1, 2], [3, 4]]"; + assert_eq!( + verified_stmt(sql), + Statement::Query(Box::new(Query { + with: None, + body: SetExpr::Select(Box::new(Select { + distinct: Distinct::All, + projection: vec![SelectItem::UnnamedExpr(Expr::Array(Array { + elem: vec![ + Expr::Array(Array { + elem: vec![ + Expr::Value(Value::Number(String::from("1"))), + Expr::Value(Value::Number(String::from("2"))) + ], + named: false + }), + Expr::Array(Array { + elem: vec![ + Expr::Value(Value::Number(String::from("3"))), + Expr::Value(Value::Number(String::from("4"))), + ], + named: false + }), + ], + named: true + }))], + from: vec![], + lateral_views: vec![], + selection: None, + group_by: vec![], + having: None + })), + order_by: vec![], + limit: None, + offset: None, + fetch: None + })) + ); + + let sql = "SELECT ARRAY[ARRAY[ARRAY[1, 2]], ARRAY[[3, 4]]]"; + assert_eq!( + verified_stmt(sql), + Statement::Query(Box::new(Query { + with: None, + body: SetExpr::Select(Box::new(Select { + distinct: Distinct::All, + projection: vec![SelectItem::UnnamedExpr(Expr::Array(Array { + elem: vec![ + Expr::Array(Array { + elem: vec![Expr::Array(Array { + elem: vec![ + Expr::Value(Value::Number(String::from("1"))), + Expr::Value(Value::Number(String::from("2"))) + ], + named: true + })], + named: true + }), + Expr::Array(Array { + elem: vec![Expr::Array(Array { + elem: vec![ + Expr::Value(Value::Number(String::from("3"))), + Expr::Value(Value::Number(String::from("4"))) + ], + named: false + })], + named: true + }), + ], + named: true + }))], + from: vec![], + lateral_views: vec![], + selection: None, + group_by: vec![], + having: None + })), + order_by: vec![], + limit: None, + offset: None, + fetch: None + })) + ); + + let sql = "SELECT ARRAY[ARRAY[1, 2], [3, 4]]"; + assert_eq!( + parse_sql_statements(sql), + Err(ParserError::ParserError( + "syntax error at or near '['".to_string() + )) + ); + + let sql = "SELECT ARRAY[ARRAY[], []]"; + assert_eq!( + parse_sql_statements(sql), + Err(ParserError::ParserError( + "syntax error at or near '['".to_string() + )) + ); + + let sql = "SELECT ARRAY[[1, 2], ARRAY[3, 4]]"; + assert_eq!( + parse_sql_statements(sql), + Err(ParserError::ParserError( + "syntax error at or near 'ARRAY'".to_string() + )) + ); + + let sql = "SELECT ARRAY[[], ARRAY[]]"; + assert_eq!( + parse_sql_statements(sql), + Err(ParserError::ParserError( + "syntax error at or near 'ARRAY'".to_string() + )) + ); + + let sql = "SELECT [[1, 2], [3, 4]]"; + assert_eq!( + parse_sql_statements(sql), + Err(ParserError::ParserError( + "Expected an expression:, found: [".to_string() + )), + ); +} diff --git a/src/sqlparser/tests/testdata/alter.yaml 
b/src/sqlparser/tests/testdata/alter.yaml new file mode 100644 index 0000000000000..0672068780a5c --- /dev/null +++ b/src/sqlparser/tests/testdata/alter.yaml @@ -0,0 +1,11 @@ +- input: ALTER USER user WITH SUPERUSER CREATEDB PASSWORD 'password' + formatted_sql: ALTER USER user WITH SUPERUSER CREATEDB PASSWORD 'password' + +- input: ALTER USER user RENAME TO another + formatted_sql: ALTER USER user RENAME TO another + +- input: ALTER SYSTEM SET a = 'abc' + formatted_sql: ALTER SYSTEM SET a = 'abc' + +- input: ALTER SYSTEM SET a = DEFAULT + formatted_sql: ALTER SYSTEM SET a = DEFAULT diff --git a/src/sqlparser/tests/testdata/create.yaml b/src/sqlparser/tests/testdata/create.yaml index 5dc5caa400832..d3ce3dc38c46d 100644 --- a/src/sqlparser/tests/testdata/create.yaml +++ b/src/sqlparser/tests/testdata/create.yaml @@ -34,12 +34,17 @@ - input: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.servers = 'localhost:1001') ROW FORMAT PROTOBUF MESSAGE 'Foo' ROW SCHEMA LOCATION 'file://' formatted_sql: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.servers = 'localhost:1001') ROW FORMAT PROTOBUF MESSAGE 'Foo' ROW SCHEMA LOCATION 'file://' formatted_ast: | - CreateSource { stmt: CreateSourceStatement { if_not_exists: true, columns: [], constraints: [], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "servers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: Protobuf(ProtobufSchema { message_name: AstString("Foo"), row_schema_location: AstString("file://"), use_schema_registry: false }) } } + CreateSource { stmt: CreateSourceStatement { if_not_exists: true, columns: [], constraints: [], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "servers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: Protobuf(ProtobufSchema { message_name: AstString("Foo"), row_schema_location: AstString("file://"), use_schema_registry: false }), source_watermarks: [] } } - input: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.servers = 'localhost:1001') ROW FORMAT PROTOBUF MESSAGE 'Foo' ROW SCHEMA LOCATION CONFLUENT SCHEMA REGISTRY 'http://' formatted_sql: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.servers = 'localhost:1001') ROW FORMAT PROTOBUF MESSAGE 'Foo' ROW SCHEMA LOCATION CONFLUENT SCHEMA REGISTRY 'http://' formatted_ast: | - CreateSource { stmt: CreateSourceStatement { if_not_exists: true, columns: [], constraints: [], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "servers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: 
Protobuf(ProtobufSchema { message_name: AstString("Foo"), row_schema_location: AstString("http://"), use_schema_registry: true }) } } + CreateSource { stmt: CreateSourceStatement { if_not_exists: true, columns: [], constraints: [], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "servers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: Protobuf(ProtobufSchema { message_name: AstString("Foo"), row_schema_location: AstString("http://"), use_schema_registry: true }), source_watermarks: [] } } + +- input: CREATE SOURCE bid (auction INTEGER, bidder INTEGER, price INTEGER, WATERMARK FOR auction AS auction - 1, "date_time" TIMESTAMP) with (connector = 'nexmark', nexmark.table.type = 'Bid', nexmark.split.num = '12', nexmark.min.event.gap.in.ns = '0') + formatted_sql: CREATE SOURCE bid (auction INT, bidder INT, price INT, "date_time" TIMESTAMP, WATERMARK FOR auction AS auction - 1) WITH (connector = 'nexmark', nexmark.table.type = 'Bid', nexmark.split.num = '12', nexmark.min.event.gap.in.ns = '0') ROW FORMAT NATIVE + formatted_ast: | + CreateSource { stmt: CreateSourceStatement { if_not_exists: false, columns: [ColumnDef { name: Ident { value: "auction", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "bidder", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "price", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "date_time", quote_style: Some('"') }, data_type: Some(Timestamp(false)), collation: None, options: [] }], constraints: [], source_name: ObjectName([Ident { value: "bid", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "connector", quote_style: None }]), value: SingleQuotedString("nexmark") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "table", quote_style: None }, Ident { value: "type", quote_style: None }]), value: SingleQuotedString("Bid") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "split", quote_style: None }, Ident { value: "num", quote_style: None }]), value: SingleQuotedString("12") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "min", quote_style: None }, Ident { value: "event", quote_style: None }, Ident { value: "gap", quote_style: None }, Ident { value: "in", quote_style: None }, Ident { value: "ns", quote_style: None }]), value: SingleQuotedString("0") }]), source_schema: Native, source_watermarks: [SourceWatermark { column: Ident { value: "auction", quote_style: None }, expr: BinaryOp { left: Identifier(Ident { value: "auction", quote_style: None }), op: Minus, right: Value(Number("1")) } }] } } - input: CREATE TABLE T (v1 INT, v2 STRUCT) formatted_sql: CREATE TABLE T (v1 INT, v2 STRUCT) @@ -60,12 +65,6 @@ - input: CREATE USER user WITH SUPERUSER CREATEDB PASSWORD 'password' formatted_sql: CREATE USER user WITH SUPERUSER CREATEDB PASSWORD 'password' -- input: ALTER USER user WITH SUPERUSER CREATEDB PASSWORD 'password' - formatted_sql: ALTER USER user WITH SUPERUSER 
CREATEDB PASSWORD 'password' - -- input: ALTER USER user RENAME TO another - formatted_sql: ALTER USER user RENAME TO another - - input: CREATE SINK snk error_msg: | sql parser error: Expected FROM or AS after CREATE SINK sink_name, found: EOF diff --git a/src/sqlparser/tests/testdata/select.yaml b/src/sqlparser/tests/testdata/select.yaml index 78efa0e532a25..741790a18120d 100644 --- a/src/sqlparser/tests/testdata/select.yaml +++ b/src/sqlparser/tests/testdata/select.yaml @@ -50,13 +50,13 @@ - input: SELECT * FROM unnest(Array[1,2,3]); formatted_sql: SELECT * FROM unnest(ARRAY[1, 2, 3]) formatted_ast: | - Query(Query { with: None, body: Select(Select { distinct: All, projection: [Wildcard], from: [TableWithJoins { relation: TableFunction { name: ObjectName([Ident { value: "unnest", quote_style: None }]), alias: None, args: [Unnamed(Expr(Array([Value(Number("1")), Value(Number("2")), Value(Number("3"))])))] }, joins: [] }], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None }) + Query(Query { with: None, body: Select(Select { distinct: All, projection: [Wildcard], from: [TableWithJoins { relation: TableFunction { name: ObjectName([Ident { value: "unnest", quote_style: None }]), alias: None, args: [Unnamed(Expr(Array(Array { elem: [Value(Number("1")), Value(Number("2")), Value(Number("3"))], named: true })))] }, joins: [] }], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None }) - input: SELECT id, fname, lname FROM customer WHERE salary <> 'Not Provided' AND salary <> '' - formatted_sql: SELECT id, fname, lname FROM customer WHERE (salary <> 'Not Provided') AND (salary <> '') + formatted_sql: SELECT id, fname, lname FROM customer WHERE salary <> 'Not Provided' AND salary <> '' - input: SELECT id FROM customer WHERE NOT salary = '' - formatted_sql: SELECT id FROM customer WHERE NOT (salary = '') + formatted_sql: SELECT id FROM customer WHERE NOT salary = '' - input: SELECT * FROM t LIMIT 1 FETCH FIRST ROWS ONLY error_msg: "sql parser error: Cannot specify both LIMIT and FETCH" diff --git a/src/storage/Cargo.toml b/src/storage/Cargo.toml index 44e4197fac5fc..0ef8842f20a4a 100644 --- a/src/storage/Cargo.toml +++ b/src/storage/Cargo.toml @@ -7,45 +7,35 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] -anyhow = "1" arc-swap = "1" async-trait = "0.1" async_stack_trace = { path = "../utils/async_stack_trace" } auto_enums = { version = "0.7", features = ["futures"] } -bitvec = "1" -byteorder = "1" bytes = { version = "1", features = ["serde"] } -chrono = { version = "0.4", default-features = false, features = [ - "clock", - "std", -] } -crc32fast = "1" crossbeam = "0.8.1" dashmap = { version = "5", default-features = false } dyn-clone = "1.0.4" -either = "1" enum-as-inner = "0.5" fail = "0.5" -farmhash = "1" futures = { version = "0.3", default-features = false, features = ["alloc"] } futures-async-stream = "0.2" -hyper = "0.14" itertools = "0.10" libc = "0.2" lz4 = "1.23.1" memcomparable = "0.1" minitrace = "0.4" minstant = "0.1" -nix = { version = "0.25", features = ["fs", "mman"] } -num-integer = "0.1" -num-traits = "0.2" parking_lot = "0.12" -paste = "1" prometheus = { version = "0.13", features = ["process"] } prost = "0.11" rand = "0.8" -regex = "1" 
risingwave_backup = { path = "../storage/backup" } risingwave_common = { path = "../common" } risingwave_common_service = { path = "../common/common_service" } @@ -55,13 +45,7 @@ risingwave_pb = { path = "../prost" } risingwave_rpc_client = { path = "../rpc_client" } risingwave_tracing = { path = "../tracing" } scopeguard = "1" -# rocksdb = { git = "https://github.com/tikv/rust-rocksdb.git", rev = "fa83ff19", features = [ -# "encryption", -# "static_libcpp", -# ], optional = true } -serde = { version = "1", features = ["derive"] } sled = "0.34.7" -smallvec = "1" spin = "0.9" sync-point = { path = "../utils/sync-point" } tempfile = "3" @@ -76,14 +60,15 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "time", "signal", ] } -tokio-metrics = "0.1.0" tokio-retry = "0.3" -tokio-stream = "0.1" -tonic = { version = "0.2", package = "madsim-tonic" } tracing = "0.1" +xorf = "0.8.1" xxhash-rust = { version = "0.8.5", features = ["xxh32", "xxh64"] } zstd = "0.11.2" +[target.'cfg(target_os = "linux")'.dependencies] +nix = { version = "0.25", features = ["fs", "mman"] } + [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../workspace-hack" } diff --git a/src/storage/backup/Cargo.toml b/src/storage/backup/Cargo.toml index 4daae3ef78104..387c627ee8663 100644 --- a/src/storage/backup/Cargo.toml +++ b/src/storage/backup/Cargo.toml @@ -7,6 +7,12 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" async-trait = "0.1" @@ -19,7 +25,6 @@ risingwave_hummock_sdk = { path = "../hummock_sdk" } risingwave_object_store = { path = "../../object_store" } risingwave_pb = { path = "../../prost" } serde = { version = "1", features = ["derive"] } -serde_derive = "1" serde_json = "1" thiserror = "1" twox-hash = "1" diff --git a/src/storage/backup/cmd/Cargo.toml b/src/storage/backup/cmd/Cargo.toml index f284cebf1f5ad..26f33f1e03949 100644 --- a/src/storage/backup/cmd/Cargo.toml +++ b/src/storage/backup/cmd/Cargo.toml @@ -7,6 +7,12 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] clap = { version = "3", features = ["derive"] } risingwave_backup = { path = "../../backup" } diff --git a/src/storage/backup/cmd/src/bin/backup_restore.rs b/src/storage/backup/cmd/src/bin/backup_restore.rs index df0d89f312af2..620f4d4a1490d 100644 --- a/src/storage/backup/cmd/src/bin/backup_restore.rs +++ b/src/storage/backup/cmd/src/bin/backup_restore.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -20,7 +20,7 @@ use risingwave_backup::error::BackupResult; fn main() -> BackupResult<()> { use clap::StructOpt; let opts = risingwave_meta::backup_restore::RestoreOpts::parse(); - risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new_default()); + risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new()); tokio::runtime::Builder::new_multi_thread() .enable_all() .build() diff --git a/src/storage/backup/cmd/src/lib.rs b/src/storage/backup/cmd/src/lib.rs index 38cd4f8dd934f..b0b2c0a500fb9 100644 --- a/src/storage/backup/cmd/src/lib.rs +++ b/src/storage/backup/cmd/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/backup/src/error.rs b/src/storage/backup/src/error.rs index 53b5a5f8595aa..44b0208c6cbc6 100644 --- a/src/storage/backup/src/error.rs +++ b/src/storage/backup/src/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/backup/src/lib.rs b/src/storage/backup/src/lib.rs index a75482d8a61b2..44b9c39c9f87b 100644 --- a/src/storage/backup/src/lib.rs +++ b/src/storage/backup/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/backup/src/meta_snapshot.rs b/src/storage/backup/src/meta_snapshot.rs index 2c0381d22fbe0..3fabbba54128c 100644 --- a/src/storage/backup/src/meta_snapshot.rs +++ b/src/storage/backup/src/meta_snapshot.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,12 +13,14 @@ // limitations under the License. use std::collections::HashMap; +use std::fmt::{Display, Formatter}; use bytes::{Buf, BufMut}; use itertools::Itertools; -use risingwave_pb::catalog::{Database, Index, Schema, Sink, Source, Table, View}; +use risingwave_common::util::iter_util::ZipEqFast; +use risingwave_pb::catalog::{Database, Function, Index, Schema, Sink, Source, Table, View}; use risingwave_pb::hummock::{CompactionGroup, HummockVersion, HummockVersionStats}; -use risingwave_pb::meta::TableFragments; +use risingwave_pb::meta::{SystemParams, TableFragments}; use risingwave_pb::user::UserInfo; use crate::error::{BackupError, BackupResult}; @@ -26,6 +28,7 @@ use crate::{xxhash64_checksum, xxhash64_verify, MetaSnapshotId}; #[derive(Debug, Default, Clone, PartialEq)] pub struct MetaSnapshot { + pub format_version: u32, pub id: MetaSnapshotId, /// Snapshot of meta store. 
pub metadata: ClusterMetadata, @@ -34,6 +37,7 @@ pub struct MetaSnapshot { impl MetaSnapshot { pub fn encode(&self) -> Vec { let mut buf = vec![]; + buf.put_u32_le(self.format_version); buf.put_u64_le(self.id); self.metadata.encode_to(&mut buf); let checksum = xxhash64_checksum(&buf); @@ -44,25 +48,68 @@ impl MetaSnapshot { pub fn decode(mut buf: &[u8]) -> BackupResult { let checksum = (&buf[buf.len() - 8..]).get_u64_le(); xxhash64_verify(&buf[..buf.len() - 8], checksum)?; + let format_version = buf.get_u32_le(); let id = buf.get_u64_le(); let metadata = ClusterMetadata::decode(buf)?; - Ok(Self { id, metadata }) + Ok(Self { + format_version, + id, + metadata, + }) } } +impl Display for MetaSnapshot { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + writeln!(f, "format_version: {}", self.format_version)?; + writeln!(f, "id: {}", self.id)?; + writeln!(f, "default_cf:")?; + for (k, v) in &self.metadata.default_cf { + let key = String::from_utf8(k.clone()).unwrap(); + writeln!(f, "{} {:x?}", key, v)?; + } + writeln!(f, "hummock_version:")?; + writeln!(f, "{:#?}", self.metadata.hummock_version)?; + writeln!(f, "version_stats:")?; + writeln!(f, "{:#?}", self.metadata.version_stats)?; + writeln!(f, "compaction_groups:")?; + writeln!(f, "{:#?}", self.metadata.compaction_groups)?; + writeln!(f, "database:")?; + writeln!(f, "{:#?}", self.metadata.database)?; + writeln!(f, "schema:")?; + writeln!(f, "{:#?}", self.metadata.schema)?; + writeln!(f, "table:")?; + writeln!(f, "{:#?}", self.metadata.table)?; + writeln!(f, "index:")?; + writeln!(f, "{:#?}", self.metadata.index)?; + writeln!(f, "sink:")?; + writeln!(f, "{:#?}", self.metadata.sink)?; + writeln!(f, "source:")?; + writeln!(f, "{:#?}", self.metadata.source)?; + writeln!(f, "view:")?; + writeln!(f, "{:#?}", self.metadata.view)?; + writeln!(f, "table_fragments:")?; + writeln!(f, "{:#?}", self.metadata.table_fragments)?; + writeln!(f, "user_info:")?; + writeln!(f, "{:#?}", self.metadata.user_info)?; + writeln!(f, "function:")?; + writeln!(f, "{:#?}", self.metadata.function)?; + writeln!(f, "system_param:")?; + writeln!(f, "{:#?}", self.metadata.system_param)?; + Ok(()) + } +} + +/// For backward compatibility, never remove fields and only append new field. #[derive(Debug, Default, Clone, PartialEq)] pub struct ClusterMetadata { /// Unlike other metadata that has implemented `MetadataModel`, /// DEFAULT_COLUMN_FAMILY stores various single row metadata, e.g. id offset and epoch offset. /// So we use `default_cf` stores raw KVs for them. pub default_cf: HashMap, Vec>, - - /// Hummock metadata pub hummock_version: HummockVersion, pub version_stats: HummockVersionStats, pub compaction_groups: Vec, - - /// Catalog metadata pub database: Vec, pub schema: Vec, pub table: Vec
, @@ -70,9 +117,10 @@ pub struct ClusterMetadata { pub sink: Vec, pub source: Vec, pub view: Vec, - pub table_fragments: Vec, pub user_info: Vec, + pub function: Vec, + pub system_param: SystemParams, } impl ClusterMetadata { @@ -93,6 +141,8 @@ impl ClusterMetadata { Self::encode_prost_message_list(&self.sink.iter().collect_vec(), buf); Self::encode_prost_message_list(&self.source.iter().collect_vec(), buf); Self::encode_prost_message_list(&self.view.iter().collect_vec(), buf); + Self::encode_prost_message_list(&self.function.iter().collect_vec(), buf); + Self::encode_prost_message(&self.system_param, buf); } pub fn decode(mut buf: &[u8]) -> BackupResult { @@ -100,7 +150,7 @@ impl ClusterMetadata { let default_cf_values: Vec> = Self::decode_prost_message_list(&mut buf)?; let default_cf = default_cf_keys .into_iter() - .zip_eq(default_cf_values.into_iter()) + .zip_eq_fast(default_cf_values.into_iter()) .collect(); let hummock_version = Self::decode_prost_message(&mut buf)?; let version_stats = Self::decode_prost_message(&mut buf)?; @@ -114,6 +164,8 @@ impl ClusterMetadata { let sink: Vec = Self::decode_prost_message_list(&mut buf)?; let source: Vec = Self::decode_prost_message_list(&mut buf)?; let view: Vec = Self::decode_prost_message_list(&mut buf)?; + let function: Vec = Self::decode_prost_message_list(&mut buf)?; + let system_param: SystemParams = Self::decode_prost_message(&mut buf)?; Ok(Self { default_cf, @@ -129,6 +181,8 @@ impl ClusterMetadata { view, table_fragments, user_info, + function, + system_param, }) } @@ -179,7 +233,11 @@ mod tests { fn test_snapshot_encoding_decoding() { let mut metadata = ClusterMetadata::default(); metadata.hummock_version.id = 321; - let raw = MetaSnapshot { id: 123, metadata }; + let raw = MetaSnapshot { + format_version: 0, + id: 123, + metadata, + }; let encoded = raw.encode(); let decoded = MetaSnapshot::decode(&encoded).unwrap(); assert_eq!(raw, decoded); diff --git a/src/storage/backup/src/storage.rs b/src/storage/backup/src/storage.rs index 64d7d2e98eb4f..bc9108bdd2e57 100644 --- a/src/storage/backup/src/storage.rs +++ b/src/storage/backup/src/storage.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/benches/bench_block_iter.rs b/src/storage/benches/bench_block_iter.rs index 0ef38194e6496..a0b8d3a27057d 100644 --- a/src/storage/benches/bench_block_iter.rs +++ b/src/storage/benches/bench_block_iter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/benches/bench_compactor.rs b/src/storage/benches/bench_compactor.rs index 31d331b05719e..12de2c713b25a 100644 --- a/src/storage/benches/bench_compactor.rs +++ b/src/storage/benches/bench_compactor.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
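The `meta_snapshot.rs` changes above prepend a `format_version` to the snapshot frame, keep the trailing xxhash64 checksum, and document `ClusterMetadata` as append-only for backward compatibility. As a minimal, hedged sketch of that framing in isolation (helper names, the checksum seed, and the error type are assumptions, not the real `risingwave_backup` API):

```rust
use std::hash::Hasher;

use bytes::{Buf, BufMut};
use twox_hash::XxHash64;

// Stand-in for `xxhash64_checksum`; the real seed is not shown in the diff.
fn checksum(bytes: &[u8]) -> u64 {
    let mut hasher = XxHash64::with_seed(0);
    hasher.write(bytes);
    hasher.finish()
}

// Frame layout: format_version (u32 LE) | id (u64 LE) | payload | checksum (u64 LE).
fn encode_frame(format_version: u32, id: u64, payload: &[u8]) -> Vec<u8> {
    let mut buf = vec![];
    buf.put_u32_le(format_version);
    buf.put_u64_le(id);
    buf.put_slice(payload);
    let sum = checksum(&buf); // checksum covers everything written so far
    buf.put_u64_le(sum);
    buf
}

fn decode_frame(mut buf: &[u8]) -> Result<(u32, u64, Vec<u8>), String> {
    if buf.len() < 4 + 8 + 8 {
        return Err("frame too short".to_string());
    }
    let body_len = buf.len() - 8;
    let expected = (&buf[body_len..]).get_u64_le();
    if checksum(&buf[..body_len]) != expected {
        return Err("checksum mismatch".to_string());
    }
    let format_version = buf.get_u32_le();
    let id = buf.get_u64_le();
    let payload = buf[..body_len - 12].to_vec();
    Ok((format_version, id, payload))
}

fn main() {
    let encoded = encode_frame(0, 123, b"cluster metadata goes here");
    let (version, id, payload) = decode_frame(&encoded).unwrap();
    assert_eq!((version, id), (0, 123));
    assert_eq!(payload, b"cluster metadata goes here".to_vec());
}
```

Because the version is written before the payload and the metadata section only ever appends fields, the idea is that a newer reader can branch on `format_version` before deciding how much of the tail to interpret.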
@@ -22,7 +22,7 @@ use risingwave_hummock_sdk::key::FullKey; use risingwave_hummock_sdk::key_range::KeyRange; use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; use risingwave_object_store::object::{InMemObjectStore, ObjectStore, ObjectStoreImpl}; -use risingwave_pb::hummock::SstableInfo; +use risingwave_pb::hummock::{compact_task, SstableInfo}; use risingwave_storage::hummock::compactor::{ Compactor, ConcatSstableIterator, DummyCompactionFilter, TaskConfig, }; @@ -179,6 +179,7 @@ async fn compact>(iter: I, sstable_store gc_delete_keys: false, watermark: 0, stats_target_table_ids: None, + task_type: compact_task::TaskType::Dynamic, }; Compactor::compact_and_build_sst( &mut builder, diff --git a/src/storage/benches/bench_compression.rs b/src/storage/benches/bench_compression.rs index 87cc1380ec836..63f283187222e 100644 --- a/src/storage/benches/bench_compression.rs +++ b/src/storage/benches/bench_compression.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/benches/bench_fs_operation.rs b/src/storage/benches/bench_fs_operation.rs index d64f1839d9080..ea99197201060 100644 --- a/src/storage/benches/bench_fs_operation.rs +++ b/src/storage/benches/bench_fs_operation.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/benches/bench_lru_cache.rs b/src/storage/benches/bench_lru_cache.rs index ef5bf7dd9bef2..1c23b709f7008 100644 --- a/src/storage/benches/bench_lru_cache.rs +++ b/src/storage/benches/bench_lru_cache.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/benches/bench_merge_iter.rs b/src/storage/benches/bench_merge_iter.rs index 210958fe89250..295ab2784a7a9 100644 --- a/src/storage/benches/bench_merge_iter.rs +++ b/src/storage/benches/bench_merge_iter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/benches/bench_multi_builder.rs b/src/storage/benches/bench_multi_builder.rs index ecf0e4d964ce9..b65bb7d188e56 100644 --- a/src/storage/benches/bench_multi_builder.rs +++ b/src/storage/benches/bench_multi_builder.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -30,7 +30,7 @@ use risingwave_storage::hummock::value::HummockValue; use risingwave_storage::hummock::{ BatchSstableWriterFactory, CachePolicy, CompressionAlgorithm, HummockResult, MemoryLimiter, SstableBuilder, SstableBuilderOptions, SstableStore, SstableWriterFactory, - SstableWriterOptions, StreamingSstableWriterFactory, TieredCache, + SstableWriterOptions, StreamingSstableWriterFactory, TieredCache, XorFilterBuilder, }; use risingwave_storage::monitor::ObjectStoreMetrics; @@ -61,9 +61,10 @@ impl LocalTableBuilderFactory { #[async_trait::async_trait] impl TableBuilderFactory for LocalTableBuilderFactory { + type Filter = XorFilterBuilder; type Writer = ::Writer; - async fn open_builder(&self) -> HummockResult> { + async fn open_builder(&mut self) -> HummockResult> { let id = self.next_id.fetch_add(1, SeqCst); let tracker = self.limiter.require_memory(1).await; let writer_options = SstableWriterOptions { diff --git a/src/storage/compactor/Cargo.toml b/src/storage/compactor/Cargo.toml index 379280854ca09..3173f8e7410e6 100644 --- a/src/storage/compactor/Cargo.toml +++ b/src/storage/compactor/Cargo.toml @@ -8,20 +8,24 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] async-trait = "0.1" clap = { version = "3", features = ["derive"] } -parking_lot = "0.12" prometheus = { version = "0.13" } risingwave_common = { path = "../../common" } +risingwave_common_proc_macro = { path = "../../common/proc_macro" } risingwave_common_service = { path = "../../common/common_service" } risingwave_hummock_sdk = { path = "../hummock_sdk" } risingwave_object_store = { path = "../../object_store" } risingwave_pb = { path = "../../prost" } risingwave_rpc_client = { path = "../../rpc_client" } risingwave_storage = { path = "../../storage" } -risingwave_tracing = { path = "../../tracing" } -serde = { version = "1", features = ["derive"] } tokio = { version = "0.2", package = "madsim-tokio", features = [ "fs", "rt", @@ -31,9 +35,6 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "time", "signal", ] } -tokio-retry = "0.3" -tokio-stream = "0.1" -toml = "0.5" tonic = { version = "0.2", package = "madsim-tonic" } tracing = "0.1" diff --git a/src/storage/compactor/src/compactor_observer/mod.rs b/src/storage/compactor/src/compactor_observer/mod.rs index 85723d661f34d..2aca9e26eebab 100644 --- a/src/storage/compactor/src/compactor_observer/mod.rs +++ b/src/storage/compactor/src/compactor_observer/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/compactor/src/compactor_observer/observer_manager.rs b/src/storage/compactor/src/compactor_observer/observer_manager.rs index 3c8fbca7e0c26..6dc3ef6d80466 100644 --- a/src/storage/compactor/src/compactor_observer/observer_manager.rs +++ b/src/storage/compactor/src/compactor_observer/observer_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/storage/compactor/src/lib.rs b/src/storage/compactor/src/lib.rs index 75d0f46330038..d1067f9fa4e3d 100644 --- a/src/storage/compactor/src/lib.rs +++ b/src/storage/compactor/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,51 +17,78 @@ mod rpc; mod server; use clap::Parser; +use risingwave_common_proc_macro::OverrideConfig; use crate::server::compactor_serve; /// Command-line arguments for compute-node. #[derive(Parser, Clone, Debug)] pub struct CompactorOpts { - // TODO: rename to listen_address and separate out the port. - #[clap(long, default_value = "127.0.0.1:6660")] - pub host: String, - - // Optional, we will use listen_address if not specified. - #[clap(long)] - pub client_address: Option, + // TODO: rename to listen_addr and separate out the port. + /// The address that this service listens to. + /// Usually the localhost + desired port. + #[clap( + long, + alias = "host", + env = "RW_LISTEN_ADDR", + default_value = "127.0.0.1:6660" + )] + pub listen_addr: String, + + /// The address for contacting this instance of the service. + /// This would be synonymous with the service's "public address" + /// or "identifying address". + /// Optional, we will use listen_addr if not specified. + #[clap(long, env = "RW_ADVERTISE_ADDR", alias = "client-address")] + pub advertise_addr: Option, // TODO: This is currently unused. - #[clap(long)] + #[clap(long, env = "RW_PORT")] pub port: Option, - #[clap(long, default_value = "")] - pub state_store: String, - - #[clap(long, default_value = "127.0.0.1:1260")] + #[clap( + long, + env = "RW_PROMETHEUS_LISTENER_ADDR", + default_value = "127.0.0.1:1260" + )] pub prometheus_listener_addr: String, - #[clap(long, default_value = "0")] - pub metrics_level: u32, - - #[clap(long, default_value = "http://127.0.0.1:5690")] + #[clap(long, env = "RW_META_ADDRESS", default_value = "http://127.0.0.1:5690")] pub meta_address: String, - /// It's a hint used by meta node. - #[clap(long, default_value = "16")] - pub max_concurrent_task_number: u64, + /// Of the form `hummock+{object_store}` where `object_store` + /// is one of `s3://{path}`, `s3-compatible://{path}`, `minio://{path}`, `disk://{path}`, + /// `memory` or `memory-shared`. + #[clap(long, env = "RW_STATE_STORE")] + pub state_store: Option, - #[clap(long)] + #[clap(long, env = "RW_COMPACTION_WORKER_THREADS_NUMBER")] pub compaction_worker_threads_number: Option, /// The path of `risingwave.toml` configuration file. /// /// If empty, default configuration values will be used. - /// - /// Note that internal system parameters should be defined in the configuration file at - /// [`risingwave_common::config`] instead of command line arguments. - #[clap(long, default_value = "")] + #[clap(long, env = "RW_CONFIG_PATH", default_value = "")] pub config_path: String, + + #[clap(flatten)] + override_config: OverrideConfigOpts, +} + +/// Command-line arguments for compactor-node that overrides the config file. +#[derive(Parser, Clone, Debug, OverrideConfig)] +struct OverrideConfigOpts { + /// Used for control the metrics level, similar to log level. + /// 0 = close metrics + /// >0 = open metrics + #[clap(long, env = "RW_METRICS_LEVEL")] + #[override_opts(path = server.metrics_level)] + pub metrics_level: Option, + + /// It's a hint used by meta node. 
+ #[clap(long, env = "RW_MAX_CONCURRENT_TASK_NUMBER")] + #[override_opts(path = storage.max_concurrent_compaction_task_number)] + pub max_concurrent_task_number: Option, } use std::future::Future; @@ -71,26 +98,34 @@ pub fn start(opts: CompactorOpts) -> Pin + Send>> { // WARNING: don't change the function signature. Making it `async fn` will cause // slow compile in release mode. Box::pin(async move { + tracing::info!("Compactor node options: {:?}", opts); + warn_future_deprecate_options(&opts); tracing::info!("meta address: {}", opts.meta_address.clone()); - let listen_address = opts.host.parse().unwrap(); - tracing::info!("Server Listening at {}", listen_address); + let listen_addr = opts.listen_addr.parse().unwrap(); + tracing::info!("Server Listening at {}", listen_addr); - let client_address = opts - .client_address + let advertise_addr = opts + .advertise_addr .as_ref() .unwrap_or_else(|| { - tracing::warn!("Client address is not specified, defaulting to host address"); - &opts.host + tracing::warn!("advertise addr is not specified, defaulting to listen address"); + &opts.listen_addr }) .parse() .unwrap(); - tracing::info!("Client address is {}", client_address); + tracing::info!(" address is {}", advertise_addr); let (join_handle, observer_join_handle, _shutdown_sender) = - compactor_serve(listen_address, client_address, opts).await; + compactor_serve(listen_addr, advertise_addr, opts).await; join_handle.await.unwrap(); - observer_join_handle.await.unwrap(); + observer_join_handle.abort(); }) } + +fn warn_future_deprecate_options(opts: &CompactorOpts) { + if opts.state_store.is_some() { + tracing::warn!("`--state-store` will not be accepted by compactor node in the next release. Please consider moving this argument to the meta node."); + } +} diff --git a/src/storage/compactor/src/rpc.rs b/src/storage/compactor/src/rpc.rs index 9303aa9517985..17d02d398e0b6 100644 --- a/src/storage/compactor/src/rpc.rs +++ b/src/storage/compactor/src/rpc.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/compactor/src/server.rs b/src/storage/compactor/src/server.rs index 119a5c009658d..0a8a1ce093615 100644 --- a/src/storage/compactor/src/server.rs +++ b/src/storage/compactor/src/server.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
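The compactor CLI rework above splits options into regular flags and an `OverrideConfigOpts` struct that is `#[clap(flatten)]`-ed in, with each optional flag mapped onto a config path via `#[override_opts(...)]`. A hedged, standalone sketch of the same pattern, applying the override by hand instead of the repo's `OverrideConfig` proc macro (struct and field names are illustrative; assumes clap 3 with the `derive` and `env` features):

```rust
use clap::Parser;

#[derive(Debug, Default)]
struct ServerConfig {
    metrics_level: u32,
    max_concurrent_compaction_task_number: u64,
}

#[derive(Parser, Clone, Debug)]
struct Opts {
    /// The address that this service listens to.
    #[clap(long, env = "RW_LISTEN_ADDR", default_value = "127.0.0.1:6660")]
    listen_addr: String,

    #[clap(flatten)]
    override_config: OverrideOpts,
}

/// Optional flags; when present they win over the values loaded from the config file.
#[derive(Parser, Clone, Debug)]
struct OverrideOpts {
    #[clap(long, env = "RW_METRICS_LEVEL")]
    metrics_level: Option<u32>,

    #[clap(long, env = "RW_MAX_CONCURRENT_TASK_NUMBER")]
    max_concurrent_task_number: Option<u64>,
}

// Equivalent in spirit to `#[override_opts(path = server.metrics_level)]` etc.,
// written out by hand for illustration.
fn apply_overrides(config: &mut ServerConfig, opts: &OverrideOpts) {
    if let Some(v) = opts.metrics_level {
        config.metrics_level = v;
    }
    if let Some(v) = opts.max_concurrent_task_number {
        config.max_concurrent_compaction_task_number = v;
    }
}

fn main() {
    let opts = Opts::parse();
    let mut config = ServerConfig::default(); // stand-in for `load_config(...)`
    apply_overrides(&mut config, &opts.override_config);
    println!("listening on {}, config: {:?}", opts.listen_addr, config);
}
```

The effect is the same precedence seen in `compactor_serve` above: file defaults first, then any flag or environment variable that was actually supplied.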
@@ -19,6 +19,7 @@ use std::time::Duration; use risingwave_common::config::load_config; use risingwave_common::monitor::process_linux::monitor_process; use risingwave_common::util::addr::HostAddr; +use risingwave_common::{GIT_SHA, RW_VERSION}; use risingwave_common_service::metrics_manager::MetricsManager; use risingwave_common_service::observer_manager::ObserverManager; use risingwave_hummock_sdk::compact::CompactorRuntimeConfig; @@ -27,7 +28,7 @@ use risingwave_object_store::object::parse_remote_object_store; use risingwave_pb::common::WorkerType; use risingwave_pb::compactor::compactor_service_server::CompactorServiceServer; use risingwave_rpc_client::MetaClient; -use risingwave_storage::hummock::compactor::{CompactionExecutor, CompactorContext, Context}; +use risingwave_storage::hummock::compactor::{CompactionExecutor, CompactorContext}; use risingwave_storage::hummock::hummock_meta_client::MonitoredHummockMetaClient; use risingwave_storage::hummock::{ CompactorMemoryCollector, MemoryLimiter, SstableIdManager, SstableStore, @@ -35,8 +36,10 @@ use risingwave_storage::hummock::{ use risingwave_storage::monitor::{ monitor_cache, CompactorMetrics, HummockMetrics, ObjectStoreMetrics, }; +use risingwave_storage::opts::StorageOpts; use tokio::sync::oneshot::Sender; use tokio::task::JoinHandle; +use tracing::info; use super::compactor_observer::observer_manager::CompactorObserverNode; use crate::rpc::CompactorServiceImpl; @@ -45,23 +48,29 @@ use crate::CompactorOpts; /// Fetches and runs compaction tasks. pub async fn compactor_serve( listen_addr: SocketAddr, - client_addr: HostAddr, + advertise_addr: HostAddr, opts: CompactorOpts, ) -> (JoinHandle<()>, JoinHandle<()>, Sender<()>) { - let config = load_config(&opts.config_path); - tracing::info!( - "Starting compactor with config {:?} and opts {:?}", - config, - opts + let config = load_config(&opts.config_path, Some(opts.override_config)); + info!("Starting compactor node",); + info!("> config: {:?}", config); + info!( + "> debug assertions: {}", + if cfg!(debug_assertions) { "on" } else { "off" } ); + info!("> version: {} ({})", RW_VERSION, GIT_SHA); // Register to the cluster. - let meta_client = - MetaClient::register_new(&opts.meta_address, WorkerType::Compactor, &client_addr, 0) - .await - .unwrap(); - tracing::info!("Assigned compactor id {}", meta_client.worker_id()); - meta_client.activate(&client_addr).await.unwrap(); + let (meta_client, system_params) = MetaClient::register_new( + &opts.meta_address, + WorkerType::Compactor, + &advertise_addr, + 0, + ) + .await + .unwrap(); + info!("Assigned compactor id {}", meta_client.worker_id()); + meta_client.activate(&advertise_addr).await.unwrap(); // Boot compactor let registry = prometheus::Registry::new(); @@ -75,25 +84,27 @@ pub async fn compactor_serve( hummock_metrics.clone(), )); - // use half of limit because any memory which would hold in meta-cache will be allocate by - // limited at first. 
- let storage_config = Arc::new(config.storage); + let state_store_url = { + let from_local = opts.state_store.unwrap_or("".to_string()); + system_params.state_store(from_local) + }; + + let storage_opts = Arc::new(StorageOpts::from((&config, &system_params))); let object_store = Arc::new( parse_remote_object_store( - opts.state_store + state_store_url .strip_prefix("hummock+") .expect("object store must be hummock for compactor server"), object_metrics, - storage_config.object_store_use_batch_delete, "Hummock", ) .await, ); let sstable_store = Arc::new(SstableStore::for_compactor( object_store, - storage_config.data_directory.to_string(), + storage_opts.data_directory.to_string(), 1 << 20, // set 1MB memory to avoid panic. - storage_config.meta_cache_capacity_mb * (1 << 20), + storage_opts.meta_cache_capacity_mb * (1 << 20), )); let filter_key_extractor_manager = Arc::new(FilterKeyExtractorManager::default()); @@ -101,10 +112,13 @@ pub async fn compactor_serve( let observer_manager = ObserverManager::new_with_meta_client(meta_client.clone(), compactor_observer_node).await; + // use half of limit because any memory which would hold in meta-cache will be allocate by + // limited at first. let observer_join_handle = observer_manager.start().await; - let output_limit_mb = storage_config.compactor_memory_limit_mb as u64 / 2; + let output_limit_mb = storage_opts.compactor_memory_limit_mb as u64 / 2; let memory_limiter = Arc::new(MemoryLimiter::new(output_limit_mb << 20)); - let input_limit_mb = storage_config.compactor_memory_limit_mb as u64 / 2; + let input_limit_mb = storage_opts.compactor_memory_limit_mb as u64 / 2; + let max_concurrent_task_number = storage_opts.max_concurrent_compaction_task_number; let memory_collector = Arc::new(CompactorMemoryCollector::new( memory_limiter.clone(), sstable_store.clone(), @@ -113,10 +127,10 @@ pub async fn compactor_serve( monitor_cache(memory_collector, ®istry).unwrap(); let sstable_id_manager = Arc::new(SstableIdManager::new( hummock_meta_client.clone(), - storage_config.sstable_id_remote_fetch_number, + storage_opts.sstable_id_remote_fetch_number, )); - let context = Arc::new(Context { - options: storage_config, + let compactor_context = Arc::new(CompactorContext { + storage_opts, hummock_meta_client: hummock_meta_client.clone(), sstable_store: sstable_store.clone(), compactor_metrics, @@ -128,13 +142,10 @@ pub async fn compactor_serve( read_memory_limiter: memory_limiter, sstable_id_manager: sstable_id_manager.clone(), task_progress_manager: Default::default(), + compactor_runtime_config: Arc::new(tokio::sync::Mutex::new(CompactorRuntimeConfig { + max_concurrent_task_number, + })), }); - let compactor_context = Arc::new(CompactorContext::with_config( - context, - CompactorRuntimeConfig { - max_concurrent_task_number: opts.max_concurrent_task_number, - }, - )); let sub_tasks = vec![ MetaClient::start_heartbeat_loop( meta_client.clone(), @@ -176,7 +187,7 @@ pub async fn compactor_serve( }); // Boot metrics service. 
- if opts.metrics_level > 0 { + if config.server.metrics_level > 0 { MetricsManager::boot_metrics_service( opts.prometheus_listener_addr.clone(), registry.clone(), diff --git a/src/storage/hummock_sdk/Cargo.toml b/src/storage/hummock_sdk/Cargo.toml index 40ce926895f25..51acfdd4e6ab0 100644 --- a/src/storage/hummock_sdk/Cargo.toml +++ b/src/storage/hummock_sdk/Cargo.toml @@ -7,6 +7,12 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack", "num-traits"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] bytes = "1" hex = "0.4" @@ -15,7 +21,6 @@ num-derive = "0.3" num-traits = "0.2" parking_lot = "0.12" parse-display = "0.6" -prost = "0.11" risingwave_common = { path = "../../common" } risingwave_pb = { path = "../../prost" } tokio = { version = "0.2", package = "madsim-tokio", features = [ diff --git a/src/storage/hummock_sdk/src/compact.rs b/src/storage/hummock_sdk/src/compact.rs index c60b49c657835..fcb6ba8b177e0 100644 --- a/src/storage/hummock_sdk/src/compact.rs +++ b/src/storage/hummock_sdk/src/compact.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -95,7 +95,7 @@ pub fn append_sstable_info_to_string(s: &mut String, sstable_info: &SstableInfo) } /// Config that is updatable when compactor is running. -#[derive(Clone)] +#[derive(Clone, Default)] pub struct CompactorRuntimeConfig { pub max_concurrent_task_number: u64, } diff --git a/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs b/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs index d276355ea597e..ff4282c6c4d96 100644 --- a/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs +++ b/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -20,8 +20,8 @@ use risingwave_pb::hummock::group_delta::DeltaType; use risingwave_pb::hummock::hummock_version::Levels; use risingwave_pb::hummock::hummock_version_delta::GroupDeltas; use risingwave_pb::hummock::{ - CompactionConfig, GroupConstruct, GroupDestroy, HummockVersion, HummockVersionDelta, Level, - LevelType, OverlappingLevel, SstableInfo, + CompactionConfig, GroupConstruct, GroupDestroy, GroupMetaChange, HummockVersion, + HummockVersionDelta, Level, LevelType, OverlappingLevel, SstableInfo, }; use super::StateTableId; @@ -37,6 +37,7 @@ pub struct GroupDeltasSummary { pub insert_table_infos: Vec, pub group_construct: Option, pub group_destroy: Option, + pub group_meta_changes: Vec, } pub fn summarize_group_deltas(group_deltas: &GroupDeltas) -> GroupDeltasSummary { @@ -47,6 +48,7 @@ pub fn summarize_group_deltas(group_deltas: &GroupDeltas) -> GroupDeltasSummary let mut insert_table_infos = vec![]; let mut group_construct = None; let mut group_destroy = None; + let mut group_meta_changes = vec![]; for group_delta in &group_deltas.group_deltas { match group_delta.get_delta_type().unwrap() { DeltaType::IntraLevel(intra_level) => { @@ -68,6 +70,9 @@ pub fn summarize_group_deltas(group_deltas: &GroupDeltas) -> GroupDeltasSummary assert!(group_destroy.is_none()); group_destroy = Some(destroy_delta.clone()); } + DeltaType::GroupMetaChange(meta_delta) => { + group_meta_changes.push(meta_delta.clone()); + } } } @@ -79,6 +84,7 @@ pub fn summarize_group_deltas(group_deltas: &GroupDeltas) -> GroupDeltasSummary insert_table_infos, group_construct, group_destroy, + group_meta_changes, } } @@ -107,7 +113,7 @@ pub trait HummockVersionUpdateExt { group_id: CompactionGroupId, member_table_ids: &HashSet, ) -> Vec<(HummockSstableId, u64, u32)>; - fn apply_version_delta(&mut self, version_delta: &HummockVersionDelta); + fn apply_version_delta(&mut self, version_delta: &HummockVersionDelta) -> Vec; fn build_compaction_group_info(&self) -> HashMap; fn build_branched_sst_info( @@ -175,13 +181,21 @@ impl HummockVersionExt for HummockVersion { } } +pub type SstSplitInfo = ( + HummockSstableId, + // divide version + u64, + // level idx + u32, +); + impl HummockVersionUpdateExt for HummockVersion { fn init_with_parent_group( &mut self, parent_group_id: CompactionGroupId, group_id: CompactionGroupId, member_table_ids: &HashSet, - ) -> Vec<(HummockSstableId, u64, u32)> { + ) -> Vec { let mut split_id_vers = vec![]; if parent_group_id == StaticCompactionGroupId::NewCompactionGroup as CompactionGroupId || !self.levels.contains_key(&parent_group_id) @@ -195,20 +209,16 @@ impl HummockVersionUpdateExt for HummockVersion { if let Some(ref mut l0) = parent_levels.l0 { for sub_level in &mut l0.sub_levels { let mut insert_table_infos = vec![]; - for table_info in &mut sub_level.table_infos { - if table_info + for sst_info in &mut sub_level.table_infos { + if sst_info .get_table_ids() .iter() .any(|table_id| member_table_ids.contains(table_id)) { - table_info.divide_version += 1; - split_id_vers.push(( - table_info.get_id(), - table_info.get_divide_version(), - 0, - )); - let mut branch_table_info = table_info.clone(); - branch_table_info.table_ids = table_info + sst_info.divide_version += 1; + split_id_vers.push((sst_info.get_id(), sst_info.get_divide_version(), 0)); + let mut branch_table_info = sst_info.clone(); + branch_table_info.table_ids = sst_info .table_ids .drain_filter(|table_id| member_table_ids.contains(table_id)) .collect_vec(); @@ -225,20 +235,20 @@ impl HummockVersionUpdateExt for HummockVersion { } 
for (z, level) in parent_levels.levels.iter_mut().enumerate() { let level_idx = level.get_level_idx(); - for table_info in &mut level.table_infos { - if table_info + for sst_info in &mut level.table_infos { + if sst_info .get_table_ids() .iter() .any(|table_id| member_table_ids.contains(table_id)) { - table_info.divide_version += 1; + sst_info.divide_version += 1; split_id_vers.push(( - table_info.get_id(), - table_info.get_divide_version(), + sst_info.get_id(), + sst_info.get_divide_version(), level_idx, )); - let mut branch_table_info = table_info.clone(); - branch_table_info.table_ids = table_info + let mut branch_table_info = sst_info.clone(); + branch_table_info.table_ids = sst_info .table_ids .drain_filter(|table_id| member_table_ids.contains(table_id)) .collect_vec(); @@ -250,22 +260,24 @@ impl HummockVersionUpdateExt for HummockVersion { split_id_vers } - fn apply_version_delta(&mut self, version_delta: &HummockVersionDelta) { + fn apply_version_delta(&mut self, version_delta: &HummockVersionDelta) -> Vec { + let mut sst_split_info = vec![]; for (compaction_group_id, group_deltas) in &version_delta.group_deltas { let summary = summarize_group_deltas(group_deltas); if let Some(group_construct) = &summary.group_construct { - self.levels.insert( + let mut new_levels = build_initial_compaction_group_levels( *compaction_group_id, - ::build_initial_levels( - group_construct.get_group_config().unwrap(), - ), + group_construct.get_group_config().unwrap(), ); - let parent_group_id = group_construct.get_parent_group_id(); - self.init_with_parent_group( + let parent_group_id = group_construct.parent_group_id; + new_levels.parent_group_id = parent_group_id; + new_levels.member_table_ids = group_construct.table_ids.clone(); + self.levels.insert(*compaction_group_id, new_levels); + sst_split_info.extend(self.init_with_parent_group( parent_group_id, *compaction_group_id, &HashSet::from_iter(group_construct.get_table_ids().iter().cloned()), - ); + )); } let has_destroy = summary.group_destroy.is_some(); let levels = self @@ -273,6 +285,15 @@ impl HummockVersionUpdateExt for HummockVersion { .get_mut(compaction_group_id) .expect("compaction group should exist"); + for group_meta_delta in &summary.group_meta_changes { + levels + .member_table_ids + .extend(group_meta_delta.table_ids_add.clone()); + levels + .member_table_ids + .drain_filter(|t| group_meta_delta.table_ids_remove.contains(t)); + } + assert!( self.max_committed_epoch <= version_delta.max_committed_epoch, "new max commit epoch {} is older than the current max commit epoch {}", @@ -316,18 +337,14 @@ impl HummockVersionUpdateExt for HummockVersion { self.id = version_delta.id; self.max_committed_epoch = version_delta.max_committed_epoch; self.safe_epoch = version_delta.safe_epoch; + sst_split_info } fn build_compaction_group_info(&self) -> HashMap { let mut ret = HashMap::new(); for (compaction_group_id, levels) in &self.levels { - if let Some(ref l0) = levels.l0 { - for sub_level in l0.get_sub_levels() { - update_compaction_group_info(sub_level, *compaction_group_id, &mut ret); - } - } - for level in levels.get_levels() { - update_compaction_group_info(level, *compaction_group_id, &mut ret); + for table_id in &levels.member_table_ids { + ret.insert(TableId::new(*table_id), *compaction_group_id); } } ret @@ -353,24 +370,11 @@ impl HummockVersionUpdateExt for HummockVersion { } } -fn update_compaction_group_info( - level: &Level, - compaction_group_id: CompactionGroupId, - compaction_group_info: &mut HashMap, -) { - for table_info in 
level.get_table_infos() { - table_info.get_table_ids().iter().for_each(|table_id| { - compaction_group_info.insert(TableId::new(*table_id), compaction_group_id); - }); - } -} - pub trait HummockLevelsExt { fn get_level0(&self) -> &OverlappingLevel; fn get_level(&self, idx: usize) -> &Level; fn get_level_mut(&mut self, idx: usize) -> &mut Level; fn apply_compact_ssts(&mut self, summary: GroupDeltasSummary); - fn build_initial_levels(compaction_config: &CompactionConfig) -> Levels; } impl HummockLevelsExt for Levels { @@ -439,26 +443,76 @@ impl HummockLevelsExt for Levels { .sum::(); } } +} - fn build_initial_levels(compaction_config: &CompactionConfig) -> Levels { - let mut levels = vec![]; - for l in 0..compaction_config.get_max_level() { - levels.push(Level { - level_idx: (l + 1) as u32, - level_type: LevelType::Nonoverlapping as i32, - table_infos: vec![], - total_file_size: 0, - sub_level_id: 0, - }); - } - Levels { - levels, - l0: Some(OverlappingLevel { - sub_levels: vec![], - total_file_size: 0, - }), +pub fn build_initial_compaction_group_levels( + group_id: CompactionGroupId, + compaction_config: &CompactionConfig, +) -> Levels { + let mut levels = vec![]; + for l in 0..compaction_config.get_max_level() { + levels.push(Level { + level_idx: (l + 1) as u32, + level_type: LevelType::Nonoverlapping as i32, + table_infos: vec![], + total_file_size: 0, + sub_level_id: 0, + }); + } + Levels { + levels, + l0: Some(OverlappingLevel { + sub_levels: vec![], + total_file_size: 0, + }), + group_id, + parent_group_id: StaticCompactionGroupId::NewCompactionGroup as _, + member_table_ids: vec![], + } +} + +pub fn try_get_compaction_group_id_by_table_id( + version: &HummockVersion, + table_id: StateTableId, +) -> Option { + for (group_id, levels) in &version.levels { + if levels.member_table_ids.contains(&table_id) { + return Some(*group_id); } } + None +} + +/// Gets all compaction group ids. +pub fn get_compaction_group_ids(version: &HummockVersion) -> Vec { + version.levels.keys().cloned().collect() +} + +/// Gets all member table ids. +pub fn get_member_table_ids(version: &HummockVersion) -> HashSet { + version + .levels + .iter() + .flat_map(|(_, levels)| levels.member_table_ids.clone()) + .collect() +} + +/// Gets all SST ids in `group_id` +pub fn get_compaction_group_sst_ids( + version: &HummockVersion, + group_id: CompactionGroupId, +) -> Vec { + let group_levels = version.get_compaction_group_levels(group_id); + group_levels + .l0 + .as_ref() + .unwrap() + .sub_levels + .iter() + .rev() + .chain(group_levels.levels.iter()) + .flat_map(|level| level.table_infos.iter().map(|table_info| table_info.id)) + .collect_vec() } pub fn new_sub_level( @@ -508,6 +562,18 @@ pub fn add_new_sub_level( l0.sub_levels.push(level); } +pub fn build_version_delta_after_version(version: &HummockVersion) -> HummockVersionDelta { + HummockVersionDelta { + id: version.id + 1, + prev_id: version.id, + safe_epoch: version.safe_epoch, + trivial_move: false, + max_committed_epoch: version.max_committed_epoch, + group_deltas: Default::default(), + gc_sst_ids: vec![], + } +} + /// Delete sstables if the table id is in the id set. 
/// /// Return `true` if some sst is deleted, and `false` is the deletion is trivial @@ -541,41 +607,6 @@ fn level_insert_ssts(operand: &mut Level, insert_table_infos: Vec) debug_assert!(can_concat(&operand.table_infos)); } -pub trait HummockVersionDeltaExt { - fn get_removed_sst_ids(&self) -> Vec; - fn get_inserted_sst_ids(&self) -> Vec; -} - -impl HummockVersionDeltaExt for HummockVersionDelta { - fn get_removed_sst_ids(&self) -> Vec { - let mut ret = vec![]; - for group_deltas in self.group_deltas.values() { - for group_delta in &group_deltas.group_deltas { - if let DeltaType::IntraLevel(intra_level) = group_delta.get_delta_type().unwrap() { - for sst_id in &intra_level.removed_table_ids { - ret.push(*sst_id); - } - } - } - } - ret - } - - fn get_inserted_sst_ids(&self) -> Vec { - let mut ret = vec![]; - for group_deltas in self.group_deltas.values() { - for group_delta in &group_deltas.group_deltas { - if let DeltaType::IntraLevel(intra_level) = group_delta.get_delta_type().unwrap() { - for sst in &intra_level.inserted_table_infos { - ret.push(sst.id); - } - } - } - } - ret - } -} - #[cfg(test)] mod tests { use std::collections::HashMap; @@ -588,9 +619,8 @@ mod tests { HummockVersionDelta, IntraLevelDelta, Level, LevelType, OverlappingLevel, SstableInfo, }; - use super::HummockLevelsExt; use crate::compaction_group::hummock_version_ext::{ - HummockVersionExt, HummockVersionUpdateExt, + build_initial_compaction_group_levels, HummockVersionExt, HummockVersionUpdateExt, }; #[test] @@ -605,6 +635,7 @@ mod tests { sub_levels: vec![], total_file_size: 0, }), + ..Default::default() }, )]), max_committed_epoch: 0, @@ -648,17 +679,23 @@ mod tests { levels: HashMap::from_iter([ ( 0, - Levels::build_initial_levels(&CompactionConfig { - max_level: 6, - ..Default::default() - }), + build_initial_compaction_group_levels( + 0, + &CompactionConfig { + max_level: 6, + ..Default::default() + }, + ), ), ( 1, - Levels::build_initial_levels(&CompactionConfig { - max_level: 6, - ..Default::default() - }), + build_initial_compaction_group_levels( + 1, + &CompactionConfig { + max_level: 6, + ..Default::default() + }, + ), ), ]), max_committed_epoch: 0, @@ -708,10 +745,13 @@ mod tests { ..Default::default() }; version.apply_version_delta(&version_delta); - let mut cg1 = Levels::build_initial_levels(&CompactionConfig { - max_level: 6, - ..Default::default() - }); + let mut cg1 = build_initial_compaction_group_levels( + 1, + &CompactionConfig { + max_level: 6, + ..Default::default() + }, + ); cg1.levels[0] = Level { level_idx: 1, level_type: LevelType::Nonoverlapping as i32, @@ -728,10 +768,13 @@ mod tests { levels: HashMap::from_iter([ ( 2, - Levels::build_initial_levels(&CompactionConfig { - max_level: 6, - ..Default::default() - }), + build_initial_compaction_group_levels( + 2, + &CompactionConfig { + max_level: 6, + ..Default::default() + } + ), ), (1, cg1,), ]), diff --git a/src/storage/hummock_sdk/src/compaction_group/mod.rs b/src/storage/hummock_sdk/src/compaction_group/mod.rs index fed5fa32b483f..2f197bf2d33b3 100644 --- a/src/storage/hummock_sdk/src/compaction_group/mod.rs +++ b/src/storage/hummock_sdk/src/compaction_group/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
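The `hummock_version_ext` changes above introduce `build_version_delta_after_version` and make `apply_version_delta` return SST split info, both relying on deltas being chained off the current version (`id = version.id + 1`, `prev_id = version.id`). A simplified, self-contained illustration with plain structs instead of the protobuf-backed `HummockVersion`/`HummockVersionDelta` (the epoch assertion mirrors `apply_version_delta`; the explicit `prev_id` check is added here only to make the chaining visible):

```rust
#[derive(Debug)]
struct Version {
    id: u64,
    max_committed_epoch: u64,
}

#[derive(Debug)]
struct VersionDelta {
    id: u64,
    prev_id: u64,
    max_committed_epoch: u64,
}

// Mirrors `build_version_delta_after_version`: the delta starts as a no-op on top of `version`.
fn build_delta_after(version: &Version) -> VersionDelta {
    VersionDelta {
        id: version.id + 1,
        prev_id: version.id,
        max_committed_epoch: version.max_committed_epoch,
    }
}

fn apply_delta(version: &mut Version, delta: &VersionDelta) {
    // The delta must have been built on top of the current version,
    // and the committed epoch may only move forward.
    assert_eq!(version.id, delta.prev_id);
    assert!(delta.max_committed_epoch >= version.max_committed_epoch);
    version.id = delta.id;
    version.max_committed_epoch = delta.max_committed_epoch;
}

fn main() {
    let mut version = Version { id: 1, max_committed_epoch: 100 };
    let mut delta = build_delta_after(&version);
    delta.max_committed_epoch = 200; // e.g. a new checkpoint epoch
    apply_delta(&mut version, &delta);
    assert_eq!(version.id, 2);
}
```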
diff --git a/src/storage/hummock_sdk/src/filter_key_extractor.rs b/src/storage/hummock_sdk/src/filter_key_extractor.rs index d50769e7fc24c..65881cf294f4a 100644 --- a/src/storage/hummock_sdk/src/filter_key_extractor.rs +++ b/src/storage/hummock_sdk/src/filter_key_extractor.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ use risingwave_common::util::sort_util::OrderType; use risingwave_pb::catalog::Table; use tokio::sync::Notify; +use crate::info_in_release; use crate::key::{get_table_id, TABLE_PREFIX_LEN}; const ACQUIRE_TIMEOUT: Duration = Duration::from_secs(60); @@ -307,11 +308,13 @@ pub struct FilterKeyExtractorManager { impl FilterKeyExtractorManager { /// Insert (`table_id`, `filter_key_extractor`) as mapping to `HashMap` for `acquire` pub fn update(&self, table_id: u32, filter_key_extractor: Arc) { + info_in_release!("update key extractor of {}", table_id); self.inner.update(table_id, filter_key_extractor); } /// Remove a mapping by `table_id` pub fn remove(&self, table_id: u32) { + info_in_release!("remove key extractor of {}", table_id); self.inner.remove(table_id); } @@ -462,6 +465,7 @@ mod tests { handle_pk_conflict: false, read_prefix_len_hint: 1, version: None, + watermark_indices: vec![], } } diff --git a/src/storage/hummock_sdk/src/key.rs b/src/storage/hummock_sdk/src/key.rs index 1b456a7478eb7..966e1bd8a0ef1 100644 --- a/src/storage/hummock_sdk/src/key.rs +++ b/src/storage/hummock_sdk/src/key.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,6 +25,8 @@ use crate::HummockEpoch; pub const EPOCH_LEN: usize = std::mem::size_of::(); pub const TABLE_PREFIX_LEN: usize = std::mem::size_of::(); +// Max length for key overlap and diff length. See KeyPrefix::encode. +pub const MAX_KEY_LEN: usize = u16::MAX as usize; pub type TableKeyRange = (Bound>>, Bound>>); pub type UserKeyRange = (Bound>>, Bound>>); diff --git a/src/storage/hummock_sdk/src/key_cmp.rs b/src/storage/hummock_sdk/src/key_cmp.rs index 1c482ecce89e7..b2e97a839d28e 100644 --- a/src/storage/hummock_sdk/src/key_cmp.rs +++ b/src/storage/hummock_sdk/src/key_cmp.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/hummock_sdk/src/key_range.rs b/src/storage/hummock_sdk/src/key_range.rs index b57ba3250f562..eb814a0e91a0b 100644 --- a/src/storage/hummock_sdk/src/key_range.rs +++ b/src/storage/hummock_sdk/src/key_range.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
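The new `MAX_KEY_LEN` constant in `key.rs` above is `u16::MAX`, which suggests that the key overlap and diff lengths referenced in its comment are stored as `u16` during block encoding. A hedged sketch of that kind of prefix compression; the names and exact layout here are assumptions, not taken from the real `KeyPrefix::encode`:

```rust
use bytes::{Buf, BufMut, BytesMut};

const MAX_KEY_LEN: usize = u16::MAX as usize;

// Each entry stores how many leading bytes it shares with the previous key (overlap) and
// how many new bytes follow (diff), both as u16 -- hence neither may exceed u16::MAX.
fn encode_entry(buf: &mut BytesMut, prev_key: &[u8], key: &[u8]) {
    assert!(key.len() <= MAX_KEY_LEN, "key exceeds MAX_KEY_LEN");
    let overlap = prev_key
        .iter()
        .zip(key.iter())
        .take_while(|(a, b)| a == b)
        .count();
    let diff = &key[overlap..];
    buf.put_u16_le(overlap as u16);
    buf.put_u16_le(diff.len() as u16);
    buf.put_slice(diff);
}

fn decode_entry(buf: &mut &[u8], prev_key: &[u8]) -> Vec<u8> {
    let overlap = buf.get_u16_le() as usize;
    let diff_len = buf.get_u16_le() as usize;
    let mut key = prev_key[..overlap].to_vec();
    key.extend_from_slice(&buf[..diff_len]);
    buf.advance(diff_len);
    key
}

fn main() {
    let mut buf = BytesMut::new();
    encode_entry(&mut buf, b"", b"table1_key_a");
    encode_entry(&mut buf, b"table1_key_a", b"table1_key_b");

    let mut slice: &[u8] = &buf;
    let k1 = decode_entry(&mut slice, b"");
    let k2 = decode_entry(&mut slice, &k1);
    assert_eq!(k2, b"table1_key_b".to_vec());
}
```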
diff --git a/src/storage/hummock_sdk/src/lib.rs b/src/storage/hummock_sdk/src/lib.rs index cc556e67b547e..69df808f68676 100644 --- a/src/storage/hummock_sdk/src/lib.rs +++ b/src/storage/hummock_sdk/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -53,8 +53,31 @@ pub type CompactionGroupId = u64; pub const INVALID_VERSION_ID: HummockVersionId = 0; pub const FIRST_VERSION_ID: HummockVersionId = 1; -pub const LOCAL_SST_ID_MASK: HummockSstableId = 1 << (HummockSstableId::BITS - 1); -pub const REMOTE_SST_ID_MASK: HummockSstableId = !LOCAL_SST_ID_MASK; +#[macro_export] +/// This is wrapper for `info` log. +/// +/// In our CI tests, we frequently create and drop tables, and checkpoint in all barriers, which may +/// cause many events. However, these events are not expected to be frequent in production usage, so +/// we print an info log for every these events. But these events are frequent in CI, and produce +/// many logs in CI, and we may want to downgrade the log level of these event log to debug. +/// Therefore, we provide this macro to wrap the `info` log, which will produce `info` log when +/// `debug_assertions` is not enabled, and `debug` log when `debug_assertions` is enabled. +macro_rules! info_in_release { + ($($arg:tt)*) => { + { + #[cfg(debug_assertions)] + { + use tracing::debug; + debug!($($arg)*); + } + #[cfg(not(debug_assertions))] + { + use tracing::info; + info!($($arg)*); + } + } + } +} #[derive(Debug, Clone)] pub struct LocalSstableInfo { @@ -148,18 +171,6 @@ impl PartialEq for LocalSstableInfo { } } -pub fn get_remote_sst_id(id: HummockSstableId) -> HummockSstableId { - id & REMOTE_SST_ID_MASK -} - -pub fn get_local_sst_id(id: HummockSstableId) -> HummockSstableId { - id | LOCAL_SST_ID_MASK -} - -pub fn is_remote_sst_id(id: HummockSstableId) -> bool { - id & LOCAL_SST_ID_MASK == 0 -} - /// Package read epoch of hummock, it be used for `wait_epoch` #[derive(Debug, Clone)] pub enum HummockReadEpoch { diff --git a/src/storage/hummock_sdk/src/prost_key_range.rs b/src/storage/hummock_sdk/src/prost_key_range.rs index 6cbad2267965c..fb8368ac985b2 100644 --- a/src/storage/hummock_sdk/src/prost_key_range.rs +++ b/src/storage/hummock_sdk/src/prost_key_range.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/hummock_sdk/src/table_stats.rs b/src/storage/hummock_sdk/src/table_stats.rs index 78096b0afe935..19732e30cba13 100644 --- a/src/storage/hummock_sdk/src/table_stats.rs +++ b/src/storage/hummock_sdk/src/table_stats.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/storage/hummock_test/Cargo.toml b/src/storage/hummock_test/Cargo.toml index 8c936afe5bddd..bc7ff418946a0 100644 --- a/src/storage/hummock_test/Cargo.toml +++ b/src/storage/hummock_test/Cargo.toml @@ -8,6 +8,12 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] async-stream = "0.3" async-trait = "0.1" diff --git a/src/storage/hummock_test/benches/bench_hummock_iter.rs b/src/storage/hummock_test/benches/bench_hummock_iter.rs index a14f8b8979c87..b2ff2bde06e70 100644 --- a/src/storage/hummock_test/benches/bench_hummock_iter.rs +++ b/src/storage/hummock_test/benches/bench_hummock_iter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ use risingwave_hummock_test::get_notification_client_for_test; use risingwave_meta::hummock::test_utils::setup_compute_env; use risingwave_meta::hummock::MockHummockMetaClient; use risingwave_storage::hummock::iterator::test_utils::mock_sstable_store; -use risingwave_storage::hummock::test_utils::default_config_for_test; +use risingwave_storage::hummock::test_utils::default_opts_for_test; use risingwave_storage::hummock::HummockStorage; use risingwave_storage::storage_value::StorageValue; use risingwave_storage::store::{ReadOptions, StateStoreRead, StateStoreWrite, WriteOptions}; @@ -51,7 +51,7 @@ fn criterion_benchmark(c: &mut Criterion) { let runtime = tokio::runtime::Runtime::new().unwrap(); let batches = gen_interleave_shared_buffer_batch_iter(10000, 100); let sstable_store = mock_sstable_store(); - let hummock_options = Arc::new(default_config_for_test()); + let hummock_options = Arc::new(default_opts_for_test()); let (env, hummock_manager_ref, _cluster_manager_ref, worker_node) = runtime.block_on(setup_compute_env(8080)); let meta_client = Arc::new(MockHummockMetaClient::new( diff --git a/src/storage/hummock_test/src/compactor_tests.rs b/src/storage/hummock_test/src/compactor_tests.rs index 98c02fdad641c..b706bcee533eb 100644 --- a/src/storage/hummock_test/src/compactor_tests.rs +++ b/src/storage/hummock_test/src/compactor_tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -23,10 +23,10 @@ pub(crate) mod tests { use itertools::Itertools; use rand::Rng; use risingwave_common::catalog::TableId; - use risingwave_common::config::StorageConfig; use risingwave_common::constants::hummock::CompactionFilterFlag; use risingwave_common::util::epoch::Epoch; use risingwave_common_service::observer_manager::NotificationClient; + use risingwave_hummock_sdk::compact::CompactorRuntimeConfig; use risingwave_hummock_sdk::compaction_group::hummock_version_ext::HummockVersionExt; use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; use risingwave_hummock_sdk::filter_key_extractor::{ @@ -34,24 +34,23 @@ pub(crate) mod tests { FullKeyFilterKeyExtractor, }; use risingwave_hummock_sdk::key::{next_key, TABLE_PREFIX_LEN}; - use risingwave_meta::hummock::compaction::ManualCompactionOption; + use risingwave_meta::hummock::compaction::{default_level_selector, ManualCompactionOption}; use risingwave_meta::hummock::test_utils::{ register_table_ids_to_compaction_group, setup_compute_env, unregister_table_ids_from_compaction_group, }; use risingwave_meta::hummock::{HummockManagerRef, MockHummockMetaClient}; - use risingwave_meta::storage::{MemStore, MetaStore}; + use risingwave_meta::storage::MetaStore; use risingwave_pb::hummock::{HummockVersion, TableOption}; use risingwave_rpc_client::HummockMetaClient; - use risingwave_storage::hummock::compactor::{ - CompactionExecutor, Compactor, CompactorContext, Context, - }; + use risingwave_storage::hummock::compactor::{CompactionExecutor, Compactor, CompactorContext}; use risingwave_storage::hummock::iterator::test_utils::mock_sstable_store; use risingwave_storage::hummock::sstable_store::SstableStoreRef; use risingwave_storage::hummock::{ HummockStorage as GlobalHummockStorage, MemoryLimiter, SstableIdManager, }; use risingwave_storage::monitor::{CompactorMetrics, StoreLocalStatistic}; + use risingwave_storage::opts::StorageOpts; use risingwave_storage::storage_value::StorageValue; use risingwave_storage::store::{ ReadOptions, StateStoreReadExt, StateStoreWrite, WriteOptions, @@ -70,11 +69,11 @@ pub(crate) mod tests { table_id: TableId, ) -> HummockStorage { let remote_dir = "hummock_001_test".to_string(); - let options = Arc::new(StorageConfig { + let options = Arc::new(StorageOpts { sstable_size_mb: 1, block_size_kb: 1, bloom_false_positive: 0.1, - data_directory: remote_dir, + data_directory: remote_dir.clone(), write_conflict_detection_enabled: true, ..Default::default() }); @@ -104,11 +103,11 @@ pub(crate) mod tests { notification_client: impl NotificationClient, ) -> GlobalHummockStorage { let remote_dir = "hummock_001_test".to_string(); - let options = Arc::new(StorageConfig { + let options = Arc::new(StorageOpts { sstable_size_mb: 1, block_size_kb: 1, bloom_false_positive: 0.1, - data_directory: remote_dir, + data_directory: remote_dir.clone(), write_conflict_detection_enabled: true, ..Default::default() }); @@ -162,7 +161,7 @@ pub(crate) mod tests { filter_key_extractor_manager: FilterKeyExtractorManagerRef, ) -> CompactorContext { get_compactor_context_with_filter_key_extractor_manager_impl( - storage.options().clone(), + storage.storage_opts().clone(), storage.sstable_store(), hummock_meta_client, filter_key_extractor_manager, @@ -170,13 +169,13 @@ pub(crate) mod tests { } fn get_compactor_context_with_filter_key_extractor_manager_impl( - options: Arc, + options: Arc, sstable_store: SstableStoreRef, hummock_meta_client: &Arc, filter_key_extractor_manager: FilterKeyExtractorManagerRef, ) -> CompactorContext { - let 
context = Arc::new(Context { - options: options.clone(), + CompactorContext { + storage_opts: options.clone(), sstable_store, hummock_meta_client: hummock_meta_client.clone(), compactor_metrics: Arc::new(CompactorMetrics::unused()), @@ -189,8 +188,10 @@ pub(crate) mod tests { options.sstable_id_remote_fetch_number, )), task_progress_manager: Default::default(), - }); - CompactorContext::new(context) + compactor_runtime_config: Arc::new(tokio::sync::Mutex::new( + CompactorRuntimeConfig::default(), + )), + } } #[tokio::test] @@ -223,13 +224,16 @@ pub(crate) mod tests { &hummock_meta_client, &key, 1 << 10, - (1..129).into_iter().map(|v| (v * 1000) << 16).collect_vec(), + (1..129).map(|v| (v * 1000) << 16).collect_vec(), ) .await; // 2. get compact task let mut compact_task = hummock_manager_ref - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .unwrap(); @@ -352,13 +356,16 @@ pub(crate) mod tests { &hummock_meta_client, &key, 1 << 20, - (1..129).into_iter().collect_vec(), + (1..129).collect_vec(), ) .await; // 2. get compact task let mut compact_task = hummock_manager_ref - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .unwrap(); @@ -405,7 +412,7 @@ pub(crate) mod tests { .sstable(output_table, &mut StoreLocalStatistic::default()) .await .unwrap(); - let target_table_size = storage.options().sstable_size_mb * (1 << 20); + let target_table_size = storage.storage_opts().sstable_size_mb * (1 << 20); assert!( table.value().meta.estimated_size > target_table_size, @@ -437,7 +444,10 @@ pub(crate) mod tests { // 6. get compact task and there should be none let compact_task = hummock_manager_ref - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap(); @@ -487,10 +497,9 @@ pub(crate) mod tests { } } - pub(crate) async fn prepare_compactor_and_filter( + pub(crate) fn prepare_compactor_and_filter( storage: &HummockStorage, hummock_meta_client: &Arc, - hummock_manager_ref: HummockManagerRef, existing_table_id: u32, ) -> CompactorContext { let filter_key_extractor_manager = storage.filter_key_extractor_manager().clone(); @@ -501,21 +510,11 @@ pub(crate) mod tests { )), ); - let compact_ctx = get_compactor_context_with_filter_key_extractor_manager( + get_compactor_context_with_filter_key_extractor_manager( storage, hummock_meta_client, - filter_key_extractor_manager.clone(), - ); - - let existing_table_id: u32 = 1; - // Only registered table_ids are accepted in commit_epoch - register_table_ids_to_compaction_group( - &hummock_manager_ref, - &[existing_table_id], - StaticCompactionGroupId::StateDefault.into(), + filter_key_extractor_manager, ) - .await; - compact_ctx } #[tokio::test] @@ -539,10 +538,8 @@ pub(crate) mod tests { let compact_ctx_existing_table_id = prepare_compactor_and_filter( &storage_existing_table_id, &hummock_meta_client, - hummock_manager_ref.clone(), existing_table_id, - ) - .await; + ); prepare_data( hummock_meta_client.clone(), @@ -603,7 +600,10 @@ pub(crate) mod tests { // 5. 
get compact task and there should be none let compact_task = hummock_manager_ref - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap(); @@ -648,7 +648,7 @@ pub(crate) mod tests { ); let compact_ctx = get_compactor_context_with_filter_key_extractor_manager_impl( - global_storage.options().clone(), + global_storage.storage_opts().clone(), global_storage.sstable_store(), &hummock_meta_client, filter_key_extractor_manager.clone(), @@ -661,18 +661,18 @@ pub(crate) mod tests { let existing_table_ids = 2; let kv_count: usize = 128; let mut epoch: u64 = 1; + register_table_ids_to_compaction_group( + &hummock_manager_ref, + &[drop_table_id, existing_table_ids], + StaticCompactionGroupId::StateDefault.into(), + ) + .await; for index in 0..kv_count { let (table_id, storage) = if index % 2 == 0 { - (drop_table_id, storage_1.clone()) + (drop_table_id, &storage_1) } else { - (existing_table_ids, storage_2.clone()) + (existing_table_ids, &storage_2) }; - register_table_ids_to_compaction_group( - &hummock_manager_ref, - &[table_id], - StaticCompactionGroupId::StateDefault.into(), - ) - .await; epoch += 1; let mut local = storage.start_write_batch(WriteOptions { epoch, @@ -758,7 +758,10 @@ pub(crate) mod tests { // 6. get compact task and there should be none let compact_task = hummock_manager_ref - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap(); assert!(compact_task.is_none()); @@ -802,11 +805,12 @@ pub(crate) mod tests { worker_node.id, )); + let existing_table_id = 2; let storage = get_hummock_storage( hummock_meta_client.clone(), get_notification_client_for_test(env, hummock_manager_ref.clone(), worker_node.clone()), &hummock_manager_ref, - TableId::from(2), + TableId::from(existing_table_id), ) .await; let filter_key_extractor_manager = storage.filter_key_extractor_manager().clone(); @@ -825,18 +829,11 @@ pub(crate) mod tests { // 1. add sstables let val = Bytes::from(b"0"[..].to_vec()); // 1 Byte value - let existing_table_id = 2; let kv_count = 11; // let base_epoch = Epoch(0); let base_epoch = Epoch::now(); let mut epoch: u64 = base_epoch.0; let millisec_interval_epoch: u64 = (1 << 16) * 100; - register_table_ids_to_compaction_group( - &hummock_manager_ref, - &[existing_table_id], - StaticCompactionGroupId::StateDefault.into(), - ) - .await; let mut epoch_set = BTreeSet::new(); for _ in 0..kv_count { epoch += millisec_interval_epoch; @@ -931,7 +928,10 @@ pub(crate) mod tests { // 5. get compact task and there should be none let compact_task = hummock_manager_ref - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap(); assert!(compact_task.is_none()); @@ -1095,7 +1095,10 @@ pub(crate) mod tests { // 5. 
get compact task and there should be none let compact_task = hummock_manager_ref - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap(); assert!(compact_task.is_none()); @@ -1156,13 +1159,8 @@ pub(crate) mod tests { TableId::from(existing_table_id), ) .await; - let compact_ctx = prepare_compactor_and_filter( - &storage, - &hummock_meta_client, - hummock_manager_ref.clone(), - existing_table_id, - ) - .await; + let compact_ctx = + prepare_compactor_and_filter(&storage, &hummock_meta_client, existing_table_id); prepare_data(hummock_meta_client.clone(), &storage, existing_table_id, 2).await; let mut local = storage.start_write_batch(WriteOptions { diff --git a/src/storage/hummock_test/src/failpoint_tests.rs b/src/storage/hummock_test/src/failpoint_tests.rs index 867009c67b1f3..66624bf6832e7 100644 --- a/src/storage/hummock_test/src/failpoint_tests.rs +++ b/src/storage/hummock_test/src/failpoint_tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ use risingwave_meta::hummock::test_utils::setup_compute_env; use risingwave_meta::hummock::MockHummockMetaClient; use risingwave_rpc_client::HummockMetaClient; use risingwave_storage::hummock::iterator::test_utils::mock_sstable_store; -use risingwave_storage::hummock::test_utils::{count_stream, default_config_for_test}; +use risingwave_storage::hummock::test_utils::{count_stream, default_opts_for_test}; use risingwave_storage::hummock::HummockStorage; use risingwave_storage::storage_value::StorageValue; use risingwave_storage::store::{ReadOptions, StateStoreRead, StateStoreWrite, WriteOptions}; @@ -39,7 +39,7 @@ async fn test_failpoints_state_store_read_upload() { let mem_upload_err = "mem_upload_err"; let mem_read_err = "mem_read_err"; let sstable_store = mock_sstable_store(); - let hummock_options = Arc::new(default_config_for_test()); + let hummock_options = Arc::new(default_opts_for_test()); let (env, hummock_manager_ref, _cluster_manager_ref, worker_node) = setup_compute_env(8080).await; let meta_client = Arc::new(MockHummockMetaClient::new( @@ -48,7 +48,7 @@ async fn test_failpoints_state_store_read_upload() { )); let hummock_storage = HummockStorage::for_test( - hummock_options.clone(), + hummock_options, sstable_store.clone(), meta_client.clone(), get_notification_client_for_test(env, hummock_manager_ref, worker_node), diff --git a/src/storage/hummock_test/src/hummock_read_version_tests.rs b/src/storage/hummock_test/src/hummock_read_version_tests.rs index eccaffb944a6a..b59b9a89daa5a 100644 --- a/src/storage/hummock_test/src/hummock_read_version_tests.rs +++ b/src/storage/hummock_test/src/hummock_read_version_tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
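The compactor tests above now build `CompactorContext` as a plain struct literal, with the runtime configuration stored behind `Arc<tokio::sync::Mutex<CompactorRuntimeConfig>>` (the `compactor_runtime_config` field) so it can be read and updated concurrently. A minimal, self-contained sketch of that shared-config pattern, with invented names and assuming the `tokio` crate (with the `macros` and `rt` features) as a dependency:

```rust
use std::sync::Arc;

// Stand-in for CompactorRuntimeConfig; only the field shown in this diff is modeled.
#[derive(Debug, Default)]
struct RuntimeConfig {
    max_concurrent_task_number: u64,
}

#[tokio::main]
async fn main() {
    // One shared, mutable config handle, cloned into every component that needs it.
    let config = Arc::new(tokio::sync::Mutex::new(RuntimeConfig::default()));

    // Writer side, e.g. an RPC handler applying a new limit from the meta node.
    config.lock().await.max_concurrent_task_number = 4;

    // Reader side, e.g. a compaction loop checking the limit before pulling a task.
    assert_eq!(config.lock().await.max_concurrent_task_number, 4);
}
```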
diff --git a/src/storage/hummock_test/src/hummock_storage_tests.rs b/src/storage/hummock_test/src/hummock_storage_tests.rs index b355c2a70f403..567a2d8188afb 100644 --- a/src/storage/hummock_test/src/hummock_storage_tests.rs +++ b/src/storage/hummock_test/src/hummock_storage_tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -744,7 +744,7 @@ async fn test_multiple_epoch_sync() { .await .unwrap(); let test_get = || { - let hummock_storage_clone = hummock_storage.clone(); + let hummock_storage_clone = &hummock_storage; async move { assert_eq!( hummock_storage_clone @@ -837,7 +837,6 @@ async fn test_iter_with_min_epoch() { // epoch 1 write let batch_epoch1: Vec<(Bytes, StorageValue)> = (0..10) - .into_iter() .map(|index| { ( Bytes::from(gen_key(index)), @@ -861,7 +860,6 @@ async fn test_iter_with_min_epoch() { let epoch2 = (32 * 1000) << 16; // epoch 2 write let batch_epoch2: Vec<(Bytes, StorageValue)> = (20..30) - .into_iter() .map(|index| { ( Bytes::from(gen_key(index)), @@ -1050,7 +1048,6 @@ async fn test_hummock_version_reader() { // epoch 1 write let batch_epoch1: Vec<(Bytes, StorageValue)> = (0..10) - .into_iter() .map(|index| { ( Bytes::from(gen_key(index)), @@ -1062,7 +1059,6 @@ async fn test_hummock_version_reader() { let epoch2 = (32 * 1000) << 16; // epoch 2 write let batch_epoch2: Vec<(Bytes, StorageValue)> = (20..30) - .into_iter() .map(|index| { ( Bytes::from(gen_key(index)), @@ -1074,7 +1070,6 @@ async fn test_hummock_version_reader() { let epoch3 = (33 * 1000) << 16; // epoch 3 write let batch_epoch3: Vec<(Bytes, StorageValue)> = (40..50) - .into_iter() .map(|index| { ( Bytes::from(gen_key(index)), @@ -1496,7 +1491,6 @@ async fn test_get_with_min_epoch() { // epoch 1 write let batch_epoch1: Vec<(Bytes, StorageValue)> = (0..10) - .into_iter() .map(|index| { ( Bytes::from(gen_key(index)), @@ -1520,7 +1514,6 @@ async fn test_get_with_min_epoch() { let epoch2 = (32 * 1000) << 16; // epoch 2 write let batch_epoch2: Vec<(Bytes, StorageValue)> = (20..30) - .into_iter() .map(|index| { ( Bytes::from(gen_key(index)), diff --git a/src/storage/hummock_test/src/lib.rs b/src/storage/hummock_test/src/lib.rs index 918309886f45b..41d57a3bb6571 100644 --- a/src/storage/hummock_test/src/lib.rs +++ b/src/storage/hummock_test/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/hummock_test/src/local_version_manager_tests.rs b/src/storage/hummock_test/src/local_version_manager_tests.rs index 09294ace9a9d5..75e7acc99ff4b 100644 --- a/src/storage/hummock_test/src/local_version_manager_tests.rs +++ b/src/storage/hummock_test/src/local_version_manager_tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
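Several hunks above (and in the compactor tests earlier) simply drop `.into_iter()` in front of `.map(...)` on integer ranges. This is a lint-level cleanup rather than a behavior change: a `Range` already implements `Iterator`, so the conversion is a no-op (clippy reports it as `useless_conversion`). A standalone check of the equivalence:

```rust
fn main() {
    // `0..10` is already an Iterator, so `.into_iter()` adds nothing.
    let with_conversion: Vec<u64> = (0..10).into_iter().map(|i| i * 2).collect();
    let without_conversion: Vec<u64> = (0..10).map(|i| i * 2).collect();
    assert_eq!(with_conversion, without_conversion);
}
```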
@@ -17,7 +17,7 @@ use std::sync::Arc; use bytes::Bytes; use risingwave_common::catalog::TableId; -use risingwave_common::config::StorageConfig; +use risingwave_hummock_sdk::compact::CompactorRuntimeConfig; use risingwave_hummock_sdk::filter_key_extractor::FilterKeyExtractorManager; use risingwave_hummock_sdk::HummockSstableId; use risingwave_meta::hummock::test_utils::{ @@ -27,9 +27,9 @@ use risingwave_meta::hummock::{HummockManagerRef, MockHummockMetaClient}; use risingwave_meta::manager::MetaSrvEnv; use risingwave_meta::storage::MemStore; use risingwave_pb::common::WorkerNode; -use risingwave_pb::hummock::pin_version_response::Payload; +use risingwave_pb::hummock::version_update_payload::Payload; use risingwave_pb::hummock::HummockVersion; -use risingwave_storage::hummock::compactor::Context; +use risingwave_storage::hummock::compactor::CompactorContext; use risingwave_storage::hummock::event_handler::hummock_event_handler::BufferTracker; use risingwave_storage::hummock::iterator::test_utils::mock_sstable_store; use risingwave_storage::hummock::local_version::local_version_manager::{ @@ -38,16 +38,17 @@ use risingwave_storage::hummock::local_version::local_version_manager::{ use risingwave_storage::hummock::shared_buffer::shared_buffer_batch::SharedBufferBatch; use risingwave_storage::hummock::shared_buffer::UncommittedData; use risingwave_storage::hummock::test_utils::{ - default_config_for_test, gen_dummy_batch, gen_dummy_batch_several_keys, gen_dummy_sst_info, + default_opts_for_test, gen_dummy_batch, gen_dummy_batch_several_keys, gen_dummy_sst_info, }; use risingwave_storage::hummock::SstableIdManager; use risingwave_storage::monitor::CompactorMetrics; +use risingwave_storage::opts::StorageOpts; use risingwave_storage::storage_value::StorageValue; use crate::test_utils::prepare_first_valid_version; pub async fn prepare_local_version_manager( - opt: Arc, + opt: Arc, env: MetaSrvEnv, hummock_manager_ref: HummockManagerRef, worker_node: WorkerNode, @@ -70,14 +71,15 @@ pub async fn prepare_local_version_manager( let filter_key_extractor_manager = Arc::new(FilterKeyExtractorManager::default()); update_filter_key_extractor_for_table_ids(&filter_key_extractor_manager, &[0]); - let buffer_tracker = BufferTracker::from_storage_config(&opt); - let compactor_context = Arc::new(Context::new_local_compact_context( - opt.clone(), + let buffer_tracker = BufferTracker::from_storage_opts(&opt); + let compactor_context = Arc::new(CompactorContext::new_local_compact_context( + opt, sstable_store, hummock_meta_client, Arc::new(CompactorMetrics::unused()), sstable_id_manager, filter_key_extractor_manager, + CompactorRuntimeConfig::default(), )); LocalVersionManager::new(pinned_version, compactor_context, buffer_tracker) @@ -85,7 +87,7 @@ pub async fn prepare_local_version_manager( #[tokio::test] async fn test_update_pinned_version() { - let opt = Arc::new(default_config_for_test()); + let opt = Arc::new(default_opts_for_test()); let (env, hummock_manager_ref, _, worker_node) = setup_compute_env(8080).await; let local_version_manager = prepare_local_version_manager(opt, env, hummock_manager_ref, worker_node).await; @@ -234,7 +236,7 @@ async fn test_update_pinned_version() { #[tokio::test] async fn test_update_uncommitted_ssts() { - let mut opt = default_config_for_test(); + let mut opt = default_opts_for_test(); opt.share_buffers_sync_parallelism = 2; opt.sstable_size_mb = 1; let opt = Arc::new(opt); @@ -426,7 +428,7 @@ async fn test_update_uncommitted_ssts() { #[tokio::test] async fn 
test_clear_shared_buffer() { - let opt = Arc::new(default_config_for_test()); + let opt = Arc::new(default_opts_for_test()); let (env, hummock_manager_ref, _, worker_node) = setup_compute_env(8080).await; let local_version_manager = prepare_local_version_manager(opt, env, hummock_manager_ref, worker_node).await; @@ -468,7 +470,7 @@ async fn test_clear_shared_buffer() { #[tokio::test] async fn test_sst_gc_watermark() { - let opt = Arc::new(default_config_for_test()); + let opt = Arc::new(default_opts_for_test()); let (env, hummock_manager_ref, _, worker_node) = setup_compute_env(8080).await; let local_version_manager = prepare_local_version_manager(opt, env, hummock_manager_ref, worker_node).await; diff --git a/src/storage/hummock_test/src/mock_notification_client.rs b/src/storage/hummock_test/src/mock_notification_client.rs index 81457c07c5770..6ac06d607b0e0 100644 --- a/src/storage/hummock_test/src/mock_notification_client.rs +++ b/src/storage/hummock_test/src/mock_notification_client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -57,12 +57,7 @@ impl NotificationClient for MockNotificationClient { .insert_sender(subscribe_type, worker_key.clone(), tx.clone()) .await; - let hummock_version = self - .hummock_manager - .get_read_guard() - .await - .current_version - .clone(); + let hummock_version = self.hummock_manager.get_current_version().await; let meta_snapshot = MetaSnapshot { hummock_version: Some(hummock_version), version: Some(Default::default()), diff --git a/src/storage/hummock_test/src/snapshot_tests.rs b/src/storage/hummock_test/src/snapshot_tests.rs index 61228ee83fdb1..81c0e9e30604c 100644 --- a/src/storage/hummock_test/src/snapshot_tests.rs +++ b/src/storage/hummock_test/src/snapshot_tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -22,7 +22,7 @@ use risingwave_meta::hummock::test_utils::setup_compute_env; use risingwave_meta::hummock::MockHummockMetaClient; use risingwave_rpc_client::HummockMetaClient; use risingwave_storage::hummock::iterator::test_utils::mock_sstable_store; -use risingwave_storage::hummock::test_utils::default_config_for_test; +use risingwave_storage::hummock::test_utils::default_opts_for_test; use risingwave_storage::hummock::*; use risingwave_storage::monitor::{CompactorMetrics, HummockStateStoreMetrics}; use risingwave_storage::storage_value::StorageValue; @@ -274,7 +274,7 @@ async fn test_snapshot_range_scan_inner( #[ignore] async fn test_snapshot_backward_range_scan_inner(enable_sync: bool, enable_commit: bool) { let sstable_store = mock_sstable_store(); - let hummock_options = Arc::new(default_config_for_test()); + let hummock_options = Arc::new(default_opts_for_test()); let (env, hummock_manager_ref, _cluster_manager_ref, worker_node) = setup_compute_env(8080).await; let mock_hummock_meta_client = Arc::new(MockHummockMetaClient::new( diff --git a/src/storage/hummock_test/src/state_store_tests.rs b/src/storage/hummock_test/src/state_store_tests.rs index 42462aec47caf..168225312a7ae 100644 --- a/src/storage/hummock_test/src/state_store_tests.rs +++ b/src/storage/hummock_test/src/state_store_tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ use risingwave_meta::hummock::test_utils::setup_compute_env; use risingwave_meta::hummock::MockHummockMetaClient; use risingwave_rpc_client::HummockMetaClient; use risingwave_storage::hummock::iterator::test_utils::mock_sstable_store; -use risingwave_storage::hummock::test_utils::{count_stream, default_config_for_test}; +use risingwave_storage::hummock::test_utils::{count_stream, default_opts_for_test}; use risingwave_storage::hummock::{HummockStorage, HummockStorageV1}; use risingwave_storage::monitor::{CompactorMetrics, HummockStateStoreMetrics}; use risingwave_storage::storage_value::StorageValue; @@ -443,10 +443,6 @@ async fn test_state_store_sync_inner( hummock_storage: impl HummockStateStoreTestTrait, _meta_client: Arc, ) { - let mut config = default_config_for_test(); - config.shared_buffer_capacity_mb = 64; - config.write_conflict_detection_enabled = false; - let mut epoch: HummockEpoch = hummock_storage.get_pinned_version().max_committed_epoch() + 1; // ingest 16B batch @@ -539,7 +535,7 @@ async fn test_state_store_sync_inner( #[ignore] async fn test_reload_storage() { let sstable_store = mock_sstable_store(); - let hummock_options = Arc::new(default_config_for_test()); + let hummock_options = Arc::new(default_opts_for_test()); let (env, hummock_manager_ref, _cluster_manager_ref, worker_node) = setup_compute_env(8080).await; let meta_client = Arc::new(MockHummockMetaClient::new( @@ -602,7 +598,7 @@ async fn test_reload_storage() { // Mock something happened to storage internal, and storage is reloaded. 
drop(hummock_storage); let hummock_storage = HummockStorage::for_test( - hummock_options.clone(), + hummock_options, sstable_store.clone(), meta_client.clone(), get_notification_client_for_test(env, hummock_manager_ref, worker_node), @@ -779,7 +775,7 @@ async fn test_write_anytime_inner( let epoch1 = initial_epoch + 1; let assert_old_value = |epoch| { - let hummock_storage = hummock_storage.clone(); + let hummock_storage = &hummock_storage; async move { // check point get assert_eq!( @@ -904,7 +900,7 @@ async fn test_write_anytime_inner( assert_old_value(epoch1).await; let assert_new_value = |epoch| { - let hummock_storage = hummock_storage.clone(); + let hummock_storage = &hummock_storage; async move { // check point get assert_eq!( @@ -1203,7 +1199,7 @@ async fn test_multiple_epoch_sync_inner( .await .unwrap(); let test_get = || { - let hummock_storage_clone = hummock_storage.clone(); + let hummock_storage_clone = &hummock_storage; async move { assert_eq!( hummock_storage_clone diff --git a/src/storage/hummock_test/src/sync_point_tests.rs b/src/storage/hummock_test/src/sync_point_tests.rs index 56be0b0cd867d..7d17c68db60fb 100644 --- a/src/storage/hummock_test/src/sync_point_tests.rs +++ b/src/storage/hummock_test/src/sync_point_tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; use risingwave_hummock_sdk::key::{next_key, user_key}; use risingwave_hummock_sdk::HummockVersionId; use risingwave_meta::hummock::compaction::compaction_config::CompactionConfigBuilder; -use risingwave_meta::hummock::compaction::ManualCompactionOption; +use risingwave_meta::hummock::compaction::{default_level_selector, ManualCompactionOption}; use risingwave_meta::hummock::test_utils::{ add_ssts, setup_compute_env, setup_compute_env_with_config, }; @@ -159,7 +159,10 @@ async fn test_syncpoints_test_local_notification_receiver() { // Test cancel compaction task let _sst_infos = add_ssts(1, hummock_manager.as_ref(), context_id).await; let mut task = hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into()) + .get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + &mut default_level_selector(), + ) .await .unwrap() .unwrap(); @@ -246,15 +249,11 @@ async fn test_syncpoints_get_in_delete_range_boundary() { TableId::from(existing_table_id), ) .await; - let compact_ctx = Arc::new( - prepare_compactor_and_filter( - &storage, - &hummock_meta_client, - hummock_manager_ref.clone(), - existing_table_id, - ) - .await, - ); + let compact_ctx = Arc::new(prepare_compactor_and_filter( + &storage, + &hummock_meta_client, + existing_table_id, + )); let compactor_manager = hummock_manager_ref.compactor_manager_ref_for_test(); compactor_manager.add_compactor(worker_node.id, u64::MAX); diff --git a/src/storage/hummock_test/src/test_utils.rs b/src/storage/hummock_test/src/test_utils.rs index 6c46dc70d323d..2043a8802ece8 100644 --- a/src/storage/hummock_test/src/test_utils.rs +++ b/src/storage/hummock_test/src/test_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
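Another recurring cleanup in the state-store test hunks above replaces `hummock_storage.clone()` with a shared reference inside the closures that build async blocks (`test_get`, `assert_old_value`, `assert_new_value`): the async block only needs `&storage`, and the storage outlives every await in the test, so the clone is unnecessary. A toy, self-contained version of the pattern, with invented names and assuming the `tokio` crate as a dependency:

```rust
// Stand-in for the (expensive to clone) storage handle used in these tests.
struct Store(Vec<u8>);

#[tokio::main]
async fn main() {
    let store = Store(vec![1, 2, 3]);
    let test_get = || {
        // Capture a shared reference; the async block moves only the reference,
        // so calling the closure repeatedly never clones the store.
        let store = &store;
        async move { store.0.len() }
    };
    assert_eq!(test_get().await, 3);
    assert_eq!(test_get().await, 3);
}
```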
@@ -38,7 +38,7 @@ use risingwave_meta::manager::MetaSrvEnv; use risingwave_meta::storage::{MemStore, MetaStore}; use risingwave_pb::catalog::Table as ProstTable; use risingwave_pb::common::WorkerNode; -use risingwave_pb::hummock::pin_version_response; +use risingwave_pb::hummock::version_update_payload; use risingwave_rpc_client::HummockMetaClient; use risingwave_storage::error::StorageResult; use risingwave_storage::hummock::backup_reader::BackupReader; @@ -47,7 +47,7 @@ use risingwave_storage::hummock::iterator::test_utils::mock_sstable_store; use risingwave_storage::hummock::local_version::pinned_version::PinnedVersion; use risingwave_storage::hummock::observer_manager::HummockObserverNode; use risingwave_storage::hummock::store::state_store::LocalHummockStorage; -use risingwave_storage::hummock::test_utils::default_config_for_test; +use risingwave_storage::hummock::test_utils::default_opts_for_test; use risingwave_storage::hummock::{HummockStorage, HummockStorageV1}; use risingwave_storage::monitor::{CompactorMetrics, HummockStateStoreMetrics}; use risingwave_storage::storage_value::StorageValue; @@ -84,7 +84,7 @@ pub async fn prepare_first_valid_version( .await; observer_manager.start().await; let hummock_version = match rx.recv().await { - Some(HummockEvent::VersionUpdate(pin_version_response::Payload::PinnedVersion( + Some(HummockEvent::VersionUpdate(version_update_payload::Payload::PinnedVersion( version, ))) => version, _ => unreachable!("should be full version"), @@ -205,19 +205,13 @@ impl StateStoreWrite for LocalGlobalState } } -impl Clone for LocalGlobalStateStoreHolder { +impl Clone for LocalGlobalStateStoreHolder { fn clone(&self) -> Self { - Self { - local: self.local.clone(), - global: self.global.clone(), - } + unimplemented!() } } -impl StateStore for LocalGlobalStateStoreHolder -where - ::Local: Clone, -{ +impl StateStore for LocalGlobalStateStoreHolder { type Local = G::Local; type NewLocalFuture<'a> = impl Future + Send; @@ -283,7 +277,7 @@ impl HummockStateStoreTestTrait for HummockStorageV1 { pub async fn with_hummock_storage_v1() -> (HummockStorageV1, Arc) { let sstable_store = mock_sstable_store(); - let hummock_options = Arc::new(default_config_for_test()); + let hummock_options = Arc::new(default_opts_for_test()); let (env, hummock_manager_ref, _cluster_manager_ref, worker_node) = setup_compute_env(8080).await; let meta_client = Arc::new(MockHummockMetaClient::new( @@ -317,7 +311,7 @@ pub async fn with_hummock_storage_v2( table_id: TableId, ) -> (HummockV2MixedStateStore, Arc) { let sstable_store = mock_sstable_store(); - let hummock_options = Arc::new(default_config_for_test()); + let hummock_options = Arc::new(default_opts_for_test()); let (env, hummock_manager_ref, _cluster_manager_ref, worker_node) = setup_compute_env(8080).await; let meta_client = Arc::new(MockHummockMetaClient::new( @@ -415,7 +409,7 @@ impl HummockTestEnv { pub async fn prepare_hummock_test_env() -> HummockTestEnv { let sstable_store = mock_sstable_store(); - let hummock_options = Arc::new(default_config_for_test()); + let hummock_options = Arc::new(default_opts_for_test()); let (env, hummock_manager_ref, _cluster_manager_ref, worker_node) = setup_compute_env(8080).await; diff --git a/src/storage/hummock_test/src/vacuum_tests.rs b/src/storage/hummock_test/src/vacuum_tests.rs index 416f7d4971ac9..8dec2541d075b 100644 --- a/src/storage/hummock_test/src/vacuum_tests.rs +++ b/src/storage/hummock_test/src/vacuum_tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 
RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/error.rs b/src/storage/src/error.rs index 24b393e4daf59..441f41215ec3b 100644 --- a/src/storage/src/error.rs +++ b/src/storage/src/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/backup_reader.rs b/src/storage/src/hummock/backup_reader.rs index d0c928a91fc42..1bcfe9bc1a72a 100644 --- a/src/storage/src/hummock/backup_reader.rs +++ b/src/storage/src/hummock/backup_reader.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -26,7 +26,6 @@ use risingwave_backup::storage::{ DummyMetaSnapshotStorage, MetaSnapshotStorageRef, ObjectStoreMetaSnapshotStorage, }; use risingwave_backup::MetaSnapshotId; -use risingwave_common::config::RwConfig; use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; use risingwave_object_store::object::parse_remote_object_store; @@ -42,20 +41,19 @@ type VersionHolder = ( ); pub async fn parse_meta_snapshot_storage( - config: &RwConfig, + storage_url: &str, + storage_directory: &str, ) -> StorageResult { let backup_object_store = Arc::new( parse_remote_object_store( - &config.backup.storage_url, + storage_url, Arc::new(ObjectStoreMetrics::unused()), - true, "Meta Backup", ) .await, ); let store = Arc::new( - ObjectStoreMetaSnapshotStorage::new(&config.backup.storage_directory, backup_object_store) - .await?, + ObjectStoreMetaSnapshotStorage::new(storage_directory, backup_object_store).await?, ); Ok(store) } diff --git a/src/storage/src/hummock/block_cache.rs b/src/storage/src/hummock/block_cache.rs index 180aa8c53e678..208466397a7ed 100644 --- a/src/storage/src/hummock/block_cache.rs +++ b/src/storage/src/hummock/block_cache.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/compactor/compaction_executor.rs b/src/storage/src/hummock/compactor/compaction_executor.rs index ba1ec04f186e2..c7336a61203b7 100644 --- a/src/storage/src/hummock/compactor/compaction_executor.rs +++ b/src/storage/src/hummock/compactor/compaction_executor.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/compactor/compaction_filter.rs b/src/storage/src/hummock/compactor/compaction_filter.rs index b413449a6a61d..dd53d0a2434d6 100644 --- a/src/storage/src/hummock/compactor/compaction_filter.rs +++ b/src/storage/src/hummock/compactor/compaction_filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/storage/src/hummock/compactor/compaction_utils.rs b/src/storage/src/hummock/compactor/compaction_utils.rs new file mode 100644 index 0000000000000..d06052d55b25a --- /dev/null +++ b/src/storage/src/hummock/compactor/compaction_utils.rs @@ -0,0 +1,249 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashSet; +use std::marker::PhantomData; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; + +use itertools::Itertools; +use minstant::Instant; +use risingwave_common::constants::hummock::CompactionFilterFlag; +use risingwave_hummock_sdk::filter_key_extractor::FilterKeyExtractorImpl; +use risingwave_hummock_sdk::key::FullKey; +use risingwave_hummock_sdk::key_range::KeyRange; +use risingwave_hummock_sdk::prost_key_range::KeyRangeExt; +use risingwave_hummock_sdk::table_stats::TableStatsMap; +use risingwave_hummock_sdk::{HummockEpoch, KeyComparator}; +use risingwave_pb::hummock::{compact_task, CompactTask, KeyRange as KeyRange_vec, LevelType}; + +pub use super::context::CompactorContext; +use crate::hummock::compactor::{ + MultiCompactionFilter, StateCleanUpCompactionFilter, TtlCompactionFilter, +}; +use crate::hummock::multi_builder::TableBuilderFactory; +use crate::hummock::sstable::DEFAULT_ENTRY_SIZE; +use crate::hummock::{ + CachePolicy, FilterBuilder, HummockResult, MemoryLimiter, SstableBuilder, + SstableBuilderOptions, SstableIdManagerRef, SstableWriterFactory, SstableWriterOptions, +}; +use crate::monitor::StoreLocalStatistic; + +pub struct RemoteBuilderFactory { + pub sstable_id_manager: SstableIdManagerRef, + pub limiter: Arc, + pub options: SstableBuilderOptions, + pub policy: CachePolicy, + pub remote_rpc_cost: Arc, + pub filter_key_extractor: Arc, + pub sstable_writer_factory: W, + pub _phantom: PhantomData, +} + +#[async_trait::async_trait] +impl TableBuilderFactory for RemoteBuilderFactory { + type Filter = F; + type Writer = W::Writer; + + async fn open_builder(&mut self) -> HummockResult> { + // TODO: memory consumption may vary based on `SstableWriter`, `ObjectStore` and cache + let tracker = self + .limiter + .require_memory((self.options.capacity + self.options.block_capacity) as u64) + .await; + let timer = Instant::now(); + let table_id = self.sstable_id_manager.get_new_sst_id().await?; + let cost = (timer.elapsed().as_secs_f64() * 1000000.0).round() as u64; + self.remote_rpc_cost.fetch_add(cost, Ordering::Relaxed); + let writer_options = SstableWriterOptions { + capacity_hint: Some(self.options.capacity + self.options.block_capacity), + tracker: Some(tracker), + policy: self.policy, + }; + let writer = self + .sstable_writer_factory + .create_sst_writer(table_id, writer_options)?; + let builder = SstableBuilder::new( + table_id, + writer, + Self::Filter::create( + self.options.bloom_false_positive, + self.options.capacity / DEFAULT_ENTRY_SIZE + 1, + ), + self.options.clone(), + self.filter_key_extractor.clone(), + ); + Ok(builder) + } +} + +/// `CompactionStatistics` will 
count the results of each compact split +#[derive(Default, Debug)] +pub struct CompactionStatistics { + // to report per-table metrics + pub delta_drop_stat: TableStatsMap, + + // to calculate delete ratio + pub iter_total_key_counts: u64, + pub iter_drop_key_counts: u64, +} + +impl CompactionStatistics { + #[allow(dead_code)] + fn delete_ratio(&self) -> Option { + if self.iter_total_key_counts == 0 { + return None; + } + + Some(self.iter_drop_key_counts / self.iter_total_key_counts) + } +} + +#[derive(Clone)] +pub struct TaskConfig { + pub key_range: KeyRange, + pub cache_policy: CachePolicy, + pub gc_delete_keys: bool, + pub watermark: u64, + /// `stats_target_table_ids` decides whether a dropped key should be counted as a table stats + /// change. For a divided SST as input, a dropped key shouldn't be counted if its table id + /// doesn't belong to this divided SST. See `Compactor::compact_and_build_sst`. + pub stats_target_table_ids: Option>, + pub task_type: compact_task::TaskType, +} + +pub fn estimate_memory_use_for_compaction(task: &CompactTask) -> u64 { + let mut total_memory_size = 0; + for level in &task.input_ssts { + if level.level_type == LevelType::Nonoverlapping as i32 { + if let Some(table) = level.table_infos.first() { + total_memory_size += table.file_size * task.splits.len() as u64; + } + } else { + for table in &level.table_infos { + total_memory_size += table.file_size; + } + } + } + total_memory_size +} + +pub fn build_multi_compaction_filter(compact_task: &CompactTask) -> MultiCompactionFilter { + use risingwave_common::catalog::TableOption; + let mut multi_filter = MultiCompactionFilter::default(); + let compaction_filter_flag = + CompactionFilterFlag::from_bits(compact_task.compaction_filter_mask).unwrap_or_default(); + if compaction_filter_flag.contains(CompactionFilterFlag::STATE_CLEAN) { + let state_clean_up_filter = Box::new(StateCleanUpCompactionFilter::new( + HashSet::from_iter(compact_task.existing_table_ids.clone()), + )); + + multi_filter.register(state_clean_up_filter); + } + + if compaction_filter_flag.contains(CompactionFilterFlag::TTL) { + let id_to_ttl = compact_task + .table_options + .iter() + .filter(|id_to_option| { + let table_option: TableOption = id_to_option.1.into(); + table_option.retention_seconds.is_some() + }) + .map(|id_to_option| (*id_to_option.0, id_to_option.1.retention_seconds)) + .collect(); + + let ttl_filter = Box::new(TtlCompactionFilter::new( + id_to_ttl, + compact_task.current_epoch_time, + )); + multi_filter.register(ttl_filter); + } + + multi_filter +} + +pub async fn generate_splits(compact_task: &mut CompactTask, context: Arc) { + let sstable_infos = compact_task + .input_ssts + .iter() + .flat_map(|level| level.table_infos.iter()) + .collect_vec(); + + let compaction_size = compact_task + .input_ssts + .iter() + .flat_map(|level| level.table_infos.iter()) + .map(|table_info| table_info.file_size) + .sum::(); + + let sstable_size = (context.storage_opts.sstable_size_mb as u64) << 20; + if compaction_size > sstable_size * 2 { + let mut indexes = vec![]; + // preload the meta and get the smallest key to split sub_compaction + for sstable_info in sstable_infos { + indexes.extend( + context + .sstable_store + .sstable(sstable_info, &mut StoreLocalStatistic::default()) + .await + .unwrap() + .value() + .meta + .block_metas + .iter() + .map(|block| { + let data_size = block.len; + let full_key = FullKey { + user_key: FullKey::decode(&block.smallest_key).user_key, + epoch: HummockEpoch::MAX, + } + .encode(); + (data_size as u64, 
full_key) + }) + .collect_vec(), + ); + } + // sort by key, since every data block is assumed to be roughly the same size + indexes.sort_by(|a, b| KeyComparator::compare_encoded_full_key(a.1.as_ref(), b.1.as_ref())); + let mut splits: Vec = vec![]; + splits.push(KeyRange_vec::new(vec![], vec![])); + let parallelism = std::cmp::min( + indexes.len() as u64, + context.storage_opts.max_sub_compaction as u64, + ); + let sub_compaction_data_size = std::cmp::max(compaction_size / parallelism, sstable_size); + let parallelism = compaction_size / sub_compaction_data_size; + + if parallelism > 1 { + let mut last_buffer_size = 0; + let mut last_key: Vec = vec![]; + let mut remaining_size = indexes.iter().map(|block| block.0).sum::(); + for (data_size, key) in indexes { + if last_buffer_size >= sub_compaction_data_size + && !last_key.eq(&key) + && remaining_size > sstable_size + { + splits.last_mut().unwrap().right = key.clone(); + splits.push(KeyRange_vec::new(key.clone(), vec![])); + last_buffer_size = data_size; + } else { + last_buffer_size += data_size; + } + remaining_size -= data_size; + last_key = key; + } + compact_task.splits = splits; + } + } +} diff --git a/src/storage/src/hummock/compactor/compactor_runner.rs b/src/storage/src/hummock/compactor/compactor_runner.rs index ad04ef0ae3754..16ddb3b8c658e 100644 --- a/src/storage/src/hummock/compactor/compactor_runner.rs +++ b/src/storage/src/hummock/compactor/compactor_runner.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ use risingwave_hummock_sdk::key_range::{KeyRange, KeyRangeCommon}; use risingwave_pb::hummock::{CompactTask, LevelType}; use super::task_progress::TaskProgress; +use super::TaskConfig; use crate::hummock::compactor::iterator::ConcatSstableIterator; use crate::hummock::compactor::{CompactOutput, CompactionFilter, Compactor, CompactorContext}; use crate::hummock::iterator::{Forward, HummockIterator, UnorderedMergeIteratorInner}; @@ -42,15 +43,15 @@ pub struct CompactorRunner { } impl CompactorRunner { - pub fn new(split_index: usize, context: &CompactorContext, task: CompactTask) -> Self { - let max_target_file_size = context.context.options.sstable_size_mb as usize * (1 << 20); + pub fn new(split_index: usize, context: Arc, task: CompactTask) -> Self { + let max_target_file_size = context.storage_opts.sstable_size_mb as usize * (1 << 20); let total_file_size = task .input_ssts .iter() .flat_map(|level| level.table_infos.iter()) .map(|table| table.file_size) .sum::(); - let mut options: SstableBuilderOptions = context.context.options.as_ref().into(); + let mut options: SstableBuilderOptions = context.storage_opts.as_ref().into(); options.capacity = std::cmp::min(task.target_file_size as usize, max_target_file_size); options.compression_algorithm = match task.compression_algorithm { 0 => CompressionAlgorithm::None, @@ -76,20 +77,24 @@ impl CompactorRunner { .collect_vec() }) .collect(); + let compactor = Compactor::new( - context.context.clone(), + context.clone(), options, - key_range.clone(), - CachePolicy::NotFill, - task.gc_delete_keys, - task.watermark, - Some(stats_target_table_ids), + TaskConfig { + key_range: key_range.clone(), + cache_policy: CachePolicy::NotFill, + gc_delete_keys: task.gc_delete_keys, + watermark: task.watermark, + stats_target_table_ids: Some(stats_target_table_ids), + task_type: task.task_type(), + }, ); Self { compactor, 
compact_task: task, - sstable_store: context.context.sstable_store.clone(), + sstable_store: context.sstable_store.clone(), key_range, split_index, } @@ -103,7 +108,7 @@ impl CompactorRunner { task_progress: Arc, ) -> HummockResult { let iter = self.build_sst_iter()?; - let (ssts, table_stats_map) = self + let (ssts, compaction_stat) = self .compactor .compact_key_range( iter, @@ -113,7 +118,7 @@ impl CompactorRunner { Some(task_progress), ) .await?; - Ok((self.split_index, ssts, table_stats_map)) + Ok((self.split_index, ssts, compaction_stat)) } pub async fn build_delete_range_iter( diff --git a/src/storage/src/hummock/compactor/context.rs b/src/storage/src/hummock/compactor/context.rs index 79bf6d82b52a7..41252ea0033b1 100644 --- a/src/storage/src/hummock/compactor/context.rs +++ b/src/storage/src/hummock/compactor/context.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ use std::sync::Arc; -use risingwave_common::config::StorageConfig; use risingwave_hummock_sdk::compact::CompactorRuntimeConfig; use risingwave_hummock_sdk::filter_key_extractor::FilterKeyExtractorManagerRef; use risingwave_rpc_client::HummockMetaClient; @@ -24,12 +23,13 @@ use crate::hummock::compactor::CompactionExecutor; use crate::hummock::sstable_store::SstableStoreRef; use crate::hummock::{MemoryLimiter, SstableIdManagerRef}; use crate::monitor::CompactorMetrics; +use crate::opts::StorageOpts; /// A `CompactorContext` describes the context of a compactor. #[derive(Clone)] -pub struct Context { - /// Storage configurations. - pub options: Arc, +pub struct CompactorContext { + /// Storage options. + pub storage_opts: Arc, /// The meta client. 
pub hummock_meta_client: Arc, @@ -52,28 +52,32 @@ pub struct Context { pub sstable_id_manager: SstableIdManagerRef, pub task_progress_manager: TaskProgressManagerRef, + + pub compactor_runtime_config: Arc>, } -impl Context { +impl CompactorContext { pub fn new_local_compact_context( - options: Arc, + storage_opts: Arc, sstable_store: SstableStoreRef, hummock_meta_client: Arc, compactor_metrics: Arc, sstable_id_manager: SstableIdManagerRef, filter_key_extractor_manager: FilterKeyExtractorManagerRef, + compactor_runtime_config: CompactorRuntimeConfig, ) -> Self { - let compaction_executor = if options.share_buffer_compaction_worker_threads_number == 0 { + let compaction_executor = if storage_opts.share_buffer_compaction_worker_threads_number == 0 + { Arc::new(CompactionExecutor::new(None)) } else { Arc::new(CompactionExecutor::new(Some( - options.share_buffer_compaction_worker_threads_number as usize, + storage_opts.share_buffer_compaction_worker_threads_number as usize, ))) }; // not limit memory for local compact let memory_limiter = MemoryLimiter::unlimit(); - Context { - options, + Self { + storage_opts, hummock_meta_client, sstable_store, compactor_metrics, @@ -83,33 +87,11 @@ impl Context { read_memory_limiter: memory_limiter, sstable_id_manager, task_progress_manager: Default::default(), - } - } -} -#[derive(Clone)] -pub struct CompactorContext { - pub context: Arc, - config: Arc>, -} - -impl CompactorContext { - pub fn new(context: Arc) -> Self { - Self::with_config( - context, - CompactorRuntimeConfig { - max_concurrent_task_number: u64::MAX, - }, - ) - } - - pub fn with_config(context: Arc, config: CompactorRuntimeConfig) -> Self { - Self { - context, - config: Arc::new(tokio::sync::Mutex::new(config)), + compactor_runtime_config: Arc::new(tokio::sync::Mutex::new(compactor_runtime_config)), } } pub async fn lock_config(&self) -> tokio::sync::MutexGuard<'_, CompactorRuntimeConfig> { - self.config.lock().await + self.compactor_runtime_config.lock().await } } diff --git a/src/storage/src/hummock/compactor/iterator.rs b/src/storage/src/hummock/compactor/iterator.rs index 34a21cae8e647..79976dc420a67 100644 --- a/src/storage/src/hummock/compactor/iterator.rs +++ b/src/storage/src/hummock/compactor/iterator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -42,6 +42,9 @@ struct SstableStreamIterator { /// Counts the time used for IO. stats_ptr: Arc, + + // For debugging + sstable_info: SstableInfo, } impl SstableStreamIterator { @@ -60,6 +63,7 @@ impl SstableStreamIterator { /// Initialises a new [`SstableStreamIterator`] which iterates over the given [`BlockStream`]. /// The iterator reads at most `max_block_count` from the stream. 
pub fn new( + sstable_info: &SstableInfo, block_stream: BlockStream, max_block_count: usize, stats: &StoreLocalStatistic, @@ -69,6 +73,7 @@ impl SstableStreamIterator { block_iter: None, remaining_blocks: max_block_count, stats_ptr: stats.remote_io_time.clone(), + sstable_info: sstable_info.clone(), } } @@ -152,18 +157,33 @@ impl SstableStreamIterator { } fn key(&self) -> &[u8] { - self.block_iter.as_ref().expect("no block iter").key() + self.block_iter + .as_ref() + .unwrap_or_else(|| panic!("no block iter sstinfo={}", self.sst_debug_info())) + .key() } fn value(&self) -> HummockValue<&[u8]> { - let raw_value = self.block_iter.as_ref().expect("no block iter").value(); - HummockValue::from_slice(raw_value).expect("decode error") + let raw_value = self + .block_iter + .as_ref() + .unwrap_or_else(|| panic!("no block iter sstinfo={}", self.sst_debug_info())) + .value(); + HummockValue::from_slice(raw_value) + .unwrap_or_else(|_| panic!("decode error sstinfo={}", self.sst_debug_info())) } fn is_valid(&self) -> bool { // True iff block_iter exists and is valid. self.block_iter.as_ref().map_or(false, |i| i.is_valid()) } + + fn sst_debug_info(&self) -> String { + format!( + "sst_id={}, meta_offset={}, table_ids={:?}", + self.sstable_info.id, self.sstable_info.meta_offset, self.sstable_info.table_ids + ) + } } /// Iterates over the KV-pairs of a given list of SSTs. The key-ranges of these SSTs are assumed to @@ -264,8 +284,12 @@ impl ConcatSstableIterator { let add = (now.elapsed().as_secs_f64() * 1000.0).ceil(); stats_ptr.fetch_add(add as u64, atomic::Ordering::Relaxed); - let mut sstable_iter = - SstableStreamIterator::new(block_stream, end_index - start_index, &self.stats); + let mut sstable_iter = SstableStreamIterator::new( + table_info, + block_stream, + end_index - start_index, + &self.stats, + ); sstable_iter.seek(seek_key).await?; self.sstable_iter = Some(sstable_iter); diff --git a/src/storage/src/hummock/compactor/mod.rs b/src/storage/src/hummock/compactor/mod.rs index e7ddac8302278..44749bc5412a1 100644 --- a/src/storage/src/hummock/compactor/mod.rs +++ b/src/storage/src/hummock/compactor/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
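Stepping back to `generate_splits` in the new `compaction_utils.rs` above: whether a compaction task is cut into sub-compactions is decided by plain integer arithmetic over the task's input size. The standalone sketch below mirrors that arithmetic with made-up numbers (a 256 MiB target SSTable size, 4 GiB of input, `max_sub_compaction = 4`) to show when splitting kicks in and how many key ranges result:

```rust
fn main() {
    // Made-up inputs standing in for the fields read by generate_splits.
    let sstable_size: u64 = 256 << 20; // storage_opts.sstable_size_mb = 256
    let compaction_size: u64 = 4096 << 20; // sum of input SST file sizes
    let max_sub_compaction: u64 = 4; // storage_opts.max_sub_compaction
    let block_count: u64 = 10_000; // block metas collected from the inputs

    // Splitting only happens when the input is larger than two target SSTables.
    assert!(compaction_size > sstable_size * 2);

    // Same arithmetic as generate_splits: each split gets at least one
    // SSTable's worth of data, and parallelism is capped by max_sub_compaction.
    let parallelism = block_count.min(max_sub_compaction);
    let sub_compaction_data_size = (compaction_size / parallelism).max(sstable_size);
    let parallelism = compaction_size / sub_compaction_data_size;

    assert_eq!(sub_compaction_data_size, 1 << 30); // 1 GiB per split
    assert_eq!(parallelism, 4); // so the task is cut into 4 key ranges
}
```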
@@ -14,6 +14,7 @@ mod compaction_executor; mod compaction_filter; +mod compaction_utils; mod compactor_runner; mod context; mod iterator; @@ -21,119 +22,62 @@ mod shared_buffer_compact; pub(super) mod task_progress; use std::collections::{HashMap, HashSet}; +use std::marker::PhantomData; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; -use std::time::{Duration, Instant}; +use std::time::Duration; pub use compaction_executor::CompactionExecutor; pub use compaction_filter::{ CompactionFilter, DummyCompactionFilter, MultiCompactionFilter, StateCleanUpCompactionFilter, TtlCompactionFilter, }; -pub use context::{CompactorContext, Context}; +pub use context::CompactorContext; use futures::future::try_join_all; use futures::{stream, StreamExt}; pub use iterator::ConcatSstableIterator; use itertools::Itertools; -use risingwave_common::constants::hummock::CompactionFilterFlag; use risingwave_hummock_sdk::compact::compact_task_to_string; use risingwave_hummock_sdk::filter_key_extractor::FilterKeyExtractorImpl; use risingwave_hummock_sdk::key::FullKey; -use risingwave_hummock_sdk::key_range::KeyRange; -use risingwave_hummock_sdk::prost_key_range::KeyRangeExt; use risingwave_hummock_sdk::table_stats::{add_table_stats_map, TableStats, TableStatsMap}; -use risingwave_hummock_sdk::{HummockEpoch, KeyComparator, LocalSstableInfo}; +use risingwave_hummock_sdk::LocalSstableInfo; use risingwave_pb::hummock::compact_task::TaskStatus; use risingwave_pb::hummock::subscribe_compact_tasks_response::Task; -use risingwave_pb::hummock::{ - CompactTask, CompactTaskProgress, KeyRange as KeyRange_vec, LevelType, - SubscribeCompactTasksResponse, -}; +use risingwave_pb::hummock::{CompactTask, CompactTaskProgress, SubscribeCompactTasksResponse}; use risingwave_rpc_client::HummockMetaClient; pub use shared_buffer_compact::compact; use tokio::sync::oneshot::{Receiver, Sender}; use tokio::task::JoinHandle; +pub use self::compaction_utils::{CompactionStatistics, RemoteBuilderFactory, TaskConfig}; use self::task_progress::TaskProgress; use super::multi_builder::CapacitySplitTableBuilder; -use super::{HummockResult, SstableBuilderOptions, SstableWriterOptions}; +use super::{HummockResult, SstableBuilderOptions, XorFilterBuilder}; +use crate::hummock::compactor::compaction_utils::{ + build_multi_compaction_filter, estimate_memory_use_for_compaction, generate_splits, +}; use crate::hummock::compactor::compactor_runner::CompactorRunner; use crate::hummock::compactor::task_progress::TaskProgressGuard; use crate::hummock::iterator::{Forward, HummockIterator}; use crate::hummock::multi_builder::{SplitTableOutput, TableBuilderFactory}; -use crate::hummock::utils::MemoryLimiter; use crate::hummock::vacuum::Vacuum; use crate::hummock::{ - validate_ssts, BatchSstableWriterFactory, CachePolicy, DeleteRangeAggregator, HummockError, - RangeTombstonesCollector, SstableBuilder, SstableIdManagerRef, SstableWriterFactory, - StreamingSstableWriterFactory, + validate_ssts, BatchSstableWriterFactory, DeleteRangeAggregator, HummockError, + RangeTombstonesCollector, SstableWriterFactory, StreamingSstableWriterFactory, }; use crate::monitor::{CompactorMetrics, StoreLocalStatistic}; -pub struct RemoteBuilderFactory { - sstable_id_manager: SstableIdManagerRef, - limiter: Arc, - options: SstableBuilderOptions, - policy: CachePolicy, - remote_rpc_cost: Arc, - filter_key_extractor: Arc, - sstable_writer_factory: F, -} - -#[async_trait::async_trait] -impl TableBuilderFactory for RemoteBuilderFactory { - type Writer = F::Writer; - 
- async fn open_builder(&self) -> HummockResult> { - // TODO: memory consumption may vary based on `SstableWriter`, `ObjectStore` and cache - let tracker = self - .limiter - .require_memory((self.options.capacity + self.options.block_capacity) as u64) - .await; - let timer = Instant::now(); - let table_id = self.sstable_id_manager.get_new_sst_id().await?; - let cost = (timer.elapsed().as_secs_f64() * 1000000.0).round() as u64; - self.remote_rpc_cost.fetch_add(cost, Ordering::Relaxed); - let writer_options = SstableWriterOptions { - capacity_hint: Some(self.options.capacity + self.options.block_capacity), - tracker: Some(tracker), - policy: self.policy, - }; - let writer = self - .sstable_writer_factory - .create_sst_writer(table_id, writer_options)?; - let builder = SstableBuilder::new( - table_id, - writer, - self.options.clone(), - self.filter_key_extractor.clone(), - ); - Ok(builder) - } -} - -#[derive(Clone)] -pub struct TaskConfig { - pub key_range: KeyRange, - pub cache_policy: CachePolicy, - pub gc_delete_keys: bool, - pub watermark: u64, - /// `stats_target_table_ids` decides whether a dropped key should be counted as table stats - /// change. For an divided SST as input, a dropped key shouldn't be counted if its table id - /// doesn't belong to this divided SST. See `Compactor::compact_and_build_sst`. - pub stats_target_table_ids: Option>, -} - /// Implementation of Hummock compaction. pub struct Compactor { /// The context of the compactor. - context: Arc, + context: Arc, task_config: TaskConfig, options: SstableBuilderOptions, get_id_time: Arc, } -pub type CompactOutput = (usize, Vec, TableStatsMap); +pub type CompactOutput = (usize, Vec, CompactionStatistics); impl Compactor { /// Handles a compaction task and reports its status to hummock manager. @@ -143,7 +87,7 @@ impl Compactor { mut compact_task: CompactTask, mut shutdown_rx: Receiver<()>, ) -> TaskStatus { - let context = compactor_context.context.clone(); + let context = compactor_context.clone(); // Set a watermark SST id to prevent full GC from accidentally deleting SSTs for in-progress // write op. The watermark is invalidated when this method exits. 
let tracker_id = match context.sstable_id_manager.add_watermark_sst_id(None).await { @@ -236,7 +180,7 @@ impl Compactor { TaskProgressGuard::new(compact_task.task_id, context.task_progress_manager.clone()); let delete_range_agg = match CompactorRunner::build_delete_range_iter( &compact_task, - &compactor_context.context.sstable_store, + &compactor_context.sstable_store, &mut multi_filter, ) .await @@ -251,11 +195,8 @@ impl Compactor { for (split_index, _) in compact_task.splits.iter().enumerate() { let filter = multi_filter.clone(); let multi_filter_key_extractor = multi_filter_key_extractor.clone(); - let compactor_runner = CompactorRunner::new( - split_index, - compactor_context.as_ref(), - compact_task.clone(), - ); + let compactor_runner = + CompactorRunner::new(split_index, compactor_context.clone(), compact_task.clone()); let del_agg = delete_range_agg.clone(); let task_progress = task_progress_guard.progress.clone(); let handle = tokio::spawn(async move { @@ -276,8 +217,8 @@ impl Compactor { } future_result = buffered.next() => { match future_result { - Some(Ok(Ok((split_index, ssts, table_stats_map)))) => { - output_ssts.push((split_index, ssts, table_stats_map)); + Some(Ok(Ok((split_index, ssts, compact_stat)))) => { + output_ssts.push((split_index, ssts, compact_stat)); } Some(Ok(Err(e))) => { task_status = TaskStatus::ExecuteFailed; @@ -304,7 +245,9 @@ impl Compactor { } // Sort by split/key range index. - output_ssts.sort_by_key(|(split_index, ..)| *split_index); + if !output_ssts.is_empty() { + output_ssts.sort_by_key(|(split_index, ..)| *split_index); + } sync_point::sync_point!("BEFORE_COMPACT_REPORT"); // After a compaction is done, mutate the compaction task. @@ -328,7 +271,7 @@ impl Compactor { /// Fill in the compact task and let hummock manager know the compaction output ssts. async fn compact_done( compact_task: &mut CompactTask, - context: Arc, + context: Arc, output_ssts: Vec, task_status: TaskStatus, ) { @@ -338,8 +281,15 @@ impl Compactor { .sorted_output_ssts .reserve(compact_task.splits.len()); let mut compaction_write_bytes = 0; - for (_, ssts, table_stats_change) in output_ssts { - add_table_stats_map(&mut table_stats_map, &table_stats_change); + for ( + _, + ssts, + CompactionStatistics { + delta_drop_stat, .. + }, + ) in output_ssts + { + add_table_stats_map(&mut table_stats_map, &delta_drop_stat); for sst_info in ssts { compaction_write_bytes += sst_info.file_size(); compact_task.sorted_output_ssts.push(sst_info.sst_info); @@ -382,7 +332,7 @@ impl Compactor { type CompactionShutdownMap = Arc>>>; let (shutdown_tx, mut shutdown_rx) = tokio::sync::oneshot::channel(); let stream_retry_interval = Duration::from_secs(60); - let task_progress = compactor_context.context.task_progress_manager.clone(); + let task_progress = compactor_context.task_progress_manager.clone(); let task_progress_update_interval = Duration::from_millis(1000); let join_handle = tokio::spawn(async move { let shutdown_map = CompactionShutdownMap::default(); @@ -419,7 +369,7 @@ impl Compactor { }; drop(config); - let executor = compactor_context.context.compaction_executor.clone(); + let executor = compactor_context.compaction_executor.clone(); // This inner loop is to consume stream or report task progress. 'consume_stream: loop { let message = tokio::select! 
{ @@ -471,7 +421,7 @@ impl Compactor { Task::VacuumTask(vacuum_task) => { Vacuum::vacuum( vacuum_task, - context.context.sstable_store.clone(), + context.sstable_store.clone(), meta_client, ) .await; @@ -479,7 +429,7 @@ impl Compactor { Task::FullScanTask(full_scan_task) => { Vacuum::full_scan( full_scan_task, - context.context.sstable_store.clone(), + context.sstable_store.clone(), meta_client, ) .await; @@ -487,7 +437,7 @@ impl Compactor { Task::ValidationTask(validation_task) => { validate_ssts( validation_task, - context.context.sstable_store.clone(), + context.sstable_store.clone(), ) .await; } @@ -535,7 +485,7 @@ impl Compactor { compactor_metrics: Arc, mut iter: impl HummockIterator, mut compaction_filter: impl CompactionFilter, - ) -> HummockResult + ) -> HummockResult where F: TableBuilderFactory, { @@ -566,8 +516,10 @@ impl Compactor { let mut table_stats_drop = TableStatsMap::default(); let mut last_table_stats = TableStats::default(); let mut last_table_id = None; + let mut compaction_statistics = CompactionStatistics::default(); while iter.is_valid() { let iter_key = iter.key(); + compaction_statistics.iter_total_key_counts += 1; let is_new_user_key = last_key.is_empty() || iter_key.user_key != last_key.user_key.as_ref(); @@ -619,6 +571,8 @@ impl Compactor { watermark_can_see_last_key = true; } if drop { + compaction_statistics.iter_drop_key_counts += 1; + let should_count = match task_config.stats_target_table_ids.as_ref() { Some(target_table_ids) => { target_table_ids.contains(&last_key.user_key.table_id.table_id) @@ -646,31 +600,23 @@ impl Compactor { } iter.collect_local_statistic(&mut local_stats); local_stats.report_compactor(compactor_metrics.as_ref()); - Ok(table_stats_drop) + compaction_statistics.delta_drop_stat = table_stats_drop; + + Ok(compaction_statistics) } } impl Compactor { /// Create a new compactor. pub fn new( - context: Arc, + context: Arc, options: SstableBuilderOptions, - key_range: KeyRange, - cache_policy: CachePolicy, - gc_delete_keys: bool, - watermark: u64, - stats_target_table_ids: Option>, + task_config: TaskConfig, ) -> Self { Self { context, options, - task_config: TaskConfig { - key_range, - cache_policy, - gc_delete_keys, - watermark, - stats_target_table_ids, - }, + task_config, get_id_time: Arc::new(AtomicU64::new(0)), } } @@ -686,7 +632,7 @@ impl Compactor { del_agg: Arc, filter_key_extractor: Arc, task_progress: Option>, - ) -> HummockResult<(Vec, TableStatsMap)> { + ) -> HummockResult<(Vec, CompactionStatistics)> { // Monitor time cost building shared buffer to SSTs. 
let compact_timer = if self.context.is_share_buffer_compact { self.context @@ -701,7 +647,7 @@ impl Compactor { }; let (split_table_outputs, table_stats_map) = if self.options.capacity as u64 - > self.context.options.min_sst_size_for_streaming_upload + > self.context.storage_opts.min_sst_size_for_streaming_upload { self.compact_key_range_impl( StreamingSstableWriterFactory::new(self.context.sstable_store.clone()), @@ -782,8 +728,8 @@ impl Compactor { del_agg: Arc, filter_key_extractor: Arc, task_progress: Option>, - ) -> HummockResult<(Vec, TableStatsMap)> { - let builder_factory = RemoteBuilderFactory { + ) -> HummockResult<(Vec, CompactionStatistics)> { + let builder_factory = RemoteBuilderFactory:: { sstable_id_manager: self.context.sstable_id_manager.clone(), limiter: self.context.read_memory_limiter.clone(), options: self.options.clone(), @@ -791,6 +737,7 @@ impl Compactor { remote_rpc_cost: self.get_id_time.clone(), filter_key_extractor, sstable_writer_factory: writer_factory, + _phantom: PhantomData, }; let mut sst_builder = CapacitySplitTableBuilder::new( @@ -800,7 +747,7 @@ impl Compactor { del_agg, self.task_config.key_range.clone(), ); - let table_stats_map = Compactor::compact_and_build_sst( + let compaction_statistics = Compactor::compact_and_build_sst( &mut sst_builder, &self.task_config, self.context.compactor_metrics.clone(), @@ -808,132 +755,9 @@ impl Compactor { compaction_filter, ) .await?; - let ssts = sst_builder.finish().await?; - Ok((ssts, table_stats_map)) - } -} -pub fn estimate_memory_use_for_compaction(task: &CompactTask) -> u64 { - let mut total_memory_size = 0; - for level in &task.input_ssts { - if level.level_type == LevelType::Nonoverlapping as i32 { - if let Some(table) = level.table_infos.first() { - total_memory_size += table.file_size * task.splits.len() as u64; - } - } else { - for table in &level.table_infos { - total_memory_size += table.file_size; - } - } - } - total_memory_size -} - -fn build_multi_compaction_filter(compact_task: &CompactTask) -> MultiCompactionFilter { - use risingwave_common::catalog::TableOption; - let mut multi_filter = MultiCompactionFilter::default(); - let compaction_filter_flag = - CompactionFilterFlag::from_bits(compact_task.compaction_filter_mask).unwrap_or_default(); - if compaction_filter_flag.contains(CompactionFilterFlag::STATE_CLEAN) { - let state_clean_up_filter = Box::new(StateCleanUpCompactionFilter::new( - HashSet::from_iter(compact_task.existing_table_ids.clone()), - )); - - multi_filter.register(state_clean_up_filter); - } - - if compaction_filter_flag.contains(CompactionFilterFlag::TTL) { - let id_to_ttl = compact_task - .table_options - .iter() - .filter(|id_to_option| { - let table_option: TableOption = id_to_option.1.into(); - table_option.retention_seconds.is_some() - }) - .map(|id_to_option| (*id_to_option.0, id_to_option.1.retention_seconds)) - .collect(); - - let ttl_filter = Box::new(TtlCompactionFilter::new( - id_to_ttl, - compact_task.current_epoch_time, - )); - multi_filter.register(ttl_filter); - } - - multi_filter -} + let ssts = sst_builder.finish().await?; -async fn generate_splits(compact_task: &mut CompactTask, context: Arc) { - let sstable_infos = compact_task - .input_ssts - .iter() - .flat_map(|level| level.table_infos.iter()) - .collect_vec(); - - let compaction_size = compact_task - .input_ssts - .iter() - .flat_map(|level| level.table_infos.iter()) - .map(|table_info| table_info.file_size) - .sum::(); - - let sstable_size = (context.options.sstable_size_mb as u64) << 20; - if 
compaction_size > sstable_size * 2 { - let mut indexes = vec![]; - // preload the meta and get the smallest key to split sub_compaction - for sstable_info in sstable_infos { - indexes.extend( - context - .sstable_store - .sstable(sstable_info, &mut StoreLocalStatistic::default()) - .await - .unwrap() - .value() - .meta - .block_metas - .iter() - .map(|block| { - let data_size = block.len; - let full_key = FullKey { - user_key: FullKey::decode(&block.smallest_key).user_key, - epoch: HummockEpoch::MAX, - } - .encode(); - (data_size as u64, full_key) - }) - .collect_vec(), - ); - } - // sort by key, as for every data block has the same size; - indexes.sort_by(|a, b| KeyComparator::compare_encoded_full_key(a.1.as_ref(), b.1.as_ref())); - let mut splits: Vec = vec![]; - splits.push(KeyRange_vec::new(vec![], vec![])); - let parallelism = std::cmp::min( - indexes.len() as u64, - context.options.max_sub_compaction as u64, - ); - let sub_compaction_data_size = std::cmp::max(compaction_size / parallelism, sstable_size); - let parallelism = compaction_size / sub_compaction_data_size; - - if parallelism > 1 { - let mut last_buffer_size = 0; - let mut last_key: Vec = vec![]; - let mut remaining_size = indexes.iter().map(|block| block.0).sum::(); - for (data_size, key) in indexes { - if last_buffer_size >= sub_compaction_data_size - && !last_key.eq(&key) - && remaining_size > sstable_size - { - splits.last_mut().unwrap().right = key.clone(); - splits.push(KeyRange_vec::new(key.clone(), vec![])); - last_buffer_size = data_size; - } else { - last_buffer_size += data_size; - } - remaining_size -= data_size; - last_key = key; - } - compact_task.splits = splits; - } + Ok((ssts, compaction_statistics)) } } diff --git a/src/storage/src/hummock/compactor/shared_buffer_compact.rs b/src/storage/src/hummock/compactor/shared_buffer_compact.rs index 78edd969d8221..49a4b9ac958b5 100644 --- a/src/storage/src/hummock/compactor/shared_buffer_compact.rs +++ b/src/storage/src/hummock/compactor/shared_buffer_compact.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,9 +25,10 @@ use risingwave_hummock_sdk::filter_key_extractor::FilterKeyExtractorImpl; use risingwave_hummock_sdk::key::{FullKey, UserKey}; use risingwave_hummock_sdk::key_range::KeyRange; use risingwave_hummock_sdk::{CompactionGroupId, HummockEpoch, LocalSstableInfo}; +use risingwave_pb::hummock::compact_task; use crate::hummock::compactor::compaction_filter::DummyCompactionFilter; -use crate::hummock::compactor::context::Context; +use crate::hummock::compactor::context::CompactorContext; use crate::hummock::compactor::{CompactOutput, Compactor}; use crate::hummock::iterator::{Forward, HummockIterator}; use crate::hummock::shared_buffer::shared_buffer_uploader::UploadTaskPayload; @@ -44,7 +45,7 @@ const GC_WATERMARK_FOR_FLUSH: u64 = 0; /// Flush shared buffer to level0. Resulted SSTs are grouped by compaction group. pub async fn compact( - context: Arc, + context: Arc, payload: UploadTaskPayload, compaction_group_index: Arc>, ) -> HummockResult> { @@ -108,7 +109,7 @@ pub async fn compact( /// For compaction from shared buffer to level 0, this is the only function gets called. async fn compact_shared_buffer( - context: Arc, + context: Arc, payload: UploadTaskPayload, ) -> HummockResult> { // Local memory compaction looks at all key ranges. 
@@ -145,9 +146,9 @@ async fn compact_shared_buffer( splits.last_mut().unwrap().right = key_before_last.clone(); splits.push(KeyRange::new(key_before_last.clone(), Bytes::new())); }; - let sstable_size = (context.options.sstable_size_mb as u64) << 20; + let sstable_size = (context.storage_opts.sstable_size_mb as u64) << 20; let parallelism = std::cmp::min( - context.options.share_buffers_sync_parallelism as u64, + context.storage_opts.share_buffers_sync_parallelism as u64, size_and_start_user_keys.len() as u64, ); let sub_compaction_data_size = if compact_data_size > sstable_size && parallelism > 1 { @@ -293,19 +294,22 @@ impl SharedBufferCompactRunner { pub fn new( split_index: usize, key_range: KeyRange, - context: Arc, + context: Arc, sub_compaction_sstable_size: usize, ) -> Self { - let mut options: SstableBuilderOptions = context.options.as_ref().into(); + let mut options: SstableBuilderOptions = context.storage_opts.as_ref().into(); options.capacity = sub_compaction_sstable_size; let compactor = Compactor::new( context, options, - key_range, - CachePolicy::Fill, - GC_DELETE_KEYS_FOR_FLUSH, - GC_WATERMARK_FOR_FLUSH, - None, + super::TaskConfig { + key_range, + cache_policy: CachePolicy::Fill, + gc_delete_keys: GC_DELETE_KEYS_FOR_FLUSH, + watermark: GC_WATERMARK_FOR_FLUSH, + stats_target_table_ids: None, + task_type: compact_task::TaskType::SharedBuffer, + }, ); Self { compactor, diff --git a/src/storage/src/hummock/compactor/task_progress.rs b/src/storage/src/hummock/compactor/task_progress.rs index 8ba18d30cd3cf..2f4f0283a56bf 100644 --- a/src/storage/src/hummock/compactor/task_progress.rs +++ b/src/storage/src/hummock/compactor/task_progress.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/conflict_detector.rs b/src/storage/src/hummock/conflict_detector.rs index b510e0e3e228b..419c81273cf0c 100644 --- a/src/storage/src/hummock/conflict_detector.rs +++ b/src/storage/src/hummock/conflict_detector.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
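// The sub-compaction sizing in `generate_splits` and `compact_shared_buffer` above (now being
// moved into `compaction_utils` by this patch) caps the number of splits by the available split
// points and the configured maximum, then requires each split to cover at least one full SST of
// data. A minimal stand-alone sketch of that arithmetic; the concrete numbers are hypothetical
// and only illustrate the formula:
fn sub_compaction_parallelism(
    compaction_size: u64,   // total bytes of the input SSTs
    sstable_size: u64,      // target SST size, i.e. `sstable_size_mb << 20`
    max_sub_compaction: u64,
    split_point_count: u64, // e.g. number of block-level split candidates
) -> u64 {
    let parallelism = std::cmp::min(split_point_count, max_sub_compaction);
    // Each sub-compaction handles at least one full SST worth of data.
    let sub_compaction_data_size = std::cmp::max(compaction_size / parallelism, sstable_size);
    compaction_size / sub_compaction_data_size
}

fn main() {
    let (gib, mib) = (1u64 << 30, 1u64 << 20);
    // 10 GiB of input, 256 MiB target SSTs, at most 4 sub-compactions, plenty of split points:
    // sub_compaction_data_size = max(10 GiB / 4, 256 MiB) = 2.5 GiB, so 4 splits are produced.
    assert_eq!(sub_compaction_parallelism(10 * gib, 256 * mib, 4, 1024), 4);
}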
@@ -19,10 +19,10 @@ use std::sync::Arc; use bytes::Bytes; use crossbeam::atomic::AtomicCell; use dashmap::DashMap; -use risingwave_common::config::StorageConfig; use crate::hummock::value::HummockValue; use crate::hummock::HummockEpoch; +use crate::opts::StorageOpts; pub struct ConflictDetector { // epoch -> key-sets @@ -40,7 +40,7 @@ impl Default for ConflictDetector { } impl ConflictDetector { - pub fn new_from_config(options: &StorageConfig) -> Option> { + pub fn new_from_config(options: &StorageOpts) -> Option> { if options.write_conflict_detection_enabled { Some(Arc::new(ConflictDetector::default())) } else { @@ -145,7 +145,6 @@ mod test { detector.check_conflict_and_track_write_batch( (0..2) .map(|_| (Bytes::from("conflicted-key"), HummockValue::Delete)) - .into_iter() .collect_vec() .as_slice(), 233, diff --git a/src/storage/src/hummock/error.rs b/src/storage/src/hummock/error.rs index 166b09b5530d5..4690d3e562345 100644 --- a/src/storage/src/hummock/error.rs +++ b/src/storage/src/hummock/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ use std::backtrace::Backtrace; use risingwave_object_store::object::ObjectError; use thiserror::Error; +use tokio::sync::oneshot::error::RecvError; #[derive(Error, Debug)] enum HummockErrorInner { @@ -44,7 +45,7 @@ enum HummockErrorInner { SharedBufferError(String), #[error("Wait epoch error {0}.")] WaitEpoch(String), - #[error("Read current epoch error {0}.")] + #[error("ReadCurrentEpoch error {0}.")] ReadCurrentEpoch(String), #[error("Expired Epoch: watermark {safe_epoch}, epoch {epoch}.")] ExpiredEpoch { safe_epoch: u64, epoch: u64 }, @@ -170,6 +171,12 @@ impl From for HummockError { } } +impl From for HummockError { + fn from(error: RecvError) -> Self { + ObjectError::from(error).into() + } +} + impl std::fmt::Debug for HummockError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use std::error::Error; diff --git a/src/storage/src/hummock/event_handler/hummock_event_handler.rs b/src/storage/src/hummock/event_handler/hummock_event_handler.rs index c58fdf38236bd..d8cee044b406b 100644 --- a/src/storage/src/hummock/event_handler/hummock_event_handler.rs +++ b/src/storage/src/hummock/event_handler/hummock_event_handler.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
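// The new `impl From<RecvError> for HummockError` in `error.rs` above delegates through
// `ObjectError`, presumably so a plain `?` works at call sites: the `?` operator inserts only a
// single `From` conversion, so the chain needs an explicit bridging impl. A toy sketch of the
// same layering with hypothetical error types (not the crate's actual definitions):
#[derive(Debug)]
struct ObjectErr(String);
#[derive(Debug)]
struct HummockErr(String);

// Low-level error -> object-store error.
impl From<std::num::ParseIntError> for ObjectErr {
    fn from(e: std::num::ParseIntError) -> Self {
        ObjectErr(e.to_string())
    }
}
// Object-store error -> storage error.
impl From<ObjectErr> for HummockErr {
    fn from(e: ObjectErr) -> Self {
        HummockErr(e.0)
    }
}
// Without this delegating impl, `parse()?` below would not compile, because `?` only applies
// one `From` step (ParseIntError -> HummockErr directly).
impl From<std::num::ParseIntError> for HummockErr {
    fn from(e: std::num::ParseIntError) -> Self {
        ObjectErr::from(e).into()
    }
}

fn read_level(s: &str) -> Result<u32, HummockErr> {
    Ok(s.parse::<u32>()?)
}

fn main() {
    assert!(read_level("7").is_ok());
    assert!(read_level("not-a-number").is_err());
}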
@@ -18,19 +18,19 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use arc_swap::ArcSwap; +use async_stack_trace::StackTrace; use futures::future::{select, Either}; use futures::FutureExt; use parking_lot::RwLock; -use risingwave_common::config::StorageConfig; use risingwave_hummock_sdk::compaction_group::hummock_version_ext::HummockVersionUpdateExt; -use risingwave_hummock_sdk::{HummockEpoch, LocalSstableInfo}; -use risingwave_pb::hummock::pin_version_response::Payload; +use risingwave_hummock_sdk::{info_in_release, HummockEpoch, LocalSstableInfo}; +use risingwave_pb::hummock::version_update_payload::Payload; use tokio::spawn; use tokio::sync::{mpsc, oneshot}; use tracing::{error, info}; use super::{LocalInstanceGuard, LocalInstanceId, ReadVersionMappingType}; -use crate::hummock::compactor::{compact, Context}; +use crate::hummock::compactor::{compact, CompactorContext}; use crate::hummock::conflict_detector::ConflictDetector; use crate::hummock::event_handler::uploader::{ HummockUploader, UploadTaskInfo, UploadTaskPayload, UploaderEvent, @@ -43,6 +43,7 @@ use crate::hummock::store::version::{ }; use crate::hummock::utils::validate_table_key_range; use crate::hummock::{HummockError, HummockResult, MemoryLimiter, SstableIdManagerRef, TrackerId}; +use crate::opts::StorageOpts; use crate::store::SyncResult; #[derive(Clone)] @@ -53,8 +54,8 @@ pub struct BufferTracker { } impl BufferTracker { - pub fn from_storage_config(config: &StorageConfig) -> Self { - let capacity = config.shared_buffer_capacity_mb as usize * (1 << 20); + pub fn from_storage_opts(config: &StorageOpts) -> Self { + let capacity = config.shared_buffer_capacity_mb * (1 << 20); let flush_threshold = capacity * 4 / 5; Self::new(capacity, flush_threshold) } @@ -69,7 +70,7 @@ impl BufferTracker { } pub fn for_test() -> Self { - Self::from_storage_config(&StorageConfig::default()) + Self::from_storage_opts(&StorageOpts::default()) } pub fn get_buffer_size(&self) -> usize { @@ -112,7 +113,7 @@ pub struct HummockEventHandler { async fn flush_imms( payload: UploadTaskPayload, task_info: UploadTaskInfo, - compactor_context: Arc, + compactor_context: Arc, ) -> HummockResult> { for epoch in &task_info.epochs { let _ = compactor_context @@ -131,6 +132,7 @@ async fn flush_imms( .collect(), task_info.compaction_group_index, ) + .verbose_stack_trace("shared_buffer_compact") .await } @@ -139,14 +141,15 @@ impl HummockEventHandler { hummock_event_tx: mpsc::UnboundedSender, hummock_event_rx: mpsc::UnboundedReceiver, pinned_version: PinnedVersion, - compactor_context: Arc, + compactor_context: Arc, ) -> Self { let (version_update_notifier_tx, _) = tokio::sync::watch::channel(pinned_version.max_committed_epoch()); let version_update_notifier_tx = Arc::new(version_update_notifier_tx); let read_version_mapping = Arc::new(RwLock::new(HashMap::default())); - let buffer_tracker = BufferTracker::from_storage_config(&compactor_context.options); - let write_conflict_detector = ConflictDetector::new_from_config(&compactor_context.options); + let buffer_tracker = BufferTracker::from_storage_opts(&compactor_context.storage_opts); + let write_conflict_detector = + ConflictDetector::new_from_config(&compactor_context.storage_opts); let sstable_id_manager = compactor_context.sstable_id_manager.clone(); let uploader = HummockUploader::new( pinned_version.clone(), @@ -208,6 +211,7 @@ impl HummockEventHandler { epoch: HummockEpoch, newly_uploaded_sstables: Vec, ) { + info_in_release!("epoch has been synced: {}.", epoch); if 
!newly_uploaded_sstables.is_empty() { newly_uploaded_sstables .into_iter() @@ -279,6 +283,7 @@ impl HummockEventHandler { new_sync_epoch: HummockEpoch, sync_result_sender: oneshot::Sender>, ) { + info_in_release!("receive await sync epoch: {}", new_sync_epoch); // The epoch to sync has been committed already. if new_sync_epoch <= self.uploader.max_committed_epoch() { send_sync_result( @@ -293,6 +298,11 @@ impl HummockEventHandler { } // The epoch has been synced if new_sync_epoch <= self.uploader.max_synced_epoch() { + info_in_release!( + "epoch {} has been synced. Current max_sync_epoch {}", + new_sync_epoch, + self.uploader.max_synced_epoch() + ); if let Some(result) = self.uploader.get_synced_data(new_sync_epoch) { let result = to_sync_result(result); send_sync_result(sync_result_sender, result); @@ -307,6 +317,12 @@ impl HummockEventHandler { return; } + info_in_release!( + "awaiting for epoch to be synced: {}, max_synced_epoch: {}", + new_sync_epoch, + self.uploader.max_synced_epoch() + ); + // If the epoch is not synced, we add to the `pending_sync_requests` anyway. If the epoch is // not a checkpoint epoch, it will be clear with the max synced epoch bumps up. if let Some(old_sync_result_sender) = self @@ -327,6 +343,12 @@ impl HummockEventHandler { } fn handle_clear(&mut self, notifier: oneshot::Sender<()>) { + info!( + "handle clear event. max_committed_epoch: {}, max_synced_epoch: {}, max_sealed_epoch: {}", + self.uploader.max_committed_epoch(), + self.uploader.max_synced_epoch(), + self.uploader.max_sealed_epoch(), + ); self.uploader.clear(); for (epoch, result_sender) in self.pending_sync_requests.drain_filter(|_, _| true) { @@ -403,6 +425,12 @@ impl HummockEventHandler { self.pinned_version.load().max_committed_epoch(), )); + info_in_release!( + "update to hummock version: {}, epoch: {}", + new_pinned_version.id(), + new_pinned_version.max_committed_epoch() + ); + self.uploader.update_pinned_version(new_pinned_version); } } @@ -475,6 +503,12 @@ impl HummockEventHandler { let instance_id = self.generate_instance_id(); + info_in_release!( + "new read version registered: table_id: {}, instance_id: {}", + table_id, + instance_id + ); + { let mut read_version_mapping_guard = self.read_version_mapping.write(); @@ -504,16 +538,24 @@ impl HummockEventHandler { table_id, instance_id, } => { + info_in_release!( + "read version deregister: table_id: {}, instance_id: {}", + table_id, + instance_id + ); let mut read_version_mapping_guard = self.read_version_mapping.write(); - read_version_mapping_guard + let entry = read_version_mapping_guard .get_mut(&table_id) .unwrap_or_else(|| { panic!( "DestroyHummockInstance table_id {} instance_id {} fail", table_id, instance_id ) - }) - .remove(&instance_id).unwrap_or_else(|| panic!("DestroyHummockInstance inexist instance table_id {} instance_id {}", table_id, instance_id)); + }); + entry.remove(&instance_id).unwrap_or_else(|| panic!("DestroyHummockInstance inexist instance table_id {} instance_id {}", table_id, instance_id)); + if entry.is_empty() { + read_version_mapping_guard.remove(&table_id); + } } } } diff --git a/src/storage/src/hummock/event_handler/mod.rs b/src/storage/src/hummock/event_handler/mod.rs index f647c62ae2757..f6e47c8762f37 100644 --- a/src/storage/src/hummock/event_handler/mod.rs +++ b/src/storage/src/hummock/event_handler/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in 
compliance with the License. @@ -18,7 +18,7 @@ use std::sync::Arc; use parking_lot::RwLock; use risingwave_common::catalog::TableId; use risingwave_hummock_sdk::HummockEpoch; -use risingwave_pb::hummock::pin_version_response; +use risingwave_pb::hummock::version_update_payload; use tokio::sync::{mpsc, oneshot}; use crate::hummock::shared_buffer::shared_buffer_batch::SharedBufferBatch; @@ -57,7 +57,7 @@ pub enum HummockEvent { Shutdown, - VersionUpdate(pin_version_response::Payload), + VersionUpdate(version_update_payload::Payload), ImmToUploader(ImmutableMemtable), @@ -97,8 +97,8 @@ impl HummockEvent { HummockEvent::Shutdown => "Shutdown".to_string(), - HummockEvent::VersionUpdate(pin_version_response) => { - format!("VersionUpdate {:?}", pin_version_response) + HummockEvent::VersionUpdate(version_update_payload) => { + format!("VersionUpdate {:?}", version_update_payload) } HummockEvent::ImmToUploader(imm) => format!("ImmToUploader {:?}", imm), diff --git a/src/storage/src/hummock/event_handler/uploader.rs b/src/storage/src/hummock/event_handler/uploader.rs index d13fad94dd73d..31a8fbf145946 100644 --- a/src/storage/src/hummock/event_handler/uploader.rs +++ b/src/storage/src/hummock/event_handler/uploader.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,9 +27,9 @@ use futures::future::{try_join_all, TryJoinAll}; use futures::FutureExt; use itertools::Itertools; use risingwave_common::catalog::TableId; -use risingwave_hummock_sdk::{CompactionGroupId, HummockEpoch, LocalSstableInfo}; +use risingwave_hummock_sdk::{info_in_release, CompactionGroupId, HummockEpoch, LocalSstableInfo}; use tokio::task::JoinHandle; -use tracing::{error, warn}; +use tracing::error; use crate::hummock::event_handler::hummock_event_handler::BufferTracker; use crate::hummock::local_version::pinned_version::PinnedVersion; @@ -46,7 +46,7 @@ pub type SpawnUploadTask = Arc< + 'static, >; -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct UploadTaskInfo { pub task_size: usize, pub epochs: Vec, @@ -54,6 +54,16 @@ pub struct UploadTaskInfo { pub compaction_group_index: Arc>, } +impl Debug for UploadTaskInfo { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("UploadTaskInfo") + .field("task_size", &self.task_size) + .field("epochs", &self.epochs) + .field("imm_ids", &self.imm_ids) + .finish() + } +} + /// A wrapper for a uploading task that compacts and uploads the imm payload. Task context are /// stored so that when the task fails, it can be re-tried. 
struct UploadingTask { @@ -103,6 +113,7 @@ impl UploadingTask { .buffer_tracker .global_upload_task_size() .fetch_add(task_size, Relaxed); + info_in_release!("start upload task: {:?}", task_info); let join_handle = (context.spawn_upload_task)(payload.clone(), task_info.clone()); Self { payload, @@ -116,14 +127,17 @@ impl UploadingTask { /// Poll the result of the uploading task fn poll_result(&mut self, cx: &mut Context<'_>) -> Poll> { Poll::Ready(match ready!(self.join_handle.poll_unpin(cx)) { - Ok(task_result) => task_result.map(|ssts| { - StagingSstableInfo::new( - ssts, - self.task_info.epochs.clone(), - self.task_info.imm_ids.clone(), - self.task_info.task_size, - ) - }), + Ok(task_result) => task_result + .inspect(|_| info_in_release!("upload task finish {:?}", self.task_info)) + .map(|ssts| { + StagingSstableInfo::new( + ssts, + self.task_info.epochs.clone(), + self.task_info.imm_ids.clone(), + self.task_info.task_size, + ) + }), + Err(err) => Err(HummockError::other(format!( "fail to join upload join handle: {:?}", err @@ -138,7 +152,10 @@ impl UploadingTask { match result { Ok(sstables) => return Poll::Ready(sstables), Err(e) => { - error!("a flush task {:?} failed. {:?}", self.task_info, e); + error!( + "a flush task {:?} failed, start retry. Task info: {:?}", + self.task_info, e + ); self.join_handle = (self.spawn_upload_task)(self.payload.clone(), self.task_info.clone()); // It is important not to return Poll::pending here immediately, because the new @@ -402,6 +419,10 @@ impl HummockUploader { &self.context.buffer_tracker } + pub(crate) fn max_sealed_epoch(&self) -> HummockEpoch { + self.max_sealed_epoch + } + pub(crate) fn max_synced_epoch(&self) -> HummockEpoch { self.max_synced_epoch } @@ -431,6 +452,7 @@ impl HummockUploader { } pub(crate) fn seal_epoch(&mut self, epoch: HummockEpoch) { + info_in_release!("epoch {} is sealed", epoch); assert!( epoch > self.max_sealed_epoch, "sealing a sealed epoch {}. {}", @@ -452,12 +474,15 @@ impl HummockUploader { .expect("we have checked non-empty"); self.sealed_data.seal_new_epoch(epoch, unsealed_data); } else { - warn!("epoch {} to seal has no data", epoch); + info_in_release!("epoch {} to seal has no data", epoch); } + } else { + info_in_release!("epoch {} to seal has no data", epoch); } } pub(crate) fn start_sync_epoch(&mut self, epoch: HummockEpoch) { + info_in_release!("start sync epoch: {}", epoch); assert!( epoch > self.max_syncing_epoch, "the epoch {} has started syncing already: {}", diff --git a/src/storage/src/hummock/file_cache/alloc.rs b/src/storage/src/hummock/file_cache/alloc.rs index f49858068c541..c8b8f50224954 100644 --- a/src/storage/src/hummock/file_cache/alloc.rs +++ b/src/storage/src/hummock/file_cache/alloc.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/file_cache/buffer.rs b/src/storage/src/hummock/file_cache/buffer.rs index 295db0f162aa9..b44d07889e759 100644 --- a/src/storage/src/hummock/file_cache/buffer.rs +++ b/src/storage/src/hummock/file_cache/buffer.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
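// `BufferTracker::from_storage_opts` above sizes the shared write buffer from
// `shared_buffer_capacity_mb` and starts flushing once 4/5 of it is in use. A stand-alone sketch
// of that arithmetic, using a hypothetical 4096 MiB configuration:
fn buffer_limits(shared_buffer_capacity_mb: u64) -> (u64, u64) {
    let capacity = shared_buffer_capacity_mb * (1 << 20); // bytes
    let flush_threshold = capacity * 4 / 5; // 80% of capacity, as in the patch
    (capacity, flush_threshold)
}

fn main() {
    let (capacity, flush_threshold) = buffer_limits(4096);
    assert_eq!(capacity, 4 * (1u64 << 30)); // 4 GiB
    assert_eq!(flush_threshold, 3_435_973_836); // ~3.2 GiB; flushing starts here
}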
diff --git a/src/storage/src/hummock/file_cache/cache.rs b/src/storage/src/hummock/file_cache/cache.rs index 57219d6187737..778a53826d612 100644 --- a/src/storage/src/hummock/file_cache/cache.rs +++ b/src/storage/src/hummock/file_cache/cache.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,8 +16,8 @@ use std::collections::hash_map::RandomState; use std::sync::Arc; use async_trait::async_trait; -use itertools::Itertools; use risingwave_common::cache::LruCache; +use risingwave_common::util::iter_util::ZipEqFast; use tokio::sync::Notify; use super::buffer::TwoLevelBuffer; @@ -101,8 +101,8 @@ where for ((key, encoded_value_len), slot) in keys .into_iter() - .zip_eq(encoded_value_lens.into_iter()) - .zip_eq(slots.into_iter()) + .zip_eq_fast(encoded_value_lens.into_iter()) + .zip_eq_fast(slots.into_iter()) { let hash = self.hash_builder.hash_one(&key); self.indices.insert( diff --git a/src/storage/src/hummock/file_cache/error.rs b/src/storage/src/hummock/file_cache/error.rs index 294dfe69d1787..d2b5274194363 100644 --- a/src/storage/src/hummock/file_cache/error.rs +++ b/src/storage/src/hummock/file_cache/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/file_cache/file.rs b/src/storage/src/hummock/file_cache/file.rs index 2bdc2e07c55e9..61e264f3044d2 100644 --- a/src/storage/src/hummock/file_cache/file.rs +++ b/src/storage/src/hummock/file_cache/file.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/file_cache/meta.rs b/src/storage/src/hummock/file_cache/meta.rs index 468d3904bd42f..44db5ec004c5e 100644 --- a/src/storage/src/hummock/file_cache/meta.rs +++ b/src/storage/src/hummock/file_cache/meta.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/file_cache/metrics.rs b/src/storage/src/hummock/file_cache/metrics.rs index 1d8a783d27171..f62648c6174a7 100644 --- a/src/storage/src/hummock/file_cache/metrics.rs +++ b/src/storage/src/hummock/file_cache/metrics.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/file_cache/mod.rs b/src/storage/src/hummock/file_cache/mod.rs index f3b86d5684cbe..d2f1470e863c3 100644 --- a/src/storage/src/hummock/file_cache/mod.rs +++ b/src/storage/src/hummock/file_cache/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/storage/src/hummock/file_cache/store.rs b/src/storage/src/hummock/file_cache/store.rs index 613f40c07aa98..4b718c96d9958 100644 --- a/src/storage/src/hummock/file_cache/store.rs +++ b/src/storage/src/hummock/file_cache/store.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,12 +16,12 @@ use std::marker::PhantomData; use std::path::PathBuf; use std::sync::Arc; -use itertools::Itertools; use nix::sys::statfs::{ statfs, FsType as NixFsType, BTRFS_SUPER_MAGIC, EXT4_SUPER_MAGIC, TMPFS_MAGIC, }; use parking_lot::RwLock; use risingwave_common::cache::{LruCache, LruCacheEventListener}; +use risingwave_common::util::iter_util::ZipEqFast; use tokio::sync::RwLock as AsyncRwLock; use tracing::Instrument; @@ -200,7 +200,7 @@ where .instrument(tracing::trace_span!("meta_write_lock_update_slots")) .await; - for (key, bloc) in self.keys.iter().zip_eq(self.blocs.iter()) { + for (key, bloc) in self.keys.iter().zip_eq_fast(self.blocs.iter()) { slots.push(guard.insert(key, bloc)?); } diff --git a/src/storage/src/hummock/file_cache/test_utils.rs b/src/storage/src/hummock/file_cache/test_utils.rs index 51b017431bc5e..e49dbdef5c1bf 100644 --- a/src/storage/src/hummock/file_cache/test_utils.rs +++ b/src/storage/src/hummock/file_cache/test_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/file_cache/utils.rs b/src/storage/src/hummock/file_cache/utils.rs index 889e5359cd8b4..9be2f0125f788 100644 --- a/src/storage/src/hummock/file_cache/utils.rs +++ b/src/storage/src/hummock/file_cache/utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/hummock_meta_client.rs b/src/storage/src/hummock/hummock_meta_client.rs index 6ab78d80a8011..2c4e0e8af1849 100644 --- a/src/storage/src/hummock/hummock_meta_client.rs +++ b/src/storage/src/hummock/hummock_meta_client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -19,7 +19,7 @@ use futures::stream::BoxStream; use risingwave_hummock_sdk::table_stats::TableStatsMap; use risingwave_hummock_sdk::{HummockSstableId, LocalSstableInfo, SstIdRange}; use risingwave_pb::hummock::{ - CompactTask, CompactTaskProgress, CompactionGroup, HummockSnapshot, HummockVersion, VacuumTask, + CompactTask, CompactTaskProgress, HummockSnapshot, HummockVersion, VacuumTask, }; use risingwave_rpc_client::error::Result; use risingwave_rpc_client::{CompactTaskItem, HummockMetaClient, MetaClient}; @@ -141,10 +141,6 @@ impl HummockMetaClient for MonitoredHummockMetaClient { self.meta_client.report_vacuum_task(vacuum_task).await } - async fn get_compaction_groups(&self) -> Result> { - self.meta_client.get_compaction_groups().await - } - async fn trigger_manual_compaction( &self, compaction_group_id: u64, diff --git a/src/storage/src/hummock/iterator/backward_concat.rs b/src/storage/src/hummock/iterator/backward_concat.rs index 89d1dbc9f4c2b..a466f0554807a 100644 --- a/src/storage/src/hummock/iterator/backward_concat.rs +++ b/src/storage/src/hummock/iterator/backward_concat.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/iterator/backward_merge.rs b/src/storage/src/hummock/iterator/backward_merge.rs index 0137b63c331a3..ebc2517d14cd3 100644 --- a/src/storage/src/hummock/iterator/backward_merge.rs +++ b/src/storage/src/hummock/iterator/backward_merge.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/iterator/backward_user.rs b/src/storage/src/hummock/iterator/backward_user.rs index a6dfcc5176ec3..b6de94bc59d28 100644 --- a/src/storage/src/hummock/iterator/backward_user.rs +++ b/src/storage/src/hummock/iterator/backward_user.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -1017,7 +1017,7 @@ mod tests { let begin_key: usize = rng.gen_range(1..=end_key); let begin_key_bytes = key_from_num(begin_key); chaos_test_case( - clone_sst(&sst), + sst.clone(), Unbounded, Unbounded, &truth, @@ -1038,7 +1038,7 @@ mod tests { let begin_key: usize = rng.gen_range(1..=end_key); let begin_key_bytes = key_from_num(begin_key); chaos_test_case( - clone_sst(&sst), + sst.clone(), Unbounded, Included(end_key_bytes.clone()), &truth, @@ -1059,7 +1059,7 @@ mod tests { let begin_key: usize = rng.gen_range(1..=end_key); let begin_key_bytes = key_from_num(begin_key); chaos_test_case( - clone_sst(&sst), + sst.clone(), Included(begin_key_bytes.clone()), Unbounded, &truth, @@ -1080,7 +1080,7 @@ mod tests { let begin_key: usize = rng.gen_range(1..=end_key); let begin_key_bytes = key_from_num(begin_key); chaos_test_case( - clone_sst(&sst), + sst.clone(), Excluded(begin_key_bytes.clone()), Unbounded, &truth, @@ -1101,7 +1101,7 @@ mod tests { let begin_key: usize = rng.gen_range(1..=end_key); let begin_key_bytes = key_from_num(begin_key); chaos_test_case( - clone_sst(&sst), + sst.clone(), Included(begin_key_bytes.clone()), Included(end_key_bytes.clone()), &truth, @@ -1122,7 +1122,7 @@ mod tests { let begin_key: usize = rng.gen_range(1..=end_key); let begin_key_bytes = key_from_num(begin_key); chaos_test_case( - clone_sst(&sst), + sst.clone(), Excluded(begin_key_bytes), Included(end_key_bytes), &truth, @@ -1132,13 +1132,6 @@ mod tests { } } - fn clone_sst(sst: &Sstable) -> Sstable { - Sstable { - id: sst.id, - meta: sst.meta.clone(), - } - } - #[tokio::test] async fn test_min_epoch() { let sstable_store = mock_sstable_store(); diff --git a/src/storage/src/hummock/iterator/concat_inner.rs b/src/storage/src/hummock/iterator/concat_inner.rs index bf2b5fd49d096..ed90916485550 100644 --- a/src/storage/src/hummock/iterator/concat_inner.rs +++ b/src/storage/src/hummock/iterator/concat_inner.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/iterator/delete_range_iterator.rs b/src/storage/src/hummock/iterator/delete_range_iterator.rs index c7be7c3e97e9a..29e7f2aebeecf 100644 --- a/src/storage/src/hummock/iterator/delete_range_iterator.rs +++ b/src/storage/src/hummock/iterator/delete_range_iterator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/iterator/forward_concat.rs b/src/storage/src/hummock/iterator/forward_concat.rs index 8ae9e324f9501..a5d54039a81f6 100644 --- a/src/storage/src/hummock/iterator/forward_concat.rs +++ b/src/storage/src/hummock/iterator/forward_concat.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/storage/src/hummock/iterator/forward_merge.rs b/src/storage/src/hummock/iterator/forward_merge.rs index acfbfb4fc2e96..a1e43bde20694 100644 --- a/src/storage/src/hummock/iterator/forward_merge.rs +++ b/src/storage/src/hummock/iterator/forward_merge.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/iterator/forward_user.rs b/src/storage/src/hummock/iterator/forward_user.rs index f9a831fb45132..37c5c3926b707 100644 --- a/src/storage/src/hummock/iterator/forward_user.rs +++ b/src/storage/src/hummock/iterator/forward_user.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/iterator/merge_inner.rs b/src/storage/src/hummock/iterator/merge_inner.rs index 81a87cbbfa928..de525644c3a9b 100644 --- a/src/storage/src/hummock/iterator/merge_inner.rs +++ b/src/storage/src/hummock/iterator/merge_inner.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,7 +18,6 @@ use std::future::Future; use std::ops::{Deref, DerefMut}; use risingwave_hummock_sdk::key::{FullKey, TableKey, UserKey}; -use tracing::warn; use crate::hummock::iterator::{DirectionEnum, HummockIterator, HummockIteratorDirection}; use crate::hummock::value::HummockValue; @@ -261,7 +260,9 @@ impl<'a, T: Ord> Drop for PeekMutGuard<'a, T> { /// call `PeekMut::pop` on the `PeekMut` and recycle the node to the unused list. fn drop(&mut self) { if let Some(peek) = self.peek.take() { - warn!("PeekMut are dropped without used. May be caused by future cancellation"); + tracing::debug!( + "PeekMut are dropped without used. May be caused by future cancellation" + ); let top = PeekMut::pop(peek); self.unused.push_back(top); } diff --git a/src/storage/src/hummock/iterator/mod.rs b/src/storage/src/hummock/iterator/mod.rs index 734be1730bbbf..481aeaf2e1bca 100644 --- a/src/storage/src/hummock/iterator/mod.rs +++ b/src/storage/src/hummock/iterator/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/iterator/test_utils.rs b/src/storage/src/hummock/iterator/test_utils.rs index 2a434e51e7894..aaaa7a2b5ceef 100644 --- a/src/storage/src/hummock/iterator/test_utils.rs +++ b/src/storage/src/hummock/iterator/test_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
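// The `PeekMutGuard` drop path in `merge_inner.rs` above recycles a peeked heap node onto an
// `unused` list whenever the guard is dropped without being consumed, e.g. when the surrounding
// future is cancelled (the log for that case is downgraded from `warn` to `debug` here). A
// simplified stand-alone re-creation of the same guard pattern, with names and types reduced to
// std primitives rather than the crate's actual definitions:
use std::collections::binary_heap::PeekMut;
use std::collections::{BinaryHeap, VecDeque};

struct PeekGuard<'a, T: Ord> {
    peek: Option<PeekMut<'a, T>>,
    unused: &'a mut VecDeque<T>,
}

impl<'a, T: Ord> PeekGuard<'a, T> {
    fn new(heap: &'a mut BinaryHeap<T>, unused: &'a mut VecDeque<T>) -> Option<Self> {
        heap.peek_mut().map(|peek| Self { peek: Some(peek), unused })
    }

    /// Happy path: explicitly consume the guard and take the peeked node.
    fn used(mut self) -> T {
        PeekMut::pop(self.peek.take().expect("peek still present"))
    }
}

impl<'a, T: Ord> Drop for PeekGuard<'a, T> {
    fn drop(&mut self) {
        if let Some(peek) = self.peek.take() {
            // Dropped without `used()` (e.g. the surrounding future was cancelled):
            // pop the node and park it on the unused list instead of losing track of it.
            self.unused.push_back(PeekMut::pop(peek));
        }
    }
}

fn main() {
    let mut heap = BinaryHeap::from([3, 1, 2]);
    let mut unused = VecDeque::new();
    // Normal use: the caller consumes the peeked node.
    assert_eq!(PeekGuard::new(&mut heap, &mut unused).unwrap().used(), 3);
    // Simulated cancellation: the guard is dropped unconsumed, so the node is recycled.
    drop(PeekGuard::new(&mut heap, &mut unused));
    assert_eq!(unused.pop_front(), Some(2));
}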
diff --git a/src/storage/src/hummock/local_version/local_version_impl.rs b/src/storage/src/hummock/local_version/local_version_impl.rs index 109539870e529..c3cca8b27e638 100644 --- a/src/storage/src/hummock/local_version/local_version_impl.rs +++ b/src/storage/src/hummock/local_version/local_version_impl.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/local_version/local_version_manager.rs b/src/storage/src/hummock/local_version/local_version_manager.rs index d3942c6911059..07b6b240e6330 100644 --- a/src/storage/src/hummock/local_version/local_version_manager.rs +++ b/src/storage/src/hummock/local_version/local_version_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,12 +22,12 @@ use risingwave_common::catalog::TableId; use risingwave_hummock_sdk::compaction_group::hummock_version_ext::HummockVersionUpdateExt; use risingwave_hummock_sdk::key::TableKey; use risingwave_hummock_sdk::CompactionGroupId; -use risingwave_pb::hummock::pin_version_response; -use risingwave_pb::hummock::pin_version_response::Payload; +use risingwave_pb::hummock::version_update_payload; +use risingwave_pb::hummock::version_update_payload::Payload; use tokio::task::JoinHandle; use tracing::{error, info}; -use crate::hummock::compactor::Context; +use crate::hummock::compactor::CompactorContext; use crate::hummock::event_handler::hummock_event_handler::BufferTracker; use crate::hummock::local_version::pinned_version::PinnedVersion; use crate::hummock::local_version::{LocalVersion, ReadVersion, SyncUncommittedDataStage}; @@ -57,7 +57,7 @@ pub struct LocalVersionManager { impl LocalVersionManager { pub fn new( pinned_version: PinnedVersion, - compactor_context: Arc, + compactor_context: Arc, buffer_tracker: BufferTracker, ) -> Arc { assert!(pinned_version.is_valid()); @@ -80,7 +80,7 @@ impl LocalVersionManager { /// being referenced by some readers. pub fn try_update_pinned_version( &self, - pin_resp_payload: pin_version_response::Payload, + pin_resp_payload: version_update_payload::Payload, ) -> Option { let old_version = self.local_version.read(); let new_version_id = match &pin_resp_payload { diff --git a/src/storage/src/hummock/local_version/mod.rs b/src/storage/src/hummock/local_version/mod.rs index 8ef94a7594372..9c6da23568b9f 100644 --- a/src/storage/src/hummock/local_version/mod.rs +++ b/src/storage/src/hummock/local_version/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/local_version/pinned_version.rs b/src/storage/src/hummock/local_version/pinned_version.rs index af49f2991d798..16eb80562c20f 100644 --- a/src/storage/src/hummock/local_version/pinned_version.rs +++ b/src/storage/src/hummock/local_version/pinned_version.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/storage/src/hummock/local_version/upload_handle_manager.rs b/src/storage/src/hummock/local_version/upload_handle_manager.rs index ce1a7cb7d13e3..336cd23e7ec0a 100644 --- a/src/storage/src/hummock/local_version/upload_handle_manager.rs +++ b/src/storage/src/hummock/local_version/upload_handle_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/mod.rs b/src/storage/src/hummock/mod.rs index 749143b4882bd..4b577640f273e 100644 --- a/src/storage/src/hummock/mod.rs +++ b/src/storage/src/hummock/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,12 +21,12 @@ use std::sync::Arc; use arc_swap::ArcSwap; use bytes::Bytes; use risingwave_common::catalog::TableId; -use risingwave_common::config::StorageConfig; +use risingwave_hummock_sdk::compact::CompactorRuntimeConfig; use risingwave_hummock_sdk::key::{FullKey, TableKey}; use risingwave_hummock_sdk::{HummockEpoch, *}; #[cfg(any(test, feature = "test"))] use risingwave_pb::hummock::HummockVersion; -use risingwave_pb::hummock::{pin_version_response, SstableInfo}; +use risingwave_pb::hummock::{version_update_payload, SstableInfo}; use risingwave_rpc_client::HummockMetaClient; use tokio::sync::mpsc::{unbounded_channel, UnboundedSender}; use tokio::sync::watch; @@ -36,6 +36,7 @@ mod block_cache; pub use block_cache::*; use crate::hummock::store::state_store::LocalHummockStorage; +use crate::opts::StorageOpts; #[cfg(target_os = "linux")] pub mod file_cache; @@ -84,7 +85,7 @@ pub use self::sstable_store::*; use super::monitor::HummockStateStoreMetrics; use crate::error::StorageResult; use crate::hummock::backup_reader::{BackupReader, BackupReaderRef}; -use crate::hummock::compactor::Context; +use crate::hummock::compactor::CompactorContext; use crate::hummock::event_handler::hummock_event_handler::BufferTracker; use crate::hummock::event_handler::{HummockEvent, HummockEventHandler}; use crate::hummock::iterator::{ @@ -119,7 +120,7 @@ impl Drop for HummockStorageShutdownGuard { pub struct HummockStorage { hummock_event_sender: UnboundedSender, - context: Arc, + context: Arc, buffer_tracker: BufferTracker, @@ -147,7 +148,7 @@ impl HummockStorage { /// Creates a [`HummockStorage`]. #[allow(clippy::too_many_arguments)] pub async fn new( - options: Arc, + options: Arc, sstable_store: SstableStoreRef, backup_reader: BackupReaderRef, hummock_meta_client: Arc, @@ -176,7 +177,7 @@ impl HummockStorage { observer_manager.start().await; let hummock_version = match event_rx.recv().await { - Some(HummockEvent::VersionUpdate(pin_version_response::Payload::PinnedVersion(version))) => version, + Some(HummockEvent::VersionUpdate(version_update_payload::Payload::PinnedVersion(version))) => version, _ => unreachable!("the hummock observer manager is the first one to take the event tx. 
Should be full hummock version") }; @@ -187,13 +188,14 @@ impl HummockStorage { hummock_meta_client.clone(), )); - let compactor_context = Arc::new(Context::new_local_compact_context( + let compactor_context = Arc::new(CompactorContext::new_local_compact_context( options.clone(), sstable_store.clone(), hummock_meta_client.clone(), compactor_metrics.clone(), sstable_id_manager.clone(), filter_key_extractor_manager.clone(), + CompactorRuntimeConfig::default(), )); let seal_epoch = Arc::new(AtomicU64::new(pinned_version.max_committed_epoch())); @@ -279,7 +281,7 @@ impl HummockStorage { let version_id = version.id; self.hummock_event_sender .send(HummockEvent::VersionUpdate( - pin_version_response::Payload::PinnedVersion(version), + version_update_payload::Payload::PinnedVersion(version), )) .unwrap(); @@ -316,7 +318,7 @@ impl HummockStorage { /// Creates a [`HummockStorage`] with default stats. Should only be used by tests. pub async fn for_test( - options: Arc, + options: Arc, sstable_store: SstableStoreRef, hummock_meta_client: Arc, notification_client: impl NotificationClient, @@ -334,8 +336,8 @@ impl HummockStorage { .await } - pub fn options(&self) -> &Arc { - &self.context.options + pub fn storage_opts(&self) -> &Arc { + &self.context.storage_opts } pub fn version_reader(&self) -> &HummockVersionReader { @@ -348,7 +350,7 @@ pub async fn get_from_sstable_info( sstable_info: &SstableInfo, full_key: FullKey<&[u8]>, read_options: &ReadOptions, - dist_key_hash: Option, + dist_key_hash: Option, local_stats: &mut StoreLocalStatistic, ) -> HummockResult>> { let sstable = sstable_store_ref.sstable(sstable_info, local_stats).await?; @@ -411,16 +413,15 @@ pub async fn get_from_sstable_info( pub fn hit_sstable_bloom_filter( sstable_info_ref: &Sstable, - prefix_hash: u32, + prefix_hash: u64, local_stats: &mut StoreLocalStatistic, ) -> bool { local_stats.bloom_filter_check_counts += 1; - let surely_not_have = sstable_info_ref.surely_not_have_hashvalue(prefix_hash); - - if surely_not_have { - local_stats.bloom_filter_true_negative_count += 1; + let may_exist = sstable_info_ref.may_match_hash(prefix_hash); + if !may_exist { + local_stats.bloom_filter_true_negative_counts += 1; } - !surely_not_have + may_exist } /// Get `user_value` from `OrderSortedUncommittedData`. If not get successful, return None. @@ -433,10 +434,9 @@ pub async fn get_from_order_sorted_uncommitted_data( ) -> StorageResult<(Option>, i32)> { let mut table_counts = 0; let epoch = full_key.epoch; - let dist_key_hash = read_options - .prefix_hint - .as_ref() - .map(|dist_key| Sstable::hash_for_bloom_filter(dist_key.as_ref())); + let dist_key_hash = read_options.prefix_hint.as_ref().map(|dist_key| { + Sstable::hash_for_bloom_filter(dist_key.as_ref(), read_options.table_id.table_id()) + }); let min_epoch = gen_min_epoch(epoch, read_options.retention_seconds.as_ref()); @@ -496,7 +496,7 @@ pub fn get_from_batch( #[derive(Clone)] pub struct HummockStorageV1 { - options: Arc, + options: Arc, local_version_manager: LocalVersionManagerRef, @@ -521,7 +521,7 @@ pub struct HummockStorageV1 { impl HummockStorageV1 { /// Creates a [`HummockStorageV1`]. 
pub async fn new( - options: Arc, + options: Arc, sstable_store: SstableStoreRef, hummock_meta_client: Arc, notification_client: impl NotificationClient, @@ -552,7 +552,7 @@ impl HummockStorageV1 { observer_manager.start().await; let hummock_version = match event_rx.recv().await { - Some(HummockEvent::VersionUpdate(pin_version_response::Payload::PinnedVersion(version))) => version, + Some(HummockEvent::VersionUpdate(version_update_payload::Payload::PinnedVersion(version))) => version, _ => unreachable!("the hummock observer manager is the first one to take the event tx. Should be full hummock version") }; @@ -563,16 +563,17 @@ impl HummockStorageV1 { hummock_meta_client.clone(), )); - let compactor_context = Arc::new(Context::new_local_compact_context( + let compactor_context = Arc::new(CompactorContext::new_local_compact_context( options.clone(), sstable_store.clone(), hummock_meta_client.clone(), compactor_metrics.clone(), sstable_id_manager.clone(), filter_key_extractor_manager.clone(), + CompactorRuntimeConfig::default(), )); - let buffer_tracker = BufferTracker::from_storage_config(&options); + let buffer_tracker = BufferTracker::from_storage_opts(&options); let local_version_manager = LocalVersionManager::new(pinned_version.clone(), compactor_context, buffer_tracker); @@ -622,7 +623,7 @@ impl HummockStorageV1 { Ok(instance) } - pub fn options(&self) -> &Arc { + pub fn options(&self) -> &Arc { &self.options } diff --git a/src/storage/src/hummock/observer_manager.rs b/src/storage/src/hummock/observer_manager.rs index 542fa620cfe74..8f7c519c55ea3 100644 --- a/src/storage/src/hummock/observer_manager.rs +++ b/src/storage/src/hummock/observer_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ use risingwave_hummock_sdk::filter_key_extractor::{ FilterKeyExtractorImpl, FilterKeyExtractorManagerRef, }; use risingwave_pb::catalog::Table; -use risingwave_pb::hummock::pin_version_response; +use risingwave_pb::hummock::version_update_payload; use risingwave_pb::meta::subscribe_response::{Info, Operation}; use risingwave_pb::meta::SubscribeResponse; use tokio::sync::mpsc::UnboundedSender; @@ -64,7 +64,7 @@ impl ObserverState for HummockObserverNode { let _ = self .version_update_sender .send(HummockEvent::VersionUpdate( - pin_version_response::Payload::VersionDeltas(hummock_version_deltas), + version_update_payload::Payload::VersionDeltas(hummock_version_deltas), )) .inspect_err(|e| { tracing::error!("unable to send version delta: {:?}", e); @@ -96,7 +96,7 @@ impl ObserverState for HummockObserverNode { let _ = self .version_update_sender .send(HummockEvent::VersionUpdate( - pin_version_response::Payload::PinnedVersion( + version_update_payload::Payload::PinnedVersion( snapshot .hummock_version .expect("should get hummock version"), diff --git a/src/storage/src/hummock/shared_buffer/mod.rs b/src/storage/src/hummock/shared_buffer/mod.rs index f6d7674d85734..c3b8dbade3b4b 100644 --- a/src/storage/src/hummock/shared_buffer/mod.rs +++ b/src/storage/src/hummock/shared_buffer/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
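// `hit_sstable_bloom_filter` above (now keyed by a 64-bit hash from
// `Sstable::hash_for_bloom_filter(dist_key, table_id)`) only reports whether a key *may* be
// present: `false` is a guaranteed miss and is tallied as a true negative, while `true` can
// still be a false positive and must be confirmed by an actual lookup. A toy filter illustrating
// that contract; this is a deliberately naive sketch, not the crate's `XorFilterBuilder`:
struct ToyFilter {
    bits: Vec<bool>,
}

impl ToyFilter {
    fn new(len: usize) -> Self {
        Self { bits: vec![false; len] }
    }

    fn insert(&mut self, hash: u64) {
        let len = self.bits.len() as u64;
        self.bits[(hash % len) as usize] = true;
        self.bits[(hash.rotate_left(32) % len) as usize] = true;
    }

    /// `false` is definitive; `true` only means "possibly present".
    fn may_match_hash(&self, hash: u64) -> bool {
        let len = self.bits.len() as u64;
        self.bits[(hash % len) as usize] && self.bits[(hash.rotate_left(32) % len) as usize]
    }
}

fn main() {
    let mut filter = ToyFilter::new(1024);
    filter.insert(0xdead_beef);
    // A key that was inserted is never reported as absent (no false negatives)...
    assert!(filter.may_match_hash(0xdead_beef));
    // ...while a negative answer is definitive and lets the reader skip the SST entirely,
    // which is what `bloom_filter_true_negative_counts` tallies above.
    assert!(!filter.may_match_hash(42));
}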
diff --git a/src/storage/src/hummock/shared_buffer/shared_buffer_batch.rs b/src/storage/src/hummock/shared_buffer/shared_buffer_batch.rs index 4b52d8c87ff3f..8ac4682d3dd85 100644 --- a/src/storage/src/hummock/shared_buffer/shared_buffer_batch.rs +++ b/src/storage/src/hummock/shared_buffer/shared_buffer_batch.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::Ordering; use std::fmt::Debug; use std::future::Future; use std::marker::PhantomData; @@ -23,7 +24,7 @@ use std::sync::{Arc, LazyLock}; use bytes::Bytes; use itertools::Itertools; use risingwave_common::catalog::TableId; -use risingwave_hummock_sdk::key::{FullKey, TableKey, UserKey}; +use risingwave_hummock_sdk::key::{FullKey, TableKey, TableKeyRange, UserKey}; use crate::hummock::iterator::{ Backward, DeleteRangeIterator, DirectionEnum, Forward, HummockIterator, @@ -194,6 +195,33 @@ impl SharedBufferBatch { .le(table_key.as_ref()) } + pub fn range_exists(&self, table_key_range: &TableKeyRange) -> bool { + self.inner + .binary_search_by(|m| { + let key = &m.0; + let too_left = match &table_key_range.0 { + std::ops::Bound::Included(range_start) => range_start.as_ref() > key.as_ref(), + std::ops::Bound::Excluded(range_start) => range_start.as_ref() >= key.as_ref(), + std::ops::Bound::Unbounded => false, + }; + if too_left { + return Ordering::Less; + } + + let too_right = match &table_key_range.1 { + std::ops::Bound::Included(range_end) => range_end.as_ref() < key.as_ref(), + std::ops::Bound::Excluded(range_end) => range_end.as_ref() <= key.as_ref(), + std::ops::Bound::Unbounded => false, + }; + if too_right { + return Ordering::Greater; + } + + Ordering::Equal + }) + .is_ok() + } + pub fn into_directed_iter(self) -> SharedBufferBatchIterator { SharedBufferBatchIterator::::new(self.inner, self.table_id, self.epoch) } @@ -498,7 +526,10 @@ impl DeleteRangeIterator for SharedBufferDeleteRangeIterator { #[cfg(test)] mod tests { + use std::ops::Bound::{Excluded, Included}; + use itertools::Itertools; + use risingwave_hummock_sdk::key::map_table_key_range; use super::*; use crate::hummock::iterator::test_utils::{ @@ -764,4 +795,48 @@ mod tests { .await .unwrap(); } + + #[tokio::test] + async fn test_shared_buffer_batch_range_existx() { + let epoch = 1; + let shared_buffer_items = vec![ + (Vec::from("a_1"), HummockValue::put(Bytes::from("value1"))), + (Vec::from("a_3"), HummockValue::put(Bytes::from("value2"))), + (Vec::from("a_5"), HummockValue::put(Bytes::from("value3"))), + (Vec::from("b_2"), HummockValue::put(Bytes::from("value3"))), + ]; + let shared_buffer_batch = SharedBufferBatch::for_test( + transform_shared_buffer(shared_buffer_items), + epoch, + Default::default(), + ); + + let range = (Included(Vec::from("a")), Excluded(Vec::from("b"))); + assert!(shared_buffer_batch.range_exists(&map_table_key_range(range))); + let range = (Included(Vec::from("a_")), Excluded(Vec::from("b_"))); + assert!(shared_buffer_batch.range_exists(&map_table_key_range(range))); + let range = (Included(Vec::from("a_1")), Included(Vec::from("a_1"))); + assert!(shared_buffer_batch.range_exists(&map_table_key_range(range))); + let range = (Included(Vec::from("a_1")), Included(Vec::from("a_2"))); + 
assert!(shared_buffer_batch.range_exists(&map_table_key_range(range))); + let range = (Included(Vec::from("a_0x")), Included(Vec::from("a_2x"))); + assert!(shared_buffer_batch.range_exists(&map_table_key_range(range))); + let range = (Included(Vec::from("a_")), Excluded(Vec::from("c_"))); + assert!(shared_buffer_batch.range_exists(&map_table_key_range(range))); + let range = (Included(Vec::from("b_0x")), Included(Vec::from("b_2x"))); + assert!(shared_buffer_batch.range_exists(&map_table_key_range(range))); + let range = (Included(Vec::from("b_2")), Excluded(Vec::from("c_1x"))); + assert!(shared_buffer_batch.range_exists(&map_table_key_range(range))); + + let range = (Included(Vec::from("a_0")), Excluded(Vec::from("a_1"))); + assert!(!shared_buffer_batch.range_exists(&map_table_key_range(range))); + let range = (Included(Vec::from("a__0")), Excluded(Vec::from("a__5"))); + assert!(!shared_buffer_batch.range_exists(&map_table_key_range(range))); + let range = (Included(Vec::from("b_1")), Excluded(Vec::from("b_2"))); + assert!(!shared_buffer_batch.range_exists(&map_table_key_range(range))); + let range = (Included(Vec::from("b_3")), Excluded(Vec::from("c_1"))); + assert!(!shared_buffer_batch.range_exists(&map_table_key_range(range))); + let range = (Included(Vec::from("b__x")), Excluded(Vec::from("c__x"))); + assert!(!shared_buffer_batch.range_exists(&map_table_key_range(range))); + } } diff --git a/src/storage/src/hummock/shared_buffer/shared_buffer_uploader.rs b/src/storage/src/hummock/shared_buffer/shared_buffer_uploader.rs index 9e992c6e81dcd..cd252bf8eaf92 100644 --- a/src/storage/src/hummock/shared_buffer/shared_buffer_uploader.rs +++ b/src/storage/src/hummock/shared_buffer/shared_buffer_uploader.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ use std::sync::Arc; use risingwave_common::catalog::TableId; use risingwave_hummock_sdk::{CompactionGroupId, HummockEpoch, LocalSstableInfo}; -use crate::hummock::compactor::{compact, Context}; +use crate::hummock::compactor::{compact, CompactorContext}; use crate::hummock::shared_buffer::OrderSortedUncommittedData; use crate::hummock::HummockResult; @@ -26,11 +26,11 @@ pub(crate) type UploadTaskPayload = OrderSortedUncommittedData; pub(crate) type UploadTaskResult = HummockResult>; pub struct SharedBufferUploader { - compactor_context: Arc, + compactor_context: Arc, } impl SharedBufferUploader { - pub fn new(compactor_context: Arc) -> Self { + pub fn new(compactor_context: Arc) -> Self { Self { compactor_context } } } diff --git a/src/storage/src/hummock/sstable/backward_sstable_iterator.rs b/src/storage/src/hummock/sstable/backward_sstable_iterator.rs index 511ad6f89e9f7..d8e55506309f4 100644 --- a/src/storage/src/hummock/sstable/backward_sstable_iterator.rs +++ b/src/storage/src/hummock/sstable/backward_sstable_iterator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
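The new `SharedBufferBatch::range_exists` above answers "does any key of this batch fall inside the range?" with a single `binary_search_by`, mapping a probed key that lies before the range to `Less` and one past the range to `Greater`. A self-contained sketch of the same comparator over a plain sorted slice (illustrative stand-in only; the real method operates on a `TableKeyRange` and the batch payload):

```rust
use std::cmp::Ordering;
use std::ops::Bound;

/// Returns true if any element of the sorted slice falls inside `range`.
/// Mirrors the comparator used by `SharedBufferBatch::range_exists`.
fn sorted_range_exists(sorted: &[&str], range: &(Bound<&str>, Bound<&str>)) -> bool {
    sorted
        .binary_search_by(|key| {
            // Probed key lies before the start bound: steer the search right.
            let too_left = match range.0 {
                Bound::Included(start) => start > *key,
                Bound::Excluded(start) => start >= *key,
                Bound::Unbounded => false,
            };
            if too_left {
                return Ordering::Less;
            }
            // Probed key lies after the end bound: steer the search left.
            let too_right = match range.1 {
                Bound::Included(end) => end < *key,
                Bound::Excluded(end) => end <= *key,
                Bound::Unbounded => false,
            };
            if too_right {
                return Ordering::Greater;
            }
            // Probed key is inside the range: any such hit is enough.
            Ordering::Equal
        })
        .is_ok()
}

fn main() {
    let keys = ["a_1", "a_3", "a_5", "b_2"];
    assert!(sorted_range_exists(&keys, &(Bound::Included("a"), Bound::Excluded("b"))));
    assert!(!sorted_range_exists(&keys, &(Bound::Included("b_3"), Bound::Excluded("c_1"))));
}
```

Any `Ok` result means at least one key landed inside the bounds, which is all the caller needs.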
diff --git a/src/storage/src/hummock/sstable/block.rs b/src/storage/src/hummock/sstable/block.rs index bfb214153544b..a4dc167d83a94 100644 --- a/src/storage/src/hummock/sstable/block.rs +++ b/src/storage/src/hummock/sstable/block.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,10 +17,11 @@ use std::io::{Read, Write}; use std::ops::Range; use bytes::{Buf, BufMut, Bytes, BytesMut}; +use risingwave_hummock_sdk::key::MAX_KEY_LEN; use risingwave_hummock_sdk::KeyComparator; use {lz4, zstd}; -use super::utils::{bytes_diff, xxhash64_verify, CompressionAlgorithm}; +use super::utils::{bytes_diff_below_max_key_length, xxhash64_verify, CompressionAlgorithm}; use crate::hummock::sstable::utils::xxhash64_checksum; use crate::hummock::{HummockError, HummockResult}; @@ -150,13 +151,21 @@ pub struct KeyPrefix { impl KeyPrefix { pub fn encode(&self, buf: &mut impl BufMut) { buf.put_u16(self.overlap as u16); - buf.put_u16(self.diff as u16); + if self.diff >= MAX_KEY_LEN { + buf.put_u16(MAX_KEY_LEN as u16); + buf.put_u32(self.diff as u32); + } else { + buf.put_u16(self.diff as u16); + } buf.put_u32(self.value as u32); } pub fn decode(buf: &mut impl Buf, offset: usize) -> Self { let overlap = buf.get_u16() as usize; - let diff = buf.get_u16() as usize; + let mut diff = buf.get_u16() as usize; + if diff == MAX_KEY_LEN { + diff = buf.get_u32() as usize; + } let value = buf.get_u32() as usize; Self { overlap, @@ -168,7 +177,11 @@ impl KeyPrefix { /// Encoded length. fn len(&self) -> usize { - 2 + 2 + 4 + if self.diff >= MAX_KEY_LEN { + 12 // 2 + 2 + 4 + 4 + } else { + 8 // 2 + 2 + 4 + } } /// Gets overlap len. 
@@ -249,7 +262,10 @@ impl BlockBuilder { /// # Format /// /// ```plain - /// entry (kv pair): | overlap len (2B) | diff len (2B) | value len(4B) | diff key | value | + /// For diff len < MAX_KEY_LEN (65536) + /// entry (kv pair): | overlap len (2B) | diff len (2B) | value len(4B) | diff key | value | + /// For diff len >= MAX_KEY_LEN (65536) + /// entry (kv pair): | overlap len (2B) | MAX_KEY_LEN (2B) | diff len (4B) | value len(4B) | diff key | value | /// ``` /// /// # Panics @@ -268,7 +284,7 @@ impl BlockBuilder { self.restart_points.push(self.buf.len() as u32); key } else { - bytes_diff(&self.last_key, key) + bytes_diff_below_max_key_length(&self.last_key, key) }; let prefix = KeyPrefix { @@ -465,4 +481,39 @@ mod tests { buf.put_u64(!epoch); buf.freeze() } + + #[test] + fn test_block_enc_large_key() { + let options = BlockBuilderOptions::default(); + let mut builder = BlockBuilder::new(options); + let medium_key = vec![b'a'; MAX_KEY_LEN - 500]; + let large_key = vec![b'b'; MAX_KEY_LEN]; + let xlarge_key = vec![b'c'; MAX_KEY_LEN + 500]; + + builder.add(&full_key(&medium_key, 1), b"v1"); + builder.add(&full_key(&large_key, 2), b"v2"); + builder.add(&full_key(&xlarge_key, 3), b"v3"); + let capacity = builder.uncompressed_block_size(); + let buf = builder.build().to_vec(); + let block = Box::new(Block::decode(buf.into(), capacity).unwrap()); + let mut bi = BlockIterator::new(BlockHolder::from_owned_block(block)); + + bi.seek_to_first(); + assert!(bi.is_valid()); + assert_eq!(&full_key(&medium_key, 1)[..], bi.key()); + assert_eq!(b"v1", bi.value()); + + bi.next(); + assert!(bi.is_valid()); + assert_eq!(&full_key(&large_key, 2)[..], bi.key()); + assert_eq!(b"v2", bi.value()); + + bi.next(); + assert!(bi.is_valid()); + assert_eq!(&full_key(&xlarge_key, 3)[..], bi.key()); + assert_eq!(b"v3", bi.value()); + + bi.next(); + assert!(!bi.is_valid()); + } } diff --git a/src/storage/src/hummock/sstable/block_iterator.rs b/src/storage/src/hummock/sstable/block_iterator.rs index 2e51b2ab7ba68..26550a39648f2 100644 --- a/src/storage/src/hummock/sstable/block_iterator.rs +++ b/src/storage/src/hummock/sstable/block_iterator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/sstable/bloom.rs b/src/storage/src/hummock/sstable/bloom.rs index 78a133aecfaa3..9f58d48ba8fbc 100644 --- a/src/storage/src/hummock/sstable/bloom.rs +++ b/src/storage/src/hummock/sstable/bloom.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,9 @@ use std::f64; use bytes::BufMut; +use super::filter::FilterBuilder; +use super::Sstable; + pub trait BitSlice { fn get_bit(&self, idx: usize) -> bool; fn bit_len(&self) -> usize; @@ -52,42 +55,114 @@ impl> BitSliceMut for T { } /// Bloom implements Bloom filter functionalities over a bit-slice of data. 
-pub struct Bloom<'a> { +#[allow(dead_code)] +#[derive(Clone)] +pub struct BloomFilterReader { /// data of filter in bits - filter: &'a [u8], + data: Vec, /// number of hash functions k: u8, } -impl<'a> Bloom<'a> { +impl BloomFilterReader { /// Creates a Bloom filter from a byte slice - pub fn new(buf: &'a [u8]) -> Self { - let filter = &buf[..buf.len() - 1]; + #[allow(dead_code)] + pub fn new(mut buf: Vec) -> Self { + if buf.len() <= 1 { + return Self { data: vec![], k: 0 }; + } let k = buf[buf.len() - 1]; - Self { filter, k } + buf.resize(buf.len() - 1, 0); + Self { data: buf, k } + } + + #[allow(dead_code)] + pub fn is_empty(&self) -> bool { + self.data.is_empty() } - /// Gets Bloom filter bits per key from entries count and FPR - pub fn bloom_bits_per_key(entries: usize, false_positive_rate: f64) -> usize { - let size = -1.0 * (entries as f64) * false_positive_rate.ln() / f64::consts::LN_2.powi(2); - let locs = (size / (entries as f64)).ceil(); - locs as usize + #[allow(dead_code)] + pub fn get_raw_data(&self) -> &[u8] { + &self.data + } + + /// Judges whether the hash value is in the table with the given false positive rate. + /// + /// Note: + /// - if the return value is false, then the table surely does not have the user key that has + /// the hash; + /// - if the return value is true, then the table may or may not have the user key that has + /// the hash actually, a.k.a. we don't know the answer. + #[allow(dead_code)] + pub fn may_match(&self, mut h: u32) -> bool { + if self.k > 30 || self.k == 00 { + // potential new encoding for short Bloom filters + true + } else { + let nbits = self.data.bit_len(); + let delta = (h >> 17) | (h << 15); + for _ in 0..self.k { + let bit_pos = h % (nbits as u32); + if !self.data.get_bit(bit_pos as usize) { + return false; + } + h = h.wrapping_add(delta); + } + true + } } +} + +pub struct BloomFilterBuilder { + key_hash_entries: Vec, + bits_per_key: usize, +} - /// Builds Bloom filter from key hashes - pub fn build_from_key_hashes(keys: &[u32], bits_per_key: usize) -> Vec { +impl BloomFilterBuilder { + pub fn new(bloom_false_positive: f64, capacity: usize) -> Self { + let key_hash_entries = if capacity > 0 { + Vec::with_capacity(capacity) + } else { + vec![] + }; + let bits_per_key = bloom_bits_per_key(capacity, bloom_false_positive); + Self { + key_hash_entries, + bits_per_key, + } + } +} + +/// Gets Bloom filter bits per key from entries count and FPR +pub fn bloom_bits_per_key(entries: usize, false_positive_rate: f64) -> usize { + let size = -1.0 * (entries as f64) * false_positive_rate.ln() / f64::consts::LN_2.powi(2); + let locs = (size / (entries as f64)).ceil(); + locs as usize +} + +impl FilterBuilder for BloomFilterBuilder { + fn add_key(&mut self, key: &[u8], table_id: u32) { + self.key_hash_entries + .push(Sstable::hash_for_bloom_filter_u32(key, table_id)); + } + + fn approximate_len(&self) -> usize { + self.key_hash_entries.len() * 4 + } + + fn finish(&mut self) -> Vec { // 0.69 is approximately ln(2) - let k = ((bits_per_key as f64) * 0.69) as u32; + let k = ((self.bits_per_key as f64) * 0.69) as u32; // limit k in [1, 30] let k = k.clamp(1, 30); // For small len(keys), we set a minimum Bloom filter length to avoid high FPR - let nbits = (keys.len() * bits_per_key).max(64); + let nbits = (self.key_hash_entries.len() * self.bits_per_key).max(64); let nbytes = (nbits + 7) / 8; // nbits is always multiplication of 8 let nbits = nbytes * 8; let mut filter = Vec::with_capacity(nbytes + 1); filter.resize(nbytes, 0); - for h in keys { + for 
h in &self.key_hash_entries { let mut h = *h; let delta = (h >> 17) | (h << 15); for _ in 0..k { @@ -97,37 +172,19 @@ impl<'a> Bloom<'a> { } } filter.put_u8(k as u8); + self.key_hash_entries.clear(); filter } - /// Judges whether the hash value is in the table with the given false positive rate. - /// - /// Note: - /// - if the return value is true, then the table surely does not have the user key that has - /// the hash; - /// - if the return value is false, then the table may or may not have the user key that has - /// the hash actually, a.k.a. we don't know the answer. - pub fn surely_not_have_hash(&self, mut h: u32) -> bool { - if self.k > 30 { - // potential new encoding for short Bloom filters - false - } else { - let nbits = self.filter.bit_len(); - let delta = (h >> 17) | (h << 15); - for _ in 0..self.k { - let bit_pos = h % (nbits as u32); - if !self.filter.get_bit(bit_pos as usize) { - return true; - } - h = h.wrapping_add(delta); - } - false - } + fn create(fpr: f64, capacity: usize) -> Self { + BloomFilterBuilder::new(fpr, capacity) } } #[cfg(test)] mod tests { + use std::ops::BitXor; + use bytes::Bytes; use xxhash_rust::xxh32; @@ -135,11 +192,10 @@ mod tests { #[test] fn test_small_bloom_filter() { - let hash: Vec = vec![b"hello".to_vec(), b"world".to_vec()] - .into_iter() - .map(|x| xxh32::xxh32(&x, 0)) - .collect(); - let buf = Bloom::build_from_key_hashes(&hash, 10); + let mut builder = BloomFilterBuilder::new(0.01, 2); + builder.add_key(b"hello", 0); + builder.add_key(b"world", 0); + let buf = builder.finish(); let check_hash: Vec = vec![ b"hello".to_vec(), @@ -148,16 +204,16 @@ mod tests { b"fool".to_vec(), ] .into_iter() - .map(|x| xxh32::xxh32(&x, 0)) + .map(|x| xxh32::xxh32(&x, 0).bitxor(0)) .collect(); - let f = Bloom::new(&buf); + let f = BloomFilterReader::new(buf); assert_eq!(f.k, 6); - assert!(!f.surely_not_have_hash(check_hash[0])); - assert!(!f.surely_not_have_hash(check_hash[1])); - assert!(f.surely_not_have_hash(check_hash[2])); - assert!(f.surely_not_have_hash(check_hash[3])); + assert!(f.may_match(check_hash[0])); + assert!(f.may_match(check_hash[1])); + assert!(!f.may_match(check_hash[2])); + assert!(!f.may_match(check_hash[3])); } fn false_positive_rate_case( @@ -165,23 +221,20 @@ mod tests { test_key_count: usize, expected_false_positive_rate: f64, ) { - let mut key_list = vec![]; - + let mut builder = BloomFilterBuilder::new(expected_false_positive_rate, preset_key_count); for i in 0..preset_key_count { let k = Bytes::from(format!("{:032}", i)); - let h = xxh32::xxh32(&k, 0); - key_list.push(h); + builder.add_key(&k, 0); } - let bits_per_key = Bloom::bloom_bits_per_key(key_list.len(), expected_false_positive_rate); - let vec = Bloom::build_from_key_hashes(&key_list, bits_per_key); - let filter = Bloom::new(&vec); + let data = builder.finish(); + let filter = BloomFilterReader::new(data); let mut true_count = 0; for i in preset_key_count..preset_key_count + test_key_count { let k = Bytes::from(format!("{:032}", i)); let h = xxh32::xxh32(&k, 0); - if filter.surely_not_have_hash(h) { + if !filter.may_match(h) { true_count += 1; } } diff --git a/src/storage/src/hummock/sstable/builder.rs b/src/storage/src/hummock/sstable/builder.rs index e46c8bc94d783..313c7fee698c7 100644 --- a/src/storage/src/hummock/sstable/builder.rs +++ b/src/storage/src/hummock/sstable/builder.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except 
in compliance with the License. @@ -17,24 +17,23 @@ use std::sync::Arc; use bytes::BytesMut; use risingwave_common::catalog::TableId; -use risingwave_common::config::StorageConfig; use risingwave_hummock_sdk::filter_key_extractor::{ FilterKeyExtractorImpl, FullKeyFilterKeyExtractor, }; -use risingwave_hummock_sdk::key::{user_key, FullKey}; +use risingwave_hummock_sdk::key::{user_key, FullKey, MAX_KEY_LEN}; use risingwave_hummock_sdk::table_stats::{TableStats, TableStatsMap}; use risingwave_hummock_sdk::{HummockEpoch, KeyComparator, LocalSstableInfo}; use risingwave_pb::hummock::SstableInfo; -use xxhash_rust::xxh32; -use super::bloom::Bloom; use super::utils::CompressionAlgorithm; use super::{ BlockBuilder, BlockBuilderOptions, BlockMeta, SstableMeta, SstableWriter, DEFAULT_BLOCK_SIZE, DEFAULT_ENTRY_SIZE, DEFAULT_RESTART_INTERVAL, VERSION, }; +use crate::hummock::sstable::{FilterBuilder, XorFilterBuilder}; use crate::hummock::value::HummockValue; use crate::hummock::{DeleteRangeTombstone, HummockResult}; +use crate::opts::StorageOpts; pub const DEFAULT_SSTABLE_SIZE: usize = 4 * 1024 * 1024; pub const DEFAULT_BLOOM_FALSE_POSITIVE: f64 = 0.001; @@ -52,8 +51,8 @@ pub struct SstableBuilderOptions { pub compression_algorithm: CompressionAlgorithm, } -impl From<&StorageConfig> for SstableBuilderOptions { - fn from(options: &StorageConfig) -> SstableBuilderOptions { +impl From<&StorageOpts> for SstableBuilderOptions { + fn from(options: &StorageOpts) -> SstableBuilderOptions { let capacity = (options.sstable_size_mb as usize) * (1 << 20); SstableBuilderOptions { capacity, @@ -85,7 +84,7 @@ pub struct SstableBuilderOutput { pub avg_value_size: usize, } -pub struct SstableBuilder { +pub struct SstableBuilder { /// Options. options: SstableBuilderOptions, /// Data writer. @@ -99,8 +98,6 @@ pub struct SstableBuilder { range_tombstones: Vec, /// `table_id` of added keys. table_ids: BTreeSet, - /// Hashes of user keys. - user_key_hashes: Vec, last_full_key: Vec, last_extract_key: Vec, /// Buffer for encoded key and value to avoid allocation. @@ -118,23 +115,28 @@ pub struct SstableBuilder { /// `last_table_stats` accumulates stats for `last_table_id` and finalizes it in `table_stats` /// by `finalize_last_table_stats` last_table_stats: TableStats, + filter_builder: F, } -impl SstableBuilder { +impl SstableBuilder { pub fn for_test(sstable_id: u64, writer: W, options: SstableBuilderOptions) -> Self { Self::new( sstable_id, writer, + XorFilterBuilder::new(options.capacity / DEFAULT_ENTRY_SIZE + 1), options, Arc::new(FilterKeyExtractorImpl::FullKey( FullKeyFilterKeyExtractor::default(), )), ) } +} +impl SstableBuilder { pub fn new( sstable_id: u64, writer: W, + filter_builder: F, options: SstableBuilderOptions, filter_key_extractor: Arc, ) -> Self { @@ -146,9 +148,9 @@ impl SstableBuilder { restart_interval: options.restart_interval, compression_algorithm: options.compression_algorithm, }), + filter_builder, block_metas: Vec::with_capacity(options.capacity / options.block_capacity + 1), table_ids: BTreeSet::new(), - user_key_hashes: Vec::with_capacity(options.capacity / DEFAULT_ENTRY_SIZE + 1), last_table_id: None, raw_key: BytesMut::new(), raw_value: BytesMut::new(), @@ -184,14 +186,19 @@ impl SstableBuilder { value: HummockValue<&[u8]>, is_new_user_key: bool, ) -> HummockResult<()> { - // Rotate block builder if the previous one has been built. 
- if self.block_builder.is_empty() { - self.block_metas.push(BlockMeta { - offset: self.writer.data_len() as u32, - len: 0, - smallest_key: full_key.encode(), - uncompressed_size: 0, - }) + const LARGE_KEY_LEN: usize = MAX_KEY_LEN >> 1; + + let mut is_new_table = false; + + let table_key_len = full_key.user_key.table_key.as_ref().len(); + if table_key_len >= LARGE_KEY_LEN { + let table_id = full_key.user_key.table_id.table_id(); + tracing::warn!( + "A large key (table_id={}, len={}, epoch={}) is added to block", + table_id, + table_key_len, + full_key.epoch + ); } // TODO: refine me @@ -199,10 +206,12 @@ impl SstableBuilder { value.encode(&mut self.raw_value); if is_new_user_key { let table_id = full_key.user_key.table_id.table_id(); - if self.last_table_id.is_none() || self.last_table_id.unwrap() != table_id { + is_new_table = self.last_table_id.is_none() || self.last_table_id.unwrap() != table_id; + if is_new_table { self.table_ids.insert(table_id); self.finalize_last_table_stats(); self.last_table_id = Some(table_id); + self.last_extract_key.clear(); } let mut extract_key = user_key(&self.raw_key); extract_key = self.filter_key_extractor.extract(extract_key); @@ -212,7 +221,7 @@ impl SstableBuilder { // 2. extract_key key is not duplicate if !extract_key.is_empty() && extract_key != self.last_extract_key.as_slice() { // avoid duplicate add to bloom filter - self.user_key_hashes.push(xxh32::xxh32(extract_key, 0)); + self.filter_builder.add_key(extract_key, table_id); self.last_extract_key.clear(); self.last_extract_key.extend_from_slice(extract_key); } @@ -222,6 +231,20 @@ impl SstableBuilder { self.total_key_count += 1; self.last_table_stats.total_key_count += 1; + if is_new_table && !self.block_builder.is_empty() { + self.build_block().await?; + } + + // Rotate block builder if the previous one has been built. 
+ if self.block_builder.is_empty() { + self.block_metas.push(BlockMeta { + offset: self.writer.data_len() as u32, + len: 0, + smallest_key: full_key.encode(), + uncompressed_size: 0, + }) + } + self.block_builder .add(self.raw_key.as_ref(), self.raw_value.as_ref()); self.last_table_stats.total_key_size += full_key.encoded_len() as i64; @@ -292,18 +315,15 @@ impl SstableBuilder { } self.total_key_count += self.range_tombstones.len() as u64; self.stale_key_count += self.range_tombstones.len() as u64; + let bloom_filter = if self.options.bloom_false_positive > 0.0 { + self.filter_builder.finish() + } else { + vec![] + }; let mut meta = SstableMeta { block_metas: self.block_metas, - bloom_filter: if self.options.bloom_false_positive > 0.0 { - let bits_per_key = Bloom::bloom_bits_per_key( - self.user_key_hashes.len(), - self.options.bloom_false_positive, - ); - Bloom::build_from_key_hashes(&self.user_key_hashes, bits_per_key) - } else { - vec![] - }, + bloom_filter, estimated_size: 0, key_count: self.total_key_count as u32, smallest_key, @@ -364,7 +384,7 @@ impl SstableBuilder { pub fn approximate_len(&self) -> usize { self.writer.data_len() + self.block_builder.approximate_len() - + self.user_key_hashes.len() * 4 + + self.filter_builder.approximate_len() } async fn build_block(&mut self) -> HummockResult<()> { @@ -383,11 +403,11 @@ impl SstableBuilder { } pub fn len(&self) -> usize { - self.user_key_hashes.len() + self.total_key_count as usize } pub fn is_empty(&self) -> bool { - self.user_key_hashes.is_empty() + self.total_key_count > 0 } /// Returns true if we roughly reached capacity @@ -510,7 +530,7 @@ pub(super) mod tests { assert_eq!(table.has_bloom_filter(), with_blooms); for i in 0..key_count { let full_key = test_key_of(i); - assert!(!table.surely_not_have_dist_key(full_key.user_key.encode().as_slice())); + assert!(table.may_match(full_key.user_key.encode().as_slice())); } } diff --git a/src/storage/src/hummock/sstable/delete_range_aggregator.rs b/src/storage/src/hummock/sstable/delete_range_aggregator.rs index 424efa84d0801..a56d0b0d743f3 100644 --- a/src/storage/src/hummock/sstable/delete_range_aggregator.rs +++ b/src/storage/src/hummock/sstable/delete_range_aggregator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/sstable/filter.rs b/src/storage/src/hummock/sstable/filter.rs new file mode 100644 index 0000000000000..5ac41f8c74c14 --- /dev/null +++ b/src/storage/src/hummock/sstable/filter.rs @@ -0,0 +1,26 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. + +pub trait FilterBuilder: Send { + /// add key which need to be filter for construct filter data. 
+ fn add_key(&mut self, dist_key: &[u8], table_id: u32); + /// Builds Bloom filter from key hashes + fn finish(&mut self) -> Vec; + /// approximate memory of filter builder + fn approximate_len(&self) -> usize; + + fn create(fpr: f64, capacity: usize) -> Self; +} diff --git a/src/storage/src/hummock/sstable/forward_sstable_iterator.rs b/src/storage/src/hummock/sstable/forward_sstable_iterator.rs index 005ae2b008a16..acf718b94d8a1 100644 --- a/src/storage/src/hummock/sstable/forward_sstable_iterator.rs +++ b/src/storage/src/hummock/sstable/forward_sstable_iterator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/sstable/mod.rs b/src/storage/src/hummock/sstable/mod.rs index c51b720c73a43..14a7f472f2c00 100644 --- a/src/storage/src/hummock/sstable/mod.rs +++ b/src/storage/src/hummock/sstable/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,12 +18,16 @@ mod block; use std::fmt::{Debug, Formatter}; +use std::ops::BitXor; pub use block::*; mod block_iterator; pub use block_iterator::*; mod bloom; -use bloom::Bloom; +mod xor_filter; +pub use bloom::BloomFilterBuilder; +pub use xor_filter::XorFilterBuilder; +use xor_filter::XorFilterReader; pub mod builder; pub use builder::*; pub mod writer; @@ -42,16 +46,19 @@ use risingwave_hummock_sdk::{HummockEpoch, HummockSstableId}; use risingwave_pb::hummock::{KeyRange, SstableInfo}; mod delete_range_aggregator; +mod filter; mod sstable_id_manager; mod utils; + pub use delete_range_aggregator::{ get_delete_range_epoch_from_sstable, DeleteRangeAggregator, DeleteRangeAggregatorBuilder, RangeTombstonesCollector, SstableDeleteRangeIterator, }; +pub use filter::FilterBuilder; pub use sstable_id_manager::*; pub use utils::CompressionAlgorithm; use utils::{get_length_prefixed_slice, put_length_prefixed_slice}; -use xxhash_rust::xxh32; +use xxhash_rust::{xxh32, xxh64}; use self::utils::{xxhash64_checksum, xxhash64_verify}; use super::{HummockError, HummockResult}; @@ -120,6 +127,7 @@ impl DeleteRangeTombstone { pub struct Sstable { pub id: HummockSstableId, pub meta: SstableMeta, + pub filter_reader: XorFilterReader, } impl Debug for Sstable { @@ -132,36 +140,51 @@ impl Debug for Sstable { } impl Sstable { - pub fn new(id: HummockSstableId, meta: SstableMeta) -> Self { - Self { id, meta } + pub fn new(id: HummockSstableId, mut meta: SstableMeta) -> Self { + let filter_data = std::mem::take(&mut meta.bloom_filter); + let filter_reader = XorFilterReader::new(filter_data); + Self { + id, + meta, + filter_reader, + } } + #[inline(always)] pub fn has_bloom_filter(&self) -> bool { - !self.meta.bloom_filter.is_empty() + !self.filter_reader.is_empty() } - pub fn surely_not_have_dist_key(&self, dist_key: &[u8]) -> bool { + pub fn may_match(&self, dist_key: &[u8]) -> bool { let enable_bloom_filter: fn() -> bool = || { fail_point!("disable_bloom_filter", |_| false); true }; if enable_bloom_filter() && self.has_bloom_filter() { - let hash = xxh32::xxh32(dist_key, 0); - self.surely_not_have_hashvalue(hash) + let hash = xxh64::xxh64(dist_key, 0); + self.may_match_hash(hash) } else { - false + true } } #[inline(always)] - pub fn hash_for_bloom_filter(dist_key: &[u8]) -> u32 { - 
xxh32::xxh32(dist_key, 0) + pub fn hash_for_bloom_filter_u32(dist_key: &[u8], table_id: u32) -> u32 { + let dist_key_hash = xxh32::xxh32(dist_key, 0); + // congyi adds this because he aims to dedup keys in different tables + table_id.bitxor(dist_key_hash) + } + + #[inline(always)] + pub fn hash_for_bloom_filter(dist_key: &[u8], table_id: u32) -> u64 { + let dist_key_hash = xxh64::xxh64(dist_key, 0); + // congyi adds this because he aims to dedup keys in different tables + (table_id as u64).bitxor(dist_key_hash) } #[inline(always)] - pub fn surely_not_have_hashvalue(&self, hash: u32) -> bool { - let bloom = Bloom::new(&self.meta.bloom_filter); - bloom.surely_not_have_hash(hash) + pub fn may_match_hash(&self, hash: u64) -> bool { + self.filter_reader.may_match(hash) } pub fn block_count(&self) -> usize { @@ -170,7 +193,7 @@ impl Sstable { #[inline] pub fn estimate_size(&self) -> usize { - 8 /* id */ + self.meta.encoded_size() + 8 /* id */ + self.filter_reader.estimate_size() + self.meta.encoded_size() } #[cfg(test)] diff --git a/src/storage/src/hummock/sstable/multi_builder.rs b/src/storage/src/hummock/sstable/multi_builder.rs index 346f23e247d4c..6d6e4900e8c13 100644 --- a/src/storage/src/hummock/sstable/multi_builder.rs +++ b/src/storage/src/hummock/sstable/multi_builder.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,12 +22,13 @@ use risingwave_hummock_sdk::LocalSstableInfo; use tokio::task::JoinHandle; use crate::hummock::compactor::task_progress::TaskProgress; +use crate::hummock::sstable::filter::FilterBuilder; use crate::hummock::sstable_store::SstableStoreRef; use crate::hummock::value::HummockValue; use crate::hummock::{ BatchUploadWriter, CachePolicy, DeleteRangeTombstone, HummockResult, MemoryLimiter, RangeTombstonesCollector, SstableBuilder, SstableBuilderOptions, SstableWriter, - SstableWriterOptions, + SstableWriterOptions, XorFilterBuilder, }; use crate::monitor::CompactorMetrics; @@ -36,7 +37,8 @@ pub type UploadJoinHandle = JoinHandle>; #[async_trait::async_trait] pub trait TableBuilderFactory { type Writer: SstableWriter; - async fn open_builder(&self) -> HummockResult>; + type Filter: FilterBuilder; + async fn open_builder(&mut self) -> HummockResult>; } pub struct SplitTableOutput { @@ -57,7 +59,7 @@ where sst_outputs: Vec, - current_builder: Option>, + current_builder: Option>, /// Statistics. pub compactor_metrics: Arc, @@ -251,9 +253,12 @@ impl LocalTableBuilderFactory { #[async_trait::async_trait] impl TableBuilderFactory for LocalTableBuilderFactory { + type Filter = XorFilterBuilder; type Writer = BatchUploadWriter; - async fn open_builder(&self) -> HummockResult> { + async fn open_builder( + &mut self, + ) -> HummockResult> { let id = self.next_id.fetch_add(1, SeqCst); let tracker = self.limiter.require_memory(1).await; let writer_options = SstableWriterOptions { diff --git a/src/storage/src/hummock/sstable/sstable_id_manager.rs b/src/storage/src/hummock/sstable/sstable_id_manager.rs index 79d87e39cc734..b2a371a55951d 100644 --- a/src/storage/src/hummock/sstable/sstable_id_manager.rs +++ b/src/storage/src/hummock/sstable/sstable_id_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
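In `sstable/mod.rs` above, the filter hash now mixes the table id into the key hash so identical distribution keys from different tables produce distinct filter entries, and the negated `surely_not_have_*` helpers become `may_match` / `may_match_hash`. A standalone sketch of the hashing step (the XOR-with-table-id logic is copied from the diff; the free-function wrapper is only for illustration):

```rust
use std::ops::BitXor;
use xxhash_rust::xxh64;

// 64-bit filter hash: xxh64 of the distribution key XORed with the table id,
// as fed into the new filter builders via FilterBuilder::add_key.
fn filter_key_hash(dist_key: &[u8], table_id: u32) -> u64 {
    (table_id as u64).bitxor(xxh64::xxh64(dist_key, 0))
}

fn main() {
    // The same key in two tables now yields two distinct filter entries.
    assert_ne!(filter_key_hash(b"user_42", 1), filter_key_hash(b"user_42", 2));
}
```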
diff --git a/src/storage/src/hummock/sstable/utils.rs b/src/storage/src/hummock/sstable/utils.rs index d11cf45209f73..5abd76dbd075c 100644 --- a/src/storage/src/hummock/sstable/utils.rs +++ b/src/storage/src/hummock/sstable/utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,9 +14,9 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use std::cmp::{self}; use std::ptr; +use risingwave_hummock_sdk::key::MAX_KEY_LEN; use xxhash_rust::xxh64; use super::{HummockError, HummockResult}; @@ -30,8 +30,8 @@ unsafe fn u32(ptr: *const u8) -> u32 { } #[inline] -pub fn bytes_diff<'a>(base: &[u8], target: &'a [u8]) -> &'a [u8] { - let end = cmp::min(base.len(), target.len()); +pub fn bytes_diff_below_max_key_length<'a>(base: &[u8], target: &'a [u8]) -> &'a [u8] { + let end = base.len().min(target.len()).min(MAX_KEY_LEN); let mut i = 0; unsafe { while i + 8 <= end { diff --git a/src/storage/src/hummock/sstable/writer.rs b/src/storage/src/hummock/sstable/writer.rs index c2ac0bf02d4fb..95eedba42dee2 100644 --- a/src/storage/src/hummock/sstable/writer.rs +++ b/src/storage/src/hummock/sstable/writer.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -74,8 +74,8 @@ impl SstableWriter for InMemWriter { mod tests { use bytes::Bytes; - use itertools::Itertools; use rand::{Rng, SeedableRng}; + use risingwave_common::util::iter_util::ZipEqFast; use crate::hummock::sstable::VERSION; use crate::hummock::{BlockMeta, InMemWriter, SstableMeta, SstableWriter}; @@ -100,7 +100,7 @@ mod tests { } let meta = SstableMeta { block_metas, - bloom_filter: Vec::new(), + bloom_filter: vec![], estimated_size: 0, key_count: 0, smallest_key: Vec::new(), @@ -117,7 +117,7 @@ mod tests { async fn test_in_mem_writer() { let (data, blocks, meta) = get_sst(); let mut writer = Box::new(InMemWriter::new(0)); - for (block, meta) in blocks.iter().zip_eq(meta.block_metas.iter()) { + for (block, meta) in blocks.iter().zip_eq_fast(meta.block_metas.iter()) { writer.write_block(&block[..], meta).await.unwrap(); } diff --git a/src/storage/src/hummock/sstable/xor_filter.rs b/src/storage/src/hummock/sstable/xor_filter.rs new file mode 100644 index 0000000000000..ea6a678b27107 --- /dev/null +++ b/src/storage/src/hummock/sstable/xor_filter.rs @@ -0,0 +1,140 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::collections::HashSet; + +use bytes::{Buf, BufMut}; +use itertools::Itertools; +use xorf::{Filter, Xor16}; + +use super::{FilterBuilder, Sstable}; + +pub struct XorFilterBuilder { + key_hash_entries: Vec, +} + +impl XorFilterBuilder { + pub fn new(capacity: usize) -> Self { + let key_hash_entries = if capacity > 0 { + Vec::with_capacity(capacity) + } else { + vec![] + }; + Self { key_hash_entries } + } +} + +impl FilterBuilder for XorFilterBuilder { + fn add_key(&mut self, key: &[u8], table_id: u32) { + self.key_hash_entries + .push(Sstable::hash_for_bloom_filter(key, table_id)); + } + + fn approximate_len(&self) -> usize { + self.key_hash_entries.len() * 4 + } + + fn finish(&mut self) -> Vec { + let xor_filter = Xor16::from( + &HashSet::::from_iter(std::mem::take(&mut self.key_hash_entries).into_iter()) + .into_iter() + .collect_vec(), + ); + let mut buf = Vec::with_capacity(8 + 4 + xor_filter.fingerprints.len() * 2 + 1); + buf.put_u64_le(xor_filter.seed); + buf.put_u32_le(xor_filter.block_length as u32); + xor_filter + .fingerprints + .iter() + .for_each(|x| buf.put_u16_le(*x)); + // We add an extra byte so we can distinguish bloom filter and xor filter by the last + // byte(255 indicates a xor filter and others indicate a bloom filter). + buf.put_u8(255); + buf + } + + fn create(_fpr: f64, capacity: usize) -> Self { + XorFilterBuilder::new(capacity) + } +} + +pub struct XorFilterReader { + filter: Xor16, +} + +impl XorFilterReader { + /// Creates an xor filter from a byte slice + pub fn new(buf: Vec) -> Self { + if buf.len() <= 1 { + return Self { + filter: Xor16 { + seed: 0, + block_length: 0, + fingerprints: vec![].into_boxed_slice(), + }, + }; + } + let buf = &mut &buf[..]; + let xor_filter_seed = buf.get_u64_le(); + let xor_filter_block_length = buf.get_u32_le(); + // is correct even when there is an extra 0xff byte in the end of buf + let len = buf.len() / 2; + let xor_filter_fingerprints = (0..len) + .map(|_| buf.get_u16_le()) + .collect_vec() + .into_boxed_slice(); + Self { + filter: Xor16 { + seed: xor_filter_seed, + block_length: xor_filter_block_length as usize, + fingerprints: xor_filter_fingerprints, + }, + } + } + + pub fn estimate_size(&self) -> usize { + self.filter.fingerprints.len() * std::mem::size_of::() + } + + pub fn is_empty(&self) -> bool { + self.filter.block_length == 0 + } + + /// Judges whether the hash value is in the table with the given false positive rate. + /// + /// Note: + /// - if the return value is false, then the table surely does not have the user key that has + /// the hash; + /// - if the return value is true, then the table may or may not have the user key that has + /// the hash actually, a.k.a. we don't know the answer. + pub fn may_match(&self, h: u64) -> bool { + if self.is_empty() { + true + } else { + self.filter.contains(&h) + } + } +} + +impl Clone for XorFilterReader { + fn clone(&self) -> Self { + Self { + filter: Xor16 { + seed: self.filter.seed, + block_length: self.filter.block_length, + fingerprints: self.filter.fingerprints.clone(), + }, + } + } +} diff --git a/src/storage/src/hummock/sstable_store.rs b/src/storage/src/hummock/sstable_store.rs index b5a2f7077fe85..272764a83f4f5 100644 --- a/src/storage/src/hummock/sstable_store.rs +++ b/src/storage/src/hummock/sstable_store.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
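The new `xor_filter.rs` above serializes an `Xor16` as seed (u64 LE), block length (u32 LE) and u16 LE fingerprints, followed by a trailing `255` marker byte so readers can tell it apart from the legacy bloom filter encoding. A minimal usage sketch of the underlying `xorf` crate as it is used here (the literal hash values stand in for per-key `hash_for_bloom_filter` outputs and must be deduplicated, as the builder does with its `HashSet`):

```rust
use xorf::{Filter, Xor16};

fn main() {
    // Deduplicated 64-bit key hashes, as collected by XorFilterBuilder::add_key.
    let key_hashes: Vec<u64> = vec![11, 42, 7, 1_000_003];
    let filter = Xor16::from(&key_hashes);

    // A hit may be a false positive; a miss is a guaranteed true negative.
    assert!(filter.contains(&42));

    // The pieces that XorFilterBuilder/XorFilterReader serialize in this diff.
    let _ = (filter.seed, filter.block_length, filter.fingerprints.len());
}
```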
@@ -20,10 +20,10 @@ use bytes::{Buf, BufMut, Bytes}; use fail::fail_point; use itertools::Itertools; use risingwave_common::cache::LruCacheEventListener; -use risingwave_hummock_sdk::{is_remote_sst_id, HummockSstableId}; +use risingwave_hummock_sdk::HummockSstableId; use risingwave_object_store::object::{ - get_local_path, BlockLocation, MonitoredStreamingReader, ObjectError, ObjectMetadata, - ObjectStoreRef, ObjectStreamingUploader, + BlockLocation, MonitoredStreamingReader, ObjectError, ObjectMetadata, ObjectStoreRef, + ObjectStreamingUploader, }; use risingwave_pb::hummock::SstableInfo; use tokio::task::JoinHandle; @@ -126,6 +126,8 @@ impl SstableStore { meta_cache_capacity: usize, tiered_cache: TieredCache<(HummockSstableId, u64), Box>, ) -> Self { + // TODO: We should validate path early. Otherwise object store won't report invalid path + // error until first write attempt. let mut shard_bits = MAX_META_CACHE_SHARD_BITS; while (meta_cache_capacity >> shard_bits) < MIN_BUFFER_SIZE_PER_SHARD && shard_bits > 0 { shard_bits -= 1; @@ -283,13 +285,8 @@ impl SstableStore { } pub fn get_sst_data_path(&self, sst_id: HummockSstableId) -> String { - let is_remote = is_remote_sst_id(sst_id); - let obj_prefix = self.store.get_object_prefix(sst_id, is_remote); - let mut ret = format!("{}/{}{}.data", self.path, obj_prefix, sst_id); - if !is_remote { - ret = get_local_path(&ret); - } - ret + let obj_prefix = self.store.get_object_prefix(sst_id, true); + format!("{}/{}{}.data", self.path, obj_prefix, sst_id) } pub fn get_sst_id_from_path(&self, path: &str) -> HummockSstableId { @@ -349,7 +346,7 @@ impl SstableStore { .map_err(HummockError::object_io_error)?; let meta = SstableMeta::decode(&mut &buf[..])?; let sst = Sstable::new(sst_id, meta); - let charge = sst.meta.encoded_size(); + let charge = sst.estimate_size(); let add = (now.elapsed().as_secs_f64() * 1000.0).ceil(); stats_ptr.fetch_add(add as u64, Ordering::Relaxed); Ok((Box::new(sst), charge)) @@ -375,7 +372,7 @@ impl SstableStore { pub async fn list_ssts_from_object_store(&self) -> HummockResult> { self.store - .list(&self.path) + .list(&format!("{}/", self.path)) .await .map_err(HummockError::object_io_error) } @@ -869,11 +866,12 @@ mod tests { async fn validate_sst( sstable_store: SstableStoreRef, info: &SstableInfo, - meta: SstableMeta, + mut meta: SstableMeta, x_range: Range, ) { let mut stats = StoreLocalStatistic::default(); let holder = sstable_store.sstable(info, &mut stats).await.unwrap(); + std::mem::take(&mut meta.bloom_filter); assert_eq!(holder.value().meta, meta); let holder = sstable_store.sstable(info, &mut stats).await.unwrap(); assert_eq!(holder.value().meta, meta); diff --git a/src/storage/src/hummock/state_store.rs b/src/storage/src/hummock/state_store.rs index efcaff4bb0690..97a941a0cf0b9 100644 --- a/src/storage/src/hummock/state_store.rs +++ b/src/storage/src/hummock/state_store.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -37,6 +37,7 @@ use crate::hummock::store::memtable::ImmutableMemtable; use crate::hummock::store::state_store::LocalHummockStorage; use crate::hummock::store::version::read_filter_for_batch; use crate::hummock::{HummockEpoch, HummockError}; +use crate::monitor::StoreLocalStatistic; use crate::store::*; use crate::{ define_state_store_associated_type, define_state_store_read_associated_type, StateStore, @@ -268,6 +269,7 @@ impl StateStore for HummockStorage { is_checkpoint, }) .expect("should send success"); + StoreLocalStatistic::flush_all(); } fn clear_shared_buffer(&self) -> Self::ClearSharedBufferFuture<'_> { @@ -297,7 +299,7 @@ impl StateStore for HummockStorage { let sealed_epoch = self.seal_epoch.load(MemOrdering::SeqCst); if read_current_epoch > sealed_epoch { return Err(HummockError::read_current_epoch(format!( - "cannot read current epoch because read epoch {} > sealed epoch {}", + "Cannot read when cluster is under recovery. read {} > max seal epoch {}", read_current_epoch, sealed_epoch )) .into()); @@ -306,7 +308,7 @@ impl StateStore for HummockStorage { let min_current_epoch = self.min_current_epoch.load(MemOrdering::SeqCst); if read_current_epoch < min_current_epoch { return Err(HummockError::read_current_epoch(format!( - "cannot read current epoch because read epoch {} < min current epoch {}", + "Cannot read when cluster is under recovery. read {} < min current epoch {}", read_current_epoch, min_current_epoch )) .into()); diff --git a/src/storage/src/hummock/state_store_v1.rs b/src/storage/src/hummock/state_store_v1.rs index bf6b5de554146..ee063a2c3434f 100644 --- a/src/storage/src/hummock/state_store_v1.rs +++ b/src/storage/src/hummock/state_store_v1.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
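The reworded messages in `state_store.rs` above belong to the read-epoch validation that rejects reads while the cluster is recovering: an epoch newer than the sealed epoch, or older than the minimum current epoch, fails fast. A condensed sketch of that guard (bounds and message text taken from the diff; `HummockError::read_current_epoch` replaced by a plain `String` to keep the sketch self-contained):

```rust
fn validate_read_epoch(
    read_current_epoch: u64,
    sealed_epoch: u64,
    min_current_epoch: u64,
) -> Result<(), String> {
    // Reads beyond the last sealed epoch are not yet visible.
    if read_current_epoch > sealed_epoch {
        return Err(format!(
            "Cannot read when cluster is under recovery. read {} > max seal epoch {}",
            read_current_epoch, sealed_epoch
        ));
    }
    // Reads below the minimum current epoch may have been reclaimed.
    if read_current_epoch < min_current_epoch {
        return Err(format!(
            "Cannot read when cluster is under recovery. read {} < min current epoch {}",
            read_current_epoch, min_current_epoch
        ));
    }
    Ok(())
}
```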
@@ -14,7 +14,6 @@ use std::cmp::Ordering; use std::future::Future; -use std::ops::Bound::{Excluded, Included}; use std::ops::{Bound, RangeBounds}; use std::sync::Arc; use std::time::Duration; @@ -30,7 +29,7 @@ use risingwave_hummock_sdk::key::{ bound_table_key_range, map_table_key_range, user_key, FullKey, TableKey, TableKeyRange, UserKey, }; use risingwave_hummock_sdk::key_range::KeyRangeCommon; -use risingwave_hummock_sdk::{can_concat, HummockReadEpoch}; +use risingwave_hummock_sdk::HummockReadEpoch; use risingwave_pb::hummock::LevelType; use tokio::sync::oneshot; use tracing::log::warn; @@ -51,17 +50,19 @@ use crate::hummock::iterator::{ use crate::hummock::local_version::ReadVersion; use crate::hummock::shared_buffer::build_ordered_merge_iter; use crate::hummock::sstable::SstableIteratorReadOptions; -use crate::hummock::utils::{prune_ssts, search_sst_idx}; +use crate::hummock::utils::{prune_nonoverlapping_ssts, prune_overlapping_ssts}; use crate::hummock::{ DeleteRangeAggregator, ForwardIter, HummockEpoch, HummockError, HummockIteratorType, HummockResult, Sstable, }; -use crate::monitor::{HummockStateStoreMetrics, StoreLocalStatistic}; +use crate::monitor::{ + GetLocalMetricsGuard, HummockStateStoreMetrics, IterLocalMetricsGuard, StoreLocalStatistic, +}; use crate::storage_value::StorageValue; use crate::store::*; use crate::{ - define_state_store_associated_type, define_state_store_read_associated_type, - define_state_store_write_associated_type, + define_local_state_store_associated_type, define_state_store_associated_type, + define_state_store_read_associated_type, define_state_store_write_associated_type, }; impl HummockStorageV1 { @@ -79,16 +80,15 @@ impl HummockStorageV1 { read_options: ReadOptions, ) -> StorageResult> { let table_id = read_options.table_id; - let table_id_string = table_id.to_string(); - let table_id_label = table_id_string.as_str(); - let mut local_stats = StoreLocalStatistic::default(); + let mut stats_guard = + GetLocalMetricsGuard::new(self.state_store_metrics.clone(), read_options.table_id); + stats_guard.local_stats.found_key = true; let ReadVersion { shared_buffer_data, pinned_version, sync_uncommitted_data, } = self.read_filter(epoch, &read_options, &(table_key..=table_key))?; - let mut table_counts = 0; let full_key = FullKey::new(table_id, table_key, epoch); // Query shared buffer. 
Return the value without iterating SSTs if found @@ -98,48 +98,33 @@ impl HummockStorageV1 { self.sstable_store.clone(), uncommitted_data, full_key, - &mut local_stats, + &mut stats_guard.local_stats, &read_options, ) .await?; if let Some(v) = value { - local_stats.report_bloom_filter_metrics( - self.state_store_metrics.as_ref(), - "get", - table_id_label, - false, - ); - local_stats.report(self.state_store_metrics.as_ref(), table_id_label); return Ok(v.into_user_value()); } - table_counts += table_count; + stats_guard.local_stats.sub_iter_count += table_count as u64; } for sync_uncommitted_data in sync_uncommitted_data { let (value, table_count) = get_from_order_sorted_uncommitted_data( self.sstable_store.clone(), sync_uncommitted_data, full_key, - &mut local_stats, + &mut stats_guard.local_stats, &read_options, ) .await?; if let Some(v) = value { - local_stats.report_bloom_filter_metrics( - self.state_store_metrics.as_ref(), - "get", - table_id_label, - false, - ); - local_stats.report(self.state_store_metrics.as_ref(), table_id_label); return Ok(v.into_user_value()); } - table_counts += table_count; + stats_guard.local_stats.sub_iter_count += table_count as u64; } - let dist_key_hash = read_options - .prefix_hint - .as_ref() - .map(|dist_key| Sstable::hash_for_bloom_filter(dist_key.as_ref())); + let dist_key_hash = read_options.prefix_hint.as_ref().map(|dist_key| { + Sstable::hash_for_bloom_filter(dist_key.as_ref(), read_options.table_id.table_id()) + }); // Because SST meta records encoded key range, // the filter key needs to be encoded as well. @@ -153,27 +138,24 @@ impl HummockStorageV1 { } match level.level_type() { LevelType::Overlapping | LevelType::Unspecified => { - let sstable_infos = - prune_ssts(level.table_infos.iter(), table_id, &(table_key..=table_key)); + let single_table_key_range = table_key..=table_key; + let sstable_infos = prune_overlapping_ssts( + &level.table_infos, + table_id, + &single_table_key_range, + ); for sstable_info in sstable_infos { - table_counts += 1; + stats_guard.local_stats.sub_iter_count += 1; if let Some(v) = get_from_sstable_info( self.sstable_store.clone(), sstable_info, full_key, &read_options, dist_key_hash, - &mut local_stats, + &mut stats_guard.local_stats, ) .await? { - local_stats.report_bloom_filter_metrics( - self.state_store_metrics.as_ref(), - "get", - table_id_label, - false, - ); - local_stats.report(self.state_store_metrics.as_ref(), table_id_label); return Ok(v.into_user_value()); } } @@ -198,41 +180,23 @@ impl HummockStorageV1 { continue; } - table_counts += 1; + stats_guard.local_stats.sub_iter_count += 1; if let Some(v) = get_from_sstable_info( self.sstable_store.clone(), &level.table_infos[table_info_idx], full_key, &read_options, dist_key_hash, - &mut local_stats, + &mut stats_guard.local_stats, ) .await? 
{ - local_stats.report_bloom_filter_metrics( - self.state_store_metrics.as_ref(), - "get", - table_id_label, - false, - ); - local_stats.report(self.state_store_metrics.as_ref(), table_id_label); return Ok(v.into_user_value()); } } } } - - local_stats.report_bloom_filter_metrics( - self.state_store_metrics.as_ref(), - "get", - table_id_label, - true, - ); - local_stats.report(self.state_store_metrics.as_ref(), table_id_label); - self.state_store_metrics - .iter_merge_sstable_counts - .with_label_values(&["", "sub-iter"]) - .observe(table_counts as f64); + stats_guard.local_stats.found_key = false; Ok(None) } @@ -306,11 +270,8 @@ impl HummockStorageV1 { .await?, )) } - self.state_store_metrics - .iter_merge_sstable_counts - .with_label_values(&["", "memory-iter"]) - .observe(overlapped_iters.len() as f64); + local_stats.staging_imm_iter_count = overlapped_iters.len() as u64; // Generate iterators for versioned ssts by filter out ssts that do not overlap with the // user key range derived from the given `table_key_range` and `table_id`. @@ -336,30 +297,18 @@ impl HummockStorageV1 { let bloom_filter_prefix_hash = read_options .prefix_hint .as_ref() - .map(|hint| Sstable::hash_for_bloom_filter(hint)); + .map(|hint| Sstable::hash_for_bloom_filter(hint, read_options.table_id.table_id())); for level in pinned_version.levels(table_id) { if level.table_infos.is_empty() { continue; } if level.level_type == LevelType::Nonoverlapping as i32 { - debug_assert!(can_concat(&level.table_infos)); - let start_table_idx = match encoded_user_key_range.start_bound() { - Included(key) | Excluded(key) => search_sst_idx(&level.table_infos, key), - _ => 0, - }; - let end_table_idx = match encoded_user_key_range.end_bound() { - Included(key) | Excluded(key) => search_sst_idx(&level.table_infos, key), - _ => level.table_infos.len().saturating_sub(1), - }; - assert!( - start_table_idx < level.table_infos.len() - && end_table_idx < level.table_infos.len() - ); - let matched_table_infos = &level.table_infos[start_table_idx..=end_table_idx]; + let matched_table_infos = + prune_nonoverlapping_ssts(&level.table_infos, &encoded_user_key_range); let pruned_sstables = match T::Direction::direction() { - DirectionEnum::Backward => matched_table_infos.iter().rev().collect_vec(), - DirectionEnum::Forward => matched_table_infos.iter().collect_vec(), + DirectionEnum::Backward => matched_table_infos.rev().collect_vec(), + DirectionEnum::Forward => matched_table_infos.collect_vec(), }; let mut sstables = vec![]; @@ -388,7 +337,8 @@ impl HummockStorageV1 { iter_read_options.clone(), ))); } else { - let table_infos = prune_ssts(level.table_infos.iter(), table_id, &table_key_range); + let table_infos = + prune_overlapping_ssts(&level.table_infos, table_id, &table_key_range); for table_info in table_infos.into_iter().rev() { let sstable = self .sstable_store @@ -416,10 +366,7 @@ impl HummockStorageV1 { } } - self.state_store_metrics - .iter_merge_sstable_counts - .with_label_values(&["", "sub-iter"]) - .observe(overlapped_iters.len() as f64); + local_stats.sub_iter_count = overlapped_iters.len() as u64; // TODO: implement delete range if the code of this file would not be delete. 
let delete_range_iter = ForwardMergeRangeIterator::default(); @@ -440,22 +387,12 @@ impl HummockStorageV1 { .in_span(Span::enter_with_local_parent("rewind")) .await?; - let table_id_string = read_options.table_id.to_string(); - let table_id_label = table_id_string.as_str(); - local_stats.report_bloom_filter_metrics( - self.state_store_metrics.as_ref(), - "iter", - table_id_label, - user_iterator.is_valid(), - ); - - local_stats.report( - self.state_store_metrics.as_ref(), - table_id.to_string().as_str(), - ); + local_stats.found_key = user_iterator.is_valid(); Ok(HummockStateStoreIter::new( user_iterator, self.state_store_metrics.clone(), + read_options.table_id, + local_stats, )) } } @@ -518,7 +455,17 @@ impl StateStoreWrite for HummockStorageV1 { } } -impl LocalStateStore for HummockStorageV1 {} +impl LocalStateStore for HummockStorageV1 { + define_local_state_store_associated_type!(); + + fn may_exist( + &self, + _key_range: (Bound>, Bound>), + _read_options: ReadOptions, + ) -> Self::MayExistFuture<'_> { + async move { Ok(true) } + } +} impl StateStore for HummockStorageV1 { type Local = Self; @@ -645,17 +592,21 @@ impl StateStore for HummockStorageV1 { pub struct HummockStateStoreIter { inner: DirectedUserIterator, - metrics: Arc, + stats_guard: IterLocalMetricsGuard, } impl HummockStateStoreIter { #[allow(dead_code)] - fn new(inner: DirectedUserIterator, metrics: Arc) -> Self { - Self { inner, metrics } - } - - fn collect_local_statistic(&self, stats: &mut StoreLocalStatistic) { - self.inner.collect_local_statistic(stats); + fn new( + inner: DirectedUserIterator, + metrics: Arc, + table_id: TableId, + local_stats: StoreLocalStatistic, + ) -> Self { + Self { + inner, + stats_guard: IterLocalMetricsGuard::new(metrics, table_id, local_stats), + } } } @@ -681,8 +632,7 @@ impl StateStoreIter for HummockStateStoreIter { impl Drop for HummockStateStoreIter { fn drop(&mut self) { - let mut stats = StoreLocalStatistic::default(); - self.collect_local_statistic(&mut stats); - stats.report(&self.metrics, ""); + self.inner + .collect_local_statistic(&mut self.stats_guard.local_stats); } } diff --git a/src/storage/src/hummock/store/event_handler.rs b/src/storage/src/hummock/store/event_handler.rs index 307b9f2c66559..97e8a787bae21 100644 --- a/src/storage/src/hummock/store/event_handler.rs +++ b/src/storage/src/hummock/store/event_handler.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/store/memtable.rs b/src/storage/src/hummock/store/memtable.rs index b688013b4f4a3..1167beed46ddf 100644 --- a/src/storage/src/hummock/store/memtable.rs +++ b/src/storage/src/hummock/store/memtable.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/store/mod.rs b/src/storage/src/hummock/store/mod.rs index f47955db58bea..72690f4659ff4 100644 --- a/src/storage/src/hummock/store/mod.rs +++ b/src/storage/src/hummock/store/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
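The `state_store_v1.rs` refactor above replaces the scattered `report_bloom_filter_metrics` / `report` calls with `GetLocalMetricsGuard` and `IterLocalMetricsGuard`, which own the `StoreLocalStatistic` and flush it when dropped, so early returns in `get` no longer skip reporting. A minimal sketch of that RAII shape (the types below are stand-ins, not the real `monitor` module):

```rust
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;

// Stand-in for the shared HummockStateStoreMetrics registry.
#[derive(Default)]
struct Metrics {
    sub_iter_count: AtomicU64,
}

// Stand-in for the guards: accumulate locally, flush exactly once on drop,
// regardless of which return path the caller takes.
struct LocalMetricsGuard {
    metrics: Arc<Metrics>,
    sub_iter_count: u64,
}

impl Drop for LocalMetricsGuard {
    fn drop(&mut self) {
        self.metrics
            .sub_iter_count
            .fetch_add(self.sub_iter_count, Ordering::Relaxed);
    }
}

fn main() {
    let metrics = Arc::new(Metrics::default());
    {
        let mut guard = LocalMetricsGuard {
            metrics: metrics.clone(),
            sub_iter_count: 0,
        };
        guard.sub_iter_count += 3; // e.g. three SSTs probed during a get()
        // an early return here would still flush via Drop
    }
    assert_eq!(metrics.sub_iter_count.load(Ordering::Relaxed), 3);
}
```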
diff --git a/src/storage/src/hummock/store/state_store.rs b/src/storage/src/hummock/store/state_store.rs index 04c93575f367e..e4559cd647c30 100644 --- a/src/storage/src/hummock/store/state_store.rs +++ b/src/storage/src/hummock/store/state_store.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ use minitrace::future::FutureExt; use parking_lot::RwLock; use risingwave_common::catalog::TableId; use risingwave_hummock_sdk::key::{map_table_key_range, TableKey, TableKeyRange}; +use risingwave_hummock_sdk::HummockEpoch; use tokio::sync::mpsc; use tracing::warn; @@ -35,15 +36,15 @@ use crate::hummock::shared_buffer::shared_buffer_batch::{ }; use crate::hummock::store::version::{read_filter_for_local, HummockVersionReader}; use crate::hummock::{MemoryLimiter, SstableIterator}; -use crate::monitor::{HummockStateStoreMetrics, StoreLocalStatistic}; +use crate::monitor::{HummockStateStoreMetrics, IterLocalMetricsGuard, StoreLocalStatistic}; use crate::storage_value::StorageValue; use crate::store::*; use crate::{ - define_state_store_read_associated_type, define_state_store_write_associated_type, - StateStoreIter, + define_local_state_store_associated_type, define_state_store_read_associated_type, + define_state_store_write_associated_type, StateStoreIter, }; -pub struct HummockStorageCore { +pub struct LocalHummockStorage { /// Mutable memtable. // memtable: Memtable, instance_guard: LocalInstanceGuard, @@ -57,41 +58,11 @@ pub struct HummockStorageCore { memory_limiter: Arc, hummock_version_reader: HummockVersionReader, -} -pub struct LocalHummockStorage { - core: Arc, tracing: Arc, } -// Clone is only used for unit test -#[cfg(any(test, feature = "test"))] -impl Clone for LocalHummockStorage { - fn clone(&self) -> Self { - Self { - core: self.core.clone(), - tracing: self.tracing.clone(), - } - } -} - -impl HummockStorageCore { - pub fn new( - instance_guard: LocalInstanceGuard, - read_version: Arc>, - hummock_version_reader: HummockVersionReader, - event_sender: mpsc::UnboundedSender, - memory_limiter: Arc, - ) -> Self { - Self { - instance_guard, - read_version, - event_sender, - memory_limiter, - hummock_version_reader, - } - } - +impl LocalHummockStorage { /// See `HummockReadVersion::update` for more details. 
pub fn update(&self, info: VersionUpdate) { self.read_version.write().update(info) @@ -137,6 +108,25 @@ impl HummockStorageCore { .iter(table_key_range, epoch, read_options, read_snapshot) .await } + + pub async fn may_exist_inner( + &self, + key_range: (Bound>, Bound>), + read_options: ReadOptions, + ) -> StorageResult { + let table_key_range = map_table_key_range(key_range); + + let read_snapshot = read_filter_for_local( + HummockEpoch::MAX, // Use MAX epoch to make sure we read from latest + read_options.table_id, + &table_key_range, + self.read_version.clone(), + )?; + + self.hummock_version_reader + .may_exist(table_key_range, read_options, read_snapshot) + .await + } } impl StateStoreRead for LocalHummockStorage { @@ -150,7 +140,7 @@ impl StateStoreRead for LocalHummockStorage { epoch: u64, read_options: ReadOptions, ) -> Self::GetFuture<'_> { - self.core.get_inner(TableKey(key), epoch, read_options) + self.get_inner(TableKey(key), epoch, read_options) } fn iter( @@ -159,8 +149,7 @@ impl StateStoreRead for LocalHummockStorage { epoch: u64, read_options: ReadOptions, ) -> Self::IterFuture<'_> { - self.core - .iter_inner(map_table_key_range(key_range), epoch, read_options) + self.iter_inner(map_table_key_range(key_range), epoch, read_options) .in_span(self.tracing.new_tracer("hummock_iter")) } } @@ -184,7 +173,7 @@ impl StateStoreWrite for LocalHummockStorage { let sorted_items = SharedBufferBatch::build_shared_buffer_item_batches(kv_pairs); let size = SharedBufferBatch::measure_batch_size(&sorted_items); - let limiter = self.core.memory_limiter.as_ref(); + let limiter = self.memory_limiter.as_ref(); let tracker = if let Some(tracker) = limiter.try_require_memory(size as u64) { tracker } else { @@ -193,8 +182,7 @@ impl StateStoreWrite for LocalHummockStorage { size, limiter.get_memory_usage() ); - self.core - .event_sender + self.event_sender .send(HummockEvent::BufferMayFlush) .expect("should be able to send"); let tracker = limiter.require_memory(size as u64).await; @@ -215,12 +203,10 @@ impl StateStoreWrite for LocalHummockStorage { Some(tracker), ); let imm_size = imm.size(); - self.core - .update(VersionUpdate::Staging(StagingData::ImmMem(imm.clone()))); + self.update(VersionUpdate::Staging(StagingData::ImmMem(imm.clone()))); // insert imm to uploader - self.core - .event_sender + self.event_sender .send(HummockEvent::ImmToUploader(imm)) .unwrap(); @@ -229,7 +215,17 @@ impl StateStoreWrite for LocalHummockStorage { } } -impl LocalStateStore for LocalHummockStorage {} +impl LocalStateStore for LocalHummockStorage { + define_local_state_store_associated_type!(); + + fn may_exist( + &self, + key_range: (Bound>, Bound>), + read_options: ReadOptions, + ) -> Self::MayExistFuture<'_> { + self.may_exist_inner(key_range, read_options) + } +} impl LocalHummockStorage { pub fn new( @@ -240,35 +236,28 @@ impl LocalHummockStorage { memory_limiter: Arc, tracing: Arc, ) -> Self { - let storage_core = HummockStorageCore::new( + Self { instance_guard, read_version, - hummock_version_reader, event_sender, memory_limiter, - ); - - Self { - core: Arc::new(storage_core), + hummock_version_reader, tracing, } } /// See `HummockReadVersion::update` for more details. 
- pub fn update(&self, info: VersionUpdate) { - self.core.update(info) - } pub fn read_version(&self) -> Arc> { - self.core.read_version.clone() + self.read_version.clone() } pub fn table_id(&self) -> TableId { - self.core.instance_guard.table_id + self.instance_guard.table_id } pub fn instance_id(&self) -> u64 { - self.core.instance_guard.instance_id + self.instance_guard.instance_id } } @@ -286,8 +275,7 @@ type HummockStorageIteratorPayload = UnorderedMergeIteratorInner< pub struct HummockStorageIterator { inner: UserIterator, - metrics: Arc, - table_id: TableId, + stats_guard: IterLocalMetricsGuard, } impl StateStoreIter for HummockStorageIterator { @@ -315,23 +303,18 @@ impl HummockStorageIterator { inner: UserIterator, metrics: Arc, table_id: TableId, + local_stats: StoreLocalStatistic, ) -> Self { Self { inner, - metrics, - table_id, + stats_guard: IterLocalMetricsGuard::new(metrics, table_id, local_stats), } } - - fn collect_local_statistic(&self, stats: &mut StoreLocalStatistic) { - self.inner.collect_local_statistic(stats); - } } impl Drop for HummockStorageIterator { fn drop(&mut self) { - let mut stats = StoreLocalStatistic::default(); - self.collect_local_statistic(&mut stats); - stats.report(&self.metrics, self.table_id.to_string().as_str()); + self.inner + .collect_local_statistic(&mut self.stats_guard.local_stats); } } diff --git a/src/storage/src/hummock/store/version.rs b/src/storage/src/hummock/store/version.rs index 3f8d3a3b86f74..5a71706d0a463 100644 --- a/src/storage/src/hummock/store/version.rs +++ b/src/storage/src/hummock/store/version.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
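// Illustrative sketch (not part of the patch): how a caller can use the new `may_exist` entry
// point as a fast pre-check before an expensive read. `may_exist` only consults imm key ranges
// and SST bloom filters, so `true` may be a false positive while `false` guarantees absence.
// `SimpleStore`, `MapStore` and `get_if_may_exist` are made-up simplifications of
// `LocalStateStore`; the real API is async and takes `ReadOptions`.
use std::collections::BTreeMap;
use std::ops::Bound;

trait SimpleStore {
    fn may_exist(&self, key_range: (Bound<Vec<u8>>, Bound<Vec<u8>>)) -> bool;
    fn get(&self, key: &[u8]) -> Option<Vec<u8>>;
}

struct MapStore {
    data: BTreeMap<Vec<u8>, Vec<u8>>,
}

impl SimpleStore for MapStore {
    fn may_exist(&self, key_range: (Bound<Vec<u8>>, Bound<Vec<u8>>)) -> bool {
        // A real implementation may return `true` spuriously (filter positives); an exact
        // range scan is used here only to keep the sketch self-contained.
        self.data.range(key_range).next().is_some()
    }

    fn get(&self, key: &[u8]) -> Option<Vec<u8>> {
        self.data.get(key).cloned()
    }
}

// Skip the (potentially remote) point lookup when the filter already rules the key out.
fn get_if_may_exist(store: &impl SimpleStore, key: &[u8]) -> Option<Vec<u8>> {
    let range = (Bound::Included(key.to_vec()), Bound::Included(key.to_vec()));
    if !store.may_exist(range) {
        return None;
    }
    store.get(key)
}

fn main() {
    let store = MapStore {
        data: BTreeMap::from([(b"k1".to_vec(), b"v1".to_vec())]),
    };
    assert_eq!(get_if_may_exist(&store, b"k1"), Some(b"v1".to_vec()));
    assert_eq!(get_if_may_exist(&store, b"missing"), None);
}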
@@ -15,8 +15,6 @@ use std::cmp::Ordering; use std::collections::{HashSet, VecDeque}; use std::iter::once; -use std::ops::Bound::{Excluded, Included}; -use std::ops::{Deref, RangeBounds}; use std::sync::Arc; use bytes::Bytes; @@ -27,10 +25,10 @@ use minitrace::Span; use parking_lot::RwLock; use risingwave_common::catalog::TableId; use risingwave_hummock_sdk::key::{ - bound_table_key_range, user_key, FullKey, TableKey, TableKeyRange, UserKey, + bound_table_key_range, FullKey, TableKey, TableKeyRange, UserKey, }; use risingwave_hummock_sdk::key_range::KeyRangeCommon; -use risingwave_hummock_sdk::{can_concat, HummockEpoch, LocalSstableInfo}; +use risingwave_hummock_sdk::{HummockEpoch, LocalSstableInfo}; use risingwave_pb::hummock::{HummockVersionDelta, LevelType, SstableInfo}; use sync_point::sync_point; @@ -46,13 +44,16 @@ use crate::hummock::sstable::SstableIteratorReadOptions; use crate::hummock::sstable_store::SstableStoreRef; use crate::hummock::store::state_store::HummockStorageIterator; use crate::hummock::utils::{ - check_subset_preserve_order, filter_single_sst, prune_ssts, range_overlap, search_sst_idx, + check_subset_preserve_order, filter_single_sst, prune_nonoverlapping_ssts, + prune_overlapping_ssts, range_overlap, search_sst_idx, }; use crate::hummock::{ get_from_batch, get_from_sstable_info, hit_sstable_bloom_filter, DeleteRangeAggregator, Sstable, SstableDeleteRangeIterator, SstableIterator, }; -use crate::monitor::{HummockStateStoreMetrics, StoreLocalStatistic}; +use crate::monitor::{ + GetLocalMetricsGuard, HummockStateStoreMetrics, MayExistLocalMetricsGuard, StoreLocalStatistic, +}; use crate::store::{gen_min_epoch, ReadOptions, StateStoreIterExt, StreamTypeOfIter}; // TODO: use a custom data structure to allow in-place update instead of proto @@ -410,12 +411,11 @@ impl HummockVersionReader { read_options: ReadOptions, read_version_tuple: (Vec, Vec, CommittedVersion), ) -> StorageResult> { - let mut table_counts = 0; - let mut local_stats = StoreLocalStatistic::default(); - let table_id_string = read_options.table_id.to_string(); - let table_id_label = table_id_string.as_str(); let (imms, uncommitted_ssts, committed_version) = read_version_tuple; let min_epoch = gen_min_epoch(epoch, read_options.retention_seconds.as_ref()); + let mut stats_guard = + GetLocalMetricsGuard::new(self.state_store_metrics.clone(), read_options.table_id); + stats_guard.local_stats.found_key = true; // 1. read staging data for imm in &imms { @@ -423,36 +423,29 @@ impl HummockVersionReader { continue; } - if let Some(data) = get_from_batch(imm, table_key, &mut local_stats) { + if let Some(data) = get_from_batch(imm, table_key, &mut stats_guard.local_stats) { return Ok(data.into_user_value()); } } // 2. order guarantee: imm -> sst - let dist_key_hash = read_options - .prefix_hint - .as_ref() - .map(|dist_key| Sstable::hash_for_bloom_filter(dist_key.as_ref())); + let dist_key_hash = read_options.prefix_hint.as_ref().map(|dist_key| { + Sstable::hash_for_bloom_filter(dist_key.as_ref(), read_options.table_id.table_id()) + }); let full_key = FullKey::new(read_options.table_id, table_key, epoch); for local_sst in &uncommitted_ssts { - table_counts += 1; + stats_guard.local_stats.sub_iter_count += 1; if let Some(data) = get_from_sstable_info( self.sstable_store.clone(), local_sst, full_key, &read_options, dist_key_hash, - &mut local_stats, + &mut stats_guard.local_stats, ) .await? 
{ - local_stats.report_bloom_filter_metrics( - self.state_store_metrics.as_ref(), - "get", - table_id_label, - false, - ); return Ok(data.into_user_value()); } } @@ -469,41 +462,30 @@ impl HummockVersionReader { match level.level_type() { LevelType::Overlapping | LevelType::Unspecified => { - let sstable_infos = prune_ssts( - level.table_infos.iter(), + let single_table_key_range = table_key..=table_key; + let sstable_infos = prune_overlapping_ssts( + &level.table_infos, read_options.table_id, - &(table_key..=table_key), + &single_table_key_range, ); for sstable_info in sstable_infos { - table_counts += 1; + stats_guard.local_stats.sub_iter_count += 1; if let Some(v) = get_from_sstable_info( self.sstable_store.clone(), sstable_info, full_key, &read_options, dist_key_hash, - &mut local_stats, + &mut stats_guard.local_stats, ) .await? { - local_stats.report_bloom_filter_metrics( - self.state_store_metrics.as_ref(), - "get", - table_id_label, - false, - ); - // todo add global stat to report - local_stats.report(self.state_store_metrics.as_ref(), table_id_label); return Ok(v.into_user_value()); } } } LevelType::Nonoverlapping => { - let mut table_info_idx = level.table_infos.partition_point(|table| { - let ord = user_key(&table.key_range.as_ref().unwrap().left) - .cmp(encoded_user_key.as_ref()); - ord == Ordering::Less || ord == Ordering::Equal - }); + let mut table_info_idx = search_sst_idx(&level.table_infos, &encoded_user_key); if table_info_idx == 0 { continue; } @@ -519,42 +501,23 @@ impl HummockVersionReader { continue; } - table_counts += 1; + stats_guard.local_stats.sub_iter_count += 1; if let Some(v) = get_from_sstable_info( self.sstable_store.clone(), &level.table_infos[table_info_idx], full_key, &read_options, dist_key_hash, - &mut local_stats, + &mut stats_guard.local_stats, ) .await? 
{ - local_stats.report_bloom_filter_metrics( - self.state_store_metrics.as_ref(), - "get", - table_id_label, - false, - ); - local_stats.report(self.state_store_metrics.as_ref(), table_id_label); return Ok(v.into_user_value()); } } } } - - local_stats.report_bloom_filter_metrics( - self.state_store_metrics.as_ref(), - "get", - table_id_label, - true, - ); - local_stats.report(self.state_store_metrics.as_ref(), table_id_label); - self.state_store_metrics - .iter_merge_sstable_counts - .with_label_values(&[table_id_label, "sub-iter"]) - .observe(table_counts as f64); - + stats_guard.local_stats.found_key = false; Ok(None) } @@ -572,10 +535,7 @@ impl HummockVersionReader { let mut local_stats = StoreLocalStatistic::default(); let mut staging_iters = Vec::with_capacity(imms.len() + uncommitted_ssts.len()); let mut delete_range_iter = ForwardMergeRangeIterator::default(); - self.state_store_metrics - .iter_merge_sstable_counts - .with_label_values(&[table_id_label, "staging-imm-iter"]) - .observe(imms.len() as f64); + local_stats.staging_imm_iter_count = imms.len() as u64; for imm in imms { if imm.has_range_tombstone() && !read_options.ignore_range_tombstone { delete_range_iter.add_batch_iter(imm.delete_range_iter()); @@ -587,7 +547,7 @@ impl HummockVersionReader { let bloom_filter_prefix_hash = read_options .prefix_hint .as_ref() - .map(|hint| Sstable::hash_for_bloom_filter(hint)); + .map(|hint| Sstable::hash_for_bloom_filter(hint, read_options.table_id.table_id())); for sstable_info in &uncommitted_ssts { let table_holder = self @@ -614,10 +574,7 @@ impl HummockVersionReader { Arc::new(SstableIteratorReadOptions::default()), ))); } - self.state_store_metrics - .iter_merge_sstable_counts - .with_label_values(&[table_id_label, "staging-sst-iter"]) - .observe(staging_sst_iter_count as f64); + local_stats.staging_sst_iter_count = staging_sst_iter_count; let staging_iter: StagingDataIterator = OrderedMergeIteratorInner::new(staging_iters); // 2. 
build iterator from committed @@ -638,22 +595,10 @@ impl HummockVersionReader { } if level.level_type == LevelType::Nonoverlapping as i32 { - debug_assert!(can_concat(&level.table_infos)); - let start_table_idx = match encoded_user_key_range.start_bound() { - Included(key) | Excluded(key) => search_sst_idx(&level.table_infos, key), - _ => 0, - }; - let end_table_idx = match encoded_user_key_range.end_bound() { - Included(key) | Excluded(key) => search_sst_idx(&level.table_infos, key), - _ => level.table_infos.len().saturating_sub(1), - }; - assert!( - start_table_idx < level.table_infos.len() - && end_table_idx < level.table_infos.len() - ); + let table_infos = + prune_nonoverlapping_ssts(&level.table_infos, &encoded_user_key_range); - let fetch_meta_req = level.table_infos[start_table_idx..=end_table_idx] - .iter() + let fetch_meta_req = table_infos .filter(|sstable_info| { sstable_info .table_ids @@ -663,17 +608,16 @@ impl HummockVersionReader { .collect_vec(); fetch_meta_reqs.push((level.level_type, fetch_meta_req)); } else { - let table_infos = prune_ssts( - level.table_infos.iter(), + let table_infos = prune_overlapping_ssts( + &level.table_infos, read_options.table_id, &table_key_range, ); - if table_infos.is_empty() { - continue; - } // Overlapping - let fetch_meta_req = table_infos.into_iter().rev().collect_vec(); - fetch_meta_reqs.push((level.level_type, fetch_meta_req)); + let fetch_meta_req = table_infos.rev().collect_vec(); + if !fetch_meta_req.is_empty() { + fetch_meta_reqs.push((level.level_type, fetch_meta_req)); + } } } let mut flatten_reqs = vec![]; @@ -715,17 +659,17 @@ impl HummockVersionReader { flatten_resps.pop().unwrap().unwrap(); assert_eq!(sstable_info.id, sstable.value().id); local_stats.apply_meta_fetch(local_cache_meta_block_miss); - if let Some(key_hash) = bloom_filter_prefix_hash.as_ref() { - if !hit_sstable_bloom_filter(sstable.value(), *key_hash, &mut local_stats) { - continue; - } - } if !sstable.value().meta.range_tombstone_list.is_empty() && !read_options.ignore_range_tombstone { delete_range_iter .add_sst_iter(SstableDeleteRangeIterator::new(sstable.clone())); } + if let Some(key_hash) = bloom_filter_prefix_hash.as_ref() { + if !hit_sstable_bloom_filter(sstable.value(), *key_hash, &mut local_stats) { + continue; + } + } sstables.push(sstable); } @@ -741,18 +685,18 @@ impl HummockVersionReader { flatten_resps.pop().unwrap().unwrap(); assert_eq!(sstable_info.id, sstable.value().id); local_stats.apply_meta_fetch(local_cache_meta_block_miss); - if let Some(dist_hash) = bloom_filter_prefix_hash.as_ref() { - if !hit_sstable_bloom_filter(sstable.value(), *dist_hash, &mut local_stats) - { - continue; - } - } if !sstable.value().meta.range_tombstone_list.is_empty() && !read_options.ignore_range_tombstone { delete_range_iter .add_sst_iter(SstableDeleteRangeIterator::new(sstable.clone())); } + if let Some(dist_hash) = bloom_filter_prefix_hash.as_ref() { + if !hit_sstable_bloom_filter(sstable.value(), *dist_hash, &mut local_stats) + { + continue; + } + } iters.push(SstableIterator::new( sstable, self.sstable_store.clone(), @@ -763,15 +707,8 @@ impl HummockVersionReader { overlapping_iters.push(OrderedMergeIteratorInner::new(iters)); } } - - self.state_store_metrics - .iter_merge_sstable_counts - .with_label_values(&[table_id_label, "committed-overlapping-iter"]) - .observe(overlapping_iter_count as f64); - self.state_store_metrics - .iter_merge_sstable_counts - .with_label_values(&[table_id_label, "committed-non-overlapping-iter"]) - 
.observe(non_overlapping_iters.len() as f64); + local_stats.overlapping_iter_count = overlapping_iter_count; + local_stats.non_overlapping_iter_count = non_overlapping_iters.len() as u64; // 3. build user_iterator let merge_iter = UnorderedMergeIteratorInner::new( @@ -802,21 +739,138 @@ impl HummockVersionReader { .rewind() .in_span(Span::enter_with_local_parent("rewind")) .await?; - - local_stats.report_bloom_filter_metrics( - self.state_store_metrics.as_ref(), - "iter", - table_id_label, - user_iter.is_valid(), - ); - - local_stats.report(self.state_store_metrics.deref(), table_id_label); + local_stats.found_key = user_iter.is_valid(); Ok(HummockStorageIterator::new( user_iter, self.state_store_metrics.clone(), read_options.table_id, + local_stats, ) .into_stream()) } + + // Note: this method will not check the kv tombstones and delete range tombstones + pub async fn may_exist( + &self, + table_key_range: TableKeyRange, + read_options: ReadOptions, + read_version_tuple: (Vec, Vec, CommittedVersion), + ) -> StorageResult { + let table_id = read_options.table_id; + let mut table_counts = 0; + let (imms, uncommitted_ssts, committed_version) = read_version_tuple; + let mut stats_guard = + MayExistLocalMetricsGuard::new(self.state_store_metrics.clone(), table_id); + + // 1. check staging data + for imm in &imms { + if imm.range_exists(&table_key_range) { + return Ok(true); + } + } + + let user_key_range = bound_table_key_range(read_options.table_id, &table_key_range); + let encoded_user_key_range = ( + user_key_range.0.as_ref().map(UserKey::encode), + user_key_range.1.as_ref().map(UserKey::encode), + ); + let bloom_filter_prefix_hash = if let Some(prefix_hint) = read_options.prefix_hint { + Sstable::hash_for_bloom_filter(&prefix_hint, table_id.table_id) + } else { + // only use `table_key_range` to see whether all SSTs are filtered out + // without looking at bloom filter because prefix_hint is not provided + if !uncommitted_ssts.is_empty() { + // uncommitted_ssts is already pruned by `table_key_range` so no extra check is + // needed. + return Ok(true); + } + for level in committed_version.levels(table_id) { + match level.level_type() { + LevelType::Overlapping | LevelType::Unspecified => { + if prune_overlapping_ssts(&level.table_infos, table_id, &table_key_range) + .next() + .is_some() + { + return Ok(true); + } + } + LevelType::Nonoverlapping => { + if prune_nonoverlapping_ssts(&level.table_infos, &encoded_user_key_range) + .next() + .is_some() + { + return Ok(true); + } + } + } + } + return Ok(false); + }; + + // 2. order guarantee: imm -> sst + for local_sst in &uncommitted_ssts { + table_counts += 1; + if hit_sstable_bloom_filter( + self.sstable_store + .sstable(local_sst, &mut stats_guard.local_stats) + .await? + .value(), + bloom_filter_prefix_hash, + &mut stats_guard.local_stats, + ) { + return Ok(true); + } + } + + // 3. read from committed_version sst file + // Because SST meta records encoded key range, + // the filter key needs to be encoded as well. + assert!(committed_version.is_valid()); + for level in committed_version.levels(table_id) { + if level.table_infos.is_empty() { + continue; + } + match level.level_type() { + LevelType::Overlapping | LevelType::Unspecified => { + let sstable_infos = + prune_overlapping_ssts(&level.table_infos, table_id, &table_key_range); + for sstable_info in sstable_infos { + table_counts += 1; + if hit_sstable_bloom_filter( + self.sstable_store + .sstable(sstable_info, &mut stats_guard.local_stats) + .await?
+ .value(), + bloom_filter_prefix_hash, + &mut stats_guard.local_stats, + ) { + return Ok(true); + } + } + } + LevelType::Nonoverlapping => { + let table_infos = + prune_nonoverlapping_ssts(&level.table_infos, &encoded_user_key_range); + + for table_info in table_infos { + table_counts += 1; + if hit_sstable_bloom_filter( + self.sstable_store + .sstable(table_info, &mut stats_guard.local_stats) + .await? + .value(), + bloom_filter_prefix_hash, + &mut stats_guard.local_stats, + ) { + return Ok(true); + } + } + } + } + } + + stats_guard.local_stats.may_exist_check_sstable_count = table_counts; + Ok(false) + } } diff --git a/src/storage/src/hummock/test_utils.rs b/src/storage/src/hummock/test_utils.rs index 3eeec5937d48c..dbe8530da2e52 100644 --- a/src/storage/src/hummock/test_utils.rs +++ b/src/storage/src/hummock/test_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,7 +18,6 @@ use bytes::Bytes; use futures::{Stream, TryStreamExt}; use itertools::Itertools; use risingwave_common::catalog::TableId; -use risingwave_common::config::StorageConfig; use risingwave_hummock_sdk::key::{FullKey, UserKey}; use risingwave_hummock_sdk::{HummockEpoch, HummockSstableId}; use risingwave_pb::hummock::{KeyRange, SstableInfo}; @@ -36,10 +35,11 @@ use crate::hummock::{ SstableStoreRef, SstableWriter, }; use crate::monitor::StoreLocalStatistic; +use crate::opts::StorageOpts; use crate::storage_value::StorageValue; -pub fn default_config_for_test() -> StorageConfig { - StorageConfig { +pub fn default_opts_for_test() -> StorageOpts { + StorageOpts { sstable_size_mb: 4, block_size_kb: 64, bloom_false_positive: 0.1, diff --git a/src/storage/src/hummock/tiered_cache.rs b/src/storage/src/hummock/tiered_cache.rs index 7b1b9e908351f..b7b27ae9ea1ed 100644 --- a/src/storage/src/hummock/tiered_cache.rs +++ b/src/storage/src/hummock/tiered_cache.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/utils.rs b/src/storage/src/hummock/utils.rs index 1cfc19c2cfe7c..5e153aa0e34f4 100644 --- a/src/storage/src/hummock/utils.rs +++ b/src/storage/src/hummock/utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,12 +14,13 @@ use std::cmp::Ordering; use std::fmt::{Debug, Formatter}; -use std::ops::Bound::{Excluded, Included, Unbounded}; +use std::ops::Bound::{self, Excluded, Included, Unbounded}; use std::ops::RangeBounds; use std::sync::atomic::{AtomicU64, Ordering as AtomicOrdering}; use std::sync::Arc; use risingwave_common::catalog::TableId; +use risingwave_hummock_sdk::can_concat; use risingwave_hummock_sdk::key::{bound_table_key_range, user_key, TableKey, UserKey}; use risingwave_pb::hummock::{HummockVersion, SstableInfo}; use tokio::sync::Notify; @@ -103,21 +104,6 @@ where .is_ok() } -/// Prune SSTs that does not overlap with a specific key range or does not overlap with a specific -/// vnode set. 
Returns the sst ids after pruning -pub fn prune_ssts<'a, R, B>( - ssts: impl Iterator, - table_id: TableId, - table_key_range: &R, -) -> Vec<&'a SstableInfo> -where - R: RangeBounds>, - B: AsRef<[u8]>, -{ - ssts.filter(|info| filter_single_sst(info, table_id, table_key_range)) - .collect() -} - /// Search the SST containing the specified key within a level, using binary search. pub(crate) fn search_sst_idx(ssts: &[SstableInfo], key: &B) -> usize where @@ -127,7 +113,39 @@ where let ord = user_key(&table.key_range.as_ref().unwrap().left).cmp(key.as_ref()); ord == Ordering::Less || ord == Ordering::Equal }) - .saturating_sub(1) // considering the boundary of 0 +} + +/// Prune overlapping SSTs that do not overlap with a specific key range or do not overlap with +/// a specific table id. Returns the sst ids after pruning. +pub fn prune_overlapping_ssts<'a, R, B>( + ssts: &'a [SstableInfo], + table_id: TableId, + table_key_range: &'a R, +) -> impl DoubleEndedIterator +where + R: RangeBounds>, + B: AsRef<[u8]>, +{ + ssts.iter() + .filter(move |info| filter_single_sst(info, table_id, table_key_range)) +} + +/// Prune non-overlapping SSTs that do not overlap with a specific key range or do not overlap +/// with a specific table id. Returns the sst ids after pruning. +pub fn prune_nonoverlapping_ssts<'a>( + ssts: &'a [SstableInfo], + encoded_user_key_range: &'a (Bound>, Bound>), +) -> impl DoubleEndedIterator { + debug_assert!(can_concat(ssts)); + let start_table_idx = match encoded_user_key_range.start_bound() { + Included(key) | Excluded(key) => search_sst_idx(ssts, key).saturating_sub(1), + _ => 0, + }; + let end_table_idx = match encoded_user_key_range.end_bound() { + Included(key) | Excluded(key) => search_sst_idx(ssts, key).saturating_sub(1), + _ => ssts.len().saturating_sub(1), + }; + ssts[start_table_idx..=end_table_idx].iter() } struct MemoryLimiterInner { diff --git a/src/storage/src/hummock/vacuum.rs b/src/storage/src/hummock/vacuum.rs index 8f01e0f6730ac..832502d24f139 100644 --- a/src/storage/src/hummock/vacuum.rs +++ b/src/storage/src/hummock/vacuum.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/validator.rs b/src/storage/src/hummock/validator.rs index 5d6d8980bca6d..5012750222c4f 100644 --- a/src/storage/src/hummock/validator.rs +++ b/src/storage/src/hummock/validator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/hummock/value.rs b/src/storage/src/hummock/value.rs index 250d87095a9a0..70f32bba0e0c8 100644 --- a/src/storage/src/hummock/value.rs +++ b/src/storage/src/hummock/value.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.
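// Illustrative sketch (not part of the patch): the idea behind `search_sst_idx` plus
// `prune_nonoverlapping_ssts` above. In a non-overlapping level the SSTs are sorted and
// disjoint, so the candidate slice can be found with two binary searches (`partition_point`)
// instead of filtering every SST as `prune_overlapping_ssts` must. Types are simplified:
// an "SST" here is just its inclusive (left, right) user-key range.
type Sst = (&'static str, &'static str);

// Index of the last SST whose left bound is <= `key`, clamped to 0
// (mirrors `search_sst_idx(..).saturating_sub(1)` in the real code).
fn search_idx(ssts: &[Sst], key: &str) -> usize {
    ssts.partition_point(|(left, _)| *left <= key).saturating_sub(1)
}

// Candidate SSTs for the inclusive user-key range `[start, end]`.
fn prune_nonoverlapping<'a>(ssts: &'a [Sst], start: &str, end: &str) -> &'a [Sst] {
    let lo = search_idx(ssts, start);
    let hi = search_idx(ssts, end);
    &ssts[lo..=hi]
}

fn main() {
    let level = [("a", "c"), ("d", "f"), ("g", "i"), ("j", "l")];
    // Only the middle two SSTs can contain keys in ["e", "h"].
    assert_eq!(prune_nonoverlapping(&level, "e", "h"), &level[1..=2]);
}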
diff --git a/src/storage/src/lib.rs b/src/storage/src/lib.rs index 014ab72abdf99..2edca6aef5268 100644 --- a/src/storage/src/lib.rs +++ b/src/storage/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -42,6 +42,7 @@ #![feature(provide_any)] #![feature(let_chains)] #![feature(associated_type_bounds)] +#![feature(local_key_cell_methods)] pub mod hummock; pub mod memory; @@ -52,6 +53,7 @@ pub mod storage_value; #[macro_use] pub mod store; pub mod error; +pub mod opts; pub mod store_impl; pub mod table; pub mod write_batch; diff --git a/src/storage/src/mem_table.rs b/src/storage/src/mem_table.rs index 9a7bcff98e08d..d9b23de733f4c 100644 --- a/src/storage/src/mem_table.rs +++ b/src/storage/src/mem_table.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,18 +11,29 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + +use std::cmp::Ordering; use std::collections::btree_map::Entry; use std::collections::BTreeMap; +use std::mem::swap; use std::ops::RangeBounds; use bytes::Bytes; +use futures::{pin_mut, StreamExt}; +use futures_async_stream::try_stream; +use risingwave_common::catalog::TableId; use risingwave_common::row::RowDeserializer; +use risingwave_hummock_sdk::key::{FullKey, TableKey}; use thiserror::Error; +use crate::error::StorageError; +use crate::store::*; + #[derive(Clone, Debug)] pub enum KeyOp { Insert(Bytes), Delete(Bytes), + /// (old_value, new_value) Update((Bytes, Bytes)), } @@ -30,31 +41,31 @@ pub enum KeyOp { #[derive(Clone)] pub struct MemTable { buffer: BTreeMap, + is_consistent_op: bool, } -pub type MemTableIter<'a> = impl Iterator; - #[derive(Error, Debug)] pub enum MemTableError { - #[error("conflicted row operations on same key")] - Conflict { key: Bytes, prev: KeyOp, new: KeyOp }, + #[error("Inconsistent operation")] + InconsistentOperation { key: Bytes, prev: KeyOp, new: KeyOp }, } type Result = std::result::Result>; -impl Default for MemTable { - fn default() -> Self { - Self::new() - } -} - impl MemTable { - pub fn new() -> Self { + pub fn new(is_consistent_op: bool) -> Self { Self { buffer: BTreeMap::new(), + is_consistent_op, } } + pub fn drain(&mut self) -> Self { + let mut temp = Self::new(self.is_consistent_op); + swap(&mut temp, self); + temp + } + pub fn is_dirty(&self) -> bool { !self.buffer.is_empty() } @@ -66,6 +77,10 @@ impl MemTable { /// write methods pub fn insert(&mut self, pk: Bytes, value: Bytes) -> Result<()> { + if !self.is_consistent_op { + self.buffer.insert(pk, KeyOp::Insert(value)); + return Ok(()); + } let entry = self.buffer.entry(pk); match entry { Entry::Vacant(e) => { @@ -78,7 +93,7 @@ impl MemTable { e.insert(KeyOp::Update((old_val, value))); Ok(()) } - _ => Err(MemTableError::Conflict { + KeyOp::Insert(_) | KeyOp::Update(_) => Err(MemTableError::InconsistentOperation { key: e.key().clone(), prev: e.get().clone(), new: KeyOp::Insert(value), @@ -89,6 +104,10 @@ impl MemTable { } pub fn delete(&mut self, pk: Bytes, old_value: Bytes) -> Result<()> { + if !self.is_consistent_op { + self.buffer.insert(pk, KeyOp::Delete(old_value)); + return Ok(()); + 
} let entry = self.buffer.entry(pk); match entry { Entry::Vacant(e) => { @@ -97,11 +116,17 @@ impl MemTable { } Entry::Occupied(mut e) => match e.get_mut() { KeyOp::Insert(original_value) => { - debug_assert_eq!(original_value, &old_value); + if ENABLE_SANITY_CHECK && original_value != &old_value { + return Err(Box::new(MemTableError::InconsistentOperation { + key: e.key().clone(), + prev: e.get().clone(), + new: KeyOp::Delete(old_value), + })); + } e.remove(); Ok(()) } - KeyOp::Delete(_) => Err(MemTableError::Conflict { + KeyOp::Delete(_) => Err(MemTableError::InconsistentOperation { key: e.key().clone(), prev: e.get().clone(), new: KeyOp::Delete(old_value), @@ -109,7 +134,13 @@ impl MemTable { .into()), KeyOp::Update(value) => { let (original_old_value, original_new_value) = std::mem::take(value); - debug_assert_eq!(original_new_value, old_value); + if ENABLE_SANITY_CHECK && original_new_value != old_value { + return Err(Box::new(MemTableError::InconsistentOperation { + key: e.key().clone(), + prev: e.get().clone(), + new: KeyOp::Delete(old_value), + })); + } e.insert(KeyOp::Delete(original_old_value)); Ok(()) } @@ -118,6 +149,11 @@ impl MemTable { } pub fn update(&mut self, pk: Bytes, old_value: Bytes, new_value: Bytes) -> Result<()> { + if !self.is_consistent_op { + self.buffer + .insert(pk, KeyOp::Update((old_value, new_value))); + return Ok(()); + } let entry = self.buffer.entry(pk); match entry { Entry::Vacant(e) => { @@ -125,23 +161,24 @@ impl MemTable { Ok(()) } Entry::Occupied(mut e) => match e.get_mut() { - KeyOp::Insert(original_value) => { - debug_assert_eq!(original_value, &old_value); - e.insert(KeyOp::Insert(new_value)); + KeyOp::Insert(ref mut original_new_value) + | KeyOp::Update((_, ref mut original_new_value)) => { + if ENABLE_SANITY_CHECK && original_new_value != &old_value { + return Err(Box::new(MemTableError::InconsistentOperation { + key: e.key().clone(), + prev: e.get().clone(), + new: KeyOp::Update((old_value, new_value)), + })); + } + *original_new_value = new_value; Ok(()) } - KeyOp::Delete(_) => Err(MemTableError::Conflict { + KeyOp::Delete(_) => Err(MemTableError::InconsistentOperation { key: e.key().clone(), prev: e.get().clone(), new: KeyOp::Update((old_value, new_value)), } .into()), - KeyOp::Update(value) => { - let (original_old_value, original_new_value) = std::mem::take(value); - debug_assert_eq!(original_new_value, old_value); - e.insert(KeyOp::Update((original_old_value, new_value))); - Ok(()) - } }, } } @@ -150,7 +187,7 @@ impl MemTable { self.buffer } - pub fn iter<'a, R>(&'a self, key_range: R) -> MemTableIter<'a> + pub fn iter<'a, R>(&'a self, key_range: R) -> impl Iterator where R: RangeBounds + 'a, { @@ -182,3 +219,91 @@ impl KeyOp { } } } + +#[try_stream(ok = StateStoreIterItem, error = StorageError)] +pub async fn merge_stream<'a>( + mem_table_iter: impl Iterator + 'a, + inner_stream: impl StateStoreReadIterStream, + table_id: TableId, + epoch: u64, +) { + let inner_stream = inner_stream.peekable(); + pin_mut!(inner_stream); + + let mut mem_table_iter = mem_table_iter.fuse().peekable(); + + loop { + match (inner_stream.as_mut().peek().await, mem_table_iter.peek()) { + (None, None) => break, + // The mem table side has come to an end, return data from the shared storage. + (Some(_), None) => { + let (key, value) = inner_stream.next().await.unwrap()?; + yield (key, value) + } + // The stream side has come to an end, return data from the mem table. 
+ (None, Some(_)) => { + let (key, key_op) = mem_table_iter.next().unwrap(); + match key_op { + KeyOp::Insert(value) | KeyOp::Update((_, value)) => { + yield ( + FullKey::new(table_id, TableKey(key.clone()), epoch), + value.clone(), + ) + } + _ => {} + } + } + (Some(Ok((inner_key, _))), Some((mem_table_key, _))) => { + debug_assert_eq!(inner_key.user_key.table_id, table_id); + match inner_key.user_key.table_key.0.cmp(mem_table_key) { + Ordering::Less => { + // yield data from storage + let (key, value) = inner_stream.next().await.unwrap()?; + yield (key, value); + } + Ordering::Equal => { + // both memtable and storage contain the key, so we advance both + // iterators and return the data in memory. + + let (_, key_op) = mem_table_iter.next().unwrap(); + let (key, old_value_in_inner) = inner_stream.next().await.unwrap()?; + match key_op { + KeyOp::Insert(value) => { + yield (key.clone(), value.clone()); + } + KeyOp::Delete(_) => {} + KeyOp::Update((old_value, new_value)) => { + debug_assert!(old_value == &old_value_in_inner); + + yield (key, new_value.clone()); + } + } + } + Ordering::Greater => { + // yield data from mem table + let (key, key_op) = mem_table_iter.next().unwrap(); + + match key_op { + KeyOp::Insert(value) => { + yield ( + FullKey::new(table_id, TableKey(key.clone()), epoch), + value.clone(), + ); + } + KeyOp::Delete(_) => {} + KeyOp::Update(_) => unreachable!( + "memtable update should always be paired with a storage key" + ), + } + } + } + } + (Some(Err(_)), Some(_)) => { + // Throw the error. + return Err(inner_stream.next().await.unwrap().unwrap_err()); + } + } + } +} + +const ENABLE_SANITY_CHECK: bool = cfg!(debug_assertions); diff --git a/src/storage/src/memory.rs b/src/storage/src/memory.rs index 7afdb3486b766..625c601ba74ee 100644 --- a/src/storage/src/memory.rs +++ b/src/storage/src/memory.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -28,8 +28,8 @@ use crate::error::StorageResult; use crate::storage_value::StorageValue; use crate::store::*; use crate::{ - define_state_store_associated_type, define_state_store_read_associated_type, - define_state_store_write_associated_type, + define_local_state_store_associated_type, define_state_store_associated_type, + define_state_store_read_associated_type, define_state_store_write_associated_type, }; pub type BytesFullKey = FullKey; @@ -604,7 +604,17 @@ impl StateStoreWrite for RangeKvStateStore { } } -impl LocalStateStore for RangeKvStateStore {} +impl LocalStateStore for RangeKvStateStore { + define_local_state_store_associated_type!(); + + fn may_exist( + &self, + _key_range: (Bound>, Bound>), + _read_options: ReadOptions, + ) -> Self::MayExistFuture<'_> { + async move { Ok(true) } + } +} impl StateStore for RangeKvStateStore { type Local = Self; diff --git a/src/storage/src/monitor/compactor_metrics.rs b/src/storage/src/monitor/compactor_metrics.rs index fb5fb240843a3..9aafd7b39e8df 100644 --- a/src/storage/src/monitor/compactor_metrics.rs +++ b/src/storage/src/monitor/compactor_metrics.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
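// Illustrative sketch (not part of the patch): the merge performed by `merge_stream` above,
// rewritten as a plain synchronous merge of two sorted sequences so it runs standalone. The
// buffered mem-table op shadows the committed snapshot on equal keys: Insert/Update yield the
// buffered value and Delete hides the stored one. `KeyOp` and `merge` are simplified stand-ins;
// error handling and the `FullKey`/epoch bookkeeping of the real stream are omitted.
use std::cmp::Ordering;

#[derive(Clone, Debug, PartialEq)]
enum KeyOp {
    Insert(String),
    Delete,
    Update(String), // only the new value matters for reads
}

fn merge(mem_table: Vec<(String, KeyOp)>, snapshot: Vec<(String, String)>) -> Vec<(String, String)> {
    let mut out = Vec::new();
    let mut mem = mem_table.into_iter().peekable();
    let mut snap = snapshot.into_iter().peekable();
    loop {
        match (snap.peek(), mem.peek()) {
            (None, None) => break,
            // Snapshot exhausted: emit the remaining buffered inserts/updates.
            (None, Some(_)) => {
                let (key, op) = mem.next().unwrap();
                if let KeyOp::Insert(v) | KeyOp::Update(v) = op {
                    out.push((key, v));
                }
            }
            // Mem table exhausted: pass the snapshot through unchanged.
            (Some(_), None) => out.push(snap.next().unwrap()),
            (Some((skey, _)), Some((mkey, _))) => match skey.cmp(mkey) {
                Ordering::Less => out.push(snap.next().unwrap()),
                Ordering::Greater => {
                    let (key, op) = mem.next().unwrap();
                    if let KeyOp::Insert(v) | KeyOp::Update(v) = op {
                        out.push((key, v));
                    }
                }
                // Key present on both sides: the buffered op wins; advance both.
                Ordering::Equal => {
                    let (key, op) = mem.next().unwrap();
                    let _ = snap.next();
                    match op {
                        KeyOp::Insert(v) | KeyOp::Update(v) => out.push((key, v)),
                        KeyOp::Delete => {}
                    }
                }
            },
        }
    }
    out
}

fn main() {
    let mem = vec![
        ("b".into(), KeyOp::Update("b1".into())),
        ("c".into(), KeyOp::Delete),
        ("d".into(), KeyOp::Insert("d0".into())),
    ];
    let snap = vec![
        ("a".into(), "a0".into()),
        ("b".into(), "b0".into()),
        ("c".into(), "c0".into()),
    ];
    let merged = merge(mem, snap);
    assert_eq!(
        merged,
        vec![
            ("a".to_string(), "a0".to_string()),
            ("b".to_string(), "b1".to_string()),
            ("d".to_string(), "d0".to_string()),
        ]
    );
}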
diff --git a/src/storage/src/monitor/hummock_metrics.rs b/src/storage/src/monitor/hummock_metrics.rs index 3135442cf5666..4bfe47219cbec 100644 --- a/src/storage/src/monitor/hummock_metrics.rs +++ b/src/storage/src/monitor/hummock_metrics.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/monitor/hummock_state_store_metrics.rs b/src/storage/src/monitor/hummock_state_store_metrics.rs index 71a5a35aa8637..82ba5e99391fd 100644 --- a/src/storage/src/monitor/hummock_state_store_metrics.rs +++ b/src/storage/src/monitor/hummock_state_store_metrics.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/monitor/local_metrics.rs b/src/storage/src/monitor/local_metrics.rs index a69f3e9d281df..b77647b4f9e8f 100644 --- a/src/storage/src/monitor/local_metrics.rs +++ b/src/storage/src/monitor/local_metrics.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,14 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cell::RefCell; +use std::collections::HashMap; #[cfg(all(debug_assertions, not(any(madsim, test, feature = "test"))))] use std::sync::atomic::AtomicBool; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; +use prometheus::core::GenericLocalCounter; +use prometheus::local::LocalHistogram; +use risingwave_common::catalog::TableId; + use super::HummockStateStoreMetrics; use crate::monitor::CompactorMetrics; +thread_local!(static LOCAL_METRICS: RefCell> = RefCell::new(HashMap::default())); + +macro_rules! 
inc_local_metrics { + ($self:ident, $metrics: ident, $($x:ident),*) => {{ + $( + $metrics.$x.inc_by($self.$x); + )* + }} +} + #[derive(Default, Debug)] pub struct StoreLocalStatistic { pub cache_data_block_miss: u64, @@ -32,10 +48,17 @@ pub struct StoreLocalStatistic { pub skip_multi_version_key_count: u64, pub skip_delete_key_count: u64, pub processed_key_count: u64, - pub bloom_filter_true_negative_count: u64, + pub bloom_filter_true_negative_counts: u64, pub remote_io_time: Arc, pub bloom_filter_check_counts: u64, pub get_shared_buffer_hit_counts: u64, + pub staging_imm_iter_count: u64, + pub staging_sst_iter_count: u64, + pub overlapping_iter_count: u64, + pub non_overlapping_iter_count: u64, + pub may_exist_check_sstable_count: u64, + pub sub_iter_count: u64, + pub found_key: bool, #[cfg(all(debug_assertions, not(any(madsim, test, feature = "test"))))] reported: AtomicBool, @@ -54,7 +77,7 @@ impl StoreLocalStatistic { self.skip_multi_version_key_count += other.skip_multi_version_key_count; self.skip_delete_key_count += other.skip_delete_key_count; self.processed_key_count += other.processed_key_count; - self.bloom_filter_true_negative_count += other.bloom_filter_true_negative_count; + self.bloom_filter_true_negative_counts += other.bloom_filter_true_negative_counts; self.remote_io_time.fetch_add( other.remote_io_time.load(Ordering::Relaxed), Ordering::Relaxed, @@ -74,78 +97,45 @@ impl StoreLocalStatistic { self.cache_meta_block_miss += local_cache_meta_block_miss; } - pub fn report(&self, metrics: &HummockStateStoreMetrics, table_id_label: &str) { - if self.cache_data_block_total > 0 { - metrics - .sst_store_block_request_counts - .with_label_values(&[table_id_label, "data_total"]) - .inc_by(self.cache_data_block_total); - } - - if self.cache_data_block_miss > 0 { - metrics - .sst_store_block_request_counts - .with_label_values(&[table_id_label, "data_miss"]) - .inc_by(self.cache_data_block_miss); - } - - if self.cache_meta_block_total > 0 { - metrics - .sst_store_block_request_counts - .with_label_values(&[table_id_label, "meta_total"]) - .inc_by(self.cache_meta_block_total); - } - - if self.cache_meta_block_miss > 0 { - metrics - .sst_store_block_request_counts - .with_label_values(&[table_id_label, "meta_miss"]) - .inc_by(self.cache_meta_block_miss); - } - + fn report(&self, metrics: &mut LocalStoreMetrics) { + inc_local_metrics!( + self, + metrics, + cache_data_block_total, + cache_data_block_miss, + cache_meta_block_total, + cache_meta_block_miss, + skip_multi_version_key_count, + skip_delete_key_count, + get_shared_buffer_hit_counts, + total_key_count, + processed_key_count + ); let t = self.remote_io_time.load(Ordering::Relaxed) as f64; if t > 0.0 { - metrics - .remote_read_time - .with_label_values(&[table_id_label]) - .observe(t / 1000.0); - } - - if self.processed_key_count > 0 { - metrics - .iter_scan_key_counts - .with_label_values(&[table_id_label, "processed"]) - .inc_by(self.processed_key_count); - } - - if self.skip_multi_version_key_count > 0 { - metrics - .iter_scan_key_counts - .with_label_values(&[table_id_label, "skip_multi_version"]) - .inc_by(self.skip_multi_version_key_count); - } - - if self.skip_delete_key_count > 0 { - metrics - .iter_scan_key_counts - .with_label_values(&[table_id_label, "skip_delete"]) - .inc_by(self.skip_delete_key_count); - } - - if self.total_key_count > 0 { - metrics - .iter_scan_key_counts - .with_label_values(&[table_id_label, "total"]) - .inc_by(self.total_key_count); + metrics.remote_io_time.observe(t / 1000.0); } + metrics + 
.staging_imm_iter_count + .observe(self.staging_imm_iter_count as f64); + metrics + .staging_sst_iter_count + .observe(self.staging_sst_iter_count as f64); + metrics + .overlapping_iter_count + .observe(self.overlapping_iter_count as f64); + metrics + .non_overlapping_iter_count + .observe(self.non_overlapping_iter_count as f64); + metrics + .may_exist_check_sstable_count + .observe(self.may_exist_check_sstable_count as f64); - if self.get_shared_buffer_hit_counts > 0 { - metrics - .get_shared_buffer_hit_counts - .with_label_values(&[table_id_label]) - .inc_by(self.get_shared_buffer_hit_counts); + metrics.collect_count += 1; + if metrics.collect_count > FLUSH_LOCAL_METRICS_TIMES { + metrics.flush(); + metrics.collect_count = 0; } - #[cfg(all(debug_assertions, not(any(madsim, test, feature = "test"))))] if self.reported.fetch_or(true, Ordering::Relaxed) || self.added.load(Ordering::Relaxed) { tracing::error!("double reported\n{:#?}", self); @@ -191,56 +181,39 @@ impl StoreLocalStatistic { } } - pub fn report_bloom_filter_metrics( - &self, - metrics: &HummockStateStoreMetrics, - oper_type: &str, - table_id_label: &str, - is_non_existent_key: bool, - ) { + fn report_bloom_filter_metrics(&self, metrics: &mut BloomFilterLocalMetrics) { if self.bloom_filter_check_counts == 0 { return; } - // checks SST bloom filters - metrics - .bloom_filter_check_counts - .with_label_values(&[table_id_label, oper_type]) - .inc_by(self.bloom_filter_check_counts); - - metrics - .read_req_check_bloom_filter_counts - .with_label_values(&[table_id_label, oper_type]) - .inc(); + inc_local_metrics!(self, metrics, bloom_filter_true_negative_counts); - if self.bloom_filter_true_negative_count > 0 { - // true negative - metrics - .bloom_filter_true_negative_counts - .with_label_values(&[table_id_label, oper_type]) - .inc_by(self.bloom_filter_true_negative_count); - } + metrics.read_req_check_bloom_filter_counts.inc(); - if self.bloom_filter_check_counts > self.bloom_filter_true_negative_count { - if is_non_existent_key { + if self.bloom_filter_check_counts > self.bloom_filter_true_negative_counts { + if !self.found_key { // false positive // checks SST bloom filters (at least one bloom filter return true) but returns // nothing - metrics - .read_req_positive_but_non_exist_counts - .with_label_values(&[table_id_label, oper_type]) - .inc(); - } else { - // true positive - // checks SST bloom filters and at least one bloom filter returns true - metrics - .read_req_bloom_filter_positive_counts - .with_label_values(&[table_id_label, oper_type]) - .inc(); + metrics.read_req_positive_but_non_exist_counts.inc(); } + // positive + // checks SST bloom filters and at least one bloom filter returns positive + metrics.read_req_bloom_filter_positive_counts.inc(); } } + pub fn flush_all() { + LOCAL_METRICS.with_borrow_mut(|local_metrics| { + for (_, metrics) in local_metrics.iter_mut() { + if metrics.collect_count > 0 { + metrics.flush(); + metrics.collect_count = 0; + } + } + }); + } + pub fn ignore(&self) { #[cfg(all(debug_assertions, not(any(madsim, test, feature = "test"))))] self.reported.store(true, Ordering::Relaxed); @@ -255,7 +228,7 @@ impl StoreLocalStatistic { || self.skip_multi_version_key_count != 0 || self.skip_delete_key_count != 0 || self.processed_key_count != 0 - || self.bloom_filter_true_negative_count != 0 + || self.bloom_filter_true_negative_counts != 0 || self.remote_io_time.load(Ordering::Relaxed) != 0 || self.bloom_filter_check_counts != 0 } @@ -272,3 +245,278 @@ impl Drop for StoreLocalStatistic { } } } + 
+struct LocalStoreMetrics { + cache_data_block_total: GenericLocalCounter, + cache_data_block_miss: GenericLocalCounter, + cache_meta_block_total: GenericLocalCounter, + cache_meta_block_miss: GenericLocalCounter, + remote_io_time: LocalHistogram, + processed_key_count: GenericLocalCounter, + skip_multi_version_key_count: GenericLocalCounter, + skip_delete_key_count: GenericLocalCounter, + total_key_count: GenericLocalCounter, + get_shared_buffer_hit_counts: GenericLocalCounter, + staging_imm_iter_count: LocalHistogram, + staging_sst_iter_count: LocalHistogram, + overlapping_iter_count: LocalHistogram, + non_overlapping_iter_count: LocalHistogram, + may_exist_check_sstable_count: LocalHistogram, + iter_filter_metrics: BloomFilterLocalMetrics, + get_filter_metrics: BloomFilterLocalMetrics, + may_exist_filter_metrics: BloomFilterLocalMetrics, + collect_count: usize, +} + +const FLUSH_LOCAL_METRICS_TIMES: usize = 32; + +impl LocalStoreMetrics { + pub fn new(metrics: &HummockStateStoreMetrics, table_id_label: &str) -> Self { + let cache_data_block_total = metrics + .sst_store_block_request_counts + .with_label_values(&[table_id_label, "data_total"]) + .local(); + + let cache_data_block_miss = metrics + .sst_store_block_request_counts + .with_label_values(&[table_id_label, "data_miss"]) + .local(); + + let cache_meta_block_total = metrics + .sst_store_block_request_counts + .with_label_values(&[table_id_label, "meta_total"]) + .local(); + + let cache_meta_block_miss = metrics + .sst_store_block_request_counts + .with_label_values(&[table_id_label, "meta_miss"]) + .local(); + + let remote_io_time = metrics + .remote_read_time + .with_label_values(&[table_id_label]) + .local(); + + let processed_key_count = metrics + .iter_scan_key_counts + .with_label_values(&[table_id_label, "processed"]) + .local(); + + let skip_multi_version_key_count = metrics + .iter_scan_key_counts + .with_label_values(&[table_id_label, "skip_multi_version"]) + .local(); + + let skip_delete_key_count = metrics + .iter_scan_key_counts + .with_label_values(&[table_id_label, "skip_delete"]) + .local(); + + let total_key_count = metrics + .iter_scan_key_counts + .with_label_values(&[table_id_label, "total"]) + .local(); + + let get_shared_buffer_hit_counts = metrics + .get_shared_buffer_hit_counts + .with_label_values(&[table_id_label]) + .local(); + + let staging_imm_iter_count = metrics + .iter_merge_sstable_counts + .with_label_values(&[table_id_label, "staging-imm-iter"]) + .local(); + let staging_sst_iter_count = metrics + .iter_merge_sstable_counts + .with_label_values(&[table_id_label, "staging-sst-iter"]) + .local(); + let overlapping_iter_count = metrics + .iter_merge_sstable_counts + .with_label_values(&[table_id_label, "committed-overlapping-iter"]) + .local(); + let non_overlapping_iter_count = metrics + .iter_merge_sstable_counts + .with_label_values(&[table_id_label, "committed-non-overlapping-iter"]) + .local(); + let may_exist_check_sstable_count = metrics + .iter_merge_sstable_counts + .with_label_values(&[table_id_label, "may-exist-check-sstable"]) + .local(); + let get_filter_metrics = BloomFilterLocalMetrics::new(metrics, table_id_label, "get"); + let iter_filter_metrics = BloomFilterLocalMetrics::new(metrics, table_id_label, "iter"); + let may_exist_filter_metrics = + BloomFilterLocalMetrics::new(metrics, table_id_label, "may_exist"); + Self { + cache_data_block_total, + cache_data_block_miss, + cache_meta_block_total, + cache_meta_block_miss, + remote_io_time, + processed_key_count, + 
skip_multi_version_key_count, + skip_delete_key_count, + total_key_count, + get_shared_buffer_hit_counts, + staging_imm_iter_count, + staging_sst_iter_count, + overlapping_iter_count, + non_overlapping_iter_count, + may_exist_check_sstable_count, + get_filter_metrics, + iter_filter_metrics, + may_exist_filter_metrics, + collect_count: 0, + } + } + + pub fn flush(&mut self) { + self.cache_data_block_total.flush(); + self.cache_data_block_miss.flush(); + self.cache_meta_block_total.flush(); + self.cache_meta_block_miss.flush(); + self.remote_io_time.flush(); + self.skip_multi_version_key_count.flush(); + self.skip_delete_key_count.flush(); + self.get_shared_buffer_hit_counts.flush(); + self.total_key_count.flush(); + self.processed_key_count.flush(); + self.iter_filter_metrics.flush(); + self.get_filter_metrics.flush(); + } +} + +macro_rules! define_bloom_filter_metrics { + ($($x:ident),*) => ( + struct BloomFilterLocalMetrics { + $($x: GenericLocalCounter,)* + } + + impl BloomFilterLocalMetrics { + pub fn new(metrics: &HummockStateStoreMetrics, table_id_label: &str, oper_type: &str) -> Self { + // checks SST bloom filters + Self { + $($x: metrics.$x.with_label_values(&[table_id_label, oper_type]).local(),)* + } + } + + pub fn flush(&mut self) { + $( + self.$x.flush(); + )* + } + } + ) +} + +define_bloom_filter_metrics!( + read_req_check_bloom_filter_counts, + bloom_filter_true_negative_counts, + read_req_positive_but_non_exist_counts, + read_req_bloom_filter_positive_counts +); + +pub struct GetLocalMetricsGuard { + metrics: Arc, + table_id: TableId, + pub local_stats: StoreLocalStatistic, +} + +impl GetLocalMetricsGuard { + pub fn new(metrics: Arc, table_id: TableId) -> Self { + Self { + metrics, + table_id, + local_stats: StoreLocalStatistic::default(), + } + } +} + +impl Drop for GetLocalMetricsGuard { + fn drop(&mut self) { + LOCAL_METRICS.with_borrow_mut(|local_metrics| { + let table_metrics = local_metrics + .entry(self.table_id.table_id) + .or_insert_with(|| { + LocalStoreMetrics::new( + self.metrics.as_ref(), + self.table_id.to_string().as_str(), + ) + }); + self.local_stats.report(table_metrics); + self.local_stats + .report_bloom_filter_metrics(&mut table_metrics.get_filter_metrics); + }); + } +} + +pub struct IterLocalMetricsGuard { + metrics: Arc, + table_id: TableId, + pub local_stats: StoreLocalStatistic, +} + +impl IterLocalMetricsGuard { + pub fn new( + metrics: Arc, + table_id: TableId, + local_stats: StoreLocalStatistic, + ) -> Self { + Self { + metrics, + table_id, + local_stats, + } + } +} + +impl Drop for IterLocalMetricsGuard { + fn drop(&mut self) { + LOCAL_METRICS.with_borrow_mut(|local_metrics| { + let table_metrics = local_metrics + .entry(self.table_id.table_id) + .or_insert_with(|| { + LocalStoreMetrics::new( + self.metrics.as_ref(), + self.table_id.to_string().as_str(), + ) + }); + self.local_stats.report(table_metrics); + self.local_stats + .report_bloom_filter_metrics(&mut table_metrics.iter_filter_metrics); + }); + } +} + +pub struct MayExistLocalMetricsGuard { + metrics: Arc, + table_id: TableId, + pub local_stats: StoreLocalStatistic, +} + +impl MayExistLocalMetricsGuard { + pub fn new(metrics: Arc, table_id: TableId) -> Self { + Self { + metrics, + table_id, + local_stats: StoreLocalStatistic::default(), + } + } +} + +impl Drop for MayExistLocalMetricsGuard { + fn drop(&mut self) { + LOCAL_METRICS.with_borrow_mut(|local_metrics| { + let table_metrics = local_metrics + .entry(self.table_id.table_id) + .or_insert_with(|| { + LocalStoreMetrics::new( + 
self.metrics.as_ref(), + self.table_id.to_string().as_str(), + ) + }); + self.local_stats.report(table_metrics); + self.local_stats + .report_bloom_filter_metrics(&mut table_metrics.may_exist_filter_metrics); + }); + } +} diff --git a/src/storage/src/monitor/mod.rs b/src/storage/src/monitor/mod.rs index cf0fcf80c6ad8..079b9bb2391df 100644 --- a/src/storage/src/monitor/mod.rs +++ b/src/storage/src/monitor/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/monitor/monitored_storage_metrics.rs b/src/storage/src/monitor/monitored_storage_metrics.rs index 9abaf9e13917f..922fa0b662f76 100644 --- a/src/storage/src/monitor/monitored_storage_metrics.rs +++ b/src/storage/src/monitor/monitored_storage_metrics.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ use prometheus::core::{AtomicU64, GenericCounterVec}; use prometheus::{ - exponential_buckets, histogram_opts, register_histogram_vec_with_registry, + exponential_buckets, histogram_opts, linear_buckets, register_histogram_vec_with_registry, register_histogram_with_registry, register_int_counter_vec_with_registry, Histogram, HistogramVec, Registry, }; @@ -29,6 +29,7 @@ pub struct MonitoredStorageMetrics { pub iter_item: HistogramVec, pub iter_duration: HistogramVec, pub iter_scan_duration: HistogramVec, + pub may_exist_duration: HistogramVec, pub iter_in_process_counts: GenericCounterVec, pub write_batch_tuple_counts: GenericCounterVec, @@ -58,10 +59,16 @@ impl MonitoredStorageMetrics { let get_value_size = register_histogram_vec_with_registry!(opts, &["table_id"], registry).unwrap(); + let mut buckets = exponential_buckets(0.000004, 2.0, 4).unwrap(); // 4 ~ 32us + buckets.extend(linear_buckets(0.00006, 0.00004, 5).unwrap()); // 60 ~ 220us. + buckets.extend(linear_buckets(0.0003, 0.0001, 3).unwrap()); // 300 ~ 500us. + buckets.extend(exponential_buckets(0.001, 2.0, 5).unwrap()); // 1 ~ 16ms. + buckets.extend(exponential_buckets(0.05, 4.0, 5).unwrap()); // 0.05 ~ 1.28s. 
+ buckets.push(16.0); // 16s let get_duration_opts = histogram_opts!( "state_store_get_duration", "Total latency of get that have been issued to state store", - exponential_buckets(0.00001, 2.0, 21).unwrap() // max 10s + buckets.clone(), ); let get_duration = register_histogram_vec_with_registry!(get_duration_opts, &["table_id"], registry) @@ -86,7 +93,7 @@ impl MonitoredStorageMetrics { let opts = histogram_opts!( "state_store_iter_duration", "Histogram of iterator scan and initialization time that have been issued to state store", - exponential_buckets(0.0001, 2.0, 21).unwrap() // max 104s + buckets.clone(), ); let iter_duration = register_histogram_vec_with_registry!(opts, &["table_id"], registry).unwrap(); @@ -94,7 +101,7 @@ impl MonitoredStorageMetrics { let opts = histogram_opts!( "state_store_iter_scan_duration", "Histogram of iterator scan time that have been issued to state store", - exponential_buckets(0.0001, 2.0, 21).unwrap() // max 104s + buckets.clone(), ); let iter_scan_duration = register_histogram_vec_with_registry!(opts, &["table_id"], registry).unwrap(); @@ -107,6 +114,14 @@ impl MonitoredStorageMetrics { ) .unwrap(); + let opts = histogram_opts!( + "state_store_may_exist_duration", + "Histogram of may exist time that have been issued to state store", + buckets, + ); + let may_exist_duration = + register_histogram_vec_with_registry!(opts, &["table_id"], registry).unwrap(); + // ----- write_batch ----- let write_batch_tuple_counts = register_int_counter_vec_with_registry!( "state_store_write_batch_tuple_counts", @@ -154,6 +169,7 @@ impl MonitoredStorageMetrics { iter_item, iter_duration, iter_scan_duration, + may_exist_duration, iter_in_process_counts, write_batch_tuple_counts, write_batch_duration, diff --git a/src/storage/src/monitor/monitored_store.rs b/src/storage/src/monitor/monitored_store.rs index ecc4fb8408056..8a238989e090e 100644 --- a/src/storage/src/monitor/monitored_store.rs +++ b/src/storage/src/monitor/monitored_store.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -30,8 +30,8 @@ use crate::hummock::{HummockStorage, SstableIdManagerRef}; use crate::storage_value::StorageValue; use crate::store::*; use crate::{ - define_state_store_associated_type, define_state_store_read_associated_type, - define_state_store_write_associated_type, + define_local_state_store_associated_type, define_state_store_associated_type, + define_state_store_read_associated_type, define_state_store_write_associated_type, }; /// A state store wrapper for monitoring metrics. 
@@ -68,6 +68,10 @@ impl MonitoredStateStore { .await .inspect_err(|e| error!("Failed in iter: {:?}", e))?; + self.storage_metrics + .iter_duration + .with_label_values(&[table_id_label.as_str()]) + .observe(start_time.elapsed().as_secs_f64()); // statistics of iter in process count to estimate the read ops in the same time self.storage_metrics .iter_in_process_counts @@ -80,7 +84,6 @@ impl MonitoredStateStore { stats: MonitoredStateStoreIterStats { total_items: 0, total_size: 0, - start_time, scan_time: minstant::Instant::now(), storage_metrics: self.storage_metrics.clone(), table_id, @@ -185,7 +188,27 @@ impl StateStoreWrite for MonitoredStateStore { } } -impl LocalStateStore for MonitoredStateStore {} +impl LocalStateStore for MonitoredStateStore { + define_local_state_store_associated_type!(); + + fn may_exist( + &self, + key_range: (Bound>, Bound>), + read_options: ReadOptions, + ) -> Self::MayExistFuture<'_> { + async move { + let table_id_label = read_options.table_id.to_string(); + let timer = self + .storage_metrics + .may_exist_duration + .with_label_values(&[table_id_label.as_str()]) + .start_timer(); + let res = self.inner.may_exist(key_range, read_options).await; + timer.observe_duration(); + res + } + } +} impl StateStore for MonitoredStateStore { type Local = MonitoredStateStore; @@ -279,7 +302,6 @@ pub struct MonitoredStateStoreIter { struct MonitoredStateStoreIterStats { total_items: usize, total_size: usize, - start_time: minstant::Instant, scan_time: minstant::Instant, storage_metrics: Arc, @@ -311,10 +333,6 @@ impl Drop for MonitoredStateStoreIterStats { fn drop(&mut self) { let table_id_label = self.table_id.to_string(); - self.storage_metrics - .iter_duration - .with_label_values(&[table_id_label.as_str()]) - .observe(self.start_time.elapsed().as_secs_f64()); self.storage_metrics .iter_scan_duration .with_label_values(&[table_id_label.as_str()]) diff --git a/src/storage/src/opts.rs b/src/storage/src/opts.rs new file mode 100644 index 0000000000000..83b721445c5ea --- /dev/null +++ b/src/storage/src/opts.rs @@ -0,0 +1,126 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_common::config::RwConfig; +use risingwave_pb::meta::SystemParams; +use risingwave_rpc_client::SystemParamsReader; + +#[derive(Clone, Debug)] +pub struct StorageOpts { + /// Target size of the Sstable. + pub sstable_size_mb: u32, + /// Size of each block in bytes in SST. + pub block_size_kb: u32, + /// False positive probability of bloom filter. + pub bloom_false_positive: f64, + /// parallelism while syncing share buffers into L0 SST. Should NOT be 0. + pub share_buffers_sync_parallelism: u32, + /// Worker threads number of dedicated tokio runtime for share buffer compaction. 0 means use + /// tokio's default value (number of CPU core). + pub share_buffer_compaction_worker_threads_number: u32, + /// Maximum shared buffer size, writes attempting to exceed the capacity will stall until there + /// is enough space. 
+ pub shared_buffer_capacity_mb: usize, + /// Remote directory for storing data and metadata objects. + pub data_directory: String, + /// Whether to enable write conflict detection. + pub write_conflict_detection_enabled: bool, + /// Capacity of sstable block cache. + pub block_cache_capacity_mb: usize, + /// Capacity of sstable meta cache. + pub meta_cache_capacity_mb: usize, + pub disable_remote_compactor: bool, + pub enable_local_spill: bool, + /// Local object store root. We should call `get_local_object_store` to get the object store. + pub local_object_store: String, + /// Number of tasks shared buffer can upload in parallel. + pub share_buffer_upload_concurrency: usize, + /// Memory limit for the compactor. + pub compactor_memory_limit_mb: usize, + /// Number of SST ids fetched from meta per RPC. + pub sstable_id_remote_fetch_number: u32, + /// Minimum SST size to use streaming upload for sstable. + pub min_sst_size_for_streaming_upload: u64, + /// Max number of sub-compaction tasks. + pub max_sub_compaction: u32, + pub max_concurrent_compaction_task_number: u64, + pub enable_state_store_v1: bool, + + pub file_cache_dir: String, + pub file_cache_capacity_mb: usize, + pub file_cache_total_buffer_capacity_mb: usize, + pub file_cache_file_fallocate_unit_mb: usize, + pub file_cache_meta_fallocate_unit_mb: usize, + pub file_cache_file_max_write_size_mb: usize, + + /// The storage URL for storing backups. + pub backup_storage_url: String, + /// The storage directory for storing backups. + pub backup_storage_directory: String, +} + +impl Default for StorageOpts { + fn default() -> Self { + let c = RwConfig::default(); + let p = SystemParams { + barrier_interval_ms: Some(c.streaming.barrier_interval_ms), + checkpoint_frequency: Some(c.streaming.checkpoint_frequency as u64), + sstable_size_mb: Some(c.storage.sstable_size_mb), + block_size_kb: Some(c.storage.block_size_kb), + bloom_false_positive: Some(c.storage.bloom_false_positive), + data_directory: Some(c.storage.data_directory.clone()), + backup_storage_url: Some(c.backup.storage_url.clone()), + backup_storage_directory: Some(c.backup.storage_directory.clone()), + state_store: None, // unused + }; + Self::from((&c, &p.into())) + } +} + +impl From<(&RwConfig, &SystemParamsReader)> for StorageOpts { + fn from((c, p): (&RwConfig, &SystemParamsReader)) -> Self { + Self { + sstable_size_mb: p.sstable_size_mb(), + block_size_kb: p.block_size_kb(), + bloom_false_positive: p.bloom_false_positive(), + share_buffers_sync_parallelism: c.storage.share_buffers_sync_parallelism, + share_buffer_compaction_worker_threads_number: c + .storage + .share_buffer_compaction_worker_threads_number, + shared_buffer_capacity_mb: c.storage.shared_buffer_capacity_mb, + data_directory: p.data_directory().to_string(), + write_conflict_detection_enabled: c.storage.write_conflict_detection_enabled, + block_cache_capacity_mb: c.storage.block_cache_capacity_mb, + meta_cache_capacity_mb: c.storage.meta_cache_capacity_mb, + disable_remote_compactor: c.storage.disable_remote_compactor, + enable_local_spill: c.storage.enable_local_spill, + local_object_store: c.storage.local_object_store.to_string(), + share_buffer_upload_concurrency: c.storage.share_buffer_upload_concurrency, + compactor_memory_limit_mb: c.storage.compactor_memory_limit_mb, + sstable_id_remote_fetch_number: c.storage.sstable_id_remote_fetch_number, + min_sst_size_for_streaming_upload: c.storage.min_sst_size_for_streaming_upload, + max_sub_compaction: c.storage.max_sub_compaction, +
max_concurrent_compaction_task_number: c.storage.max_concurrent_compaction_task_number, + enable_state_store_v1: c.storage.enable_state_store_v1, + file_cache_dir: c.storage.file_cache.dir.clone(), + file_cache_capacity_mb: c.storage.file_cache.capacity_mb, + file_cache_total_buffer_capacity_mb: c.storage.file_cache.total_buffer_capacity_mb, + file_cache_file_fallocate_unit_mb: c.storage.file_cache.cache_file_fallocate_unit_mb, + file_cache_meta_fallocate_unit_mb: c.storage.file_cache.cache_meta_fallocate_unit_mb, + file_cache_file_max_write_size_mb: c.storage.file_cache.cache_file_max_write_size_mb, + backup_storage_url: p.backup_storage_url().to_string(), + backup_storage_directory: p.backup_storage_directory().to_string(), + } + } +} diff --git a/src/storage/src/panic_store.rs b/src/storage/src/panic_store.rs index 3ef06187b7cf8..9fe82d0b4f22e 100644 --- a/src/storage/src/panic_store.rs +++ b/src/storage/src/panic_store.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,8 +23,8 @@ use crate::error::StorageResult; use crate::storage_value::StorageValue; use crate::store::*; use crate::{ - define_state_store_associated_type, define_state_store_read_associated_type, - define_state_store_write_associated_type, + define_local_state_store_associated_type, define_state_store_associated_type, + define_state_store_read_associated_type, define_state_store_write_associated_type, }; /// A panic state store. If a workload is fully in-memory, we can use this state store to @@ -75,7 +75,19 @@ impl StateStoreWrite for PanicStateStore { } } -impl LocalStateStore for PanicStateStore {} +impl LocalStateStore for PanicStateStore { + define_local_state_store_associated_type!(); + + fn may_exist( + &self, + _key_range: (Bound>, Bound>), + _read_options: ReadOptions, + ) -> Self::MayExistFuture<'_> { + async move { + panic!("should not call may_exist from the state store!"); + } + } +} impl StateStore for PanicStateStore { type Local = Self; diff --git a/src/storage/src/row_serde/mod.rs b/src/storage/src/row_serde/mod.rs index b138a3f15a0c2..9b8f6de8c3eec 100644 --- a/src/storage/src/row_serde/mod.rs +++ b/src/storage/src/row_serde/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/row_serde/row_serde_util.rs b/src/storage/src/row_serde/row_serde_util.rs index ac87f4ad0564b..c62d35a8d9b3c 100644 --- a/src/storage/src/row_serde/row_serde_util.rs +++ b/src/storage/src/row_serde/row_serde_util.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/storage_failpoints/mod.rs b/src/storage/src/storage_failpoints/mod.rs index 1f876a5ca7058..7082293549c80 100644 --- a/src/storage/src/storage_failpoints/mod.rs +++ b/src/storage/src/storage_failpoints/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/storage/src/storage_failpoints/test_iterator.rs b/src/storage/src/storage_failpoints/test_iterator.rs index c1f6b6b001952..06cd751c9cbc0 100644 --- a/src/storage/src/storage_failpoints/test_iterator.rs +++ b/src/storage/src/storage_failpoints/test_iterator.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/storage_failpoints/test_sstable.rs b/src/storage/src/storage_failpoints/test_sstable.rs index 6401f5a595e93..92f5e0862bce7 100644 --- a/src/storage/src/storage_failpoints/test_sstable.rs +++ b/src/storage/src/storage_failpoints/test_sstable.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/storage_value.rs b/src/storage/src/storage_value.rs index 34aa66b3cc401..5d6ed5bf62765 100644 --- a/src/storage/src/storage_value.rs +++ b/src/storage/src/storage_value.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/store.rs b/src/storage/src/store.rs index 2e27be603732f..09ee2d601753b 100644 --- a/src/storage/src/store.rs +++ b/src/storage/src/store.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -245,10 +245,21 @@ pub trait StateStore: StateStoreRead + StaticSendSync + Clone { fn validate_read_epoch(&self, epoch: HummockReadEpoch) -> StorageResult<()>; } +pub trait MayExistTrait<'a> = Future> + Send + 'a; + +#[macro_export] +macro_rules! define_local_state_store_associated_type { + () => { + type MayExistFuture<'a> = impl MayExistTrait<'a>; + }; +} + /// A state store that is dedicated for streaming operator, which only reads the uncommitted data /// written by itself. Each local state store is not `Clone`, and is owned by a streaming state /// table. pub trait LocalStateStore: StateStoreRead + StateStoreWrite + StaticSendSync { + type MayExistFuture<'a>: MayExistTrait<'a>; + /// Inserts a key-value entry associated with a given `epoch` into the state store. fn insert(&self, _key: Bytes, _val: Bytes) -> StorageResult<()> { unimplemented!() @@ -271,6 +282,21 @@ pub trait LocalStateStore: StateStoreRead + StateStoreWrite + StaticSendSync { fn advance_write_epoch(&mut self, _new_epoch: u64) -> StorageResult<()> { unimplemented!() } + + /// Check existence of a given `key_range`. + /// It is better to provide `prefix_hint` in `read_options`, which will be used + /// for checking bloom filter if hummock is used. If `prefix_hint` is not provided, + /// the false positive rate can be significantly higher because bloom filter cannot + /// be used. + /// + /// Returns: + /// - false: `key_range` is guaranteed to be absent in storage. + /// - true: `key_range` may or may not exist in storage. 
+ fn may_exist( + &self, + key_range: (Bound>, Bound>), + read_options: ReadOptions, + ) -> Self::MayExistFuture<'_>; } #[derive(Default, Clone)] diff --git a/src/storage/src/store_impl.rs b/src/storage/src/store_impl.rs index 5e8f49cf40a1d..24e6ced30f2a4 100644 --- a/src/storage/src/store_impl.rs +++ b/src/storage/src/store_impl.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,7 +16,6 @@ use std::fmt::Debug; use std::sync::Arc; use enum_as_inner::EnumAsInner; -use risingwave_common::config::RwConfig; use risingwave_common_service::observer_manager::RpcNotificationClient; use risingwave_hummock_sdk::filter_key_extractor::FilterKeyExtractorManagerRef; use risingwave_object_store::object::{ @@ -37,6 +36,7 @@ use crate::monitor::{ CompactorMetrics, HummockStateStoreMetrics, MonitoredStateStore as Monitored, MonitoredStorageMetrics, ObjectStoreMetrics, }; +use crate::opts::StorageOpts; use crate::StateStore; pub type HummockStorageType = impl StateStore + AsHummockTrait; @@ -392,7 +392,20 @@ pub mod verify { } } - impl LocalStateStore for VerifyStateStore {} + impl LocalStateStore for VerifyStateStore { + define_local_state_store_associated_type!(); + + // We don't verify `may_exist` across different state stores because + // the return value of `may_exist` is implementation specific and may not + // be consistent across different state store backends. + fn may_exist( + &self, + _key_range: (Bound>, Bound>), + _read_options: ReadOptions, + ) -> Self::MayExistFuture<'_> { + async move { Ok(true) } + } + } impl StateStore for VerifyStateStore { type Local = VerifyStateStore; @@ -455,8 +468,7 @@ impl StateStoreImpl { #[allow(clippy::too_many_arguments)] pub async fn new( s: &str, - file_cache_dir: &str, - rw_config: &RwConfig, + opts: Arc, hummock_meta_client: Arc, state_store_metrics: Arc, object_store_metrics: Arc, @@ -465,30 +477,23 @@ impl StateStoreImpl { storage_metrics: Arc, compactor_metrics: Arc, ) -> StorageResult { - let config = Arc::new(rw_config.storage.clone()); #[cfg(not(target_os = "linux"))] let tiered_cache = TieredCache::none(); #[cfg(target_os = "linux")] - let tiered_cache = if file_cache_dir.is_empty() { + let tiered_cache = if opts.file_cache_dir.is_empty() { TieredCache::none() } else { use crate::hummock::file_cache::cache::FileCacheOptions; use crate::hummock::HummockError; let options = FileCacheOptions { - dir: file_cache_dir.to_string(), - capacity: config.file_cache.capacity_mb * 1024 * 1024, - total_buffer_capacity: config.file_cache.total_buffer_capacity_mb * 1024 * 1024, - cache_file_fallocate_unit: config.file_cache.cache_file_fallocate_unit_mb - * 1024 - * 1024, - cache_meta_fallocate_unit: config.file_cache.cache_meta_fallocate_unit_mb - * 1024 - * 1024, - cache_file_max_write_size: config.file_cache.cache_file_max_write_size_mb - * 1024 - * 1024, + dir: opts.file_cache_dir.to_string(), + capacity: opts.file_cache_capacity_mb * 1024 * 1024, + total_buffer_capacity: opts.file_cache_total_buffer_capacity_mb * 1024 * 1024, + cache_file_fallocate_unit: opts.file_cache_file_fallocate_unit_mb * 1024 * 1024, + cache_meta_fallocate_unit: opts.file_cache_meta_fallocate_unit_mb * 1024 * 1024, + cache_file_max_write_size: opts.file_cache_file_max_write_size_mb * 1024 * 1024, flush_buffer_hooks: vec![], }; let metrics = Arc::new(tiered_cache_metrics_builder.file()); @@ -502,13 +507,12 @@ 
impl StateStoreImpl { let remote_object_store = parse_remote_object_store( hummock.strip_prefix("hummock+").unwrap(), object_store_metrics.clone(), - config.object_store_use_batch_delete, "Hummock", ) .await; - let object_store = if config.enable_local_spill { + let object_store = if opts.enable_local_spill { let local_object_store = parse_local_object_store( - config.local_object_store.as_str(), + opts.local_object_store.as_str(), object_store_metrics.clone(), ); ObjectStoreImpl::hybrid(local_object_store, remote_object_store) @@ -518,19 +522,23 @@ impl StateStoreImpl { let sstable_store = Arc::new(SstableStore::new( Arc::new(object_store), - config.data_directory.to_string(), - config.block_cache_capacity_mb * (1 << 20), - config.meta_cache_capacity_mb * (1 << 20), + opts.data_directory.to_string(), + opts.block_cache_capacity_mb * (1 << 20), + opts.meta_cache_capacity_mb * (1 << 20), tiered_cache, )); let notification_client = RpcNotificationClient::new(hummock_meta_client.get_inner().clone()); - if !config.enable_state_store_v1 { - let backup_store = parse_meta_snapshot_storage(rw_config).await?; + if !opts.enable_state_store_v1 { + let backup_store = parse_meta_snapshot_storage( + &opts.backup_storage_url, + &opts.backup_storage_directory, + ) + .await?; let backup_reader = BackupReader::new(backup_store); let inner = HummockStorage::new( - config.clone(), + opts.clone(), sstable_store, backup_reader, hummock_meta_client.clone(), @@ -544,7 +552,7 @@ impl StateStoreImpl { StateStoreImpl::hummock(inner, storage_metrics) } else { let inner = HummockStorageV1::new( - config.clone(), + opts.clone(), sstable_store, hummock_meta_client.clone(), notification_client, @@ -789,14 +797,26 @@ pub mod boxed_state_store { // For LocalStateStore + #[async_trait::async_trait] pub trait DynamicDispatchedLocalStateStore: DynamicDispatchedStateStoreRead + DynamicDispatchedStateStoreWrite { + async fn may_exist( + &self, + key_range: (Bound>, Bound>), + read_options: ReadOptions, + ) -> StorageResult; } - impl - DynamicDispatchedLocalStateStore for S - { + #[async_trait::async_trait] + impl DynamicDispatchedLocalStateStore for S { + async fn may_exist( + &self, + key_range: (Bound>, Bound>), + read_options: ReadOptions, + ) -> StorageResult { + self.may_exist(key_range, read_options).await + } } pub type BoxDynamicDispatchedLocalStateStore = Box; @@ -804,7 +824,17 @@ pub mod boxed_state_store { impl_state_store_read_for_box!(BoxDynamicDispatchedLocalStateStore); impl_state_store_write_for_box!(BoxDynamicDispatchedLocalStateStore); - impl LocalStateStore for BoxDynamicDispatchedLocalStateStore {} + impl LocalStateStore for BoxDynamicDispatchedLocalStateStore { + define_local_state_store_associated_type!(); + + fn may_exist( + &self, + key_range: (Bound>, Bound>), + read_options: ReadOptions, + ) -> Self::MayExistFuture<'_> { + self.deref().may_exist(key_range, read_options) + } + } // For global StateStore diff --git a/src/storage/src/table/batch_table/iter_utils.rs b/src/storage/src/table/batch_table/iter_utils.rs index 6af6fdb277807..1378eb2d1bbd6 100644 --- a/src/storage/src/table/batch_table/iter_utils.rs +++ b/src/storage/src/table/batch_table/iter_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/storage/src/table/batch_table/mod.rs b/src/storage/src/table/batch_table/mod.rs index 1ad177bcd9ced..b4712092dc7df 100644 --- a/src/storage/src/table/batch_table/mod.rs +++ b/src/storage/src/table/batch_table/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/storage/src/table/batch_table/storage_table.rs b/src/storage/src/table/batch_table/storage_table.rs index 53205bebfcc6a..c9b0ab4e120ee 100644 --- a/src/storage/src/table/batch_table/storage_table.rs +++ b/src/storage/src/table/batch_table/storage_table.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -534,7 +534,6 @@ impl StorageTable { assert!(pk_prefix.len() <= self.pk_indices.len()); let pk_prefix_indices = (0..pk_prefix.len()) - .into_iter() .map(|index| self.pk_indices[index]) .collect_vec(); diff --git a/src/storage/src/table/mod.rs b/src/storage/src/table/mod.rs index b1ca1e428ecc6..5a7ac8627104c 100644 --- a/src/storage/src/table/mod.rs +++ b/src/storage/src/table/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,13 +16,13 @@ pub mod batch_table; use std::sync::{Arc, LazyLock}; -use itertools::Itertools; use risingwave_common::array::DataChunk; use risingwave_common::buffer::{Bitmap, BitmapBuilder}; use risingwave_common::catalog::Schema; use risingwave_common::hash::VirtualNode; use risingwave_common::row::{OwnedRow, Row, RowExt}; use risingwave_common::util::hash_util::Crc32FastBuilder; +use risingwave_common::util::iter_util::ZipEqFast; use crate::error::StorageResult; /// For tables without distribution (singleton), the `DEFAULT_VNODE` is encoded. @@ -81,7 +81,7 @@ pub trait TableIter: Send { for _ in 0..chunk_size.unwrap_or(usize::MAX) { match self.next_row().await? { Some(row) => { - for (datum, builder) in row.iter().zip_eq(builders.iter_mut()) { + for (datum, builder) in row.iter().zip_eq_fast(builders.iter_mut()) { builder.append_datum(datum); } row_count += 1; @@ -133,7 +133,7 @@ pub fn compute_chunk_vnode( chunk .get_hash_values(indices, Crc32FastBuilder) .into_iter() - .zip_eq(chunk.vis().iter()) + .zip_eq_fast(chunk.vis().iter()) .map(|(h, vis)| { let vnode = h.to_vnode(); // Ignore the invisible rows. diff --git a/src/storage/src/write_batch.rs b/src/storage/src/write_batch.rs index b356a044a8638..bdf561757d3fa 100644 --- a/src/storage/src/write_batch.rs +++ b/src/storage/src/write_batch.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/stream/Cargo.toml b/src/stream/Cargo.toml index a89b52411d7b3..2c92d2566066f 100644 --- a/src/stream/Cargo.toml +++ b/src/stream/Cargo.toml @@ -8,26 +8,27 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" async-recursion = "1" async-stream = "0.3" async-trait = "0.1" async_stack_trace = { path = "../utils/async_stack_trace" } -auto_enums = { version = "0.7", features = ["futures"] } -byteorder = "1" bytes = "1" -chrono = { version = "0.4", default-features = false, features = ["clock", "std"] } -crc32fast = "1" dyn-clone = "1" either = "1" enum-as-inner = "0.5" -farmhash = "1" fixedbitset = { version = "0.4", features = ["std"] } futures = { version = "0.3", default-features = false, features = ["alloc"] } futures-async-stream = "0.2" gen-iter = "0.3" -hyper = "0.14" +hytra = "0.1.2" iter-chunks = "0.1" itertools = "0.10" local_stats_alloc = { path = "../utils/local_stats_alloc" } @@ -39,7 +40,6 @@ minstant = "0.1" multimap = "0.8" num-traits = "0.2" parking_lot = "0.12" -paste = "1" pin-project = "1" prometheus = { version = "0.13", features = ["process"] } prost = "0.11" @@ -52,9 +52,6 @@ risingwave_pb = { path = "../prost" } risingwave_rpc_client = { path = "../rpc_client" } risingwave_source = { path = "../source" } risingwave_storage = { path = "../storage" } -serde = { version = "1", features = ["derive"] } -serde-value = "0.7" -serde_json = "1" smallvec = "1" static_assertions = "1" task_stats_alloc = { path = "../utils/task_stats_alloc" } @@ -71,14 +68,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ tokio-metrics = "0.1.0" tokio-stream = "0.1" tonic = { version = "0.2", package = "madsim-tonic" } -tower = { version = "0.4", features = ["util", "load-shed"] } tracing = "0.1" -tracing-futures = "0.2" -twox-hash = "1" -url = "2" - -[target.'cfg(target_os = "linux")'.dependencies] -tikv-jemalloc-ctl = "0.5" [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../workspace-hack" } diff --git a/src/stream/src/cache/managed_lru.rs b/src/stream/src/cache/managed_lru.rs index 5eb5f70aa7bc1..e336c23d00d94 100644 --- a/src/stream/src/cache/managed_lru.rs +++ b/src/stream/src/cache/managed_lru.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/cache/mod.rs b/src/stream/src/cache/mod.rs index e1e23a0c3899b..b51484cbab328 100644 --- a/src/stream/src/cache/mod.rs +++ b/src/stream/src/cache/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,12 +16,12 @@ use std::alloc::{Allocator, Global}; use std::hash::{BuildHasher, Hash}; use std::ops::{Deref, DerefMut}; -use itertools::Itertools; use lru::{DefaultHasher, LruCache}; mod managed_lru; pub use managed_lru::*; use risingwave_common::buffer::Bitmap; +use risingwave_common::util::iter_util::ZipEqFast; pub struct ExecutorCache { /// An managed cache. Eviction depends on the node memory usage. 
@@ -102,7 +102,7 @@ pub(super) fn cache_may_stale( ) -> bool { let current_is_subset = previous_vnode_bitmap .iter() - .zip_eq(current_vnode_bitmap.iter()) + .zip_eq_fast(current_vnode_bitmap.iter()) .all(|(p, c)| p >= c); !current_is_subset diff --git a/src/stream/src/common/builder.rs b/src/stream/src/common/builder.rs index 0301ff4288d9c..1ca5e68466385 100644 --- a/src/stream/src/common/builder.rs +++ b/src/stream/src/common/builder.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use itertools::Itertools; use risingwave_common::array::{ArrayBuilderImpl, Op, StreamChunk}; use risingwave_common::row::Row; use risingwave_common::types::{DataType, Datum}; +use risingwave_common::util::iter_util::ZipEqFast; type IndexMappings = Vec<(usize, usize)>; @@ -180,7 +180,7 @@ impl StreamChunkBuilder { let new_columns = self .column_builders .iter_mut() - .zip_eq(&self.data_types) + .zip_eq_fast(&self.data_types) .map(|(builder, datatype)| { std::mem::replace(builder, datatype.create_array_builder(self.capacity)).finish() }) diff --git a/src/stream/src/common/column_mapping.rs b/src/stream/src/common/column_mapping.rs index 1455706dbfb12..4dc68fde7c6c3 100644 --- a/src/stream/src/common/column_mapping.rs +++ b/src/stream/src/common/column_mapping.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/common/infallible_expr.rs b/src/stream/src/common/infallible_expr.rs index 6b5f56feec498..2742c5960cb76 100644 --- a/src/stream/src/common/infallible_expr.rs +++ b/src/stream/src/common/infallible_expr.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/common/mod.rs b/src/stream/src/common/mod.rs index aab383a4211a1..026af37353995 100644 --- a/src/stream/src/common/mod.rs +++ b/src/stream/src/common/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/common/table/mod.rs b/src/stream/src/common/table/mod.rs index f72de561add6b..e8caa0b78d743 100644 --- a/src/stream/src/common/table/mod.rs +++ b/src/stream/src/common/table/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ // limitations under the License. 
pub mod state_table; +mod watermark; #[cfg(test)] pub mod test_state_table; diff --git a/src/stream/src/common/table/state_table.rs b/src/stream/src/common/table/state_table.rs index 19d83e0dd7d7f..3837a8de9eb0a 100644 --- a/src/stream/src/common/table/state_table.rs +++ b/src/stream/src/common/table/state_table.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,18 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::borrow::Cow; -use std::cmp::Ordering; use std::collections::BTreeMap; -use std::marker::PhantomData; use std::ops::Bound; use std::ops::Bound::*; use std::sync::Arc; -use async_stack_trace::StackTrace; use bytes::{BufMut, Bytes, BytesMut}; -use futures::{pin_mut, Stream, StreamExt}; -use futures_async_stream::try_stream; +use futures::{Stream, StreamExt}; use itertools::{izip, Itertools}; use risingwave_common::array::{Op, StreamChunk, Vis}; use risingwave_common::buffer::Bitmap; @@ -32,24 +27,24 @@ use risingwave_common::hash::VirtualNode; use risingwave_common::row::{self, CompactedRow, OwnedRow, Row, RowDeserializer, RowExt}; use risingwave_common::types::ScalarImpl; use risingwave_common::util::epoch::EpochPair; +use risingwave_common::util::iter_util::{ZipEqDebug, ZipEqFast}; use risingwave_common::util::ordered::OrderedRowSerde; use risingwave_common::util::sort_util::OrderType; use risingwave_hummock_sdk::key::{ end_bound_of_prefix, prefixed_range, range_of_prefix, start_bound_of_excluded_prefix, }; use risingwave_pb::catalog::Table; -use risingwave_storage::mem_table::{KeyOp, MemTable, MemTableError, MemTableIter}; +use risingwave_storage::mem_table::{merge_stream, KeyOp, MemTable, MemTableError}; use risingwave_storage::row_serde::row_serde_util::{ deserialize_pk_with_vnode, serialize_pk, serialize_pk_with_vnode, }; use risingwave_storage::storage_value::StorageValue; -use risingwave_storage::store::{ - LocalStateStore, ReadOptions, StateStoreRead, StateStoreWrite, WriteOptions, -}; +use risingwave_storage::store::*; use risingwave_storage::table::{compute_chunk_vnode, compute_vnode, Distribution}; use risingwave_storage::StateStore; use tracing::trace; +use super::watermark::{WatermarkBufferByEpoch, WatermarkBufferStrategy}; use crate::executor::{StreamExecutorError, StreamExecutorResult}; /// This num is arbitrary and we may want to improve this choice in the future. @@ -58,7 +53,10 @@ const STATE_CLEANING_PERIOD_EPOCH: usize = 5; /// `StateTable` is the interface accessing relational data in KV(`StateStore`) with /// row-based encoding. #[derive(Clone)] -pub struct StateTable { +pub struct StateTable< + S: StateStore, + W: WatermarkBufferStrategy = WatermarkBufferByEpoch, +> { /// Id for this table. table_id: TableId, @@ -100,8 +98,14 @@ pub struct StateTable { /// Used for catalog table_properties table_option: TableOption, - /// If true, sanity check is disabled on this table. - disable_sanity_check: bool, + /// Whether the operation is consistent. The term `consistent` requires the following: + /// + /// 1. A key cannot be inserted or deleted more than once, i.e. inserting to an existing + /// key or deleting a non-existent key is not allowed. + /// + /// 2. The old value passed from + /// `update` and `delete` should match the original stored value.
+ is_consistent_op: bool, /// An optional column index which is the vnode of each row computed by the table's consistent /// hash distribution. @@ -118,36 +122,35 @@ pub struct StateTable { /// latest watermark cur_watermark: Option, - /// number of commits with watermark since the last time we did state cleaning by watermark. - num_wmked_commits_since_last_clean: usize, + watermark_buffer_strategy: W, } // initialize -impl StateTable { +impl StateTable { /// Create state table from table catalog and store. pub async fn from_table_catalog( table_catalog: &Table, store: S, vnodes: Option>, ) -> Self { - Self::from_table_catalog_may_disable_sanity_check(table_catalog, store, vnodes, false).await + Self::from_table_catalog_inner(table_catalog, store, vnodes, true).await } /// Create state table from table catalog and store with sanity check disabled. - pub async fn from_table_catalog_no_sanity_check( + pub async fn from_table_catalog_inconsistent_op( table_catalog: &Table, store: S, vnodes: Option>, ) -> Self { - Self::from_table_catalog_may_disable_sanity_check(table_catalog, store, vnodes, true).await + Self::from_table_catalog_inner(table_catalog, store, vnodes, false).await } /// Create state table from table catalog and store. - async fn from_table_catalog_may_disable_sanity_check( + async fn from_table_catalog_inner( table_catalog: &Table, store: S, vnodes: Option>, - disable_sanity_check: bool, + is_consistent_op: bool, ) -> Self { let table_id = TableId::new(table_catalog.id); let table_columns: Vec = table_catalog @@ -226,7 +229,7 @@ impl StateTable { let prefix_hint_len = table_catalog.read_prefix_len_hint as usize; Self { table_id, - mem_table: MemTable::new(), + mem_table: MemTable::new(is_consistent_op), local_store: local_state_store, pk_serde, row_deserializer: RowDeserializer::new(data_types), @@ -236,13 +239,13 @@ impl StateTable { prefix_hint_len, vnodes, table_option: TableOption::build_table_option(table_catalog.get_properties()), - disable_sanity_check, + is_consistent_op, vnode_col_idx_in_pk, value_indices, epoch: None, last_watermark: None, cur_watermark: None, - num_wmked_commits_since_last_clean: 0, + watermark_buffer_strategy: W::default(), } } @@ -266,8 +269,8 @@ impl StateTable { .await } - /// Create a state table without distribution, used for unit tests. - pub async fn new_with_value_indices_without_distribution( + /// Create a state table without distribution, with given `value_indices`, used for unit tests. + pub async fn new_without_distribution_with_value_indices( store: S, table_id: TableId, columns: Vec, @@ -288,14 +291,14 @@ impl StateTable { } /// Create a state table without distribution, used for unit tests. - pub async fn new_without_distribution_no_sanity_check( + pub async fn new_without_distribution_inconsistent_op( store: S, table_id: TableId, columns: Vec, order_types: Vec, pk_indices: Vec, ) -> Self { - Self::new_with_distribution_may_disable_sanity_check( + Self::new_with_distribution_inner( store, table_id, columns, @@ -303,28 +306,7 @@ impl StateTable { pk_indices, Distribution::fallback(), None, - true, - ) - .await - } - - /// Create a state table with given `value_indices`, used for unit tests. 
- pub async fn new_without_distribution_partial( - store: S, - table_id: TableId, - columns: Vec, - order_types: Vec, - pk_indices: Vec, - value_indices: Vec, - ) -> Self { - Self::new_with_distribution( - store, - table_id, - columns, - order_types, - pk_indices, - Distribution::fallback(), - Some(value_indices), + false, ) .await } @@ -340,7 +322,7 @@ impl StateTable { distribution: Distribution, value_indices: Option>, ) -> Self { - Self::new_with_distribution_may_disable_sanity_check( + Self::new_with_distribution_inner( store, table_id, table_columns, @@ -348,12 +330,12 @@ impl StateTable { pk_indices, distribution, value_indices, - false, + true, ) .await } - pub async fn new_with_distribution_no_sanity_check( + pub async fn new_with_distribution_inconsistent_op( store: S, table_id: TableId, table_columns: Vec, @@ -362,7 +344,7 @@ impl StateTable { distribution: Distribution, value_indices: Option>, ) -> Self { - Self::new_with_distribution_may_disable_sanity_check( + Self::new_with_distribution_inner( store, table_id, table_columns, @@ -370,13 +352,13 @@ impl StateTable { pk_indices, distribution, value_indices, - true, + false, ) .await } #[allow(clippy::too_many_arguments)] - async fn new_with_distribution_may_disable_sanity_check( + async fn new_with_distribution_inner( store: S, table_id: TableId, table_columns: Vec, @@ -387,7 +369,7 @@ impl StateTable { vnodes, }: Distribution, value_indices: Option>, - disable_sanity_check: bool, + is_consistent_op: bool, ) -> Self { let local_state_store = store.new_local(table_id).await; @@ -407,7 +389,7 @@ impl StateTable { let dist_key_in_pk_indices = get_dist_key_in_pk_indices(&dist_key_indices, &pk_indices); Self { table_id, - mem_table: MemTable::new(), + mem_table: MemTable::new(is_consistent_op), local_store: local_state_store, pk_serde, row_deserializer: RowDeserializer::new(data_types), @@ -417,13 +399,13 @@ impl StateTable { prefix_hint_len: 0, vnodes, table_option: Default::default(), - disable_sanity_check, + is_consistent_op, vnode_col_idx_in_pk: None, value_indices, epoch: None, last_watermark: None, cur_watermark: None, - num_wmked_commits_since_last_clean: 0, + watermark_buffer_strategy: W::default(), } } @@ -580,7 +562,7 @@ impl StateTable { !self.is_dirty(), "vnode bitmap should only be updated when state table is clean" ); - if self.dist_key_indices.is_empty() { + if self.vnode_col_idx_in_pk.is_none() && self.dist_key_indices.is_empty() { assert_eq!( new_vnodes, self.vnodes, "should not update vnode bitmap for singleton table" @@ -594,15 +576,16 @@ impl StateTable { std::mem::replace(&mut self.vnodes, new_vnodes) } } + // write impl StateTable { #[expect(clippy::boxed_local)] fn handle_mem_table_error(&self, e: Box) { match *e { - MemTableError::Conflict { key, prev, new } => { + MemTableError::InconsistentOperation { key, prev, new } => { let (vnode, key) = deserialize_pk_with_vnode(&key, &self.pk_serde).unwrap(); panic!( - "mem-table operation conflicts! table_id: {}, vnode: {}, key: {:?}, prev: {}, new: {}", + "mem-table operation inconsistent! 
table_id: {}, vnode: {}, key: {:?}, prev: {}, new: {}", self.table_id(), vnode, &key, @@ -682,7 +665,7 @@ impl StateTable { let key_chunk = chunk.reorder_columns(self.pk_indices()); let vnode_and_pks = key_chunk .rows_with_holes() - .zip_eq(vnodes.iter()) + .zip_eq_fast(vnodes.iter()) .map(|(r, vnode)| { let mut buffer = BytesMut::new(); buffer.put_slice(&vnode.to_be_bytes()[..]); @@ -696,7 +679,9 @@ impl StateTable { let (_, vis) = key_chunk.into_parts(); match vis { Vis::Bitmap(vis) => { - for ((op, key, value), vis) in izip!(op, vnode_and_pks, values).zip_eq(vis.iter()) { + for ((op, key, value), vis) in + izip!(op, vnode_and_pks, values).zip_eq_debug(vis.iter()) + { if vis { match op { Op::Insert | Op::UpdateInsert => self.mem_table.insert(key, value), @@ -735,7 +720,7 @@ impl StateTable { pub async fn commit(&mut self, new_epoch: EpochPair) -> StreamExecutorResult<()> { assert_eq!(self.epoch(), new_epoch.prev); - let mem_table = std::mem::take(&mut self.mem_table).into_parts(); + let mem_table = self.mem_table.drain().into_parts(); self.batch_write_rows(mem_table, new_epoch.prev).await?; self.update_epoch(new_epoch); Ok(()) @@ -748,7 +733,7 @@ impl StateTable { assert_eq!(self.epoch(), new_epoch.prev); assert!(!self.is_dirty()); if self.cur_watermark.is_some() { - self.num_wmked_commits_since_last_clean += 1; + self.watermark_buffer_strategy.tick(); } self.update_epoch(new_epoch); } @@ -759,15 +744,19 @@ impl StateTable { buffer: BTreeMap, epoch: u64, ) -> StreamExecutorResult<()> { - let watermark = self.cur_watermark.as_ref().and_then(|cur_watermark_ref| { - self.num_wmked_commits_since_last_clean += 1; - - if self.num_wmked_commits_since_last_clean >= STATE_CLEANING_PERIOD_EPOCH { - Some(cur_watermark_ref) + let watermark = { + if let Some(watermark) = self.cur_watermark.take() { + self.watermark_buffer_strategy.tick(); + if !self.watermark_buffer_strategy.apply() { + self.cur_watermark = Some(watermark); + None + } else { + Some(watermark) + } } else { None } - }); + }; let mut write_batch = self.local_store.start_write_batch(WriteOptions { epoch, @@ -781,7 +770,7 @@ impl StateTable { }; let range_end_suffix = watermark.map(|watermark| { serialize_pk( - row::once(Some(watermark.clone())), + row::once(Some(watermark)), prefix_serializer.as_ref().unwrap(), ) }); @@ -794,19 +783,19 @@ impl StateTable { // workaround you may call disable the check by initializing the state store with // `disable_sanity_check=true`. KeyOp::Insert(row) => { - if ENABLE_SANITY_CHECK && !self.disable_sanity_check { + if ENABLE_SANITY_CHECK && self.is_consistent_op { self.do_insert_sanity_check(&pk, &row, epoch).await?; } write_batch.put(pk, StorageValue::new_put(row)); } KeyOp::Delete(row) => { - if ENABLE_SANITY_CHECK && !self.disable_sanity_check { + if ENABLE_SANITY_CHECK && self.is_consistent_op { self.do_delete_sanity_check(&pk, &row, epoch).await?; } write_batch.delete(pk); } KeyOp::Update((old_row, new_row)) => { - if ENABLE_SANITY_CHECK && !self.disable_sanity_check { + if ENABLE_SANITY_CHECK && self.is_consistent_op { self.do_update_sanity_check(&pk, &old_row, &new_row, epoch) .await?; } @@ -832,10 +821,6 @@ impl StateTable { } } write_batch.ingest().await?; - if watermark.is_some() { - self.last_watermark = self.cur_watermark.take(); - self.num_wmked_commits_since_last_clean = 0; - } Ok(()) } @@ -947,7 +932,7 @@ fn get_second(arg: StreamExecutorResult<(T, U)>) -> StreamExecutorResult StateTable { +impl StateTable { /// This function scans rows from the relational table. 
pub async fn iter(&self) -> StreamExecutorResult> { self.iter_with_pk_prefix(row::empty()).await @@ -958,16 +943,7 @@ impl StateTable { &self, pk_prefix: impl Row, ) -> StreamExecutorResult> { - let (mem_table_iter, storage_iter_stream) = self - .iter_with_pk_prefix_inner(pk_prefix, self.epoch()) - .await?; - - let storage_iter = storage_iter_stream.into_stream(); - Ok( - StateTableRowIter::new(mem_table_iter, storage_iter, self.row_deserializer.clone()) - .into_stream() - .map(get_second), - ) + Ok(self.iter_key_and_val(pk_prefix).await?.map(get_second)) } /// This function scans rows from the relational table with specific `pk_prefix`. @@ -978,19 +954,16 @@ impl StateTable { // For now, we require this parameter, and will panic. In the future, when `None`, we can // iterate over each vnode that the `StateTable` owns. vnode: VirtualNode, - ) -> StreamExecutorResult<(MemTableIter<'_>, StorageIterInner)> { + ) -> StreamExecutorResult> { let memcomparable_range = prefix_range_to_memcomparable(&self.pk_serde, pk_range); let memcomparable_range_with_vnode = prefixed_range(memcomparable_range, &vnode.to_be_bytes()); // TODO: provide a trace of useful params. - - let (mem_table_iter, storage_iter_stream) = self - .iter_inner(memcomparable_range_with_vnode, None, self.epoch()) - .await?; - - Ok((mem_table_iter, storage_iter_stream)) + self.iter_inner(memcomparable_range_with_vnode, None) + .await + .map_err(StreamExecutorError::from) } pub async fn iter_with_pk_range( @@ -1001,14 +974,10 @@ impl StateTable { // iterate over each vnode that the `StateTable` owns. vnode: VirtualNode, ) -> StreamExecutorResult> { - let (mem_table_iter, storage_iter_stream) = - self.iter_with_pk_range_inner(pk_range, vnode).await?; - let storage_iter = storage_iter_stream.into_stream(); - Ok( - StateTableRowIter::new(mem_table_iter, storage_iter, self.row_deserializer.clone()) - .into_stream() - .map(get_second), - ) + Ok(self + .iter_key_and_val_with_pk_range(pk_range, vnode) + .await? + .map(get_second)) } pub async fn iter_key_and_val_with_pk_range( @@ -1019,13 +988,10 @@ impl StateTable { // iterate over each vnode that the `StateTable` owns. 
vnode: VirtualNode, ) -> StreamExecutorResult> { - let (mem_table_iter, storage_iter_stream) = - self.iter_with_pk_range_inner(pk_range, vnode).await?; - let storage_iter = storage_iter_stream.into_stream(); - Ok( - StateTableRowIter::new(mem_table_iter, storage_iter, self.row_deserializer.clone()) - .into_stream(), - ) + Ok(deserialize_row_stream( + self.iter_with_pk_range_inner(pk_range, vnode).await?, + self.row_deserializer.clone(), + )) } /// This function scans rows from the relational table with specific `pk_prefix`, return both @@ -1034,22 +1000,16 @@ impl StateTable { &self, pk_prefix: impl Row, ) -> StreamExecutorResult> { - let (mem_table_iter, storage_iter_stream) = self - .iter_with_pk_prefix_inner(pk_prefix, self.epoch()) - .await?; - let storage_iter = storage_iter_stream.into_stream(); - - Ok( - StateTableRowIter::new(mem_table_iter, storage_iter, self.row_deserializer.clone()) - .into_stream(), - ) + Ok(deserialize_row_stream( + self.iter_with_pk_prefix_inner(pk_prefix).await?, + self.row_deserializer.clone(), + )) } async fn iter_with_pk_prefix_inner( &self, pk_prefix: impl Row, - epoch: u64, - ) -> StreamExecutorResult<(MemTableIter<'_>, StorageIterInner)> { + ) -> StreamExecutorResult> { let prefix_serializer = self.pk_serde.prefix(pk_prefix.len()); let encoded_prefix = serialize_pk(&pk_prefix, &prefix_serializer); let encoded_key_range = range_of_prefix(&encoded_prefix); @@ -1084,7 +1044,7 @@ impl StateTable { "storage_iter_with_prefix" ); - self.iter_inner(encoded_key_range_with_vnode, prefix_hint, epoch) + self.iter_inner(encoded_key_range_with_vnode, prefix_hint) .await } @@ -1092,8 +1052,7 @@ impl StateTable { &self, key_range: (Bound>, Bound>), prefix_hint: Option, - epoch: u64, - ) -> StreamExecutorResult<(MemTableIter<'_>, StorageIterInner)> { + ) -> StreamExecutorResult> { let (l, r) = key_range.clone(); let bytes_key_range = (l.map(Bytes::from), r.map(Bytes::from)); // Mem table iterator. @@ -1106,177 +1065,91 @@ impl StateTable { read_version_from_backup: false, }; - // Storage iterator. - let storage_iter = StorageIterInner::::new( - &self.local_store, - epoch, - key_range, - read_options, - self.row_deserializer.clone(), - ) - .await?; + let iter = self + .local_store + .iter(key_range, self.epoch(), read_options) + .await?; - Ok((mem_table_iter, storage_iter)) + Ok(merge_stream( + mem_table_iter, + iter, + self.table_id, + self.epoch(), + )) } pub fn get_vnodes(&self) -> Arc { self.vnodes.clone() } -} -pub type RowStream<'a, S: StateStore> = impl Stream> + 'a; -pub type RowStreamWithPk<'a, S: StateStore> = - impl Stream, OwnedRow)>> + 'a; - -/// `StateTableRowIter` is able to read the just written data (uncommitted data). -/// It will merge the result of `mem_table_iter` and `state_store_iter`. -struct StateTableRowIter<'a, M, C> { - mem_table_iter: M, - storage_iter: C, - _phantom: PhantomData<&'a ()>, - deserializer: RowDeserializer, -} + /// Returns: + /// false: the provided pk prefix is absent in state store. + /// true: the provided pk prefix may or may not be present in state store. 
+ pub async fn may_exist(&self, pk_prefix: impl Row) -> StreamExecutorResult { + let prefix_serializer = self.pk_serde.prefix(pk_prefix.len()); + let encoded_prefix = serialize_pk(&pk_prefix, &prefix_serializer); + let encoded_key_range = range_of_prefix(&encoded_prefix); -impl<'a, M, C> StateTableRowIter<'a, M, C> -where - M: Iterator, - C: Stream>, -{ - fn new(mem_table_iter: M, storage_iter: C, deserializer: RowDeserializer) -> Self { - Self { - mem_table_iter, - storage_iter, - _phantom: PhantomData, - deserializer, + // We assume that all usages of iterating the state table only access a single vnode. + // If this assertion fails, then something must be wrong with the operator implementation or + // the distribution derivation from the optimizer. + let vnode = self.compute_prefix_vnode(&pk_prefix).to_be_bytes(); + let encoded_key_range_with_vnode = prefixed_range(encoded_key_range, &vnode); + let (l, r) = encoded_key_range_with_vnode.clone(); + let bytes_key_range = (l.map(Bytes::from), r.map(Bytes::from)); + if self.mem_table.iter(bytes_key_range).next().is_some() { + return Ok(true); } - } - /// This function scans kv pairs from the `shared_storage` and - /// memory(`mem_table`) with optional pk_bounds. If a record exist in both `shared_storage` and - /// `mem_table`, result `mem_table` is returned according to the operation(RowOp) on it. - #[try_stream(ok = (Cow<'a, Bytes>, OwnedRow), error = StreamExecutorError)] - async fn into_stream(self) { - let storage_iter = self.storage_iter.peekable(); - pin_mut!(storage_iter); - - let mut mem_table_iter = self.mem_table_iter.fuse().peekable(); - - loop { - match (storage_iter.as_mut().peek().await, mem_table_iter.peek()) { - (None, None) => break, - // The mem table side has come to an end, return data from the shared storage. - (Some(_), None) => { - let (pk, row) = storage_iter.next().await.unwrap()?; - yield (Cow::Owned(pk), row) - } - // The stream side has come to an end, return data from the mem table. - (None, Some(_)) => { - let (pk, key_op) = mem_table_iter.next().unwrap(); - match key_op { - KeyOp::Insert(row_bytes) | KeyOp::Update((_, row_bytes)) => { - let row = self.deserializer.deserialize(row_bytes.as_ref())?; - - yield (Cow::Borrowed(pk), row) - } - _ => {} - } - } - (Some(Ok((storage_pk, _))), Some((mem_table_pk, _))) => { - match storage_pk.cmp(mem_table_pk) { - Ordering::Less => { - // yield data from storage - let (pk, row) = storage_iter.next().await.unwrap()?; - yield (Cow::Owned(pk), row); - } - Ordering::Equal => { - // both memtable and storage contain the key, so we advance both - // iterators and return the data in memory. 
- - let (pk, key_op) = mem_table_iter.next().unwrap(); - let (_, old_row_in_storage) = storage_iter.next().await.unwrap()?; - match key_op { - KeyOp::Insert(row_bytes) => { - let row = self.deserializer.deserialize(row_bytes.as_ref())?; - - yield (Cow::Borrowed(pk), row); - } - KeyOp::Delete(_) => {} - KeyOp::Update((old_row_bytes, new_row_bytes)) => { - let old_row = - self.deserializer.deserialize(old_row_bytes.as_ref())?; - let new_row = - self.deserializer.deserialize(new_row_bytes.as_ref())?; - - debug_assert!(old_row == old_row_in_storage); - - yield (Cow::Borrowed(pk), new_row); - } - } - } - Ordering::Greater => { - // yield data from mem table - let (pk, key_op) = mem_table_iter.next().unwrap(); - - match key_op { - KeyOp::Insert(row_bytes) => { - let row = self.deserializer.deserialize(row_bytes.as_ref())?; - - yield (Cow::Borrowed(pk), row); - } - KeyOp::Delete(_) => {} - KeyOp::Update(_) => unreachable!( - "memtable update should always be paired with a storage key" - ), - } - } - } - } - (Some(Err(_)), Some(_)) => { - // Throw the error. - return Err(storage_iter.next().await.unwrap().unwrap_err()); - } - } + // Construct prefix hint for prefix bloom filter. + if self.prefix_hint_len != 0 { + debug_assert_eq!(self.prefix_hint_len, pk_prefix.len()); } - } -} + let prefix_hint = { + if self.prefix_hint_len == 0 || self.prefix_hint_len > pk_prefix.len() { + panic!(); + } else { + let encoded_prefix_len = self + .pk_serde + .deserialize_prefix_len(&encoded_prefix, self.prefix_hint_len)?; -struct StorageIterInner { - /// An iterator that returns raw bytes from storage. - iter: S::IterStream, + Some(Bytes::from(encoded_prefix[..encoded_prefix_len].to_vec())) + } + }; - deserializer: RowDeserializer, -} + let read_options = ReadOptions { + prefix_hint, + ignore_range_tombstone: false, + retention_seconds: None, + table_id: self.table_id, + read_version_from_backup: false, + }; -impl StorageIterInner { - async fn new( - store: &S, - epoch: u64, - raw_key_range: (Bound>, Bound>), - read_options: ReadOptions, - deserializer: RowDeserializer, - ) -> StreamExecutorResult { - let iter = store.iter(raw_key_range, epoch, read_options).await?; - let iter = Self { iter, deserializer }; - Ok(iter) + self.local_store + .may_exist(encoded_key_range_with_vnode, read_options) + .await + .map_err(Into::into) } +} - /// Yield a row with its primary key. - #[try_stream(ok = (Bytes, OwnedRow), error = StreamExecutorError)] - async fn into_stream(self) { - use futures::TryStreamExt; - - // No need for table id and epoch. - let iter = self.iter.map_ok(|(k, v)| (k.user_key.table_key.0, v)); - futures::pin_mut!(iter); - while let Some((key, value)) = iter - .try_next() - .verbose_stack_trace("storage_table_iter_next") - .await? - { - let row = self.deserializer.deserialize(value.as_ref())?; - yield (key, row); - } - } +pub type RowStream<'a, S: StateStore> = impl Stream> + 'a; +pub type RowStreamWithPk<'a, S: StateStore> = + impl Stream> + 'a; +pub type IterItemStream<'a, S: StateStore> = impl StateStoreIterItemStream + 'a; + +fn deserialize_row_stream( + stream: impl StateStoreIterItemStream, + deserializer: RowDeserializer, +) -> impl Stream> { + stream.map(move |result| { + result + .map_err(StreamExecutorError::from) + .and_then(|(key, value)| { + Ok(deserializer + .deserialize(value) + .map(move |row| (key.user_key.table_key.0, row))?) 
+ }) + }) } pub fn prefix_range_to_memcomparable( diff --git a/src/stream/src/common/table/test_state_table.rs b/src/stream/src/common/table/test_state_table.rs index cfd0272682273..26ae96de4660a 100644 --- a/src/stream/src/common/table/test_state_table.rs +++ b/src/stream/src/common/table/test_state_table.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,145 +21,13 @@ use risingwave_common::types::DataType; use risingwave_common::util::epoch::EpochPair; use risingwave_common::util::sort_util::OrderType; use risingwave_hummock_test::test_utils::prepare_hummock_test_env; +use risingwave_rpc_client::HummockMetaClient; use risingwave_storage::table::DEFAULT_VNODE; +use risingwave_storage::StateStore; use crate::common::table::state_table::StateTable; use crate::common::table::test_utils::{gen_prost_table, gen_prost_table_with_value_indices}; -// test state table -#[tokio::test] -async fn test_state_table() { - const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; - let test_env = prepare_hummock_test_env().await; - - let column_descs = vec![ - ColumnDesc::unnamed(ColumnId::from(0), DataType::Int32), - ColumnDesc::unnamed(ColumnId::from(1), DataType::Int32), - ColumnDesc::unnamed(ColumnId::from(2), DataType::Int32), - ]; - let order_types = vec![OrderType::Ascending]; - let pk_index = vec![0_usize]; - let read_prefix_len_hint = 1; - let table = gen_prost_table( - TEST_TABLE_ID, - column_descs, - order_types, - pk_index, - read_prefix_len_hint, - ); - - test_env.register_table(table.clone()).await; - let mut state_table = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) - .await; - - let mut epoch = EpochPair::new_test_epoch(1); - state_table.init_epoch(epoch); - - state_table.insert(OwnedRow::new(vec![ - Some(1_i32.into()), - Some(11_i32.into()), - Some(111_i32.into()), - ])); - state_table.insert(OwnedRow::new(vec![ - Some(2_i32.into()), - Some(22_i32.into()), - Some(222_i32.into()), - ])); - state_table.insert(OwnedRow::new(vec![ - Some(3_i32.into()), - Some(33_i32.into()), - Some(333_i32.into()), - ])); - - // test read visibility - let row1 = state_table - .get_row(&OwnedRow::new(vec![Some(1_i32.into())])) - .await - .unwrap(); - assert_eq!( - row1, - Some(OwnedRow::new(vec![ - Some(1_i32.into()), - Some(11_i32.into()), - Some(111_i32.into()) - ])) - ); - - let row2 = state_table - .get_row(&OwnedRow::new(vec![Some(2_i32.into())])) - .await - .unwrap(); - assert_eq!( - row2, - Some(OwnedRow::new(vec![ - Some(2_i32.into()), - Some(22_i32.into()), - Some(222_i32.into()) - ])) - ); - - state_table.delete(OwnedRow::new(vec![ - Some(2_i32.into()), - Some(22_i32.into()), - Some(222_i32.into()), - ])); - - let row2_delete = state_table - .get_row(&OwnedRow::new(vec![Some(2_i32.into())])) - .await - .unwrap(); - assert_eq!(row2_delete, None); - - epoch.inc(); - state_table.commit(epoch).await.unwrap(); - - let row2_delete_commit = state_table - .get_row(&OwnedRow::new(vec![Some(2_i32.into())])) - .await - .unwrap(); - assert_eq!(row2_delete_commit, None); - - state_table.delete(OwnedRow::new(vec![ - Some(3_i32.into()), - Some(33_i32.into()), - Some(333_i32.into()), - ])); - - state_table.insert(OwnedRow::new(vec![Some(4_i32.into()), None, None])); - let row4 = state_table - .get_row(&OwnedRow::new(vec![Some(4_i32.into())])) - .await - .unwrap(); - assert_eq!( - row4, - 
Some(OwnedRow::new(vec![Some(4_i32.into()), None, None])) - ); - - let non_exist_row = state_table - .get_row(&OwnedRow::new(vec![Some(0_i32.into())])) - .await - .unwrap(); - assert_eq!(non_exist_row, None); - - state_table.delete(OwnedRow::new(vec![Some(4_i32.into()), None, None])); - - epoch.inc(); - state_table.commit(epoch).await.unwrap(); - - let row3_delete = state_table - .get_row(&OwnedRow::new(vec![Some(3_i32.into())])) - .await - .unwrap(); - assert_eq!(row3_delete, None); - - let row4_delete = state_table - .get_row(&OwnedRow::new(vec![Some(4_i32.into())])) - .await - .unwrap(); - assert_eq!(row4_delete, None); -} - #[tokio::test] async fn test_state_table_update_insert() { const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; @@ -184,7 +52,7 @@ async fn test_state_table_update_insert() { test_env.register_table(table.clone()).await; let mut state_table = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) + StateTable::from_table_catalog_inconsistent_op(&table, test_env.storage.clone(), None) .await; let mut epoch = EpochPair::new_test_epoch(1); @@ -336,231 +204,6 @@ async fn test_state_table_update_insert() { assert_eq!(row1_commit, None); } -#[tokio::test] -async fn test_state_table_iter() { - const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; - let test_env = prepare_hummock_test_env().await; - - let order_types = vec![OrderType::Ascending, OrderType::Descending]; - let column_ids = vec![ColumnId::from(0), ColumnId::from(1), ColumnId::from(2)]; - let column_descs = vec![ - ColumnDesc::unnamed(column_ids[0], DataType::Int32), - ColumnDesc::unnamed(column_ids[1], DataType::Int32), - ColumnDesc::unnamed(column_ids[2], DataType::Int32), - ]; - let pk_index = vec![0_usize, 1_usize]; - let read_prefix_len_hint = 0; - let table = gen_prost_table( - TEST_TABLE_ID, - column_descs, - order_types, - pk_index, - read_prefix_len_hint, - ); - - test_env.register_table(table.clone()).await; - let mut state_table = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) - .await; - - let mut epoch = EpochPair::new_test_epoch(1); - state_table.init_epoch(epoch); - - state_table.insert(OwnedRow::new(vec![ - Some(1_i32.into()), - Some(11_i32.into()), - Some(111_i32.into()), - ])); - state_table.insert(OwnedRow::new(vec![ - Some(2_i32.into()), - Some(22_i32.into()), - Some(222_i32.into()), - ])); - state_table.delete(OwnedRow::new(vec![ - Some(2_i32.into()), - Some(22_i32.into()), - Some(222_i32.into()), - ])); - - state_table.insert(OwnedRow::new(vec![ - Some(3_i32.into()), - Some(33_i32.into()), - Some(3333_i32.into()), - ])); - - state_table.insert(OwnedRow::new(vec![ - Some(6_i32.into()), - Some(66_i32.into()), - Some(666_i32.into()), - ])); - - state_table.insert(OwnedRow::new(vec![ - Some(9_i32.into()), - Some(99_i32.into()), - Some(999_i32.into()), - ])); - - { - let iter = state_table.iter().await.unwrap(); - pin_mut!(iter); - - let res = iter.next().await.unwrap().unwrap(); - assert_eq!( - &OwnedRow::new(vec![ - Some(1_i32.into()), - Some(11_i32.into()), - Some(111_i32.into()) - ]), - res.as_ref() - ); - - // will not get [2, 22, 222] - let res = iter.next().await.unwrap().unwrap(); - assert_eq!( - &OwnedRow::new(vec![ - Some(3_i32.into()), - Some(33_i32.into()), - Some(3333_i32.into()) - ]), - res.as_ref() - ); - - let res = iter.next().await.unwrap().unwrap(); - assert_eq!( - &OwnedRow::new(vec![ - Some(6_i32.into()), - Some(66_i32.into()), - Some(666_i32.into()) - ]), - res.as_ref() - ); - } - 
epoch.inc(); - state_table.commit(epoch).await.unwrap(); - - // write [3, 33, 333], [4, 44, 444], [5, 55, 555], [7, 77, 777], [8, 88, 888]into mem_table, - // [1, 11, 111], [3, 33, 3333], [6, 66, 666], [9, 99, 999] exists in - // shared_storage - - state_table.delete(OwnedRow::new(vec![ - Some(1_i32.into()), - Some(11_i32.into()), - Some(111_i32.into()), - ])); - state_table.insert(OwnedRow::new(vec![ - Some(3_i32.into()), - Some(33_i32.into()), - Some(333_i32.into()), - ])); - - state_table.insert(OwnedRow::new(vec![ - Some(4_i32.into()), - Some(44_i32.into()), - Some(444_i32.into()), - ])); - - state_table.insert(OwnedRow::new(vec![ - Some(5_i32.into()), - Some(55_i32.into()), - Some(555_i32.into()), - ])); - state_table.insert(OwnedRow::new(vec![ - Some(7_i32.into()), - Some(77_i32.into()), - Some(777_i32.into()), - ])); - - state_table.insert(OwnedRow::new(vec![ - Some(8_i32.into()), - Some(88_i32.into()), - Some(888_i32.into()), - ])); - let iter = state_table.iter().await.unwrap(); - pin_mut!(iter); - - let res = iter.next().await.unwrap().unwrap(); - // this pk exist in both shared_storage and mem_table - assert_eq!( - &OwnedRow::new(vec![ - Some(3_i32.into()), - Some(33_i32.into()), - Some(333_i32.into()) - ]), - res.as_ref() - ); - // this row exists in mem_table - let res = iter.next().await.unwrap().unwrap(); - assert_eq!( - &OwnedRow::new(vec![ - Some(4_i32.into()), - Some(44_i32.into()), - Some(444_i32.into()) - ]), - res.as_ref() - ); - let res = iter.next().await.unwrap().unwrap(); - - // this row exists in mem_table - assert_eq!( - &OwnedRow::new(vec![ - Some(5_i32.into()), - Some(55_i32.into()), - Some(555_i32.into()) - ]), - res.as_ref() - ); - let res = iter.next().await.unwrap().unwrap(); - - // this row exists in shared_storage - assert_eq!( - &OwnedRow::new(vec![ - Some(6_i32.into()), - Some(66_i32.into()), - Some(666_i32.into()) - ]), - res.as_ref() - ); - - let res = iter.next().await.unwrap().unwrap(); - // this row exists in mem_table - assert_eq!( - &OwnedRow::new(vec![ - Some(7_i32.into()), - Some(77_i32.into()), - Some(777.into()) - ]), - res.as_ref() - ); - - let res = iter.next().await.unwrap().unwrap(); - - // this row exists in mem_table - assert_eq!( - &OwnedRow::new(vec![ - Some(8_i32.into()), - Some(88_i32.into()), - Some(888_i32.into()) - ]), - res.as_ref() - ); - - let res = iter.next().await.unwrap().unwrap(); - - // this row exists in shared_storage - assert_eq!( - &OwnedRow::new(vec![ - Some(9_i32.into()), - Some(99_i32.into()), - Some(999_i32.into()) - ]), - res.as_ref() - ); - - // there is no row in both shared_storage and mem_table - let res = iter.next().await; - assert!(res.is_none()); -} - #[tokio::test] async fn test_state_table_iter_with_prefix() { const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; @@ -587,7 +230,7 @@ async fn test_state_table_iter_with_prefix() { test_env.register_table(table.clone()).await; let mut state_table = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) + StateTable::from_table_catalog_inconsistent_op(&table, test_env.storage.clone(), None) .await; let mut epoch = EpochPair::new_test_epoch(1); @@ -712,7 +355,7 @@ async fn test_state_table_iter_with_pk_range() { test_env.register_table(table.clone()).await; let mut state_table = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) + StateTable::from_table_catalog_inconsistent_op(&table, test_env.storage.clone(), None) .await; let mut epoch = EpochPair::new_test_epoch(1); @@ -846,8 
+489,7 @@ async fn test_mem_table_assertion() { test_env.register_table(table.clone()).await; let mut state_table = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) - .await; + StateTable::from_table_catalog(&table, test_env.storage.clone(), None).await; let epoch = EpochPair::new_test_epoch(1); state_table.init_epoch(epoch); @@ -889,7 +531,7 @@ async fn test_state_table_iter_with_value_indices() { test_env.register_table(table.clone()).await; let mut state_table = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) + StateTable::from_table_catalog_inconsistent_op(&table, test_env.storage.clone(), None) .await; let mut epoch = EpochPair::new_test_epoch(1); @@ -1050,7 +692,7 @@ async fn test_state_table_iter_with_shuffle_value_indices() { test_env.register_table(table.clone()).await; let mut state_table = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) + StateTable::from_table_catalog_inconsistent_op(&table, test_env.storage.clone(), None) .await; let mut epoch = EpochPair::new_test_epoch(1); @@ -1285,7 +927,7 @@ async fn test_state_table_write_chunk() { test_env.register_table(table.clone()).await; let mut state_table = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) + StateTable::from_table_catalog_inconsistent_op(&table, test_env.storage.clone(), None) .await; let epoch = EpochPair::new_test_epoch(1); @@ -1414,7 +1056,7 @@ async fn test_state_table_write_chunk_visibility() { test_env.register_table(table.clone()).await; let mut state_table = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) + StateTable::from_table_catalog_inconsistent_op(&table, test_env.storage.clone(), None) .await; let epoch = EpochPair::new_test_epoch(1); @@ -1541,7 +1183,7 @@ async fn test_state_table_write_chunk_value_indices() { test_env.register_table(table.clone()).await; let mut state_table = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) + StateTable::from_table_catalog_inconsistent_op(&table, test_env.storage.clone(), None) .await; let epoch = EpochPair::new_test_epoch(1); @@ -1606,3 +1248,203 @@ async fn test_state_table_write_chunk_value_indices() { &OwnedRow::new(vec![Some(false.into()), Some(4888i64.into()),]) ); } + +async fn check_may_exist( + state_table: &StateTable, + existent_prefix: Vec, + non_existent_prefix: Vec, +) where + S: StateStore, +{ + for prefix in existent_prefix { + let pk_prefix = OwnedRow::new(vec![Some(prefix.into())]); + assert!(state_table.may_exist(&pk_prefix).await.unwrap()); + } + for prefix in non_existent_prefix { + let pk_prefix = OwnedRow::new(vec![Some(prefix.into())]); + assert!(!state_table.may_exist(&pk_prefix).await.unwrap()); + } +} + +#[tokio::test] +async fn test_state_table_may_exist() { + const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; + let test_env = prepare_hummock_test_env().await; + + // let pk_columns = vec![0, 1]; leave a message to indicate pk columns + let order_types = vec![OrderType::Ascending, OrderType::Descending]; + + let column_ids = vec![ColumnId::from(0), ColumnId::from(1), ColumnId::from(2)]; + let column_descs = vec![ + ColumnDesc::unnamed(column_ids[0], DataType::Int32), + ColumnDesc::unnamed(column_ids[1], DataType::Int32), + ColumnDesc::unnamed(column_ids[2], DataType::Int32), + ]; + let pk_index = vec![0_usize, 1_usize]; + let read_prefix_len_hint = 1; + let table = gen_prost_table( + 
TEST_TABLE_ID, + column_descs, + order_types, + pk_index, + read_prefix_len_hint, + ); + + test_env.register_table(table.clone()).await; + let mut state_table = + StateTable::from_table_catalog_inconsistent_op(&table, test_env.storage.clone(), None) + .await; + + let mut epoch = EpochPair::new_test_epoch(1); + state_table.init_epoch(epoch); + + state_table.insert(OwnedRow::new(vec![ + Some(1_i32.into()), + Some(11_i32.into()), + Some(111_i32.into()), + ])); + state_table.insert(OwnedRow::new(vec![ + Some(1_i32.into()), + Some(22_i32.into()), + Some(222_i32.into()), + ])); + + state_table.insert(OwnedRow::new(vec![ + Some(4_i32.into()), + Some(44_i32.into()), + Some(444_i32.into()), + ])); + + state_table.insert(OwnedRow::new(vec![ + Some(1_i32.into()), + Some(55_i32.into()), + Some(555_i32.into()), + ])); + + // test may_exist with data only in memtable (e1) + check_may_exist(&state_table, vec![1, 4], vec![2, 3, 6, 12]).await; + + epoch.inc(); + state_table.commit(epoch).await.unwrap(); + let e1 = epoch.prev; + + // test may_exist with data only in immutable memtable (e1) + check_may_exist(&state_table, vec![1, 4], vec![2, 3, 6, 12]).await; + + let e1_res = test_env.storage.seal_and_sync_epoch(e1).await.unwrap(); + + // test may_exist with data only in uncommitted ssts (e1) + check_may_exist(&state_table, vec![1, 4], vec![2, 3, 6, 12]).await; + + test_env + .meta_client + .commit_epoch(e1, e1_res.uncommitted_ssts) + .await + .unwrap(); + test_env.storage.try_wait_epoch_for_test(e1).await; + + // test may_exist with data only in committed ssts (e1) + check_may_exist(&state_table, vec![1, 4], vec![2, 3, 6, 12]).await; + + state_table.insert(OwnedRow::new(vec![ + Some(1_i32.into()), + Some(33_i32.into()), + Some(333_i32.into()), + ])); + state_table.insert(OwnedRow::new(vec![ + Some(1_i32.into()), + Some(55_i32.into()), + Some(5555_i32.into()), + ])); + state_table.insert(OwnedRow::new(vec![ + Some(6_i32.into()), + Some(66_i32.into()), + Some(666_i32.into()), + ])); + + // test may_exist with data in memtable (e2), committed ssts (e1) + check_may_exist(&state_table, vec![1, 4, 6], vec![2, 3, 12]).await; + + epoch.inc(); + state_table.commit(epoch).await.unwrap(); + let e2 = epoch.prev; + + // test may_exist with data in immutable memtable (e2), committed ssts (e1) + check_may_exist(&state_table, vec![1, 4, 6], vec![2, 3, 12]).await; + + state_table.insert(OwnedRow::new(vec![ + Some(1_i32.into()), + Some(44_i32.into()), + Some(444_i32.into()), + ])); + state_table.insert(OwnedRow::new(vec![ + Some(3_i32.into()), + Some(1_i32.into()), + Some(111_i32.into()), + ])); + + // test may_exist with data in memtable (e3), immutable memtable (e2), committed ssts (e1) + check_may_exist(&state_table, vec![1, 3, 4, 6], vec![2, 12]).await; + + let e2_res = test_env.storage.seal_and_sync_epoch(e2).await.unwrap(); + + // test may_exist with data in memtable (e3), uncommitted ssts (e2), committed ssts (e1) + check_may_exist(&state_table, vec![1, 3, 4, 6], vec![2, 12]).await; + + epoch.inc(); + state_table.commit(epoch).await.unwrap(); + let e3 = epoch.prev; + + // test may_exist with data in immutable memtable (e3), uncommitted ssts (e2), committed + // ssts (e1) + check_may_exist(&state_table, vec![1, 3, 4, 6], vec![2, 12]).await; + + state_table.insert(OwnedRow::new(vec![ + Some(1_i32.into()), + Some(55_i32.into()), + Some(555_i32.into()), + ])); + state_table.insert(OwnedRow::new(vec![ + Some(2_i32.into()), + Some(1_i32.into()), + Some(111_i32.into()), + ])); + + // test may_exist with data in memtable 
(e4), immutable memtable (e3), uncommitted ssts + // (e2), committed ssts (e1) + check_may_exist(&state_table, vec![1, 3, 4, 6], vec![12]).await; + + test_env + .meta_client + .commit_epoch(e2, e2_res.uncommitted_ssts) + .await + .unwrap(); + test_env.storage.try_wait_epoch_for_test(e2).await; + + epoch.inc(); + state_table.commit(epoch).await.unwrap(); + let e4 = epoch.prev; + + let e3_res = test_env.storage.seal_and_sync_epoch(e3).await.unwrap(); + let e4_res = test_env.storage.seal_and_sync_epoch(e4).await.unwrap(); + + // test may_exist with data in uncommitted ssts (e3, e4), committed ssts (e1, e2, e3, e4) + check_may_exist(&state_table, vec![1, 3, 4, 6], vec![12]).await; + + test_env + .meta_client + .commit_epoch(e3, e3_res.uncommitted_ssts) + .await + .unwrap(); + test_env.storage.try_wait_epoch_for_test(e3).await; + + test_env + .meta_client + .commit_epoch(e4, e4_res.uncommitted_ssts) + .await + .unwrap(); + test_env.storage.try_wait_epoch_for_test(e4).await; + + // test may_exist with data in committed ssts (e1, e2, e3, e4) + check_may_exist(&state_table, vec![1, 3, 4, 6], vec![12]).await; +} diff --git a/src/stream/src/common/table/test_storage_table.rs b/src/stream/src/common/table/test_storage_table.rs index bbae6dd64850f..36cbac220842c 100644 --- a/src/stream/src/common/table/test_storage_table.rs +++ b/src/stream/src/common/table/test_storage_table.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,110 +27,9 @@ use risingwave_storage::table::{Distribution, TableIter}; use crate::common::table::state_table::StateTable; use crate::common::table::test_utils::{gen_prost_table, gen_prost_table_with_value_indices}; -/// There are three struct in relational layer, StateTable, MemTable and CellBasedTable. +/// There are three struct in relational layer, StateTable, MemTable and StorageTable. /// `StateTable` provides read/write interfaces to the upper layer streaming operator. /// `MemTable` is an in-memory buffer used to cache operator operations. 
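The doc comment above describes the layering that these tests (and the new `may_exist` test earlier in this diff) exercise: reads consult the in-memory `MemTable` of pending ops first and only then fall back to the committed state store. Below is a minimal, self-contained sketch of that read path for illustration only; all names (`ToyStateTable`, `may_exist_prefix`, ...) are hypothetical stand-ins rather than RisingWave's actual API, and the real `may_exist` consults a prefix bloom filter instead of scanning storage.

use std::collections::BTreeMap;

// Pending, not-yet-committed operation on a key (simplified; the real enum
// also has an `Update` variant).
enum KeyOp {
    Insert(String),
    Delete,
}

struct ToyStateTable {
    mem_table: BTreeMap<Vec<u8>, KeyOp>, // uncommitted ops, keyed by encoded pk
    storage: BTreeMap<Vec<u8>, String>,  // stands in for the committed state store
}

impl ToyStateTable {
    fn get(&self, key: &[u8]) -> Option<String> {
        // The mem table wins: it holds the newest op for this key.
        match self.mem_table.get(key) {
            Some(KeyOp::Insert(v)) => Some(v.clone()),
            Some(KeyOp::Delete) => None,
            None => self.storage.get(key).cloned(),
        }
    }

    // Analogous to `may_exist`: a cheap, possibly false-positive prefix check.
    // Here both layers are scanned directly; the real implementation asks a
    // bloom filter instead of reading storage.
    fn may_exist_prefix(&self, prefix: &[u8]) -> bool {
        let hit = |m: &BTreeMap<Vec<u8>, _>| {
            m.range(prefix.to_vec()..)
                .take_while(|(k, _)| k.starts_with(prefix))
                .next()
                .is_some()
        };
        hit(&self.mem_table)
            || self
                .storage
                .range(prefix.to_vec()..)
                .take_while(|(k, _)| k.starts_with(prefix))
                .next()
                .is_some()
    }
}

fn main() {
    let mut t = ToyStateTable { mem_table: BTreeMap::new(), storage: BTreeMap::new() };
    t.storage.insert(b"1-11".to_vec(), "committed".into());
    t.mem_table.insert(b"1-11".to_vec(), KeyOp::Delete); // pending delete shadows storage
    t.mem_table.insert(b"4-44".to_vec(), KeyOp::Insert("pending".into()));

    assert_eq!(t.get(b"1-11"), None); // mem-table delete wins over committed data
    assert_eq!(t.get(b"4-44"), Some("pending".into()));
    assert!(t.may_exist_prefix(b"4-"));
    assert!(!t.may_exist_prefix(b"2-"));
}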
- -// test storage table -#[tokio::test] -async fn test_storage_table_get_row() { - const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; - let test_env = prepare_hummock_test_env().await; - - let column_ids = vec![ColumnId::from(0), ColumnId::from(1), ColumnId::from(2)]; - let column_descs = vec![ - ColumnDesc::unnamed(column_ids[0], DataType::Int32), - ColumnDesc::unnamed(column_ids[1], DataType::Int32), - ColumnDesc::unnamed(column_ids[2], DataType::Int32), - ]; - let pk_indices = vec![0_usize, 1_usize]; - let order_types = vec![OrderType::Ascending, OrderType::Descending]; - let read_prefix_len_hint = 2; - let table = gen_prost_table( - TEST_TABLE_ID, - column_descs.clone(), - order_types.clone(), - pk_indices.clone(), - read_prefix_len_hint, - ); - - test_env.register_table(table.clone()).await; - let mut state = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) - .await; - - let table = StorageTable::for_test( - test_env.storage.clone(), - TEST_TABLE_ID, - column_descs, - order_types, - pk_indices, - vec![0, 1, 2], - ); - let mut epoch = EpochPair::new_test_epoch(1); - state.init_epoch(epoch); - - state.insert(OwnedRow::new(vec![Some(1_i32.into()), None, None])); - state.insert(OwnedRow::new(vec![ - Some(2_i32.into()), - None, - Some(222_i32.into()), - ])); - state.insert(OwnedRow::new(vec![Some(3_i32.into()), None, None])); - - state.delete(OwnedRow::new(vec![ - Some(2_i32.into()), - None, - Some(222_i32.into()), - ])); - - epoch.inc(); - state.commit(epoch).await.unwrap(); - test_env.commit_epoch(epoch.prev).await; - - let get_row1_res = table - .get_row( - &OwnedRow::new(vec![Some(1_i32.into()), None]), - HummockReadEpoch::Committed(epoch.prev), - ) - .await - .unwrap(); - assert_eq!( - get_row1_res, - Some(OwnedRow::new(vec![Some(1_i32.into()), None, None,])) - ); - - let get_row2_res = table - .get_row( - &OwnedRow::new(vec![Some(2_i32.into()), None]), - HummockReadEpoch::Committed(epoch.prev), - ) - .await - .unwrap(); - assert_eq!(get_row2_res, None); - - let get_row3_res = table - .get_row( - &OwnedRow::new(vec![Some(3_i32.into()), None]), - HummockReadEpoch::Committed(epoch.prev), - ) - .await - .unwrap(); - assert_eq!( - get_row3_res, - Some(OwnedRow::new(vec![Some(3_i32.into()), None, None])) - ); - - let get_no_exist_res = table - .get_row( - &OwnedRow::new(vec![Some(0_i32.into()), Some(00_i32.into())]), - HummockReadEpoch::Committed(epoch.prev), - ) - .await - .unwrap(); - assert_eq!(get_no_exist_res, None); -} - #[tokio::test] async fn test_storage_table_value_indices() { const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; @@ -148,7 +47,7 @@ async fn test_storage_table_value_indices() { ColumnDesc::unnamed(column_ids[1], DataType::Int32), ColumnDesc::unnamed(column_ids[2], DataType::Int32), ColumnDesc::unnamed(column_ids[3], DataType::Int32), - ColumnDesc::unnamed(column_ids[4], DataType::Int32), + ColumnDesc::unnamed(column_ids[4], DataType::Varchar), ]; let pk_indices = vec![0_usize, 2_usize]; let order_types = vec![OrderType::Ascending, OrderType::Descending]; @@ -165,7 +64,7 @@ async fn test_storage_table_value_indices() { test_env.register_table(table.clone()).await; let mut state = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) + StateTable::from_table_catalog_inconsistent_op(&table, test_env.storage.clone(), None) .await; let table = StorageTable::for_test( @@ -184,21 +83,21 @@ async fn test_storage_table_value_indices() { None, Some(11_i32.into()), Some(111_i32.into()), - 
Some(1111_i32.into()), + Some("1111".to_string().into()), ])); state.insert(OwnedRow::new(vec![ Some(2_i32.into()), None, Some(22_i32.into()), Some(222_i32.into()), - Some(2222_i32.into()), + Some("2222".to_string().into()), ])); state.insert(OwnedRow::new(vec![ Some(3_i32.into()), None, Some(33_i32.into()), Some(333_i32.into()), - Some(3333_i32.into()), + Some("3333".to_string().into()), ])); state.delete(OwnedRow::new(vec![ @@ -206,7 +105,7 @@ async fn test_storage_table_value_indices() { None, Some(22_i32.into()), Some(222_i32.into()), - Some(2222_i32.into()), + Some("2222".to_string().into()), ])); epoch.inc(); @@ -227,7 +126,7 @@ async fn test_storage_table_value_indices() { None, Some(11_i32.into()), Some(111_i32.into()), - Some(1111_i32.into()) + Some("1111".to_string().into()) ])) ); @@ -254,7 +153,7 @@ async fn test_storage_table_value_indices() { None, Some(33_i32.into()), Some(333_i32.into()), - Some(3333_i32.into()) + Some("3333".to_string().into()) ])) ); @@ -268,96 +167,6 @@ async fn test_storage_table_value_indices() { assert_eq!(get_no_exist_res, None); } -#[tokio::test] -async fn test_storage_get_row_for_string() { - const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; - let test_env = prepare_hummock_test_env().await; - - let order_types = vec![OrderType::Ascending, OrderType::Descending]; - let column_ids = vec![ColumnId::from(1), ColumnId::from(4), ColumnId::from(7)]; - let column_descs = vec![ - ColumnDesc::unnamed(column_ids[0], DataType::Varchar), - ColumnDesc::unnamed(column_ids[1], DataType::Varchar), - ColumnDesc::unnamed(column_ids[2], DataType::Varchar), - ]; - let pk_indices = vec![0_usize, 1_usize]; - let read_prefix_len_hint = 2; - let table = gen_prost_table( - TEST_TABLE_ID, - column_descs.clone(), - order_types.clone(), - pk_indices.clone(), - read_prefix_len_hint, - ); - - test_env.register_table(table.clone()).await; - let mut state = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) - .await; - - let table = StorageTable::for_test( - test_env.storage.clone(), - TEST_TABLE_ID, - column_descs.clone(), - order_types.clone(), - pk_indices, - vec![0, 1, 2], - ); - let mut epoch = EpochPair::new_test_epoch(1); - state.init_epoch(epoch); - - state.insert(OwnedRow::new(vec![ - Some("1".to_string().into()), - Some("11".to_string().into()), - Some("111".to_string().into()), - ])); - state.insert(OwnedRow::new(vec![ - Some("4".to_string().into()), - Some("44".to_string().into()), - Some("444".to_string().into()), - ])); - state.delete(OwnedRow::new(vec![ - Some("4".to_string().into()), - Some("44".to_string().into()), - Some("444".to_string().into()), - ])); - - epoch.inc(); - state.commit(epoch).await.unwrap(); - test_env.commit_epoch(epoch.prev).await; - - let get_row1_res = table - .get_row( - &OwnedRow::new(vec![ - Some("1".to_string().into()), - Some("11".to_string().into()), - ]), - HummockReadEpoch::Committed(epoch.prev), - ) - .await - .unwrap(); - assert_eq!( - get_row1_res, - Some(OwnedRow::new(vec![ - Some("1".to_string().into()), - Some("11".to_string().into()), - Some("111".to_string().into()), - ])) - ); - - let get_row2_res = table - .get_row( - &OwnedRow::new(vec![ - Some("4".to_string().into()), - Some("44".to_string().into()), - ]), - HummockReadEpoch::Committed(epoch.prev), - ) - .await - .unwrap(); - assert_eq!(get_row2_res, None); -} - #[tokio::test] async fn test_shuffled_column_id_for_storage_table_get_row() { const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; @@ -383,7 +192,7 @@ async fn 
test_shuffled_column_id_for_storage_table_get_row() { test_env.register_table(table.clone()).await; let mut state = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) + StateTable::from_table_catalog_inconsistent_op(&table, test_env.storage.clone(), None) .await; let mut epoch = EpochPair::new_test_epoch(1); @@ -486,7 +295,7 @@ async fn test_row_based_storage_table_point_get_in_batch_mode() { test_env.register_table(table.clone()).await; let mut state = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) + StateTable::from_table_catalog_inconsistent_op(&table, test_env.storage.clone(), None) .await; let column_ids_partial = vec![ColumnId::from(1), ColumnId::from(2)]; @@ -561,89 +370,6 @@ async fn test_row_based_storage_table_point_get_in_batch_mode() { assert_eq!(get_no_exist_res, None); } -#[tokio::test] -async fn test_row_based_storage_table_scan_in_batch_mode() { - const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; - let test_env = prepare_hummock_test_env().await; - - let order_types = vec![OrderType::Ascending, OrderType::Descending]; - let column_ids = vec![ColumnId::from(0), ColumnId::from(1), ColumnId::from(2)]; - let column_descs = vec![ - ColumnDesc::unnamed(column_ids[0], DataType::Int32), - ColumnDesc::unnamed(column_ids[1], DataType::Int32), - ColumnDesc::unnamed(column_ids[2], DataType::Int32), - ]; - let pk_indices = vec![0_usize, 1_usize]; - let value_indices: Vec = vec![0, 1, 2]; - let read_prefix_len_hint = 0; - let table = gen_prost_table_with_value_indices( - TEST_TABLE_ID, - column_descs.clone(), - order_types.clone(), - pk_indices.clone(), - read_prefix_len_hint, - value_indices.iter().map(|v| *v as i32).collect_vec(), - ); - - test_env.register_table(table.clone()).await; - let mut state = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) - .await; - - let column_ids_partial = vec![ColumnId::from(1), ColumnId::from(2)]; - let table = StorageTable::new_partial( - test_env.storage.clone(), - TEST_TABLE_ID, - column_descs.clone(), - column_ids_partial, - order_types.clone(), - pk_indices, - Distribution::fallback(), - TableOption::default(), - value_indices, - 0, - ); - let mut epoch = EpochPair::new_test_epoch(1); - state.init_epoch(epoch); - - state.insert(OwnedRow::new(vec![ - Some(1_i32.into()), - Some(11_i32.into()), - Some(111_i32.into()), - ])); - state.insert(OwnedRow::new(vec![ - Some(2_i32.into()), - Some(22_i32.into()), - Some(222_i32.into()), - ])); - state.delete(OwnedRow::new(vec![ - Some(2_i32.into()), - Some(22_i32.into()), - Some(222_i32.into()), - ])); - - epoch.inc(); - state.commit(epoch).await.unwrap(); - test_env.commit_epoch(epoch.prev).await; - - let iter = table - .batch_iter(HummockReadEpoch::Committed(epoch.prev), false) - .await - .unwrap(); - pin_mut!(iter); - - let res = iter.next_row().await.unwrap(); - - // only scan two columns - assert_eq!( - OwnedRow::new(vec![Some(11_i32.into()), Some(111_i32.into())]), - res.unwrap() - ); - - let res = iter.next_row().await.unwrap(); - assert!(res.is_none()); -} - #[tokio::test] async fn test_batch_scan_with_value_indices() { const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; @@ -676,7 +402,7 @@ async fn test_batch_scan_with_value_indices() { test_env.register_table(table.clone()).await; let mut state = - StateTable::from_table_catalog_no_sanity_check(&table, test_env.storage.clone(), None) + StateTable::from_table_catalog_inconsistent_op(&table, test_env.storage.clone(), 
None) .await; let column_ids_partial = vec![ColumnId::from(1), ColumnId::from(2)]; diff --git a/src/stream/src/common/table/test_utils.rs b/src/stream/src/common/table/test_utils.rs index 1f3fb82f29ee4..18ddf487646ff 100644 --- a/src/stream/src/common/table/test_utils.rs +++ b/src/stream/src/common/table/test_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ use itertools::Itertools; use risingwave_common::catalog::{ColumnDesc, TableId}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::sort_util::OrderType; use risingwave_pb::catalog::Table as ProstTable; use risingwave_pb::plan_common::{ColumnCatalog, ColumnOrder}; @@ -46,7 +47,7 @@ pub(crate) fn gen_prost_table_with_value_indices( ) -> ProstTable { let prost_pk = pk_index .iter() - .zip_eq(order_types.iter()) + .zip_eq_fast(order_types.iter()) .map(|(idx, order)| ColumnOrder { index: *idx as u32, order_type: order.to_prost() as i32, diff --git a/src/stream/src/common/table/watermark.rs b/src/stream/src/common/table/watermark.rs new file mode 100644 index 0000000000000..e7314dc21e55c --- /dev/null +++ b/src/stream/src/common/table/watermark.rs @@ -0,0 +1,60 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// Strategy to decide how to buffer the watermarks, used for state cleaning. +pub trait WatermarkBufferStrategy: Default { + /// Trigger when a epoch is committed. + fn tick(&mut self); + + /// Whether to clear the buffer. + /// + /// Returns true to indicate that the buffer should be cleared and the strategy states reset. + fn apply(&mut self) -> bool; +} + +/// No buffer, apply watermark to memory immediately. +/// Use the strategy when you want to apply the watermark immediately. +#[derive(Default, Debug)] +pub struct WatermarkNoBuffer; + +impl WatermarkBufferStrategy for WatermarkNoBuffer { + fn tick(&mut self) {} + + fn apply(&mut self) -> bool { + true + } +} + +/// Buffer the watermark by a epoch period. +/// The strategy reduced the delete-range calls to storage. +#[derive(Default, Debug)] +pub struct WatermarkBufferByEpoch { + /// number of epochs since the last time we did state cleaning by watermark. 
+ buffered_epochs_cnt: usize, +} + +impl WatermarkBufferStrategy for WatermarkBufferByEpoch { + fn tick(&mut self) { + self.buffered_epochs_cnt += 1; + } + + fn apply(&mut self) -> bool { + if self.buffered_epochs_cnt >= PERIOD { + self.buffered_epochs_cnt = 0; + true + } else { + false + } + } +} diff --git a/src/stream/src/error.rs b/src/stream/src/error.rs index 7e35ebbe09435..0c8809225a03a 100644 --- a/src/stream/src/error.rs +++ b/src/stream/src/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/actor.rs b/src/stream/src/executor/actor.rs index 7fd8a80566742..48b8b188d8b61 100644 --- a/src/stream/src/executor/actor.rs +++ b/src/stream/src/executor/actor.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,11 +13,13 @@ // limitations under the License. use std::collections::HashMap; +use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use async_stack_trace::{SpanValue, StackTrace}; use futures::future::join_all; use futures::pin_mut; +use hytra::TrAdder; use minitrace::prelude::*; use parking_lot::Mutex; use risingwave_common::util::epoch::EpochPair; @@ -36,6 +38,10 @@ pub struct ActorContext { // TODO: report errors and prompt the user. pub errors: Mutex>>, + + last_mem_val: Arc, + cur_mem_val: Arc, + total_mem_val: Arc>, } pub type ActorContextRef = Arc; @@ -45,6 +51,19 @@ impl ActorContext { Arc::new(Self { id, errors: Default::default(), + cur_mem_val: Arc::new(0.into()), + last_mem_val: Arc::new(0.into()), + total_mem_val: Arc::new(TrAdder::new()), + }) + } + + pub fn create_with_counter(id: ActorId, total_mem_val: Arc>) -> ActorContextRef { + Arc::new(Self { + id, + errors: Default::default(), + cur_mem_val: Arc::new(0.into()), + last_mem_val: Arc::new(0.into()), + total_mem_val, }) } @@ -56,6 +75,23 @@ impl ActorContext { .or_default() .push(err); } + + pub fn store_mem_usage(&self, val: usize) { + // Record the last mem val. + // Calculate the difference between old val and new value, and apply the diff to total + // memory usage value. + let old_value = self.cur_mem_val.load(Ordering::Relaxed); + self.last_mem_val.store(old_value, Ordering::Relaxed); + let diff = val as i64 - old_value as i64; + + self.total_mem_val.inc(diff); + + self.cur_mem_val.store(val, Ordering::Relaxed); + } + + pub fn mem_usage(&self) -> usize { + self.cur_mem_val.load(Ordering::Relaxed) + } } /// `Actor` is the basic execution unit in the streaming framework. diff --git a/src/stream/src/executor/agg_common.rs b/src/stream/src/executor/agg_common.rs new file mode 100644 index 0000000000000..2e32c297113c2 --- /dev/null +++ b/src/stream/src/executor/agg_common.rs @@ -0,0 +1,57 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::sync::Arc; + +use risingwave_storage::StateStore; + +use super::aggregation::{AggCall, AggStateStorage}; +use super::Executor; +use crate::common::table::state_table::StateTable; +use crate::executor::monitor::StreamingMetrics; +use crate::executor::{ActorContextRef, PkIndices}; +use crate::task::AtomicU64Ref; + +/// Arguments needed to construct an `XxxAggExecutor`. +pub struct AggExecutorArgs { + // basic + pub input: Box, + pub actor_ctx: ActorContextRef, + pub pk_indices: PkIndices, + pub executor_id: u64, + + // system configs + pub extreme_cache_size: usize, + + // agg common things + pub agg_calls: Vec, + pub storages: Vec>, + pub result_table: StateTable, + pub distinct_dedup_tables: HashMap>, + + // extra + pub extra: Option, +} + +/// Extra arguments needed to construct an `XxxAggExecutor`. +pub struct AggExecutorArgsExtra { + // hash agg specific things + pub group_key_indices: Vec, + + // things only used by hash agg currently + pub metrics: Arc, + pub chunk_size: usize, + pub watermark_epoch: AtomicU64Ref, +} diff --git a/src/stream/src/executor/aggregation/agg_call.rs b/src/stream/src/executor/aggregation/agg_call.rs index 4c5eb14a7b827..3221ea90cc8b1 100644 --- a/src/stream/src/executor/aggregation/agg_call.rs +++ b/src/stream/src/executor/aggregation/agg_call.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -71,4 +71,7 @@ pub struct AggCall { /// Filter of aggregation. pub filter: Option, + + /// Should deduplicate the input before aggregation. + pub distinct: bool, } diff --git a/src/stream/src/executor/aggregation/agg_group.rs b/src/stream/src/executor/aggregation/agg_group.rs index 4e5f5774278e1..65896dcffb490 100644 --- a/src/stream/src/executor/aggregation/agg_group.rs +++ b/src/stream/src/executor/aggregation/agg_group.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; use std::fmt::Debug; use itertools::Itertools; @@ -21,10 +22,11 @@ use risingwave_common::buffer::Bitmap; use risingwave_common::catalog::Schema; use risingwave_common::must_match; use risingwave_common::row::{OwnedRow, Row, RowExt}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_storage::StateStore; use super::agg_state::{AggState, AggStateStorage}; -use super::AggCall; +use super::{AggCall, DistinctDeduplicater}; use crate::common::table::state_table::StateTable; use crate::executor::error::StreamExecutorResult; use crate::executor::PkIndices; @@ -37,6 +39,9 @@ pub struct AggGroup { /// Current managed states for all [`AggCall`]s. states: Vec>, + /// Distinct deduplicater to deduplicate input rows for each distinct agg call. + distinct_dedup: DistinctDeduplicater, + /// Previous outputs of managed states. Initializing with `None`. 
prev_outputs: Option, } @@ -97,6 +102,7 @@ impl AggGroup { Ok(Self { group_key, states, + distinct_dedup: DistinctDeduplicater::new(agg_calls), prev_outputs, }) } @@ -115,18 +121,36 @@ impl AggGroup { } } + pub(crate) fn is_uninitialized(&self) -> bool { + self.prev_outputs.is_none() + } + /// Apply input chunk to all managed agg states. /// `visibilities` contains the row visibility of the input chunk for each agg call. - pub fn apply_chunk( + pub async fn apply_chunk( &mut self, storages: &mut [AggStateStorage], ops: &[Op], columns: &[Column], visibilities: Vec>, + distinct_dedup_tables: &mut HashMap>, ) -> StreamExecutorResult<()> { + let visibilities = self + .distinct_dedup + .dedup_chunk( + ops, + columns, + visibilities, + distinct_dedup_tables, + self.group_key.as_ref(), + ) + .await?; let columns = columns.iter().map(|col| col.array_ref()).collect_vec(); - for ((state, storage), visibility) in - self.states.iter_mut().zip_eq(storages).zip_eq(visibilities) + for ((state, storage), visibility) in self + .states + .iter_mut() + .zip_eq_fast(storages) + .zip_eq_fast(visibilities) { state.apply_chunk(ops, visibility.as_ref(), &columns, storage)?; } @@ -139,8 +163,9 @@ impl AggGroup { pub async fn flush_state_if_needed( &self, storages: &mut [AggStateStorage], + distinct_dedup_tables: &mut HashMap>, ) -> StreamExecutorResult<()> { - futures::future::try_join_all(self.states.iter().zip_eq(storages).filter_map( + futures::future::try_join_all(self.states.iter().zip_eq_fast(storages).filter_map( |(state, storage)| match state { AggState::Table(state) => Some(state.flush_state_if_needed( must_match!(storage, AggStateStorage::Table { table } => table), @@ -150,6 +175,7 @@ impl AggGroup { }, )) .await?; + self.distinct_dedup.flush(distinct_dedup_tables)?; Ok(()) } @@ -181,7 +207,7 @@ impl AggGroup { futures::future::try_join_all( self.states .iter_mut() - .zip_eq(storages) + .zip_eq_fast(storages) .map(|(state, storage)| state.get_output(storage, self.group_key.as_ref())), ) .await @@ -215,10 +241,8 @@ impl AggGroup { self.group_key().is_some(), self.prev_outputs.is_some(), ) { - (0, 0, _, _) => { - // Previous state is empty, current state is also empty. - // FIXME: for `SimpleAgg`, should we still build some changes when `row_count` is 0 - // While other aggs may not be `0`? + (0, 0, true, _) => { + // We never output any rows for row_count = 0 when group_key is_some 0 } @@ -227,7 +251,7 @@ impl AggGroup { // Previous state is empty, current state is not empty, insert one `Insert` op. 
new_ops.push(Op::Insert); - for (builder, new_value) in builders.iter_mut().zip_eq(curr_outputs.iter()) { + for (builder, new_value) in builders.iter_mut().zip_eq_fast(curr_outputs.iter()) { trace!("append_datum (0 -> N): {:?}", new_value); builder.append_datum(new_value); } @@ -241,7 +265,7 @@ impl AggGroup { for (builder, old_value) in builders .iter_mut() - .zip_eq(self.prev_outputs.as_ref().unwrap().iter()) + .zip_eq_fast(self.prev_outputs.as_ref().unwrap().iter()) { trace!("append_datum (N -> 0): {:?}", old_value); builder.append_datum(old_value); diff --git a/src/stream/src/executor/aggregation/agg_impl/approx_count_distinct.rs b/src/stream/src/executor/aggregation/agg_impl/approx_count_distinct.rs index 6a02e992b440b..a860a87237d63 100644 --- a/src/stream/src/executor/aggregation/agg_impl/approx_count_distinct.rs +++ b/src/stream/src/executor/aggregation/agg_impl/approx_count_distinct.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/aggregation/agg_impl/approx_distinct_append.rs b/src/stream/src/executor/aggregation/agg_impl/approx_distinct_append.rs index c0d4612c45e52..63175ef98c088 100644 --- a/src/stream/src/executor/aggregation/agg_impl/approx_distinct_append.rs +++ b/src/stream/src/executor/aggregation/agg_impl/approx_distinct_append.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/aggregation/agg_impl/approx_distinct_utils.rs b/src/stream/src/executor/aggregation/agg_impl/approx_distinct_utils.rs index 6388ded302e0a..e8c4698ac4818 100644 --- a/src/stream/src/executor/aggregation/agg_impl/approx_distinct_utils.rs +++ b/src/stream/src/executor/aggregation/agg_impl/approx_distinct_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,12 +16,12 @@ use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; use dyn_clone::DynClone; -use itertools::Itertools; use risingwave_common::array::stream_chunk::Ops; use risingwave_common::array::*; use risingwave_common::buffer::Bitmap; use risingwave_common::must_match; use risingwave_common::types::{Datum, DatumRef, Scalar, ScalarImpl}; +use risingwave_common::util::iter_util::ZipEqFast; use crate::executor::aggregation::agg_impl::StreamingAggImpl; use crate::executor::StreamExecutorResult; @@ -174,7 +174,7 @@ pub(super) trait StreamingApproxCountDistinct: Sized { ) -> StreamExecutorResult<()> { match visibility { None => { - for (op, datum) in ops.iter().zip_eq(data[0].iter()) { + for (op, datum) in ops.iter().zip_eq_fast(data[0].iter()) { match op { Op::Insert | Op::UpdateInsert => self.update_registers(datum, true)?, Op::Delete | Op::UpdateDelete => self.update_registers(datum, false)?, @@ -182,8 +182,10 @@ pub(super) trait StreamingApproxCountDistinct: Sized { } } Some(visibility) => { - for ((visible, op), datum) in - visibility.iter().zip_eq(ops.iter()).zip_eq(data[0].iter()) + for ((visible, op), datum) in visibility + .iter() + .zip_eq_fast(ops.iter()) + .zip_eq_fast(data[0].iter()) { if visible { match op { diff --git a/src/stream/src/executor/aggregation/agg_impl/foldable.rs b/src/stream/src/executor/aggregation/agg_impl/foldable.rs index b91245aa81da4..2f025eda1fde2 100644 --- a/src/stream/src/executor/aggregation/agg_impl/foldable.rs +++ b/src/stream/src/executor/aggregation/agg_impl/foldable.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,12 +16,12 @@ use std::marker::PhantomData; -use itertools::Itertools; use risingwave_common::array::stream_chunk::Ops; use risingwave_common::array::*; use risingwave_common::bail; use risingwave_common::buffer::Bitmap; use risingwave_common::types::{Datum, Scalar, ScalarRef}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::ExprError; use super::{StreamingAggImpl, StreamingAggInput, StreamingAggOutput}; @@ -276,7 +276,7 @@ where ) -> StreamExecutorResult<()> { match visibility { None => { - for (op, data) in ops.iter().zip_eq(data.iter()) { + for (op, data) in ops.iter().zip_eq_fast(data.iter()) { match op { Op::Insert | Op::UpdateInsert => { self.result = S::accumulate(self.result.as_ref(), data)? @@ -288,8 +288,10 @@ where } } Some(visibility) => { - for ((visible, op), data) in - visibility.iter().zip_eq(ops.iter()).zip_eq(data.iter()) + for ((visible, op), data) in visibility + .iter() + .zip_eq_fast(ops.iter()) + .zip_eq_fast(data.iter()) { if visible { match op { diff --git a/src/stream/src/executor/aggregation/agg_impl/mod.rs b/src/stream/src/executor/aggregation/agg_impl/mod.rs index 95d958728d971..92108a82a8f47 100644 --- a/src/stream/src/executor/aggregation/agg_impl/mod.rs +++ b/src/stream/src/executor/aggregation/agg_impl/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
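Several hunks above and below replace `itertools`' `zip_eq` with `zip_eq_fast` from `risingwave_common::util::iter_util`. The sketch below shows the idea with a simplified, hypothetical stand-in helper: assert equal lengths once up front (and only in debug builds) rather than paying a per-item check on every iteration; the real `ZipEqFast` trait may differ in its exact shape.

// Simplified stand-in for `zip_eq_fast`: one debug-only length check, then a
// plain `zip`. Not the actual `risingwave_common` implementation.
fn zip_eq_fast<A, B>(a: A, b: B) -> std::iter::Zip<A::IntoIter, B::IntoIter>
where
    A: IntoIterator,
    B: IntoIterator,
    A::IntoIter: ExactSizeIterator,
    B::IntoIter: ExactSizeIterator,
{
    let (a, b) = (a.into_iter(), b.into_iter());
    debug_assert_eq!(a.len(), b.len(), "zipped iterators must have equal length");
    a.zip(b)
}

fn main() {
    let ops = ["insert", "delete"];
    let vis = [true, false];
    for (op, visible) in zip_eq_fast(ops.iter(), vis.iter()) {
        println!("{op}: visible = {visible}");
    }
}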
diff --git a/src/stream/src/executor/aggregation/agg_impl/row_count.rs b/src/stream/src/executor/aggregation/agg_impl/row_count.rs index 31e88c11f2220..5a0007cd2cf5b 100644 --- a/src/stream/src/executor/aggregation/agg_impl/row_count.rs +++ b/src/stream/src/executor/aggregation/agg_impl/row_count.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,11 +14,11 @@ //! This module implements `StreamingRowCountAgg`. -use itertools::Itertools; use risingwave_common::array::stream_chunk::Ops; use risingwave_common::array::*; use risingwave_common::buffer::Bitmap; use risingwave_common::types::{DataType, Datum, ScalarImpl}; +use risingwave_common::util::iter_util::ZipEqFast; use super::StreamingAggImpl; use crate::executor::error::StreamExecutorResult; @@ -79,7 +79,7 @@ impl StreamingAggImpl for StreamingRowCountAgg { } } Some(visibility) => { - for (op, visible) in ops.iter().zip_eq(visibility.iter()) { + for (op, visible) in ops.iter().zip_eq_fast(visibility.iter()) { if visible { match op { Op::Insert | Op::UpdateInsert => self.row_cnt += 1, diff --git a/src/stream/src/executor/aggregation/agg_state.rs b/src/stream/src/executor/aggregation/agg_state.rs index 873122a4a9f36..800609c8cc70f 100644 --- a/src/stream/src/executor/aggregation/agg_state.rs +++ b/src/stream/src/executor/aggregation/agg_state.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/aggregation/distinct.rs b/src/stream/src/executor/aggregation/distinct.rs new file mode 100644 index 0000000000000..e0019e9aa0d0e --- /dev/null +++ b/src/stream/src/executor/aggregation/distinct.rs @@ -0,0 +1,627 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::marker::PhantomData; + +use itertools::Itertools; +use risingwave_common::array::column::Column; +use risingwave_common::array::{ArrayImpl, Op, Vis, VisRef}; +use risingwave_common::buffer::{Bitmap, BitmapBuilder}; +use risingwave_common::row::{self, OwnedRow, Row, RowExt}; +use risingwave_common::types::ScalarImpl; +use risingwave_storage::StateStore; + +use super::AggCall; +use crate::common::table::state_table::StateTable; +use crate::executor::StreamExecutorResult; + +/// Deduplicater for one distinct column. +struct Deduplicater { + agg_call_indices: Vec, + _phantom: PhantomData, +} + +impl Deduplicater { + fn new(indices_and_calls: Vec<(usize, &AggCall)>) -> Self { + let agg_call_indices = indices_and_calls + .into_iter() + .map(|(call_idx, _)| call_idx) + .collect(); + Self { + agg_call_indices, + _phantom: PhantomData, + } + } + + /// Get the indices of agg calls that distinct on this column. 
+ /// The index is the position of the agg call in the original agg call list. + fn agg_call_indices(&self) -> &[usize] { + &self.agg_call_indices + } + + /// Update the `visibilities` of distinct agg calls that distinct on the `column`, + /// according to the counts of distinct keys for each call. + /// + /// * `ops`: Ops for each datum in `column`. + /// * `column`: The column to distinct on. + /// * `visibilities` - Visibilities for agg calls that distinct on the this column. + /// * `dedup_table` - The deduplication table for this distinct column. + async fn dedup( + &mut self, + ops: &[Op], + column: &ArrayImpl, + mut visibilities: Vec<&mut Vis>, + dedup_table: &mut StateTable, + group_key: Option<&OwnedRow>, + ) -> StreamExecutorResult<()> { + assert_eq!(visibilities.len(), self.agg_call_indices.len()); + + // TODO(rc): move to field of `Deduplicater` + let mut cache = HashMap::new(); + let mut old_rows = HashMap::new(); + + // inverted masks for visibilities, 1 means hidden, 0 means visible + let mut vis_masks_inv = (0..visibilities.len()) + .map(|_| BitmapBuilder::zeroed(column.len())) + .collect_vec(); + for (datum_idx, (op, datum)) in ops.iter().zip_eq(column.iter()).enumerate() { + // get counts of the distinct key of all agg calls that distinct on this column + let counts = if let Some(counts) = cache.get_mut(&datum) { + counts + } else { + let counts_row: Option = dedup_table + .get_row(group_key.chain(row::once(datum))) + .await?; + let counts = counts_row.map_or_else( + || vec![0; self.agg_call_indices.len()], + |r| { + old_rows.insert(datum, r.clone()); + r.iter() + .map(|d| if let Some(d) = d { d.into_int64() } else { 0 }) + .collect() + }, + ); + cache.insert(datum, counts); + cache.get_mut(&datum).unwrap() + }; + debug_assert_eq!(counts.len(), visibilities.len()); + + match op { + Op::Insert | Op::UpdateInsert => { + // iterate over vis of each distinct agg call, count up for visible datum + for (i, vis) in visibilities.iter().enumerate() { + if vis.is_set(datum_idx) { + counts[i] += 1; + if counts[i] > 1 { + // duplicate, hide this one + vis_masks_inv[i].set(datum_idx, true); + } + } + } + } + Op::Delete | Op::UpdateDelete => { + // iterate over vis of each distinct agg call, count down for visible datum + for (i, vis) in visibilities.iter().enumerate() { + if vis.is_set(datum_idx) { + counts[i] -= 1; + debug_assert!(counts[i] >= 0); + if counts[i] > 0 { + // still exists at least one duplicate, hide this one + vis_masks_inv[i].set(datum_idx, true); + } + } + } + } + } + } + + cache.into_iter().for_each(|(key, counts)| { + let new_row = group_key.chain(row::once(key)).chain(OwnedRow::new( + counts.into_iter().map(ScalarImpl::from).map(Some).collect(), + )); + if let Some(old_row) = old_rows.remove(&key) { + dedup_table.update(group_key.chain(row::once(key)).chain(old_row), new_row) + } else { + dedup_table.insert(new_row) + } + }); + + for (vis, vis_mask_inv) in visibilities.iter_mut().zip_eq(vis_masks_inv.into_iter()) { + let mask = !vis_mask_inv.finish(); + if !mask.all() { + // update visibility if needed + **vis = vis.as_ref() & VisRef::from(&mask); + } + } + + Ok(()) + } + + /// Flush the deduplication table. + fn flush(&self, _dedup_table: &mut StateTable) { + // TODO(rc): now we flush the table in `dedup` method. + } +} + +/// # Safety +/// +/// There must not be duplicate items in `indices`. 
+unsafe fn get_many_mut_from_slice<'a, T>(slice: &'a mut [T], indices: &[usize]) -> Vec<&'a mut T> { + let mut res = Vec::with_capacity(indices.len()); + let ptr = slice.as_mut_ptr(); + for &idx in indices { + res.push(&mut *ptr.add(idx)); + } + res +} + +pub struct DistinctDeduplicater { + /// Key: distinct column index, value: deduplicater for the column. + deduplicaters: HashMap>, + + _phantom: PhantomData, +} + +impl DistinctDeduplicater { + pub fn new(agg_calls: &[AggCall]) -> Self { + let deduplicaters: HashMap<_, _> = agg_calls + .iter() + .enumerate() + .filter(|(_, call)| call.distinct) // only distinct agg calls need dedup table + .into_group_map_by(|(_, call)| call.args.val_indices()[0]) + .into_iter() + .map(|(k, v)| (k, Deduplicater::new(v))) + .collect(); + Self { + deduplicaters, + _phantom: PhantomData, + } + } + + /// Deduplicate the chunk for each agg call, by returning new visibilities + /// that hide duplicate rows. + pub async fn dedup_chunk( + &mut self, + ops: &[Op], + columns: &[Column], + visibilities: Vec>, + dedup_tables: &mut HashMap>, + group_key: Option<&OwnedRow>, + ) -> StreamExecutorResult>> { + // convert `Option` to `Vis` for convenience + let mut visibilities = visibilities + .into_iter() + .map(|v| match v { + Some(bitmap) => Vis::from(bitmap), + None => Vis::from(ops.len()), + }) + .collect_vec(); + for (distinct_col, deduplicater) in &mut self.deduplicaters { + let column = columns[*distinct_col].array_ref(); + let dedup_table = dedup_tables.get_mut(distinct_col).unwrap(); + // Select visibilities (as mutable references) of distinct agg calls that distinct on + // `distinct_col` so that `Deduplicater` doesn't need to care about index mapping. + // Safety: all items in `agg_call_indices` are unique by nature. + let visibilities = unsafe { + get_many_mut_from_slice(&mut visibilities, deduplicater.agg_call_indices()) + }; + deduplicater + .dedup(ops, column, visibilities, dedup_table, group_key) + .await?; + } + Ok(visibilities + .into_iter() + .map(|v| v.into_visibility()) + .collect()) + } + + /// Flush dedup state caches to dedup tables. 
+ pub fn flush( + &self, + dedup_tables: &mut HashMap>, + ) -> StreamExecutorResult<()> { + for (distinct_col, deduplicater) in &self.deduplicaters { + let dedup_table = dedup_tables.get_mut(distinct_col).unwrap(); + deduplicater.flush(dedup_table); + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use risingwave_common::array::StreamChunk; + use risingwave_common::catalog::{ColumnDesc, ColumnId, TableId}; + use risingwave_common::test_prelude::StreamChunkTestExt; + use risingwave_common::types::DataType; + use risingwave_common::util::epoch::EpochPair; + use risingwave_common::util::sort_util::OrderType; + use risingwave_storage::memory::MemoryStateStore; + + use super::*; + use crate::executor::aggregation::{AggArgs, AggCall, AggKind}; + + fn count_agg_call(kind: AggKind, col_idx: usize, distinct: bool) -> AggCall { + AggCall { + kind, + args: AggArgs::Unary(DataType::Int64, col_idx), + return_type: DataType::Int64, + distinct, + + order_pairs: vec![], + append_only: false, + filter: None, + } + } + + async fn infer_dedup_tables( + agg_calls: &[AggCall], + group_key_types: &[DataType], + store: S, + ) -> HashMap> { + // corresponding to `Agg::infer_distinct_dedup_table` in frontend + let mut dedup_tables = HashMap::new(); + + for (distinct_col, indices_and_calls) in agg_calls + .iter() + .enumerate() + .filter(|(_, call)| call.distinct) // only distinct agg calls need dedup table + .into_group_map_by(|(_, call)| call.args.val_indices()[0]) + { + let mut columns = vec![]; + let mut order_types = vec![]; + + let mut next_column_id = 0; + let mut add_column_desc = |data_type: DataType| { + columns.push(ColumnDesc::unnamed( + ColumnId::new(next_column_id), + data_type, + )); + next_column_id += 1; + }; + + // group key columns + for data_type in group_key_types { + add_column_desc(data_type.clone()); + order_types.push(OrderType::Ascending); + } + + // distinct key column + add_column_desc(indices_and_calls[0].1.args.arg_types()[0].clone()); + order_types.push(OrderType::Ascending); + + // count columns + for (_, _) in indices_and_calls { + add_column_desc(DataType::Int64); + } + + let n_columns = columns.len(); + let table = StateTable::new_without_distribution_with_value_indices( + store.clone(), + TableId::new(2333), + columns, + order_types, + (0..(group_key_types.len() + 1)).collect(), + ((group_key_types.len() + 1)..n_columns).collect(), + ) + .await; + dedup_tables.insert(distinct_col, table); + } + + dedup_tables + } + + fn option_bitmap_to_vec_bool(bm: &Option, size: usize) -> Vec { + match bm { + Some(bm) => bm.iter().take(size).collect(), + None => vec![true; size], + } + } + + #[tokio::test] + async fn test_distinct_deduplicater() { + // Schema: + // a: int, b int, c int + // Agg calls: + // count(a), count(distinct a), sum(distinct a), count(distinct b) + // Group keys: + // empty + + let agg_calls = [ + // count(a) + count_agg_call(AggKind::Count, 0, false), + // count(distinct a) + count_agg_call(AggKind::Count, 0, true), + // sum(distinct a) + count_agg_call(AggKind::Sum, 0, true), + // count(distinct b) + count_agg_call(AggKind::Count, 1, true), + ]; + + let store = MemoryStateStore::new(); + let mut epoch = EpochPair::new_test_epoch(1); + let mut dedup_tables = infer_dedup_tables(&agg_calls, &[], store).await; + dedup_tables + .values_mut() + .for_each(|table| table.init_epoch(epoch)); + + let mut deduplicater = DistinctDeduplicater::new(&agg_calls); + + // --- chunk 1 --- + + let chunk = StreamChunk::from_pretty( + " I I I + + 1 10 100 + + 1 11 101", + ); + let (ops, 
columns, visibility) = chunk.into_inner(); + + let visibilities = std::iter::repeat(visibility) + .take(agg_calls.len()) + .collect_vec(); + let visibilities = deduplicater + .dedup_chunk(&ops, &columns, visibilities, &mut dedup_tables, None) + .await + .unwrap(); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[0], ops.len()), + vec![true, true] // same as original chunk + ); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[1], ops.len()), + vec![true, false] // distinct on a + ); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[2], ops.len()), + vec![true, false] // distinct on a, same as above + ); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[3], ops.len()), + vec![true, true] // distinct on b + ); + + deduplicater.flush(&mut dedup_tables).unwrap(); + + epoch.inc(); + for table in dedup_tables.values_mut() { + table.commit(epoch).await.unwrap(); + } + + // --- chunk 2 --- + + let chunk = StreamChunk::from_pretty( + " I I I + + 1 11 -102 + + 2 12 103 D + + 2 12 -104", + ); + let (ops, columns, visibility) = chunk.into_inner(); + + let visibilities = std::iter::repeat(visibility) + .take(agg_calls.len()) + .collect_vec(); + let visibilities = deduplicater + .dedup_chunk(&ops, &columns, visibilities, &mut dedup_tables, None) + .await + .unwrap(); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[0], ops.len()), + vec![true, false, true] // same as original chunk + ); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[1], ops.len()), + vec![false, false, true] // distinct on a + ); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[2], ops.len()), + vec![false, false, true] // distinct on a, same as above + ); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[3], ops.len()), + vec![false, false, true] // distinct on b + ); + + deduplicater.flush(&mut dedup_tables).unwrap(); + + epoch.inc(); + for table in dedup_tables.values_mut() { + table.commit(epoch).await.unwrap(); + } + + // test recovery + let mut deduplicater = DistinctDeduplicater::new(&agg_calls); + + // --- chunk 3 --- + + let chunk = StreamChunk::from_pretty( + " I I I + - 1 10 100 D + - 1 11 101 + - 1 11 -102", + ); + let (ops, columns, visibility) = chunk.into_inner(); + + let visibilities = std::iter::repeat(visibility) + .take(agg_calls.len()) + .collect_vec(); + let visibilities = deduplicater + .dedup_chunk(&ops, &columns, visibilities, &mut dedup_tables, None) + .await + .unwrap(); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[0], ops.len()), + vec![false, true, true] // same as original chunk + ); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[1], ops.len()), + // distinct on a + vec![ + false, // hidden in original chunk + false, // not the last one + false, // not the last one + ] + ); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[2], ops.len()), + // distinct on a, same as above + vec![ + false, // hidden in original chunk + false, // not the last one + false, // not the last one + ] + ); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[3], ops.len()), + // distinct on b + vec![ + false, // hidden in original chunk + false, // not the last one + true, // is the last one + ] + ); + + deduplicater.flush(&mut dedup_tables).unwrap(); + + epoch.inc(); + for table in dedup_tables.values_mut() { + table.commit(epoch).await.unwrap(); + } + } + + #[tokio::test] + async fn test_distinct_deduplicater_with_group() { + // Schema: + // a: int, b int, c int + // Agg calls: + // count(a), count(distinct a), count(distinct b) + // Group keys: + 
// c + + let agg_calls = [ + // count(a) + count_agg_call(AggKind::Count, 0, false), + // count(distinct a) + count_agg_call(AggKind::Count, 0, true), + // count(distinct b) + count_agg_call(AggKind::Count, 1, true), + ]; + + let group_key_types = [DataType::Int64]; + let group_key = OwnedRow::new(vec![Some(100.into())]); + + let store = MemoryStateStore::new(); + let mut epoch = EpochPair::new_test_epoch(1); + let mut dedup_tables = infer_dedup_tables(&agg_calls, &group_key_types, store).await; + dedup_tables + .values_mut() + .for_each(|table| table.init_epoch(epoch)); + + let mut deduplicater = DistinctDeduplicater::new(&agg_calls); + + let chunk = StreamChunk::from_pretty( + " I I I + + 1 10 100 + + 1 11 100 + + 1 11 100 + + 2 12 200 D + + 2 12 100", + ); + let (ops, columns, visibility) = chunk.into_inner(); + + let visibilities = std::iter::repeat(visibility) + .take(agg_calls.len()) + .collect_vec(); + let visibilities = deduplicater + .dedup_chunk( + &ops, + &columns, + visibilities, + &mut dedup_tables, + Some(&group_key), + ) + .await + .unwrap(); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[0], ops.len()), + vec![true, true, true, false, true] // same as original chunk + ); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[1], ops.len()), + vec![true, false, false, false, true] // distinct on a + ); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[2], ops.len()), + vec![true, true, false, false, true] // distinct on b + ); + + deduplicater.flush(&mut dedup_tables).unwrap(); + + epoch.inc(); + for table in dedup_tables.values_mut() { + table.commit(epoch).await.unwrap(); + } + + let chunk = StreamChunk::from_pretty( + " I I I + - 1 10 100 D + - 1 11 100 + - 1 11 100", + ); + let (ops, columns, visibility) = chunk.into_inner(); + + let visibilities = std::iter::repeat(visibility) + .take(agg_calls.len()) + .collect_vec(); + let visibilities = deduplicater + .dedup_chunk( + &ops, + &columns, + visibilities, + &mut dedup_tables, + Some(&group_key), + ) + .await + .unwrap(); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[0], ops.len()), + vec![false, true, true] // same as original chunk + ); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[1], ops.len()), + // distinct on a + vec![ + false, // hidden in original chunk + false, // not the last one + false, // not the last one + ] + ); + assert_eq!( + option_bitmap_to_vec_bool(&visibilities[2], ops.len()), + // distinct on b + vec![ + false, // hidden in original chunk + false, // not the last one + true, // is the last one + ] + ); + + deduplicater.flush(&mut dedup_tables).unwrap(); + + epoch.inc(); + for table in dedup_tables.values_mut() { + table.commit(epoch).await.unwrap(); + } + } +} diff --git a/src/stream/src/executor/aggregation/minput.rs b/src/stream/src/executor/aggregation/minput.rs index 918e36c663703..6350e4e0ae413 100644 --- a/src/stream/src/executor/aggregation/minput.rs +++ b/src/stream/src/executor/aggregation/minput.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
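The `infer_dedup_tables` helper in the tests above mirrors how the frontend lays out each dedup table. A small sketch of that layout with illustrative names (this is not the planner's API): the columns are the group key, then the distinct key, then one `Int64` count per agg call that distincts on that column; the primary key covers the group key plus the distinct key, and the value part is the counts.

use std::ops::Range;

/// Illustrative column layout of one dedup table.
struct DedupTableLayout {
    /// Group key columns + the distinct key column (also the table's primary key).
    pk_columns: Range<usize>,
    /// One count column per agg call that distincts on this column.
    count_columns: Range<usize>,
}

fn dedup_table_layout(n_group_key_cols: usize, n_calls_on_this_col: usize) -> DedupTableLayout {
    let n_pk = n_group_key_cols + 1;
    DedupTableLayout {
        pk_columns: 0..n_pk,
        count_columns: n_pk..n_pk + n_calls_on_this_col,
    }
}

fn main() {
    // Group by `c`, with `count(distinct a)` and `sum(distinct a)` sharing one table.
    let layout = dedup_table_layout(1, 2);
    assert_eq!(layout.pk_columns, 0..2);    // [c, a]
    assert_eq!(layout.count_columns, 2..4); // one count per distinct call on `a`
}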
@@ -280,6 +280,7 @@ mod tests { use risingwave_common::test_prelude::StreamChunkTestExt; use risingwave_common::types::{DataType, ScalarImpl}; use risingwave_common::util::epoch::EpochPair; + use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::sort_util::{OrderPair, OrderType}; use risingwave_expr::expr::AggKind; use risingwave_storage::memory::MemoryStateStore; @@ -343,6 +344,7 @@ mod tests { order_pairs: vec![], append_only: false, filter: None, + distinct: false, } } @@ -646,7 +648,7 @@ mod tests { [chunk_1, chunk_2] .into_iter() - .zip_eq([&mut state_1, &mut state_2]) + .zip_eq_fast([&mut state_1, &mut state_2]) .try_for_each(|(chunk, state)| { let (ops, columns, visibility) = chunk.into_inner(); let columns: Vec<_> = columns.iter().map(|col| col.array_ref()).collect(); @@ -1039,6 +1041,7 @@ mod tests { ], append_only: false, filter: None, + distinct: false, }; let group_key = None; @@ -1140,6 +1143,7 @@ mod tests { ], append_only: false, filter: None, + distinct: false, }; let group_key = None; diff --git a/src/stream/src/executor/aggregation/mod.rs b/src/stream/src/executor/aggregation/mod.rs index 74fde5ebcf63c..5958ccdb9b685 100644 --- a/src/stream/src/executor/aggregation/mod.rs +++ b/src/stream/src/executor/aggregation/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ pub use agg_call::*; pub use agg_group::*; pub use agg_state::*; +pub use distinct::*; use risingwave_common::array::column::Column; use risingwave_common::array::ArrayImpl::Bool; use risingwave_common::array::DataChunk; @@ -34,6 +35,7 @@ mod agg_call; mod agg_group; pub mod agg_impl; mod agg_state; +mod distinct; mod minput; mod state_cache; mod table; diff --git a/src/stream/src/executor/aggregation/state_cache/array_agg.rs b/src/stream/src/executor/aggregation/state_cache/array_agg.rs index 1a207137b90a9..74565ea42e693 100644 --- a/src/stream/src/executor/aggregation/state_cache/array_agg.rs +++ b/src/stream/src/executor/aggregation/state_cache/array_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/aggregation/state_cache/cache.rs b/src/stream/src/executor/aggregation/state_cache/cache.rs index 8f97767398922..d402de62bfa26 100644 --- a/src/stream/src/executor/aggregation/state_cache/cache.rs +++ b/src/stream/src/executor/aggregation/state_cache/cache.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/aggregation/state_cache/extreme.rs b/src/stream/src/executor/aggregation/state_cache/extreme.rs index e4d5d2d731fd0..7c96b888a3312 100644 --- a/src/stream/src/executor/aggregation/state_cache/extreme.rs +++ b/src/stream/src/executor/aggregation/state_cache/extreme.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
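The hunk above, like many others in this diff, swaps `itertools::zip_eq` for the in-house `zip_eq_fast`. A hedged sketch of what such a helper typically does, assuming it trades itertools' per-item length check for a single up-front assertion on exact-size iterators (the real trait lives in `risingwave_common::util::iter_util` and may differ in detail):

/// Zip two exact-size iterators, asserting equal lengths only in debug builds.
/// Illustrative only; the real helper is `ZipEqFast` in `risingwave_common`.
fn zip_eq_fast_demo<A, B>(a: A, b: B) -> impl Iterator<Item = (A::Item, B::Item)>
where
    A: ExactSizeIterator,
    B: ExactSizeIterator,
{
    debug_assert_eq!(a.len(), b.len(), "iterators must have equal length");
    a.zip(b)
}

fn main() {
    let pairs: Vec<_> =
        zip_eq_fast_demo([1, 2, 3].into_iter(), ["a", "b", "c"].into_iter()).collect();
    assert_eq!(pairs, vec![(1, "a"), (2, "b"), (3, "c")]);
}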
diff --git a/src/stream/src/executor/aggregation/state_cache/mod.rs b/src/stream/src/executor/aggregation/state_cache/mod.rs index bb4e971515bc0..98c74c87fac3b 100644 --- a/src/stream/src/executor/aggregation/state_cache/mod.rs +++ b/src/stream/src/executor/aggregation/state_cache/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/aggregation/state_cache/string_agg.rs b/src/stream/src/executor/aggregation/state_cache/string_agg.rs index ad80f376386e3..44f6d057cd999 100644 --- a/src/stream/src/executor/aggregation/state_cache/string_agg.rs +++ b/src/stream/src/executor/aggregation/state_cache/string_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/aggregation/table.rs b/src/stream/src/executor/aggregation/table.rs index a33b8a78a7723..f4e7fa5ecd3d8 100644 --- a/src/stream/src/executor/aggregation/table.rs +++ b/src/stream/src/executor/aggregation/table.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/aggregation/value.rs b/src/stream/src/executor/aggregation/value.rs index 900ba2246ba8e..b0b64a1a5339c 100644 --- a/src/stream/src/executor/aggregation/value.rs +++ b/src/stream/src/executor/aggregation/value.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -93,6 +93,7 @@ mod tests { order_pairs: vec![], append_only: false, filter: None, + distinct: false, } } @@ -135,6 +136,7 @@ mod tests { order_pairs: vec![], append_only: true, filter: None, + distinct: false, } } diff --git a/src/stream/src/executor/backfill.rs b/src/stream/src/executor/backfill.rs index a7ea84ccd681c..13c970654dc7c 100644 --- a/src/stream/src/executor/backfill.rs +++ b/src/stream/src/executor/backfill.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -20,11 +20,11 @@ use either::Either; use futures::stream::select_with_strategy; use futures::{pin_mut, stream, StreamExt, TryStreamExt}; use futures_async_stream::try_stream; -use itertools::Itertools; use risingwave_common::array::{Op, StreamChunk}; use risingwave_common::buffer::BitmapBuilder; use risingwave_common::catalog::Schema; use risingwave_common::row::{self, OwnedRow, Row, RowExt}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::sort_util::OrderType; use risingwave_hummock_sdk::HummockReadEpoch; use risingwave_storage::table::batch_table::storage_table::StorageTable; @@ -33,7 +33,7 @@ use risingwave_storage::StateStore; use super::error::StreamExecutorError; use super::{expect_first_barrier, BoxedExecutor, Executor, ExecutorInfo, Message, PkIndicesRef}; -use crate::executor::PkIndices; +use crate::executor::{PkIndices, Watermark}; use crate::task::{ActorId, CreateMviewProgress}; /// An implementation of the RFC: Use Backfill To Let Mv On Mv Stream Again.(https://github.com/risingwavelabs/rfcs/pull/13) @@ -136,11 +136,11 @@ where if !to_backfill { // Forward messages directly to the downstream. - let upstream = upstream - .map(move |result| result.map(|msg| Self::mapping_message(msg, &upstream_indices))); #[for_await] for message in upstream { - yield message?; + if let Some(message) = Self::mapping_message(message?, &upstream_indices) { + yield message; + } } return Ok(()); @@ -153,6 +153,9 @@ where // `None` means it starts from the beginning. let mut current_pos: Option = None; + // Keep track of rows from the upstream and snapshot. + let mut processed_rows: u64 = 0; + // Backfill Algorithm: // // backfill_stream @@ -205,6 +208,7 @@ where // Consume upstream buffer chunk for chunk in upstream_chunk_buffer.drain(..) { if let Some(current_pos) = ¤t_pos { + processed_rows += chunk.cardinality() as u64; yield Message::Chunk(Self::mapping_chunk( Self::mark_chunk( chunk, @@ -222,8 +226,11 @@ where yield Message::Barrier(barrier); - self.progress - .update(snapshot_read_epoch, snapshot_read_epoch); + self.progress.update( + snapshot_read_epoch, + snapshot_read_epoch, + processed_rows, + ); // Break the for loop and start a new snapshot read stream. break; } @@ -232,7 +239,7 @@ where upstream_chunk_buffer.push(chunk.compact()); } Message::Watermark(_) => { - todo!("https://github.com/risingwavelabs/risingwave/issues/6042") + // Ignore watermark during backfill. } } } @@ -245,6 +252,7 @@ where // in the buffer. Here we choose to never mark the chunk. // Consume with the renaming stream buffer chunk without mark. for chunk in upstream_chunk_buffer.drain(..) { + processed_rows += chunk.cardinality() as u64; yield Message::Chunk(Self::mapping_chunk( chunk, &upstream_indices, @@ -267,7 +275,7 @@ where .project(table_pk_indices) .into_owned_row(), ); - + processed_rows += chunk.cardinality() as u64; yield Message::Chunk(Self::mapping_chunk(chunk, &upstream_indices)); } } @@ -283,15 +291,14 @@ where // Backfill has already finished. // Forward messages directly to the downstream. 
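// `current_pos` above records the primary key of the last snapshot row forwarded
// downstream; the `range_bounds` hunk below turns it into the lower bound of the next
// snapshot read. A sketch of that decision with std `Bound` and a plain Vec standing in
// for the pk row (illustrative names only):
use std::ops::Bound;

/// `None` means no further scan is needed because the snapshot is exhausted.
fn next_scan_bounds(current_pos: Option<Vec<i64>>) -> Option<(Bound<Vec<i64>>, Bound<Vec<i64>>)> {
    match current_pos {
        // Never scanned yet: start from the beginning.
        None => Some((Bound::Unbounded, Bound::Unbounded)),
        // The upstream table has no pk columns, so it holds at most one row and that row
        // has already been consumed; `Excluded(empty_row)` is not expressible, so stop.
        Some(pos) if pos.is_empty() => None,
        // Resume right after the last row we emitted.
        Some(pos) => Some((Bound::Excluded(pos), Bound::Unbounded)),
    }
}

fn main() {
    assert!(matches!(next_scan_bounds(None), Some((Bound::Unbounded, Bound::Unbounded))));
    assert!(next_scan_bounds(Some(vec![])).is_none());
    assert!(matches!(next_scan_bounds(Some(vec![42])), Some((Bound::Excluded(_), Bound::Unbounded))));
}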
- let upstream = upstream - .map(move |result| result.map(|msg| Self::mapping_message(msg, &upstream_indices))); #[for_await] for msg in upstream { - let msg: Message = msg?; - if let Some(barrier) = msg.as_barrier() { - self.progress.finish(barrier.epoch.curr); + if let Some(msg) = Self::mapping_message(msg?, &upstream_indices) { + if let Some(barrier) = msg.as_barrier() { + self.progress.finish(barrier.epoch.curr); + } + yield msg; } - yield msg; } } @@ -306,6 +313,15 @@ where // `current_pos` is None means it needs to scan from the beginning, so we use Unbounded to // scan. Otherwise, use Excluded. let range_bounds = if let Some(current_pos) = current_pos { + // If `current_pos` is an empty row which means upstream mv contains only one row and it + // has been consumed. The iter interface doesn't support + // `Excluded(empty_row)` range bound, so we can simply return `None`. + if current_pos.is_empty() { + assert!(table.pk_indices().is_empty()); + yield None; + return Ok(()); + } + (Bound::Excluded(current_pos), Bound::Unbounded) } else { (Bound::Unbounded, Bound::Unbounded) @@ -355,7 +371,7 @@ where match row .project(table_pk_indices) .iter() - .zip_eq(pk_order.iter()) + .zip_eq_fast(pk_order.iter()) .cmp_by(current_pos.iter(), |(x, order), y| match order { OrderType::Ascending => x.cmp(&y), OrderType::Descending => y.cmp(&x), @@ -379,10 +395,22 @@ where StreamChunk::new(ops, mapped_columns, visibility) } - fn mapping_message(msg: Message, upstream_indices: &[usize]) -> Message { + fn mapping_watermark(watermark: Watermark, upstream_indices: &[usize]) -> Option { + upstream_indices + .iter() + .position(|&idx| idx == watermark.col_idx) + .map(|idx| watermark.with_idx(idx)) + } + + fn mapping_message(msg: Message, upstream_indices: &[usize]) -> Option { match msg { - Message::Barrier(_) | Message::Watermark(_) => msg, - Message::Chunk(chunk) => Message::Chunk(Self::mapping_chunk(chunk, upstream_indices)), + Message::Barrier(_) => Some(msg), + Message::Watermark(watermark) => { + Self::mapping_watermark(watermark, upstream_indices).map(Message::Watermark) + } + Message::Chunk(chunk) => { + Some(Message::Chunk(Self::mapping_chunk(chunk, upstream_indices))) + } } } } diff --git a/src/stream/src/executor/barrier_align.rs b/src/stream/src/executor/barrier_align.rs index ef4a57f7982cf..423f0927e1e56 100644 --- a/src/stream/src/executor/barrier_align.rs +++ b/src/stream/src/executor/barrier_align.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/batch_query.rs b/src/stream/src/executor/batch_query.rs index c23cb25eabe9d..f7dae9d0c6a5d 100644 --- a/src/stream/src/executor/batch_query.rs +++ b/src/stream/src/executor/batch_query.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/chain.rs b/src/stream/src/executor/chain.rs index d2854a590a6ef..b899d45f80eab 100644 --- a/src/stream/src/executor/chain.rs +++ b/src/stream/src/executor/chain.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
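`mapping_watermark` above (and its twin added to `chain.rs` below) prunes or remaps a watermark when the executor projects a subset of upstream columns: if the watermark's column is not in `upstream_indices`, the watermark is dropped; otherwise its column index is rewritten to the output position. A self-contained sketch with a minimal stand-in for `Watermark`:

/// Minimal stand-in for the executor's `Watermark` message.
#[derive(Debug, PartialEq)]
struct DemoWatermark {
    col_idx: usize,
    value: i64,
}

/// Remap the watermark column to its position in the projected output,
/// or drop the watermark if the column is projected away.
fn map_watermark(watermark: DemoWatermark, upstream_indices: &[usize]) -> Option<DemoWatermark> {
    upstream_indices
        .iter()
        .position(|&idx| idx == watermark.col_idx)
        .map(|idx| DemoWatermark { col_idx: idx, ..watermark })
}

fn main() {
    // The executor outputs upstream columns [2, 0] in that order.
    let upstream_indices = [2, 0];
    // A watermark on upstream column 0 maps to output column 1.
    assert_eq!(
        map_watermark(DemoWatermark { col_idx: 0, value: 42 }, &upstream_indices),
        Some(DemoWatermark { col_idx: 1, value: 42 })
    );
    // A watermark on a column that is projected away is dropped.
    assert_eq!(map_watermark(DemoWatermark { col_idx: 1, value: 42 }, &upstream_indices), None);
}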
@@ -19,7 +19,7 @@ use risingwave_common::catalog::Schema; use super::error::StreamExecutorError; use super::{expect_first_barrier, BoxedExecutor, Executor, ExecutorInfo, Message}; -use crate::executor::PkIndices; +use crate::executor::{PkIndices, Watermark}; use crate::task::{ActorId, CreateMviewProgress}; /// [`ChainExecutor`] is an executor that enables synchronization between the existing stream and @@ -38,9 +38,12 @@ pub struct ChainExecutor { actor_id: ActorId, info: ExecutorInfo, + + /// Only consume upstream messages. + upstream_only: bool, } -fn mapping(upstream_indices: &[usize], chunk: StreamChunk) -> StreamChunk { +fn mapping_chunk(chunk: StreamChunk, upstream_indices: &[usize]) -> StreamChunk { let (ops, columns, visibility) = chunk.into_inner(); let mapped_columns = upstream_indices .iter() @@ -49,6 +52,13 @@ fn mapping(upstream_indices: &[usize], chunk: StreamChunk) -> StreamChunk { StreamChunk::new(ops, mapped_columns, visibility) } +fn mapping_watermark(watermark: Watermark, upstream_indices: &[usize]) -> Option { + upstream_indices + .iter() + .position(|&idx| idx == watermark.col_idx) + .map(|idx| watermark.with_idx(idx)) +} + impl ChainExecutor { pub fn new( snapshot: BoxedExecutor, @@ -57,6 +67,7 @@ impl ChainExecutor { progress: CreateMviewProgress, schema: Schema, pk_indices: PkIndices, + upstream_only: bool, ) -> Self { Self { info: ExecutorInfo { @@ -69,6 +80,7 @@ impl ChainExecutor { upstream_indices, actor_id: progress.actor_id(), progress, + upstream_only, } } @@ -83,7 +95,11 @@ impl ChainExecutor { // If the barrier is a conf change of creating this mview, init snapshot from its epoch // and begin to consume the snapshot. // Otherwise, it means we've recovered and the snapshot is already consumed. - let to_consume_snapshot = barrier.is_add_dispatcher(self.actor_id); + let to_consume_snapshot = barrier.is_add_dispatcher(self.actor_id) && !self.upstream_only; + + if self.upstream_only { + self.progress.finish(barrier.epoch.curr); + } // The first barrier message should be propagated. yield Message::Barrier(barrier); @@ -105,11 +121,14 @@ impl ChainExecutor { #[for_await] for msg in upstream { match msg? { - Message::Watermark(_) => { - todo!("https://github.com/risingwavelabs/risingwave/issues/6042") + Message::Watermark(watermark) => { + match mapping_watermark(watermark, &self.upstream_indices) { + Some(mapped_watermark) => yield Message::Watermark(mapped_watermark), + None => continue, + } } Message::Chunk(chunk) => { - yield Message::Chunk(mapping(&self.upstream_indices, chunk)); + yield Message::Chunk(mapping_chunk(chunk, &self.upstream_indices)); } Message::Barrier(barrier) => { self.progress.finish(barrier.epoch.curr); @@ -193,7 +212,15 @@ mod test { ], )); - let chain = ChainExecutor::new(first, second, vec![0], progress, schema, PkIndices::new()); + let chain = ChainExecutor::new( + first, + second, + vec![0], + progress, + schema, + PkIndices::new(), + false, + ); let mut chain = Box::new(chain).execute(); chain.next().await; diff --git a/src/stream/src/executor/dispatch.rs b/src/stream/src/executor/dispatch.rs index ddf66217e2678..058400a3dbd18 100644 --- a/src/stream/src/executor/dispatch.rs +++ b/src/stream/src/executor/dispatch.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
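The dispatcher hunks below swap the compressed vnode mapping for `ActorMapping::from_protobuf(..).to_expanded()`, but the core hash-dispatch idea is unchanged: hash the distribution key to a virtual node, look the vnode up in the vnode-to-actor mapping, and build one visibility bitmap per downstream output. A self-contained sketch (std hasher and 8 vnodes here; the real dispatcher uses CRC32 and `VirtualNode::COUNT` vnodes):

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

const VNODE_COUNT: usize = 8; // illustrative; the real default is 256 vnodes

/// For each output actor, build a row-visibility mask: a row is visible only to the
/// output whose actor owns the row's vnode according to `vnode_to_actor`.
fn per_output_visibility(keys: &[i64], vnode_to_actor: &[u32], outputs: &[u32]) -> Vec<Vec<bool>> {
    outputs
        .iter()
        .map(|&actor| {
            keys.iter()
                .map(|key| {
                    let mut hasher = DefaultHasher::new();
                    key.hash(&mut hasher);
                    let vnode = (hasher.finish() as usize) % VNODE_COUNT;
                    vnode_to_actor[vnode] == actor
                })
                .collect()
        })
        .collect()
}

fn main() {
    let vnode_to_actor = [1, 1, 2, 2, 1, 2, 1, 2]; // vnode -> owning actor id
    let vis = per_output_visibility(&[10, 20, 30], &vnode_to_actor, &[1, 2]);
    // Every row is visible to exactly one output.
    for row in 0..3 {
        assert_eq!(vis.iter().filter(|v| v[row]).count(), 1);
    }
}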
@@ -24,9 +24,9 @@ use futures_async_stream::try_stream; use itertools::Itertools; use risingwave_common::array::{Op, StreamChunk}; use risingwave_common::buffer::BitmapBuilder; -use risingwave_common::hash::VirtualNode; -use risingwave_common::util::compress::decompress_data; +use risingwave_common::hash::{ActorMapping, ExpandedActorMapping, VirtualNode}; use risingwave_common::util::hash_util::Crc32FastBuilder; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::stream_plan::update_mutation::DispatcherUpdate as ProstDispatcherUpdate; use risingwave_pb::stream_plan::Dispatcher as ProstDispatcher; use smallvec::{smallvec, SmallVec}; @@ -164,13 +164,8 @@ impl DispatchExecutorInner { // example, the `Broadcast` inner side of the dynamic filter. There're too many combinations // to handle here, so we just ignore the `hash_mapping` field for any other exchange types. if let DispatcherImpl::Hash(dispatcher) = dispatcher { - dispatcher.hash_mapping = { - let compressed_mapping = update.get_hash_mapping()?; - decompress_data( - &compressed_mapping.original_indices, - &compressed_mapping.data, - ) - } + dispatcher.hash_mapping = + ActorMapping::from_protobuf(update.get_hash_mapping()?).to_expanded(); } Ok(()) @@ -320,13 +315,8 @@ impl DispatcherImpl { .map(|i| *i as usize) .collect(); - let hash_mapping = { - let compressed_mapping = dispatcher.get_hash_mapping()?; - decompress_data( - &compressed_mapping.original_indices, - &compressed_mapping.data, - ) - }; + let hash_mapping = + ActorMapping::from_protobuf(dispatcher.get_hash_mapping()?).to_expanded(); DispatcherImpl::Hash(HashDataDispatcher::new( outputs, @@ -521,7 +511,7 @@ pub struct HashDataDispatcher { keys: Vec, /// Mapping from virtual node to actor id, used for hash data dispatcher to dispatch tasks to /// different downstream actors. - hash_mapping: Vec, + hash_mapping: ExpandedActorMapping, dispatcher_id: DispatcherId, } @@ -539,7 +529,7 @@ impl HashDataDispatcher { pub fn new( outputs: Vec, keys: Vec, - hash_mapping: Vec, + hash_mapping: ExpandedActorMapping, dispatcher_id: DispatcherId, ) -> Self { Self { @@ -607,7 +597,7 @@ impl Dispatcher for HashDataDispatcher { let mut build_op_vis = |vnode: VirtualNode, op: Op, visible: bool| { // Build visibility map for every output chunk. 
- for (output, vis_map) in self.outputs.iter().zip_eq(vis_maps.iter_mut()) { + for (output, vis_map) in self.outputs.iter().zip_eq_fast(vis_maps.iter_mut()) { vis_map.append( visible && self.hash_mapping[vnode.to_index()] == output.actor_id(), ); @@ -638,16 +628,20 @@ impl Dispatcher for HashDataDispatcher { match visibility { None => { - vnodes.iter().copied().zip_eq(ops).for_each(|(vnode, op)| { - build_op_vis(vnode, op, true); - }); + vnodes + .iter() + .copied() + .zip_eq_fast(ops) + .for_each(|(vnode, op)| { + build_op_vis(vnode, op, true); + }); } Some(visibility) => { vnodes .iter() .copied() - .zip_eq(ops) - .zip_eq(visibility.iter()) + .zip_eq_fast(ops) + .zip_eq_fast(visibility.iter()) .for_each(|((vnode, op), visible)| { build_op_vis(vnode, op, visible); }); @@ -657,7 +651,7 @@ impl Dispatcher for HashDataDispatcher { let ops = new_ops; // individually output StreamChunk integrated with vis_map - for (vis_map, output) in vis_maps.into_iter().zip_eq(self.outputs.iter_mut()) { + for (vis_map, output) in vis_maps.into_iter().zip_eq_fast(self.outputs.iter_mut()) { let vis_map = vis_map.finish(); // columns is not changed in this function let new_stream_chunk = @@ -865,6 +859,7 @@ mod tests { use risingwave_common::array::{Array, ArrayBuilder, I32ArrayBuilder, Op}; use risingwave_common::catalog::Schema; use risingwave_common::hash::VirtualNode; + use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::stream_plan::DispatcherType; use super::*; @@ -1190,12 +1185,12 @@ mod tests { } let output_idx = hash_mapping[hasher.finish() as usize % VirtualNode::COUNT] as usize - 1; - for (builder, val) in builders.iter_mut().zip_eq(one_row.iter()) { + for (builder, val) in builders.iter_mut().zip_eq_fast(one_row.iter()) { builder.append(Some(*val)); } output_cols[output_idx] .iter_mut() - .zip_eq(one_row.iter()) + .zip_eq_fast(one_row.iter()) .for_each(|(each_column, val)| each_column.push(*val)); output_ops[output_idx].push(op); } @@ -1226,7 +1221,7 @@ mod tests { real_chunk .columns() .iter() - .zip_eq(output_cols[output_idx].iter()) + .zip_eq_fast(output_cols[output_idx].iter()) .for_each(|(real_col, expect_col)| { let real_vals = real_chunk .visibility() diff --git a/src/stream/src/executor/dml.rs b/src/stream/src/executor/dml.rs index a69bcb4ef594c..9a16b75fbc1d3 100644 --- a/src/stream/src/executor/dml.rs +++ b/src/stream/src/executor/dml.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,8 +16,8 @@ use futures::future::Either; use futures::stream::select; use futures::StreamExt; use futures_async_stream::try_stream; -use risingwave_common::array::StreamChunk; -use risingwave_common::catalog::{ColumnDesc, Schema, TableId}; +use risingwave_common::catalog::{ColumnDesc, Schema, TableId, TableVersionId}; +use risingwave_connector::source::StreamChunkWithState; use risingwave_source::dml_manager::DmlManagerRef; use super::error::StreamExecutorError; @@ -43,11 +43,15 @@ pub struct DmlExecutor { // Id of the table on which DML performs. table_id: TableId, + // Version of the table on which DML performs. + table_version_id: TableVersionId, + // Column descriptions of the table. 
column_descs: Vec, } impl DmlExecutor { + #[allow(clippy::too_many_arguments)] pub fn new( upstream: BoxedExecutor, schema: Schema, @@ -55,6 +59,7 @@ impl DmlExecutor { executor_id: u64, dml_manager: DmlManagerRef, table_id: TableId, + table_version_id: TableVersionId, column_descs: Vec, ) -> Self { Self { @@ -64,6 +69,7 @@ impl DmlExecutor { identity: format!("DmlExecutor {:X}", executor_id), dml_manager, table_id, + table_version_id, column_descs, } } @@ -72,20 +78,25 @@ impl DmlExecutor { async fn execute_inner(self: Box) { let mut upstream = self.upstream.execute(); + // The first barrier message should be propagated. + let barrier = expect_first_barrier(&mut upstream).await?; + // Construct the reader of batch data (DML from users). We must create a variable to hold - // this `Arc` here, or it will be dropped due to the `Weak` reference in + // this `Arc` here, or it will be dropped due to the `Weak` reference in // `DmlManager`. + // + // Note(bugen): Only register after the first barrier message is received, which means the + // current executor is activated. This avoids the new reader overwriting the old one during + // the preparation of schema change. let batch_reader = self .dml_manager - .register_reader(self.table_id, &self.column_descs) + .register_reader(self.table_id, self.table_version_id, &self.column_descs) .map_err(StreamExecutorError::connector_error)?; let batch_reader = batch_reader - .stream_reader_v2() - .into_stream_v2() + .stream_reader() + .into_stream() .map(Either::Right); - // The first barrier message should be propagated. - let barrier = expect_first_barrier(&mut upstream).await?; yield Message::Barrier(barrier); // Stream data from the upstream executor. @@ -104,8 +115,9 @@ impl DmlExecutor { } Either::Right(chunk) => { // Batch data. - let chunk: StreamChunk = chunk.map_err(StreamExecutorError::connector_error)?; - yield Message::Chunk(chunk); + let chunk: StreamChunkWithState = + chunk.map_err(StreamExecutorError::connector_error)?; + yield Message::Chunk(chunk.chunk); } } } @@ -134,7 +146,8 @@ impl Executor for DmlExecutor { mod tests { use std::sync::Arc; - use risingwave_common::catalog::{ColumnId, Field}; + use risingwave_common::array::StreamChunk; + use risingwave_common::catalog::{ColumnId, Field, INITIAL_TABLE_VERSION_ID}; use risingwave_common::test_prelude::StreamChunkTestExt; use risingwave_common::types::DataType; use risingwave_source::dml_manager::DmlManager; @@ -164,6 +177,7 @@ mod tests { 1, dml_manager.clone(), table_id, + INITIAL_TABLE_VERSION_ID, column_descs, )); let mut dml_executor = dml_executor.execute(); @@ -201,7 +215,10 @@ mod tests { tx.push_chunk(stream_chunk3); // Message from batch - dml_manager.write_chunk(&table_id, batch_chunk).unwrap(); + dml_manager + .write_chunk(table_id, INITIAL_TABLE_VERSION_ID, batch_chunk) + .await + .unwrap(); // Consume the 1st message from upstream executor let msg = dml_executor.next().await.unwrap().unwrap(); diff --git a/src/stream/src/executor/dynamic_filter.rs b/src/stream/src/executor/dynamic_filter.rs index 96f29d257fde9..1f83825522b2f 100644 --- a/src/stream/src/executor/dynamic_filter.rs +++ b/src/stream/src/executor/dynamic_filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
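The DML changes above thread a `table_version_id` from the executor through `register_reader` and `write_chunk`. A hedged sketch of the rationale: the version id lets the DML manager reject writers or readers created against an older table schema, so a batch write racing with a schema change fails fast instead of producing rows of the wrong shape. Everything below is illustrative and is not the `DmlManager` API.

/// Illustrative only: a registry that tracks the current schema version of a table.
struct DemoDmlRegistry {
    table_version_id: u64,
}

impl DemoDmlRegistry {
    /// A writer created against an older version must not push chunks anymore.
    fn write_chunk(&self, writer_version_id: u64, rows: usize) -> Result<usize, String> {
        if writer_version_id != self.table_version_id {
            return Err(format!(
                "table version changed ({} -> {}), retry against the new schema",
                writer_version_id, self.table_version_id
            ));
        }
        Ok(rows)
    }
}

fn main() {
    let mut registry = DemoDmlRegistry { table_version_id: 0 };
    assert_eq!(registry.write_chunk(0, 3), Ok(3));
    registry.table_version_id = 1; // a schema change bumps the version
    assert!(registry.write_chunk(0, 3).is_err()); // stale writers are rejected
}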
@@ -17,7 +17,6 @@ use std::sync::Arc; use futures::{pin_mut, StreamExt}; use futures_async_stream::try_stream; -use itertools::Itertools; use risingwave_common::array::{Array, ArrayImpl, DataChunk, Op, StreamChunk}; use risingwave_common::bail; use risingwave_common::buffer::{Bitmap, BitmapBuilder}; @@ -25,8 +24,10 @@ use risingwave_common::catalog::Schema; use risingwave_common::hash::VirtualNode; use risingwave_common::row::{once, OwnedRow as RowData, Row}; use risingwave_common::types::{DataType, Datum, ScalarImpl, ToDatumRef, ToOwnedDatum}; -use risingwave_expr::expr::expr_binary_nonnull::new_binary_expr; -use risingwave_expr::expr::{BoxedExpression, InputRefExpression, LiteralExpression}; +use risingwave_common::util::iter_util::ZipEqDebug; +use risingwave_expr::expr::{ + new_binary_expr, BoxedExpression, InputRefExpression, LiteralExpression, +}; use risingwave_pb::expr::expr_node::Type as ExprNodeType; use risingwave_pb::expr::expr_node::Type::{ GreaterThan, GreaterThanOrEqual, LessThan, LessThanOrEqual, @@ -108,7 +109,7 @@ impl DynamicFilterExecutor { }) }); - for (idx, (row, op)) in data_chunk.rows().zip_eq(ops.iter()).enumerate() { + for (idx, (row, op)) in data_chunk.rows().zip_eq_debug(ops.iter()).enumerate() { let left_val = row.datum_at(self.key_l).to_owned_datum(); let res = if let Some(array) = &eval_results { @@ -335,7 +336,7 @@ impl DynamicFilterExecutor { let chunk = chunk.compact(); // Is this unnecessary work? let (data_chunk, ops) = chunk.into_parts(); - for (row, op) in data_chunk.rows().zip_eq(ops.iter()) { + for (row, op) in data_chunk.rows().zip_eq_debug(ops.iter()) { match *op { Op::UpdateInsert | Op::Insert => { current_epoch_value = Some(row.datum_at(0).to_owned_datum()); @@ -490,8 +491,8 @@ mod tests { mem_state: MemoryStateStore, ) -> (StateTable, StateTable) { let column_descs = ColumnDesc::unnamed(ColumnId::new(0), DataType::Int64); - // TODO: enable sanity check for dynamic filter - let state_table_l = StateTable::new_without_distribution_no_sanity_check( + // TODO: use consistent operations for dynamic filter + let state_table_l = StateTable::new_without_distribution_inconsistent_op( mem_state.clone(), TableId::new(0), vec![column_descs.clone()], @@ -499,7 +500,7 @@ mod tests { vec![0], ) .await; - let state_table_r = StateTable::new_without_distribution_no_sanity_check( + let state_table_r = StateTable::new_without_distribution_inconsistent_op( mem_state, TableId::new(1), vec![column_descs], diff --git a/src/stream/src/executor/error.rs b/src/stream/src/executor/error.rs index cf8f74ffb6ddc..eafb17e977ae4 100644 --- a/src/stream/src/executor/error.rs +++ b/src/stream/src/executor/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/exchange/input.rs b/src/stream/src/executor/exchange/input.rs index dab7ae524b82d..4827361f84580 100644 --- a/src/stream/src/executor/exchange/input.rs +++ b/src/stream/src/executor/exchange/input.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -168,11 +168,6 @@ impl RemoteInput { let msg = message.unwrap(); let bytes = Message::get_encoded_len(&msg); - metrics - .exchange_recv_size - .with_label_values(&[&up_actor_id, &down_actor_id]) - .inc_by(bytes as u64); - metrics .exchange_frag_recv_size .with_label_values(&[&up_fragment_id, &down_fragment_id]) diff --git a/src/stream/src/executor/exchange/mod.rs b/src/stream/src/executor/exchange/mod.rs index 11cc8329c55d6..f13530cf01bcc 100644 --- a/src/stream/src/executor/exchange/mod.rs +++ b/src/stream/src/executor/exchange/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/exchange/output.rs b/src/stream/src/executor/exchange/output.rs index 89ea48ae816cd..c2954d6cd2dd1 100644 --- a/src/stream/src/executor/exchange/output.rs +++ b/src/stream/src/executor/exchange/output.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/exchange/permit.rs b/src/stream/src/executor/exchange/permit.rs index c8757fc1019e6..eea8bfe07b519 100644 --- a/src/stream/src/executor/exchange/permit.rs +++ b/src/stream/src/executor/exchange/permit.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/expand.rs b/src/stream/src/executor/expand.rs index 6cc737c593174..37b8b967181a3 100644 --- a/src/stream/src/executor/expand.rs +++ b/src/stream/src/executor/expand.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/filter.rs b/src/stream/src/executor/filter.rs index f1e919094200a..80354bd48f691 100644 --- a/src/stream/src/executor/filter.rs +++ b/src/stream/src/executor/filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
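The `filter.rs` hunk below walks the evaluated boolean column together with the ops and keeps a row only when the predicate is true, treating NULL (`None`) as false. A simplified sketch of that visibility computation for plain inserts and deletes (the real executor also patches `UpdateDelete`/`UpdateInsert` pairs, which is omitted here):

/// Keep a row only when the predicate evaluated to `Some(true)`;
/// NULL results (`None`) filter the row out, as in SQL semantics.
fn filter_visibility(pred_results: &[Option<bool>]) -> Vec<bool> {
    pred_results
        .iter()
        .map(|res| res.unwrap_or(false))
        .collect()
}

fn main() {
    let vis = filter_visibility(&[Some(true), Some(false), None]);
    assert_eq!(vis, vec![true, false, false]);
}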
@@ -15,10 +15,10 @@ use std::fmt::{Debug, Formatter}; use std::sync::Arc; -use itertools::Itertools; use risingwave_common::array::{Array, ArrayImpl, Op, StreamChunk, Vis}; use risingwave_common::buffer::BitmapBuilder; use risingwave_common::catalog::Schema; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::expr::BoxedExpression; use super::{ @@ -97,7 +97,7 @@ impl SimpleFilterExecutor { }); if let ArrayImpl::Bool(bool_array) = &*filter { - for (op, res) in ops.into_iter().zip_eq(bool_array.iter()) { + for (op, res) in ops.into_iter().zip_eq_fast(bool_array.iter()) { // SAFETY: ops.len() == pred_output.len() == visibility.len() let res = res.unwrap_or(false); match op { @@ -198,8 +198,7 @@ mod tests { use risingwave_common::array::StreamChunk; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::types::DataType; - use risingwave_expr::expr::expr_binary_nonnull::new_binary_expr; - use risingwave_expr::expr::InputRefExpression; + use risingwave_expr::expr::{new_binary_expr, InputRefExpression}; use risingwave_pb::expr::expr_node::Type; use super::super::test_utils::MockSource; diff --git a/src/stream/src/executor/global_simple_agg.rs b/src/stream/src/executor/global_simple_agg.rs index 21d78c43b6e44..1bcea746593f2 100644 --- a/src/stream/src/executor/global_simple_agg.rs +++ b/src/stream/src/executor/global_simple_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,8 +17,10 @@ use futures_async_stream::try_stream; use risingwave_common::array::StreamChunk; use risingwave_common::catalog::Schema; use risingwave_common::row::RowExt; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_storage::StateStore; +use super::agg_common::AggExecutorArgs; use super::aggregation::{ agg_call_filter_res, iter_table_storage, AggChangesInfo, AggStateStorage, }; @@ -27,7 +29,7 @@ use crate::common::table::state_table::StateTable; use crate::error::StreamResult; use crate::executor::aggregation::{generate_agg_schema, AggCall, AggGroup}; use crate::executor::error::StreamExecutorError; -use crate::executor::{BoxedMessageStream, Message, PkIndices}; +use crate::executor::{BoxedMessageStream, Message}; /// `GlobalSimpleAggExecutor` is the aggregation operator for streaming system. /// To create an aggregation operator, states and expressions should be passed along the @@ -44,13 +46,17 @@ use crate::executor::{BoxedMessageStream, Message, PkIndices}; /// `GlobalSimpleAggExecutor`. pub struct GlobalSimpleAggExecutor { input: Box, + inner: ExecutorInner, +} + +struct ExecutorInner { + actor_ctx: ActorContextRef, info: ExecutorInfo, - ctx: ActorContextRef, - /// Pk indices from input + /// Pk indices from input. input_pk_indices: Vec, - /// Schema from input + /// Schema from input. input_schema: Schema, /// An operator will support multiple aggregation calls. @@ -64,8 +70,17 @@ pub struct GlobalSimpleAggExecutor { /// table when `flush_data` is called. result_table: StateTable, + /// State tables for deduplicating rows on distinct key for distinct agg calls. + /// One table per distinct column (may be shared by multiple agg calls). + distinct_dedup_tables: HashMap>, + /// Extreme state cache size extreme_cache_size: usize, +} + +struct ExecutionVars { + /// The single [`AggGroup`]. + agg_group: AggGroup, /// Mark the agg state is changed in the current epoch or not. 
state_changed: bool, @@ -77,97 +92,62 @@ impl Executor for GlobalSimpleAggExecutor { } fn schema(&self) -> &Schema { - &self.info.schema + &self.inner.info.schema } fn pk_indices(&self) -> PkIndicesRef<'_> { - &self.info.pk_indices + &self.inner.info.pk_indices } fn identity(&self) -> &str { - &self.info.identity + &self.inner.info.identity } } impl GlobalSimpleAggExecutor { - #[allow(clippy::too_many_arguments)] - pub fn new( - ctx: ActorContextRef, - input: Box, - agg_calls: Vec, - storages: Vec>, - result_table: StateTable, - pk_indices: PkIndices, - executor_id: u64, - extreme_cache_size: usize, - ) -> StreamResult { - let input_info = input.info(); - let schema = generate_agg_schema(input.as_ref(), &agg_calls, None); - + pub fn new(args: AggExecutorArgs) -> StreamResult { + let input_info = args.input.info(); + let schema = generate_agg_schema(args.input.as_ref(), &args.agg_calls, None); Ok(Self { - ctx, - input, - info: ExecutorInfo { - schema, - pk_indices, - identity: format!("GlobalSimpleAggExecutor-{:X}", executor_id), + input: args.input, + inner: ExecutorInner { + actor_ctx: args.actor_ctx, + info: ExecutorInfo { + schema, + pk_indices: args.pk_indices, + identity: format!("GlobalSimpleAggExecutor-{:X}", args.executor_id), + }, + input_pk_indices: input_info.pk_indices, + input_schema: input_info.schema, + agg_calls: args.agg_calls, + storages: args.storages, + result_table: args.result_table, + distinct_dedup_tables: args.distinct_dedup_tables, + extreme_cache_size: args.extreme_cache_size, }, - input_pk_indices: input_info.pk_indices, - input_schema: input_info.schema, - agg_calls, - storages, - result_table, - extreme_cache_size, - state_changed: false, }) } - #[allow(clippy::too_many_arguments)] async fn apply_chunk( - ctx: &ActorContextRef, - identity: &str, - agg_calls: &[AggCall], - storages: &mut [AggStateStorage], - result_table: &mut StateTable, - input_pk_indices: &PkIndices, - input_schema: &Schema, - agg_group: &mut Option>, + this: &mut ExecutorInner, + vars: &mut ExecutionVars, chunk: StreamChunk, - extreme_cache_size: usize, - state_changed: &mut bool, ) -> StreamExecutorResult<()> { - // Create `AggGroup` if not exists. This will fetch previous agg result - // from the result table. - if agg_group.is_none() { - *agg_group = Some( - AggGroup::create( - None, - agg_calls, - storages, - result_table, - input_pk_indices, - extreme_cache_size, - input_schema, - ) - .await?, - ); - } - let agg_group = agg_group.as_mut().unwrap(); - // Mark state as changed. - *state_changed = true; + vars.state_changed = true; // Decompose the input chunk. let capacity = chunk.capacity(); let (ops, columns, visibility) = chunk.into_inner(); // Calculate the row visibility for every agg call. - let visibilities: Vec<_> = agg_calls + let visibilities: Vec<_> = this + .agg_calls .iter() .map(|agg_call| { agg_call_filter_res( - ctx, - identity, + &this.actor_ctx, + &this.info.identity, agg_call, &columns, visibility.as_ref(), @@ -177,9 +157,9 @@ impl GlobalSimpleAggExecutor { .try_collect()?; // Materialize input chunk if needed. 
- storages + this.storages .iter_mut() - .zip_eq(visibilities.iter().map(Option::as_ref)) + .zip_eq_fast(visibilities.iter().map(Option::as_ref)) .for_each(|(storage, visibility)| { if let AggStateStorage::MaterializedInput { table, mapping } = storage { let needed_columns = mapping @@ -196,56 +176,66 @@ impl GlobalSimpleAggExecutor { }); // Apply chunk to each of the state (per agg_call) - agg_group.apply_chunk(storages, &ops, &columns, visibilities)?; + vars.agg_group + .apply_chunk( + &mut this.storages, + &ops, + &columns, + visibilities, + &mut this.distinct_dedup_tables, + ) + .await?; Ok(()) } async fn flush_data( - schema: &Schema, - agg_group: &mut Option>, + this: &mut ExecutorInner, + vars: &mut ExecutionVars, epoch: EpochPair, - storages: &mut [AggStateStorage], - result_table: &mut StateTable, - state_changed: &mut bool, ) -> StreamExecutorResult> { - if *state_changed { - let agg_group = agg_group.as_mut().unwrap(); - agg_group.flush_state_if_needed(storages).await?; + if vars.state_changed { + vars.agg_group + .flush_state_if_needed(&mut this.storages, &mut this.distinct_dedup_tables) + .await?; // Commit all state tables except for result table. futures::future::try_join_all( - iter_table_storage(storages).map(|state_table| state_table.commit(epoch)), + iter_table_storage(&mut this.storages) + .chain(this.distinct_dedup_tables.values_mut()) + .map(|state_table| state_table.commit(epoch)), ) .await?; // Create array builders. // As the datatype is retrieved from schema, it contains both group key and aggregation // state outputs. - let mut builders = schema.create_array_builders(2); + let mut builders = this.info.schema.create_array_builders(2); let mut new_ops = Vec::with_capacity(2); // Retrieve modified states and put the changes into the builders. - let curr_outputs = agg_group.get_outputs(storages).await?; + let curr_outputs = vars.agg_group.get_outputs(&this.storages).await?; let AggChangesInfo { result_row, prev_outputs, n_appended_ops, - } = agg_group.build_changes(curr_outputs, &mut builders, &mut new_ops); + } = vars + .agg_group + .build_changes(curr_outputs, &mut builders, &mut new_ops); if n_appended_ops == 0 { // Agg result is not changed. - result_table.commit_no_data_expected(epoch); + this.result_table.commit_no_data_expected(epoch); return Ok(None); } // Update the result table with latest agg outputs. if let Some(prev_outputs) = prev_outputs { - let old_row = agg_group.group_key().chain(prev_outputs); - result_table.update(old_row, result_row); + let old_row = vars.agg_group.group_key().chain(prev_outputs); + this.result_table.update(old_row, result_row); } else { - result_table.insert(result_row); + this.result_table.insert(result_row); } - result_table.commit(epoch).await?; + this.result_table.commit(epoch).await?; let columns = builders .into_iter() @@ -254,15 +244,17 @@ impl GlobalSimpleAggExecutor { let chunk = StreamChunk::new(new_ops, columns, None); - *state_changed = false; + vars.state_changed = false; Ok(Some(chunk)) } else { // No state is changed. // Call commit on state table to increment the epoch. 
- iter_table_storage(storages).for_each(|state_table| { - state_table.commit_no_data_expected(epoch); - }); - result_table.commit_no_data_expected(epoch); + iter_table_storage(&mut this.storages) + .chain(this.distinct_dedup_tables.values_mut()) + .for_each(|state_table| { + state_table.commit_no_data_expected(epoch); + }); + this.result_table.commit_no_data_expected(epoch); Ok(None) } } @@ -270,26 +262,45 @@ impl GlobalSimpleAggExecutor { #[try_stream(ok = Message, error = StreamExecutorError)] async fn execute_inner(self) { let GlobalSimpleAggExecutor { - ctx, input, - info, - input_pk_indices, - input_schema, - agg_calls, - extreme_cache_size, - mut storages, - mut result_table, - mut state_changed, + inner: mut this, } = self; - let mut agg_group = None; - let mut input = input.execute(); let barrier = expect_first_barrier(&mut input).await?; - iter_table_storage(&mut storages).for_each(|state_table| { - state_table.init_epoch(barrier.epoch); - }); - result_table.init_epoch(barrier.epoch); + iter_table_storage(&mut this.storages) + .chain(this.distinct_dedup_tables.values_mut()) + .for_each(|state_table| { + state_table.init_epoch(barrier.epoch); + }); + this.result_table.init_epoch(barrier.epoch); + + let mut vars = ExecutionVars { + // Create `AggGroup`. This will fetch previous agg result from the result table. + agg_group: AggGroup::create( + None, + &this.agg_calls, + &this.storages, + &this.result_table, + &this.input_pk_indices, + this.extreme_cache_size, + &this.input_schema, + ) + .await?, + state_changed: false, + }; + + if vars.agg_group.is_uninitialized() { + let data_types = this + .input_schema + .fields + .iter() + .map(|f| f.data_type()) + .collect::>(); + let chunk = StreamChunk::from_rows(&[], &data_types[..]); + // Apply empty chunk + Self::apply_chunk(&mut this, &mut vars, chunk).await?; + } yield Message::Barrier(barrier); @@ -297,36 +308,13 @@ impl GlobalSimpleAggExecutor { for msg in input { let msg = msg?; match msg { - Message::Watermark(_) => { - todo!("https://github.com/risingwavelabs/risingwave/issues/6042") - } - + Message::Watermark(_) => {} Message::Chunk(chunk) => { - Self::apply_chunk( - &ctx, - &info.identity, - &agg_calls, - &mut storages, - &mut result_table, - &input_pk_indices, - &input_schema, - &mut agg_group, - chunk, - extreme_cache_size, - &mut state_changed, - ) - .await?; + Self::apply_chunk(&mut this, &mut vars, chunk).await?; } Message::Barrier(barrier) => { - if let Some(chunk) = Self::flush_data( - &info.schema, - &mut agg_group, - barrier.epoch, - &mut storages, - &mut result_table, - &mut state_changed, - ) - .await? + if let Some(chunk) = + Self::flush_data(&mut this, &mut vars, barrier.epoch).await? 
{ yield Message::Chunk(chunk); } @@ -368,13 +356,14 @@ mod tests { }; let (mut tx, source) = MockSource::channel(schema, vec![2]); // pk tx.push_barrier(1, false); + tx.push_barrier(2, false); tx.push_chunk(StreamChunk::from_pretty( " I I I + 100 200 1001 + 10 14 1002 + 4 300 1003", )); - tx.push_barrier(2, false); + tx.push_barrier(3, false); tx.push_chunk(StreamChunk::from_pretty( " I I I - 100 200 1001 @@ -382,7 +371,7 @@ mod tests { - 4 300 1003 + 104 500 1004", )); - tx.push_barrier(3, false); + tx.push_barrier(4, false); // This is local simple aggregation, so we add another row count state let append_only = false; @@ -394,6 +383,7 @@ mod tests { order_pairs: vec![], append_only, filter: None, + distinct: false, }, AggCall { kind: AggKind::Sum, @@ -402,6 +392,7 @@ mod tests { order_pairs: vec![], append_only, filter: None, + distinct: false, }, AggCall { kind: AggKind::Sum, @@ -410,6 +401,7 @@ mod tests { order_pairs: vec![], append_only, filter: None, + distinct: false, }, AggCall { kind: AggKind::Min, @@ -418,6 +410,7 @@ mod tests { order_pairs: vec![], append_only, filter: None, + distinct: false, }, ]; @@ -440,7 +433,22 @@ mod tests { *msg.as_chunk().unwrap(), StreamChunk::from_pretty( " I I I I - + 3 114 514 4" + + 0 . . . " + ) + ); + assert_matches!( + simple_agg.next().await.unwrap().unwrap(), + Message::Barrier { .. } + ); + + // Consume stream chunk + let msg = simple_agg.next().await.unwrap().unwrap(); + assert_eq!( + *msg.as_chunk().unwrap(), + StreamChunk::from_pretty( + " I I I I + U- 0 . . . + U+ 3 114 514 4" ) ); assert_matches!( diff --git a/src/stream/src/executor/hash_agg.rs b/src/stream/src/executor/hash_agg.rs index c85676eca9abb..efc640e009f68 100644 --- a/src/stream/src/executor/hash_agg.rs +++ b/src/stream/src/executor/hash_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
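The test changes above reflect the new initialization path in `GlobalSimpleAggExecutor`: when the agg group has no previous result, an empty chunk is applied so the executor immediately emits the aggregate of zero rows (`count = 0`, `sum = NULL`), and later input produces an update pair against that row. A tiny sketch of the emitted changes, using simple chars and options in place of stream chunk ops and datums (the real executor emits `U-`/`U+` ops):

#[derive(Debug, PartialEq, Clone)]
struct DemoAggRow {
    count: i64,
    sum: Option<i64>, // NULL when no rows have been aggregated
}

/// Build the change to emit for a single-group aggregation:
/// an insert the first time, and a retract/insert pair afterwards.
fn build_change(prev: Option<DemoAggRow>, curr: DemoAggRow) -> Vec<(char, DemoAggRow)> {
    match prev {
        None => vec![('+', curr)],
        Some(prev) => vec![('-', prev), ('+', curr)],
    }
}

fn main() {
    // On the first barrier, the executor aggregates zero rows and emits `+ 0 NULL`.
    let initial = DemoAggRow { count: 0, sum: None };
    assert_eq!(build_change(None, initial.clone()), vec![('+', initial.clone())]);

    // After real input arrives, the old row is retracted and the new one inserted.
    let updated = DemoAggRow { count: 3, sum: Some(114) };
    assert_eq!(
        build_change(Some(initial.clone()), updated.clone()),
        vec![('-', initial), ('+', updated)]
    );
}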
@@ -14,7 +14,6 @@ use std::collections::{HashMap, HashSet}; use std::marker::PhantomData; -use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use futures::{stream, StreamExt, TryStreamExt}; @@ -22,17 +21,19 @@ use futures_async_stream::try_stream; use iter_chunks::IterChunks; use itertools::Itertools; use risingwave_common::array::StreamChunk; -use risingwave_common::buffer::Bitmap; +use risingwave_common::buffer::{Bitmap, BitmapBuilder}; use risingwave_common::catalog::Schema; -use risingwave_common::hash::{HashCode, HashKey, PrecomputedBuildHasher}; +use risingwave_common::hash::{HashKey, PrecomputedBuildHasher}; use risingwave_common::row::RowExt; use risingwave_common::util::epoch::EpochPair; -use risingwave_common::util::hash_util::Crc32FastBuilder; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_storage::StateStore; +use super::agg_common::AggExecutorArgs; use super::aggregation::{agg_call_filter_res, iter_table_storage, AggStateStorage}; use super::{ - expect_first_barrier, ActorContextRef, Executor, PkIndicesRef, StreamExecutorResult, Watermark, + expect_first_barrier, ActorContextRef, Executor, ExecutorInfo, PkIndicesRef, + StreamExecutorResult, Watermark, }; use crate::cache::{cache_may_stale, new_with_hasher, ExecutorCache}; use crate::common::table::state_table::StateTable; @@ -40,7 +41,7 @@ use crate::error::StreamResult; use crate::executor::aggregation::{generate_agg_schema, AggCall, AggChangesInfo, AggGroup}; use crate::executor::error::StreamExecutorError; use crate::executor::monitor::StreamingMetrics; -use crate::executor::{BoxedMessageStream, Message, PkIndices}; +use crate::executor::{BoxedMessageStream, Message}; use crate::task::AtomicU64Ref; type BoxedAggGroup = Box>; @@ -52,36 +53,31 @@ type AggGroupCache = ExecutorCache, PrecomputedBuildHa /// * The executor pulls data from the upstream, and apply the data chunks to the corresponding /// aggregation states. /// * While processing, it will record which keys have been modified in this epoch using -/// `modified_keys`. +/// `group_change_set`. /// * Upon a barrier is received, the executor will call `.flush` on the storage backend, so that /// all modifications will be flushed to the storage backend. Meanwhile, the executor will go -/// through `modified_keys`, and produce a stream chunk based on the state changes. +/// through `group_change_set`, and produce a stream chunk based on the state changes. pub struct HashAggExecutor { input: Box, - - extra: HashAggExecutorExtra, - - _phantom: PhantomData, + inner: ExecutorInner, } -struct HashAggExecutorExtra { - ctx: ActorContextRef, - - /// See [`Executor::schema`]. - schema: Schema, - - /// See [`Executor::pk_indices`]. - pk_indices: PkIndices, +struct ExecutorInner { + _phantom: PhantomData, - /// See [`Executor::identity`]. - identity: String, + actor_ctx: ActorContextRef, + info: ExecutorInfo, - /// Pk indices from input + /// Pk indices from input. input_pk_indices: Vec, - /// Schema from input + /// Schema from input. input_schema: Schema, + /// Indices of the columns + /// all of the aggregation functions in this executor should depend on same group of keys + group_key_indices: Vec, + /// A [`HashAggExecutor`] may have multiple [`AggCall`]s. agg_calls: Vec, @@ -94,218 +90,166 @@ struct HashAggExecutorExtra { /// table when `flush_data` is called. 
result_table: StateTable, - /// Indices of the columns - /// all of the aggregation functions in this executor should depend on same group of keys - group_key_indices: Vec, + /// State tables for deduplicating rows on distinct key for distinct agg calls. + /// One table per distinct column (may be shared by multiple agg calls). + distinct_dedup_tables: HashMap>, /// Lru manager. None if using local eviction. watermark_epoch: AtomicU64Ref, - /// How many times have we hit the cache of join executor for the lookup of each key - lookup_miss_count: AtomicU64, - - total_lookup_count: AtomicU64, - - /// How many times have we hit the cache of join executor for all the lookups generated by one - /// StreamChunk - chunk_lookup_miss_count: u64, + /// The maximum size of the chunk produced by executor at a time. + chunk_size: usize, - chunk_total_lookup_count: u64, + /// State cache size for extreme agg. + extreme_cache_size: usize, metrics: Arc, +} - /// Extreme state cache size - extreme_cache_size: usize, +struct ExecutionVars { + stats: ExecutionStats, + + /// Cache for [`AggGroup`]s. `HashKey` -> `AggGroup`. + agg_group_cache: AggGroupCache, /// Changed group keys in the current epoch (before next flush). group_change_set: HashSet, - /// The maximum size of the chunk produced by executor at a time. - chunk_size: usize, - - /// Map group key column idx to its position in group keys. - group_key_invert_idx: Vec>, - /// Buffer watermarks on group keys received since last barrier. buffered_watermarks: Vec>, } +struct ExecutionStats { + /// How many times have we hit the cache of join executor for the lookup of each key. + lookup_miss_count: u64, + total_lookup_count: u64, + + /// How many times have we hit the cache of join executor for all the lookups generated by one + /// StreamChunk. 
+ chunk_lookup_miss_count: u64, + chunk_total_lookup_count: u64, +} + +impl ExecutionStats { + fn new() -> Self { + Self { + lookup_miss_count: 0, + total_lookup_count: 0, + chunk_lookup_miss_count: 0, + chunk_total_lookup_count: 0, + } + } +} + impl Executor for HashAggExecutor { fn execute(self: Box) -> BoxedMessageStream { self.execute_inner().boxed() } fn schema(&self) -> &Schema { - &self.extra.schema + &self.inner.info.schema } fn pk_indices(&self) -> PkIndicesRef<'_> { - &self.extra.pk_indices + &self.inner.info.pk_indices } fn identity(&self) -> &str { - &self.extra.identity + &self.inner.info.identity } } impl HashAggExecutor { - #[expect(clippy::too_many_arguments)] - pub fn new( - ctx: ActorContextRef, - input: Box, - agg_calls: Vec, - storages: Vec>, - result_table: StateTable, - pk_indices: PkIndices, - extreme_cache_size: usize, - executor_id: u64, - group_key_indices: Vec, - watermark_epoch: AtomicU64Ref, - metrics: Arc, - chunk_size: usize, - ) -> StreamResult { - let input_info = input.info(); - let schema = generate_agg_schema(input.as_ref(), &agg_calls, Some(&group_key_indices)); - - let mut group_key_invert_idx = vec![None; input.info().schema.len()]; - for (group_key_seq, group_key_idx) in group_key_indices.iter().enumerate() { - group_key_invert_idx[*group_key_idx] = Some(group_key_seq); - } - + pub fn new(args: AggExecutorArgs) -> StreamResult { + let extra_args = args.extra.unwrap(); + + let input_info = args.input.info(); + let schema = generate_agg_schema( + args.input.as_ref(), + &args.agg_calls, + Some(&extra_args.group_key_indices), + ); Ok(Self { - input, - extra: HashAggExecutorExtra { - ctx, - schema, - pk_indices, - identity: format!("HashAggExecutor {:X}", executor_id), + input: args.input, + inner: ExecutorInner { + _phantom: PhantomData, + actor_ctx: args.actor_ctx, + info: ExecutorInfo { + schema, + pk_indices: args.pk_indices, + identity: format!("HashAggExecutor {:X}", args.executor_id), + }, input_pk_indices: input_info.pk_indices, input_schema: input_info.schema, - agg_calls, - extreme_cache_size, - storages, - result_table, - group_key_indices, - watermark_epoch, - group_change_set: HashSet::new(), - lookup_miss_count: AtomicU64::new(0), - total_lookup_count: AtomicU64::new(0), - chunk_lookup_miss_count: 0, - chunk_total_lookup_count: 0, - metrics, - chunk_size, - group_key_invert_idx, - buffered_watermarks: Vec::default(), + group_key_indices: extra_args.group_key_indices, + agg_calls: args.agg_calls, + storages: args.storages, + result_table: args.result_table, + distinct_dedup_tables: args.distinct_dedup_tables, + watermark_epoch: extra_args.watermark_epoch, + chunk_size: extra_args.chunk_size, + extreme_cache_size: args.extreme_cache_size, + metrics: extra_args.metrics, }, - _phantom: PhantomData, }) } - /// Get unique keys, hash codes and visibility map of each key in a batch. + /// Get visibilities that mask rows in the chunk for each group. The returned visibility + /// is a `Bitmap` rather than `Option` because it's likely to have multiple groups + /// in one chunk. /// - /// The returned order is the same as how we get distinct final columns from original columns. 
- /// - /// `keys` are Hash Keys of all the rows - /// `key_hash_codes` are hash codes of the deserialized `keys` - /// `visibility`, leave invisible ones out of aggregation - fn get_unique_keys( - keys: Vec, - key_hash_codes: Vec, - visibility: Option<&Bitmap>, - ) -> StreamExecutorResult> { - let total_num_rows = keys.len(); - assert_eq!(key_hash_codes.len(), total_num_rows); - // Each hash key, e.g. `key1` corresponds to a visibility map that not only shadows - // all the rows whose keys are not `key1`, but also shadows those rows shadowed in the - // `input` The visibility map of each hash key will be passed into `ManagedStateImpl`. - let mut key_to_vis_maps = HashMap::new(); - - // Give all the unique keys an order and iterate them later, - // the order is the same as how we get distinct final columns from original columns. - let mut unique_key_and_hash_codes = Vec::new(); - - for (row_idx, (key, hash_code)) in keys.iter().zip_eq(key_hash_codes.iter()).enumerate() { - // if the visibility map has already shadowed this row, - // then we pass - if let Some(vis_map) = visibility && !vis_map.is_set(row_idx) { - continue; - } - let vis_map = key_to_vis_maps.entry(key).or_insert_with(|| { - unique_key_and_hash_codes.push((key, hash_code)); - vec![false; total_num_rows] - }); - vis_map[row_idx] = true; + /// * `keys`: Hash Keys of rows. + /// * `base_visibility`: Visibility of rows, `None` means all are visible. + fn get_group_visibilities(keys: Vec, base_visibility: Option<&Bitmap>) -> Vec<(K, Bitmap)> { + let n_rows = keys.len(); + let mut vis_builders = HashMap::new(); + for (row_idx, key) in keys.into_iter().enumerate().filter(|(row_idx, _)| { + base_visibility + .map(|vis| vis.is_set(*row_idx)) + .unwrap_or(true) + }) { + vis_builders + .entry(key) + .or_insert_with(|| BitmapBuilder::zeroed(n_rows)) + .set(row_idx, true); } - - let result = unique_key_and_hash_codes + vis_builders .into_iter() - .map(|(key, hash_code)| { - ( - key.clone(), - *hash_code, - key_to_vis_maps.remove(key).unwrap().into_iter().collect(), - ) - }) - .collect_vec(); - - Ok(result) + .map(|(key, vis_builder)| (key, vis_builder.finish())) + .collect() } async fn apply_chunk( - HashAggExecutorExtra:: { - ref ctx, - ref identity, - ref group_key_indices, - ref agg_calls, - ref mut storages, - ref result_table, - ref input_schema, - ref input_pk_indices, - ref extreme_cache_size, - ref mut group_change_set, - ref schema, - lookup_miss_count, - total_lookup_count, - ref mut chunk_lookup_miss_count, - ref mut chunk_total_lookup_count, - .. - }: &mut HashAggExecutorExtra, - agg_group_cache: &mut AggGroupCache, + this: &mut ExecutorInner, + vars: &mut ExecutionVars, chunk: StreamChunk, ) -> StreamExecutorResult<()> { - // Compute hash code here before serializing keys to avoid duplicate hash code computation. - let hash_codes = chunk - .data_chunk() - .get_hash_values(group_key_indices, Crc32FastBuilder); - let keys = - K::build_from_hash_code(group_key_indices, chunk.data_chunk(), hash_codes.clone()); + // Find groups in this chunk and generate visibility for each group key. + let keys = K::build(&this.group_key_indices, chunk.data_chunk())?; + let group_visibilities = Self::get_group_visibilities(keys, chunk.visibility()); - // Find unique keys in this batch and generate visibility map for each key - // TODO: this might be inefficient if there are not too many duplicated keys in one batch. 
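A std-only reduction of the new `get_group_visibilities` above, added here for illustration only: bucket visible row indices by group key and build one visibility mask per group over the whole chunk. `Vec<bool>` stands in for `Bitmap`/`BitmapBuilder`.

use std::collections::HashMap;
use std::hash::Hash;

fn group_visibilities<K: Eq + Hash>(
    keys: Vec<K>,
    base_visibility: Option<&[bool]>, // stand-in for the chunk's `Bitmap`
) -> Vec<(K, Vec<bool>)> {
    let n_rows = keys.len();
    let mut masks: HashMap<K, Vec<bool>> = HashMap::new();
    for (row_idx, key) in keys
        .into_iter()
        .enumerate()
        .filter(|(i, _)| base_visibility.map_or(true, |vis| vis[*i]))
    {
        // Like `BitmapBuilder::zeroed(n_rows)` followed by `set(row_idx, true)`.
        masks.entry(key).or_insert_with(|| vec![false; n_rows])[row_idx] = true;
    }
    masks.into_iter().collect()
}

fn main() {
    let keys = vec!["a", "b", "a", "b"];
    let vis = vec![true, true, false, true]; // row 2 was already filtered out upstream
    for (key, mask) in group_visibilities(keys, Some(&vis)) {
        // "a" -> [true, false, false, false], "b" -> [false, true, false, true]
        println!("group {key}: {mask:?}");
    }
}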
- let unique_keys = Self::get_unique_keys(keys, hash_codes, chunk.visibility())?; + let group_key_types = &this.info.schema.data_types()[..this.group_key_indices.len()]; - let group_key_types = &schema.data_types()[..group_key_indices.len()]; - - let futs = unique_keys + let futs = group_visibilities .iter() - .filter_map(|(key, _, _)| { - total_lookup_count.fetch_add(1, Ordering::Relaxed); - if agg_group_cache.contains(key) { + .filter_map(|(key, _)| { + vars.stats.total_lookup_count += 1; + if vars.agg_group_cache.contains(key) { None } else { - lookup_miss_count.fetch_add(1, Ordering::Relaxed); + vars.stats.lookup_miss_count += 1; Some(async { // Create `AggGroup` for the current group if not exists. This will fetch // previous agg result from the result table. let agg_group = Box::new( AggGroup::create( Some(key.deserialize(group_key_types)?), - agg_calls, - storages, - result_table, - input_pk_indices, - *extreme_cache_size, - input_schema, + &this.agg_calls, + &this.storages, + &this.result_table, + &this.input_pk_indices, + this.extreme_cache_size, + &this.input_schema, ) .await?, ); @@ -317,13 +261,13 @@ impl HashAggExecutor { // If not all the required states/keys are in the cache, this is a chunk-level cache miss. if !futs.is_empty() { - *chunk_lookup_miss_count += 1; + vars.stats.chunk_lookup_miss_count += 1; } - *chunk_total_lookup_count += 1; + vars.stats.chunk_total_lookup_count += 1; let mut buffered = stream::iter(futs).buffer_unordered(10).fuse(); while let Some(result) = buffered.next().await { let (key, agg_group) = result?; - agg_group_cache.put(key, agg_group); + vars.agg_group_cache.put(key, agg_group); } drop(buffered); // drop to avoid accidental use @@ -332,12 +276,13 @@ impl HashAggExecutor { let (ops, columns, visibility) = chunk.into_inner(); // Calculate the row visibility for every agg call. - let visibilities: Vec<_> = agg_calls + let call_visibilities: Vec<_> = this + .agg_calls .iter() .map(|agg_call| { agg_call_filter_res( - ctx, - identity, + &this.actor_ctx, + &this.info.identity, agg_call, &columns, visibility.as_ref(), @@ -347,9 +292,9 @@ impl HashAggExecutor { .try_collect()?; // Materialize input chunk if needed. - storages + this.storages .iter_mut() - .zip_eq(visibilities.iter().map(Option::as_ref)) + .zip_eq_fast(call_visibilities.iter().map(Option::as_ref)) .for_each(|(storage, visibility)| { if let AggStateStorage::MaterializedInput { table, mapping } = storage { let needed_columns = mapping @@ -366,17 +311,25 @@ impl HashAggExecutor { }); // Apply chunk to each of the state (per agg_call), for each group. - for (key, _, vis_map) in &unique_keys { + for (key, visibility) in &group_visibilities { // Mark the group as changed. 
- group_change_set.insert(key.clone()); - let agg_group = agg_group_cache.get_mut(key).unwrap().as_mut(); - let visibilities = visibilities + vars.group_change_set.insert(key.clone()); + let agg_group = vars.agg_group_cache.get_mut(key).unwrap().as_mut(); + let visibilities = call_visibilities .iter() .map(Option::as_ref) - .map(|v| v.map_or_else(|| vis_map.clone(), |v| v & vis_map)) + .map(|call_vis| call_vis.map_or_else(|| visibility.clone(), |v| v & visibility)) .map(Some) .collect(); - agg_group.apply_chunk(storages, &ops, &columns, visibilities)?; + agg_group + .apply_chunk( + &mut this.storages, + &ops, + &columns, + visibilities, + &mut this.distinct_dedup_tables, + ) + .await?; } Ok(()) @@ -384,85 +337,79 @@ impl HashAggExecutor { #[try_stream(ok = StreamChunk, error = StreamExecutorError)] async fn flush_data<'a>( - &mut HashAggExecutorExtra:: { - ref ctx, - ref group_key_indices, - ref schema, - ref mut storages, - ref mut result_table, - ref mut group_change_set, - ref lookup_miss_count, - ref total_lookup_count, - ref mut chunk_lookup_miss_count, - ref mut chunk_total_lookup_count, - ref metrics, - ref chunk_size, - ref buffered_watermarks, - .. - }: &'a mut HashAggExecutorExtra, - agg_group_cache: &'a mut AggGroupCache, + this: &'a mut ExecutorInner, + vars: &'a mut ExecutionVars, epoch: EpochPair, ) { - let state_clean_watermark = buffered_watermarks + let state_clean_watermark = vars + .buffered_watermarks .first() .and_then(|opt_watermark| opt_watermark.as_ref()) .map(|watermark| watermark.val.clone()); - let actor_id_str = ctx.id.to_string(); - metrics + let actor_id_str = this.actor_ctx.id.to_string(); + this.metrics .agg_lookup_miss_count .with_label_values(&[&actor_id_str]) - .inc_by(lookup_miss_count.swap(0, Ordering::Relaxed)); - metrics + .inc_by(vars.stats.lookup_miss_count); + vars.stats.lookup_miss_count = 0; + this.metrics .agg_total_lookup_count .with_label_values(&[&actor_id_str]) - .inc_by(total_lookup_count.swap(0, Ordering::Relaxed)); - metrics + .inc_by(vars.stats.total_lookup_count); + vars.stats.total_lookup_count = 0; + this.metrics .agg_cached_keys .with_label_values(&[&actor_id_str]) - .set(agg_group_cache.len() as i64); - metrics + .set(vars.agg_group_cache.len() as i64); + this.metrics .agg_chunk_lookup_miss_count .with_label_values(&[&actor_id_str]) - .inc_by(*chunk_lookup_miss_count); - *chunk_lookup_miss_count = 0; - metrics + .inc_by(vars.stats.chunk_lookup_miss_count); + vars.stats.chunk_lookup_miss_count = 0; + this.metrics .agg_chunk_total_lookup_count .with_label_values(&[&actor_id_str]) - .inc_by(*chunk_total_lookup_count); - *chunk_total_lookup_count = 0; + .inc_by(vars.stats.chunk_total_lookup_count); + vars.stats.chunk_total_lookup_count = 0; - let dirty_cnt = group_change_set.len(); + let dirty_cnt = vars.group_change_set.len(); if dirty_cnt > 0 { // Produce the stream chunk - let group_key_data_types = &schema.data_types()[..group_key_indices.len()]; - let mut group_chunks = IterChunks::chunks(group_change_set.drain(), *chunk_size); + let group_key_data_types = + &this.info.schema.data_types()[..this.group_key_indices.len()]; + let mut group_chunks = + IterChunks::chunks(vars.group_change_set.drain(), this.chunk_size); while let Some(batch) = group_chunks.next() { let keys_in_batch = batch.into_iter().collect_vec(); // Flush agg states. 
for key in &keys_in_batch { - let agg_group = agg_group_cache + let agg_group = vars + .agg_group_cache .get_mut(key) .expect("changed group must have corresponding AggGroup") .as_mut(); - agg_group.flush_state_if_needed(storages).await?; + agg_group + .flush_state_if_needed(&mut this.storages, &mut this.distinct_dedup_tables) + .await?; } // Create array builders. // As the datatype is retrieved from schema, it contains both group key and // aggregation state outputs. - let mut builders = schema.create_array_builders(chunk_size * 2); - let mut new_ops = Vec::with_capacity(chunk_size * 2); + let mut builders = this.info.schema.create_array_builders(this.chunk_size * 2); + let mut new_ops = Vec::with_capacity(this.chunk_size * 2); // Calculate current outputs, concurrently. let futs = keys_in_batch.into_iter().map(|key| { // Pop out the agg group temporarily. - let mut agg_group = agg_group_cache + let mut agg_group = vars + .agg_group_cache .pop(&key) .expect("changed group must have corresponding AggGroup"); async { - let curr_outputs = agg_group.get_outputs(storages).await?; + let curr_outputs = agg_group.get_outputs(&this.storages).await?; Ok::<_, StreamExecutorError>((key, agg_group, curr_outputs)) } }); @@ -479,27 +426,27 @@ impl HashAggExecutor { prev_outputs, } = agg_group.build_changes( curr_outputs, - &mut builders[group_key_indices.len()..], + &mut builders[this.group_key_indices.len()..], &mut new_ops, ); if n_appended_ops != 0 { for _ in 0..n_appended_ops { key.deserialize_to_builders( - &mut builders[..group_key_indices.len()], + &mut builders[..this.group_key_indices.len()], group_key_data_types, )?; } if let Some(prev_outputs) = prev_outputs { let old_row = agg_group.group_key().chain(prev_outputs); - result_table.update(old_row, result_row); + this.result_table.update(old_row, result_row); } else { - result_table.insert(result_row); + this.result_table.insert(result_row); } } // Put the agg group back into the agg group cache. - agg_group_cache.put(key, agg_group); + vars.agg_group_cache.put(key, agg_group); } let columns = builders @@ -514,33 +461,39 @@ impl HashAggExecutor { } // Commit all state tables. - futures::future::try_join_all(iter_table_storage(storages).map(|state_table| async { - if let Some(watermark) = state_clean_watermark.as_ref() { - state_table.update_watermark(watermark.clone()) - }; - state_table.commit(epoch).await - })) + futures::future::try_join_all( + iter_table_storage(&mut this.storages) + .chain(this.distinct_dedup_tables.values_mut()) + .map(|state_table| async { + if let Some(watermark) = state_clean_watermark.as_ref() { + state_table.update_watermark(watermark.clone()) + }; + state_table.commit(epoch).await + }), + ) .await?; if let Some(watermark) = state_clean_watermark.as_ref() { - result_table.update_watermark(watermark.clone()); + this.result_table.update_watermark(watermark.clone()); }; - result_table.commit(epoch).await?; + this.result_table.commit(epoch).await?; // Evict cache to target capacity. - agg_group_cache.evict(); + vars.agg_group_cache.evict(); } else { // Nothing to flush. // Call commit on state table to increment the epoch. 
- iter_table_storage(storages).for_each(|state_table| { - if let Some(watermark) = state_clean_watermark.as_ref() { - state_table.update_watermark(watermark.clone()) - }; - state_table.commit_no_data_expected(epoch); - }); + iter_table_storage(&mut this.storages) + .chain(this.distinct_dedup_tables.values_mut()) + .for_each(|state_table| { + if let Some(watermark) = state_clean_watermark.as_ref() { + state_table.update_watermark(watermark.clone()) + }; + state_table.commit_no_data_expected(epoch); + }); if let Some(watermark) = state_clean_watermark.as_ref() { - result_table.update_watermark(watermark.clone()); + this.result_table.update_watermark(watermark.clone()); }; - result_table.commit_no_data_expected(epoch); + this.result_table.commit_no_data_expected(epoch); return Ok(()); } } @@ -548,71 +501,85 @@ impl HashAggExecutor { #[try_stream(ok = Message, error = StreamExecutorError)] async fn execute_inner(self) { let HashAggExecutor { - input, mut extra, .. + input, + inner: mut this, + .. } = self; - // The cached state managers. `HashKey` -> `AggGroup`. - let mut agg_group_cache = AggGroupCache::new(new_with_hasher( - extra.watermark_epoch.clone(), - PrecomputedBuildHasher, - )); + let mut vars = ExecutionVars { + stats: ExecutionStats::new(), + agg_group_cache: AggGroupCache::new(new_with_hasher( + this.watermark_epoch.clone(), + PrecomputedBuildHasher, + )), + group_change_set: HashSet::new(), + buffered_watermarks: vec![None; this.group_key_indices.len()], + }; + + let group_key_invert_idx = { + let mut group_key_invert_idx = vec![None; input.info().schema.len()]; + for (group_key_seq, group_key_idx) in this.group_key_indices.iter().enumerate() { + group_key_invert_idx[*group_key_idx] = Some(group_key_seq); + } + group_key_invert_idx + }; // First barrier let mut input = input.execute(); let barrier = expect_first_barrier(&mut input).await?; - iter_table_storage(&mut extra.storages).for_each(|state_table| { - state_table.init_epoch(barrier.epoch); - }); - extra.result_table.init_epoch(barrier.epoch); - agg_group_cache.update_epoch(barrier.epoch.curr); + iter_table_storage(&mut this.storages) + .chain(this.distinct_dedup_tables.values_mut()) + .for_each(|state_table| { + state_table.init_epoch(barrier.epoch); + }); + this.result_table.init_epoch(barrier.epoch); + vars.agg_group_cache.update_epoch(barrier.epoch.curr); yield Message::Barrier(barrier); - extra.buffered_watermarks = vec![None; extra.group_key_indices.len()]; - #[for_await] for msg in input { let msg = msg?; match msg { Message::Watermark(mut watermark) => { - let group_key_seq = extra.group_key_invert_idx[watermark.col_idx]; + let group_key_seq = group_key_invert_idx[watermark.col_idx]; if let Some(group_key_seq) = group_key_seq { watermark.col_idx = group_key_seq; - extra.buffered_watermarks[group_key_seq] = Some(watermark); + vars.buffered_watermarks[group_key_seq] = Some(watermark); } } Message::Chunk(chunk) => { - Self::apply_chunk(&mut extra, &mut agg_group_cache, chunk).await?; + Self::apply_chunk(&mut this, &mut vars, chunk).await?; } Message::Barrier(barrier) => { #[for_await] - for chunk in Self::flush_data(&mut extra, &mut agg_group_cache, barrier.epoch) { + for chunk in Self::flush_data(&mut this, &mut vars, barrier.epoch) { yield Message::Chunk(chunk?); } - for buffered_watermark in &mut extra.buffered_watermarks { + for buffered_watermark in &mut vars.buffered_watermarks { if let Some(watermark) = buffered_watermark.take() { yield Message::Watermark(watermark); } } // Update the vnode bitmap for state 
tables of all agg calls if asked. - if let Some(vnode_bitmap) = barrier.as_update_vnode_bitmap(extra.ctx.id) { - iter_table_storage(&mut extra.storages).for_each(|state_table| { + if let Some(vnode_bitmap) = barrier.as_update_vnode_bitmap(this.actor_ctx.id) { + iter_table_storage(&mut this.storages).for_each(|state_table| { let _ = state_table.update_vnode_bitmap(vnode_bitmap.clone()); }); let previous_vnode_bitmap = - extra.result_table.update_vnode_bitmap(vnode_bitmap.clone()); + this.result_table.update_vnode_bitmap(vnode_bitmap.clone()); // Manipulate the cache if necessary. if cache_may_stale(&previous_vnode_bitmap, &vnode_bitmap) { - agg_group_cache.clear(); + vars.agg_group_cache.clear(); } } // Update the current epoch. - agg_group_cache.update_epoch(barrier.epoch.curr); + vars.agg_group_cache.update_epoch(barrier.epoch.curr); yield Message::Barrier(barrier); } @@ -635,13 +602,17 @@ mod tests { use risingwave_common::hash::SerializedKey; use risingwave_common::row::{AscentOwnedRow, OwnedRow, Row}; use risingwave_common::types::DataType; + use risingwave_common::util::iter_util::ZipEqDebug; use risingwave_expr::expr::*; use risingwave_storage::memory::MemoryStateStore; use risingwave_storage::StateStore; + use crate::executor::agg_common::{AggExecutorArgs, AggExecutorArgsExtra}; use crate::executor::aggregation::{AggArgs, AggCall}; use crate::executor::monitor::StreamingMetrics; - use crate::executor::test_utils::agg_executor::{create_agg_state_table, create_result_table}; + use crate::executor::test_utils::agg_executor::{ + create_agg_state_storage, create_result_table, + }; use crate::executor::test_utils::*; use crate::executor::{ActorContext, Executor, HashAggExecutor, Message, PkIndices}; @@ -655,10 +626,10 @@ mod tests { extreme_cache_size: usize, executor_id: u64, ) -> Box { - let mut agg_state_tables = Vec::with_capacity(agg_calls.iter().len()); + let mut storages = Vec::with_capacity(agg_calls.iter().len()); for (idx, agg_call) in agg_calls.iter().enumerate() { - agg_state_tables.push( - create_agg_state_table( + storages.push( + create_agg_state_storage( store.clone(), TableId::new(idx as u32), agg_call, @@ -679,25 +650,32 @@ mod tests { ) .await; - HashAggExecutor::::new( - ActorContext::create(123), + HashAggExecutor::::new(AggExecutorArgs { input, - agg_calls, - agg_state_tables, - result_table, + actor_ctx: ActorContext::create(123), pk_indices, - extreme_cache_size, executor_id, - group_key_indices, - Arc::new(AtomicU64::new(0)), - Arc::new(StreamingMetrics::unused()), - 1024, - ) + + extreme_cache_size, + + agg_calls, + storages, + result_table, + distinct_dedup_tables: Default::default(), + + extra: Some(AggExecutorArgsExtra { + group_key_indices, + + metrics: Arc::new(StreamingMetrics::unused()), + chunk_size: 1024, + watermark_epoch: Arc::new(AtomicU64::new(0)), + }), + }) .unwrap() .boxed() } - // --- Test HashAgg with in-memory KeyedState --- + // --- Test HashAgg with in-memory StateStore --- #[tokio::test] async fn test_local_hash_aggregation_count_in_memory() { @@ -751,6 +729,7 @@ mod tests { order_pairs: vec![], append_only, filter: None, + distinct: false, }, AggCall { kind: AggKind::Count, @@ -759,6 +738,7 @@ mod tests { order_pairs: vec![], append_only, filter: None, + distinct: false, }, AggCall { kind: AggKind::Count, @@ -767,6 +747,7 @@ mod tests { order_pairs: vec![], append_only, filter: None, + distinct: false, }, ]; @@ -852,6 +833,7 @@ mod tests { order_pairs: vec![], append_only, filter: None, + distinct: false, }, AggCall { kind: AggKind::Sum, @@ 
-860,6 +842,7 @@ mod tests { order_pairs: vec![], append_only, filter: None, + distinct: false, }, // This is local hash aggregation, so we add another sum state AggCall { @@ -869,6 +852,7 @@ mod tests { order_pairs: vec![], append_only, filter: None, + distinct: false, }, ]; @@ -955,6 +939,7 @@ mod tests { order_pairs: vec![], append_only: false, filter: None, + distinct: false, }, AggCall { kind: AggKind::Min, @@ -963,6 +948,7 @@ mod tests { order_pairs: vec![], append_only: false, filter: None, + distinct: false, }, ]; @@ -1054,6 +1040,7 @@ mod tests { order_pairs: vec![], append_only, filter: None, + distinct: false, }, AggCall { kind: AggKind::Min, @@ -1062,6 +1049,7 @@ mod tests { order_pairs: vec![], append_only, filter: None, + distinct: false, }, ]; @@ -1118,7 +1106,7 @@ mod tests { fn sorted_rows(self) -> Vec<(Op, OwnedRow)> { let (chunk, ops) = self.into_parts(); ops.into_iter() - .zip_eq( + .zip_eq_debug( chunk .rows() .map(Row::into_owned_row) diff --git a/src/stream/src/executor/hash_join.rs b/src/stream/src/executor/hash_join.rs index ac5346aeb935d..57e52589517d2 100644 --- a/src/stream/src/executor/hash_join.rs +++ b/src/stream/src/executor/hash_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -28,6 +28,7 @@ use risingwave_common::hash::HashKey; use risingwave_common::row::{OwnedRow, Row}; use risingwave_common::types::{DataType, ToOwnedDatum}; use risingwave_common::util::epoch::EpochPair; +use risingwave_common::util::iter_util::ZipEqDebug; use risingwave_expr::expr::BoxedExpression; use risingwave_storage::StateStore; @@ -140,15 +141,15 @@ fn is_subset(vec1: Vec, vec2: Vec) -> bool { pub struct JoinParams { /// Indices of the join keys pub join_key_indices: Vec, - /// Indices of the distribution keys - pub dist_keys: Vec, + /// Indices of the input pk after dedup + pub deduped_pk_indices: Vec, } impl JoinParams { - pub fn new(join_key_indices: Vec, dist_keys: Vec) -> Self { + pub fn new(join_key_indices: Vec, deduped_pk_indices: Vec) -> Self { Self { join_key_indices, - dist_keys, + deduped_pk_indices, } } } @@ -158,8 +159,8 @@ struct JoinSide { ht: JoinHashMap, /// Indices of the join key columns join_key_indices: Vec, - /// The primary key indices of state table on this side - pk_indices: Vec, + /// The primary key indices of state table on this side after dedup + deduped_pk_indices: Vec, /// The data type of all columns without degree. 
all_data_types: Vec, /// The start position for the side in output new columns @@ -175,7 +176,7 @@ impl std::fmt::Debug for JoinSide { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("JoinSide") .field("join_key_indices", &self.join_key_indices) - .field("pk_indices", &self.pk_indices) + .field("deduped_pk_indices", &self.deduped_pk_indices) .field("col_types", &self.all_data_types) .field("start_pos", &self.start_pos) .field("i2o_mapping", &self.i2o_mapping) @@ -561,12 +562,12 @@ impl HashJoinExecutor HashJoinExecutor HashJoinExecutor = Self::hash_eq_match(key, &mut side_match.ht).await?; match op { @@ -1010,8 +1011,7 @@ mod tests { use risingwave_common::hash::{Key128, Key64}; use risingwave_common::types::ScalarImpl; use risingwave_common::util::sort_util::OrderType; - use risingwave_expr::expr::expr_binary_nonnull::new_binary_expr; - use risingwave_expr::expr::InputRefExpression; + use risingwave_expr::expr::{new_binary_expr, InputRefExpression}; use risingwave_pb::expr::expr_node::Type; use risingwave_storage::memory::MemoryStateStore; @@ -1088,8 +1088,8 @@ mod tests { }; let (tx_l, source_l) = MockSource::channel(schema.clone(), vec![1]); let (tx_r, source_r) = MockSource::channel(schema, vec![1]); - let params_l = JoinParams::new(vec![0], vec![]); - let params_r = JoinParams::new(vec![0], vec![]); + let params_l = JoinParams::new(vec![0], vec![1]); + let params_r = JoinParams::new(vec![0], vec![1]); let cond = with_condition.then(create_cond); let mem_state = MemoryStateStore::new(); @@ -1126,7 +1126,7 @@ mod tests { params_r, vec![null_safe], vec![1], - (0..schema_len).into_iter().collect_vec(), + (0..schema_len).collect_vec(), 1, cond, "HashJoinExecutor".to_string(), @@ -1199,7 +1199,7 @@ mod tests { params_r, vec![false], vec![1], - (0..schema_len).into_iter().collect_vec(), + (0..schema_len).collect_vec(), 1, cond, "HashJoinExecutor".to_string(), diff --git a/src/stream/src/executor/hop_window.rs b/src/stream/src/executor/hop_window.rs index 627a1ed84a8b5..4972d9c858102 100644 --- a/src/stream/src/executor/hop_window.rs +++ b/src/stream/src/executor/hop_window.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,8 +20,7 @@ use num_traits::CheckedSub; use risingwave_common::array::column::Column; use risingwave_common::array::{DataChunk, StreamChunk, Vis}; use risingwave_common::types::{DataType, IntervalUnit, ScalarImpl}; -use risingwave_expr::expr::expr_binary_nonnull::new_binary_expr; -use risingwave_expr::expr::{Expression, InputRefExpression, LiteralExpression}; +use risingwave_expr::expr::{new_binary_expr, Expression, InputRefExpression, LiteralExpression}; use risingwave_expr::ExprError; use risingwave_pb::expr::expr_node; diff --git a/src/stream/src/executor/integration_tests.rs b/src/stream/src/executor/integration_tests.rs index 046e08cf1873f..36db18440d28b 100644 --- a/src/stream/src/executor/integration_tests.rs +++ b/src/stream/src/executor/integration_tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
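The rename from `dist_keys` to `deduped_pk_indices` above suggests the join state table key is the join key followed by the input primary key with overlapping columns removed. A hedged, self-contained sketch of that dedup step follows; the real derivation lives in the planner, not in this diff, so treat this as one plausible reading.

fn dedup_pk_indices(input_pk_indices: &[usize], join_key_indices: &[usize]) -> Vec<usize> {
    // Drop pk columns that are already part of the join key, since the state table
    // key is conceptually (join key ++ remaining pk columns).
    input_pk_indices
        .iter()
        .copied()
        .filter(|idx| !join_key_indices.contains(idx))
        .collect()
}

fn main() {
    // Matches the test change `JoinParams::new(vec![0], vec![1])`:
    // join key = column 0, input pk = column 1, nothing to drop.
    assert_eq!(dedup_pk_indices(&[1], &[0]), vec![1]);
    // If the pk overlapped the join key, the overlapping column would be dropped.
    assert_eq!(dedup_pk_indices(&[0, 2], &[0]), vec![2]);
}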
@@ -61,6 +61,7 @@ async fn test_merger_sum_aggr() { order_pairs: vec![], append_only, filter: None, + distinct: false, }, AggCall { kind: AggKind::Sum, @@ -69,6 +70,7 @@ async fn test_merger_sum_aggr() { order_pairs: vec![], append_only, filter: None, + distinct: false, }, ], vec![], @@ -112,8 +114,11 @@ async fn test_merger_sum_aggr() { // create a round robin dispatcher, which dispatches messages to the actors let (input, rx) = channel_for_test(); - let _schema = Schema { - fields: vec![Field::unnamed(DataType::Int64)], + let schema = Schema { + fields: vec![ + Field::unnamed(DataType::Int64), + Field::unnamed(DataType::Int64), + ], }; let receiver_op = Box::new(ReceiverExecutor::for_test(rx)); let dispatcher = DispatchExecutor::new( @@ -136,7 +141,7 @@ async fn test_merger_sum_aggr() { handles.push(tokio::spawn(actor.run())); // use a merge operator to collect data from dispatchers before sending them to aggregator - let merger = MergeExecutor::for_test(outputs); + let merger = MergeExecutor::for_test(outputs, schema); // for global aggregator, we need to sum data and sum row count let append_only = false; @@ -152,6 +157,7 @@ async fn test_merger_sum_aggr() { order_pairs: vec![], append_only, filter: None, + distinct: false, }, AggCall { kind: AggKind::Sum, @@ -160,6 +166,7 @@ async fn test_merger_sum_aggr() { order_pairs: vec![], append_only, filter: None, + distinct: false, }, ], vec![], diff --git a/src/stream/src/executor/local_simple_agg.rs b/src/stream/src/executor/local_simple_agg.rs index 8950265d1dd0d..b0c5889fd8d54 100644 --- a/src/stream/src/executor/local_simple_agg.rs +++ b/src/stream/src/executor/local_simple_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
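`test_merger_sum_aggr` above wires local partial aggregations through a dispatcher and a merge executor into a global aggregator that "sums data and sums row count". A self-contained sketch of that combine step, with a simplified `Partial` struct in place of real aggregation state (names are illustrative only):

#[derive(Debug, Default, Clone, Copy, PartialEq)]
struct Partial {
    row_count: i64,
    sum: i64,
}

fn combine(partials: &[Partial]) -> Partial {
    // The global aggregator sums the local row counts and sums the local sums.
    partials.iter().fold(Partial::default(), |acc, p| Partial {
        row_count: acc.row_count + p.row_count,
        sum: acc.sum + p.sum,
    })
}

fn main() {
    // Three local actors each saw a slice of the input.
    let locals = [
        Partial { row_count: 2, sum: 10 },
        Partial { row_count: 1, sum: 4 },
        Partial { row_count: 3, sum: 100 },
    ];
    assert_eq!(combine(&locals), Partial { row_count: 6, sum: 114 });
}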
@@ -18,6 +18,7 @@ use itertools::Itertools; use risingwave_common::array::column::Column; use risingwave_common::array::{Op, StreamChunk}; use risingwave_common::catalog::Schema; +use risingwave_common::util::iter_util::ZipEqFast; use super::aggregation::agg_impl::{create_streaming_agg_impl, StreamingAggImpl}; use super::aggregation::{agg_call_filter_res, generate_agg_schema, AggCall}; @@ -75,8 +76,8 @@ impl LocalSimpleAggExecutor { .try_collect()?; agg_calls .iter() - .zip_eq(visibilities) - .zip_eq(aggregators) + .zip_eq_fast(visibilities) + .zip_eq_fast(aggregators) .try_for_each(|((agg_call, visibility), state)| { let col_refs = agg_call .args @@ -115,10 +116,7 @@ impl LocalSimpleAggExecutor { for msg in input { let msg = msg?; match msg { - Message::Watermark(_) => { - todo!("https://github.com/risingwavelabs/risingwave/issues/6042") - } - + Message::Watermark(_) => {} Message::Chunk(chunk) => { Self::apply_chunk(&ctx, &info.identity, &agg_calls, &mut aggregators, chunk)?; is_dirty = true; @@ -130,7 +128,7 @@ impl LocalSimpleAggExecutor { let mut builders = info.schema.create_array_builders(1); aggregators .iter_mut() - .zip_eq(builders.iter_mut()) + .zip_eq_fast(builders.iter_mut()) .try_for_each(|(state, builder)| { let data = state.get_output()?; trace!("append_datum: {:?}", data); @@ -208,6 +206,7 @@ mod tests { order_pairs: vec![], append_only: false, filter: None, + distinct: false, }]; let simple_agg = Box::new( @@ -266,6 +265,7 @@ mod tests { order_pairs: vec![], append_only: false, filter: None, + distinct: false, }, AggCall { kind: AggKind::Sum, @@ -274,6 +274,7 @@ mod tests { order_pairs: vec![], append_only: false, filter: None, + distinct: false, }, AggCall { kind: AggKind::Sum, @@ -282,6 +283,7 @@ mod tests { order_pairs: vec![], append_only: false, filter: None, + distinct: false, }, ]; diff --git a/src/stream/src/executor/lookup.rs b/src/stream/src/executor/lookup.rs index 62c106afa51b4..db068fefc9b5b 100644 --- a/src/stream/src/executor/lookup.rs +++ b/src/stream/src/executor/lookup.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/lookup/cache.rs b/src/stream/src/executor/lookup/cache.rs index e18ecf5ccda24..498cb164a9620 100644 --- a/src/stream/src/executor/lookup/cache.rs +++ b/src/stream/src/executor/lookup/cache.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/lookup/impl_.rs b/src/stream/src/executor/lookup/impl_.rs index 29dbfcb6185f0..e88d2cd7af04a 100644 --- a/src/stream/src/executor/lookup/impl_.rs +++ b/src/stream/src/executor/lookup/impl_.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
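Several hunks in this diff swap itertools' `zip_eq` for `zip_eq_fast`/`zip_eq_debug` from `risingwave_common::util::iter_util`. The exact contracts live in that module; assuming the intent is a length check that is cheap or debug-only, the idea can be reduced to the following sketch (not the real implementation).

fn zip_eq_debug_sketch<A, B>(a: A, b: B) -> impl Iterator<Item = (A::Item, B::Item)>
where
    A: ExactSizeIterator,
    B: ExactSizeIterator,
{
    // In release builds this is a plain `zip`; in debug builds a length mismatch
    // panics early instead of silently truncating the shorter side.
    debug_assert_eq!(a.len(), b.len(), "zipped iterators must have equal length");
    a.zip(b)
}

fn main() {
    let ops = ["+", "-"];
    let rows = [1, 2];
    for (op, row) in zip_eq_debug_sketch(ops.iter(), rows.iter()) {
        println!("{op} {row}");
    }
}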
@@ -19,11 +19,14 @@ use risingwave_common::array::RowRef; use risingwave_common::catalog::{ColumnDesc, Schema}; use risingwave_common::row::{OwnedRow, Row, RowExt}; use risingwave_common::util::epoch::EpochPair; +use risingwave_common::util::iter_util::ZipEqDebug; use risingwave_common::util::sort_util::OrderPair; +use risingwave_hummock_sdk::HummockReadEpoch; +use risingwave_storage::table::batch_table::storage_table::StorageTable; +use risingwave_storage::table::TableIter; use risingwave_storage::StateStore; use super::sides::{stream_lookup_arrange_prev_epoch, stream_lookup_arrange_this_epoch}; -use crate::common::table::state_table::StateTable; use crate::common::StreamChunkBuilder; use crate::executor::error::{StreamExecutorError, StreamExecutorResult}; use crate::executor::lookup::cache::LookupCache; @@ -102,7 +105,7 @@ pub struct LookupExecutorParams { /// The join keys on the arrangement side. pub arrange_join_key_indices: Vec, - pub state_table: StateTable, + pub storage_table: StorageTable, pub watermark_epoch: AtomicU64Ref, @@ -122,7 +125,7 @@ impl LookupExecutor { arrange_join_key_indices, schema: output_schema, column_mapping, - state_table, + storage_table, watermark_epoch, chunk_size, } = params; @@ -216,7 +219,7 @@ impl LookupExecutor { order_rules: arrangement_order_rules, key_indices: arrange_join_key_indices, use_current_epoch, - state_table, + storage_table, }, column_mapping, key_indices_mapping, @@ -252,6 +255,12 @@ impl LookupExecutor { self.arrangement.col_types.len(), ); + let reorder_chunk_data_types = self + .column_mapping + .iter() + .map(|x| self.chunk_data_types[*x].clone()) + .collect_vec(); + #[for_await] for msg in input { let msg = msg?; @@ -276,7 +285,6 @@ impl LookupExecutor { ArrangeMessage::ArrangeReady(arrangement_chunks, barrier) => { // The arrangement is ready, and we will receive a bunch of stream messages for // the next poll. - // TODO: apply chunk as soon as we receive them, instead of batching. for chunk in arrangement_chunks { @@ -300,13 +308,16 @@ impl LookupExecutor { let mut builder = StreamChunkBuilder::new( self.chunk_size, - &self.chunk_data_types, + &reorder_chunk_data_types, stream_to_output.clone(), arrange_to_output.clone(), ); - for (op, row) in ops.iter().zip_eq(chunk.rows()) { - for matched_row in self.lookup_one_row(&row).await? { + for (op, row) in ops.iter().zip_eq_debug(chunk.rows()) { + for matched_row in self + .lookup_one_row(&row, self.last_barrier.as_ref().unwrap().epoch) + .await? + { tracing::trace!(target: "events::stream::lookup::put", "{:?} {:?}", row, matched_row); if let Some(chunk) = builder.append_row(*op, row, &matched_row) { @@ -327,45 +338,7 @@ impl LookupExecutor { /// Store the barrier. #[expect(clippy::unused_async)] async fn process_barrier(&mut self, barrier: Barrier) -> StreamExecutorResult<()> { - if self.last_barrier.is_none() { - assert_ne!(barrier.epoch.prev, 0, "lookup requires prev epoch != 0"); - - // This is the first barrier, and we need to take special care of it. - // - // **Case 1: Lookup after Chain** - // - // In this case, we have the full state already on shared storage, so we must set prev - // epoch = 0, and only one lookup path will work. Otherwise, the result might be - // duplicated. - // - // **Case 2: Lookup after Arrange** - // - // The lookup is created by delta join plan without index, and prev epoch can't have any - // data. Therefore, it is also okay to simply set prev epoch to 0. 
- - self.last_barrier = Some(Barrier { - epoch: EpochPair { - prev: 0, - curr: barrier.epoch.curr, - }, - ..barrier - }); - - self.arrangement.state_table.init_epoch(barrier.epoch); - return Ok(()); - } else { - // there is no write operation on the arrangement table by the lookup executor, so here - // the `state_table::commit(epoch)` just means the data in the epoch will be visible by - // the lookup executor - // TODO(st1page): maybe we should not use state table here. - - self.arrangement - .state_table - .commit_no_data_expected(barrier.epoch); - - self.last_barrier = Some(barrier) - } - + self.last_barrier = Some(barrier); Ok(()) } @@ -373,12 +346,8 @@ impl LookupExecutor { async fn lookup_one_row( &mut self, stream_row: &RowRef<'_>, + epoch_pair: EpochPair, ) -> StreamExecutorResult> { - // fast-path for empty look-ups. - if self.arrangement.state_table.epoch() == 0 { - return Ok(vec![]); - } - // stream_row is the row from stream side, we need to transform into the correct order of // the arrangement side. let lookup_row = stream_row @@ -396,23 +365,31 @@ impl LookupExecutor { let all_data_iter = match self.arrangement.use_current_epoch { true => { self.arrangement - .state_table - .iter_with_pk_prefix(&lookup_row) + .storage_table + .batch_iter_with_pk_bounds( + HummockReadEpoch::NoWait(epoch_pair.curr), + &lookup_row, + .., + false, + ) .await? } false => { - unreachable!("iter over prev epoch is not deprecated") - // self.arrangement - // .state_table - // .iter_prev_epoch_with_pk_prefix(&lookup_row) - // .await? + self.arrangement + .storage_table + .batch_iter_with_pk_bounds( + HummockReadEpoch::NoWait(epoch_pair.prev), + &lookup_row, + .., + false, + ) + .await? } }; pin_mut!(all_data_iter); - while let Some(inner) = all_data_iter.next().await { + while let Some(row) = all_data_iter.next_row().await? { // Only need value (include storage pk). - let row = inner.unwrap(); all_rows.push(row); } } diff --git a/src/stream/src/executor/lookup/sides.rs b/src/stream/src/executor/lookup/sides.rs index 2df1a174765c2..e4ab39327c53c 100644 --- a/src/stream/src/executor/lookup/sides.rs +++ b/src/stream/src/executor/lookup/sides.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,9 +20,9 @@ use risingwave_common::array::StreamChunk; use risingwave_common::catalog::ColumnDesc; use risingwave_common::types::DataType; use risingwave_common::util::sort_util::OrderPair; +use risingwave_storage::table::batch_table::storage_table::StorageTable; use risingwave_storage::StateStore; -use crate::common::table::state_table::StateTable; use crate::executor::error::StreamExecutorError; use crate::executor::{Barrier, Executor, Message, MessageStream}; @@ -72,7 +72,7 @@ pub(crate) struct ArrangeJoinSide { /// Whether to join with the arrangement of the current epoch pub use_current_epoch: bool, - pub state_table: StateTable, + pub storage_table: StorageTable, } /// Message from the `arrange_join_stream`. 
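With the arrangement now read through a batch `StorageTable` at `HummockReadEpoch::NoWait(...)`, the lookup side only has to decide which half of the barrier's epoch pair to read at, instead of driving a `StateTable` through init/commit. A std-only sketch of that choice, with a local `EpochPair` stand-in rather than the `risingwave_common` type:

#[derive(Clone, Copy, Debug)]
struct EpochPair {
    curr: u64,
    prev: u64,
}

fn lookup_read_epoch(use_current_epoch: bool, epoch: EpochPair) -> u64 {
    // `true`: join against the arrangement as of this barrier;
    // `false`: join against the previous epoch's snapshot.
    if use_current_epoch { epoch.curr } else { epoch.prev }
}

fn main() {
    let epoch = EpochPair { curr: 200, prev: 100 };
    assert_eq!(lookup_read_epoch(true, epoch), 200);
    assert_eq!(lookup_read_epoch(false, epoch), 100);
}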
diff --git a/src/stream/src/executor/lookup/tests.rs b/src/stream/src/executor/lookup/tests.rs index 7d4a8e7ac2e87..3b95b324db69b 100644 --- a/src/stream/src/executor/lookup/tests.rs +++ b/src/stream/src/executor/lookup/tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,9 +24,8 @@ use risingwave_common::catalog::{ColumnDesc, ColumnId, Field, Schema, TableId}; use risingwave_common::types::DataType; use risingwave_common::util::sort_util::{OrderPair, OrderType}; use risingwave_storage::memory::MemoryStateStore; -use risingwave_storage::StateStore; +use risingwave_storage::table::batch_table::storage_table::StorageTable; -use crate::common::table::state_table::StateTable; use crate::executor::lookup::impl_::LookupExecutorParams; use crate::executor::lookup::LookupExecutor; use crate::executor::test_utils::*; @@ -210,22 +209,6 @@ fn check_chunk_eq(chunk1: &StreamChunk, chunk2: &StreamChunk) { assert_eq!(format!("{:?}", chunk1), format!("{:?}", chunk2)); } -async fn build_state_table_helper( - s: S, - table_id: TableId, - columns: Vec, - order_types: Vec, - pk_indices: Vec, -) -> StateTable { - StateTable::new_without_distribution( - s, - table_id, - columns, - order_types.iter().map(|pair| pair.order_type).collect_vec(), - pk_indices, - ) - .await -} #[tokio::test] async fn test_lookup_this_epoch() { // TODO: memory state store doesn't support read epoch yet, so it is possible that this test @@ -250,14 +233,17 @@ async fn test_lookup_this_epoch() { Field::with_name(DataType::Int64, "rowid_column"), Field::with_name(DataType::Int64, "join_column"), ]), - state_table: build_state_table_helper( + storage_table: StorageTable::for_test( store.clone(), table_id, arrangement_col_descs(), - arrangement_col_arrange_rules(), + arrangement_col_arrange_rules() + .iter() + .map(|x| x.order_type) + .collect_vec(), vec![1, 0], - ) - .await, + vec![0, 1], + ), watermark_epoch: Arc::new(AtomicU64::new(0)), chunk_size: 1024, })); @@ -318,14 +304,17 @@ async fn test_lookup_last_epoch() { Field::with_name(DataType::Int64, "join_column"), Field::with_name(DataType::Int64, "rowid_column"), ]), - state_table: build_state_table_helper( + storage_table: StorageTable::for_test( store.clone(), table_id, arrangement_col_descs(), - arrangement_col_arrange_rules(), + arrangement_col_arrange_rules() + .iter() + .map(|x| x.order_type) + .collect_vec(), vec![1, 0], - ) - .await, + vec![0, 1], + ), watermark_epoch: Arc::new(AtomicU64::new(0)), chunk_size: 1024, })); diff --git a/src/stream/src/executor/lookup_union.rs b/src/stream/src/executor/lookup_union.rs index c0077af4156aa..a1be7e90b3984 100644 --- a/src/stream/src/executor/lookup_union.rs +++ b/src/stream/src/executor/lookup_union.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/stream/src/executor/managed_state/join/join_entry_state.rs b/src/stream/src/executor/managed_state/join/join_entry_state.rs index f50a33eeec29f..52d8dac3083e0 100644 --- a/src/stream/src/executor/managed_state/join/join_entry_state.rs +++ b/src/stream/src/executor/managed_state/join/join_entry_state.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -76,6 +76,7 @@ mod tests { use itertools::Itertools; use risingwave_common::array::*; use risingwave_common::types::ScalarImpl; + use risingwave_common::util::iter_util::ZipEqDebug; use super::*; @@ -105,7 +106,7 @@ mod tests { for ((_, matched_row), (d1, d2)) in managed_state .values_mut(&col_types) - .zip_eq(col1.iter().zip_eq(col2.iter())) + .zip_eq_debug(col1.iter().zip_eq_debug(col2.iter())) { let matched_row = matched_row.unwrap(); assert_eq!(matched_row.row[0], Some(ScalarImpl::Int64(*d1))); diff --git a/src/stream/src/executor/managed_state/join/mod.rs b/src/stream/src/executor/managed_state/join/mod.rs index 74464425a6b5f..0f34b73dfcd24 100644 --- a/src/stream/src/executor/managed_state/join/mod.rs +++ b/src/stream/src/executor/managed_state/join/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/managed_state/mod.rs b/src/stream/src/executor/managed_state/mod.rs index d203eab3fafa1..c53ad22556f12 100644 --- a/src/stream/src/executor/managed_state/mod.rs +++ b/src/stream/src/executor/managed_state/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/managed_state/top_n/mod.rs b/src/stream/src/executor/managed_state/top_n/mod.rs index c71d687132784..3cc59f0cb8fa5 100644 --- a/src/stream/src/executor/managed_state/top_n/mod.rs +++ b/src/stream/src/executor/managed_state/top_n/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/managed_state/top_n/top_n_state.rs b/src/stream/src/executor/managed_state/top_n/top_n_state.rs index fc8e53fb2e550..b2ba5adb9ad1c 100644 --- a/src/stream/src/executor/managed_state/top_n/top_n_state.rs +++ b/src/stream/src/executor/managed_state/top_n/top_n_state.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/merge.rs b/src/stream/src/executor/merge.rs index 9d21210dfc790..479b3ba16bc61 100644 --- a/src/stream/src/executor/merge.rs +++ b/src/stream/src/executor/merge.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -85,12 +85,12 @@ impl MergeExecutor { } #[cfg(test)] - pub fn for_test(inputs: Vec) -> Self { + pub fn for_test(inputs: Vec, schema: Schema) -> Self { use super::exchange::input::LocalInput; use crate::executor::exchange::input::Input; Self::new( - Schema::default(), + schema, vec![], ActorContext::create(114), 514, @@ -147,6 +147,14 @@ impl MergeExecutor { if let Some(update) = barrier.as_update_merge(self.actor_context.id, self.upstream_fragment_id) { + assert!(update.new_upstream_fragment_id.is_none()); + + // `Watermark` of upstream may become stale after upstream scaling. + select_all + .buffered_watermarks + .values_mut() + .for_each(|buffers| buffers.clear()); + if !update.added_upstream_actor_id.is_empty() { // Create new upstreams receivers. let new_upstreams: Vec<_> = update @@ -193,11 +201,9 @@ impl MergeExecutor { for buffers in select_all.buffered_watermarks.values_mut() { // Call `check_heap` in case the only upstream(s) that does not have // watermark in heap is removed - if let Some(watermark) = buffers.remove_buffer( + buffers.remove_buffer( update.removed_upstream_actor_id.iter().copied().collect(), - ) { - yield Message::Watermark(watermark); - } + ); } } @@ -450,7 +456,7 @@ mod tests { txs.push(tx); rxs.push(rx); } - let merger = MergeExecutor::for_test(rxs); + let merger = MergeExecutor::for_test(rxs, Schema::default()); let mut handles = Vec::with_capacity(CHANNEL_NUMBER); let epochs = (10..1000u64).step_by(10).collect_vec(); @@ -606,6 +612,7 @@ mod tests { (actor_id, upstream_fragment_id) => MergeUpdate { actor_id, upstream_fragment_id, + new_upstream_fragment_id: None, added_upstream_actor_id: vec![new], removed_upstream_actor_id: vec![old], } diff --git a/src/stream/src/executor/mod.rs b/src/stream/src/executor/mod.rs index a270ffd2045d1..ac7f897a391dc 100644 --- a/src/stream/src/executor/mod.rs +++ b/src/stream/src/executor/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -54,6 +54,7 @@ mod barrier_align; pub mod exchange; pub mod monitor; +pub mod agg_common; pub mod aggregation; mod batch_query; mod chain; @@ -126,7 +127,9 @@ use simple::{SimpleExecutor, SimpleExecutorWrapper}; pub use sink::SinkExecutor; pub use sort::SortExecutor; pub use source::*; -pub use top_n::{AppendOnlyTopNExecutor, GroupTopNExecutor, TopNExecutor}; +pub use top_n::{ + AppendOnlyGroupTopNExecutor, AppendOnlyTopNExecutor, GroupTopNExecutor, TopNExecutor, +}; pub use union::UnionExecutor; pub use watermark_filter::WatermarkFilterExecutor; pub use wrapper::WrapperExecutor; diff --git a/src/stream/src/executor/monitor/mod.rs b/src/stream/src/executor/monitor/mod.rs index e20ad22e56ac8..bbbe30fb4c288 100644 --- a/src/stream/src/executor/monitor/mod.rs +++ b/src/stream/src/executor/monitor/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
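The merge executor above now clears its buffered watermarks whenever the upstream set changes, since the merged watermark is effectively a minimum over per-upstream values and entries from removed upstreams would be stale. An illustrative, simplified buffer follows; it is not the real `BufferedWatermarks` type.

use std::collections::HashMap;

#[derive(Default)]
struct WatermarkBuffer {
    // upstream actor id -> latest watermark received from it
    per_upstream: HashMap<u64, i64>,
}

impl WatermarkBuffer {
    /// Record a watermark and return the merged (minimum) watermark over the
    /// upstreams seen so far. The real executor additionally tracks which
    /// upstreams have reported before emitting anything downstream.
    fn update(&mut self, upstream: u64, watermark: i64) -> Option<i64> {
        self.per_upstream.insert(upstream, watermark);
        self.per_upstream.values().min().copied()
    }

    /// Called when the set of upstream actors changes (scaling): buffered values
    /// may belong to removed upstreams and would otherwise hold the output back.
    fn clear(&mut self) {
        self.per_upstream.clear();
    }
}

fn main() {
    let mut buf = WatermarkBuffer::default();
    assert_eq!(buf.update(1, 100), Some(100));
    assert_eq!(buf.update(2, 80), Some(80));
    // Upstream 2 is replaced during scaling; its buffered watermark is now stale.
    buf.clear();
    assert_eq!(buf.update(3, 120), Some(120));
}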
diff --git a/src/stream/src/executor/monitor/streaming_stats.rs b/src/stream/src/executor/monitor/streaming_stats.rs index d4adb895cebe8..13ab83d6486f3 100644 --- a/src/stream/src/executor/monitor/streaming_stats.rs +++ b/src/stream/src/executor/monitor/streaming_stats.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -43,8 +43,10 @@ pub struct StreamingMetrics { pub actor_sampled_deserialize_duration_ns: GenericCounterVec, pub source_output_row_count: GenericCounterVec, pub source_row_per_barrier: GenericCounterVec, - pub exchange_recv_size: GenericCounterVec, + + // Exchange (see also `compute::ExchangeServiceMetrics`) pub exchange_frag_recv_size: GenericCounterVec, + pub stream_total_mem_usage: IntGauge, // Streaming Join pub join_lookup_miss_count: GenericCounterVec, @@ -132,14 +134,6 @@ impl StreamingMetrics { ) .unwrap(); - let exchange_recv_size = register_int_counter_vec_with_registry!( - "stream_exchange_recv_size", - "Total size of messages that have been received from upstream Actor", - &["up_actor_id", "down_actor_id"], - registry - ) - .unwrap(); - let exchange_frag_recv_size = register_int_counter_vec_with_registry!( "stream_exchange_frag_recv_size", "Total size of messages that have been received from upstream Fragment", @@ -148,6 +142,13 @@ impl StreamingMetrics { ) .unwrap(); + let stream_total_mem_usage = register_int_gauge_with_registry!( + "stream_total_mem_usage", + "The memory allocated by streaming jobs, get from TaskLocalAlloc", + registry + ) + .unwrap(); + let actor_fast_poll_duration = register_gauge_vec_with_registry!( "stream_actor_fast_poll_duration", "tokio's metrics", @@ -451,8 +452,8 @@ impl StreamingMetrics { actor_sampled_deserialize_duration_ns, source_output_row_count, source_row_per_barrier, - exchange_recv_size, exchange_frag_recv_size, + stream_total_mem_usage, join_lookup_miss_count, join_total_lookup_count, join_insert_cache_miss_count, diff --git a/src/stream/src/executor/mview/materialize.rs b/src/stream/src/executor/mview/materialize.rs index 32ec5c2522cdd..4f95450ff35f6 100644 --- a/src/stream/src/executor/mview/materialize.rs +++ b/src/stream/src/executor/mview/materialize.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
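The metrics changes above drop the per-actor `stream_exchange_recv_size` counter and add a `stream_total_mem_usage` gauge. As a minimal sketch, registering and updating such a gauge with the `prometheus` crate macros already used in that file looks roughly like this (the value reported here is arbitrary, and this is not the actual `StreamingMetrics::new` code):

use prometheus::{register_int_gauge_with_registry, Registry};

fn main() {
    let registry = Registry::new();
    let mem_usage = register_int_gauge_with_registry!(
        "stream_total_mem_usage",
        "The memory allocated by streaming jobs, get from TaskLocalAlloc",
        registry
    )
    .unwrap();

    // Some component periodically reports the bytes currently allocated.
    mem_usage.set(64 * 1024 * 1024);
    assert_eq!(mem_usage.get(), 64 * 1024 * 1024);
}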
@@ -26,6 +26,7 @@ use risingwave_common::catalog::{ColumnDesc, ColumnId, Schema, TableId}; use risingwave_common::row::{CompactedRow, RowDeserializer}; use risingwave_common::types::DataType; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; +use risingwave_common::util::iter_util::{ZipEqDebug, ZipEqFast}; use risingwave_common::util::ordered::OrderedRowSerde; use risingwave_common::util::sort_util::OrderPair; use risingwave_pb::catalog::Table; @@ -112,7 +113,7 @@ impl MaterializeExecutor { let schema = input.schema().clone(); let columns = column_ids .into_iter() - .zip_eq(schema.fields.iter()) + .zip_eq_fast(schema.fields.iter()) .map(|(column_id, field)| ColumnDesc::unnamed(column_id, field.data_type())) .collect_vec(); @@ -159,9 +160,7 @@ impl MaterializeExecutor { for msg in input { let msg = msg?; yield match msg { - Message::Watermark(_) => { - todo!("https://github.com/risingwavelabs/risingwave/issues/6042") - } + Message::Watermark(w) => Message::Watermark(w), Message::Chunk(chunk) => { match self.handle_pk_conflict { true => { @@ -294,7 +293,7 @@ impl MaterializeBuffer { let key_chunk = data_chunk.reorder_columns(pk_indices); key_chunk .rows_with_holes() - .zip_eq(pks.iter_mut()) + .zip_eq_fast(pks.iter_mut()) .for_each(|(r, vnode_and_pk)| { if let Some(r) = r { pk_serde.serialize(r, vnode_and_pk); @@ -306,7 +305,7 @@ impl MaterializeBuffer { let mut buffer = MaterializeBuffer::new(); match vis { Vis::Bitmap(vis) => { - for ((op, key, value), vis) in izip!(ops, pks, values).zip_eq(vis.iter()) { + for ((op, key, value), vis) in izip!(ops, pks, values).zip_eq_debug(vis.iter()) { if vis { match op { Op::Insert | Op::UpdateInsert => buffer.insert(key, value), diff --git a/src/stream/src/executor/mview/mod.rs b/src/stream/src/executor/mview/mod.rs index 43078a2f99933..15f3e82c5533c 100644 --- a/src/stream/src/executor/mview/mod.rs +++ b/src/stream/src/executor/mview/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/mview/test_utils.rs b/src/stream/src/executor/mview/test_utils.rs index 8e23fda6332c6..df060d1e43a92 100644 --- a/src/stream/src/executor/mview/test_utils.rs +++ b/src/stream/src/executor/mview/test_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/now.rs b/src/stream/src/executor/now.rs index aefa4dcb7d1d9..3476c8433676c 100644 --- a/src/stream/src/executor/now.rs +++ b/src/stream/src/executor/now.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
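A std-only sketch of what the materialize buffer touched above accumulates per chunk: the latest write per serialized primary key, with Insert/UpdateInsert becoming puts and Delete/UpdateDelete becoming deletes. Keys and values are plain strings here instead of the real encodings, and the conflict handling is deliberately simplified.

use std::collections::HashMap;

#[derive(Debug, PartialEq)]
enum Write {
    Put(String),
    Delete,
}

#[derive(Clone, Copy)]
enum Op {
    Insert,
    Delete,
    UpdateDelete,
    UpdateInsert,
}

fn buffer_chunk(rows: &[(Op, &str, &str)]) -> HashMap<String, Write> {
    let mut buffer = HashMap::new();
    for &(op, pk, value) in rows {
        match op {
            Op::Insert | Op::UpdateInsert => {
                buffer.insert(pk.to_owned(), Write::Put(value.to_owned()));
            }
            Op::Delete | Op::UpdateDelete => {
                buffer.insert(pk.to_owned(), Write::Delete);
            }
        }
    }
    buffer
}

fn main() {
    let rows = [
        (Op::Insert, "k1", "v1"),
        (Op::UpdateDelete, "k1", "v1"),
        (Op::UpdateInsert, "k1", "v2"),
        (Op::Delete, "k2", "v9"),
    ];
    let buffer = buffer_chunk(&rows);
    assert_eq!(buffer["k1"], Write::Put("v2".to_owned()));
    assert_eq!(buffer["k2"], Write::Delete);
}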
@@ -99,24 +99,27 @@ impl NowExecutor { let time_millis = Epoch::from(barrier.epoch.curr).as_unix_millis(); let timestamp = Some(ScalarImpl::Int64((time_millis * 1000) as i64)); - let data_chunk = DataChunk::from_rows( - &if last_timestamp.is_some() { - vec![ + let stream_chunk = if last_timestamp.is_some() { + let data_chunk = DataChunk::from_rows( + &[ row::once(last_timestamp.to_datum_ref()), row::once(timestamp.to_datum_ref()), - ] - } else { - vec![row::once(timestamp.to_datum_ref())] - }, - &schema.data_types(), - ); - let mut ops = if last_timestamp.is_some() { - vec![Op::Delete] + ], + &schema.data_types(), + ); + let ops = vec![Op::Delete, Op::Insert]; + + StreamChunk::from_parts(ops, data_chunk) } else { - vec![] + let data_chunk = DataChunk::from_rows( + &[row::once(timestamp.to_datum_ref())], + &schema.data_types(), + ); + let ops = vec![Op::Insert]; + + StreamChunk::from_parts(ops, data_chunk) }; - ops.push(Op::Insert); - let stream_chunk = StreamChunk::from_parts(ops, data_chunk); + yield Message::Chunk(stream_chunk); // TODO: depends on "https://github.com/risingwavelabs/risingwave/issues/6042" diff --git a/src/stream/src/executor/project.rs b/src/stream/src/executor/project.rs index 0b0146940eec0..b691ac4e7b71f 100644 --- a/src/stream/src/executor/project.rs +++ b/src/stream/src/executor/project.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -169,8 +169,7 @@ mod tests { use risingwave_common::array::StreamChunk; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::types::DataType; - use risingwave_expr::expr::expr_binary_nonnull::new_binary_expr; - use risingwave_expr::expr::{InputRefExpression, LiteralExpression}; + use risingwave_expr::expr::{new_binary_expr, InputRefExpression, LiteralExpression}; use risingwave_pb::expr::expr_node::Type; use super::super::test_utils::MockSource; diff --git a/src/stream/src/executor/project_set.rs b/src/stream/src/executor/project_set.rs index ed343a1f3182e..2d5c1643b9303 100644 --- a/src/stream/src/executor/project_set.rs +++ b/src/stream/src/executor/project_set.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
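The `NowExecutor` hunk above drops the watermark `todo!` and spells out the chunk construction: on each barrier it retracts the previously emitted timestamp, if any, and inserts the new one, so downstream state always holds exactly one `now()` row. A self-contained toy of that update-by-retraction encoding, with illustrative names only:

#[derive(Debug, PartialEq)]
enum ToyOp {
    Insert,
    Delete,
}

// Encode "the value changed from `last` to `current`" as (op, value) rows:
// exactly one Insert, preceded by a Delete of the old value when one exists.
fn emit_now(last: Option<i64>, current: i64) -> Vec<(ToyOp, i64)> {
    match last {
        Some(old) => vec![(ToyOp::Delete, old), (ToyOp::Insert, current)],
        None => vec![(ToyOp::Insert, current)],
    }
}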
@@ -22,6 +22,7 @@ use risingwave_common::array::column::Column; use risingwave_common::array::{ArrayBuilder, DataChunk, I64ArrayBuilder, Op, StreamChunk}; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::types::DataType; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::table_function::ProjectSetSelectItem; use super::error::StreamExecutorError; @@ -166,7 +167,7 @@ impl ProjectSetExecutor { projected_row_id_builder.append(Some(i as i64)); } - for (item, builder) in items.into_iter().zip_eq(builders.iter_mut()) { + for (item, builder) in items.into_iter().zip_eq_fast(builders.iter_mut()) { match item { Either::Left(array_ref) => { builder.append_array(&array_ref); @@ -208,8 +209,9 @@ mod tests { use risingwave_common::array::StreamChunk; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::types::DataType; - use risingwave_expr::expr::expr_binary_nonnull::new_binary_expr; - use risingwave_expr::expr::{Expression, InputRefExpression, LiteralExpression}; + use risingwave_expr::expr::{ + new_binary_expr, Expression, InputRefExpression, LiteralExpression, + }; use risingwave_expr::table_function::repeat_tf; use risingwave_pb::expr::expr_node::Type; diff --git a/src/stream/src/executor/rearranged_chain.rs b/src/stream/src/executor/rearranged_chain.rs index fec705fabc9ff..ea17229300c5f 100644 --- a/src/stream/src/executor/rearranged_chain.rs +++ b/src/stream/src/executor/rearranged_chain.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ use super::error::StreamExecutorError; use super::{ expect_first_barrier, Barrier, BoxedExecutor, Executor, ExecutorInfo, Message, MessageStream, }; -use crate::executor::PkIndices; +use crate::executor::{BoxedMessageStream, PkIndices, Watermark}; use crate::task::{ActorId, CreateMviewProgress}; /// `ChainExecutor` is an executor that enables synchronization between the existing stream and @@ -49,35 +49,47 @@ pub struct RearrangedChainExecutor { info: ExecutorInfo, } -fn mapping(upstream_indices: &[usize], msg: Message) -> Message { +fn mapping(upstream_indices: &[usize], msg: Message) -> Option { match msg { - Message::Watermark(_) => { - todo!("https://github.com/risingwavelabs/risingwave/issues/6042") + Message::Watermark(watermark) => { + mapping_watermark(watermark, upstream_indices).map(Message::Watermark) } - Message::Chunk(chunk) => { let (ops, columns, visibility) = chunk.into_inner(); let mapped_columns = upstream_indices .iter() .map(|&i| columns[i].clone()) .collect(); - Message::Chunk(StreamChunk::new(ops, mapped_columns, visibility)) + Some(Message::Chunk(StreamChunk::new( + ops, + mapped_columns, + visibility, + ))) } - _ => msg, + Message::Barrier(_) => Some(msg), } } +fn mapping_watermark(watermark: Watermark, upstream_indices: &[usize]) -> Option { + upstream_indices + .iter() + .position(|&idx| idx == watermark.col_idx) + .map(|idx| watermark.with_idx(idx)) +} + #[derive(Debug)] enum RearrangedMessage { RearrangedBarrier(Barrier), PhantomBarrier(Barrier), Chunk(StreamChunk), + // This watermark is just a place holder. 
+ Watermark, } impl RearrangedMessage { fn phantom_into(self) -> Option { match self { - RearrangedMessage::RearrangedBarrier(_) => None, + RearrangedMessage::RearrangedBarrier(_) | RearrangedMessage::Watermark => None, RearrangedMessage::PhantomBarrier(barrier) => Message::Barrier(barrier).into(), RearrangedMessage::Chunk(chunk) => Message::Chunk(chunk).into(), } @@ -87,10 +99,7 @@ impl RearrangedMessage { impl RearrangedMessage { fn rearranged_from(msg: Message) -> Self { match msg { - Message::Watermark(_) => { - todo!("https://github.com/risingwavelabs/risingwave/issues/6042") - } - + Message::Watermark(_) => RearrangedMessage::Watermark, Message::Chunk(chunk) => RearrangedMessage::Chunk(chunk), Message::Barrier(barrier) => RearrangedMessage::RearrangedBarrier(barrier), } @@ -98,10 +107,7 @@ impl RearrangedMessage { fn phantom_from(msg: Message) -> Self { match msg { - Message::Watermark(_) => { - todo!("https://github.com/risingwavelabs/risingwave/issues/6042") - } - + Message::Watermark(_) => RearrangedMessage::Watermark, Message::Chunk(chunk) => RearrangedMessage::Chunk(chunk), Message::Barrier(barrier) => RearrangedMessage::PhantomBarrier(barrier), } @@ -135,10 +141,11 @@ impl RearrangedChainExecutor { async fn execute_inner(mut self) { // 0. Project the upstream with `upstream_indices`. let upstream_indices = self.upstream_indices.clone(); - let mut upstream = self - .upstream - .execute() - .map(move |result| result.map(|msg| mapping(&upstream_indices, msg))); + + let mut upstream = Box::pin(Self::mapping_stream( + self.upstream.execute(), + &upstream_indices, + )); // 1. Poll the upstream to get the first barrier. let first_barrier = expect_first_barrier(&mut upstream).await?; @@ -190,6 +197,8 @@ impl RearrangedChainExecutor { let mut last_rearranged_epoch = create_epoch; let mut stop_rearrange_tx = Some(stop_rearrange_tx); + let mut processed_rows: u64 = 0; + // 6. Consume the merged `rearranged` stream. #[for_await] for rearranged_msg in &mut rearranged { @@ -201,8 +210,11 @@ impl RearrangedChainExecutor { // consumed the whole snapshot and be on the upstream now. RearrangedMessage::PhantomBarrier(barrier) => { // Update the progress since we've consumed all chunks before this phantom. - self.progress - .update(last_rearranged_epoch.curr, barrier.epoch.curr); + self.progress.update( + last_rearranged_epoch.curr, + barrier.epoch.curr, + processed_rows, + ); if barrier.epoch.curr >= last_rearranged_epoch.curr { // Stop the background rearrangement task. @@ -218,7 +230,13 @@ impl RearrangedChainExecutor { last_rearranged_epoch = barrier.epoch; yield Message::Barrier(barrier); } - RearrangedMessage::Chunk(chunk) => yield Message::Chunk(chunk), + RearrangedMessage::Chunk(chunk) => { + processed_rows += chunk.cardinality() as u64; + yield Message::Chunk(chunk) + } + RearrangedMessage::Watermark => { + // Ignore watermark during snapshot consumption. + } } } @@ -310,6 +328,18 @@ impl RearrangedChainExecutor { } } } + + #[expect(clippy::needless_lifetimes, reason = "code generated by try_stream")] + #[try_stream(ok = Message, error = StreamExecutorError)] + async fn mapping_stream(stream: BoxedMessageStream, upstream_indices: &[usize]) { + #[for_await] + for msg in stream { + match mapping(upstream_indices, msg?) 
{ + Some(msg) => yield msg, + None => continue, + } + } + } } impl Executor for RearrangedChainExecutor { diff --git a/src/stream/src/executor/receiver.rs b/src/stream/src/executor/receiver.rs index a264d5e28944c..364c738f42c29 100644 --- a/src/stream/src/executor/receiver.rs +++ b/src/stream/src/executor/receiver.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -308,6 +308,7 @@ mod tests { (actor_id, upstream_fragment_id) => MergeUpdate { actor_id, upstream_fragment_id, + new_upstream_fragment_id: None, added_upstream_actor_id: vec![new], removed_upstream_actor_id: vec![old], } diff --git a/src/stream/src/executor/row_id_gen.rs b/src/stream/src/executor/row_id_gen.rs index 612eea12f7000..8489893d435d6 100644 --- a/src/stream/src/executor/row_id_gen.rs +++ b/src/stream/src/executor/row_id_gen.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,13 +14,13 @@ use futures::StreamExt; use futures_async_stream::try_stream; -use itertools::Itertools; use risingwave_common::array::column::Column; use risingwave_common::array::stream_chunk::Ops; use risingwave_common::array::{ArrayBuilder, I64ArrayBuilder, Op, StreamChunk}; use risingwave_common::buffer::Bitmap; use risingwave_common::catalog::Schema; -use risingwave_common::util::epoch::UNIX_SINGULARITY_DATE_EPOCH; +use risingwave_common::util::epoch::UNIX_RISINGWAVE_DATE_EPOCH; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_source::row_id::RowIdGenerator; use super::{ @@ -71,7 +71,7 @@ impl RowIdGenExecutor { // TODO: We may generate row id for each vnode in the future instead of using the first // vnode. let vnode_id = vnodes.next_set_bit(0).unwrap() as u32; - RowIdGenerator::with_epoch(vnode_id, *UNIX_SINGULARITY_DATE_EPOCH) + RowIdGenerator::with_epoch(vnode_id, *UNIX_RISINGWAVE_DATE_EPOCH) } /// Generate a row ID column according to ops. @@ -79,7 +79,7 @@ impl RowIdGenExecutor { let len = column.array_ref().len(); let mut builder = I64ArrayBuilder::new(len); - for (datum, op) in column.array_ref().iter().zip_eq(ops) { + for (datum, op) in column.array_ref().iter().zip_eq_fast(ops) { // Only refill row_id for insert operation. match op { Op::Insert => builder.append(Some(self.row_id_generator.next().await)), diff --git a/src/stream/src/executor/simple.rs b/src/stream/src/executor/simple.rs index e26be7df7d3e7..ccbdbabecba2f 100644 --- a/src/stream/src/executor/simple.rs +++ b/src/stream/src/executor/simple.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/sink.rs b/src/stream/src/executor/sink.rs index 20676fb3a7402..2f32aede5c7a0 100644 --- a/src/stream/src/executor/sink.rs +++ b/src/stream/src/executor/sink.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
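The new `mapping_watermark` in `rearranged_chain.rs` above decides whether a watermark survives the chain's column projection. The same index translation, detached from the executor types:

// A watermark on input column `col_idx` is kept only if that column is part of
// the projection; its index is rewritten to the column's output position.
fn remap_watermark_col(upstream_indices: &[usize], col_idx: usize) -> Option<usize> {
    upstream_indices.iter().position(|&idx| idx == col_idx)
}

#[test]
fn remap_examples() {
    // Projecting columns [2, 0]: a watermark on column 0 moves to output index
    // 1, while a watermark on column 1 has no output column and is dropped.
    assert_eq!(remap_watermark_col(&[2, 0], 0), Some(1));
    assert_eq!(remap_watermark_col(&[2, 0], 1), None);
}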
@@ -17,7 +17,12 @@ use std::time::Instant; use futures::StreamExt; use futures_async_stream::try_stream; +use risingwave_common::array::{Op, StreamChunk}; use risingwave_common::catalog::Schema; +use risingwave_common::row::Row; +use risingwave_common::types::DataType; +use risingwave_common::util::chunk_coalesce::DataChunkBuilder; +use risingwave_connector::sink::catalog::SinkType; use risingwave_connector::sink::{Sink, SinkConfig, SinkImpl}; use risingwave_connector::ConnectorParams; @@ -34,6 +39,7 @@ pub struct SinkExecutor { connector_params: ConnectorParams, schema: Schema, pk_indices: Vec, + sink_type: SinkType, } async fn build_sink( @@ -41,13 +47,29 @@ async fn build_sink( schema: Schema, pk_indices: PkIndices, connector_params: ConnectorParams, + sink_type: SinkType, ) -> StreamExecutorResult> { Ok(Box::new( - SinkImpl::new(config, schema, pk_indices, connector_params).await?, + SinkImpl::new(config, schema, pk_indices, connector_params, sink_type).await?, )) } +// Drop all the UPDATE/DELETE messages in this chunk. +fn force_append_only(chunk: StreamChunk, data_types: Vec) -> StreamChunk { + let mut builder = DataChunkBuilder::new(data_types, chunk.cardinality() + 1); + for (op, row_ref) in chunk.rows() { + if op == Op::Insert { + let finished = builder.append_one_row(row_ref.into_owned_row()); + assert!(finished.is_none()); + } + } + let data_chunk = builder.consume_all().unwrap(); + let ops = vec![Op::Insert; data_chunk.capacity()]; + StreamChunk::from_parts(ops, data_chunk) +} + impl SinkExecutor { + #[allow(clippy::too_many_arguments)] pub fn new( materialize_executor: BoxedExecutor, metrics: Arc, @@ -56,6 +78,7 @@ impl SinkExecutor { connector_params: ConnectorParams, schema: Schema, pk_indices: Vec, + sink_type: SinkType, ) -> Self { Self { input: materialize_executor, @@ -65,6 +88,7 @@ impl SinkExecutor { pk_indices, schema, connector_params, + sink_type, } } @@ -72,15 +96,17 @@ impl SinkExecutor { async fn execute_inner(self) { // the flag is required because kafka transaction requires at least one // message, so we should abort the transaction if the flag is true. - let mut empty_epoch_flag = true; + let mut empty_checkpoint_flag = true; let mut in_transaction = false; let mut epoch = 0; + let data_types = self.schema.data_types(); let mut sink = build_sink( self.config.clone(), self.schema, self.pk_indices, self.connector_params, + self.sink_type, ) .await?; @@ -89,46 +115,53 @@ impl SinkExecutor { #[for_await] for msg in input { match msg? { - Message::Watermark(_) => { - todo!("https://github.com/risingwavelabs/risingwave/issues/6042") - } + Message::Watermark(w) => yield Message::Watermark(w), Message::Chunk(chunk) => { if !in_transaction { sink.begin_epoch(epoch).await?; in_transaction = true; } - let visible_chunk = chunk.clone().compact(); + let visible_chunk = if self.sink_type == SinkType::ForceAppendOnly { + // Force append-only by dropping UPDATE/DELETE messages. We do this when the + // user forces the sink to be append-only while it is actually not based on + // the frontend derivation result. 
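// `force_append_only` (defined earlier in this file's diff) rebuilds the chunk
// keeping only `Op::Insert` rows: UPDATE (`U-`/`U+`) and DELETE rows are
// dropped outright. The builder is sized `cardinality() + 1`, so it never
// fills up mid-loop and `consume_all()` yields a single pending chunk whose
// ops are all `Op::Insert`. The trade-off is intentional: the sink sees an
// append-only stream even though the upstream changelog is not, which is
// exactly what `SinkType::ForceAppendOnly` requests.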
+ force_append_only(chunk.clone(), data_types.clone()) + } else { + chunk.clone().compact() + }; if let Err(e) = sink.write_batch(visible_chunk).await { sink.abort().await?; return Err(e.into()); } - empty_epoch_flag = false; + empty_checkpoint_flag = false; yield Message::Chunk(chunk); } Message::Barrier(barrier) => { - if in_transaction { - if empty_epoch_flag { - sink.abort().await?; - tracing::debug!( - "transaction abort due to empty epoch, epoch: {:?}", - epoch - ); - } else { - let start_time = Instant::now(); - sink.commit().await?; - self.metrics - .sink_commit_duration - .with_label_values(&[ - self.identity.as_str(), - self.config.get_connector(), - ]) - .observe(start_time.elapsed().as_millis() as f64); + if barrier.checkpoint { + if in_transaction { + if empty_checkpoint_flag { + sink.abort().await?; + tracing::debug!( + "transaction abort due to empty epoch, epoch: {:?}", + epoch + ); + } else { + let start_time = Instant::now(); + sink.commit().await?; + self.metrics + .sink_commit_duration + .with_label_values(&[ + self.identity.as_str(), + self.config.get_connector(), + ]) + .observe(start_time.elapsed().as_millis() as f64); + } } + in_transaction = false; + empty_checkpoint_flag = true; } - in_transaction = false; - empty_epoch_flag = true; epoch = barrier.epoch.curr; yield Message::Barrier(barrier); } @@ -210,6 +243,7 @@ mod test { Default::default(), schema.clone(), pk.clone(), + SinkType::AppendOnly, ); let mut executor = SinkExecutor::execute(Box::new(sink_executor)); @@ -219,4 +253,83 @@ mod test { executor.next().await.unwrap().unwrap(); executor.next().await.unwrap().unwrap(); } + + #[ignore] + #[tokio::test] + async fn test_force_append_only_sink() { + use risingwave_common::array::stream_chunk::StreamChunk; + use risingwave_common::array::StreamChunkTestExt; + use risingwave_common::catalog::Field; + use risingwave_common::types::DataType; + + use crate::executor::Barrier; + + let properties = maplit::hashmap! 
{ + "connector".into() => "console".into(), + "format".into() => "append_only".into(), + "force_append_only".into() => "true".into() + }; + let schema = Schema::new(vec![ + Field::with_name(DataType::Int64, "v1"), + Field::with_name(DataType::Int64, "v2"), + ]); + let pk = vec![]; + + // Mock `child` + let mock = MockSource::with_messages( + schema.clone(), + pk.clone(), + vec![ + Message::Chunk(std::mem::take(&mut StreamChunk::from_pretty( + " I I + + 3 2", + ))), + Message::Barrier(Barrier::new_test_barrier(1)), + Message::Chunk(std::mem::take(&mut StreamChunk::from_pretty( + " I I + U- 3 2 + U+ 3 4 + + 6 5", + ))), + ], + ); + + let config = SinkConfig::from_hashmap(properties).unwrap(); + let sink_executor = SinkExecutor::new( + Box::new(mock), + Arc::new(StreamingMetrics::unused()), + config, + 0, + Default::default(), + schema.clone(), + pk.clone(), + SinkType::ForceAppendOnly, + ); + + let mut executor = SinkExecutor::execute(Box::new(sink_executor)); + + executor.next().await.unwrap().unwrap(); + // let chunk_msg = executor.next().await.unwrap().unwrap(); + // assert_eq!( + // chunk_msg.into_chunk().unwrap(), + // StreamChunk::from_pretty( + // " I I + // + 3 2", + // ) + // ); + + executor.next().await.unwrap().unwrap(); + + executor.next().await.unwrap().unwrap(); + // let chunk_msg = executor.next().await.unwrap().unwrap(); + // assert_eq!( + // chunk_msg.into_chunk().unwrap(), + // StreamChunk::from_pretty( + // " I I + // + 6 5", + // ) + // ); + + executor.next().await.unwrap().unwrap(); + } } diff --git a/src/stream/src/executor/sort.rs b/src/stream/src/executor/sort.rs index 81bbf62c33b00..753945f2a6d0f 100644 --- a/src/stream/src/executor/sort.rs +++ b/src/stream/src/executor/sort.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/sort_buffer.rs b/src/stream/src/executor/sort_buffer.rs index 2c503b923f594..e7d96b8ffdb7f 100644 --- a/src/stream/src/executor/sort_buffer.rs +++ b/src/stream/src/executor/sort_buffer.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -140,36 +140,25 @@ impl SortBuffer { // Only records with timestamp greater than the last watermark will be output, so // records will only be emitted exactly once unless recovery. let start_bound = if let Some(last_watermark) = last_watermark.clone() { - // TODO: `start_bound` is wrong here, only values with `val.0 > last_watermark` - // should be output, but it's hard to represent `OwnedRow::MAX`. A possible - // implementation is introducing `next_unit` on a subset of `ScalarImpl` variants. - // Currently, we can skip some values explicitly. - Bound::Excluded((last_watermark, OwnedRow::empty().into())) + Bound::Excluded(( + // TODO: unsupported type or watermark overflow. Do we have better ways instead + // of unwrap? + last_watermark.successor().unwrap(), + OwnedRow::empty().into(), + )) } else { Bound::Unbounded }; - // TODO: `end_bound` = `Bound::Inclusive((watermark_value + 1, OwnedRow::empty()))`, but - // it's hard to represent now, so we end the loop by an explicit break. 
- let end_bound = Bound::Unbounded; - - for ((time_col, _), (row, _)) in self.buffer.range((start_bound, end_bound)) { - if let Some(ref last_watermark) = &last_watermark && time_col == last_watermark { - continue; - } - // Only when a record's timestamp is prior to the watermark should it be - // sent to downstream. - if time_col <= watermark_val { - // Add the record to stream chunk data. Note that we retrieve the - // record from a BTreeMap, so data in this chunk should be ordered - // by timestamp and pk. - if let Some(data_chunk) = data_chunk_builder.append_one_row(row) { - // When the chunk size reaches its maximum, we construct a data chunk and - // send it to downstream. - yield data_chunk; - } - } else { - // We have collected all data below watermark. - break; + let end_bound = Bound::Excluded(( + (watermark_val.successor().unwrap()), + OwnedRow::empty().into(), + )); + + for (_, (row, _)) in self.buffer.range((start_bound, end_bound)) { + if let Some(data_chunk) = data_chunk_builder.append_one_row(row) { + // When the chunk size reaches its maximum, we construct a data chunk and + // send it to downstream. + yield data_chunk; } } diff --git a/src/stream/src/executor/source/executor_core.rs b/src/stream/src/executor/source/executor_core.rs new file mode 100644 index 0000000000000..233ecf269a83a --- /dev/null +++ b/src/stream/src/executor/source/executor_core.rs @@ -0,0 +1,71 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; + +use risingwave_common::catalog::{ColumnId, TableId}; +use risingwave_connector::source::{SplitId, SplitImpl}; +use risingwave_source::source_desc::SourceDescBuilder; +use risingwave_storage::StateStore; + +use super::SourceStateTableHandler; + +/// [`StreamSourceCore`] stores the necessary information for the source executor to execute on the +/// external connector. +pub struct StreamSourceCore { + pub(crate) source_id: TableId, + pub(crate) source_name: String, + + pub(crate) column_ids: Vec, + + pub(crate) source_identify: String, + + /// `source_desc_builder` will be taken (`mem::take`) on execution. A `SourceDesc` (currently + /// named `SourceDescV2`) will be constructed and used for execution. + pub(crate) source_desc_builder: Option, + + /// Split info for stream source. A source executor might read data from several splits of + /// external connector. + pub(crate) stream_source_splits: HashMap, + + /// Stores information of the splits. + pub(crate) split_state_store: SourceStateTableHandler, + + /// In-memory cache for the splits. 
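The `sort_buffer.rs` hunk above replaces the skip-and-break loop with half-open range bounds derived from `successor()`, so each pass emits exactly the rows whose timestamp lies in `(last_watermark, watermark]`. A scalar toy of that window; the real keys are composite `(timestamp, pk)` pairs and use `ScalarImpl::successor()`, which this sketch sidesteps with plain integer keys:

use std::collections::BTreeMap;
use std::ops::Bound;

// Emit every buffered row with `last_watermark < key <= watermark`, once.
fn emit_window(
    buf: &BTreeMap<i64, String>,
    last_watermark: Option<i64>,
    watermark: i64,
) -> Vec<String> {
    let start = match last_watermark {
        Some(lw) => Bound::Excluded(lw),
        None => Bound::Unbounded,
    };
    buf.range((start, Bound::Included(watermark)))
        .map(|(_, row)| row.clone())
        .collect()
}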
+ pub(crate) state_cache: HashMap, +} + +impl StreamSourceCore +where + S: StateStore, +{ + pub fn new( + source_id: TableId, + source_name: String, + column_ids: Vec, + source_desc_builder: SourceDescBuilder, + split_state_store: SourceStateTableHandler, + ) -> Self { + Self { + source_id, + source_name, + column_ids, + source_identify: "Table_".to_string() + &source_id.table_id().to_string(), + source_desc_builder: Some(source_desc_builder), + stream_source_splits: HashMap::new(), + split_state_store, + state_cache: HashMap::new(), + } + } +} diff --git a/src/stream/src/executor/source/fs_source_executor.rs b/src/stream/src/executor/source/fs_source_executor.rs index d497b998b8e28..7120e9d5c349b 100644 --- a/src/stream/src/executor/source/fs_source_executor.rs +++ b/src/stream/src/executor/source/fs_source_executor.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,114 +19,127 @@ use anyhow::anyhow; use either::Either; use futures::StreamExt; use futures_async_stream::try_stream; -use risingwave_common::catalog::{ColumnId, Schema, TableId}; -use risingwave_connector::source::{ConnectorState, SplitId, SplitImpl, SplitMetaData}; -use risingwave_connector::{BoxSourceWithStateStream, StreamChunkWithState}; -use risingwave_source::connector_source::{SourceContext, SourceDescBuilderV2}; -use risingwave_source::fs_connector_source::FsConnectorSource; -use risingwave_source::monitor::SourceMetrics; +use risingwave_common::catalog::Schema; +use risingwave_connector::source::{ + BoxSourceWithStateStream, ConnectorState, SourceInfo, SplitId, SplitImpl, SplitMetaData, + StreamChunkWithState, +}; +use risingwave_source::source_desc::{FsSourceDesc, SourceDescBuilder}; use risingwave_storage::StateStore; use tokio::sync::mpsc::UnboundedReceiver; use tokio::time::Instant; +use super::executor_core::StreamSourceCore; use crate::error::StreamResult; use crate::executor::error::StreamExecutorError; use crate::executor::monitor::StreamingMetrics; use crate::executor::source::reader::SourceReaderStream; -use crate::executor::source::state_table_handler::SourceStateTableHandler; use crate::executor::*; /// [`FsSourceExecutor`] is a streaming source, fir external file systems /// such as s3. pub struct FsSourceExecutor { ctx: ActorContextRef, - source_id: TableId, - source_desc_builder: SourceDescBuilderV2, + identity: String, - column_ids: Vec, schema: Schema, - pk_indices: PkIndices, - /// Identity string - identity: String, + pk_indices: PkIndices, - /// Receiver of barrier channel. - barrier_receiver: Option>, + /// Streaming source for external + stream_source_core: StreamSourceCore, /// Metrics for monitor. metrics: Arc, - source_identify: String, - source_name: String, - - split_state_store: SourceStateTableHandler, - - // state_cache of current epoch - state_cache: HashMap, - - /// just store information about the split that is currently being read - /// because state_cache will is cleared every epoch - stream_source_splits: HashMap, + /// Receiver of barrier channel. 
+ barrier_receiver: Option>, /// Expected barrier latency expected_barrier_latency_ms: u64, } -// epoch 1: actor 1: A, B, C; actor 2: D, E -// epoch 2: actor 1: A, B; actor 2: C, D, actor 3: E, F -// actor needs to know if C has been read - impl FsSourceExecutor { #[allow(clippy::too_many_arguments)] pub fn new( ctx: ActorContextRef, - source_desc_builder: SourceDescBuilderV2, - source_id: TableId, - source_name: String, - state_table: SourceStateTableHandler, - column_ids: Vec, schema: Schema, pk_indices: PkIndices, + stream_source_core: StreamSourceCore, + metrics: Arc, barrier_receiver: UnboundedReceiver, - executor_id: u64, - _operator_id: u64, - _op_info: String, - streaming_metrics: Arc, expected_barrier_latency_ms: u64, + executor_id: u64, ) -> StreamResult { Ok(Self { ctx, - source_id, - source_name, - source_desc_builder, - column_ids, + identity: format!("SourceExecutor {:X}", executor_id), schema, pk_indices, + stream_source_core, + metrics, barrier_receiver: Some(barrier_receiver), - identity: format!("SourceExecutor {:X}", executor_id), - metrics: streaming_metrics, - source_identify: "Table_".to_string() + &source_id.table_id().to_string(), - split_state_store: state_table, - stream_source_splits: HashMap::new(), - state_cache: HashMap::new(), expected_barrier_latency_ms, }) } -} -impl FsSourceExecutor { + async fn build_stream_source_reader( + &mut self, + source_desc: &FsSourceDesc, + state: ConnectorState, + ) -> StreamExecutorResult { + let column_ids = source_desc + .columns + .iter() + .map(|column_desc| column_desc.column_id) + .collect_vec(); + let steam_reader = source_desc + .source + .stream_reader( + state, + column_ids, + source_desc.metrics.clone(), + SourceInfo::new(self.ctx.id, self.stream_source_core.source_id), + ) + .await + .map_err(StreamExecutorError::connector_error)?; + Ok(steam_reader.into_stream()) + } + + async fn apply_split_change( + &mut self, + source_desc: &FsSourceDesc, + stream: &mut SourceReaderStream, + mapping: &HashMap>, + ) -> StreamExecutorResult<()> { + if let Some(target_splits) = mapping.get(&self.ctx.id).cloned() { + if let Some(target_state) = self.get_diff(target_splits).await? { + tracing::info!( + actor_id = self.ctx.id, + state = ?target_state, + "apply split change" + ); + + self.replace_stream_reader_with_target_state(source_desc, stream, target_state) + .await?; + } + } + + Ok(()) + } + // Note: get_diff will modify the state_cache + // rhs can not be None because we do not support split number reduction async fn get_diff(&mut self, rhs: Vec) -> StreamExecutorResult { - // rhs can not be None because we do not support split number reduction - - let all_completed: HashSet = self.split_state_store.get_all_completed().await?; + let core = &mut self.stream_source_core; + let all_completed: HashSet = core.split_state_store.get_all_completed().await?; tracing::debug!(actor = self.ctx.id, all_completed = ?all_completed , "get diff"); let mut target_state: Vec = Vec::new(); let mut no_change_flag = true; for sc in rhs { - if let Some(s) = self.state_cache.get(&sc.id()) { + if let Some(s) = core.state_cache.get(&sc.id()) { let fs = s .as_fs() .unwrap_or_else(|| panic!("split {:?} is not fs", s)); @@ -140,7 +153,7 @@ impl FsSourceExecutor { } else { no_change_flag = false; // write new assigned split to state cache. snapshot is base on cache. - let state = if let Some(recover_state) = self + let state = if let Some(recover_state) = core .split_state_store .try_recover_from_state_store(&sc) .await? 
@@ -150,7 +163,7 @@ impl FsSourceExecutor { sc }; - self.state_cache + core.state_cache .entry(state.id()) .or_insert_with(|| state.clone()); target_state.push(state); @@ -159,8 +172,38 @@ impl FsSourceExecutor { Ok((!no_change_flag).then_some(target_state)) } - async fn take_snapshot(&mut self, epoch: EpochPair) -> StreamExecutorResult<()> { - let incompleted = self + async fn replace_stream_reader_with_target_state( + &mut self, + source_desc: &FsSourceDesc, + stream: &mut SourceReaderStream, + target_state: Vec, + ) -> StreamExecutorResult<()> { + tracing::info!( + "actor {:?} apply source split change to {:?}", + self.ctx.id, + target_state + ); + + // Replace the source reader with a new one of the new state. + let reader = self + .build_stream_source_reader(source_desc, Some(target_state.clone())) + .await?; + stream.replace_source_stream(reader); + + self.stream_source_core.stream_source_splits = target_state + .into_iter() + .map(|split| (split.id(), split)) + .collect(); + + Ok(()) + } + + async fn take_snapshot_and_clear_cache( + &mut self, + epoch: EpochPair, + ) -> StreamExecutorResult<()> { + let core = &mut self.stream_source_core; + let incompleted = core .state_cache .values() .filter(|split| { @@ -172,7 +215,7 @@ impl FsSourceExecutor { .cloned() .collect_vec(); - let completed = self + let completed = core .state_cache .values() .filter(|split| { @@ -186,37 +229,20 @@ impl FsSourceExecutor { if !incompleted.is_empty() { tracing::debug!(actor_id = self.ctx.id, incompleted = ?incompleted, "take snapshot"); - self.split_state_store.take_snapshot(incompleted).await? + core.split_state_store.take_snapshot(incompleted).await? } if !completed.is_empty() { tracing::debug!(actor_id = self.ctx.id, completed = ?completed, "take snapshot"); - self.split_state_store.set_all_complete(completed).await? + core.split_state_store.set_all_complete(completed).await? } // commit anyway, even if no message saved - self.split_state_store.state_store.commit(epoch).await?; + core.split_state_store.state_store.commit(epoch).await?; + core.state_cache.clear(); Ok(()) } - async fn build_stream_source_reader( - &mut self, - fs_source: &FsConnectorSource, - source_metrics: Arc, - state: ConnectorState, - ) -> StreamExecutorResult { - let steam_reader = fs_source - .stream_reader( - state, - self.column_ids.clone(), - source_metrics.clone(), - SourceContext::new(self.ctx.id, self.source_id), - ) - .await - .map_err(StreamExecutorError::connector_error)?; - Ok(steam_reader.into_stream()) - } - #[try_stream(ok = Message, error = StreamExecutorError)] async fn into_stream(mut self) { let mut barrier_receiver = self.barrier_receiver.take().unwrap(); @@ -228,16 +254,17 @@ impl FsSourceExecutor { StreamExecutorError::from(anyhow!( "failed to receive the first barrier, actor_id: {:?}, source_id: {:?}", self.ctx.id, - self.source_id + self.stream_source_core.source_id )) })?; - let fs_source = self - .source_desc_builder - .build_fs_stream_source() - .map_err(StreamExecutorError::connector_error)?; + let source_desc_builder: SourceDescBuilder = + self.stream_source_core.source_desc_builder.take().unwrap(); - let source_metrics = self.source_desc_builder.metrics(); + let source_desc = source_desc_builder + .build_fs_source_desc() + .await + .map_err(StreamExecutorError::connector_error)?; // If the first barrier is configuration change, then the source executor must be newly // created, and we should start with the paused state. 
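`take_snapshot_and_clear_cache` above splits the per-epoch split-state cache into still-running and finished splits, persists them through `take_snapshot` and `set_all_complete` respectively, and then clears the cache. The partitioning step reduced to a toy, with illustrative types rather than the connector's `SplitImpl`:

#[derive(Clone)]
struct ToySplitState {
    id: u64,
    finished: bool,
}

// Returns (incomplete, completed): the first set is snapshotted as in-progress
// state, the second is marked done, and the caller clears its cache afterwards.
fn partition_for_snapshot(cache: &[ToySplitState]) -> (Vec<ToySplitState>, Vec<ToySplitState>) {
    cache.iter().cloned().partition(|split| !split.finished)
}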
@@ -259,9 +286,15 @@ impl FsSourceExecutor { } } - self.split_state_store.init_epoch(barrier.epoch); + self.stream_source_core + .split_state_store + .init_epoch(barrier.epoch); - let all_completed: HashSet = self.split_state_store.get_all_completed().await?; + let all_completed: HashSet = self + .stream_source_core + .split_state_store + .get_all_completed() + .await?; tracing::debug!(actor = self.ctx.id, all_completed = ?all_completed , "get diff"); let mut boot_state = boot_state @@ -269,9 +302,16 @@ impl FsSourceExecutor { .filter(|split| !all_completed.contains(&split.id())) .collect_vec(); + self.stream_source_core.stream_source_splits = boot_state + .clone() + .into_iter() + .map(|split| (split.id(), split)) + .collect(); + // restore the newest split info for ele in &mut boot_state { if let Some(recover_state) = self + .stream_source_core .split_state_store .try_recover_from_state_store(ele) .await? @@ -280,17 +320,11 @@ impl FsSourceExecutor { } } - self.stream_source_splits = boot_state - .clone() - .into_iter() - .map(|split| (split.id(), split)) - .collect(); - let recover_state: ConnectorState = (!boot_state.is_empty()).then_some(boot_state); tracing::info!(actor_id = self.ctx.id, state = ?recover_state, "start with state"); let source_chunk_reader = self - .build_stream_source_reader(&fs_source, source_metrics.clone(), recover_state) + .build_stream_source_reader(&source_desc, recover_state) .stack_trace("fs_source_start_reader") .await?; @@ -307,6 +341,7 @@ impl FsSourceExecutor { let max_wait_barrier_time_ms = self.expected_barrier_latency_ms as u128 * 5; let mut last_barrier_time = Instant::now(); let mut self_paused = false; + let mut metric_row_per_barrier: u64 = 0; while let Some(msg) = stream.next().await { match msg? { // This branch will be preferred. @@ -321,30 +356,29 @@ impl FsSourceExecutor { if let Some(ref mutation) = barrier.mutation.as_deref() { match mutation { Mutation::SourceChangeSplit(actor_splits) => { - self.apply_split_change( - &fs_source, - source_metrics.clone(), - &mut stream, - actor_splits, - ) - .await? + self.apply_split_change(&source_desc, &mut stream, actor_splits) + .await? } Mutation::Pause => stream.pause_source(), Mutation::Resume => stream.resume_source(), Mutation::Update { actor_splits, .. 
} => { - self.apply_split_change( - &fs_source, - source_metrics.clone(), - &mut stream, - actor_splits, - ) - .await?; + self.apply_split_change(&source_desc, &mut stream, actor_splits) + .await?; } _ => {} } } - self.take_snapshot(epoch).await?; - self.state_cache.clear(); + self.take_snapshot_and_clear_cache(epoch).await?; + + self.metrics + .source_row_per_barrier + .with_label_values(&[ + self.ctx.id.to_string().as_str(), + self.stream_source_core.source_identify.as_ref(), + ]) + .inc_by(metric_row_per_barrier); + metric_row_per_barrier = 0; + yield Message::Barrier(barrier); } @@ -364,20 +398,21 @@ impl FsSourceExecutor { let state: Vec<(SplitId, SplitImpl)> = mapping .iter() .flat_map(|(id, offset)| { - let origin_split = self.stream_source_splits.get(id); + let origin_split = + self.stream_source_core.stream_source_splits.get(id); origin_split.map(|split| (id.clone(), split.update(offset.clone()))) }) .collect_vec(); - self.state_cache.extend(state) + self.stream_source_core.state_cache.extend(state); } self.metrics .source_output_row_count .with_label_values(&[ - self.source_identify.as_str(), - self.source_name.as_str(), + self.stream_source_core.source_identify.as_str(), + self.stream_source_core.source_name.as_ref(), ]) .inc_by(chunk.cardinality() as u64); yield Message::Chunk(chunk); @@ -391,61 +426,6 @@ impl FsSourceExecutor { "source executor exited unexpectedly" ) } - - async fn apply_split_change( - &mut self, - fs_source: &FsConnectorSource, - source_metrics: Arc, - stream: &mut SourceReaderStream, - mapping: &HashMap>, - ) -> StreamExecutorResult<()> { - if let Some(target_splits) = mapping.get(&self.ctx.id).cloned() { - if let Some(target_state) = self.get_diff(target_splits).await? { - tracing::info!( - actor_id = self.ctx.id, - state = ?target_state, - "apply split change" - ); - - self.replace_stream_reader_with_target_state( - fs_source, - source_metrics, - stream, - target_state, - ) - .await?; - } - } - - Ok(()) - } - - async fn replace_stream_reader_with_target_state( - &mut self, - fs_source: &FsConnectorSource, - source_metrics: Arc, - stream: &mut SourceReaderStream, - target_state: Vec, - ) -> StreamExecutorResult<()> { - tracing::info!( - "actor {:?} apply source split change to {:?}", - self.ctx.id, - target_state - ); - - // Replace the source reader with a new one of the new state. 
- let reader = self - .build_stream_source_reader(fs_source, source_metrics, Some(target_state.clone())) - .await?; - stream.replace_source_stream(reader); - - self.stream_source_splits = target_state - .into_iter() - .map(|split| (split.id(), split)) - .collect(); - - Ok(()) - } } impl Executor for FsSourceExecutor { @@ -468,9 +448,9 @@ impl Executor for FsSourceExecutor { impl Debug for FsSourceExecutor { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("SourceExecutor") - .field("source_id", &self.source_id) - .field("column_ids", &self.column_ids) + f.debug_struct("FsSourceExecutor") + .field("source_id", &self.stream_source_core.source_id) + .field("column_ids", &self.stream_source_core.column_ids) .field("pk_indices", &self.pk_indices) .finish() } diff --git a/src/stream/src/executor/source/mod.rs b/src/stream/src/executor/source/mod.rs index 1c3057bd6e269..1ca31c79696da 100644 --- a/src/stream/src/executor/source/mod.rs +++ b/src/stream/src/executor/source/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,10 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub mod executor_core; +pub use executor_core::StreamSourceCore; mod fs_source_executor; pub use fs_source_executor::*; -pub mod source_executor_v2; +pub mod source_executor; mod reader; pub mod state_table_handler; diff --git a/src/stream/src/executor/source/reader.rs b/src/stream/src/executor/source/reader.rs index d1cf1a8080344..b801b853eb0eb 100644 --- a/src/stream/src/executor/source/reader.rs +++ b/src/stream/src/executor/source/reader.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ use futures::stream::{select_with_strategy, BoxStream, PollNext, SelectWithStrat use futures::{Stream, StreamExt, TryStreamExt}; use futures_async_stream::try_stream; use risingwave_common::bail; -use risingwave_connector::{BoxSourceWithStateStream, StreamChunkWithState}; +use risingwave_connector::source::{BoxSourceWithStateStream, StreamChunkWithState}; use tokio::sync::mpsc::UnboundedReceiver; use crate::executor::error::{StreamExecutorError, StreamExecutorResult}; @@ -145,7 +145,7 @@ mod tests { use assert_matches::assert_matches; use futures::{pin_mut, FutureExt}; use risingwave_common::array::StreamChunk; - use risingwave_source::TableSource; + use risingwave_source::TableDmlHandle; use tokio::sync::mpsc; use super::*; @@ -154,12 +154,8 @@ mod tests { async fn test_pause_and_resume() { let (barrier_tx, barrier_rx) = mpsc::unbounded_channel(); - let table_source = TableSource::new(vec![]); - let source_stream = table_source - .stream_reader(vec![]) - .await - .unwrap() - .into_stream(); + let table_dml_handle = TableDmlHandle::new(vec![]); + let source_stream = table_dml_handle.stream_reader().into_stream(); let stream = SourceReaderStream::new(barrier_rx, source_stream); pin_mut!(stream); @@ -175,7 +171,10 @@ mod tests { } // Write a chunk, and we should receive it. 
- table_source.write_chunk(StreamChunk::default()).unwrap(); + table_dml_handle + .write_chunk(StreamChunk::default()) + .await + .unwrap(); assert_matches!(next!().unwrap(), Either::Right(_)); // Write a barrier, and we should receive it. barrier_tx.send(Barrier::new_test_barrier(1)).unwrap(); @@ -187,7 +186,10 @@ mod tests { // Write a barrier. barrier_tx.send(Barrier::new_test_barrier(2)).unwrap(); // Then write a chunk. - table_source.write_chunk(StreamChunk::default()).unwrap(); + table_dml_handle + .write_chunk(StreamChunk::default()) + .await + .unwrap(); // We should receive the barrier. assert_matches!(next!().unwrap(), Either::Left(_)); diff --git a/src/stream/src/executor/source/source_executor_v2.rs b/src/stream/src/executor/source/source_executor.rs similarity index 89% rename from src/stream/src/executor/source/source_executor_v2.rs rename to src/stream/src/executor/source/source_executor.rs index b1d15d5bcf2c2..d54372bec1b20 100644 --- a/src/stream/src/executor/source/source_executor_v2.rs +++ b/src/stream/src/executor/source/source_executor.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,15 +18,15 @@ use anyhow::anyhow; use either::Either; use futures::StreamExt; use futures_async_stream::try_stream; -use risingwave_common::catalog::{ColumnId, TableId}; -use risingwave_connector::source::{ConnectorState, SplitId, SplitMetaData}; -use risingwave_connector::{BoxSourceWithStateStream, StreamChunkWithState}; -use risingwave_source::connector_source::{SourceContext, SourceDescBuilderV2, SourceDescV2}; +use risingwave_connector::source::{ + BoxSourceWithStateStream, ConnectorState, SourceInfo, SplitMetaData, StreamChunkWithState, +}; +use risingwave_source::source_desc::{SourceDesc, SourceDescBuilder}; use risingwave_storage::StateStore; use tokio::sync::mpsc::UnboundedReceiver; use tokio::time::Instant; -use super::SourceStateTableHandler; +use super::executor_core::StreamSourceCore; use crate::executor::monitor::StreamingMetrics; use crate::executor::source::reader::SourceReaderStream; use crate::executor::*; @@ -35,56 +35,7 @@ use crate::executor::*; /// some latencies in network and cost in meta. const WAIT_BARRIER_MULTIPLE_TIMES: u128 = 5; -/// [`StreamSourceCore`] stores the necessary information for the source executor to execute on the -/// external connector. -pub struct StreamSourceCore { - source_id: TableId, - source_name: String, - - column_ids: Vec, - - source_identify: String, - - /// `source_desc_builder` will be taken (`mem::take`) on execution. A `SourceDesc` (currently - /// named `SourceDescV2`) will be constructed and used for execution. - source_desc_builder: Option, - - /// Split info for stream source. A source executor might read data from several splits of - /// external connector. - stream_source_splits: HashMap, - - /// Stores information of the splits. - split_state_store: SourceStateTableHandler, - - /// In-memory cache for the splits. 
- state_cache: HashMap, -} - -impl StreamSourceCore -where - S: StateStore, -{ - pub fn new( - source_id: TableId, - source_name: String, - column_ids: Vec, - source_desc_builder: SourceDescBuilderV2, - split_state_store: SourceStateTableHandler, - ) -> Self { - Self { - source_id, - source_name, - column_ids, - source_identify: "Table_".to_string() + &source_id.table_id().to_string(), - source_desc_builder: Some(source_desc_builder), - stream_source_splits: HashMap::new(), - split_state_store, - state_cache: HashMap::new(), - } - } -} - -pub struct SourceExecutorV2 { +pub struct SourceExecutor { ctx: ActorContextRef, identity: String, @@ -106,7 +57,7 @@ pub struct SourceExecutorV2 { expected_barrier_latency_ms: u64, } -impl SourceExecutorV2 { +impl SourceExecutor { #[allow(clippy::too_many_arguments)] pub fn new( ctx: ActorContextRef, @@ -132,7 +83,7 @@ impl SourceExecutorV2 { async fn build_stream_source_reader( &self, - source_desc: &SourceDescV2, + source_desc: &SourceDesc, state: ConnectorState, ) -> StreamExecutorResult { let column_ids = source_desc @@ -140,25 +91,24 @@ impl SourceExecutorV2 { .iter() .map(|column_desc| column_desc.column_id) .collect_vec(); - Ok(source_desc + source_desc .source .stream_reader( state, column_ids, source_desc.metrics.clone(), - SourceContext::new( + SourceInfo::new( self.ctx.id, self.stream_source_core.as_ref().unwrap().source_id, ), ) .await - .map_err(StreamExecutorError::connector_error)? - .into_stream()) + .map_err(StreamExecutorError::connector_error) } async fn apply_split_change( &mut self, - source_desc: &SourceDescV2, + source_desc: &SourceDesc, stream: &mut SourceReaderStream, mapping: &HashMap>, ) -> StreamExecutorResult<()> { @@ -215,7 +165,7 @@ impl SourceExecutorV2 { async fn replace_stream_reader_with_target_state( &mut self, - source_desc: &SourceDescV2, + source_desc: &SourceDesc, stream: &mut SourceReaderStream, target_state: Vec, ) -> StreamExecutorResult<()> { @@ -288,10 +238,8 @@ impl SourceExecutorV2 { let mut core = self.stream_source_core.unwrap(); // Build source description from the builder. 
- let source_desc = core - .source_desc_builder - .take() - .unwrap() + let source_desc_builder: SourceDescBuilder = core.source_desc_builder.take().unwrap(); + let source_desc = source_desc_builder .build() .await .map_err(StreamExecutorError::connector_error)?; @@ -490,7 +438,7 @@ impl SourceExecutorV2 { } } -impl Executor for SourceExecutorV2 { +impl Executor for SourceExecutor { fn execute(self: Box) -> BoxedMessageStream { if self.stream_source_core.is_some() { self.execute_with_stream_source().boxed() @@ -512,7 +460,7 @@ impl Executor for SourceExecutorV2 { } } -impl Debug for SourceExecutorV2 { +impl Debug for SourceExecutor { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { if let Some(core) = &self.stream_source_core { f.debug_struct("SourceExecutor") @@ -533,7 +481,7 @@ mod tests { use maplit::{convert_args, hashmap}; use risingwave_common::array::StreamChunk; - use risingwave_common::catalog::{Field, Schema, TableId}; + use risingwave_common::catalog::{ColumnId, Field, Schema, TableId}; use risingwave_common::test_prelude::StreamChunkTestExt; use risingwave_common::types::DataType; use risingwave_common::util::sort_util::{OrderPair, OrderType}; @@ -553,20 +501,17 @@ mod tests { async fn test_source_executor() { let table_id = TableId::default(); let schema = Schema { - fields: vec![ - Field::unnamed(DataType::Int64), - Field::with_name(DataType::Int32, "sequence_int"), - ], + fields: vec![Field::with_name(DataType::Int32, "sequence_int")], }; - let row_id_index = Some(0); + let row_id_index = None; let pk_column_ids = vec![0]; let pk_indices = vec![0]; let source_info = StreamSourceInfo { - row_format: ProstRowFormatType::Json as i32, + row_format: ProstRowFormatType::Native as i32, ..Default::default() }; let (barrier_tx, barrier_rx) = unbounded_channel::(); - let column_ids = vec![0, 1].into_iter().map(ColumnId::from).collect(); + let column_ids = vec![0].into_iter().map(ColumnId::from).collect(); // This datagen will generate 3 rows at one time. let properties: HashMap = convert_args!(hashmap!( @@ -599,7 +544,7 @@ mod tests { source_name: MOCK_SOURCE_NAME.to_string(), }; - let executor = SourceExecutorV2::new( + let executor = SourceExecutor::new( ActorContext::create(0), schema, pk_indices, @@ -635,10 +580,10 @@ mod tests { assert_eq!( msg.into_chunk().unwrap(), StreamChunk::from_pretty( - " I i - + . 11 - + . 12 - + . 13" + " i + + 11 + + 12 + + 13" ) ); } @@ -653,7 +598,7 @@ mod tests { let pk_column_ids = vec![0]; let pk_indices = vec![0_usize]; let source_info = StreamSourceInfo { - row_format: ProstRowFormatType::Json as i32, + row_format: ProstRowFormatType::Native as i32, ..Default::default() }; let properties = convert_args!(hashmap!( @@ -691,7 +636,7 @@ mod tests { source_name: MOCK_SOURCE_NAME.to_string(), }; - let executor = SourceExecutorV2::new( + let executor = SourceExecutor::new( ActorContext::create(0), schema, pk_indices, diff --git a/src/stream/src/executor/source/state_table_handler.rs b/src/stream/src/executor/source/state_table_handler.rs index 3d12581f46f72..ba2e201aec155 100644 --- a/src/stream/src/executor/source/state_table_handler.rs +++ b/src/stream/src/executor/source/state_table_handler.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -212,8 +212,8 @@ pub fn default_source_internal_table(id: u32) -> ProstTable { ]; ProstTable { id, - schema_id: SchemaId::placeholder() as u32, - database_id: DatabaseId::placeholder() as u32, + schema_id: SchemaId::placeholder().schema_id, + database_id: DatabaseId::placeholder().database_id, name: String::new(), columns, table_type: TableType::Internal as i32, diff --git a/src/stream/src/executor/subtask.rs b/src/stream/src/executor/subtask.rs index 07bde0ef6792c..22a7c44684d3d 100644 --- a/src/stream/src/executor/subtask.rs +++ b/src/stream/src/executor/subtask.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/test_utils.rs b/src/stream/src/executor/test_utils.rs index 66c7e358caf0c..1597f79252291 100644 --- a/src/stream/src/executor/test_utils.rs +++ b/src/stream/src/executor/test_utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -215,14 +215,15 @@ pub mod agg_executor { use crate::common::table::state_table::StateTable; use crate::common::StateTableColumnMapping; + use crate::executor::agg_common::AggExecutorArgs; use crate::executor::aggregation::{AggCall, AggStateStorage}; use crate::executor::{ ActorContextRef, BoxedExecutor, Executor, GlobalSimpleAggExecutor, PkIndices, }; - /// Create state table for the given agg call. + /// Create state storage for the given agg call. /// Should infer the schema in the same way as `LogicalAgg::infer_stream_agg_state`. - pub async fn create_agg_state_table( + pub async fn create_agg_state_storage( store: S, table_id: TableId, agg_call: &AggCall, @@ -330,17 +331,17 @@ pub mod agg_executor { } pub async fn new_boxed_simple_agg_executor( - ctx: ActorContextRef, + actor_ctx: ActorContextRef, store: S, input: BoxedExecutor, agg_calls: Vec, pk_indices: PkIndices, executor_id: u64, ) -> Box { - let mut agg_state_tables = Vec::with_capacity(agg_calls.iter().len()); + let mut storages = Vec::with_capacity(agg_calls.iter().len()); for (idx, agg_call) in agg_calls.iter().enumerate() { - agg_state_tables.push( - create_agg_state_table( + storages.push( + create_agg_state_storage( store.clone(), TableId::new(idx as u32), agg_call, @@ -361,19 +362,23 @@ pub mod agg_executor { ) .await; - Box::new( - GlobalSimpleAggExecutor::new( - ctx, - input, - agg_calls, - agg_state_tables, - result_table, - pk_indices, - executor_id, - 1 << 10, - ) - .unwrap(), - ) + GlobalSimpleAggExecutor::new(AggExecutorArgs { + input, + actor_ctx, + pk_indices, + executor_id, + + extreme_cache_size: 1024, + + agg_calls, + storages, + result_table, + distinct_dedup_tables: Default::default(), + + extra: None, + }) + .unwrap() + .boxed() } } diff --git a/src/stream/src/executor/top_n/group_top_n.rs b/src/stream/src/executor/top_n/group_top_n.rs index a10f355503b5a..2aeffa4fff1f3 100644 --- a/src/stream/src/executor/top_n/group_top_n.rs +++ b/src/stream/src/executor/top_n/group_top_n.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
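The test helper above now constructs `GlobalSimpleAggExecutor` from a single `AggExecutorArgs` value instead of a long positional parameter list. A minimal sketch of that constructor-args pattern, using illustrative names rather than the actual RisingWave types:

struct ToyAggArgs {
    executor_id: u64,
    extreme_cache_size: usize,
    agg_call_names: Vec<String>,
}

struct ToyAggExecutor {
    id: u64,
    cache_size: usize,
    calls: Vec<String>,
}

impl ToyAggExecutor {
    // Adding a new knob later only touches the args struct and this body,
    // not every call site that constructs the executor.
    fn new(args: ToyAggArgs) -> Self {
        Self {
            id: args.executor_id,
            cache_size: args.extreme_cache_size,
            calls: args.agg_call_names,
        }
    }
}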
@@ -12,15 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::ops::{Deref, DerefMut}; use std::sync::Arc; use async_trait::async_trait; -use itertools::Itertools; use risingwave_common::array::{Op, StreamChunk}; use risingwave_common::buffer::Bitmap; use risingwave_common::hash::HashKey; use risingwave_common::row::RowExt; use risingwave_common::util::epoch::EpochPair; +use risingwave_common::util::iter_util::ZipEqDebug; use risingwave_common::util::sort_util::OrderPair; use risingwave_storage::StateStore; @@ -32,7 +33,7 @@ use crate::common::table::state_table::StateTable; use crate::error::StreamResult; use crate::executor::error::StreamExecutorResult; use crate::executor::managed_state::top_n::ManagedTopNState; -use crate::executor::{ActorContextRef, Executor, ExecutorInfo, PkIndices}; +use crate::executor::{ActorContextRef, Executor, ExecutorInfo, PkIndices, Watermark}; use crate::task::AtomicU64Ref; pub type GroupTopNExecutor = @@ -117,7 +118,7 @@ impl InnerGroupTopNExecutorNew info: ExecutorInfo { schema, pk_indices, - identity: format!("TopNExecutorNew {:X}", executor_id), + identity: format!("GroupTopNExecutor {:X}", executor_id), }, offset: offset_and_limit.0, limit: offset_and_limit.1, @@ -139,27 +140,22 @@ impl GroupTopNCache { let cache = ExecutorCache::new(new_unbounded(lru_manager)); Self { data: cache } } +} - fn clear(&mut self) { - self.data.clear() - } - - fn get_mut(&mut self, key: &K) -> Option<&mut TopNCache> { - self.data.get_mut(key) - } +impl Deref for GroupTopNCache { + type Target = ExecutorCache>; - fn contains(&mut self, key: &K) -> bool { - self.data.contains(key) + fn deref(&self) -> &Self::Target { + &self.data } +} - fn insert(&mut self, key: K, value: TopNCache) { - self.data.push(key, value); - } - - fn evict(&mut self) { - self.data.evict() +impl DerefMut for GroupTopNCache { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.data } } + #[async_trait] impl TopNExecutorBase for InnerGroupTopNExecutorNew @@ -172,7 +168,7 @@ where let chunk = chunk.compact(); let keys = K::build(&self.group_by, chunk.data_chunk())?; - for ((op, row_ref), group_cache_key) in chunk.rows().zip_eq(keys.iter()) { + for ((op, row_ref), group_cache_key) in chunk.rows().zip_eq_debug(keys.iter()) { // The pk without group by let pk_row = row_ref.project(&self.storage_key_indices[self.group_by.len()..]); let cache_key = serialize_pk_to_cache_key(pk_row, &self.cache_key_serde); @@ -186,7 +182,7 @@ where self.managed_state .init_topn_cache(Some(group_key), &mut topn_cache) .await?; - self.caches.insert(group_cache_key.clone(), topn_cache); + self.caches.push(group_cache_key.clone(), topn_cache); } let cache = self.caches.get_mut(group_cache_key).unwrap(); @@ -243,6 +239,17 @@ where self.managed_state.state_table.init_epoch(epoch); Ok(()) } + + async fn handle_watermark(&mut self, watermark: Watermark) -> Option { + if watermark.col_idx == self.group_by[0] { + self.managed_state + .state_table + .update_watermark(watermark.val.clone()); + Some(watermark) + } else { + None + } + } } #[cfg(test)] @@ -532,6 +539,7 @@ mod tests { ), ); } + #[tokio::test] async fn test_multi_group_key() { let source = create_source(); diff --git a/src/stream/src/executor/top_n/group_top_n_appendonly.rs b/src/stream/src/executor/top_n/group_top_n_appendonly.rs new file mode 100644 index 0000000000000..ea2141af2d03a --- /dev/null +++ b/src/stream/src/executor/top_n/group_top_n_appendonly.rs @@ -0,0 +1,239 @@ +// Copyright 2023 
RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Copyright 2023 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use async_trait::async_trait; +use risingwave_common::array::{Op, StreamChunk}; +use risingwave_common::buffer::Bitmap; +use risingwave_common::hash::HashKey; +use risingwave_common::row::{RowDeserializer, RowExt}; +use risingwave_common::util::epoch::EpochPair; +use risingwave_common::util::iter_util::ZipEqDebug; +use risingwave_common::util::sort_util::OrderPair; +use risingwave_storage::StateStore; + +use super::group_top_n::GroupTopNCache; +use super::top_n_cache::AppendOnlyTopNCacheTrait; +use super::utils::*; +use super::TopNCache; +use crate::cache::cache_may_stale; +use crate::common::table::state_table::StateTable; +use crate::error::StreamResult; +use crate::executor::error::StreamExecutorResult; +use crate::executor::managed_state::top_n::ManagedTopNState; +use crate::executor::{ActorContextRef, Executor, ExecutorInfo, PkIndices, Watermark}; +use crate::task::AtomicU64Ref; + +/// If the input contains only append, `AppendOnlyGroupTopNExecutor` does not need +/// to keep all the data records/rows that have been seen. As long as a record +/// is no longer being in the result set, it can be deleted. +pub type AppendOnlyGroupTopNExecutor = + TopNExecutorWrapper>; + +impl + AppendOnlyGroupTopNExecutor +{ + #[allow(clippy::too_many_arguments)] + pub fn new( + input: Box, + ctx: ActorContextRef, + storage_key: Vec, + offset_and_limit: (usize, usize), + order_by: Vec, + executor_id: u64, + group_by: Vec, + state_table: StateTable, + watermark_epoch: AtomicU64Ref, + ) -> StreamResult { + let info = input.info(); + Ok(TopNExecutorWrapper { + input, + ctx, + inner: InnerAppendOnlyGroupTopNExecutorNew::new( + info, + storage_key, + offset_and_limit, + order_by, + executor_id, + group_by, + state_table, + watermark_epoch, + )?, + }) + } +} + +pub struct InnerAppendOnlyGroupTopNExecutorNew { + info: ExecutorInfo, + + /// `LIMIT XXX`. None means no limit. + limit: usize, + + /// `OFFSET XXX`. `0` means no offset. + offset: usize, + + /// The storage key indices of the `AppendOnlyGroupTopNExecutor` + storage_key_indices: PkIndices, + + managed_state: ManagedTopNState, + + /// which column we used to group the data. + group_by: Vec, + + /// group key -> cache for this group + caches: GroupTopNCache, + + /// Used for serializing pk into CacheKey. 
+ cache_key_serde: CacheKeySerde, +} + +impl + InnerAppendOnlyGroupTopNExecutorNew +{ + #[allow(clippy::too_many_arguments)] + pub fn new( + input_info: ExecutorInfo, + storage_key: Vec, + offset_and_limit: (usize, usize), + order_by: Vec, + executor_id: u64, + group_by: Vec, + state_table: StateTable, + lru_manager: AtomicU64Ref, + ) -> StreamResult { + let ExecutorInfo { + pk_indices, schema, .. + } = input_info; + + let cache_key_serde = + create_cache_key_serde(&storage_key, &pk_indices, &schema, &order_by, &group_by); + let managed_state = ManagedTopNState::::new(state_table, cache_key_serde.clone()); + + Ok(Self { + info: ExecutorInfo { + schema, + pk_indices, + identity: format!("AppendOnlyGroupTopNExecutor {:X}", executor_id), + }, + offset: offset_and_limit.0, + limit: offset_and_limit.1, + managed_state, + storage_key_indices: storage_key.into_iter().map(|op| op.column_idx).collect(), + group_by, + caches: GroupTopNCache::new(lru_manager), + cache_key_serde, + }) + } +} +#[async_trait] +impl TopNExecutorBase + for InnerAppendOnlyGroupTopNExecutorNew +where + TopNCache: AppendOnlyTopNCacheTrait, +{ + async fn apply_chunk(&mut self, chunk: StreamChunk) -> StreamExecutorResult { + let mut res_ops = Vec::with_capacity(self.limit); + let mut res_rows = Vec::with_capacity(self.limit); + let chunk = chunk.compact(); + let keys = K::build(&self.group_by, chunk.data_chunk())?; + + let data_types = self.schema().data_types(); + let row_deserializer = RowDeserializer::new(data_types); + + for ((op, row_ref), group_cache_key) in chunk.rows().zip_eq_debug(keys.iter()) { + // The pk without group by + let pk_row = row_ref.project(&self.storage_key_indices[self.group_by.len()..]); + let cache_key = serialize_pk_to_cache_key(pk_row, &self.cache_key_serde); + + let group_key = row_ref.project(&self.group_by); + + // If 'self.caches' does not already have a cache for the current group, create a new + // cache for it and insert it into `self.caches` + if !self.caches.contains(group_cache_key) { + let mut topn_cache = TopNCache::new(self.offset, self.limit); + self.managed_state + .init_topn_cache(Some(group_key), &mut topn_cache) + .await?; + self.caches.push(group_cache_key.clone(), topn_cache); + } + let cache = self.caches.get_mut(group_cache_key).unwrap(); + + debug_assert_eq!(op, Op::Insert); + cache.insert( + cache_key, + row_ref, + &mut res_ops, + &mut res_rows, + &mut self.managed_state, + &row_deserializer, + )?; + } + + generate_output(res_rows, res_ops, self.schema()) + } + + async fn flush_data(&mut self, epoch: EpochPair) -> StreamExecutorResult<()> { + self.managed_state.flush(epoch).await + } + + fn info(&self) -> &ExecutorInfo { + &self.info + } + + fn update_vnode_bitmap(&mut self, vnode_bitmap: Arc) { + let previous_vnode_bitmap = self + .managed_state + .state_table + .update_vnode_bitmap(vnode_bitmap.clone()); + + if cache_may_stale(&previous_vnode_bitmap, &vnode_bitmap) { + self.caches.clear(); + } + } + + fn evict(&mut self) { + self.caches.evict() + } + + async fn init(&mut self, epoch: EpochPair) -> StreamExecutorResult<()> { + self.managed_state.state_table.init_epoch(epoch); + Ok(()) + } + + async fn handle_watermark(&mut self, watermark: Watermark) -> Option { + if watermark.col_idx == self.group_by[0] { + self.managed_state + .state_table + .update_watermark(watermark.val.clone()); + Some(watermark) + } else { + None + } + } +} diff --git a/src/stream/src/executor/top_n/mod.rs b/src/stream/src/executor/top_n/mod.rs index f68a7889b3b17..36225869984bd 100644 --- 
a/src/stream/src/executor/top_n/mod.rs +++ b/src/stream/src/executor/top_n/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,10 +20,12 @@ use top_n_cache::TopNCacheTrait; // `TopN` variants mod group_top_n; +mod group_top_n_appendonly; mod top_n_appendonly; mod top_n_plain; pub use group_top_n::GroupTopNExecutor; +pub use group_top_n_appendonly::AppendOnlyGroupTopNExecutor; pub use top_n_appendonly::AppendOnlyTopNExecutor; pub use top_n_cache::{CacheKey, TopNCache}; pub use top_n_plain::TopNExecutor; diff --git a/src/stream/src/executor/top_n/top_n_appendonly.rs b/src/stream/src/executor/top_n/top_n_appendonly.rs index 6434f028c778c..0397a286036fb 100644 --- a/src/stream/src/executor/top_n/top_n_appendonly.rs +++ b/src/stream/src/executor/top_n/top_n_appendonly.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -26,7 +26,7 @@ use crate::common::table::state_table::StateTable; use crate::error::StreamResult; use crate::executor::error::StreamExecutorResult; use crate::executor::managed_state::top_n::{ManagedTopNState, NO_GROUP_KEY}; -use crate::executor::{ActorContextRef, Executor, ExecutorInfo, PkIndices}; +use crate::executor::{ActorContextRef, Executor, ExecutorInfo, PkIndices, Watermark}; /// If the input contains only append, `AppendOnlyTopNExecutor` does not need /// to keep all the data records/rows that have been seen. As long as a record @@ -187,6 +187,11 @@ where .init_topn_cache(NO_GROUP_KEY, &mut self.cache) .await } + + async fn handle_watermark(&mut self, _: Watermark) -> Option { + // TODO(yuhao): handle watermark + None + } } #[cfg(test)] diff --git a/src/stream/src/executor/top_n/top_n_cache.rs b/src/stream/src/executor/top_n/top_n_cache.rs index 16b212a8084c7..b500cd8eff90f 100644 --- a/src/stream/src/executor/top_n/top_n_cache.rs +++ b/src/stream/src/executor/top_n/top_n_cache.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/top_n/top_n_plain.rs b/src/stream/src/executor/top_n/top_n_plain.rs index 042e1517c7d7a..fc552d8c20d43 100644 --- a/src/stream/src/executor/top_n/top_n_plain.rs +++ b/src/stream/src/executor/top_n/top_n_plain.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ use crate::common::table::state_table::StateTable; use crate::error::StreamResult; use crate::executor::error::StreamExecutorResult; use crate::executor::managed_state::top_n::{ManagedTopNState, NO_GROUP_KEY}; -use crate::executor::{ActorContextRef, Executor, ExecutorInfo, PkIndices}; +use crate::executor::{ActorContextRef, Executor, ExecutorInfo, PkIndices, Watermark}; /// `TopNExecutor` works with input with modification, it keeps all the data /// records/rows that have been seen, and returns topN records overall. 
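Editor's note: in the `group_top_n.rs` hunk above, the hand-written forwarding methods on `GroupTopNCache` (`clear`, `get_mut`, `contains`, `insert`, `evict`) are replaced by `Deref`/`DerefMut` into the inner cache, so callers use the inner container's API directly. A std-only sketch of that newtype pattern, with a `HashMap` standing in for `ExecutorCache`:

```rust
use std::collections::HashMap;
use std::ops::{Deref, DerefMut};

// Stand-in for GroupTopNCache: a thin wrapper whose methods were all plain
// forwards to the inner cache, so Deref/DerefMut can replace them.
struct GroupCache(HashMap<String, Vec<u64>>);

impl Deref for GroupCache {
    type Target = HashMap<String, Vec<u64>>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl DerefMut for GroupCache {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

fn main() {
    let mut caches = GroupCache(HashMap::new());
    // Callers now use the inner container's API instead of duplicated
    // wrapper methods such as `insert`, `contains`, or `clear`.
    caches.insert("group-a".into(), vec![1, 2, 3]);
    assert!(caches.contains_key("group-a"));
    caches.clear();
    assert!(caches.is_empty());
}
```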
@@ -163,7 +163,7 @@ impl InnerTopNExecutorNew { info: ExecutorInfo { schema, pk_indices, - identity: format!("TopNExecutorNew {:X}", executor_id), + identity: format!("TopNExecutor {:X}", executor_id), }, managed_state, storage_key_indices: storage_key.into_iter().map(|op| op.column_idx).collect(), @@ -227,6 +227,11 @@ where .init_topn_cache(NO_GROUP_KEY, &mut self.cache) .await } + + async fn handle_watermark(&mut self, _: Watermark) -> Option { + // TODO(yuhao): handle watermark + None + } } #[cfg(test)] diff --git a/src/stream/src/executor/top_n/utils.rs b/src/stream/src/executor/top_n/utils.rs index c8ac2b05a7c10..0b5676c90c23f 100644 --- a/src/stream/src/executor/top_n/utils.rs +++ b/src/stream/src/executor/top_n/utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -32,7 +32,7 @@ use super::top_n_cache::CacheKey; use crate::executor::error::{StreamExecutorError, StreamExecutorResult}; use crate::executor::{ expect_first_barrier, ActorContextRef, BoxedExecutor, BoxedMessageStream, Executor, - ExecutorInfo, Message, PkIndicesRef, + ExecutorInfo, Message, PkIndicesRef, Watermark, }; #[async_trait] @@ -68,6 +68,9 @@ pub trait TopNExecutorBase: Send + 'static { fn evict(&mut self) {} async fn init(&mut self, epoch: EpochPair) -> StreamExecutorResult<()>; + + /// Handle incoming watermarks + async fn handle_watermark(&mut self, watermark: Watermark) -> Option; } /// The struct wraps a [`TopNExecutorBase`] @@ -122,8 +125,10 @@ where for msg in input { let msg = msg?; match msg { - Message::Watermark(_) => { - todo!("https://github.com/risingwavelabs/risingwave/issues/6042") + Message::Watermark(watermark) => { + if let Some(output_watermark) = self.inner.handle_watermark(watermark).await { + yield Message::Watermark(output_watermark); + } } Message::Chunk(chunk) => yield Message::Chunk(self.inner.apply_chunk(chunk).await?), Message::Barrier(barrier) => { diff --git a/src/stream/src/executor/union.rs b/src/stream/src/executor/union.rs index 94a2f5d200f78..824a1f18e5702 100644 --- a/src/stream/src/executor/union.rs +++ b/src/stream/src/executor/union.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/watermark/mod.rs b/src/stream/src/executor/watermark/mod.rs index 60462892c8dbb..4d1b4b32beb6c 100644 --- a/src/stream/src/executor/watermark/mod.rs +++ b/src/stream/src/executor/watermark/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -51,6 +51,15 @@ impl BufferedWatermarks { }); } + pub fn clear(&mut self) { + self.first_buffered_watermarks.clear(); + self.other_buffered_watermarks + .values_mut() + .for_each(|staged_watermarks| { + std::mem::take(staged_watermarks); + }); + } + /// Handle a new watermark message. Optionally returns the watermark message to emit and the /// buffer id. 
pub fn handle_watermark(&mut self, buffer_id: ID, watermark: Watermark) -> Option { diff --git a/src/stream/src/executor/watermark_filter.rs b/src/stream/src/executor/watermark_filter.rs index 7c479feed5273..9ecd56330b392 100644 --- a/src/stream/src/executor/watermark_filter.rs +++ b/src/stream/src/executor/watermark_filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,8 +22,9 @@ use risingwave_common::hash::VirtualNode; use risingwave_common::row::{OwnedRow, Row}; use risingwave_common::types::{DataType, ScalarImpl}; use risingwave_common::{bail, row}; -use risingwave_expr::expr::expr_binary_nonnull::new_binary_expr; -use risingwave_expr::expr::{BoxedExpression, Expression, InputRefExpression, LiteralExpression}; +use risingwave_expr::expr::{ + new_binary_expr, BoxedExpression, Expression, InputRefExpression, LiteralExpression, +}; use risingwave_expr::Result as ExprResult; use risingwave_pb::expr::expr_node::Type; use risingwave_storage::StateStore; @@ -311,8 +312,8 @@ mod tests { .map(|(id, data_type)| ColumnDesc::unnamed(ColumnId::new(id as i32), data_type.clone())) .collect_vec(); - // TODO: may enable sanity check for watermark filter after we have upsert. - StateTable::new_with_distribution_no_sanity_check( + // TODO: use consistent operations for watermark filter after we have upsert. + StateTable::new_with_distribution_inconsistent_op( mem_state, TableId::new(table_id), column_descs, diff --git a/src/stream/src/executor/wrapper.rs b/src/stream/src/executor/wrapper.rs index 4f7230928a9d7..da406af23b3be 100644 --- a/src/stream/src/executor/wrapper.rs +++ b/src/stream/src/executor/wrapper.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/wrapper/epoch_check.rs b/src/stream/src/executor/wrapper/epoch_check.rs index 79d79f47fb431..3b2975d08366b 100644 --- a/src/stream/src/executor/wrapper/epoch_check.rs +++ b/src/stream/src/executor/wrapper/epoch_check.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/wrapper/schema_check.rs b/src/stream/src/executor/wrapper/schema_check.rs index 6137864f31141..4bc99d3b3c8bb 100644 --- a/src/stream/src/executor/wrapper/schema_check.rs +++ b/src/stream/src/executor/wrapper/schema_check.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/executor/wrapper/trace.rs b/src/stream/src/executor/wrapper/trace.rs index 3ffbdb939cabe..9e9753a24370e 100644 --- a/src/stream/src/executor/wrapper/trace.rs +++ b/src/stream/src/executor/wrapper/trace.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
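Editor's note: the `TopNExecutorWrapper` message loop above stops `todo!()`-ing on watermarks and instead asks the inner executor via the new `handle_watermark` hook, yielding the watermark downstream only when the hook returns `Some`. A simplified, synchronous sketch of that dispatch (the real trait method is `async`, and all types below are stand-ins):

```rust
// Stand-ins for the executor message loop; not the actual RisingWave traits.
struct Watermark {
    col_idx: usize,
    val: i64,
}

enum Message {
    Chunk(Vec<i64>),
    Watermark(Watermark),
}

trait TopNBase {
    // Return Some(..) to forward the watermark downstream, None to swallow it.
    fn handle_watermark(&mut self, watermark: Watermark) -> Option<Watermark>;
}

struct GroupTopN {
    group_by: Vec<usize>,
}

impl TopNBase for GroupTopN {
    fn handle_watermark(&mut self, watermark: Watermark) -> Option<Watermark> {
        // Only a watermark on the first group-by column is useful here; the
        // real executor also advances the state table watermark with it.
        (watermark.col_idx == self.group_by[0]).then_some(watermark)
    }
}

fn pump(inner: &mut impl TopNBase, input: Vec<Message>) -> Vec<Watermark> {
    let mut forwarded = Vec::new();
    for msg in input {
        match msg {
            Message::Watermark(w) => {
                if let Some(out) = inner.handle_watermark(w) {
                    forwarded.push(out);
                }
            }
            // In the real wrapper, chunks go through apply_chunk instead.
            Message::Chunk(_) => {}
        }
    }
    forwarded
}

fn main() {
    let mut exec = GroupTopN { group_by: vec![2] };
    let out = pump(
        &mut exec,
        vec![
            Message::Watermark(Watermark { col_idx: 2, val: 100 }),
            Message::Watermark(Watermark { col_idx: 0, val: 50 }),
        ],
    );
    assert_eq!(out.len(), 1);
}
```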
diff --git a/src/stream/src/executor/wrapper/update_check.rs b/src/stream/src/executor/wrapper/update_check.rs index e9ba84696fd17..f14366d55550d 100644 --- a/src/stream/src/executor/wrapper/update_check.rs +++ b/src/stream/src/executor/wrapper/update_check.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/agg_common.rs b/src/stream/src/from_proto/agg_common.rs index d36a914370f90..8d8faa3a619be 100644 --- a/src/stream/src/from_proto/agg_common.rs +++ b/src/stream/src/from_proto/agg_common.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; use std::convert::TryFrom; use std::sync::Arc; @@ -76,6 +77,7 @@ pub fn build_agg_call_from_prost( order_pairs, append_only, filter, + distinct: agg_call_proto.distinct, }) } @@ -131,3 +133,20 @@ pub async fn build_agg_state_storages_from_proto( result } + +pub async fn build_distinct_dedup_table_from_proto( + dedup_tables: &HashMap, + store: S, + vnodes: Option>, +) -> HashMap> { + if dedup_tables.is_empty() { + return HashMap::new(); + } + futures::future::join_all(dedup_tables.iter().map(|(distinct_col, table_pb)| async { + let table = StateTable::from_table_catalog(table_pb, store.clone(), vnodes.clone()).await; + (*distinct_col as usize, table) + })) + .await + .into_iter() + .collect() +} diff --git a/src/stream/src/from_proto/batch_query.rs b/src/stream/src/from_proto/batch_query.rs index 7b2b982296f1e..891a1e7cbbf97 100644 --- a/src/stream/src/from_proto/batch_query.rs +++ b/src/stream/src/from_proto/batch_query.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/chain.rs b/src/stream/src/from_proto/chain.rs index f660bca5c067d..a83c462b4591a 100644 --- a/src/stream/src/from_proto/chain.rs +++ b/src/stream/src/from_proto/chain.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -59,6 +59,17 @@ impl ExecutorBuilder for ChainExecutorBuilder { progress, schema, params.pk_indices, + false, + ) + .boxed(), + ChainType::UpstreamOnly => ChainExecutor::new( + snapshot, + mview, + upstream_indices, + progress, + schema, + params.pk_indices, + true, ) .boxed(), ChainType::Rearrange => RearrangedChainExecutor::new( diff --git a/src/stream/src/from_proto/dml.rs b/src/stream/src/from_proto/dml.rs index a30fcbc4ebd81..97d565f47aa0c 100644 --- a/src/stream/src/from_proto/dml.rs +++ b/src/stream/src/from_proto/dml.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
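Editor's note: `build_distinct_dedup_table_from_proto` above opens one state table per distinct column, concurrently, and keys the result by column index. Below is a condensed sketch of the same shape, assuming only the `futures` crate, with a hypothetical `open_table` standing in for `StateTable::from_table_catalog`:

```rust
use std::collections::HashMap;

use futures::executor::block_on;
use futures::future::join_all;

// Hypothetical stand-in for StateTable::from_table_catalog.
async fn open_table(catalog_id: u32) -> String {
    format!("table-{catalog_id}")
}

async fn build_dedup_tables(catalogs: &HashMap<u32, u32>) -> HashMap<usize, String> {
    if catalogs.is_empty() {
        return HashMap::new();
    }
    // Open all tables concurrently, then key the result by the distinct
    // column index, mirroring the helper in agg_common.rs.
    join_all(catalogs.iter().map(|(distinct_col, catalog_id)| async {
        (*distinct_col as usize, open_table(*catalog_id).await)
    }))
    .await
    .into_iter()
    .collect()
}

fn main() {
    let catalogs = HashMap::from([(3u32, 101u32), (5, 102)]);
    let tables = block_on(build_dedup_tables(&catalogs));
    assert_eq!(tables.len(), 2);
}
```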
@@ -47,6 +47,7 @@ impl ExecutorBuilder for DmlExecutorBuilder { params.executor_id, params.env.dml_manager_ref(), table_id, + node.table_version_id, column_descs, ))) } diff --git a/src/stream/src/from_proto/dynamic_filter.rs b/src/stream/src/from_proto/dynamic_filter.rs index ace14e0173256..7502bc44038fd 100644 --- a/src/stream/src/from_proto/dynamic_filter.rs +++ b/src/stream/src/from_proto/dynamic_filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -56,8 +56,8 @@ impl ExecutorBuilder for DynamicFilterExecutorBuilder { ); } - // TODO: enable sanity check for dynamic filter - let state_table_l = StateTable::from_table_catalog_no_sanity_check( + // TODO: use consistent operation for dynamic filter + let state_table_l = StateTable::from_table_catalog_inconsistent_op( node.get_left_table()?, store.clone(), Some(vnodes), @@ -65,7 +65,7 @@ impl ExecutorBuilder for DynamicFilterExecutorBuilder { .await; let state_table_r = - StateTable::from_table_catalog_no_sanity_check(node.get_right_table()?, store, None) + StateTable::from_table_catalog_inconsistent_op(node.get_right_table()?, store, None) .await; Ok(Box::new(DynamicFilterExecutor::new( diff --git a/src/stream/src/from_proto/expand.rs b/src/stream/src/from_proto/expand.rs index 108678a8638f3..b4897684a04c4 100644 --- a/src/stream/src/from_proto/expand.rs +++ b/src/stream/src/from_proto/expand.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/filter.rs b/src/stream/src/from_proto/filter.rs index 01a8f909ca92c..32341f1c5ebb1 100644 --- a/src/stream/src/from_proto/filter.rs +++ b/src/stream/src/from_proto/filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/global_simple_agg.rs b/src/stream/src/from_proto/global_simple_agg.rs index 8be3c1c26ecf8..e9147c8b4f3a5 100644 --- a/src/stream/src/from_proto/global_simple_agg.rs +++ b/src/stream/src/from_proto/global_simple_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
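Editor's note: the dynamic-filter builder above switches from the `*_no_sanity_check` constructors to `*_inconsistent_op`, i.e. a state table opened in a mode that tolerates upsert-like, inconsistent write operations instead of asserting on them. A rough, std-only illustration of that constructor-flag idea; the actual check semantics in RisingWave may differ:

```rust
use std::collections::HashMap;

// Stand-in state table whose write path either enforces per-key sanity
// checks (consistent ops) or tolerates overwrites (inconsistent ops).
struct StateTable {
    rows: HashMap<String, i64>,
    consistent_op: bool,
}

impl StateTable {
    fn new_consistent() -> Self {
        Self { rows: HashMap::new(), consistent_op: true }
    }

    fn new_inconsistent_op() -> Self {
        Self { rows: HashMap::new(), consistent_op: false }
    }

    fn insert(&mut self, key: &str, value: i64) {
        let prev = self.rows.insert(key.to_string(), value);
        if self.consistent_op {
            // With consistent ops, inserting an existing key is a bug.
            assert!(prev.is_none(), "duplicate insert for key {key}");
        }
    }
}

fn main() {
    let mut table = StateTable::new_inconsistent_op();
    table.insert("k", 1);
    table.insert("k", 2); // tolerated: upsert-like workloads hit this path
    assert_eq!(table.rows["k"], 2);
    let _strict = StateTable::new_consistent();
}
```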
@@ -16,9 +16,13 @@ use risingwave_pb::stream_plan::SimpleAggNode; -use super::agg_common::{build_agg_call_from_prost, build_agg_state_storages_from_proto}; +use super::agg_common::{ + build_agg_call_from_prost, build_agg_state_storages_from_proto, + build_distinct_dedup_table_from_proto, +}; use super::*; use crate::common::table::state_table::StateTable; +use crate::executor::agg_common::AggExecutorArgs; use crate::executor::aggregation::AggCall; use crate::executor::GlobalSimpleAggExecutor; @@ -44,18 +48,27 @@ impl ExecutorBuilder for GlobalSimpleAggExecutorBuilder { build_agg_state_storages_from_proto(node.get_agg_call_states(), store.clone(), None) .await; let result_table = - StateTable::from_table_catalog(node.get_result_table().unwrap(), store, None).await; + StateTable::from_table_catalog(node.get_result_table().unwrap(), store.clone(), None) + .await; + let distinct_dedup_tables = + build_distinct_dedup_table_from_proto(node.get_distinct_dedup_tables(), store, None) + .await; - Ok(GlobalSimpleAggExecutor::new( - params.actor_context, + Ok(GlobalSimpleAggExecutor::new(AggExecutorArgs { input, + actor_ctx: params.actor_context, + pk_indices: params.pk_indices, + executor_id: params.executor_id, + + extreme_cache_size: stream.config.developer.unsafe_stream_extreme_cache_size, + agg_calls, storages, result_table, - params.pk_indices, - params.executor_id, - stream.config.developer.unsafe_stream_extreme_cache_size, - )? + distinct_dedup_tables, + + extra: None, + })? .boxed()) } } diff --git a/src/stream/src/from_proto/group_top_n.rs b/src/stream/src/from_proto/group_top_n.rs index b2449697326cb..6d287f2a464e5 100644 --- a/src/stream/src/from_proto/group_top_n.rs +++ b/src/stream/src/from_proto/group_top_n.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/group_top_n_appendonly.rs b/src/stream/src/from_proto/group_top_n_appendonly.rs new file mode 100644 index 0000000000000..f7093bd1cdbd6 --- /dev/null +++ b/src/stream/src/from_proto/group_top_n_appendonly.rs @@ -0,0 +1,136 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Copyright 2023 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::sync::Arc; + +use risingwave_common::hash::{HashKey, HashKeyDispatcher}; +use risingwave_common::types::DataType; +use risingwave_common::util::sort_util::OrderPair; +use risingwave_pb::stream_plan::GroupTopNNode; + +use super::*; +use crate::common::table::state_table::StateTable; +use crate::executor::{ActorContextRef, AppendOnlyGroupTopNExecutor}; +use crate::task::AtomicU64Ref; + +pub struct AppendOnlyGroupTopNExecutorBuilder; + +#[async_trait::async_trait] +impl ExecutorBuilder for AppendOnlyGroupTopNExecutorBuilder { + type Node = GroupTopNNode; + + async fn new_boxed_executor( + params: ExecutorParams, + node: &Self::Node, + store: impl StateStore, + stream: &mut LocalStreamManagerCore, + ) -> StreamResult { + let group_by: Vec = node + .get_group_key() + .iter() + .map(|idx| *idx as usize) + .collect(); + let table = node.get_table()?; + let vnodes = params.vnode_bitmap.map(Arc::new); + let state_table = StateTable::from_table_catalog(table, store, vnodes).await; + let storage_key = table.get_pk().iter().map(OrderPair::from_prost).collect(); + let [input]: [_; 1] = params.input.try_into().unwrap(); + let group_key_types = group_by + .iter() + .map(|i| input.schema()[*i].data_type()) + .collect(); + let order_by = node.order_by.iter().map(OrderPair::from_prost).collect(); + + assert_eq!(¶ms.pk_indices, input.pk_indices()); + let args = AppendOnlyGroupTopNExecutorDispatcherArgs { + input, + ctx: params.actor_context, + storage_key, + offset_and_limit: (node.offset as usize, node.limit as usize), + order_by, + executor_id: params.executor_id, + group_by, + state_table, + watermark_epoch: stream.get_watermark_epoch(), + with_ties: node.with_ties, + group_key_types, + }; + args.dispatch() + } +} + +struct AppendOnlyGroupTopNExecutorDispatcherArgs { + input: BoxedExecutor, + ctx: ActorContextRef, + storage_key: Vec, + offset_and_limit: (usize, usize), + order_by: Vec, + executor_id: u64, + group_by: Vec, + state_table: StateTable, + watermark_epoch: AtomicU64Ref, + with_ties: bool, + group_key_types: Vec, +} + +impl HashKeyDispatcher for AppendOnlyGroupTopNExecutorDispatcherArgs { + type Output = StreamResult; + + fn dispatch_impl(self) -> Self::Output { + match self.with_ties { + true => Ok(AppendOnlyGroupTopNExecutor::::new( + self.input, + self.ctx, + self.storage_key, + self.offset_and_limit, + self.order_by, + self.executor_id, + self.group_by, + self.state_table, + self.watermark_epoch, + )? + .boxed()), + false => Ok(AppendOnlyGroupTopNExecutor::::new( + self.input, + self.ctx, + self.storage_key, + self.offset_and_limit, + self.order_by, + self.executor_id, + self.group_by, + self.state_table, + self.watermark_epoch, + )? + .boxed()), + } + } + + fn data_types(&self) -> &[DataType] { + &self.group_key_types + } +} diff --git a/src/stream/src/from_proto/hash_agg.rs b/src/stream/src/from_proto/hash_agg.rs index 7021445569030..ca94c7a69e271 100644 --- a/src/stream/src/from_proto/hash_agg.rs +++ b/src/stream/src/from_proto/hash_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
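Editor's note: the new `AppendOnlyGroupTopNExecutorDispatcherArgs` above goes through `HashKeyDispatcher`, so the executor is monomorphized for a concrete group-key type chosen from the runtime schema. A simplified, std-only sketch of that dispatch pattern; the real trait and key types live in `risingwave_common::hash`, and everything below is a stand-in:

```rust
trait HashKey: std::hash::Hash + Eq {}
impl HashKey for u64 {}
impl HashKey for Vec<u8> {}

#[derive(Clone, Copy)]
enum DataType {
    Int64,
    Varchar,
}

trait HashKeyDispatcher {
    type Output;
    fn dispatch_impl<K: HashKey>(self) -> Self::Output;
    fn data_types(&self) -> &[DataType];

    fn dispatch(self) -> Self::Output
    where
        Self: Sized,
    {
        // Fixed-width group keys get a primitive key type; anything else
        // falls back to a serialized (heap-allocated) key.
        let all_fixed = self
            .data_types()
            .iter()
            .all(|t| matches!(t, DataType::Int64));
        if all_fixed {
            self.dispatch_impl::<u64>()
        } else {
            self.dispatch_impl::<Vec<u8>>()
        }
    }
}

struct BuilderArgs {
    group_key_types: Vec<DataType>,
}

impl HashKeyDispatcher for BuilderArgs {
    type Output = String;

    fn dispatch_impl<K: HashKey>(self) -> Self::Output {
        format!("executor keyed by {}", std::any::type_name::<K>())
    }

    fn data_types(&self) -> &[DataType] {
        &self.group_key_types
    }
}

fn main() {
    let args = BuilderArgs {
        group_key_types: vec![DataType::Int64, DataType::Varchar],
    };
    println!("{}", args.dispatch());
}
```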
@@ -20,49 +20,26 @@ use risingwave_common::hash::{HashKey, HashKeyDispatcher}; use risingwave_common::types::DataType; use risingwave_pb::stream_plan::HashAggNode; -use super::agg_common::{build_agg_call_from_prost, build_agg_state_storages_from_proto}; +use super::agg_common::{ + build_agg_call_from_prost, build_agg_state_storages_from_proto, + build_distinct_dedup_table_from_proto, +}; use super::*; use crate::common::table::state_table::StateTable; -use crate::executor::aggregation::{AggCall, AggStateStorage}; -use crate::executor::monitor::StreamingMetrics; -use crate::executor::{ActorContextRef, HashAggExecutor, PkIndices}; -use crate::task::AtomicU64Ref; +use crate::executor::agg_common::{AggExecutorArgs, AggExecutorArgsExtra}; +use crate::executor::aggregation::AggCall; +use crate::executor::HashAggExecutor; pub struct HashAggExecutorDispatcherArgs { - ctx: ActorContextRef, - input: BoxedExecutor, - agg_calls: Vec, - storages: Vec>, - result_table: StateTable, - group_key_indices: Vec, + args: AggExecutorArgs, group_key_types: Vec, - pk_indices: PkIndices, - extreme_cache_size: usize, - executor_id: u64, - watermark_epoch: AtomicU64Ref, - metrics: Arc, - chunk_size: usize, } impl HashKeyDispatcher for HashAggExecutorDispatcherArgs { type Output = StreamResult; fn dispatch_impl(self) -> Self::Output { - Ok(HashAggExecutor::::new( - self.ctx, - self.input, - self.agg_calls, - self.storages, - self.result_table, - self.pk_indices, - self.extreme_cache_size, - self.executor_id, - self.group_key_indices, - self.watermark_epoch, - self.metrics, - self.chunk_size, - )? - .boxed()) + Ok(HashAggExecutor::::new(self.args)?.boxed()) } fn data_types(&self) -> &[DataType] { @@ -108,25 +85,40 @@ impl ExecutorBuilder for HashAggExecutorBuilder { vnodes.clone(), ) .await; + let result_table = StateTable::from_table_catalog( + node.get_result_table().unwrap(), + store.clone(), + vnodes.clone(), + ) + .await; + let distinct_dedup_tables = + build_distinct_dedup_table_from_proto(node.get_distinct_dedup_tables(), store, vnodes) + .await; + + HashAggExecutorDispatcherArgs { + args: AggExecutorArgs { + input, + actor_ctx: params.actor_context, + pk_indices: params.pk_indices, + executor_id: params.executor_id, + + extreme_cache_size: stream.config.developer.unsafe_stream_extreme_cache_size, + + agg_calls, + storages, + result_table, + distinct_dedup_tables, - let result_table = - StateTable::from_table_catalog(node.get_result_table().unwrap(), store, vnodes).await; + extra: Some(AggExecutorArgsExtra { + group_key_indices, - let args = HashAggExecutorDispatcherArgs { - ctx: params.actor_context, - input, - agg_calls, - storages, - result_table, - group_key_indices, + metrics: params.executor_stats, + chunk_size: params.env.config().developer.stream_chunk_size, + watermark_epoch: stream.get_watermark_epoch(), + }), + }, group_key_types, - pk_indices: params.pk_indices, - extreme_cache_size: stream.config.developer.unsafe_stream_extreme_cache_size, - executor_id: params.executor_id, - watermark_epoch: stream.get_watermark_epoch(), - metrics: params.executor_stats, - chunk_size: params.env.config().developer.stream_chunk_size, - }; - args.dispatch() + } + .dispatch() } } diff --git a/src/stream/src/from_proto/hash_join.rs b/src/stream/src/from_proto/hash_join.rs index 82459acd19240..4f0b8b19143ac 100644 --- a/src/stream/src/from_proto/hash_join.rs +++ b/src/stream/src/from_proto/hash_join.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache 
License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -55,8 +55,7 @@ impl ExecutorBuilder for HashJoinExecutorBuilder { .iter() .map(|key| *key as usize) .collect_vec(), - table_l - .distribution_key + node.get_left_deduped_input_pk_indices() .iter() .map(|key| *key as usize) .collect_vec(), @@ -66,8 +65,7 @@ impl ExecutorBuilder for HashJoinExecutorBuilder { .iter() .map(|key| *key as usize) .collect_vec(), - table_r - .distribution_key + node.get_right_deduped_input_pk_indices() .iter() .map(|key| *key as usize) .collect_vec(), diff --git a/src/stream/src/from_proto/hop_window.rs b/src/stream/src/from_proto/hop_window.rs index 14a9aad84fd38..7fed7f55ee033 100644 --- a/src/stream/src/from_proto/hop_window.rs +++ b/src/stream/src/from_proto/hop_window.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/local_simple_agg.rs b/src/stream/src/from_proto/local_simple_agg.rs index 36baa13005d06..ec5491a4d0423 100644 --- a/src/stream/src/from_proto/local_simple_agg.rs +++ b/src/stream/src/from_proto/local_simple_agg.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/lookup.rs b/src/stream/src/from_proto/lookup.rs index 7eaed94406cf8..dfcad316584ca 100644 --- a/src/stream/src/from_proto/lookup.rs +++ b/src/stream/src/from_proto/lookup.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Arc; - -use risingwave_common::catalog::ColumnDesc; -use risingwave_common::util::sort_util::OrderPair; +use risingwave_common::catalog::{ColumnDesc, TableId, TableOption}; +use risingwave_common::util::sort_util::{OrderPair, OrderType}; +use risingwave_pb::plan_common::{OrderType as ProstOrderType, StorageTableDesc}; use risingwave_pb::stream_plan::LookupNode; +use risingwave_storage::table::batch_table::storage_table::StorageTable; +use risingwave_storage::table::Distribution; use super::*; -use crate::common::table::state_table::StateTable; use crate::executor::{LookupExecutor, LookupExecutorParams}; pub struct LookupExecutorBuilder; @@ -51,12 +51,72 @@ impl ExecutorBuilder for LookupExecutorBuilder { .iter() .map(ColumnDesc::from) .collect(); - let state_table = StateTable::from_table_catalog( - lookup.arrangement_table.as_ref().unwrap(), + + let table_desc: &StorageTableDesc = lookup + .get_arrangement_table_info()? + .table_desc + .as_ref() + .unwrap(); + + let table_id = TableId { + table_id: table_desc.table_id, + }; + + let order_types = table_desc + .pk + .iter() + .map(|desc| OrderType::from_prost(&ProstOrderType::from_i32(desc.order_type).unwrap())) + .collect_vec(); + + let column_descs = table_desc + .columns + .iter() + .map(ColumnDesc::from) + .collect_vec(); + let column_ids = column_descs.iter().map(|x| x.column_id).collect_vec(); + + // Use indices based on full table instead of streaming executor output. 
+ let pk_indices = table_desc.pk.iter().map(|k| k.index as usize).collect_vec(); + + let dist_key_indices = table_desc + .dist_key_indices + .iter() + .map(|&k| k as usize) + .collect_vec(); + let distribution = match params.vnode_bitmap { + Some(vnodes) => Distribution { + dist_key_indices, + vnodes: vnodes.into(), + }, + None => Distribution::fallback(), + }; + + let table_option = TableOption { + retention_seconds: if table_desc.retention_seconds > 0 { + Some(table_desc.retention_seconds) + } else { + None + }, + }; + let value_indices = table_desc + .get_value_indices() + .iter() + .map(|&k| k as usize) + .collect_vec(); + let prefix_hint_len = table_desc.get_read_prefix_len_hint() as usize; + + let storage_table = StorageTable::new_partial( store, - params.vnode_bitmap.map(Arc::new), - ) - .await; + table_id, + column_descs, + column_ids, + order_types, + pk_indices, + distribution, + table_option, + value_indices, + prefix_hint_len, + ); Ok(Box::new(LookupExecutor::new(LookupExecutorParams { schema: params.schema, @@ -69,7 +129,7 @@ impl ExecutorBuilder for LookupExecutorBuilder { stream_join_key_indices: lookup.stream_key.iter().map(|x| *x as usize).collect(), arrange_join_key_indices: lookup.arrange_key.iter().map(|x| *x as usize).collect(), column_mapping: lookup.column_mapping.iter().map(|x| *x as usize).collect(), - state_table, + storage_table, watermark_epoch: stream_manager.get_watermark_epoch(), chunk_size: params.env.config().developer.stream_chunk_size, }))) diff --git a/src/stream/src/from_proto/lookup_union.rs b/src/stream/src/from_proto/lookup_union.rs index 5122c026cc6c1..38d2fbf5bf036 100644 --- a/src/stream/src/from_proto/lookup_union.rs +++ b/src/stream/src/from_proto/lookup_union.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/merge.rs b/src/stream/src/from_proto/merge.rs index 39ce75e9da2c2..8fd401c2ab4a9 100644 --- a/src/stream/src/from_proto/merge.rs +++ b/src/stream/src/from_proto/merge.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/mod.rs b/src/stream/src/from_proto/mod.rs index 1b28e5bed3b58..2b800417430a1 100644 --- a/src/stream/src/from_proto/mod.rs +++ b/src/stream/src/from_proto/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
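Editor's note: the lookup builder above stops opening a replicated `StateTable` for the arrangement and instead builds a read-only `StorageTable` view from the `StorageTableDesc`, falling back to a singleton distribution when no vnode bitmap is supplied. A std-only sketch of deriving such a handle from a descriptor; types and field names are stand-ins:

```rust
struct TableDesc {
    pk_indices: Vec<usize>,
    dist_key_indices: Vec<usize>,
    retention_seconds: u32,
}

enum Distribution {
    Hash { dist_key_indices: Vec<usize>, vnodes: Vec<bool> },
    Fallback,
}

struct ReadOnlyTable {
    pk_indices: Vec<usize>,
    distribution: Distribution,
    retention_seconds: Option<u32>,
}

fn build_read_only_table(desc: &TableDesc, vnode_bitmap: Option<Vec<bool>>) -> ReadOnlyTable {
    // A lookup side only needs reads, so it opens a storage-table view
    // instead of maintaining a writable, replicated state table.
    let distribution = match vnode_bitmap {
        Some(vnodes) => Distribution::Hash {
            dist_key_indices: desc.dist_key_indices.clone(),
            vnodes,
        },
        None => Distribution::Fallback,
    };
    ReadOnlyTable {
        pk_indices: desc.pk_indices.clone(),
        distribution,
        retention_seconds: (desc.retention_seconds > 0).then_some(desc.retention_seconds),
    }
}

fn main() {
    let desc = TableDesc {
        pk_indices: vec![0],
        dist_key_indices: vec![0],
        retention_seconds: 0,
    };
    let table = build_read_only_table(&desc, None);
    assert!(matches!(table.distribution, Distribution::Fallback));
    assert!(table.retention_seconds.is_none());
}
```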
@@ -23,6 +23,7 @@ mod expand; mod filter; mod global_simple_agg; mod group_top_n; +mod group_top_n_appendonly; mod hash_agg; mod hash_join; mod hop_window; @@ -57,6 +58,7 @@ use self::expand::*; use self::filter::*; use self::global_simple_agg::*; use self::group_top_n::GroupTopNExecutorBuilder; +use self::group_top_n_appendonly::AppendOnlyGroupTopNExecutorBuilder; use self::hash_agg::*; use self::hash_join::*; use self::hop_window::*; @@ -141,6 +143,7 @@ pub async fn create_executor( NodeBody::DynamicFilter => DynamicFilterExecutorBuilder, NodeBody::ProjectSet => ProjectSetExecutorBuilder, NodeBody::GroupTopN => GroupTopNExecutorBuilder, + NodeBody::AppendOnlyGroupTopN => AppendOnlyGroupTopNExecutorBuilder, NodeBody::Sort => SortExecutorBuilder, NodeBody::WatermarkFilter => WatermarkFilterBuilder, NodeBody::Dml => DmlExecutorBuilder, diff --git a/src/stream/src/from_proto/mview.rs b/src/stream/src/from_proto/mview.rs index 77a5560bf3344..33e1713f14bed 100644 --- a/src/stream/src/from_proto/mview.rs +++ b/src/stream/src/from_proto/mview.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/now.rs b/src/stream/src/from_proto/now.rs index c3991681aebcf..f34f2ab07abe3 100644 --- a/src/stream/src/from_proto/now.rs +++ b/src/stream/src/from_proto/now.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/project.rs b/src/stream/src/from_proto/project.rs index 5f0a9fe2352da..e9c0808921c48 100644 --- a/src/stream/src/from_proto/project.rs +++ b/src/stream/src/from_proto/project.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ // limitations under the License. use multimap::MultiMap; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::expr::build_from_prost; use risingwave_pb::stream_plan::ProjectNode; @@ -42,7 +43,7 @@ impl ExecutorBuilder for ProjectExecutorBuilder { node.get_watermark_input_key() .iter() .map(|key| *key as usize) - .zip_eq( + .zip_eq_fast( node.get_watermark_output_key() .iter() .map(|key| *key as usize), diff --git a/src/stream/src/from_proto/project_set.rs b/src/stream/src/from_proto/project_set.rs index 2dba66bda8316..f1a7d0ca39c1c 100644 --- a/src/stream/src/from_proto/project_set.rs +++ b/src/stream/src/from_proto/project_set.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/row_id_gen.rs b/src/stream/src/from_proto/row_id_gen.rs index c525ab32b624b..92f13d4544e9f 100644 --- a/src/stream/src/from_proto/row_id_gen.rs +++ b/src/stream/src/from_proto/row_id_gen.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
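Editor's note: several hunks in this change replace `itertools::Itertools::zip_eq` with the in-house `zip_eq_fast`/`zip_eq_debug` helpers, which only check the lengths in debug builds. A minimal stand-in implementation of the debug-checked variant:

```rust
// Like Iterator::zip, but in debug builds it panics if the two sides have
// different lengths; release builds skip the check entirely.
struct ZipEqDebug<A, B> {
    a: A,
    b: B,
}

impl<A: Iterator, B: Iterator> Iterator for ZipEqDebug<A, B> {
    type Item = (A::Item, B::Item);

    fn next(&mut self) -> Option<Self::Item> {
        match (self.a.next(), self.b.next()) {
            (Some(a), Some(b)) => Some((a, b)),
            (None, None) => None,
            _ => {
                debug_assert!(false, "zip_eq_debug: iterators have unequal lengths");
                None
            }
        }
    }
}

trait ZipEqDebugExt: Iterator + Sized {
    fn zip_eq_debug<B: IntoIterator>(self, other: B) -> ZipEqDebug<Self, B::IntoIter> {
        ZipEqDebug { a: self, b: other.into_iter() }
    }
}

impl<I: Iterator> ZipEqDebugExt for I {}

fn main() {
    let ops = ["+", "+"];
    let keys = [1, 2];
    for (op, key) in ops.iter().zip_eq_debug(keys.iter()) {
        println!("{op} {key}");
    }
}
```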
diff --git a/src/stream/src/from_proto/sink.rs b/src/stream/src/from_proto/sink.rs index a7a6e1d49a6d3..2a185ff888405 100644 --- a/src/stream/src/from_proto/sink.rs +++ b/src/stream/src/from_proto/sink.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use risingwave_common::catalog::Field; +use risingwave_connector::sink::catalog::SinkType; use risingwave_connector::sink::SinkConfig; use risingwave_pb::stream_plan::SinkNode; @@ -33,13 +33,15 @@ impl ExecutorBuilder for SinkExecutorBuilder { ) -> StreamResult { let [materialize_executor]: [_; 1] = params.input.try_into().unwrap(); - let mut properties = node.get_properties().clone(); - let pk_indices = node - .sink_pk + let sink_desc = node.sink_desc.as_ref().unwrap(); + let sink_type = SinkType::from_proto(sink_desc.get_sink_type().unwrap()); + let mut properties = sink_desc.get_properties().clone(); + let pk_indices = sink_desc + .pk .iter() - .map(|idx| *idx as usize) + .map(|pk| pk.index as usize) .collect::>(); - let schema = node.fields.iter().map(Field::from).collect(); + let schema = sink_desc.columns.iter().map(Into::into).collect(); // This field can be used to distinguish a specific actor in parallelism to prevent // transaction execution errors properties.insert( @@ -56,6 +58,7 @@ impl ExecutorBuilder for SinkExecutorBuilder { params.env.connector_params(), schema, pk_indices, + sink_type, ))) } } diff --git a/src/stream/src/from_proto/sort.rs b/src/stream/src/from_proto/sort.rs index 2da2c5aa6a8f6..fe6d4a28d2e23 100644 --- a/src/stream/src/from_proto/sort.rs +++ b/src/stream/src/from_proto/sort.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/source.rs b/src/stream/src/from_proto/source.rs index 95e8722592bad..1c5dc862f5b48 100644 --- a/src/stream/src/from_proto/source.rs +++ b/src/stream/src/from_proto/source.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -15,15 +15,17 @@ use risingwave_common::catalog::{ColumnId, Field, Schema, TableId}; use risingwave_common::types::DataType; use risingwave_pb::stream_plan::SourceNode; -use risingwave_source::connector_source::SourceDescBuilderV2; +use risingwave_source::source_desc::SourceDescBuilder; use risingwave_storage::panic_store::PanicStateStore; use tokio::sync::mpsc::unbounded_channel; use super::*; -use crate::executor::source_executor_v2::{SourceExecutorV2, StreamSourceCore}; +use crate::executor::source::StreamSourceCore; +use crate::executor::source_executor::SourceExecutor; use crate::executor::state_table_handler::SourceStateTableHandler; use crate::executor::FsSourceExecutor; +const FS_CONNECTORS: &[&str] = &["s3"]; pub struct SourceExecutorBuilder; #[async_trait::async_trait] @@ -46,7 +48,7 @@ impl ExecutorBuilder for SourceExecutorBuilder { let source_id = TableId::new(source.source_id); let source_name = source.source_name.clone(); - let source_desc_builder = SourceDescBuilderV2::new( + let source_desc_builder = SourceDescBuilder::new( source.columns.clone(), params.env.source_metrics(), source.pk_column_ids.clone(), @@ -83,41 +85,34 @@ impl ExecutorBuilder for SourceExecutorBuilder { store.clone(), ) .await; + let stream_source_core = StreamSourceCore::new( + source_id, + source_name, + column_ids, + source_desc_builder, + state_table_handler, + ); - // so ugly here, need some graceful method - let is_s3 = source + let connector = source .properties .get("connector") - .map(|s| s.to_lowercase()) - .unwrap_or_default() - .eq("s3"); - if is_s3 { + .map(|c| c.to_ascii_lowercase()) + .unwrap_or_default(); + let is_fs_connector = FS_CONNECTORS.contains(&connector.as_str()); + + if is_fs_connector { Ok(Box::new(FsSourceExecutor::new( params.actor_context, - source_desc_builder, - source_id, - source_name, - state_table_handler, - column_ids, schema, params.pk_indices, - barrier_receiver, - params.executor_id, - params.operator_id, - params.op_info, + stream_source_core, params.executor_stats, + barrier_receiver, stream.config.barrier_interval_ms as u64, + params.executor_id, )?)) } else { - let stream_source_core = StreamSourceCore::new( - source_id, - source_name, - column_ids, - source_desc_builder, - state_table_handler, - ); - - Ok(Box::new(SourceExecutorV2::new( + Ok(Box::new(SourceExecutor::new( params.actor_context, schema, params.pk_indices, @@ -131,7 +126,7 @@ impl ExecutorBuilder for SourceExecutorBuilder { } else { // If there is no external stream source, then no data should be persisted. We pass a // `PanicStateStore` type here for indication. - Ok(Box::new(SourceExecutorV2::::new( + Ok(Box::new(SourceExecutor::::new( params.actor_context, params.schema, params.pk_indices, diff --git a/src/stream/src/from_proto/top_n.rs b/src/stream/src/from_proto/top_n.rs index 22a24ec361b07..6b80f17c208b9 100644 --- a/src/stream/src/from_proto/top_n.rs +++ b/src/stream/src/from_proto/top_n.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
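Editor's note: the source builder above replaces the ad-hoc `is_s3` check with an `FS_CONNECTORS` list and normalizes the `connector` property before dispatching to the filesystem source executor. A small, std-only sketch of that dispatch:

```rust
use std::collections::HashMap;

const FS_CONNECTORS: &[&str] = &["s3"];

#[derive(Debug, PartialEq)]
enum SourceKind {
    Fs,
    Stream,
}

fn pick_source_kind(properties: &HashMap<String, String>) -> SourceKind {
    // Normalize the connector name, then branch on the filesystem list.
    let connector = properties
        .get("connector")
        .map(|c| c.to_ascii_lowercase())
        .unwrap_or_default();
    if FS_CONNECTORS.contains(&connector.as_str()) {
        SourceKind::Fs
    } else {
        SourceKind::Stream
    }
}

fn main() {
    let props = HashMap::from([("connector".to_string(), "S3".to_string())]);
    assert_eq!(pick_source_kind(&props), SourceKind::Fs);
}
```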
diff --git a/src/stream/src/from_proto/top_n_appendonly.rs b/src/stream/src/from_proto/top_n_appendonly.rs index 16a5e3cd6f8df..3f23dc690a28f 100644 --- a/src/stream/src/from_proto/top_n_appendonly.rs +++ b/src/stream/src/from_proto/top_n_appendonly.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/union.rs b/src/stream/src/from_proto/union.rs index 717b939f4cd6d..4a4b86e319682 100644 --- a/src/stream/src/from_proto/union.rs +++ b/src/stream/src/from_proto/union.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/from_proto/watermark_filter.rs b/src/stream/src/from_proto/watermark_filter.rs index 6db47221d3573..84b39288c7048 100644 --- a/src/stream/src/from_proto/watermark_filter.rs +++ b/src/stream/src/from_proto/watermark_filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -34,18 +34,20 @@ impl ExecutorBuilder for WatermarkFilterBuilder { _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); - let watermark_expr = build_from_prost(node.get_watermark_expr()?)?; - let event_time_col_idx = node.get_event_time_col_idx() as usize; + let watermark_descs = node.get_watermark_descs().clone(); + let [watermark_desc]: [_; 1] = watermark_descs.try_into().unwrap(); + let watermark_expr = build_from_prost(&watermark_desc.expr.unwrap())?; + let event_time_col_idx = watermark_desc.watermark_idx as usize; let vnodes = Arc::new( params .vnode_bitmap .expect("vnodes not set for watermark filter"), ); - // TODO: may enable sanity check for watermark filter after we have upsert. + // TODO: may use consistent op for watermark filter after we have upsert. + let [table]: [_; 1] = node.get_tables().clone().try_into().unwrap(); let table = - StateTable::from_table_catalog_no_sanity_check(node.get_table()?, store, Some(vnodes)) - .await; + StateTable::from_table_catalog_inconsistent_op(&table, store, Some(vnodes)).await; Ok(WatermarkFilterExecutor::new( input, diff --git a/src/stream/src/lib.rs b/src/stream/src/lib.rs index 2b9785274dc82..07f4673542947 100644 --- a/src/stream/src/lib.rs +++ b/src/stream/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/task/barrier_manager.rs b/src/stream/src/task/barrier_manager.rs index c3e3b92720ccd..3116aba68164f 100644 --- a/src/stream/src/task/barrier_manager.rs +++ b/src/stream/src/task/barrier_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
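Editor's note: the watermark-filter builder above now receives lists of watermark descriptors and tables but requires exactly one of each, using `try_into()` on a one-element array to make that expectation explicit. A tiny sketch of the pattern with a stand-in descriptor type:

```rust
#[derive(Debug)]
struct WatermarkDesc {
    watermark_idx: usize,
}

fn build(descs: Vec<WatermarkDesc>) -> usize {
    // `try_into` fails unless `descs.len() == 1`, so a misconfigured plan
    // panics early instead of silently using the wrong descriptor.
    let [desc]: [WatermarkDesc; 1] = descs
        .try_into()
        .expect("watermark filter expects exactly one watermark descriptor");
    desc.watermark_idx
}

fn main() {
    assert_eq!(build(vec![WatermarkDesc { watermark_idx: 3 }]), 3);
}
```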
diff --git a/src/stream/src/task/barrier_manager/managed_state.rs b/src/stream/src/task/barrier_manager/managed_state.rs index dbcfe268a7b14..78bca4fa0ae30 100644 --- a/src/stream/src/task/barrier_manager/managed_state.rs +++ b/src/stream/src/task/barrier_manager/managed_state.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -123,9 +123,13 @@ impl ManagedBarrierState { chain_actor_id: actor, done: matches!(state, ChainState::Done), consumed_epoch: match state { - ChainState::ConsumingUpstream(consumed_epoch) => consumed_epoch, + ChainState::ConsumingUpstream(consumed_epoch, _) => consumed_epoch, ChainState::Done => epoch, }, + consumed_rows: match state { + ChainState::ConsumingUpstream(_, consumed_rows) => consumed_rows, + ChainState::Done => 0, + }, }) .collect(); diff --git a/src/stream/src/task/barrier_manager/progress.rs b/src/stream/src/task/barrier_manager/progress.rs index 2b274c8293926..f5695013505bb 100644 --- a/src/stream/src/task/barrier_manager/progress.rs +++ b/src/stream/src/task/barrier_manager/progress.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,10 +18,11 @@ use super::{BarrierState, LocalBarrierManager}; use crate::task::{ActorId, SharedContext}; type ConsumedEpoch = u64; +type ConsumedRows = u64; #[derive(Debug, Clone, Copy)] pub(super) enum ChainState { - ConsumingUpstream(ConsumedEpoch), + ConsumingUpstream(ConsumedEpoch, ConsumedRows), Done, } @@ -87,18 +88,28 @@ impl CreateMviewProgress { ); } - /// Update the progress to `ConsumingUpstream(consumed_epoch)`. The epoch must be monotonically - /// increasing. + /// Update the progress to `ConsumingUpstream(consumed_epoch, consumed_rows)`. The epoch must be + /// monotonically increasing. /// `current_epoch` should be provided to locate the barrier under concurrent checkpoint. - pub fn update(&mut self, current_epoch: u64, consumed_epoch: ConsumedEpoch) { + /// `current_consumed_rows` is an accumulated value. + pub fn update( + &mut self, + current_epoch: u64, + consumed_epoch: ConsumedEpoch, + current_consumed_rows: ConsumedRows, + ) { match self.state { - Some(ChainState::ConsumingUpstream(last)) => { + Some(ChainState::ConsumingUpstream(last, last_consumed_rows)) => { assert!(last < consumed_epoch); + assert!(last_consumed_rows <= current_consumed_rows); } Some(ChainState::Done) => unreachable!(), None => {} - } - self.update_inner(current_epoch, ChainState::ConsumingUpstream(consumed_epoch)); + }; + self.update_inner( + current_epoch, + ChainState::ConsumingUpstream(consumed_epoch, current_consumed_rows), + ); } /// Finish the progress. If the progress is already finished, then perform no-op. diff --git a/src/stream/src/task/barrier_manager/tests.rs b/src/stream/src/task/barrier_manager/tests.rs index e48abe8821538..432d81dd91517 100644 --- a/src/stream/src/task/barrier_manager/tests.rs +++ b/src/stream/src/task/barrier_manager/tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
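Editor's note: `ChainState::ConsumingUpstream` above gains an accumulated row count alongside the consumed epoch, and `CreateMviewProgress::update` asserts that the epoch strictly increases while the row count never decreases. A std-only sketch of that state machine; the real progress type also reports to the barrier manager:

```rust
type ConsumedEpoch = u64;
type ConsumedRows = u64;

#[derive(Debug, Clone, Copy)]
enum ChainState {
    ConsumingUpstream(ConsumedEpoch, ConsumedRows),
    Done,
}

#[derive(Default)]
struct Progress {
    state: Option<ChainState>,
}

impl Progress {
    fn update(&mut self, consumed_epoch: ConsumedEpoch, consumed_rows: ConsumedRows) {
        match self.state {
            Some(ChainState::ConsumingUpstream(last_epoch, last_rows)) => {
                assert!(last_epoch < consumed_epoch, "epoch must strictly increase");
                assert!(last_rows <= consumed_rows, "row count is accumulated");
            }
            Some(ChainState::Done) => unreachable!("progress already finished"),
            None => {}
        }
        self.state = Some(ChainState::ConsumingUpstream(consumed_epoch, consumed_rows));
    }

    fn finish(&mut self) {
        self.state = Some(ChainState::Done);
    }
}

fn main() {
    let mut progress = Progress::default();
    progress.update(100, 1_000);
    progress.update(200, 1_500);
    progress.finish();
}
```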
diff --git a/src/stream/src/task/env.rs b/src/stream/src/task/env.rs index c9f9669b55e36..32c468ffaddc2 100644 --- a/src/stream/src/task/env.rs +++ b/src/stream/src/task/env.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,11 +14,12 @@ use std::sync::Arc; +use hytra::TrAdder; use risingwave_common::config::StreamingConfig; use risingwave_common::util::addr::HostAddr; +use risingwave_connector::source::monitor::SourceMetrics; use risingwave_connector::ConnectorParams; use risingwave_source::dml_manager::DmlManagerRef; -use risingwave_source::monitor::SourceMetrics; use risingwave_storage::StateStoreImpl; pub(crate) type WorkerNodeId = u32; @@ -47,6 +48,9 @@ pub struct StreamEnvironment { /// Metrics for source. source_metrics: Arc, + + /// Total memory usage in stream. + total_mem_val: Arc>, } impl StreamEnvironment { @@ -67,6 +71,7 @@ impl StreamEnvironment { state_store, dml_manager, source_metrics, + total_mem_val: Arc::new(TrAdder::new()), } } @@ -85,6 +90,7 @@ impl StreamEnvironment { )), dml_manager: Arc::new(DmlManager::default()), source_metrics: Arc::new(SourceMetrics::default()), + total_mem_val: Arc::new(TrAdder::new()), } } @@ -115,4 +121,8 @@ impl StreamEnvironment { pub fn source_metrics(&self) -> Arc { self.source_metrics.clone() } + + pub fn total_mem_usage(&self) -> Arc> { + self.total_mem_val.clone() + } } diff --git a/src/stream/src/task/mod.rs b/src/stream/src/task/mod.rs index 79525154d8ca9..ce0b08b1475e3 100644 --- a/src/stream/src/task/mod.rs +++ b/src/stream/src/task/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/stream/src/task/stream_manager.rs b/src/stream/src/task/stream_manager.rs index bdccdfccf1e88..288343a626d12 100644 --- a/src/stream/src/task/stream_manager.rs +++ b/src/stream/src/task/stream_manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ use anyhow::{anyhow, Context}; use async_recursion::async_recursion; use async_stack_trace::{StackTraceManager, StackTraceReport, TraceConfig}; use futures::FutureExt; +use hytra::TrAdder; use itertools::Itertools; use risingwave_common::bail; use risingwave_common::buffer::Bitmap; @@ -86,6 +87,8 @@ pub struct LocalStreamManagerCore { /// Watermark epoch number. watermark_epoch: AtomicU64Ref, + + total_mem_val: Arc>, } /// `LocalStreamManager` manages all stream executors in this project. 
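Editor's note: `StreamEnvironment` and `LocalStreamManager` above gain a shared `total_mem_val` counter (a `hytra::TrAdder` in the real code) that actor contexts feed as their memory usage is sampled. The sketch below substitutes std's `AtomicI64` for `TrAdder`, and its delta bookkeeping is an assumption made for illustration, not the actual accounting logic:

```rust
use std::sync::atomic::{AtomicI64, Ordering};
use std::sync::Arc;

#[derive(Clone)]
struct ActorContext {
    total_mem_val: Arc<AtomicI64>, // stand-in for hytra::TrAdder<i64>
    last_mem_val: i64,
}

impl ActorContext {
    fn create_with_counter(total_mem_val: Arc<AtomicI64>) -> Self {
        Self { total_mem_val, last_mem_val: 0 }
    }

    // Assumed scheme: record this actor's current usage and fold the delta
    // into the shared process-wide total.
    fn store_mem_usage(&mut self, bytes: i64) {
        let diff = bytes - self.last_mem_val;
        self.total_mem_val.fetch_add(diff, Ordering::Relaxed);
        self.last_mem_val = bytes;
    }
}

fn main() {
    let total = Arc::new(AtomicI64::new(0));
    let mut actor_a = ActorContext::create_with_counter(total.clone());
    let mut actor_b = ActorContext::create_with_counter(total.clone());
    actor_a.store_mem_usage(4096);
    actor_b.store_mem_usage(1024);
    actor_a.store_mem_usage(2048); // usage dropped; the shared total follows
    assert_eq!(total.load(Ordering::Relaxed), 1024 + 2048);
}
```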
@@ -96,6 +99,8 @@ pub struct LocalStreamManager { state_store: StateStoreImpl, context: Arc, streaming_metrics: Arc, + + total_mem_val: Arc>, } pub struct ExecutorParams { @@ -152,6 +157,7 @@ impl LocalStreamManager { state_store: core.state_store.clone(), context: core.context.clone(), streaming_metrics: core.streaming_metrics.clone(), + total_mem_val: core.total_mem_val.clone(), core: Mutex::new(core), } } @@ -362,6 +368,10 @@ impl LocalStreamManager { let mut guard = self.core.lock().await; guard.watermark_epoch = watermark_epoch; } + + pub fn get_total_mem_val(&self) -> Arc> { + self.total_mem_val.clone() + } } fn update_upstreams(context: &SharedContext, ids: &[UpDownActorIds]) { @@ -421,6 +431,7 @@ impl LocalStreamManagerCore { config, stack_trace_manager: async_stack_trace_config.map(StackTraceManager::new), watermark_epoch: Arc::new(AtomicU64::new(0)), + total_mem_val: Arc::new(TrAdder::new()), } } @@ -603,7 +614,8 @@ impl LocalStreamManagerCore { StreamError::from(anyhow!("No such actor with actor id:{}", actor_id)) })?; let mview_definition = &actor.mview_definition; - let actor_context = ActorContext::create(actor_id); + let actor_context = + ActorContext::create_with_counter(actor_id, self.total_mem_val.clone()); let vnode_bitmap = actor .vnode_bitmap .as_ref() @@ -627,7 +639,7 @@ impl LocalStreamManagerCore { subtasks, self.context.clone(), self.streaming_metrics.clone(), - actor_context, + actor_context.clone(), ); let monitor = tokio_metrics::TaskMonitor::new(); @@ -659,7 +671,9 @@ impl LocalStreamManagerCore { metrics .actor_memory_usage .with_label_values(&[&actor_id_str]) - .set(bytes as i64) + .set(bytes as i64); + + actor_context.store_mem_usage(bytes); }, ); self.runtime.spawn(allocation_stated) diff --git a/src/test_runner/Cargo.toml b/src/test_runner/Cargo.toml index b483b3c7dcb39..8e9ee88194e18 100644 --- a/src/test_runner/Cargo.toml +++ b/src/test_runner/Cargo.toml @@ -8,6 +8,12 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] fail = "0.5" sync-point = { path = "../utils/sync-point" } diff --git a/src/test_runner/src/lib.rs b/src/test_runner/src/lib.rs index d993764837e26..13147c8f66d1b 100644 --- a/src/test_runner/src/lib.rs +++ b/src/test_runner/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/test_runner/src/test_runner.rs b/src/test_runner/src/test_runner.rs index 66cc8cf086260..82d1d0e9857c1 100644 --- a/src/test_runner/src/test_runner.rs +++ b/src/test_runner/src/test_runner.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
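The `env.rs` and `stream_manager.rs` hunks above add a shared `total_mem_val: Arc<TrAdder<i64>>` that each actor's context updates through `store_mem_usage`, so the local stream manager can expose total streaming memory usage. The sketch below shows one plausible delta-based bookkeeping scheme for such a shared counter; it substitutes `std::sync::atomic::AtomicI64` for `hytra::TrAdder` and does not claim to reproduce the real `store_mem_usage` body, which is outside this diff.

```rust
use std::sync::atomic::{AtomicI64, Ordering};
use std::sync::Arc;

// Per-actor context holding a handle to the manager-wide counter.
struct ActorContext {
    total_mem_val: Arc<AtomicI64>, // stand-in for Arc<TrAdder<i64>>
    last_mem_val: AtomicI64,       // bytes last reported by this actor
}

impl ActorContext {
    fn store_mem_usage(&self, bytes: usize) {
        // Add only the change since the previous report, so the shared
        // counter always equals the sum of the latest per-actor values.
        let new = bytes as i64;
        let old = self.last_mem_val.swap(new, Ordering::Relaxed);
        self.total_mem_val.fetch_add(new - old, Ordering::Relaxed);
    }
}

fn main() {
    let total = Arc::new(AtomicI64::new(0));
    let actor = ActorContext {
        total_mem_val: total.clone(),
        last_mem_val: AtomicI64::new(0),
    };
    actor.store_mem_usage(1024);
    actor.store_mem_usage(4096); // supersedes the previous report
    assert_eq!(total.load(Ordering::Relaxed), 4096);
}
```

The counter is cloned into every `ActorContext` via `ActorContext::create_with_counter`, which is why the allocation-tracking closure in `stream_manager.rs` can both set the per-actor Prometheus gauge and feed the aggregate value.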
diff --git a/src/tests/compaction_test/Cargo.toml b/src/tests/compaction_test/Cargo.toml index 81502be88c7c9..94889820c71f1 100644 --- a/src/tests/compaction_test/Cargo.toml +++ b/src/tests/compaction_test/Cargo.toml @@ -8,6 +8,12 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" async-trait = "0.1" @@ -15,8 +21,6 @@ bytes = "1" clap = { version = "3", features = ["derive"] } futures = { version = "0.3", default-features = false, features = ["alloc"] } itertools = "0.10" -lazy_static = "1.4" -parking_lot = "0.12" rand = "0.8" risingwave_common = { path = "../../common" } risingwave_compactor = { path = "../../storage/compactor" } @@ -29,7 +33,6 @@ risingwave_rpc_client = { path = "../../rpc_client" } risingwave_rt = { path = "../../utils/runtime" } risingwave_storage = { path = "../../storage", features = ["test"] } risingwave_tracing = { path = "../../tracing" } -serde = { version = "1", features = ["derive"] } tokio = { version = "0.2", package = "madsim-tokio", features = [ "fs", "rt", @@ -39,8 +42,6 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "time", "signal", ] } -toml = "0.5" -tonic = { version = "0.2", package = "madsim-tonic" } tracing = "0.1" [target.'cfg(not(madsim))'.dependencies] diff --git a/src/tests/compaction_test/src/bin/compaction.rs b/src/tests/compaction_test/src/bin/compaction.rs index c6c6c956ec00d..c39ea5d3b6dc9 100644 --- a/src/tests/compaction_test/src/bin/compaction.rs +++ b/src/tests/compaction_test/src/bin/compaction.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ fn main() { let opts = risingwave_compaction_test::CompactionTestOpts::parse(); - risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new_default()); + risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new()); risingwave_rt::main_okk(risingwave_compaction_test::start(opts)) } diff --git a/src/tests/compaction_test/src/bin/delete_range.rs b/src/tests/compaction_test/src/bin/delete_range.rs index 4352bf07b1479..d7d3b7df0de02 100644 --- a/src/tests/compaction_test/src/bin/delete_range.rs +++ b/src/tests/compaction_test/src/bin/delete_range.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -20,7 +20,7 @@ fn main() { let opts = risingwave_compaction_test::CompactionTestOpts::parse(); - risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new_default()); + risingwave_rt::init_risingwave_logger(risingwave_rt::LoggerSettings::new()); risingwave_rt::main_okk(risingwave_compaction_test::start_delete_range(opts)) } diff --git a/src/tests/compaction_test/src/compaction_test_runner.rs b/src/tests/compaction_test/src/compaction_test_runner.rs index a7e056ed189dc..842b5157a0d4c 100644 --- a/src/tests/compaction_test/src/compaction_test_runner.rs +++ b/src/tests/compaction_test/src/compaction_test_runner.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ use std::collections::{BTreeMap, HashSet}; use std::net::SocketAddr; -use std::ops::{Bound, Deref}; +use std::ops::Bound; use std::pin::Pin; use std::sync::Arc; use std::thread::JoinHandle; @@ -24,10 +24,10 @@ use anyhow::anyhow; use bytes::{BufMut, BytesMut}; use clap::Parser; use futures::TryStreamExt; -use itertools::Itertools; use risingwave_common::catalog::TableId; -use risingwave_common::config::{load_config, RwConfig, StorageConfig}; +use risingwave_common::config::{load_config, NO_OVERRIDE}; use risingwave_common::util::addr::HostAddr; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_hummock_sdk::{CompactionGroupId, HummockEpoch, FIRST_VERSION_ID}; use risingwave_pb::common::WorkerType; use risingwave_pb::hummock::{HummockVersion, HummockVersionDelta}; @@ -38,6 +38,7 @@ use risingwave_storage::monitor::{ CompactorMetrics, HummockMetrics, HummockStateStoreMetrics, MonitoredStateStore, MonitoredStorageMetrics, ObjectStoreMetrics, }; +use risingwave_storage::opts::StorageOpts; use risingwave_storage::store::{ReadOptions, StateStoreRead}; use risingwave_storage::{StateStore, StateStoreImpl}; @@ -70,7 +71,7 @@ impl CompactionTestMetrics { /// `./risedev compaction-test --state-store hummock+s3://your-bucket -t ` pub async fn compaction_test_main( _listen_addr: SocketAddr, - client_addr: HostAddr, + advertise_addr: HostAddr, opts: CompactionTestOpts, ) -> anyhow::Result<()> { let meta_listen_addr = opts @@ -90,7 +91,7 @@ pub async fn compaction_test_main( let (compactor_thrd, compactor_shutdown_tx) = start_compactor_thread( opts.meta_address.clone(), - client_addr.to_string(), + advertise_addr.to_string(), opts.state_store.clone(), opts.config_path.clone(), ); @@ -101,7 +102,7 @@ pub async fn compaction_test_main( init_metadata_for_replay( original_meta_endpoint, &opts.meta_address, - &client_addr, + &advertise_addr, opts.ci_mode, &mut table_id, ) @@ -109,7 +110,7 @@ pub async fn compaction_test_main( assert_ne!(0, table_id, "Invalid table_id for correctness checking"); - let version_deltas = pull_version_deltas(original_meta_endpoint, &client_addr).await?; + let version_deltas = pull_version_deltas(original_meta_endpoint, &advertise_addr).await?; tracing::info!( "Pulled delta logs from Meta: len(logs): {}", @@ -133,7 +134,10 @@ pub async fn start_meta_node(listen_addr: String, config_path: String) { "--config-path", &config_path, ]); - let config = load_config(&meta_opts.config_path); + let config = load_config( + &meta_opts.config_path, + Some(meta_opts.override_opts.clone()), + ); assert!( config.meta.enable_compaction_deterministic, "enable_compaction_deterministic should be set" @@ -150,7 +154,7 @@ 
pub async fn start_meta_node(listen_addr: String, config_path: String) { async fn start_compactor_node( meta_rpc_endpoint: String, - client_addr: String, + advertise_addr: String, state_store: String, config_path: String, ) { @@ -158,8 +162,8 @@ async fn start_compactor_node( "compactor-node", "--host", "127.0.0.1:5550", - "--client-address", - &client_addr, + "--advertise-addr", + &advertise_addr, "--meta-address", &meta_rpc_endpoint, "--state-store", @@ -172,7 +176,7 @@ async fn start_compactor_node( pub fn start_compactor_thread( meta_endpoint: String, - client_addr: String, + advertise_addr: String, state_store: String, config_path: String, ) -> (JoinHandle<()>, std::sync::mpsc::Sender<()>) { @@ -185,7 +189,7 @@ pub fn start_compactor_thread( runtime.block_on(async { tokio::spawn(async { tracing::info!("Starting compactor node"); - start_compactor_node(meta_endpoint, client_addr, state_store, config_path).await + start_compactor_node(meta_endpoint, advertise_addr, state_store, config_path).await }); rx.recv().unwrap(); }); @@ -215,7 +219,7 @@ fn start_replay_thread( async fn init_metadata_for_replay( cluster_meta_endpoint: &str, new_meta_endpoint: &str, - client_addr: &HostAddr, + advertise_addr: &HostAddr, ci_mode: bool, table_id: &mut u32, ) -> anyhow::Result<()> { @@ -231,27 +235,27 @@ async fn init_metadata_for_replay( tracing::info!("Ctrl+C received, now exiting"); std::process::exit(0); }, - ret = MetaClient::register_new(cluster_meta_endpoint, WorkerType::RiseCtl, client_addr, 0) => { - meta_client = ret.unwrap(); + ret = MetaClient::register_new(cluster_meta_endpoint, WorkerType::RiseCtl, advertise_addr, 0) => { + (meta_client, _) = ret.unwrap(); }, } let worker_id = meta_client.worker_id(); tracing::info!("Assigned init worker id {}", worker_id); - meta_client.activate(client_addr).await.unwrap(); + meta_client.activate(advertise_addr).await.unwrap(); let tables = meta_client.risectl_list_state_tables().await?; - let compaction_groups = meta_client.risectl_list_compaction_group().await?; - let new_meta_client = - MetaClient::register_new(new_meta_endpoint, WorkerType::RiseCtl, client_addr, 0).await?; - new_meta_client.activate(client_addr).await.unwrap(); + let (new_meta_client, _) = + MetaClient::register_new(new_meta_endpoint, WorkerType::RiseCtl, advertise_addr, 0).await?; + new_meta_client.activate(advertise_addr).await.unwrap(); if ci_mode { let table_to_check = tables.iter().find(|t| t.name == "nexmark_q7").unwrap(); *table_id = table_to_check.id; } + // No need to init compaction_groups, because it will be done when replaying version delta. new_meta_client - .init_metadata_for_replay(tables, compaction_groups) + .init_metadata_for_replay(tables, vec![]) .await?; // shift the sst id to avoid conflict with the original meta node @@ -263,16 +267,20 @@ async fn init_metadata_for_replay( async fn pull_version_deltas( cluster_meta_endpoint: &str, - client_addr: &HostAddr, + advertise_addr: &HostAddr, ) -> anyhow::Result> { // Register to the cluster. 
// We reuse the RiseCtl worker type here - let meta_client = - MetaClient::register_new(cluster_meta_endpoint, WorkerType::RiseCtl, client_addr, 0) - .await?; + let (meta_client, _) = MetaClient::register_new( + cluster_meta_endpoint, + WorkerType::RiseCtl, + advertise_addr, + 0, + ) + .await?; let worker_id = meta_client.worker_id(); tracing::info!("Assigned pull worker id {}", worker_id); - meta_client.activate(client_addr).await.unwrap(); + meta_client.activate(advertise_addr).await.unwrap(); let (handle, shutdown_tx) = MetaClient::start_heartbeat_loop( meta_client.clone(), @@ -299,15 +307,15 @@ async fn start_replay( table_to_check: u32, version_delta_logs: Vec, ) -> anyhow::Result<()> { - let client_addr = "127.0.0.1:7770".parse().unwrap(); + let advertise_addr = "127.0.0.1:7770".parse().unwrap(); tracing::info!( - "Start to replay. Client address is {}, Table id {}", - client_addr, + "Start to replay. Advertise address is {}, Table id {}", + advertise_addr, table_to_check ); let mut metric = CompactionTestMetrics::new(); - let config = load_config(&opts.config_path_for_meta); + let config = load_config(&opts.config_path_for_meta, NO_OVERRIDE); tracing::info!( "Starting replay with config {:?} and opts {:?}", config, @@ -316,11 +324,12 @@ async fn start_replay( // Register to the cluster. // We reuse the RiseCtl worker type here - let meta_client = - MetaClient::register_new(&opts.meta_address, WorkerType::RiseCtl, &client_addr, 0).await?; + let (meta_client, system_params) = + MetaClient::register_new(&opts.meta_address, WorkerType::RiseCtl, &advertise_addr, 0) + .await?; let worker_id = meta_client.worker_id(); tracing::info!("Assigned replay worker id {}", worker_id); - meta_client.activate(&client_addr).await.unwrap(); + meta_client.activate(&advertise_addr).await.unwrap(); let sub_tasks = vec![MetaClient::start_heartbeat_loop( meta_client.clone(), @@ -341,9 +350,8 @@ async fn start_replay( } // Creates a hummock state store *after* we reset the hummock version - let storage_config = Arc::new(config.storage.clone()); - let hummock = - create_hummock_store_with_metrics(&meta_client, storage_config.clone(), &opts).await?; + let storage_opts = Arc::new(StorageOpts::from((&config, &system_params))); + let hummock = create_hummock_store_with_metrics(&meta_client, storage_opts, &opts).await?; // Replay version deltas from FIRST_VERSION_ID to the version before reset let mut modified_compaction_groups = HashSet::::new(); @@ -627,7 +635,9 @@ pub async fn check_compaction_results( mut expect_results: BTreeMap, mut actual_resutls: BTreeMap, ) -> anyhow::Result<()> { - let combined = expect_results.iter_mut().zip_eq(actual_resutls.iter_mut()); + let combined = expect_results + .iter_mut() + .zip_eq_fast(actual_resutls.iter_mut()); for ((e1, expect_iter), (e2, actual_iter)) in combined { assert_eq!(e1, e2); tracing::info!( @@ -669,7 +679,7 @@ struct StorageMetrics { pub async fn create_hummock_store_with_metrics( meta_client: &MetaClient, - storage_config: Arc, + storage_opts: Arc, opts: &CompactionTestOpts, ) -> anyhow::Result> { let metrics = StorageMetrics { @@ -679,15 +689,10 @@ pub async fn create_hummock_store_with_metrics( storage_metrics: Arc::new(MonitoredStorageMetrics::unused()), compactor_metrics: Arc::new(CompactorMetrics::unused()), }; - let rw_config = RwConfig { - storage: storage_config.deref().clone(), - ..Default::default() - }; let state_store_impl = StateStoreImpl::new( &opts.state_store, - "", - &rw_config, + storage_opts, Arc::new(MonitoredHummockMetaClient::new( 
meta_client.clone(), metrics.hummock_metrics.clone(), diff --git a/src/tests/compaction_test/src/delete_range_runner.rs b/src/tests/compaction_test/src/delete_range_runner.rs index 09ffc3708d98e..ed6bd69787ea3 100644 --- a/src/tests/compaction_test/src/delete_range_runner.rs +++ b/src/tests/compaction_test/src/delete_range_runner.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use std::future::Future; use std::ops::Bound; use std::pin::Pin; @@ -24,9 +24,11 @@ use futures::StreamExt; use itertools::Itertools; use rand::rngs::StdRng; use rand::{RngCore, SeedableRng}; +use risingwave_common::catalog::hummock::PROPERTIES_RETENTION_SECOND_KEY; use risingwave_common::catalog::TableId; -use risingwave_common::config::{load_config, StorageConfig}; +use risingwave_common::config::{load_config, RwConfig, NO_OVERRIDE}; use risingwave_hummock_sdk::compact::CompactorRuntimeConfig; +use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; use risingwave_hummock_sdk::filter_key_extractor::{ FilterKeyExtractorImpl, FilterKeyExtractorManager, FullKeyFilterKeyExtractor, }; @@ -37,16 +39,18 @@ use risingwave_meta::hummock::MockHummockMetaClient; use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; use risingwave_object_store::object::parse_remote_object_store; use risingwave_pb::catalog::Table as ProstTable; -use risingwave_pb::hummock::{CompactionConfig, CompactionGroup, TableOption}; +use risingwave_pb::hummock::{CompactionConfig, CompactionGroupInfo}; +use risingwave_pb::meta::SystemParams; use risingwave_rpc_client::HummockMetaClient; use risingwave_storage::hummock::backup_reader::BackupReader; -use risingwave_storage::hummock::compactor::{CompactionExecutor, CompactorContext, Context}; +use risingwave_storage::hummock::compactor::{CompactionExecutor, CompactorContext}; use risingwave_storage::hummock::sstable_store::SstableStoreRef; use risingwave_storage::hummock::store::state_store::LocalHummockStorage; use risingwave_storage::hummock::{ HummockStorage, MemoryLimiter, SstableIdManager, SstableStore, TieredCache, }; use risingwave_storage::monitor::{CompactorMetrics, HummockStateStoreMetrics}; +use risingwave_storage::opts::StorageOpts; use risingwave_storage::storage_value::StorageValue; use risingwave_storage::store::{ReadOptions, StateStoreRead, StateStoreWrite, WriteOptions}; use risingwave_storage::StateStore; @@ -83,23 +87,15 @@ pub fn start_delete_range(opts: CompactionTestOpts) -> Pin anyhow::Result<()> { - let config = load_config(&opts.config_path); - let mut storage_config = config.storage; - storage_config.enable_state_store_v1 = false; + let mut config = load_config(&opts.config_path, NO_OVERRIDE); + config.storage.enable_state_store_v1 = false; let compaction_config = CompactionConfigBuilder::new().build(); - compaction_test( - compaction_config, - storage_config, - &opts.state_store, - 1000000, - 800, - ) - .await + compaction_test(compaction_config, config, &opts.state_store, 1000000, 800).await } async fn compaction_test( compaction_config: CompactionConfig, - storage_config: StorageConfig, + config: RwConfig, state_store_type: &str, test_range: u64, test_count: u64, @@ -122,7 
+118,10 @@ async fn compaction_test( distribution_key: vec![], stream_key: vec![], owner: 0, - properties: Default::default(), + properties: HashMap::::from([( + PROPERTIES_RETENTION_SECOND_KEY.to_string(), + 0.to_string(), + )]), fragment_id: 0, vnode_col_index: None, value_indices: vec![], @@ -134,36 +133,23 @@ async fn compaction_test( append_only: false, row_id_index: None, version: None, + watermark_indices: vec![], }; let mut delete_range_table = delete_key_table.clone(); delete_range_table.id = 2; delete_range_table.name = "delete-range-table".to_string(); - let mut group1 = CompactionGroup { - id: 3, + let group1 = CompactionGroupInfo { + id: StaticCompactionGroupId::StateDefault as _, parent_id: 0, member_table_ids: vec![1], compaction_config: Some(compaction_config.clone()), - table_id_to_options: Default::default(), }; - group1.table_id_to_options.insert( - 1, - TableOption { - retention_seconds: 0, - }, - ); - let mut group2 = CompactionGroup { - id: 4, + let group2 = CompactionGroupInfo { + id: StaticCompactionGroupId::MaterializedView as _, parent_id: 0, member_table_ids: vec![2], compaction_config: Some(compaction_config.clone()), - table_id_to_options: Default::default(), }; - group2.table_id_to_options.insert( - 2, - TableOption { - retention_seconds: 0, - }, - ); hummock_manager_ref .init_metadata_for_version_replay( vec![delete_key_table, delete_range_table], @@ -171,28 +157,36 @@ async fn compaction_test( ) .await?; - let config = Arc::new(storage_config); - + let system_params = SystemParams { + sstable_size_mb: Some(256), + block_size_kb: Some(1024), + bloom_false_positive: Some(0.001), + data_directory: Some("hummock_001".to_string()), + backup_storage_url: Some("memory".to_string()), + backup_storage_directory: Some("backup".to_string()), + ..Default::default() + } + .into(); + let storage_opts = Arc::new(StorageOpts::from((&config, &system_params))); let state_store_metrics = Arc::new(HummockStateStoreMetrics::unused()); let compactor_metrics = Arc::new(CompactorMetrics::unused()); let object_store_metrics = Arc::new(ObjectStoreMetrics::unused()); let remote_object_store = parse_remote_object_store( state_store_type.strip_prefix("hummock+").unwrap(), object_store_metrics.clone(), - false, "Hummock", ) .await; let sstable_store = Arc::new(SstableStore::new( Arc::new(remote_object_store), - config.data_directory.to_string(), - config.block_cache_capacity_mb * (1 << 20), - config.meta_cache_capacity_mb * (1 << 20), + system_params.data_directory().to_string(), + config.storage.block_cache_capacity_mb * (1 << 20), + config.storage.meta_cache_capacity_mb * (1 << 20), TieredCache::none(), )); let store = HummockStorage::new( - config.clone(), + storage_opts.clone(), sstable_store.clone(), BackupReader::unused(), meta_client.clone(), @@ -218,7 +212,7 @@ async fn compaction_test( ); let (compactor_thrd, compactor_shutdown_tx) = run_compactor_thread( - config, + storage_opts, sstable_store, meta_client.clone(), filter_key_extractor_manager, @@ -573,7 +567,7 @@ impl CheckState for DeleteRangeState { } fn run_compactor_thread( - config: Arc, + storage_opts: Arc, sstable_store: SstableStoreRef, meta_client: Arc, filter_key_extractor_manager: Arc, @@ -583,8 +577,8 @@ fn run_compactor_thread( tokio::task::JoinHandle<()>, tokio::sync::oneshot::Sender<()>, ) { - let context = Arc::new(Context { - options: config, + let compactor_context = Arc::new(CompactorContext { + storage_opts, hummock_meta_client: meta_client.clone(), sstable_store, compactor_metrics, @@ -594,14 +588,10 @@ fn 
run_compactor_thread( read_memory_limiter: MemoryLimiter::unlimit(), sstable_id_manager, task_progress_manager: Default::default(), - }); - let context = CompactorContext::with_config( - context, - CompactorRuntimeConfig { + compactor_runtime_config: Arc::new(tokio::sync::Mutex::new(CompactorRuntimeConfig { max_concurrent_task_number: 4, - }, - ); - let compactor_context = Arc::new(context); + })), + }); risingwave_storage::hummock::compactor::Compactor::start_compactor( compactor_context, meta_client, @@ -611,15 +601,18 @@ fn run_compactor_thread( #[cfg(test)] mod tests { - use risingwave_common::config::StorageConfig; + use risingwave_common::config::{RwConfig, StorageConfig}; use risingwave_meta::hummock::compaction::compaction_config::CompactionConfigBuilder; use super::compaction_test; #[tokio::test(flavor = "multi_thread", worker_threads = 3)] async fn test_small_data() { - let storage_config = StorageConfig { - enable_state_store_v1: false, + let config = RwConfig { + storage: StorageConfig { + enable_state_store_v1: false, + ..Default::default() + }, ..Default::default() }; let mut compaction_config = CompactionConfigBuilder::new().build(); @@ -627,14 +620,8 @@ mod tests { compaction_config.level0_tier_compact_file_number = 2; compaction_config.max_bytes_for_level_base = 512 * 1024; compaction_config.sub_level_max_compaction_bytes = 256 * 1024; - compaction_test( - compaction_config, - storage_config, - "hummock+memory", - 10000, - 60, - ) - .await - .unwrap(); + compaction_test(compaction_config, config, "hummock+memory", 10000, 60) + .await + .unwrap(); } } diff --git a/src/tests/compaction_test/src/lib.rs b/src/tests/compaction_test/src/lib.rs index db686e55a0af2..bbd1174d0c7fe 100644 --- a/src/tests/compaction_test/src/lib.rs +++ b/src/tests/compaction_test/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -38,7 +38,7 @@ pub struct CompactionTestOpts { #[clap(long, default_value = "127.0.0.1:6660")] pub host: String, - // Optional, we will use listen_address if not specified. + // Optional, we will use listen_addr if not specified. 
#[clap(long)] pub client_address: Option, @@ -99,8 +99,8 @@ pub fn start(opts: CompactionTestOpts) -> Pin + Send panic!("Invalid state store"); } } - let listen_address = opts.host.parse().unwrap(); - tracing::info!("Server Listening at {}", listen_address); + let listen_addr = opts.host.parse().unwrap(); + tracing::info!("Server Listening at {}", listen_addr); let client_address = opts .client_address @@ -112,7 +112,7 @@ pub fn start(opts: CompactionTestOpts) -> Pin + Send .parse() .unwrap(); - let ret = compaction_test_main(listen_address, client_address, opts).await; + let ret = compaction_test_main(listen_addr, client_address, opts).await; match ret { Ok(_) => { tracing::info!("Success"); diff --git a/src/tests/regress/Cargo.toml b/src/tests/regress/Cargo.toml index 412b00eec6fd3..b126350fe0aaa 100644 --- a/src/tests/regress/Cargo.toml +++ b/src/tests/regress/Cargo.toml @@ -7,12 +7,17 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = { version = "1", features = ["backtrace"] } clap = { version = "3", features = ["derive"] } path-absolutize = "3.0" similar = "2" -tempfile = "3" tokio = { version = "0.2", package = "madsim-tokio", features = ["rt", "rt-multi-thread", "sync", "macros", "time", "signal", "process"] } tracing = "0.1" tracing-subscriber = "0.3.16" diff --git a/src/tests/regress/README.md b/src/tests/regress/README.md index 5b540d5b56e38..69f7c5592c33b 100644 --- a/src/tests/regress/README.md +++ b/src/tests/regress/README.md @@ -31,6 +31,7 @@ In general, these files are not meant to be modified a lot. The test runner has * `cd` to the root directory of risingwave. * Start risingwave cluster. * Run tests against RisingWave. + ```shell RUST_BACKTRACE=1 target/debug/risingwave_regress_test -h 127.0.0.1 \ -p 4566 \ @@ -42,6 +43,7 @@ RUST_BACKTRACE=1 target/debug/risingwave_regress_test -h 127.0.0.1 \ ``` * Run tests against PostgreSQL. Make sure PostgreSQL is running. 
+ ```shell RUST_BACKTRACE=1 target/debug/risingwave_regress_test -h 127.0.0.1 \ -p 5432 \ diff --git a/src/tests/regress/data/expected/float8.out b/src/tests/regress/data/expected/float8.out index adcfc2c1fbf27..ccefc5798c4ab 100644 --- a/src/tests/regress/data/expected/float8.out +++ b/src/tests/regress/data/expected/float8.out @@ -616,7 +616,15 @@ ERROR: cannot take logarithm of zero SELECT ln(f.f1) from FLOAT8_TBL f where f.f1 < '0.0' ; ERROR: cannot take logarithm of a negative number SELECT exp(f.f1) from FLOAT8_TBL f; -ERROR: value out of range: underflow + exp +----------------------- + 1 + 0 + 0 + 7.399123060905129e-16 + 1 +(5 rows) + SELECT f.f1 / '0.0' from FLOAT8_TBL f; ERROR: division by zero SELECT * FROM FLOAT8_TBL; diff --git a/src/tests/regress/data/sql/float8.sql b/src/tests/regress/data/sql/float8.sql index d66d57878fb17..6448a8e2eb0a4 100644 --- a/src/tests/regress/data/sql/float8.sql +++ b/src/tests/regress/data/sql/float8.sql @@ -182,7 +182,7 @@ SELECT ln(f.f1) from FLOAT8_TBL f where f.f1 = '0.0' ; SELECT ln(f.f1) from FLOAT8_TBL f where f.f1 < '0.0' ; -SELECT exp(f.f1) from FLOAT8_TBL f; +--@ SELECT exp(f.f1) from FLOAT8_TBL f; --@ SELECT f.f1 / '0.0' from FLOAT8_TBL f; diff --git a/src/tests/regress/src/bin/main.rs b/src/tests/regress/src/bin/main.rs index 55d6e8011270c..9367139453c2e 100644 --- a/src/tests/regress/src/bin/main.rs +++ b/src/tests/regress/src/bin/main.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/regress/src/env.rs b/src/tests/regress/src/env.rs index 7f343984623b9..5d9dcdd39fe8b 100644 --- a/src/tests/regress/src/env.rs +++ b/src/tests/regress/src/env.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/regress/src/file.rs b/src/tests/regress/src/file.rs index 6acde741ab2cc..08440b195b18f 100644 --- a/src/tests/regress/src/file.rs +++ b/src/tests/regress/src/file.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/regress/src/lib.rs b/src/tests/regress/src/lib.rs index 371620d3b1f0a..fe061b154b064 100644 --- a/src/tests/regress/src/lib.rs +++ b/src/tests/regress/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/regress/src/opts.rs b/src/tests/regress/src/opts.rs index bdbfea807053b..6ac71eeaddd09 100644 --- a/src/tests/regress/src/opts.rs +++ b/src/tests/regress/src/opts.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/tests/regress/src/psql.rs b/src/tests/regress/src/psql.rs index 7c7c35b4ec92a..b1d262a05ee06 100644 --- a/src/tests/regress/src/psql.rs +++ b/src/tests/regress/src/psql.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/regress/src/schedule.rs b/src/tests/regress/src/schedule.rs index f956afa170636..d0f66f69a438e 100644 --- a/src/tests/regress/src/schedule.rs +++ b/src/tests/regress/src/schedule.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/simulation/Cargo.toml b/src/tests/simulation/Cargo.toml index 04a70ce443bd0..43ed2025367e1 100644 --- a/src/tests/simulation/Cargo.toml +++ b/src/tests/simulation/Cargo.toml @@ -4,19 +4,26 @@ version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["serde"] + +[package.metadata.cargo-udeps.ignore] +normal = ["serde"] + [dependencies] anyhow = "1.0" async-trait = "0.1" +aws-sdk-s3 = { version = "0.2.17", package = "madsim-aws-sdk-s3" } clap = "3" console = "0.15" -etcd-client = { version = "0.2.13", package = "madsim-etcd-client" } +etcd-client = { version = "0.2.17", package = "madsim-etcd-client" } futures = { version = "0.3", default-features = false, features = ["alloc"] } glob = "0.3" itertools = "0.10" -madsim = "0.2.13" +madsim = "0.2.17" paste = "1" rand = "0.8" -rdkafka = { package = "madsim-rdkafka", version = "=0.2.13-alpha", features = ["cmake-build"] } +rdkafka = { package = "madsim-rdkafka", version = "=0.2.14-alpha", features = ["cmake-build"] } risingwave_common = { path = "../../common" } risingwave_compactor = { path = "../../storage/compactor" } risingwave_compute = { path = "../../compute" } @@ -30,7 +37,7 @@ serde_derive = "1.0.152" serde_json = "1.0.91" sqllogictest = "0.11.1" tempfile = "3" -tokio = { version = "0.2", package = "madsim-tokio" } +tokio = { version = "0.2.15", package = "madsim-tokio" } tokio-postgres = "0.7.7" tracing = "0.1" tracing-subscriber = "0.3" diff --git a/src/tests/simulation/src/client.rs b/src/tests/simulation/src/client.rs index 3e032827da13a..458ac12abcb5c 100644 --- a/src/tests/simulation/src/client.rs +++ b/src/tests/simulation/src/client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/simulation/src/cluster.rs b/src/tests/simulation/src/cluster.rs index 8d080d8f58f5e..792f50f58e151 100644 --- a/src/tests/simulation/src/cluster.rs +++ b/src/tests/simulation/src/cluster.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ use std::time::Duration; use anyhow::{bail, Result}; use clap::Parser; use futures::future::join_all; +use madsim::net::ipvs::*; use madsim::runtime::{Handle, NodeHandle}; use rand::Rng; use sqllogictest::AsyncDB; @@ -50,6 +51,9 @@ pub struct Configuration { /// The number of compute nodes. pub compute_nodes: usize, + /// The number of meta nodes. + pub meta_nodes: usize, + /// The number of compactor nodes. pub compactor_nodes: usize, @@ -72,6 +76,7 @@ impl Configuration { config_path: CONFIG_PATH.as_os_str().to_string_lossy().into(), frontend_nodes: 2, compute_nodes: 3, + meta_nodes: 1, compactor_nodes: 2, compute_node_cores: 2, etcd_timeout_rate: 0.0, @@ -86,13 +91,14 @@ impl Configuration { /// /// | Name | IP | /// | -------------- | ------------- | -/// | meta | 192.168.1.1 | +/// | meta-x | 192.168.1.x | /// | frontend-x | 192.168.2.x | /// | compute-x | 192.168.3.x | /// | compactor-x | 192.168.4.x | /// | etcd | 192.168.10.1 | /// | kafka-broker | 192.168.11.1 | /// | kafka-producer | 192.168.11.2 | +/// | s3 | 192.168.12.1 | /// | client | 192.168.100.1 | /// | ctl | 192.168.101.1 | pub struct Cluster { @@ -108,6 +114,23 @@ impl Cluster { println!("seed = {}", handle.seed()); println!("{:#?}", conf); + // setup DNS and load balance + let net = madsim::net::NetSim::current(); + net.add_dns_record("etcd", "192.168.10.1".parse().unwrap()); + net.add_dns_record("meta", "192.168.1.1".parse().unwrap()); + + net.add_dns_record("frontend", "192.168.2.0".parse().unwrap()); + net.global_ipvs().add_service( + ServiceAddr::Tcp("192.168.2.0:4566".into()), + Scheduler::RoundRobin, + ); + for i in 1..=conf.frontend_nodes { + net.global_ipvs().add_server( + ServiceAddr::Tcp("192.168.2.0:4566".into()), + &format!("192.168.2.{i}:4566"), + ) + } + // etcd node let etcd_data = conf .etcd_data_path @@ -140,29 +163,46 @@ impl Cluster { }) .build(); + // s3 + handle + .create_node() + .name("s3") + .ip("192.168.12.1".parse().unwrap()) + .init(move || async move { + aws_sdk_s3::server::SimServer::default() + .with_bucket("hummock001") + .serve("0.0.0.0:9301".parse().unwrap()) + .await + }) + .build(); + // wait for the service to be ready tokio::time::sleep(std::time::Duration::from_secs(1)).await; - std::env::set_var("RW_META_ADDR", "https://192.168.1.1:5690/"); + std::env::set_var("RW_META_ADDR", "https://meta:5690/"); // meta node - let opts = risingwave_meta::MetaNodeOpts::parse_from([ - "meta-node", - "--config-path", - &conf.config_path, - "--listen-addr", - "0.0.0.0:5690", - "--backend", - "etcd", - "--etcd-endpoints", - "192.168.10.1:2388", - ]); - handle - .create_node() - .name("meta") - .ip([192, 168, 1, 1].into()) - .init(move || risingwave_meta::start(opts.clone())) - .build(); + for i in 1..=conf.meta_nodes { + let opts = risingwave_meta::MetaNodeOpts::parse_from([ + "meta-node", + "--config-path", + &conf.config_path, + "--listen-addr", + "0.0.0.0:5690", + "--advertise-addr", + &format!("192.168.1.{i}:5690"), + "--backend", + "etcd", + "--etcd-endpoints", + "etcd:2388", + ]); + handle + .create_node() + .name(format!("meta-{i}")) + .ip([192, 168, 1, i as u8].into()) + .init(move || risingwave_meta::start(opts.clone())) + .build(); + } // wait for the service to be ready tokio::time::sleep(std::time::Duration::from_secs(15)).await; @@ -173,12 +213,12 @@ impl Cluster { "frontend-node", "--config-path", &conf.config_path, - "--host", + "--listen-addr", "0.0.0.0:4566", - "--client-address", + "--advertise-addr", &format!("192.168.2.{i}:4566"), "--meta-addr", - 
"192.168.1.1:5690", + "meta:5690", ]); handle .create_node() @@ -194,14 +234,14 @@ impl Cluster { "compute-node", "--config-path", &conf.config_path, - "--host", + "--listen-addr", "0.0.0.0:5688", - "--client-address", + "--advertise-addr", &format!("192.168.3.{i}:5688"), "--meta-address", - "192.168.1.1:5690", + "meta:5690", "--state-store", - "hummock+memory-shared", + "hummock+minio://hummockadmin:hummockadmin@192.168.12.1:9301/hummock001", "--parallelism", &conf.compute_node_cores.to_string(), ]); @@ -220,14 +260,14 @@ impl Cluster { "compactor-node", "--config-path", &conf.config_path, - "--host", + "--listen-addr", "0.0.0.0:6660", - "--client-address", + "--advertise-addr", &format!("192.168.4.{i}:6660"), "--meta-address", - "192.168.1.1:5690", + "meta:5690", "--state-store", - "hummock+memory-shared", + "hummock+minio://hummockadmin:hummockadmin@192.168.12.1:9301/hummock001", ]); handle .create_node() @@ -264,14 +304,13 @@ impl Cluster { /// Run a SQL query from the client. pub async fn run(&mut self, sql: impl Into) -> Result { - let frontend = self.rand_frontend_ip(); let sql = sql.into(); let result = self .client .spawn(async move { // TODO: reuse session - let mut session = RisingWave::connect(frontend, "dev".to_string()) + let mut session = RisingWave::connect("frontend".into(), "dev".into()) .await .expect("failed to connect to RisingWave"); let result = session.run(&sql).await?; @@ -343,8 +382,15 @@ impl Cluster { pub async fn kill_node(&self, opts: &KillOpts) { let mut nodes = vec![]; if opts.kill_meta { - if rand::thread_rng().gen_bool(0.5) { - nodes.push("meta".to_string()); + let rand = rand::thread_rng().gen_range(0..3); + for i in 1..=self.config.meta_nodes { + match rand { + 0 => break, // no killed + 1 => {} // all killed + _ if !rand::thread_rng().gen_bool(0.5) => continue, // random killed + _ => {} + } + nodes.push(format!("meta-{}", i)); } } if opts.kill_frontend { @@ -398,7 +444,7 @@ impl Cluster { } /// Create a node for kafka producer and prepare data. - pub fn create_kafka_producer(&self, datadir: &str) { + pub async fn create_kafka_producer(&self, datadir: &str) { self.handle .create_node() .name("kafka-producer") @@ -407,7 +453,9 @@ impl Cluster { .spawn(crate::kafka::producer( "192.168.11.1:29092", datadir.to_string(), - )); + )) + .await + .unwrap(); } /// Create a kafka topic. @@ -420,21 +468,46 @@ impl Cluster { .spawn(crate::kafka::create_topics("192.168.11.1:29092", topics)); } - /// Return the IP of a random frontend node. - pub fn rand_frontend_ip(&self) -> String { - let i = rand::thread_rng().gen_range(1..=self.config.frontend_nodes); - format!("192.168.2.{i}") + pub fn config(&self) -> Configuration { + self.config.clone() } - /// Return the IP of all frontend nodes. - pub fn frontend_ips(&self) -> Vec { - (1..=self.config.frontend_nodes) - .map(|i| format!("192.168.2.{i}")) - .collect() - } + /// Graceful shutdown all RisingWave nodes. 
+ pub async fn graceful_shutdown(&self) { + let mut nodes = vec![]; + let mut metas = vec![]; + for i in 1..=self.config.meta_nodes { + metas.push(format!("meta-{i}")); + } + for i in 1..=self.config.frontend_nodes { + nodes.push(format!("frontend-{i}")); + } + for i in 1..=self.config.compute_nodes { + nodes.push(format!("compute-{i}")); + } + for i in 1..=self.config.compactor_nodes { + nodes.push(format!("compactor-{i}")); + } - pub fn config(&self) -> Configuration { - self.config.clone() + tracing::info!("graceful shutdown"); + let waiting_time = Duration::from_secs(10); + // shutdown frontends, computes, compactors + for node in &nodes { + self.handle.send_ctrl_c(node); + } + madsim::time::sleep(waiting_time).await; + // shutdown metas + for meta in &metas { + self.handle.send_ctrl_c(meta); + } + madsim::time::sleep(waiting_time).await; + + // check all nodes are exited + for node in nodes.iter().chain(metas.iter()) { + if !self.handle.is_exit(node) { + panic!("failed to graceful shutdown {node} in {waiting_time:?}"); + } + } } } diff --git a/src/tests/simulation/src/ctl_ext.rs b/src/tests/simulation/src/ctl_ext.rs index 3cd93a5d62ec6..562ee1345737b 100644 --- a/src/tests/simulation/src/ctl_ext.rs +++ b/src/tests/simulation/src/ctl_ext.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/simulation/src/kafka.rs b/src/tests/simulation/src/kafka.rs index 3f62855e1f84a..fccf93f686054 100644 --- a/src/tests/simulation/src/kafka.rs +++ b/src/tests/simulation/src/kafka.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,9 +13,9 @@ // limitations under the License. use std::collections::HashMap; +use std::time::SystemTime; use rdkafka::admin::{AdminClient, AdminOptions, NewTopic, TopicReplication}; -use rdkafka::consumer::StreamConsumer; use rdkafka::error::{KafkaError, RDKafkaErrorCode}; use rdkafka::producer::{BaseProducer, BaseRecord}; use rdkafka::ClientConfig; @@ -61,7 +61,10 @@ pub async fn producer(broker_addr: &str, datadir: String) { for file in std::fs::read_dir(datadir).unwrap() { let file = file.unwrap(); let name = file.file_name().into_string().unwrap(); - let (topic, partitions) = name.split_once('.').unwrap(); + let Some((topic, partitions)) = name.split_once('.') else { + tracing::warn!("ignore file: {name:?}. 
expected format \"topic.partitions\""); + continue; + }; admin .create_topics( &[NewTopic::new( @@ -75,10 +78,19 @@ pub async fn producer(broker_addr: &str, datadir: String) { .expect("failed to create topic"); let content = std::fs::read(file.path()).unwrap(); - // binary message data, a file is a message - if topic.ends_with("bin") { + let msgs: Box + Send> = if topic.ends_with("bin") { + // binary message data, a file is a message + Box::new(std::iter::once(content.as_slice())) + } else { + Box::new(content.split(|&b| b == b'\n')) + }; + for msg in msgs { loop { - let record = BaseRecord::<(), _>::to(topic).payload(&content); + let ts = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_millis() as i64; + let record = BaseRecord::<(), _>::to(topic).payload(msg).timestamp(ts); match producer.send(record) { Ok(_) => break, Err((KafkaError::MessageProduction(RDKafkaErrorCode::QueueFull), _)) => { @@ -87,19 +99,6 @@ pub async fn producer(broker_addr: &str, datadir: String) { Err((e, _)) => panic!("failed to send message: {}", e), } } - } else { - for line in content.split(|&b| b == b'\n') { - loop { - let record = BaseRecord::<(), _>::to(topic).payload(line); - match producer.send(record) { - Ok(_) => break, - Err((KafkaError::MessageProduction(RDKafkaErrorCode::QueueFull), _)) => { - producer.flush(None).await.expect("failed to flush"); - } - Err((e, _)) => panic!("failed to send message: {}", e), - } - } - } } producer.flush(None).await.expect("failed to flush"); } diff --git a/src/tests/simulation/src/lib.rs b/src/tests/simulation/src/lib.rs index 732500c6f35e7..5d41b5094502f 100644 --- a/src/tests/simulation/src/lib.rs +++ b/src/tests/simulation/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/simulation/src/main.rs b/src/tests/simulation/src/main.rs index eb756e1a97de6..059d7a985da4b 100644 --- a/src/tests/simulation/src/main.rs +++ b/src/tests/simulation/src/main.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -51,6 +51,10 @@ pub struct Args { #[clap(long, default_value = "2")] compactor_nodes: usize, + /// The number of meta nodes. + #[clap(long, default_value = "3")] + meta_nodes: usize, + /// The number of CPU cores for each compute node. /// /// This determines worker_node_parallelism. 
@@ -124,10 +128,13 @@ async fn main() { use risingwave_simulation::client::RisingWave; use risingwave_simulation::cluster::{Cluster, Configuration, KillOpts}; use risingwave_simulation::slt::*; + use tracing_subscriber::EnvFilter; tracing_subscriber::fmt() + .with_env_filter(EnvFilter::from_default_env()) // no ANSI color codes when output to file .with_ansi(console::colors_enabled_stderr() && console::colors_enabled()) + .with_writer(std::io::stderr) .init(); let args = Args::parse(); @@ -137,6 +144,7 @@ async fn main() { compute_nodes: args.compute_nodes, compactor_nodes: args.compactor_nodes, compute_node_cores: args.compute_node_cores, + meta_nodes: args.meta_nodes, etcd_timeout_rate: args.etcd_timeout_rate, etcd_data_path: args.etcd_data, }; @@ -155,14 +163,15 @@ async fn main() { ); if let Some(datadir) = args.kafka_datadir { - cluster.create_kafka_producer(&datadir); + cluster.create_kafka_producer(&datadir).await; } if let Some(count) = args.sqlsmith { - let host = cluster.rand_frontend_ip(); cluster .run_on_client(async move { - let rw = RisingWave::connect(host, "dev".into()).await.unwrap(); + let rw = RisingWave::connect("frontend".into(), "dev".into()) + .await + .unwrap(); risingwave_sqlsmith::runner::run(rw.pg_client(), &args.files, count).await; }) .await; @@ -174,7 +183,7 @@ async fn main() { .run_on_client(async move { let glob = &args.files; if let Some(jobs) = args.jobs { - run_parallel_slt_task(cluster0, glob, jobs).await.unwrap(); + run_parallel_slt_task(glob, jobs).await.unwrap(); } else { run_slt_task(cluster0, glob, &kill_opts).await; } @@ -192,4 +201,5 @@ async fn main() { }) .await; } + cluster.graceful_shutdown().await; } diff --git a/src/tests/simulation/src/nexmark.rs b/src/tests/simulation/src/nexmark.rs index b4e7c08ccdd34..5372e4f1df6c5 100644 --- a/src/tests/simulation/src/nexmark.rs +++ b/src/tests/simulation/src/nexmark.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -67,58 +67,8 @@ impl NexmarkCluster { }; self.run(format!( - r#" -create source auction ( - id INTEGER, - item_name VARCHAR, - description VARCHAR, - initial_bid INTEGER, - reserve INTEGER, - date_time TIMESTAMP, - expires TIMESTAMP, - seller INTEGER, - category INTEGER) -with ( - connector = 'nexmark', - nexmark.table.type = 'Auction' - {extra_args} -) row format JSON; -"#, - )) - .await?; - - self.run(format!( - r#" -create source bid ( - auction INTEGER, - bidder INTEGER, - price INTEGER, - "date_time" TIMESTAMP) -with ( - connector = 'nexmark', - nexmark.table.type = 'Bid' - {extra_args} -) row format JSON; -"#, - )) - .await?; - - self.run(format!( - r#" -create source person ( - id INTEGER, - name VARCHAR, - email_address VARCHAR, - credit_card VARCHAR, - city VARCHAR, - state VARCHAR, - date_time TIMESTAMP) -with ( - connector = 'nexmark', - nexmark.table.type = 'Person' - {extra_args} -) row format JSON; -"#, + include_str!("nexmark/create_source.sql"), + extra_args = extra_args )) .await?; @@ -141,7 +91,6 @@ impl DerefMut for NexmarkCluster { } /// Nexmark queries. -// TODO: import the query from external files and avoid duplicating the queries in the code. pub mod queries { use std::time::Duration; @@ -149,372 +98,100 @@ pub mod queries { const DEFAULT_INITIAL_TIMEOUT: Duration = Duration::from_secs(20); pub mod q3 { - //! Covers hash inner join. 
- use super::*; - pub const CREATE: &str = r#" -CREATE MATERIALIZED VIEW nexmark_q3 -AS -SELECT - P.name, P.city, P.state, A.id -FROM - auction AS A INNER JOIN person AS P on A.seller = P.id -WHERE - A.category = 10 and (P.state = 'or' OR P.state = 'id' OR P.state = 'ca'); -"#; - pub const SELECT: &str = r#" -SELECT * FROM nexmark_q3 ORDER BY id; -"#; - pub const DROP: &str = r#" -DROP MATERIALIZED VIEW nexmark_q3; -"#; + pub const CREATE: &str = include_str!("nexmark/q3.sql"); + pub const SELECT: &str = "SELECT * FROM nexmark_q3 ORDER BY id;"; + pub const DROP: &str = "DROP MATERIALIZED VIEW nexmark_q3;"; pub const INITIAL_INTERVAL: Duration = DEFAULT_INITIAL_INTERVAL; pub const INITIAL_TIMEOUT: Duration = DEFAULT_INITIAL_TIMEOUT; } pub mod q4 { - //! Covers hash inner join and hash aggregation. - use super::*; - pub const CREATE: &str = r#" -CREATE MATERIALIZED VIEW nexmark_q4 -AS -SELECT - Q.category, - AVG(Q.final) as avg -FROM ( - SELECT - MAX(B.price) AS final,A.category - FROM - auction A, - bid B - WHERE - A.id = B.auction AND - B.date_time BETWEEN A.date_time AND A.expires - GROUP BY - A.id,A.category - ) Q -GROUP BY - Q.category; -"#; - pub const SELECT: &str = r#" -SELECT * FROM nexmark_q4 ORDER BY category; -"#; - pub const DROP: &str = r#" -DROP MATERIALIZED VIEW nexmark_q4; -"#; + pub const CREATE: &str = include_str!("nexmark/q4.sql"); + pub const SELECT: &str = "SELECT * FROM nexmark_q4 ORDER BY category;"; + pub const DROP: &str = "DROP MATERIALIZED VIEW nexmark_q4;"; pub const INITIAL_INTERVAL: Duration = DEFAULT_INITIAL_INTERVAL; pub const INITIAL_TIMEOUT: Duration = DEFAULT_INITIAL_TIMEOUT; } pub mod q5 { - //! Covers self-join. - use super::*; - pub const CREATE: &str = r#" -CREATE MATERIALIZED VIEW nexmark_q5 -AS -SELECT AuctionBids.auction, AuctionBids.num FROM ( - SELECT - bid.auction, - count(*) AS num, - window_start AS starttime - FROM - HOP(bid, date_time, INTERVAL '2' SECOND, INTERVAL '10' SECOND) - GROUP BY - window_start, - bid.auction -) AS AuctionBids -JOIN ( - SELECT - max(CountBids.num) AS maxn, - CountBids.starttime_c - FROM ( - SELECT - count(*) AS num, - window_start AS starttime_c - FROM HOP(bid, date_time, INTERVAL '2' SECOND, INTERVAL '10' SECOND) - GROUP BY - bid.auction, - window_start - ) AS CountBids - GROUP BY - CountBids.starttime_c -) AS MaxBids -ON AuctionBids.starttime = MaxBids.starttime_c AND AuctionBids.num >= MaxBids.maxn; -"#; - pub const SELECT: &str = r#" -SELECT * FROM nexmark_q5 ORDER BY auction; -"#; - pub const DROP: &str = r#" -DROP MATERIALIZED VIEW nexmark_q5; -"#; + pub const CREATE: &str = include_str!("nexmark/q5.sql"); + pub const SELECT: &str = "SELECT * FROM nexmark_q5 ORDER BY auction;"; + pub const DROP: &str = "DROP MATERIALIZED VIEW nexmark_q5;"; pub const INITIAL_INTERVAL: Duration = DEFAULT_INITIAL_INTERVAL; pub const INITIAL_TIMEOUT: Duration = DEFAULT_INITIAL_TIMEOUT; } pub mod q7 { - //! Covers self-join. 
- use super::*; - pub const CREATE: &str = r#" -CREATE MATERIALIZED VIEW nexmark_q7 -AS -SELECT - B.auction, - B.price, - B.bidder, - B.date_time -FROM - bid B -JOIN ( - SELECT - MAX(price) AS maxprice, - window_end as date_time - FROM - TUMBLE(bid, date_time, INTERVAL '10' SECOND) - GROUP BY - window_end -) B1 ON B.price = B1.maxprice -WHERE - B.date_time BETWEEN B1.date_time - INTERVAL '10' SECOND - AND B1.date_time; -"#; - pub const SELECT: &str = r#" -SELECT * FROM nexmark_q7 ORDER BY date_time; -"#; - pub const DROP: &str = r#" -DROP MATERIALIZED VIEW nexmark_q7; -"#; + pub const CREATE: &str = include_str!("nexmark/q7.sql"); + pub const SELECT: &str = "SELECT * FROM nexmark_q7 ORDER BY date_time;"; + pub const DROP: &str = "DROP MATERIALIZED VIEW nexmark_q7;"; pub const INITIAL_INTERVAL: Duration = DEFAULT_INITIAL_INTERVAL; pub const INITIAL_TIMEOUT: Duration = DEFAULT_INITIAL_TIMEOUT; } pub mod q8 { - //! Covers self-join. - use super::*; - pub const CREATE: &str = r#" -CREATE MATERIALIZED VIEW nexmark_q8 -AS -SELECT - P.id, - P.name, - P.starttime -FROM ( - SELECT - id, - name, - window_start AS starttime, - window_end AS endtime - FROM - TUMBLE(person, date_time, INTERVAL '10' SECOND) - GROUP BY - id, - name, - window_start, - window_end -) P -JOIN ( - SELECT - seller, - window_start AS starttime, - window_end AS endtime - FROM - TUMBLE(auction, date_time, INTERVAL '10' SECOND) - GROUP BY - seller, - window_start, - window_end -) A ON P.id = A.seller - AND P.starttime = A.starttime - AND P.endtime = A.endtime; -"#; - pub const SELECT: &str = r#" -SELECT * FROM nexmark_q8 ORDER BY id; -"#; - pub const DROP: &str = r#" -DROP MATERIALIZED VIEW nexmark_q8; -"#; + pub const CREATE: &str = include_str!("nexmark/q8.sql"); + pub const SELECT: &str = "SELECT * FROM nexmark_q8 ORDER BY id;"; + pub const DROP: &str = "DROP MATERIALIZED VIEW nexmark_q8;"; pub const INITIAL_INTERVAL: Duration = DEFAULT_INITIAL_INTERVAL; pub const INITIAL_TIMEOUT: Duration = DEFAULT_INITIAL_TIMEOUT; } pub mod q9 { - //! Covers group top-n. - use super::*; - pub const CREATE: &str = r#" -CREATE MATERIALIZED VIEW nexmark_q9 -AS -SELECT - id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, - auction, bidder, price, bid_date_time -FROM ( - SELECT A.*, B.auction, B.bidder, B.price, B.date_time AS bid_date_time, - ROW_NUMBER() OVER (PARTITION BY A.id ORDER BY B.price DESC, B.date_time ASC) AS rownum - FROM auction A, bid B - WHERE A.id = B.auction AND B.date_time BETWEEN A.date_time AND A.expires -) -WHERE rownum <= 1; -"#; - pub const SELECT: &str = r#" -SELECT * FROM nexmark_q9 ORDER BY id; -"#; - pub const DROP: &str = r#" -DROP MATERIALIZED VIEW nexmark_q9; -"#; + pub const CREATE: &str = include_str!("nexmark/q9.sql"); + pub const SELECT: &str = "SELECT * FROM nexmark_q9 ORDER BY id;"; + pub const DROP: &str = "DROP MATERIALIZED VIEW nexmark_q9;"; pub const INITIAL_INTERVAL: Duration = DEFAULT_INITIAL_INTERVAL; pub const INITIAL_TIMEOUT: Duration = DEFAULT_INITIAL_TIMEOUT; } pub mod q101 { - //! A self-made query that covers outer join. - //! - //! Monitor ongoing auctions and track the current highest bid for each one in real-time. If - //! the auction has no bids, the highest bid will be NULL. 
- use super::*; - pub const CREATE: &str = r#" -CREATE MATERIALIZED VIEW nexmark_q101 -AS -SELECT - a.id AS auction_id, - a.item_name AS auction_item_name, - b.max_price AS current_highest_bid -FROM auction a -LEFT OUTER JOIN ( - SELECT - b1.auction, - MAX(b1.price) max_price - FROM bid b1 - GROUP BY b1.auction -) b ON a.id = b.auction; -"#; - pub const SELECT: &str = r#" -SELECT * FROM nexmark_q101 ORDER BY auction_id; -"#; - pub const DROP: &str = r#" -DROP MATERIALIZED VIEW nexmark_q101; -"#; + pub const CREATE: &str = include_str!("nexmark/q101.sql"); + pub const SELECT: &str = "SELECT * FROM nexmark_q101 ORDER BY auction_id;"; + pub const DROP: &str = "DROP MATERIALIZED VIEW nexmark_q101;"; pub const INITIAL_INTERVAL: Duration = DEFAULT_INITIAL_INTERVAL; pub const INITIAL_TIMEOUT: Duration = DEFAULT_INITIAL_TIMEOUT; } pub mod q102 { - //! A self-made query that covers dynamic filter and simple aggregation. - //! - //! Show the auctions whose count of bids is greater than the overall average count of bids - //! per auction. - use super::*; - pub const CREATE: &str = r#" -CREATE MATERIALIZED VIEW nexmark_q102 -AS -SELECT - a.id AS auction_id, - a.item_name AS auction_item_name, - COUNT(b.auction) AS bid_count -FROM auction a -JOIN bid b ON a.id = b.auction -GROUP BY a.id, a.item_name -HAVING COUNT(b.auction) >= ( - SELECT COUNT(*) / COUNT(DISTINCT auction) FROM bid -); -"#; - pub const SELECT: &str = r#" -SELECT * FROM nexmark_q102 ORDER BY auction_id; -"#; - pub const DROP: &str = r#" -DROP MATERIALIZED VIEW nexmark_q102; -"#; + pub const CREATE: &str = include_str!("nexmark/q102.sql"); + pub const SELECT: &str = "SELECT * FROM nexmark_q102 ORDER BY auction_id;"; + pub const DROP: &str = "DROP MATERIALIZED VIEW nexmark_q102;"; pub const INITIAL_INTERVAL: Duration = DEFAULT_INITIAL_INTERVAL; pub const INITIAL_TIMEOUT: Duration = DEFAULT_INITIAL_TIMEOUT; } pub mod q103 { - //! A self-made query that covers semi join. - //! - //! Show the auctions that have at least 20 bids. - use super::*; - pub const CREATE: &str = r#" -CREATE MATERIALIZED VIEW nexmark_q103 -AS -SELECT - a.id AS auction_id, - a.item_name AS auction_item_name -FROM auction a -WHERE a.id IN ( - SELECT b.auction FROM bid b - GROUP BY b.auction - HAVING COUNT(*) >= 20 -); -"#; - pub const SELECT: &str = r#" -SELECT * FROM nexmark_q103 ORDER BY auction_id; -"#; - pub const DROP: &str = r#" -DROP MATERIALIZED VIEW nexmark_q103; -"#; + pub const CREATE: &str = include_str!("nexmark/q103.sql"); + pub const SELECT: &str = "SELECT * FROM nexmark_q103 ORDER BY auction_id;"; + pub const DROP: &str = "DROP MATERIALIZED VIEW nexmark_q103;"; pub const INITIAL_INTERVAL: Duration = DEFAULT_INITIAL_INTERVAL; pub const INITIAL_TIMEOUT: Duration = DEFAULT_INITIAL_TIMEOUT; } pub mod q104 { - //! A self-made query that covers anti join. - //! - //! This is the same as q103, which shows the auctions that have at least 20 bids. 
- use super::*; - pub const CREATE: &str = r#" -CREATE MATERIALIZED VIEW nexmark_q104 -AS -SELECT - a.id AS auction_id, - a.item_name AS auction_item_name -FROM auction a -WHERE a.id NOT IN ( - SELECT b.auction FROM bid b - GROUP BY b.auction - HAVING COUNT(*) < 20 -); -"#; - pub const SELECT: &str = r#" -SELECT * FROM nexmark_q104 ORDER BY auction_id; -"#; - pub const DROP: &str = r#" -DROP MATERIALIZED VIEW nexmark_q104; -"#; + pub const CREATE: &str = include_str!("nexmark/q104.sql"); + pub const SELECT: &str = "SELECT * FROM nexmark_q104 ORDER BY auction_id;"; + pub const DROP: &str = "DROP MATERIALIZED VIEW nexmark_q104;"; pub const INITIAL_INTERVAL: Duration = DEFAULT_INITIAL_INTERVAL; pub const INITIAL_TIMEOUT: Duration = DEFAULT_INITIAL_TIMEOUT; } pub mod q105 { - //! A self-made query that covers singleton top-n (and local-phase group top-n). - //! - //! Show the top 1000 auctions by the number of bids. - use super::*; - pub const CREATE: &str = r#" -CREATE MATERIALIZED VIEW nexmark_q105 -AS -SELECT - a.id AS auction_id, - a.item_name AS auction_item_name, - COUNT(b.auction) AS bid_count -FROM auction a -JOIN bid b ON a.id = b.auction -GROUP BY a.id, a.item_name -ORDER BY bid_count DESC -LIMIT 1000; -"#; - pub const SELECT: &str = r#" -SELECT * FROM nexmark_q105; -"#; - pub const DROP: &str = r#" -DROP MATERIALIZED VIEW nexmark_q105; -"#; + pub const CREATE: &str = include_str!("nexmark/q105.sql"); + pub const SELECT: &str = "SELECT * FROM nexmark_q105;"; + pub const DROP: &str = "DROP MATERIALIZED VIEW nexmark_q105;"; pub const INITIAL_INTERVAL: Duration = DEFAULT_INITIAL_INTERVAL; pub const INITIAL_TIMEOUT: Duration = DEFAULT_INITIAL_TIMEOUT; } diff --git a/src/tests/simulation/src/nexmark/create_source.sql b/src/tests/simulation/src/nexmark/create_source.sql new file mode 100644 index 0000000000000..8427779980f5b --- /dev/null +++ b/src/tests/simulation/src/nexmark/create_source.sql @@ -0,0 +1,45 @@ +create source auction ( + id BIGINT, + "item_name" VARCHAR, + description VARCHAR, + "initial_bid" BIGINT, + reserve BIGINT, + "date_time" TIMESTAMP, + expires TIMESTAMP, + seller BIGINT, + category BIGINT, + "extra" VARCHAR) +with ( + connector = 'nexmark', + nexmark.table.type = 'Auction' + {extra_args} +); + +create source bid ( + auction BIGINT, + bidder BIGINT, + price BIGINT, + "channel" VARCHAR, + "url" VARCHAR, + "date_time" TIMESTAMP, + "extra" VARCHAR) +with ( + connector = 'nexmark', + nexmark.table.type = 'Bid' + {extra_args} +); + +create source person ( + id BIGINT, + name VARCHAR, + "email_address" VARCHAR, + "credit_card" VARCHAR, + city VARCHAR, + state VARCHAR, + "date_time" TIMESTAMP, + "extra" VARCHAR) +with ( + connector = 'nexmark', + nexmark.table.type = 'Person' + {extra_args} +); diff --git a/src/tests/simulation/src/nexmark/q101.sql b/src/tests/simulation/src/nexmark/q101.sql new file mode 100644 index 0000000000000..8d2ecb6a6cc51 --- /dev/null +++ b/src/tests/simulation/src/nexmark/q101.sql @@ -0,0 +1,19 @@ +-- A self-made query that covers outer join. +-- +-- Monitor ongoing auctions and track the current highest bid for each one in real-time. If +-- the auction has no bids, the highest bid will be NULL. 
+ +CREATE MATERIALIZED VIEW nexmark_q101 +AS +SELECT + a.id AS auction_id, + a.item_name AS auction_item_name, + b.max_price AS current_highest_bid +FROM auction a +LEFT OUTER JOIN ( + SELECT + b1.auction, + MAX(b1.price) max_price + FROM bid b1 + GROUP BY b1.auction +) b ON a.id = b.auction; diff --git a/src/tests/simulation/src/nexmark/q102.sql b/src/tests/simulation/src/nexmark/q102.sql new file mode 100644 index 0000000000000..f31eedcdcdd8d --- /dev/null +++ b/src/tests/simulation/src/nexmark/q102.sql @@ -0,0 +1,17 @@ +-- A self-made query that covers dynamic filter and simple aggregation. +-- +-- Show the auctions whose count of bids is greater than the overall average count of bids +-- per auction. + +CREATE MATERIALIZED VIEW nexmark_q102 +AS +SELECT + a.id AS auction_id, + a.item_name AS auction_item_name, + COUNT(b.auction) AS bid_count +FROM auction a +JOIN bid b ON a.id = b.auction +GROUP BY a.id, a.item_name +HAVING COUNT(b.auction) >= ( + SELECT COUNT(*) / COUNT(DISTINCT auction) FROM bid +); diff --git a/src/tests/simulation/src/nexmark/q103.sql b/src/tests/simulation/src/nexmark/q103.sql new file mode 100644 index 0000000000000..85c285e4a2cb0 --- /dev/null +++ b/src/tests/simulation/src/nexmark/q103.sql @@ -0,0 +1,15 @@ +-- A self-made query that covers semi join. +-- +-- Show the auctions that have at least 20 bids. + +CREATE MATERIALIZED VIEW nexmark_q103 +AS +SELECT + a.id AS auction_id, + a.item_name AS auction_item_name +FROM auction a +WHERE a.id IN ( + SELECT b.auction FROM bid b + GROUP BY b.auction + HAVING COUNT(*) >= 20 +); diff --git a/src/tests/simulation/src/nexmark/q104.sql b/src/tests/simulation/src/nexmark/q104.sql new file mode 100644 index 0000000000000..62ebbc99f0f85 --- /dev/null +++ b/src/tests/simulation/src/nexmark/q104.sql @@ -0,0 +1,15 @@ +-- A self-made query that covers anti join. +-- +-- This is the same as q103, which shows the auctions that have at least 20 bids. + +CREATE MATERIALIZED VIEW nexmark_q104 +AS +SELECT + a.id AS auction_id, + a.item_name AS auction_item_name +FROM auction a +WHERE a.id NOT IN ( + SELECT b.auction FROM bid b + GROUP BY b.auction + HAVING COUNT(*) < 20 +); diff --git a/src/tests/simulation/src/nexmark/q105.sql b/src/tests/simulation/src/nexmark/q105.sql new file mode 100644 index 0000000000000..9c3e566da440a --- /dev/null +++ b/src/tests/simulation/src/nexmark/q105.sql @@ -0,0 +1,15 @@ +-- A self-made query that covers singleton top-n (and local-phase group top-n). +-- +-- Show the top 1000 auctions by the number of bids. + +CREATE MATERIALIZED VIEW nexmark_q105 +AS +SELECT + a.id AS auction_id, + a.item_name AS auction_item_name, + COUNT(b.auction) AS bid_count +FROM auction a +JOIN bid b ON a.id = b.auction +GROUP BY a.id, a.item_name +ORDER BY bid_count DESC +LIMIT 1000; diff --git a/src/tests/simulation/src/nexmark/q3.sql b/src/tests/simulation/src/nexmark/q3.sql new file mode 100644 index 0000000000000..645eedc0c527a --- /dev/null +++ b/src/tests/simulation/src/nexmark/q3.sql @@ -0,0 +1,10 @@ +-- Covers hash inner join. 
+ +CREATE MATERIALIZED VIEW nexmark_q3 +AS +SELECT + P.name, P.city, P.state, A.id +FROM + auction AS A INNER JOIN person AS P on A.seller = P.id +WHERE + A.category = 10 and (P.state = 'or' OR P.state = 'id' OR P.state = 'ca'); diff --git a/src/tests/simulation/src/nexmark/q4.sql b/src/tests/simulation/src/nexmark/q4.sql new file mode 100644 index 0000000000000..20273c2354cb4 --- /dev/null +++ b/src/tests/simulation/src/nexmark/q4.sql @@ -0,0 +1,21 @@ +-- Covers hash inner join and hash aggregation. + +CREATE MATERIALIZED VIEW nexmark_q4 +AS +SELECT + Q.category, + AVG(Q.final) as avg +FROM ( + SELECT + MAX(B.price) AS final,A.category + FROM + auction A, + bid B + WHERE + A.id = B.auction AND + B.date_time BETWEEN A.date_time AND A.expires + GROUP BY + A.id,A.category + ) Q +GROUP BY + Q.category; diff --git a/src/tests/simulation/src/nexmark/q5.sql b/src/tests/simulation/src/nexmark/q5.sql new file mode 100644 index 0000000000000..4e2f36580102a --- /dev/null +++ b/src/tests/simulation/src/nexmark/q5.sql @@ -0,0 +1,32 @@ +-- Covers self-join. + +CREATE MATERIALIZED VIEW nexmark_q5 +AS +SELECT AuctionBids.auction, AuctionBids.num FROM ( + SELECT + bid.auction, + count(*) AS num, + window_start AS starttime + FROM + HOP(bid, date_time, INTERVAL '2' SECOND, INTERVAL '10' SECOND) + GROUP BY + window_start, + bid.auction +) AS AuctionBids +JOIN ( + SELECT + max(CountBids.num) AS maxn, + CountBids.starttime_c + FROM ( + SELECT + count(*) AS num, + window_start AS starttime_c + FROM HOP(bid, date_time, INTERVAL '2' SECOND, INTERVAL '10' SECOND) + GROUP BY + bid.auction, + window_start + ) AS CountBids + GROUP BY + CountBids.starttime_c +) AS MaxBids +ON AuctionBids.starttime = MaxBids.starttime_c AND AuctionBids.num >= MaxBids.maxn; diff --git a/src/tests/simulation/src/nexmark/q7.sql b/src/tests/simulation/src/nexmark/q7.sql new file mode 100644 index 0000000000000..afe52835f1821 --- /dev/null +++ b/src/tests/simulation/src/nexmark/q7.sql @@ -0,0 +1,23 @@ +-- Covers self-join. + +CREATE MATERIALIZED VIEW nexmark_q7 +AS +SELECT + B.auction, + B.price, + B.bidder, + B.date_time +FROM + bid B +JOIN ( + SELECT + MAX(price) AS maxprice, + window_end as date_time + FROM + TUMBLE(bid, date_time, INTERVAL '10' SECOND) + GROUP BY + window_end +) B1 ON B.price = B1.maxprice +WHERE + B.date_time BETWEEN B1.date_time - INTERVAL '10' SECOND + AND B1.date_time; diff --git a/src/tests/simulation/src/nexmark/q8.sql b/src/tests/simulation/src/nexmark/q8.sql new file mode 100644 index 0000000000000..6e7c1417f59f2 --- /dev/null +++ b/src/tests/simulation/src/nexmark/q8.sql @@ -0,0 +1,36 @@ +-- Covers self-join. + +CREATE MATERIALIZED VIEW nexmark_q8 +AS +SELECT + P.id, + P.name, + P.starttime +FROM ( + SELECT + id, + name, + window_start AS starttime, + window_end AS endtime + FROM + TUMBLE(person, date_time, INTERVAL '10' SECOND) + GROUP BY + id, + name, + window_start, + window_end +) P +JOIN ( + SELECT + seller, + window_start AS starttime, + window_end AS endtime + FROM + TUMBLE(auction, date_time, INTERVAL '10' SECOND) + GROUP BY + seller, + window_start, + window_end +) A ON P.id = A.seller + AND P.starttime = A.starttime + AND P.endtime = A.endtime; diff --git a/src/tests/simulation/src/nexmark/q9.sql b/src/tests/simulation/src/nexmark/q9.sql new file mode 100644 index 0000000000000..37c25d3397b23 --- /dev/null +++ b/src/tests/simulation/src/nexmark/q9.sql @@ -0,0 +1,14 @@ +-- Covers group top-n. 
+ +CREATE MATERIALIZED VIEW nexmark_q9 +AS +SELECT + id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, + auction, bidder, price, bid_date_time +FROM ( + SELECT A.*, B.auction, B.bidder, B.price, B.date_time AS bid_date_time, + ROW_NUMBER() OVER (PARTITION BY A.id ORDER BY B.price DESC, B.date_time ASC) AS rownum + FROM auction A, bid B + WHERE A.id = B.auction AND B.date_time BETWEEN A.date_time AND A.expires +) +WHERE rownum <= 1; diff --git a/src/tests/simulation/src/slt.rs b/src/tests/simulation/src/slt.rs index 2ba4e554237e3..da624ec200e6f 100644 --- a/src/tests/simulation/src/slt.rs +++ b/src/tests/simulation/src/slt.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -29,10 +29,50 @@ fn is_create_table_as(sql: &str) -> bool { .map(|s| s.to_lowercase()) .collect(); - println!("{:?}", parts); parts.len() >= 4 && parts[0] == "create" && parts[1] == "table" && parts[3] == "as" } +#[derive(PartialEq, Eq)] +enum SqlCmd { + Create { is_create_table_as: bool }, + Drop, + Dml, + Flush, + Others, +} + +impl SqlCmd { + // We won't kill during insert/update/delete since the atomicity is not guaranteed. + // Notice that `create table as` is also not atomic in our system. + fn ignore_kill(&self) -> bool { + matches!( + self, + SqlCmd::Dml + | SqlCmd::Create { + is_create_table_as: true + } + ) + } +} + +fn extract_sql_command(sql: &str) -> SqlCmd { + let cmd = sql + .trim_start() + .split_once(' ') + .unwrap_or_default() + .0 + .to_lowercase(); + match cmd.as_str() { + "create" => SqlCmd::Create { + is_create_table_as: is_create_table_as(sql), + }, + "drop" => SqlCmd::Drop, + "insert" | "update" | "delete" => SqlCmd::Dml, + "flush" => SqlCmd::Flush, + _ => SqlCmd::Others, + } +} + const KILL_IGNORE_FILES: &[&str] = &[ // TPCH queries are too slow for recovery. "tpch_snapshot.slt", @@ -45,8 +85,9 @@ const KILL_IGNORE_FILES: &[&str] = &[ /// Run the sqllogictest files in `glob`. pub async fn run_slt_task(cluster: Arc, glob: &str, opts: &KillOpts) { - let host = cluster.rand_frontend_ip(); - let risingwave = RisingWave::connect(host, "dev".to_string()).await.unwrap(); + let risingwave = RisingWave::connect("frontend".into(), "dev".into()) + .await + .unwrap(); let kill = opts.kill_compute || opts.kill_meta || opts.kill_frontend || opts.kill_compactor; let mut tester = sqllogictest::Runner::new(risingwave); let files = glob::glob(glob).expect("failed to read glob pattern"); @@ -58,61 +99,55 @@ pub async fn run_slt_task(cluster: Arc, glob: &str, opts: &KillOpts) { continue; } // XXX: hack for kafka source test - let tempfile = path.ends_with("kafka.slt").then(|| hack_kafka_test(path)); + let tempfile = (path.ends_with("kafka.slt") || path.ends_with("kafka_batch.slt")) + .then(|| hack_kafka_test(path)); let path = tempfile.as_ref().map(|p| p.path()).unwrap_or(path); for record in sqllogictest::parse_file(path).expect("failed to parse file") { if let sqllogictest::Record::Halt { .. } = record { break; } - let (is_create_table_as, is_create, is_drop, is_write) = - if let sqllogictest::Record::Statement { sql, .. 
} = &record { - let is_create_table_as = is_create_table_as(sql); - let sql = - (sql.trim_start().split_once(' ').unwrap_or_default().0).to_lowercase(); - ( - is_create_table_as, - !is_create_table_as && sql == "create", - sql == "drop", - sql == "insert" || sql == "update" || sql == "delete" || sql == "flush", - ) - } else { - (false, false, false, false) - }; - // we won't kill during create/insert/update/delete/flush since the atomicity is not - // guaranteed. Notice that `create table as` is also not atomic in our system. - if is_write || is_create_table_as { - if !kill { - if let Err(e) = tester.run_async(record).await { - panic!("{}", e); - } - } else { - for i in 0usize.. { - let delay = Duration::from_secs(1 << i); - match tester.run_async(record.clone()).await { - Ok(_) => break, - // cluster could be still under recovering if killed before, retry if - // meets `no reader for dml in table with id {}`. - Err(e) - if !e.to_string().contains("no reader for dml in table") - || i >= 5 => - { - panic!("failed to run test after retry {i} times: {e}") - } - Err(e) => { - tracing::error!("failed to run test: {e}\nretry after {delay:?}") - } - } - tokio::time::sleep(delay).await; - } - } - continue; - } - if !kill || is_write || is_create_table_as { + + // For normal records. + if !kill { match tester.run_async(record).await { Ok(_) => continue, Err(e) => panic!("{}", e), } } + + // For kill enabled. + let cmd = match &record { + sqllogictest::Record::Statement { sql, .. } + | sqllogictest::Record::Query { sql, .. } => extract_sql_command(sql), + _ => SqlCmd::Others, + }; + + // Since we've configured the session to always enable implicit flush, we don't need to + // execute `FLUSH` statements. + if cmd == SqlCmd::Flush { + continue; + } + + if cmd.ignore_kill() { + for i in 0usize.. { + let delay = Duration::from_secs(1 << i); + if let Err(err) = tester.run_async(record.clone()).await { + // cluster could be still under recovering if killed before, retry if + // meets `no reader for dml in table with id {}`. 
+ let should_retry = + err.to_string().contains("no reader for dml in table") && i < 5; + if !should_retry { + panic!("{}", err); + } + tracing::error!("failed to run test: {err}\nretry after {delay:?}"); + } else { + break; + } + tokio::time::sleep(delay).await; + } + continue; + } + let should_kill = thread_rng().gen_ratio((opts.kill_rate * 1000.0) as u32, 1000); // spawn a background task to kill nodes let handle = if should_kill { @@ -134,15 +169,26 @@ pub async fn run_slt_task(cluster: Arc, glob: &str, opts: &KillOpts) { Ok(_) => break, // allow 'table exists' error when retry CREATE statement Err(e) - if is_create - && i != 0 + if matches!( + cmd, + SqlCmd::Create { + is_create_table_as: false + } + ) && i != 0 && e.to_string().contains("exists") && e.to_string().contains("Catalog error") => { break } // allow 'not found' error when retry DROP statement - Err(e) if is_drop && i != 0 && e.to_string().contains("not found") => break, + Err(e) + if cmd == SqlCmd::Drop + && i != 0 + && e.to_string().contains("not found") + && e.to_string().contains("Catalog error") => + { + break + } Err(e) if i >= 5 => panic!("failed to run test after retry {i} times: {e}"), Err(e) => tracing::error!("failed to run test: {e}\nretry after {delay:?}"), } @@ -155,18 +201,15 @@ pub async fn run_slt_task(cluster: Arc, glob: &str, opts: &KillOpts) { } } -pub async fn run_parallel_slt_task( - cluster: Arc, - glob: &str, - jobs: usize, -) -> Result<(), ParallelTestError> { - let host = cluster.rand_frontend_ip(); - let db = RisingWave::connect(host, "dev".to_string()).await.unwrap(); +pub async fn run_parallel_slt_task(glob: &str, jobs: usize) -> Result<(), ParallelTestError> { + let db = RisingWave::connect("frontend".into(), "dev".into()) + .await + .unwrap(); let mut tester = sqllogictest::Runner::new(db); tester .run_parallel_async( glob, - cluster.frontend_ips(), + vec!["frontend".into()], |host, dbname| async move { RisingWave::connect(host, dbname).await.unwrap() }, jobs, ) diff --git a/src/tests/simulation/src/utils.rs b/src/tests/simulation/src/utils.rs index 0b42d4e81e931..acc97d59814ec 100644 --- a/src/tests/simulation/src/utils.rs +++ b/src/tests/simulation/src/utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/simulation/tests/cascade_materialized_view.rs b/src/tests/simulation/tests/it/cascade_materialized_view.rs similarity index 99% rename from src/tests/simulation/tests/cascade_materialized_view.rs rename to src/tests/simulation/tests/it/cascade_materialized_view.rs index 4e84ffe077bbb..f3bcaf7c82283 100644 --- a/src/tests/simulation/tests/cascade_materialized_view.rs +++ b/src/tests/simulation/tests/it/cascade_materialized_view.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
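Aside (editor's note, not part of the patch): the kill-retry loop above keeps doubling its delay via `Duration::from_secs(1 << i)` and only keeps retrying while the error is the recoverable "no reader for dml in table" case and the attempt budget is not exhausted. A minimal standalone sketch of that backoff pattern follows; it assumes a tokio runtime, and the helper name, signature, and predicate are illustrative rather than anything defined in this changeset.

use std::time::Duration;

/// Sketch of the retry-with-exponential-backoff pattern used in `run_slt_task`:
/// run an async operation, and on a retryable error sleep 1s, 2s, 4s, ... before
/// trying again, up to `max_retries` attempts.
async fn retry_with_backoff<T, E, Fut>(
    mut op: impl FnMut() -> Fut,
    is_retryable: impl Fn(&E) -> bool,
    max_retries: usize,
) -> Result<T, E>
where
    Fut: std::future::Future<Output = Result<T, E>>,
{
    let mut attempt = 0usize;
    loop {
        match op().await {
            Ok(v) => return Ok(v),
            Err(e) if is_retryable(&e) && attempt < max_retries => {
                // Exponentially growing delay, mirroring `1 << i` in the patch above.
                let delay = Duration::from_secs(1 << attempt);
                tokio::time::sleep(delay).await;
                attempt += 1;
            }
            Err(e) => return Err(e),
        }
    }
}

A caller would pass a closure that re-runs the sqllogictest record and a predicate that matches the recovery-related error message, which is essentially what the inlined loop in the patch does.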
diff --git a/src/tests/simulation/tests/dynamic_filter.rs b/src/tests/simulation/tests/it/dynamic_filter.rs similarity index 99% rename from src/tests/simulation/tests/dynamic_filter.rs rename to src/tests/simulation/tests/it/dynamic_filter.rs index d7d7f6cf3f24c..e03432498d5dc 100644 --- a/src/tests/simulation/tests/dynamic_filter.rs +++ b/src/tests/simulation/tests/it/dynamic_filter.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/simulation/tests/hello.rs b/src/tests/simulation/tests/it/hello.rs similarity index 96% rename from src/tests/simulation/tests/hello.rs rename to src/tests/simulation/tests/it/hello.rs index e9434e77eb1cb..82ed517a0e685 100644 --- a/src/tests/simulation/tests/hello.rs +++ b/src/tests/simulation/tests/it/hello.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/simulation/tests/it/main.rs b/src/tests/simulation/tests/it/main.rs new file mode 100644 index 0000000000000..b9bd44b857b9e --- /dev/null +++ b/src/tests/simulation/tests/it/main.rs @@ -0,0 +1,27 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! The entry point to combine all integration tests into a single binary. +//! +//! See [this post](https://matklad.github.io/2021/02/27/delete-cargo-integration-tests.html) +//! for the rationale behind this approach. + +mod cascade_materialized_view; +mod dynamic_filter; +mod hello; +mod nexmark_chaos; +mod nexmark_q4; +mod nexmark_source; +mod singleton_migration; +mod streaming_parallelism; diff --git a/src/tests/simulation/tests/nexmark_chaos.rs b/src/tests/simulation/tests/it/nexmark_chaos.rs similarity index 99% rename from src/tests/simulation/tests/nexmark_chaos.rs rename to src/tests/simulation/tests/it/nexmark_chaos.rs index 0fed3377b1d63..d31ae131dd3ad 100644 --- a/src/tests/simulation/tests/nexmark_chaos.rs +++ b/src/tests/simulation/tests/it/nexmark_chaos.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/tests/simulation/tests/nexmark_q4.rs b/src/tests/simulation/tests/it/nexmark_q4.rs similarity index 99% rename from src/tests/simulation/tests/nexmark_q4.rs rename to src/tests/simulation/tests/it/nexmark_q4.rs index 14f87586d13fd..58e3f290e30cd 100644 --- a/src/tests/simulation/tests/nexmark_q4.rs +++ b/src/tests/simulation/tests/it/nexmark_q4.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/simulation/tests/nexmark_source.rs b/src/tests/simulation/tests/it/nexmark_source.rs similarity index 98% rename from src/tests/simulation/tests/nexmark_source.rs rename to src/tests/simulation/tests/it/nexmark_source.rs index 64bb85d4a2944..030b6e21a55fb 100644 --- a/src/tests/simulation/tests/nexmark_source.rs +++ b/src/tests/simulation/tests/it/nexmark_source.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/simulation/tests/singleton_migration.rs b/src/tests/simulation/tests/it/singleton_migration.rs similarity index 98% rename from src/tests/simulation/tests/singleton_migration.rs rename to src/tests/simulation/tests/it/singleton_migration.rs index aa238aa2a2a14..e480b7aedb555 100644 --- a/src/tests/simulation/tests/singleton_migration.rs +++ b/src/tests/simulation/tests/it/singleton_migration.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/simulation/tests/sink.rs b/src/tests/simulation/tests/it/sink.rs similarity index 95% rename from src/tests/simulation/tests/sink.rs rename to src/tests/simulation/tests/it/sink.rs index 5b089f8c8279e..6e050f1c4b32b 100644 --- a/src/tests/simulation/tests/sink.rs +++ b/src/tests/simulation/tests/it/sink.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -27,10 +27,10 @@ use rdkafka::{ClientConfig, Message, TopicPartitionList}; use risingwave_simulation::cluster::{Cluster, Configuration}; use risingwave_simulation::ctl_ext::predicate::{identity_contains, no_identity_contains}; -const ROOT_TABLE_CREATE: &str = "create table t (v1 int);"; -const APPEND_ONLY_SINK_CREATE: &str = "create sink s1 from t with (connector='kafka', kafka.brokers='192.168.11.1:29092', kafka.topic='t_sink_append_only', format='append_only');"; +const ROOT_TABLE_CREATE: &str = "create table t (v1 int) with (appendonly=true);"; +const APPEND_ONLY_SINK_CREATE: &str = "create sink s1 from t with (connector='kafka', properties.bootstrap.server='192.168.11.1:29092', topic='t_sink_append_only', format='append_only');"; const MV_CREATE: &str = "create materialized view m as select count(*) from t;"; -const DEBEZIUM_SINK_CREATE: &str = "create sink s2 from m with (connector='kafka', kafka.brokers='192.168.11.1:29092', kafka.topic='t_sink_debezium', format='debezium');"; +const DEBEZIUM_SINK_CREATE: &str = "create sink s2 from m with (connector='kafka', properties.bootstrap.server='192.168.11.1:29092', topic='t_sink_debezium', format='debezium');"; const APPEND_ONLY_TOPIC: &str = "t_sink_append_only"; const DEBEZIUM_TOPIC: &str = "t_sink_debezium"; diff --git a/src/tests/simulation/tests/streaming_parallelism.rs b/src/tests/simulation/tests/it/streaming_parallelism.rs similarity index 83% rename from src/tests/simulation/tests/streaming_parallelism.rs rename to src/tests/simulation/tests/it/streaming_parallelism.rs index 0e45f657c4417..24d678eb7234b 100644 --- a/src/tests/simulation/tests/streaming_parallelism.rs +++ b/src/tests/simulation/tests/it/streaming_parallelism.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -25,22 +25,17 @@ async fn test_streaming_parallelism_default() -> Result<()> { let mut cluster = Cluster::start(Configuration::for_scale()).await?; let default_parallelism = cluster.config().compute_nodes * cluster.config().compute_node_cores; cluster.run("create table t1 (c1 int, c2 int);").await?; - let materialize_fragments = cluster - .locate_fragments([identity_contains("materialize")]) + let materialize_fragment = cluster + .locate_one_fragment([identity_contains("materialize")]) .await?; - assert_eq!(materialize_fragments.len(), 1); - assert_eq!( - materialize_fragments[0].inner.actors.len(), - default_parallelism - ); + assert_eq!(materialize_fragment.inner.actors.len(), default_parallelism); Ok(()) } async fn run_sqls_in_session(cluster: &Cluster, sqls: Vec) { - let frontend = cluster.rand_frontend_ip(); cluster .run_on_client(async move { - let mut session = RisingWave::connect(frontend, "dev".to_string()) + let mut session = RisingWave::connect("frontend".into(), "dev".into()) .await .expect("failed to connect to RisingWave"); for sql in sqls { @@ -64,14 +59,10 @@ async fn test_streaming_parallelism_set_some() -> Result<()> { ], ) .await; - let materialize_fragments = cluster - .locate_fragments([identity_contains("materialize")]) + let materialize_fragment = cluster + .locate_one_fragment([identity_contains("materialize")]) .await?; - assert_eq!(materialize_fragments.len(), 1); - assert_eq!( - materialize_fragments[0].inner.actors.len(), - target_parallelism - ); + assert_eq!(materialize_fragment.inner.actors.len(), target_parallelism); Ok(()) } @@ -87,14 +78,10 @@ async fn test_streaming_parallelism_set_zero() -> Result<()> { ], ) .await; - let materialize_fragments = cluster - .locate_fragments([identity_contains("materialize")]) + let materialize_fragment = cluster + .locate_one_fragment([identity_contains("materialize")]) .await?; - assert_eq!(materialize_fragments.len(), 1); - assert_eq!( - materialize_fragments[0].inner.actors.len(), - default_parallelism - ); + assert_eq!(materialize_fragment.inner.actors.len(), default_parallelism); Ok(()) } diff --git a/src/tests/sqlsmith/Cargo.toml b/src/tests/sqlsmith/Cargo.toml index 4bf656c15637c..06199f195952c 100644 --- a/src/tests/sqlsmith/Cargo.toml +++ b/src/tests/sqlsmith/Cargo.toml @@ -7,14 +7,16 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] -anyhow = { version = "1", features = ["backtrace"] } chrono = "0.4" clap = { version = "3", features = ["derive"] } -futures = { version = "0.3", default-features = false, features = ["alloc"] } itertools = "0.10" -libtest-mimic = "0.6" -paste = "1" rand = { version = "0.8", features = ["small_rng"] } risingwave_common = { path = "../../common" } risingwave_expr = { path = "../../expr" } @@ -28,6 +30,9 @@ tracing-subscriber = "0.3.16" [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../../workspace-hack" } +[dev-dependencies] +libtest-mimic = "0.6" + [[bin]] name = "sqlsmith" path = "src/bin/main.rs" diff --git a/src/tests/sqlsmith/src/bin/main.rs b/src/tests/sqlsmith/src/bin/main.rs index 30d7a237a0c5b..198d2fa1a50ae 100644 --- a/src/tests/sqlsmith/src/bin/main.rs +++ b/src/tests/sqlsmith/src/bin/main.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // 
you may not use this file except in compliance with the License. diff --git a/src/tests/sqlsmith/src/lib.rs b/src/tests/sqlsmith/src/lib.rs index 902adcab76857..4c7509376999a 100644 --- a/src/tests/sqlsmith/src/lib.rs +++ b/src/tests/sqlsmith/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #![feature(if_let_guard)] #![feature(once_cell)] +use rand::prelude::SliceRandom; use rand::Rng; use risingwave_sqlparser::ast::{ BinaryOperator, Expr, Join, JoinConstraint, JoinOperator, Statement, @@ -45,6 +46,25 @@ pub fn mview_sql_gen(rng: &mut R, tables: Vec
, name: &str) -> (St (mview.to_string(), table) } +/// TODO(noel): Eventually all session variables should be fuzzed. +/// For now we start of with a few hardcoded configs. +/// Some config need workarounds, for instance `QUERY_MODE`, +/// which can lead to stack overflow +/// (a simple workaround is limit length of +/// generated query when `QUERY_MODE=local`. +#[allow(dead_code)] +pub fn session_sql_gen(rng: &mut R) -> String { + [ + "SET RW_ENABLE_TWO_PHASE_AGG TO TRUE", + "SET RW_ENABLE_TWO_PHASE_AGG TO FALSE", + "SET RW_FORCE_TWO_PHASE_AGG TO TRUE", + "SET RW_FORCE_TWO_PHASE_AGG TO FALSE", + ] + .choose(rng) + .unwrap() + .to_string() +} + /// Parse SQL /// FIXME(Noel): Introduce error type for sqlsmith for this. pub fn parse_sql(sql: &str) -> Vec { diff --git a/src/tests/sqlsmith/src/runner.rs b/src/tests/sqlsmith/src/runner.rs index f837e4d703374..6d7225c6fbc4f 100644 --- a/src/tests/sqlsmith/src/runner.rs +++ b/src/tests/sqlsmith/src/runner.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,7 +18,9 @@ use rand::{Rng, SeedableRng}; use tokio_postgres::error::Error as PgError; use crate::validation::is_permissible_error; -use crate::{create_table_statement_to_table, mview_sql_gen, parse_sql, sql_gen, Table}; +use crate::{ + create_table_statement_to_table, mview_sql_gen, parse_sql, session_sql_gen, sql_gen, Table, +}; /// e2e test runner for sqlsmith pub async fn run(client: &tokio_postgres::Client, testdata: &str, count: usize) { @@ -65,6 +67,22 @@ async fn test_sqlsmith( } } +/// `SET QUERY_MODE TO DISTRIBUTED`. +/// Panics if it fails. +async fn set_distributed_query_mode(client: &tokio_postgres::Client) { + client + .query("SET query_mode TO distributed;", &[]) + .await + .unwrap(); +} + +#[allow(dead_code)] +async fn test_session_variable(client: &tokio_postgres::Client, rng: &mut R) { + let session_sql = session_sql_gen(rng); + tracing::info!("Executing: {}", session_sql); + client.query(session_sql.as_str(), &[]).await.unwrap(); +} + /// Test batch queries, returns skipped query statistics /// Runs in distributed mode, since queries can be complex and cause overflow in local execution /// mode. 
@@ -75,12 +93,11 @@ async fn test_batch_queries( setup_sql: &str, sample_size: usize, ) -> f64 { - client - .query("SET query_mode TO distributed;", &[]) - .await - .unwrap(); + set_distributed_query_mode(client).await; let mut skipped = 0; for _ in 0..sample_size { + // ENABLE: https://github.com/risingwavelabs/risingwave/issues/7928 + // test_session_variable(client, rng).await; let sql = sql_gen(rng, tables.clone()); tracing::info!("Executing: {}", sql); let response = client.query(sql.as_str(), &[]).await; @@ -99,6 +116,8 @@ async fn test_stream_queries( ) -> f64 { let mut skipped = 0; for _ in 0..sample_size { + // ENABLE: https://github.com/risingwavelabs/risingwave/issues/7928 + // test_session_variable(client, rng).await; let (sql, table) = mview_sql_gen(rng, tables.clone(), "stream_query"); tracing::info!("Executing: {}", sql); let response = client.execute(&sql, &[]).await; diff --git a/src/tests/sqlsmith/src/sql_gen/expr.rs b/src/tests/sqlsmith/src/sql_gen/expr.rs index f514eba32af24..ee8b5b68ed1c5 100644 --- a/src/tests/sqlsmith/src/sql_gen/expr.rs +++ b/src/tests/sqlsmith/src/sql_gen/expr.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,20 +12,31 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::sync::Arc; + use itertools::Itertools; use rand::seq::SliceRandom; use rand::Rng; -use risingwave_common::types::DataType; +use risingwave_common::types::struct_type::StructType; +use risingwave_common::types::{DataType, DataTypeName}; use risingwave_expr::expr::AggKind; use risingwave_frontend::expr::{agg_func_sigs, cast_sigs, func_sigs, CastContext, ExprType}; use risingwave_sqlparser::ast::{ - BinaryOperator, Expr, Function, FunctionArg, FunctionArgExpr, Ident, ObjectName, + BinaryOperator, Expr, Function, FunctionArg, FunctionArgExpr, Ident, ObjectName, OrderByExpr, TrimWhereField, UnaryOperator, Value, }; -use crate::sql_gen::types::{data_type_to_ast_data_type, AGG_FUNC_TABLE, CAST_TABLE, FUNC_TABLE}; +use crate::sql_gen::types::{ + data_type_to_ast_data_type, AGG_FUNC_TABLE, EXPLICIT_CAST_TABLE, FUNC_TABLE, + IMPLICIT_CAST_TABLE, INVARIANT_FUNC_SET, +}; use crate::sql_gen::{SqlGenerator, SqlGeneratorContext}; +static STRUCT_FIELD_NAMES: [&str; 26] = [ + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", + "t", "u", "v", "w", "x", "y", "z", +]; + impl<'a, R: Rng> SqlGenerator<'a, R> { /// In generating expression, there are two execution modes: /// 1) Can have Aggregate expressions (`can_agg` = true) @@ -44,19 +55,136 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { }; } - let range = if context.can_gen_agg() { 99 } else { 90 }; + if *typ == DataType::Boolean && self.rng.gen_bool(0.05) { + return match self.rng.gen_bool(0.5) { + true => { + let (ty, expr) = self.gen_arbitrary_expr(context); + let n = self.rng.gen_range(1..=10); + Expr::InList { + expr: Box::new(Expr::Nested(Box::new(expr))), + list: self.gen_n_exprs_with_type(n, &ty, context), + negated: self.flip_coin(), + } + } + false => { + // TODO: InSubquery expression may not be always bound in all context. + // Parts labelled workaround can be removed or + // generalized if it is bound in all contexts. 
+ // https://github.com/risingwavelabs/risingwave/issues/1343 + let old_ctxt = self.new_local_context(); // WORKAROUND + let (query, column) = self.gen_single_item_query(); + let ty = column.data_type; + let expr = self.gen_simple_scalar(&ty); // WORKAROUND + let in_subquery_expr = Expr::InSubquery { + expr: Box::new(expr), + subquery: Box::new(query), + negated: self.flip_coin(), + }; + self.restore_context(old_ctxt); // WORKAROUND + in_subquery_expr + } + }; + } + // NOTE: + // We generate AST first, then use its `Display` trait + // to generate an sql string. + // That may erase nesting context. + // For instance `IN(a, b)` is `a IN b`. + // this can lead to ambiguity, if `a` is an + // INFIX/POSTFIX compound expression too: + // - `a1 IN a2 IN b` + // - `a1 >= a2 IN b` + // ... + // We just nest compound expressions to avoid this. + let range = if context.can_gen_agg() { 99 } else { 90 }; match self.rng.gen_range(0..=range) { - 0..=70 => self.gen_func(typ, context), + 0..=70 => Expr::Nested(Box::new(self.gen_func(typ, context))), 71..=80 => self.gen_exists(typ, context), - 81..=90 => self.gen_cast(typ, context), + 81..=90 => self.gen_explicit_cast(typ, context), 91..=99 => self.gen_agg(typ), - // TODO: There are more that are not in the functions table, e.g. CAST. - // We will separately generate them. _ => unreachable!(), } } + fn gen_data_type(&mut self) -> DataType { + // Depth of struct/list nesting + let depth = self.rng.gen_range(0..=1); + self.gen_data_type_inner(depth) + } + + fn gen_data_type_inner(&mut self, depth: usize) -> DataType { + use {DataType as S, DataTypeName as T}; + let mut candidate_ret_types = vec![ + T::Boolean, + T::Int16, + T::Int32, + T::Int64, + T::Decimal, + T::Float32, + T::Float64, + T::Varchar, + T::Date, + T::Timestamp, + // ENABLE: https://github.com/risingwavelabs/risingwave/issues/5826 + // T::Timestamptz, + T::Time, + T::Interval, + ]; + if depth > 0 { + candidate_ret_types.push(T::Struct); + candidate_ret_types.push(T::List); + } + + let ret_type = candidate_ret_types.choose(&mut self.rng).unwrap(); + + match ret_type { + T::Boolean => S::Boolean, + T::Int16 => S::Int16, + T::Int32 => S::Int32, + T::Int64 => S::Int64, + T::Decimal => S::Decimal, + T::Float32 => S::Float32, + T::Float64 => S::Float64, + T::Varchar => S::Varchar, + T::Date => S::Date, + T::Timestamp => S::Timestamp, + T::Timestamptz => S::Timestamptz, + T::Time => S::Time, + T::Interval => S::Interval, + T::Struct => self.gen_struct_data_type(depth - 1), + T::List => self.gen_list_data_type(depth - 1), + _ => unreachable!(), + } + } + + fn gen_list_data_type(&mut self, depth: usize) -> DataType { + DataType::List { + datatype: Box::new(self.gen_data_type_inner(depth)), + } + } + + fn gen_struct_data_type(&mut self, depth: usize) -> DataType { + let num_fields = self.rng.gen_range(1..4); + let fields = (0..num_fields) + .map(|_| self.gen_data_type_inner(depth)) + .collect(); + let field_names = STRUCT_FIELD_NAMES[0..num_fields] + .iter() + .map(|s| (*s).into()) + .collect(); + DataType::Struct(Arc::new(StructType { + fields, + field_names, + })) + } + + pub(crate) fn gen_arbitrary_expr(&mut self, context: SqlGeneratorContext) -> (DataType, Expr) { + let ret_type = self.gen_data_type(); + let expr = self.gen_expr(&ret_type, context); + (ret_type, expr) + } + fn gen_col(&mut self, typ: &DataType, context: SqlGeneratorContext) -> Expr { let columns = if context.is_inside_agg() { if self.bound_relations.is_empty() { @@ -85,15 +213,19 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } } - fn 
gen_cast(&mut self, ret: &DataType, context: SqlGeneratorContext) -> Expr { - self.gen_cast_inner(ret, context) + fn gen_explicit_cast(&mut self, ret: &DataType, context: SqlGeneratorContext) -> Expr { + self.gen_explicit_cast_inner(ret, context) .unwrap_or_else(|| self.gen_simple_scalar(ret)) } /// Generate casts from a cast map. /// TODO: Assign casts have to be tested via `INSERT`. - fn gen_cast_inner(&mut self, ret: &DataType, context: SqlGeneratorContext) -> Option { - let casts = CAST_TABLE.get(ret)?; + fn gen_explicit_cast_inner( + &mut self, + ret: &DataType, + context: SqlGeneratorContext, + ) -> Option { + let casts = EXPLICIT_CAST_TABLE.get(ret)?; let cast_sig = casts.choose(&mut self.rng).unwrap(); use CastContext as T; @@ -106,32 +238,18 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { Some(Expr::Cast { expr, data_type }) } - // TODO: Re-enable implicit casts - // Currently these implicit cast expressions may surface in: - // select items, functions and so on. - // Type-inference could result in different type from what SQLGenerator expects. - // For example: - // Suppose we had implicit cast expr from smallint->int. - // We then generated 1::smallint with implicit type int. - // If it was part of this expression: - // SELECT 1::smallint as col0; - // Then, when generating other expressions, SqlGenerator sees `col0` with type `int`, - // but its type will be inferred as `smallint` actually in the frontend. - // - // Functions also encounter problems, and could infer to the wrong type. - // May refer to type inference rules: - // https://github.com/risingwavelabs/risingwave/blob/650810a5a9b86028036cb3b51eec5b18d8f814d5/src/frontend/src/expr/type_inference/func.rs#L445-L464 - // Therefore it is disabled for now. - // T::Implicit if context.can_implicit_cast() => { - // self.gen_expr(cast_sig.from_type, context).into() - // } - // TODO: Generate this when e2e inserts are generated. // T::Assign - _ => None, + _ => unreachable!(), } } + /// NOTE: This can result in ambiguous expressions. + /// Should only be used in unambiguous context. + fn gen_implicit_cast(&mut self, ret: &DataType, context: SqlGeneratorContext) -> Expr { + self.gen_expr(ret, context) + } + fn gen_func(&mut self, ret: &DataType, context: SqlGeneratorContext) -> Expr { match self.rng.gen_bool(0.1) { true => self.gen_variadic_func(ret, context), @@ -161,7 +279,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } fn gen_case(&mut self, ret: &DataType, context: SqlGeneratorContext) -> Expr { - let n = self.rng.gen_range(1..10); + let n = self.rng.gen_range(1..4); Expr::Case { operand: None, conditions: self.gen_n_exprs_with_type(n, &DataType::Boolean, context), @@ -190,8 +308,16 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } fn gen_concat_args(&mut self, context: SqlGeneratorContext) -> Vec { - let n = self.rng.gen_range(1..10); - self.gen_n_exprs_with_type(n, &DataType::Varchar, context) + let n = self.rng.gen_range(1..4); + (0..n) + .map(|_| { + if self.rng.gen_bool(0.1) { + self.gen_explicit_cast(&DataType::Varchar, context) + } else { + self.gen_expr(&DataType::Varchar, context) + } + }) + .collect() } /// Generates `n` expressions of type `ret`. 
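Aside (editor's note, not part of the patch): the NOTE above explains that the generator builds an AST first and then relies on `Display`, which can erase nesting and let a compound operand re-parse differently (e.g. `a1 >= a2 IN b`), so compound sub-expressions are wrapped in `Nested`. The toy model below is entirely hypothetical (it is not the real `risingwave_sqlparser` AST) and only illustrates that rationale: under PostgreSQL-style precedence, `IN` binds tighter than `>=`, so the unparenthesized rendering no longer matches the intended tree.

// Toy AST: rendering without parentheses can produce text that re-parses differently.
enum Expr {
    Col(&'static str),
    Gte(Box<Expr>, Box<Expr>),    // a >= b
    InList(Box<Expr>, Vec<Expr>), // a IN (b, ...)
    Nested(Box<Expr>),            // (a)
}

fn render(e: &Expr) -> String {
    match e {
        Expr::Col(c) => (*c).to_string(),
        Expr::Gte(l, r) => format!("{} >= {}", render(l), render(r)),
        Expr::InList(l, list) => format!(
            "{} IN ({})",
            render(l),
            list.iter().map(render).collect::<Vec<_>>().join(", ")
        ),
        Expr::Nested(inner) => format!("({})", render(inner)),
    }
}

fn main() {
    // Intended tree: (a1 >= a2) IN (b).
    // Without Nested this prints "a1 >= a2 IN (b)", which a SQL parser may
    // read as a1 >= (a2 IN (b)); wrapping the left operand keeps the meaning.
    let bare = Expr::InList(
        Box::new(Expr::Gte(Box::new(Expr::Col("a1")), Box::new(Expr::Col("a2")))),
        vec![Expr::Col("b")],
    );
    let nested = Expr::InList(
        Box::new(Expr::Nested(Box::new(Expr::Gte(
            Box::new(Expr::Col("a1")),
            Box::new(Expr::Col("a2")),
        )))),
        vec![Expr::Col("b")],
    );
    println!("{}", render(&bare));   // a1 >= a2 IN (b)
    println!("{}", render(&nested)); // (a1 >= a2) IN (b)
}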
@@ -210,10 +336,19 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { Some(funcs) => funcs, }; let func = funcs.choose(&mut self.rng).unwrap(); + let can_implicit_cast = INVARIANT_FUNC_SET.contains(&func.func); let exprs: Vec = func .inputs_type .iter() - .map(|t| self.gen_expr(t, context)) + .map(|t| { + if let Some(from_tys) = IMPLICIT_CAST_TABLE.get(t) + && can_implicit_cast && self.flip_coin() { + let from_ty = &from_tys.choose(&mut self.rng).unwrap().from_type; + self.gen_implicit_cast(from_ty, context) + } else { + self.gen_expr(t, context) + } + }) .collect(); let expr = if exprs.len() == 1 { make_unary_op(func.func, &exprs[0]) @@ -232,18 +367,30 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { }; // Generating correlated subquery tends to create queries which cannot be unnested. // we still want to test it, but reduce the chance it occurs. - let (subquery, _) = match self.rng.gen_bool(0.1) { + let (subquery, _) = match self.rng.gen_bool(0.05) { true => self.gen_correlated_query(), false => self.gen_local_query(), }; Expr::Exists(Box::new(subquery)) } - fn gen_agg(&mut self, ret: &DataType) -> Expr { - // TODO: workaround for - if *ret == DataType::Interval { - return self.gen_simple_scalar(ret); + pub(crate) fn gen_order_by(&mut self) -> Vec { + if self.bound_columns.is_empty() || !self.is_distinct_allowed { + return vec![]; } + let mut order_by = vec![]; + while self.flip_coin() { + let column = self.bound_columns.choose(&mut self.rng).unwrap(); + order_by.push(OrderByExpr { + expr: Expr::Identifier(Ident::new(&column.name)), + asc: Some(self.rng.gen_bool(0.5)), + nulls_first: None, + }) + } + order_by + } + + fn gen_agg(&mut self, ret: &DataType) -> Expr { let funcs = match AGG_FUNC_TABLE.get(ret) { None => return self.gen_simple_scalar(ret), Some(funcs) => funcs, @@ -259,26 +406,85 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { .collect(); let distinct = self.flip_coin() && self.is_distinct_allowed; - self.make_agg_expr(func.func, &exprs, distinct) + let filter = if self.flip_coin() { + let context = SqlGeneratorContext::new_with_can_agg(false); + // ENABLE: https://github.com/risingwavelabs/risingwave/issues/4762 + // Prevent correlated query with `FILTER` + let old_ctxt = self.new_local_context(); + let expr = Some(Box::new(self.gen_expr(&DataType::Boolean, context))); + self.restore_context(old_ctxt); + expr + } else { + None + }; + let order_by = if self.flip_coin() && !distinct { + self.gen_order_by() + } else { + vec![] + }; + self.make_agg_expr(func.func, &exprs, distinct, filter, order_by) .unwrap_or_else(|| self.gen_simple_scalar(ret)) } /// Generates aggregate expressions. For internal / unsupported aggregators, we return `None`. 
- fn make_agg_expr(&mut self, func: AggKind, exprs: &[Expr], distinct: bool) -> Option { + fn make_agg_expr( + &mut self, + func: AggKind, + exprs: &[Expr], + distinct: bool, + filter: Option>, + order_by: Vec, + ) -> Option { use AggKind as A; match func { - A::Sum | A::Sum0 => Some(Expr::Function(make_agg_func("sum", exprs, distinct))), - A::Min => Some(Expr::Function(make_agg_func("min", exprs, distinct))), - A::Max => Some(Expr::Function(make_agg_func("max", exprs, distinct))), - A::Count => Some(Expr::Function(make_agg_func("count", exprs, distinct))), - A::Avg => Some(Expr::Function(make_agg_func("avg", exprs, distinct))), + A::Sum | A::Sum0 => Some(Expr::Function(make_agg_func( + "sum", exprs, distinct, filter, order_by, + ))), + A::Min => Some(Expr::Function(make_agg_func( + "min", exprs, distinct, filter, order_by, + ))), + A::Max => Some(Expr::Function(make_agg_func( + "max", exprs, distinct, filter, order_by, + ))), + A::Count => Some(Expr::Function(make_agg_func( + "count", exprs, distinct, filter, order_by, + ))), + A::Avg => Some(Expr::Function(make_agg_func( + "avg", exprs, distinct, filter, order_by, + ))), + A::VarSamp => Some(Expr::Function(make_agg_func( + "var_samp", exprs, distinct, filter, order_by, + ))), + A::VarPop => Some(Expr::Function(make_agg_func( + "var_pop", exprs, distinct, filter, order_by, + ))), + A::StddevSamp => Some(Expr::Function(make_agg_func( + "stddev_samp", + exprs, + distinct, + filter, + order_by, + ))), + A::StddevPop => Some(Expr::Function(make_agg_func( + "stddev_pop", + exprs, + distinct, + filter, + order_by, + ))), A::StringAgg => { // distinct and non_distinct_string_agg are incompatible according to // https://github.com/risingwavelabs/risingwave/blob/a703dc7d725aa995fecbaedc4e9569bc9f6ca5ba/src/frontend/src/optimizer/plan_node/logical_agg.rs#L394 if self.is_distinct_allowed && !distinct { None } else { - Some(Expr::Function(make_agg_func("string_agg", exprs, distinct))) + Some(Expr::Function(make_agg_func( + "string_agg", + exprs, + distinct, + filter, + order_by, + ))) } } A::FirstValue => None, @@ -286,15 +492,24 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { if self.is_distinct_allowed { None } else { + // It does not make sense to have `distinct`. + // That requires precision, which `approx_count_distinct` does not provide. Some(Expr::Function(make_agg_func( "approx_count_distinct", exprs, false, + filter, + order_by, ))) } } - // TODO(yuchao): `array_agg` support is still WIP, see #4657. - A::ArrayAgg => None, + A::ArrayAgg => Some(Expr::Function(make_agg_func( + "array_agg", + exprs, + distinct, + filter, + order_by, + ))), } } } @@ -326,6 +541,7 @@ fn make_general_expr(func: ExprType, exprs: Vec) -> Option { E::IsNotFalse => Some(Expr::IsNotFalse(Box::new(exprs[0].clone()))), E::Position => Some(Expr::Function(make_simple_func("position", &exprs))), E::RoundDigit => Some(Expr::Function(make_simple_func("round", &exprs))), + E::Pow => Some(Expr::Function(make_simple_func("pow", &exprs))), E::Repeat => Some(Expr::Function(make_simple_func("repeat", &exprs))), E::CharLength => Some(Expr::Function(make_simple_func("char_length", &exprs))), E::Substr => Some(Expr::Function(make_simple_func("substr", &exprs))), @@ -400,9 +616,14 @@ fn make_simple_func(func_name: &str, exprs: &[Expr]) -> Function { } /// This is the function that generate aggregate function. 
-/// DISTINCT , ORDER BY or FILTER is allowed in aggregation functions。 -/// Currently, distinct is allowed only, other and others rule is TODO: -fn make_agg_func(func_name: &str, exprs: &[Expr], distinct: bool) -> Function { +/// DISTINCT, ORDER BY or FILTER is allowed in aggregation functions。 +fn make_agg_func( + func_name: &str, + exprs: &[Expr], + distinct: bool, + filter: Option>, + order_by: Vec, +) -> Function { let args = exprs .iter() .map(|e| FunctionArg::Unnamed(FunctionArgExpr::Expr(e.clone()))) @@ -413,8 +634,8 @@ fn make_agg_func(func_name: &str, exprs: &[Expr], distinct: bool) -> Function { args, over: None, distinct, - order_by: vec![], - filter: None, + order_by, + filter, } } @@ -449,8 +670,14 @@ fn make_bin_op(func: ExprType, exprs: &[Expr]) -> Option { }) } +pub(crate) fn typed_null(ty: &DataType) -> Expr { + Expr::Cast { + expr: Box::new(sql_null()), + data_type: data_type_to_ast_data_type(ty), + } +} + /// Generates a `NULL` value. -/// TODO(Noel): Generate null for other scalar values. pub(crate) fn sql_null() -> Expr { Expr::Value(Value::Null) } @@ -488,7 +715,7 @@ pub fn print_function_table() -> String { .map(|sig| { format!( "{:?} CAST {:?} -> {:?}", - sig.context, sig.to_type, sig.from_type, + sig.context, sig.from_type, sig.to_type, ) }) .sorted() diff --git a/src/tests/sqlsmith/src/sql_gen/mod.rs b/src/tests/sqlsmith/src/sql_gen/mod.rs index 7e718c42e22ba..623873a55fcdd 100644 --- a/src/tests/sqlsmith/src/sql_gen/mod.rs +++ b/src/tests/sqlsmith/src/sql_gen/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -203,6 +203,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { columns: vec![], query, with_options: vec![], + emit_mode: None, }; (mview, table) } diff --git a/src/tests/sqlsmith/src/sql_gen/query.rs b/src/tests/sqlsmith/src/sql_gen/query.rs index 4cf2fdd5c6947..2ce67acdea7b0 100644 --- a/src/tests/sqlsmith/src/sql_gen/query.rs +++ b/src/tests/sqlsmith/src/sql_gen/query.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,27 +15,20 @@ //! Interface for generating a query //! We construct Query based on the AST representation, //! as defined in the [`risingwave_sqlparser`] module. -use std::sync::Arc; + use std::vec; use itertools::Itertools; use rand::prelude::SliceRandom; use rand::Rng; -use risingwave_common::types::struct_type::StructType; -use risingwave_common::types::{DataType, DataTypeName}; +use risingwave_common::types::DataType; use risingwave_sqlparser::ast::{ - Cte, Distinct, Expr, Ident, OrderByExpr, Query, Select, SelectItem, SetExpr, TableWithJoins, - With, + Cte, Distinct, Expr, Ident, Query, Select, SelectItem, SetExpr, TableWithJoins, With, }; use crate::sql_gen::utils::create_table_with_joins_from_table; use crate::sql_gen::{Column, SqlGenerator, SqlGeneratorContext, Table}; -static STRUCT_FIELD_NAMES: [&str; 26] = [ - "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", - "t", "u", "v", "w", "x", "y", "z", -]; - /// Generators impl<'a, R: Rng> SqlGenerator<'a, R> { /// Generates query expression and returns its @@ -51,8 +44,9 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { /// Generates a complex query which may recurse. /// e.g. 
through `gen_with` or other generated parts of the query. fn gen_complex_query(&mut self) -> (Query, Vec) { + let num_select_items = self.rng.gen_range(1..=4); let (with, with_tables) = self.gen_with(); - let (query, schema) = self.gen_set_expr(with_tables); + let (query, schema) = self.gen_set_expr(with_tables, num_select_items); let order_by = self.gen_order_by(); let has_order_by = !order_by.is_empty(); ( @@ -70,8 +64,9 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { /// Generates a simple query which will not recurse. fn gen_simple_query(&mut self) -> (Query, Vec) { + let num_select_items = self.rng.gen_range(1..=4); let with_tables = vec![]; - let (query, schema) = self.gen_set_expr(with_tables); + let (query, schema) = self.gen_set_expr(with_tables, num_select_items); ( Query { with: None, @@ -85,6 +80,24 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { ) } + /// Generates a query with a single SELECT item. e.g. SELECT v from t; + /// Returns the query and the SELECT column alias. + pub(crate) fn gen_single_item_query(&mut self) -> (Query, Column) { + let with_tables = vec![]; + let (query, schema) = self.gen_set_expr(with_tables, 1); + ( + Query { + with: None, + body: query, + order_by: vec![], + limit: None, + offset: None, + fetch: None, + }, + schema[0].clone(), + ) + } + /// Generates a query with local context. /// Used by `WITH`, `Table Subquery` in Relation pub(crate) fn gen_local_query(&mut self) -> (Query, Vec) { @@ -95,8 +108,6 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } /// Generates a query with correlated context to ensure proper recursion. - /// Used by Exists `Subquery` - /// TODO: pub(crate) fn gen_correlated_query(&mut self) -> (Query, Vec) { let old_ctxt = self.clone_local_context(); let t = self.gen_query(); @@ -105,7 +116,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } fn gen_with(&mut self) -> (Option, Vec
) { - match self.flip_coin() { + match self.rng.gen_bool(0.4) { true => (None, vec![]), false => { let (with, tables) = self.gen_with_inner(); @@ -137,32 +148,21 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { ) } - fn gen_set_expr(&mut self, with_tables: Vec
) -> (SetExpr, Vec) { + fn gen_set_expr( + &mut self, + with_tables: Vec
, + num_select_items: usize, + ) -> (SetExpr, Vec) { match self.rng.gen_range(0..=9) { + // TODO: Generate other `SetExpr` 0..=9 => { - let (select, schema) = self.gen_select_stmt(with_tables); + let (select, schema) = self.gen_select_stmt(with_tables, num_select_items); (SetExpr::Select(Box::new(select)), schema) } _ => unreachable!(), } } - fn gen_order_by(&mut self) -> Vec { - if self.bound_columns.is_empty() || !self.is_distinct_allowed { - return vec![]; - } - let mut order_by = vec![]; - while self.flip_coin() { - let column = self.bound_columns.choose(&mut self.rng).unwrap(); - order_by.push(OrderByExpr { - expr: Expr::Identifier(Ident::new(&column.name)), - asc: Some(self.rng.gen_bool(0.5)), - nulls_first: None, - }) - } - order_by - } - fn gen_limit(&mut self, has_order_by: bool) -> Option { if (!self.is_mview || has_order_by) && self.rng.gen_bool(0.2) { Some(self.rng.gen_range(0..=100).to_string()) @@ -171,13 +171,17 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } } - fn gen_select_stmt(&mut self, with_tables: Vec
) -> (Select, Vec) { + fn gen_select_stmt( + &mut self, + with_tables: Vec
, + num_select_items: usize, + ) -> (Select, Vec) { // Generate random tables/relations first so that select items can refer to them. let from = self.gen_from(with_tables); let selection = self.gen_where(); let group_by = self.gen_group_by(); let having = self.gen_having(!group_by.is_empty()); - let (select_list, schema) = self.gen_select_list(); + let (select_list, schema) = self.gen_select_list(num_select_items); let select = Select { distinct: Distinct::All, projection: select_list, @@ -190,91 +194,16 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { (select, schema) } - fn gen_select_list(&mut self) -> (Vec, Vec) { - let items_num = self.rng.gen_range(1..=4); + fn gen_select_list(&mut self, num_select_items: usize) -> (Vec, Vec) { let can_agg = self.flip_coin(); let context = SqlGeneratorContext::new_with_can_agg(can_agg); - (0..items_num) + (0..num_select_items) .map(|i| self.gen_select_item(i, context)) .unzip() } - fn gen_list_data_type(&mut self, depth: usize) -> DataType { - DataType::List { - datatype: Box::new(self.gen_data_type_inner(depth)), - } - } - - fn gen_struct_data_type(&mut self, depth: usize) -> DataType { - let num_fields = self.rng.gen_range(1..10); - let fields = (0..num_fields) - .map(|_| self.gen_data_type_inner(depth)) - .collect(); - let field_names = STRUCT_FIELD_NAMES[0..num_fields] - .iter() - .map(|s| (*s).into()) - .collect(); - DataType::Struct(Arc::new(StructType { - fields, - field_names, - })) - } - - fn gen_data_type(&mut self) -> DataType { - // Depth of struct/list nesting - let depth = self.rng.gen_range(0..=1); - self.gen_data_type_inner(depth) - } - - fn gen_data_type_inner(&mut self, depth: usize) -> DataType { - use {DataType as S, DataTypeName as T}; - let mut candidate_ret_types = vec![ - T::Boolean, - T::Int16, - T::Int32, - T::Int64, - T::Decimal, - T::Float32, - T::Float64, - T::Varchar, - T::Date, - T::Timestamp, - // ENABLE: https://github.com/risingwavelabs/risingwave/issues/5826 - // T::Timestamptz, - T::Time, - T::Interval, - ]; - if depth > 0 { - candidate_ret_types.push(T::Struct); - candidate_ret_types.push(T::List); - } - - let ret_type = candidate_ret_types.choose(&mut self.rng).unwrap(); - - match ret_type { - T::Boolean => S::Boolean, - T::Int16 => S::Int16, - T::Int32 => S::Int32, - T::Int64 => S::Int64, - T::Decimal => S::Decimal, - T::Float32 => S::Float32, - T::Float64 => S::Float64, - T::Varchar => S::Varchar, - T::Date => S::Date, - T::Timestamp => S::Timestamp, - T::Timestamptz => S::Timestamptz, - T::Time => S::Time, - T::Interval => S::Interval, - T::Struct => self.gen_struct_data_type(depth - 1), - T::List => self.gen_list_data_type(depth - 1), - _ => unreachable!(), - } - } - - fn gen_select_item(&mut self, i: i32, context: SqlGeneratorContext) -> (SelectItem, Column) { - let ret_type = self.gen_data_type(); - - let expr = self.gen_expr(&ret_type, context); + fn gen_select_item(&mut self, i: usize, context: SqlGeneratorContext) -> (SelectItem, Column) { + let (ret_type, expr) = self.gen_arbitrary_expr(context); let alias = format!("col_{}", i); ( @@ -310,7 +239,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { // Generate CROSS JOIN let mut lateral_contexts = vec![]; - for _ in 0..self.tables.len() { + for _ in 0..usize::min(self.tables.len(), 5) { if self.flip_coin() { let (table_with_join, mut table) = self.gen_from_relation(); from.push(table_with_join); @@ -330,12 +259,14 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } } + /// GROUP BY will constrain the generated columns. 
fn gen_group_by(&mut self) -> Vec { let mut available = self.bound_columns.clone(); if !available.is_empty() { available.shuffle(self.rng); - let n_group_by_cols = self.rng.gen_range(1..=available.len()); - let group_by_cols = available.drain(0..n_group_by_cols).collect_vec(); + let upper_bound = (available.len() + 1) / 2; + let n = self.rng.gen_range(1..=upper_bound); + let group_by_cols = available.drain(..n).collect_vec(); self.bound_columns = group_by_cols.clone(); group_by_cols .into_iter() diff --git a/src/tests/sqlsmith/src/sql_gen/relation.rs b/src/tests/sqlsmith/src/sql_gen/relation.rs index 5b64e3d8824f9..dc3609e854d60 100644 --- a/src/tests/sqlsmith/src/sql_gen/relation.rs +++ b/src/tests/sqlsmith/src/sql_gen/relation.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/sqlsmith/src/sql_gen/scalar.rs b/src/tests/sqlsmith/src/sql_gen/scalar.rs index fa347f3fb428c..daaa3bdeecd9d 100644 --- a/src/tests/sqlsmith/src/sql_gen/scalar.rs +++ b/src/tests/sqlsmith/src/sql_gen/scalar.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,10 +19,9 @@ use rand::distributions::Alphanumeric; use rand::prelude::SliceRandom; use rand::Rng; use risingwave_common::types::DataType; -use risingwave_sqlparser::ast::{DataType as AstDataType, Expr, Value}; +use risingwave_sqlparser::ast::{Array, DataType as AstDataType, Expr, Value}; -use crate::sql_gen::expr::sql_null; -use crate::sql_gen::types::data_type_to_ast_data_type; +use crate::sql_gen::expr::typed_null; use crate::sql_gen::SqlGenerator; impl<'a, R: Rng> SqlGenerator<'a, R> { @@ -35,37 +34,38 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { // NOTE(kwannoel): We generate Cast with NULL to avoid generating lots of ambiguous // expressions. For instance agg calls such as `max(NULL)` may be generated, // and coerced to VARCHAR, where we require a `NULL::int` instead. - return Expr::Cast { - expr: Box::new(sql_null()), - data_type: data_type_to_ast_data_type(typ), - }; + return typed_null(typ); } + // Scalars which may generate negative numbers are wrapped in + // `Nested` to ambiguity while parsing. + // e.g. -1 becomes -(1). 
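For illustration, here is what the extra `Nested` parentheses buy once such a scalar is embedded in a larger expression. The formatting helper below is a stand-in for AST serialization, not the risingwave_sqlparser API; it only shows the shape of the emitted SQL.

    // Hypothetical helper: `Nested` renders as a parenthesized sub-expression.
    fn embed(scalar: &str, nested: bool) -> String {
        if nested {
            format!("4 - ({})", scalar)
        } else {
            format!("4 - {}", scalar)
        }
    }

    fn main() {
        // Without Nested, the unary minus of the generated scalar lands right after
        // the binary minus of the surrounding expression, which round-trips less
        // reliably through the parser (see the issue linked below).
        assert_eq!(embed("-1", false), "4 - -1");
        // With Nested, the sign stays attached to its literal.
        assert_eq!(embed("-1", true), "4 - (-1)");
    }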
+ // See: https://github.com/risingwavelabs/risingwave/issues/4344 match *typ { - T::Int64 => Expr::Value(Value::Number( + T::Int64 => Expr::Nested(Box::new(Expr::Value(Value::Number( self.gen_int(i64::MIN as isize, i64::MAX as isize), - )), - T::Int32 => Expr::TypedString { + )))), + T::Int32 => Expr::Nested(Box::new(Expr::TypedString { data_type: AstDataType::Int, value: self.gen_int(i32::MIN as isize, i32::MAX as isize), - }, - T::Int16 => Expr::TypedString { + })), + T::Int16 => Expr::Nested(Box::new(Expr::TypedString { data_type: AstDataType::SmallInt, value: self.gen_int(i16::MIN as isize, i16::MAX as isize), - }, + })), T::Varchar => Expr::Value(Value::SingleQuotedString( (0..10) .map(|_| self.rng.sample(Alphanumeric) as char) .collect(), )), - T::Decimal => Expr::Value(Value::Number(self.gen_float())), - T::Float64 => Expr::TypedString { + T::Decimal => Expr::Nested(Box::new(Expr::Value(Value::Number(self.gen_float())))), + T::Float64 => Expr::Nested(Box::new(Expr::TypedString { data_type: AstDataType::Float(None), value: self.gen_float(), - }, - T::Float32 => Expr::TypedString { + })), + T::Float32 => Expr::Nested(Box::new(Expr::TypedString { data_type: AstDataType::Real, value: self.gen_float(), - }, + })), T::Boolean => Expr::Value(Value::Boolean(self.rng.gen_bool(0.5))), T::Date => Expr::TypedString { data_type: AstDataType::Date, @@ -83,13 +83,16 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { data_type: AstDataType::Timestamp(true), value: self.gen_temporal_scalar(typ), }, - T::Interval => Expr::TypedString { + T::Interval => Expr::Nested(Box::new(Expr::TypedString { data_type: AstDataType::Interval, value: self.gen_temporal_scalar(typ), - }, + })), T::List { datatype: ref ty } => { - let n = self.rng.gen_range(1..=100); // Avoid ambiguous type - Expr::Array(self.gen_simple_scalar_list(ty, n)) + let n = self.rng.gen_range(1..=4); // Avoid ambiguous type + Expr::Array(Array { + elem: self.gen_simple_scalar_list(ty, n), + named: true, + }) } // ENABLE: https://github.com/risingwavelabs/risingwave/issues/6934 // T::Struct(ref inner) => Expr::Row( @@ -99,7 +102,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { // .map(|typ| self.gen_simple_scalar(typ)) // .collect(), // ), - _ => sql_null(), + _ => typed_null(typ), } } @@ -108,31 +111,33 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { (0..n).map(|_| self.gen_simple_scalar(ty)).collect() } - fn gen_int(&mut self, _min: isize, max: isize) -> String { - let n = match self.rng.gen_range(0..=4) { - 0 => 0, - 1 => 1, - 2 => max, - // TODO: Negative numbers have a few issues. - // - Parsing, tracked by: . - // - Neg op with Interval, tracked by: - // 3 => i32::MIN as f64, - 3..=4 => self.rng.gen_range(1..max), + fn gen_int(&mut self, min: isize, max: isize) -> String { + // NOTE: Reduced chance for extreme values, + // since these tend to generate invalid expressions. + let n = match self.rng.gen_range(1..=100) { + 1..=5 => 0, + 6..=10 => 1, + 11..=15 => max, + 16..=20 => min, + 21..=25 => self.rng.gen_range(min + 1..0), + 26..=30 => self.rng.gen_range(1000..max), + 31..=100 => self.rng.gen_range(2..1000), _ => unreachable!(), }; n.to_string() } fn gen_float(&mut self) -> String { - let n = match self.rng.gen_range(0..=4) { - 0 => 0.0, - 1 => 1.0, - 2 => i32::MAX as f64, - // TODO: Negative numbers have a few issues. - // - Parsing, tracked by: . 
- // - Neg op with Interval, tracked by: - // 3 => i32::MIN as f64, - 3..=4 => self.rng.gen_range(1.0..i32::MAX as f64), + // NOTE: Reduced chance for extreme values, + // since these tend to generate invalid expressions. + let n = match self.rng.gen_range(1..=100) { + 1..=5 => 0.0, + 6..=10 => 1.0, + 11..=15 => i32::MAX as f64, + 16..=20 => i32::MIN as f64, + 21..=25 => self.rng.gen_range(i32::MIN + 1..0) as f64, + 26..=30 => self.rng.gen_range(1000..i32::MAX) as f64, + 31..=100 => self.rng.gen_range(2..1000) as f64, _ => unreachable!(), }; n.to_string() diff --git a/src/tests/sqlsmith/src/sql_gen/time_window.rs b/src/tests/sqlsmith/src/sql_gen/time_window.rs index d2e3eb981ebfa..0b638b8a7a96c 100644 --- a/src/tests/sqlsmith/src/sql_gen/time_window.rs +++ b/src/tests/sqlsmith/src/sql_gen/time_window.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tests/sqlsmith/src/sql_gen/types.rs b/src/tests/sqlsmith/src/sql_gen/types.rs index e1b98db85a9ae..670af90ce03d2 100644 --- a/src/tests/sqlsmith/src/sql_gen/types.rs +++ b/src/tests/sqlsmith/src/sql_gen/types.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,11 +14,12 @@ //! This module contains datatypes and functions which can be generated by sqlsmith. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::sync::LazyLock; use itertools::Itertools; use risingwave_common::types::{DataType, DataTypeName}; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::expr::AggKind; use risingwave_expr::sig::agg::{agg_func_sigs, AggFuncSig as RwAggFuncSig}; use risingwave_expr::sig::cast::{cast_sigs, CastContext, CastSig as RwCastSig}; @@ -42,11 +43,12 @@ pub(super) fn data_type_to_ast_data_type(data_type: &DataType) -> AstDataType { DataType::Timestamptz => AstDataType::Timestamp(true), DataType::Time => AstDataType::Time(false), DataType::Interval => AstDataType::Interval, + DataType::Jsonb => AstDataType::Custom(vec!["JSONB".into()].into()), DataType::Struct(inner) => AstDataType::Struct( inner .field_names .iter() - .zip_eq(inner.fields.iter()) + .zip_eq_fast(inner.fields.iter()) .map(|(name, typ)| StructField { name: name.as_str().into(), data_type: data_type_to_ast_data_type(typ), @@ -169,6 +171,18 @@ pub(crate) static FUNC_TABLE: LazyLock>> = LazyLo funcs }); +/// Set of invariant functions +// ENABLE: https://github.com/risingwavelabs/risingwave/issues/5826 +pub(crate) static INVARIANT_FUNC_SET: LazyLock> = LazyLock::new(|| { + func_sigs() + .map(|sig| sig.func) + .counts() + .into_iter() + .filter(|(_key, count)| *count == 1) + .map(|(key, _)| key) + .collect() +}); + /// Table which maps aggregate functions' return types to possible function signatures. // ENABLE: https://github.com/risingwavelabs/risingwave/issues/5826 pub(crate) static AGG_FUNC_TABLE: LazyLock>> = @@ -191,14 +205,28 @@ pub(crate) static AGG_FUNC_TABLE: LazyLock>> = /// NOTE: We avoid cast from varchar to other datatypes apart from itself. /// This is because arbitrary strings may not be able to cast, /// creating large number of invalid queries. 
-pub(crate) static CAST_TABLE: LazyLock>> = LazyLock::new(|| { - let mut casts = HashMap::>::new(); - cast_sigs() - .filter_map(|cast| cast.try_into().ok()) - .filter(|cast: &CastSig| { - cast.context == CastContext::Explicit || cast.context == CastContext::Implicit - }) - .filter(|cast| cast.from_type != DataType::Varchar || cast.to_type == DataType::Varchar) - .for_each(|cast| casts.entry(cast.to_type.clone()).or_default().push(cast)); - casts -}); +pub(crate) static EXPLICIT_CAST_TABLE: LazyLock>> = + LazyLock::new(|| { + let mut casts = HashMap::>::new(); + cast_sigs() + .filter_map(|cast| cast.try_into().ok()) + .filter(|cast: &CastSig| cast.context == CastContext::Explicit) + .filter(|cast| cast.from_type != DataType::Varchar || cast.to_type == DataType::Varchar) + .for_each(|cast| casts.entry(cast.to_type.clone()).or_default().push(cast)); + casts + }); + +/// Build a cast map from return types to viable cast-signatures. +/// NOTE: We avoid cast from varchar to other datatypes apart from itself. +/// This is because arbitrary strings may not be able to cast, +/// creating large number of invalid queries. +pub(crate) static IMPLICIT_CAST_TABLE: LazyLock>> = + LazyLock::new(|| { + let mut casts = HashMap::>::new(); + cast_sigs() + .filter_map(|cast| cast.try_into().ok()) + .filter(|cast: &CastSig| cast.context == CastContext::Implicit) + .filter(|cast| cast.from_type != DataType::Varchar || cast.to_type == DataType::Varchar) + .for_each(|cast| casts.entry(cast.to_type.clone()).or_default().push(cast)); + casts + }); diff --git a/src/tests/sqlsmith/src/sql_gen/utils.rs b/src/tests/sqlsmith/src/sql_gen/utils.rs index 0708148b04013..b9f5b2db4b71c 100644 --- a/src/tests/sqlsmith/src/sql_gen/utils.rs +++ b/src/tests/sqlsmith/src/sql_gen/utils.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -45,7 +45,6 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { self.bound_columns = old_cols; } - // TODO: pub(crate) fn clone_local_context(&mut self) -> Context { let current_bound_relations = self.bound_relations.clone(); let current_bound_columns = self.bound_columns.clone(); diff --git a/src/tests/sqlsmith/src/validation.rs b/src/tests/sqlsmith/src/validation.rs index 0aa37f6568335..1ec18830bfa25 100644 --- a/src/tests/sqlsmith/src/validation.rs +++ b/src/tests/sqlsmith/src/validation.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,12 +15,19 @@ //! Provides validation logic for expected errors. use risingwave_expr::ExprError; -fn is_division_by_zero_err(db_error: &str) -> bool { - db_error.contains(&ExprError::DivisionByZero.to_string()) +/// Ignore errors related to `0`. +fn is_zero_err(db_error: &str) -> bool { + db_error.contains(&ExprError::DivisionByZero.to_string()) || db_error.contains("can't be zero") } +/// `Casting to u32 out of range` occurs when we have functions +/// which expect non-negative arguments, +/// e.g. `select 222 << -1` +// NOTE: If this error occurs too often, perhaps it is better to +// wrap call sites with `abs(rhs)`, e.g. 
222 << abs(-1); fn is_numeric_out_of_range_err(db_error: &str) -> bool { db_error.contains(&ExprError::NumericOutOfRange.to_string()) + || db_error.contains("Casting to u32 out of range") } /// Skip queries with unimplemented features @@ -46,26 +53,31 @@ fn is_nested_loop_join_error(db_error: &str) -> bool { db_error.contains("Not supported: streaming nested-loop join") } -// FIXME: -// This error should not occur, remove once issue is fixed. -fn is_hash_shuffle_error(db_error: &str) -> bool { - db_error.contains("broken hash_shuffle_channel") -} - fn is_subquery_unnesting_error(db_error: &str) -> bool { db_error.contains("Subquery can not be unnested") } +/// Can't avoid numeric overflows, we do not eval const expr +fn is_numeric_overflow_error(db_error: &str) -> bool { + db_error.contains("Number") && db_error.contains("overflows") +} + +/// Negative substr error +fn is_neg_substr_error(db_error: &str) -> bool { + db_error.contains("length in substr should be non-negative") +} + /// Certain errors are permitted to occur. This is because: /// 1. It is more complex to generate queries without these errors. /// 2. These errors seldom occur, skipping them won't affect overall effectiveness of sqlsmith. pub fn is_permissible_error(db_error: &str) -> bool { is_numeric_out_of_range_err(db_error) - || is_division_by_zero_err(db_error) + || is_zero_err(db_error) || is_unimplemented_error(db_error) || not_unique_error(db_error) || is_window_error(db_error) - || is_hash_shuffle_error(db_error) || is_nested_loop_join_error(db_error) || is_subquery_unnesting_error(db_error) + || is_numeric_overflow_error(db_error) + || is_neg_substr_error(db_error) } diff --git a/src/tests/sqlsmith/tests/frontend/mod.rs b/src/tests/sqlsmith/tests/frontend/mod.rs index a6c7bf14b15f3..373830bad64f4 100644 --- a/src/tests/sqlsmith/tests/frontend/mod.rs +++ b/src/tests/sqlsmith/tests/frontend/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -46,7 +46,7 @@ pub struct SqlsmithEnv { /// Skip status is required, so that we know if a SQL statement writing to the database was skipped. /// Then, we can infer the correct state of the database. 
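The validation helpers above all follow one pattern: recognize a known-benign error class by substring and let the fuzzer skip the query instead of reporting a failure. A compressed sketch of that pattern, with illustrative error fragments rather than the exact strings produced by ExprError:

    fn is_permissible(db_error: &str, allowed_fragments: &[&str]) -> bool {
        allowed_fragments.iter().any(|frag| db_error.contains(frag))
    }

    fn main() {
        // Illustrative fragments; the authoritative list is the set of helpers above.
        let allowed = [
            "Division by zero",
            "can't be zero",
            "Casting to u32 out of range",
            "length in substr should be non-negative",
        ];
        assert!(is_permissible("ERROR: Division by zero", &allowed));
        assert!(!is_permissible("ERROR: unexpected internal panic", &allowed));
    }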
async fn handle(session: Arc, stmt: Statement, sql: &str) -> Result { - let result = handler::handle(session.clone(), stmt, sql, false) + let result = handler::handle(session.clone(), stmt, sql, vec![]) .await .map(|_| ()) .map_err(|e| format!("Error Reason:\n{}", e).into()); @@ -109,6 +109,7 @@ async fn create_tables( // Generate some mviews for i in 0..10 { let (sql, table) = mview_sql_gen(rng, tables.clone(), &format!("m{}", i)); + reproduce_failing_queries(&setup_sql, &sql); setup_sql.push_str(&format!("{};", &sql)); let stmts = parse_sql(&sql); let stmt = stmts[0].clone(); @@ -136,8 +137,9 @@ fn round_trip_parse_test(sql: &str) -> Result { let round_trip = parse_first_sql_stmt(&unparse(parse_first_sql_stmt(sql))); if start != round_trip { Err(format!( - "Roundtrip test failed\nStart: {}\nRoundtrip: {}", - start, round_trip + "Roundtrip test failed\nStart: {}\nRoundtrip: {}\n\ + Start AST: {:?}\nRoundtrip AST: {:?}", + sql, round_trip, start, round_trip ) .into()) } else { diff --git a/src/tests/sqlsmith/tests/test_runner.rs b/src/tests/sqlsmith/tests/test_runner.rs index d8d61acb1320e..d391bdb4d33ca 100644 --- a/src/tests/sqlsmith/tests/test_runner.rs +++ b/src/tests/sqlsmith/tests/test_runner.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/tracing/Cargo.toml b/src/tracing/Cargo.toml index e2bcceb139ff4..dd53225edcb2c 100644 --- a/src/tracing/Cargo.toml +++ b/src/tracing/Cargo.toml @@ -8,6 +8,12 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = "1" futures = { version = "0.3", default-features = false, features = ["alloc", "executor"] } diff --git a/src/tracing/src/lib.rs b/src/tracing/src/lib.rs index 9be332c2f9d81..9ff2d8d08b16d 100644 --- a/src/tracing/src/lib.rs +++ b/src/tracing/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/udf/Cargo.toml b/src/udf/Cargo.toml index 9d2083be43590..951a93ced41db 100644 --- a/src/udf/Cargo.toml +++ b/src/udf/Cargo.toml @@ -4,11 +4,16 @@ version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] -# need this latest PR: https://github.com/apache/arrow-rs/pull/3391 -arrow-array = { git = "https://github.com/apache/arrow-rs", rev = "6139d898" } -arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = "6139d898" } -arrow-schema = { git = "https://github.com/apache/arrow-rs", rev = "6139d898" } +arrow-array = "31" +arrow-flight = "31" +arrow-schema = "31" futures-util = "0.3.25" thiserror = "1" tokio = { version = "0.2", package = "madsim-tokio", features = ["rt", "macros"] } diff --git a/src/udf/examples/client.rs b/src/udf/examples/client.rs index b30ce36facb19..034d3816eab48 100644 --- a/src/udf/examples/client.rs +++ b/src/udf/examples/client.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/udf/src/lib.rs b/src/udf/src/lib.rs index 88c6f7ea4a9af..6d8a2bb8f3250 100644 --- a/src/udf/src/lib.rs +++ b/src/udf/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/utils/async_stack_trace/Cargo.toml b/src/utils/async_stack_trace/Cargo.toml index b6f1dae6ed59f..22655e91c52d7 100644 --- a/src/utils/async_stack_trace/Cargo.toml +++ b/src/utils/async_stack_trace/Cargo.toml @@ -4,10 +4,15 @@ version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] coarsetime = "0.1" futures = { version = "0.3", default-features = false, features = ["alloc"] } -futures-async-stream = "0.2" indextree = "4.4" itertools = "0.10" pin-project = "1" @@ -15,5 +20,8 @@ tokio = { version = "0.2", package = "madsim-tokio", features = ["rt", "rt-multi tracing = "0.1" triomphe = "0.1" +[dev-dependencies] +futures-async-stream = "0.2" + [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../../workspace-hack" } diff --git a/src/utils/async_stack_trace/src/context.rs b/src/utils/async_stack_trace/src/context.rs index daf24a613b469..b680383137c54 100644 --- a/src/utils/async_stack_trace/src/context.rs +++ b/src/utils/async_stack_trace/src/context.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/utils/async_stack_trace/src/lib.rs b/src/utils/async_stack_trace/src/lib.rs index dc5a4c11ec3b5..f3ee15962e58f 100644 --- a/src/utils/async_stack_trace/src/lib.rs +++ b/src/utils/async_stack_trace/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/utils/async_stack_trace/src/manager.rs b/src/utils/async_stack_trace/src/manager.rs index 993c83889b5ed..02b1fa37244ce 100644 --- a/src/utils/async_stack_trace/src/manager.rs +++ b/src/utils/async_stack_trace/src/manager.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/utils/async_stack_trace/src/tests.rs b/src/utils/async_stack_trace/src/tests.rs index f6c971a71e304..0197acdcacf8f 100644 --- a/src/utils/async_stack_trace/src/tests.rs +++ b/src/utils/async_stack_trace/src/tests.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/utils/local_stats_alloc/Cargo.toml b/src/utils/local_stats_alloc/Cargo.toml index d5c35bbfef2c9..42a2118a94e73 100644 --- a/src/utils/local_stats_alloc/Cargo.toml +++ b/src/utils/local_stats_alloc/Cargo.toml @@ -13,3 +13,9 @@ description = "Local allocator with statistics" workspace-hack = { path = "../../workspace-hack" } [dev-dependencies] + +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] diff --git a/src/utils/local_stats_alloc/src/lib.rs b/src/utils/local_stats_alloc/src/lib.rs index f00a390550821..82d2effbb5413 100644 --- a/src/utils/local_stats_alloc/src/lib.rs +++ b/src/utils/local_stats_alloc/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/utils/pgwire/Cargo.toml b/src/utils/pgwire/Cargo.toml index 1b1c7aef2dff2..381aa8e84d20e 100644 --- a/src/utils/pgwire/Cargo.toml +++ b/src/utils/pgwire/Cargo.toml @@ -8,6 +8,12 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] anyhow = { version = "1.0", default-features = false } async-trait = "0.1" @@ -21,6 +27,7 @@ pg_interval = "0.4" postgres-types = { version = "0.2.4", features = ["derive","with-chrono-0_4"] } regex = "1.5" risingwave_common = { path = "../../common" } +risingwave_sqlparser = { path = "../../sqlparser" } rust_decimal = { version = "1", features = ["db-tokio-postgres"] } thiserror = "1" tokio = { version = "0.2", package = "madsim-tokio", features = ["rt", "macros"] } diff --git a/src/utils/pgwire/src/error.rs b/src/utils/pgwire/src/error.rs index 24730e0a94ea1..63f4745238fde 100644 --- a/src/utils/pgwire/src/error.rs +++ b/src/utils/pgwire/src/error.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/utils/pgwire/src/error_or_notice.rs b/src/utils/pgwire/src/error_or_notice.rs index 9e7a9c0dc989f..cc527e2fc614b 100644 --- a/src/utils/pgwire/src/error_or_notice.rs +++ b/src/utils/pgwire/src/error_or_notice.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/utils/pgwire/src/lib.rs b/src/utils/pgwire/src/lib.rs index b354fd5df2c57..19a4d3f5a6dfd 100644 --- a/src/utils/pgwire/src/lib.rs +++ b/src/utils/pgwire/src/lib.rs @@ -1,5 +1,4 @@ -#![feature(io_error_other)] -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,7 +11,13 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + +#![feature(io_error_other)] #![feature(lint_reasons, once_cell)] +#![feature(trait_alias)] +#![feature(result_option_inspect)] +#![feature(iterator_try_collect)] +#![feature(trusted_len)] #![expect(clippy::doc_markdown, reason = "FIXME: later")] pub mod error; diff --git a/src/utils/pgwire/src/pg_extended.rs b/src/utils/pgwire/src/pg_extended.rs index 6ec68e28f8ff5..ed35975d81fb4 100644 --- a/src/utils/pgwire/src/pg_extended.rs +++ b/src/utils/pgwire/src/pg_extended.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -21,10 +21,11 @@ use anyhow::anyhow; use bytes::Bytes; use futures::stream::FusedStream; use futures::{Stream, StreamExt, TryStreamExt}; -use itertools::{zip_eq, Itertools}; +use itertools::Itertools; use postgres_types::{FromSql, Type}; use regex::Regex; use risingwave_common::types::DataType; +use risingwave_common::util::iter_util::ZipEqFast; use tokio::io::{AsyncRead, AsyncWrite}; use crate::error::{PsqlError, PsqlResult}; @@ -33,7 +34,7 @@ use crate::pg_message::{BeCommandCompleteMessage, BeMessage}; use crate::pg_protocol::{cstr_to_str, Conn}; use crate::pg_response::{PgResponse, RowSetResult}; use crate::pg_server::{Session, SessionManager}; -use crate::types::Row; +use crate::types::{Format, FormatIterator, Row}; #[derive(Default)] pub struct PgStatement { @@ -78,28 +79,33 @@ impl PgStatement { &self, portal_name: String, params: &[Bytes], - result_format: bool, - param_format: bool, + result_formats: Vec, + param_formats: Vec, ) -> PsqlResult> where VS: Stream + Unpin + Send, { - let instance_query_string = self.prepared_statement.instance(params, param_format)?; + let instance_query_string = self.prepared_statement.instance(params, ¶m_formats)?; - let row_description: Vec = if result_format { + let format_iter = FormatIterator::new(&result_formats, self.row_description.len()) + .map_err(|err| PsqlError::Internal(anyhow!(err)))?; + let row_description: Vec = { let mut row_description = self.row_description.clone(); row_description .iter_mut() - .for_each(|desc| desc.set_to_binary()); + .zip_eq_fast(format_iter) + .for_each(|(desc, format)| { + if let Format::Binary = format { + desc.set_to_binary(); + } + }); row_description - } else { - self.row_description.clone() }; Ok(PgPortal { name: portal_name, query_string: instance_query_string, - result_format, + result_formats, is_query: self.is_query, row_description, result: None, @@ -120,7 +126,7 @@ where { name: String, query_string: String, - result_format: bool, + result_formats: Vec, is_query: bool, row_description: Vec, result: Option>, @@ -156,7 +162,7 @@ where result } else { let result = session - .run_statement(self.query_string.as_str(), self.result_format) + .run_statement(self.query_string.as_str(), self.result_formats.clone()) .await .map_err(|err| PsqlError::ExecuteError(err))?; self.result = Some(result); @@ -172,6 +178,9 @@ where } if result.is_empty() { + // Run the callback before sending the response. + result.run_callback().await?; + msg_stream.write_no_flush(&BeMessage::EmptyQueryResponse)?; } else if result.is_query() { // fetch row data @@ -206,6 +215,9 @@ where query_end = true; } if query_end { + // Run the callback before sending the `CommandComplete` message. + result.run_callback().await?; + msg_stream.write_no_flush(&BeMessage::CommandComplete( BeCommandCompleteMessage { stmt_type: result.get_stmt_type(), @@ -216,6 +228,9 @@ where msg_stream.write_no_flush(&BeMessage::PortalSuspended)?; } } else { + // Run the callback before sending the `CommandComplete` message. + result.run_callback().await?; + msg_stream.write_no_flush(&BeMessage::CommandComplete(BeCommandCompleteMessage { stmt_type: result.get_stmt_type(), rows_cnt: result @@ -355,153 +370,142 @@ impl PreparedStatement { }) } - /// parse_params is to parse raw_params:&[Bytes] into params:[String]. - /// The param produced by this function will be used in the PreparedStatement. - /// - /// type_description is a list of type oids. - /// raw_params is a list of raw params. - /// param_format is format code : false for text, true for binary. 
- /// - /// # Example - /// - /// ```ignore - /// let raw_params = vec!["A".into(), "B".into(), "C".into()]; - /// let type_description = vec![DataType::Varchar; 3]; - /// let params = parse_params(&type_description, &raw_params,false); - /// assert_eq!(params, vec!["'A'", "'B'", "'C'"]) - /// - /// let raw_params = vec!["1".into(), "2".into(), "3.1".into()]; - /// let type_description = vec![DataType::INT,DataType::INT,DataType::FLOAT4]; - /// let params = parse_params(&type_description, &raw_params,false); - /// assert_eq!(params, vec!["1::INT", "2::INT", "3.1::FLOAT4"]) - /// ``` fn parse_params( type_description: &[DataType], raw_params: &[Bytes], - param_format: bool, + param_formats: &[Format], ) -> PsqlResult> { + // Invariant check if type_description.len() != raw_params.len() { return Err(PsqlError::Internal(anyhow!( "The number of params doesn't match the number of types" ))); } - assert_eq!(type_description.len(), raw_params.len()); + if raw_params.is_empty() { + return Ok(vec![]); + } let mut params = Vec::with_capacity(raw_params.len()); - // BINARY FORMAT PARAMS let place_hodler = Type::ANY; - for (type_oid, raw_param) in zip_eq(type_description.iter(), raw_params.iter()) { + let format_iter = FormatIterator::new(param_formats, raw_params.len()) + .map_err(|err| PsqlError::Internal(anyhow!(err)))?; + + for ((type_oid, raw_param), param_format) in type_description + .iter() + .zip_eq_fast(raw_params.iter()) + .zip_eq_fast(format_iter) + { let str = match type_oid { DataType::Varchar | DataType::Bytea => { format!("'{}'", cstr_to_str(raw_param).unwrap().replace('\'', "''")) } - DataType::Boolean => { - if param_format { - bool::from_sql(&place_hodler, raw_param) - .unwrap() - .to_string() - } else { - cstr_to_str(raw_param).unwrap().to_string() - } - } - DataType::Int64 => { - if param_format { - i64::from_sql(&place_hodler, raw_param).unwrap().to_string() - } else { - cstr_to_str(raw_param).unwrap().to_string() - } - } - DataType::Int16 => { - if param_format { - i16::from_sql(&place_hodler, raw_param).unwrap().to_string() - } else { - cstr_to_str(raw_param).unwrap().to_string() - } - } - DataType::Int32 => { - if param_format { - i32::from_sql(&place_hodler, raw_param).unwrap().to_string() - } else { - cstr_to_str(raw_param).unwrap().to_string() - } - } + DataType::Boolean => match param_format { + Format::Binary => bool::from_sql(&place_hodler, raw_param) + .unwrap() + .to_string(), + Format::Text => cstr_to_str(raw_param).unwrap().to_string(), + }, + DataType::Int64 => match param_format { + Format::Binary => i64::from_sql(&place_hodler, raw_param).unwrap().to_string(), + Format::Text => cstr_to_str(raw_param).unwrap().to_string(), + }, + DataType::Int16 => match param_format { + Format::Binary => i16::from_sql(&place_hodler, raw_param).unwrap().to_string(), + Format::Text => cstr_to_str(raw_param).unwrap().to_string(), + }, + DataType::Int32 => match param_format { + Format::Binary => i32::from_sql(&place_hodler, raw_param).unwrap().to_string(), + Format::Text => cstr_to_str(raw_param).unwrap().to_string(), + }, DataType::Float32 => { - let tmp = if param_format { - f32::from_sql(&place_hodler, raw_param).unwrap().to_string() - } else { - cstr_to_str(raw_param).unwrap().to_string() + let tmp = match param_format { + Format::Binary => { + f32::from_sql(&place_hodler, raw_param).unwrap().to_string() + } + Format::Text => cstr_to_str(raw_param).unwrap().to_string(), }; format!("'{}'::FLOAT4", tmp) } DataType::Float64 => { - let tmp = if param_format { - 
f64::from_sql(&place_hodler, raw_param).unwrap().to_string() - } else { - cstr_to_str(raw_param).unwrap().to_string() + let tmp = match param_format { + Format::Binary => { + f64::from_sql(&place_hodler, raw_param).unwrap().to_string() + } + Format::Text => cstr_to_str(raw_param).unwrap().to_string(), }; format!("'{}'::FLOAT8", tmp) } DataType::Date => { - let tmp = if param_format { - chrono::NaiveDate::from_sql(&place_hodler, raw_param) + let tmp = match param_format { + Format::Binary => chrono::NaiveDate::from_sql(&place_hodler, raw_param) .unwrap() - .to_string() - } else { - cstr_to_str(raw_param).unwrap().to_string() + .to_string(), + Format::Text => cstr_to_str(raw_param).unwrap().to_string(), }; format!("'{}'::DATE", tmp) } DataType::Time => { - let tmp = if param_format { - chrono::NaiveTime::from_sql(&place_hodler, raw_param) + let tmp = match param_format { + Format::Binary => chrono::NaiveTime::from_sql(&place_hodler, raw_param) .unwrap() - .to_string() - } else { - cstr_to_str(raw_param).unwrap().to_string() + .to_string(), + Format::Text => cstr_to_str(raw_param).unwrap().to_string(), }; format!("'{}'::TIME", tmp) } DataType::Timestamp => { - let tmp = if param_format { - chrono::NaiveDateTime::from_sql(&place_hodler, raw_param) + let tmp = match param_format { + Format::Binary => chrono::NaiveDateTime::from_sql(&place_hodler, raw_param) .unwrap() - .to_string() - } else { - cstr_to_str(raw_param).unwrap().to_string() + .to_string(), + Format::Text => cstr_to_str(raw_param).unwrap().to_string(), }; format!("'{}'::TIMESTAMP", tmp) } DataType::Decimal => { - let tmp = if param_format { - rust_decimal::Decimal::from_sql(&place_hodler, raw_param) + let tmp = match param_format { + Format::Binary => rust_decimal::Decimal::from_sql(&place_hodler, raw_param) .unwrap() - .to_string() - } else { - cstr_to_str(raw_param).unwrap().to_string() + .to_string(), + Format::Text => cstr_to_str(raw_param).unwrap().to_string(), }; format!("'{}'::DECIMAL", tmp) } DataType::Timestamptz => { - let tmp = if param_format { - chrono::DateTime::::from_sql(&place_hodler, raw_param) - .unwrap() - .to_string() - } else { - cstr_to_str(raw_param).unwrap().to_string() + let tmp = match param_format { + Format::Binary => { + chrono::DateTime::::from_sql(&place_hodler, raw_param) + .unwrap() + .to_string() + } + Format::Text => cstr_to_str(raw_param).unwrap().to_string(), }; format!("'{}'::TIMESTAMPTZ", tmp) } DataType::Interval => { - let tmp = if param_format { - pg_interval::Interval::from_sql(&place_hodler, raw_param) + let tmp = match param_format { + Format::Binary => pg_interval::Interval::from_sql(&place_hodler, raw_param) .unwrap() - .to_postgres() - } else { - cstr_to_str(raw_param).unwrap().to_string() + .to_postgres(), + Format::Text => cstr_to_str(raw_param).unwrap().to_string(), }; format!("'{}'::INTERVAL", tmp) } + DataType::Jsonb => { + let tmp = match param_format { + Format::Binary => { + use risingwave_common::types::to_text::ToText as _; + use risingwave_common::types::Scalar as _; + risingwave_common::array::JsonbVal::value_deserialize(raw_param) + .unwrap() + .as_scalar_ref() + .to_text_with_type(&DataType::Jsonb) + } + Format::Text => cstr_to_str(raw_param).unwrap().to_string(), + }; + format!("'{}'::JSONB", tmp) + } DataType::Struct(_) | DataType::List { .. 
} => { return Err(PsqlError::Internal(anyhow!( "Unsupported param type {:?}", @@ -537,6 +541,7 @@ impl PreparedStatement { params.push("'2022-10-01 12:00:00+01:00'::timestamptz".to_string()) } DataType::Interval => params.push("'2 months ago'::interval".to_string()), + DataType::Jsonb => params.push("'null'::JSONB".to_string()), DataType::Struct(_) | DataType::List { .. } => { return Err(PsqlError::Internal(anyhow!( "Unsupported param type {:?}", @@ -593,8 +598,8 @@ impl PreparedStatement { Ok(self.replace_params(&default_params)) } - pub fn instance(&self, raw_params: &[Bytes], param_format: bool) -> PsqlResult { - let params = Self::parse_params(&self.param_types, raw_params, param_format)?; + pub fn instance(&self, raw_params: &[Bytes], param_formats: &[Format]) -> PsqlResult { + let params = Self::parse_params(&self.param_types, raw_params, param_formats)?; Ok(self.replace_params(¶ms)) } } @@ -611,6 +616,7 @@ mod tests { use tokio_postgres::types::{ToSql, Type}; use crate::pg_extended::PreparedStatement; + use crate::types::Format; #[test] fn test_prepared_statement_without_param() { @@ -618,7 +624,7 @@ mod tests { let prepared_statement = PreparedStatement::parse_statement(raw_statement, vec![]).unwrap(); let default_sql = prepared_statement.instance_default().unwrap(); assert!("SELECT * FROM test_table" == default_sql); - let sql = prepared_statement.instance(&[], false).unwrap(); + let sql = prepared_statement.instance(&[], &[]).unwrap(); assert!("SELECT * FROM test_table" == sql); } @@ -630,7 +636,7 @@ mod tests { .unwrap(); let default_sql = prepared_statement.instance_default().unwrap(); assert!("SELECT * FROM test_table WHERE id = 0::INT" == default_sql); - let sql = prepared_statement.instance(&["1".into()], false).unwrap(); + let sql = prepared_statement.instance(&["1".into()], &[]).unwrap(); assert!("SELECT * FROM test_table WHERE id = 1" == sql); let raw_statement = "INSERT INTO test (index,data) VALUES ($1,$2)".to_string(); @@ -642,7 +648,7 @@ mod tests { let default_sql = prepared_statement.instance_default().unwrap(); assert!("INSERT INTO test (index,data) VALUES (0::INT,'0')" == default_sql); let sql = prepared_statement - .instance(&["1".into(), "DATA".into()], false) + .instance(&["1".into(), "DATA".into()], &[]) .unwrap(); assert!("INSERT INTO test (index,data) VALUES (1,'DATA')" == sql); @@ -655,7 +661,7 @@ mod tests { let default_sql = prepared_statement.instance_default().unwrap(); assert!("UPDATE COFFEES SET SALES = 0::INT WHERE COF_NAME LIKE '0'" == default_sql); let sql = prepared_statement - .instance(&["1".into(), "DATA".into()], false) + .instance(&["1".into(), "DATA".into()], &[]) .unwrap(); assert!("UPDATE COFFEES SET SALES = 1 WHERE COF_NAME LIKE 'DATA'" == sql); @@ -672,7 +678,7 @@ mod tests { let default_sql = prepared_statement.instance_default().unwrap(); assert!("SELECT * FROM test_table WHERE id = 0::INT AND name = '0'" == default_sql); let sql = prepared_statement - .instance(&["1".into(), "DATA".into(), "NAME".into()], false) + .instance(&["1".into(), "DATA".into(), "NAME".into()], &[]) .unwrap(); assert!("SELECT * FROM test_table WHERE id = 1 AND name = 'NAME'" == sql); } @@ -683,7 +689,7 @@ mod tests { let prepared_statement = PreparedStatement::parse_statement(raw_statement, vec![]).unwrap(); let default_sql = prepared_statement.instance_default().unwrap(); assert!("SELECT * FROM test_table WHERE id = 0::INT" == default_sql); - let sql = prepared_statement.instance(&["1".into()], false).unwrap(); + let sql = 
prepared_statement.instance(&["1".into()], &[]).unwrap(); assert!("SELECT * FROM test_table WHERE id = 1" == sql); let raw_statement = @@ -692,7 +698,7 @@ mod tests { let default_sql = prepared_statement.instance_default().unwrap(); assert!("INSERT INTO test (index,data) VALUES (0::INT,'0')" == default_sql); let sql = prepared_statement - .instance(&["1".into(), "DATA".into()], false) + .instance(&["1".into(), "DATA".into()], &[]) .unwrap(); assert!("INSERT INTO test (index,data) VALUES (1,'DATA')" == sql); @@ -702,7 +708,7 @@ mod tests { let default_sql = prepared_statement.instance_default().unwrap(); assert!("UPDATE COFFEES SET SALES = 0::INT WHERE COF_NAME LIKE '0'" == default_sql); let sql = prepared_statement - .instance(&["1".into(), "DATA".into()], false) + .instance(&["1".into(), "DATA".into()], &[]) .unwrap(); assert!("UPDATE COFFEES SET SALES = 1 WHERE COF_NAME LIKE 'DATA'" == sql); } @@ -717,7 +723,7 @@ mod tests { let default_sql = prepared_statement.instance_default().unwrap(); assert!("SELECT * FROM test_table WHERE id = 0::INT AND name = '0'" == default_sql); let sql = prepared_statement - .instance(&["1".into(), "DATA".into()], false) + .instance(&["1".into(), "DATA".into()], &[]) .unwrap(); assert!("SELECT * FROM test_table WHERE id = 1 AND name = 'DATA'" == sql); @@ -728,7 +734,7 @@ mod tests { let default_sql = prepared_statement.instance_default().unwrap(); assert!("INSERT INTO test (index,data) VALUES (0::INT,'0')" == default_sql); let sql = prepared_statement - .instance(&["1".into(), "DATA".into()], false) + .instance(&["1".into(), "DATA".into()], &[]) .unwrap(); assert!("INSERT INTO test (index,data) VALUES (1,'DATA')" == sql); @@ -740,14 +746,14 @@ mod tests { let default_sql = prepared_statement.instance_default().unwrap(); assert!("UPDATE COFFEES SET SALES = 0::INT WHERE COF_NAME LIKE '0'" == default_sql); let sql = prepared_statement - .instance(&["1".into(), "DATA".into()], false) + .instance(&["1".into(), "DATA".into()], &[]) .unwrap(); assert!("UPDATE COFFEES SET SALES = 1 WHERE COF_NAME LIKE 'DATA'" == sql); let raw_statement = "SELECT $1,$2;".to_string(); let prepared_statement = PreparedStatement::parse_statement(raw_statement, vec![]).unwrap(); let sql = prepared_statement - .instance(&["test$2".into(), "test$1".into()], false) + .instance(&["test$2".into(), "test$1".into()], &[]) .unwrap(); assert!("SELECT 'test$2','test$1';" == sql); @@ -756,7 +762,7 @@ mod tests { PreparedStatement::parse_statement(raw_statement, vec![DataType::INT32.to_oid()]) .unwrap(); let sql = prepared_statement - .instance(&["1".into(), "DATA".into()], false) + .instance(&["1".into(), "DATA".into()], &[]) .unwrap(); assert!("SELECT 1,1,'DATA','DATA';" == sql); } @@ -765,20 +771,17 @@ mod tests { fn test_parse_params_text() { let raw_params = vec!["A".into(), "B".into(), "C".into()]; let type_description = vec![DataType::Varchar; 3]; - let params = - PreparedStatement::parse_params(&type_description, &raw_params, false).unwrap(); + let params = PreparedStatement::parse_params(&type_description, &raw_params, &[]).unwrap(); assert_eq!(params, vec!["'A'", "'B'", "'C'"]); let raw_params = vec!["false".into(), "true".into()]; let type_description = vec![DataType::Boolean; 2]; - let params = - PreparedStatement::parse_params(&type_description, &raw_params, false).unwrap(); + let params = PreparedStatement::parse_params(&type_description, &raw_params, &[]).unwrap(); assert_eq!(params, vec!["false", "true"]); let raw_params = vec!["1".into(), "2".into(), "3".into()]; let type_description 
= vec![DataType::Int16, DataType::Int32, DataType::Int64]; - let params = - PreparedStatement::parse_params(&type_description, &raw_params, false).unwrap(); + let params = PreparedStatement::parse_params(&type_description, &raw_params, &[]).unwrap(); assert_eq!(params, vec!["1", "2", "3"]); let raw_params = vec![ @@ -790,8 +793,7 @@ mod tests { .into(), ]; let type_description = vec![DataType::Float32, DataType::Float64, DataType::Decimal]; - let params = - PreparedStatement::parse_params(&type_description, &raw_params, false).unwrap(); + let params = PreparedStatement::parse_params(&type_description, &raw_params, &[]).unwrap(); assert_eq!( params, vec!["'1.0'::FLOAT4", "'2.0'::FLOAT8", "'3'::DECIMAL"] @@ -812,8 +814,7 @@ mod tests { .into(), ]; let type_description = vec![DataType::Date, DataType::Time, DataType::Timestamp]; - let params = - PreparedStatement::parse_params(&type_description, &raw_params, false).unwrap(); + let params = PreparedStatement::parse_params(&type_description, &raw_params, &[]).unwrap(); assert_eq!( params, vec![ @@ -831,7 +832,9 @@ mod tests { // Test VACHAR type. let raw_params = vec!["A".into(), "B".into(), "C".into()]; let type_description = vec![DataType::Varchar; 3]; - let params = PreparedStatement::parse_params(&type_description, &raw_params, true).unwrap(); + let params = + PreparedStatement::parse_params(&type_description, &raw_params, &[Format::Binary]) + .unwrap(); assert_eq!(params, vec!["'A'", "'B'", "'C'"]); // Test BOOLEAN type. @@ -843,7 +846,9 @@ mod tests { .map(|b| b.freeze()) .collect::>(); let type_description = vec![DataType::Boolean; 2]; - let params = PreparedStatement::parse_params(&type_description, &raw_params, true).unwrap(); + let params = + PreparedStatement::parse_params(&type_description, &raw_params, &[Format::Binary]) + .unwrap(); assert_eq!(params, vec!["false", "true"]); // Test SMALLINT, INT, BIGINT type. @@ -856,7 +861,9 @@ mod tests { .map(|b| b.freeze()) .collect::>(); let type_description = vec![DataType::Int16, DataType::Int32, DataType::Int64]; - let params = PreparedStatement::parse_params(&type_description, &raw_params, true).unwrap(); + let params = + PreparedStatement::parse_params(&type_description, &raw_params, &[Format::Binary]) + .unwrap(); assert_eq!(params, vec!["1", "2", "3"]); // Test FLOAT4, FLOAT8, DECIMAL type. 
@@ -872,7 +879,9 @@ mod tests { .map(|b| b.freeze()) .collect::>(); let type_description = vec![DataType::Float32, DataType::Float64, DataType::Decimal]; - let params = PreparedStatement::parse_params(&type_description, &raw_params, true).unwrap(); + let params = + PreparedStatement::parse_params(&type_description, &raw_params, &[Format::Binary]) + .unwrap(); assert_eq!(params, vec!["'1'::FLOAT4", "'2'::FLOAT8", "'3'::DECIMAL"]); let mut raw_params = vec![BytesMut::new(); 3]; @@ -888,7 +897,9 @@ mod tests { .map(|b| b.freeze()) .collect::>(); let type_description = vec![DataType::Float32, DataType::Float64, DataType::Float64]; - let params = PreparedStatement::parse_params(&type_description, &raw_params, true).unwrap(); + let params = + PreparedStatement::parse_params(&type_description, &raw_params, &[Format::Binary]) + .unwrap(); assert_eq!( params, vec!["'NaN'::FLOAT4", "'inf'::FLOAT8", "'-inf'::FLOAT8"] @@ -913,7 +924,9 @@ mod tests { .map(|b| b.freeze()) .collect::>(); let type_description = vec![DataType::Date, DataType::Time, DataType::Timestamp]; - let params = PreparedStatement::parse_params(&type_description, &raw_params, true).unwrap(); + let params = + PreparedStatement::parse_params(&type_description, &raw_params, &[Format::Binary]) + .unwrap(); assert_eq!( params, vec![ @@ -935,7 +948,9 @@ mod tests { .map(|b| b.freeze()) .collect::>(); let type_description = vec![DataType::Timestamptz, DataType::Interval]; - let params = PreparedStatement::parse_params(&type_description, &raw_params, true).unwrap(); + let params = + PreparedStatement::parse_params(&type_description, &raw_params, &[Format::Binary]) + .unwrap(); assert_eq!( params, vec![ @@ -944,4 +959,67 @@ mod tests { ] ); } + + #[test] + fn test_parse_params_mix_format() { + let place_hodler = Type::ANY; + + // Test VACHAR type. + let raw_params = vec!["A".into(), "B".into(), "C".into()]; + let type_description = vec![DataType::Varchar; 3]; + let params = + PreparedStatement::parse_params(&type_description, &raw_params, &[Format::Text; 3]) + .unwrap(); + assert_eq!(params, vec!["'A'", "'B'", "'C'"]); + + // Test BOOLEAN type. + let mut raw_params = vec![BytesMut::new(); 2]; + false.to_sql(&place_hodler, &mut raw_params[0]).unwrap(); + true.to_sql(&place_hodler, &mut raw_params[1]).unwrap(); + let raw_params = raw_params + .into_iter() + .map(|b| b.freeze()) + .collect::>(); + let type_description = vec![DataType::Boolean; 2]; + let params = + PreparedStatement::parse_params(&type_description, &raw_params, &[Format::Binary; 2]) + .unwrap(); + assert_eq!(params, vec!["false", "true"]); + + // Test SMALLINT, INT, BIGINT type. + let mut raw_params = vec![BytesMut::new(); 2]; + 1_i16.to_sql(&place_hodler, &mut raw_params[0]).unwrap(); + 2_i32.to_sql(&place_hodler, &mut raw_params[1]).unwrap(); + let mut raw_params = raw_params + .into_iter() + .map(|b| b.freeze()) + .collect::>(); + raw_params.push("3".into()); + let type_description = vec![DataType::Int16, DataType::Int32, DataType::Int64]; + let params = PreparedStatement::parse_params( + &type_description, + &raw_params, + &[Format::Binary, Format::Binary, Format::Text], + ) + .unwrap(); + assert_eq!(params, vec!["1", "2", "3"]); + + // Test FLOAT4, FLOAT8, DECIMAL type. 
+ let mut raw_params = vec![BytesMut::new(); 2]; + 1.0_f32.to_sql(&place_hodler, &mut raw_params[0]).unwrap(); + 2.0_f64.to_sql(&place_hodler, &mut raw_params[1]).unwrap(); + let mut raw_params = raw_params + .into_iter() + .map(|b| b.freeze()) + .collect::>(); + raw_params.push("TEST".into()); + let type_description = vec![DataType::Float32, DataType::Float64, DataType::VARCHAR]; + let params = PreparedStatement::parse_params( + &type_description, + &raw_params, + &[Format::Binary, Format::Binary, Format::Text], + ) + .unwrap(); + assert_eq!(params, vec!["'1'::FLOAT4", "'2'::FLOAT8", "'TEST'"]); + } } diff --git a/src/utils/pgwire/src/pg_field_descriptor.rs b/src/utils/pgwire/src/pg_field_descriptor.rs index ecc3deb2aceab..fdc25702649db 100644 --- a/src/utils/pgwire/src/pg_field_descriptor.rs +++ b/src/utils/pgwire/src/pg_field_descriptor.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/utils/pgwire/src/pg_message.rs b/src/utils/pgwire/src/pg_message.rs index 2403652edab54..634d2f861b20b 100644 --- a/src/utils/pgwire/src/pg_message.rs +++ b/src/utils/pgwire/src/pg_message.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -82,15 +82,8 @@ pub struct FeQueryMessage { #[derive(Debug)] pub struct FeBindMessage { - // param_format_code: - // false: text - // true: binary - pub param_format_code: bool, - - // result_format_code: - // false: text - // true: binary - pub result_format_code: bool, + pub param_format_codes: Vec, + pub result_format_codes: Vec, pub params: Vec, pub portal_name: Bytes, @@ -175,22 +168,10 @@ impl FeBindMessage { pub fn parse(mut buf: Bytes) -> Result { let portal_name = read_null_terminated(&mut buf)?; let statement_name = read_null_terminated(&mut buf)?; - // Read FormatCode + let len = buf.get_i16(); + let param_format_codes = (0..len).map(|_| buf.get_i16()).collect(); - let param_format_code = if len == 0 || len == 1 { - if len == 0 { - false - } else { - buf.get_i16() == 1 - } - } else { - let first_value = buf.get_i16(); - for _ in 1..len { - assert!(buf.get_i16() == first_value,"Only support uniform param format (TEXT or BINARY), can't support mix format now."); - } - first_value == 1 - }; // Read Params let len = buf.get_i16(); let params = (0..len) @@ -200,34 +181,12 @@ impl FeBindMessage { }) .collect(); - // Read ResultFormatCode - // result format code depend on following rule: - // - If the length is 0, format is false(text). - // - If the length is 1, format is decide by format_codes[0]. - // - If the length > 1, each column can have their own format and it depend on according - // format code. But RisingWave can't support return col with different format now, when - // length>1, we guarantee all format code is the same (0,0,0..) or (1,1,1,...). 
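The removed comment above spells out how Bind-message format codes map onto columns. The new code keeps the raw `Vec<i16>` codes and defers expansion to a `FormatIterator` defined in pgwire's types module, which this hunk does not show. A minimal, self-contained sketch of that expansion rule, with simplified names and a plain `String` error type:

    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    enum Format {
        Text,
        Binary,
    }

    impl Format {
        fn from_i16(code: i16) -> Result<Self, String> {
            match code {
                0 => Ok(Format::Text),
                1 => Ok(Format::Binary),
                other => Err(format!("unknown format code: {}", other)),
            }
        }
    }

    /// Expands `formats` to exactly `len` entries: no codes means everything is
    /// TEXT, a single code applies to every column, otherwise there must be one
    /// code per column.
    fn expand_formats(formats: &[Format], len: usize) -> Result<Vec<Format>, String> {
        match formats.len() {
            0 => Ok(vec![Format::Text; len]),
            1 => Ok(vec![formats[0]; len]),
            n if n == len => Ok(formats.to_vec()),
            n => Err(format!("expected 0, 1 or {} format codes, got {}", len, n)),
        }
    }

    fn main() -> Result<(), String> {
        let codes: Vec<Format> = [1i16].iter().map(|&c| Format::from_i16(c)).collect::<Result<_, _>>()?;
        assert_eq!(expand_formats(&codes, 3)?, vec![Format::Binary; 3]);
        assert_eq!(expand_formats(&[], 2)?, vec![Format::Text; 2]);
        Ok(())
    }

Once codes are expanded per column, mixed text/binary results become possible, which is what the zip with `row_description` in pg_extended.rs relies on.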
let len = buf.get_i16(); - let format_codes = (0..len).map(|_| buf.get_i16()).collect::>(); - let all_elements_are_equal = format_codes.iter().all(|&x| x == format_codes[0]); - - if !all_elements_are_equal { - return Err(Error::new( - ErrorKind::InvalidInput, - "Only support uniform result format (TEXT or BINARY), can't support mix format now.", - )); - } - - let result_format_code = if len == 0 { - // default format:text - false - } else { - format_codes[0] == 1 - }; + let result_format_codes = (0..len).map(|_| buf.get_i16()).collect(); Ok(FeMessage::Bind(FeBindMessage { - param_format_code, - result_format_code, + param_format_codes, + result_format_codes, params, portal_name, statement_name, diff --git a/src/utils/pgwire/src/pg_protocol.rs b/src/utils/pgwire/src/pg_protocol.rs index 8ef326ff4e6ea..18059e33ebb93 100644 --- a/src/utils/pgwire/src/pg_protocol.rs +++ b/src/utils/pgwire/src/pg_protocol.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,10 +24,11 @@ use bytes::{Bytes, BytesMut}; use futures::stream::StreamExt; use futures::Stream; use openssl::ssl::{SslAcceptor, SslContext, SslContextRef, SslMethod}; +use risingwave_sqlparser::parser::Parser; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; use tokio_openssl::SslStream; use tracing::log::trace; -use tracing::warn; +use tracing::{error, warn}; use crate::error::{PsqlError, PsqlResult}; use crate::pg_extended::{PgPortal, PgStatement, PreparedStatement}; @@ -39,6 +40,7 @@ use crate::pg_message::{ }; use crate::pg_response::RowSetResult; use crate::pg_server::{Session, SessionManager, UserAuthenticator}; +use crate::types::Format; /// The state machine for each psql connection. /// Read pg messages from tcp stream and write results back. @@ -165,9 +167,14 @@ where } } - PsqlError::StartupError(_) - | PsqlError::PasswordError(_) - | PsqlError::SslError(_) => { + PsqlError::SslError(e) => { + // For ssl error, because the stream has already been consumed, so there is + // no way to write more message. + error!("SSL connection setup error: {}", e); + return true; + } + + PsqlError::StartupError(_) | PsqlError::PasswordError(_) => { // TODO: Fix the unwrap in this stream. self.stream .write_no_flush(&BeMessage::ErrorResponse(Box::new(e))) @@ -267,7 +274,6 @@ where // Cancel request need this for identify and verification. According to postgres // doc, it should be written to buffer after receive AuthenticationOk. - // let id = self.session_mgr.insert_session(session.clone()); self.stream .write_no_flush(&BeMessage::BackendKeyData(session.id()))?; @@ -306,6 +312,7 @@ where fn process_cancel_msg(&mut self, m: FeCancelMessage) -> PsqlResult<()> { let session_id = (m.target_process_id, m.target_secret_key); self.session_mgr.cancel_queries_in_session(session_id); + self.session_mgr.cancel_creating_jobs_in_session(session_id); self.stream.write_no_flush(&BeMessage::EmptyQueryResponse)?; Ok(()) } @@ -317,46 +324,66 @@ where "(simple query)receive query: {}", sql); let session = self.session.clone().unwrap(); - // execute query - let mut res = session - .run_statement(sql, false) - .await - .map_err(|err| PsqlError::QueryError(err))?; - if let Some(notice) = res.get_notice() { - self.stream - .write_no_flush(&BeMessage::NoticeResponse(¬ice))?; - } + // Parse sql. 
+ let stmts = Parser::parse_sql(sql) + .inspect_err(|e| tracing::error!("failed to parse sql:\n{}:\n{}", sql, e)) + .map_err(|err| PsqlError::QueryError(err.into()))?; - if res.is_query() { - self.stream - .write_no_flush(&BeMessage::RowDescription(&res.get_row_desc()))?; + // Execute multiple statements in simple query. KISS later. + for stmt in stmts { + let session = session.clone(); - let mut rows_cnt = 0; + // execute query + let mut res = session + .run_one_query(stmt, Format::Text) + .await + .map_err(|err| PsqlError::QueryError(err))?; - while let Some(row_set) = res.values_stream().next().await { - let row_set = row_set.map_err(|err| PsqlError::QueryError(err))?; - for row in row_set { - self.stream.write_no_flush(&BeMessage::DataRow(&row))?; - rows_cnt += 1; - } + if let Some(notice) = res.get_notice() { + self.stream + .write_no_flush(&BeMessage::NoticeResponse(¬ice))?; } - self.stream - .write_no_flush(&BeMessage::CommandComplete(BeCommandCompleteMessage { - stmt_type: res.get_stmt_type(), - rows_cnt, - }))?; - } else { - self.stream - .write_no_flush(&BeMessage::CommandComplete(BeCommandCompleteMessage { - stmt_type: res.get_stmt_type(), - rows_cnt: res - .get_effected_rows_cnt() - .expect("row count should be set"), - }))?; - } + if res.is_query() { + self.stream + .write_no_flush(&BeMessage::RowDescription(&res.get_row_desc()))?; + + let mut rows_cnt = 0; + + while let Some(row_set) = res.values_stream().next().await { + let row_set = row_set.map_err(|err| PsqlError::QueryError(err))?; + for row in row_set { + self.stream.write_no_flush(&BeMessage::DataRow(&row))?; + rows_cnt += 1; + } + } + + // Run the callback before sending the `CommandComplete` message. + res.run_callback().await?; + self.stream.write_no_flush(&BeMessage::CommandComplete( + BeCommandCompleteMessage { + stmt_type: res.get_stmt_type(), + rows_cnt, + }, + ))?; + } else { + // Run the callback before sending the `CommandComplete` message. + res.run_callback().await?; + + self.stream.write_no_flush(&BeMessage::CommandComplete( + BeCommandCompleteMessage { + stmt_type: res.get_stmt_type(), + rows_cnt: res + .get_effected_rows_cnt() + .expect("row count should be set"), + }, + ))?; + } + } + // Put this line inside the for loop above will lead to unfinished/stuck regress test...Not + // sure the reason. self.stream.write_no_flush(&BeMessage::ReadyForQuery)?; Ok(()) } @@ -433,13 +460,24 @@ where .ok_or_else(PsqlError::no_statement)? }; + let result_formats = msg + .result_format_codes + .iter() + .map(|&format_code| Format::from_i16(format_code)) + .try_collect()?; + let param_formats = msg + .param_format_codes + .iter() + .map(|&format_code| Format::from_i16(format_code)) + .try_collect()?; + // 2. Instance the statement to get the portal. let portal_name = cstr_to_str(&msg.portal_name).unwrap().to_string(); let portal = statement.instance( portal_name.clone(), &msg.params, - msg.result_format_code, - msg.param_format_code, + result_formats, + param_formats, )?; // 3. Insert the Portal. diff --git a/src/utils/pgwire/src/pg_response.rs b/src/utils/pgwire/src/pg_response.rs index cf3c067de51fb..7c70ae3622b99 100644 --- a/src/utils/pgwire/src/pg_response.rs +++ b/src/utils/pgwire/src/pg_response.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -15,14 +15,16 @@ use std::fmt::Formatter; use std::pin::Pin; -use futures::Stream; +use futures::{Future, FutureExt, Stream, StreamExt}; +use crate::error::PsqlError; use crate::pg_field_descriptor::PgFieldDescriptor; use crate::pg_server::BoxedError; use crate::types::Row; pub type RowSet = Vec; pub type RowSetResult = Result; +pub trait ValuesStream = Stream + Unpin + Send; #[derive(Copy, Clone, Debug, Eq, PartialEq)] #[expect(non_camel_case_types, clippy::upper_case_acronyms)] @@ -61,6 +63,7 @@ pub enum StatementType { DROP_DATABASE, DROP_USER, ALTER_TABLE, + ALTER_SYSTEM, REVOKE_PRIVILEGE, // Introduce ORDER_BY statement type cuz Calcite unvalidated AST has SqlKind.ORDER_BY. Note // that Statement Type is not designed to be one to one mapping with SqlKind. @@ -86,22 +89,23 @@ impl std::fmt::Display for StatementType { } } -pub struct PgResponse -where - VS: Stream + Unpin + Send, -{ +pub trait Callback = Future> + Send; +pub type BoxedCallback = Pin>; + +pub struct PgResponse { stmt_type: StatementType, // row count of effected row. Used for INSERT, UPDATE, DELETE, COPY, and other statements that // don't return rows. row_cnt: Option, notice: Option, values_stream: Option, + callback: Option, row_desc: Vec, } impl std::fmt::Debug for PgResponse where - VS: Stream + Unpin + Send, + VS: ValuesStream, { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("PgResponse") @@ -167,7 +171,7 @@ impl StatementType { impl PgResponse where - VS: Stream + Unpin + Send, + VS: ValuesStream, { pub fn empty_result(stmt_type: StatementType) -> Self { let row_cnt = if stmt_type.is_query() { None } else { Some(0) }; @@ -177,6 +181,7 @@ where values_stream: None, row_desc: vec![], notice: None, + callback: None, } } @@ -192,6 +197,7 @@ where } else { None }, + callback: None, } } @@ -201,15 +207,16 @@ where values_stream: VS, row_desc: Vec, ) -> Self { - Self::new_for_stream_inner(stmt_type, row_cnt, values_stream, row_desc, None) + Self::new_for_stream_inner(stmt_type, row_cnt, values_stream, row_desc, None, None) } - pub fn new_for_stream_with_notice( + pub fn new_for_stream_extra( stmt_type: StatementType, row_cnt: Option, values_stream: VS, row_desc: Vec, notice: String, + callback: impl Callback + 'static, ) -> Self { Self::new_for_stream_inner( stmt_type, @@ -221,6 +228,7 @@ where } else { None }, + Some(callback.boxed()), ) } @@ -230,6 +238,7 @@ where values_stream: VS, row_desc: Vec, notice: Option, + callback: Option, ) -> Self { assert!( stmt_type.is_query() ^ row_cnt.is_some(), @@ -241,6 +250,7 @@ where values_stream: Some(values_stream), row_desc, notice, + callback, } } @@ -268,11 +278,23 @@ where self.row_desc.clone() } - pub fn values_stream(&mut self) -> Pin<&mut VS> { - Pin::new( - self.values_stream - .as_mut() - .expect("getting values from empty result"), - ) + pub fn values_stream(&mut self) -> &mut VS { + self.values_stream.as_mut().expect("no values stream") + } + + /// Run the callback if there is one. + /// + /// This should only be called after the values stream has been exhausted. Multiple calls to + /// this function will be no-ops. + pub async fn run_callback(&mut self) -> Result<(), PsqlError> { + // Check if the stream is exhausted. 
+ if let Some(values_stream) = &mut self.values_stream { + assert!(values_stream.next().await.is_none()); + } + + if let Some(callback) = self.callback.take() { + callback.await.map_err(PsqlError::ExecuteError)?; + } + Ok(()) } } diff --git a/src/utils/pgwire/src/pg_server.rs b/src/utils/pgwire/src/pg_server.rs index 9fae2fea699d3..ec28ffde5e178 100644 --- a/src/utils/pgwire/src/pg_server.rs +++ b/src/utils/pgwire/src/pg_server.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ use std::result::Result; use std::sync::Arc; use futures::Stream; +use risingwave_sqlparser::ast::Statement; use tokio::io::{AsyncRead, AsyncWrite}; use tokio::net::TcpListener; use tracing::debug; @@ -25,6 +26,7 @@ use tracing::debug; use crate::pg_field_descriptor::PgFieldDescriptor; use crate::pg_protocol::{PgProtocol, TlsConfig}; use crate::pg_response::{PgResponse, RowSetResult}; +use crate::types::Format; pub type BoxedError = Box; pub type SessionId = (i32, i32); @@ -40,15 +42,13 @@ where fn cancel_queries_in_session(&self, session_id: SessionId); + fn cancel_creating_jobs_in_session(&self, session_id: SessionId); + fn end_session(&self, session: &Self::Session); } /// A psql connection. Each connection binds with a database. Switching database will need to /// recreate another connection. -/// -/// format: -/// false: TEXT -/// true: BINARY #[async_trait::async_trait] pub trait Session: Send + Sync where @@ -57,8 +57,17 @@ where async fn run_statement( self: Arc, sql: &str, - format: bool, + formats: Vec, + ) -> Result, BoxedError>; + + /// The str sql can not use the unparse from AST: There is some problem when dealing with create + /// view, see https://github.com/risingwavelabs/risingwave/issues/6801. + async fn run_one_query( + self: Arc, + sql: Statement, + format: Format, ) -> Result, BoxedError>; + async fn infer_return_type( self: Arc, sql: &str, @@ -160,12 +169,16 @@ mod tests { use bytes::Bytes; use futures::stream::BoxStream; use futures::StreamExt; + use risingwave_sqlparser::ast::Statement; use tokio_postgres::types::*; use tokio_postgres::NoTls; use crate::pg_field_descriptor::PgFieldDescriptor; use crate::pg_response::{PgResponse, RowSetResult, StatementType}; - use crate::pg_server::{pg_serve, Session, SessionId, SessionManager, UserAuthenticator}; + use crate::pg_server::{ + pg_serve, BoxedError, Session, SessionId, SessionManager, UserAuthenticator, + }; + use crate::types; use crate::types::Row; struct MockSessionManager {} @@ -185,6 +198,10 @@ mod tests { todo!() } + fn cancel_creating_jobs_in_session(&self, _session_id: SessionId) { + todo!() + } + fn end_session(&self, _session: &Self::Session) {} } @@ -195,7 +212,7 @@ mod tests { async fn run_statement( self: Arc, sql: &str, - _format: bool, + _format: Vec, ) -> Result>, Box> { // split a statement and trim \' around the input param to construct result. @@ -228,6 +245,27 @@ mod tests { )) } + /// The test below will issue "BEGIN", "ROLLBACK" as simple query, but the results do not + /// matter, so just return a fake one. 
+ async fn run_one_query( + self: Arc, + _sql: Statement, + _format: types::Format, + ) -> Result>, BoxedError> { + let res: Vec> = vec![Some(Bytes::new())]; + Ok(PgResponse::new_for_stream( + StatementType::SELECT, + None, + futures::stream::iter(vec![Ok(vec![Row::new(res)])]).boxed(), + vec![ + // 1043 is the oid of varchar type. + // -1 is the type len of varchar type. + PgFieldDescriptor::new("".to_string(), 1043, -1); + 1 + ], + )) + } + fn user_authenticator(&self) -> &UserAuthenticator { &UserAuthenticator::None } diff --git a/src/utils/pgwire/src/types.rs b/src/utils/pgwire/src/types.rs index 39c05ec0df11d..e05405a4cb023 100644 --- a/src/utils/pgwire/src/types.rs +++ b/src/utils/pgwire/src/types.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,10 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::iter::TrustedLen; use std::ops::Index; +use std::slice::Iter; +use anyhow::anyhow; use bytes::Bytes; +use crate::error::{PsqlError, PsqlResult}; + /// A row of data returned from the database by a query. #[derive(Debug, Clone)] // NOTE: Since we only support simple query protocol, the values are represented as strings. @@ -50,3 +55,88 @@ impl Index for Row { &self.0[index] } } + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Format { + Binary, + Text, +} + +impl Format { + pub fn from_i16(format_code: i16) -> PsqlResult { + match format_code { + 0 => Ok(Format::Text), + 1 => Ok(Format::Binary), + _ => Err(PsqlError::Internal(anyhow!( + "Unknown format code: {}", + format_code + ))), + } + } +} + +/// FormatIterator used to generate formats of actual length given the provided format. +/// According Postgres Document: +/// - If the length of provided format is 0, all format will be default format(TEXT). +/// - If the length of provided format is 1, all format will be the same as this only format. +/// - If the length of provided format > 1, provided format should be the actual format. 
+#[derive(Debug, Clone)] +pub struct FormatIterator<'a, 'b> +where + 'a: 'b, +{ + _formats: &'a [Format], + format_iter: Iter<'b, Format>, + actual_len: usize, + default_format: Format, +} + +impl<'a, 'b> FormatIterator<'a, 'b> { + pub fn new(provided_formats: &'a [Format], actual_len: usize) -> Result { + if !provided_formats.is_empty() + && provided_formats.len() != 1 + && provided_formats.len() != actual_len + { + return Err(format!( + "format codes length {} is not 0, 1 or equal to actual length {}", + provided_formats.len(), + actual_len + )); + } + + let default_format = provided_formats.get(0).copied().unwrap_or(Format::Text); + + Ok(Self { + _formats: provided_formats, + default_format, + format_iter: provided_formats.iter(), + actual_len, + }) + } +} + +impl Iterator for FormatIterator<'_, '_> { + type Item = Format; + + fn next(&mut self) -> Option { + if self.actual_len == 0 { + return None; + } + + self.actual_len -= 1; + + Some( + self.format_iter + .next() + .copied() + .unwrap_or(self.default_format), + ) + } + + fn size_hint(&self) -> (usize, Option) { + (self.actual_len, Some(self.actual_len)) + } +} + +impl ExactSizeIterator for FormatIterator<'_, '_> {} +unsafe impl TrustedLen for FormatIterator<'_, '_> {} diff --git a/src/utils/runtime/Cargo.toml b/src/utils/runtime/Cargo.toml index 9991d27d35b13..be0576b5ba721 100644 --- a/src/utils/runtime/Cargo.toml +++ b/src/utils/runtime/Cargo.toml @@ -8,15 +8,20 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] -async-trait = "0.1" async_stack_trace = { path = "../async_stack_trace" } console = "0.15" console-subscriber = "0.1.8" futures = { version = "0.3", default-features = false, features = ["alloc"] } parking_lot = { version = "0.12", features = ["deadlock_detection"] } pprof = { version = "0.11", features = ["flamegraph"] } -tokio = { version = "0.2.11", package = "madsim-tokio", features = [ +tokio = { version = "0.2", package = "madsim-tokio", features = [ "rt", "rt-multi-thread", "sync", @@ -25,7 +30,6 @@ tokio = { version = "0.2.11", package = "madsim-tokio", features = [ "signal", "fs" ] } -tokio-stream = "0.1" tracing = "0.1" tracing-subscriber = { version = "0.3.16", features = ["fmt", "parking_lot", "std", "time", "local-time"] } diff --git a/src/utils/runtime/src/lib.rs b/src/utils/runtime/src/lib.rs index 9fc53f209396a..9ae31962066fb 100644 --- a/src/utils/runtime/src/lib.rs +++ b/src/utils/runtime/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
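Stepping back to the FormatIterator added in src/utils/pgwire/src/types.rs above: it encodes the Postgres rule for expanding the Bind message's format codes (0, 1, or N codes) into one Format per column. A self-contained restatement of that rule, written as a plain function purely for illustration (this is not the actual type, just the same behaviour):

// Illustrative reimplementation of the expansion rule FormatIterator follows.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Format { Binary, Text }

fn expand_formats(provided: &[Format], actual_len: usize) -> Result<Vec<Format>, String> {
    match provided.len() {
        0 => Ok(vec![Format::Text; actual_len]),        // no codes: default TEXT everywhere
        1 => Ok(vec![provided[0]; actual_len]),         // one code: applies to every column
        n if n == actual_len => Ok(provided.to_vec()),  // one code per column
        n => Err(format!(
            "format codes length {n} is not 0, 1 or equal to actual length {actual_len}"
        )),
    }
}

fn main() {
    assert_eq!(expand_formats(&[], 2), Ok(vec![Format::Text, Format::Text]));
    assert_eq!(expand_formats(&[Format::Binary], 3), Ok(vec![Format::Binary; 3]));
    assert!(expand_formats(&[Format::Text, Format::Binary], 3).is_err());
}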
@@ -48,7 +48,7 @@ fn configure_risingwave_targets_fmt(targets: filter::Targets) -> filter::Targets targets // enable trace for most modules .with_target("risingwave_stream", Level::DEBUG) - .with_target("risingwave_batch", Level::DEBUG) + .with_target("risingwave_batch", Level::INFO) .with_target("risingwave_storage", Level::DEBUG) .with_target("risingwave_sqlparser", Level::INFO) .with_target("risingwave_source", Level::INFO) @@ -63,36 +63,49 @@ fn configure_risingwave_targets_fmt(targets: filter::Targets) -> filter::Targets // if you want to enable any of them, find the target name and set it to `TRACE` // .with_target("events::stream::mview::scan", Level::TRACE) .with_target("events", Level::ERROR) - - // if env_var_is_true("RW_CI") { - // targets.with_target("events::meta::server_heartbeat", Level::TRACE) - // } else { - // targets - // } } -/// =========================================================================== -/// END SECTION -/// =========================================================================== +// =========================================================================== +// END SECTION +// =========================================================================== pub struct LoggerSettings { /// Enable tokio console output. enable_tokio_console: bool, /// Enable colorful output in console. colorful: bool, + targets: Vec<(String, tracing::metadata::LevelFilter)>, } -impl LoggerSettings { - pub fn new_default() -> Self { - Self::new(false) +impl Default for LoggerSettings { + fn default() -> Self { + Self::new() } +} - pub fn new(enable_tokio_console: bool) -> Self { +impl LoggerSettings { + pub fn new() -> Self { Self { - enable_tokio_console, + enable_tokio_console: false, colorful: console::colors_enabled_stderr() && console::colors_enabled(), + targets: vec![], } } + + pub fn enable_tokio_console(mut self, enable: bool) -> Self { + self.enable_tokio_console = enable; + self + } + + /// Overrides the default target settings. + pub fn with_target( + mut self, + target: impl Into, + level: impl Into, + ) -> Self { + self.targets.push((target.into(), level.into())); + self + } } /// Set panic hook to abort the process (without losing debug info and stack trace). @@ -151,6 +164,13 @@ pub fn init_risingwave_logger(settings: LoggerSettings) { #[cfg(debug_assertions)] let filter = filter.with_default(Level::DEBUG); + let filter = settings + .targets + .into_iter() + .fold(filter, |filter, (target, level)| { + filter.with_target(target, level) + }); + layers.push(fmt_layer.with_filter(to_env_filter(filter)).boxed()); }; diff --git a/src/utils/sync-point/Cargo.toml b/src/utils/sync-point/Cargo.toml index 584874d397a6f..050085ab47852 100644 --- a/src/utils/sync-point/Cargo.toml +++ b/src/utils/sync-point/Cargo.toml @@ -4,6 +4,12 @@ version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] futures-util = "0.3" spin = "0.9" diff --git a/src/utils/sync-point/src/lib.rs b/src/utils/sync-point/src/lib.rs index e906b29e56417..d87038c60f307 100644 --- a/src/utils/sync-point/src/lib.rs +++ b/src/utils/sync-point/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
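The runtime crate above replaces LoggerSettings::new(bool) with a builder. A minimal usage sketch, assuming the crate is consumed as risingwave_rt and that init_risingwave_logger is called from a node binary (the target and level below are only examples, not a prescribed configuration):

use risingwave_rt::{init_risingwave_logger, LoggerSettings}; // crate path assumed
use tracing::Level;

fn main() {
    init_risingwave_logger(
        LoggerSettings::new()
            .enable_tokio_console(false)
            // Per-binary overrides are folded into the default filter, so e.g. a
            // node could turn risingwave_batch back up to DEBUG for local debugging.
            .with_target("risingwave_batch", Level::DEBUG),
    );
}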
diff --git a/src/utils/task_stats_alloc/Cargo.toml b/src/utils/task_stats_alloc/Cargo.toml index 6e5d22773fb6d..a8442fc9c09d6 100644 --- a/src/utils/task_stats_alloc/Cargo.toml +++ b/src/utils/task_stats_alloc/Cargo.toml @@ -5,6 +5,12 @@ edition = "2021" description = "Allocator with statistics" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[package.metadata.cargo-machete] +ignored = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + [dependencies] tokio = { version = "0.2", package = "madsim-tokio", features = [ "fs", diff --git a/src/utils/task_stats_alloc/src/lib.rs b/src/utils/task_stats_alloc/src/lib.rs index 50d6301353775..156708c84f11e 100644 --- a/src/utils/task_stats_alloc/src/lib.rs +++ b/src/utils/task_stats_alloc/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/utils/task_stats_alloc/tests/integration.rs b/src/utils/task_stats_alloc/tests/integration.rs index 45746ce7632a2..24780913cb148 100644 --- a/src/utils/task_stats_alloc/tests/integration.rs +++ b/src/utils/task_stats_alloc/tests/integration.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/utils/task_stats_alloc/tests/loom.rs b/src/utils/task_stats_alloc/tests/loom.rs index 7861bb9da2feb..393caa4a044ea 100644 --- a/src/utils/task_stats_alloc/tests/loom.rs +++ b/src/utils/task_stats_alloc/tests/loom.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/utils/workspace-config/Cargo.toml b/src/utils/workspace-config/Cargo.toml index c4a8885714432..6047fa00315ea 100644 --- a/src/utils/workspace-config/Cargo.toml +++ b/src/utils/workspace-config/Cargo.toml @@ -17,7 +17,7 @@ enable-static-log-level = ["log", "tracing"] isahc = { version = "1", optional = true, default-features = false, features = ["static-ssl", "static-curl"] } log = { version = "0.4", optional = true, features = ["release_max_level_info"] } openssl = { version = "0.10", optional = true, features = ["vendored"] } -rdkafka = { package = "madsim-rdkafka", version = "=0.2.13-alpha", optional = true, features = ["ssl-vendored", "gssapi-vendored"] } +rdkafka = { package = "madsim-rdkafka", version = "=0.2.14-alpha", optional = true, features = ["ssl-vendored", "gssapi-vendored"] } tracing = { version = "0.1", optional = true, features = ["release_max_level_info"] } # workspace-hack = { path = "../../workspace-hack" } # Don't add workspace-hack into this crate! diff --git a/src/utils/workspace-config/src/lib.rs b/src/utils/workspace-config/src/lib.rs index d9c8ffb8bc62d..163705a68bf34 100644 --- a/src/utils/workspace-config/src/lib.rs +++ b/src/utils/workspace-config/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/src/workspace-hack/Cargo.toml b/src/workspace-hack/Cargo.toml index 057bf57babf5e..a116d6c507230 100644 --- a/src/workspace-hack/Cargo.toml +++ b/src/workspace-hack/Cargo.toml @@ -18,172 +18,188 @@ publish = false ### BEGIN HAKARI SECTION [dependencies] -ahash = { version = "0.7", features = ["std"] } -anyhow = { version = "1", features = ["backtrace", "std"] } -auto_enums = { version = "0.7", features = ["futures", "std"] } -bytes = { version = "1", features = ["serde", "std"] } -chrono = { version = "0.4", features = ["clock", "iana-time-zone", "js-sys", "oldtime", "std", "time", "wasm-bindgen", "wasmbind", "winapi"] } -clap = { version = "3", features = ["atty", "clap_derive", "color", "derive", "env", "once_cell", "std", "strsim", "suggestions", "termcolor"] } -combine = { version = "4", features = ["alloc", "bytes", "std"] } -criterion = { version = "0.4", features = ["async", "async_futures", "async_tokio", "cargo_bench_support", "futures", "plotters", "rayon", "tokio"] } -crossbeam-channel = { version = "0.5", features = ["crossbeam-utils", "std"] } -crossbeam-deque = { version = "0.8", features = ["crossbeam-epoch", "crossbeam-utils", "std"] } -crossbeam-epoch = { version = "0.9", features = ["alloc", "once_cell", "std"] } -either = { version = "1", features = ["use_std"] } +ahash = { version = "0.8" } +anyhow = { version = "1", features = ["backtrace"] } +arrayvec = { version = "0.7", default-features = false, features = ["std"] } +auto_enums = { version = "0.7", features = ["futures"] } +aws-sdk-s3 = { version = "0.21", features = ["native-tls"] } +aws-smithy-client = { version = "0.51", default-features = false, features = ["native-tls", "rustls"] } +aws-types = { version = "0.51", default-features = false, features = ["hardcoded-credentials"] } +base64 = { version = "0.21" } +bytes = { version = "1", features = ["serde"] } +chrono = { version = "0.4" } +clap = { version = "3", features = ["derive", "env"] } +combine = { version = "4" } +criterion = { version = "0.4", features = ["async_futures", "async_tokio"] } +crossbeam-channel = { version = "0.5" } +crossbeam-deque = { version = "0.8" } +crossbeam-epoch = { version = "0.9" } +crossbeam-utils = { version = "0.8" } +digest = { version = "0.10", features = ["mac", "std"] } +either = { version = "1" } fail = { version = "0.5", default-features = false, features = ["failpoints"] } -fixedbitset = { version = "0.4", features = ["std"] } -flate2 = { version = "1", features = ["any_zlib", "libz-sys", "miniz_oxide", "rust_backend", "zlib"] } -frunk_core = { version = "0.4", default-features = false, features = ["std"] } -futures = { version = "0.3", features = ["alloc", "async-await", "executor", "futures-executor", "std"] } -futures-channel = { version = "0.3", features = ["alloc", "futures-sink", "sink", "std"] } -futures-core = { version = "0.3", features = ["alloc", "std"] } -futures-executor = { version = "0.3", features = ["std"] } -futures-io = { version = "0.3", features = ["std"] } -futures-sink = { version = "0.3", features = ["alloc", "std"] } -futures-task = { version = "0.3", default-features = false, features = ["alloc", "std"] } -futures-util = { version = "0.3", features = ["alloc", "async-await", "async-await-macro", "channel", "futures-channel", "futures-io", "futures-macro", "futures-sink", "io", "memchr", "sink", "slab", "std"] } -hashbrown = { version = "0.12", features = ["ahash", "inline-more", "nightly", "raw"] } -hdrhistogram = { version = "7", features = ["base64", "crossbeam-channel", "flate2", 
"nom", "serialization", "sync"] } -hyper = { version = "0.14", features = ["client", "full", "h2", "http1", "http2", "runtime", "server", "socket2", "stream", "tcp"] } -indexmap = { version = "1", default-features = false, features = ["std"] } -isahc = { version = "1", default-features = false, features = ["encoding_rs", "mime", "text-decoding"] } -itertools = { version = "0.10", features = ["use_alloc", "use_std"] } -lexical-core = { version = "0.8", features = ["floats", "format", "integers", "lexical-parse-float", "lexical-parse-integer", "lexical-write-float", "lexical-write-integer", "parse", "parse-floats", "parse-integers", "std", "write", "write-floats", "write-integers"] } +fixedbitset = { version = "0.4" } +futures = { version = "0.3" } +futures-channel = { version = "0.3", features = ["sink"] } +futures-core = { version = "0.3" } +futures-executor = { version = "0.3" } +futures-io = { version = "0.3" } +futures-sink = { version = "0.3" } +futures-task = { version = "0.3", default-features = false, features = ["std"] } +futures-util = { version = "0.3", features = ["channel", "io", "sink"] } +hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13" } +hashbrown-5ef9efb8ec2df382 = { package = "hashbrown", version = "0.12", features = ["nightly", "raw"] } +hdrhistogram = { version = "7" } +hyper = { version = "0.14", features = ["full"] } +isahc = { version = "1", default-features = false, features = ["text-decoding"] } +itertools = { version = "0.10" } +lexical-core = { version = "0.8", features = ["format"] } lexical-parse-float = { version = "0.8", default-features = false, features = ["format", "std"] } lexical-parse-integer = { version = "0.8", default-features = false, features = ["format", "std"] } -lexical-util = { version = "0.8", default-features = false, features = ["floats", "format", "integers", "parse", "parse-floats", "parse-integers", "std", "write", "write-floats", "write-integers"] } +lexical-util = { version = "0.8", default-features = false, features = ["format", "parse-floats", "parse-integers", "std", "write-floats", "write-integers"] } lexical-write-float = { version = "0.8", default-features = false, features = ["format", "std"] } lexical-write-integer = { version = "0.8", default-features = false, features = ["format", "std"] } -libc = { version = "0.2", features = ["extra_traits", "std"] } -libz-sys = { version = "1", default-features = false, features = ["libc"] } +libc = { version = "0.2", features = ["extra_traits"] } lock_api = { version = "0.4", default-features = false, features = ["arc_lock"] } -log = { version = "0.4", default-features = false, features = ["kv_unstable", "std", "value-bag"] } +log = { version = "0.4", default-features = false, features = ["kv_unstable", "std"] } madsim-tokio = { version = "0.2", default-features = false, features = ["fs", "io-util", "macros", "net", "process", "rt", "rt-multi-thread", "signal", "sync", "time", "tracing"] } -memchr = { version = "2", features = ["std"] } -minimal-lexical = { version = "0.2", default-features = false, features = ["std"] } -multimap = { version = "0.8", features = ["serde", "serde_impl"] } -nom = { version = "7", features = ["alloc", "std"] } -num-bigint = { version = "0.4", features = ["std"] } -num-integer = { version = "0.1", features = ["i128", "std"] } -num-traits = { version = "0.2", features = ["i128", "libm", "std"] } +memchr = { version = "2" } +miniz_oxide = { version = "0.6", default-features = false, features = ["with-alloc"] } +multimap = { version = "0.8" } 
+num-bigint = { version = "0.4" } +num-integer = { version = "0.1", features = ["i128"] } +num-traits = { version = "0.2", features = ["i128", "libm"] } parking_lot = { version = "0.12", features = ["arc_lock", "deadlock_detection"] } -parking_lot_core = { version = "0.9", default-features = false, features = ["backtrace", "deadlock_detection", "petgraph", "thread-id"] } -petgraph = { version = "0.6", features = ["graphmap", "matrix_graph", "stable_graph"] } -phf = { version = "0.11", features = ["std", "uncased"] } -phf_shared = { version = "0.11", features = ["std", "uncased"] } -prometheus = { version = "0.13", features = ["libc", "process", "procfs", "protobuf"] } -prost = { version = "0.11", features = ["no-recursion-limit", "prost-derive", "std"] } -prost-types = { version = "0.11", features = ["std"] } -rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "small_rng", "std", "std_rng"] } -rand_core = { version = "0.6", default-features = false, features = ["alloc", "getrandom", "std"] } -regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] } -regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] } -reqwest = { version = "0.11", features = ["__tls", "default-tls", "hyper-tls", "json", "native-tls-crate", "serde_json", "tokio-native-tls"] } -ring = { version = "0.16", features = ["alloc", "dev_urandom_fallback", "once_cell", "std"] } -scopeguard = { version = "1", features = ["use_std"] } -serde = { version = "1", features = ["alloc", "derive", "rc", "serde_derive", "std"] } -smallvec = { version = "1", default-features = false, features = ["serde", "union", "write"] } +parking_lot_core = { version = "0.9", default-features = false, features = ["deadlock_detection"] } +petgraph = { version = "0.6" } +phf = { version = "0.11", features = ["uncased"] } +phf_shared = { version = "0.11", features = ["uncased"] } +prometheus = { version = "0.13", features = ["process"] } +prost = { version = "0.11", features = ["no-recursion-limit"] } +prost-types = { version = "0.11" } +rand = { version = "0.8", features = ["small_rng"] } +rand_core = { version = "0.6", default-features = false, features = ["std"] } +regex = { version = "1" } +regex-syntax = { version = "0.6" } +reqwest = { version = "0.11", features = ["json"] } +ring = { version = "0.16", features = ["std"] } +scopeguard = { version = "1" } +serde = { version = "1", features = ["alloc", "derive", "rc"] } +smallvec = { version = "1", default-features = false, features = ["serde"] } socket2 = { version = "0.4", default-features = false, features = ["all"] } -stable_deref_trait = { version = "1", features = ["alloc", "std"] } -strum = { version = "0.24", features = ["derive", "std", "strum_macros"] } -time = { version = "0.3", features = ["alloc", "formatting", "itoa", "local-offset", "macros", "parsing", "std", "time-macros"] } -tokio = { version = "1", features = ["bytes", "fs", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "parking_lot", "process", "rt", "rt-multi-thread", "signal", "signal-hook-registry", "socket2", "stats", "sync", "time", "tokio-macros", "tracing"] } -tokio-stream = { version = "0.1", features = ["net", "time"] } -tokio-util = { 
version = "0.7", features = ["codec", "io", "tracing"] } -tonic = { version = "0.8", features = ["async-trait", "axum", "channel", "codegen", "flate2", "gzip", "h2", "hyper", "hyper-timeout", "prost", "prost-derive", "prost1", "rustls-pemfile", "tls", "tls-roots-common", "tls-webpki-roots", "tokio", "tokio-rustls", "tower", "tracing-futures", "transport", "webpki-roots"] } -tower = { version = "0.4", features = ["__common", "balance", "buffer", "discover", "filter", "futures-core", "futures-util", "indexmap", "limit", "load", "load-shed", "log", "make", "pin-project", "pin-project-lite", "rand", "ready-cache", "retry", "slab", "timeout", "tokio", "tokio-util", "tracing", "util"] } -tower-http = { version = "0.3", features = ["add-extension", "cors", "map-response-body", "tower", "util"] } -tracing = { version = "0.1", features = ["attributes", "log", "release_max_level_trace", "std", "tracing-attributes"] } -tracing-core = { version = "0.1", features = ["once_cell", "std"] } -tracing-subscriber = { version = "0.3", features = ["alloc", "ansi", "env-filter", "fmt", "local-time", "matchers", "nu-ansi-term", "once_cell", "parking_lot", "regex", "registry", "sharded-slab", "smallvec", "std", "thread_local", "time", "tracing", "tracing-log"] } +stable_deref_trait = { version = "1" } +strum = { version = "0.24", features = ["derive"] } +time = { version = "0.3", features = ["formatting", "local-offset", "macros", "parsing"] } +tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "parking_lot", "process", "rt-multi-thread", "signal", "stats", "sync", "time", "tracing"] } +tokio-stream = { version = "0.1", features = ["net"] } +tokio-util = { version = "0.7", features = ["codec", "io"] } +tonic = { version = "0.8", features = ["gzip", "tls-webpki-roots"] } +tower = { version = "0.4", features = ["balance", "buffer", "filter", "limit", "load-shed", "retry", "timeout", "util"] } +tower-http = { version = "0.3", features = ["add-extension", "cors", "map-response-body", "util"] } +tracing = { version = "0.1", features = ["log", "release_max_level_trace"] } +tracing-core = { version = "0.1" } +tracing-futures = { version = "0.2" } +tracing-subscriber = { version = "0.3", features = ["env-filter", "local-time", "parking_lot"] } +triomphe = { version = "0.1" } url = { version = "2", features = ["serde"] } -uuid = { version = "1", features = ["fast-rng", "getrandom", "rand", "rng", "serde", "std", "v4"] } +uuid = { version = "1", features = ["fast-rng", "serde", "v4"] } +zstd = { version = "0.11" } +zstd-safe = { version = "5", default-features = false, features = ["arrays", "legacy", "std", "zdict_builder"] } +zstd-sys = { version = "2", default-features = false, features = ["legacy", "std", "zdict_builder"] } [build-dependencies] -ahash = { version = "0.7", features = ["std"] } -anyhow = { version = "1", features = ["backtrace", "std"] } -auto_enums = { version = "0.7", features = ["futures", "std"] } -auto_enums_derive = { version = "0.7", default-features = false, features = ["futures", "futures03", "std"] } -bytes = { version = "1", features = ["serde", "std"] } -cc = { version = "1", default-features = false, features = ["jobserver", "parallel"] } -chrono = { version = "0.4", features = ["clock", "iana-time-zone", "js-sys", "oldtime", "std", "time", "wasm-bindgen", "wasmbind", "winapi"] } -clap = { version = "3", features = ["atty", "clap_derive", "color", "derive", "env", "once_cell", "std", "strsim", "suggestions", "termcolor"] } -combine = { version = "4", features = 
["alloc", "bytes", "std"] } -criterion = { version = "0.4", features = ["async", "async_futures", "async_tokio", "cargo_bench_support", "futures", "plotters", "rayon", "tokio"] } -crossbeam-channel = { version = "0.5", features = ["crossbeam-utils", "std"] } -crossbeam-deque = { version = "0.8", features = ["crossbeam-epoch", "crossbeam-utils", "std"] } -crossbeam-epoch = { version = "0.9", features = ["alloc", "once_cell", "std"] } -either = { version = "1", features = ["use_std"] } +ahash = { version = "0.8" } +anyhow = { version = "1", features = ["backtrace"] } +arrayvec = { version = "0.7", default-features = false, features = ["std"] } +auto_enums = { version = "0.7", features = ["futures"] } +auto_enums_derive = { version = "0.7", default-features = false, features = ["futures", "std"] } +aws-sdk-s3 = { version = "0.21", features = ["native-tls"] } +aws-smithy-client = { version = "0.51", default-features = false, features = ["native-tls", "rustls"] } +aws-types = { version = "0.51", default-features = false, features = ["hardcoded-credentials"] } +base64 = { version = "0.21" } +bytes = { version = "1", features = ["serde"] } +cc = { version = "1", default-features = false, features = ["parallel"] } +chrono = { version = "0.4" } +clap = { version = "3", features = ["derive", "env"] } +combine = { version = "4" } +criterion = { version = "0.4", features = ["async_futures", "async_tokio"] } +crossbeam-channel = { version = "0.5" } +crossbeam-deque = { version = "0.8" } +crossbeam-epoch = { version = "0.9" } +crossbeam-utils = { version = "0.8" } +digest = { version = "0.10", features = ["mac", "std"] } +either = { version = "1" } fail = { version = "0.5", default-features = false, features = ["failpoints"] } -fixedbitset = { version = "0.4", features = ["std"] } -flate2 = { version = "1", features = ["any_zlib", "libz-sys", "miniz_oxide", "rust_backend", "zlib"] } -frunk_core = { version = "0.4", default-features = false, features = ["std"] } -futures = { version = "0.3", features = ["alloc", "async-await", "executor", "futures-executor", "std"] } -futures-channel = { version = "0.3", features = ["alloc", "futures-sink", "sink", "std"] } -futures-core = { version = "0.3", features = ["alloc", "std"] } -futures-executor = { version = "0.3", features = ["std"] } -futures-io = { version = "0.3", features = ["std"] } -futures-sink = { version = "0.3", features = ["alloc", "std"] } -futures-task = { version = "0.3", default-features = false, features = ["alloc", "std"] } -futures-util = { version = "0.3", features = ["alloc", "async-await", "async-await-macro", "channel", "futures-channel", "futures-io", "futures-macro", "futures-sink", "io", "memchr", "sink", "slab", "std"] } -hashbrown = { version = "0.12", features = ["ahash", "inline-more", "nightly", "raw"] } -hdrhistogram = { version = "7", features = ["base64", "crossbeam-channel", "flate2", "nom", "serialization", "sync"] } -hyper = { version = "0.14", features = ["client", "full", "h2", "http1", "http2", "runtime", "server", "socket2", "stream", "tcp"] } -indexmap = { version = "1", default-features = false, features = ["std"] } -isahc = { version = "1", default-features = false, features = ["encoding_rs", "mime", "text-decoding"] } -itertools = { version = "0.10", features = ["use_alloc", "use_std"] } -lexical-core = { version = "0.8", features = ["floats", "format", "integers", "lexical-parse-float", "lexical-parse-integer", "lexical-write-float", "lexical-write-integer", "parse", "parse-floats", "parse-integers", "std", 
"write", "write-floats", "write-integers"] } +fixedbitset = { version = "0.4" } +futures = { version = "0.3" } +futures-channel = { version = "0.3", features = ["sink"] } +futures-core = { version = "0.3" } +futures-executor = { version = "0.3" } +futures-io = { version = "0.3" } +futures-sink = { version = "0.3" } +futures-task = { version = "0.3", default-features = false, features = ["std"] } +futures-util = { version = "0.3", features = ["channel", "io", "sink"] } +hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13" } +hashbrown-5ef9efb8ec2df382 = { package = "hashbrown", version = "0.12", features = ["nightly", "raw"] } +hdrhistogram = { version = "7" } +hyper = { version = "0.14", features = ["full"] } +isahc = { version = "1", default-features = false, features = ["text-decoding"] } +itertools = { version = "0.10" } +lexical-core = { version = "0.8", features = ["format"] } lexical-parse-float = { version = "0.8", default-features = false, features = ["format", "std"] } lexical-parse-integer = { version = "0.8", default-features = false, features = ["format", "std"] } -lexical-util = { version = "0.8", default-features = false, features = ["floats", "format", "integers", "parse", "parse-floats", "parse-integers", "std", "write", "write-floats", "write-integers"] } +lexical-util = { version = "0.8", default-features = false, features = ["format", "parse-floats", "parse-integers", "std", "write-floats", "write-integers"] } lexical-write-float = { version = "0.8", default-features = false, features = ["format", "std"] } lexical-write-integer = { version = "0.8", default-features = false, features = ["format", "std"] } -libc = { version = "0.2", features = ["extra_traits", "std"] } -libz-sys = { version = "1", default-features = false, features = ["libc"] } +libc = { version = "0.2", features = ["extra_traits"] } lock_api = { version = "0.4", default-features = false, features = ["arc_lock"] } -log = { version = "0.4", default-features = false, features = ["kv_unstable", "std", "value-bag"] } +log = { version = "0.4", default-features = false, features = ["kv_unstable", "std"] } madsim-tokio = { version = "0.2", default-features = false, features = ["fs", "io-util", "macros", "net", "process", "rt", "rt-multi-thread", "signal", "sync", "time", "tracing"] } -memchr = { version = "2", features = ["std"] } -minimal-lexical = { version = "0.2", default-features = false, features = ["std"] } -multimap = { version = "0.8", features = ["serde", "serde_impl"] } -nom = { version = "7", features = ["alloc", "std"] } -num-bigint = { version = "0.4", features = ["std"] } -num-integer = { version = "0.1", features = ["i128", "std"] } -num-traits = { version = "0.2", features = ["i128", "libm", "std"] } +memchr = { version = "2" } +miniz_oxide = { version = "0.6", default-features = false, features = ["with-alloc"] } +multimap = { version = "0.8" } +num-bigint = { version = "0.4" } +num-integer = { version = "0.1", features = ["i128"] } +num-traits = { version = "0.2", features = ["i128", "libm"] } parking_lot = { version = "0.12", features = ["arc_lock", "deadlock_detection"] } -parking_lot_core = { version = "0.9", default-features = false, features = ["backtrace", "deadlock_detection", "petgraph", "thread-id"] } -petgraph = { version = "0.6", features = ["graphmap", "matrix_graph", "stable_graph"] } -phf = { version = "0.11", features = ["std", "uncased"] } -phf_shared = { version = "0.11", features = ["std", "uncased"] } -proc-macro2 = { version = "1", features = ["proc-macro", 
"span-locations"] } -prometheus = { version = "0.13", features = ["libc", "process", "procfs", "protobuf"] } -prost = { version = "0.11", features = ["no-recursion-limit", "prost-derive", "std"] } -prost-types = { version = "0.11", features = ["std"] } -rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "small_rng", "std", "std_rng"] } -rand_core = { version = "0.6", default-features = false, features = ["alloc", "getrandom", "std"] } -regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] } -regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] } -reqwest = { version = "0.11", features = ["__tls", "default-tls", "hyper-tls", "json", "native-tls-crate", "serde_json", "tokio-native-tls"] } -ring = { version = "0.16", features = ["alloc", "dev_urandom_fallback", "once_cell", "std"] } -scopeguard = { version = "1", features = ["use_std"] } -serde = { version = "1", features = ["alloc", "derive", "rc", "serde_derive", "std"] } -smallvec = { version = "1", default-features = false, features = ["serde", "union", "write"] } +parking_lot_core = { version = "0.9", default-features = false, features = ["deadlock_detection"] } +petgraph = { version = "0.6" } +phf = { version = "0.11", features = ["uncased"] } +phf_shared = { version = "0.11", features = ["uncased"] } +proc-macro2 = { version = "1", features = ["span-locations"] } +prometheus = { version = "0.13", features = ["process"] } +prost = { version = "0.11", features = ["no-recursion-limit"] } +prost-types = { version = "0.11" } +rand = { version = "0.8", features = ["small_rng"] } +rand_core = { version = "0.6", default-features = false, features = ["std"] } +regex = { version = "1" } +regex-syntax = { version = "0.6" } +reqwest = { version = "0.11", features = ["json"] } +ring = { version = "0.16", features = ["std"] } +scopeguard = { version = "1" } +serde = { version = "1", features = ["alloc", "derive", "rc"] } +smallvec = { version = "1", default-features = false, features = ["serde"] } socket2 = { version = "0.4", default-features = false, features = ["all"] } -stable_deref_trait = { version = "1", features = ["alloc", "std"] } -strum = { version = "0.24", features = ["derive", "std", "strum_macros"] } -syn = { version = "1", features = ["clone-impls", "derive", "extra-traits", "full", "parsing", "printing", "proc-macro", "quote", "visit", "visit-mut"] } -time = { version = "0.3", features = ["alloc", "formatting", "itoa", "local-offset", "macros", "parsing", "std", "time-macros"] } -tokio = { version = "1", features = ["bytes", "fs", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "parking_lot", "process", "rt", "rt-multi-thread", "signal", "signal-hook-registry", "socket2", "stats", "sync", "time", "tokio-macros", "tracing"] } -tokio-stream = { version = "0.1", features = ["net", "time"] } -tokio-util = { version = "0.7", features = ["codec", "io", "tracing"] } -tonic = { version = "0.8", features = ["async-trait", "axum", "channel", "codegen", "flate2", "gzip", "h2", "hyper", "hyper-timeout", "prost", "prost-derive", "prost1", "rustls-pemfile", "tls", "tls-roots-common", "tls-webpki-roots", "tokio", "tokio-rustls", "tower", "tracing-futures", 
"transport", "webpki-roots"] } -tonic-build = { version = "0.8", features = ["prost", "prost-build", "transport"] } -tower = { version = "0.4", features = ["__common", "balance", "buffer", "discover", "filter", "futures-core", "futures-util", "indexmap", "limit", "load", "load-shed", "log", "make", "pin-project", "pin-project-lite", "rand", "ready-cache", "retry", "slab", "timeout", "tokio", "tokio-util", "tracing", "util"] } -tower-http = { version = "0.3", features = ["add-extension", "cors", "map-response-body", "tower", "util"] } -tracing = { version = "0.1", features = ["attributes", "log", "release_max_level_trace", "std", "tracing-attributes"] } -tracing-core = { version = "0.1", features = ["once_cell", "std"] } -tracing-subscriber = { version = "0.3", features = ["alloc", "ansi", "env-filter", "fmt", "local-time", "matchers", "nu-ansi-term", "once_cell", "parking_lot", "regex", "registry", "sharded-slab", "smallvec", "std", "thread_local", "time", "tracing", "tracing-log"] } +stable_deref_trait = { version = "1" } +strum = { version = "0.24", features = ["derive"] } +syn = { version = "1", features = ["extra-traits", "full", "visit", "visit-mut"] } +time = { version = "0.3", features = ["formatting", "local-offset", "macros", "parsing"] } +tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "parking_lot", "process", "rt-multi-thread", "signal", "stats", "sync", "time", "tracing"] } +tokio-stream = { version = "0.1", features = ["net"] } +tokio-util = { version = "0.7", features = ["codec", "io"] } +tonic = { version = "0.8", features = ["gzip", "tls-webpki-roots"] } +tonic-build = { version = "0.8" } +tower = { version = "0.4", features = ["balance", "buffer", "filter", "limit", "load-shed", "retry", "timeout", "util"] } +tower-http = { version = "0.3", features = ["add-extension", "cors", "map-response-body", "util"] } +tracing = { version = "0.1", features = ["log", "release_max_level_trace"] } +tracing-core = { version = "0.1" } +tracing-futures = { version = "0.2" } +tracing-subscriber = { version = "0.3", features = ["env-filter", "local-time", "parking_lot"] } +triomphe = { version = "0.1" } url = { version = "2", features = ["serde"] } -uuid = { version = "1", features = ["fast-rng", "getrandom", "rand", "rng", "serde", "std", "v4"] } +uuid = { version = "1", features = ["fast-rng", "serde", "v4"] } +zstd = { version = "0.11" } +zstd-safe = { version = "5", default-features = false, features = ["arrays", "legacy", "std", "zdict_builder"] } +zstd-sys = { version = "2", default-features = false, features = ["legacy", "std", "zdict_builder"] } ### END HAKARI SECTION diff --git a/src/workspace-hack/build.rs b/src/workspace-hack/build.rs index cbaeeee1e0cfd..829be1d30c217 100644 --- a/src/workspace-hack/build.rs +++ b/src/workspace-hack/build.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/workspace-hack/src/lib.rs b/src/workspace-hack/src/lib.rs index 821fb64886886..76ed3b308a2d0 100644 --- a/src/workspace-hack/src/lib.rs +++ b/src/workspace-hack/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Singularity Data +// Copyright 2023 RisingWave Labs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.