Merged

Changes from all commits
56 commits
5891777
Make NumericStats only have MinMax for ordered min-max values
bradynwalsh May 17, 2025
cb6a0e0
Merge branch 'main' into fix/17417-empty-anti-join
bradynwalsh May 17, 2025
c43444f
Avoid going to in-depth while computing join order
carlopi Jun 12, 2025
c607a2d
Handle review from @Tmonster and fix format
carlopi Jun 12, 2025
1dbb3c0
Move THRESHOLD_AVOID_IN_DEPTH_VISIT to 100
carlopi Jun 13, 2025
517a06d
Avoid std::move, fix amalgamation
carlopi Jun 13, 2025
9590c3d
Use max_expression_depth to determine the maximum depth
carlopi Jun 13, 2025
b887779
constant or null can be replaced when argument is a bound column reg
Tmonster Jun 23, 2025
d9a7e50
implement bulk enqueue for no threads
Jun 24, 2025
151ec8d
format
Jun 24, 2025
54a879b
Bring back libduckdb-src.zip as release artifact
mlafeldt Jun 23, 2025
679fd29
is_constant_or_null is only replaced if all args are constant or a bo…
Tmonster Jun 24, 2025
c055865
fix D_ASSERT syntax
Tmonster Jun 24, 2025
ab6c24b
Move libduckdb-src.zip upload to separate CI job
mlafeldt Jun 24, 2025
f72d15d
Issue #18035: Zero Fill TIMESTAMP_NS
hawkfish Jun 24, 2025
24bb8de
Implement bulk enqueue for non-concurrent queue (#18032)
Mytherin Jun 24, 2025
3e46cb3
include bound function expression header
Tmonster Jun 24, 2025
1643b10
Bring back libduckdb-src.zip as release artifact (#18019)
Mytherin Jun 24, 2025
a111dda
add missing includes
Tmonster Jun 24, 2025
f92c06d
Merge remote-tracking branch 'upstream/v1.3-ossivalis' into fix_const…
Tmonster Jun 24, 2025
600b569
Fix handling dynamic table filters in RemoveUnusedColumns
Damon07 Jun 24, 2025
c32fdc3
Keep column_ref alive
Damon07 Jun 24, 2025
381744e
Fix review
Damon07 Jun 25, 2025
86fd394
Revert headers import
Damon07 Jun 25, 2025
8147b43
Issue #18035: Zero Fill TIMESTAMP_NS (#18045)
Mytherin Jun 25, 2025
92dcccf
fix duplicate includes
Tmonster Jun 25, 2025
13c7800
bump spatial
Maxxen Jun 25, 2025
05b1175
Add one more header
Tmonster Jun 25, 2025
15ef853
Fix handling dynamic table filters in RemoveUnusedColumns (#18033)
Mytherin Jun 25, 2025
ae10471
fix index scan to correctly extract in filter values of nested conjun…
taniabogatsch Jun 25, 2025
587e11e
Issue #18047: TIMESTAMP_TZ Upcast Costs
hawkfish Jun 25, 2025
66dd717
Fix incorrect results in index scan (#18058)
Mytherin Jun 26, 2025
9add1b3
bump spatial (v1.3) (#18059)
Mytherin Jun 26, 2025
2b17cee
constant or null can be replaced when argument is a bound column reg …
Mytherin Jun 26, 2025
499dcaa
fix issue with empty rhs
Jun 26, 2025
a9085ff
Issue #18047: TIMESTAMP_TZ Upcast Costs (#18064)
Mytherin Jun 26, 2025
6104341
add test
Jun 26, 2025
4eccabd
Properly handle empty RHS in IE Join (#18067)
Mytherin Jun 27, 2025
8cab422
Avoid going too in-depth while computing join order (#17904)
Mytherin Jun 27, 2025
44d5bb2
Run Python workflow against both Python 3.9 and 3.13 on PR to ensure …
Jun 27, 2025
a3a7ccf
Move very long workflow from debug to reldebug
carlopi Jun 27, 2025
3134493
Main.yml: Actually move to single job instead of parallels
carlopi Jun 27, 2025
96719ee
Main.yml: Move very long job from debug to release with `-DDEBUG` and…
Mytherin Jun 30, 2025
484f5d8
Run Python workflow against both Python 3.9 and 3.13 on PR to ensure …
Mytherin Jun 30, 2025
cbcfa3a
Add Stack Trace marker to stack trace
JelteF Jun 30, 2025
6267d4e
fix statistics propagation for anti-joins on empty tables (#17439)
Mytherin Jun 30, 2025
a8c72af
Add write_bloom_filter flag to allow disabling of bloom filters
Mytherin Jun 30, 2025
42eb1f2
Fix copy constructor in SetVariableStatement
staticlibs Jun 30, 2025
e87b7d8
OSX.yml: Move from using debug builds to release + DDEBUG + FORCE_ASSERT
carlopi Jul 1, 2025
155a8a8
OSX.yml: Move from using debug builds to release + DDEBUG + FORCE_ASS…
Mytherin Jul 1, 2025
8a648b4
Fix copy constructor in SetVariableStatement (#18101)
Mytherin Jul 1, 2025
ce537bf
[Parquet] Add write_bloom_filter flag to allow disabling of bloom fil…
Mytherin Jul 1, 2025
39c99b2
Add Stack Trace marker to stack trace (#18089)
Mytherin Jul 1, 2025
1f8dac7
Merge remote-tracking branch 'origin/main' into merge_v13_on_main
carlopi Jul 1, 2025
8077cf2
Fixup merge commit
carlopi Jul 1, 2025
a5a6e92
Remove extension_utils from test/optimizer/union_alls.cpp
carlopi Jul 1, 2025
22 changes: 22 additions & 0 deletions .github/workflows/LinuxRelease.yml
@@ -121,6 +121,7 @@ jobs:
           AWS_ACCESS_KEY_ID: ${{ secrets.S3_DUCKDB_STAGING_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_DUCKDB_STAGING_KEY }}
         run: |
+          python3 scripts/amalgamation.py
           zip -j duckdb_cli-linux-${{ matrix.config.arch }}.zip build/release/duckdb
           gzip -9 -k -n -c build/release/duckdb > duckdb_cli-linux-${{ matrix.config.arch }}.gz
           zip -j libduckdb-linux-${{ matrix.config.arch }}.zip build/release/src/libduckdb*.* src/amalgamation/duckdb.hpp src/include/duckdb.h
@@ -153,6 +154,27 @@ jobs:
           build/release/benchmark/benchmark_runner benchmark/micro/update/update_with_join.benchmark
           build/release/duckdb -c "COPY (SELECT 42) TO '/dev/stdout' (FORMAT PARQUET)" | cat

+  upload-libduckdb-src:
+    name: Upload libduckdb-src.zip
+    needs: linux-release-cli
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          ref: ${{ inputs.git_ref }}
+
+      - name: Deploy
+        shell: bash
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.S3_DUCKDB_STAGING_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_DUCKDB_STAGING_KEY }}
+        run: |
+          python3 scripts/amalgamation.py
+          zip -j libduckdb-src.zip src/amalgamation/duckdb.hpp src/amalgamation/duckdb.cpp src/include/duckdb.h src/include/duckdb_extension.h
+          ./scripts/upload-assets-to-staging.sh github_release libduckdb-src.zip
+
   linux-extensions-64:
     # Builds extensions for linux_amd64
     name: Linux Extensions (x64)
22 changes: 7 additions & 15 deletions .github/workflows/Main.yml
@@ -45,26 +45,18 @@ jobs:
         run: echo "Event name is ${{ github.event_name }}"

   linux-debug:
-    name: Linux Debug (${{ matrix.tag }})
+    name: Linux DEBUG + sanitizers
+    # This tests release build while enabling slow verifiers (masked by #ifdef DEBUG) and sanitizers
     needs: check-draft
     if: ${{ !startsWith(github.ref, 'refs/tags/v') }}
     runs-on: ubuntu-22.04
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - tag: 1
-            start_offset: ""
-            end_offset: "--end-offset 2000"
-          - tag: 2
-            start_offset: "--start-offset 2000"
-            end_offset: ""
     env:
       CC: gcc-10
       CXX: g++-10
       TREAT_WARNINGS_AS_ERRORS: 1
       GEN: ninja
-      CRASH_ON_ASSERT: 1
+      CMAKE_CXX_FLAGS: '-DDEBUG'
+      FORCE_ASSERT: 1

     steps:
       - uses: actions/checkout@v4
@@ -84,11 +76,11 @@ jobs:

       - name: Build
         shell: bash
-        run: make debug
+        run: make release

       - name: Output version info
         shell: bash
-        run: ./build/debug/duckdb -c "PRAGMA version;"
+        run: ./build/release/duckdb -c "PRAGMA version;"

       - name: Set DUCKDB_INSTALL_LIB for ADBC tests
         shell: bash
@@ -100,7 +92,7 @@ jobs:
       - name: Test
         shell: bash
         run: |
-          python3 scripts/run_tests_one_by_one.py build/debug/test/unittest --tests-per-invocation 100 ${{ matrix.start_offset }} ${{ matrix.end_offset }}
+          python3 scripts/run_tests_one_by_one.py build/release/test/unittest --tests-per-invocation 100

   linux-release:
     name: Linux Release (full suite)
6 changes: 4 additions & 2 deletions .github/workflows/OSX.yml
@@ -50,6 +50,8 @@ jobs:

     env:
       TREAT_WARNINGS_AS_ERRORS: 1
+      CMAKE_CXX_FLAGS: '-DDEBUG'
+      FORCE_ASSERT: 1

     steps:
       - uses: actions/checkout@v4
@@ -73,7 +75,7 @@ jobs:

       - name: Build
         shell: bash
-        run: GEN=ninja make debug
+        run: GEN=ninja make release

       - name: Set DUCKDB_INSTALL_LIB for ADBC tests
         shell: bash
@@ -85,7 +87,7 @@ jobs:
       - name: Test
         if: ${{ inputs.skip_tests != 'true' }}
         shell: bash
-        run: make unittestci
+        run: make unittest_release

       - name: Amalgamation
         if: ${{ inputs.skip_tests != 'true' }}
2 changes: 0 additions & 2 deletions .github/workflows/Python.yml
@@ -157,8 +157,6 @@ jobs:
           - ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' || inputs.run_all == 'true' }}
         exclude:
           # Speed things up a bit for non-releases
-          - isRelease: false
-            python_build: 'cp39-*'
           - isRelease: false
             python_build: 'cp310-*'
           - isRelease: false
20 changes: 12 additions & 8 deletions extension/icu/icu-timezone.cpp
@@ -184,16 +184,20 @@ struct ICUFromNaiveTimestamp : public ICUDateFunc {
 			throw InternalException("Type %s not handled in BindCastFromNaive", LogicalTypeIdToString(source.id()));
 		}
 	}
+	static void AddCast(CastFunctionSet &casts, const LogicalType &source, const LogicalType &target) {
+		const auto implicit_cost = CastRules::ImplicitCast(source, target);
+		casts.RegisterCastFunction(source, target, BindCastFromNaive, implicit_cost);
+	}

 	static void AddCasts(ExtensionLoader &loader) {
-
-		const auto implicit_cost = CastRules::ImplicitCast(LogicalType::TIMESTAMP, LogicalType::TIMESTAMP_TZ);
-		loader.RegisterCastFunction(LogicalType::TIMESTAMP, LogicalType::TIMESTAMP_TZ, BindCastFromNaive,
-		                            implicit_cost);
-		loader.RegisterCastFunction(LogicalType::TIMESTAMP_MS, LogicalType::TIMESTAMP_TZ, BindCastFromNaive);
-		loader.RegisterCastFunction(LogicalType::TIMESTAMP_NS, LogicalType::TIMESTAMP_TZ, BindCastFromNaive);
-		loader.RegisterCastFunction(LogicalType::TIMESTAMP_S, LogicalType::TIMESTAMP_TZ, BindCastFromNaive);
-		loader.RegisterCastFunction(LogicalType::DATE, LogicalType::TIMESTAMP_TZ, BindCastFromNaive);
+		auto &config = DBConfig::GetConfig(loader.GetDatabaseInstance());
+		auto &casts = config.GetCastFunctions();
+
+		AddCast(casts, LogicalType::TIMESTAMP, LogicalType::TIMESTAMP_TZ);
+		AddCast(casts, LogicalType::TIMESTAMP_MS, LogicalType::TIMESTAMP_TZ);
+		AddCast(casts, LogicalType::TIMESTAMP_NS, LogicalType::TIMESTAMP_TZ);
+		AddCast(casts, LogicalType::TIMESTAMP_S, LogicalType::TIMESTAMP_TZ);
+		AddCast(casts, LogicalType::DATE, LogicalType::TIMESTAMP_TZ);
 	}
 };

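The refactor above keeps the implicit-cast cost for TIMESTAMP to TIMESTAMP_TZ while routing registration through the database's cast-function set. A minimal SQL sketch of the user-visible behavior this preserves (assuming the ICU extension is loaded; the time zone and literal values are illustrative):

    SET TimeZone = 'UTC';
    SELECT TIMESTAMP '2025-06-25 10:00:00' = TIMESTAMPTZ '2025-06-25 10:00:00+00'; -- naive timestamp implicitly upcasts; true under UTC
    SELECT DATE '2025-06-25' < TIMESTAMPTZ '2025-06-26 00:00:00+00';               -- DATE takes the same cast path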
9 changes: 7 additions & 2 deletions extension/parquet/include/parquet_writer.hpp
@@ -82,8 +82,9 @@ class ParquetWriter {
 	             vector<string> names, duckdb_parquet::CompressionCodec::type codec, ChildFieldIDs field_ids,
 	             const vector<pair<string, string>> &kv_metadata,
 	             shared_ptr<ParquetEncryptionConfig> encryption_config, idx_t dictionary_size_limit,
-	             idx_t string_dictionary_page_size_limit, double bloom_filter_false_positive_ratio,
-	             int64_t compression_level, bool debug_use_openssl, ParquetVersion parquet_version);
+	             idx_t string_dictionary_page_size_limit, bool enable_bloom_filters,
+	             double bloom_filter_false_positive_ratio, int64_t compression_level, bool debug_use_openssl,
+	             ParquetVersion parquet_version);
 	~ParquetWriter();

 public:
@@ -122,6 +123,9 @@ class ParquetWriter {
 	idx_t StringDictionaryPageSizeLimit() const {
 		return string_dictionary_page_size_limit;
 	}
+	double EnableBloomFilters() const {
+		return enable_bloom_filters;
+	}
 	double BloomFilterFalsePositiveRatio() const {
 		return bloom_filter_false_positive_ratio;
 	}
@@ -164,6 +168,7 @@ class ParquetWriter {
 	shared_ptr<ParquetEncryptionConfig> encryption_config;
 	idx_t dictionary_size_limit;
 	idx_t string_dictionary_page_size_limit;
+	bool enable_bloom_filters;
 	double bloom_filter_false_positive_ratio;
 	int64_t compression_level;
 	bool debug_use_openssl;
14 changes: 9 additions & 5 deletions extension/parquet/include/writer/templated_column_writer.hpp
@@ -284,15 +284,19 @@ class StandardColumnWriter : public PrimitiveColumnWriter {
 		auto &state = state_p.Cast<StandardColumnWriterState<SRC, TGT, OP>>();
 		D_ASSERT(state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY);

-		state.bloom_filter =
-		    make_uniq<ParquetBloomFilter>(state.dictionary.GetSize(), writer.BloomFilterFalsePositiveRatio());
+		if (writer.EnableBloomFilters()) {
+			state.bloom_filter =
+			    make_uniq<ParquetBloomFilter>(state.dictionary.GetSize(), writer.BloomFilterFalsePositiveRatio());
+		}

 		state.dictionary.IterateValues([&](const SRC &src_value, const TGT &tgt_value) {
 			// update the statistics
 			OP::template HandleStats<SRC, TGT>(stats, tgt_value);
-			// update the bloom filter
-			auto hash = OP::template XXHash64<SRC, TGT>(tgt_value);
-			state.bloom_filter->FilterInsert(hash);
+			if (state.bloom_filter) {
+				// update the bloom filter
+				auto hash = OP::template XXHash64<SRC, TGT>(tgt_value);
+				state.bloom_filter->FilterInsert(hash);
+			}
 		});

 		// flush the dictionary page and add it to the to-be-written pages
7 changes: 5 additions & 2 deletions extension/parquet/parquet_extension.cpp
@@ -225,6 +225,7 @@ struct ParquetWriteBindData : public TableFunctionData {
 	//! This is huge but we grow it starting from 1 MB
 	idx_t string_dictionary_page_size_limit = PrimitiveColumnWriter::MAX_UNCOMPRESSED_DICT_PAGE_SIZE;

+	bool enable_bloom_filters = true;
 	//! What false positive rate are we willing to accept for bloom filters
 	double bloom_filter_false_positive_ratio = 0.01;

@@ -371,6 +372,8 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyFunctionBi
 			                        PrimitiveColumnWriter::MAX_UNCOMPRESSED_DICT_PAGE_SIZE);
 		}
 		bind_data->string_dictionary_page_size_limit = val;
+	} else if (loption == "write_bloom_filter") {
+		bind_data->enable_bloom_filters = BooleanValue::Get(option.second[0].DefaultCastAs(LogicalType::BOOLEAN));
 	} else if (loption == "bloom_filter_false_positive_ratio") {
 		auto val = option.second[0].GetValue<double>();
 		if (val <= 0) {
@@ -434,8 +437,8 @@ unique_ptr<GlobalFunctionData> ParquetWriteInitializeGlobal(ClientContext &conte
 	    context, fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, parquet_bind.codec,
 	    parquet_bind.field_ids.Copy(), parquet_bind.kv_metadata, parquet_bind.encryption_config,
 	    parquet_bind.dictionary_size_limit, parquet_bind.string_dictionary_page_size_limit,
-	    parquet_bind.bloom_filter_false_positive_ratio, parquet_bind.compression_level, parquet_bind.debug_use_openssl,
-	    parquet_bind.parquet_version);
+	    parquet_bind.enable_bloom_filters, parquet_bind.bloom_filter_false_positive_ratio,
+	    parquet_bind.compression_level, parquet_bind.debug_use_openssl, parquet_bind.parquet_version);
 	return std::move(global_state);
 }

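Taken together, these hunks thread a write_bloom_filter COPY option (default true) through the bind data to the writer. A usage sketch — the option names come from the parse branches above; the table and file names are hypothetical:

    COPY my_table TO 'my_table.parquet' (FORMAT PARQUET, WRITE_BLOOM_FILTER false);
    -- bloom filters remain on by default; when enabled, the existing knob still applies:
    COPY my_table TO 'my_table.parquet' (FORMAT PARQUET, BLOOM_FILTER_FALSE_POSITIVE_RATIO 0.05);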
6 changes: 4 additions & 2 deletions extension/parquet/parquet_writer.cpp
@@ -345,12 +345,14 @@ ParquetWriter::ParquetWriter(ClientContext &context, FileSystem &fs, string file
                              vector<string> names_p, CompressionCodec::type codec, ChildFieldIDs field_ids_p,
                              const vector<pair<string, string>> &kv_metadata,
                              shared_ptr<ParquetEncryptionConfig> encryption_config_p, idx_t dictionary_size_limit_p,
-                             idx_t string_dictionary_page_size_limit_p, double bloom_filter_false_positive_ratio_p,
-                             int64_t compression_level_p, bool debug_use_openssl_p, ParquetVersion parquet_version)
+                             idx_t string_dictionary_page_size_limit_p, bool enable_bloom_filters_p,
+                             double bloom_filter_false_positive_ratio_p, int64_t compression_level_p,
+                             bool debug_use_openssl_p, ParquetVersion parquet_version)
     : context(context), file_name(std::move(file_name_p)), sql_types(std::move(types_p)),
       column_names(std::move(names_p)), codec(codec), field_ids(std::move(field_ids_p)),
       encryption_config(std::move(encryption_config_p)), dictionary_size_limit(dictionary_size_limit_p),
       string_dictionary_page_size_limit(string_dictionary_page_size_limit_p),
+      enable_bloom_filters(enable_bloom_filters_p),
       bloom_filter_false_positive_ratio(bloom_filter_false_positive_ratio_p), compression_level(compression_level_p),
       debug_use_openssl(debug_use_openssl_p), parquet_version(parquet_version), total_written(0), num_row_groups(0) {

2 changes: 1 addition & 1 deletion src/common/error_data.cpp
@@ -70,7 +70,7 @@ string ErrorData::ConstructFinalMessage() const {
 		auto entry = extra_info.find("stack_trace_pointers");
 		if (entry != extra_info.end()) {
 			auto stack_trace = StackTrace::ResolveStacktraceSymbols(entry->second);
-			error += "\n\n" + stack_trace;
+			error += "\n\nStack Trace:\n" + stack_trace;
 		}
 	}
 	return error;
3 changes: 3 additions & 0 deletions src/common/operator/string_cast.cpp
@@ -159,6 +159,9 @@ duckdb::string_t StringFromTimestamp(timestamp_t input, Vector &vector) {
 	idx_t nano_length = 0;
 	if (picos) {
 		// If there are ps, we need all the µs
+		if (!time[3]) {
+			TimeToStringCast::FormatMicros(time[3], micro_buffer);
+		}
 		time_length = 15;
 		nano_length = 6;
 		nano_length -= NumericCast<idx_t>(TimeToStringCast::FormatMicros(picos, nano_buffer));
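The added branch zero-fills the microsecond field when a timestamp carries sub-microsecond digits but exactly zero microseconds (issue #18035). A hedged SQL illustration — the literal is hypothetical, chosen so the µs component is all zeros:

    SELECT CAST(TIMESTAMP_NS '2025-06-24 12:00:00.000000123' AS VARCHAR);
    -- expected: 2025-06-24 12:00:00.000000123, with the six zero microsecond digits emitted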
12 changes: 10 additions & 2 deletions src/execution/operator/join/physical_iejoin.cpp
@@ -167,6 +167,10 @@ SinkResultType PhysicalIEJoin::Sink(ExecutionContext &context, DataChunk &chunk,
 	auto &gstate = input.global_state.Cast<IEJoinGlobalState>();
 	auto &lstate = input.local_state.Cast<IEJoinLocalState>();

+	if (gstate.child == 0 && gstate.tables[1]->global_sort_state.sorted_blocks.empty() && EmptyResultIfRHSIsEmpty()) {
+		return SinkResultType::FINISHED;
+	}
+
 	gstate.Sink(chunk, lstate);

 	if (filter_pushdown && !gstate.skip_filter_pushdown) {
@@ -208,15 +212,19 @@ SinkFinalizeType PhysicalIEJoin::Finalize(Pipeline &pipeline, Event &event, Clie
 		// for FULL/LEFT/RIGHT OUTER JOIN, initialize found_match to false for every tuple
 		table.IntializeMatches();
 	}

+	SinkFinalizeType res;
 	if (gstate.child == 1 && global_sort_state.sorted_blocks.empty() && EmptyResultIfRHSIsEmpty()) {
 		// Empty input!
-		return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
+		res = SinkFinalizeType::NO_OUTPUT_POSSIBLE;
+	} else {
+		res = SinkFinalizeType::READY;
 	}

 	// Move to the next input child
 	gstate.Finalize(pipeline, event);

-	return SinkFinalizeType::READY;
+	return res;
 }

 //===--------------------------------------------------------------------===//
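Both hunks handle an empty right-hand side in the IE Join: Finalize now still advances gstate to the next child before reporting NO_OUTPUT_POSSIBLE, and the sink can stop early once the already-sorted RHS is known to be empty. A hypothetical repro, assuming two inequality join conditions route the plan to the IE Join operator (table names are illustrative):

    CREATE TABLE lhs (a INTEGER);
    CREATE TABLE rhs (b INTEGER);
    INSERT INTO lhs VALUES (1), (2), (3);
    -- rhs intentionally left empty
    SELECT * FROM lhs, rhs WHERE lhs.a < rhs.b AND lhs.a > rhs.b - 10; -- should return zero rows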