4 changes: 4 additions & 0 deletions django/db/backends/base/features.py
@@ -342,6 +342,10 @@ class BaseDatabaseFeatures:
json_key_contains_list_matching_requires_list = False
# Does the backend support JSONObject() database function?
has_json_object_function = True
# Does the backend support partial updates to JSONField?
supports_partial_json_update = True
# Does JSONSet only append to an array if the index equals the array length?
json_set_array_append_requires_length_as_index = False

# Does the backend support column collations?
supports_collation_on_charfield = True
4 changes: 4 additions & 0 deletions django/db/backends/oracle/features.py
@@ -208,3 +208,7 @@ def supports_aggregation_over_interval_types(self):
@cached_property
def bare_select_suffix(self):
return "" if self.connection.oracle_version >= (23,) else " FROM DUAL"

@cached_property
def supports_partial_json_update(self):
return self.connection.oracle_version >= (21,)
Contributor:

This seems correct according to https://apex.oracle.com/database-features/

However, the function seems to be documented for 19c as well: https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/json_transform.html

According to https://stackoverflow.com/a/75286939 it was backported to 19c in 19.10. Could we update this and see if it works? According to https://code.djangoproject.com/wiki/CI we use v19.3.0.0.0 on the CI though, so that may need to be upgraded, or we can test ourselves.

Contributor Author:

We agreed on following the https://apex.oracle.com/database-features/ website.

1 change: 1 addition & 0 deletions django/db/backends/sqlite3/features.py
@@ -61,6 +61,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
insert_test_table_with_defaults = 'INSERT INTO {} ("null") VALUES (1)'
supports_default_keyword_in_insert = False
supports_unlimited_charfield = True
json_set_array_append_requires_length_as_index = True

@cached_property
def django_test_skips(self):
21 changes: 19 additions & 2 deletions django/db/models/fields/json.py
@@ -5,7 +5,7 @@
from django.db import NotSupportedError, connections, router
from django.db.models import expressions, lookups
from django.db.models.constants import LOOKUP_SEP
from django.db.models.fields import TextField
from django.db.models.fields import NOT_PROVIDED, TextField
from django.db.models.lookups import (
FieldGetDbPrepValueMixin,
PostgresOperatorLookup,
@@ -340,12 +340,16 @@ def __init__(self, key_name, *args, **kwargs):
super().__init__(*args, **kwargs)
self.key_name = str(key_name)

def preprocess_lhs(self, compiler, connection):
def unwrap_transforms(self):
key_transforms = [self.key_name]
previous = self.lhs
while isinstance(previous, KeyTransform):
key_transforms.insert(0, previous.key_name)
previous = previous.lhs
return previous, key_transforms

def preprocess_lhs(self, compiler, connection):
previous, key_transforms = self.unwrap_transforms()
lhs, params = compiler.compile(previous)
if connection.vendor == "oracle":
# Escape string-formatting.
@@ -390,6 +394,19 @@ def as_sqlite(self, compiler, connection):
"THEN JSON_TYPE(%s, %%s) ELSE JSON_EXTRACT(%s, %%s) END)"
) % (lhs, datatype_values, lhs, lhs), (tuple(params) + (json_path,)) * 3

def get_update_expression(self, value, lhs=None):
from ..functions import JSONRemove, JSONSet

field, key_transforms = self.unwrap_transforms()
Contributor:

There's a bit of round trip around the transforms, so let me explain the flow.

  • We do MyModel.objects.update(json_field__key1="foo", json_field__key2__key2a=123).
  • QuerySet.update() constructs an UpdateQuery, which calls add_update_values() to prepare the expressions for each field that we are updating.
  • In add_update_values(), we split the kwargs using LOOKUP_SEP.
    • Then, we ensure each name in the resulting split is a valid transform by calling get_transform() on the field with each string to get the transform class. If it is found, we construct the transform. In JSONField's case, this always gives you KeyTransform (because keys can be any string).
    • If the transform is chained, we keep building the transforms by composing them, e.g. KeyTransform(KeyTransform(...)). For other fields, these may not be of the same class, e.g. TransformA(TransformB(...)). There's no concrete example for this right now, though.
    • After constructing the final Transform instance (the last one from the result of .split(LOOKUP_SEP)), we call its get_update_expression() method.
    • In KeyTransform, we use unwrap_transforms() so that we can "unwrap" the transform composition, e.g. KeyTransform("key2a", KeyTransform("key2", "json_field")), back into field, ["key2", "key2a"].
    • Then, we join the keys with LOOKUP_SEP again, so we can pass them to JSONSet()/JSONRemove() as JSONSet(field, key2__key2a=value).
    • Then JSONSet()/JSONRemove() will, yet again, split the kwargs with LOOKUP_SEP, because it needs to build the final JSON path string, e.g. $.key2.key2a.
    • We can probably optimize the last bits by introducing a classmethod on JSONSet/JSONRemove that accepts a pair of a KeyTransform instance and the value...

Alternatively, we could also make it so that the field itself, rather than the transform, defines the expression. This lets us skip the round trip of splitting/joining on LOOKUP_SEP entirely.

For example, if this get_update_expression is implemented in JSONField (instead of KeyTransform), we could just pass the kwargs from .update() directly to the JSONSet() and JSONRemove() functions. This skips KeyTransform entirely.

This approach will result in a much more straightforward code flow. However, it would mean that any use of LOOKUP_SEP in update() is handled entirely by the field itself, rather than going through the transforms mechanism like in .filter().

I think, to make this extensible and more consistent with .filter(), we should do the transforms approach, even if it means a few round trips in the case of KeyTransform.
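
To make the round trip concrete, here is a hedged sketch of the flow described above (MyModel and json_field are hypothetical names):

# MyModel.objects.update(json_field__key2__key2a=123)
# add_update_values() builds the chained transform
#     KeyTransform("key2a", KeyTransform("key2", F("json_field")))
# and calls get_update_expression(123) on it, which unwraps the chain back
# into (json_field, ["key2", "key2a"]) and returns roughly:
#     JSONSet("json_field", **{LOOKUP_SEP.join(["key2", "key2a"]): 123})
# i.e. JSONSet("json_field", key2__key2a=123); JSONSet then re-splits the
# key to compile the JSON path "$.key2.key2a".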


if lhs is None:
lhs = field

if value is NOT_PROVIDED:
Member:

I like the idea of using a sentinel to differentiate between null and key deletions (it's something that I believe is missing for JSON null vs SQL NULL) but I'm not sure we should piggyback on that one since it was meant for a completely different purpose.

Contributor (@laymonage, Oct 26, 2024):

I agree @charettes! This was a temporary solution as suggested by Markus the other day, and I linked to your exact comment: https://fosstodon.org/@laymonage/112949402890177435 (Like I said, big fan 😂)

Happy to introduce a new sentinel for this, although if we want to also support that sentinel in JSONField at the same time, that sounds like a separate piece of work in itself.

I think the idea of making for_save an (enum.Enum, str) to differentiate between different scenarios could work, as most usage out there is doing if for_save and most likely doesn't need to differentiate between both.

That's what I thought too – though I suppose it could still break if people check for the boolean explicitly e.g. if for_save is True. Still doable, just need to be more careful when we do the transition.

My immediate concern with these changes is that they are possibly trying to do too much at once?

Is there a reason why we can't first land support for JSONSet / JSONRemove in a first set of changes targeted for 5.2, let them simmer for a release, and then consider adding support for the transform combining feature later on?

No particular reason, really. When we implemented the first pass for the JSONSet and JSONRemove functions, we were curious how much work it would take to implement the transforms support in update() directly. Turns out it wasn't much, but indeed it's a bit more "controversial".

Sarah and I discussed this at the DjangoCon US sprints and we decided to split this PR into two. One for the DB functions, and one for the nicer syntactic sugar via update() directly – just like you said. I noted this in my comment #18489 (review) just yesterday.

@adzshaf and I are still working on polishing the DB functions to be ready for a new, smaller PR. We found a few more edge cases so it's taking us a bit longer than we thought. We'll post an update on the ticket when we put the PR up 😄

I like the ideas put forward by this patch and the separation of concerns between field and transform logic, but these are more fundamental changes to how the ORM works and bring up larger questions. For example, should we support save(update_fields) with transforms as well? Is it time to introduce a specialized type of transform that denotes nested data structure access, instead of trying to push the current abstraction in this space?

IMO these questions can likely be more easily answered while users are allowed to play with JSONSet and JSONRemove and these primitives are iterated on to iron out bugs first.

Thank you, and yes there are definitely more areas of the ORM that we haven't considered like bulk_{create,update}, update_or_create, etc. and save(update_fields) is one of them. I'd be happy to focus on getting JSONSet and JSONRemove in first, then discuss the more uncertain bits after.

Member:

Thanks for the update, it's great to see you push all of these JSONField improvements forward. It has been a game changer to be able to use JSONField on non-Postgres databases, and a lot of that is thanks to you 🙇

I think that the two-commit approach has a lot of merit considering how hard it is to get these things right the first time. Let me know when things have taken shape if you'd like more feedback. As I said already, I think the proposed approach is great in the sense that it demonstrates that it can be done without too many invasive changes.

return JSONRemove(lhs, LOOKUP_SEP.join(key_transforms))

return JSONSet(lhs, **{LOOKUP_SEP.join(key_transforms): value})


class KeyTextTransform(KeyTransform):
postgres_operator = "->>"
Expand Down
4 changes: 4 additions & 0 deletions django/db/models/functions/__init__.py
@@ -25,6 +25,7 @@
TruncWeek,
TruncYear,
)
from .json import JSONRemove, JSONSet
from .math import (
Abs,
ACos,
@@ -187,4 +188,7 @@
"PercentRank",
"Rank",
"RowNumber",
# JSON-related functions
"JSONRemove",
"JSONSet",
]
6 changes: 6 additions & 0 deletions django/db/models/functions/comparison.py
@@ -22,6 +22,7 @@ def as_sql(self, compiler, connection, **extra_context):

def as_sqlite(self, compiler, connection, **extra_context):
db_type = self.output_field.db_type(connection)
output_type = self.output_field.get_internal_type()
if db_type in {"datetime", "time"}:
# Use strftime as datetime/time don't keep fractional seconds.
template = "strftime(%%s, %(expressions)s)"
@@ -36,6 +37,11 @@ def as_sqlite(self, compiler, connection, **extra_context):
return super().as_sql(
compiler, connection, template=template, **extra_context
)
elif output_type == "JSONField":
template = "JSON(%(expressions)s)"
return super().as_sql(
compiler, connection, template=template, **extra_context
)
Comment on lines +40 to +44
Contributor:

Could we extract the changes in Cast(), along with the corresponding test, into a separate commit please?

Contributor Author:

Done ✔️

Comment on lines +40 to +44
Contributor:

Reason why we need this:

https://www.sqlite.org/json1.html#jset

If the value of a path/value pair is an SQLite TEXT value, then it is normally inserted as a quoted JSON string, even if the string looks like valid JSON. However, if the value is the result of another json function (such as json() or json_array() or json_object()) or if it is the result of the -> operator, then it is interpreted as JSON and is inserted as JSON retaining all of its substructure. Values that are the result of the ->> operator are always interpreted as TEXT and are inserted as a JSON string even if they look like valid JSON.
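
To illustrate this, here is a minimal sketch using Python's bundled sqlite3 module (assuming a SQLite build with the JSON functions available):

import sqlite3

con = sqlite3.connect(":memory:")
# A plain TEXT parameter is inserted as a quoted JSON string...
print(con.execute("SELECT json_set('{}', '$.a', ?)", ("[1, 2]",)).fetchone()[0])
# {"a":"[1, 2]"}
# ...but wrapped in json(), it is interpreted as JSON and keeps its substructure.
print(con.execute("SELECT json_set('{}', '$.a', json(?))", ("[1, 2]",)).fetchone()[0])
# {"a":[1,2]}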

It's very similar to the other workarounds in as_mysql for MariaDB and in as_oracle. Instead of putting the workarounds directly in Cast.as_vendor(), do we want to introduce a new database operation specifically for casting JSONField instead? We have something similar for the datetime transforms:

def datetime_cast_date_sql(self, sql, params, tzname):
    """
    Return the SQL to cast a datetime value to date value.
    """
    raise NotImplementedError(
        "subclasses of BaseDatabaseOperations may require a "
        "datetime_cast_date_sql() method."
    )

def datetime_cast_time_sql(self, sql, params, tzname):
    """
    Return the SQL to cast a datetime value to time value.
    """
    raise NotImplementedError(
        "subclasses of BaseDatabaseOperations may require a "
        "datetime_cast_time_sql() method"
    )

return self.as_sql(compiler, connection, **extra_context)

def as_mysql(self, compiler, connection, **extra_context):
244 changes: 244 additions & 0 deletions django/db/models/functions/json.py
@@ -0,0 +1,244 @@
from django.db import NotSupportedError
from django.db.models.constants import LOOKUP_SEP
from django.db.models.expressions import Func, Value
from django.db.models.fields.json import compile_json_path
from django.db.models.functions import Cast


class JSONSet(Func):
Contributor:

Let's create a separate commit for JSONSet+tests, and another commit for JSONRemove+tests. Then, create a separate commit for adding KeyTransform.get_update_expression() and the corresponding tests.

Contributor Author:

Done ✔️

def __init__(self, expression, output_field=None, **fields):
if not fields:
raise TypeError("JSONSet requires at least one key-value pair to be set.")
self.fields = fields
super().__init__(expression, output_field=output_field)

def resolve_expression(
self, query=None, allow_joins=True, reuse=None, summarize=False, for_save=False
):
c = super().resolve_expression(query, allow_joins, reuse, summarize, for_save)
# Resolve expressions in the JSON update values.
c.fields = {
key: (
value.resolve_expression(query, allow_joins, reuse, summarize, for_save)
if hasattr(value, "resolve_expression")
else value
)
for key, value in self.fields.items()
}
return c

def as_sql(
self,
compiler,
connection,
function=None,
template=None,
arg_joiner=None,
**extra_context,
):
if not connection.features.supports_partial_json_update:
raise NotSupportedError(
"JSONSet() is not supported on this database backend."
)
copy = self.copy()
new_source_expressions = copy.get_source_expressions()

for key, value in self.fields.items():
key_paths = key.split(LOOKUP_SEP)
key_paths_join = compile_json_path(key_paths)

if not hasattr(value, "resolve_expression"):
# Use Value to serialize the data to a string,
# then use Cast to ensure the string is treated as JSON.
value = Cast(
Value(value, output_field=self.output_field),
output_field=self.output_field,
)
Comment on lines +51 to +56
Contributor:

Value() with JSONField() would allow us to reuse its get_db_prep_value() which does the serialization of Python objects based on the DB vendor. Then, we wrap it in Cast() because this value will be used as a parameter to the DB's JSON_SET function, and some DBs (e.g. SQLite) will treat the value as a plain SQL string (even if it's valid JSON) if we don't use Cast.

This is unlike saving values directly to the column, in which case the DBs will happily accept a JSON-formatted string and do any conversion to the actual JSON data type (if they have one).
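
As a usage sketch (Dog is a hypothetical model with a JSONField named data): passing a plain Python value takes this branch, so the list keeps its substructure instead of being stored as a quoted string.

from django.db.models.functions import JSONSet

# The list is serialized by Value(..., output_field=JSONField()) and wrapped
# in Cast(), which compiles to JSON(?) on SQLite.
Dog.objects.update(data=JSONSet("data", toys=["ball", "bone"]))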


new_source_expressions.extend((Value(key_paths_join), value))

copy.set_source_expressions(new_source_expressions)

return super(JSONSet, copy).as_sql(
compiler,
connection,
function="JSON_SET",
**extra_context,
)

def as_postgresql(self, compiler, connection, **extra_context):
copy = self.copy()

all_items = list(self.fields.items())
key, value = all_items[0]
rest = all_items[1:]

# JSONB_SET does not support an arbitrary number of arguments,
# so convert multiple updates into recursive calls.
if rest:
copy.fields = {key: value}
return JSONSet(copy, **dict(rest)).as_postgresql(
compiler, connection, **extra_context
)

new_source_expressions = copy.get_source_expressions()
key_paths = key.split(LOOKUP_SEP)
key_paths_join = ",".join(key_paths)

new_source_expressions.append(Value(f"{{{key_paths_join}}}"))

if not hasattr(value, "resolve_expression"):
new_source_expressions.append(Value(value, output_field=self.output_field))
Comment on lines +90 to +91
Contributor:

We don't need Cast() because Value() with a JSONField is correctly handled as JSONB by Postgres.

else:

class ToJSONB(Func):
function = "TO_JSONB"

new_source_expressions.append(
ToJSONB(value, output_field=self.output_field),
)
Comment on lines +94 to +99
Contributor:

Casting to JSONB with ::jsonb on Postgres only works for strings (text) that are JSON-formatted. If you have an expression that resolves to a non-string SQL value, e.g. when you refer to an integer column with F(), using Cast() doesn't work. Postgres has a dedicated to_jsonb function for this: https://www.postgresql.org/docs/current/functions-json.html#FUNCTIONS-JSON-CREATION-TABLE
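
A hedged sketch of the case described above (Product is a hypothetical model with an integer quantity column and a JSONField named metadata):

from django.db.models import F
from django.db.models.functions import JSONSet

# F("quantity") resolves to a non-string SQL value, so on Postgres it is
# wrapped as TO_JSONB("product"."quantity") rather than cast with ::jsonb.
Product.objects.update(metadata=JSONSet("metadata", stock=F("quantity")))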


copy.set_source_expressions(new_source_expressions)
return super(JSONSet, copy).as_sql(
compiler, connection, function="JSONB_SET", **extra_context
)

def as_oracle(self, compiler, connection, **extra_context):
if not connection.features.supports_partial_json_update:
raise NotSupportedError(
"JSONSet() is not supported on this database backend."
)
copy = self.copy()

all_items = list(self.fields.items())
key, value = all_items[0]
rest = all_items[1:]

# JSON_TRANSFORM does not support an arbitrary number of arguments,
# so convert multiple updates into recursive calls.
if rest:
copy.fields = {key: value}
return JSONSet(copy, **dict(rest)).as_oracle(
compiler, connection, **extra_context
)

new_source_expressions = copy.get_source_expressions()
key_paths = key.split(LOOKUP_SEP)
key_paths_join = compile_json_path(key_paths)
if not hasattr(value, "resolve_expression"):
new_source_expressions.append(Value(value, output_field=self.output_field))
else:
new_source_expressions.append(value)
copy.set_source_expressions(new_source_expressions)

class ArgJoiner:
def join(self, args):
if not hasattr(value, "resolve_expression"):
# Interpolate the JSON path directly to the query string, because
# Oracle does not support passing the JSON path using parameter
# binding.
return f"{args[0]}, SET '{key_paths_join}' = {args[-1]} FORMAT JSON"
return f"{args[0]}, SET '{key_paths_join}' = {args[-1]}"
Comment on lines +136 to +141
Contributor:

Interpolating the JSON path directly is nothing new:

# Add paths directly into SQL because path expressions cannot be passed
# as bind variables on Oracle.
return sql % tuple(params), []

Though I think we should move the comment to be outside the if, because this applies to both cases.

Suggested change
if not hasattr(value, "resolve_expression"):
# Interpolate the JSON path directly to the query string, because
# Oracle does not support passing the JSON path using parameter
# binding.
return f"{args[0]}, SET '{key_paths_join}' = {args[-1]} FORMAT JSON"
return f"{args[0]}, SET '{key_paths_join}' = {args[-1]}"
# Interpolate the JSON path directly to the query string, because
# Oracle does not support passing the JSON path using parameter
# binding.
if not hasattr(value, "resolve_expression"):
return f"{args[0]}, SET '{key_paths_join}' = {args[-1]} FORMAT JSON"
return f"{args[0]}, SET '{key_paths_join}' = {args[-1]}"

Using FORMAT JSON is similar to using Cast() in other DBs but this is specific to Oracle's JSON_TRANSFORM.

https://docs.oracle.com/en/database/oracle/oracle-database/21/adjsn/oracle-sql-function-json_transform.html

If the result expression evaluates to a SQL value that is not JSON type, you can convert it to JSON data by following the expression immediately with keywords FORMAT JSON. This is particularly useful to convert the SQL string 'true' or 'false' to the corresponding JSON-language value true or false. Example 11-7 illustrates this.
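
For reference, a hedged sketch of the SQL shapes this joiner produces (identifiers hypothetical):

# For a literal Python value (bound as a serialized JSON string):
#   JSON_TRANSFORM("mymodel"."data", SET '$.owner.name' = :arg0 FORMAT JSON)
# For an expression value such as F("other_col"), no FORMAT JSON is added:
#   JSON_TRANSFORM("mymodel"."data", SET '$.owner.name' = "mymodel"."other_col")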


return super(JSONSet, copy).as_sql(
compiler,
connection,
function="JSON_TRANSFORM",
arg_joiner=ArgJoiner(),
**extra_context,
)


class JSONRemove(Func):
def __init__(self, expression, *paths):
if not paths:
raise TypeError("JSONRemove requires at least one path to remove")
self.paths = paths
super().__init__(expression)

def as_sql(
self,
compiler,
connection,
function=None,
template=None,
arg_joiner=None,
**extra_context,
):
if not connection.features.supports_partial_json_update:
raise NotSupportedError(
"JSONRemove() is not supported on this database backend."
)

copy = self.copy()
new_source_expressions = copy.get_source_expressions()

for path in self.paths:
key_paths = path.split(LOOKUP_SEP)
key_paths_join = compile_json_path(key_paths)
new_source_expressions.append(Value(key_paths_join))

copy.set_source_expressions(new_source_expressions)

return super(JSONRemove, copy).as_sql(
compiler,
connection,
function="JSON_REMOVE",
**extra_context,
)

def as_postgresql(self, compiler, connection, **extra_context):
copy = self.copy()
path, *rest = self.paths

if rest:
copy.paths = (path,)
return JSONRemove(copy, *rest).as_postgresql(
compiler, connection, **extra_context
)

new_source_expressions = copy.get_source_expressions()
key_paths = path.split(LOOKUP_SEP)
key_paths_join = ",".join(key_paths)
new_source_expressions.append(Value(f"{{{key_paths_join}}}"))
copy.set_source_expressions(new_source_expressions)

return super(JSONRemove, copy).as_sql(
compiler,
connection,
template="%(expressions)s",
arg_joiner="#- ",
**extra_context,
)

def as_oracle(self, compiler, connection, **extra_context):
if not connection.features.supports_partial_json_update:
raise NotSupportedError(
"JSONRemove() is not supported on this database backend."
)

copy = self.copy()

all_items = self.paths
path, *rest = all_items

if rest:
copy.paths = (path,)
return JSONRemove(copy, *rest).as_oracle(
compiler, connection, **extra_context
)

key_paths = path.split(LOOKUP_SEP)
key_paths_join = compile_json_path(key_paths)

class ArgJoiner:
def join(self, args):
return f"{args[0]}, REMOVE '{key_paths_join}'"

return super(JSONRemove, copy).as_sql(
compiler,
connection,
function="JSON_TRANSFORM",
arg_joiner=ArgJoiner(),
**extra_context,
)
11 changes: 11 additions & 0 deletions django/db/models/lookups.py
@@ -215,6 +215,17 @@ def get_bilateral_transforms(self):
bilateral_transforms.append(self.__class__)
return bilateral_transforms

def get_update_expression(self, value, lhs=None):
"""
Compose an update expression for this transform with a given value,
to be used in QuerySet.update(). Multiple transforms may be used for the same
field in a single update() call, in which case the previous expression is
provided by the lhs parameter.
"""
raise NotImplementedError(
f"Using {self.__class__.__name__} is not supported in QuerySet.update()."
)
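
A hedged sketch of the lhs chaining per this patch (names hypothetical): for MyModel.objects.update(data__a=1, data__b=2), the update machinery would compose roughly:

from django.db.models.fields.json import KeyTransform

expr = KeyTransform("a", "data").get_update_expression(1)
expr = KeyTransform("b", "data").get_update_expression(2, lhs=expr)
# expr is now roughly JSONSet(JSONSet("data", a=1), b=2), compiled as nested
# JSON_SET (or equivalent) calls.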


class BuiltinLookup(Lookup):
def process_lhs(self, compiler, connection, lhs=None):