Skip to content
2 changes: 2 additions & 0 deletions docs/misc/math-expr.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ The following built-in functions are available.
|parse_long|parse_long(string[, radix]) parses a string as a long with the given radix, or 10 (decimal) if a radix is not provided.|
|regexp_extract|regexp_extract(expr, pattern[, index]) applies a regular expression pattern and extracts a capture group index, or null if there is no match. If index is unspecified or zero, returns the substring that matched the pattern. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern.|
|regexp_like|regexp_like(expr, pattern) returns whether `expr` matches regular expression `pattern`. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern. |
|contains_string|contains_string(expr, string) returns whether `expr` contains `string` as a substring. This method is case-sensitive.|
|icontains_string|icontains_string(expr, string) returns whether `expr` contains `string` as a substring. This method is case-insensitive.|
|replace|replace(expr, pattern, replacement) replaces pattern with replacement|
|substring|substring(expr, index, length) behaves like java.lang.String's substring|
|right|right(expr, length) returns the rightmost length characters from a string|
Expand Down
2 changes: 2 additions & 0 deletions docs/querying/sql.md
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,8 @@ String functions accept strings, and return a type appropriate to the function.
|`POSITION(needle IN haystack [FROM fromIndex])`|Returns the index of needle within haystack, with indexes starting from 1. The search will begin at fromIndex, or 1 if fromIndex is not specified. If the needle is not found, returns 0.|
|`REGEXP_EXTRACT(expr, pattern, [index])`|Apply regular expression `pattern` to `expr` and extract a capture group, or `NULL` if there is no match. If index is unspecified or zero, returns the first substring that matched the pattern. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern. Note: when `druid.generic.useDefaultValueForNull = true`, it is not possible to differentiate an empty-string match from a non-match (both will return `NULL`).|
|`REGEXP_LIKE(expr, pattern)`|Returns whether `expr` matches regular expression `pattern`. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern. Similar to [`LIKE`](#comparison-operators), but uses regexps instead of LIKE patterns. Especially useful in WHERE clauses.|
|`CONTAINS_STRING(<expr>, str)`|Returns true if the `str` is a substring of `expr`. The match is case-sensitive.|
|`ICONTAINS_STRING(<expr>, str)`|Returns true if the `str` is a substring of `expr`. The match is case-insensitive.|
|`REPLACE(expr, pattern, replacement)`|Replaces pattern with replacement in expr, and returns the result.|
|`STRPOS(haystack, needle)`|Returns the index of needle within haystack, with indexes starting from 1. If the needle is not found, returns 0.|
|`SUBSTRING(expr, index, [length])`|Returns a substring of expr starting at index, with a max length, both measured in UTF-16 code units.|
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.query.expression;

import org.apache.druid.java.util.common.IAE;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprMacroTable;

import java.util.List;

/**
 * Expression macro for {@code icontains_string}, a function that checks whether one string contains another
 * string while ignoring case. The second argument is required to be a string literal.
 * <p>
 * Signature:
 * long icontains_string(string, string)
 * <p>
 * Examples:
 * - {@code icontains_string("foobar", "bar") - 1 }
 * - {@code icontains_string("foobar", "car") - 0 }
 * - {@code icontains_string("foobar", "Bar") - 1 }
 * <p>
 * See {@link ContainsExprMacro} for the case-sensitive version.
 */
public class CaseInsensitiveContainsExprMacro implements ExprMacroTable.ExprMacro
{
  public static final String FN_NAME = "icontains_string";

  /**
   * @return the name this macro is registered under, {@value #FN_NAME}
   */
  @Override
  public String name()
  {
    return FN_NAME;
  }

  /**
   * Builds the case-insensitive "contains" expression from the macro arguments.
   *
   * @param args exactly two expressions: the string to search in, followed by the substring to search for
   * @return a {@link ContainsExpr} configured for case-insensitive matching
   * @throws IAE if the number of arguments is not 2
   */
  @Override
  public Expr apply(final List<Expr> args)
  {
    if (args.size() != 2) {
      throw new IAE("Function[%s] must have 2 arguments", name());
    }

    final Expr arg = args.get(0);
    final Expr searchStr = args.get(1);
    // The trailing "false" selects the case-insensitive matcher inside ContainsExpr.
    return new ContainsExpr(name(), arg, searchStr, false);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.query.expression;

import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.ExprType;

import javax.annotation.Nonnull;
import java.util.function.Function;

/**
* {@link Expr} class returned by {@link ContainsExprMacro} and {@link CaseInsensitiveContainsExprMacro} for
* evaluating the expression.
*/
Comment on lines +33 to +36
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: It would be good to link to the DimFilter whose behavior we are trying to mimic here ContainsSearchQuerySpec

since we want the logic between these 2 classes to stay the same. I wonder if we can future proof this so they stay in sync if someone makes an update to ContainsSearchQuerySpec

class ContainsExpr extends ExprMacroTable.BaseScalarUnivariateMacroFunctionExpr
{
private final Function<String, Boolean> searchFunction;
private final Expr searchStrExpr;

ContainsExpr(String functioName, Expr arg, Expr searchStrExpr, boolean caseSensitive)
{
super(functioName, arg);
this.searchStrExpr = validateSearchExpr(searchStrExpr, functioName);
// Creates the function eagerly to avoid branching in eval.
this.searchFunction = createFunction(searchStrExpr, caseSensitive);
}

private ContainsExpr(String functioName, Expr arg, Expr searchStrExpr, Function<String, Boolean> searchFunction)
{
super(functioName, arg);
this.searchFunction = searchFunction;
this.searchStrExpr = validateSearchExpr(searchStrExpr, functioName);
}

@Nonnull
@Override
public ExprEval eval(final Expr.ObjectBinding bindings)
{
final String s = NullHandling.nullToEmptyIfNeeded(arg.eval(bindings).asString());

if (s == null) {
// same behavior as regexp_like.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit

Suggested change
// same behavior as regexp_like.
// same behavior as ContainsSearchQuerySpec#accept

Luckily the behavior is the same as regexp_like

    if (dimVal == null || value == null) {
      return false;
    }

return ExprEval.of(false, ExprType.LONG);
} else {
final boolean doesContain = searchFunction.apply(s);
return ExprEval.of(doesContain, ExprType.LONG);
}
}

@Override
public Expr visit(Expr.Shuttle shuttle)
{
Expr newArg = arg.visit(shuttle);
return shuttle.visit(new ContainsExpr(name, newArg, searchStrExpr, searchFunction));
}

@Override
public String stringify()
{
return StringUtils.format("%s(%s, %s)", name, arg.stringify(), searchStrExpr.stringify());
}

private Function<String, Boolean> createFunction(Expr searchStrExpr, boolean caseSensitive)
{
String searchStr = StringUtils.nullToEmptyNonDruidDataString((String) searchStrExpr.getLiteralValue());
if (caseSensitive) {
return s -> s.contains(searchStr);
}
return s -> org.apache.commons.lang.StringUtils.containsIgnoreCase(s, searchStr);
}

private Expr validateSearchExpr(Expr searchExpr, String functioName)
{
if (!ExprUtils.isStringLiteral(searchExpr)) {
throw new IAE("Function[%s] substring must be a string literal", functioName);
}
return searchExpr;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.query.expression;

import org.apache.druid.java.util.common.IAE;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprMacroTable;

import java.util.List;

/**
 * Expression macro for {@code contains_string}, a function that tests whether one string contains another.
 * The second argument is required to be a literal. Matching is case-sensitive.
 * <p>
 * Signature:
 * long contains_string(string, string)
 * <p>
 * Examples:
 * - {@code contains_string("foobar", "bar") - 1 }
 * - {@code contains_string("foobar", "car") - 0 }
 * - {@code contains_string("foobar", "Bar") - 0 }
 * <p>
 * See {@link CaseInsensitiveContainsExprMacro} for the case-insensitive version.
 */
public class ContainsExprMacro implements ExprMacroTable.ExprMacro
{
  public static final String FN_NAME = "contains_string";

  /**
   * @return the name this macro is registered under
   */
  @Override
  public String name()
  {
    return FN_NAME;
  }

  /**
   * Turns the two macro arguments into a case-sensitive {@link ContainsExpr}.
   *
   * @param args the string to search in, followed by the substring to search for
   * @throws IAE if the number of arguments is not 2
   */
  @Override
  public Expr apply(final List<Expr> args)
  {
    final int argCount = args.size();
    if (argCount == 2) {
      final Expr haystack = args.get(0);
      final Expr needle = args.get(1);
      // "true" requests case-sensitive matching.
      return new ContainsExpr(FN_NAME, haystack, needle, true);
    }

    throw new IAE("Function[%s] must have 2 arguments", name());
  }
}
Loading