mirror of
https://github.com/apache/impala.git
synced 2025-12-31 15:00:10 -05:00
IMPALA-939: Regex should match anywhere in string.
Change-Id: I8dcd337c3b06b632017270670a4f199ec7ada648 Reviewed-on: http://gerrit.ent.cloudera.com:8080/2296 Reviewed-by: Victor Bittorf <victor.bittorf@cloudera.com> Tested-by: jenkins (cherry picked from commit c97f82eaaf0efe9bd4c3da3d005464f425696a62) Reviewed-on: http://gerrit.ent.cloudera.com:8080/2371
This commit is contained in:
@@ -1010,6 +1010,10 @@ TEST_F(ExprTest, LikePredicate) {
|
||||
TestValue("'abxcy1234a' RLIKE 'a.x.y.*a'", TYPE_BOOLEAN, true);
|
||||
TestValue("'axcy1234a' REGEXP 'a.x.y.*a'", TYPE_BOOLEAN, false);
|
||||
TestValue("'axcy1234a' RLIKE 'a.x.y.*a'", TYPE_BOOLEAN, false);
|
||||
TestValue("'english' REGEXP 'en'", TYPE_BOOLEAN, true);
|
||||
TestValue("'english' REGEXP 'lis'", TYPE_BOOLEAN, true);
|
||||
TestValue("'english' REGEXP 'english'", TYPE_BOOLEAN, true);
|
||||
TestValue("'english' REGEXP 'engilsh'", TYPE_BOOLEAN, false);
|
||||
// regex escape chars; insert special character in the middle to prevent
|
||||
// it from being matched as a substring
|
||||
TestValue("'.[]{}()x\\\\*+?|^$' LIKE '.[]{}()_\\\\\\\\*+?|^$'", TYPE_BOOLEAN, true);
|
||||
|
||||
@@ -85,6 +85,17 @@ void* LikePredicate::ConstantEqualsFn(Expr* e, TupleRow* row) {
|
||||
return &p->result_.bool_val;
|
||||
}
|
||||
|
||||
// Evaluates the predicate against the precompiled pattern held in p->regex_
// using RE2::PartialMatch. Prepare() selects this function when the function
// name is "regexp" or "rlike".
// Returns NULL if the first child evaluates to NULL; otherwise returns a
// pointer to p->result_.bool_val holding the match outcome.
void* LikePredicate::ConstantRegexFnPartial(Expr* e, TupleRow* row) {
|
||||
LikePredicate* p = static_cast<LikePredicate*>(e);
|
||||
DCHECK_EQ(p->GetNumChildren(), 2);
|
||||
// Child 0 is the string operand being tested; only it is evaluated here.
StringValue* operand_val = static_cast<StringValue*>(e->GetChild(0)->GetValue(row));
|
||||
// A NULL operand yields a NULL result rather than true/false.
if (operand_val == NULL) return NULL;
|
||||
|
||||
// Wrap the operand's (ptr, len) pair in a StringPiece for RE2.
re2::StringPiece operand_sp(operand_val->ptr, operand_val->len);
|
||||
p->result_.bool_val = RE2::PartialMatch(operand_sp, *p->regex_);
|
||||
return &p->result_.bool_val;
|
||||
}
|
||||
|
||||
void* LikePredicate::ConstantRegexFn(Expr* e, TupleRow* row) {
|
||||
LikePredicate* p = static_cast<LikePredicate*>(e);
|
||||
DCHECK_EQ(p->GetNumChildren(), 2);
|
||||
@@ -110,8 +121,13 @@ void* LikePredicate::RegexMatch(Expr* e, TupleRow* row, bool is_like_pattern) {
|
||||
}
|
||||
re2::RE2 re(re_pattern);
|
||||
if (re.ok()) {
|
||||
p->result_.bool_val =
|
||||
RE2::FullMatch(re2::StringPiece(operand_value->ptr, operand_value->len), re);
|
||||
if (is_like_pattern) {
|
||||
p->result_.bool_val =
|
||||
RE2::FullMatch(re2::StringPiece(operand_value->ptr, operand_value->len), re);
|
||||
} else {
|
||||
p->result_.bool_val =
|
||||
RE2::PartialMatch(re2::StringPiece(operand_value->ptr, operand_value->len), re);
|
||||
}
|
||||
return &p->result_.bool_val;
|
||||
} else {
|
||||
// TODO: log error in runtime state
|
||||
@@ -128,6 +144,9 @@ void* LikePredicate::RegexFn(Expr* e, TupleRow* row) {
|
||||
return RegexMatch(e, row, false);
|
||||
}
|
||||
|
||||
// There is a difference in the semantics of LIKE and REGEXP
|
||||
// LIKE only requires explicit use of '%' to perform partial matches
|
||||
// REGEXP does partial matching by default
|
||||
Status LikePredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
|
||||
RETURN_IF_ERROR(Expr::PrepareChildren(state, row_desc));
|
||||
DCHECK_EQ(children_.size(), 2);
|
||||
@@ -192,7 +211,12 @@ Status LikePredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc
|
||||
}
|
||||
regex_.reset(new RE2(re_pattern));
|
||||
if (!regex_->ok()) return Status("Invalid regular expression: " + pattern_str);
|
||||
compute_fn_ = ConstantRegexFn;
|
||||
if (fn_.name.function_name == "regexp" || fn_.name.function_name == "rlike") {
|
||||
compute_fn_ = ConstantRegexFnPartial;
|
||||
} else {
|
||||
compute_fn_ = ConstantRegexFn;
|
||||
}
|
||||
|
||||
}
|
||||
return Status::OK;
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
#define IMPALA_EXPRS_LIKE_PREDICATE_H_
|
||||
|
||||
#include <string>
|
||||
#include <boost/scoped_ptr.hpp>
|
||||
#include <boost/scoped_ptr.hpp>
|
||||
|
||||
#include "exprs/predicate.h"
|
||||
#include "gen-cpp/Exprs_types.h"
|
||||
@@ -56,7 +56,7 @@ class LikePredicate: public Predicate {
|
||||
|
||||
// Handling of like predicates that can be implemented using strncmp
|
||||
static void* ConstantStartsWithFn(Expr* e, TupleRow* row);
|
||||
|
||||
|
||||
// Handling of like predicates that can be implemented using strncmp
|
||||
static void* ConstantEndsWithFn(Expr* e, TupleRow* row);
|
||||
|
||||
@@ -64,6 +64,7 @@ class LikePredicate: public Predicate {
|
||||
static void* ConstantEqualsFn(Expr* e, TupleRow* row);
|
||||
|
||||
static void* ConstantRegexFn(Expr* e, TupleRow* row);
|
||||
static void* ConstantRegexFnPartial(Expr* e, TupleRow* row);
|
||||
static void* LikeFn(Expr* e, TupleRow* row);
|
||||
static void* RegexFn(Expr* e, TupleRow* row);
|
||||
static void* RegexMatch(Expr* e, TupleRow* row, bool is_like_pattern);
|
||||
|
||||
@@ -1746,28 +1746,40 @@ join tpch.part p
|
||||
where
|
||||
(
|
||||
p_brand = 'Brand#12'
|
||||
and p_container REGEXP 'SM CASE||SM BOX||SM PACK||SM PKG'
|
||||
and (p_container LIKE 'SM CASE' or
|
||||
p_container LIKE 'SM BOX' or
|
||||
p_container LIKE 'SM PACK' or
|
||||
p_container LIKE 'SM PKG')
|
||||
and l_quantity >= 1 and l_quantity <= 11
|
||||
and p_size >= 1 and p_size <= 5
|
||||
and l_shipmode REGEXP 'AIR||AIR REG'
|
||||
and (l_shipmode LIKE 'AIR' or
|
||||
l_shipmode LIKE 'AIR REG')
|
||||
and l_shipinstruct = 'DELIVER IN PERSON'
|
||||
)
|
||||
or
|
||||
(
|
||||
p_brand = 'Brand#23'
|
||||
and p_container REGEXP 'MED BAG||MED BOX||MED PKG||MED PACK'
|
||||
and (p_container LIKE 'MED BAG' or
|
||||
p_container LIKE 'MED BOX' or
|
||||
p_container LIKE 'MED PKG' or
|
||||
p_container LIKE 'MED PACK')
|
||||
and l_quantity >= 10 and l_quantity <= 20
|
||||
and p_size >= 1 and p_size <= 10
|
||||
and l_shipmode REGEXP 'AIR||AIR REG'
|
||||
and (l_shipmode LIKE 'AIR' or
|
||||
l_shipmode LIKE 'AIR REG')
|
||||
and l_shipinstruct = 'DELIVER IN PERSON'
|
||||
)
|
||||
or
|
||||
(
|
||||
p_brand = 'Brand#34'
|
||||
and p_container REGEXP 'LG CASE||LG BOX||LG PACK||LG PKG'
|
||||
and (p_container LIKE 'LG BAG' or
|
||||
p_container LIKE 'LG BOX' or
|
||||
p_container LIKE 'LG PKG' or
|
||||
p_container LIKE 'LG PACK')
|
||||
and l_quantity >= 20 and l_quantity <= 30
|
||||
and p_size >= 1 and p_size <= 15
|
||||
and l_shipmode REGEXP 'AIR||AIR REG'
|
||||
and (l_shipmode LIKE 'AIR' or
|
||||
l_shipmode LIKE 'AIR REG')
|
||||
and l_shipinstruct = 'DELIVER IN PERSON'
|
||||
)
|
||||
---- PLAN
|
||||
@@ -1776,7 +1788,7 @@ or
|
||||
|
|
||||
02:HASH JOIN [INNER JOIN]
|
||||
| hash predicates: l.l_partkey = p.p_partkey
|
||||
| other predicates: (p_brand = 'Brand#12' AND p_container REGEXP 'SM CASE||SM BOX||SM PACK||SM PKG' AND l_quantity >= 1.0 AND l_quantity <= 11.0 AND p_size >= 1 AND p_size <= 5 AND l_shipmode REGEXP 'AIR||AIR REG' AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#23' AND p_container REGEXP 'MED BAG||MED BOX||MED PKG||MED PACK' AND l_quantity >= 10.0 AND l_quantity <= 20.0 AND p_size >= 1 AND p_size <= 10 AND l_shipmode REGEXP 'AIR||AIR REG' AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#34' AND p_container REGEXP 'LG CASE||LG BOX||LG PACK||LG PKG' AND l_quantity >= 20.0 AND l_quantity <= 30.0 AND p_size >= 1 AND p_size <= 15 AND l_shipmode REGEXP 'AIR||AIR REG' AND l_shipinstruct = 'DELIVER IN PERSON')
|
||||
| other predicates: (p_brand = 'Brand#12' AND (p_container LIKE 'SM CASE' OR p_container LIKE 'SM BOX' OR p_container LIKE 'SM PACK' OR p_container LIKE 'SM PKG') AND l_quantity >= 1.0 AND l_quantity <= 11.0 AND p_size >= 1 AND p_size <= 5 AND (l_shipmode LIKE 'AIR' OR l_shipmode LIKE 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#23' AND (p_container LIKE 'MED BAG' OR p_container LIKE 'MED BOX' OR p_container LIKE 'MED PKG' OR p_container LIKE 'MED PACK') AND l_quantity >= 10.0 AND l_quantity <= 20.0 AND p_size >= 1 AND p_size <= 10 AND (l_shipmode LIKE 'AIR' OR l_shipmode LIKE 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#34' AND (p_container LIKE 'LG BAG' OR p_container LIKE 'LG BOX' OR p_container LIKE 'LG PKG' OR p_container LIKE 'LG PACK') AND l_quantity >= 20.0 AND l_quantity <= 30.0 AND p_size >= 1 AND p_size <= 15 AND (l_shipmode LIKE 'AIR' OR l_shipmode LIKE 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON')
|
||||
|
|
||||
|--01:SCAN HDFS [tpch.part p]
|
||||
| partitions=1/1 size=22.83MB compact
|
||||
@@ -1794,7 +1806,7 @@ or
|
||||
|
|
||||
02:HASH JOIN [INNER JOIN, BROADCAST]
|
||||
| hash predicates: l.l_partkey = p.p_partkey
|
||||
| other predicates: (p_brand = 'Brand#12' AND p_container REGEXP 'SM CASE||SM BOX||SM PACK||SM PKG' AND l_quantity >= 1.0 AND l_quantity <= 11.0 AND p_size >= 1 AND p_size <= 5 AND l_shipmode REGEXP 'AIR||AIR REG' AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#23' AND p_container REGEXP 'MED BAG||MED BOX||MED PKG||MED PACK' AND l_quantity >= 10.0 AND l_quantity <= 20.0 AND p_size >= 1 AND p_size <= 10 AND l_shipmode REGEXP 'AIR||AIR REG' AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#34' AND p_container REGEXP 'LG CASE||LG BOX||LG PACK||LG PKG' AND l_quantity >= 20.0 AND l_quantity <= 30.0 AND p_size >= 1 AND p_size <= 15 AND l_shipmode REGEXP 'AIR||AIR REG' AND l_shipinstruct = 'DELIVER IN PERSON')
|
||||
| other predicates: (p_brand = 'Brand#12' AND (p_container LIKE 'SM CASE' OR p_container LIKE 'SM BOX' OR p_container LIKE 'SM PACK' OR p_container LIKE 'SM PKG') AND l_quantity >= 1.0 AND l_quantity <= 11.0 AND p_size >= 1 AND p_size <= 5 AND (l_shipmode LIKE 'AIR' OR l_shipmode LIKE 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#23' AND (p_container LIKE 'MED BAG' OR p_container LIKE 'MED BOX' OR p_container LIKE 'MED PKG' OR p_container LIKE 'MED PACK') AND l_quantity >= 10.0 AND l_quantity <= 20.0 AND p_size >= 1 AND p_size <= 10 AND (l_shipmode LIKE 'AIR' OR l_shipmode LIKE 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#34' AND (p_container LIKE 'LG BAG' OR p_container LIKE 'LG BOX' OR p_container LIKE 'LG PKG' OR p_container LIKE 'LG PACK') AND l_quantity >= 20.0 AND l_quantity <= 30.0 AND p_size >= 1 AND p_size <= 15 AND (l_shipmode LIKE 'AIR' OR l_shipmode LIKE 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON')
|
||||
|
|
||||
|--04:EXCHANGE [BROADCAST]
|
||||
| |
|
||||
|
||||
24
testdata/workloads/tpch/queries/tpch-q19.test
vendored
24
testdata/workloads/tpch/queries/tpch-q19.test
vendored
@@ -9,28 +9,40 @@ join part p
|
||||
where
|
||||
(
|
||||
p_brand = 'Brand#12'
|
||||
and p_container REGEXP 'SM CASE||SM BOX||SM PACK||SM PKG'
|
||||
and (p_container = 'SM CASE' or
|
||||
p_container = 'SM BOX' or
|
||||
p_container = 'SM PACK' or
|
||||
p_container = 'SM PKG')
|
||||
and l_quantity >= 1 and l_quantity <= 11
|
||||
and p_size >= 1 and p_size <= 5
|
||||
and l_shipmode REGEXP 'AIR||AIR REG'
|
||||
and (l_shipmode = 'AIR' or
|
||||
l_shipmode = 'AIR REG')
|
||||
and l_shipinstruct = 'DELIVER IN PERSON'
|
||||
)
|
||||
or
|
||||
(
|
||||
p_brand = 'Brand#23'
|
||||
and p_container REGEXP 'MED BAG||MED BOX||MED PKG||MED PACK'
|
||||
and (p_container = 'MED BAG' or
|
||||
p_container = 'MED BOX' or
|
||||
p_container = 'MED PKG' or
|
||||
p_container = 'MED PACK')
|
||||
and l_quantity >= 10 and l_quantity <= 20
|
||||
and p_size >= 1 and p_size <= 10
|
||||
and l_shipmode REGEXP 'AIR||AIR REG'
|
||||
and (l_shipmode = 'AIR' or
|
||||
l_shipmode = 'AIR REG')
|
||||
and l_shipinstruct = 'DELIVER IN PERSON'
|
||||
)
|
||||
or
|
||||
(
|
||||
p_brand = 'Brand#34'
|
||||
and p_container REGEXP 'LG CASE||LG BOX||LG PACK||LG PKG'
|
||||
and (p_container = 'LG CASE' or
|
||||
p_container = 'LG BOX' or
|
||||
p_container = 'LG PKG' or
|
||||
p_container = 'LG PACK')
|
||||
and l_quantity >= 20 and l_quantity <= 30
|
||||
and p_size >= 1 and p_size <= 15
|
||||
and l_shipmode REGEXP 'AIR||AIR REG'
|
||||
and (l_shipmode = 'AIR' or
|
||||
l_shipmode = 'AIR REG')
|
||||
and l_shipinstruct = 'DELIVER IN PERSON'
|
||||
)
|
||||
---- TYPES
|
||||
|
||||
Reference in New Issue
Block a user