mirror of
https://github.com/apache/impala.git
synced 2026-01-15 06:00:38 -05:00
This patch implements the same function as the Hive UDF get_json_object.
We reuse RapidJson to parse the json string. In order to track the
memory used in RapidJson, we wrap FunctionContext into an allocator.
get_json_object accepts two parameters: a json string and a selector
(json path). We parse the json string into a Document tree and then
perform BFS according to the selector. For example, to process
get_json_object('[{\"a\":1}, {\"a\":2}, {\"a\":3}]', '$[*].a'),
we first perform '$[*]' to extract all the items in the root array.
Then we get a queue consisting of {a:1},{a:2},{a:3} and perform the '.a'
selector on all values in the queue. The final results are 1,2,3 in the
queue. As there are multiple results, they should be encapsulated into
an array. The output result is the string '[1,2,3]'.
More examples can be found in expr-test.cc.
Test:
* Add unit tests in expr-test
* Add e2e tests in exprs.test
* Add tests in test_alloc_fail.py to check handling of out of memory
Change-Id: I6a9d3598cb3beca0865a7edb094f3a5b602dbd2f
Reviewed-on: http://gerrit.cloudera.org:8080/10950
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
73 lines
2.1 KiB
Plaintext
73 lines
2.1 KiB
Plaintext
====
|
|
---- QUERY
|
|
# TODO: IMPALA-3350: Add 'group by' to these tests to exercise different code paths.
|
|
select min(string_col) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 1 bytes.
|
|
====
|
|
---- QUERY
|
|
select max(string_col) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 1 bytes.
|
|
====
|
|
---- QUERY
|
|
select sample(timestamp_col) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 248 bytes.
|
|
====
|
|
---- QUERY
|
|
select group_concat(string_col) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 4 bytes.
|
|
====
|
|
---- QUERY
|
|
select extract(year from timestamp_col) from functional.alltypes limit 10
|
|
---- CATCH
|
|
FunctionContextImpl::AllocateForResults() failed to allocate 4 bytes.
|
|
====
|
|
---- QUERY
|
|
select trunc(timestamp_col, 'YEAR') from functional.alltypes limit 10
|
|
---- CATCH
|
|
FunctionContextImpl::AllocateForResults() failed to allocate 4 bytes.
|
|
====
|
|
---- QUERY
|
|
select first_value(string_col) over (partition by month order by year) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 1 bytes.
|
|
====
|
|
---- QUERY
|
|
select last_value(string_col) over (partition by month order by year) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 1 bytes.
|
|
====
|
|
---- QUERY
|
|
select rand() from functional.alltypes;
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 16 bytes.
|
|
====
|
|
---- QUERY
|
|
select case when min(int_col) = 0 then 0 end from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 16 bytes.
|
|
====
|
|
---- QUERY
|
|
select cast(string_col as char(120)) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContextImpl::AllocateForResults() failed to allocate 120 bytes.
|
|
====
|
|
---- QUERY
|
|
select appx_median(int_col) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 248 bytes.
|
|
====
|
|
---- QUERY
|
|
select to_date(now())
|
|
---- CATCH
|
|
FunctionContextImpl::AllocateForResults() failed to allocate 10 bytes.
|
|
====
|
|
---- QUERY
|
|
select get_json_object('{"a": 1}', '$.a')
|
|
---- CATCH
|
|
FunctionContextImpl::AllocateForResults() failed to allocate 32 bytes
|
|
====
|