mirror of
https://github.com/apache/impala.git
synced 2026-02-01 12:00:22 -05:00
The limitation of the Avro JSON library not to handle \u0000 characters is to avoid problems with builtin functions like strlen() that would report the wrong length when encountering such a character. In the case of Impala, we currently don't support any Unicode characters. This allows us to actually skip the \u0000 character instead of interpreting it. It is important to say that even the most recent versions of Avro do not support parsing \u0000 characters. Change-Id: I56dfa7f0f12979fe9705c51c751513aebce4beca Reviewed-on: http://gerrit.cloudera.org:8080/712 Tested-by: Internal Jenkins Reviewed-by: Alex Behm <alex.behm@cloudera.com>
37 lines
1.4 KiB
Python
37 lines
1.4 KiB
Python
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
|
|
|
|
import pytest
|
|
from subprocess import check_call
|
|
from tests.common.test_vector import *
|
|
from tests.common.impala_test_suite import *
|
|
|
|
# This test requires that testdata/avro_schema_resolution/create_table.sql has been run
|
|
class TestAvroSchemaResolution(ImpalaTestSuite):
    """Avro schema resolution tests, restricted to the avro/snap table format."""

    @classmethod
    def get_workload(cls):
        """Return the name of the workload this suite belongs to."""
        return 'functional-query'

    @classmethod
    def add_test_dimensions(cls):
        """Extend the inherited test dimensions with an avro/snap-only constraint."""
        super(TestAvroSchemaResolution, cls).add_test_dimensions()
        # avro/snap is the only table format with a schema_resolution_test table
        cls.TestMatrix.add_constraint(
            lambda v: (v.get_value('table_format').file_format == 'avro' and
                       v.get_value('table_format').compression_codec == 'snap'))

    def test_avro_schema_resolution(self, vector):
        """Run the 'QueryTest/avro-schema-resolution' test case for the vector."""
        self.run_test_case('QueryTest/avro-schema-resolution', vector)

    def test_avro_c_lib_unicode_nulls(self, vector):
        """Test for IMPALA-1136 and IMPALA-2161 and unicode characters in the
        schema that were not handled correctly by the Avro C library.

        The rows read from functional_avro_snap.avro_unicode_nulls must equal
        the rows read from functional.liketbl, position by position.
        """
        result = self.execute_query(
            "select * from functional_avro_snap.avro_unicode_nulls")
        comparison = self.execute_query("select * from functional.liketbl")

        # If we were not able to properly parse the Avro file schemas, then the result
        # would be empty.
        assert len(comparison.data) == len(result.data)
        # The lengths are known to be equal here, so zip() pairs every row and
        # cannot silently drop trailing rows.
        for expected_row, actual_row in zip(comparison.data, result.data):
            assert expected_row == actual_row
|