Files
impala/tests/query_test/test_delimited_text.py
David Knupp f590bc0da6 IMPALA-4750: Rename test infra classes so they don't mimic test classes.
This patch addresses warning messages from pytest about the imported
TestMatrix, TestVector, and TestDimension classes, which were being
collected as potential test classes. The fix was simply to prefix
the class names with "Impala".

git grep -l 'TestDimension' | xargs \
    sed -i 's/TestDimension/ImpalaTestDimension/g'

git grep -l 'TestMatrix' | xargs \
    sed -i 's/TestMatrix/ImpalaTestMatrix/g'

git grep -l 'TestVector' | xargs \
    sed -i 's/TestVector/ImpalaTestVector/g'

The tests all passed in an exhaustive run on the upstream jenkins
server:

http://jenkins.impala.io:8080/view/Utility/job/pre-review-test/8/

Change-Id: I06b7bc6fd99fbb637a47ba376bf9830705c1fce1
Reviewed-on: http://gerrit.cloudera.org:8080/5794
Reviewed-by: Michael Brown <mikeb@cloudera.com>
Reviewed-by: Jim Apple <jbapple-impala@apache.org>
Tested-by: Impala Public Jenkins
2017-01-26 23:40:22 +00:00

79 lines
3.4 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Targeted Impala tests for different tuple delimiters, field delimiters,
# and escape characters.
#
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.test_dimensions import (
create_single_exec_option_dimension,
create_uncompressed_text_dimension)
class TestDelimitedText(ImpalaTestSuite):
    """
    Tests delimited text files with different tuple delimiters, field delimiters
    and escape characters.
    """

    @classmethod
    def get_workload(cls):
        """Return the name of the workload used by this suite."""
        return 'functional-query'

    @classmethod
    def add_test_dimensions(cls):
        """Add the single-exec-option and uncompressed-text dimensions to the matrix."""
        super(TestDelimitedText, cls).add_test_dimensions()
        cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
        # Only run on delimited text with no compression.
        cls.ImpalaTestMatrix.add_dimension(
            create_uncompressed_text_dimension(cls.get_workload()))

    def test_delimited_text(self, vector, unique_database):
        """Run the 'QueryTest/delimited-text' test case with the given vector and database."""
        self.run_test_case('QueryTest/delimited-text', vector, unique_database)

    def test_delimited_text_newlines(self, vector, unique_database):
        """Test text with newlines in strings - IMPALA-1943.

        The queries are executed from Python to avoid issues with newline handling
        in the test file format.
        """
        # The SQL literals below are deliberately left flush-left so that the text sent
        # to Impala is not altered by source indentation.
        # NOTE: '\002' and '\001' are Python octal escapes in a non-raw string, so the
        # statement carries the raw control characters as the field/line terminators.
        self.execute_query_expect_success(self.client, """
create table if not exists %s.nl_queries
(c1 string, c2 string, c3 string)
row format delimited
fields terminated by '\002'
lines terminated by '\001'
stored as textfile
""" % unique_database)
        # Create test data with newlines in various places
        # ('\\n' is sent as the two-character escape; the asserts below expect it to
        # come back as a real newline).
        self.execute_query_expect_success(self.client, """
insert into %s.nl_queries
values ("the\\n","\\nquick\\nbrown","fox\\n"),
("\\njumped","over the lazy\\n","\\ndog")""" % unique_database)
        result = self.execute_query("select * from %s.nl_queries" % unique_database)
        assert len(result.data) == 2
        # Each result row is one string whose columns are separated by tabs.
        assert result.data[0].split("\t") == ["the\n", "\nquick\nbrown", "fox\n"]
        assert result.data[1].split("\t") == ["\njumped", "over the lazy\n", "\ndog"]
        # The row count may be computed without parsing each row, so could be
        # inconsistent.
        result = self.execute_query(
            "select count(*) from %s.nl_queries" % unique_database)
        assert len(result.data) == 1
        assert result.data[0] == "2"

    def test_delimited_text_latin_chars(self, vector, unique_database):
        """Verifies Impala is able to properly handle delimited text that contains
        extended ASCII/latin characters.

        Runs the 'QueryTest/delimited-latin-text' test case with the latin-1 encoding.
        """
        self.run_test_case('QueryTest/delimited-latin-text', vector, unique_database,
                           encoding="latin-1")