mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
Fix null string parsing.
This commit is contained in:
@@ -55,9 +55,11 @@ Status HdfsTextTableWriter::AppendRowBatch(RowBatch* batch,
|
||||
// the first num_non_partition_cols values.
|
||||
for (int j = 0; j < num_non_partition_cols; ++j) {
|
||||
void* value = output_exprs_[j]->GetValue(current_row);
|
||||
// NULL values become empty strings
|
||||
if (value != NULL) {
|
||||
output_exprs_[j]->PrintValue(value, &row_stringstream);
|
||||
} else {
|
||||
// NULL values in hive are encoded as '\N'
|
||||
row_stringstream << "\\N";
|
||||
}
|
||||
// Append field delimiter.
|
||||
if (j + 1 < num_non_partition_cols) {
|
||||
|
||||
@@ -15,77 +15,82 @@ namespace impala {
|
||||
|
||||
inline bool TextConverter::WriteSlot(const SlotDescriptor* slot_desc, Tuple* tuple,
|
||||
const char* data, int len, bool copy_string, bool need_escape, MemPool* pool) {
|
||||
if (len == 0) {
|
||||
if (len == 0 && slot_desc->type() != TYPE_STRING) {
|
||||
tuple->SetNull(slot_desc->null_indicator_offset());
|
||||
} else {
|
||||
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
|
||||
void* slot = tuple->GetSlot(slot_desc->tuple_offset());
|
||||
return true;
|
||||
} else if (len == 2 && data[0] == '\\' && data[1] == 'N') {
|
||||
// Hive encodes NULLs as '\N'
|
||||
tuple->SetNull(slot_desc->null_indicator_offset());
|
||||
return true;
|
||||
}
|
||||
|
||||
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
|
||||
void* slot = tuple->GetSlot(slot_desc->tuple_offset());
|
||||
|
||||
// Parse the raw-text data. Translate the text string to internal format.
|
||||
switch (slot_desc->type()) {
|
||||
case TYPE_STRING: {
|
||||
StringValue* str_slot = reinterpret_cast<StringValue*>(slot);
|
||||
str_slot->ptr = const_cast<char*>(data);
|
||||
str_slot->len = len;
|
||||
if (copy_string || need_escape) {
|
||||
DCHECK(pool != NULL);
|
||||
char* slot_data = reinterpret_cast<char*>(pool->Allocate(len));
|
||||
if (need_escape) {
|
||||
UnescapeString(data, slot_data, &str_slot->len);
|
||||
} else {
|
||||
memcpy(slot_data, data, str_slot->len);
|
||||
}
|
||||
str_slot->ptr = slot_data;
|
||||
// Parse the raw-text data. Translate the text string to internal format.
|
||||
switch (slot_desc->type()) {
|
||||
case TYPE_STRING: {
|
||||
StringValue* str_slot = reinterpret_cast<StringValue*>(slot);
|
||||
str_slot->ptr = const_cast<char*>(data);
|
||||
str_slot->len = len;
|
||||
if (len != 0 && (copy_string || need_escape)) {
|
||||
DCHECK(pool != NULL);
|
||||
char* slot_data = reinterpret_cast<char*>(pool->Allocate(len));
|
||||
if (need_escape) {
|
||||
UnescapeString(data, slot_data, &str_slot->len);
|
||||
} else {
|
||||
memcpy(slot_data, data, str_slot->len);
|
||||
}
|
||||
break;
|
||||
str_slot->ptr = slot_data;
|
||||
}
|
||||
case TYPE_BOOLEAN:
|
||||
*reinterpret_cast<bool*>(slot) =
|
||||
StringParser::StringToBool(data, len, &parse_result);
|
||||
break;
|
||||
case TYPE_TINYINT:
|
||||
*reinterpret_cast<int8_t*>(slot) =
|
||||
StringParser::StringToInt<int8_t>(data, len, &parse_result);
|
||||
break;
|
||||
case TYPE_SMALLINT:
|
||||
*reinterpret_cast<int16_t*>(slot) =
|
||||
StringParser::StringToInt<int16_t>(data, len, &parse_result);
|
||||
break;
|
||||
case TYPE_INT:
|
||||
*reinterpret_cast<int32_t*>(slot) =
|
||||
StringParser::StringToInt<int32_t>(data, len, &parse_result);
|
||||
break;
|
||||
case TYPE_BIGINT:
|
||||
*reinterpret_cast<int64_t*>(slot) =
|
||||
StringParser::StringToInt<int64_t>(data, len, &parse_result);
|
||||
break;
|
||||
case TYPE_FLOAT:
|
||||
*reinterpret_cast<float*>(slot) =
|
||||
StringParser::StringToFloat<float>(data, len, &parse_result);
|
||||
break;
|
||||
case TYPE_DOUBLE:
|
||||
*reinterpret_cast<double*>(slot) =
|
||||
StringParser::StringToFloat<double>(data, len, &parse_result);
|
||||
break;
|
||||
case TYPE_TIMESTAMP: {
|
||||
std::string strbuf(data, len);
|
||||
TimestampValue* ts_slot = reinterpret_cast<TimestampValue*>(slot);
|
||||
*ts_slot = TimestampValue(strbuf);
|
||||
if (ts_slot->NotADateTime()) {
|
||||
parse_result = StringParser::PARSE_FAILURE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
DCHECK(false) << "bad slot type: " << TypeToString(slot_desc->type());
|
||||
break;
|
||||
break;
|
||||
}
|
||||
case TYPE_BOOLEAN:
|
||||
*reinterpret_cast<bool*>(slot) =
|
||||
StringParser::StringToBool(data, len, &parse_result);
|
||||
break;
|
||||
case TYPE_TINYINT:
|
||||
*reinterpret_cast<int8_t*>(slot) =
|
||||
StringParser::StringToInt<int8_t>(data, len, &parse_result);
|
||||
break;
|
||||
case TYPE_SMALLINT:
|
||||
*reinterpret_cast<int16_t*>(slot) =
|
||||
StringParser::StringToInt<int16_t>(data, len, &parse_result);
|
||||
break;
|
||||
case TYPE_INT:
|
||||
*reinterpret_cast<int32_t*>(slot) =
|
||||
StringParser::StringToInt<int32_t>(data, len, &parse_result);
|
||||
break;
|
||||
case TYPE_BIGINT:
|
||||
*reinterpret_cast<int64_t*>(slot) =
|
||||
StringParser::StringToInt<int64_t>(data, len, &parse_result);
|
||||
break;
|
||||
case TYPE_FLOAT:
|
||||
*reinterpret_cast<float*>(slot) =
|
||||
StringParser::StringToFloat<float>(data, len, &parse_result);
|
||||
break;
|
||||
case TYPE_DOUBLE:
|
||||
*reinterpret_cast<double*>(slot) =
|
||||
StringParser::StringToFloat<double>(data, len, &parse_result);
|
||||
break;
|
||||
case TYPE_TIMESTAMP: {
|
||||
std::string strbuf(data, len);
|
||||
TimestampValue* ts_slot = reinterpret_cast<TimestampValue*>(slot);
|
||||
*ts_slot = TimestampValue(strbuf);
|
||||
if (ts_slot->NotADateTime()) {
|
||||
parse_result = StringParser::PARSE_FAILURE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
DCHECK(false) << "bad slot type: " << TypeToString(slot_desc->type());
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO: add warning for overflow case
|
||||
if (parse_result == StringParser::PARSE_FAILURE) {
|
||||
tuple->SetNull(slot_desc->null_indicator_offset());
|
||||
return false;
|
||||
}
|
||||
// TODO: add warning for overflow case
|
||||
if (parse_result == StringParser::PARSE_FAILURE) {
|
||||
tuple->SetNull(slot_desc->null_indicator_offset());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
// Copyright (c) 2012 Cloudera, Inc. All rights reserved.
|
||||
|
||||
package com.cloudera.impala.service;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
 * JUnit test class that runs the "misc" query test through the
 * BaseQueryTest harness it extends.
 */
public class MiscQueryTest extends BaseQueryTest {
|
||||
/**
 * Calls runTestInExecutionMode for the test named "misc" using EXECUTION_MODE.
 * NOTE(review): "misc" presumably selects the QueryTest/misc.test file; the
 * meaning of the trailing `false` and `1000` arguments is defined by
 * BaseQueryTest, which is not visible here -- confirm before changing them.
 */
@Test
|
||||
public void TestMisc() {
|
||||
runTestInExecutionMode(EXECUTION_MODE, "misc", false, 1000);
|
||||
}
|
||||
}
|
||||
1
testdata/LikeTbl/data.csv
vendored
1
testdata/LikeTbl/data.csv
vendored
@@ -12,3 +12,4 @@ beginning of line,begin%,not begin%,^begin.*,^not begin.*
|
||||
end of line,%line,%line end,.*line$,.*line end$
|
||||
middle of line,%of%,%of,^.*of.*$,.*of$
|
||||
,%,,.*,X
|
||||
\N,%,\N,.*,X
|
||||
|
||||
|
@@ -1039,3 +1039,49 @@ LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/TinyTable/data.csv' OVERWRIT
|
||||
----
|
||||
ANALYZE TABLE %(table_name)s COMPUTE STATISTICS;
|
||||
====
|
||||
functional
|
||||
----
|
||||
nulltable
|
||||
----
|
||||
CREATE TABLE %(table_name)s (
|
||||
a string,
|
||||
b string,
|
||||
c string,
|
||||
d int,
|
||||
e double)
|
||||
row format delimited fields terminated by ','
|
||||
stored as %(file_format)s;
|
||||
INSERT OVERWRITE TABLE nulltable select 'a', '', NULL, NULL, NULL from alltypes limit 1;
|
||||
----
|
||||
FROM %(base_table_name)s INSERT OVERWRITE TABLE %(table_name)s SELECT *;
|
||||
----
|
||||
${IMPALA_HOME}/bin/run-query.sh --query=" \
|
||||
INSERT OVERWRITE TABLE %(table_name)s \
|
||||
select * FROM %(base_table_name)s"
|
||||
----
|
||||
----
|
||||
ANALYZE TABLE %(table_name)s COMPUTE STATISTICS;
|
||||
====
|
||||
====
|
||||
functional
|
||||
----
|
||||
nullescapedtable
|
||||
----
|
||||
CREATE TABLE %(table_name)s (
|
||||
a string,
|
||||
b string,
|
||||
c string,
|
||||
d int,
|
||||
e double)
|
||||
row format delimited fields terminated by ',' escaped by '\\'
|
||||
stored as %(file_format)s;
|
||||
INSERT OVERWRITE TABLE nullescapedtable select 'a', '', NULL, NULL, NULL from alltypes limit 1;
|
||||
----
|
||||
FROM %(base_table_name)s INSERT OVERWRITE TABLE %(table_name)s SELECT *;
|
||||
----
|
||||
${IMPALA_HOME}/bin/run-query.sh --query=" \
|
||||
INSERT OVERWRITE TABLE %(table_name)s \
|
||||
select * FROM %(base_table_name)s"
|
||||
----
|
||||
----
|
||||
ANALYZE TABLE %(table_name)s COMPUTE STATISTICS;
|
||||
|
||||
@@ -305,6 +305,7 @@ select * from LikeTbl
|
||||
string, string, string, string, string
|
||||
---- RESULTS
|
||||
'NULL','%','NULL','.*','X'
|
||||
'','%','','.*','X'
|
||||
'beginning of line','begin%','not begin%','^begin.*','^not begin.*'
|
||||
'eight','%eight%','n%eight%','.*eight.*','n.*eight.*'
|
||||
'end of line','%line','%line end','.*line$','.*line end$'
|
||||
@@ -337,6 +338,7 @@ string, string
|
||||
'ten','%ten%'
|
||||
'three','%three%'
|
||||
'two','%two%'
|
||||
'','%'
|
||||
====
|
||||
select str_col, match_like_col from LikeTbl
|
||||
where str_col NOT LIKE match_like_col
|
||||
@@ -349,6 +351,7 @@ where str_col LIKE no_match_like_col
|
||||
---- TYPES
|
||||
string, string
|
||||
---- RESULTS
|
||||
'','%'
|
||||
====
|
||||
select str_col, no_match_like_col from LikeTbl
|
||||
where str_col NOT LIKE no_match_like_col
|
||||
@@ -387,6 +390,7 @@ string, string
|
||||
'ten','.*ten.*'
|
||||
'three','.*three.*'
|
||||
'two','.*two.*'
|
||||
'','.*'
|
||||
====
|
||||
select str_col, no_match_regex_col from LikeTbl
|
||||
where str_col REGEXP no_match_regex_col
|
||||
@@ -418,6 +422,7 @@ string, string
|
||||
'ten','n.*ten.*'
|
||||
'three','n.*three.*'
|
||||
'two','n.*two.*'
|
||||
'','X'
|
||||
====
|
||||
select 1+2
|
||||
---- TYPES
|
||||
@@ -1100,4 +1105,4 @@ where (cast('2012-01-01 09:10:11' as timestamp) + interval tinyint_col seconds)
|
||||
bigint
|
||||
---- RESULTS
|
||||
1000
|
||||
====
|
||||
====
|
||||
|
||||
26
testdata/workloads/functional-query/queries/QueryTest/misc.test
vendored
Normal file
26
testdata/workloads/functional-query/queries/QueryTest/misc.test
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
# Test for selecting from table with null and empty strings.
|
||||
select * from nulltable$TABLE
|
||||
---- TYPES
|
||||
string, string, string, int, double
|
||||
---- RESULTS
|
||||
'a','','NULL',NULL,NULL
|
||||
====
|
||||
select count(*),count(a),count(b),count(c),count(d),count(e) from nulltable$TABLE
|
||||
---- TYPES
|
||||
bigint, bigint, bigint, bigint, bigint, bigint
|
||||
---- RESULTS
|
||||
1,1,1,0,0,0
|
||||
====
|
||||
# Test for selecting from a table that uses '\' as the escape character and contains null and empty strings.
|
||||
select * from nullescapedtable$TABLE
|
||||
---- TYPES
|
||||
string, string, string, int, double
|
||||
---- RESULTS
|
||||
'a','','NULL',NULL,NULL
|
||||
====
|
||||
select count(*),count(a),count(b),count(c), count(d), count(e) from nullescapedtable$TABLE
|
||||
---- TYPES
|
||||
bigint, bigint, bigint, bigint, bigint, bigint
|
||||
---- RESULTS
|
||||
1,1,1,0,0,0
|
||||
====
|
||||
Reference in New Issue
Block a user