Files
impala/tests/query_test/test_insert_behaviour.py
Henry Robinson 89a0beb56a IMPALA-449: Better cleanup after an INSERT fails
This patch goes some way to improving recovery after an INSERT
fails. Inserts now write intermediate results to
<table_dir>/.impala_insert_staging. After execution completes, either
successfully or not, the query-specific directory under that directory
is deleted.

This doesn't complete the job for better cleanup (although this goes as
far as IMPALA-449 suggests). Two things to do in the future:

* Have each backend delete its own staging files on error. The
  difficulty getting there now is that backends don't know if they are
  cancelled in error or because a LIMIT was reached.
* If the operation to move files to their final destinations should
  fail during FinalizeQuery(), the coordinator should perform
  compensation actions and delete the files that made it.

Note: We also considered a query-wide and impalad-wide option to change
the staging dir. There are advantages to this (all intermediate results
go to a known location which is easy to clean up on failure), but also
security and other operational concerns. Worth revisiting in the future.

Change-Id: Ia54cf36db6a382e359877f87d7d40aad7fdb77be
Reviewed-on: http://gerrit.ent.cloudera.com:8080/670
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: jenkins
2014-01-08 10:53:37 -08:00

61 lines
2.4 KiB
Python
Executable File

#!/usr/bin/env python
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from tests.common.impala_test_suite import ImpalaTestSuite
import time
import pytest
class TestInsertBehaviour(ImpalaTestSuite):
    """Tests for INSERT behaviour that isn't covered by checking query results."""

    @pytest.mark.execute_serially
    def test_insert_removes_staging_files(self):
        """Check that no entries are left in the insert staging directory.

        The staging directory of functional.insert_overwrite_nopart is removed,
        an INSERT OVERWRITE is run against the table, and the staging directory
        listing is then expected to contain no entries.
        """
        insert_staging_dir = (
            "test-warehouse/functional.db/insert_overwrite_nopart/"
            ".impala_insert_staging")
        # Remove anything already there so the listing below only reflects
        # what this INSERT left behind.
        self.hdfs_client.delete_file_dir(insert_staging_dir, recursive=True)
        self.client.execute("""INSERT OVERWRITE
        functional.insert_overwrite_nopart SELECT int_col FROM functional.tinyinttable""")
        ls = self.hdfs_client.list_dir(insert_staging_dir)
        assert len(ls['FileStatuses']['FileStatus']) == 0

    @pytest.mark.execute_serially
    def test_insert_preserves_hidden_files(self):
        """Test that INSERT OVERWRITE preserves hidden files in the root table directory.

        Creates two hidden files and two directories under the table directory,
        runs an INSERT OVERWRITE, and fails if any of them can no longer be
        stat'ed afterwards.
        """
        table_dir = "test-warehouse/functional.db/insert_overwrite_nopart/"
        hidden_file_locations = [".hidden", "_hidden"]
        dir_locations = ["dir", ".hidden_dir"]

        for dir_name in dir_locations:
            self.hdfs_client.make_dir(table_dir + dir_name)
        for file_name in hidden_file_locations:
            self.hdfs_client.create_file(table_dir + file_name, '', overwrite=True)

        self.client.execute("""INSERT OVERWRITE
        functional.insert_overwrite_nopart SELECT int_col FROM functional.tinyinttable""")

        for file_name in hidden_file_locations:
            self._fail_if_missing(
                table_dir + file_name,
                "Hidden file '%s' was unexpectedly deleted by INSERT OVERWRITE")
        # Check each directory by its own path (not the last file path from the
        # loop above), so a deleted directory is actually detected.
        for dir_name in dir_locations:
            self._fail_if_missing(
                table_dir + dir_name,
                "Directory '%s' was unexpectedly deleted by INSERT OVERWRITE")

    def _fail_if_missing(self, path, err_msg):
        """Fail the current test if the status of 'path' cannot be fetched.

        'err_msg' must contain a single '%s' placeholder, which is filled in
        with 'path'.
        """
        try:
            self.hdfs_client.get_file_dir_status(path)
        except Exception:
            # NOTE(review): any client error is reported as a deletion, as in the
            # original test; presumably a missing path is the only expected
            # failure here - confirm against the HDFS client in use.
            pytest.fail(err_msg % path)