IMPALA-6128: Add support for AES-CTR encryption when spilling to disk

CFB mode is a stream cipher and is secure when used with a different nonce/IV
for every message. However it can be a performance bottleneck.
CTR mode is also stream cipher and is secure, 4~6x faster than CFB mode in
OpenSSL. AES-CTR+SHA256 is about 40~70% faster than AES-CFB+SHA256.

CTR mode is used if OpenSSL version>=1.0.1 at runtime, otherwise
fall back to using CFB mode.

Testing:
run runtime tmp-file-mgr-test, openssl-util-test, buffer-pool-test and
buffered-tuple-stream-test
The ut case openssl-util-test.EncryptInPlace tests encryption in both modes.

Change-Id: I9debc240615dd8cdbf00ec8730cff62ffef52aff
Reviewed-on: http://gerrit.cloudera.org:8080/8861
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Impala Public Jenkins
This commit is contained in:
Xianda Ke
2017-11-26 15:35:22 +08:00
committed by Impala Public Jenkins
parent f810458ca4
commit 514dfaf9fd
4 changed files with 94 additions and 44 deletions

View File

@@ -612,8 +612,8 @@ void TmpFileMgr::WriteHandle::WaitForWrite() {
Status TmpFileMgr::WriteHandle::EncryptAndHash(MemRange buffer) {
DCHECK(FLAGS_disk_spill_encryption);
SCOPED_TIMER(encryption_timer_);
// Since we're using AES-CFB mode, we must take care not to reuse a key/IV pair.
// Regenerate a new key and IV for every data buffer we write.
// Since we're using AES-CTR/AES-CFB mode, we must take care not to reuse a
// key/IV pair. Regenerate a new key and IV for every data buffer we write.
key_.InitializeRandom();
RETURN_IF_ERROR(key_.Encrypt(buffer.data(), buffer.len(), buffer.data()));
hash_.Compute(buffer.data(), buffer.len());

View File

@@ -56,24 +56,29 @@ TEST_F(OpenSSLUtilTest, Encryption) {
vector<uint8_t> decrypted(buffer_size);
GenerateRandomData(original.data(), buffer_size);
// Iterate multiple times to ensure that key regeneration works correctly.
EncryptionKey key;
for (int i = 0; i < 2; ++i) {
key.InitializeRandom(); // Generate a new key for each iteration.
// Check both CTR & CFB
AES_CIPHER_MODE modes[] = {AES_256_CTR, AES_256_CFB};
for (auto m : modes) {
// Iterate multiple times to ensure that key regeneration works correctly.
EncryptionKey key;
for (int i = 0; i < 2; ++i) {
key.InitializeRandom(); // Generate a new key for each iteration.
key.SetCipherMode(m);
// Check that OpenSSL is happy with the amount of entropy we're feeding it.
DCHECK_EQ(1, RAND_status());
// Check that OpenSSL is happy with the amount of entropy we're feeding it.
DCHECK_EQ(1, RAND_status());
ASSERT_OK(key.Encrypt(original.data(), buffer_size, encrypted.data()));
if (i > 0) {
// Check that we're not somehow reusing the same key.
ASSERT_NE(0, memcmp(encrypted.data(), prev_encrypted.data(), buffer_size));
ASSERT_OK(key.Encrypt(original.data(), buffer_size, encrypted.data()));
if (i > 0) {
// Check that we're not somehow reusing the same key.
ASSERT_NE(0, memcmp(encrypted.data(), prev_encrypted.data(), buffer_size));
}
memcpy(prev_encrypted.data(), encrypted.data(), buffer_size);
// We should get the original data by decrypting it.
ASSERT_OK(key.Decrypt(encrypted.data(), buffer_size, decrypted.data()));
ASSERT_EQ(0, memcmp(original.data(), decrypted.data(), buffer_size));
}
memcpy(prev_encrypted.data(), encrypted.data(), buffer_size);
// We should get the original data by decrypting it.
ASSERT_OK(key.Decrypt(encrypted.data(), buffer_size, decrypted.data()));
ASSERT_EQ(0, memcmp(original.data(), decrypted.data(), buffer_size));
}
}
@@ -83,17 +88,23 @@ TEST_F(OpenSSLUtilTest, EncryptInPlace) {
vector<uint8_t> original(buffer_size);
vector<uint8_t> scratch(buffer_size); // Scratch buffer for in-place encryption.
GenerateRandomData(original.data(), buffer_size);
memcpy(scratch.data(), original.data(), buffer_size);
EncryptionKey key;
key.InitializeRandom();
ASSERT_OK(key.Encrypt(scratch.data(), buffer_size, scratch.data()));
// Check that encryption did something
ASSERT_NE(0, memcmp(original.data(), scratch.data(), buffer_size));
ASSERT_OK(key.Decrypt(scratch.data(), buffer_size, scratch.data()));
// Check that we get the original data back.
ASSERT_EQ(0, memcmp(original.data(), scratch.data(), buffer_size));
// Check both CTR & CFB
AES_CIPHER_MODE modes[] = {AES_256_CTR, AES_256_CFB};
for (auto m : modes) {
GenerateRandomData(original.data(), buffer_size);
memcpy(scratch.data(), original.data(), buffer_size);
key.InitializeRandom();
key.SetCipherMode(m);
ASSERT_OK(key.Encrypt(scratch.data(), buffer_size, scratch.data()));
// Check that encryption did something
ASSERT_NE(0, memcmp(original.data(), scratch.data(), buffer_size));
ASSERT_OK(key.Decrypt(scratch.data(), buffer_size, scratch.data()));
// Check that we get the original data back.
ASSERT_EQ(0, memcmp(original.data(), scratch.data(), buffer_size));
}
}
/// Test that encryption works with buffer lengths that don't fit in a 32-bit integer.

View File

@@ -26,6 +26,7 @@
#include <openssl/sha.h>
#include "common/atomic.h"
#include "gutil/port.h" // ATTRIBUTE_WEAK
#include "gutil/strings/substitute.h"
#include "common/names.h"
@@ -99,13 +100,15 @@ Status EncryptionKey::EncryptInternal(
EVP_CIPHER_CTX_init(&ctx);
EVP_CIPHER_CTX_set_padding(&ctx, 0);
int success;
// Start encryption/decryption. We use a 256-bit AES key, and the cipher block mode
// is CFB because this gives us a stream cipher, which supports arbitrary
// length ciphertexts - it doesn't have to be a multiple of 16 bytes.
success = encrypt ? EVP_EncryptInit_ex(&ctx, EVP_aes_256_cfb(), NULL, key_, iv_) :
EVP_DecryptInit_ex(&ctx, EVP_aes_256_cfb(), NULL, key_, iv_);
// is either CTR or CFB(stream cipher), both of which support arbitrary length
// ciphertexts - it doesn't have to be a multiple of 16 bytes. Additionally, CTR
// mode is well-optimized(instruction level parallelism) with hardware acceleration
// on x86 and PowerPC
const EVP_CIPHER* evpCipher = GetCipher();
int success = encrypt ? EVP_EncryptInit_ex(&ctx, evpCipher, NULL, key_, iv_) :
EVP_DecryptInit_ex(&ctx, evpCipher, NULL, key_, iv_);
if (success != 1) {
return OpenSSLErr(encrypt ? "EVP_EncryptInit_ex" : "EVP_DecryptInit_ex");
}
@@ -122,7 +125,7 @@ Status EncryptionKey::EncryptInternal(
if (success != 1) {
return OpenSSLErr(encrypt ? "EVP_EncryptUpdate" : "EVP_DecryptUpdate");
}
// This is safe because we're using CFB mode without padding.
// This is safe because we're using CTR/CFB mode without padding.
DCHECK_EQ(in_len, out_len);
offset += in_len;
}
@@ -134,8 +137,21 @@ Status EncryptionKey::EncryptInternal(
if (success != 1) {
return OpenSSLErr(encrypt ? "EVP_EncryptFinal" : "EVP_DecryptFinal");
}
// Again safe due to CFB with no padding
// Again safe due to CTR/CFB with no padding
DCHECK_EQ(final_out_len, 0);
return Status::OK();
}
extern "C" {
ATTRIBUTE_WEAK
const EVP_CIPHER* EVP_aes_256_ctr();
}
const EVP_CIPHER* EncryptionKey::GetCipher() const {
// use weak symbol to avoid compiling error on OpenSSL 1.0.0 environment
if (mode_ == AES_256_CTR && EVP_aes_256_ctr) return EVP_aes_256_ctr();
// otherwise, fallback to CFB mode
return EVP_aes_256_cfb();
}
}

View File

@@ -19,16 +19,25 @@
#define IMPALA_UTIL_OPENSSL_UTIL_H
#include <openssl/aes.h>
#include <openssl/evp.h>
#include <openssl/sha.h>
#include "common/status.h"
namespace impala {
#define OPENSSL_VERSION_1_0_1 0x1000100L
/// Add entropy from the system RNG to OpenSSL's global RNG. Called at system startup
/// and again periodically to add new entropy.
void SeedOpenSSLRNG();
enum AES_CIPHER_MODE {
AES_256_CTR,
AES_256_CFB,
AES_256_GCM // not supported now.
};
/// The hash of a data buffer used for checking integrity. A SHA256 hash is used
/// internally.
class IntegrityHash {
@@ -47,20 +56,23 @@ class IntegrityHash {
/// The key and initialization vector (IV) required to encrypt and decrypt a buffer of
/// data. This should be regenerated for each buffer of data.
///
/// We use AES with a 256-bit key and CFB cipher block mode, which gives us a stream
/// cipher that can support arbitrary-length ciphertexts. The IV is used as an input to
/// the cipher as the "block to supply before the first block of plaintext". This is
/// required because all ciphers (except the weak ECB) are built such that each block
/// depends on the output from the previous block. Since the first block doesn't have
/// a previous block, we supply this IV. Think of it as starting off the chain of
/// We use AES with a 256-bit key and CTR/CFB cipher block mode, which gives us a stream
/// cipher that can support arbitrary-length ciphertexts. If OpenSSL version at runtime
/// is 1.0.1 or above, CTR mode is used, otherwise CFB mode is used. The IV is used as
/// an input to the cipher as the "block to supply before the first block of plaintext".
/// This is required because all ciphers (except the weak ECB) are built such that each
/// block depends on the output from the previous block. Since the first block doesn't
/// have a previous block, we supply this IV. Think of it as starting off the chain of
/// encryption.
class EncryptionKey {
public:
EncryptionKey() : initialized_(false) {}
EncryptionKey() : initialized_(false) {
mode_ = SSLeay() < OPENSSL_VERSION_1_0_1 ? AES_256_CFB : AES_256_CTR;
}
/// Initialize a key for temporary use with randomly generated data. Reinitializes with
/// new random values if the key was already initialized. We use AES-CFB mode so key/IV
/// pairs should not be reused. This function automatically reseeds the RNG
/// new random values if the key was already initialized. We use AES-CTR/AES-CFB mode
/// so key/IV pairs should not be reused. This function automatically reseeds the RNG
/// periodically, so callers do not need to do it.
void InitializeRandom();
@@ -75,6 +87,11 @@ class EncryptionKey {
/// otherwise the buffers must not overlap.
Status Decrypt(const uint8_t* data, int64_t len, uint8_t* out) const WARN_UNUSED_RESULT;
/// Specify a cipher mode. Currently used only for testing but maybe in future we
/// can provide a configuration option for the end user who can choose a preferred
/// mode(GCM, CTR, CFB...) based on their software/hardware environment.
void SetCipherMode(AES_CIPHER_MODE m) { mode_ = m; }
private:
/// Helper method that encrypts/decrypts if 'encrypt' is true/false respectively.
/// A buffer of input data 'data' of length 'len' is encrypted/decrypted with this
@@ -88,11 +105,17 @@ class EncryptionKey {
/// uninitialized keys.
bool initialized_;
/// return a EVP_CIPHER according to cipher mode at runtime
const EVP_CIPHER* GetCipher() const;
/// An AES 256-bit key.
uint8_t key_[32];
/// An initialization vector to feed as the first block to AES.
uint8_t iv_[AES_BLOCK_SIZE];
/// Cipher Mode
AES_CIPHER_MODE mode_;
};
}