From 514dfaf9fdff219256eaa9baf3efcc66bfdfafda Mon Sep 17 00:00:00 2001 From: Xianda Ke Date: Sun, 26 Nov 2017 15:35:22 +0800 Subject: [PATCH] IMPALA-6128: Add support for AES-CTR encryption when spilling to disk CFB mode is a stream cipher and is secure when used with a different nonce/IV for every message. However it can be a performance bottleneck. CTR mode is also stream cipher and is secure, 4~6x faster than CFB mode in OpenSSL. AES-CTR+SHA256 is about 40~70% faster than AES-CFB+SHA256. CTR mode is used if OpenSSL version>=1.0.1 at runtime, otherwise fall back to using CFB mode. Testing: run runtime tmp-file-mgr-test, openssl-util-test, buffer-pool-test and buffered-tuple-stream-test The ut case openssl-util-test.EncryptInPlace tests encryption in both modes. Change-Id: I9debc240615dd8cdbf00ec8730cff62ffef52aff Reviewed-on: http://gerrit.cloudera.org:8080/8861 Reviewed-by: Tim Armstrong Tested-by: Impala Public Jenkins --- be/src/runtime/tmp-file-mgr.cc | 4 +-- be/src/util/openssl-util-test.cc | 61 +++++++++++++++++++------------- be/src/util/openssl-util.cc | 32 ++++++++++++----- be/src/util/openssl-util.h | 41 ++++++++++++++++----- 4 files changed, 94 insertions(+), 44 deletions(-) diff --git a/be/src/runtime/tmp-file-mgr.cc b/be/src/runtime/tmp-file-mgr.cc index 24217dec1..650af0bb2 100644 --- a/be/src/runtime/tmp-file-mgr.cc +++ b/be/src/runtime/tmp-file-mgr.cc @@ -612,8 +612,8 @@ void TmpFileMgr::WriteHandle::WaitForWrite() { Status TmpFileMgr::WriteHandle::EncryptAndHash(MemRange buffer) { DCHECK(FLAGS_disk_spill_encryption); SCOPED_TIMER(encryption_timer_); - // Since we're using AES-CFB mode, we must take care not to reuse a key/IV pair. - // Regenerate a new key and IV for every data buffer we write. + // Since we're using AES-CTR/AES-CFB mode, we must take care not to reuse a + // key/IV pair. Regenerate a new key and IV for every data buffer we write. key_.InitializeRandom(); RETURN_IF_ERROR(key_.Encrypt(buffer.data(), buffer.len(), buffer.data())); hash_.Compute(buffer.data(), buffer.len()); diff --git a/be/src/util/openssl-util-test.cc b/be/src/util/openssl-util-test.cc index ef1b28eee..8d98b0d59 100644 --- a/be/src/util/openssl-util-test.cc +++ b/be/src/util/openssl-util-test.cc @@ -56,24 +56,29 @@ TEST_F(OpenSSLUtilTest, Encryption) { vector decrypted(buffer_size); GenerateRandomData(original.data(), buffer_size); - // Iterate multiple times to ensure that key regeneration works correctly. - EncryptionKey key; - for (int i = 0; i < 2; ++i) { - key.InitializeRandom(); // Generate a new key for each iteration. + // Check both CTR & CFB + AES_CIPHER_MODE modes[] = {AES_256_CTR, AES_256_CFB}; + for (auto m : modes) { + // Iterate multiple times to ensure that key regeneration works correctly. + EncryptionKey key; + for (int i = 0; i < 2; ++i) { + key.InitializeRandom(); // Generate a new key for each iteration. + key.SetCipherMode(m); - // Check that OpenSSL is happy with the amount of entropy we're feeding it. - DCHECK_EQ(1, RAND_status()); + // Check that OpenSSL is happy with the amount of entropy we're feeding it. + DCHECK_EQ(1, RAND_status()); - ASSERT_OK(key.Encrypt(original.data(), buffer_size, encrypted.data())); - if (i > 0) { - // Check that we're not somehow reusing the same key. - ASSERT_NE(0, memcmp(encrypted.data(), prev_encrypted.data(), buffer_size)); + ASSERT_OK(key.Encrypt(original.data(), buffer_size, encrypted.data())); + if (i > 0) { + // Check that we're not somehow reusing the same key. + ASSERT_NE(0, memcmp(encrypted.data(), prev_encrypted.data(), buffer_size)); + } + memcpy(prev_encrypted.data(), encrypted.data(), buffer_size); + + // We should get the original data by decrypting it. + ASSERT_OK(key.Decrypt(encrypted.data(), buffer_size, decrypted.data())); + ASSERT_EQ(0, memcmp(original.data(), decrypted.data(), buffer_size)); } - memcpy(prev_encrypted.data(), encrypted.data(), buffer_size); - - // We should get the original data by decrypting it. - ASSERT_OK(key.Decrypt(encrypted.data(), buffer_size, decrypted.data())); - ASSERT_EQ(0, memcmp(original.data(), decrypted.data(), buffer_size)); } } @@ -83,17 +88,23 @@ TEST_F(OpenSSLUtilTest, EncryptInPlace) { vector original(buffer_size); vector scratch(buffer_size); // Scratch buffer for in-place encryption. - GenerateRandomData(original.data(), buffer_size); - memcpy(scratch.data(), original.data(), buffer_size); - EncryptionKey key; - key.InitializeRandom(); - ASSERT_OK(key.Encrypt(scratch.data(), buffer_size, scratch.data())); - // Check that encryption did something - ASSERT_NE(0, memcmp(original.data(), scratch.data(), buffer_size)); - ASSERT_OK(key.Decrypt(scratch.data(), buffer_size, scratch.data())); - // Check that we get the original data back. - ASSERT_EQ(0, memcmp(original.data(), scratch.data(), buffer_size)); + // Check both CTR & CFB + AES_CIPHER_MODE modes[] = {AES_256_CTR, AES_256_CFB}; + for (auto m : modes) { + GenerateRandomData(original.data(), buffer_size); + memcpy(scratch.data(), original.data(), buffer_size); + + key.InitializeRandom(); + key.SetCipherMode(m); + + ASSERT_OK(key.Encrypt(scratch.data(), buffer_size, scratch.data())); + // Check that encryption did something + ASSERT_NE(0, memcmp(original.data(), scratch.data(), buffer_size)); + ASSERT_OK(key.Decrypt(scratch.data(), buffer_size, scratch.data())); + // Check that we get the original data back. + ASSERT_EQ(0, memcmp(original.data(), scratch.data(), buffer_size)); + } } /// Test that encryption works with buffer lengths that don't fit in a 32-bit integer. diff --git a/be/src/util/openssl-util.cc b/be/src/util/openssl-util.cc index e3b22996d..a8ec976cc 100644 --- a/be/src/util/openssl-util.cc +++ b/be/src/util/openssl-util.cc @@ -26,6 +26,7 @@ #include #include "common/atomic.h" +#include "gutil/port.h" // ATTRIBUTE_WEAK #include "gutil/strings/substitute.h" #include "common/names.h" @@ -99,13 +100,15 @@ Status EncryptionKey::EncryptInternal( EVP_CIPHER_CTX_init(&ctx); EVP_CIPHER_CTX_set_padding(&ctx, 0); - int success; - // Start encryption/decryption. We use a 256-bit AES key, and the cipher block mode - // is CFB because this gives us a stream cipher, which supports arbitrary - // length ciphertexts - it doesn't have to be a multiple of 16 bytes. - success = encrypt ? EVP_EncryptInit_ex(&ctx, EVP_aes_256_cfb(), NULL, key_, iv_) : - EVP_DecryptInit_ex(&ctx, EVP_aes_256_cfb(), NULL, key_, iv_); + // is either CTR or CFB(stream cipher), both of which support arbitrary length + // ciphertexts - it doesn't have to be a multiple of 16 bytes. Additionally, CTR + // mode is well-optimized(instruction level parallelism) with hardware acceleration + // on x86 and PowerPC + const EVP_CIPHER* evpCipher = GetCipher(); + int success = encrypt ? EVP_EncryptInit_ex(&ctx, evpCipher, NULL, key_, iv_) : + EVP_DecryptInit_ex(&ctx, evpCipher, NULL, key_, iv_); + if (success != 1) { return OpenSSLErr(encrypt ? "EVP_EncryptInit_ex" : "EVP_DecryptInit_ex"); } @@ -122,7 +125,7 @@ Status EncryptionKey::EncryptInternal( if (success != 1) { return OpenSSLErr(encrypt ? "EVP_EncryptUpdate" : "EVP_DecryptUpdate"); } - // This is safe because we're using CFB mode without padding. + // This is safe because we're using CTR/CFB mode without padding. DCHECK_EQ(in_len, out_len); offset += in_len; } @@ -134,8 +137,21 @@ Status EncryptionKey::EncryptInternal( if (success != 1) { return OpenSSLErr(encrypt ? "EVP_EncryptFinal" : "EVP_DecryptFinal"); } - // Again safe due to CFB with no padding + // Again safe due to CTR/CFB with no padding DCHECK_EQ(final_out_len, 0); return Status::OK(); } + +extern "C" { +ATTRIBUTE_WEAK +const EVP_CIPHER* EVP_aes_256_ctr(); +} + +const EVP_CIPHER* EncryptionKey::GetCipher() const { + // use weak symbol to avoid compiling error on OpenSSL 1.0.0 environment + if (mode_ == AES_256_CTR && EVP_aes_256_ctr) return EVP_aes_256_ctr(); + + // otherwise, fallback to CFB mode + return EVP_aes_256_cfb(); +} } diff --git a/be/src/util/openssl-util.h b/be/src/util/openssl-util.h index 4b32db6cf..22f8235f3 100644 --- a/be/src/util/openssl-util.h +++ b/be/src/util/openssl-util.h @@ -19,16 +19,25 @@ #define IMPALA_UTIL_OPENSSL_UTIL_H #include +#include #include #include "common/status.h" namespace impala { +#define OPENSSL_VERSION_1_0_1 0x1000100L + /// Add entropy from the system RNG to OpenSSL's global RNG. Called at system startup /// and again periodically to add new entropy. void SeedOpenSSLRNG(); +enum AES_CIPHER_MODE { + AES_256_CTR, + AES_256_CFB, + AES_256_GCM // not supported now. +}; + /// The hash of a data buffer used for checking integrity. A SHA256 hash is used /// internally. class IntegrityHash { @@ -47,20 +56,23 @@ class IntegrityHash { /// The key and initialization vector (IV) required to encrypt and decrypt a buffer of /// data. This should be regenerated for each buffer of data. /// -/// We use AES with a 256-bit key and CFB cipher block mode, which gives us a stream -/// cipher that can support arbitrary-length ciphertexts. The IV is used as an input to -/// the cipher as the "block to supply before the first block of plaintext". This is -/// required because all ciphers (except the weak ECB) are built such that each block -/// depends on the output from the previous block. Since the first block doesn't have -/// a previous block, we supply this IV. Think of it as starting off the chain of +/// We use AES with a 256-bit key and CTR/CFB cipher block mode, which gives us a stream +/// cipher that can support arbitrary-length ciphertexts. If OpenSSL version at runtime +/// is 1.0.1 or above, CTR mode is used, otherwise CFB mode is used. The IV is used as +/// an input to the cipher as the "block to supply before the first block of plaintext". +/// This is required because all ciphers (except the weak ECB) are built such that each +/// block depends on the output from the previous block. Since the first block doesn't +/// have a previous block, we supply this IV. Think of it as starting off the chain of /// encryption. class EncryptionKey { public: - EncryptionKey() : initialized_(false) {} + EncryptionKey() : initialized_(false) { + mode_ = SSLeay() < OPENSSL_VERSION_1_0_1 ? AES_256_CFB : AES_256_CTR; + } /// Initialize a key for temporary use with randomly generated data. Reinitializes with - /// new random values if the key was already initialized. We use AES-CFB mode so key/IV - /// pairs should not be reused. This function automatically reseeds the RNG + /// new random values if the key was already initialized. We use AES-CTR/AES-CFB mode + /// so key/IV pairs should not be reused. This function automatically reseeds the RNG /// periodically, so callers do not need to do it. void InitializeRandom(); @@ -75,6 +87,11 @@ class EncryptionKey { /// otherwise the buffers must not overlap. Status Decrypt(const uint8_t* data, int64_t len, uint8_t* out) const WARN_UNUSED_RESULT; + /// Specify a cipher mode. Currently used only for testing but maybe in future we + /// can provide a configuration option for the end user who can choose a preferred + /// mode(GCM, CTR, CFB...) based on their software/hardware environment. + void SetCipherMode(AES_CIPHER_MODE m) { mode_ = m; } + private: /// Helper method that encrypts/decrypts if 'encrypt' is true/false respectively. /// A buffer of input data 'data' of length 'len' is encrypted/decrypted with this @@ -88,11 +105,17 @@ class EncryptionKey { /// uninitialized keys. bool initialized_; + /// return a EVP_CIPHER according to cipher mode at runtime + const EVP_CIPHER* GetCipher() const; + /// An AES 256-bit key. uint8_t key_[32]; /// An initialization vector to feed as the first block to AES. uint8_t iv_[AES_BLOCK_SIZE]; + + /// Cipher Mode + AES_CIPHER_MODE mode_; }; }