mirror of
https://github.com/apache/impala.git
synced 2025-12-30 03:01:44 -05:00
IMPALA-6128: Add support for AES-CTR encryption when spilling to disk
CFB mode is a stream cipher and is secure when used with a different nonce/IV for every message. However it can be a performance bottleneck. CTR mode is also stream cipher and is secure, 4~6x faster than CFB mode in OpenSSL. AES-CTR+SHA256 is about 40~70% faster than AES-CFB+SHA256. CTR mode is used if OpenSSL version>=1.0.1 at runtime, otherwise fall back to using CFB mode. Testing: run runtime tmp-file-mgr-test, openssl-util-test, buffer-pool-test and buffered-tuple-stream-test The ut case openssl-util-test.EncryptInPlace tests encryption in both modes. Change-Id: I9debc240615dd8cdbf00ec8730cff62ffef52aff Reviewed-on: http://gerrit.cloudera.org:8080/8861 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Tested-by: Impala Public Jenkins
This commit is contained in:
committed by
Impala Public Jenkins
parent
f810458ca4
commit
514dfaf9fd
@@ -612,8 +612,8 @@ void TmpFileMgr::WriteHandle::WaitForWrite() {
|
||||
Status TmpFileMgr::WriteHandle::EncryptAndHash(MemRange buffer) {
|
||||
DCHECK(FLAGS_disk_spill_encryption);
|
||||
SCOPED_TIMER(encryption_timer_);
|
||||
// Since we're using AES-CFB mode, we must take care not to reuse a key/IV pair.
|
||||
// Regenerate a new key and IV for every data buffer we write.
|
||||
// Since we're using AES-CTR/AES-CFB mode, we must take care not to reuse a
|
||||
// key/IV pair. Regenerate a new key and IV for every data buffer we write.
|
||||
key_.InitializeRandom();
|
||||
RETURN_IF_ERROR(key_.Encrypt(buffer.data(), buffer.len(), buffer.data()));
|
||||
hash_.Compute(buffer.data(), buffer.len());
|
||||
|
||||
@@ -56,24 +56,29 @@ TEST_F(OpenSSLUtilTest, Encryption) {
|
||||
vector<uint8_t> decrypted(buffer_size);
|
||||
GenerateRandomData(original.data(), buffer_size);
|
||||
|
||||
// Iterate multiple times to ensure that key regeneration works correctly.
|
||||
EncryptionKey key;
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
key.InitializeRandom(); // Generate a new key for each iteration.
|
||||
// Check both CTR & CFB
|
||||
AES_CIPHER_MODE modes[] = {AES_256_CTR, AES_256_CFB};
|
||||
for (auto m : modes) {
|
||||
// Iterate multiple times to ensure that key regeneration works correctly.
|
||||
EncryptionKey key;
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
key.InitializeRandom(); // Generate a new key for each iteration.
|
||||
key.SetCipherMode(m);
|
||||
|
||||
// Check that OpenSSL is happy with the amount of entropy we're feeding it.
|
||||
DCHECK_EQ(1, RAND_status());
|
||||
// Check that OpenSSL is happy with the amount of entropy we're feeding it.
|
||||
DCHECK_EQ(1, RAND_status());
|
||||
|
||||
ASSERT_OK(key.Encrypt(original.data(), buffer_size, encrypted.data()));
|
||||
if (i > 0) {
|
||||
// Check that we're not somehow reusing the same key.
|
||||
ASSERT_NE(0, memcmp(encrypted.data(), prev_encrypted.data(), buffer_size));
|
||||
ASSERT_OK(key.Encrypt(original.data(), buffer_size, encrypted.data()));
|
||||
if (i > 0) {
|
||||
// Check that we're not somehow reusing the same key.
|
||||
ASSERT_NE(0, memcmp(encrypted.data(), prev_encrypted.data(), buffer_size));
|
||||
}
|
||||
memcpy(prev_encrypted.data(), encrypted.data(), buffer_size);
|
||||
|
||||
// We should get the original data by decrypting it.
|
||||
ASSERT_OK(key.Decrypt(encrypted.data(), buffer_size, decrypted.data()));
|
||||
ASSERT_EQ(0, memcmp(original.data(), decrypted.data(), buffer_size));
|
||||
}
|
||||
memcpy(prev_encrypted.data(), encrypted.data(), buffer_size);
|
||||
|
||||
// We should get the original data by decrypting it.
|
||||
ASSERT_OK(key.Decrypt(encrypted.data(), buffer_size, decrypted.data()));
|
||||
ASSERT_EQ(0, memcmp(original.data(), decrypted.data(), buffer_size));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,17 +88,23 @@ TEST_F(OpenSSLUtilTest, EncryptInPlace) {
|
||||
vector<uint8_t> original(buffer_size);
|
||||
vector<uint8_t> scratch(buffer_size); // Scratch buffer for in-place encryption.
|
||||
|
||||
GenerateRandomData(original.data(), buffer_size);
|
||||
memcpy(scratch.data(), original.data(), buffer_size);
|
||||
|
||||
EncryptionKey key;
|
||||
key.InitializeRandom();
|
||||
ASSERT_OK(key.Encrypt(scratch.data(), buffer_size, scratch.data()));
|
||||
// Check that encryption did something
|
||||
ASSERT_NE(0, memcmp(original.data(), scratch.data(), buffer_size));
|
||||
ASSERT_OK(key.Decrypt(scratch.data(), buffer_size, scratch.data()));
|
||||
// Check that we get the original data back.
|
||||
ASSERT_EQ(0, memcmp(original.data(), scratch.data(), buffer_size));
|
||||
// Check both CTR & CFB
|
||||
AES_CIPHER_MODE modes[] = {AES_256_CTR, AES_256_CFB};
|
||||
for (auto m : modes) {
|
||||
GenerateRandomData(original.data(), buffer_size);
|
||||
memcpy(scratch.data(), original.data(), buffer_size);
|
||||
|
||||
key.InitializeRandom();
|
||||
key.SetCipherMode(m);
|
||||
|
||||
ASSERT_OK(key.Encrypt(scratch.data(), buffer_size, scratch.data()));
|
||||
// Check that encryption did something
|
||||
ASSERT_NE(0, memcmp(original.data(), scratch.data(), buffer_size));
|
||||
ASSERT_OK(key.Decrypt(scratch.data(), buffer_size, scratch.data()));
|
||||
// Check that we get the original data back.
|
||||
ASSERT_EQ(0, memcmp(original.data(), scratch.data(), buffer_size));
|
||||
}
|
||||
}
|
||||
|
||||
/// Test that encryption works with buffer lengths that don't fit in a 32-bit integer.
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#include "common/atomic.h"
|
||||
#include "gutil/port.h" // ATTRIBUTE_WEAK
|
||||
#include "gutil/strings/substitute.h"
|
||||
|
||||
#include "common/names.h"
|
||||
@@ -99,13 +100,15 @@ Status EncryptionKey::EncryptInternal(
|
||||
EVP_CIPHER_CTX_init(&ctx);
|
||||
EVP_CIPHER_CTX_set_padding(&ctx, 0);
|
||||
|
||||
int success;
|
||||
|
||||
// Start encryption/decryption. We use a 256-bit AES key, and the cipher block mode
|
||||
// is CFB because this gives us a stream cipher, which supports arbitrary
|
||||
// length ciphertexts - it doesn't have to be a multiple of 16 bytes.
|
||||
success = encrypt ? EVP_EncryptInit_ex(&ctx, EVP_aes_256_cfb(), NULL, key_, iv_) :
|
||||
EVP_DecryptInit_ex(&ctx, EVP_aes_256_cfb(), NULL, key_, iv_);
|
||||
// is either CTR or CFB(stream cipher), both of which support arbitrary length
|
||||
// ciphertexts - it doesn't have to be a multiple of 16 bytes. Additionally, CTR
|
||||
// mode is well-optimized(instruction level parallelism) with hardware acceleration
|
||||
// on x86 and PowerPC
|
||||
const EVP_CIPHER* evpCipher = GetCipher();
|
||||
int success = encrypt ? EVP_EncryptInit_ex(&ctx, evpCipher, NULL, key_, iv_) :
|
||||
EVP_DecryptInit_ex(&ctx, evpCipher, NULL, key_, iv_);
|
||||
|
||||
if (success != 1) {
|
||||
return OpenSSLErr(encrypt ? "EVP_EncryptInit_ex" : "EVP_DecryptInit_ex");
|
||||
}
|
||||
@@ -122,7 +125,7 @@ Status EncryptionKey::EncryptInternal(
|
||||
if (success != 1) {
|
||||
return OpenSSLErr(encrypt ? "EVP_EncryptUpdate" : "EVP_DecryptUpdate");
|
||||
}
|
||||
// This is safe because we're using CFB mode without padding.
|
||||
// This is safe because we're using CTR/CFB mode without padding.
|
||||
DCHECK_EQ(in_len, out_len);
|
||||
offset += in_len;
|
||||
}
|
||||
@@ -134,8 +137,21 @@ Status EncryptionKey::EncryptInternal(
|
||||
if (success != 1) {
|
||||
return OpenSSLErr(encrypt ? "EVP_EncryptFinal" : "EVP_DecryptFinal");
|
||||
}
|
||||
// Again safe due to CFB with no padding
|
||||
// Again safe due to CTR/CFB with no padding
|
||||
DCHECK_EQ(final_out_len, 0);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
ATTRIBUTE_WEAK
|
||||
const EVP_CIPHER* EVP_aes_256_ctr();
|
||||
}
|
||||
|
||||
const EVP_CIPHER* EncryptionKey::GetCipher() const {
|
||||
// use weak symbol to avoid compiling error on OpenSSL 1.0.0 environment
|
||||
if (mode_ == AES_256_CTR && EVP_aes_256_ctr) return EVP_aes_256_ctr();
|
||||
|
||||
// otherwise, fallback to CFB mode
|
||||
return EVP_aes_256_cfb();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,16 +19,25 @@
|
||||
#define IMPALA_UTIL_OPENSSL_UTIL_H
|
||||
|
||||
#include <openssl/aes.h>
|
||||
#include <openssl/evp.h>
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#include "common/status.h"
|
||||
|
||||
namespace impala {
|
||||
|
||||
#define OPENSSL_VERSION_1_0_1 0x1000100L
|
||||
|
||||
/// Add entropy from the system RNG to OpenSSL's global RNG. Called at system startup
|
||||
/// and again periodically to add new entropy.
|
||||
void SeedOpenSSLRNG();
|
||||
|
||||
enum AES_CIPHER_MODE {
|
||||
AES_256_CTR,
|
||||
AES_256_CFB,
|
||||
AES_256_GCM // not supported now.
|
||||
};
|
||||
|
||||
/// The hash of a data buffer used for checking integrity. A SHA256 hash is used
|
||||
/// internally.
|
||||
class IntegrityHash {
|
||||
@@ -47,20 +56,23 @@ class IntegrityHash {
|
||||
/// The key and initialization vector (IV) required to encrypt and decrypt a buffer of
|
||||
/// data. This should be regenerated for each buffer of data.
|
||||
///
|
||||
/// We use AES with a 256-bit key and CFB cipher block mode, which gives us a stream
|
||||
/// cipher that can support arbitrary-length ciphertexts. The IV is used as an input to
|
||||
/// the cipher as the "block to supply before the first block of plaintext". This is
|
||||
/// required because all ciphers (except the weak ECB) are built such that each block
|
||||
/// depends on the output from the previous block. Since the first block doesn't have
|
||||
/// a previous block, we supply this IV. Think of it as starting off the chain of
|
||||
/// We use AES with a 256-bit key and CTR/CFB cipher block mode, which gives us a stream
|
||||
/// cipher that can support arbitrary-length ciphertexts. If OpenSSL version at runtime
|
||||
/// is 1.0.1 or above, CTR mode is used, otherwise CFB mode is used. The IV is used as
|
||||
/// an input to the cipher as the "block to supply before the first block of plaintext".
|
||||
/// This is required because all ciphers (except the weak ECB) are built such that each
|
||||
/// block depends on the output from the previous block. Since the first block doesn't
|
||||
/// have a previous block, we supply this IV. Think of it as starting off the chain of
|
||||
/// encryption.
|
||||
class EncryptionKey {
|
||||
public:
|
||||
EncryptionKey() : initialized_(false) {}
|
||||
EncryptionKey() : initialized_(false) {
|
||||
mode_ = SSLeay() < OPENSSL_VERSION_1_0_1 ? AES_256_CFB : AES_256_CTR;
|
||||
}
|
||||
|
||||
/// Initialize a key for temporary use with randomly generated data. Reinitializes with
|
||||
/// new random values if the key was already initialized. We use AES-CFB mode so key/IV
|
||||
/// pairs should not be reused. This function automatically reseeds the RNG
|
||||
/// new random values if the key was already initialized. We use AES-CTR/AES-CFB mode
|
||||
/// so key/IV pairs should not be reused. This function automatically reseeds the RNG
|
||||
/// periodically, so callers do not need to do it.
|
||||
void InitializeRandom();
|
||||
|
||||
@@ -75,6 +87,11 @@ class EncryptionKey {
|
||||
/// otherwise the buffers must not overlap.
|
||||
Status Decrypt(const uint8_t* data, int64_t len, uint8_t* out) const WARN_UNUSED_RESULT;
|
||||
|
||||
/// Specify a cipher mode. Currently used only for testing but maybe in future we
|
||||
/// can provide a configuration option for the end user who can choose a preferred
|
||||
/// mode(GCM, CTR, CFB...) based on their software/hardware environment.
|
||||
void SetCipherMode(AES_CIPHER_MODE m) { mode_ = m; }
|
||||
|
||||
private:
|
||||
/// Helper method that encrypts/decrypts if 'encrypt' is true/false respectively.
|
||||
/// A buffer of input data 'data' of length 'len' is encrypted/decrypted with this
|
||||
@@ -88,11 +105,17 @@ class EncryptionKey {
|
||||
/// uninitialized keys.
|
||||
bool initialized_;
|
||||
|
||||
/// return a EVP_CIPHER according to cipher mode at runtime
|
||||
const EVP_CIPHER* GetCipher() const;
|
||||
|
||||
/// An AES 256-bit key.
|
||||
uint8_t key_[32];
|
||||
|
||||
/// An initialization vector to feed as the first block to AES.
|
||||
uint8_t iv_[AES_BLOCK_SIZE];
|
||||
|
||||
/// Cipher Mode
|
||||
AES_CIPHER_MODE mode_;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user