Skip to content

Commit

Permalink
Avoid memcpy in AdditiveCipherTemplate<S>::ProcessData (GH #683, GH #…
Browse files Browse the repository at this point in the history
…1010, PR #1019)

We found we can avoid the memcpy in the previous workaround by using a volatile pointer. The pointer appears to tame the optimizer so the compiler does not short-circuit some calls when outString == inString.
  • Loading branch information
noloader committed Mar 17, 2021
1 parent 4d15863 commit ca123d1
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 28 deletions.
2 changes: 1 addition & 1 deletion dll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
NAMESPACE_BEGIN(CryptoPP)

// Guarding based on DLL due to Clang, http://github.com/weidai11/cryptopp/issues/300
#if defined(CRYPTOPP_IS_DLL)
#ifdef CRYPTOPP_IS_DLL
template<> const byte PKCS_DigestDecoration<SHA1>::decoration[] = {0x30,0x21,0x30,0x09,0x06,0x05,0x2B,0x0E,0x03,0x02,0x1A,0x05,0x00,0x04,0x14};
template<> const unsigned int PKCS_DigestDecoration<SHA1>::length = sizeof(PKCS_DigestDecoration<SHA1>::decoration);

Expand Down
66 changes: 40 additions & 26 deletions strciphr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ extern const char STRCIPHER_FNAME[] = __FILE__;

NAMESPACE_BEGIN(CryptoPP)

// Workaround for https://github.com/weidai11/cryptopp/issues/683
// and https://github.com/weidai11/cryptopp/issues/1010. GCC and
// Clang are optimizing away some calls in ProcessData on ARM when
// outString == inString. Other workarounds are available but they
// require a memcpy, which degrades performance to varying degrees.
static volatile byte* s_workaround;

template <class S>
void AdditiveCipherTemplate<S>::UncheckedSetKey(const byte *key, unsigned int length, const NameValuePairs &params)
{
Expand Down Expand Up @@ -68,36 +75,41 @@ void AdditiveCipherTemplate<S>::GenerateBlock(byte *outString, size_t length)
template <class S>
void AdditiveCipherTemplate<S>::ProcessData(byte *outString, const byte *inString, size_t length)
{
CRYPTOPP_ASSERT(outString); CRYPTOPP_ASSERT(inString);
CRYPTOPP_ASSERT(length % this->MandatoryBlockSize() == 0);

PolicyInterface &policy = this->AccessPolicy();
unsigned int bytesPerIteration = policy.GetBytesPerIteration();

if (m_leftOver > 0)
{
const size_t len = STDMIN(m_leftOver, length);
xorbuf(outString, inString, PtrSub(KeystreamBufferEnd(), m_leftOver), len);

length -= len; m_leftOver -= len;
inString = PtrAdd(inString, len);
outString = PtrAdd(outString, len);

if (!length) {return;}
length -= len; m_leftOver -= len;
}

PolicyInterface &policy = this->AccessPolicy();
unsigned int bytesPerIteration = policy.GetBytesPerIteration();
if (!length) {return;}

const unsigned int alignment = policy.GetAlignment();
const bool inAligned = IsAlignedOn(inString, alignment);
const bool outAligned = IsAlignedOn(outString, alignment);

if (policy.CanOperateKeystream() && length >= bytesPerIteration)
{
const size_t iterations = length / bytesPerIteration;
unsigned int alignment = policy.GetAlignment();
volatile int inAligned = IsAlignedOn(inString, alignment) << 1;
volatile int outAligned = IsAlignedOn(outString, alignment) << 0;
KeystreamOperationFlags flags = static_cast<KeystreamOperationFlags>(
(inAligned ? EnumToInt(INPUT_ALIGNED) : 0) | (outAligned ? EnumToInt(OUTPUT_ALIGNED) : 0));
KeystreamOperation operation = KeystreamOperation(flags);

KeystreamOperation operation = KeystreamOperation(inAligned | outAligned);
policy.OperateKeystream(operation, outString, inString, iterations);
s_workaround = const_cast<volatile byte*>(outString);

inString = PtrAdd(inString, iterations * bytesPerIteration);
outString = PtrAdd(outString, iterations * bytesPerIteration);
length -= iterations * bytesPerIteration;

if (!length) {return;}
}

size_t bufferByteSize = m_buffer.size();
Expand All @@ -108,9 +120,9 @@ void AdditiveCipherTemplate<S>::ProcessData(byte *outString, const byte *inStrin
policy.WriteKeystream(m_buffer, bufferIterations);
xorbuf(outString, inString, KeystreamBufferBegin(), bufferByteSize);

length -= bufferByteSize;
inString = PtrAdd(inString, bufferByteSize);
outString = PtrAdd(outString, bufferByteSize);
length -= bufferByteSize;
}

if (length > 0)
Expand All @@ -120,6 +132,7 @@ void AdditiveCipherTemplate<S>::ProcessData(byte *outString, const byte *inStrin

policy.WriteKeystream(PtrSub(KeystreamBufferEnd(), bufferByteSize), bufferIterations);
xorbuf(outString, inString, PtrSub(KeystreamBufferEnd(), bufferByteSize), length);

m_leftOver = bufferByteSize - length;
}
}
Expand All @@ -137,15 +150,15 @@ template <class BASE>
void AdditiveCipherTemplate<BASE>::Seek(lword position)
{
PolicyInterface &policy = this->AccessPolicy();
word32 bytesPerIteration = policy.GetBytesPerIteration();
unsigned int bytesPerIteration = policy.GetBytesPerIteration();

policy.SeekToIteration(position / bytesPerIteration);
position %= bytesPerIteration;

if (position > 0)
{
policy.WriteKeystream(PtrSub(KeystreamBufferEnd(), bytesPerIteration), 1);
m_leftOver = bytesPerIteration - static_cast<word32>(position);
m_leftOver = bytesPerIteration - static_cast<unsigned int>(position);
}
else
m_leftOver = 0;
Expand Down Expand Up @@ -182,17 +195,17 @@ void CFB_CipherTemplate<BASE>::ProcessData(byte *outString, const byte *inString
CRYPTOPP_ASSERT(length % this->MandatoryBlockSize() == 0);

PolicyInterface &policy = this->AccessPolicy();
word32 bytesPerIteration = policy.GetBytesPerIteration();
unsigned int bytesPerIteration = policy.GetBytesPerIteration();
byte *reg = policy.GetRegisterBegin();

if (m_leftOver)
{
const size_t len = STDMIN(m_leftOver, length);
CombineMessageAndShiftRegister(outString, PtrAdd(reg, bytesPerIteration - m_leftOver), inString, len);

m_leftOver -= len; length -= len;
inString = PtrAdd(inString, len);
outString = PtrAdd(outString, len);
m_leftOver -= len; length -= len;
}

if (!length) {return;}
Expand All @@ -207,8 +220,8 @@ void CFB_CipherTemplate<BASE>::ProcessData(byte *outString, const byte *inString
// Also see https://github.com/weidai11/cryptopp/issues/683.

const unsigned int alignment = policy.GetAlignment();
volatile bool inAligned = IsAlignedOn(inString, alignment);
volatile bool outAligned = IsAlignedOn(outString, alignment);
const bool inAligned = IsAlignedOn(inString, alignment);
const bool outAligned = IsAlignedOn(outString, alignment);

if (policy.CanIterate() && length >= bytesPerIteration && outAligned)
{
Expand All @@ -226,19 +239,19 @@ void CFB_CipherTemplate<BASE>::ProcessData(byte *outString, const byte *inString
// in-place so it short-circuits the transform. However, if we use a stand-alone
// reproducer with the same data then the issue is _not_ present.
//
// When working on this issue we introduced PtrAdd and PtrSub to ensure we were
// not running afoul of pointer arithmetic rules of the language. Namely we need
// to use ptrdiff_t when subtracting pointers. We believe the relevant code paths
// are clean.
//
// One workaround is a distinct and aligned temporary buffer. It [mostly] works
// as expected but requires an extra allocation (casts not shown):
//
// std::string temp(inString, length);
// policy.Iterate(outString, &temp[0], cipherDir, length / bytesPerIteration);
//
// Another workaround is:
//
// std::memcpy(outString, inString, length);
// policy.Iterate(outString, outString, cipherDir, length / bytesPerIteration);

std::memcpy(outString, inString, length);
policy.Iterate(outString, outString, cipherDir, length / bytesPerIteration);
policy.Iterate(outString, inString, cipherDir, length / bytesPerIteration);
s_workaround = const_cast<volatile byte*>(outString);
}
const size_t remainder = length % bytesPerIteration;
inString = PtrAdd(inString, length - remainder);
Expand All @@ -250,9 +263,10 @@ void CFB_CipherTemplate<BASE>::ProcessData(byte *outString, const byte *inString
{
policy.TransformRegister();
CombineMessageAndShiftRegister(outString, reg, inString, bytesPerIteration);
length -= bytesPerIteration;

inString = PtrAdd(inString, bytesPerIteration);
outString = PtrAdd(outString, bytesPerIteration);
length -= bytesPerIteration;
}

if (length > 0)
Expand Down
5 changes: 4 additions & 1 deletion strciphr.h
Original file line number Diff line number Diff line change
Expand Up @@ -702,13 +702,16 @@ class SymmetricCipherFinal : public AlgorithmImpl<SimpleKeyingInterfaceImpl<BASE

NAMESPACE_END

// Used by dll.cpp to ensure objects are in dll.o, and not strciphr.o.
#ifdef CRYPTOPP_MANUALLY_INSTANTIATE_TEMPLATES
#include "strciphr.cpp"
# include "strciphr.cpp"
#endif

NAMESPACE_BEGIN(CryptoPP)

CRYPTOPP_DLL_TEMPLATE_CLASS AbstractPolicyHolder<AdditiveCipherAbstractPolicy, SymmetricCipher>;
CRYPTOPP_DLL_TEMPLATE_CLASS AdditiveCipherTemplate<AbstractPolicyHolder<AdditiveCipherAbstractPolicy, SymmetricCipher> >;

CRYPTOPP_DLL_TEMPLATE_CLASS CFB_CipherTemplate<AbstractPolicyHolder<CFB_CipherAbstractPolicy, SymmetricCipher> >;
CRYPTOPP_DLL_TEMPLATE_CLASS CFB_EncryptionTemplate<AbstractPolicyHolder<CFB_CipherAbstractPolicy, SymmetricCipher> >;
CRYPTOPP_DLL_TEMPLATE_CLASS CFB_DecryptionTemplate<AbstractPolicyHolder<CFB_CipherAbstractPolicy, SymmetricCipher> >;
Expand Down

0 comments on commit ca123d1

Please sign in to comment.