RSA and Diffie-Hellman performance [Was: RFR(L): 8069539: RSA acceleration]

Andrew Haley aph at redhat.com
Thu May 28 09:25:38 UTC 2015

```On 05/28/2015 08:51 AM, Andrew Haley wrote:
> On 27/05/15 21:35, Anthony Scarpino wrote:
>
>> How much of the montgomery multiply and sqaure are you planning to
>> intrinsify?  Are you doing the whole thing or just portions of the
>> operations similar to multiplyToLen and squareToLen?
>
> I'm doing the whole montgomery multiply and square operation in a
> routine which interleaves multiplication and reduction so that the
> intermediate product is never written into memory.

Andrew.

// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
#define MACC(A, B, T0, T1, T2)                                  \
do {                                                            \
unsigned long hi, lo;                                         \
: "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)  \
: "r"(A), "a"(B));                                   \
} while(0)

// Fast Montgomery multiplication.  The derivation of the algorithm is
// in  A Cryptographic Library for the Motorola DSP56000,
// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.

static void __attribute__((noinline))
montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
unsigned long m[], unsigned long inv, int len) {
unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
int i;

assert(inv * n[0] == -1UL);

for (i = 0; i < len; i++) {
int j;
for (j = 0; j < i; j++) {
MACC(a[j], b[i-j], t0, t1, t2);
MACC(m[j], n[i-j], t0, t1, t2);
}
MACC(a[i], b[0], t0, t1, t2);
m[i] = t0 * inv;
MACC(m[i], n[0], t0, t1, t2);

assert(t0 == 0);
t0 = t1; t1 = t2; t2 = 0;
}

for (i = len; i < 2*len; i++) {
int j;
for (j = i-len+1; j < len; j++) {
MACC(a[j], b[i-j], t0, t1, t2);
MACC(m[j], n[i-j], t0, t1, t2);
}
m[i-len] = t0;
t0 = t1; t1 = t2; t2 = 0;
}

m[len] = t0;
while(cmp(m, n, len+1) > 0)
sub(m, n, len+1);
}

```