salsa
| I'm trying to use Intel's AES instruction set for AES encryption. The following piece of code works well with DMD2 but won't compile with LDC. ldc2 tells me this:
Basic Block in function '_D4main48__T21AES_128_KEY_EXPANSIONVAyaa7_656e6372797074Z21AES_128_KEY_EXPANSIONFNaNbNiNexPhPhZv' does not have terminator!
label %endentry
LLVM ERROR: Broken function found, compilation aborted!
Flow control in the asm block might be the problem.
By the way, how could I access arrays (ubyte[]) instead of pointers in inline assembly? Couldn't find a single piece of documentation...
I preferrably omit the use of 'naked' assembler functions. I tried to do it as in biguintx86.d but was confused with the calling conventions. Registers are used in reverse order compared to the C calling convention, aren't they?
******************
module main;
import std.stdio;
import core.cpuid;
void main(string[] args)
{
assert(sse2 && aes, "hardware does not support sse2 and aes!");
// test vectors
immutable ubyte[16] plaintext = cast(const ubyte[])x"6bc1bee22e409f96e93d7e117393172a";
immutable ubyte[16] ciphertext = cast(const ubyte[])x"3ad77bb40d7a3660a89ecaf32466ef97";
immutable ubyte[16] userKey = cast(const ubyte[])x"2b7e151628aed2a6abf7158809cf4f3c";
ubyte[16*11] keySchedule; // buffer for key schedule
AES_128_KEY_EXPANSION!"encrypt"(userKey.ptr, keySchedule.ptr); // initialize encryption key schedule
ubyte[16] buffer;
AES_128_ENCRYPT(keySchedule.ptr, plaintext.ptr, buffer.ptr); // encrypt one 128 bit block
assert(buffer == ciphertext, "aes encryption failed");
writeln("200 OK");
}
/// AES128 11 round encryption
/// Params:
/// key = 11*16 byte key schedule
/// plain = 16 bytes plaintext
/// ciphertext = at least 16 bytes output buffer
void AES_128_ENCRYPT(in ubyte* key, in ubyte* plain, ubyte* ciphertext)
in {
//assert(key.length == 16*ROUNDS, "invalid key size");
//assert(plain.length == 16, "invalid input block size");
//assert(ciphertext.length >= 16, "output buffer too small");
}
body {
asm {
mov RDX, key; // pointer to key schedule
// load key into XMM0-XMM10
lddqu XMM0, [RDX+0x00];
lddqu XMM1, [RDX+0x10];
lddqu XMM2, [RDX+0x20];
lddqu XMM3, [RDX+0x30];
lddqu XMM4, [RDX+0x40];
lddqu XMM5, [RDX+0x50];
lddqu XMM6, [RDX+0x60];
lddqu XMM7, [RDX+0x70];
lddqu XMM8, [RDX+0x80];
lddqu XMM9, [RDX+0x90];
lddqu XMM10, [RDX+0xA0];
// load plaintext into XMM15
mov RDX, plain; // pointer to plaintext
movdqu XMM15, [RDX]; // read plaintext block
// AES-128 encryption sequence.
// The data block is in XMM15.
// Registers XMM0–XMM10 hold the round keys(from 0 to 10 in this order).
// In the end, XMM15 holds the encryption result.
pxor XMM15, XMM0; // Whitening step (Round 0)
aesenc XMM15, XMM1; // Round 1
aesenc XMM15, XMM2; // Round 2
aesenc XMM15, XMM3; // Round 3
aesenc XMM15, XMM4; // Round 4
aesenc XMM15, XMM5; // Round 5
aesenc XMM15, XMM6; // Round 6
aesenc XMM15, XMM7; // Round 7
aesenc XMM15, XMM8; // Round 8
aesenc XMM15, XMM9; // Round 9
aesenclast XMM15, XMM10; // Round 10
mov RDX, ciphertext; // pointer to output buffer
movdqu [RDX], XMM15; // write processed data to buffer
}
}
///
/// Expand a 128 bit user key into 11 round keys
///
/// source: http://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf, Figure 19. AES-128 Key Expansion: Outlined Code Example
///
/// Params:
///
/// decrypt = generate decryption key if set to true. default: false
///
/// userKey = the AES key as given by the user
/// key = 11 round keys
///
///
enum ROUNDS = 11;
@trusted
public void AES_128_KEY_EXPANSION(string mode = "encrypt")(in ubyte* userKey, ubyte* key) nothrow @nogc
if(mode == "encrypt" || mode == "decrypt")
in {
//assertHardwareSupport();
//assert(userKey.length == 16, "invalid key size");
//assert(key.length == ROUNDS*16, "invalid key schedule size");
}
body {
asm {
mov RDX, userKey; // pointer to user key
movdqu XMM1, [RDX]; // read user key
xor RCX, RCX; // set index to 0
mov RDX, key; // pointer to working key
movdqu [RDX+RCX], XMM1;
add RCX, 0x10; // increment by 16 bytes
aeskeygenassist XMM2, XMM1, 0x01;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x02;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x04;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x08;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x10;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x20;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x40;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x80;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x1b;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x36;
call aes_128_assist;
}
static if(mode == "decrypt") {
asm {
// generate inverse key
call aesimc128;
}
} asm {
jmp END;
aes_128_assist:
pshufd XMM2, XMM2, 0xff;
//vpslldq XMM3, XMM1, 0x4; // vpslldq requires AVX, pslldq requires only SSE2
movdqu XMM3, XMM1;
pslldq XMM3, 0x4;
pxor XMM1, XMM3;
//vpslldq XMM3, XMM1, 0x4;
movdqu XMM3, XMM1;
pslldq XMM3, 0x4;
pxor XMM1, XMM3;
//vpslldq XMM3, XMM1, 0x4;
movdqu XMM3, XMM1;
pslldq XMM3, 0x4;
pxor XMM1, XMM3;
pxor XMM1, XMM2;
mov RDX, key; // pointer to working key
movdqu [RDX+RCX], XMM1; // store result in keySchedule
add RCX, 0x10; // increment index by 16 bytes
ret; // end of key_expansion_128
//
// do aesimc for all except the first and the last round key
//
aesimc128:
mov RDX, key; // pointer to key output buffer
add RDX, 0x10; // dont modify first key
mov RCX, ROUNDS-2; // set counter to number of rounds - 2
LOOP:
movdqu XMM1, [RDX]; // load
aesimc XMM1, XMM1; // invert
movdqu [RDX], XMM1; // store
add RDX, 0x10; // increment pointer
loop LOOP; // loop rounds-2 times
ret;
// end aesimc128
END:
;
}
}
|