点击此处获得更好的阅读体验
WriteUp来源
题目描述
题目考点
- 国产CPU编程
解题思路
本题的出题意图是让选手了解鲲鹏 920 CPU 提供的 AES 指令集扩展, 并利用这些指令写出高性能的代码.
一个简单的示例代码见 aes.c, 代码如下:
/* GCC and LLVM Clang, but not Apple Clang */
/**
 * Encrypt whole 16-byte blocks with the ARMv8 AES instructions.
 *
 * Every block is processed independently with the same round keys:
 *   - AESE + AESMC with `key` (first round),
 *   - AESE + AESMC with subkeys[0 .. rounds-3] (middle rounds),
 *   - AESE with subkeys[rounds-2] (last round, no MixColumns),
 *   - XOR with subkeys[rounds-1] (final AddRoundKey).
 *
 * @param key      16 bytes used as the round key of the first round.
 * @param subkeys  `rounds` consecutive 16-byte round keys.
 * @param rounds   number of rounds (e.g. 10 for AES-128). Values below 2
 *                 cannot be mapped onto the scheme above; the call is then a
 *                 no-op instead of letting `rounds - 2` wrap around and read
 *                 far beyond `subkeys`.
 * @param input    source bytes, at least `length` bytes long.
 * @param output   destination, at least `length` rounded down to a multiple
 *                 of 16 bytes long.
 * @param length   number of input bytes; only complete 16-byte blocks are
 *                 processed, up to 15 trailing bytes are ignored.
 */
void aes_process_arm(const uint8_t key[], const uint8_t subkeys[], uint32_t rounds,
const uint8_t input[], uint8_t output[], uint32_t length)
{
    /* rounds is unsigned: without this guard, rounds - 2 underflows. */
    if (rounds < 2) {
        return;
    }

    const uint32_t middle_rounds = rounds - 2;

    for (; length >= 16; length -= 16) {
        const uint8_t *round_key = subkeys;
        uint8x16_t block = vld1q_u8(input);

        /* First round: AddRoundKey + SubBytes + ShiftRows, then MixColumns. */
        block = vaesmcq_u8(vaeseq_u8(block, vld1q_u8(key)));

        /* Middle rounds, one 16-byte round key each. */
        for (uint32_t r = 0; r < middle_rounds; ++r) {
            block = vaesmcq_u8(vaeseq_u8(block, vld1q_u8(round_key)));
            round_key += 16;
        }

        /* Last round has no MixColumns; round_key now points at subkeys[rounds-2]. */
        block = vaeseq_u8(block, vld1q_u8(round_key));

        /* Final AddRoundKey is a plain XOR with subkeys[rounds-1]. */
        block = veorq_u8(block, vld1q_u8(round_key + 16));

        vst1q_u8(output, block);

        input += 16;
        output += 16;
    }
}
/* Print `label`, then `count` bytes as upper-case hex each followed by a space, then a newline. */
static void print_hex_line(const char *label, const uint8_t *bytes, unsigned int count)
{
    printf("%s", label);
    for (unsigned int i = 0; i < count; ++i) {
        printf("%02X ", bytes[i]);
    }
    printf("\n");
}

/*
 * Self-test: encrypts the FIPS 197 Appendix B plaintext with the Appendix B
 * key schedule through aes_process_arm(), prints input, key and output, and
 * compares the output with the Appendix B ciphertext.
 *
 * Returns 0 when the ciphertext matches and 1 otherwise, so that scripts and
 * CI can detect a failing self-test from the exit status.
 */
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    enum {
        BLOCK_SIZE = 16,
        ROUNDS = 10,
        /* The ciphertext is written at an odd offset into `result`,
           presumably to exercise an unaligned output pointer. */
        RESULT_OFFSET = 3
    };

    /* FIPS 197, Appendix B input */
    const uint8_t input[BLOCK_SIZE] = { /* user input, unaligned buffer */
        0x32, 0x43, 0xf6, 0xa8, 0x88, 0x5a, 0x30, 0x8d, 0x31, 0x31, 0x98, 0xa2, 0xe0, 0x37, 0x07, 0x34
    };

    /* FIPS 197, Appendix B key */
    const uint8_t key[BLOCK_SIZE] = { /* user input, unaligned buffer */
        0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c
    };

    /* FIPS 197, Appendix B expanded subkeys */
    __attribute__((aligned(4)))
    const uint8_t subkeys[ROUNDS][BLOCK_SIZE] = { /* library controlled, aligned buffer */
        {0xA0, 0xFA, 0xFE, 0x17, 0x88, 0x54, 0x2c, 0xb1, 0x23, 0xa3, 0x39, 0x39, 0x2a, 0x6c, 0x76, 0x05},
        {0xF2, 0xC2, 0x95, 0xF2, 0x7a, 0x96, 0xb9, 0x43, 0x59, 0x35, 0x80, 0x7a, 0x73, 0x59, 0xf6, 0x7f},
        {0x3D, 0x80, 0x47, 0x7D, 0x47, 0x16, 0xFE, 0x3E, 0x1E, 0x23, 0x7E, 0x44, 0x6D, 0x7A, 0x88, 0x3B},
        {0xEF, 0x44, 0xA5, 0x41, 0xA8, 0x52, 0x5B, 0x7F, 0xB6, 0x71, 0x25, 0x3B, 0xDB, 0x0B, 0xAD, 0x00},
        {0xD4, 0xD1, 0xC6, 0xF8, 0x7C, 0x83, 0x9D, 0x87, 0xCA, 0xF2, 0xB8, 0xBC, 0x11, 0xF9, 0x15, 0xBC},
        {0x6D, 0x88, 0xA3, 0x7A, 0x11, 0x0B, 0x3E, 0xFD, 0xDB, 0xF9, 0x86, 0x41, 0xCA, 0x00, 0x93, 0xFD},
        {0x4E, 0x54, 0xF7, 0x0E, 0x5F, 0x5F, 0xC9, 0xF3, 0x84, 0xA6, 0x4F, 0xB2, 0x4E, 0xA6, 0xDC, 0x4F},
        {0xEA, 0xD2, 0x73, 0x21, 0xB5, 0x8D, 0xBA, 0xD2, 0x31, 0x2B, 0xF5, 0x60, 0x7F, 0x8D, 0x29, 0x2F},
        {0xAC, 0x77, 0x66, 0xF3, 0x19, 0xFA, 0xDC, 0x21, 0x28, 0xD1, 0x29, 0x41, 0x57, 0x5c, 0x00, 0x6E},
        {0xD0, 0x14, 0xF9, 0xA8, 0xC9, 0xEE, 0x25, 0x89, 0xE1, 0x3F, 0x0c, 0xC8, 0xB6, 0x63, 0x0C, 0xA6}
    };

    /* FIPS 197, Appendix B output */
    const uint8_t exp[BLOCK_SIZE] = {
        0x39, 0x25, 0x84, 0x1D, 0x02, 0xDC, 0x09, 0xFB, 0xDC, 0x11, 0x85, 0x97, 0x19, 0x6A, 0x0B, 0x32
    };

    /* Result */
    uint8_t result[RESULT_OFFSET + BLOCK_SIZE] = { 0 };
    uint8_t *const ciphertext = result + RESULT_OFFSET;

    aes_process_arm((const uint8_t*)key, (const uint8_t*)subkeys, ROUNDS, input, ciphertext, BLOCK_SIZE);

    print_hex_line("Input: ", input, BLOCK_SIZE);
    print_hex_line("Key: ", key, BLOCK_SIZE);
    print_hex_line("Output: ", ciphertext, BLOCK_SIZE);

    if (0 != memcmp(ciphertext, exp, BLOCK_SIZE)) {
        printf("FAILURE!!!\n");
        return 1; /* report the mismatch through the exit status as well */
    }

    printf("SUCCESS!!!\n");
    return 0;
}