qemu/roms/ipxe/src/crypto/aes.c

   1 /*
   2  * Copyright (C) 2015 Michael Brown <mbrown@fensystems.co.uk>.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public License as
   6  * published by the Free Software Foundation; either version 2 of the
   7  * License, or any later version.
   8  *
   9  * This program is distributed in the hope that it will be useful, but
  10  * WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17  * 02110-1301, USA.
  18  *
  19  * You can also choose to distribute this program under the terms of
  20  * the Unmodified Binary Distribution Licence (as given in the file
  21  * COPYING.UBDL), provided that you have satisfied its requirements.
  22  */
  23
  24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
  25
  26 /** @file
  27  *
  28  * AES algorithm
  29  *
  30  */
  31
  32 #include <stdint.h>
  33 #include <string.h>
  34 #include <errno.h>
  35 #include <assert.h>
  36 #include <byteswap.h>
  37 #include <ipxe/rotate.h>
  38 #include <ipxe/crypto.h>
  39 #include <ipxe/ecb.h>
  40 #include <ipxe/cbc.h>
  41 #include <ipxe/aes.h>
  42
  43 /** AES strides
  44  *
  45  * These are the strides (modulo 16) used to walk through the AES
  46  * input state bytes in order of byte position after [Inv]ShiftRows.
  47  */
  48 enum aes_stride {
  49         /** Input stride for ShiftRows
  50          *
  51          *    0 4 8 c
  52          *     \ \ \
  53          *    1 5 9 d
  54          *     \ \ \
  55          *    2 6 a e
  56          *     \ \ \
  57          *    3 7 b f
  58          */
  59         AES_STRIDE_SHIFTROWS = +5,
  60         /** Input stride for InvShiftRows
  61          *
  62          *    0 4 8 c
  63          *     / / /
  64          *    1 5 9 d
  65          *     / / /
  66          *    2 6 a e
  67          *     / / /
  68          *    3 7 b f
  69          */
  70         AES_STRIDE_INVSHIFTROWS = -3,
  71 };
  72
/** A single AES lookup table entry
 *
 * This represents the product (in the Galois field GF(2^8)) of an
 * eight-byte vector multiplier with a single scalar multiplicand.
 *
 * The vector multipliers used for AES are {1,1,1,3,2,1,1,3} for
 * MixColumns and {1,9,13,11,14,9,13,11} for InvMixColumns.  Each
 * column of the [Inv]MixColumns matrix appears as four consecutive
 * elements of the vector multiplier, so the product of any single
 * matrix column with the scalar value can be obtained simply by
 * extracting the relevant four-byte subset of the lookup table entry.
 *
 * For example, to find the result of multiplying the second column of
 * the MixColumns matrix by the scalar value 0x80:
 *
 * MixColumns column[0]: {                            2,    1,    1,    3 }
 * MixColumns column[1]: {                      3,    2,    1,    1       }
 * MixColumns column[2]: {                1,    3,    2,    1             }
 * MixColumns column[3]: {          1,    1,    3,    2                   }
 * Vector multiplier:    {    1,    1,    1,    3,    2,    1,    1,    3 }
 * Scalar multiplicand:    0x80
 * Lookup table entry:   { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
 *
 * The second column of the MixColumns matrix is {3,2,1,1}.  The
 * product of this column with the scalar value 0x80 is the four-byte
 * subset of the lookup table entry starting at byte offset 3:
 *
 * MixColumns column[1]: {                      3,    2,    1,    1       }
 * Vector multiplier:    {    1,    1,    1,    3,    2,    1,    1,    3 }
 * Lookup table entry:   { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
 * Product:              {                   0x9b, 0x1b, 0x80, 0x80       }
 *
 * The column lookups require only seven bytes of the eight-byte
 * entry: the remaining (first) byte is used to hold the scalar
 * multiplicand itself (i.e. the first element of the vector
 * multiplier is always chosen to be 1).
 */
union aes_table_entry {
        /** Viewed as an array of bytes */
        uint8_t byte[8];
} __attribute__ (( packed ));
 115
/** An AES lookup table
 *
 * This represents the products (in the Galois field GF(2^8)) of a
 * constant eight-byte vector multiplier with all possible 256 scalar
 * multiplicands.
 *
 * The entries are indexed by the input byte (denoted N), and the
 * scalar multiplicand held in the Nth entry is the AES [Inv]SubBytes
 * S-box output value for that input byte (denoted S(N)).  This allows
 * for the result of multiplying any single column of the
 * [Inv]MixColumns matrix by S(N) to be obtained simply by extracting
 * the relevant four-byte subset from the Nth table entry.  For
 * example:
 *
 * Input byte (N):         0x3a
 * SubBytes output S(N):   0x80
 * MixColumns column[1]: {                      3,    2,    1,    1       }
 * Vector multiplier:    {    1,    1,    1,    3,    2,    1,    1,    3 }
 * Table entry[0x3a]:    { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
 * Product:              {                   0x9b, 0x1b, 0x80, 0x80       }
 *
 * Since the first element of the eight-byte vector multiplier is
 * always chosen to be 1, the value of S(N) may be looked up by
 * extracting the first byte of the Nth table entry.
 */
struct aes_table {
        /** Table entries, indexed by input byte N (each holds products of S(N)) */
        union aes_table_entry entry[256];
} __attribute__ (( aligned ( 8 ) ));
 143
/** AES MixColumns lookup table
 *
 * Filled in at runtime by aes_generate().  The first byte of each
 * entry doubles as the forward S-box.
 */
static struct aes_table aes_mixcolumns;

/** AES InvMixColumns lookup table
 *
 * Filled in at runtime by aes_generate().  The first byte of each
 * entry doubles as the inverse S-box.
 */
static struct aes_table aes_invmixcolumns;
 149
 150 /**
 151  * Multiply [Inv]MixColumns matrix column by scalar multiplicand
 152  *
 153  * @v entry             AES lookup table entry for scalar multiplicand
 154  * @v column            [Inv]MixColumns matrix column index
 155  * @ret product         Product of matrix column with scalar multiplicand
 156  */
 157 static inline __attribute__ (( always_inline )) uint32_t
 158 aes_entry_column ( const union aes_table_entry *entry, unsigned int column ) {
 159         const union {
 160                 uint8_t byte;
 161                 uint32_t column;
 162         } __attribute__ (( may_alias )) *product;
 163
 164         /* Locate relevant four-byte subset */
 165         product = container_of ( &entry->byte[ 4 - column ],
 166                                  typeof ( *product ), byte );
 167
 168         /* Extract this four-byte subset */
 169         return product->column;
 170 }
 171
 172 /**
 173  * Multiply [Inv]MixColumns matrix column by S-boxed input byte
 174  *
 175  * @v table             AES lookup table
 176  * @v stride            AES row shift stride
 177  * @v in                AES input state
 178  * @v offset            Output byte offset (after [Inv]ShiftRows)
 179  * @ret product         Product of matrix column with S(input byte)
 180  *
 181  * Note that the specified offset is not the offset of the input byte;
 182  * it is the offset of the output byte which corresponds to the input
 183  * byte.  This output byte offset is used to calculate both the input
 184  * byte offset and to select the appropriate matric column.
 185  *
 186  * With a compile-time constant offset, this function will optimise
 187  * down to a single "movzbl" (to extract the input byte) and will
 188  * generate a single x86 memory reference expression which can then be
 189  * used directly within a single "xorl" instruction.
 190  */
 191 static inline __attribute__ (( always_inline )) uint32_t
 192 aes_column ( const struct aes_table *table, size_t stride,
 193              const union aes_matrix *in, size_t offset ) {
 194         const union aes_table_entry *entry;
 195         unsigned int byte;
 196
 197         /* Extract input byte corresponding to this output byte offset
 198          * (i.e. perform [Inv]ShiftRows).
 199          */
 200         byte = in->byte[ ( stride * offset ) & 0xf ];
 201
 202         /* Locate lookup table entry for this input byte (i.e. perform
 203          * [Inv]SubBytes).
 204          */
 205         entry = &table->entry[byte];
 206
 207         /* Multiply appropriate matrix column by this input byte
 208          * (i.e. perform [Inv]MixColumns).
 209          */
 210         return aes_entry_column ( entry, ( offset & 0x3 ) );
 211 }
 212
 213 /**
 214  * Calculate intermediate round output column
 215  *
 216  * @v table             AES lookup table
 217  * @v stride            AES row shift stride
 218  * @v in                AES input state
 219  * @v key               AES round key
 220  * @v column            Column index
 221  * @ret output          Output column value
 222  */
 223 static inline __attribute__ (( always_inline )) uint32_t
 224 aes_output ( const struct aes_table *table, size_t stride,
 225              const union aes_matrix *in, const union aes_matrix *key,
 226              unsigned int column ) {
 227         size_t offset = ( column * 4 );
 228
 229         /* Perform [Inv]ShiftRows, [Inv]SubBytes, [Inv]MixColumns, and
 230          * AddRoundKey for this column.  The loop is unrolled to allow
 231          * for the required compile-time constant optimisations.
 232          */
 233         return ( aes_column ( table, stride, in, ( offset + 0 ) ) ^
 234                  aes_column ( table, stride, in, ( offset + 1 ) ) ^
 235                  aes_column ( table, stride, in, ( offset + 2 ) ) ^
 236                  aes_column ( table, stride, in, ( offset + 3 ) ) ^
 237                  key->column[column] );
 238 }
 239
 240 /**
 241  * Perform a single intermediate round
 242  *
 243  * @v table             AES lookup table
 244  * @v stride            AES row shift stride
 245  * @v in                AES input state
 246  * @v out               AES output state
 247  * @v key               AES round key
 248  */
 249 static inline __attribute__ (( always_inline )) void
 250 aes_round ( const struct aes_table *table, size_t stride,
 251             const union aes_matrix *in, union aes_matrix *out,
 252             const union aes_matrix *key ) {
 253
 254         /* Perform [Inv]ShiftRows, [Inv]SubBytes, [Inv]MixColumns, and
 255          * AddRoundKey for all columns.  The loop is unrolled to allow
 256          * for the required compile-time constant optimisations.
 257          */
 258         out->column[0] = aes_output ( table, stride, in, key, 0 );
 259         out->column[1] = aes_output ( table, stride, in, key, 1 );
 260         out->column[2] = aes_output ( table, stride, in, key, 2 );
 261         out->column[3] = aes_output ( table, stride, in, key, 3 );
 262 }
 263
 264 /**
 265  * Perform encryption intermediate rounds
 266  *
 267  * @v in                AES input state
 268  * @v out               AES output state
 269  * @v key               Round keys
 270  * @v rounds            Number of rounds (must be odd)
 271  *
 272  * This function is deliberately marked as non-inlinable to ensure
 273  * maximal availability of registers for GCC's register allocator,
 274  * which has a tendency to otherwise spill performance-critical
 275  * registers to the stack.
 276  */
 277 static __attribute__ (( noinline )) void
 278 aes_encrypt_rounds ( union aes_matrix *in, union aes_matrix *out,
 279                      const union aes_matrix *key, unsigned int rounds ) {
 280         union aes_matrix *tmp;
 281
 282         /* Perform intermediate rounds */
 283         do {
 284                 /* Perform one intermediate round */
 285                 aes_round ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS,
 286                             in, out, key++ );
 287
 288                 /* Swap input and output states for next round */
 289                 tmp = in;
 290                 in = out;
 291                 out = tmp;
 292
 293         } while ( --rounds );
 294 }
 295
 296 /**
 297  * Perform decryption intermediate rounds
 298  *
 299  * @v in                AES input state
 300  * @v out               AES output state
 301  * @v key               Round keys
 302  * @v rounds            Number of rounds (must be odd)
 303  *
 304  * As with aes_encrypt_rounds(), this function is deliberately marked
 305  * as non-inlinable.
 306  *
 307  * This function could potentially use the same binary code as is used
 308  * for encryption.  To compensate for the difference between ShiftRows
 309  * and InvShiftRows, half of the input byte offsets would have to be
 310  * modifiable at runtime (half by an offset of +4/-4, half by an
 311  * offset of -4/+4 for ShiftRows/InvShiftRows).  This can be
 312  * accomplished in x86 assembly within the number of available
 313  * registers, but GCC's register allocator struggles to do so,
 314  * resulting in a significant performance decrease due to registers
 315  * being spilled to the stack.  We therefore use two separate but very
 316  * similar binary functions based on the same C source.
 317  */
 318 static __attribute__ (( noinline )) void
 319 aes_decrypt_rounds ( union aes_matrix *in, union aes_matrix *out,
 320                      const union aes_matrix *key, unsigned int rounds ) {
 321         union aes_matrix *tmp;
 322
 323         /* Perform intermediate rounds */
 324         do {
 325                 /* Perform one intermediate round */
 326                 aes_round ( &aes_invmixcolumns, AES_STRIDE_INVSHIFTROWS,
 327                             in, out, key++ );
 328
 329                 /* Swap input and output states for next round */
 330                 tmp = in;
 331                 in = out;
 332                 out = tmp;
 333
 334         } while ( --rounds );
 335 }
 336
 337 /**
 338  * Perform standalone AddRoundKey
 339  *
 340  * @v state             AES state
 341  * @v key               AES round key
 342  */
 343 static inline __attribute__ (( always_inline )) void
 344 aes_addroundkey ( union aes_matrix *state, const union aes_matrix *key ) {
 345
 346         state->column[0] ^= key->column[0];
 347         state->column[1] ^= key->column[1];
 348         state->column[2] ^= key->column[2];
 349         state->column[3] ^= key->column[3];
 350 }
 351
 352 /**
 353  * Perform final round
 354  *
 355  * @v table             AES lookup table
 356  * @v stride            AES row shift stride
 357  * @v in                AES input state
 358  * @v out               AES output state
 359  * @v key               AES round key
 360  */
 361 static void aes_final ( const struct aes_table *table, size_t stride,
 362                         const union aes_matrix *in, union aes_matrix *out,
 363                         const union aes_matrix *key ) {
 364         const union aes_table_entry *entry;
 365         unsigned int byte;
 366         size_t out_offset;
 367         size_t in_offset;
 368
 369         /* Perform [Inv]ShiftRows and [Inv]SubBytes */
 370         for ( out_offset = 0, in_offset = 0 ; out_offset < 16 ;
 371               out_offset++, in_offset = ( ( in_offset + stride ) & 0xf ) ) {
 372
 373                 /* Extract input byte (i.e. perform [Inv]ShiftRows) */
 374                 byte = in->byte[in_offset];
 375
 376                 /* Locate lookup table entry for this input byte
 377                  * (i.e. perform [Inv]SubBytes).
 378                  */
 379                 entry = &table->entry[byte];
 380
 381                 /* Store output byte */
 382                 out->byte[out_offset] = entry->byte[0];
 383         }
 384
 385         /* Perform AddRoundKey */
 386         aes_addroundkey ( out, key );
 387 }
 388
 389 /**
 390  * Encrypt data
 391  *
 392  * @v ctx               Context
 393  * @v src               Data to encrypt
 394  * @v dst               Buffer for encrypted data
 395  * @v len               Length of data
 396  */
 397 static void aes_encrypt ( void *ctx, const void *src, void *dst, size_t len ) {
 398         struct aes_context *aes = ctx;
 399         union aes_matrix buffer[2];
 400         union aes_matrix *in = &buffer[0];
 401         union aes_matrix *out = &buffer[1];
 402         unsigned int rounds = aes->rounds;
 403
 404         /* Sanity check */
 405         assert ( len == sizeof ( *in ) );
 406
 407         /* Initialise input state */
 408         memcpy ( in, src, sizeof ( *in ) );
 409
 410         /* Perform initial round (AddRoundKey) */
 411         aes_addroundkey ( in, &aes->encrypt.key[0] );
 412
 413         /* Perform intermediate rounds (ShiftRows, SubBytes,
 414          * MixColumns, AddRoundKey).
 415          */
 416         aes_encrypt_rounds ( in, out, &aes->encrypt.key[1], ( rounds - 2 ) );
 417         in = out;
 418
 419         /* Perform final round (ShiftRows, SubBytes, AddRoundKey) */
 420         out = dst;
 421         aes_final ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS, in, out,
 422                     &aes->encrypt.key[ rounds - 1 ] );
 423 }
 424
 425 /**
 426  * Decrypt data
 427  *
 428  * @v ctx               Context
 429  * @v src               Data to decrypt
 430  * @v dst               Buffer for decrypted data
 431  * @v len               Length of data
 432  */
 433 static void aes_decrypt ( void *ctx, const void *src, void *dst, size_t len ) {
 434         struct aes_context *aes = ctx;
 435         union aes_matrix buffer[2];
 436         union aes_matrix *in = &buffer[0];
 437         union aes_matrix *out = &buffer[1];
 438         unsigned int rounds = aes->rounds;
 439
 440         /* Sanity check */
 441         assert ( len == sizeof ( *in ) );
 442
 443         /* Initialise input state */
 444         memcpy ( in, src, sizeof ( *in ) );
 445
 446         /* Perform initial round (AddRoundKey) */
 447         aes_addroundkey ( in, &aes->decrypt.key[0] );
 448
 449         /* Perform intermediate rounds (InvShiftRows, InvSubBytes,
 450          * InvMixColumns, AddRoundKey).
 451          */
 452         aes_decrypt_rounds ( in, out, &aes->decrypt.key[1], ( rounds - 2 ) );
 453         in = out;
 454
 455         /* Perform final round (InvShiftRows, InvSubBytes, AddRoundKey) */
 456         out = dst;
 457         aes_final ( &aes_invmixcolumns, AES_STRIDE_INVSHIFTROWS, in, out,
 458                     &aes->decrypt.key[ rounds - 1 ] );
 459 }
 460
 461 /**
 462  * Multiply a polynomial by (x) modulo (x^8 + x^4 + x^3 + x^2 + 1) in GF(2^8)
 463  *
 464  * @v poly              Polynomial to be multiplied
 465  * @ret result          Result
 466  */
 467 static __attribute__ (( const )) unsigned int aes_double ( unsigned int poly ) {
 468
 469         /* Multiply polynomial by (x), placing the resulting x^8
 470          * coefficient in the LSB (i.e. rotate byte left by one).
 471          */
 472         poly = rol8 ( poly, 1 );
 473
 474         /* If coefficient of x^8 (in LSB) is non-zero, then reduce by
 475          * subtracting (x^8 + x^4 + x^3 + x^2 + 1) in GF(2^8).
 476          */
 477         if ( poly & 0x01 ) {
 478                 poly ^= 0x01; /* Subtract x^8 (currently in LSB) */
 479                 poly ^= 0x1b; /* Subtract (x^4 + x^3 + x^2 + 1) */
 480         }
 481
 482         return poly;
 483 }
 484
 485 /**
 486  * Fill in MixColumns lookup table entry
 487  *
 488  * @v entry             AES lookup table entry for scalar multiplicand
 489  *
 490  * The MixColumns lookup table vector multiplier is {1,1,1,3,2,1,1,3}.
 491  */
 492 static void aes_mixcolumns_entry ( union aes_table_entry *entry ) {
 493         unsigned int scalar_x_1;
 494         unsigned int scalar_x;
 495         unsigned int scalar;
 496
 497         /* Retrieve scalar multiplicand */
 498         scalar = entry->byte[0];
 499         entry->byte[1] = scalar;
 500         entry->byte[2] = scalar;
 501         entry->byte[5] = scalar;
 502         entry->byte[6] = scalar;
 503
 504         /* Calculate scalar multiplied by (x) */
 505         scalar_x = aes_double ( scalar );
 506         entry->byte[4] = scalar_x;
 507
 508         /* Calculate scalar multiplied by (x + 1) */
 509         scalar_x_1 = ( scalar_x ^ scalar );
 510         entry->byte[3] = scalar_x_1;
 511         entry->byte[7] = scalar_x_1;
 512 }
 513
 514 /**
 515  * Fill in InvMixColumns lookup table entry
 516  *
 517  * @v entry             AES lookup table entry for scalar multiplicand
 518  *
 519  * The InvMixColumns lookup table vector multiplier is {1,9,13,11,14,9,13,11}.
 520  */
 521 static void aes_invmixcolumns_entry ( union aes_table_entry *entry ) {
 522         unsigned int scalar_x3_x2_x;
 523         unsigned int scalar_x3_x2_1;
 524         unsigned int scalar_x3_x2;
 525         unsigned int scalar_x3_x_1;
 526         unsigned int scalar_x3_1;
 527         unsigned int scalar_x3;
 528         unsigned int scalar_x2;
 529         unsigned int scalar_x;
 530         unsigned int scalar;
 531
 532         /* Retrieve scalar multiplicand */
 533         scalar = entry->byte[0];
 534
 535         /* Calculate scalar multiplied by (x) */
 536         scalar_x = aes_double ( scalar );
 537
 538         /* Calculate scalar multiplied by (x^2) */
 539         scalar_x2 = aes_double ( scalar_x );
 540
 541         /* Calculate scalar multiplied by (x^3) */
 542         scalar_x3 = aes_double ( scalar_x2 );
 543
 544         /* Calculate scalar multiplied by (x^3 + 1) */
 545         scalar_x3_1 = ( scalar_x3 ^ scalar );
 546         entry->byte[1] = scalar_x3_1;
 547         entry->byte[5] = scalar_x3_1;
 548
 549         /* Calculate scalar multiplied by (x^3 + x + 1) */
 550         scalar_x3_x_1 = ( scalar_x3_1 ^ scalar_x );
 551         entry->byte[3] = scalar_x3_x_1;
 552         entry->byte[7] = scalar_x3_x_1;
 553
 554         /* Calculate scalar multiplied by (x^3 + x^2) */
 555         scalar_x3_x2 = ( scalar_x3 ^ scalar_x2 );
 556
 557         /* Calculate scalar multiplied by (x^3 + x^2 + 1) */
 558         scalar_x3_x2_1 = ( scalar_x3_x2 ^ scalar );
 559         entry->byte[2] = scalar_x3_x2_1;
 560         entry->byte[6] = scalar_x3_x2_1;
 561
 562         /* Calculate scalar multiplied by (x^3 + x^2 + x) */
 563         scalar_x3_x2_x = ( scalar_x3_x2 ^ scalar_x );
 564         entry->byte[4] = scalar_x3_x2_x;
 565 }
 566
 567 /**
 568  * Generate AES lookup tables
 569  *
 570  */
 571 static void aes_generate ( void ) {
 572         union aes_table_entry *entry;
 573         union aes_table_entry *inventry;
 574         unsigned int poly = 0x01;
 575         unsigned int invpoly = 0x01;
 576         unsigned int transformed;
 577         unsigned int i;
 578
 579         /* Iterate over non-zero values of GF(2^8) using generator (x + 1) */
 580         do {
 581
 582                 /* Multiply polynomial by (x + 1) */
 583                 poly ^= aes_double ( poly );
 584
 585                 /* Divide inverse polynomial by (x + 1).  This code
 586                  * fragment is taken directly from the Wikipedia page
 587                  * on the Rijndael S-box.  An explanation of why it
 588                  * works would be greatly appreciated.
 589                  */
 590                 invpoly ^= ( invpoly << 1 );
 591                 invpoly ^= ( invpoly << 2 );
 592                 invpoly ^= ( invpoly << 4 );
 593                 if ( invpoly & 0x80 )
 594                         invpoly ^= 0x09;
 595                 invpoly &= 0xff;
 596
 597                 /* Apply affine transformation */
 598                 transformed = ( 0x63 ^ invpoly ^ rol8 ( invpoly, 1 ) ^
 599                                 rol8 ( invpoly, 2 ) ^ rol8 ( invpoly, 3 ) ^
 600                                 rol8 ( invpoly, 4 ) );
 601
 602                 /* Populate S-box (within MixColumns lookup table) */
 603                 aes_mixcolumns.entry[poly].byte[0] = transformed;
 604
 605         } while ( poly != 0x01 );
 606
 607         /* Populate zeroth S-box entry (which has no inverse) */
 608         aes_mixcolumns.entry[0].byte[0] = 0x63;
 609
 610         /* Fill in MixColumns and InvMixColumns lookup tables */
 611         for ( i = 0 ; i < 256 ; i++ ) {
 612
 613                 /* Fill in MixColumns lookup table entry */
 614                 entry = &aes_mixcolumns.entry[i];
 615                 aes_mixcolumns_entry ( entry );
 616
 617                 /* Populate inverse S-box (within InvMixColumns lookup table) */
 618                 inventry = &aes_invmixcolumns.entry[ entry->byte[0] ];
 619                 inventry->byte[0] = i;
 620
 621                 /* Fill in InvMixColumns lookup table entry */
 622                 aes_invmixcolumns_entry ( inventry );
 623         }
 624 }
 625
 626 /**
 627  * Rotate key column
 628  *
 629  * @v column            Key column
 630  * @ret column          Updated key column
 631  */
 632 static inline __attribute__ (( always_inline )) uint32_t
 633 aes_key_rotate ( uint32_t column ) {
 634
 635         return ( ( __BYTE_ORDER == __LITTLE_ENDIAN ) ?
 636                  ror32 ( column, 8 ) : rol32 ( column, 8 ) );
 637 }
 638
 639 /**
 640  * Apply S-box to key column
 641  *
 642  * @v column            Key column
 643  * @ret column          Updated key column
 644  */
 645 static uint32_t aes_key_sbox ( uint32_t column ) {
 646         unsigned int i;
 647         uint8_t byte;
 648
 649         for ( i = 0 ; i < 4 ; i++ ) {
 650                 byte = ( column & 0xff );
 651                 byte = aes_mixcolumns.entry[byte].byte[0];
 652                 column = ( ( column & ~0xff ) | byte );
 653                 column = rol32 ( column, 8 );
 654         }
 655         return column;
 656 }
 657
 658 /**
 659  * Apply schedule round constant to key column
 660  *
 661  * @v column            Key column
 662  * @v rcon              Round constant
 663  * @ret column          Updated key column
 664  */
 665 static inline __attribute__ (( always_inline )) uint32_t
 666 aes_key_rcon ( uint32_t column, unsigned int rcon ) {
 667
 668         return ( ( __BYTE_ORDER == __LITTLE_ENDIAN ) ?
 669                  ( column ^ rcon ) : ( column ^ ( rcon << 24 ) ) );
 670 }
 671
 672 /**
 673  * Set key
 674  *
 675  * @v ctx               Context
 676  * @v key               Key
 677  * @v keylen            Key length
 678  * @ret rc              Return status code
 679  */
 680 static int aes_setkey ( void *ctx, const void *key, size_t keylen ) {
 681         struct aes_context *aes = ctx;
 682         union aes_matrix *enc;
 683         union aes_matrix *dec;
 684         union aes_matrix temp;
 685         union aes_matrix zero;
 686         unsigned int rcon = 0x01;
 687         unsigned int rounds;
 688         size_t offset = 0;
 689         uint32_t *prev;
 690         uint32_t *next;
 691         uint32_t *end;
 692         uint32_t tmp;
 693
 694         /* Generate lookup tables, if not already done */
 695         if ( ! aes_mixcolumns.entry[0].byte[0] )
 696                 aes_generate();
 697
 698         /* Validate key length and calculate number of intermediate rounds */
 699         switch ( keylen ) {
 700         case ( 128 / 8 ) :
 701                 rounds = 11;
 702                 break;
 703         case ( 192 / 8 ) :
 704                 rounds = 13;
 705                 break;
 706         case ( 256 / 8 ) :
 707                 rounds = 15;
 708                 break;
 709         default:
 710                 DBGC ( aes, "AES %p unsupported key length (%zd bits)\n",
 711                        aes, ( keylen * 8 ) );
 712                 return -EINVAL;
 713         }
 714         aes->rounds = rounds;
 715         enc = aes->encrypt.key;
 716         end = enc[rounds].column;
 717
 718         /* Copy raw key */
 719         memcpy ( enc, key, keylen );
 720         prev = enc->column;
 721         next = ( ( ( void * ) prev ) + keylen );
 722         tmp = next[-1];
 723
 724         /* Construct expanded key */
 725         while ( next < end ) {
 726
 727                 /* If this is the first column of an expanded key
 728                  * block, or the middle column of an AES-256 key
 729                  * block, then apply the S-box.
 730                  */
 731                 if ( ( offset == 0 ) || ( ( offset | keylen ) == 48 ) )
 732                         tmp = aes_key_sbox ( tmp );
 733
 734                 /* If this is the first column of an expanded key
 735                  * block then rotate and apply the round constant.
 736                  */
 737                 if ( offset == 0 ) {
 738                         tmp = aes_key_rotate ( tmp );
 739                         tmp = aes_key_rcon ( tmp, rcon );
 740                         rcon = aes_double ( rcon );
 741                 }
 742
 743                 /* XOR with previous key column */
 744                 tmp ^= *prev;
 745
 746                 /* Store column */
 747                 *next = tmp;
 748
 749                 /* Move to next column */
 750                 offset += sizeof ( *next );
 751                 if ( offset == keylen )
 752                         offset = 0;
 753                 next++;
 754                 prev++;
 755         }
 756         DBGC2 ( aes, "AES %p expanded %zd-bit key:\n", aes, ( keylen * 8 ) );
 757         DBGC2_HDA ( aes, 0, &aes->encrypt, ( rounds * sizeof ( *enc ) ) );
 758
 759         /* Convert to decryption key */
 760         memset ( &zero, 0, sizeof ( zero ) );
 761         dec = &aes->decrypt.key[ rounds - 1 ];
 762         memcpy ( dec--, enc++, sizeof ( *dec ) );
 763         while ( dec > aes->decrypt.key ) {
 764                 /* Perform InvMixColumns (by reusing the encryption
 765                  * final-round code to perform ShiftRows+SubBytes and
 766                  * reusing the decryption intermediate-round code to
 767                  * perform InvShiftRows+InvSubBytes+InvMixColumns, all
 768                  * with a zero encryption key).
 769                  */
 770                 aes_final ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS,
 771                             enc++, &temp, &zero );
 772                 aes_decrypt_rounds ( &temp, dec--, &zero, 1 );
 773         }
 774         memcpy ( dec--, enc++, sizeof ( *dec ) );
 775         DBGC2 ( aes, "AES %p inverted %zd-bit key:\n", aes, ( keylen * 8 ) );
 776         DBGC2_HDA ( aes, 0, &aes->decrypt, ( rounds * sizeof ( *dec ) ) );
 777
 778         return 0;
 779 }
 780
/**
 * Set initialisation vector
 *
 * @v ctx               Context
 * @v iv                Initialisation vector
 *
 * Both parameters are ignored: this function exists only to populate
 * the setiv method of the basic AES algorithm, and does nothing.
 */
static void aes_setiv ( void *ctx __unused, const void *iv __unused ) {
        /* Nothing to do */
}
 790
/** Basic AES algorithm
 *
 * Operates on single blocks of AES_BLOCKSIZE bytes using a context of
 * type struct aes_context.  The setiv method is a no-op.
 */
struct cipher_algorithm aes_algorithm = {
        .name = "aes",
        .ctxsize = sizeof ( struct aes_context ),
        .blocksize = AES_BLOCKSIZE,
        .setkey = aes_setkey,
        .setiv = aes_setiv,
        .encrypt = aes_encrypt,
        .decrypt = aes_decrypt,
};
 801
/* AES in Electronic Codebook mode (built on the basic AES algorithm
 * via the ECB_CIPHER macro)
 */
ECB_CIPHER ( aes_ecb, aes_ecb_algorithm,
             aes_algorithm, struct aes_context, AES_BLOCKSIZE );

/* AES in Cipher Block Chaining mode (built on the basic AES algorithm
 * via the CBC_CIPHER macro)
 */
CBC_CIPHER ( aes_cbc, aes_cbc_algorithm,
             aes_algorithm, struct aes_context, AES_BLOCKSIZE );