src/ceph/src/erasure-code/isa/xor_op.cc

   1 /*
   2  * Ceph - scalable distributed file system
   3  *
   4  * Copyright (C) 2014 CERN (Switzerland)
   5  *                                                                                                                                                                                                            * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>                                                                                                                                            *
   6  *  This library is free software; you can redistribute it and/or
   7  *  modify it under the terms of the GNU Lesser General Public
   8  *  License as published by the Free Software Foundation; either
   9  *  version 2.1 of the License, or (at your option) any later version.
  10  *
  11  */
  12
  13 // -----------------------------------------------------------------------------
  14 #include "xor_op.h"
  15 #include <stdio.h>
  16 #include <string.h>
  17 #include "arch/intel.h"
  18 // -----------------------------------------------------------------------------
  19
  20
  21 // -----------------------------------------------------------------------------
  22
  23 void
  24 // -----------------------------------------------------------------------------
  25 byte_xor(unsigned char* cw, unsigned char* dw, unsigned char* ew)
  26 // -----------------------------------------------------------------------------
  27 {
  28   while (cw < ew)
  29     *dw++ ^= *cw++;
  30 }
  31
  32 // -----------------------------------------------------------------------------
  33
  34 void
  35 // -----------------------------------------------------------------------------
  36 vector_xor(vector_op_t* cw,
  37            vector_op_t* dw,
  38            vector_op_t* ew)
  39 // -----------------------------------------------------------------------------
  40 {
  41   assert(is_aligned(cw, EC_ISA_VECTOR_OP_WORDSIZE));
  42   assert(is_aligned(dw, EC_ISA_VECTOR_OP_WORDSIZE));
  43   assert(is_aligned(ew, EC_ISA_VECTOR_OP_WORDSIZE));
  44   while (cw < ew) {
  45     *dw++ ^= *cw++;
  46   }
  47 }
  48
  49
  50 // -----------------------------------------------------------------------------
  51
  52 void
  53 // -----------------------------------------------------------------------------
  54 region_xor(unsigned char** src,
  55            unsigned char* parity,
  56            int src_size,
  57            unsigned size)
  58 {
  59   if (!size) {
  60     // nothing to do
  61     return;
  62   }
  63
  64   if (!src_size) {
  65     // nothing to do
  66     return;
  67   }
  68
  69   if (src_size == 1) {
  70     // just copy source to parity
  71     memcpy(parity, src[0], size);
  72     return;
  73   }
  74
  75   unsigned size_left = size;
  76
  77   // ----------------------------------------------------------
  78   // region or vector XOR operations require aligned addresses
  79   // ----------------------------------------------------------
  80
  81   bool src_aligned = true;
  82   for (int i = 0; i < src_size; i++) {
  83     src_aligned &= is_aligned(src[i], EC_ISA_VECTOR_OP_WORDSIZE);
  84   }
  85
  86   if (src_aligned &&
  87       is_aligned(parity, EC_ISA_VECTOR_OP_WORDSIZE)) {
  88
  89 #ifdef __x86_64__
  90     if (ceph_arch_intel_sse2) {
  91       // -----------------------------
  92       // use SSE2 region xor function
  93       // -----------------------------
  94       unsigned region_size =
  95         (size / EC_ISA_VECTOR_SSE2_WORDSIZE) * EC_ISA_VECTOR_SSE2_WORDSIZE;
  96
  97       size_left -= region_size;
  98       // 64-byte region xor
  99       region_sse2_xor((char**) src, (char*) parity, src_size, region_size);
 100     } else
 101 #endif
 102     {
 103       // --------------------------------------------
 104       // use region xor based on vector xor operation
 105       // --------------------------------------------
 106       unsigned vector_words = size / EC_ISA_VECTOR_OP_WORDSIZE;
 107       unsigned vector_size = vector_words * EC_ISA_VECTOR_OP_WORDSIZE;
 108       memcpy(parity, src[0], vector_size);
 109
 110       size_left -= vector_size;
 111       vector_op_t* p_vec = (vector_op_t*) parity;
 112       for (int i = 1; i < src_size; i++) {
 113         vector_op_t* s_vec = (vector_op_t*) src[i];
 114         vector_op_t* e_vec = s_vec + vector_words;
 115         vector_xor(s_vec, p_vec, e_vec);
 116       }
 117     }
 118   }
 119
 120   if (size_left) {
 121     // --------------------------------------------------
 122     // xor the not aligned part with byte-wise region xor
 123     // --------------------------------------------------
 124     memcpy(parity + size - size_left, src[0] + size - size_left, size_left);
 125     for (int i = 1; i < src_size; i++) {
 126       byte_xor(src[i] + size - size_left, parity + size - size_left, src[i] + size);
 127     }
 128   }
 129 }
 130
 131 // -----------------------------------------------------------------------------
 132
 133 void
 134 // -----------------------------------------------------------------------------
 135 region_sse2_xor(char** src,
 136                 char* parity,
 137                 int src_size,
 138                 unsigned size)
 139 // -----------------------------------------------------------------------------
 140 {
 141 #ifdef __x86_64__
 142   assert(!(size % EC_ISA_VECTOR_SSE2_WORDSIZE));
 143   unsigned char* p;
 144   int d, l;
 145   unsigned i;
 146   unsigned char* vbuf[256];
 147
 148   for (int v = 0; v < src_size; v++) {
 149     vbuf[v] = (unsigned char*) src[v];
 150   }
 151
 152   l = src_size;
 153   p = (unsigned char*) parity;
 154
 155   for (i = 0; i < size; i += EC_ISA_VECTOR_SSE2_WORDSIZE) {
 156     asm volatile("movdqa %0,%%xmm0" : : "m" (vbuf[0][i]));
 157     asm volatile("movdqa %0,%%xmm1" : : "m" (vbuf[0][i + 16]));
 158     asm volatile("movdqa %0,%%xmm2" : : "m" (vbuf[0][i + 32]));
 159     asm volatile("movdqa %0,%%xmm3" : : "m" (vbuf[0][i + 48]));
 160
 161     for (d = 1; d < l; d++) {
 162       asm volatile("movdqa %0,%%xmm4" : : "m" (vbuf[d][i]));
 163       asm volatile("movdqa %0,%%xmm5" : : "m" (vbuf[d][i + 16]));
 164       asm volatile("movdqa %0,%%xmm6" : : "m" (vbuf[d][i + 32]));
 165       asm volatile("movdqa %0,%%xmm7" : : "m" (vbuf[d][i + 48]));
 166       asm volatile("pxor %xmm4,%xmm0");
 167       asm volatile("pxor %xmm5,%xmm1");
 168       asm volatile("pxor %xmm6,%xmm2");
 169       asm volatile("pxor %xmm7,%xmm3");
 170     }
 171     asm volatile("movntdq %%xmm0,%0" : "=m" (p[i]));
 172     asm volatile("movntdq %%xmm1,%0" : "=m" (p[i + 16]));
 173     asm volatile("movntdq %%xmm2,%0" : "=m" (p[i + 32]));
 174     asm volatile("movntdq %%xmm3,%0" : "=m" (p[i + 48]));
 175   }
 176
 177   asm volatile("sfence" : : : "memory");
 178 #endif // __x86_64__
 179   return;
 180 }