X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Ferasure-code%2Fisa%2Fxor_op.cc;fp=src%2Fceph%2Fsrc%2Ferasure-code%2Fisa%2Fxor_op.cc;h=be0071d8fa95c6f40584785f89f75746299ea9b8;hb=812ff6ca9fcd3e629e49d4328905f33eee8ca3f5;hp=0000000000000000000000000000000000000000;hpb=15280273faafb77777eab341909a3f495cf248d9;p=stor4nfv.git

diff --git a/src/ceph/src/erasure-code/isa/xor_op.cc b/src/ceph/src/erasure-code/isa/xor_op.cc
new file mode 100644
index 0000000..be0071d
--- /dev/null
+++ b/src/ceph/src/erasure-code/isa/xor_op.cc
@@ -0,0 +1,180 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+// -----------------------------------------------------------------------------
+#include "xor_op.h"
+#include <string.h>
+#include <assert.h>
+#include "arch/intel.h"
+// -----------------------------------------------------------------------------
+
+
+// -----------------------------------------------------------------------------
+
+void
+// -----------------------------------------------------------------------------
+byte_xor(unsigned char* cw, unsigned char* dw, unsigned char* ew)
+// -----------------------------------------------------------------------------
+{
+  while (cw < ew)
+    *dw++ ^= *cw++;
+}
+
+// -----------------------------------------------------------------------------
+
+void
+// -----------------------------------------------------------------------------
+vector_xor(vector_op_t* cw,
+           vector_op_t* dw,
+           vector_op_t* ew)
+// -----------------------------------------------------------------------------
+{
+  assert(is_aligned(cw, EC_ISA_VECTOR_OP_WORDSIZE));
+  assert(is_aligned(dw, EC_ISA_VECTOR_OP_WORDSIZE));
+  assert(is_aligned(ew, EC_ISA_VECTOR_OP_WORDSIZE));
+  while (cw < ew) {
+    *dw++ ^= *cw++;
+  }
+}
+
+
+// -----------------------------------------------------------------------------
+
+void
+// -----------------------------------------------------------------------------
+region_xor(unsigned char** src,
+           unsigned char* parity,
+           int src_size,
+           unsigned size)
+{
+  if (!size) {
+    // nothing to do
+    return;
+  }
+
+  if (!src_size) {
+    // nothing to do
+    return;
+  }
+
+  if (src_size == 1) {
+    // just copy source to parity
+    memcpy(parity, src[0], size);
+    return;
+  }
+
+  unsigned size_left = size;
+
+  // ----------------------------------------------------------
+  // region or vector XOR operations require aligned addresses
+  // ----------------------------------------------------------
+
+  bool src_aligned = true;
+  for (int i = 0; i < src_size; i++) {
+    src_aligned &= is_aligned(src[i], EC_ISA_VECTOR_OP_WORDSIZE);
+  }
+
+  if (src_aligned &&
+      is_aligned(parity, EC_ISA_VECTOR_OP_WORDSIZE)) {
+
+#ifdef __x86_64__
+    if (ceph_arch_intel_sse2) {
+      // -----------------------------
+      // use SSE2 region xor function
+      // -----------------------------
+      unsigned region_size =
+        (size / EC_ISA_VECTOR_SSE2_WORDSIZE) * EC_ISA_VECTOR_SSE2_WORDSIZE;
+
+      size_left -= region_size;
+      // 64-byte region xor
+      region_sse2_xor((char**) src, (char*) parity, src_size, region_size);
+    } else
+#endif
+    {
+      // --------------------------------------------
+      // use region xor based on vector xor operation
+      // --------------------------------------------
+      unsigned vector_words = size / EC_ISA_VECTOR_OP_WORDSIZE;
+      unsigned vector_size = vector_words * EC_ISA_VECTOR_OP_WORDSIZE;
+      memcpy(parity, src[0], vector_size);
+
+      size_left -= vector_size;
+      vector_op_t* p_vec = (vector_op_t*) parity;
+      for (int i = 1; i < src_size; i++) {
+        vector_op_t* s_vec = (vector_op_t*) src[i];
+        vector_op_t* e_vec = s_vec + vector_words;
+        vector_xor(s_vec, p_vec, e_vec);
+      }
+    }
+  }
+
+  if (size_left) {
+    // --------------------------------------------------
+    // xor the not aligned part with byte-wise region xor
+    // --------------------------------------------------
+    memcpy(parity + size - size_left, src[0] + size - size_left, size_left);
+    for (int i = 1; i < src_size; i++) {
+      byte_xor(src[i] + size - size_left, parity + size - size_left, src[i] + size);
+    }
+  }
+}
+
+// -----------------------------------------------------------------------------
+
+void
+// -----------------------------------------------------------------------------
+region_sse2_xor(char** src,
+                char* parity,
+                int src_size,
+                unsigned size)
+// -----------------------------------------------------------------------------
+{
+#ifdef __x86_64__
+  assert(!(size % EC_ISA_VECTOR_SSE2_WORDSIZE));
+  unsigned char* p;
+  int d, l;
+  unsigned i;
+  unsigned char* vbuf[256];
+
+  for (int v = 0; v < src_size; v++) {
+    vbuf[v] = (unsigned char*) src[v];
+  }
+
+  l = src_size;
+  p = (unsigned char*) parity;
+
+  for (i = 0; i < size; i += EC_ISA_VECTOR_SSE2_WORDSIZE) {
+    asm volatile("movdqa %0,%%xmm0" : : "m" (vbuf[0][i]));
+    asm volatile("movdqa %0,%%xmm1" : : "m" (vbuf[0][i + 16]));
+    asm volatile("movdqa %0,%%xmm2" : : "m" (vbuf[0][i + 32]));
+    asm volatile("movdqa %0,%%xmm3" : : "m" (vbuf[0][i + 48]));
+
+    for (d = 1; d < l; d++) {
+      asm volatile("movdqa %0,%%xmm4" : : "m" (vbuf[d][i]));
+      asm volatile("movdqa %0,%%xmm5" : : "m" (vbuf[d][i + 16]));
+      asm volatile("movdqa %0,%%xmm6" : : "m" (vbuf[d][i + 32]));
+      asm volatile("movdqa %0,%%xmm7" : : "m" (vbuf[d][i + 48]));
+      asm volatile("pxor %xmm4,%xmm0");
+      asm volatile("pxor %xmm5,%xmm1");
+      asm volatile("pxor %xmm6,%xmm2");
+      asm volatile("pxor %xmm7,%xmm3");
+    }
+    asm volatile("movntdq %%xmm0,%0" : "=m" (p[i]));
+    asm volatile("movntdq %%xmm1,%0" : "=m" (p[i + 16]));
+    asm volatile("movntdq %%xmm2,%0" : "=m" (p[i + 32]));
+    asm volatile("movntdq %%xmm3,%0" : "=m" (p[i + 48]));
+  }
+
+  asm volatile("sfence" : : : "memory");
+#endif // __x86_64__
+  return;
+}
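
region_xor() is the dispatch entry point of this file: it computes parity as the byte-wise XOR of src_size source buffers, taking the 64-byte SSE2 path when everything is aligned and SSE2 is available, falling back to the vector_op_t path, and finishing any unaligned remainder with byte_xor(). A minimal usage sketch (not part of the patch) follows; it assumes region_xor() is declared in xor_op.h as defined above and a POSIX platform for posix_memalign, and the block size, fill values, and variable names are illustrative only.

// Usage sketch: XOR three aligned data blocks into a parity block and
// spot-check one byte. 64-byte alignment satisfies both the 16-byte
// vector_op_t path and the 64-byte SSE2 path.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "xor_op.h"

int main()
{
  const unsigned size = 4096;  // block length in bytes (multiple of 64)
  const int k = 3;             // number of data blocks

  unsigned char* src[k];
  unsigned char* parity = nullptr;

  for (int i = 0; i < k; i++) {
    if (posix_memalign((void**) &src[i], 64, size))
      return 1;
    memset(src[i], 'A' + i, size);  // arbitrary test pattern
  }
  if (posix_memalign((void**) &parity, 64, size))
    return 1;

  region_xor(src, parity, k, size);

  // every parity byte should be the XOR of the corresponding source bytes
  unsigned char expected = src[0][0] ^ src[1][0] ^ src[2][0];
  printf("parity[0]=0x%02x expected=0x%02x\n", parity[0], expected);

  for (int i = 0; i < k; i++)
    free(src[i]);
  free(parity);
  return 0;
}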
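
For readers less familiar with the inline assembly in region_sse2_xor(): each iteration loads 64 bytes of the first source with aligned movdqa loads into xmm0-xmm3, folds every further source in with pxor, and writes the result with movntdq non-temporal stores, which bypass the cache so a large parity region does not evict hot data; the trailing sfence orders those streaming stores. The same loop rendered with SSE2 intrinsics is sketched below purely to clarify the asm; it is not part of the patch and is not how the file implements it.

// Illustrative intrinsics equivalent of the asm loop above. As in
// region_sse2_xor, size must be a multiple of 64 and all buffers
// 16-byte aligned.
#include <emmintrin.h>

static void region_sse2_xor_intrinsics(char** src, char* parity,
                                       int src_size, unsigned size)
{
  for (unsigned i = 0; i < size; i += 64) {
    // aligned 64-byte load of the first source (movdqa)
    __m128i a0 = _mm_load_si128((const __m128i*) (src[0] + i));
    __m128i a1 = _mm_load_si128((const __m128i*) (src[0] + i + 16));
    __m128i a2 = _mm_load_si128((const __m128i*) (src[0] + i + 32));
    __m128i a3 = _mm_load_si128((const __m128i*) (src[0] + i + 48));

    // accumulate the remaining sources (pxor)
    for (int d = 1; d < src_size; d++) {
      a0 = _mm_xor_si128(a0, _mm_load_si128((const __m128i*) (src[d] + i)));
      a1 = _mm_xor_si128(a1, _mm_load_si128((const __m128i*) (src[d] + i + 16)));
      a2 = _mm_xor_si128(a2, _mm_load_si128((const __m128i*) (src[d] + i + 32)));
      a3 = _mm_xor_si128(a3, _mm_load_si128((const __m128i*) (src[d] + i + 48)));
    }

    // non-temporal stores of the parity (movntdq)
    _mm_stream_si128((__m128i*) (parity + i), a0);
    _mm_stream_si128((__m128i*) (parity + i + 16), a1);
    _mm_stream_si128((__m128i*) (parity + i + 32), a2);
    _mm_stream_si128((__m128i*) (parity + i + 48), a3);
  }
  _mm_sfence();  // make the streaming stores visible in order (sfence)
}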