X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Fcommon%2Futf8.c;fp=src%2Fceph%2Fsrc%2Fcommon%2Futf8.c;h=0000000000000000000000000000000000000000;hb=7da45d65be36d36b880cc55c5036e96c24b53f00;hp=5a8592f1cc5e2afbea9ef52b5b49c8674ca01ed0;hpb=691462d09d0987b47e112d6ee8740375df3c51b2;p=stor4nfv.git diff --git a/src/ceph/src/common/utf8.c b/src/ceph/src/common/utf8.c deleted file mode 100644 index 5a8592f..0000000 --- a/src/ceph/src/common/utf8.c +++ /dev/null @@ -1,183 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2011 New Dream Network - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ -#include "common/utf8.h" - -#include - -static int high_bits_set(int c) -{ - int ret = 0; - while (1) { - if ((c & 0x80) != 0x080) - break; - c <<= 1; - ++ret; - } - return ret; -} - -/* Encode a 31-bit UTF8 code point to 'buf'. - * Assumes buf is of size MAX_UTF8_SZ - * Returns -1 on failure; number of bytes in the encoded value otherwise. - */ -int encode_utf8(unsigned long u, unsigned char *buf) -{ - int i; - unsigned long max_val[MAX_UTF8_SZ] = { - 0x0000007ful, 0x000007fful, 0x0000fffful, - 0x001ffffful, 0x03fffffful, 0x7ffffffful - }; - static const int MAX_VAL_SZ = sizeof(max_val) / sizeof(max_val[0]); - - for (i = 0; i < MAX_VAL_SZ; ++i) { - if (u <= max_val[i]) - break; - } - if (i == MAX_VAL_SZ) { - // This code point is too big to encode. - return -1; - } - - if (i == 0) { - buf[0] = u; - } - else { - signed int j; - for (j = i; j > 0; --j) { - buf[j] = 0x80 | (u & 0x3f); - u >>= 6; - } - - unsigned char mask = ~(0xFF >> (i + 1)); - buf[0] = mask | u; - } - - return i + 1; -} - -/* - * Decode a UTF8 character from an array of bytes. Return character code. - * Upon error, return INVALID_UTF8_CHAR. - */ -unsigned long decode_utf8(unsigned char *buf, int nbytes) -{ - unsigned long code; - int i, j; - - if (nbytes <= 0) - return INVALID_UTF8_CHAR; - - if (nbytes == 1) { - if (buf[0] >= 0x80) - return INVALID_UTF8_CHAR; - return buf[0]; - } - - i = high_bits_set(buf[0]); - if (i != nbytes) - return INVALID_UTF8_CHAR; - code = buf[0] & (0xff >> i); - for (j = 1; j < nbytes; ++j) { - if ((buf[j] & 0xc0) != 0x80) - return INVALID_UTF8_CHAR; - code = (code << 6) | (buf[j] & 0x3f); - } - - // Check for invalid code points - if (code == 0xFFFE) - return INVALID_UTF8_CHAR; - if (code == 0xFFFF) - return INVALID_UTF8_CHAR; - if (code >= 0xD800 && code <= 0xDFFF) - return INVALID_UTF8_CHAR; - - return code; -} - -int check_utf8(const char *buf, int len) -{ - unsigned char u[MAX_UTF8_SZ]; - int enc_len = 0; - int i = 0; - while (1) { - unsigned int c = buf[i]; - if (i >= len || c < 0x80 || (c & 0xC0) != 0x80) { - // the start of a new character. Process what we have - // in the buffer. - if (enc_len > 0) { - int re_encoded_len; - unsigned char re_encoded[MAX_UTF8_SZ]; - unsigned long code = decode_utf8(u, enc_len); - if (code == INVALID_UTF8_CHAR) { - //printf("decoded to invalid utf8"); - return i + 1; - } - re_encoded_len = encode_utf8(code, re_encoded); - if (enc_len != re_encoded_len) { - //printf("originally encoded as %d bytes, " - // "but was re-encoded to %d!\n", - // enc_len, re_encoded_len); - return i + 1; - } - if (memcmp(u, re_encoded, enc_len) != 0) { - //printf("re-encoded to a different " - // "byte stream!"); - return i + 1; - } - //printf("code_point %lu\n", code); - } - enc_len = 0; - if (i >= len) - break; - // start collecting again? - if (c >= 0x80) - u[enc_len++] = c; - } else { - if (enc_len == MAX_UTF8_SZ) { - //printf("too many enc_len in utf character!\n"); - return i + 1; - } - //printf("continuation byte...\n"); - u[enc_len++] = c; - } - ++i; - } - return 0; -} - -int check_utf8_cstr(const char *buf) -{ - return check_utf8(buf, strlen(buf)); -} - -int is_control_character(int c) -{ - return (((c != 0) && (c < 0x20)) || (c == 0x7f)); -} - -int check_for_control_characters(const char *buf, int len) -{ - int i; - for (i = 0; i < len; ++i) { - if (is_control_character((int)(unsigned char)buf[i])) { - return i + 1; - } - } - return 0; -} - -int check_for_control_characters_cstr(const char *buf) -{ - return check_for_control_characters(buf, strlen(buf)); -}