This commit is contained in:
aozhiwei 2020-10-13 17:16:06 +08:00
parent 90512323d4
commit a0d01a57a5
2 changed files with 57 additions and 1 deletions

View File

@ -398,4 +398,60 @@ namespace a8
return strcmp(s1 ? s1 : "", s2 ? s2 : ""); return strcmp(s1 ? s1 : "", s2 ? s2 : "");
} }
// Returns the number of UTF-8 encoded characters in the NUL-terminated string
// `str`. A null pointer is treated as an empty string and yields 0.
//
// The byte length of every character is taken from its lead byte. Stray
// continuation bytes (0x80-0xBF) and the invalid bytes 0xFE/0xFF are counted
// as one character each, and the legacy 5- and 6-byte forms are still
// recognised. The input is not validated beyond that: a multi-byte sequence
// that is cut short by the end of the string counts as a single character.
size_t GetUtf8Length(const char *str)
{
    // Sequence length in bytes, indexed by the value of the lead byte.
    static const unsigned char kSequenceLength[256] =
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
    };

    if (!str) {
        return 0;
    }

    const size_t byte_count = strlen(str);
    size_t char_count = 0;
    // Walk by byte offset and stop at byte_count, so that a truncated
    // multi-byte sequence at the end of the string can never move the cursor
    // beyond the terminating NUL.
    for (size_t offset = 0;
         offset < byte_count;
         offset += kSequenceLength[static_cast<unsigned char>(str[offset])]) {
        ++char_count;
    }
    return char_count;
}
} }

View File

@ -28,7 +28,7 @@ namespace a8
std::string IntToFixedString(int val, int n); std::string IntToFixedString(int val, int n);
bool ReadStringFromFile(const std::string& filename, std::string& data); bool ReadStringFromFile(const std::string& filename, std::string& data);
int SafeStrCmp(const char* s1, const char* s2); int SafeStrCmp(const char* s1, const char* s2);
// Counts the characters in the NUL-terminated UTF-8 string `str`, stepping
// over each sequence by the byte length that its lead byte indicates.
size_t GetUtf8Length(const char *str);
} }
#endif #endif