1
This commit is contained in:
parent
90512323d4
commit
a0d01a57a5
@ -398,4 +398,60 @@ namespace a8
|
||||
return strcmp(s1 ? s1 : "", s2 ? s2 : "");
|
||||
}
|
||||
|
||||
// Counts the UTF-8 encoded characters in the NUL-terminated string `str`.
//
// The string is walked lead byte by lead byte: each lead byte announces how
// many bytes its sequence occupies, and that many bytes are skipped as one
// character. Continuation bytes are not validated.
//
// @param str  NUL-terminated byte string; may be null.
// @return     Number of characters found; 0 for a null or empty string.
//             A multi-byte sequence that is cut short by the end of the
//             string is counted as one character.
size_t GetUtf8Length(const char *str)
{
    // Sequence length implied by every possible first byte.
    //   0x00-0x7F  ASCII                         -> 1
    //   0x80-0xBF  stray continuation byte       -> 1 (so malformed input still advances)
    //   0xC0-0xDF  two-byte lead                 -> 2
    //   0xE0-0xEF  three-byte lead               -> 3
    //   0xF0-0xF7  four-byte lead                -> 4
    //   0xF8-0xFB / 0xFC-0xFD  legacy 5/6-byte leads (pre-RFC 3629 UTF-8)
    //   0xFE-0xFF  never valid in UTF-8          -> 1
    static const unsigned char utf8_look_for_table[256] =
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
    };

    // Treat a null pointer like an empty string instead of handing it to strlen.
    if (!str) {
        return 0;
    }

    const size_t byte_len = strlen(str);
    size_t char_count = 0;

    // Walk by byte offset and stop as soon as the offset reaches or passes
    // the end. A lead byte that promises more bytes than remain would
    // otherwise carry the cursor beyond the terminator, and the next read
    // would land outside the string.
    for (size_t pos = 0; pos < byte_len; ++char_count) {
        pos += utf8_look_for_table[static_cast<unsigned char>(str[pos])];
    }

    return char_count;
}
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ namespace a8
|
||||
// NOTE(review): definition not visible here; presumably renders val as a string of fixed width n - confirm.
std::string IntToFixedString(int val, int n);
|
||||
// NOTE(review): definition not visible here; presumably loads the contents of filename into data and returns whether that succeeded - confirm.
bool ReadStringFromFile(const std::string& filename, std::string& data);
|
||||
// strcmp wrapper: a null s1 or s2 is compared as if it were the empty string.
int SafeStrCmp(const char* s1, const char* s2);
|
||||
|
||||
// Counts the UTF-8 characters in the NUL-terminated string str, stepping by the sequence length implied by each lead byte.
size_t GetUtf8Length(const char *str);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user