1
This commit is contained in:
parent
90512323d4
commit
a0d01a57a5
@ -398,4 +398,60 @@ namespace a8
|
|||||||
return strcmp(s1 ? s1 : "", s2 ? s2 : "");
|
return strcmp(s1 ? s1 : "", s2 ? s2 : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Counts the characters (encoded sequences) in a NUL-terminated UTF-8 string.
//
// Each character is sized from its lead byte alone; continuation bytes are
// not validated. A stray continuation byte (0x80-0xBF) or an invalid lead
// byte (0xFE/0xFF) is counted as a one-byte character.
//
// str:     NUL-terminated byte string; a null pointer is treated as empty.
// returns: number of characters found before the terminator.
size_t GetUtf8Length(const char *str)
{
    // Sequence length implied by every possible lead byte value.
    // 0x00-0xBF -> 1, 0xC0-0xDF -> 2, 0xE0-0xEF -> 3, 0xF0-0xF7 -> 4,
    // 0xF8-0xFB -> 5 and 0xFC-0xFD -> 6 (legacy pre-RFC 3629 forms),
    // 0xFE-0xFF -> 1.
    static const unsigned char kUtf8SeqLen[256] =
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
    };

    if (str == nullptr) {
        return 0;
    }

    const char* const end = str + strlen(str);
    size_t count = 0;
    for (const char* ptr = str; ptr < end; ++count) {
        const size_t step = kUtf8SeqLen[static_cast<unsigned char>(*ptr)];
        const size_t remaining = static_cast<size_t>(end - ptr);
        // A truncated trailing sequence must not carry the cursor past the
        // terminator, otherwise the next read would be out of bounds.
        ptr += (step < remaining) ? step : remaining;
    }
    return count;
}
|
||||||
}
|
}
|
||||||
|
@ -28,7 +28,7 @@ namespace a8
|
|||||||
std::string IntToFixedString(int val, int n);
|
std::string IntToFixedString(int val, int n);
|
||||||
bool ReadStringFromFile(const std::string& filename, std::string& data);
|
bool ReadStringFromFile(const std::string& filename, std::string& data);
|
||||||
int SafeStrCmp(const char* s1, const char* s2);
|
int SafeStrCmp(const char* s1, const char* s2);
|
||||||
|
// Returns the number of UTF-8 characters in the NUL-terminated string `str`.
// Each character's byte length is taken from its lead byte; continuation
// bytes are not validated.
size_t GetUtf8Length(const char *str);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user