SNAP Library 4.0, Developer Reference
2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <unicode.h>
Public Types | |
enum | { DefaultReplacementChar = 0xfffd } |
Public Member Functions | |
TUniCodec () | |
TUniCodec (TUnicodeErrorHandling errorHandling_, bool strict_, int replacementChar_, bool skipBom_) | |
template<typename TSrcVec , typename TDestCh > | |
size_t | DecodeUtf8 (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true) const |
template<typename TSrcVec , typename TDestCh > | |
size_t | DecodeUtf8 (const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true) const |
template<typename TSrcVec , typename TDestCh > | |
size_t | EncodeUtf8 (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true) const |
template<typename TSrcVec , typename TDestCh > | |
size_t | EncodeUtf8 (const TSrcVec &src, TVec< TDestCh > &dest, const bool clrDest=true) const |
template<typename TSrcVec > | |
TStr | EncodeUtf8Str (const TSrcVec &src, size_t srcIdx, const size_t srcCount) const |
template<typename TSrcVec > | |
TStr | EncodeUtf8Str (const TSrcVec &src) const |
template<typename TSrcVec , typename TDestCh > | |
size_t | DecodeUtf16FromBytes (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const TUtf16BomHandling bomHandling=bomAllowed, const TUniByteOrder defaultByteOrder=boMachineEndian) const |
template<typename TSrcVec , typename TDestCh > | |
size_t | DecodeUtf16FromWords (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, bool clrDest, const TUtf16BomHandling bomHandling=bomAllowed, const TUniByteOrder defaultByteOrder=boMachineEndian) const |
template<typename TSrcVec , typename TDestCh > | |
size_t | EncodeUtf16ToWords (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool insertBom, const TUniByteOrder destByteOrder=boMachineEndian) const |
template<typename TSrcVec , typename TDestCh > | |
size_t | EncodeUtf16ToBytes (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool insertBom, const TUniByteOrder destByteOrder=boMachineEndian) const |
void | TestUtf8 () |
void | TestUtf16 () |
Public Attributes | |
int | replacementChar |
TUnicodeErrorHandling | errorHandling |
bool | strict |
bool | skipBom |
Protected Types | |
enum | { DefineByte =(1, 0, 0, 0, 0, 0, 0, 0), DefineByte =(1, 0, 0, 0, 0, 0, 0, 0), DefineByte =(1, 0, 0, 0, 0, 0, 0, 0), DefineByte =(1, 0, 0, 0, 0, 0, 0, 0), DefineByte =(1, 0, 0, 0, 0, 0, 0, 0), DefineByte =(1, 0, 0, 0, 0, 0, 0, 0), DefineByte =(1, 0, 0, 0, 0, 0, 0, 0), DefineByte =(1, 0, 0, 0, 0, 0, 0, 0), DefineByte =(1, 0, 0, 0, 0, 0, 0, 0), DefineByte =(1, 0, 0, 0, 0, 0, 0, 0), DefineByte =(1, 0, 0, 0, 0, 0, 0, 0), DefineByte =(1, 0, 0, 0, 0, 0, 0, 0), DefineByte =(1, 0, 0, 0, 0, 0, 0, 0) } |
enum | { Utf16FirstSurrogate = 0xd800, Utf16SecondSurrogate = 0xdc00 } |
typedef TUniVecIdx | TVecIdx |
Protected Member Functions | |
void | TestUtf8 (bool decode, size_t expectedRetVal, bool expectedThrow, const TIntV &src, const TIntV &expectedDest, FILE *f) |
void | TestDecodeUtf8 (TRnd &rnd, const TStr &testCaseDesc) |
void | WordsToBytes (const TIntV &src, TIntV &dest) |
void | TestUtf16 (bool decode, size_t expectedRetVal, bool expectedThrow, const TIntV &src, const TIntV &expectedDest, const TUtf16BomHandling bomHandling, const TUniByteOrder defaultByteOrder, const bool insertBom, FILE *f) |
void | TestDecodeUtf16 (TRnd &rnd, const TStr &testCaseDesc, const TUtf16BomHandling bomHandling, const TUniByteOrder defaultByteOrder, const bool insertBom) |
Static Protected Member Functions | |
static bool | IsMachineLittleEndian () |
static uint | GetRndUint (TRnd &rnd) |
static uint | GetRndUint (TRnd &rnd, uint minVal, uint maxVal) |
static int | SwapBytes (int x) |
Friends | |
class | TUniCaseFolding |
class | TUnicode |
|
protected |
anonymous enum |
|
protected |
Enumerator | |
---|---|
DefineByte | |
DefineByte | |
DefineByte | |
DefineByte | |
DefineByte | |
DefineByte | |
DefineByte | |
DefineByte | |
DefineByte | |
DefineByte | |
DefineByte | |
DefineByte | |
DefineByte |
Definition at line 101 of file unicode.h.
|
protected |
Enumerator | |
---|---|
Utf16FirstSurrogate | |
Utf16SecondSurrogate |
Definition at line 157 of file unicode.h.
|
inline |
Definition at line 91 of file unicode.h.
|
inline |
Definition at line 95 of file unicode.h.
size_t TUniCodec::DecodeUtf16FromBytes | ( | const TSrcVec & | src, |
size_t | srcIdx, | ||
const size_t | srcCount, | ||
TVec< TDestCh > & | dest, | ||
const bool | clrDest, | ||
const TUtf16BomHandling | bomHandling = bomAllowed , |
||
const TUniByteOrder | defaultByteOrder = boMachineEndian |
||
) | const |
Definition at line 2210 of file unicode.h.
References TVec< TVal, TSizeTy >::Add(), boBigEndian, boLittleEndian, boMachineEndian, bomAllowed, bomIgnored, bomRequired, TVec< TVal, TSizeTy >::Clr(), errorHandling, Fail, TInt::GetStr(), IAssert, IsMachineLittleEndian(), replacementChar, skipBom, strict, uehAbort, uehIgnore, uehReplace, uehThrow, Utf16FirstSurrogate, and Utf16SecondSurrogate.
Referenced by TUnicode::DecodeUtf16FromBytes(), and TestUtf16().
size_t TUniCodec::DecodeUtf16FromWords | ( | const TSrcVec & | src, |
size_t | srcIdx, | ||
const size_t | srcCount, | ||
TVec< TDestCh > & | dest, | ||
bool | clrDest, | ||
const TUtf16BomHandling | bomHandling = bomAllowed , |
||
const TUniByteOrder | defaultByteOrder = boMachineEndian |
||
) | const |
Definition at line 2294 of file unicode.h.
References TVec< TVal, TSizeTy >::Add(), boBigEndian, boLittleEndian, boMachineEndian, bomAllowed, bomIgnored, bomRequired, TVec< TVal, TSizeTy >::Clr(), errorHandling, Fail, TInt::GetStr(), IAssert, IsMachineLittleEndian(), replacementChar, skipBom, strict, uehAbort, uehIgnore, uehReplace, uehThrow, Utf16FirstSurrogate, and Utf16SecondSurrogate.
Referenced by TUnicode::DecodeUtf16FromWords(), and TestUtf16().
size_t TUniCodec::DecodeUtf8 | ( | const TSrcVec & | src, |
size_t | srcIdx, | ||
const size_t | srcCount, | ||
TVec< TDestCh > & | dest, | ||
const bool | clrDest = true |
||
) | const |
Definition at line 2036 of file unicode.h.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Clr(), errorHandling, Fail, TInt::GetStr(), replacementChar, skipBom, strict, uehAbort, uehIgnore, uehReplace, and uehThrow.
Referenced by TUnicode::DecodeUtf8(), TUniChDb::SbEx_AddUtf8(), and TestUtf8().
|
inline |
Definition at line 136 of file unicode.h.
References DecodeUtf8().
Referenced by DecodeUtf8().
size_t TUniCodec::EncodeUtf16ToBytes | ( | const TSrcVec & | src, |
size_t | srcIdx, | ||
const size_t | srcCount, | ||
TVec< TDestCh > & | dest, | ||
const bool | clrDest, | ||
const bool | insertBom, | ||
const TUniByteOrder | destByteOrder = boMachineEndian |
||
) | const |
Definition at line 2428 of file unicode.h.
References ___OutRepl, TVec< TVal, TSizeTy >::Add(), boLittleEndian, boMachineEndian, errorHandling, Fail, TUInt::GetStr(), IAssert, IsMachineLittleEndian(), uehAbort, uehIgnore, uehReplace, uehThrow, Utf16FirstSurrogate, and Utf16SecondSurrogate.
Referenced by TUnicode::EncodeUtf16ToBytes(), and TestUtf16().
size_t TUniCodec::EncodeUtf16ToWords | ( | const TSrcVec & | src, |
size_t | srcIdx, | ||
const size_t | srcCount, | ||
TVec< TDestCh > & | dest, | ||
const bool | clrDest, | ||
const bool | insertBom, | ||
const TUniByteOrder | destByteOrder = boMachineEndian |
||
) | const |
Definition at line 2376 of file unicode.h.
References TVec< TVal, TSizeTy >::Add(), boBigEndian, boLittleEndian, errorHandling, Fail, TUInt::GetStr(), IAssert, IsMachineLittleEndian(), replacementChar, SwapBytes(), uehAbort, uehIgnore, uehReplace, uehThrow, Utf16FirstSurrogate, and Utf16SecondSurrogate.
Referenced by TUnicode::EncodeUtf16ToWords(), and TestUtf16().
size_t TUniCodec::EncodeUtf8 | ( | const TSrcVec & | src, |
size_t | srcIdx, | ||
const size_t | srcCount, | ||
TVec< TDestCh > & | dest, | ||
const bool | clrDest = true |
||
) | const |
Definition at line 2152 of file unicode.h.
References TVec< TVal, TSizeTy >::Add(), errorHandling, Fail, TInt::GetStr(), replacementChar, strict, uehAbort, uehIgnore, uehReplace, and uehThrow.
Referenced by TUnicode::EncodeUtf8(), EncodeUtf8Str(), and TestUtf8().
|
inline |
Definition at line 145 of file unicode.h.
References EncodeUtf8().
Referenced by EncodeUtf8().
|
inline |
Definition at line 149 of file unicode.h.
References EncodeUtf8().
Referenced by TUnicode::EncodeUtf8Str().
|
inline |
Definition at line 150 of file unicode.h.
References TVec< TVal, TSizeTy >::Add(), and EncodeUtf8().
Definition at line 62 of file unicode.cpp.
References TRnd::GetUniDevUInt().
Referenced by GetRndUint(), TestDecodeUtf16(), and TestDecodeUtf8().
Definition at line 71 of file unicode.cpp.
References GetRndUint(), TUInt::Mn, and TUInt::Mx.
|
staticprotected |
Definition at line 83 of file unicode.cpp.
Referenced by DecodeUtf16FromBytes(), DecodeUtf16FromWords(), EncodeUtf16ToBytes(), EncodeUtf16ToWords(), TestDecodeUtf16(), TestUtf16(), and WordsToBytes().
|
inlinestaticprotected |
Definition at line 250 of file unicode.h.
Referenced by EncodeUtf16ToWords(), TestDecodeUtf16(), and TestUtf16().
|
protected |
Definition at line 341 of file unicode.cpp.
References TVec< TVal, TSizeTy >::Add(), boLittleEndian, boMachineEndian, bomRequired, TStr::CStr(), errorHandling, Fail, GetRndUint(), IAssert, IsMachineLittleEndian(), TStr::Len(), TVec< TVal, TSizeTy >::Len(), replacementChar, skipBom, strict, SwapBytes(), TestUtf16(), uehAbort, uehReplace, uehThrow, Utf16FirstSurrogate, and Utf16SecondSurrogate.
Referenced by TestUtf16().
Definition at line 133 of file unicode.cpp.
References TVec< TVal, TSizeTy >::Add(), TStr::CStr(), errorHandling, Fail, GetRndUint(), IAssert, TStr::Len(), TVec< TVal, TSizeTy >::Len(), replacementChar, skipBom, strict, TestUtf8(), uehAbort, uehReplace, and uehThrow.
Referenced by TestUtf8().
|
protected |
Definition at line 284 of file unicode.cpp.
References boBigEndian, boLittleEndian, bomAllowed, bomRequired, TVec< TVal, TSizeTy >::Clr(), TStr::CStr(), DecodeUtf16FromBytes(), DecodeUtf16FromWords(), EncodeUtf16ToBytes(), EncodeUtf16ToWords(), errorHandling, IAssert, TVec< TVal, TSizeTy >::Len(), TUnicodeException::message, replacementChar, skipBom, TUnicodeException::srcChar, TUnicodeException::srcIdx, strict, uehAbort, uehIgnore, uehReplace, uehThrow, and WordsToBytes().
void TUniCodec::TestUtf16 | ( | ) |
Definition at line 408 of file unicode.cpp.
References TVec< TVal, TSizeTy >::Add(), boLittleEndian, boMachineEndian, bomAllowed, bomRequired, TVec< TVal, TSizeTy >::Clr(), errorHandling, TVec< TVal, TSizeTy >::Gen(), IsMachineLittleEndian(), TVec< TVal, TSizeTy >::Len(), TUInt::Mx, replacementChar, TVec< TVal, TSizeTy >::Reserve(), skipBom, strict, SwapBytes(), TestDecodeUtf16(), uehReplace, uehThrow, Utf16FirstSurrogate, and Utf16SecondSurrogate.
Referenced by TestDecodeUtf16().
|
protected |
Definition at line 99 of file unicode.cpp.
References TStr::CStr(), DecodeUtf8(), EncodeUtf8(), errorHandling, IAssert, TVec< TVal, TSizeTy >::Len(), TUnicodeException::message, replacementChar, skipBom, TUnicodeException::srcChar, TUnicodeException::srcIdx, strict, uehAbort, uehIgnore, uehReplace, and uehThrow.
void TUniCodec::TestUtf8 | ( | ) |
Definition at line 194 of file unicode.cpp.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Clr(), EncodeUtf8(), errorHandling, TVec< TVal, TSizeTy >::Gen(), TUInt::Mx, replacementChar, TVec< TVal, TSizeTy >::Reserve(), skipBom, strict, TestDecodeUtf8(), uehReplace, and uehThrow.
Referenced by TestDecodeUtf8().
Definition at line 274 of file unicode.cpp.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Clr(), IsMachineLittleEndian(), and TVec< TVal, TSizeTy >::Len().
Referenced by TestUtf16().
|
friend |
TUnicodeErrorHandling TUniCodec::errorHandling |
Definition at line 66 of file unicode.h.
Referenced by DecodeUtf16FromBytes(), DecodeUtf16FromWords(), DecodeUtf8(), EncodeUtf16ToBytes(), EncodeUtf16ToWords(), EncodeUtf8(), TestDecodeUtf16(), TestDecodeUtf8(), TestUtf16(), and TestUtf8().
int TUniCodec::replacementChar |
Definition at line 64 of file unicode.h.
Referenced by DecodeUtf16FromBytes(), DecodeUtf16FromWords(), DecodeUtf8(), EncodeUtf16ToWords(), EncodeUtf8(), TestDecodeUtf16(), TestDecodeUtf8(), TestUtf16(), and TestUtf8().
bool TUniCodec::skipBom |
Definition at line 89 of file unicode.h.
Referenced by DecodeUtf16FromBytes(), DecodeUtf16FromWords(), DecodeUtf8(), TestDecodeUtf16(), TestDecodeUtf8(), TestUtf16(), and TestUtf8().
bool TUniCodec::strict |
Definition at line 83 of file unicode.h.
Referenced by DecodeUtf16FromBytes(), DecodeUtf16FromWords(), DecodeUtf8(), EncodeUtf8(), TestDecodeUtf16(), TestDecodeUtf8(), TestUtf16(), and TestUtf8().