SNAP Library , Developer Reference  2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
TUniCaseFolding Class Reference

#include <unicode.h>

Collaboration diagram for TUniCaseFolding:

List of all members.

Public Member Functions

 TUniCaseFolding ()
 TUniCaseFolding (TSIn &SIn)
void Load (TSIn &SIn)
void Save (TSOut &SOut) const
void Clr ()
void LoadTxt (const TStr &fileName)
template<typename TSrcVec , typename TDestCh >
void Fold (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool full, const bool turkic) const
template<typename TSrcVec >
void FoldInPlace (TSrcVec &src, size_t srcIdx, const size_t srcCount, const bool turkic) const
void Test ()

Protected Types

typedef TUniVecIdx TVecIdx

Protected Member Functions

void Test (const TIntV &src, const TIntV &expectedDest, const bool full, const bool turkic, FILE *f)

Static Protected Member Functions

template<typename TSrcDat , typename TDestDat >
static void AppendVector (const TVec< TSrcDat > &src, TVec< TDestDat > &dest)

Protected Attributes

TIntH cfCommon
TIntH cfSimple
TIntH cfTurkic
TIntIntVH cfFull

Friends

class TUniChDb

Detailed Description

Definition at line 270 of file unicode.h.


Member Typedef Documentation

typedef TUniVecIdx TUniCaseFolding::TVecIdx [protected]

Definition at line 280 of file unicode.h.


Constructor & Destructor Documentation

TUniCaseFolding::TUniCaseFolding ( ) [inline]

Definition at line 283 of file unicode.h.

{ /* intentionally empty: no member initializers, so the four mapping tables are default-constructed */ }
TUniCaseFolding::TUniCaseFolding ( TSIn &  SIn) [inline, explicit]

Definition at line 284 of file unicode.h.

References TSIn::LoadCs().

: cfCommon(SIn), cfSimple(SIn), cfTurkic(SIn), cfFull(SIn) { SIn.LoadCs(); }

Here is the call graph for this function:


Member Function Documentation

template<typename TSrcDat , typename TDestDat >
static void TUniCaseFolding::AppendVector ( const TVec< TSrcDat > &  src,
TVec< TDestDat > &  dest 
) [inline, static, protected]

Definition at line 277 of file unicode.h.

References TVec< TVal >::Add(), and TVec< TVal >::Len().

Referenced by Fold(), and TUniChDb::GetCaseConverted().

                                                                                        {
                // Append every element of src, in order, to the end of dest.
                // The length is re-read on every pass, exactly as a plain indexed loop would do.
                int pos = 0;
                while (pos < src.Len()) {
                        dest.Add(src[pos]);
                        pos++;
                }
        }

Here is the call graph for this function:

Here is the caller graph for this function:

void TUniCaseFolding::Clr ( ) [inline]

Definition at line 287 of file unicode.h.

References cfCommon, cfFull, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::Clr().

Referenced by TUniChDb::Clr(), and LoadTxt().

Here is the call graph for this function:

Here is the caller graph for this function:

template<typename TSrcVec , typename TDestCh >
void TUniCaseFolding::Fold ( const TSrcVec &  src,
size_t  srcIdx,
const size_t  srcCount,
TVec< TDestCh > &  dest,
const bool  clrDest,
const bool  full,
const bool  turkic 
) const [inline]

Definition at line 292 of file unicode.h.

References TVec< TVal >::Add(), AppendVector(), cfCommon, cfFull, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::GetKeyId().

Referenced by TUniChDb::GetCaseFolded(), and Test().

        {
                // Case-folds srcCount code points of src, starting at srcIdx, into dest.
                //
                // Lookup precedence for each code point:
                //   1. cfTurkic, only when turkic is set;
                //   2. cfFull when full is set (may append several code points), otherwise cfSimple;
                //   3. cfCommon;
                //   4. no mapping found: the code point is appended unchanged.
                //
                // Honor clrDest: empty dest first when requested; otherwise the folded output is
                // appended after whatever dest already holds.
                // NOTE(review): src and dest should be distinct objects, in particular when clrDest is true.
                if (clrDest) dest.Clr();
                for (const size_t srcEnd = srcIdx + srcCount; srcIdx < srcEnd; )
                {
                        // c is the current code point; i receives the key id of a successful lookup.
                        int c = src[TVecIdx(srcIdx)], i; srcIdx++;
                        if (turkic && ((i = cfTurkic.GetKeyId(c)) >= 0)) { dest.Add(cfTurkic[i]); continue; }
                        if (full && ((i = cfFull.GetKeyId(c)) >= 0)) { AppendVector(cfFull[i], dest); continue; }
                        if ((! full) && ((i = cfSimple.GetKeyId(c)) >= 0)) { dest.Add(cfSimple[i]); continue; }
                        i = cfCommon.GetKeyId(c); if (i >= 0) dest.Add(cfCommon[i]); else dest.Add(c);
                }
        }

Here is the call graph for this function:

Here is the caller graph for this function:

template<typename TSrcVec >
void TUniCaseFolding::FoldInPlace ( TSrcVec &  src,
size_t  srcIdx,
const size_t  srcCount,
const bool  turkic 
) const [inline]

Definition at line 306 of file unicode.h.

References cfCommon, cfSimple, cfTurkic, and THash< TKey, TDat, THashFunc >::GetKeyId().

Referenced by TUniChDb::ToCaseFolded().

        {
                // Overwrites each of the srcCount code points starting at srcIdx with its
                // one-to-one folding. Tables are consulted in the order cfTurkic (only when turkic
                // is set), cfSimple, cfCommon; a code point found in none of them is left untouched.
                const size_t srcEnd = srcIdx + srcCount;
                while (srcIdx < srcEnd)
                {
                        const int ch = src[TVecIdx(srcIdx)];
                        int keyId;
                        if (turkic && ((keyId = cfTurkic.GetKeyId(ch)) >= 0)) {
                                src[TVecIdx(srcIdx)] = cfTurkic[keyId];
                        } else if ((keyId = cfSimple.GetKeyId(ch)) >= 0) {
                                src[TVecIdx(srcIdx)] = cfSimple[keyId];
                        } else if ((keyId = cfCommon.GetKeyId(ch)) >= 0) {
                                src[TVecIdx(srcIdx)] = cfCommon[keyId];
                        }
                        srcIdx++;
                }
        }

Here is the call graph for this function:

Here is the caller graph for this function:

void TUniCaseFolding::Load ( TSIn &  SIn) [inline]

Definition at line 285 of file unicode.h.

References cfCommon, cfFull, cfSimple, cfTurkic, THash< TKey, TDat, THashFunc >::Load(), and TSIn::LoadCs().

Referenced by TUniChDb::Load().

{
        // Read the four tables in the order common, simple, full, Turkic,
        // then let the stream process its trailing LoadCs() record.
        cfCommon.Load(SIn);
        cfSimple.Load(SIn);
        cfFull.Load(SIn);
        cfTurkic.Load(SIn);
        SIn.LoadCs();
}

Here is the call graph for this function:

Here is the caller graph for this function:

void TUniCaseFolding::LoadTxt ( const TStr &  fileName)

Definition at line 509 of file unicode.cpp.

References THash< TKey, TDat, THashFunc >::AddDat(), cfCommon, cfFull, cfSimple, cfTurkic, Clr(), TStr::CStr(), FailR, TUniChDb::TUcdFileReader::GetNextLine(), IAssert, THash< TKey, TDat, THashFunc >::IsKey(), THash< TKey, TDat, THashFunc >::Len(), TUniChDb::TUcdFileReader::Open(), TUniChDb::TUcdFileReader::ParseCodePoint(), and TUniChDb::TUcdFileReader::ParseCodePointList().

Referenced by TUniChDb::LoadTxt().

{
        // Rebuilds all four folding tables from the text file fileName.
        // Existing contents are discarded first, so a reload never mixes old and new mappings.
        Clr();
        TUniChDb::TUcdFileReader reader;
        reader.Open(fileName);
        TStrV fields;
        while (reader.GetNextLine(fields))
        {
                // Record layout used here: fields[0] = code point, fields[1] = status, fields[2] = mapping.
                int cp = reader.ParseCodePoint(fields[0]);
                const TStr status = fields[1], mapsTo = fields[2];
                const bool isCommon = (status == "C");
                const bool isSimple = (status == "S");
                const bool isTurkic = (status == "T");
                if (isCommon || isSimple || isTurkic) {
                        // C, S and T records map to exactly one code point.
                        TIntH &dest = isCommon ? cfCommon : (isSimple ? cfSimple : cfTurkic);
                        IAssert(! dest.IsKey(cp));
                        int cp2 = reader.ParseCodePoint(mapsTo);
                        dest.AddDat(cp, cp2);
                }
                else if (status == "F") {
                        // F records map to a non-empty list of code points.
                        TIntIntVH &dest = cfFull;
                        IAssert(! dest.IsKey(cp));
                        TIntV cps;
                        reader.ParseCodePointList(mapsTo, cps);
                        IAssert(cps.Len() > 0);
                        dest.AddDat(cp, cps);
                }
                else {
                        // Any other status letter is reported as a failure.
                        FailR(status.CStr());
                }
        }
        // Summary of how many mappings ended up in each table.
        printf("TUniCaseFolding(\"%s\"): %d common, %d simple, %d full, %d Turkic.\n",
                fileName.CStr(), cfCommon.Len(), cfSimple.Len(), cfFull.Len(), cfTurkic.Len());
}

Here is the call graph for this function:

Here is the caller graph for this function:

void TUniCaseFolding::Save ( TSOut &  SOut) const [inline]

Definition at line 286 of file unicode.h.

References cfCommon, cfFull, cfSimple, cfTurkic, THash< TKey, TDat, THashFunc >::Save(), and TSOut::SaveCs().

Referenced by TUniChDb::Save().

{
        // Write the four tables in the order common, simple, full, Turkic,
        // then let the stream emit its trailing SaveCs() record.
        cfCommon.Save(SOut);
        cfSimple.Save(SOut);
        cfFull.Save(SOut);
        cfTurkic.Save(SOut);
        SOut.SaveCs();
}

Here is the call graph for this function:

Here is the caller graph for this function:

void TUniCaseFolding::Test ( const TIntV &  src,
const TIntV &  expectedDest,
const bool  full,
const bool  turkic,
FILE *  f 
) [protected]

Definition at line 535 of file unicode.cpp.

References Fold(), IAssert, and TVec< TVal >::Len().

Referenced by TUniChDb::Test().

{
        // Folds src with the requested options, logs input and output to f as hex code points,
        // and asserts that the output equals expectedDest.
        const char *modeName = (full ? "full" : "simple");
        const char *turkicSuffix = (turkic ? ", turkic" : "");
        fprintf(f, "TUniCaseFolding(%s%s): ", modeName, turkicSuffix);
        int pos = 0;
        while (pos < src.Len()) {
                fprintf(f, " %04x", int(src[pos]));
                pos++;
        }
        // Fold the whole input into a fresh vector.
        TIntV dest;
        Fold(src, 0, src.Len(), dest, true, full, turkic);
        fprintf(f, "\n  -> ");
        pos = 0;
        while (pos < dest.Len()) {
                fprintf(f, " %04x", int(dest[pos]));
                pos++;
        }
        fprintf(f, "\n");
        // Lengths must agree before the element-wise comparison.
        IAssert(dest.Len() == expectedDest.Len());
        for (int i = 0; i < dest.Len(); i++) {
                IAssert(dest[i] == expectedDest[i]);
        }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void TUniCaseFolding::Test ( )

Definition at line 553 of file unicode.cpp.

References anonymous_namespace{unicode.cpp}::VB.

{
        // Self-test: folds the same five code points (0x41 0x62 0x49 0x43 0xdf) under each
        // combination of the full/turkic options and checks the result; the log goes to stderr.
        FILE *f = stderr;
        // VB builds the argument vectors via the (VB, a, b, ...) comma syntax used below.
        TVectorBuilder VB;
        // simple: one-to-one mappings only, so 0xdf is expected to stay unchanged
        Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x69, 0x63, 0xdf), false, false, f);
        // simple + turkic: 0x49 is expected to fold to 0x131 instead of 0x69
        Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x131, 0x63, 0xdf), false, true, f);
        // full: 0xdf is expected to expand to the two code points 0x73 0x73
        Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x69, 0x63, 0x73, 0x73), true, false, f);
        // full + turkic: both the 0x49 -> 0x131 mapping and the 0xdf expansion apply
        Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x131, 0x63, 0x73, 0x73), true, true, f);
}

Friends And Related Function Documentation

friend class TUniChDb [friend]

Definition at line 279 of file unicode.h.


Member Data Documentation

TIntH TUniCaseFolding::cfCommon [protected]

Definition at line 273 of file unicode.h.

Referenced by Clr(), Fold(), FoldInPlace(), Load(), LoadTxt(), and Save().

TIntIntVH TUniCaseFolding::cfFull [protected]

Definition at line 274 of file unicode.h.

Referenced by Clr(), Fold(), Load(), LoadTxt(), and Save().

TIntH TUniCaseFolding::cfSimple [protected]

Definition at line 273 of file unicode.h.

Referenced by Clr(), Fold(), FoldInPlace(), Load(), LoadTxt(), and Save().

TIntH TUniCaseFolding::cfTurkic [protected]

Definition at line 273 of file unicode.h.

Referenced by Clr(), Fold(), FoldInPlace(), Load(), LoadTxt(), and Save().


The documentation for this class was generated from the following files: