Codebase list fcitx-unikey / 2d419e84-daa7-4f30-8835-f2179a197202/main unikey / charset.h
2d419e84-daa7-4f30-8835-f2179a197202/main

Tree @2d419e84-daa7-4f30-8835-f2179a197202/main (Download .tar.gz)

charset.h @2d419e84-daa7-4f30-8835-f2179a197202/mainraw · history · blame

// -*- coding:unix; mode:c++; tab-width:4; c-basic-offset:4; indent-tabs-mode:nil -*-
/*------------------------------------------------------------------------------
VnConv: Vietnamese Encoding Converter Library
UniKey Project: http://unikey.sourceforge.net
Copyleft (C) 1998-2002 Pham Kim Long
Contact: longp@cslab.felk.cvut.cz

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
--------------------------------------------------------------------------------*/

#ifndef __CHARSET_CONVERT_H
#define __CHARSET_CONVERT_H

#if !defined(_WIN32)
  #include <stdint.h>
#endif

/*
 * Import/export decoration applied to the classes, objects and functions
 * this header marks with DllInterface.
 *   - Outside Windows the decoration macros expand to nothing.
 *   - On Windows DllInterface is dllexport when UNIKEYHOOK is defined
 *     and dllimport otherwise.
 * DllExport / DllImport are only defined (as empty) outside Windows.
 */
#ifndef _WIN32
#  define DllInterface
#  define DllExport
#  define DllImport
#elif defined(UNIKEYHOOK)
#  define DllInterface __declspec(dllexport)
#else
#  define DllInterface __declspec(dllimport)
#endif

#include "vnconv.h"
#include "byteio.h"
#include "pattern.h"

// Number of entries in each Vietnamese character table; used in this header
// as the array dimension of the conversion tables and per-charset arrays.
#define TOTAL_VNCHARS 213
// Not referenced in this header; presumably the number of alphabetic entries
// among the TOTAL_VNCHARS characters -- TODO confirm against the tables.
#define TOTAL_ALPHA_VNCHARS 186

// Fixed-width integer aliases used by the charset classes.
//   StdVnChar, UKDWORD  : unsigned 32-bit
//   UnicodeChar, UKWORD : unsigned 16-bit
// Plain "unsigned int" / "unsigned short" were used here previously; the
// aliases are now tied to exact-width types because the size needs to be
// specific.
#ifndef _WIN32
typedef uint32_t StdVnChar;
typedef uint32_t UKDWORD;
typedef uint16_t UnicodeChar;
typedef uint16_t UKWORD;
#else
typedef unsigned __int32 StdVnChar;
typedef unsigned __int32 UKDWORD;
typedef unsigned __int16 UnicodeChar;
typedef unsigned __int16 UKWORD;
#endif

// Word-packing helpers. Each one is defined only if the name is not already
// a macro (e.g. supplied by a platform header).

// Low 16 bits of l.
#ifndef LOWORD
#  define LOWORD(l) ((UKWORD)(l))
#endif

// Bits 16..31 of l, as a 16-bit value.
#ifndef HIWORD
#  define HIWORD(l) ((UKWORD)((((UKDWORD)(l)) >> 16) & 0xFFFF))
#endif

// 16-bit value whose low byte is a and whose high byte is b
// (both arguments are truncated to one byte first).
#ifndef MAKEWORD
#  define MAKEWORD(a, b) ((UKWORD)(((UKBYTE)(a)) | (((UKWORD)((UKBYTE)(b))) << 8)))
#endif

// Constants shared by the charset implementations (their use is in the
// implementation files, not in this header).
const StdVnChar VnStdCharOffset  = 0x10000;
const StdVnChar INVALID_STD_CHAR = 0xFFFFFFFF;

// Pad characters. PadChar is '#' rather than '?' because '?' is used
// for the VIQR charset.
const unsigned char PadChar       = '#';
const unsigned char PadStartQuote = '"';
const unsigned char PadEndQuote   = '"';
const unsigned char PadEllipsis   = '.';

// Abstract base class of every charset in this header.
// Subclasses must implement nextInput() and putChar(); startInput() and
// startOutput() are optional hooks whose default bodies do nothing.
class DllInterface VnCharset {
public:
    virtual void startInput() {}
    virtual void startOutput() {}

    //------------------------------------------------------------------------
    // get the next character from the input stream
    // Arguments:
    //     is: input stream
    //     stdChar: passed by non-const reference; presumably receives the
    //              character in standard charset
    //     bytesRead: passed by non-const reference; presumably receives the
    //                number of bytes consumed
    // Returns: int; its meaning is defined by the implementations
    //          (not visible in this header)
    //------------------------------------------------------------------------
    virtual int nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead) = 0;

    //------------------------------------------------------------------------
    // put a character to the output after converting it
    // Arguments:
    //     os: output stream
    //     stdChar[in]: character in standard charset
    //     outLen[out]: length of converted sequence
    // Returns: int; its meaning is defined by the implementations
    //          (not visible in this header)
    //------------------------------------------------------------------------
    virtual int putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen) = 0;

    // Non-pure virtual; the base implementation is defined out of line.
    virtual int elementSize();

    virtual ~VnCharset() {}
};

//--------------------------------------------------
// Charset constructed from a table of unsigned char values.
class SingleByteCharset : public VnCharset {
protected:
    // 256-entry table; presumably indexed by byte value -- confirm in the
    // implementation.
    UKWORD m_stdMap[256];
    // Presumably the table handed to the constructor -- confirm.
    unsigned char *m_vnChars;

public:
    SingleByteCharset(unsigned char *vnChars);

    virtual int nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead);
    virtual int putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen);
};

//--------------------------------------------------
// Charset with no data members of its own; overrides the stream
// operations and elementSize() of VnCharset.
class VnInternalCharset : public VnCharset {
public:
    VnInternalCharset() {}

    virtual int nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead);
    virtual int putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen);
    virtual int elementSize();
};

//--------------------------------------------------
// Charset constructed from a table of UnicodeChar (16-bit) values.
// Base class of the UTF-8, reference and C-string variants in this header.
class UnicodeCharset : public VnCharset {
protected:
    // One slot per Vietnamese character; filled out of line.
    UKDWORD m_vnChars[TOTAL_VNCHARS];
    // Presumably the table handed to the constructor -- confirm.
    UnicodeChar *m_toUnicode;

public:
    UnicodeCharset(UnicodeChar *vnChars);

    virtual int nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead);
    virtual int putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen);
    virtual int elementSize();
};

//--------------------------------------------------
// Charset constructed from a table of UKWORD (16-bit) values.
class DoubleByteCharset : public VnCharset {
protected:
    // 256-entry table; presumably indexed by byte value -- confirm.
    UKWORD m_stdMap[256];
    // One slot per Vietnamese character; filled out of line.
    UKDWORD m_vnChars[TOTAL_VNCHARS];
    // Presumably the table handed to the constructor -- confirm.
    UKWORD *m_toDoubleChar;

public:
    DoubleByteCharset(UKWORD *vnChars);

    virtual int nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead);
    virtual int putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen);
};

//--------------------------------------------------
// UnicodeCharset variant that replaces both stream operations; the
// constructor only forwards the table to UnicodeCharset.
class UnicodeUTF8Charset : public UnicodeCharset {
public:
    UnicodeUTF8Charset(UnicodeChar *vnChars)
        : UnicodeCharset(vnChars)
    {}

    virtual int nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead);
    virtual int putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen);
};

//--------------------------------------------------
// UnicodeCharset variant that replaces both stream operations
// (presumably numeric character references -- confirm in the
// implementation). The constructor only forwards the table to
// UnicodeCharset.
class UnicodeRefCharset : public UnicodeCharset {
public:
    UnicodeRefCharset(UnicodeChar *vnChars)
        : UnicodeCharset(vnChars)
    {}

    virtual int nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead);
    virtual int putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen);
};

//--------------------------------------------------
// Same as UnicodeRefCharset on input (nextInput is inherited); only the
// output side, putChar(), is overridden.
class UnicodeHexCharset : public UnicodeRefCharset {
public:
    UnicodeHexCharset(UnicodeChar *vnChars)
        : UnicodeRefCharset(vnChars)
    {}

    virtual int putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen);
};

//--------------------------------------------------
// UnicodeCharset variant that replaces both stream operations and the
// startInput() hook, and keeps one piece of state, m_prevIsHex.
class UnicodeCStringCharset : public UnicodeCharset
{
protected:
    // Flag carried between calls; presumably records whether the previous
    // character was emitted/read as a hex escape -- confirm in the
    // implementation.
    int m_prevIsHex;

public:
    // m_prevIsHex is initialized here so that it never holds an
    // indeterminate value: startOutput() is not overridden, so nothing
    // visible in this header guarantees the flag is assigned before a first
    // putChar() call.
    UnicodeCStringCharset(UnicodeChar *vnChars)
        : UnicodeCharset(vnChars), m_prevIsHex(0)
    {}

    virtual int nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead);
    virtual int putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen);
    virtual void startInput();
};

//--------------------------------------------------
// Charset constructed from two UKWORD tables (composite and precomposed
// characters).
class WinCP1258Charset : public VnCharset {
protected:
    // 256-entry table; presumably indexed by byte value -- confirm.
    UKWORD m_stdMap[256];
    // Twice TOTAL_VNCHARS slots; presumably room for entries from both
    // constructor tables -- confirm.
    UKDWORD m_vnChars[TOTAL_VNCHARS*2];
    UKWORD *m_toDoubleChar;
    // Presumably the number of m_vnChars slots actually in use -- confirm.
    int m_totalChars;

public:
    WinCP1258Charset(UKWORD *compositeChars, UKWORD *precomposedChars);

    virtual int nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead);
    virtual int putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen);
};

//--------------------------------------------------
// Element type of UnicodeCompCharset::m_info: a 32-bit composite value
// paired with an index (presumably into the standard character table --
// confirm).
struct UniCompCharInfo {
    UKDWORD compChar;
    int     stdIndex;
};

// Charset constructed from a UnicodeChar table plus a UKDWORD table of
// composite characters.
class UnicodeCompCharset : public VnCharset {
protected:
    // Twice TOTAL_VNCHARS slots of (compChar, stdIndex) pairs; filled out
    // of line.
    UniCompCharInfo m_info[TOTAL_VNCHARS*2];
    // Presumably the uniCompChars table handed to the constructor -- confirm.
    UKDWORD *m_uniCompChars;
    // Presumably the number of m_info slots actually in use -- confirm.
    int m_totalChars;

public:
    UnicodeCompCharset(UnicodeChar *uniChars, UKDWORD *uniCompChars);

    virtual int nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead);
    virtual int putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen);
    virtual int elementSize();
};

//--------------------------------------------------
// Charset constructed from a UKDWORD table. Overrides both start hooks as
// well as the stream operations, and carries several int state/option
// fields whose handling lives in the implementation file.
class VIQRCharset : public VnCharset {
protected:
    // Presumably the table handed to the constructor -- confirm.
    UKDWORD *m_vnChars;
    // 256-entry table; presumably indexed by byte value -- confirm.
    UKWORD m_stdMap[256];

    // Conversion state and escape options (semantics are not visible in
    // this header).
    int m_atWordBeginning;
    int m_escapeBowl;
    int m_escapeRoof;
    int m_escapeHook;
    int m_escapeTone;
    int m_gotTone;
    int m_escAll;
    int m_noOutEsc;

public:
    // Publicly readable/writable flag.
    int m_suspicious;

    VIQRCharset(UKDWORD *vnChars);

    virtual void startInput();
    virtual void startOutput();
    virtual int nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead);
    virtual int putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen);
};

//--------------------------------------------------
// Charset that holds pointers to a UnicodeUTF8Charset and a VIQRCharset
// (presumably delegating to them -- confirm in the implementation).
class UTF8VIQRCharset : public VnCharset {
protected:
    VIQRCharset *m_pViqr;
    UnicodeUTF8Charset *m_pUtf;

public:
    UTF8VIQRCharset(UnicodeUTF8Charset *pUtf, VIQRCharset *pViqr);

    virtual void startInput();
    virtual void startOutput();
    virtual int nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead);
    virtual int putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen);
};


//--------------------------------------------------
// Holder of one pointer per charset object; getVnCharset() hands out a
// VnCharset for a charset index.
// NOTE(review): the class stores raw pointers and declares a destructor but
// no copy constructor / assignment operator. If the destructor frees these
// objects, copying an instance would be unsafe -- confirm in the
// implementation.
class DllInterface CVnCharsetLib {
protected:
    // Table-driven charsets, one slot per single-/double-byte charset.
    SingleByteCharset *m_sgCharsets[CONV_TOTAL_SINGLE_CHARSETS];
    DoubleByteCharset *m_dbCharsets[CONV_TOTAL_DOUBLE_CHARSETS];

    // One pointer for each of the remaining charset classes.
    UnicodeCharset *m_pUniCharset;
    UnicodeCompCharset *m_pUniCompCharset;
    UnicodeUTF8Charset *m_pUniUTF8;
    UnicodeRefCharset *m_pUniRef;
    UnicodeHexCharset *m_pUniHex;
    VIQRCharset *m_pVIQRCharObj;
    UTF8VIQRCharset *m_pUVIQRCharObj;
    WinCP1258Charset *m_pWinCP1258;
    UnicodeCStringCharset *m_pUniCString;
    VnInternalCharset *m_pVnIntCharset;

public:
    PatternList m_VIQREscPatterns, m_VIQROutEscPatterns;
    VnConvOptions m_options;

    CVnCharsetLib();
    ~CVnCharsetLib();

    // Returns the charset object for charsetIdx (the set of valid indices
    // and the failure value are defined by the implementation).
    VnCharset *getVnCharset(int charsetIdx);
};

// Character tables, declared here and defined in another source file.
// Every table (or table row) has TOTAL_VNCHARS entries. The element types
// match the constructor arguments of the charset classes above; presumably
// each table is the one passed to the correspondingly named class -- confirm.
extern unsigned char SingleByteTables[][TOTAL_VNCHARS];
extern UKWORD DoubleByteTables[][TOTAL_VNCHARS];
extern UnicodeChar UnicodeTable[TOTAL_VNCHARS];
extern UKDWORD VIQRTable[TOTAL_VNCHARS];
extern UKDWORD UnicodeComposite[TOTAL_VNCHARS];
extern UKWORD WinCP1258[TOTAL_VNCHARS];
extern UKWORD WinCP1258Pre[TOTAL_VNCHARS];

// Global objects, declared here and defined in another source file:
// the charset library instance, the global conversion options, and two int
// tables with one entry per Vietnamese character (presumably mapping each
// character to its toneless form / root letter -- confirm).
extern DllInterface CVnCharsetLib VnCharsetLibObj;
extern VnConvOptions VnConvGlobalOptions;
extern int StdVnNoTone[TOTAL_VNCHARS];
extern int StdVnRootChar[TOTAL_VNCHARS];

// Generic conversion entry point taking an input charset, an output charset,
// an input stream and an output stream. Presumably reads characters with
// incs.nextInput() and writes them with outcs.putChar() -- confirm in the
// implementation. The meaning of the int result is not visible in this header.
DllInterface int genConvert(VnCharset & incs, VnCharset & outcs, ByteInStream & input, ByteOutStream & output);

// Helpers that map one StdVnChar to another; defined in another source file.
// Judging by the names: upper-case form, lower-case form, and root character
// of ch -- confirm in the implementation, including the result for values
// outside the standard Vietnamese range.
StdVnChar StdVnToUpper(StdVnChar ch);
StdVnChar StdVnToLower(StdVnChar ch);
StdVnChar StdVnGetRoot(StdVnChar ch);

#endif