base/util.h - mozc (debian/1.1.717.102-1)

Tree @debian/1.1.717.102-1 (Download .tar.gz)

util.h @debian/1.1.717.102-1 — raw · history · blame

// Copyright 2010-2011, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef MOZC_BASE_UTIL_H_
#define MOZC_BASE_UTIL_H_

#include <string>
#include <utility>
#include <vector>

#include "base/base.h"

struct tm;

namespace mozc {

class Util {
 public:
  // String utils
  static void SplitStringUsing(const string &str,
                               const char *delm,
                               vector<string> *output);

  static void SplitStringAllowEmpty(const string &str,
                                    const char *delm,
                                    vector<string> *output);

  static void SplitCSV(const string &str, vector<string> *output);

  static void JoinStrings(const vector<string> &str,
                          const char *delm,
                          string *output);


  static void StringReplace(const string &s, const string &oldsub,
                            const string &newsub, bool replace_all,
                            string *res);

  static void LowerString(string *output);
  static void UpperString(string *output);

  // Transform the first character to upper case and the trailing
  // characters to lower case.  ex. "abCd" => "Abcd".
  static void CapitalizeString(string *output);

  static size_t OneCharLen(const char *src);

  static size_t CharsLen(const char *src, size_t size);

  // Convenience overload: forwards the buffer and byte size of |str| to
  // CharsLen(const char *, size_t) and returns its result.
  static size_t CharsLen(const string &str) {
    const char *const bytes = str.c_str();
    const size_t byte_count = str.size();
    return CharsLen(bytes, byte_count);
  }

  static char32 UTF8ToUCS4(const char *begin,
                           const char *end,
                           size_t *mblen);

  static void UCS4ToUTF8(char32 c, string *output);
  static void UCS4ToUTF8Append(char32 c, string *output);

  // The return value overflows if the UTF-8 bytes represent a
  // character whose code point is > 0xFFFF.
  static uint16 UTF8ToUCS2(const char *begin,
                           const char *end,
                           size_t *mblen);

  // Convert UCS2 code point to UTF8 string
  static void UCS2ToUTF8(uint16 c, string *output);
  static void UCS2ToUTF8Append(uint16 c, string *output);

#ifdef OS_WINDOWS
  // Returns how many wide characters are necessary in UTF-16 to represent
  // given UTF-8 string. Note that the result of this method becomes greater
  // than that of Util::CharsLen if |src| contains any character which is
  // encoded by the surrogate-pair in UTF-16.
  static size_t WideCharsLen(const char *src);
  static size_t WideCharsLen(const string &src);
  // Converts the encoding of the specified string from UTF-8 to UTF-16, and
  // vice versa.
  static int UTF8ToWide(const char *input, wstring *output);
  static int UTF8ToWide(const string &input, wstring *output);
  static int WideToUTF8(const wchar_t *input, string *output);
  static int WideToUTF8(const wstring &input, string *output);
#endif

  static void SubString(const string &src,
                        const size_t start, const size_t length,
                        string *result);

  // Value-returning convenience overload: delegates to the four-argument
  // SubString() with a local string as the output and returns that string.
  static string SubString(const string &src,
                          const size_t start, const size_t length) {
    string extracted;
    SubString(src, start, length, &extracted);
    return extracted;
  }

  // Determines whether the beginning of |str| matches |prefix|.
  static bool StartsWith(const string &str, const string &prefix);

  // Determines whether the end of |str| matches |suffix|.
  static bool EndsWith(const string &str, const string &suffix);

  // Strip a leading UTF-8 BOM (byte order mark) sequence (= \xef\xbb\xbf).
  static void StripUTF8BOM(string *line);

  // return true if the line starts with a UTF16-LE/UTF16-BE BOM.
  static bool IsUTF16BOM(const string &line);

  // Convert the number to a string and return it.
  static string SimpleItoa(int32 number);

  // Convert the string to a number and return it.
  static int SimpleAtoi(const string &str);

  // One rendering of a number: the converted text, a description of it,
  // and a tag naming the notation that was used.
  struct NumberString {
    // Notation of |value|.  Enumerators are numbered consecutively,
    // starting from DEFAULT_STYLE == 0.
    enum Style {
      DEFAULT_STYLE = 0,
      NUMBER_SEPARATED_ARABIC_HALFWIDTH,  // 123,456,789
      NUMBER_SEPARATED_ARABIC_FULLWIDTH,  // "１２３，４５６，７８９"
      NUMBER_KANJI,          // "一億二千三百四十五万六千七百八十九"
      NUMBER_OLD_KANJI,      // "壱億弐千参百四拾五万六千七百八拾九"
      NUMBER_ROMAN_CAPITAL,  // "ⅠⅡⅢ"
      NUMBER_ROMAN_SMALL,    // "ⅰⅱⅲ"
      NUMBER_CIRCLED,        // "①②③"
      NUMBER_HEX,            // "0x4d2" (1234 in decimal)
      NUMBER_OCT,            // "02322" (1234 in decimal)
      NUMBER_BIN,            // "0b10011010010" (1234 in decimal)
      NUMBER_KANJI_ARABIC,   // "ニ〇〇"
    };

    // Copies the three arguments into the members of the same name.
    NumberString(const string &value, const string &description, Style style)
        : value(value), description(description), style(style) {}

    string value;        // Converted string.
    string description;  // Description of the converted string.
    Style style;         // Notation used for |value|.
  };

  // Converts half-width Arabic number string to Kan-su-ji string
  //    - input_num: a string which *must* be half-width number string
  //    - output: function appends new representation into output vector.
  // value, desc and style are stored same size and same order.
  // If an invalid string is given, this function does nothing.
  static bool ArabicToKanji(const string &input_num,
                            vector<Util::NumberString> *output);

  // Converts half-width Arabic number string to Separated Arabic string
  //  (e.g. 1234567890 are converted to 1,234,567,890)
  // Arguments are same as ArabicToKanji (above)
  static bool ArabicToSeparatedArabic(const string &input_num,
                                      vector<Util::NumberString> *output);

  // Converts half-width Arabic number string to full-width Arabic number string
  // Arguments are same as ArabicToKanji (above)
  static bool ArabicToWideArabic(const string &input_num,
                                 vector<Util::NumberString> *output);

  // Converts half-width Arabic number to various styles
  // Arguments are same as ArabicToKanji (above)
  //    - Roman style (i) (ii) ...
  static bool ArabicToOtherForms(const string &input_num,
                                 vector<Util::NumberString> *output);

  // Converts half-width Arabic number to various radices (2,8,16)
  // Arguments are same as ArabicToKanji (above)
  //   except input digits is smaller than 20
  static bool ArabicToOtherRadixes(const string &input_num,
                                   vector<Util::NumberString> *output);

  // Converts the string to a 32-/64-bit unsigned int.  Returns true if success
  // or false if the string is in the wrong format.
  static bool SafeStrToUInt32(const string &str, uint32 *value);
  static bool SafeStrToUInt64(const string &str, uint64 *value);

  // Converts the string to a double.  Returns true if success or false if the
  // string is in the wrong format.
  // If |str| is a hexadecimal number like "0x1234", the result depends on
  // compiler.  It returns false when compiled by VisualC++.  On the other hand
  // it returns true and sets correct value when compiled by gcc.
  static bool SafeStrToDouble(const string &str, double *value);

#ifndef SWIG
  // C++ string version of sprintf.
  static string StringPrintf(const char *format, ...)
      // Tell the compiler to do printf format string checking.
      PRINTF_ATTRIBUTE(1, 2);
#endif

  // Chop the return characters (i.e. '\n' and '\r') at the end of the
  // given line.
  static bool ChopReturns(string *line);

  // 32bit Fingerprint
  static uint32 Fingerprint32(const string &key);
#ifndef SWIG
  static uint32 Fingerprint32(const char *str, size_t length);
  static uint32 Fingerprint32(const char *str);
#endif

  static uint32 Fingerprint32WithSeed(const string &key,
                                      uint32 seed);
#ifndef SWIG
  static uint32 Fingerprint32WithSeed(const char *str,
                                      size_t length, uint32 seed);
  static uint32 Fingerprint32WithSeed(const char *str,
                                      uint32 seed);
#endif
  static uint32 Fingerprint32WithSeed(uint32 num, uint32 seed);

  // 64bit Fingerprint
  static uint64 Fingerprint(const string &key);
  static uint64 Fingerprint(const char *str, size_t length);

  static uint64 FingerprintWithSeed(const string &key, uint32 seed);

  static uint64 FingerprintWithSeed(const char *str,
                                    size_t length, uint32 seed);

  // Fill a given buffer with random characters
  static bool GetSecureRandomSequence(char *buf, size_t buf_size);
  static bool GetSecureRandomAsciiSequence(char *buf, size_t buf_size);

  // return random variable whose range is [0..size-1].
  // This function uses rand() internally, so don't use it for
  // security-sensitive purpose.
  static int Random(int size);

  // Get the current time info using gettimeofday-like functions.
  // sec: number of seconds from epoch
  // usec: micro-second passed: [0,1000000)
  static void GetTimeOfDay(uint64 *sec, uint32 *usec);

  // Get the current time info using time-like function
  // For Windows, _time64() is used.
  // For Linux/Mac, time() is used.
  static uint64 GetTime();

  // Get the current local time to current_time.  Returns true if succeeded.
  static bool GetCurrentTm(tm *current_time);
  // Get local time, which is offset_sec seconds after now. Returns true if
  // succeeded.
  static bool GetTmWithOffsetSecond(tm *time_with_offset, int offset_sec);

  // Interface of the helper class.
  // Default implementation is defined in the .cc file.
  // A replacement implementation can be installed with SetClockHandler(),
  // which is provided so that tests can customize the system clock.
  class ClockInterface {
   public:
    // Virtual so that an implementation can be destroyed through a
    // ClockInterface pointer.
    virtual ~ClockInterface() {}
    // NOTE(review): presumably backs Util::GetTimeOfDay() with the same
    // meaning of |sec| and |usec| -- confirm against the .cc file.
    virtual void GetTimeOfDay(uint64 *sec, uint32 *usec) = 0;
    // NOTE(review): presumably backs Util::GetTime() -- confirm against
    // the .cc file.
    virtual uint64 GetTime() = 0;
  };

  // This function is provided for test.
  // The behavior of system clock can be customized by replacing this handler.
  static void SetClockHandler(Util::ClockInterface *handler);

  // Suspends the execution of the current thread until
  // the time-out interval elapses.
  static void Sleep(uint32 msec);

  // Convert Kanji numeric into Arabic numeric
  // When the trim_leading_zeros is true, leading zeros for arabic_output
  // are trimmed off.
  // TODO(toshiyuki): This parameter is only applied for arabic_output now.
  //
  // When input contains non-number characters, conversion will be failed
  // and returns false.
  //
  // Input: "2千五百"
  // kanji_output: "二千五百"
  // arabic output: 2500
  static bool NormalizeNumbers(const string &input,
                               bool trim_leading_zeros,
                               string *kanji_output,
                               string *arabic_output);

  // Japanese utils
  static void HiraganaToKatakana(const string &input,
                                 string *output);

  static void HiraganaToHalfwidthKatakana(const string &input,
                                          string *output);

  static void HiraganaToRomanji(const string &input,
                                string *output);

  static void HalfWidthAsciiToFullWidthAscii(const string &input,
                                             string *output);

  static void FullWidthAsciiToHalfWidthAscii(const string &input,
                                             string *output);

  static void HiraganaToFullwidthRomanji(const string &input,
                                         string *output);

  static void RomanjiToHiragana(const string &input,
                                string *output);

  static void KatakanaToHiragana(const string &input,
                                 string *output);

  static void HalfWidthKatakanaToFullWidthKatakana(const string &input,
                                                   string *output);

  static void FullWidthKatakanaToHalfWidthKatakana(const string &input,
                                                   string *output);

  static void FullWidthToHalfWidth(const string &input,
                                   string *output);

  static void HalfWidthToFullWidth(const string &input,
                                   string *output);

  // return true if all chars in input are both defined
  // in full width and half-width-katakana area
  static bool IsFullWidthSymbolInHalfWidthKatakana(const string &input);

  // return true if all chars are defined in
  // half-width-katakana area.
  static bool IsHalfWidthKatakanaSymbol(const string &input);

  // return true if one or more Kana-symbol characters are
  // in the input.
  static bool IsKanaSymbolContained(const string &input);

  static void NormalizeVoicedSoundMark(const string &input,
                                       string *output);

  // Note: this function just does character-by-character conversion
  // "百二十" -> 10020
  static void KanjiNumberToArabicNumber(const string &input,
                                        string *output);

  // return true if key is an open bracket.
  // if key is an open bracket, corresponding close bracket is
  // assigned
  static bool IsOpenBracket(const string &key, string *close_bracket);

  // return true if key is a close bracket.
  // if key is a close bracket, corresponding open bracket is
  // assigned.
  static bool IsCloseBracket(const string &key, string *open_bracket);

  // Code converter
#ifndef OS_WINDOWS
  static void UTF8ToEUC(const string &input, string *output);
  static void EUCToUTF8(const string &input, string *output);
#endif

  static void UTF8ToSJIS(const string &input, string *output);
  static void SJISToUTF8(const string &input, string *output);

  // File and directory operations
  static bool CreateDirectory(const string &path);
  static bool RemoveDirectory(const string &dirname);
  static bool Unlink(const string &filename);
  static bool FileExists(const string &filename);
  static bool DirectoryExists(const string &filename);
  static bool Rename(const string &from, const string &to);

  // This function has a limitation. See comment in the .cc file.
  // This function opens a file with text mode. The return code
  // may be different between |from| and |to|.
  static bool CopyTextFile(const string &from, const string &to);

  // CopyFile uses mmap internally. |from| and |to| should
  // be identical.
  static bool CopyFile(const string &from, const string &to);

  // Return true if |filename1| and |filename2|
  // are identical.
  static bool IsEqualFile(const string &filename1,
                          const string &filename2);

  // Move/Rename file atomically.
  // Vista or Later: use Transactional NTFS API, which guarantees atomic
  // file move operation.
  // When anything goes wrong in the Transactional NTFS API, execute
  // the fallback plan, which is the same as the treatment for Windows XP.
  //
  // XP: use MoveFileEx with MOVEFILE_WRITE_THROUGH, which isn't atomic but
  // almost always works as intended.
  //
  // Linux: use rename(2), which is atomic.
  //
  // Mac OSX: use rename(2), but rename(2) on Mac OSX
  // is not properly implemented, atomic rename is POSIX spec though.
  // http://www.weirdnet.nl/apple/rename.html
  static bool AtomicRename(const string &from, const string &to);

  static string JoinPath(const string &path1, const string &path2);

#ifndef SWIG
  static void JoinPath(const string &path1, const string &path2,
                       string *output);
#endif

  static string Basename(const string &filename);
  static string Dirname(const string &filename);

  // return normalized path by replacing '/' with '\\' in Windows.
  // does nothing in other platforms.
  static string NormalizeDirectorySeparator(const string &path);

  // return "~/.mozc" for Unix/Mac
  // return "%USERPROFILE%\\Local Settings\\Application\\"
  //        "Google\\Google Japanese Input" for Windows XP.
  // return "%USERPROFILE%\\AppData\\LocalLow\\"
  //        "Google\\Google Japanese Input" for Windows Vista and later.
  static string GetUserProfileDirectory();

  // return ~/Library/Logs/Mozc for Mac
  // Otherwise same as GetUserProfileDirectory().
  static string GetLoggingDirectory();

  // set user dir

  // Currently we enabled this method to release-build too because
  // some tests use this.
  // TODO(mukai,taku): find better way to hide this method in the release
  // build but available from those tests.
  static void SetUserProfileDirectory(const string &path);

#ifdef OS_WINDOWS
  // From an early stage of the development of Mozc, we have somehow abused
  // CHECK macro assuming that any failure of fundamental APIs like
  // ::SHGetFolderPathW or ::SHGetKnownFolderPath is worth being notified
  // as a crash.  But the circumstances have been changed.  As filed as
  // b/3216603, increasing number of instances of various applications begin
  // to use their own sandbox technology, where these kind of fundamental APIs
  // are far more likely to fail with an unexpected error code.
  // EnsureVitalImmutableDataIsAvailable is a simple fail-fast mechanism to
  // this situation.  This function simply returns false instead of making
  // the process crash if any of following functions cannot work as expected.
  // - IsVistaOrLaterCache
  // - SystemDirectoryCache
  // - ProgramFilesX86Cache
  // - LocalAppDataDirectoryCache
  // TODO(taku,yukawa): Implement more robust and reliable mechanism against
  //   sandboxed environment, where such kind of fundamental APIs are far more
  //   likely to fail.  See b/3216603.
  static bool EnsureVitalImmutableDataIsAvailable();
#endif  // OS_WINDOWS

  // return the directory name where the mozc server exist.
  static string GetServerDirectory();

  // return the path of the mozc server.
  static string GetServerPath();

  // return the username.  This function's name was GetUserName.
  // Since Windows reserves GetUserName as a macro, we have changed
  // the name to GetUserNameAsString.
  static string GetUserNameAsString();

  // return Windows SID as string.
  // On Linux and Mac, GetUserSidAsString() is equivalent to
  // GetUserNameAsString()
  static string GetUserSidAsString();


  // return DesktopName as string.
  // On Windows, return <session_id>.<DesktopStationName>.<ThreadDesktopName>
  // On Linux, return getenv("DISPLAY")
  // Mac has no DesktopName() so, just return empty string
  static string GetDesktopNameAsString();

  // Command line arguments

  // Rotate the first argv value to the end of argv.
  static void CommandLineRotateArguments(int argc, char ***argv);

  // Get a pair of key and value from argv, and returns the number of
  // arguments used for the pair of key and value.  If the argv
  // contains invalid format, this function returns false and the
  // number of checked arguments.  Otherwise returns true.
  static bool CommandLineGetFlag(int argc,
                                 char **argv,
                                 string *key,
                                 string *value,
                                 int *used_args);

  static void EncodeURI(const string &input, string *output);
  static void DecodeURI(const string &input, string *output);

  // Make a string for CGI parameters from params and append it to
  // base.  The result looks like:
  //   <base><key1>=<encoded val1>&<key2>=<encoded val2>
  // The base is supposed to end "?" or "&".
  static void AppendCGIParams(const vector<pair<string, string> > &params,
                              string *base);

  // Escape any characters into \x prefixed hex digits.
  // ex.  "ABC" => "\x41\x42\x43".
  static void Escape(const string &input, string *output);

  // Escape any characters into % prefixed hex digits.
  // ex. "ABC" => "%41%42%43"
  static void EscapeUrl(const string &input, string *output);

  // Escape unsafe html characters such as <, > and &.
  static void EscapeHtml(const string &text, string *res);

  // Escape unsafe CSS characters like <.  Note > and & are not
  // escaped because they are operands of CSS.
  static void EscapeCss(const string &text, string *result);

  // Script categories used by the GetScriptType() family of functions.
  // Enumerators are numbered consecutively from 0.
  enum ScriptType {
    UNKNOWN_SCRIPT,    // 0
    KATAKANA,          // 1
    HIRAGANA,          // 2
    KANJI,             // 3
    NUMBER,            // 4
    ALPHABET,          // 5
    SCRIPT_TYPE_SIZE,  // 6: equals the number of enumerators before it.
  };

  // return script type of w
  static ScriptType GetScriptType(char32 w);

  // return script type of first character in [begin, end)
  static ScriptType GetScriptType(const char *begin, const char *end,
                                  size_t *mblen);

  // return script type of string. all chars in str must be
  // KATAKANA/HIRAGANA/KANJI/NUMBER or ALPHABET.
  // If str has mixed scripts, this function returns UNKNOWN_SCRIPT
  static ScriptType GetScriptType(const string &str);

  // The same as GetScriptType(), but it ignores symbols
  // in the |str|.
  static ScriptType GetScriptTypeWithoutSymbols(const string &str);

  // return true if all script_type in str is "type"
  static bool IsScriptType(const string &str, ScriptType type);

  // return true if the string contains script_type char
  static bool ContainsScriptType(const string &str, ScriptType type);

  // Width categories used by the GetFormType() functions.
  // Enumerators are numbered consecutively from 0.
  enum FormType {
    UNKNOWN_FORM,    // 0
    HALF_WIDTH,      // 1
    FULL_WIDTH,      // 2
    FORM_TYPE_SIZE,  // 3: equals the number of enumerators before it.
  };

  // return Form type of single character
  static FormType GetFormType(char32 w);

  // return FormType of string
  static FormType GetFormType(const string &str);

  // Character sets used by the GetCharacterSet() functions.  Enumerators
  // are numbered consecutively from 0.
  // Basically, if charset >= JISX0212, the char is platform dependent char.
  enum CharacterSet {
    // ASCII (simply ucs4 <= 0x007F)
    ASCII,
    // defined at least in 0201 (can be in 0208/0212/0213/CP932)
    JISX0201,
    // defined at least in 0208 (can be in 0212/0213/CP932)
    JISX0208,
    // defined at least in 0212 (can be in 0213/CP932)
    JISX0212,
    // defined at least in 0213 (can be in CP932)
    JISX0213,
    // defined only in CP932, not in JISX02*
    CP932,
    // defined only in UNICODE, not in JISX* nor CP932
    UNICODE_ONLY,
    // equals the number of enumerators before it
    CHARACTER_SET_SIZE,
  };

  // return CharacterSet
  static CharacterSet GetCharacterSet(char32 ucs4);

  // return CharacterSet of string.
  // if the given string contains multiple character sets, return
  // the maximum character set.
  static CharacterSet GetCharacterSet(const string &str);

  // Return true if the OS is supported.
  // [OS_MACOSX] This function never returns false.
  // [OS_LINUX] This function never returns false.
  // TODO(yukawa): support Mac and Linux.
  static bool IsPlatformSupported();

#ifdef OS_WINDOWS
  // return true if the version of Windows is Vista or later.
  static bool IsVistaOrLater();

  // return true if the version of Windows is x64 Edition.
  static bool IsWindowsX64();

  // Mode passed to SetIsWindowsX64ModeForTest() to override the behavior
  // of IsWindowsX64() in unit tests.  Enumerators are numbered
  // consecutively from 0.
  enum IsWindowsX64Mode {
    IS_WINDOWS_X64_DEFAULT_MODE,           // 0
    IS_WINDOWS_X64_EMULATE_32BIT_MACHINE,  // 1
    IS_WINDOWS_X64_EMULATE_64BIT_MACHINE,  // 2
  };

  // For unit tests, this function overrides the behavior of |IsWindowsX64|.
  static void SetIsWindowsX64ModeForTest(IsWindowsX64Mode mode);

  // return system directory. If failed, return NULL.
  // You need not to delete the returned pointer.
  // This function is thread safe.
  static const wchar_t *GetSystemDir();

  // Load a DLL which has the specified base-name and is located in the
  // system directory.
  // If the function succeeds, the return value is a handle to the module.
  // You should call FreeLibrary with the handle.
  // If the function fails, the return value is NULL.
  static HMODULE LoadSystemLibrary(const wstring &base_filename);

  // Load a DLL which has the specified base-name and is located in the
  // Mozc server directory.
  // If the function succeeds, the return value is a handle to the module.
  // You should call FreeLibrary with the handle.
  // If the function fails, the return value is NULL.
  static HMODULE LoadMozcLibrary(const wstring &base_filename);

  // If a DLL which has the specified base-name and located in the system
  // directory is loaded in the caller process, retrieve its module handle.
  // If the function succeeds, the return value is a handle to the module
  // without incrementing its reference count so that you should not call
  // FreeLibrary with the handle.
  // If the function fails, the return value is NULL.
  static HMODULE GetSystemModuleHandle(const wstring &base_filename);

  // Retrieves version of the specified file.
  // If the function fails, returns false.
  static bool GetFileVersion(const wstring &file_fullpath,
                             int *major, int *minor, int *build, int *revision);

  // Retrieves version string of the specified file.
  // The version string consists of 4 numbers separated by periods
  // like "X.YY.ZZZ.WWWW".
  // If the function fails, the return value is an empty string.
  static string GetFileVersionString(const wstring &file_fullpath);

#endif

  // return string representing os version
  // TODO(toshiyuki): Add unittests.
  static string GetOSVersionString();

  // disable IME in the current process/thread
  static void DisableIME();

  // retrieve total physical memory. returns 0 if any error occurs.
  static uint64 GetTotalPhysicalMemory();

  // read specified memory-mapped region to cause page fault.
  // this function does not consider memory alignment.
  // if |*query_quit| is or becomes true, it returns immediately.
  static void PreloadMappedRegion(const void *begin,
                                  size_t region_size_in_byte,
                                  volatile bool *query_quit);

  // write byte array header to ofs
  // Windows does not accept static string of size >= 65536.
  // so we represent string in an array of uint64 in Windows.
  //  * const size_t k<name>_size and
  //  * const uint64 k<name>_uint64_data[] and
  //    const char *k<name>_data =
  //                     reinterpret_cast<const char *>(k<name>_uint64_data)
  //    (for Windows), or
  //  * const char k<name>_data[] (for others)
  // are generated.
  static void WriteByteArray(const string &name, const char *buf,
                             size_t buf_size, ostream *ofs);


  // Convert binary file |input| into header file |output|
  static void MakeByteArrayFile(const string &name,
                                const string &input,
                                const string &output);

  // Convert binary file |input| into header filestream |os|
  static void MakeByteArrayStream(const string &name,
                                  const string &input,
                                  ostream *os);

  // check endian-ness at runtime.
  static bool IsLittleEndian();

  // Util only groups static helpers and should never be allocated, so both
  // the constructor and the destructor are private.
  // The destructor is deliberately non-virtual: Util declares no other
  // virtual member, so a virtual destructor would only make the class
  // polymorphic (forcing a vtable) without any benefit.
 private:
  Util() {}
  ~Util() {}
};
}  // namespace mozc

#endif  // MOZC_BASE_UTIL_H_