// Copyright 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "base/util.h"
#ifdef OS_WINDOWS
#include <windows.h>
#include <Lmcons.h>
#include <shlobj.h>
#include <time.h>
#include <sddl.h>
// #include <KnownFolders.h>
#else
#include <pwd.h>
#include <string.h>
#include <sys/stat.h>
#ifdef OS_MACOSX
#include <sys/sysctl.h>
#endif // OS_MACOSX
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#endif // OS_WINDOWS
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <fstream>
#include <iterator>
#include <map>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "base/base.h"
#include "base/const.h"
#include "base/file_stream.h"
#include "base/mmap.h"
#include "base/mutex.h"
#include "base/port.h"
#include "base/singleton.h"
#include "base/text_converter.h"
#ifdef OS_MACOSX
#include "base/mac_util.h"
#endif
namespace {
const char kFileDelimiterForUnix = '/';
const char kFileDelimiterForWindows = '\\';
#ifdef OS_WINDOWS
const char kFileDelimiter = kFileDelimiterForWindows;
#else
const char kFileDelimiter = kFileDelimiterForUnix;
#endif
// Table of UTF-8 character lengths, based on first byte
const unsigned char kUTF8LenTbl[256] = {
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4
};
template <class ITR>
void SplitStringToIteratorUsing(const string &full,
const char *delim,
ITR &result) {
// Optimize the common case where delim is a single character.
if (delim[0] != '\0' && delim[1] == '\0') {
char c = delim[0];
const char *p = full.data();
const char *end = p + full.size();
while (p != end) {
if (*p == c) {
++p;
} else {
const char *start = p;
while (++p != end && *p != c);
*result++ = string(start, p - start);
}
}
return;
}
string::size_type begin_index, end_index;
begin_index = full.find_first_not_of(delim);
while (begin_index != string::npos) {
end_index = full.find_first_of(delim, begin_index);
if (end_index == string::npos) {
*result++ = full.substr(begin_index);
return;
}
*result++ = full.substr(begin_index, (end_index - begin_index));
begin_index = full.find_first_not_of(delim, end_index);
}
}
template <typename ITR>
void SplitStringToIteratorAllowEmpty(const string &full,
const char *delim,
ITR &result) {
string::size_type begin_index, end_index;
begin_index = 0;
for (size_t i = 0; ; i++) {
end_index = full.find_first_of(delim, begin_index);
if (end_index == string::npos) {
*result++ = full.substr(begin_index);
return;
}
*result++ = full.substr(begin_index, (end_index - begin_index));
begin_index = end_index + 1;
}
}
} // namespace
namespace mozc {
void Util::SplitStringUsing(const string &str,
const char *delim,
vector<string> *output) {
back_insert_iterator<vector<string> > it(*output);
SplitStringToIteratorUsing(str, delim, it);
}
void Util::SplitStringAllowEmpty(const string &full, const char *delim,
vector<string> *result) {
back_insert_iterator<vector<string> > it(*result);
SplitStringToIteratorAllowEmpty(full, delim, it);
}
void Util::SplitCSV(const string &input, vector<string> *output) {
scoped_array<char> tmp(new char[input.size() + 1]);
char *str = tmp.get();
memcpy(str, input.data(), input.size());
str[input.size()] = '\0';
char *eos = str + input.size();
char *start = NULL;
char *end = NULL;
output->clear();
while (str < eos) {
while (*str == ' ' || *str == '\t') {
++str;
}
bool inquote = false;
if (*str == '"') {
start = ++str;
end = start;
for (; str < eos; ++str) {
if (*str == '"') {
str++;
if (*str != '"')
break;
}
*end++ = *str;
}
inquote = true;
str = find(str, eos, ',');
} else {
start = str;
str = find(str, eos, ',');
end = str;
}
bool end_is_empty = false;
if (*end == ',' && end == eos - 1) {
end_is_empty = true;
}
*end = '\0';
output->push_back(start);
if (end_is_empty) {
output->push_back("");
}
++str;
}
}
void Util::JoinStrings(const vector<string> &input,
const char *delim,
string *output) {
output->clear();
for (size_t i = 0; i < input.size(); ++i) {
if (i > 0) {
*output += delim;
}
*output += input[i];
}
}
void Util::StringReplace(const string &s, const string &oldsub,
const string &newsub, bool replace_all,
string *res) {
if (oldsub.empty()) {
res->append(s); // if empty, append the given string.
return;
}
string::size_type start_pos = 0;
string::size_type pos;
do {
pos = s.find(oldsub, start_pos);
if (pos == string::npos) {
break;
}
res->append(s, start_pos, pos - start_pos);
res->append(newsub);
start_pos = pos + oldsub.size(); // start searching again after the "old"
} while (replace_all);
res->append(s, start_pos, s.length() - start_pos);
}
// The offset value to transform the upper case character to the lower
// case. The value comes from both of (0x0061 "a" - 0x0041 "A") and
// (0xFF41 "a" - 0xFF21 "A").
namespace {
const size_t kOffsetFromUpperToLower = 0x0020;
}
void Util::LowerString(string *str) {
const char *begin = str->data();
size_t mblen = 0;
string utf8;
size_t pos = 0;
while (pos < str->size()) {
uint16 ucs2 = Util::UTF8ToUCS2(begin + pos, begin + str->size(), &mblen);
// ('A' <= ucs2 && ucs2 <= 'Z') || ('A' <= ucs2 && ucs2 <= 'Z')
if ((0x0041 <= ucs2 && ucs2 <= 0x005A) ||
(0xFF21 <= ucs2 && ucs2 <= 0xFF3A)) {
ucs2 += kOffsetFromUpperToLower;
UCS2ToUTF8(ucs2, &utf8);
// The size of upper case character must be equal to the source
// lower case character. The following check asserts it.
if (utf8.size() != mblen) {
LOG(ERROR) << "The generated size differs from the source.";
return;
}
str->replace(pos, mblen, utf8);
}
pos += mblen;
}
}
void Util::UpperString(string *str) {
const char *begin = str->data();
size_t mblen = 0;
string utf8;
size_t pos = 0;
while (pos < str->size()) {
uint16 ucs2 = Util::UTF8ToUCS2(begin + pos, begin + str->size(), &mblen);
// ('a' <= ucs2 && ucs2 <= 'z') || ('a' <= ucs2 && ucs2 <= 'z')
if ((0x0061 <= ucs2 && ucs2 <= 0x007A) ||
(0xFF41 <= ucs2 && ucs2 <= 0xFF5A)) {
ucs2 -= kOffsetFromUpperToLower;
UCS2ToUTF8(ucs2, &utf8);
// The size of upper case character must be equal to the source
// lower case character. The following check asserts it.
if (utf8.size() != mblen) {
LOG(ERROR) << "The generated size differs from the source.";
return;
}
str->replace(pos, mblen, utf8);
}
pos += mblen;
}
}
void Util::CapitalizeString(string *str) {
string first_str;
Util::SubString(*str, 0, 1, &first_str);
Util::UpperString(&first_str);
string tailing_str;
Util::SubString(*str, 1, string::npos, &tailing_str);
Util::LowerString(&tailing_str);
str->assign(first_str + tailing_str);
}
// Return length of a single UTF-8 source character
size_t Util::OneCharLen(const char *src) {
return kUTF8LenTbl[*reinterpret_cast<const uint8*>(src)];
}
size_t Util::CharsLen(const char *src, size_t length) {
const char *begin = src;
const char *end = src + length;
int result = 0;
while (begin < end) {
++result;
begin += Util::OneCharLen(begin);
}
return result;
}
uint16 Util::UTF8ToUCS2(const char *begin,
const char *end,
size_t *mblen) {
const uint32 len = static_cast<uint32>(end - begin);
if (static_cast<unsigned char>(begin[0]) < 0x80) {
*mblen = 1;
return static_cast<unsigned char>(begin[0]);
} else if (len >= 2 && (begin[0] & 0xe0) == 0xc0) {
*mblen = 2;
return ((begin[0] & 0x1f) << 6) |(begin[1] & 0x3f);
} else if (len >= 3 && (begin[0] & 0xf0) == 0xe0) {
*mblen = 3;
return ((begin[0] & 0x0f) << 12) |
((begin[1] & 0x3f) << 6) |(begin[2] & 0x3f);
/* belows are out of UCS2 */
} else if (len >= 4 && (begin[0] & 0xf8) == 0xf0) {
*mblen = 4;
return 0;
} else if (len >= 5 && (begin[0] & 0xfc) == 0xf8) {
*mblen = 5;
return 0;
} else if (len >= 6 && (begin[0] & 0xfe) == 0xfc) {
*mblen = 6;
return 0;
} else {
*mblen = 1;
return 0;
}
}
namespace {
void UCS2ToUTF8Internal(uint16 c, char *output) {
if (c < 0x00080) {
output[0] = static_cast<char>(c & 0xFF);
output[1] = '\0';
} else if (c < 0x00800) {
output[0] = static_cast<char>(0xC0 + ((c >> 6) & 0x1F));
output[1] = static_cast<char>(0x80 + (c & 0x3F));
output[2] = '\0';
} else {
output[0] = static_cast<char>(0xE0 + ((c >> 12) & 0x0F));
output[1] = static_cast<char>(0x80 + ((c >> 6) & 0x3F));
output[2] = static_cast<char>(0x80 + (c & 0x3F));
output[3] = '\0';
}
// UCS4 range
// output[0] = static_cast<char>(0xF0 + ((c >> 18) & 0x07));
// output[1] = static_cast<char>(0x80 + ((c >> 12) & 0x3F));
// output[2] = static_cast<char>(0x80 + ((c >> 6) & 0x3F));
// output[3] = static_cast<char>(0x80 + (c & 0x3F));
// output[4] = '\0';
}
} // namespace
void Util::UCS2ToUTF8(uint16 c, string *output) {
char buf[4];
UCS2ToUTF8Internal(c, buf);
output->clear();
*output = buf;
}
void Util::UCS2ToUTF8Append(uint16 c, string *output) {
char buf[4];
UCS2ToUTF8Internal(c, buf);
*output += buf;
}
#ifdef OS_WINDOWS
int Util::UTF8ToWide(const char *input, wstring *output) {
const int output_length = MultiByteToWideChar(CP_UTF8, 0, input, -1, NULL, 0);
if (output_length == 0) {
return 0;
}
scoped_array<wchar_t> input_wide(new wchar_t[output_length + 1]);
const int result = MultiByteToWideChar(CP_UTF8, 0, input, -1,
input_wide.get(), output_length + 1);
if (result > 0) {
output->assign(input_wide.get());
}
return result;
}
int Util::UTF8ToWide(const string &input, wstring *output) {
return Util::UTF8ToWide(input.c_str(), output);
}
int Util::WideToUTF8(const wchar_t *input, string *output) {
const int output_length = WideCharToMultiByte(CP_UTF8, 0, input, -1, NULL, 0,
NULL, NULL);
if (output_length == 0) {
return 0;
}
scoped_array<char> input_encoded(new char[output_length + 1]);
const int result = WideCharToMultiByte(CP_UTF8, 0, input, -1,
input_encoded.get(),
output_length + 1, NULL, NULL);
if (result > 0) {
output->assign(input_encoded.get());
}
return result;
}
int Util::WideToUTF8(const wstring &input, string *output) {
return Util::WideToUTF8(input.c_str(), output);
}
#endif
void Util::SubString(const string &src,
const size_t start,
const size_t length,
string *result) {
size_t l = start;
const char *begin = src.c_str();
const char *end = begin + src.size();
while (l > 0) {
begin += Util::OneCharLen(begin);
--l;
}
l = length;
while (l > 0 && begin < end) {
const size_t len = Util::OneCharLen(begin);
result->append(begin, len);
begin += len;
--l;
}
return;
}
void Util::StripUTF8BOM(string *line) {
const char kUTF8BOM[] = "\xef\xbb\xbf";
if (line->substr(0, 3) == kUTF8BOM) {
line->erase(0, 3);
}
}
bool Util::IsUTF16BOM(const string &line) {
const char kUTF16LEBOM[] = "\xff\xfe";
const char kUTF16BEBOM[] = "\xfe\xff";
if (line.size() >= 2 &&
(line.substr(0, 2) == kUTF16LEBOM ||
line.substr(0, 2) == kUTF16BEBOM)) {
return true;
}
return false;
}
namespace {
const int kInt32BufferSize = 12; // -2147483648\0
}
string Util::SimpleItoa(int32 number) {
char buffer[kInt32BufferSize];
int length = snprintf(buffer, kInt32BufferSize, "%d", number);
return string(buffer, length);
}
int Util::SimpleAtoi(const string &str) {
stringstream ss;
ss << str;
int i = 0;
ss >> i;
return i;
}
bool Util::SafeStrToUInt32(const string &str, uint32 *value) {
DCHECK(value);
const char *s = str.c_str();
// strtoul does not give any errors on negative numbers, so we have to
// search the string for '-' manually.
while (isspace(*s)) {
++s;
}
if (*s == '-') {
return false;
}
char *endptr;
errno = 0; // errno only gets set on errors
unsigned long ul = strtoul(s, &endptr, 10);
if (endptr != s) {
while (isspace(*endptr)) {
++endptr;
}
}
*value = static_cast<uint32>(ul);
return *s != 0 && *endptr == 0 && errno == 0 &&
static_cast<unsigned long>(*value) == ul; // no overflow
}
bool Util::SafeStrToUInt64(const string &str, uint64 *value) {
DCHECK(value);
const char *s = str.c_str();
// strtoull does not give any errors on negative numbers, so we have to
// search the string for '-' manually.
while (isspace(*s)) {
++s;
}
if (*s == '-') {
return false;
}
char *endptr;
errno = 0; // errno only gets set on errors
#ifdef OS_WINDOWS
*value = _strtoui64(s, &endptr, 10);
#else
unsigned long long ull = strtoull(s, &endptr, 10);
#endif
if (endptr != s) {
while (isspace(*endptr)) {
++endptr;
}
}
#ifdef OS_WINDOWS
return *s != 0 && *endptr == 0 && errno == 0;
#else
*value = static_cast<uint64>(ull);
return *s != 0 && *endptr == 0 && errno == 0 &&
static_cast<unsigned long long>(*value) == ull; // no overflow
#endif
}
bool Util::ChopReturns(string *line) {
const string::size_type line_end = line->find_last_not_of("\r\n");
if (line_end + 1 != line->size()) {
line->erase(line_end + 1);
return true;
}
return false;
}
bool Util::GetSecureRandomSequence(char *buf, size_t buf_size) {
memset(buf, '\0', buf_size);
#ifdef OS_WINDOWS
HCRYPTPROV hprov;
if (!::CryptAcquireContext(&hprov,
NULL,
NULL,
PROV_RSA_FULL,
CRYPT_VERIFYCONTEXT)) {
return false;
}
if (!::CryptGenRandom(hprov,
static_cast<DWORD>(buf_size),
reinterpret_cast<BYTE *>(buf))) {
::CryptReleaseContext(hprov, 0);
return false;
}
::CryptReleaseContext(hprov, 0);
#else
// Use non blocking interface on Linux.
// Mac also have /dev/urandom (although it's identical with /dev/random)
ifstream ifs("/dev/urandom", ios::binary);
if (!ifs) {
return false;
}
ifs.read(buf, buf_size);
#endif
return true;
}
bool Util::GetSecureRandomAsciiSequence(char *buf, size_t buf_size) {
// We use this map to convert a random byte value to an ascii character.
// Its size happens to be 64, which is just one fourth of the number of
// values that can be represented by a sigle byte value. This accidental
// coincidence makes implementation of the method quite simple.
const char kCharMap[] =
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_";
if (!GetSecureRandomSequence(buf, buf_size)) {
return false;
}
for (size_t i = 0; i < buf_size; ++i) {
// The size of kCharMap is just one fourth of 256. So we don't need to
// care if probability distribution over the characters is biased.
buf[i] = kCharMap[static_cast<unsigned char>(buf[i]) % 64];
}
return true;
}
void Util::GetTimeOfDay(uint64 *sec, uint32 *usec) {
#ifdef OS_WINDOWS
FILETIME file_time;
GetSystemTimeAsFileTime(&file_time);
ULARGE_INTEGER time_value;
time_value.HighPart = file_time.dwHighDateTime;
time_value.LowPart = file_time.dwLowDateTime;
// Convert into microseconds
time_value.QuadPart /= 10;
// kDeltaEpochInMicroSecs is difference between January 1, 1970 and
// January 1, 1601 in microsecond.
// This number is calculated as follows.
// ((1970 - 1601) * 365 + 89) * 24 * 60 * 60 * 1000000
// 89 is the number of leap years between 1970 and 1601.
const uint64 kDeltaEpochInMicroSecs = 11644473600000000ULL;
// Convert file time to unix epoch
time_value.QuadPart -= kDeltaEpochInMicroSecs;
*sec = static_cast<uint64>(time_value.QuadPart / 1000000UL);
*usec = static_cast<uint32>(time_value.QuadPart % 1000000UL);
#else
struct timeval tv;
gettimeofday(&tv, NULL);
*sec = tv.tv_sec;
*usec = tv.tv_usec;
#endif
}
uint64 Util::GetTime() {
#ifdef OS_WINDOWS
return static_cast<uint64>(_time64(NULL));
#else
return static_cast<uint64>(time(NULL));
#endif
}
bool Util::GetCurrentTm(tm *current_time) {
return GetTmWithOffsetSecond(current_time, 0);
}
bool Util::GetTmWithOffsetSecond(tm *time_with_offset, int offset_sec) {
time_t now;
time(&now);
const time_t unixtime_with_offset = now + offset_sec;
#ifdef OS_WINDOWS
if (_localtime64_s(time_with_offset, &unixtime_with_offset) != 0) {
return false;
}
#else
if (localtime_r(&unixtime_with_offset, time_with_offset) == NULL) {
return false;
}
#endif
return true;
}
void Util::Sleep(uint32 msec) {
#ifdef OS_WINDOWS
::Sleep(msec);
#else
usleep(msec * 1000);
#endif
}
namespace {
// TODO(yukawa): this should be moved into ports.h
const uint64 kUInt64Max = 0xFFFFFFFFFFFFFFFFull;
// There is a informative discuttion about the overflow detection in
// "Hacker's Delight" (http://www.hackersdelight.org/basics.pdf)
// 2-12 'Overflow Detection'
// *output = arg1 + arg2
// return false when an integer overflow happens.
bool AddAndCheckOverflow(uint64 arg1, uint64 arg2, uint64 *output) {
*output = arg1 + arg2;
if (arg2 > (kUInt64Max - arg1)) {
// overflow happens
return false;
}
return true;
}
// *output = arg1 * arg2
// return false when an integer overflow happens.
bool MultiplyAndCheckOverflow(uint64 arg1, uint64 arg2, uint64 *output) {
*output = arg1 * arg2;
if (arg1 !=0 && arg2 > (kUInt64Max / arg1)) {
// overflow happens
return false;
}
return true;
}
// Decode decimal number sequence into one number,
// considering scalind number.
// [5,4,3] => 543
// [5,100,4,10,3] => 543
// Return
// ture: conversion is successful.
bool NormalizeNumbersHelper(const vector<uint64>::const_iterator &begin,
const vector<uint64>::const_iterator &end,
uint64 *number_output) {
*number_output = 0;
if (begin >= end) {
return true;
}
vector<uint64>::const_iterator it = max_element(begin, end);
// 20, 30 and 40 are treated as a special case
const bool is_special_number = (*it > 10 && *it < 100);
const bool is_regular_number = (*it < 10);
if (is_regular_number) {
// when no scaling number is found and digit size is >= 2,
// convert number directly. i.e, [5,4,3] => 543
uint64 n = 0;
for (vector<uint64>::const_iterator it = begin;
it < end; ++it) {
// n = n * 10 + *it
if (!MultiplyAndCheckOverflow(n, 10, &n)
|| !AddAndCheckOverflow(n, *it, &n)) {
return false;
}
}
*number_output = n;
return true;
}
if (is_special_number) {
*number_output = *it;
return true;
}
if (begin == it) {
uint64 rest = 0;
return NormalizeNumbersHelper(it + 1, end, &rest)
&& AddAndCheckOverflow(*it, rest, number_output);
}
{
uint64 scaled_number = 0;
uint64 rest = 0;
return NormalizeNumbersHelper(begin, it, &scaled_number)
&& NormalizeNumbersHelper(it + 1, end, &rest)
&& MultiplyAndCheckOverflow(scaled_number, *it, number_output)
&& AddAndCheckOverflow(*number_output, rest, number_output);
}
}
} // end of anonymous namespace
// Convert Kanji numbers into Arabic numbers:
// e.g. "百二十万" -> 1200000
bool Util::NormalizeNumbers(const string &input,
bool trim_leading_zeros,
string *kanji_output,
string *arabic_output) {
DCHECK(kanji_output);
DCHECK(arabic_output);
const char *begin = input.data();
const char *end = input.data() + input.size();
vector<uint64> numbers;
numbers.reserve(input.size());
const char *const kNumKanjiDigits[] = {
"\xe3\x80\x87", "\xe4\xb8\x80", "\xe4\xba\x8c", "\xe4\xb8\x89",
"\xe5\x9b\x9b", "\xe4\xba\x94", "\xe5\x85\xad", "\xe4\xb8\x83",
"\xe5\x85\xab", "\xe4\xb9\x9d", NULL
};
// "〇", "一", "二", "三", "四", "五", "六", "七", "八", "九", NULL
kanji_output->clear();
while (begin < end) {
size_t mblen = 0;
const uint16 wchar = UTF8ToUCS2(begin, end, &mblen);
string w(begin, mblen);
if (wchar >= 0x0030 && wchar <= 0x0039) {
*kanji_output += kNumKanjiDigits[wchar - 0x0030];
} else if (wchar >= 0xFF10 && wchar <= 0xFF19) {
*kanji_output += kNumKanjiDigits[wchar - 0xFF10];
} else {
*kanji_output += w;
}
string tmp;
KanjiNumberToArabicNumber(w, &tmp);
uint64 n = 0;
for (size_t i = 0; i < tmp.size(); ++i) {
if (!isdigit(tmp[i])) { // non-number is included
return false;
}
if (!MultiplyAndCheckOverflow(n, 10, &n)
|| !AddAndCheckOverflow(n, static_cast<uint64>(tmp[i] - '0'),
&n)) {
return false;
}
}
numbers.push_back(n);
begin += mblen;
}
if (numbers.empty()) {
return false;
}
uint64 n = 0;
if (!NormalizeNumbersHelper(numbers.begin(), numbers.end(), &n)) {
return false;
}
if (!trim_leading_zeros) {
// if numbers is [0, 0, 0], we add output "00".
for (size_t i = 0; i < numbers.size() - 1; ++i) {
if (numbers[i] == 0) {
*arabic_output += "0";
} else {
break;
}
}
}
char buf[1024];
snprintf(buf, sizeof(buf), "%llu", n);
*arabic_output += buf;
return true;
}
// Load Rules
#include "base/japanese_util_rule.h"
void Util::HiraganaToKatakana(const string &input,
string *output) {
TextConverter::Convert(hiragana_to_katakana_da,
hiragana_to_katakana_table,
input,
output);
}
void Util::HiraganaToHalfwidthKatakana(const string &input,
string *output) {
// combine two rules
string tmp;
TextConverter::Convert(hiragana_to_katakana_da,
hiragana_to_katakana_table,
input, &tmp);
TextConverter::Convert(fullwidthkatakana_to_halfwidthkatakana_da,
fullwidthkatakana_to_halfwidthkatakana_table,
tmp, output);
}
void Util::HiraganaToRomanji(const string &input,
string *output) {
TextConverter::Convert(hiragana_to_romanji_da,
hiragana_to_romanji_table,
input,
output);
}
void Util::HalfWidthAsciiToFullWidthAscii(const string &input,
string *output) {
TextConverter::Convert(halfwidthascii_to_fullwidthascii_da,
halfwidthascii_to_fullwidthascii_table,
input,
output);
}
void Util::FullWidthAsciiToHalfWidthAscii(const string &input,
string *output) {
TextConverter::Convert(fullwidthascii_to_halfwidthascii_da,
fullwidthascii_to_halfwidthascii_table,
input,
output);
}
void Util::HiraganaToFullwidthRomanji(const string &input,
string *output) {
string tmp;
TextConverter::Convert(hiragana_to_romanji_da,
hiragana_to_romanji_table,
input,
&tmp);
TextConverter::Convert(halfwidthascii_to_fullwidthascii_da,
halfwidthascii_to_fullwidthascii_table,
tmp,
output);
}
void Util::RomanjiToHiragana(const string &input,
string *output) {
TextConverter::Convert(romanji_to_hiragana_da,
romanji_to_hiragana_table,
input,
output);
}
void Util::KatakanaToHiragana(const string &input,
string *output) {
TextConverter::Convert(katakana_to_hiragana_da,
katakana_to_hiragana_table,
input,
output);
}
void Util::HalfWidthKatakanaToFullWidthKatakana(const string &input,
string *output) {
TextConverter::Convert(halfwidthkatakana_to_fullwidthkatakana_da,
halfwidthkatakana_to_fullwidthkatakana_table,
input,
output);
}
void Util::FullWidthKatakanaToHalfWidthKatakana(const string &input,
string *output) {
TextConverter::Convert(fullwidthkatakana_to_halfwidthkatakana_da,
fullwidthkatakana_to_halfwidthkatakana_table,
input,
output);
}
void Util::FullWidthToHalfWidth(const string &input, string *output) {
string tmp;
Util::FullWidthAsciiToHalfWidthAscii(input, &tmp);
output->clear();
Util::FullWidthKatakanaToHalfWidthKatakana(tmp, output);
}
void Util::HalfWidthToFullWidth(const string &input, string *output) {
string tmp;
Util::HalfWidthAsciiToFullWidthAscii(input, &tmp);
output->clear();
Util::HalfWidthKatakanaToFullWidthKatakana(tmp, output);
}
// TODO(tabata): Add another function to split voice mark
// of some UNICODE only characters (required to display
// and commit for old clients)
void Util::NormalizeVoicedSoundMark(const string &input,
string *output) {
TextConverter::Convert(normalize_voiced_sound_da,
normalize_voiced_sound_table,
input,
output);
}
void Util::KanjiNumberToArabicNumber(const string &input,
string *output) {
TextConverter::Convert(kanjinumber_to_arabicnumber_da,
kanjinumber_to_arabicnumber_table,
input,
output);
}
namespace {
class BracketHandler {
public:
BracketHandler() {
VLOG(1) << "Init bracket mapping";
static const struct BracketType {
const char *open_bracket;
const char *close_bracket;
} kBracketType[] = {
// { "(", ")" },
// { "〔", "〕" },
// { "[", "]" },
// { "{", "}" },
// { "〈", "〉" },
// { "《", "》" },
// { "「", "」" },
// { "『", "』" },
// { "【", "】" },
{ "\xEF\xBC\x88", "\xEF\xBC\x89" },
{ "\xE3\x80\x94", "\xE3\x80\x95" },
{ "\xEF\xBC\xBB", "\xEF\xBC\xBD" },
{ "\xEF\xBD\x9B", "\xEF\xBD\x9D" },
{ "\xE3\x80\x88", "\xE3\x80\x89" },
{ "\xE3\x80\x8A", "\xE3\x80\x8B" },
{ "\xE3\x80\x8C", "\xE3\x80\x8D" },
{ "\xE3\x80\x8E", "\xE3\x80\x8F" },
{ "\xE3\x80\x90", "\xE3\x80\x91" },
{ NULL, NULL }, // sentinel
};
string open_full_width, open_half_width;
string close_full_width, close_half_width;
for (size_t i = 0;
(kBracketType[i].open_bracket != NULL ||
kBracketType[i].close_bracket != NULL);
++i) {
mozc::Util::FullWidthToHalfWidth(kBracketType[i].open_bracket,
&open_full_width);
mozc::Util::HalfWidthToFullWidth(kBracketType[i].open_bracket,
&open_half_width);
mozc::Util::FullWidthToHalfWidth(kBracketType[i].close_bracket,
&close_full_width);
mozc::Util::HalfWidthToFullWidth(kBracketType[i].close_bracket,
&close_half_width);
open_bracket_[open_half_width] = close_half_width;
open_bracket_[open_full_width] = close_full_width;
close_bracket_[close_half_width] = open_half_width;
close_bracket_[close_full_width] = open_full_width;
}
}
~BracketHandler() {}
bool IsOpenBracket(const string &key, string *close_bracket) const {
map<string, string>::const_iterator it =
open_bracket_.find(key);
if (it == open_bracket_.end()) {
return false;
}
*close_bracket = it->second;
return true;
}
bool IsCloseBracket(const string &key, string *open_bracket) const {
map<string, string>::const_iterator it =
close_bracket_.find(key);
if (it == close_bracket_.end()) {
return false;
}
*open_bracket = it->second;
return true;
}
private:
map<string, string> open_bracket_;
map<string, string> close_bracket_;
};
} // namespace
bool Util::IsOpenBracket(const string &key, string *close_bracket) {
return Singleton<BracketHandler>::get()->IsOpenBracket(key, close_bracket);
}
bool Util::IsCloseBracket(const string &key, string *open_bracket) {
return Singleton<BracketHandler>::get()->IsCloseBracket(key, open_bracket);
}
bool Util::IsFullWidthSymbolInHalfWidthKatakana(const string &input) {
const char *begin = input.data();
const char *end = begin + input.size();
while (begin < end) {
size_t mblen = 0;
uint16 w = UTF8ToUCS2(begin, end, &mblen);
switch (w) {
case 0x3002: // FULLSTOP
case 0x300C: // LEFT CORNER BRACKET
case 0x300D: // RIGHT CORNER BRACKET
case 0x3001: // COMMA
case 0x30FB: // MIDDLE DOT
case 0x30FC: // SOUND_MARK
case 0x3099: // VOICE SOUND MARK
case 0x309A: // SEMI VOICE SOUND MARK
begin += mblen;
break;
default:
return false;
}
begin += mblen;
}
return true;
}
bool Util::IsHalfWidthKatakanaSymbol(const string &input) {
const char *begin = input.data();
const char *end = begin + input.size();
while (begin < end) {
size_t mblen = 0;
uint16 w = UTF8ToUCS2(begin, end, &mblen);
switch (w) {
case 0xFF61: // FULLSTOP
case 0xFF62: // LEFT CORNER BRACKET
case 0xFF63: // RIGHT CORNER BRACKET
case 0xFF64: // COMMA
case 0xFF65: // MIDDLE DOT
case 0xFF70: // SOUND_MARK
case 0xFF9E: // VOICE SOUND MARK
case 0xFF9F: // SEMI VOICE SOUND MARK
begin += mblen;
break;
default:
return false;
}
}
return true;
}
bool Util::Unlink(const string &filename) {
#ifdef OS_WINDOWS
wstring wide;
return Util::UTF8ToWide(filename.c_str(), &wide) > 0 &&
::DeleteFileW(wide.c_str()) != 0;
#else
return ::unlink(filename.c_str()) == 0;
#endif
}
bool Util::RemoveDirectory(const string &dirname) {
#ifdef OS_WINDOWS
wstring wide;
return (Util::UTF8ToWide(dirname.c_str(), &wide) > 0 &&
::RemoveDirectoryW(wide.c_str()) != 0);
#else
return ::rmdir(dirname.c_str()) == 0;
#endif
}
bool Util::CreateDirectory(const string &path) {
#ifdef OS_WINDOWS
wstring wide;
if (Util::UTF8ToWide(path.c_str(), &wide) <= 0)
return false;
::CreateDirectoryW(wide.c_str(), NULL);
#else
::mkdir(path.c_str(), 0700);
#endif
return true;
}
bool Util::FileExists(const string &filename) {
#ifdef OS_WINDOWS
wstring wide;
return (Util::UTF8ToWide(filename.c_str(), &wide) > 0 &&
::GetFileAttributesW(wide.c_str()) != -1);
#else
struct stat s;
return ::stat(filename.c_str(), &s) == 0;
#endif
}
bool Util::DirectoryExists(const string &dirname) {
#ifdef OS_WINDOWS
wstring wide;
if (Util::UTF8ToWide(dirname.c_str(), &wide) <= 0) {
return false;
}
const DWORD attribute = ::GetFileAttributesW(wide.c_str());
return ((attribute != -1) &&
(attribute & FILE_ATTRIBUTE_DIRECTORY));
#else
struct stat s;
return (::stat(dirname.c_str(), &s) == 0 && S_ISDIR(s.st_mode));
#endif
}
bool Util::Rename(const string &from, const string &to) {
// If a file whose name is 'to' already exists, do nothing and
// return false.
if (FileExists(to)) {
return false;
}
#ifdef OS_WINDOWS
wstring wfrom;
wstring wto;
return Util::UTF8ToWide(from.c_str(), &wfrom) > 0 &&
Util::UTF8ToWide(to.c_str(), &wto) > 0 &&
::MoveFileW(wfrom.c_str(), wto.c_str());
#else
return ::rename(from.c_str(), to.c_str()) == 0;
#endif
}
#ifdef OS_WINDOWS
namespace {
typedef HANDLE (WINAPI *FPCreateTransaction)(LPSECURITY_ATTRIBUTES,
LPGUID,
DWORD,
DWORD,
DWORD,
DWORD,
LPWSTR);
typedef BOOL (WINAPI *FPMoveFileTransactedW)(LPCTSTR,
LPCTSTR,
LPPROGRESS_ROUTINE,
LPVOID,
DWORD,
HANDLE);
typedef BOOL (WINAPI *FPCommitTransaction)(HANDLE);
FPCreateTransaction g_create_transaction = NULL;
FPMoveFileTransactedW g_move_file_transactedw = NULL;
FPCommitTransaction g_commit_transaction = NULL;
static once_t g_init_tx_move_file_once = MOZC_ONCE_INIT;
void InitTxMoveFile() {
if (!Util::IsVistaOrLater()) {
return;
}
const HMODULE lib_ktmw = mozc::Util::LoadSystemLibrary(L"ktmw32.dll");
if (lib_ktmw == NULL) {
LOG(ERROR) << "LoadSystemLibrary for ktmw32.dll failed.";
return;
}
const HMODULE lib_kernel = Util::GetSystemModuleHandle(L"kernel32.dll");
if (lib_kernel == NULL) {
LOG(ERROR) << "LoadSystemLibrary for kernel32.dll failed.";
return;
}
g_create_transaction =
reinterpret_cast<FPCreateTransaction>
(::GetProcAddress(lib_ktmw, "CreateTransaction"));
g_move_file_transactedw =
reinterpret_cast<FPMoveFileTransactedW>
(::GetProcAddress(lib_kernel, "MoveFileTransactedW"));
g_commit_transaction =
reinterpret_cast<FPCommitTransaction>
(::GetProcAddress(lib_ktmw, "CommitTransaction"));
LOG_IF(ERROR, g_create_transaction == NULL)
<< "CreateTransaction init failed";
LOG_IF(ERROR, g_move_file_transactedw == NULL)
<< "MoveFileTransactedW init failed";
LOG_IF(ERROR, g_commit_transaction == NULL)
<< "CommitTransaction init failed";
}
} // namespace
#endif // OS_WINDOWS
bool Util::AtomicRename(const string &from, const string &to) {
#ifdef OS_WINDOWS
CallOnce(&g_init_tx_move_file_once, &InitTxMoveFile);
wstring fromw, tow;
Util::UTF8ToWide(from.c_str(), &fromw);
Util::UTF8ToWide(to.c_str(), &tow);
bool transaction_failed = false;
if (g_commit_transaction != NULL &&
g_move_file_transactedw != NULL &&
g_create_transaction != NULL) {
HANDLE handle = (*g_create_transaction)(NULL, 0, 0, 0, 0, 0, NULL);
if (INVALID_HANDLE_VALUE == handle) {
LOG(ERROR) << "CreateTransaction failed: " << ::GetLastError();
transaction_failed = true;
}
if (!transaction_failed &&
!(*g_move_file_transactedw)(fromw.c_str(), tow.c_str(),
NULL, NULL,
MOVEFILE_WRITE_THROUGH |
MOVEFILE_REPLACE_EXISTING,
handle)) {
LOG(ERROR) << "MoveFileTransactedW failed: " << ::GetLastError();
transaction_failed = true;
}
if (!transaction_failed &&
!(*g_commit_transaction)(handle)) {
LOG(ERROR) << "CommitTransaction failed: " << ::GetLastError();
transaction_failed = true;
}
LOG_IF(ERROR, transaction_failed)
<< "Transactional MoveFile failed. Execute fallback plan";
::CloseHandle(handle);
} else {
transaction_failed = true;
}
if (transaction_failed &&
!::MoveFileEx(fromw.c_str(), tow.c_str(),
MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)) {
LOG(ERROR) << "MoveFileEx failed: " << ::GetLastError();
return false;
}
return true;
#else // OS_WINDOWS
return (0 == rename(from.c_str(), to.c_str()));
#endif // OS_WINDOWS
}
// This function uses LF as a line separator. This is OK because it is
// used only for test code now. If you want to use it for a general
// purpose, I may need to make it recognize CR and CR+LF as well.
bool Util::CopyTextFile(const string &from, const string &to) {
InputFileStream ifs(from.c_str());
if (!ifs) {
LOG(ERROR) << "Can't open input file. " << from;
return false;
}
OutputFileStream ofs(to.c_str());
if (!ofs) {
LOG(ERROR) << "Can't open output file. " << to;
return false;
}
string line;
while (getline(ifs, line)) {
ofs << line << "\n";
}
return true;
}
// TODO(taku): This value is defined in KnownFolders.h
// If Win SDK for Vista is installed. Check the availability of SDK later.
#ifdef OS_WINDOWS
EXTERN_C const GUID DECLSPEC_SELECTANY FOLDERID_LocalAppDataLow = {
0xA520A1A4, 0x1780, 0x4FF6, { 0xBD, 0x18, 0x16, 0x73, 0x43, 0xC5, 0xAF, 0x16 }
};
#endif
namespace {
class UserProfileDirectoryImpl {
public:
UserProfileDirectoryImpl();
const string &get() { return dir_; }
void set(const string &dir) { dir_ = dir; }
private:
string dir_;
};
UserProfileDirectoryImpl::UserProfileDirectoryImpl() {
string dir;
#ifdef OS_WINDOWS
// Windows Vista: use LocalLow
// Call SHGetKnownFolderPath dynamically.
// http://msdn.microsoft.com/en-us/library/bb762188(VS.85).aspx
// http://msdn.microsoft.com/en-us/library/bb762584(VS.85).aspx
// GUID: {A520A1A4-1780-4FF6-BD18-167343C5AF16}
if (Util::IsVistaOrLater()) {
const HMODULE hLib = mozc::Util::LoadSystemLibrary(L"shell32.dll");
if (hLib != NULL) {
typedef HRESULT (WINAPI *FPSHGetKnownFolderPath)(
const GUID &, DWORD, HANDLE, PWSTR *);
FPSHGetKnownFolderPath func = reinterpret_cast<FPSHGetKnownFolderPath>
(::GetProcAddress(hLib, "SHGetKnownFolderPath"));
if (func != NULL) {
PWSTR pstr = NULL;
const HRESULT result =
(*func)(FOLDERID_LocalAppDataLow, 0, NULL, &pstr);
if (SUCCEEDED(result) && pstr != NULL && ::lstrlen(pstr) > 0) {
Util::WideToUTF8(pstr, &dir);
::CoTaskMemFree(pstr);
}
}
::FreeLibrary(hLib);
}
}
// Windows XP: use "%USERPROFILE%\Local Settings\Application Data"
if (dir.empty() || !Util::IsVistaOrLater()) {
// Retrieve the directory "%USERPROFILE%\Local Settings\Application Data",
// which is a user directory which serves a data repository for local
// applications, to avoid user profiles from being roamed by indexers.
wchar_t config[MAX_PATH];
if (::SHGetFolderPathW(NULL, CSIDL_LOCAL_APPDATA, NULL, SHGFP_TYPE_CURRENT,
&config[0]) == S_OK) {
Util::WideToUTF8(&config[0], &dir);
}
}
CHECK(!dir.empty());
dir = Util::JoinPath(dir, kCompanyNameInEnglish);
Util::CreateDirectory(dir);
dir = Util::JoinPath(dir, kProductNameInEnglish);
#elif defined(OS_MACOSX)
dir = MacUtil::GetApplicationSupportDirectory();
#ifdef GOOGLE_JAPANESE_INPUT_BUILD
dir = Util::JoinPath(dir, "Google");
// The permission of ~/Library/Application Support/Google seems to be 0755.
// TODO(komatsu): nice to make a wrapper function.
::mkdir(dir.c_str(), 0755);
dir = Util::JoinPath(dir, "JapaneseInput");
#else
dir = Util::JoinPath(dir, "Mozc");
#endif
#else // OS_LINUX or OS_CHROMEOS
char buf[1024];
struct passwd pw, *ppw;
const uid_t uid = geteuid();
CHECK_EQ(0, getpwuid_r(uid, &pw, buf, sizeof(buf), &ppw))
<< "Can't get passwd entry for uid " << uid << ".";
CHECK_LT(0, strlen(pw.pw_dir))
<< "Home directory for uid " << uid << " is not set.";
#if defined(OS_CHROMEOS)
dir = Util::JoinPath(pw.pw_dir, "user/.mozc");
#else
dir = Util::JoinPath(pw.pw_dir, ".mozc");
#endif // OS_CHROMEOS
#endif
Util::CreateDirectory(dir);
if (!Util::DirectoryExists(dir)) {
LOG(ERROR) << "Failed to create directory: " << dir;
}
// set User profile directory
dir_ = dir;
}
#ifdef OS_WINDOWS
class ServerDirectoryCache {
public:
ServerDirectoryCache() {
wchar_t program_files_path_buffer[MAX_PATH];
#if defined(_M_X64)
// For x86-64 binaries (such as Text Input Prosessor DLL for 64-bit apps),
// CSIDL_PROGRAM_FILES points 64-bit Program Files directory so that
// CSIDL_PROGRAM_FILESX86 is appropriate to find server, renderer,
// and other binaries' path.
const HRESULT hr = ::SHGetFolderPathW(
NULL, CSIDL_PROGRAM_FILESX86, NULL,
SHGFP_TYPE_CURRENT, program_files_path_buffer);
#elif defined(_M_IX86)
// For x86 binaries (such as server, renderer, and other binaries),
// CSIDL_PROGRAM_FILES always points 32-bit Program Files directory
// even if they are running in 64-bit Windows.
const HRESULT hr = ::SHGetFolderPathW(
NULL, CSIDL_PROGRAM_FILES, NULL,
SHGFP_TYPE_CURRENT, program_files_path_buffer);
#else
#error "Unsupported CPU architecture"
#endif // _M_X64, _M_IX86, and others
CHECK(S_OK == hr) << "Failed to get server directory. HRESULT = "
<< hr;
string program_files_path;
Util::WideToUTF8(program_files_path_buffer, &program_files_path);
server_path_ = program_files_path;
server_path_ = Util::JoinPath(server_path_, kCompanyNameInEnglish);
server_path_ = Util::JoinPath(server_path_, kProductNameInEnglish);
}
const string server_path() const {
return server_path_;
}
private:
string server_path_;
};
#endif // OS_WINDOWS
} // namespace
string Util::GetUserProfileDirectory() {
return Singleton<UserProfileDirectoryImpl>::get()->get();
}
void Util::SetUserProfileDirectory(const string &path) {
Singleton<UserProfileDirectoryImpl>::get()->set(path);
}
string Util::GetLoggingDirectory() {
#ifdef OS_MACOSX
string dir = MacUtil::GetLoggingDirectory();
Util::CreateDirectory(dir);
return dir;
#else
return GetUserProfileDirectory();
#endif
}
string Util::GetServerDirectory() {
#ifdef OS_WINDOWS
return Singleton<ServerDirectoryCache>::get()->server_path();
#endif // OS_WINDOWS
// TODO(mazda): Not to use hardcoded path.
#ifdef OS_MACOSX
return MacUtil::GetServerDirectory();
#endif // OS_MACOSX
#ifdef OS_LINUX
return "/usr/lib/mozc";
#endif // OS_LINUX
}
string Util::GetServerPath() {
const string server_path = mozc::Util::GetServerDirectory();
// if server path is empty, return empty path
if (server_path.empty()) {
return "";
}
return mozc::Util::JoinPath(server_path, kMozcServerName);
}
string Util::GetUserNameAsString() {
string username;
#ifdef OS_WINDOWS
wchar_t wusername[UNLEN + 1];
DWORD name_size = UNLEN + 1;
// Call the same name Windows API. (include Advapi32.lib).
::GetUserName(wusername, &name_size);
Util::WideToUTF8(&wusername[0], &username);
#else // OS_WINDOWS
char buf[1024];
struct passwd pw, *ppw;
CHECK_EQ(0, getpwuid_r(geteuid(), &pw, buf, sizeof(buf), &ppw));
username.append(pw.pw_name);
#endif // OS_WINDOWS
return username;
}
#ifdef OS_WINDOWS
namespace {
string GetObjectNameAsString(HANDLE handle) {
if (handle == NULL) {
LOG(ERROR) << "Unknown handle";
return "";
}
DWORD size = 0;
if (::GetUserObjectInformationA(handle, UOI_NAME,
NULL, 0, &size) ||
ERROR_INSUFFICIENT_BUFFER != ::GetLastError()) {
LOG(ERROR) << "GetUserObjectInformationA() failed: "
<< ::GetLastError();
return "";
}
if (size == 0) {
LOG(ERROR) << "buffer size is 0";
return "";
}
scoped_array<char> buf(new char[size]);
DWORD return_size = 0;
if (!::GetUserObjectInformationA(handle, UOI_NAME, buf.get(),
size, &return_size)) {
LOG(ERROR) << "::GetUserObjectInformationA() failed: "
<< ::GetLastError();
return "";
}
if (return_size <= 1) {
LOG(ERROR) << "result buffer size is too small";
return "";
}
char *result = buf.get();
result[return_size - 1] = '\0'; // just make sure NULL terminated
return result;
}
}
#endif
string Util::GetDesktopNameAsString() {
#ifdef OS_LINUX
const char *display = getenv("DISPLAY");
if (display == NULL) {
return "";
}
return display;
#endif
#ifdef OS_MACOSX
return "";
#endif
#ifdef OS_WINDOWS
DWORD session_id = 0;
if (!::ProcessIdToSessionId(::GetCurrentProcessId(),
&session_id)) {
LOG(ERROR) << "cannot get session id: " << ::GetLastError();
return "";
}
char id[64];
snprintf(id, sizeof(id), "%u", session_id);
string result = id;
result += ".";
result += GetObjectNameAsString(::GetProcessWindowStation());
result += ".";
result += GetObjectNameAsString(::GetThreadDesktop(::GetCurrentThreadId()));
return result;
#endif
}
#ifdef OS_WINDOWS
namespace {
class UserSidImpl {
public:
UserSidImpl();
const string &get() { return sid_; }
private:
string sid_;
};
UserSidImpl::UserSidImpl() {
HANDLE htoken = NULL;
if (!::OpenProcessToken(::GetCurrentProcess(), TOKEN_QUERY, &htoken)) {
sid_ = Util::GetUserNameAsString();
LOG(ERROR) << "OpenProcessToken failed: " << ::GetLastError();
return;
}
DWORD length = 0;
::GetTokenInformation(htoken, TokenUser, NULL, 0, &length);
scoped_array<char> buf(new char[length]);
PTOKEN_USER p_user = reinterpret_cast<PTOKEN_USER>(buf.get());
if (length == 0 ||
!::GetTokenInformation(htoken, TokenUser, p_user, length, &length)) {
::CloseHandle(htoken);
sid_ = Util::GetUserNameAsString();
LOG(ERROR) << "OpenTokenInformation failed: " << ::GetLastError();
return;
}
LPTSTR p_sid_user_name;
if (!::ConvertSidToStringSidW(p_user->User.Sid, &p_sid_user_name)) {
::CloseHandle(htoken);
sid_ = Util::GetUserNameAsString();
LOG(ERROR) << "ConvertSidToStringSidW failed: " << ::GetLastError();
return;
}
Util::WideToUTF8(p_sid_user_name, &sid_);
::LocalFree(p_sid_user_name);
::CloseHandle(htoken);
}
} // namespace
#endif // OS_WINDOWS
string Util::GetUserSidAsString() {
#ifdef OS_WINDOWS
return Singleton<UserSidImpl>::get()->get();
#else
return Util::GetUserNameAsString();
#endif // OS_WINDOWS
}
// Path
string Util::JoinPath(const string &path1, const string &path2) {
string output;
JoinPath(path1, path2, &output);
return output;
}
void Util::JoinPath(const string &path1, const string &path2,
string *output) {
*output = path1;
if (path1.size() > 0 && path1[path1.size() - 1] != kFileDelimiter) {
*output += kFileDelimiter;
}
*output += path2;
}
// TODO(taku): what happens if filename == '/foo/bar/../bar/..
string Util::Dirname(const string &filename) {
const string::size_type p = filename.find_last_of(kFileDelimiter);
if (p == string::npos) {
return "";
}
return filename.substr(0, p);
}
string Util::Basename(const string &filename) {
const string::size_type p = filename.find_last_of(kFileDelimiter);
if (p == string::npos) {
return filename;
}
return filename.substr(p + 1, filename.size() - p);
}
string Util::NormalizeDirectorySeparator(const string &path) {
#ifdef OS_WINDOWS
string normalized;
Util::StringReplace(path, string(1, kFileDelimiterForUnix),
string(1, kFileDelimiterForWindows), true, &normalized);
return normalized;
#else
return path;
#endif
}
// Command line flags
bool Util::CommandLineGetFlag(int argc,
char **argv,
string *key,
string *value,
int *used_args) {
key->clear();
value->clear();
*used_args = 0;
if (argc < 1) {
return false;
}
*used_args = 1;
const char *start = argv[0];
if (start[0] != '-') {
return false;
}
++start;
if (start[0] == '-') ++start;
const string arg = start;
const size_t n = arg.find("=");
if (n != string::npos) {
*key = arg.substr(0, n);
*value = arg.substr(n + 1, arg.size() - n);
return true;
}
key->assign(arg);
value->clear();
if (argc == 1) {
return true;
}
start = argv[1];
if (start[0] == '-') {
return true;
}
*used_args = 2;
value->assign(start);
return true;
}
void Util::CommandLineRotateArguments(int argc, char ***argv) {
char *arg = **argv;
memmove(*argv, *argv + 1, (argc - 1) * sizeof(**argv));
(*argv)[argc - 1] = arg;
}
// URL
void Util::EncodeURI(const string &input, string *output) {
const char kDigits[] = "0123456789ABCDEF";
const char *begin = input.data();
const char *end = input.data() + input.size();
output->clear();
while (begin < end) {
if (isascii(*begin) &&
(isdigit(*begin) || isalpha(*begin))) {
*output += *begin;
} else {
*output += '%';
*output += kDigits[(*begin >> 4) & 0x0f];
*output += kDigits[*begin & 0x0f];
}
++begin;
}
}
void Util::DecodeURI(const string &src, string *output) {
output->clear();
const char *p = src.data();
const char *end = src.data() + src.size();
while (p < end) {
if (*p == '%' && p + 2 < end) {
const char h = toupper(p[1]);
const char l = toupper(p[2]);
const int vh = isalpha(h) ? (10 + (h -'A')) : (h - '0');
const int vl = isalpha(l) ? (10 + (l -'A')) : (l - '0');
*output += ((vh << 4) + vl);
p += 3;
} else if (*p == '+') {
*output += ' ';
p++;
} else {
*output += *p++;
}
}
}
void Util::AppendCGIParams(const vector<pair<string, string> > ¶ms,
string *base) {
if (params.size() == 0 || base == NULL) {
return;
}
string encoded;
for (vector<pair<string, string> >::const_iterator it = params.begin();
it != params.end();
++it) {
// Append "<first>=<encoded second>&"
base->append(it->first);
base->append("=");
Util::EncodeURI(it->second, &encoded);
base->append(encoded);
base->append("&");
}
// Delete the last "&".
if (!base->empty()) {
base->erase(base->size() - 1);
}
}
void Util::Escape(const string &input, string *output) {
output->clear();
for (size_t i = 0; i < input.size(); ++i) {
const int hi = ((static_cast<int>(input[i]) & 0xF0) >> 4);
const int lo = (static_cast<int>(input[i]) & 0x0F);
*output += "\\x";
*output += static_cast<char>(hi >= 10 ? hi - 10 + 'A' : hi + '0');
*output += static_cast<char>(lo >= 10 ? lo - 10 + 'A' : lo + '0');
}
}
void Util::EscapeHtml(const string &plain, string *escaped) {
string tmp1, tmp2, tmp3, tmp4;
Util::StringReplace(plain, "&", "&", true, &tmp1);
Util::StringReplace(tmp1, "<", "<", true, &tmp2);
Util::StringReplace(tmp2, ">", ">", true, &tmp3);
Util::StringReplace(tmp3, "\"", """, true, &tmp4);
Util::StringReplace(tmp4, "'", "'", true, escaped);
}
void Util::EscapeCss(const string &plain, string *escaped) {
// ">" and "&" are not escaped because they are used for operands of
// CSS.
Util::StringReplace(plain, "<", "<", true, escaped);
}
#define INRANGE(w, a, b) ((w) >= (a) && (w) <= (b))
// script type
Util::ScriptType Util::GetScriptType(uint16 w) {
if (INRANGE(w, 0x0030, 0x0039) || // ascii number
INRANGE(w, 0xFF10, 0xFF19)) { // full width number
return NUMBER;
} else if (INRANGE(w, 0x0041, 0x005A) || // ascii upper
INRANGE(w, 0x0061, 0x007A) || // ascii lower
INRANGE(w, 0xFF21, 0xFF3A) || // fullwidth ascii upper
INRANGE(w, 0xFF41, 0xFF5A)) { // fullwidth ascii lower
return ALPHABET;
} else if (INRANGE(w, 0x4E00, 0x9FA5) || // CJK Unified Ideographs
INRANGE(w, 0x3400, 0x4DBF) || // CJK Unified Ideographs ExtentionA
INRANGE(w, 0xF900, 0xFA2D) || // CJK Compatibility Ideographs
w == 0x305) { // 々 (repetition)
return KANJI;
} else if (INRANGE(w, 0x3041, 0x309F)) { // hiragana
return HIRAGANA;
} else if (INRANGE(w, 0x30A1, 0x30FE) || // full width katakana
INRANGE(w, 0xFF65, 0xFF9F)) { // half width katakana
return KATAKANA;
}
return UNKNOWN_SCRIPT;
}
Util::FormType Util::GetFormType(uint16 w) {
if (INRANGE(w, 0x0020, 0x007F) || // ascii
INRANGE(w, 0xFF61, 0xFF9F)) { // half-width katakana
return HALF_WIDTH;
} else {
return FULL_WIDTH;
}
return UNKNOWN_FORM;
}
#undef INRANGE
// return script type of first character in str
Util::ScriptType Util::GetScriptType(const char *begin,
const char *end, size_t *mblen) {
const uint16 w = Util::UTF8ToUCS2(begin, end, mblen);
return Util::GetScriptType(w);
}
Util::ScriptType Util::GetScriptType(const string &str) {
const char *begin = str.data();
const char *end = str.data() + str.size();
size_t mblen = 0;
ScriptType result = SCRIPT_TYPE_SIZE;
while (begin < end) {
const uint16 w = Util::UTF8ToUCS2(begin, end, &mblen);
ScriptType type = Util::GetScriptType(w);
if ((w == 0x30FC || w == 0x30FB ||
(w >= 0x3099 && w <= 0x309C)) &&
// PROLONGEDSOUND MARK|MIDLE_DOT|VOICDE_SOUND_MARKS
// are HIRAGANA as well
(result == SCRIPT_TYPE_SIZE ||
result == HIRAGANA || result == KATAKANA)) {
type = result; // restore the previous state
}
if (type == UNKNOWN_SCRIPT) {
return UNKNOWN_SCRIPT;
}
// not first character
if (str.data() != begin && result != SCRIPT_TYPE_SIZE && type != result) {
return UNKNOWN_SCRIPT;
}
result = type;
begin += mblen;
}
if (result == SCRIPT_TYPE_SIZE) { // everything is "ー"
return UNKNOWN_SCRIPT;
}
return result;
}
// return true if all script_type in str is "type"
bool Util::IsScriptType(const string &str, Util::ScriptType type) {
const char *begin = str.data();
const char *end = str.data() + str.size();
size_t mblen = 0;
while (begin < end) {
const uint16 w = Util::UTF8ToUCS2(begin, end, &mblen);
// Exception: 30FC (PROLONGEDSOUND MARK is categorized as HIRAGANA as well)
if ((w == 0x30FC && type == HIRAGANA) || type == GetScriptType(w)) {
begin += mblen;
} else {
return false;
}
}
return true;
}
// return true if the string contains script_type char
bool Util::ContainsScriptType(const string &str, ScriptType type) {
const char *begin = str.data();
const char *end = str.data() + str.size();
while (begin < end) {
size_t mblen;
const uint16 w = Util::UTF8ToUCS2(begin, end, &mblen);
if (type == Util::GetScriptType(w)) {
return true;
}
begin += mblen;
}
return false;
}
// return the Form Type of string
Util::FormType Util::GetFormType(const string &str) {
const char *begin = str.data();
const char *end = str.data() + str.size();
size_t mblen = 0;
Util::FormType result = Util::UNKNOWN_FORM;
while (begin < end) {
const uint16 w = Util::UTF8ToUCS2(begin, end, &mblen);
const Util::FormType type = GetFormType(w);
if (type == UNKNOWN_FORM || (str.data() != begin && type != result)) {
return UNKNOWN_FORM;
}
result = type;
begin += mblen;
}
return result;
}
// Util::CharcterSet Util::GetCharacterSet(uint16 ucs2);
#include "base/character_set.h"
Util::CharacterSet Util::GetCharacterSet(const string &str) {
const char *begin = str.data();
const char *end = str.data() + str.size();
size_t mblen = 0;
Util::CharacterSet result = Util::ASCII;
while (begin < end) {
const uint16 w = Util::UTF8ToUCS2(begin, end, &mblen);
result = max(result, GetCharacterSet(w));
begin += mblen;
}
return result;
}
bool Util::IsPlatformSupported() {
#if defined(OS_MACOSX)
// TODO(yukawa): support Mac.
return true;
#elif defined(OS_LINUX)
// TODO(yukawa): support Linux.
return true;
#elif defined(OS_WINDOWS)
// Sometimes we suffer from version lie of GetVersion(Ex) such as
// http://b/2430094
// This is why we use VerifyVersionInfo here instead of GetVersion(Ex).
if (IsWindowsX64() && !Util::IsVistaOrLater()) {
// NT 5.x OSes are excluded from Mozc 64-bit support.
return false; // not supported
}
// You can find a table of version number for each version of Windows in
// the following page.
// http://msdn.microsoft.com/en-us/library/ms724833.aspx
{
// Windows 7 <= |OSVERSION|: supported
OSVERSIONINFOEX osvi = { 0 };
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
osvi.dwMajorVersion = 6;
osvi.dwMinorVersion = 1;
DWORDLONG conditional = 0;
VER_SET_CONDITION(conditional, VER_MAJORVERSION, VER_GREATER_EQUAL);
VER_SET_CONDITION(conditional, VER_MINORVERSION, VER_GREATER_EQUAL);
const DWORD typemask = VER_MAJORVERSION | VER_MINORVERSION;
if (::VerifyVersionInfo(&osvi, typemask, conditional) != 0) {
return true; // supported
}
}
{
// Windows Vista SP1 <= |OSVERSION| < Windows 7: supported
OSVERSIONINFOEX osvi = { 0 };
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
osvi.dwMajorVersion = 6;
osvi.dwMinorVersion = 0;
osvi.wServicePackMajor = 1;
DWORDLONG conditional = 0;
VER_SET_CONDITION(conditional, VER_MAJORVERSION, VER_GREATER_EQUAL);
VER_SET_CONDITION(conditional, VER_MINORVERSION, VER_GREATER_EQUAL);
VER_SET_CONDITION(conditional, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL);
const DWORD typemask = VER_MAJORVERSION | VER_MINORVERSION |
VER_SERVICEPACKMAJOR;
if (::VerifyVersionInfo(&osvi, typemask, conditional) != 0) {
return true; // supported
}
}
{
// Windows Server 2003 <= |OSVERSION| < Windows Vista SP1: not supported
OSVERSIONINFOEX osvi = { 0 };
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
osvi.dwMajorVersion = 5;
osvi.dwMinorVersion = 2;
DWORDLONG conditional = 0;
VER_SET_CONDITION(conditional, VER_MAJORVERSION, VER_GREATER_EQUAL);
VER_SET_CONDITION(conditional, VER_MINORVERSION, VER_GREATER_EQUAL);
const DWORD typemask = VER_MAJORVERSION | VER_MINORVERSION;
if (::VerifyVersionInfo(&osvi, typemask, conditional) != 0) {
return false; // not supported
}
}
{
// Windows XP SP2 <= |OSVERSION| < Windows Server 2003: supported
OSVERSIONINFOEX osvi = { 0 };
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
osvi.dwMajorVersion = 5;
osvi.dwMinorVersion = 1;
osvi.wServicePackMajor = 2;
DWORDLONG conditional = 0;
VER_SET_CONDITION(conditional, VER_MAJORVERSION, VER_GREATER_EQUAL);
VER_SET_CONDITION(conditional, VER_MINORVERSION, VER_GREATER_EQUAL);
VER_SET_CONDITION(conditional, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL);
const DWORD typemask = VER_MAJORVERSION | VER_MINORVERSION |
VER_SERVICEPACKMAJOR;
if (::VerifyVersionInfo(&osvi, typemask, conditional) != 0) {
return true; // supported
}
}
// |OSVERSION| < Windows XP SP2: not supported
return false; // not support
#else
COMPILE_ASSERT(false, unsupported_platform);
#endif
}
#ifdef OS_WINDOWS
bool Util::IsVistaOrLater() {
OSVERSIONINFOEX osvi = { 0 };
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
osvi.dwMajorVersion = 6;
DWORDLONG conditional = 0;
VER_SET_CONDITION(conditional, VER_MAJORVERSION, VER_GREATER_EQUAL);
return 0 != VerifyVersionInfo(&osvi, VER_MAJORVERSION, conditional);
}
class IsWindowsX64Cache {
public:
IsWindowsX64Cache() : is_x64_(IsX64()) {}
bool is_x64() const {
return is_x64_;
}
private:
static bool IsX64() {
typedef void (WINAPI *GetNativeSystemInfoProc)(
__out LPSYSTEM_INFO lpSystemInfo);
const HMODULE kernel32 = Util::GetSystemModuleHandle(L"kernel32.dll");
DCHECK_NE(kernel32, NULL);
GetNativeSystemInfoProc proc = reinterpret_cast<GetNativeSystemInfoProc>(
::GetProcAddress(kernel32, "GetNativeSystemInfo"));
if (proc == NULL) {
DLOG(INFO) << "GetNativeSystemInfo not found";
// If GetNativeSystemInfo API is not exported,
// the system is never X64 edition.
return false;
}
SYSTEM_INFO system_info;
::ZeroMemory(&system_info, sizeof(system_info));
proc(&system_info);
return system_info.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64;
}
bool is_x64_;
DISALLOW_COPY_AND_ASSIGN(IsWindowsX64Cache);
};
bool Util::IsWindowsX64() {
return Singleton<IsWindowsX64Cache>::get()->is_x64();
}
namespace {
class SystemDirectoryCache {
public:
SystemDirectoryCache() : system_dir_(NULL) {
const HRESULT hr = ::SHGetFolderPathW(NULL, CSIDL_SYSTEM, NULL,
SHGFP_TYPE_CURRENT, path_buffer_);
DCHECK(hr == S_OK);
if (hr != S_OK) {
LOG(ERROR) << "Failed to get system directory. HRESULT = " << hr;
return;
}
system_dir_ = path_buffer_;
}
const wchar_t *system_dir() const {
return system_dir_;
}
private:
wchar_t path_buffer_[MAX_PATH];
wchar_t *system_dir_;
};
} // namespace
const wchar_t *Util::GetSystemDir() {
return Singleton<SystemDirectoryCache>::get()->system_dir();
}
HMODULE Util::LoadSystemLibrary(const wstring &base_filename) {
wstring fullpath = Util::GetSystemDir();
fullpath += L"\\";
fullpath += base_filename;
const HMODULE module = ::LoadLibraryExW(fullpath.c_str(),
NULL,
LOAD_WITH_ALTERED_SEARCH_PATH);
if (NULL == module) {
const int last_error = ::GetLastError();
DLOG(WARNING) << "LoadLibraryEx failed."
<< " fullpath = " << fullpath.c_str()
<< " error = " << last_error;
}
return module;
}
HMODULE Util::LoadMozcLibrary(const wstring &base_filename) {
wstring fullpath;
Util::UTF8ToWide(Util::GetServerDirectory().c_str(), &fullpath);
fullpath += L"\\";
fullpath += base_filename;
const HMODULE module = ::LoadLibraryExW(fullpath.c_str(),
NULL,
LOAD_WITH_ALTERED_SEARCH_PATH);
if (NULL == module) {
const int last_error = ::GetLastError();
DLOG(WARNING) << "LoadLibraryEx failed."
<< " fullpath = " << fullpath.c_str()
<< " error = " << last_error;
}
return module;
}
HMODULE Util::GetSystemModuleHandle(const wstring &base_filename) {
wstring fullpath = Util::GetSystemDir();
fullpath += L"\\";
fullpath += base_filename;
const HMODULE module = ::GetModuleHandleW(fullpath.c_str());
if (NULL == module) {
const int last_error = ::GetLastError();
DLOG(WARNING) << "GetModuleHandle failed."
<< " fullpath = " << fullpath.c_str()
<< " error = " << last_error;
}
return module;
}
bool Util::GetFileVersion(const wstring &file_fullpath,
int *major, int *minor, int *build, int *revision) {
DCHECK(major);
DCHECK(minor);
DCHECK(build);
DCHECK(revision);
string path;
Util::WideToUTF8(file_fullpath.c_str(), &path);
// Accoding to KB826496, we should check file existence.
// http://support.microsoft.com/kb/826496
if (!FileExists(path)) {
LOG(ERROR) << "file not found";
return false;
}
DWORD handle = 0;
const DWORD version_size =
::GetFileVersionInfoSizeW(file_fullpath.c_str(), &handle);
if (version_size == 0) {
LOG(ERROR) << "GetFileVersionInfoSizeW failed."
<< " error = " << ::GetLastError();
return false;
}
scoped_array<BYTE> version_buffer(new BYTE[version_size]);
if (!::GetFileVersionInfoW(file_fullpath.c_str(), 0,
version_size, version_buffer.get())) {
LOG(ERROR) << "GetFileVersionInfo failed."
<< " error = " << ::GetLastError();
return false;
}
VS_FIXEDFILEINFO *fixed_fileinfo = NULL;
UINT length = 0;
if (!::VerQueryValueW(version_buffer.get(), L"\\",
reinterpret_cast<LPVOID *>(&fixed_fileinfo),
&length)) {
LOG(ERROR) << "VerQueryValue failed."
<< " error = " << ::GetLastError();
return false;
}
*major = HIWORD(fixed_fileinfo->dwFileVersionMS);
*minor = LOWORD(fixed_fileinfo->dwFileVersionMS);
*build = HIWORD(fixed_fileinfo->dwFileVersionLS);
*revision = LOWORD(fixed_fileinfo->dwFileVersionLS);
return true;
}
string Util::GetFileVersionString(const wstring &file_fullpath) {
int major, minor, build, revision;
if (!GetFileVersion(file_fullpath, &major, &minor, &build, &revision)) {
return "";
}
stringstream buf;
buf << major << "." << minor << "." << build << "." << revision;
return buf.str();
}
#endif // OS_WINDOWS
// TODO(toshiyuki): move this to the initialization module and calculate
// version only when initializing.
string Util::GetOSVersionString() {
#ifdef OS_WINDOWS
string ret = "Windows";
OSVERSIONINFOEX osvi = { 0 };
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
if (GetVersionEx(reinterpret_cast<OSVERSIONINFO *>(&osvi))) {
ret += "." + SimpleItoa(osvi.dwMajorVersion);
ret += "." + SimpleItoa(osvi.dwMinorVersion);
ret += "." + SimpleItoa(osvi.wServicePackMajor);
ret += "." + SimpleItoa(osvi.wServicePackMinor);
} else {
LOG(WARNING) << "GetVersionEx failed";
}
return ret;
#elif defined(OS_MACOSX)
const string ret = "MacOSX " + MacUtil::GetOSVersionString();
// TODO(toshiyuki): get more specific info
return ret;
#elif defined(OS_LINUX)
const string ret = "Linux";
return ret;
#else
const string ret = "Unknown";
return ret;
#endif
}
void Util::DisableIME() {
#ifdef OS_WINDOWS
// turn off IME:
// AFAIK disabling TSF under Vista and later OS is almost impossible
// so that all we have to do is to prevent from calling
// ITfThreadMgr::Activate and ITfThreadMgrEx::ActivateEx in this thread.
::ImmDisableTextFrameService(-1);
::ImmDisableIME(-1);
#endif
}
uint64 Util::GetTotalPhysicalMemory() {
#if defined(OS_WINDOWS)
MEMORYSTATUSEX memory_status = { sizeof(MEMORYSTATUSEX) };
if (!::GlobalMemoryStatusEx(&memory_status)) {
return 0;
}
return memory_status.ullTotalPhys;
#elif defined(OS_MACOSX)
int mib[] = { CTL_HW, HW_MEMSIZE };
uint64 total_memory = 0;
size_t size = sizeof(total_memory);
const int error =
sysctl(mib, arraysize(mib), &total_memory, &size, NULL, 0);
if (error == -1) {
const int error = errno;
LOG(ERROR) << "sysctl with hw.memsize failed. "
<< "errno: " << error;
return 0;
}
return total_memory;
#elif defined(OS_LINUX)
#if defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES)
const long page_size = sysconf(_SC_PAGESIZE);
const long number_of_phyisical_pages = sysconf(_SC_PHYS_PAGES);
if (number_of_phyisical_pages < 0) {
// likely to be overflowed.
return 0;
}
return static_cast<uint64>(number_of_phyisical_pages) * page_size;
#else
return 0;
#endif // defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES)
#else // !(defined(OS_WINDOWS) || defined(OS_MACOSX) || defined(OS_LINUX))
#error "unknown platform"
#endif
}
void Util::PreloadMappedRegion(const void *begin,
size_t region_size_in_byte,
volatile bool *query_quit) {
#if defined(OS_WINDOWS)
SYSTEM_INFO system_info;
::ZeroMemory(&system_info, sizeof(system_info));
::GetSystemInfo(&system_info);
const size_t page_size = system_info.dwPageSize;
#elif defined(OS_MACOSX) || defined(OS_LINUX)
#if defined(_SC_PAGESIZE)
const size_t page_size = sysconf(_SC_PAGESIZE);
#else
const size_t page_size = 4096;
#endif
#else // !(defined(OS_WINDOWS) || defined(OS_MACOSX) || defined(OS_LINUX))
#error "unknown platform"
#endif
const char *begin_ptr = reinterpret_cast<const char *>(begin);
const char *end_ptr = begin_ptr + region_size_in_byte;
// make sure the read operation will not be removed by the optimization
static volatile char dummy = 0;
for (const char *p = begin_ptr; p < end_ptr; p += page_size) {
if (query_quit != NULL && (*query_quit)) {
break;
}
dummy += *p;
}
}
void Util::MakeByteArrayFile(const string &name,
const string &input,
const string &output) {
OutputFileStream ofs(output.c_str());
CHECK(ofs);
Util::MakeByteArrayStream(name, input, &ofs);
}
void Util::MakeByteArrayStream(const string &name,
const string &input,
ostream *os) {
Mmap<char> mmap;
CHECK(mmap.Open(input.c_str()));
Util::WriteByteArray(name, mmap.begin(), mmap.GetFileSize(), os);
}
void Util::WriteByteArray(const string &name, const char *image,
size_t image_size, ostream *ofs) {
const char *begin = image;
const char *end = image + image_size;
*ofs << "const size_t k" << name << "_size = "
<< image_size << ";" << endl;
#ifdef OS_WINDOWS
*ofs << "const uint64 k" << name << "_data_uint64[] = {" << endl;
ofs->setf(ios::hex, ios::basefield); // in hex
ofs->setf(ios::showbase); // add 0x
int num = 0;
while (begin < end) {
uint64 n = 0;
uint8 *buf = reinterpret_cast<uint8 *>(&n);
const size_t size = min(static_cast<size_t>(end - begin),
static_cast<size_t>(8));
for (size_t i = 0; i < size; ++i) {
buf[i] = static_cast<uint8>(begin[i]);
}
begin += 8;
*ofs << n << ", ";
if (++num % 8 == 0) {
*ofs << endl;
}
}
*ofs << "};" << endl;
*ofs << "const char *k" << name << "_data"
<< " = reinterpret_cast<const char *>(k"
<< name << "_data_uint64);" << endl;
#else
*ofs << "const char k" << name << "_data[] =" << endl;
static const size_t kBucketSize = 20;
while (begin < end) {
const size_t size = min(static_cast<size_t>(end - begin), kBucketSize);
string buf;
Util::Escape(string(begin, size), &buf);
*ofs << "\"" << buf << "\"";
*ofs << endl;
begin += kBucketSize;
}
*ofs << ";" << endl;
#endif
}
bool Util::IsLittleEndian() {
#ifndef OS_WINDOWS
union {
unsigned char c[4];
unsigned int i;
} u;
COMPILE_ASSERT(sizeof(u.c) == sizeof(u.i), bad_sizeof_int);
COMPILE_ASSERT(sizeof(u) == sizeof(u.i), misaligned_union);
u.i = 0x12345678U;
return u.c[0] == 0x78U;
#else
return true;
#endif
}
} // namespace mozc