src/base/number_util.cc - mozc (262156b)

Tree @262156b (Download .tar.gz)

number_util.cc @262156b — raw · history · blame

// Copyright 2010-2020, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "base/number_util.h"

#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <limits>
#include <string>
#include <vector>

#include "base/japanese_util_rule.h"
#include "base/logging.h"
#include "base/port.h"
#include "base/util.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_format.h"

namespace mozc {
namespace {

// Table of number character of Kansuji
const char *const kNumKanjiDigits[] = {"〇", "一", "二", "三", "四",   "五",
                                       "六", "七", "八", "九", nullptr};
const char *const kNumKanjiOldDigits[] = {nullptr, "壱", "弐", "参", "四",
                                          "五",    "六", "七", "八", "九"};
const char *const kNumFullWidthDigits[] = {"０", "１", "２", "３", "４",   "５",
                                           "６", "７", "８", "９", nullptr};
const char *const kNumHalfWidthDigits[] = {"0", "1", "2", "3", "4",    "5",
                                           "6", "7", "8", "9", nullptr};

// Table of Kanji number ranks
const char *const kNumKanjiRanks[] = {nullptr, "", "十", "百", "千"};
const char *const kNumKanjiBiggerRanks[] = {"", "万", "億", "兆", "京"};
const char *const kNumKanjiOldRanks[] = {nullptr, "", "拾", "百", "阡"};
const char *const kNumKanjiBiggerOldRanks[] = {"", "萬", "億", "兆", "京"};

const char *const kRomanNumbersCapital[] = {nullptr, "Ⅰ", "Ⅱ", "Ⅲ",    "Ⅳ",
                                            "Ⅴ",     "Ⅵ", "Ⅶ", "Ⅷ",    "Ⅸ",
                                            "Ⅹ",     "Ⅺ", "Ⅻ", nullptr};

const char *const kRomanNumbersSmall[] = {nullptr, "ⅰ", "ⅱ", "ⅲ",    "ⅳ",
                                          "ⅴ",     "ⅵ", "ⅶ", "ⅷ",    "ⅸ",
                                          "ⅹ",     "ⅺ", "ⅻ", nullptr};

const char *const kCircledNumbers[] = {
    nullptr, "①",  "②",  "③",  "④",  "⑤",  "⑥",  "⑦",    "⑧",  "⑨",  "⑩",
    "⑪",     "⑫",  "⑬",  "⑭",  "⑮",  "⑯",  "⑰",  "⑱",    "⑲",  "⑳",  "㉑",
    "㉒",    "㉓", "㉔", "㉕", "㉖", "㉗", "㉘", "㉙",   "㉚", "㉛", "㉜",
    "㉝",    "㉞", "㉟", "㊱", "㊲", "㊳", "㊴", "㊵",   "㊶", "㊷", "㊸",
    "㊹",    "㊺", "㊻", "㊼", "㊽", "㊾", "㊿", nullptr};

// Structure to store character set variations.
struct NumberStringVariation {
  const char *const *const digits;
  const int numbers_size;
  const char *description;
  const char *separator;
  const char *point;
  const NumberUtil::NumberString::Style style;
};

// Judges given string is a decimal number (including integer) or not.
// It accepts strings whose last point is a decimal point like "123456."
bool IsDecimalNumber(absl::string_view str) {
  int num_point = 0;
  for (size_t i = 0; i < str.size(); ++i) {
    if (str[i] == '.') {
      ++num_point;
      // A valid decimal number has at most one decimal point.
      if (num_point >= 2) {
        return false;
      }
    } else if (!isdigit(str[i])) {
      return false;
    }
  }

  return true;
}

const char kAsciiZero = '0';
const char kAsciiOne = '1';
const char kAsciiNine = '9';

}  // namespace

int NumberUtil::SimpleAtoi(absl::string_view str) {
  int integer;
  if (absl::SimpleAtoi(str, &integer)) {
    return integer;
  }
  return 0;
}

namespace {

// TODO(hidehiko): Refactoring with GetScriptType in Util class.
inline bool IsArabicDecimalChar32(char32 ucs4) {
  // Halfwidth digit.
  if (kAsciiZero <= ucs4 && ucs4 <= kAsciiNine) {
    return true;
  }

  // Fullwidth digit.
  if (0xFF10 <= ucs4 && ucs4 <= 0xFF19) {
    return true;
  }

  return false;
}

}  // namespace

bool NumberUtil::IsArabicNumber(absl::string_view input_string) {
  if (input_string.empty()) {
    return false;
  }
  for (ConstChar32Iterator iter(input_string); !iter.Done(); iter.Next()) {
    if (!IsArabicDecimalChar32(iter.Get())) {
      // Found non-Arabic decimal character.
      return false;
    }
  }

  // All characters are numbers.
  return true;
}

bool NumberUtil::IsDecimalInteger(absl::string_view str) {
  if (str.empty()) {
    return false;
  }
  for (size_t i = 0; i < str.size(); ++i) {
    if (!isdigit(str[i])) {
      return false;
    }
  }
  return true;
}

namespace {

// To know what "大字" means, please refer
// http://ja.wikipedia.org/wiki/%E5%A4%A7%E5%AD%97_(%E6%95%B0%E5%AD%97)
const NumberStringVariation kKanjiVariations[] = {
    {kNumHalfWidthDigits, 10, "数字", nullptr, nullptr,
     NumberUtil::NumberString::NUMBER_ARABIC_AND_KANJI_HALFWIDTH},
    {kNumFullWidthDigits, 10, "数字", nullptr, nullptr,
     NumberUtil::NumberString::NUMBER_ARABIC_AND_KANJI_FULLWIDTH},
    {kNumKanjiDigits, 10, "漢数字", nullptr, nullptr,
     NumberUtil::NumberString::NUMBER_KANJI},
    {kNumKanjiOldDigits, 10, "大字", nullptr, nullptr,
     NumberUtil::NumberString::NUMBER_OLD_KANJI},
};

const char kOldTwoTen[] = "弐拾";
const size_t kOldTwoTenLength = arraysize(kOldTwoTen) - 1;
const char kOldTwenty[] = "廿";

}  // namespace

bool NumberUtil::ArabicToKanji(absl::string_view input_num,
                               std::vector<NumberString> *output) {
  DCHECK(output);
  const char *const kNumZero = "零";
  const int kDigitsInBigRank = 4;

  if (!IsDecimalInteger(input_num)) {
    return false;
  }

  {
    // We don't convert a number starting with '0', other than 0 itself.
    absl::string_view::size_type i;
    for (i = 0; i < input_num.size() && input_num[i] == kAsciiZero; ++i) {
    }
    if (i == input_num.size()) {
      output->push_back(
          NumberString(kNumZero, "大字", NumberString::NUMBER_OLD_KANJI));
      return true;
    }
  }

  // If given number needs higher ranks than our expectations,
  // we don't convert it.
  if (arraysize(kNumKanjiBiggerRanks) * kDigitsInBigRank < input_num.size()) {
    return false;
  }

  // Fill '0' in the beginning of input_num to make its length
  // (N * kDigitsInBigRank).
  const int filled_zero_num =
      (kDigitsInBigRank - (input_num.size() % kDigitsInBigRank)) %
      kDigitsInBigRank;
  std::string input(filled_zero_num, kAsciiZero);
  input.append(input_num.data(), input_num.size());

  // Segment into kDigitsInBigRank-digits pieces
  std::vector<std::string> ranked_numbers;
  for (int i = static_cast<int>(input.size()) - kDigitsInBigRank; i >= 0;
       i -= kDigitsInBigRank) {
    ranked_numbers.push_back(input.substr(i, kDigitsInBigRank));
  }
  const size_t rank_size = ranked_numbers.size();

  for (size_t variation_index = 0;
       variation_index < arraysize(kKanjiVariations); ++variation_index) {
    const NumberStringVariation &variation = kKanjiVariations[variation_index];
    const char *const *const digits = variation.digits;
    const NumberString::Style style = variation.style;

    if (rank_size == 1 &&
        (style == NumberString::NUMBER_ARABIC_AND_KANJI_HALFWIDTH ||
         style == NumberString::NUMBER_ARABIC_AND_KANJI_FULLWIDTH)) {
      continue;
    }

    const char *const *ranks;
    const char *const *bigger_ranks;
    if (style == NumberString::NUMBER_OLD_KANJI) {
      ranks = kNumKanjiOldRanks;
      bigger_ranks = kNumKanjiBiggerOldRanks;
    } else {
      ranks = kNumKanjiRanks;
      bigger_ranks = kNumKanjiBiggerRanks;
    }

    // TODO(peria): Bring |result| out if it improves the performance.
    std::string result;

    // Converts each segment, and merges them with rank Kanjis.
    for (int rank = rank_size - 1; rank >= 0; --rank) {
      const std::string &segment = ranked_numbers[rank];
      std::string segment_result;
      bool leading = true;
      for (size_t i = 0; i < segment.size(); ++i) {
        if (leading && segment[i] == kAsciiZero) {
          continue;
        }

        leading = false;
        if (style == NumberString::NUMBER_ARABIC_AND_KANJI_HALFWIDTH ||
            style == NumberString::NUMBER_ARABIC_AND_KANJI_FULLWIDTH) {
          segment_result += digits[segment[i] - kAsciiZero];
        } else {
          if (segment[i] == kAsciiZero) {
            continue;
          }
          // In "大字" style, "壱" is also required on every rank.
          if (style == NumberString::NUMBER_OLD_KANJI ||
              i == kDigitsInBigRank - 1 || segment[i] != kAsciiOne) {
            segment_result += digits[segment[i] - kAsciiZero];
          }
          segment_result += ranks[kDigitsInBigRank - i];
        }
      }
      if (!segment_result.empty()) {
        result += segment_result + bigger_ranks[rank];
      }
    }

    const char *description = variation.description;
    // Add simply converted numbers.
    output->push_back(NumberString(result, description, style));

    // Add specialized style numbers.
    if (style == NumberString::NUMBER_OLD_KANJI) {
      size_t index = result.find(kOldTwoTen);
      if (index != std::string::npos) {
        std::string result2(result);
        do {
          result2.replace(index, kOldTwoTenLength, kOldTwenty);
          index = result2.find(kOldTwoTen, index);
        } while (index != std::string::npos);
        output->push_back(NumberString(result2, description, style));
      }

      // for single kanji
      if (input == "0010") {
        output->push_back(NumberString("拾", description, style));
      }
      if (input == "1000") {
        output->push_back(NumberString("阡", description, style));
      }
    }
  }

  return true;
}

namespace {

const NumberStringVariation kNumDigitsVariations[] = {
    {kNumHalfWidthDigits, 10, "数字", ",", ".",
     NumberUtil::NumberString::NUMBER_SEPARATED_ARABIC_HALFWIDTH},
    {kNumFullWidthDigits, 10, "数字", "，", "．",
     NumberUtil::NumberString::NUMBER_SEPARATED_ARABIC_FULLWIDTH},
};

}  // namespace

bool NumberUtil::ArabicToSeparatedArabic(absl::string_view input_num,
                                         std::vector<NumberString> *output) {
  DCHECK(output);

  if (!IsDecimalNumber(input_num)) {
    return false;
  }

  // Separate a number into an integral part and a fractional part.
  absl::string_view::size_type point_pos = input_num.find('.');
  if (point_pos == absl::string_view::npos) {
    point_pos = input_num.size();
  }
  const absl::string_view integer = input_num.substr(0, point_pos);
  // |fraction| has the decimal point with digits in fractional part.
  const absl::string_view fraction =
      input_num.substr(point_pos, input_num.size() - point_pos);

  // We don't add separator to number whose integral part starts with '0'
  if (integer[0] == kAsciiZero) {
    return false;
  }

  for (size_t i = 0; i < arraysize(kNumDigitsVariations); ++i) {
    const NumberStringVariation &variation = kNumDigitsVariations[i];
    const char *const *const digits = variation.digits;
    // TODO(peria): Bring |result| out if it improves the performance.
    std::string result;

    // integral part
    for (absl::string_view::size_type j = 0; j < integer.size(); ++j) {
      // We don't add separater first
      if (j != 0 && (integer.size() - j) % 3 == 0) {
        result.append(variation.separator);
      }
      const uint32 d = static_cast<uint32>(integer[j] - kAsciiZero);
      if (d <= 9 && digits[d]) {
        result.append(digits[d]);
      }
    }

    // fractional part
    if (!fraction.empty()) {
      DCHECK_EQ(fraction[0], '.');
      result.append(variation.point);
      for (absl::string_view::size_type j = 1; j < fraction.size(); ++j) {
        result.append(digits[static_cast<int>(fraction[j] - kAsciiZero)]);
      }
    }

    output->push_back(
        NumberString(result, variation.description, variation.style));
  }
  return true;
}

namespace {

// use default for wide Arabic, because half/full width for
// normal number is learned by charactor form manager.
const NumberStringVariation kSingleDigitsVariations[] = {
    {kNumKanjiDigits, 10, "漢数字", nullptr, nullptr,
     NumberUtil::NumberString::NUMBER_KANJI_ARABIC},
    {kNumFullWidthDigits, 10, "数字", nullptr, nullptr,
     NumberUtil::NumberString::DEFAULT_STYLE},
};

}  // namespace

bool NumberUtil::ArabicToWideArabic(absl::string_view input_num,
                                    std::vector<NumberString> *output) {
  DCHECK(output);

  if (!IsDecimalInteger(input_num)) {
    return false;
  }

  for (size_t i = 0; i < arraysize(kSingleDigitsVariations); ++i) {
    const NumberStringVariation &variation = kSingleDigitsVariations[i];
    // TODO(peria): Bring |result| out if it improves the performance.
    std::string result;
    for (absl::string_view::size_type j = 0; j < input_num.size(); ++j) {
      result.append(
          variation.digits[static_cast<int>(input_num[j] - kAsciiZero)]);
    }
    if (!result.empty()) {
      output->push_back(
          NumberString(result, variation.description, variation.style));
    }
  }
  return true;
}

namespace {

const NumberStringVariation kSpecialNumericVariations[] = {
    {kRomanNumbersCapital, arraysize(kRomanNumbersCapital),
     "ローマ数字(大文字)", nullptr, nullptr,
     NumberUtil::NumberString::NUMBER_ROMAN_CAPITAL},
    {kRomanNumbersSmall, arraysize(kRomanNumbersSmall), "ローマ数字(小文字)",
     nullptr, nullptr, NumberUtil::NumberString::NUMBER_ROMAN_SMALL},
    {kCircledNumbers, arraysize(kCircledNumbers), "丸数字", nullptr, nullptr,
     NumberUtil::NumberString::NUMBER_CIRCLED},
};

}  // namespace

bool NumberUtil::ArabicToOtherForms(absl::string_view input_num,
                                    std::vector<NumberString> *output) {
  DCHECK(output);

  if (!IsDecimalInteger(input_num)) {
    return false;
  }

  bool converted = false;

  // Googol
  {
    // 10^100
    const char *const kNumGoogol =
        "100000000000000000000000000000000000000000000000000"
        "00000000000000000000000000000000000000000000000000";

    if (input_num == kNumGoogol) {
      output->push_back(
          NumberString("Googol", "", NumberString::DEFAULT_STYLE));
      converted = true;
    }
  }

  // Following conversions require uint64 number.
  uint64 n;
  if (!SafeStrToUInt64(input_num, &n)) {
    return converted;
  }

  // Special forms
  for (size_t i = 0; i < arraysize(kSpecialNumericVariations); ++i) {
    const NumberStringVariation &variation = kSpecialNumericVariations[i];
    if (n < variation.numbers_size && variation.digits[n]) {
      output->push_back(NumberString(variation.digits[n], variation.description,
                                     variation.style));
      converted = true;
    }
  }

  return converted;
}

bool NumberUtil::ArabicToOtherRadixes(absl::string_view input_num,
                                      std::vector<NumberString> *output) {
  DCHECK(output);

  if (!IsDecimalInteger(input_num)) {
    return false;
  }

  uint64 n;
  if (!SafeStrToUInt64(input_num, &n)) {
    return false;
  }

  // Hexadecimal
  if (n > 9) {
    const std::string hex = absl::StrFormat("0x%x", static_cast<uint64>(n));
    output->push_back(NumberString(hex, "16進数", NumberString::NUMBER_HEX));
  }

  // Octal
  if (n > 7) {
    const std::string oct = absl::StrFormat("0%o", static_cast<uint64>(n));
    output->push_back(NumberString(oct, "8進数", NumberString::NUMBER_OCT));
  }

  // Binary
  if (n > 1) {
    std::string binary;
    for (uint64 num = n; num; num >>= 1) {
      binary.push_back(kAsciiZero + static_cast<char>(num & 0x1));
    }
    // "b0" will be "0b" in head of |binary|
    binary.append("b0");
    std::reverse(binary.begin(), binary.end());
    output->push_back(NumberString(binary, "2進数", NumberString::NUMBER_BIN));
  }

  return (n > 1);
}

namespace {

const absl::string_view SkipWhiteSpace(absl::string_view str) {
  absl::string_view::size_type i;
  for (i = 0; i < str.size() && isspace(str[i]); ++i) {
  }
  DCHECK(i == str.size() || !isspace(str[i]));
  return str.substr(i);
}

// There is an informative discussion about the overflow detection in
// "Hacker's Delight" (http://www.hackersdelight.org/basics.pdf)
//   2-12 'Overflow Detection'

// *output = arg1 + arg2
// return false when an integer overflow happens.
bool AddAndCheckOverflow(uint64 arg1, uint64 arg2, uint64 *output) {
  *output = arg1 + arg2;
  if (arg2 > (std::numeric_limits<uint64>::max() - arg1)) {
    // overflow happens
    return false;
  }
  return true;
}

// *output = arg1 * arg2
// return false when an integer overflow happens.
bool MultiplyAndCheckOverflow(uint64 arg1, uint64 arg2, uint64 *output) {
  *output = arg1 * arg2;
  if (arg1 != 0 && arg2 > (std::numeric_limits<uint64>::max() / arg1)) {
    // overflow happens
    return false;
  }
  return true;
}

// A simple wrapper of strtoull function. |c_str| must be terminated by '\0'.
inline uint64 StrToUint64(const char *c_str, char **end_ptr, int base) {
#ifdef OS_WIN
  return _strtoui64(c_str, end_ptr, base);
#else   // OS_WIN
  return strtoull(c_str, end_ptr, base);
#endif  // OS_WIN
}

// Converts a string which describes a number into an uint64 value in |base|
// radix.  Does not convert octal or hexadecimal strings with "0" or "0x"
// suffixes.
bool SafeStrToUInt64WithBase(absl::string_view str, int base, uint64 *value) {
  DCHECK(value);

  // Maximum possible length of number string, including terminating '\0'. Note
  // that the maximum possible length is achieved when str="111...11" (64
  // unities) and base=2.
  const size_t kMaxPossibleLength = 65;

  // Leading white spaces are allowed.
  const absl::string_view stripped_str = SkipWhiteSpace(str);
  if (stripped_str.empty() || stripped_str.size() >= kMaxPossibleLength) {
    return false;
  }
  // StrToUint64() does not check if the input is negative.  However, a leading
  // '+' is OK.
  if (stripped_str[0] == '-') {
    return false;
  }

  // Since absl::string_view doesn't end with '\0', we make a c-string on stack
  // here.
  char buf[kMaxPossibleLength];
  memcpy(buf, str.data(), str.size());
  buf[str.size()] = '\0';

  char *end_ptr = nullptr;
  errno = 0;
  *value = StrToUint64(buf, &end_ptr, base);
  if (errno != 0 || end_ptr == buf) {  // Failed to parse uint64.
    return false;
  }
  // Trailing white spaces are allowed.
  const absl::string_view trailing_str(end_ptr, buf + str.size() - end_ptr);
  return SkipWhiteSpace(trailing_str).empty();
}

template <typename T1, typename T2>
struct GenericFalseTypeArity2 {
  // TODO(yukawa): Use std::false_type once C++11 is enabled everywhere.
  static constexpr bool value = false;
};

template <typename SrcType, typename DestType>
bool SafeCast(SrcType src, DestType *dest) {
  static_assert(GenericFalseTypeArity2<SrcType, DestType>::value,
                "Shouldn't be used with implicit type conversion.");
  return false;
}

template <>
bool SafeCast(int64 src, int16 *dest) {
  if (src < static_cast<int64>(std::numeric_limits<int16>::min()) ||
      static_cast<int64>(std::numeric_limits<int16>::max()) < src) {
    return false;
  }
  *dest = static_cast<int16>(src);
  return true;
}

template <>
bool SafeCast(int64 src, int32 *dest) {
  if (src < static_cast<int64>(std::numeric_limits<int32>::min()) ||
      static_cast<int64>(std::numeric_limits<int32>::max()) < src) {
    return false;
  }
  *dest = static_cast<int32>(src);
  return true;
}

template <>
bool SafeCast(uint64 src, int64 *dest) {
  if (src > static_cast<uint64>(std::numeric_limits<int64>::max())) {
    return false;
  }
  *dest = static_cast<int64>(src);
  return true;
}

template <>
bool SafeCast(uint64 src, uint16 *dest) {
  if (src > static_cast<uint64>(std::numeric_limits<uint16>::max())) {
    return false;
  }
  *dest = static_cast<uint16>(src);
  return true;
}

template <>
bool SafeCast(uint64 src, uint32 *dest) {
  if (src > static_cast<uint64>(std::numeric_limits<uint32>::max())) {
    return false;
  }
  *dest = static_cast<uint32>(src);
  return true;
}

template <typename SrcType, typename DestType>
bool SafeUnaryNegation(SrcType src, DestType *dest) {
  static_assert(GenericFalseTypeArity2<SrcType, DestType>::value,
                "Shouldn't be used with implicit type conversion.");
  return false;
}

template <>
bool SafeUnaryNegation(uint64 src, int64 *dest) {
  int64 tmp = 0;
  if (!SafeCast(src, &tmp)) {
    if (src == 0x8000000000000000ul) {
      // This is an exceptional case. |src| isn't in the range of int64,
      // but |-src| is in the range.
      *dest = std::numeric_limits<int64>::min();
      return true;
    }
    return false;
  }
  *dest = -tmp;
  return true;
}

}  // namespace

bool NumberUtil::SafeStrToInt16(absl::string_view str, int16 *value) {
  int64 tmp;
  if (!SafeStrToInt64(str, &tmp)) {
    return false;
  }
  return SafeCast(tmp, value);
}

bool NumberUtil::SafeStrToInt32(absl::string_view str, int32 *value) {
  int64 tmp;
  if (!SafeStrToInt64(str, &tmp)) {
    return false;
  }
  return SafeCast(tmp, value);
}

bool NumberUtil::SafeStrToInt64(absl::string_view str, int64 *value) {
  const absl::string_view stripped_str = SkipWhiteSpace(str);
  if (stripped_str.empty()) {
    return false;
  }
  uint64 tmp;
  if (stripped_str[0] == '-') {
    absl::string_view opposite_str =
        stripped_str.substr(1, stripped_str.size() - 1);
    if (!SafeStrToUInt64WithBase(opposite_str, 10, &tmp)) {
      return false;
    }
    return SafeUnaryNegation(tmp, value);
  }
  if (!SafeStrToUInt64WithBase(str, 10, &tmp)) {
    return false;
  }
  return SafeCast(tmp, value);
}

bool NumberUtil::SafeStrToUInt16(absl::string_view str, uint16 *value) {
  uint64 tmp;
  if (!SafeStrToUInt64WithBase(str, 10, &tmp)) {
    return false;
  }
  return SafeCast(tmp, value);
}

bool NumberUtil::SafeStrToUInt32(absl::string_view str, uint32 *value) {
  uint64 tmp;
  if (!SafeStrToUInt64WithBase(str, 10, &tmp)) {
    return false;
  }
  return SafeCast(tmp, value);
}

bool NumberUtil::SafeHexStrToUInt32(absl::string_view str, uint32 *value) {
  uint64 tmp;
  if (!SafeStrToUInt64WithBase(str, 16, &tmp)) {
    return false;
  }
  return SafeCast(tmp, value);
}

bool NumberUtil::SafeOctStrToUInt32(absl::string_view str, uint32 *value) {
  uint64 tmp;
  if (!SafeStrToUInt64WithBase(str, 8, &tmp)) {
    return false;
  }
  return SafeCast(tmp, value);
}

bool NumberUtil::SafeStrToUInt64(absl::string_view str, uint64 *value) {
  return SafeStrToUInt64WithBase(str, 10, value);
}

bool NumberUtil::SafeStrToDouble(absl::string_view str, double *value) {
  DCHECK(value);
  // Note that absl::string_view isn't terminated by '\0'.  However, since
  // strtod requires null-terminated string, we make a string here. If we have a
  // good estimate of the maximum possible length of the input string, we may be
  // able to use char buffer instead.  Note: const reference ensures the life of
  // this temporary string until the end!
  const std::string &s = std::string(str);
  const char *ptr = s.c_str();

  char *end_ptr;
  errno = 0;  // errno only gets set on errors
  // strtod of GCC accepts hexadecimal number like "0x1234", but that of
  // VisualC++ does not.
  // Note that strtod accepts white spaces at the beginning of the parameter.
  *value = std::strtod(ptr, &end_ptr);

  if (errno != 0 || ptr == end_ptr || std::isnan(*value) ||
      *value == std::numeric_limits<double>::infinity() ||
      *value == -std::numeric_limits<double>::infinity()) {
    return false;
  }
  // Trailing white spaces are allowed.
  const absl::string_view trailing_str(end_ptr, ptr + s.size() - end_ptr);
  return SkipWhiteSpace(trailing_str).empty();
}

namespace {

// Reduces leading digits less than 10 as their base10 interpretation, e.g.,
//   [1, 2, 3, 10, 100] => begin points to [10, 100], output = 123
// Returns false when overflow happened.
bool ReduceLeadingNumbersAsBase10System(
    std::vector<uint64>::const_iterator *begin,
    const std::vector<uint64>::const_iterator &end, uint64 *output) {
  *output = 0;
  for (; *begin < end; ++*begin) {
    if (**begin >= 10) {
      return true;
    }
    // *output = *output * 10 + *it
    if (!MultiplyAndCheckOverflow(*output, 10, output) ||
        !AddAndCheckOverflow(*output, **begin, output)) {
      return false;
    }
  }
  return true;
}

// Interprets digits as base10 system, e.g.,
//   [1, 2, 3] => 123
//   [1, 2, 3, 10] => false
// Returns false if a number greater than 10 was found or overflow happened.
bool InterpretNumbersAsBase10System(const std::vector<uint64> &numbers,
                                    uint64 *output) {
  auto begin = numbers.begin();
  const bool success =
      ReduceLeadingNumbersAsBase10System(&begin, numbers.end(), output);
  // Check if the whole numbers were reduced.
  return (success && begin == numbers.end());
}

// Reads a leading number in a sequence and advances the iterator. Returns false
// if the range is empty or the leading number is not less than 10.
bool ReduceOnesDigit(std::vector<uint64>::const_iterator *begin,
                     const std::vector<uint64>::const_iterator &end,
                     uint64 *num) {
  if (*begin == end || **begin >= 10) {
    return false;
  }
  *num = **begin;
  ++*begin;
  return true;
}

// Given expected_base, 10, 100, or 1000, reads leading one or two numbers and
// calculates the number in the follwoing way:
//   Case: expected_base == 10
//     [10, ...] => 10
//     [2, 10, ...] => 20
//     [1, 10, ...] => error because we don't write "一十" in Japanese.
//     [20, ...] => 20 because "廿" is interpreted as 20.
//     [2, 0, ...] => 20
//   Case: expected_base == 100
//     [100, ...] => 100
//     [2, 100, ...] => 200
//     [1, 100, ...] => error because we don't write "一百" in Japanese.
//     [1, 2, 3, ...] => 123
//   Case: expected_base == 1000
//     [1000, ...] => 1000
//     [2, 1000, ...] => 2000
//     [1, 1000, ...] => 1000
//     [1, 2, 3, 4, ...] => 1234
bool ReduceDigitsHelper(std::vector<uint64>::const_iterator *begin,
                        const std::vector<uint64>::const_iterator &end,
                        uint64 *num, const uint64 expected_base) {
  // Skip leading zero(s).
  while (*begin != end && **begin == 0) {
    ++*begin;
  }
  if (*begin == end) {
    return false;
  }
  const uint64 leading_number = **begin;

  // If the leading number is less than 10, e.g., patterns like [2, 10], we need
  // to check the next number.
  if (leading_number < 10) {
    if (end - *begin < 2) {
      return false;
    }
    const uint64 next_number = *(*begin + 1);

    // If the next number is also less than 10, this pattern is like
    // [1, 2, ...] => 12. In this case, the result must be less than
    // 10 * expected_base.
    if (next_number < 10) {
      if (!ReduceLeadingNumbersAsBase10System(begin, end, num) ||
          *num >= expected_base * 10 || (*begin != end && **begin < 10000)) {
        *begin = end;  // Force to ignore the rest of the sequence.
        return false;
      }
      return true;
    }

    // Patterns like [2, 10, ...] and [1, 1000, ...].
    if (next_number != expected_base ||
        (leading_number == 1 && expected_base != 1000)) {
      return false;
    }
    *num = leading_number * expected_base;
    *begin += 2;
    return true;
  }

  // Patterns like [10, ...], [100, ...], [1000, ...], [20, ...]. The leading 20
  // is a special case for Kanji "廿".
  if (leading_number == expected_base ||
      (expected_base == 10 && leading_number == 20)) {
    *num = leading_number;
    ++*begin;
    return true;
  }
  return false;
}

inline bool ReduceTensDigit(std::vector<uint64>::const_iterator *begin,
                            const std::vector<uint64>::const_iterator &end,
                            uint64 *num) {
  return ReduceDigitsHelper(begin, end, num, 10);
}

inline bool ReduceHundredsDigit(std::vector<uint64>::const_iterator *begin,
                                const std::vector<uint64>::const_iterator &end,
                                uint64 *num) {
  return ReduceDigitsHelper(begin, end, num, 100);
}

inline bool ReduceThousandsDigit(std::vector<uint64>::const_iterator *begin,
                                 const std::vector<uint64>::const_iterator &end,
                                 uint64 *num) {
  return ReduceDigitsHelper(begin, end, num, 1000);
}

// Reduces leading digits as a number less than 10000 and advances the
// iterator. For example:
//   [1, 1000, 2, 100, 3, 10, 4, 10000, ...]
//        => begin points to [10000, ...], num = 1234
//   [3, 100, 4, 100]
//        => error because same base number appears twice
bool ReduceNumberLessThan10000(std::vector<uint64>::const_iterator *begin,
                               const std::vector<uint64>::const_iterator &end,
                               uint64 *num) {
  *num = 0;
  bool success = false;
  uint64 n = 0;
  // Note: the following additions never overflow.
  if (ReduceThousandsDigit(begin, end, &n)) {
    *num += n;
    success = true;
  }
  if (ReduceHundredsDigit(begin, end, &n)) {
    *num += n;
    success = true;
  }
  if (ReduceTensDigit(begin, end, &n)) {
    *num += n;
    success = true;
  }
  if (ReduceOnesDigit(begin, end, &n)) {
    *num += n;
    success = true;
  }
  // If at least one reduce was successful, no number remains in the sequence or
  // the next number should be a base number greater than 1000 (e.g., 10000,
  // 100000, etc.). Strictly speaking, better to check **begin % 10 == 0.
  return success && (*begin == end || **begin >= 10000);
}

// Interprets a sequence of numbers in a Japanese reading way. For example:
//   "一万二千三百四十五" = [1, 10000, 2, 1000, 3, 100, 4, 10, 5] => 12345
// Base-10 numbers must be decreasing, i.e.,
//   "一十二百" = [1, 10, 2, 100] => error
bool InterpretNumbersInJapaneseWay(const std::vector<uint64> &numbers,
                                   uint64 *output) {
  uint64 last_base = std::numeric_limits<uint64>::max();
  auto begin = numbers.begin();
  *output = 0;
  do {
    uint64 coef = 0;
    if (!ReduceNumberLessThan10000(&begin, numbers.end(), &coef)) {
      return false;
    }
    if (begin == numbers.end()) {
      return AddAndCheckOverflow(*output, coef, output);
    }
    if (*begin >= last_base) {
      return false;  // Increasing order of base-10 numbers.
    }
    // Safely performs *output += coef * *begin.
    uint64 delta = 0;
    if (!MultiplyAndCheckOverflow(coef, *begin, &delta) ||
        !AddAndCheckOverflow(*output, delta, output)) {
      return false;
    }
    last_base = *begin++;
  } while (begin != numbers.end());

  return true;
}

// Interprets a sequence of numbers directly or in a Japanese reading way
// depending on the maximum number in the sequence.
bool NormalizeNumbersHelper(const std::vector<uint64> &numbers,
                            uint64 *number_output) {
  const auto itr_max = std::max_element(numbers.begin(), numbers.end());
  if (itr_max == numbers.end()) {
    return false;  // numbers is empty
  }

  // When no scaling number is found, convert number directly.
  // For example, [5,4,3] => 543
  if (*itr_max < 10) {
    return InterpretNumbersAsBase10System(numbers, number_output);
  }
  return InterpretNumbersInJapaneseWay(numbers, number_output);
}

// TODO(peria): Do refactoring this method.
bool NormalizeNumbersInternal(absl::string_view input, bool trim_leading_zeros,
                              bool allow_suffix, std::string *kanji_output,
                              std::string *arabic_output, std::string *suffix) {
  DCHECK(kanji_output);
  DCHECK(arabic_output);
  const char *begin = input.data();
  const char *end = input.data() + input.size();
  std::vector<uint64> numbers;
  numbers.reserve(input.size());

  // Map Kanji number string to digits, e.g., "二百十一" -> [2, 100, 10, 1].
  // Simultaneously, constructs a Kanji number string.
  kanji_output->clear();
  arabic_output->clear();
  std::string kanji_char;

  while (begin < end) {
    size_t mblen = 0;
    const char32 wchar = Util::UTF8ToUCS4(begin, end, &mblen);
    kanji_char.assign(begin, mblen);

    std::string tmp;
    NumberUtil::KanjiNumberToArabicNumber(kanji_char, &tmp);

    uint64 n = 0;
    if (!NumberUtil::SafeStrToUInt64(tmp, &n)) {
      break;
    }

    if (wchar >= 0x0030 && wchar <= 0x0039) {  // '0' <= wchar <= '9'
      kanji_char.assign(kNumKanjiDigits[wchar - 0x0030], 3);
    } else if (wchar >= 0xFF10 && wchar <= 0xFF19) {  // '０' <= wchar <= '９'
      kanji_char.assign(kNumKanjiDigits[wchar - 0xFF10], 3);
    }
    kanji_output->append(kanji_char);
    numbers.push_back(n);
    begin += mblen;
  }
  if (begin < end) {
    if (!allow_suffix) {
      return false;
    }
    DCHECK(suffix);
    suffix->assign(begin, end);
  }

  if (numbers.empty()) {
    return false;
  }

  // Try interpreting the sequence of digits.
  uint64 n = 0;
  if (!NormalizeNumbersHelper(numbers, &n)) {
    return false;
  }

  if (!trim_leading_zeros) {
    // If |numbers| contains only k zeros, add (k - 1) zeros to the output.
    // Otherwise, add the same number of leading zeros.
    size_t num_zeros;
    for (num_zeros = 0; num_zeros < numbers.size(); ++num_zeros) {
      if (numbers[num_zeros] != 0) {
        break;
      }
    }
    if (num_zeros == numbers.size()) {
      --num_zeros;
    }
    arabic_output->append(num_zeros, kAsciiZero);
  }

  arabic_output->append(absl::StrFormat("%u", static_cast<uint64>(n)));
  return true;
}

}  // namespace

// Convert Kanji numbers into Arabic numbers:
// e.g. "百二十万" -> 1200000
bool NumberUtil::NormalizeNumbers(absl::string_view input,
                                  bool trim_leading_zeros,
                                  std::string *kanji_output,
                                  std::string *arabic_output) {
  return NormalizeNumbersInternal(input, trim_leading_zeros,
                                  false,  // allow_suffix
                                  kanji_output, arabic_output, nullptr);
}

bool NumberUtil::NormalizeNumbersWithSuffix(absl::string_view input,
                                            bool trim_leading_zeros,
                                            std::string *kanji_output,
                                            std::string *arabic_output,
                                            std::string *suffix) {
  return NormalizeNumbersInternal(input, trim_leading_zeros,
                                  true,  // allow_suffix
                                  kanji_output, arabic_output, suffix);
}

void NumberUtil::KanjiNumberToArabicNumber(absl::string_view input,
                                           std::string *output) {
  Util::ConvertUsingDoubleArray(
      japanese_util_rule::kanjinumber_to_arabicnumber_da,
      japanese_util_rule::kanjinumber_to_arabicnumber_table, input, output);
}

}  // namespace mozc