converter/immutable_converter.cc - mozc (d2e045e)

Tree @d2e045e (Download .tar.gz)

immutable_converter.cc @d2e045e — raw · history · blame

// Copyright 2010-2012, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "converter/immutable_converter.h"

#include <algorithm>
#include <climits>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include "base/base.h"
#include "base/singleton.h"
#include "base/util.h"
#include "config/config.pb.h"
#include "config/config_handler.h"
#include "converter/connector_interface.h"
#include "converter/key_corrector.h"
#include "converter/lattice.h"
#include "converter/nbest_generator.h"
#include "converter/segmenter.h"
#include "converter/segmenter_interface.h"
#include "converter/segments.h"
#include "dictionary/dictionary_interface.h"
#include "dictionary/pos_matcher.h"
#include "dictionary/suffix_dictionary.h"

DECLARE_bool(disable_lattice_cache);
DEFINE_bool(disable_predictive_realtime_conversion,
            false,
            "disable predictive realtime conversion");

namespace mozc {
namespace {

const size_t kMaxSegmentsSize                   = 256;
const size_t kMaxCharLength                     = 1024;
const size_t kMaxCharLengthForReverseConversion = 600;  // 200 chars in UTF8
const int    kMaxCost                           = 32767;
const int    kMinCost                           = -32767;
const int    kDefaultNumberCost                 = 3000;

enum {
  CONNECTED,
  WEAK_CONNECTED,
  NOT_CONNECTED,
};

// TODO(taku): this is a tentative fix.
// including the header file both in rewriter and converter are redudant.
// we'd like to have a common module.
#include "rewriter/user_segment_history_rewriter_rule.h"

// return the POS group of the given candidate
uint16 GetPosGroup(uint16 lid) {
  return kLidGroup[lid];
}

void InsertCorrectedNodes(size_t pos, const string &key,
                          const KeyCorrector *key_corrector,
                          DictionaryInterface *dictionary,
                          Lattice *lattice) {
  if (key_corrector == NULL) {
    return;
  }
  size_t length = 0;
  const char *str = key_corrector->GetCorrectedPrefix(pos, &length);
  if (str == NULL || length == 0) {
    return;
  }

  Node *rnode = dictionary->LookupPrefix(str, length,
                                         lattice->node_allocator());
  Node *head = NULL;
  Node *prev = NULL;
  for (Node *node = rnode; node != NULL; node = node->bnext) {
    const size_t offset =
        key_corrector->GetOriginalOffset(pos, node->key.size());
    if (!KeyCorrector::IsValidPosition(offset) || offset == 0) {
      continue;
    }
    node->key = key.substr(pos, offset);
    node->wcost += KeyCorrector::GetCorrectedCostPenalty(node->key);
    if (head == NULL) {
      head = node;
    }
    if (prev != NULL) {
      prev->bnext = node;
    }
    prev = node;
  }

  if (prev != NULL) {
    prev->bnext = NULL;
  }

  if (head != NULL) {
    lattice->Insert(pos, head);
  }
}

void MakeGroup(const Segments *segments, vector<uint16> *group) {
  group->clear();
  for (size_t i = 0; i < segments->segments_size(); ++i) {
    for (size_t j = 0; j < segments->segment(i).key().size(); ++j) {
      group->push_back(static_cast<uint16>(i));
    }
  }
  group->push_back(static_cast<uint16>(segments->segments_size() - 1));
}

bool IsNumber(const char c) {
  return c >= '0' && c <= '9';
}

void DecomposeNumberAndSuffix(const string &input,
                              string *number, string *suffix) {
  const char *begin = input.data();
  const char *end = input.data() + input.size();
  size_t pos = 0;
  while (begin < end) {
    if (IsNumber(*begin)) {
      ++pos;
      ++begin;
      continue;
    }
    break;
  }
  *number = input.substr(0, pos);
  *suffix = input.substr(pos, input.size() - pos);
}

void DecomposePrefixAndNumber(const string &input,
                              string *prefix, string *number) {
  const char *begin = input.data();
  const char *end = input.data() + input.size() - 1;
  size_t pos = input.size();
  while (begin <= end) {
    if (IsNumber(*end)) {
      --pos;
      --end;
      continue;
    }
    break;
  }
  *prefix = input.substr(0, pos);
  *number = input.substr(pos, input.size() - pos);
}

void NormalizeHistorySegments(Segments *segments) {
  for (size_t i = 0; i < segments->history_segments_size(); ++i) {
    Segment *segment = segments->mutable_history_segment(i);
    if (segment == NULL || segment->candidates_size() == 0) {
      continue;
    }

    string key;
    Segment::Candidate *c = segment->mutable_candidate(0);
    const string value = c->value;
    const string content_value = c->content_value;
    const string content_key = c->content_key;
    Util::FullWidthAsciiToHalfWidthAscii(segment->key(), &key);
    Util::FullWidthAsciiToHalfWidthAscii(value, &c->value);
    Util::FullWidthAsciiToHalfWidthAscii(content_value, &c->content_value);
    Util::FullWidthAsciiToHalfWidthAscii(content_key, &c->content_key);
    c->key = key;
    segment->set_key(key);

    // Ad-hoc rewrite for Numbers
    // Since number candidate is generative, i.e, any number can be
    // written by users, we normalize the value here. normalzied number
    // is used for the ranking tweaking based on history
    if (key.size() > 1 &&
        key == c->value &&
        key == c->content_value &&
        key == c->key &&
        key == c->content_key &&
        Util::GetScriptType(key) == Util::NUMBER &&
        IsNumber(key[key.size() - 1])) {
      key = key.substr(key.size() - 1, 1);  // use the last digit only
      segment->set_key(key);
      c->value = key;
      c->content_value = key;
      c->content_key = key;
    }
  }
}

Lattice *GetLattice(Segments *segments, bool is_prediction) {
  Lattice *lattice = segments->mutable_cached_lattice();

  const size_t history_segments_size = segments->history_segments_size();
  string conversion_key = "";
  for (size_t i = history_segments_size; i < segments->segments_size(); ++i) {
    conversion_key.append(segments->segment(i).key());
  }

  if (!is_prediction
      || FLAGS_disable_lattice_cache
      || Util::CharsLen(conversion_key) <= 1) {
    // Do not cache if conversion is not prediction, or disable_lattice_cache
    // flag is used.
    // Addition, if a user input the key right after a finish of conversion,
    // reset the lattice to erase old nodes.
    lattice->Clear();
  }

  return lattice;
}
}   // anonymous namespace

// Constractor for keeping compatibility
// TODO(team): Change code to use another constractor and
// specify implementations.
ImmutableConverterImpl::ImmutableConverterImpl()
    : connector_(ConnectorFactory::GetConnector()),
      dictionary_(DictionaryFactory::GetDictionary()),
      segmenter_(Singleton<Segmenter>::get()),
      last_to_first_name_transition_cost_(0) {
  last_to_first_name_transition_cost_
      = connector_->GetTransitionCost(
          POSMatcher::GetLastNameId(), POSMatcher::GetFirstNameId());
}

ImmutableConverterImpl::ImmutableConverterImpl(
    const SegmenterInterface *segmenter)
    : connector_(ConnectorFactory::GetConnector()),
      dictionary_(DictionaryFactory::GetDictionary()),
      segmenter_(segmenter),
      last_to_first_name_transition_cost_(0) {
  last_to_first_name_transition_cost_
      = connector_->GetTransitionCost(
          POSMatcher::GetLastNameId(), POSMatcher::GetFirstNameId());
}

void ImmutableConverterImpl::ExpandCandidates(
    NBestGenerator *nbest, Segment *segment,
    Segments::RequestType request_type, size_t expand_size) const {
  DCHECK(nbest);
  DCHECK(segment);
  CHECK_GT(expand_size, 0);

  while (segment->candidates_size() < expand_size) {
    Segment::Candidate *candidate = segment->push_back_candidate();
    DCHECK(candidate);
    candidate->Init();

    // if NBestGenerator::Next() returns NULL,
    // no more entries are generated.
    if (!nbest->Next(candidate, request_type)) {
      segment->pop_back_candidate();
      break;
    }
  }
}

void ImmutableConverterImpl::PromoteUserDictionaryCandidate(
    Segment *segment) const {
  // User-dictionary candidates are not always placed at the top.
  // Since user expects that user-dictionary candidates may appear
  // on the top, we simply move user-dictoinary-candidate just
  // "after" the top candidate.
  if (segment->candidates_size() <= 2) {
    return;
  }
  if ((segment->candidate(0).attributes &
       Segment::Candidate::USER_DICTIONARY) ||
      (segment->candidate(1).attributes &
       Segment::Candidate::USER_DICTIONARY)) {
    return;
  }
  for (size_t i = 2; i < segment->candidates_size(); ++i) {
    if (segment->candidate(i).attributes &
        Segment::Candidate::USER_DICTIONARY) {
      segment->move_candidate(i, 1);   // after the top.
      return;
    }
  }
}

void ImmutableConverterImpl::InsertDummyCandidates(Segment *segment,
                                                   size_t expand_size) const {
  const Segment::Candidate *top_candidate =
      segment->candidates_size() == 0 ? NULL :
      segment->mutable_candidate(0);
  const Segment::Candidate *last_candidate =
      segment->candidates_size() == 0 ? NULL :
      segment->mutable_candidate(segment->candidates_size() - 1);

  // Insert a dummy candiate whose content_value is katakana.
  // If functional_key() is empty, no need to make a dummy candidate.
  if (segment->candidates_size() > 0 &&
      segment->candidates_size() < expand_size &&
      !segment->candidate(0).functional_key().empty() &&
      Util::GetScriptType(segment->candidate(0).content_key) ==
      Util::HIRAGANA) {
    // Use last_candidate as a refernce of cost.
    // Use top_candidate as a refarence of lid/rid and key/value.
    DCHECK(top_candidate);
    DCHECK(last_candidate);
    Segment::Candidate *new_candidate = segment->add_candidate();
    DCHECK(new_candidate);

    string katakana_value;
    Util::HiraganaToKatakana(segment->candidate(0).content_key,
                             &katakana_value);

    new_candidate->CopyFrom(*top_candidate);
    new_candidate->value = katakana_value + top_candidate->functional_value();
    new_candidate->content_value = katakana_value;
    new_candidate->cost = last_candidate->cost + 1;
    new_candidate->wcost = last_candidate->wcost + 1;
    new_candidate->structure_cost = last_candidate->structure_cost + 1;
    new_candidate->attributes = 0;
    last_candidate = new_candidate;
  }

  // Insert a dummy hiragana candidate.
  if (segment->candidates_size() == 0 ||
      (segment->candidates_size() < expand_size &&
       Util::GetScriptType(segment->key()) == Util::HIRAGANA)) {
    Segment::Candidate *new_candidate = segment->add_candidate();
    DCHECK(new_candidate);

    if (last_candidate != NULL) {
      new_candidate->CopyFrom(*last_candidate);
    } else {
      new_candidate->Init();
    }
    new_candidate->key = segment->key();
    new_candidate->value = segment->key();
    new_candidate->content_key = segment->key();
    new_candidate->content_value = segment->key();
    if (last_candidate != NULL) {
      new_candidate->cost = last_candidate->cost + 1;
      new_candidate->wcost = last_candidate->wcost + 1;
      new_candidate->structure_cost = last_candidate->structure_cost + 1;
    }
    new_candidate->attributes = 0;
    last_candidate = new_candidate;
    // One character hiragana/katakana will cause side effect.
    // Type "し" and choose "シ". After that, "しました" will become "シました".
    if (Util::CharsLen(new_candidate->key) <= 1) {
      new_candidate->attributes |= Segment::Candidate::CONTEXT_SENSITIVE;
    }
  }

  // Insert a dummy katakana candidate.
  string katakana_value;
  Util::HiraganaToKatakana(segment->key(), &katakana_value);
  if (segment->candidates_size() > 0 &&
      segment->candidates_size() < expand_size &&
      Util::GetScriptType(katakana_value) == Util::KATAKANA) {
    Segment::Candidate *new_candidate = segment->add_candidate();
    DCHECK(new_candidate);
    DCHECK(last_candidate);
    new_candidate->Init();
    new_candidate->key = segment->key();
    new_candidate->value = katakana_value;
    new_candidate->content_key = segment->key();
    new_candidate->content_value = katakana_value;
    new_candidate->cost = last_candidate->cost + 1;
    new_candidate->wcost = last_candidate->wcost + 1;
    new_candidate->structure_cost = last_candidate->structure_cost + 1;
    new_candidate->lid = last_candidate->lid;
    new_candidate->rid = last_candidate->rid;
    if (Util::CharsLen(new_candidate->key) <= 1) {
      new_candidate->attributes |= Segment::Candidate::CONTEXT_SENSITIVE;
    }
  }

  DCHECK_GT(segment->candidates_size(), 0);
}

void ImmutableConverterImpl::ApplyResegmentRules(
    size_t pos, Lattice *lattice) const {
  if (ResegmentArabicNumberAndSuffix(pos, lattice)) {
    VLOG(1) << "ResegmentArabicNumberAndSuffix returned true";
    return;
  }

  if (ResegmentPrefixAndArabicNumber(pos, lattice)) {
    VLOG(1) << "ResegmentArabicNumberAndSuffix returned true";
    return;
  }

  if (ResegmentPersonalName(pos, lattice)) {
    VLOG(1) << "ResegmentPersonalName returned true";
    return;
  }
}

// Currently, only arabic_number + suffix patterns are resegmented
// TODO(taku): consider kanji number into consideration
bool ImmutableConverterImpl::ResegmentArabicNumberAndSuffix(
    size_t pos, Lattice *lattice) const {
  const Node *bnode = lattice->begin_nodes(pos);
  if (bnode == NULL) {
    VLOG(1) << "bnode is NULL";
    return false;
  }

  bool modified = false;

  for (const Node *compound_node = bnode;
       compound_node != NULL; compound_node = compound_node->bnext) {
    if (!compound_node->value.empty() && !compound_node->key.empty() &&
        POSMatcher::IsNumber(compound_node->lid) &&
        !POSMatcher::IsNumber(compound_node->rid) &&
        IsNumber(compound_node->value[0]) && IsNumber(compound_node->key[0])) {
      string number_value, number_key;
      string suffix_value, suffix_key;
      DecomposeNumberAndSuffix(compound_node->value,
                               &number_value, &suffix_value);
      DecomposeNumberAndSuffix(compound_node->key,
                               &number_key, &suffix_key);

      if (suffix_value.empty() || suffix_key.empty()) {
        continue;
      }

      // not compatibile
      if (number_value != number_key) {
        LOG(WARNING) << "Incompatible key/value number pair";
        continue;
      }

      // do -1 so that resegmented nodes are boosted
      // over compound node.
      const int32 wcost = max(compound_node->wcost / 2 - 1, 0);

      Node *number_node = lattice->NewNode();
      CHECK(number_node);
      number_node->key = number_key;
      number_node->value = number_value;
      number_node->lid = compound_node->lid;
      number_node->rid = 0;   // 0 to 0 transition cost is 0
      number_node->wcost = wcost;
      number_node->node_type = Node::NOR_NODE;
      number_node->bnext = NULL;

      // insert number into the lattice
      lattice->Insert(pos, number_node);

      Node *suffix_node = lattice->NewNode();
      CHECK(suffix_node);
      suffix_node->key = suffix_key;
      suffix_node->value = suffix_value;
      suffix_node->lid = 0;
      suffix_node->rid = compound_node->rid;
      suffix_node->wcost = wcost;
      suffix_node->node_type = Node::NOR_NODE;
      suffix_node->bnext = NULL;

      suffix_node->constrained_prev = number_node;

      // insert suffix into the lattice
      lattice->Insert(pos + number_node->key.size(), suffix_node);
      VLOG(1) << "Resegmented: " << compound_node->value << " "
              << number_node->value << " " << suffix_node->value;

      modified = true;
    }
  }

  return modified;
}

bool ImmutableConverterImpl::ResegmentPrefixAndArabicNumber(
    size_t pos, Lattice *lattice) const {
  const Node *bnode = lattice->begin_nodes(pos);
  if (bnode == NULL) {
    VLOG(1) << "bnode is NULL";
    return false;
  }

  bool modified = false;

  for (const Node *compound_node = bnode;
       compound_node != NULL; compound_node = compound_node->bnext) {
    // Unlike ResegmentArabicNumberAndSuffix, we don't
    // check POS as words ending with Arabic numbers are pretty rare.
    if (compound_node->value.size() > 1 && compound_node->key.size() > 1 &&
        !IsNumber(compound_node->value[0]) &&
        !IsNumber(compound_node->key[0]) &&
        IsNumber(compound_node->value[compound_node->value.size() - 1]) &&
        IsNumber(compound_node->key[compound_node->key.size() - 1])) {
      string number_value, number_key;
      string prefix_value, prefix_key;
      DecomposePrefixAndNumber(compound_node->value,
                               &prefix_value, &number_value);
      DecomposePrefixAndNumber(compound_node->key,
                               &prefix_key, &number_key);

      if (prefix_value.empty() || prefix_key.empty()) {
        continue;
      }

      // not compatibile
      if (number_value != number_key) {
        LOG(WARNING) << "Incompatible key/value number pair";
        continue;
      }

      // do -1 so that resegmented nodes are boosted
      // over compound node.
      const int32 wcost = max(compound_node->wcost / 2 - 1, 0);

      Node *prefix_node = lattice->NewNode();
      CHECK(prefix_node);
      prefix_node->key = prefix_key;
      prefix_node->value = prefix_value;
      prefix_node->lid = compound_node->lid;
      prefix_node->rid = 0;   // 0 to 0 transition cost is 0
      prefix_node->wcost = wcost;
      prefix_node->node_type = Node::NOR_NODE;
      prefix_node->bnext = NULL;

      // insert number into the lattice
      lattice->Insert(pos, prefix_node);

      Node *number_node = lattice->NewNode();
      CHECK(number_node);
      number_node->key = number_key;
      number_node->value = number_value;
      number_node->lid = 0;
      number_node->rid = compound_node->rid;
      number_node->wcost = wcost;
      number_node->node_type = Node::NOR_NODE;
      number_node->bnext = NULL;

      number_node->constrained_prev = prefix_node;

      // insert number into the lattice
      lattice->Insert(pos + prefix_node->key.size(), number_node);
      VLOG(1) << "Resegmented: " << compound_node->value << " "
              << prefix_node->value << " " << number_node->value;

      modified = true;
    }
  }

  return modified;
}

bool ImmutableConverterImpl::ResegmentPersonalName(
    size_t pos, Lattice *lattice) const {
  const Node *bnode = lattice->begin_nodes(pos);
  if (bnode == NULL) {
    VLOG(1) << "bnode is NULL";
    return false;
  }

  bool modified = false;

  // find a combination of last_name and first_name, e.g. "田中麗奈".
  for (const Node *compound_node = bnode;
       compound_node != NULL; compound_node = compound_node->bnext) {
    // left word is last name and right word is first name
    if (compound_node->lid != POSMatcher::GetLastNameId() ||
        compound_node->rid != POSMatcher::GetFirstNameId()) {
      continue;
    }

    const size_t len = Util::CharsLen(compound_node->value);

    // Don't resegment one-word last_name/first_name like 林健,
    // as it would deliver side effect.
    if (len <= 2) {
      continue;
    }

    // Don't resegment if the value is katakana
    if (Util::GetScriptType(compound_node->value) == Util::KATAKANA) {
      continue;
    }

    // Do constrained Viterbi search inside the compound "田中麗奈".
    // Constraints:
    // 1. Concats of last_name and first_name should be "田中麗奈"
    // 2. consisting of two words (last_name and first_name)
    // 3. Segment-boundary exist between the two words.
    // 4.a Either (a) POS of lnode is last_name or (b) POS of rnode is fist_name
    //     (len >= 4)
    // 4.b Both (a) POS of lnode is last_name and (b) POS of rnode is fist_name
    //     (len == 3)
    const Node *best_last_name_node = NULL;
    const Node *best_first_name_node = NULL;
    int best_cost = 0x7FFFFFFF;
    for (const Node *lnode = bnode; lnode != NULL; lnode = lnode->bnext) {
      // lnode(last_name) is a prefix of compound, Constraint 1.
      if (compound_node->value.size() > lnode->value.size() &&
          compound_node->key.size() > lnode->key.size() &&
          Util::StartsWith(compound_node->value, lnode->value)) {
        // rnode(first_name) is a suffix of compound, Constraint 1.
        for (const Node *rnode = lattice->begin_nodes(pos + lnode->key.size());
             rnode != NULL; rnode = rnode->bnext) {
          if ((lnode->value.size() + rnode->value.size())
              == compound_node->value.size() &&
              (lnode->value + rnode->value) == compound_node->value &&
              segmenter_->IsBoundary(lnode, rnode, false)) {   // Constraint 3.
            const int32 cost = lnode->wcost + GetCost(lnode, rnode);
            if (cost < best_cost) {   // choose the smallest ones
              best_last_name_node = lnode;
              best_first_name_node = rnode;
              best_cost = cost;
            }
          }
        }
      }
    }

    // No valid first/last names are found
    if (best_first_name_node == NULL || best_last_name_node == NULL) {
      continue;
    }

    // Constraint 4.a
    if (len >= 4 &&
        (best_last_name_node->lid != POSMatcher::GetLastNameId() &&
         best_first_name_node->rid != POSMatcher::GetFirstNameId())) {
      continue;
    }

    // Constraint 4.b
    if (len == 3 &&
        (best_last_name_node->lid != POSMatcher::GetLastNameId() ||
         best_first_name_node->rid != POSMatcher::GetFirstNameId())) {
      continue;
    }

    // Insert LastName and FirstName as independent nodes.
    // Duplications will be removed in nbest enumerations.
    // word costs are calculated from compound node by assuming that
    // transition cost is 0.
    //
    // last_name_cost + transition_cost + first_name_cost == compound_cost
    // last_name_cost == first_name_cost
    // i.e,
    // last_name_cost = first_name_cost =
    // (compound_cost - transition_cost) / 2;

    const int32 wcost = (compound_node->wcost -
                         last_to_first_name_transition_cost_) / 2;

    Node *last_name_node = lattice->NewNode();
    CHECK(last_name_node);
    last_name_node->key = best_last_name_node->key;
    last_name_node->value = best_last_name_node->value;
    last_name_node->lid = compound_node->lid;
    last_name_node->rid = POSMatcher::GetLastNameId();
    last_name_node->wcost = wcost;
    last_name_node->node_type = Node::NOR_NODE;
    last_name_node->bnext = NULL;

    // insert last_name into the lattice
    lattice->Insert(pos, last_name_node);

    Node *first_name_node = lattice->NewNode();
    CHECK(first_name_node);
    first_name_node->key = best_first_name_node->key;
    first_name_node->value = best_first_name_node->value;
    first_name_node->lid = POSMatcher::GetFirstNameId();
    first_name_node->rid = compound_node->rid;
    first_name_node->wcost = wcost;
    first_name_node->node_type = Node::NOR_NODE;
    first_name_node->bnext = NULL;

    first_name_node->constrained_prev = last_name_node;

    // insert first_name into the lattice
    lattice->Insert(pos + last_name_node->key.size(), first_name_node);

    VLOG(2) << "Resegmented: " << compound_node->value << " "
            << last_name_node->value << " " << first_name_node->value;

    modified = true;
  }

  return modified;
}

Node *ImmutableConverterImpl::Lookup(const int begin_pos,
                                     const int end_pos,
                                     bool is_reverse,
                                     bool is_prediction,
                                     Lattice *lattice) const {
  CHECK_LE(begin_pos, end_pos);
  const char *begin = lattice->key().data() + begin_pos;
  const char *end = lattice->key().data() + end_pos;
  const size_t len = end_pos - begin_pos;

  lattice->node_allocator()->set_max_nodes_size(8192);
  Node *result_node = NULL;
  if (is_reverse) {
    result_node = dictionary_->LookupReverse(
        begin, len, lattice->node_allocator());
  } else {
    if (is_prediction && !FLAGS_disable_lattice_cache) {
      DictionaryInterface::Limit limit;
      limit.key_len_lower_limit = lattice->cache_info(begin_pos) + 1;

      result_node =
          DictionaryFactory::GetDictionary()->LookupPrefixWithLimit(
          begin, len, limit, lattice->node_allocator());

      // add ENABLE_CACHE attribute and set raw_wcost
      for (Node *node = result_node; node != NULL; node = node->bnext) {
        node->attributes |= Node::ENABLE_CACHE;
        node->raw_wcost = node->wcost;
      }

      lattice->SetCacheInfo(begin_pos, len);
    } else {
      // when cache feature is not used, look up normally
      result_node =
          DictionaryFactory::GetDictionary()->LookupPrefix(
          begin, len, lattice->node_allocator());
    }
  }
  return AddCharacterTypeBasedNodes(begin, end, lattice, result_node);
}

Node *ImmutableConverterImpl::AddCharacterTypeBasedNodes(
    const char *begin, const char *end, Lattice *lattice, Node *nodes) const {

  size_t mblen = 0;
  const char32 ucs4 = Util::UTF8ToUCS4(begin, end, &mblen);

  const Util::ScriptType first_script_type = Util::GetScriptType(ucs4);
  const Util::FormType first_form_type = Util::GetFormType(ucs4);

  // Add 1 character node. It can be either UnknownId or NumberId.
  {
    Node *new_node = lattice->NewNode();
    CHECK(new_node);
    if (first_script_type == Util::NUMBER) {
      new_node->lid = POSMatcher::GetNumberId();
      new_node->rid = POSMatcher::GetNumberId();
    } else {
      new_node->lid = POSMatcher::GetUnknownId();
      new_node->rid = POSMatcher::GetUnknownId();
    }

    new_node->wcost = kMaxCost;
    new_node->value.assign(begin, mblen);
    new_node->key.assign(begin, mblen);
    new_node->node_type = Node::NOR_NODE;
    new_node->bnext = nodes;
    nodes = new_node;
  }  // scope out |new_node|

  if (first_script_type == Util::NUMBER) {
    nodes->wcost = kDefaultNumberCost;
    return nodes;
  }

  if (first_script_type != Util::ALPHABET &&
      first_script_type != Util::KATAKANA) {
    return nodes;
  }

  // group by same char type
  int num_char = 1;
  const char *p = begin + mblen;
  while (p < end) {
    const char32 next_ucs4 = Util::UTF8ToUCS4(p, end, &mblen);
    if (first_script_type != Util::GetScriptType(next_ucs4) ||
        first_form_type != Util::GetFormType(next_ucs4)) {
      break;
    }
    p += mblen;
    ++num_char;
  }

  if (num_char > 1) {
    mblen = static_cast<uint32>(p - begin);
    Node *new_node = lattice->NewNode();
    CHECK(new_node);
    if (first_script_type == Util::NUMBER) {
      new_node->lid = POSMatcher::GetNumberId();
      new_node->rid = POSMatcher::GetNumberId();
    } else {
      new_node->lid = POSMatcher::GetUnknownId();
      new_node->rid = POSMatcher::GetUnknownId();
    }
    new_node->wcost = kMaxCost / 2;
    new_node->value.assign(begin, mblen);
    new_node->key.assign(begin, mblen);
    new_node->node_type = Node::NOR_NODE;
    new_node->bnext = nodes;
    nodes = new_node;
  }

  return nodes;
}

bool ImmutableConverterImpl::Viterbi(Segments *segments,
                                     const Lattice &lattice,
                                     const vector<uint16> &group) const {
  DCHECK(segments);

  // log prob of 1/1000
  const int kWeakConnectedPeanlty = 3453;

  // Reasonably big cost. Cannot use INT_MAX a new cost will be calculated
  // based on kVeryBigCost.
  const int kVeryBigCost = (INT_MAX >> 2);

  const string &key = lattice.key();

  for (size_t pos = 0; pos <= key.size(); ++pos) {
    for (Node *rnode = lattice.begin_nodes(pos);
         rnode != NULL; rnode = rnode->bnext) {
      int best_cost = INT_MAX;
      Node *best_node = NULL;
      for (Node *lnode = lattice.end_nodes(pos);
           lnode != NULL; lnode = lnode->enext) {
        int cost = 0;
        switch (GetConnectionType(lnode, rnode, group, segments)) {
          case CONNECTED:
            cost = lnode->cost + GetCost(lnode, rnode);
            break;
          case WEAK_CONNECTED:
            // word boundary with WEAK_CONNECTED is created as follows
            // - [ABCD] becomes one segment with converter,
            //   where A, B, C and D a word
            // - User changed the boundary into ABC|D
            // - The boundary between C and D is WEAK_CONNECTED.
            // Here, we simply demote the transition probability of
            // WEAK_CONNECTED.
            // Issue is how strongly we should demote?
            // - If converter strongly obeys user-preference and demotes
            //      the probability aggressively, the word D will disappear,
            //      since C->D transition gets rarer.
            // - If converter ignores user-preference, it's also annoying, as
            //    the result will be unchanged even after changing the boundary.
            cost = lnode->cost + GetCost(lnode, rnode) + kWeakConnectedPeanlty;
            rnode->attributes |= Node::WEAK_CONNECTED;
            break;
          case NOT_CONNECTED:
            cost = kVeryBigCost;
            break;
          default:
            break;
        }

        if (cost < best_cost) {
          best_node = lnode;
          best_cost = cost;
        }
      }
      rnode->prev = best_node;
      rnode->cost = best_cost;
    }
  }

  Node *node = lattice.eos_nodes();
  CHECK(node->bnext == NULL);
  Node *prev = NULL;
  while (node->prev != NULL) {
    prev = node->prev;
    prev->next = node;
    node = prev;
  }

  if (lattice.bos_nodes() != prev) {
    LOG(WARNING) << "cannot make lattice";
    return false;
  }

  return true;
}

int ImmutableConverterImpl::GetConnectionType(const Node *lnode,
                                              const Node *rnode,
                                              const vector<uint16> &group,
                                              const Segments *segments) const {
  if ((rnode->node_type != Node::EOS_NODE &&
       group[rnode->begin_pos] != group[rnode->end_pos - 1]) ||
      (lnode->node_type != Node::BOS_NODE &&
       group[lnode->begin_pos] != group[lnode->end_pos - 1])) {
    return NOT_CONNECTED;
  }

  // BOS/EOS nodes
  if (lnode->node_type == Node::BOS_NODE ||
      rnode->node_type == Node::EOS_NODE ||
      lnode->node_type == Node::HIS_NODE ||
      rnode->node_type == Node::HIS_NODE) {
    return CONNECTED;
  }

  // lnode and rnode are both FREE Node
  const Segment::SegmentType ltype =
      segments->segment(group[lnode->begin_pos]).segment_type();
  const Segment::SegmentType rtype =
      segments->segment(group[rnode->begin_pos]).segment_type();
  if (ltype == Segment::FREE && rtype == Segment::FREE) {
    return CONNECTED;
  }

  const bool is_prediction =
      (segments->request_type() == Segments::PREDICTION ||
       segments->request_type() == Segments::SUGGESTION ||
       segments->request_type() == Segments::PARTIAL_PREDICTION ||
       segments->request_type() == Segments::PARTIAL_SUGGESTION);

  const bool is_rule_boundary = segmenter_->IsBoundary(lnode, rnode,
                                                       is_prediction);
  const bool is_constraint_boundary =
      group[lnode->begin_pos] != group[rnode->begin_pos];

  if (is_constraint_boundary && !is_rule_boundary) {
    return WEAK_CONNECTED;
  }

  return CONNECTED;
}

// faster Viterbi algorithm for prediction
//
// Run simple Viterbi algorithm with contracting the same lid and rid.
// Because the original Viterbi has speciall nodes, we should take it
// consideration.
// 1. CONNECTED nodes: are normal nodes.
// 2. WEAK_CONNECTED nodes: don't occur in prediction, so we do not have to
//    consider about them.
// 3. NOT_CONNECTED nodes: occur when they are between history nodes and
//    normal nodes.
// For NOT_CONNECTED nodes, we should run Viterbi for history nodes first,
// and do it for normal nodes second. The function "PredictionViterbiSub"
// runs Viterbi for positions between calc_begin_pos and calc_end_pos,
// inclusive.
//
// We cannot apply this function in suggestion because in suggestion there are
// WEAK_CONNECTED nodes and this function is not designed for them.
bool ImmutableConverterImpl::PredictionViterbi(Segments *segments,
                                               const Lattice &lattice) const {
  const size_t &key_length = lattice.key().size();
  const size_t history_segments_size = segments->history_segments_size();
  size_t history_length = 0;
  for (size_t i = 0; i < history_segments_size; ++i) {
    history_length += segments->segment(i).key().size();
  }
  PredictionViterbiSub(segments, lattice, 0, history_length);
  PredictionViterbiSub(segments, lattice, history_length, key_length);

  Node *node = lattice.eos_nodes();
  CHECK(node->bnext == NULL);
  Node *prev = NULL;
  while (node->prev != NULL) {
    prev = node->prev;
    prev->next = node;
    node = prev;
  }

  if (lattice.bos_nodes() != prev) {
    LOG(WARNING) << "cannot make lattice";
    return false;
  }

  return true;
}

void ImmutableConverterImpl::PredictionViterbiSub(Segments *segments,
                                                  const Lattice &lattice,
                                                  int calc_begin_pos,
                                                  int calc_end_pos) const {
  CHECK_LE(calc_begin_pos, calc_end_pos);
  for (size_t pos = calc_begin_pos; pos <= calc_end_pos; ++pos) {
    // Mapping from lnode's rid to (cost, Node) of best way/cost
    map<int, pair<int, Node*> > lbest;

    for (Node *lnode = lattice.end_nodes(pos);
         lnode != NULL; lnode = lnode->enext) {
      const int rid = lnode->rid;
      map<int, pair<int, Node*> >::iterator it_best = lbest.find(rid);
      if (it_best == lbest.end()) {
        lbest[rid] = make_pair<int, Node*>(INT_MAX, NULL);
        it_best = lbest.find(rid);
      }
      if (lnode->cost < it_best->second.first) {
        it_best->second.first = lnode->cost;
        it_best->second.second = lnode;
      }
    }

    map<int, int> rbest_cost;
    for (Node *rnode = lattice.begin_nodes(pos);
         rnode != NULL; rnode = rnode->bnext) {
      if (rnode->end_pos > calc_end_pos) {
        continue;
      }
      rbest_cost[rnode->lid] = INT_MAX;
    }

    map<int, Node*> rbest_prev_node;
    for (map<int, pair<int, Node*> >::iterator lt = lbest.begin();
         lt != lbest.end(); ++lt) {
      for (map<int, int>::iterator rt = rbest_cost.begin();
           rt != rbest_cost.end(); ++rt) {
        const int cost = lt->second.first +
            connector_->GetTransitionCost(lt->first, rt->first);
        if (cost < rt->second) {
          rbest_prev_node[rt->first] = lt->second.second;
          rt->second = cost;
        }
      }
    }

    for (Node *rnode = lattice.begin_nodes(pos);
         rnode != NULL; rnode = rnode->bnext) {
      if (rnode->end_pos > calc_end_pos) {
        continue;
      }
      const int lid = rnode->lid;
      if (rbest_prev_node[lid] == NULL) {
        continue;
      }
      rnode->prev = rbest_prev_node[lid];
      rnode->cost = rbest_cost[lid] + rnode->wcost;
    }
  }
}

// Add predictive nodes from conversion key.
void ImmutableConverterImpl::MakeLatticeNodesForPredictiveNodes(
    Lattice *lattice, Segments *segments) const {
  const string &key = lattice->key();
  string conversion_key;
  for (size_t i = 0; i < segments->conversion_segments_size(); ++i) {
    conversion_key += segments->conversion_segment(i).key();
  }
  DCHECK_NE(string::npos, key.find(conversion_key));
  vector<string> conversion_key_chars;
  Util::SplitStringToUtf8Chars(conversion_key, &conversion_key_chars);

  // do nothing if the conversion key is short
  const size_t kKeyMinLength = 7;
  if (conversion_key_chars.size() < kKeyMinLength) {
    return;
  }

  // predictive search from suffix dictionary
  // (search words with between 1 and 6 characters)
  {
    const size_t kMaxSuffixLookupKey = 6;
    const size_t max_sufffix_len =
        min(kMaxSuffixLookupKey, conversion_key_chars.size());
    size_t pos = key.size();
    for (size_t suffix_len = 1; suffix_len <= max_sufffix_len; ++suffix_len) {
      pos -= conversion_key_chars[
          conversion_key_chars.size() - suffix_len].size();
      DCHECK_GE(key.size(), pos);

      const size_t str_len = key.size() - pos;
      Node *result_node =
          SuffixDictionaryFactory::GetSuffixDictionary()->LookupPredictive(
              key.data() + pos, str_len,
              lattice->node_allocator());
      AddPredictiveNodes(suffix_len, pos, lattice, result_node);
    }
  }

  // predictive search from system dictionary
  // (search words with between 5 and 8 characters)
  {
    const size_t kMinSystemLookupKey = 5;
    const size_t kMaxSystemLookupKey = 8;
    const size_t max_suffix_len =
        min(kMaxSystemLookupKey, conversion_key_chars.size());
    size_t pos = key.size();
    for (size_t suffix_len = 1; suffix_len <= max_suffix_len; ++suffix_len) {
      pos -= conversion_key_chars[
          conversion_key_chars.size() - suffix_len].size();
      DCHECK_GE(key.size(), pos);

      if (suffix_len < kMinSystemLookupKey) {
        // Just update |pos|.
        continue;
      }

      const size_t str_len = key.size() - pos;
      Node *result_node =
          DictionaryFactory::GetDictionary()->LookupPredictive(
              key.data() + pos, str_len,
              lattice->node_allocator());
      AddPredictiveNodes(suffix_len, pos, lattice, result_node);
    }
  }
}

void ImmutableConverterImpl::AddPredictiveNodes(const size_t &len,
                                                const size_t &pos,
                                                Lattice *lattice,
                                                Node *result_node) const {
  if (result_node == NULL) {
    return;
  }

  // add penalty cost to each nodes
  for (Node* rnode = result_node; rnode != NULL; rnode = rnode->bnext) {
    // add penalty to predictive nodes
    const int kPredictiveNodeDefaultPenalty = 900;  // =~ -500 * log(1/6)
    int additional_cost = kPredictiveNodeDefaultPenalty;

    // bonus for suffix word
    if (POSMatcher::IsSuffixWord(rnode->rid)
        && POSMatcher::IsSuffixWord(rnode->lid)) {
      const int kSuffixWordBonus = 700;
      additional_cost -= kSuffixWordBonus;
    }

    // penalty for unique noun word
    if (POSMatcher::IsUniqueNoun(rnode->rid)
        || POSMatcher::IsUniqueNoun(rnode->lid)) {
      const int kUniqueNounPenalty = 500;
      additional_cost += kUniqueNounPenalty;
    }

    // penalty for number
    if (POSMatcher::IsNumber(rnode->rid)
        || POSMatcher::IsNumber(rnode->lid)) {
      const int kNumberPenalty = 4000;
      additional_cost += kNumberPenalty;
    }

    rnode->wcost += additional_cost;
  }

  // insert nodes
  lattice->Insert(pos, result_node);
}

bool ImmutableConverterImpl::MakeLattice(Lattice *lattice,
                                         Segments *segments) const {
  if (segments == NULL) {
    LOG(ERROR) << "Segments is NULL";
    return false;
  }

  if (lattice == NULL) {
    LOG(ERROR) << "Lattice is NULL";
    return false;
  }

  if (segments->segments_size() >= kMaxSegmentsSize) {
    LOG(WARNING) << "too many segments";
    return false;
  }

  NormalizeHistorySegments(segments);

  const bool is_reverse =
      (segments->request_type() == Segments::REVERSE_CONVERSION);

  const bool is_prediction =
      (segments->request_type() == Segments::SUGGESTION ||
       segments->request_type() == Segments::PREDICTION);

  // In suggestion mode, ImmutableConverter will not accept multiple-segments.
  // The result always consists of one segment.
  if ((is_reverse || is_prediction) &&
      (segments->conversion_segments_size() != 1 ||
       segments->conversion_segment(0).segment_type() != Segment::FREE)) {
    LOG(WARNING) << "ImmutableConverter doesn't support constrained requests";
    return false;
  }

  string history_key, conversion_key;
  const size_t history_segments_size = segments->history_segments_size();
  for (size_t i = 0; i < history_segments_size; ++i) {
    DCHECK(!segments->segment(i).key().empty());
    history_key.append(segments->segment(i).key());
  }

  for (size_t i = history_segments_size; i < segments->segments_size(); ++i) {
    DCHECK(!segments->segment(i).key().empty());
    conversion_key.append(segments->segment(i).key());
  }

  const size_t max_char_len =
      is_reverse ? kMaxCharLengthForReverseConversion : kMaxCharLength;
  if (history_key.size() + conversion_key.size() >= max_char_len) {
    LOG(WARNING) << "too long input chars";
    return false;
  }

  if (conversion_key.empty()) {
    LOG(WARNING) << "key is empty";
    return false;
  }

  const string key = history_key + conversion_key;
  lattice->UpdateKey(key);
  lattice->ResetNodeCost();

  if (is_reverse) {
    // Reverse lookup for each prefix string in key is slow with current
    // implementation, so run it for them at once and cache the result.
    dictionary_->PopulateReverseLookupCache(
        key.c_str(), key.size(), lattice->node_allocator());
  }

  bool is_valid_lattice = true;
  // Perform the main part of lattice construction.
  if (!MakeLatticeNodesForHistorySegments(lattice, segments) ||
      lattice->end_nodes(history_key.size()) == NULL) {
    is_valid_lattice = false;
  }

  // Can not apply key corrector to invalid lattice.
  if (is_valid_lattice) {
    MakeLatticeNodesForConversionSegments(history_key, segments, lattice);
  }

  if (is_reverse) {
    // No reverse look up will happen afterwards.
    dictionary_->ClearReverseLookupCache(
        lattice->node_allocator());
  }

  // Predictive real time conversion
  if (is_prediction && !FLAGS_disable_predictive_realtime_conversion) {
    MakeLatticeNodesForPredictiveNodes(lattice, segments);
  }

  if (!is_valid_lattice) {
    // Safely bail out, since reverse look up cache was released already.
    return false;
  }

  if (lattice->end_nodes(key.size()) == NULL) {
    LOG(WARNING) << "cannot build lattice from input";
    return false;
  }

  ApplyPrefixSuffixPenalty(conversion_key, lattice);

  // Re-segment personal-names, numbers ...etc
  const bool is_conversion =
      (segments->request_type() == Segments::CONVERSION);
  if (is_conversion) {
    Resegment(history_key, conversion_key, segments, lattice);
  }

  return true;
}

bool ImmutableConverterImpl::MakeLatticeNodesForHistorySegments(
    Lattice *lattice, Segments *segments) const {
  const bool is_reverse =
     (segments->request_type() == Segments::REVERSE_CONVERSION);
  const size_t history_segments_size = segments->history_segments_size();
  const string &key = lattice->key();

  size_t segments_pos = 0;
  uint16 last_rid = 0;

  for (size_t s = 0; s < history_segments_size; ++s) {
    const Segment &segment = segments->segment(s);
    if (segment.segment_type() != Segment::HISTORY &&
        segment.segment_type() != Segment::SUBMITTED) {
      LOG(WARNING) << "inconsistent history";
      return false;
    }
    if (segment.key().empty()) {
      LOG(WARNING) << "invalid history: key is empty";
      return false;
    }
    const Segment::Candidate &candidate = segment.candidate(0);

    // Add a virtual nodes corresponding to HISTORY segments.
    Node *rnode = lattice->NewNode();
    CHECK(rnode);
    rnode->lid = candidate.lid;
    rnode->rid = candidate.rid;
    rnode->wcost = 0;
    rnode->value = candidate.value;
    rnode->key = segment.key();
    rnode->node_type = Node::HIS_NODE;
    rnode->bnext = NULL;
    lattice->Insert(segments_pos, rnode);

    // For the last history segment,  we also insert a new node having
    // EOS part-of-speech. Viterbi algorithm will find the
    // best path from rnode(context) and rnode2(EOS).
    if (s + 1 == history_segments_size && candidate.rid != 0) {
      Node *rnode2 = lattice->NewNode();
      CHECK(rnode2);
      rnode2->lid = candidate.lid;
      rnode2->rid = 0;   // 0 is BOS/EOS
      rnode2->wcost = 1500;  // =~ -500 * log(1/20)
      rnode2->value = candidate.value;
      rnode2->key = segment.key();
      rnode2->node_type = Node::HIS_NODE;
      rnode2->bnext = NULL;
      lattice->Insert(segments_pos, rnode2);
    }

    // Dictionary lookup for the candidates which are
    // overlapping between history and conversion.
    // Check only the last history segment at this moment.
    //
    // Example: history "おいかわ(及川)", conversion: "たくや"
    // Here, try to find "おいかわたくや(及川卓也)" from dictionary
    // and insert "卓也" as a new word node with a modified cost
    if (s + 1 == history_segments_size) {
      const bool is_prediction =
          (segments->request_type() == Segments::SUGGESTION ||
           segments->request_type() == Segments::PREDICTION);
      const Node *node = Lookup(segments_pos, key.size(),
                                is_reverse,
                                is_prediction,
                                lattice);
      for (const Node *compound_node = node; compound_node != NULL;
           compound_node = compound_node->bnext) {
        // No overlapps
        if (compound_node->key.size() <= rnode->key.size() ||
            compound_node->value.size() <= rnode->value.size() ||
            !Util::StartsWith(compound_node->key, rnode->key) ||
            !Util::StartsWith(compound_node->value, rnode->value)) {
          // not a prefix
          continue;
        }

        // Must be in the same POS group.
        // http://b/issue?id=2977618
        if (GetPosGroup(candidate.lid) != GetPosGroup(compound_node->lid)) {
          continue;
        }

        // make new virtual node
        Node *new_node = lattice->NewNode();
        CHECK(new_node);

        // get the suffix part ("たくや/卓也")
        new_node->key =
            compound_node->key.substr(rnode->key.size(),
                                      compound_node->key.size() -
                                      rnode->key.size());

        new_node->value =
            compound_node->value.substr(rnode->value.size(),
                                        compound_node->value.size() -
                                        rnode->value.size());

        // rid/lid are derived from the compound.
        // lid is just an approximation
        new_node->rid = compound_node->rid;
        new_node->lid = compound_node->lid;
        new_node->bnext = NULL;
        new_node->node_type = Node::NOR_NODE;

        // New cost recalcuration:
        //
        // trans(last_rid, candidate.lid) + candidate.wcost
        // + trans(candidate.rid, new_node->lid)
        // + new_node->wcost ==
        // trans(last_rid, compound_node->lid) + compound_node->wcost
        //
        // i.e.
        // new_node->wcost =
        // trans(last_rid, compound_node->lid) + compound_node->wcost
        //   - trans(last_rid, candidate.lid) - candidate.wcost -
        // trans(candidate.rid, new_node->lid)
        new_node->wcost =
            connector_->GetTransitionCost(last_rid, compound_node->lid)
            + compound_node->wcost
            - connector_->GetTransitionCost(last_rid, candidate.lid)
            - candidate.wcost
            - connector_->GetTransitionCost(candidate.rid, new_node->lid);

        VLOG(2) << " compound_node->lid=" << compound_node->lid
                << " compound_node->rid=" << compound_node->rid
                << " compound_node->wcost=" << compound_node->wcost;
        VLOG(2) << " last_rid=" << last_rid
                << " candidate.lid=" << candidate.lid
                << " candidate.rid=" << candidate.rid
                << " candidate.cost=" << candidate.cost
                << " candidate.wcost=" << candidate.wcost;
        VLOG(2) << " new_node->wcost=" << new_node->wcost;

        new_node->constrained_prev = rnode;

        // Added as new node
        lattice->Insert(segments_pos + rnode->key.size(), new_node);

        VLOG(2) << "Added: " << new_node->key << " " << new_node->value;
      }
    }

    // update segment pos
    segments_pos += rnode->key.size();
    last_rid = rnode->rid;
  }
  return true;
}

void ImmutableConverterImpl::MakeLatticeNodesForConversionSegments(
    const string &history_key,
    Segments *segments, Lattice *lattice) const {
  const string &key = lattice->key();
  const bool is_conversion =
      (segments->request_type() == Segments::CONVERSION);
  // Do not use KeyCorrector if user changes the boundary.
  // http://b/issue?id=2804996
  scoped_ptr<KeyCorrector> key_corrector;
  if (is_conversion && !segments->resized()) {
    KeyCorrector::InputMode mode = KeyCorrector::ROMAN;
    if (GET_CONFIG(preedit_method) != config::Config::ROMAN) {
      mode = KeyCorrector::KANA;
    }
    key_corrector.reset(new KeyCorrector(key, mode, history_key.size()));
  }

  const bool is_reverse =
      (segments->request_type() == Segments::REVERSE_CONVERSION);
  const bool is_prediction =
      (segments->request_type() == Segments::SUGGESTION ||
       segments->request_type() == Segments::PREDICTION);
  for (size_t pos = history_key.size(); pos < key.size(); ++pos) {
    if (lattice->end_nodes(pos) != NULL) {
      Node *rnode = Lookup(pos, key.size(), is_reverse, is_prediction, lattice);
      // If history key is NOT empty and user input seems to starts with
      // a particle ("はにで..."), mark the node as STARTS_WITH_PARTICLE.
      // We change the segment boundary if STARTS_WITH_PARTICLE attribute
      // is assigned.
      if (!history_key.empty() && pos == history_key.size()) {
        for (Node *node = rnode; node != NULL; node = node->bnext) {
          if (POSMatcher::IsAcceptableParticleAtBeginOfSegment(node->lid) &&
              node->lid == node->rid) {  // not a compound.
            node->attributes |= Node::STARTS_WITH_PARTICLE;
          }
        }
      }
      CHECK(rnode != NULL);
      lattice->Insert(pos, rnode);
      InsertCorrectedNodes(pos, key,
                           key_corrector.get(),
                           dictionary_, lattice);
    }
  }
}

void ImmutableConverterImpl::ApplyPrefixSuffixPenalty(
    const string &conversion_key,
    Lattice *lattice) const {
  const string &key = lattice->key();
  DCHECK_LE(conversion_key.size(), key.size());
  for (Node *node = lattice->begin_nodes(key.size() -
                                         conversion_key.size());
       node != NULL; node = node->bnext) {
    // TODO(taku):
    // We might be able to tweak the penalty according to
    // the size of history segments.
    // If history-segments is non-empty, we can make the
    // penalty smaller so that history context is more likely
    // selected.
    node->wcost += segmenter_->GetPrefixPenalty(node->lid);
  }

  for (Node *node = lattice->end_nodes(key.size());
       node != NULL; node = node->enext) {
    node->wcost += segmenter_->GetSuffixPenalty(node->rid);
  }
}

void ImmutableConverterImpl::Resegment(
    const string &history_key, const string &conversion_key,
    Segments *segments, Lattice *lattice) const {
  for (size_t pos = history_key.size();
       pos < history_key.size() + conversion_key.size(); ++pos) {
    ApplyResegmentRules(pos, lattice);
  }

  // Enable constrained node.
  size_t segments_pos = 0;
  for (size_t s = 0; s < segments->segments_size(); ++s) {
    const Segment &segment = segments->segment(s);
    if (segment.segment_type() == Segment::FIXED_VALUE) {
      const Segment::Candidate &candidate = segment.candidate(0);
      Node *rnode = lattice->NewNode();
      CHECK(rnode);
      rnode->lid       = candidate.lid;
      rnode->rid       = candidate.rid;
      rnode->wcost     = kMinCost;
      rnode->value     = candidate.value;
      rnode->key       = segment.key();
      rnode->node_type = Node::CON_NODE;
      rnode->bnext     = NULL;
      lattice->Insert(segments_pos, rnode);
    }
    segments_pos += segment.key().size();
  }
}

bool ImmutableConverterImpl::MakeSegments(Segments *segments,
                                          const Lattice &lattice,
                                          const vector<uint16> &group) const {
  if (segments == NULL) {
    LOG(WARNING) << "Segments is NULL";
    return false;
  }

  // skip HIS_NODE(s)
  Node *prev = lattice.bos_nodes();
  for (Node *node = lattice.bos_nodes()->next;
       node->next != NULL && node->node_type == Node::HIS_NODE;
       node = node->next) {
    prev = node;
  }

  size_t max_candidates_size = 0;
  bool is_prediction = false;
  switch (segments->request_type()) {
    case Segments::PREDICTION:
    case Segments::SUGGESTION:
    case Segments::PARTIAL_PREDICTION:
    case Segments::PARTIAL_SUGGESTION:
      max_candidates_size = segments->max_prediction_candidates_size();
      is_prediction = true;
      break;
    case Segments::CONVERSION:
      max_candidates_size = segments->max_conversion_candidates_size();
      break;
    case Segments::REVERSE_CONVERSION:
      // Currently, we assume that REVERSE_CONVERSION only
      // requires 1 result.
      // TODO(taku): support to set the size on REVESER_CONVERSION mode.
      max_candidates_size = 1;
      break;
    default:
      break;
  }

  const size_t expand_size =
      max(static_cast<size_t>(1),
          min(static_cast<size_t>(512), max_candidates_size));

  const size_t history_segments_size = segments->history_segments_size();
  const size_t old_segments_size = segments->segments_size();

  string key;
  for (Node *node = prev->next; node->next != NULL; node = node->next) {
    key.append(node->key);
    const Segment &old_segment = segments->segment(group[node->begin_pos]);
    // Condition 1: prev->next is NOT a boundary. Very strong constraint
    if (node->next->node_type != Node::EOS_NODE &&
        old_segment.segment_type() == Segment::FIXED_BOUNDARY &&
        group[node->begin_pos] == group[node->next->begin_pos]) {
      // do nothing
      // Condition 2: prev->next is a boundary. Very strong constraint
    } else if (node->node_type == Node::CON_NODE ||
               (node->next->node_type != Node::EOS_NODE &&
                group[node->begin_pos] != group[node->next->begin_pos]) ||
               segmenter_->IsBoundary(node, node->next, is_prediction)) {
      Segment *segment = is_prediction ?
          segments->mutable_segment(segments->segments_size() - 1) :
          segments->add_segment();
      scoped_ptr<NBestGenerator> nbest(new NBestGenerator);
      CHECK(segment);
      CHECK(nbest.get());
      if (!is_prediction) {
        segment->clear_candidates();
      }
      nbest->Init(prev, node->next, &lattice,
                  is_prediction);
      if (!is_prediction) {
        // For prediction, segment is already set as a request key
        segment->set_key(key);
      }
      ExpandCandidates(nbest.get(), segment, segments->request_type(),
                       expand_size);
      PromoteUserDictionaryCandidate(segment);
      InsertDummyCandidates(segment, expand_size);
      if (node->node_type == Node::CON_NODE) {
        segment->set_segment_type(Segment::FIXED_VALUE);
      } else {
        segment->set_segment_type(Segment::FREE);
      }
      key.clear();
      prev = node;
    }
    // otherwise, not a boundary
  }

  // erase old segments
  if (!is_prediction &&
      old_segments_size - history_segments_size > 0) {
    segments->erase_segments(history_segments_size,
                             old_segments_size -
                             history_segments_size);
  }

  return true;
}

bool ImmutableConverterImpl::Convert(Segments *segments) const {
  const bool is_prediction =
      (segments->request_type() == Segments::PREDICTION ||
       segments->request_type() == Segments::SUGGESTION);

  Lattice *lattice = GetLattice(segments, is_prediction);

  if (!MakeLattice(lattice, segments)) {
    LOG(WARNING) << "could not make lattice";
    return false;
  }

  vector<uint16> group;
  MakeGroup(segments, &group);

  if (is_prediction) {
    if (!PredictionViterbi(segments, *lattice)) {
      LOG(WARNING) << "prediction_viterbi failed";
      return false;
    }
  } else {
    if (!Viterbi(segments, *lattice, group)) {
      LOG(WARNING) << "viterbi failed";
      return false;
    }
  }

  VLOG(2) << lattice->DebugString();

  if (!MakeSegments(segments, *lattice, group)) {
    LOG(WARNING) << "make segments failed";
    return false;
  }

  return true;
}

namespace {
ImmutableConverterInterface *g_immutable_converter = NULL;

}  // namespace

ImmutableConverterInterface *
ImmutableConverterFactory::GetImmutableConverter() {
  if (g_immutable_converter == NULL) {
    return Singleton<ImmutableConverterImpl>::get();
  } else {
    return g_immutable_converter;
  }
}

void ImmutableConverterFactory::SetImmutableConverter(
    ImmutableConverterInterface *immutable_converter) {
  g_immutable_converter = immutable_converter;
}
}  // namespace mozc