Codebase list mozc / d2e045e converter / candidate_filter_test.cc
d2e045e

Tree @d2e045e (Download .tar.gz)

candidate_filter_test.cc @d2e045eraw · history · blame

// Copyright 2010-2012, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "converter/candidate_filter.h"

#include <climits>
#include <string>
#include <vector>

#include "base/base.h"
#include "base/freelist.h"
#include "base/util.h"
#include "converter/node.h"
#include "converter/segments.h"
#include "dictionary/pos_matcher.h"
#include "dictionary/suppression_dictionary.h"
#include "testing/base/public/gunit.h"

namespace mozc {

class CandidateFilterTest : public testing::Test {
 protected:
  virtual void SetUp() {
    candidate_freelist_.reset(new FreeList<Segment::Candidate>(1024));
    node_freelist_.reset(new FreeList<Node>(1024));
  }

  virtual void TearDown() {
    candidate_freelist_->Free();
    node_freelist_->Free();
  }

  void GetDefaultNodes(vector<const Node *> *nodes) {
    nodes->clear();
    Node *n1 = NewNode();
    // "てすと"
    n1->value = "\xE3\x81\xA6\xE3\x81\x99\xE3\x81\xA8";
    n1->lid = POSMatcher::GetUnknownId();
    n1->rid = POSMatcher::GetUnknownId();
    nodes->push_back(n1);

    Node *n2 = NewNode();
    // "てすと"
    n2->value = "\xE3\x81\xA6\xE3\x81\x99\xE3\x81\xA8";
    n2->lid = POSMatcher::GetFunctionalId();
    n2->rid = POSMatcher::GetFunctionalId();
    nodes->push_back(n2);
  }

  Node *NewNode() {
    Node *n = node_freelist_->Alloc(1);
    n->Init();
    return n;
  }

  Segment::Candidate *NewCandidate() {
    Segment::Candidate *c = candidate_freelist_->Alloc(1);
    c->Init();
    c->cost = 100;
    c->structure_cost = 100;
    return c;
  }

 private:
  scoped_ptr<FreeList<Segment::Candidate> > candidate_freelist_;
  scoped_ptr<FreeList<Node> > node_freelist_;
};

TEST_F(CandidateFilterTest, FilterTest) {
  CandidateFilter filter;
  vector<const Node *> n;

  GetDefaultNodes(&n);
  Segment::Candidate *c1 = NewCandidate();
  c1->lid = 1;
  c1->rid = 1;
  c1->value = "abc";
  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(c1, n));

  Segment::Candidate *c2 = NewCandidate();
  c2->value = "abc";
  // Same value candidate should be rejected.
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c2, n));

  Segment::Candidate *c3 = NewCandidate();
  c3->structure_cost = INT_MAX;
  c3->value = "def";
  // High structure cost candidate should be rejected.
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c3, n));

  Segment::Candidate *c4 = NewCandidate();
  // Checks if a canidate is active before appending many candidates.
  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(c4, n));

  // Don't filter if lid/rid the same as the top candidate.
  Segment::Candidate *c5 = NewCandidate();
  c5->value = "foo";
  c5->lid = 1;
  c5->rid = 1;
  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(c5, n));


  // Though CandidateFilter may change its limit, 1000 should
  // be always above the limit.
  for (int i = 0; i < 1000; ++i) {
    Segment::Candidate *cand = NewCandidate();
    char buf[10];
    snprintf(buf, sizeof(buf), "%d", i);
    cand->value = string(buf);
    filter.FilterCandidate(cand, n);
  }
  // There will be no more candidates.
  EXPECT_EQ(CandidateFilter::STOP_ENUMERATION,
            filter.FilterCandidate(c4, n));
}

TEST_F(CandidateFilterTest, KatakanaT13N) {
  {
    CandidateFilter filter;
    vector<const Node *> nodes;
    GetDefaultNodes(&nodes);
    // nodes[0] is KatakanaT13N
    Segment::Candidate *c = NewCandidate();
    c->value = "abc";
    Node *n = NewNode();
    n->lid = POSMatcher::GetUnknownId();
    n->rid = POSMatcher::GetUnknownId();
    n->value = "abc";
    nodes[0] = n;
    EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
              filter.FilterCandidate(c, nodes));
  }

  {
    CandidateFilter filter;
    vector<const Node *> nodes;
    GetDefaultNodes(&nodes);
    // nodes[1] is KatakanaT13N
    Segment::Candidate *c = NewCandidate();
    c->value = "abc";
    Node *n = NewNode();
    n->lid = POSMatcher::GetFunctionalId();
    n->rid = POSMatcher::GetFunctionalId();
    n->value = "abc";
    nodes[1] = n;
    EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
              filter.FilterCandidate(c, nodes));
  }

  {
    CandidateFilter filter;
    vector<const Node *> nodes;
    GetDefaultNodes(&nodes);
    // nodes[1] is not a functional word
    Segment::Candidate *c = NewCandidate();
    c->value = "abc";
    Node *n1 = NewNode();
    n1->lid = POSMatcher::GetUnknownId();
    n1->rid = POSMatcher::GetUnknownId();
    n1->value = "abc";
    nodes[0] = n1;
    Node *n2 = NewNode();
    n2->lid = POSMatcher::GetUnknownId();
    n2->rid = POSMatcher::GetUnknownId();
    // "てすと";
    n2->value = "\xE3\x81\xA6\xE3\x81\x99\xE3\x81\xA8";
    nodes[1] = n2;
    EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
              filter.FilterCandidate(c, nodes));
  }
}

TEST_F(CandidateFilterTest, IsolatedWord) {
  CandidateFilter filter;
  vector<const Node *> nodes;
  Segment::Candidate *c = NewCandidate();
  c->value = "abc";

  Node *node = NewNode();
  nodes.push_back(node);
  node->prev = NewNode();
  node->next = NewNode();
  node->lid = POSMatcher::GetIsolatedWordId();
  node->rid = POSMatcher::GetIsolatedWordId();
  node->key = "test";
  node->value = "test";

  node->prev->node_type = Node::NOR_NODE;
  node->next->node_type = Node::EOS_NODE;
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c, nodes));

  node->prev->node_type = Node::BOS_NODE;
  node->next->node_type = Node::NOR_NODE;
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c, nodes));

  node->prev->node_type = Node::NOR_NODE;
  node->next->node_type = Node::NOR_NODE;
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c, nodes));

  node->prev->node_type = Node::BOS_NODE;
  node->next->node_type = Node::EOS_NODE;
  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(c, nodes));
}

TEST_F(CandidateFilterTest, MayHaveMoreCandidates) {
  CandidateFilter filter;
  vector<const Node *> n;
  GetDefaultNodes(&n);

  Segment::Candidate *c1 = NewCandidate();
  c1->value = "abc";
  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(c1, n));

  Segment::Candidate *c2 = NewCandidate();
  c2->value = "abc";
  // Though same value candidate is rejected, enumeration should continue.
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c2, n));

  Segment::Candidate *c3 = NewCandidate();
  c3->structure_cost = INT_MAX;
  c3->value = "def";
  // High structure cost should not Stop enumeration.
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c3, n));

  Segment::Candidate *c4 = NewCandidate();
  c4->cost = INT_MAX;
  c4->structure_cost = INT_MAX;
  c4->value = "ghi";
  // High cost candidate should be rejected.
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c4, n));

  // Insert many valid candidates
  for (int i = 0; i < 50; ++i) {
    Segment::Candidate *tmp = NewCandidate();
    tmp->value = Util::SimpleItoa(i) + "test";
    filter.FilterCandidate(tmp, n);
  }

  Segment::Candidate *c5 = NewCandidate();
  c5->cost = INT_MAX;
  c5->structure_cost = INT_MAX;
  c5->value = "ghi";

  // finally, it returns STOP_ENUMERATION, because
  // filter has seen more than 50 good candidates.
  c5->value = "ghi2";
  EXPECT_EQ(CandidateFilter::STOP_ENUMERATION,
            filter.FilterCandidate(c5, n));
}

TEST_F(CandidateFilterTest, Regression3437022) {
  CandidateFilter filter;
  vector<const Node *> n;
  GetDefaultNodes(&n);

  Segment::Candidate *c1 = NewCandidate();
  c1->key = "test_key";
  c1->value = "test_value";

  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(c1, n));

  SuppressionDictionary *dic
      = SuppressionDictionary::GetSuppressionDictionary();
  dic->Lock();
  dic->AddEntry("test_key", "test_value");
  dic->UnLock();

  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c1, n));

  c1->key = "test_key_suffix";
  c1->value = "test_value_suffix";
  c1->content_key = "test_key";
  c1->content_value = "test_value";

  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c1, n));

  dic->Lock();
  dic->Clear();
  dic->UnLock();

  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(c1, n));
}

TEST_F(CandidateFilterTest, FilterRealtimeConversionTest) {
  CandidateFilter filter;
  vector<const Node *> n;

  n.clear();
  Node *n1 = NewNode();

  n1->key = "PC";
  n1->value = "PC";
  n1->lid = POSMatcher::GetUnknownId();
  n1->rid = POSMatcher::GetUnknownId();
  n.push_back(n1);

  Node *n2 = NewNode();
  // "てすと"
  n2->value = "\xE3\x81\xA6\xE3\x81\x99\xE3\x81\xA8";
  n2->lid = POSMatcher::GetUnknownId();
  n2->rid = POSMatcher::GetUnknownId();
  n.push_back(n2);

  Segment::Candidate *c1 = NewCandidate();
  c1->attributes |= Segment::Candidate::REALTIME_CONVERSION;
  // "PCテスト"
  c1->value = "PC\xE3\x83\x86\xE3\x82\xB9\xE3\x83\x88";
  // Don't filter a candidate because it starts with alphabets and
  // is followed by a non-functional word.
  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(c1, n));
}
}  // namespace mozc