Codebase list mozc / upstream/0.13.464.102 converter / quality_regression_test.cc
upstream/0.13.464.102

Tree @upstream/0.13.464.102 (Download .tar.gz)

quality_regression_test.cc @upstream/0.13.464.102raw · history · blame

// Copyright 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <algorithm>
#include <map>
#include <string>
#include <vector>
#include "base/base.h"
#include "base/file_stream.h"
#include "base/util.h"
#include "converter/segments.h"
#include "converter/converter_interface.h"
#include "converter/quality_regression_test_data.h"
#include "session/config_handler.h"
#include "session/config.pb.h"
#include "testing/base/public/gunit.h"

// Dry-run switch: do not raise an exception even if an error occurs.
// NOTE(review): FLAGS_dryrun is not read anywhere in the visible part of this
// file -- confirm whether it is still consumed somewhere.
DEFINE_bool(dryrun, false, "dryrun mode");
// Only declared here (presumably defined by the test framework -- confirm).
// BasicTest passes its value to Util::SetUserProfileDirectory().
DECLARE_string(test_tmpdir);

namespace mozc {
namespace {

// Command names recognized by ConvertAndTest().
//
// "Conversion ..." commands run StartConversion(); "Prediction ..." commands
// run StartPrediction().  The "Not Expect" variants invert the pass/fail
// verdict of the rank check.
const char kConversionExpect[] = "Conversion Expect";
const char kConversionNotExpect[] = "Conversion Not Expect";
const char kPredictionExpect[] = "Prediction Expect";
const char kPredictionNotExpect[] = "Prediction Not Expect";

// copied from evaluation/quality_regression/evaluator.cc
int GetRank(const string &value, const Segments *segments,
              size_t current_pos, size_t current_segment) {
  if (current_segment == segments->segments_size()) {
    if (current_pos == value.size()) {
      return 0;
    } else {
      return -1;
    }
  }
  const Segment &seg = segments->segment(current_segment);
  for (size_t i = 0; i < seg.candidates_size(); ++i) {
    const string &cand_value = seg.candidate(i).value;
    const size_t len = cand_value.size();
    if (current_pos + len > value.size()) {
      continue;
    }
    if (strncmp(cand_value.c_str(),
                value.c_str() + current_pos, len) != 0) {
      continue;
    }
    const int rest = GetRank(value, segments,
                             current_pos + len, current_segment + 1);
    if (rest == -1) {
      continue;
    }
    return i + rest;
  }
  return -1;
}


bool ConvertAndTest(const ConverterInterface *converter,
                    const string &key,
                    const string &expected_value,
                    const string &command,
                    uint32 expected_rank,
                    Segments *segments,
                    string *actual_value) {
  CHECK(segments);
  CHECK(actual_value);

  segments->Clear();
  converter->ResetConversion(segments);
  actual_value->clear();

  const size_t expand_size = expected_rank + 32;

  if (command == kConversionExpect ||
      command == kConversionNotExpect) {
    converter->StartConversion(segments, key);
    if (expected_rank > 0) {
      converter->GetCandidates(segments, 0, expand_size);
    }
  } else if (command == kPredictionExpect ||
             command == kPredictionNotExpect) {
    converter->StartPrediction(segments, key);
  } else {
    LOG(FATAL) << "Unknown command: " << command;
  }

  // No results is OK if "prediction not expect" command
  if (command == kPredictionNotExpect &&
      (segments->segments_size() == 0 ||
       (segments->segments_size() >= 1 &&
        segments->segment(0).candidates_size() == 0))) {
    return true;
  }

  for (size_t i = 0; i < segments->segments_size(); ++i) {
    *actual_value += segments->segment(i).candidate(0).value;
  }

  const int32 actual_rank = GetRank(expected_value, segments, 0, 0);

  bool result = (actual_rank >= 0 && actual_rank <= expected_rank);

  if (command == kConversionNotExpect ||
      command == kPredictionNotExpect) {
    result = !result;
  }

  return result;
}

// Runs every entry of kTestData through the converter and checks, per group,
// that failures stay within the tolerance each entry allows.
//
// Each kTestData entry is a tab-separated line with at least six fields:
// group, key, expected value, command, expected rank, expected ratio.
//
// A passing case is stored with the dummy ratio -1.0, a failing case with its
// own expected ratio.  After sorting a group in ascending order, the entry at
// index n is accepted only if its stored ratio is strictly smaller than
// n / (group size).
TEST(QualityRegressionTest, BasicTest) {
  typedef vector<pair<float, string> > ScoredLines;
  typedef map<string, ScoredLines> GroupedResults;

  ConverterInterface *converter = ConverterFactory::GetConverter();
  CHECK(converter);

  Util::SetUserProfileDirectory(FLAGS_test_tmpdir);

  // Run with the default configuration.
  config::Config config;
  config::ConfigHandler::GetDefaultConfig(&config);
  config::ConfigHandler::SetConfig(config);

  Segments segments;
  GroupedResults results;

  // Phase 1: evaluate every test case and bucket the outcome by group.
  for (size_t i = 0; i < arraysize(kTestData); ++i) {
    string line = kTestData[i];
    vector<string> tokens;
    Util::SplitStringUsing(line, "\t", &tokens);
    CHECK_GE(tokens.size(), 6);

    const string &group = tokens[0];
    const string &key = tokens[1];
    const string &expected_value = tokens[2];
    const string &command = tokens[3];
    const uint32 expected_rank = atoi(tokens[4].c_str());
    const float expected_ratio = atof(tokens[5].c_str());
    CHECK_GT(expected_ratio, 0.0);
    CHECK_LE(expected_ratio, 1.0);

    string actual_value;
    const bool test_result = ConvertAndTest(converter, key, expected_value,
                                            command, expected_rank,
                                            &segments, &actual_value);

    // Keep the actual output next to the test line for diagnostics.
    line.append("\tActual: ").append(actual_value);

    // use "-1.0" as a dummy expected ratio for cases that passed
    const float score = test_result ? -1.0f : expected_ratio;
    results[group].push_back(make_pair(score, line));
  }

  // Phase 2: per group, sort and compare each entry against its position.
  for (GroupedResults::iterator group_it = results.begin();
       group_it != results.end(); ++group_it) {
    ScoredLines &entries = group_it->second;
    sort(entries.begin(), entries.end());

    const size_t total = entries.size();
    size_t num_correct = 0;
    // Walk from the last entry down to the first.
    for (size_t n = total; n-- > 0;) {
      const float expected_ratio = entries[n].first;
      const float actual_ratio = 1.0 * n / total;
      const bool passed = (expected_ratio < 0);
      if (passed) {
        ++num_correct;
      }
      LOG_IF(INFO, expected_ratio >= 0.0) << "Error: " << entries[n].second;
      EXPECT_TRUE(expected_ratio < actual_ratio) << entries[n].second;
    }
    LOG(INFO) << "Accuracy: " << group_it->first << " "
              << 1.0 * num_correct / total;
  }
}
}  // namespace
}  // namespace mozc