diff --git a/src/data/version/mozc_version_template.bzl b/src/data/version/mozc_version_template.bzl index bbeacca..982a4d5 100644 --- a/src/data/version/mozc_version_template.bzl +++ b/src/data/version/mozc_version_template.bzl @@ -30,7 +30,7 @@ MAJOR=2 MINOR=23 -BUILD=2811 +BUILD=2812 REVISION=102 # This version represents the version of Mozc IME engine (converter, predictor, # etc.). This version info is included both in the Mozc server and in the Mozc diff --git a/src/dictionary/file/codec.cc b/src/dictionary/file/codec.cc index 6bc0e9d..1f99b8d 100644 --- a/src/dictionary/file/codec.cc +++ b/src/dictionary/file/codec.cc @@ -29,8 +29,7 @@ #include "dictionary/file/codec.h" -#include - +#include "base/hash.h" #include "base/logging.h" #include "base/port.h" #include "base/util.h" @@ -41,44 +40,75 @@ namespace mozc { namespace dictionary { -DictionaryFileCodec::DictionaryFileCodec() : filemagic_(20110701) {} +DictionaryFileCodec::DictionaryFileCodec() + : seed_(2135654146), filemagic_(20110701) {} -DictionaryFileCodec::~DictionaryFileCodec() {} +DictionaryFileCodec::~DictionaryFileCodec() = default; void DictionaryFileCodec::WriteSections( const std::vector §ions, std::ostream *ofs) const { DCHECK(ofs); WriteHeader(ofs); - for (size_t i = 0; i < sections.size(); ++i) { - WriteSection(sections[i], ofs); + + if (sections.size() == 4) { + // In production, the number of sections equals 4. In this case, write the + // sections in the following deterministic order. This order was determined + // by random shuffle for engine version 24 but it's now made deterministic + // to obsolete DictionaryFileCodec. + for (size_t i : {0, 2, 1, 3}) { + WriteSection(sections[i], ofs); + } + } else { + // Some tests don't have four sections. In this case, simply write sections + // in given order. 
+ for (const auto §ion : sections) { + WriteSection(section, ofs); + } } + filecodec_util::WriteInt(0, ofs); } void DictionaryFileCodec::WriteHeader(std::ostream *ofs) const { DCHECK(ofs); filecodec_util::WriteInt(filemagic_, ofs); + filecodec_util::WriteInt(seed_, ofs); } void DictionaryFileCodec::WriteSection(const DictionaryFileSection §ion, std::ostream *ofs) const { DCHECK(ofs); - const string &name = GetSectionName(section.name); - VLOG(1) << "section=" << name << " length=" << section.len; + const string &name = section.name; + // name should be encoded + // uint64 needs just 8 bytes. + DCHECK_EQ(8, name.size()); + string escaped; + Util::Escape(name, &escaped); + VLOG(1) << "section=" << escaped << " length=" << section.len; filecodec_util::WriteInt(section.len, ofs); - - const int name_len = name.size() + 1; // including '\0' - ofs->write(name.data(), name_len); - Pad4(name_len, ofs); + ofs->write(name.data(), name.size()); ofs->write(section.ptr, section.len); Pad4(section.len, ofs); } +void DictionaryFileCodec::Pad4(int length, std::ostream *ofs) { + DCHECK(ofs); + for (int i = length; (i % 4) != 0; ++i) { + (*ofs) << '\0'; + } +} + string DictionaryFileCodec::GetSectionName(const string &name) const { - // Use the given string as is - return name; + VLOG(1) << "seed\t" << seed_; + const uint64 name_fp = Hash::FingerprintWithSeed(name, seed_); + const string fp_string(reinterpret_cast(&name_fp), + sizeof(name_fp)); + string escaped; + Util::Escape(fp_string, &escaped); + VLOG(1) << "Section name for " << name << ": " << escaped; + return fp_string; } bool DictionaryFileCodec::ReadSections( @@ -87,18 +117,21 @@ DCHECK(sections); const char *ptr = image; const int filemagic = filecodec_util::ReadInt(ptr); - CHECK(filemagic == filemagic_) << - "invalid dictionary file magic (recompile dictionary?)"; + CHECK(filemagic == filemagic_) + << "invalid dictionary file magic (recompile dictionary?)"; ptr += sizeof(filemagic); - + seed_ = 
filecodec_util::ReadInt(ptr); + ptr += sizeof(seed_); int size; while ((size = filecodec_util::ReadInt(ptr))) { ptr += sizeof(size); - const string name(ptr); - VLOG(1) << "section=" << name << " length=" << size; - const int name_len = name.size() + 1; - ptr += name_len; - ptr += filecodec_util::Rup4(name_len); + // fingerprint name + const string name(ptr, sizeof(uint64)); + ptr += sizeof(uint64); + + string escaped; + Util::Escape(name, &escaped); + VLOG(1) << "section=" << escaped << " length=" << size; sections->push_back(DictionaryFileSection(ptr, size, name)); @@ -111,13 +144,5 @@ return true; } -// Write padding -void DictionaryFileCodec::Pad4(int length, std::ostream *ofs) { - DCHECK(ofs); - for (int i = length; (i % 4) != 0; ++i) { - (*ofs) << static_cast(Util::Random(CHAR_MAX)); - } -} - } // namespace dictionary } // namespace mozc diff --git a/src/dictionary/file/codec.h b/src/dictionary/file/codec.h index c7ae1ae..3371541 100644 --- a/src/dictionary/file/codec.h +++ b/src/dictionary/file/codec.h @@ -46,13 +46,14 @@ class DictionaryFileCodec : public DictionaryFileCodecInterface { public: DictionaryFileCodec(); - virtual ~DictionaryFileCodec(); + ~DictionaryFileCodec() override; - virtual void WriteSections(const std::vector §ions, - std::ostream *ofs) const; - virtual bool ReadSections(const char *image, int length, - std::vector *sections) const; - virtual string GetSectionName(const string &name) const; + void WriteSections(const std::vector §ions, + std::ostream *ofs) const override; + bool ReadSections( + const char *image, int length, + std::vector *sections) const override; + string GetSectionName(const string &name) const override; private: void WriteHeader(std::ostream *ofs) const; @@ -61,6 +62,9 @@ static void Pad4(int length, std::ostream *ofs); + // Seed value for the section name fingerprint. + // It is mutable because ReadSections(), a const method, assigns to it. 
+ mutable int seed_; // Magic value for simple file validation const int filemagic_; diff --git a/src/dictionary/file/codec_factory.cc b/src/dictionary/file/codec_factory.cc index d100b88..9176703 100644 --- a/src/dictionary/file/codec_factory.cc +++ b/src/dictionary/file/codec_factory.cc @@ -29,28 +29,21 @@ #include "dictionary/file/codec_factory.h" - #include "base/singleton.h" #include "dictionary/file/codec.h" #include "dictionary/file/codec_interface.h" - namespace mozc { namespace dictionary { - - namespace { DictionaryFileCodecInterface *g_dictionary_file_codec = nullptr; } // namespace -typedef DictionaryFileCodec DefaultCodec; - DictionaryFileCodecInterface *DictionaryFileCodecFactory::GetCodec() { if (g_dictionary_file_codec == nullptr) { - return Singleton::get(); - } else { - return g_dictionary_file_codec; + return Singleton::get(); } + return g_dictionary_file_codec; } void DictionaryFileCodecFactory::SetCodec(DictionaryFileCodecInterface *codec) { diff --git a/src/dictionary/file/codec_factory.h b/src/dictionary/file/codec_factory.h index 0c101f3..06bafea 100644 --- a/src/dictionary/file/codec_factory.h +++ b/src/dictionary/file/codec_factory.h @@ -38,7 +38,6 @@ namespace mozc { namespace dictionary { - class DictionaryFileCodecFactory { public: // Returns the singleton instance. 
diff --git a/src/dictionary/file/codec_test.cc b/src/dictionary/file/codec_test.cc index 046bf28..824157f 100644 --- a/src/dictionary/file/codec_test.cc +++ b/src/dictionary/file/codec_test.cc @@ -50,12 +50,12 @@ CodecTest() : test_file_(FLAGS_test_tmpdir + "testfile.txt") {} protected: - virtual void SetUp() { + void SetUp() override { DictionaryFileCodecFactory::SetCodec(NULL); FileUtil::Unlink(test_file_); } - virtual void TearDown() { + void TearDown() override { // Reset to default setting DictionaryFileCodecFactory::SetCodec(NULL); FileUtil::Unlink(test_file_); @@ -185,10 +185,10 @@ EXPECT_TRUE(CheckValue(sections[index], "Value 1 test test")); } -TEST_F(CodecTest, CodecTest) { - std::unique_ptr default_codec( +TEST_F(CodecTest, RandomizedCodecTest) { + std::unique_ptr internal_codec( new DictionaryFileCodec); - DictionaryFileCodecFactory::SetCodec(default_codec.get()); + DictionaryFileCodecFactory::SetCodec(internal_codec.get()); const DictionaryFileCodecInterface *codec = DictionaryFileCodecFactory::GetCodec(); EXPECT_TRUE(codec != NULL); @@ -222,7 +222,6 @@ EXPECT_TRUE(CheckValue(sections[index], "Value 1 test test")); } - } // namespace } // namespace dictionary } // namespace mozc