| // Copyright 2010-2015, Google Inc. |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include "storage/lru_storage.h" |
| |
| #include <algorithm> |
| #include <cstdlib> |
| #include <cstring> |
| #include <ctime> |
| #include <map> |
| #include <set> |
| #include <string> |
| #include <vector> |
| |
| #include "base/file_stream.h" |
| #include "base/file_util.h" |
| #include "base/logging.h" |
| #include "base/mmap.h" |
| #include "base/port.h" |
| #include "base/scoped_ptr.h" |
| #include "base/util.h" |
| |
| namespace mozc { |
| namespace storage { |
| |
| namespace { |
| const size_t kMaxLRUSize = 1000000; // 1M |
| const size_t kMaxValueSize = 1024; // 1024 byte |
| |
| template <class T> |
| inline void ReadValue(char **ptr, T *value) { |
| memcpy(value, *ptr, sizeof(*value)); |
| *ptr += sizeof(*value); |
| } |
| |
| uint64 GetFP(const char *ptr) { |
| return *reinterpret_cast<const uint64 *>(ptr); |
| } |
| |
| uint32 GetTimeStamp(const char *ptr) { |
| return *reinterpret_cast<const uint32 *>(ptr + 8); |
| } |
| |
| const char* GetValue(const char *ptr) { |
| return ptr + 12; |
| } |
| |
| void Update(char *ptr) { |
| const uint32 last_access_time = static_cast<uint32>(Util::GetTime()); |
| memcpy(ptr + 8, reinterpret_cast<const char *>(&last_access_time), 4); |
| } |
| |
| void Update(char *ptr, uint64 fp, const char *value, size_t value_size) { |
| const uint32 last_access_time = static_cast<uint32>(Util::GetTime()); |
| memcpy(ptr, reinterpret_cast<const char *>(&fp), 8); |
| memcpy(ptr + 8, reinterpret_cast<const char *>(&last_access_time), 4); |
| memcpy(ptr + 12, value, value_size); |
| } |
| |
| class CompareByTimeStamp { |
| public: |
| bool operator()(const char *a, const char *b) const { |
| return GetTimeStamp(a) > GetTimeStamp(b); |
| } |
| }; |
| } // namespace |
| |
| class LRUStorage::Node { |
| public: |
| Node(): next(NULL), prev(NULL), value(NULL) { |
| } |
| |
| Node *next; |
| Node *prev; |
| char *value; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(Node); |
| }; |
| |
| class LRUStorage::LRUList { |
| public: |
| explicit LRUList(size_t max_size) |
| : max_size_(max_size), size_(0), last_(NULL), top_(NULL) { |
| } |
| |
| ~LRUList() { |
| Clear(); |
| } |
| |
| void Clear() { |
| Node *node = top_; |
| while (node != NULL) { |
| Node *next = node->next; |
| delete node; |
| node = next; |
| } |
| size_ = 0; |
| top_ = last_ = NULL; |
| } |
| |
| Node *Add(char *value) { |
| if (size_ < max_size_) { |
| Node *node = new Node; |
| node->value = value; |
| if (last_ == NULL) { |
| node->prev = NULL; |
| top_ = node; |
| } else { |
| last_->next = node; |
| } |
| node->next = NULL; |
| node->prev = last_; |
| last_ = node; |
| ++size_; |
| return node; |
| } |
| LOG(WARNING) << "LRUList is full"; |
| return NULL; |
| } |
| |
| bool empty() const { |
| return (top_ == NULL); |
| } |
| |
| size_t size() const { |
| return size_; |
| } |
| |
| Node *GetLastNode() { |
| return last_; |
| } |
| |
| void MoveToTop(Node *node) { |
| if (node->prev != NULL) { // this is top |
| Node *prev = node->prev; |
| Node *next = node->next; |
| prev->next = next; |
| if (next == NULL) { |
| last_ = prev; |
| } else { |
| next->prev = prev; |
| } |
| node->next = top_; |
| top_->prev = node; |
| top_ = node; |
| top_->prev = NULL; |
| } |
| } |
| |
| private: |
| size_t max_size_; |
| size_t size_; |
| Node *last_; |
| Node *top_; |
| |
| DISALLOW_COPY_AND_ASSIGN(LRUList); |
| }; |
| |
| LRUStorage *LRUStorage::Create(const char *filename) { |
| scoped_ptr<LRUStorage> n(new LRUStorage); |
| if (!n->Open(filename)) { |
| LOG(ERROR) << "could not open LRUStorage"; |
| return NULL; |
| } |
| return n.release(); |
| } |
| |
| LRUStorage *LRUStorage::Create(const char *filename, |
| size_t value_size, |
| size_t size, |
| uint32 seed) { |
| scoped_ptr<LRUStorage> n(new LRUStorage); |
| if (!n->OpenOrCreate(filename, value_size, size, seed)) { |
| LOG(ERROR) << "could not open LRUStorage"; |
| return NULL; |
| } |
| return n.release(); |
| } |
| |
| bool LRUStorage::CreateStorageFile(const char *filename, |
| size_t value_size, |
| size_t size, |
| uint32 seed) { |
| if (value_size == 0 || value_size > kMaxValueSize) { |
| LOG(ERROR) << "value_size is out of range"; |
| return false; |
| } |
| |
| if (size == 0 || size > kMaxLRUSize) { |
| LOG(ERROR) << "size is out of range"; |
| return false; |
| } |
| |
| if (value_size % 4 != 0) { |
| LOG(ERROR) << "value_size_ must be 4 byte alignment"; |
| return false; |
| } |
| |
| OutputFileStream ofs(filename, ios::binary|ios::out); |
| if (!ofs) { |
| LOG(ERROR) << "cannot open " << filename; |
| return false; |
| } |
| |
| const uint32 value_size_uint32 = static_cast<uint32>(value_size); |
| const uint32 size_uint32 = static_cast<uint32>(size); |
| |
| ofs.write(reinterpret_cast<const char *>(&value_size_uint32), |
| sizeof(value_size_uint32)); |
| ofs.write(reinterpret_cast<const char *>(&size_uint32), |
| sizeof(size_uint32)); |
| ofs.write(reinterpret_cast<const char *>(&seed), |
| sizeof(seed)); |
| vector<char> ary(value_size, '\0'); |
| const uint32 last_access_time = 0; |
| const uint64 fp = 0; |
| |
| for (size_t i = 0; i < size; ++i) { |
| ofs.write(reinterpret_cast<const char *>(&fp), sizeof(fp)); |
| ofs.write(reinterpret_cast<const char *>(&last_access_time), |
| sizeof(last_access_time)); |
| ofs.write(reinterpret_cast<const char *>(&ary[0]), |
| static_cast<std::streamsize>(ary.size() * sizeof(ary[0]))); |
| } |
| |
| return true; |
| } |
| |
| // Reopen file after initializing mapped page. |
| bool LRUStorage::Clear() { |
| // Don't need to clear the page if the lru list is empty |
| if (mmap_.get() == NULL || lru_list_.get() == NULL || |
| lru_list_->size() == 0) { |
| return true; |
| } |
| const size_t offset = |
| sizeof(value_size_) + sizeof(size_) + sizeof(seed_); |
| if (offset >= mmap_->size()) { // should not happen |
| return false; |
| } |
| memset(mmap_->begin() + offset, '\0', mmap_->size() - offset); |
| lru_list_.reset(NULL); |
| map_.clear(); |
| Open(mmap_->begin(), mmap_->size()); |
| return true; |
| } |
| |
| bool LRUStorage::Merge(const char *filename) { |
| LRUStorage target_storage; |
| if (!target_storage.Open(filename)) { |
| return false; |
| } |
| return Merge(target_storage); |
| } |
| |
| bool LRUStorage::Merge(const LRUStorage &storage) { |
| if (storage.value_size() != value_size()) { |
| return false; |
| } |
| |
| if (seed_ != storage.seed_) { |
| return false; |
| } |
| |
| vector<const char *> ary; |
| |
| // this file |
| { |
| const char *begin = begin_; |
| const char *end = end_; |
| while (begin < end) { |
| ary.push_back(begin); |
| begin += (value_size_ + 12); |
| } |
| } |
| |
| // target file |
| { |
| const char *begin = storage.begin_; |
| const char *end = storage.end_; |
| while (begin < end) { |
| ary.push_back(begin); |
| begin += (value_size_ + 12); |
| } |
| } |
| |
| stable_sort(ary.begin(), ary.end(), CompareByTimeStamp()); |
| |
| string buf; |
| set<uint64> seen; // remove duplicated entries. |
| for (size_t i = 0; i < ary.size(); ++i) { |
| if (!seen.insert(GetFP(ary[i])).second) { |
| continue; |
| } |
| buf.append(const_cast<const char *>(ary[i]), value_size_ + 12); |
| } |
| |
| const size_t old_size = static_cast<size_t>(end_ - begin_); |
| const size_t new_size = min(buf.size(), old_size); |
| |
| // TODO(taku): this part is not atomic. |
| // If the converter process is killed while memcpy or memset is running, |
| // the storage data will be broken. |
| memcpy(begin_, buf.data(), new_size); |
| if (new_size < old_size) { |
| memset(begin_ + new_size, '\0', old_size - new_size); |
| } |
| |
| return Open(mmap_->begin(), mmap_->size()); |
| } |
| |
| LRUStorage::LRUStorage() |
| : value_size_(0), |
| size_(0), |
| seed_(0), |
| last_item_(NULL), |
| begin_(NULL), end_(NULL) {} |
| |
| LRUStorage::~LRUStorage() { |
| Close(); |
| } |
| |
| bool LRUStorage::OpenOrCreate(const char *filename, |
| size_t new_value_size, |
| size_t new_size, |
| uint32 new_seed) { |
| if (!FileUtil::FileExists(filename)) { |
| // This is also an expected scenario. Let's create a new data file. |
| VLOG(1) << filename << " does not exist. Creating a new one."; |
| if (!LRUStorage::CreateStorageFile(filename, |
| new_value_size, |
| new_size, new_seed)) { |
| LOG(ERROR) << "CreateStorageFile failed against " << filename; |
| return false; |
| } |
| } |
| |
| if (!Open(filename)) { |
| Close(); |
| LOG(ERROR) << "Failed to open the file or the data is corrupted. " |
| "So try to recreate new file. filename: " << filename; |
| // If the file exists but is corrupted, the following operation may |
| // may fix some problem. However, if the file was temporarily locked |
| // by some processes and now no longer locked, the following operation |
| // is likely to result in a simple permanent data loss. |
| // TODO(yukawa, team): Do not clear the data whenever we can open the |
| // data file and the content is actually valid. |
| if (!LRUStorage::CreateStorageFile(filename, |
| new_value_size, |
| new_size, new_seed)) { |
| LOG(ERROR) << "CreateStorageFile failed"; |
| return false; |
| } |
| if (!Open(filename)) { |
| Close(); |
| LOG(ERROR) << "Open failed after CreateStorageFile. Give up..."; |
| return false; |
| } |
| } |
| |
| // File format has changed |
| if (new_value_size != value_size() || new_size != size()) { |
| Close(); |
| if (!LRUStorage::CreateStorageFile(filename, new_value_size, |
| new_size, new_seed)) { |
| LOG(ERROR) << "CreateStorageFile failed"; |
| return false; |
| } |
| if (!Open(filename)) { |
| Close(); |
| LOG(ERROR) << "Open failed after CreateStorageFile"; |
| return false; |
| } |
| } |
| |
| if (new_value_size != value_size() || new_size != size()) { |
| Close(); |
| LOG(ERROR) << "file is broken"; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool LRUStorage::Open(const char *filename) { |
| mmap_.reset(new Mmap); |
| |
| if (mmap_.get() == NULL) { |
| LOG(ERROR) << "cannot make Mmap object"; |
| return false; |
| } |
| |
| if (!mmap_->Open(filename, "r+")) { |
| LOG(ERROR) << "cannot open " << filename |
| << " with read+write mode"; |
| return false; |
| } |
| |
| if (mmap_->size() < 8) { |
| LOG(ERROR) << "file size is too small"; |
| return false; |
| } |
| |
| filename_ = filename; |
| return Open(mmap_->begin(), mmap_->size()); |
| } |
| |
| bool LRUStorage::Open(char *ptr, size_t ptr_size) { |
| begin_ = ptr; |
| end_ = ptr + ptr_size; |
| |
| uint32 value_size_uint32 = 0; |
| uint32 size_uint32 = 0; |
| |
| ReadValue<uint32>(&begin_, &value_size_uint32); |
| ReadValue<uint32>(&begin_, &size_uint32); |
| ReadValue<uint32>(&begin_, &seed_); |
| |
| value_size_ = static_cast<size_t>(value_size_uint32); |
| size_ = static_cast<size_t>(size_uint32); |
| |
| if (value_size_ % 4 != 0) { |
| LOG(ERROR) << "value_size_ must be 4 byte alignment"; |
| return false; |
| } |
| |
| if (size_ == 0 || size_ > kMaxLRUSize) { |
| LOG(ERROR) << "LRU size is invalid: " << size_; |
| return false; |
| } |
| |
| if (value_size_ == 0 || value_size_ > kMaxValueSize) { |
| LOG(ERROR) << "value_size is invalid: " << value_size_; |
| return false; |
| } |
| |
| const size_t file_size = mmap_->size() - 12; |
| if ((value_size_ + 12) * size_ != file_size) { |
| LOG(ERROR) << "LRU file is broken"; |
| return false; |
| } |
| |
| vector<char *> ary; |
| char *begin = begin_; |
| char *end = end_; |
| while (begin < end) { |
| ary.push_back(begin); |
| begin += (value_size_ + 12); |
| } |
| stable_sort(ary.begin(), ary.end(), |
| CompareByTimeStamp()); |
| |
| lru_list_.reset(new LRUList(size_)); |
| map_.clear(); |
| last_item_ = NULL; |
| for (size_t i = 0; i < ary.size(); ++i) { |
| if (GetTimeStamp(ary[i]) != 0) { |
| Node *node = lru_list_->Add(ary[i]); |
| map_.insert(make_pair(GetFP(ary[i]), node)); |
| } else if (last_item_ == NULL) { |
| last_item_ = ary[i]; |
| } |
| } |
| |
| return true; |
| } |
| |
| void LRUStorage::Close() { |
| filename_.clear(); |
| mmap_.reset(NULL); |
| lru_list_.reset(NULL); |
| map_.clear(); |
| } |
| |
| const char* LRUStorage::Lookup(const string &key) const { |
| uint32 last_access_time = 0; |
| return Lookup(key, &last_access_time); |
| } |
| |
| const char* LRUStorage::Lookup(const string &key, |
| uint32 *last_access_time) const { |
| const uint64 fp = Util::FingerprintWithSeed(key.data(), key.size(), seed_); |
| map<uint64, Node *>::const_iterator it = map_.find(fp); |
| if (it == map_.end()) { |
| return NULL; |
| } |
| *last_access_time = GetTimeStamp(it->second->value); |
| return GetValue(it->second->value); |
| } |
| |
| bool LRUStorage::GetAllValues(vector<string> *values) const { |
| if (lru_list_.get() == NULL) { |
| return false; |
| } |
| DCHECK(values); |
| values->clear(); |
| for (const Node *node = lru_list_->GetLastNode(); |
| node != NULL; |
| node = node->prev) { |
| // Default constructor of string is not applicable |
| // because value's size() must return value_size_. |
| DCHECK(node->value); |
| values->push_back(string(GetValue(node->value), value_size_)); |
| } |
| reverse(values->begin(), values->end()); |
| return true; |
| } |
| |
| bool LRUStorage::Touch(const string &key) { |
| if (lru_list_.get() == NULL) { |
| return false; |
| } |
| |
| const uint64 fp = Util::FingerprintWithSeed(key.data(), key.size(), seed_); |
| map<uint64, Node *>::iterator it = map_.find(fp); |
| if (it != map_.end()) { // find in the cache |
| Update(it->second->value); |
| lru_list_->MoveToTop(it->second); |
| return true; |
| } |
| return false; |
| } |
| |
| bool LRUStorage::Insert(const string &key, const char *value) { |
| if (lru_list_.get() == NULL) { |
| return false; |
| } |
| |
| const uint64 fp = Util::FingerprintWithSeed(key.data(), key.size(), seed_); |
| map<uint64, Node *>::iterator it = map_.find(fp); |
| if (it != map_.end()) { // find in the cache |
| Update(it->second->value, fp, value, value_size_); |
| lru_list_->MoveToTop(it->second); |
| } else if (lru_list_->size() >= size_ || |
| last_item_ == NULL) { // not found, but cache is FULL |
| Node *node = lru_list_->GetLastNode(); |
| const uint64 old_fp = GetFP(node->value); // remove oldest item |
| map<uint64, Node *>::iterator old_it = map_.find(old_fp); |
| if (old_it != map_.end()) { |
| map_.erase(old_it); |
| } |
| lru_list_->MoveToTop(node); |
| Update(node->value, fp, value, value_size_); |
| map_.insert(make_pair(fp, node)); |
| } else if (last_item_ < mmap_->end()) { // not found, cahce is not FULL |
| Node *node = lru_list_->Add(last_item_); |
| lru_list_->MoveToTop(node); |
| Update(node->value, fp, value, value_size_); |
| map_.insert(make_pair(fp, node)); |
| last_item_ += (value_size_ + 12); |
| if (last_item_ >= mmap_->end()) { |
| last_item_ = NULL; |
| } |
| } else { |
| LOG(ERROR) << "insertion failed"; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool LRUStorage::TryInsert(const string &key, const char *value) { |
| if (lru_list_.get() == NULL) { |
| return false; |
| } |
| |
| const uint64 fp = Util::FingerprintWithSeed(key.data(), key.size(), seed_); |
| map<uint64, Node *>::iterator it = map_.find(fp); |
| if (it != map_.end()) { // find in the cache |
| Update(it->second->value, fp, value, value_size_); |
| lru_list_->MoveToTop(it->second); |
| } |
| |
| return true; |
| } |
| |
| size_t LRUStorage::value_size() const { |
| return value_size_; |
| } |
| |
| size_t LRUStorage::size() const { |
| return size_; |
| } |
| |
| size_t LRUStorage::used_size() const { |
| return lru_list_.get() == NULL ? 0 : lru_list_->size(); |
| } |
| |
| uint32 LRUStorage::seed() const { |
| return seed_; |
| } |
| |
| const string &LRUStorage::filename() const { |
| return filename_; |
| } |
| |
| void LRUStorage::Write(size_t i, |
| uint64 fp, |
| const string &value, |
| uint32 last_access_time) { |
| DCHECK_LT(i, size_); |
| char *ptr = begin_ + (i * (value_size_ + 12)); |
| memcpy(ptr, reinterpret_cast<const char *>(&fp), 8); |
| memcpy(ptr + 8, reinterpret_cast<const char *>(&last_access_time), 4); |
| if (value.size() == value_size_) { |
| memcpy(ptr + 12, value.data(), value_size_); |
| } else { |
| LOG(ERROR) << "value size is not " << value_size_ << " byte."; |
| } |
| } |
| |
| void LRUStorage::Read(size_t i, |
| uint64 *fp, |
| string *value, |
| uint32 *last_access_time) const { |
| DCHECK_LT(i, size_); |
| const char *ptr = begin_ + (i * (value_size_ + 12)); |
| *fp = GetFP(ptr); |
| value->assign(GetValue(ptr), value_size_); |
| *last_access_time = GetTimeStamp(ptr); |
| } |
| |
| } // namespace storage |
| } // namespace mozc |