1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
5 #include "leveldb/db.h"
6 #include "leveldb/filter_policy.h"
7 #include "db/db_impl.h"
8 #include "db/filename.h"
9 #include "db/version_set.h"
10 #include "db/write_batch_internal.h"
11 #include "leveldb/cache.h"
12 #include "leveldb/env.h"
13 #include "leveldb/table.h"
14 #include "util/hash.h"
15 #include "util/logging.h"
16 #include "util/mutexlock.h"
17 #include "util/testharness.h"
18 #include "util/testutil.h"
22 static std::string RandomString(Random* rnd, int len) {
24 test::RandomString(rnd, len, &r);
34 AtomicCounter() : count_(0) { }
38 void IncrementBy(int count) {
52 void DelayMilliseconds(int millis) {
53 Env::Default()->SleepForMicroseconds(millis * 1000);
57 // Special Env used to delay background operations
58 class SpecialEnv : public EnvWrapper {
60 // sstable/log Sync() calls are blocked while this pointer is non-NULL.
61 port::AtomicPointer delay_data_sync_;
63 // sstable/log Sync() calls return an error.
64 port::AtomicPointer data_sync_error_;
66 // Simulate no-space errors while this pointer is non-NULL.
67 port::AtomicPointer no_space_;
69 // Simulate non-writable file system while this pointer is non-NULL
70 port::AtomicPointer non_writable_;
72 // Force sync of manifest files to fail while this pointer is non-NULL
73 port::AtomicPointer manifest_sync_error_;
75 // Force write to manifest files to fail while this pointer is non-NULL
76 port::AtomicPointer manifest_write_error_;
78 bool count_random_reads_;
79 AtomicCounter random_read_counter_;
81 explicit SpecialEnv(Env* base) : EnvWrapper(base) {
82 delay_data_sync_.Release_Store(NULL);
83 data_sync_error_.Release_Store(NULL);
84 no_space_.Release_Store(NULL);
85 non_writable_.Release_Store(NULL);
86 count_random_reads_ = false;
87 manifest_sync_error_.Release_Store(NULL);
88 manifest_write_error_.Release_Store(NULL);
91 Status NewWritableFile(const std::string& f, WritableFile** r) {
92 class DataFile : public WritableFile {
98 DataFile(SpecialEnv* env, WritableFile* base)
102 ~DataFile() { delete base_; }
103 Status Append(const Slice& data) {
104 if (env_->no_space_.Acquire_Load() != NULL) {
105 // Drop writes on the floor
108 return base_->Append(data);
111 Status Close() { return base_->Close(); }
112 Status Flush() { return base_->Flush(); }
114 if (env_->data_sync_error_.Acquire_Load() != NULL) {
115 return Status::IOError("simulated data sync error");
117 while (env_->delay_data_sync_.Acquire_Load() != NULL) {
118 DelayMilliseconds(100);
120 return base_->Sync();
123 class ManifestFile : public WritableFile {
128 ManifestFile(SpecialEnv* env, WritableFile* b) : env_(env), base_(b) { }
129 ~ManifestFile() { delete base_; }
130 Status Append(const Slice& data) {
131 if (env_->manifest_write_error_.Acquire_Load() != NULL) {
132 return Status::IOError("simulated writer error");
134 return base_->Append(data);
137 Status Close() { return base_->Close(); }
138 Status Flush() { return base_->Flush(); }
140 if (env_->manifest_sync_error_.Acquire_Load() != NULL) {
141 return Status::IOError("simulated sync error");
143 return base_->Sync();
148 if (non_writable_.Acquire_Load() != NULL) {
149 return Status::IOError("simulated write error");
152 Status s = target()->NewWritableFile(f, r);
154 if (strstr(f.c_str(), ".ldb") != NULL ||
155 strstr(f.c_str(), ".log") != NULL) {
156 *r = new DataFile(this, *r);
157 } else if (strstr(f.c_str(), "MANIFEST") != NULL) {
158 *r = new ManifestFile(this, *r);
164 Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) {
165 class CountingFile : public RandomAccessFile {
167 RandomAccessFile* target_;
168 AtomicCounter* counter_;
170 CountingFile(RandomAccessFile* target, AtomicCounter* counter)
171 : target_(target), counter_(counter) {
173 virtual ~CountingFile() { delete target_; }
174 virtual Status Read(uint64_t offset, size_t n, Slice* result,
175 char* scratch) const {
176 counter_->Increment();
177 return target_->Read(offset, n, result, scratch);
181 Status s = target()->NewRandomAccessFile(f, r);
182 if (s.ok() && count_random_reads_) {
183 *r = new CountingFile(*r, &random_read_counter_);
191 const FilterPolicy* filter_policy_;
193 // Sequence of option configurations to try
207 Options last_options_;
209 DBTest() : option_config_(kDefault),
210 env_(new SpecialEnv(Env::Default())) {
211 filter_policy_ = NewBloomFilterPolicy(10);
212 dbname_ = test::TmpDir() + "/db_test";
213 DestroyDB(dbname_, Options());
220 DestroyDB(dbname_, Options());
222 delete filter_policy_;
225 // Switch to a fresh database with the next option configuration to
226 // test. Return false if there are no more configurations to test.
227 bool ChangeOptions() {
229 if (option_config_ >= kEnd) {
237 // Return the current option configuration.
238 Options CurrentOptions() {
240 switch (option_config_) {
242 options.filter_policy = filter_policy_;
245 options.compression = kNoCompression;
254 return reinterpret_cast<DBImpl*>(db_);
257 void Reopen(Options* options = NULL) {
258 ASSERT_OK(TryReopen(options));
266 void DestroyAndReopen(Options* options = NULL) {
269 DestroyDB(dbname_, Options());
270 ASSERT_OK(TryReopen(options));
273 Status TryReopen(Options* options) {
277 if (options != NULL) {
280 opts = CurrentOptions();
281 opts.create_if_missing = true;
283 last_options_ = opts;
285 return DB::Open(opts, dbname_, &db_);
288 Status Put(const std::string& k, const std::string& v) {
289 return db_->Put(WriteOptions(), k, v);
292 Status Delete(const std::string& k) {
293 return db_->Delete(WriteOptions(), k);
296 std::string Get(const std::string& k, const Snapshot* snapshot = NULL) {
298 options.snapshot = snapshot;
300 Status s = db_->Get(options, k, &result);
301 if (s.IsNotFound()) {
302 result = "NOT_FOUND";
303 } else if (!s.ok()) {
304 result = s.ToString();
309 // Return a string that contains all key,value pairs in order,
310 // formatted like "(k1->v1)(k2->v2)".
311 std::string Contents() {
312 std::vector<std::string> forward;
314 Iterator* iter = db_->NewIterator(ReadOptions());
315 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
316 std::string s = IterStatus(iter);
317 result.push_back('(');
319 result.push_back(')');
320 forward.push_back(s);
323 // Check reverse iteration results are the reverse of forward results
325 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
326 ASSERT_LT(matched, forward.size());
327 ASSERT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]);
330 ASSERT_EQ(matched, forward.size());
336 std::string AllEntriesFor(const Slice& user_key) {
337 Iterator* iter = dbfull()->TEST_NewInternalIterator();
338 InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
339 iter->Seek(target.Encode());
341 if (!iter->status().ok()) {
342 result = iter->status().ToString();
346 while (iter->Valid()) {
347 ParsedInternalKey ikey;
348 if (!ParseInternalKey(iter->key(), &ikey)) {
349 result += "CORRUPTED";
351 if (last_options_.comparator->Compare(ikey.user_key, user_key) != 0) {
360 result += iter->value().ToString();
378 int NumTableFilesAtLevel(int level) {
379 std::string property;
381 db_->GetProperty("leveldb.num-files-at-level" + NumberToString(level),
383 return atoi(property.c_str());
386 int TotalTableFiles() {
388 for (int level = 0; level < config::kNumLevels; level++) {
389 result += NumTableFilesAtLevel(level);
394 // Return spread of files per level
395 std::string FilesPerLevel() {
397 int last_non_zero_offset = 0;
398 for (int level = 0; level < config::kNumLevels; level++) {
399 int f = NumTableFilesAtLevel(level);
401 snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
404 last_non_zero_offset = result.size();
407 result.resize(last_non_zero_offset);
412 std::vector<std::string> files;
413 env_->GetChildren(dbname_, &files);
414 return static_cast<int>(files.size());
417 uint64_t Size(const Slice& start, const Slice& limit) {
418 Range r(start, limit);
420 db_->GetApproximateSizes(&r, 1, &size);
424 void Compact(const Slice& start, const Slice& limit) {
425 db_->CompactRange(&start, &limit);
428 // Do n memtable compactions, each of which produces an sstable
429 // covering the range [small,large].
430 void MakeTables(int n, const std::string& small, const std::string& large) {
431 for (int i = 0; i < n; i++) {
434 dbfull()->TEST_CompactMemTable();
438 // Prevent pushing of new sstables into deeper levels by adding
439 // tables that cover a specified range to all levels.
440 void FillLevels(const std::string& smallest, const std::string& largest) {
441 MakeTables(config::kNumLevels, smallest, largest);
444 void DumpFileCounts(const char* label) {
445 fprintf(stderr, "---\n%s:\n", label);
446 fprintf(stderr, "maxoverlap: %lld\n",
447 static_cast<long long>(
448 dbfull()->TEST_MaxNextLevelOverlappingBytes()));
449 for (int level = 0; level < config::kNumLevels; level++) {
450 int num = NumTableFilesAtLevel(level);
452 fprintf(stderr, " level %3d : %d files\n", level, num);
457 std::string DumpSSTableList() {
458 std::string property;
459 db_->GetProperty("leveldb.sstables", &property);
463 std::string IterStatus(Iterator* iter) {
466 result = iter->key().ToString() + "->" + iter->value().ToString();
468 result = "(invalid)";
473 bool DeleteAnSSTFile() {
474 std::vector<std::string> filenames;
475 ASSERT_OK(env_->GetChildren(dbname_, &filenames));
478 for (size_t i = 0; i < filenames.size(); i++) {
479 if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) {
480 ASSERT_OK(env_->DeleteFile(TableFileName(dbname_, number)));
487 // Returns number of files renamed.
488 int RenameLDBToSST() {
489 std::vector<std::string> filenames;
490 ASSERT_OK(env_->GetChildren(dbname_, &filenames));
493 int files_renamed = 0;
494 for (size_t i = 0; i < filenames.size(); i++) {
495 if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) {
496 const std::string from = TableFileName(dbname_, number);
497 const std::string to = SSTTableFileName(dbname_, number);
498 ASSERT_OK(env_->RenameFile(from, to));
502 return files_renamed;
506 TEST(DBTest, Empty) {
508 ASSERT_TRUE(db_ != NULL);
509 ASSERT_EQ("NOT_FOUND", Get("foo"));
510 } while (ChangeOptions());
513 TEST(DBTest, ReadWrite) {
515 ASSERT_OK(Put("foo", "v1"));
516 ASSERT_EQ("v1", Get("foo"));
517 ASSERT_OK(Put("bar", "v2"));
518 ASSERT_OK(Put("foo", "v3"));
519 ASSERT_EQ("v3", Get("foo"));
520 ASSERT_EQ("v2", Get("bar"));
521 } while (ChangeOptions());
524 TEST(DBTest, PutDeleteGet) {
526 ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1"));
527 ASSERT_EQ("v1", Get("foo"));
528 ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2"));
529 ASSERT_EQ("v2", Get("foo"));
530 ASSERT_OK(db_->Delete(WriteOptions(), "foo"));
531 ASSERT_EQ("NOT_FOUND", Get("foo"));
532 } while (ChangeOptions());
535 TEST(DBTest, GetFromImmutableLayer) {
537 Options options = CurrentOptions();
539 options.write_buffer_size = 100000; // Small write buffer
542 ASSERT_OK(Put("foo", "v1"));
543 ASSERT_EQ("v1", Get("foo"));
545 env_->delay_data_sync_.Release_Store(env_); // Block sync calls
546 Put("k1", std::string(100000, 'x')); // Fill memtable
547 Put("k2", std::string(100000, 'y')); // Trigger compaction
548 ASSERT_EQ("v1", Get("foo"));
549 env_->delay_data_sync_.Release_Store(NULL); // Release sync calls
550 } while (ChangeOptions());
553 TEST(DBTest, GetFromVersions) {
555 ASSERT_OK(Put("foo", "v1"));
556 dbfull()->TEST_CompactMemTable();
557 ASSERT_EQ("v1", Get("foo"));
558 } while (ChangeOptions());
561 TEST(DBTest, GetSnapshot) {
563 // Try with both a short key and a long key
564 for (int i = 0; i < 2; i++) {
565 std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
566 ASSERT_OK(Put(key, "v1"));
567 const Snapshot* s1 = db_->GetSnapshot();
568 ASSERT_OK(Put(key, "v2"));
569 ASSERT_EQ("v2", Get(key));
570 ASSERT_EQ("v1", Get(key, s1));
571 dbfull()->TEST_CompactMemTable();
572 ASSERT_EQ("v2", Get(key));
573 ASSERT_EQ("v1", Get(key, s1));
574 db_->ReleaseSnapshot(s1);
576 } while (ChangeOptions());
579 TEST(DBTest, GetLevel0Ordering) {
581 // Check that we process level-0 files in correct order. The code
582 // below generates two level-0 files where the earlier one comes
583 // before the later one in the level-0 file list since the earlier
584 // one has a smaller "smallest" key.
585 ASSERT_OK(Put("bar", "b"));
586 ASSERT_OK(Put("foo", "v1"));
587 dbfull()->TEST_CompactMemTable();
588 ASSERT_OK(Put("foo", "v2"));
589 dbfull()->TEST_CompactMemTable();
590 ASSERT_EQ("v2", Get("foo"));
591 } while (ChangeOptions());
594 TEST(DBTest, GetOrderedByLevels) {
596 ASSERT_OK(Put("foo", "v1"));
598 ASSERT_EQ("v1", Get("foo"));
599 ASSERT_OK(Put("foo", "v2"));
600 ASSERT_EQ("v2", Get("foo"));
601 dbfull()->TEST_CompactMemTable();
602 ASSERT_EQ("v2", Get("foo"));
603 } while (ChangeOptions());
606 TEST(DBTest, GetPicksCorrectFile) {
608 // Arrange to have multiple files in a non-level-0 level.
609 ASSERT_OK(Put("a", "va"));
611 ASSERT_OK(Put("x", "vx"));
613 ASSERT_OK(Put("f", "vf"));
615 ASSERT_EQ("va", Get("a"));
616 ASSERT_EQ("vf", Get("f"));
617 ASSERT_EQ("vx", Get("x"));
618 } while (ChangeOptions());
621 TEST(DBTest, GetEncountersEmptyLevel) {
623 // Arrange for the following to happen:
624 // * sstable A in level 0
625 // * nothing in level 1
626 // * sstable B in level 2
627 // Then do enough Get() calls to arrange for an automatic compaction
628 // of sstable A. A bug would cause the compaction to be marked as
629 // occurring at level 1 (instead of the correct level 0).
631 // Step 1: First place sstables in levels 0 and 2
632 int compaction_count = 0;
633 while (NumTableFilesAtLevel(0) == 0 ||
634 NumTableFilesAtLevel(2) == 0) {
635 ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2";
639 dbfull()->TEST_CompactMemTable();
642 // Step 2: clear level 1 if necessary.
643 dbfull()->TEST_CompactRange(1, NULL, NULL);
644 ASSERT_EQ(NumTableFilesAtLevel(0), 1);
645 ASSERT_EQ(NumTableFilesAtLevel(1), 0);
646 ASSERT_EQ(NumTableFilesAtLevel(2), 1);
648 // Step 3: read a bunch of times
649 for (int i = 0; i < 1000; i++) {
650 ASSERT_EQ("NOT_FOUND", Get("missing"));
653 // Step 4: Wait for compaction to finish
654 DelayMilliseconds(1000);
656 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
657 } while (ChangeOptions());
660 TEST(DBTest, IterEmpty) {
661 Iterator* iter = db_->NewIterator(ReadOptions());
664 ASSERT_EQ(IterStatus(iter), "(invalid)");
667 ASSERT_EQ(IterStatus(iter), "(invalid)");
670 ASSERT_EQ(IterStatus(iter), "(invalid)");
675 TEST(DBTest, IterSingle) {
676 ASSERT_OK(Put("a", "va"));
677 Iterator* iter = db_->NewIterator(ReadOptions());
680 ASSERT_EQ(IterStatus(iter), "a->va");
682 ASSERT_EQ(IterStatus(iter), "(invalid)");
684 ASSERT_EQ(IterStatus(iter), "a->va");
686 ASSERT_EQ(IterStatus(iter), "(invalid)");
689 ASSERT_EQ(IterStatus(iter), "a->va");
691 ASSERT_EQ(IterStatus(iter), "(invalid)");
693 ASSERT_EQ(IterStatus(iter), "a->va");
695 ASSERT_EQ(IterStatus(iter), "(invalid)");
698 ASSERT_EQ(IterStatus(iter), "a->va");
700 ASSERT_EQ(IterStatus(iter), "(invalid)");
703 ASSERT_EQ(IterStatus(iter), "a->va");
705 ASSERT_EQ(IterStatus(iter), "(invalid)");
708 ASSERT_EQ(IterStatus(iter), "(invalid)");
713 TEST(DBTest, IterMulti) {
714 ASSERT_OK(Put("a", "va"));
715 ASSERT_OK(Put("b", "vb"));
716 ASSERT_OK(Put("c", "vc"));
717 Iterator* iter = db_->NewIterator(ReadOptions());
720 ASSERT_EQ(IterStatus(iter), "a->va");
722 ASSERT_EQ(IterStatus(iter), "b->vb");
724 ASSERT_EQ(IterStatus(iter), "c->vc");
726 ASSERT_EQ(IterStatus(iter), "(invalid)");
728 ASSERT_EQ(IterStatus(iter), "a->va");
730 ASSERT_EQ(IterStatus(iter), "(invalid)");
733 ASSERT_EQ(IterStatus(iter), "c->vc");
735 ASSERT_EQ(IterStatus(iter), "b->vb");
737 ASSERT_EQ(IterStatus(iter), "a->va");
739 ASSERT_EQ(IterStatus(iter), "(invalid)");
741 ASSERT_EQ(IterStatus(iter), "c->vc");
743 ASSERT_EQ(IterStatus(iter), "(invalid)");
746 ASSERT_EQ(IterStatus(iter), "a->va");
748 ASSERT_EQ(IterStatus(iter), "a->va");
750 ASSERT_EQ(IterStatus(iter), "b->vb");
752 ASSERT_EQ(IterStatus(iter), "b->vb");
754 ASSERT_EQ(IterStatus(iter), "(invalid)");
756 // Switch from reverse to forward
761 ASSERT_EQ(IterStatus(iter), "b->vb");
763 // Switch from forward to reverse
768 ASSERT_EQ(IterStatus(iter), "b->vb");
770 // Make sure iter stays at snapshot
771 ASSERT_OK(Put("a", "va2"));
772 ASSERT_OK(Put("a2", "va3"));
773 ASSERT_OK(Put("b", "vb2"));
774 ASSERT_OK(Put("c", "vc2"));
775 ASSERT_OK(Delete("b"));
777 ASSERT_EQ(IterStatus(iter), "a->va");
779 ASSERT_EQ(IterStatus(iter), "b->vb");
781 ASSERT_EQ(IterStatus(iter), "c->vc");
783 ASSERT_EQ(IterStatus(iter), "(invalid)");
785 ASSERT_EQ(IterStatus(iter), "c->vc");
787 ASSERT_EQ(IterStatus(iter), "b->vb");
789 ASSERT_EQ(IterStatus(iter), "a->va");
791 ASSERT_EQ(IterStatus(iter), "(invalid)");
796 TEST(DBTest, IterSmallAndLargeMix) {
797 ASSERT_OK(Put("a", "va"));
798 ASSERT_OK(Put("b", std::string(100000, 'b')));
799 ASSERT_OK(Put("c", "vc"));
800 ASSERT_OK(Put("d", std::string(100000, 'd')));
801 ASSERT_OK(Put("e", std::string(100000, 'e')));
803 Iterator* iter = db_->NewIterator(ReadOptions());
806 ASSERT_EQ(IterStatus(iter), "a->va");
808 ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
810 ASSERT_EQ(IterStatus(iter), "c->vc");
812 ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
814 ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
816 ASSERT_EQ(IterStatus(iter), "(invalid)");
819 ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
821 ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
823 ASSERT_EQ(IterStatus(iter), "c->vc");
825 ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
827 ASSERT_EQ(IterStatus(iter), "a->va");
829 ASSERT_EQ(IterStatus(iter), "(invalid)");
834 TEST(DBTest, IterMultiWithDelete) {
836 ASSERT_OK(Put("a", "va"));
837 ASSERT_OK(Put("b", "vb"));
838 ASSERT_OK(Put("c", "vc"));
839 ASSERT_OK(Delete("b"));
840 ASSERT_EQ("NOT_FOUND", Get("b"));
842 Iterator* iter = db_->NewIterator(ReadOptions());
844 ASSERT_EQ(IterStatus(iter), "c->vc");
846 ASSERT_EQ(IterStatus(iter), "a->va");
848 } while (ChangeOptions());
851 TEST(DBTest, Recover) {
853 ASSERT_OK(Put("foo", "v1"));
854 ASSERT_OK(Put("baz", "v5"));
857 ASSERT_EQ("v1", Get("foo"));
859 ASSERT_EQ("v1", Get("foo"));
860 ASSERT_EQ("v5", Get("baz"));
861 ASSERT_OK(Put("bar", "v2"));
862 ASSERT_OK(Put("foo", "v3"));
865 ASSERT_EQ("v3", Get("foo"));
866 ASSERT_OK(Put("foo", "v4"));
867 ASSERT_EQ("v4", Get("foo"));
868 ASSERT_EQ("v2", Get("bar"));
869 ASSERT_EQ("v5", Get("baz"));
870 } while (ChangeOptions());
873 TEST(DBTest, RecoveryWithEmptyLog) {
875 ASSERT_OK(Put("foo", "v1"));
876 ASSERT_OK(Put("foo", "v2"));
879 ASSERT_OK(Put("foo", "v3"));
881 ASSERT_EQ("v3", Get("foo"));
882 } while (ChangeOptions());
885 // Check that writes done during a memtable compaction are recovered
886 // if the database is shutdown during the memtable compaction.
887 TEST(DBTest, RecoverDuringMemtableCompaction) {
889 Options options = CurrentOptions();
891 options.write_buffer_size = 1000000;
894 // Trigger a long memtable compaction and reopen the database during it
895 ASSERT_OK(Put("foo", "v1")); // Goes to 1st log file
896 ASSERT_OK(Put("big1", std::string(10000000, 'x'))); // Fills memtable
897 ASSERT_OK(Put("big2", std::string(1000, 'y'))); // Triggers compaction
898 ASSERT_OK(Put("bar", "v2")); // Goes to new log file
901 ASSERT_EQ("v1", Get("foo"));
902 ASSERT_EQ("v2", Get("bar"));
903 ASSERT_EQ(std::string(10000000, 'x'), Get("big1"));
904 ASSERT_EQ(std::string(1000, 'y'), Get("big2"));
905 } while (ChangeOptions());
908 static std::string Key(int i) {
910 snprintf(buf, sizeof(buf), "key%06d", i);
911 return std::string(buf);
914 TEST(DBTest, MinorCompactionsHappen) {
915 Options options = CurrentOptions();
916 options.write_buffer_size = 10000;
921 int starting_num_tables = TotalTableFiles();
922 for (int i = 0; i < N; i++) {
923 ASSERT_OK(Put(Key(i), Key(i) + std::string(1000, 'v')));
925 int ending_num_tables = TotalTableFiles();
926 ASSERT_GT(ending_num_tables, starting_num_tables);
928 for (int i = 0; i < N; i++) {
929 ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
934 for (int i = 0; i < N; i++) {
935 ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
939 TEST(DBTest, RecoverWithLargeLog) {
941 Options options = CurrentOptions();
943 ASSERT_OK(Put("big1", std::string(200000, '1')));
944 ASSERT_OK(Put("big2", std::string(200000, '2')));
945 ASSERT_OK(Put("small3", std::string(10, '3')));
946 ASSERT_OK(Put("small4", std::string(10, '4')));
947 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
950 // Make sure that if we re-open with a small write buffer size that
951 // we flush table files in the middle of a large log file.
952 Options options = CurrentOptions();
953 options.write_buffer_size = 100000;
955 ASSERT_EQ(NumTableFilesAtLevel(0), 3);
956 ASSERT_EQ(std::string(200000, '1'), Get("big1"));
957 ASSERT_EQ(std::string(200000, '2'), Get("big2"));
958 ASSERT_EQ(std::string(10, '3'), Get("small3"));
959 ASSERT_EQ(std::string(10, '4'), Get("small4"));
960 ASSERT_GT(NumTableFilesAtLevel(0), 1);
963 TEST(DBTest, CompactionsGenerateMultipleFiles) {
964 Options options = CurrentOptions();
965 options.write_buffer_size = 100000000; // Large write buffer
970 // Write 8MB (80 values, each 100K)
971 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
972 std::vector<std::string> values;
973 for (int i = 0; i < 80; i++) {
974 values.push_back(RandomString(&rnd, 100000));
975 ASSERT_OK(Put(Key(i), values[i]));
978 // Reopening moves updates to level-0
980 dbfull()->TEST_CompactRange(0, NULL, NULL);
982 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
983 ASSERT_GT(NumTableFilesAtLevel(1), 1);
984 for (int i = 0; i < 80; i++) {
985 ASSERT_EQ(Get(Key(i)), values[i]);
989 TEST(DBTest, RepeatedWritesToSameKey) {
990 Options options = CurrentOptions();
992 options.write_buffer_size = 100000; // Small write buffer
995 // We must have at most one file per level except for level-0,
996 // which may have up to kL0_StopWritesTrigger files.
997 const int kMaxFiles = config::kNumLevels + config::kL0_StopWritesTrigger;
1000 std::string value = RandomString(&rnd, 2 * options.write_buffer_size);
1001 for (int i = 0; i < 5 * kMaxFiles; i++) {
1003 ASSERT_LE(TotalTableFiles(), kMaxFiles);
1004 fprintf(stderr, "after %d: %d files\n", int(i+1), TotalTableFiles());
1008 TEST(DBTest, SparseMerge) {
1009 Options options = CurrentOptions();
1010 options.compression = kNoCompression;
1013 FillLevels("A", "Z");
1015 // Suppose there is:
1016 // small amount of data with prefix A
1017 // large amount of data with prefix B
1018 // small amount of data with prefix C
1019 // and that recent updates have made small changes to all three prefixes.
1020 // Check that we do not do a compaction that merges all of B in one shot.
1021 const std::string value(1000, 'x');
1023 // Write approximately 100MB of "B" values
1024 for (int i = 0; i < 100000; i++) {
1026 snprintf(key, sizeof(key), "B%010d", i);
1030 dbfull()->TEST_CompactMemTable();
1031 dbfull()->TEST_CompactRange(0, NULL, NULL);
1033 // Make sparse update
1035 Put("B100", "bvalue2");
1037 dbfull()->TEST_CompactMemTable();
1039 // Compactions should not cause us to create a situation where
1040 // a file overlaps too much data at the next level.
1041 ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
1042 dbfull()->TEST_CompactRange(0, NULL, NULL);
1043 ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
1044 dbfull()->TEST_CompactRange(1, NULL, NULL);
1045 ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
1048 static bool Between(uint64_t val, uint64_t low, uint64_t high) {
1049 bool result = (val >= low) && (val <= high);
1051 fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
1052 (unsigned long long)(val),
1053 (unsigned long long)(low),
1054 (unsigned long long)(high));
1059 TEST(DBTest, ApproximateSizes) {
1061 Options options = CurrentOptions();
1062 options.write_buffer_size = 100000000; // Large write buffer
1063 options.compression = kNoCompression;
1066 ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
1068 ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
1070 // Write 8MB (80 values, each 100K)
1071 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
1073 static const int S1 = 100000;
1074 static const int S2 = 105000; // Allow some expansion from metadata
1076 for (int i = 0; i < N; i++) {
1077 ASSERT_OK(Put(Key(i), RandomString(&rnd, S1)));
1080 // 0 because GetApproximateSizes() does not account for memtable space
1081 ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
1083 // Check sizes across recovery by reopening a few times
1084 for (int run = 0; run < 3; run++) {
1087 for (int compact_start = 0; compact_start < N; compact_start += 10) {
1088 for (int i = 0; i < N; i += 10) {
1089 ASSERT_TRUE(Between(Size("", Key(i)), S1*i, S2*i));
1090 ASSERT_TRUE(Between(Size("", Key(i)+".suffix"), S1*(i+1), S2*(i+1)));
1091 ASSERT_TRUE(Between(Size(Key(i), Key(i+10)), S1*10, S2*10));
1093 ASSERT_TRUE(Between(Size("", Key(50)), S1*50, S2*50));
1094 ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), S1*50, S2*50));
1096 std::string cstart_str = Key(compact_start);
1097 std::string cend_str = Key(compact_start + 9);
1098 Slice cstart = cstart_str;
1099 Slice cend = cend_str;
1100 dbfull()->TEST_CompactRange(0, &cstart, &cend);
1103 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
1104 ASSERT_GT(NumTableFilesAtLevel(1), 0);
1106 } while (ChangeOptions());
1109 TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
1111 Options options = CurrentOptions();
1112 options.compression = kNoCompression;
1116 std::string big1 = RandomString(&rnd, 100000);
1117 ASSERT_OK(Put(Key(0), RandomString(&rnd, 10000)));
1118 ASSERT_OK(Put(Key(1), RandomString(&rnd, 10000)));
1119 ASSERT_OK(Put(Key(2), big1));
1120 ASSERT_OK(Put(Key(3), RandomString(&rnd, 10000)));
1121 ASSERT_OK(Put(Key(4), big1));
1122 ASSERT_OK(Put(Key(5), RandomString(&rnd, 10000)));
1123 ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000)));
1124 ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000)));
1126 // Check sizes across recovery by reopening a few times
1127 for (int run = 0; run < 3; run++) {
1130 ASSERT_TRUE(Between(Size("", Key(0)), 0, 0));
1131 ASSERT_TRUE(Between(Size("", Key(1)), 10000, 11000));
1132 ASSERT_TRUE(Between(Size("", Key(2)), 20000, 21000));
1133 ASSERT_TRUE(Between(Size("", Key(3)), 120000, 121000));
1134 ASSERT_TRUE(Between(Size("", Key(4)), 130000, 131000));
1135 ASSERT_TRUE(Between(Size("", Key(5)), 230000, 231000));
1136 ASSERT_TRUE(Between(Size("", Key(6)), 240000, 241000));
1137 ASSERT_TRUE(Between(Size("", Key(7)), 540000, 541000));
1138 ASSERT_TRUE(Between(Size("", Key(8)), 550000, 560000));
1140 ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000));
1142 dbfull()->TEST_CompactRange(0, NULL, NULL);
1144 } while (ChangeOptions());
1147 TEST(DBTest, IteratorPinsRef) {
1148 Put("foo", "hello");
1150 // Get iterator that will yield the current contents of the DB.
1151 Iterator* iter = db_->NewIterator(ReadOptions());
1153 // Write to force compactions
1154 Put("foo", "newvalue1");
1155 for (int i = 0; i < 100; i++) {
1156 ASSERT_OK(Put(Key(i), Key(i) + std::string(100000, 'v'))); // 100K values
1158 Put("foo", "newvalue2");
1160 iter->SeekToFirst();
1161 ASSERT_TRUE(iter->Valid());
1162 ASSERT_EQ("foo", iter->key().ToString());
1163 ASSERT_EQ("hello", iter->value().ToString());
1165 ASSERT_TRUE(!iter->Valid());
1169 TEST(DBTest, Snapshot) {
1172 const Snapshot* s1 = db_->GetSnapshot();
1174 const Snapshot* s2 = db_->GetSnapshot();
1176 const Snapshot* s3 = db_->GetSnapshot();
1179 ASSERT_EQ("v1", Get("foo", s1));
1180 ASSERT_EQ("v2", Get("foo", s2));
1181 ASSERT_EQ("v3", Get("foo", s3));
1182 ASSERT_EQ("v4", Get("foo"));
1184 db_->ReleaseSnapshot(s3);
1185 ASSERT_EQ("v1", Get("foo", s1));
1186 ASSERT_EQ("v2", Get("foo", s2));
1187 ASSERT_EQ("v4", Get("foo"));
1189 db_->ReleaseSnapshot(s1);
1190 ASSERT_EQ("v2", Get("foo", s2));
1191 ASSERT_EQ("v4", Get("foo"));
1193 db_->ReleaseSnapshot(s2);
1194 ASSERT_EQ("v4", Get("foo"));
1195 } while (ChangeOptions());
1198 TEST(DBTest, HiddenValuesAreRemoved) {
1201 FillLevels("a", "z");
1203 std::string big = RandomString(&rnd, 50000);
1205 Put("pastfoo", "v");
1206 const Snapshot* snapshot = db_->GetSnapshot();
1208 Put("pastfoo2", "v2"); // Advance sequence number one more
1210 ASSERT_OK(dbfull()->TEST_CompactMemTable());
1211 ASSERT_GT(NumTableFilesAtLevel(0), 0);
1213 ASSERT_EQ(big, Get("foo", snapshot));
1214 ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000));
1215 db_->ReleaseSnapshot(snapshot);
1216 ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]");
1218 dbfull()->TEST_CompactRange(0, NULL, &x);
1219 ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
1220 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
1221 ASSERT_GE(NumTableFilesAtLevel(1), 1);
1222 dbfull()->TEST_CompactRange(1, NULL, &x);
1223 ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
1225 ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000));
1226 } while (ChangeOptions());
1229 TEST(DBTest, DeletionMarkers1) {
1231 ASSERT_OK(dbfull()->TEST_CompactMemTable());
1232 const int last = config::kMaxMemCompactLevel;
1233 ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
1235 // Place a table at level last-1 to prevent merging with preceding mutation
1238 dbfull()->TEST_CompactMemTable();
1239 ASSERT_EQ(NumTableFilesAtLevel(last), 1);
1240 ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
1244 ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
1245 ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
1246 ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
1248 dbfull()->TEST_CompactRange(last-2, NULL, &z);
1249 // DEL eliminated, but v1 remains because we aren't compacting that level
1250 // (DEL can be eliminated because v2 hides v1).
1251 ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]");
1252 dbfull()->TEST_CompactRange(last-1, NULL, NULL);
1253 // Merging last-1 w/ last, so we are the base level for "foo", so
1254 // DEL is removed. (as is v1).
1255 ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]");
1258 TEST(DBTest, DeletionMarkers2) {
1260 ASSERT_OK(dbfull()->TEST_CompactMemTable());
1261 const int last = config::kMaxMemCompactLevel;
1262 ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
1264 // Place a table at level last-1 to prevent merging with preceding mutation
1267 dbfull()->TEST_CompactMemTable();
1268 ASSERT_EQ(NumTableFilesAtLevel(last), 1);
1269 ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
1272 ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
1273 ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
1274 ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
1275 dbfull()->TEST_CompactRange(last-2, NULL, NULL);
1276 // DEL kept: "last" file overlaps
1277 ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
1278 dbfull()->TEST_CompactRange(last-1, NULL, NULL);
1279 // Merging last-1 w/ last, so we are the base level for "foo", so
1280 // DEL is removed. (as is v1).
1281 ASSERT_EQ(AllEntriesFor("foo"), "[ ]");
1284 TEST(DBTest, OverlapInLevel0) {
1286 ASSERT_EQ(config::kMaxMemCompactLevel, 2) << "Fix test to match config";
1288 // Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0.
1289 ASSERT_OK(Put("100", "v100"));
1290 ASSERT_OK(Put("999", "v999"));
1291 dbfull()->TEST_CompactMemTable();
1292 ASSERT_OK(Delete("100"));
1293 ASSERT_OK(Delete("999"));
1294 dbfull()->TEST_CompactMemTable();
1295 ASSERT_EQ("0,1,1", FilesPerLevel());
1297 // Make files spanning the following ranges in level-0:
1298 // files[0] 200 .. 900
1299 // files[1] 300 .. 500
1300 // Note that files are sorted by smallest key.
1301 ASSERT_OK(Put("300", "v300"));
1302 ASSERT_OK(Put("500", "v500"));
1303 dbfull()->TEST_CompactMemTable();
1304 ASSERT_OK(Put("200", "v200"));
1305 ASSERT_OK(Put("600", "v600"));
1306 ASSERT_OK(Put("900", "v900"));
1307 dbfull()->TEST_CompactMemTable();
1308 ASSERT_EQ("2,1,1", FilesPerLevel());
1310 // Compact away the placeholder files we created initially
1311 dbfull()->TEST_CompactRange(1, NULL, NULL);
1312 dbfull()->TEST_CompactRange(2, NULL, NULL);
1313 ASSERT_EQ("2", FilesPerLevel());
1315 // Do a memtable compaction. Before bug-fix, the compaction would
1316 // not detect the overlap with level-0 files and would incorrectly place
1317 // the deletion in a deeper level.
1318 ASSERT_OK(Delete("600"));
1319 dbfull()->TEST_CompactMemTable();
1320 ASSERT_EQ("3", FilesPerLevel());
1321 ASSERT_EQ("NOT_FOUND", Get("600"));
1322 } while (ChangeOptions());
1325 TEST(DBTest, L0_CompactionBug_Issue44_a) {
1327 ASSERT_OK(Put("b", "v"));
1329 ASSERT_OK(Delete("b"));
1330 ASSERT_OK(Delete("a"));
1332 ASSERT_OK(Delete("a"));
1334 ASSERT_OK(Put("a", "v"));
1337 ASSERT_EQ("(a->v)", Contents());
1338 DelayMilliseconds(1000); // Wait for compaction to finish
1339 ASSERT_EQ("(a->v)", Contents());
1342 TEST(DBTest, L0_CompactionBug_Issue44_b) {
1354 DelayMilliseconds(1000); // Wait for compaction to finish
1363 ASSERT_EQ("(->)(c->cv)", Contents());
1364 DelayMilliseconds(1000); // Wait for compaction to finish
1365 ASSERT_EQ("(->)(c->cv)", Contents());
1368 TEST(DBTest, ComparatorCheck) {
1369 class NewComparator : public Comparator {
1371 virtual const char* Name() const { return "leveldb.NewComparator"; }
1372 virtual int Compare(const Slice& a, const Slice& b) const {
1373 return BytewiseComparator()->Compare(a, b);
1375 virtual void FindShortestSeparator(std::string* s, const Slice& l) const {
1376 BytewiseComparator()->FindShortestSeparator(s, l);
1378 virtual void FindShortSuccessor(std::string* key) const {
1379 BytewiseComparator()->FindShortSuccessor(key);
1383 Options new_options = CurrentOptions();
1384 new_options.comparator = &cmp;
1385 Status s = TryReopen(&new_options);
1386 ASSERT_TRUE(!s.ok());
1387 ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
1391 TEST(DBTest, CustomComparator) {
1392 class NumberComparator : public Comparator {
1394 virtual const char* Name() const { return "test.NumberComparator"; }
1395 virtual int Compare(const Slice& a, const Slice& b) const {
1396 return ToNumber(a) - ToNumber(b);
1398 virtual void FindShortestSeparator(std::string* s, const Slice& l) const {
1399 ToNumber(*s); // Check format
1400 ToNumber(l); // Check format
1402 virtual void FindShortSuccessor(std::string* key) const {
1403 ToNumber(*key); // Check format
1406 static int ToNumber(const Slice& x) {
1407 // Check that there are no extra characters.
1408 ASSERT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size()-1] == ']')
1412 ASSERT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
1417 NumberComparator cmp;
1418 Options new_options = CurrentOptions();
1419 new_options.create_if_missing = true;
1420 new_options.comparator = &cmp;
1421 new_options.filter_policy = NULL; // Cannot use bloom filters
1422 new_options.write_buffer_size = 1000; // Compact more often
1423 DestroyAndReopen(&new_options);
1424 ASSERT_OK(Put("[10]", "ten"));
1425 ASSERT_OK(Put("[0x14]", "twenty"));
1426 for (int i = 0; i < 2; i++) {
1427 ASSERT_EQ("ten", Get("[10]"));
1428 ASSERT_EQ("ten", Get("[0xa]"));
1429 ASSERT_EQ("twenty", Get("[20]"));
1430 ASSERT_EQ("twenty", Get("[0x14]"));
1431 ASSERT_EQ("NOT_FOUND", Get("[15]"));
1432 ASSERT_EQ("NOT_FOUND", Get("[0xf]"));
1433 Compact("[0]", "[9999]");
1436 for (int run = 0; run < 2; run++) {
1437 for (int i = 0; i < 1000; i++) {
1439 snprintf(buf, sizeof(buf), "[%d]", i*10);
1440 ASSERT_OK(Put(buf, buf));
1442 Compact("[0]", "[1000000]");
1446 TEST(DBTest, ManualCompaction) {
1447 ASSERT_EQ(config::kMaxMemCompactLevel, 2)
1448 << "Need to update this test to match kMaxMemCompactLevel";
1450 MakeTables(3, "p", "q");
1451 ASSERT_EQ("1,1,1", FilesPerLevel());
1453 // Compaction range falls before files
1455 ASSERT_EQ("1,1,1", FilesPerLevel());
1457 // Compaction range falls after files
1459 ASSERT_EQ("1,1,1", FilesPerLevel());
1461 // Compaction range overlaps files
1462 Compact("p1", "p9");
1463 ASSERT_EQ("0,0,1", FilesPerLevel());
1465 // Populate a different range
1466 MakeTables(3, "c", "e");
1467 ASSERT_EQ("1,1,2", FilesPerLevel());
1469 // Compact just the new range
1471 ASSERT_EQ("0,0,2", FilesPerLevel());
1474 MakeTables(1, "a", "z");
1475 ASSERT_EQ("0,1,2", FilesPerLevel());
1476 db_->CompactRange(NULL, NULL);
1477 ASSERT_EQ("0,0,1", FilesPerLevel());
1480 TEST(DBTest, DBOpen_Options) {
1481 std::string dbname = test::TmpDir() + "/db_options_test";
1482 DestroyDB(dbname, Options());
1484 // Does not exist, and create_if_missing == false: error
1487 opts.create_if_missing = false;
1488 Status s = DB::Open(opts, dbname, &db);
1489 ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != NULL);
1490 ASSERT_TRUE(db == NULL);
1492 // Does not exist, and create_if_missing == true: OK
1493 opts.create_if_missing = true;
1494 s = DB::Open(opts, dbname, &db);
1496 ASSERT_TRUE(db != NULL);
1501 // Does exist, and error_if_exists == true: error
1502 opts.create_if_missing = false;
1503 opts.error_if_exists = true;
1504 s = DB::Open(opts, dbname, &db);
1505 ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != NULL);
1506 ASSERT_TRUE(db == NULL);
1508 // Does exist, and error_if_exists == false: OK
1509 opts.create_if_missing = true;
1510 opts.error_if_exists = false;
1511 s = DB::Open(opts, dbname, &db);
1513 ASSERT_TRUE(db != NULL);
1519 TEST(DBTest, Locking) {
1521 Status s = DB::Open(CurrentOptions(), dbname_, &db2);
1522 ASSERT_TRUE(!s.ok()) << "Locking did not prevent re-opening db";
1525 // Check that number of files does not grow when we are out of space
1526 TEST(DBTest, NoSpace) {
1527 Options options = CurrentOptions();
1531 ASSERT_OK(Put("foo", "v1"));
1532 ASSERT_EQ("v1", Get("foo"));
1534 const int num_files = CountFiles();
1535 env_->no_space_.Release_Store(env_); // Force out-of-space errors
1536 for (int i = 0; i < 10; i++) {
1537 for (int level = 0; level < config::kNumLevels-1; level++) {
1538 dbfull()->TEST_CompactRange(level, NULL, NULL);
1541 env_->no_space_.Release_Store(NULL);
1542 ASSERT_LT(CountFiles(), num_files + 3);
1545 TEST(DBTest, NonWritableFileSystem) {
1546 Options options = CurrentOptions();
1547 options.write_buffer_size = 1000;
1550 ASSERT_OK(Put("foo", "v1"));
1551 env_->non_writable_.Release_Store(env_); // Force errors for new files
1552 std::string big(100000, 'x');
1554 for (int i = 0; i < 20; i++) {
1555 fprintf(stderr, "iter %d; errors %d\n", i, errors);
1556 if (!Put("foo", big).ok()) {
1558 DelayMilliseconds(100);
1561 ASSERT_GT(errors, 0);
1562 env_->non_writable_.Release_Store(NULL);
1565 TEST(DBTest, WriteSyncError) {
1566 // Check that log sync errors cause the DB to disallow future writes.
1568 // (a) Cause log sync calls to fail
1569 Options options = CurrentOptions();
1572 env_->data_sync_error_.Release_Store(env_);
1574 // (b) Normal write should succeed
1576 ASSERT_OK(db_->Put(w, "k1", "v1"));
1577 ASSERT_EQ("v1", Get("k1"));
1579 // (c) Do a sync write; should fail
1581 ASSERT_TRUE(!db_->Put(w, "k2", "v2").ok());
1582 ASSERT_EQ("v1", Get("k1"));
1583 ASSERT_EQ("NOT_FOUND", Get("k2"));
1585 // (d) make sync behave normally
1586 env_->data_sync_error_.Release_Store(NULL);
1588 // (e) Do a non-sync write; should fail
1590 ASSERT_TRUE(!db_->Put(w, "k3", "v3").ok());
1591 ASSERT_EQ("v1", Get("k1"));
1592 ASSERT_EQ("NOT_FOUND", Get("k2"));
1593 ASSERT_EQ("NOT_FOUND", Get("k3"));
1596 TEST(DBTest, ManifestWriteError) {
1597 // Test for the following problem:
1598 // (a) Compaction produces file F
1599 // (b) Log record containing F is written to MANIFEST file, but Sync() fails
1601 // (d) After reopening DB, reads fail since deleted F is named in log record
1603 // We iterate twice. In the second iteration, everything is the
1604 // same except the log record never makes it to the MANIFEST file.
1605 for (int iter = 0; iter < 2; iter++) {
1606 port::AtomicPointer* error_type = (iter == 0)
1607 ? &env_->manifest_sync_error_
1608 : &env_->manifest_write_error_;
1610 // Insert foo=>bar mapping
1611 Options options = CurrentOptions();
1613 options.create_if_missing = true;
1614 options.error_if_exists = false;
1615 DestroyAndReopen(&options);
1616 ASSERT_OK(Put("foo", "bar"));
1617 ASSERT_EQ("bar", Get("foo"));
1619 // Memtable compaction (will succeed)
1620 dbfull()->TEST_CompactMemTable();
1621 ASSERT_EQ("bar", Get("foo"));
1622 const int last = config::kMaxMemCompactLevel;
1623 ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo=>bar is now in last level
1625 // Merging compaction (will fail)
1626 error_type->Release_Store(env_);
1627 dbfull()->TEST_CompactRange(last, NULL, NULL); // Should fail
1628 ASSERT_EQ("bar", Get("foo"));
1630 // Recovery: should not lose data
1631 error_type->Release_Store(NULL);
1633 ASSERT_EQ("bar", Get("foo"));
1637 TEST(DBTest, MissingSSTFile) {
1638 ASSERT_OK(Put("foo", "bar"));
1639 ASSERT_EQ("bar", Get("foo"));
1641 // Dump the memtable to disk.
1642 dbfull()->TEST_CompactMemTable();
1643 ASSERT_EQ("bar", Get("foo"));
1646 ASSERT_TRUE(DeleteAnSSTFile());
1647 Options options = CurrentOptions();
1648 options.paranoid_checks = true;
1649 Status s = TryReopen(&options);
1650 ASSERT_TRUE(!s.ok());
1651 ASSERT_TRUE(s.ToString().find("issing") != std::string::npos)
1655 TEST(DBTest, StillReadSST) {
1656 ASSERT_OK(Put("foo", "bar"));
1657 ASSERT_EQ("bar", Get("foo"));
1659 // Dump the memtable to disk.
1660 dbfull()->TEST_CompactMemTable();
1661 ASSERT_EQ("bar", Get("foo"));
1663 ASSERT_GT(RenameLDBToSST(), 0);
1664 Options options = CurrentOptions();
1665 options.paranoid_checks = true;
1666 Status s = TryReopen(&options);
1667 ASSERT_TRUE(s.ok());
1668 ASSERT_EQ("bar", Get("foo"));
1671 TEST(DBTest, FilesDeletedAfterCompaction) {
1672 ASSERT_OK(Put("foo", "v2"));
1674 const int num_files = CountFiles();
1675 for (int i = 0; i < 10; i++) {
1676 ASSERT_OK(Put("foo", "v2"));
1679 ASSERT_EQ(CountFiles(), num_files);
1682 TEST(DBTest, BloomFilter) {
1683 env_->count_random_reads_ = true;
1684 Options options = CurrentOptions();
1686 options.block_cache = NewLRUCache(0); // Prevent cache hits
1687 options.filter_policy = NewBloomFilterPolicy(10);
1690 // Populate multiple layers
1691 const int N = 10000;
1692 for (int i = 0; i < N; i++) {
1693 ASSERT_OK(Put(Key(i), Key(i)));
1696 for (int i = 0; i < N; i += 100) {
1697 ASSERT_OK(Put(Key(i), Key(i)));
1699 dbfull()->TEST_CompactMemTable();
1701 // Prevent auto compactions triggered by seeks
1702 env_->delay_data_sync_.Release_Store(env_);
1704 // Lookup present keys. Should rarely read from small sstable.
1705 env_->random_read_counter_.Reset();
1706 for (int i = 0; i < N; i++) {
1707 ASSERT_EQ(Key(i), Get(Key(i)));
1709 int reads = env_->random_read_counter_.Read();
1710 fprintf(stderr, "%d present => %d reads\n", N, reads);
1711 ASSERT_GE(reads, N);
1712 ASSERT_LE(reads, N + 2*N/100);
1714 // Lookup present keys. Should rarely read from either sstable.
1715 env_->random_read_counter_.Reset();
1716 for (int i = 0; i < N; i++) {
1717 ASSERT_EQ("NOT_FOUND", Get(Key(i) + ".missing"));
1719 reads = env_->random_read_counter_.Read();
1720 fprintf(stderr, "%d missing => %d reads\n", N, reads);
1721 ASSERT_LE(reads, 3*N/100);
1723 env_->delay_data_sync_.Release_Store(NULL);
1725 delete options.block_cache;
1726 delete options.filter_policy;
1729 // Multi-threaded test:
1732 static const int kNumThreads = 4;
1733 static const int kTestSeconds = 10;
1734 static const int kNumKeys = 1000;
1738 port::AtomicPointer stop;
1739 port::AtomicPointer counter[kNumThreads];
1740 port::AtomicPointer thread_done[kNumThreads];
1748 static void MTThreadBody(void* arg) {
1749 MTThread* t = reinterpret_cast<MTThread*>(arg);
1751 DB* db = t->state->test->db_;
1752 uintptr_t counter = 0;
1753 fprintf(stderr, "... starting thread %d\n", id);
1754 Random rnd(1000 + id);
1757 while (t->state->stop.Acquire_Load() == NULL) {
1758 t->state->counter[id].Release_Store(reinterpret_cast<void*>(counter));
1760 int key = rnd.Uniform(kNumKeys);
1762 snprintf(keybuf, sizeof(keybuf), "%016d", key);
1765 // Write values of the form <key, my id, counter>.
1766 // We add some padding for force compactions.
1767 snprintf(valbuf, sizeof(valbuf), "%d.%d.%-1000d",
1768 key, id, static_cast<int>(counter));
1769 ASSERT_OK(db->Put(WriteOptions(), Slice(keybuf), Slice(valbuf)));
1771 // Read a value and verify that it matches the pattern written above.
1772 Status s = db->Get(ReadOptions(), Slice(keybuf), &value);
1773 if (s.IsNotFound()) {
1774 // Key has not yet been written
1776 // Check that the writer thread counter is >= the counter in the value
1779 ASSERT_EQ(3, sscanf(value.c_str(), "%d.%d.%d", &k, &w, &c)) << value;
1782 ASSERT_LT(w, kNumThreads);
1783 ASSERT_LE(static_cast<uintptr_t>(c), reinterpret_cast<uintptr_t>(
1784 t->state->counter[w].Acquire_Load()));
1789 t->state->thread_done[id].Release_Store(t);
1790 fprintf(stderr, "... stopping thread %d after %d ops\n", id, int(counter));
1795 TEST(DBTest, MultiThreaded) {
1800 mt.stop.Release_Store(0);
1801 for (int id = 0; id < kNumThreads; id++) {
1802 mt.counter[id].Release_Store(0);
1803 mt.thread_done[id].Release_Store(0);
1807 MTThread thread[kNumThreads];
1808 for (int id = 0; id < kNumThreads; id++) {
1809 thread[id].state = &mt;
1811 env_->StartThread(MTThreadBody, &thread[id]);
1814 // Let them run for a while
1815 DelayMilliseconds(kTestSeconds * 1000);
1817 // Stop the threads and wait for them to finish
1818 mt.stop.Release_Store(&mt);
1819 for (int id = 0; id < kNumThreads; id++) {
1820 while (mt.thread_done[id].Acquire_Load() == NULL) {
1821 DelayMilliseconds(100);
1824 } while (ChangeOptions());
1828 typedef std::map<std::string, std::string> KVMap;
1831 class ModelDB: public DB {
1833 class ModelSnapshot : public Snapshot {
1838 explicit ModelDB(const Options& options): options_(options) { }
1840 virtual Status Put(const WriteOptions& o, const Slice& k, const Slice& v) {
1841 return DB::Put(o, k, v);
1843 virtual Status Delete(const WriteOptions& o, const Slice& key) {
1844 return DB::Delete(o, key);
1846 virtual Status Get(const ReadOptions& options,
1847 const Slice& key, std::string* value) {
1848 assert(false); // Not implemented
1849 return Status::NotFound(key);
1851 virtual Iterator* NewIterator(const ReadOptions& options) {
1852 if (options.snapshot == NULL) {
1853 KVMap* saved = new KVMap;
1855 return new ModelIter(saved, true);
1857 const KVMap* snapshot_state =
1858 &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
1859 return new ModelIter(snapshot_state, false);
1862 virtual const Snapshot* GetSnapshot() {
1863 ModelSnapshot* snapshot = new ModelSnapshot;
1864 snapshot->map_ = map_;
1868 virtual void ReleaseSnapshot(const Snapshot* snapshot) {
1869 delete reinterpret_cast<const ModelSnapshot*>(snapshot);
1871 virtual Status Write(const WriteOptions& options, WriteBatch* batch) {
1872 class Handler : public WriteBatch::Handler {
1875 virtual void Put(const Slice& key, const Slice& value) {
1876 (*map_)[key.ToString()] = value.ToString();
1878 virtual void Delete(const Slice& key) {
1879 map_->erase(key.ToString());
1883 handler.map_ = &map_;
1884 return batch->Iterate(&handler);
1887 virtual bool GetProperty(const Slice& property, std::string* value) {
1890 virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes) {
1891 for (int i = 0; i < n; i++) {
1895 virtual void CompactRange(const Slice* start, const Slice* end) {
1899 class ModelIter: public Iterator {
1901 ModelIter(const KVMap* map, bool owned)
1902 : map_(map), owned_(owned), iter_(map_->end()) {
1905 if (owned_) delete map_;
1907 virtual bool Valid() const { return iter_ != map_->end(); }
1908 virtual void SeekToFirst() { iter_ = map_->begin(); }
1909 virtual void SeekToLast() {
1910 if (map_->empty()) {
1911 iter_ = map_->end();
1913 iter_ = map_->find(map_->rbegin()->first);
1916 virtual void Seek(const Slice& k) {
1917 iter_ = map_->lower_bound(k.ToString());
1919 virtual void Next() { ++iter_; }
1920 virtual void Prev() { --iter_; }
1921 virtual Slice key() const { return iter_->first; }
1922 virtual Slice value() const { return iter_->second; }
1923 virtual Status status() const { return Status::OK(); }
1925 const KVMap* const map_;
1926 const bool owned_; // Do we own map_
1927 KVMap::const_iterator iter_;
1929 const Options options_;
1933 static std::string RandomKey(Random* rnd) {
1934 int len = (rnd->OneIn(3)
1935 ? 1 // Short sometimes to encourage collisions
1936 : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
1937 return test::RandomKey(rnd, len);
1940 static bool CompareIterators(int step,
1943 const Snapshot* model_snap,
1944 const Snapshot* db_snap) {
1945 ReadOptions options;
1946 options.snapshot = model_snap;
1947 Iterator* miter = model->NewIterator(options);
1948 options.snapshot = db_snap;
1949 Iterator* dbiter = db->NewIterator(options);
1952 for (miter->SeekToFirst(), dbiter->SeekToFirst();
1953 ok && miter->Valid() && dbiter->Valid();
1954 miter->Next(), dbiter->Next()) {
1956 if (miter->key().compare(dbiter->key()) != 0) {
1957 fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n",
1959 EscapeString(miter->key()).c_str(),
1960 EscapeString(dbiter->key()).c_str());
1965 if (miter->value().compare(dbiter->value()) != 0) {
1966 fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
1968 EscapeString(miter->key()).c_str(),
1969 EscapeString(miter->value()).c_str(),
1970 EscapeString(miter->value()).c_str());
1976 if (miter->Valid() != dbiter->Valid()) {
1977 fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
1978 step, miter->Valid(), dbiter->Valid());
1982 fprintf(stderr, "%d entries compared: ok=%d\n", count, ok);
1988 TEST(DBTest, Randomized) {
1989 Random rnd(test::RandomSeed());
1991 ModelDB model(CurrentOptions());
1992 const int N = 10000;
1993 const Snapshot* model_snap = NULL;
1994 const Snapshot* db_snap = NULL;
1996 for (int step = 0; step < N; step++) {
1997 if (step % 100 == 0) {
1998 fprintf(stderr, "Step %d of %d\n", step, N);
2000 // TODO(sanjay): Test Get() works
2001 int p = rnd.Uniform(100);
2002 if (p < 45) { // Put
2003 k = RandomKey(&rnd);
2004 v = RandomString(&rnd,
2006 ? 100 + rnd.Uniform(100)
2008 ASSERT_OK(model.Put(WriteOptions(), k, v));
2009 ASSERT_OK(db_->Put(WriteOptions(), k, v));
2011 } else if (p < 90) { // Delete
2012 k = RandomKey(&rnd);
2013 ASSERT_OK(model.Delete(WriteOptions(), k));
2014 ASSERT_OK(db_->Delete(WriteOptions(), k));
2017 } else { // Multi-element batch
2019 const int num = rnd.Uniform(8);
2020 for (int i = 0; i < num; i++) {
2021 if (i == 0 || !rnd.OneIn(10)) {
2022 k = RandomKey(&rnd);
2024 // Periodically re-use the same key from the previous iter, so
2025 // we have multiple entries in the write batch for the same key
2028 v = RandomString(&rnd, rnd.Uniform(10));
2034 ASSERT_OK(model.Write(WriteOptions(), &b));
2035 ASSERT_OK(db_->Write(WriteOptions(), &b));
2038 if ((step % 100) == 0) {
2039 ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL));
2040 ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
2041 // Save a snapshot from each DB this time that we'll use next
2042 // time we compare things, to make sure the current state is
2043 // preserved with the snapshot
2044 if (model_snap != NULL) model.ReleaseSnapshot(model_snap);
2045 if (db_snap != NULL) db_->ReleaseSnapshot(db_snap);
2048 ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL));
2050 model_snap = model.GetSnapshot();
2051 db_snap = db_->GetSnapshot();
2054 if (model_snap != NULL) model.ReleaseSnapshot(model_snap);
2055 if (db_snap != NULL) db_->ReleaseSnapshot(db_snap);
2056 } while (ChangeOptions());
2059 std::string MakeKey(unsigned int num) {
2061 snprintf(buf, sizeof(buf), "%016u", num);
2062 return std::string(buf);
2065 void BM_LogAndApply(int iters, int num_base_files) {
2066 std::string dbname = test::TmpDir() + "/leveldb_test_benchmark";
2067 DestroyDB(dbname, Options());
2071 opts.create_if_missing = true;
2072 Status s = DB::Open(opts, dbname, &db);
2074 ASSERT_TRUE(db != NULL);
2079 Env* env = Env::Default();
2084 InternalKeyComparator cmp(BytewiseComparator());
2086 VersionSet vset(dbname, &options, NULL, &cmp);
2087 ASSERT_OK(vset.Recover());
2090 for (int i = 0; i < num_base_files; i++) {
2091 InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
2092 InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
2093 vbase.AddFile(2, fnum++, 1 /* file size */, start, limit);
2095 ASSERT_OK(vset.LogAndApply(&vbase, &mu));
2097 uint64_t start_micros = env->NowMicros();
2099 for (int i = 0; i < iters; i++) {
2101 vedit.DeleteFile(2, fnum);
2102 InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
2103 InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
2104 vedit.AddFile(2, fnum++, 1 /* file size */, start, limit);
2105 vset.LogAndApply(&vedit, &mu);
2107 uint64_t stop_micros = env->NowMicros();
2108 unsigned int us = stop_micros - start_micros;
2110 snprintf(buf, sizeof(buf), "%d", num_base_files);
2112 "BM_LogAndApply/%-6s %8d iters : %9u us (%7.0f us / iter)\n",
2113 buf, iters, us, ((float)us) / iters);
2116 } // namespace leveldb
2118 int main(int argc, char** argv) {
2119 if (argc > 1 && std::string(argv[1]) == "--benchmark") {
2120 leveldb::BM_LogAndApply(1000, 1);
2121 leveldb::BM_LogAndApply(1000, 100);
2122 leveldb::BM_LogAndApply(1000, 10000);
2123 leveldb::BM_LogAndApply(100, 100000);
2127 return leveldb::test::RunAllTests();