level_handler.go - external/github.com/dgraph-io/badger - Git at Google

 /*
  * Copyright 2017 Dgraph Labs, Inc. and Contributors
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package badger

 import (
 	"fmt"
 	"sort"
 	"sync"

 	"github.com/dgryski/go-farm"

 	"github.com/dgraph-io/badger/v2/table"
 	"github.com/dgraph-io/badger/v2/y"
 	"github.com/pkg/errors"
 )

 type levelHandler struct {
 	// Guards tables, totalSize.
 	sync.RWMutex

 	// For level >= 1, tables are sorted by key ranges, which do not overlap.
 	// For level 0, tables are sorted by time.
 	// For level 0, newest table are at the back. Compact the oldest one first, which is at the front.
 	tables    []*table.Table
 	totalSize int64

 	// The following are initialized once and const.
 	level        int
 	strLevel     string
 	maxTotalSize int64
 	db           *DB
 }

 func (s *levelHandler) getTotalSize() int64 {
 	s.RLock()
 	defer s.RUnlock()
 	return s.totalSize
 }

 // initTables replaces s.tables with given tables. This is done during loading.
 func (s *levelHandler) initTables(tables []*table.Table) {
 	s.Lock()
 	defer s.Unlock()

 	s.tables = tables
 	s.totalSize = 0
 	for _, t := range tables {
 		s.totalSize += t.Size()
 	}

 	if s.level == 0 {
 		// Key range will overlap. Just sort by fileID in ascending order
 		// because newer tables are at the end of level 0.
 		sort.Slice(s.tables, func(i, j int) bool {
 			return s.tables[i].ID() < s.tables[j].ID()
 		})
 	} else {
 		// Sort tables by keys.
 		sort.Slice(s.tables, func(i, j int) bool {
 			return y.CompareKeys(s.tables[i].Smallest(), s.tables[j].Smallest()) < 0
 		})
 	}
 }

 // deleteTables remove tables idx0, ..., idx1-1.
 func (s *levelHandler) deleteTables(toDel []*table.Table) error {
 	s.Lock() // s.Unlock() below

 	toDelMap := make(map[uint64]struct{})
 	for _, t := range toDel {
 		toDelMap[t.ID()] = struct{}{}
 	}

 	// Make a copy as iterators might be keeping a slice of tables.
 	var newTables []*table.Table
 	for _, t := range s.tables {
 		_, found := toDelMap[t.ID()]
 		if !found {
 			newTables = append(newTables, t)
 			continue
 		}
 		s.totalSize -= t.Size()
 	}
 	s.tables = newTables

 	s.Unlock() // Unlock s _before_ we DecrRef our tables, which can be slow.

 	return decrRefs(toDel)
 }

 // replaceTables will replace tables[left:right] with newTables. Note this EXCLUDES tables[right].
 // You must call decr() to delete the old tables _after_ writing the update to the manifest.
 func (s *levelHandler) replaceTables(toDel, toAdd []*table.Table) error {
 	// Need to re-search the range of tables in this level to be replaced as other goroutines might
 	// be changing it as well.  (They can't touch our tables, but if they add/remove other tables,
 	// the indices get shifted around.)
 	s.Lock() // We s.Unlock() below.

 	toDelMap := make(map[uint64]struct{})
 	for _, t := range toDel {
 		toDelMap[t.ID()] = struct{}{}
 	}
 	var newTables []*table.Table
 	for _, t := range s.tables {
 		_, found := toDelMap[t.ID()]
 		if !found {
 			newTables = append(newTables, t)
 			continue
 		}
 		s.totalSize -= t.Size()
 	}

 	// Increase totalSize first.
 	for _, t := range toAdd {
 		s.totalSize += t.Size()
 		t.IncrRef()
 		newTables = append(newTables, t)
 	}

 	// Assign tables.
 	s.tables = newTables
 	sort.Slice(s.tables, func(i, j int) bool {
 		return y.CompareKeys(s.tables[i].Smallest(), s.tables[j].Smallest()) < 0
 	})
 	s.Unlock() // s.Unlock before we DecrRef tables -- that can be slow.
 	return decrRefs(toDel)
 }

 // addTable adds toAdd table to levelHandler. Normally when we add tables to levelHandler, we sort
 // tables based on table.Smallest. This is required for correctness of the system. But in case of
 // stream writer this can be avoided. We can just add tables to levelHandler's table list
 // and after all addTable calls, we can sort table list(check sortTable method).
 // NOTE: levelHandler.sortTables() should be called after call addTable calls are done.
 func (s *levelHandler) addTable(t *table.Table) {
 	s.Lock()
 	defer s.Unlock()

 	s.totalSize += t.Size() // Increase totalSize first.
 	t.IncrRef()
 	s.tables = append(s.tables, t)
 }

 // sortTables sorts tables of levelHandler based on table.Smallest.
 // Normally it should be called after all addTable calls.
 func (s *levelHandler) sortTables() {
 	s.RLock()
 	defer s.RUnlock()

 	sort.Slice(s.tables, func(i, j int) bool {
 		return y.CompareKeys(s.tables[i].Smallest(), s.tables[j].Smallest()) < 0
 	})
 }

 func decrRefs(tables []*table.Table) error {
 	for _, table := range tables {
 		if err := table.DecrRef(); err != nil {
 			return err
 		}
 	}
 	return nil
 }

 func newLevelHandler(db *DB, level int) *levelHandler {
 	return &levelHandler{
 		level:    level,
 		strLevel: fmt.Sprintf("l%d", level),
 		db:       db,
 	}
 }

 // tryAddLevel0Table returns true if ok and no stalling.
 func (s *levelHandler) tryAddLevel0Table(t *table.Table) bool {
 	y.AssertTrue(s.level == 0)
 	// Need lock as we may be deleting the first table during a level 0 compaction.
 	s.Lock()
 	defer s.Unlock()
 	// Return false only if L0 is in memory and number of tables is more than number of
 	// ZeroTableStall. For on disk L0, we should just add the tables to the level.
 	if s.db.opt.KeepL0InMemory && len(s.tables) >= s.db.opt.NumLevelZeroTablesStall {
 		return false
 	}

 	s.tables = append(s.tables, t)
 	t.IncrRef()
 	s.totalSize += t.Size()

 	return true
 }

 func (s *levelHandler) numTables() int {
 	s.RLock()
 	defer s.RUnlock()
 	return len(s.tables)
 }

 func (s *levelHandler) close() error {
 	s.RLock()
 	defer s.RUnlock()
 	var err error
 	for _, t := range s.tables {
 		if closeErr := t.Close(); closeErr != nil && err == nil {
 			err = closeErr
 		}
 	}
 	return errors.Wrap(err, "levelHandler.close")
 }

 // getTableForKey acquires a read-lock to access s.tables. It returns a list of tableHandlers.
 func (s *levelHandler) getTableForKey(key []byte) ([]*table.Table, func() error) {
 	s.RLock()
 	defer s.RUnlock()

 	if s.level == 0 {
 		// For level 0, we need to check every table. Remember to make a copy as s.tables may change
 		// once we exit this function, and we don't want to lock s.tables while seeking in tables.
 		// CAUTION: Reverse the tables.
 		out := make([]*table.Table, 0, len(s.tables))
 		for i := len(s.tables) - 1; i >= 0; i-- {
 			out = append(out, s.tables[i])
 			s.tables[i].IncrRef()
 		}
 		return out, func() error {
 			for _, t := range out {
 				if err := t.DecrRef(); err != nil {
 					return err
 				}
 			}
 			return nil
 		}
 	}
 	// For level >= 1, we can do a binary search as key range does not overlap.
 	idx := sort.Search(len(s.tables), func(i int) bool {
 		return y.CompareKeys(s.tables[i].Biggest(), key) >= 0
 	})
 	if idx >= len(s.tables) {
 		// Given key is strictly > than every element we have.
 		return nil, func() error { return nil }
 	}
 	tbl := s.tables[idx]
 	tbl.IncrRef()
 	return []*table.Table{tbl}, tbl.DecrRef
 }

 // get returns value for a given key or the key after that. If not found, return nil.
 func (s *levelHandler) get(key []byte) (y.ValueStruct, error) {
 	tables, decr := s.getTableForKey(key)
 	keyNoTs := y.ParseKey(key)

 	hash := farm.Fingerprint64(keyNoTs)
 	var maxVs y.ValueStruct
 	for _, th := range tables {
 		if th.DoesNotHave(hash) {
 			y.NumLSMBloomHits.Add(s.strLevel, 1)
 			continue
 		}

 		it := th.NewIterator(false)
 		defer it.Close()

 		y.NumLSMGets.Add(s.strLevel, 1)
 		it.Seek(key)
 		if !it.Valid() {
 			continue
 		}
 		if y.SameKey(key, it.Key()) {
 			if version := y.ParseTs(it.Key()); maxVs.Version < version {
 				maxVs = it.ValueCopy()
 				maxVs.Version = version
 			}
 		}
 	}
 	return maxVs, decr()
 }

 // appendIterators appends iterators to an array of iterators, for merging.
 // Note: This obtains references for the table handlers. Remember to close these iterators.
 func (s *levelHandler) appendIterators(iters []y.Iterator, opt *IteratorOptions) []y.Iterator {
 	s.RLock()
 	defer s.RUnlock()

 	if s.level == 0 {
 		// Remember to add in reverse order!
 		// The newer table at the end of s.tables should be added first as it takes precedence.
 		// Level 0 tables are not in key sorted order, so we need to consider them one by one.
 		var out []*table.Table
 		for _, t := range s.tables {
 			if opt.pickTable(t) {
 				out = append(out, t)
 			}
 		}
 		return appendIteratorsReversed(iters, out, opt.Reverse)
 	}

 	tables := opt.pickTables(s.tables)
 	if len(tables) == 0 {
 		return iters
 	}
 	return append(iters, table.NewConcatIterator(tables, opt.Reverse))
 }

 type levelHandlerRLocked struct{}

 // overlappingTables returns the tables that intersect with key range. Returns a half-interval.
 // This function should already have acquired a read lock, and this is so important the caller must
 // pass an empty parameter declaring such.
 func (s *levelHandler) overlappingTables(_ levelHandlerRLocked, kr keyRange) (int, int) {
 	if len(kr.left) == 0 || len(kr.right) == 0 {
 		return 0, 0
 	}
 	left := sort.Search(len(s.tables), func(i int) bool {
 		return y.CompareKeys(kr.left, s.tables[i].Biggest()) <= 0
 	})
 	right := sort.Search(len(s.tables), func(i int) bool {
 		return y.CompareKeys(kr.right, s.tables[i].Smallest()) < 0
 	})
 	return left, right
 }
	/*
	* Copyright 2017 Dgraph Labs, Inc. and Contributors
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package badger

	import (
	"fmt"
	"sort"
	"sync"

	"github.com/dgryski/go-farm"

	"github.com/dgraph-io/badger/v2/table"
	"github.com/dgraph-io/badger/v2/y"
	"github.com/pkg/errors"
	)

	type levelHandler struct {
	// Guards tables, totalSize.
	sync.RWMutex

	// For level >= 1, tables are sorted by key ranges, which do not overlap.
	// For level 0, tables are sorted by time.
	// For level 0, newest table are at the back. Compact the oldest one first, which is at the front.
	tables []*table.Table
	totalSize int64

	// The following are initialized once and const.
	level int
	strLevel string
	maxTotalSize int64
	db *DB
	}

	func (s *levelHandler) getTotalSize() int64 {
	s.RLock()
	defer s.RUnlock()
	return s.totalSize
	}

	// initTables replaces s.tables with given tables. This is done during loading.
	func (s levelHandler) initTables(tables []table.Table) {
	s.Lock()
	defer s.Unlock()

	s.tables = tables
	s.totalSize = 0
	for _, t := range tables {
	s.totalSize += t.Size()
	}

	if s.level == 0 {
	// Key range will overlap. Just sort by fileID in ascending order
	// because newer tables are at the end of level 0.
	sort.Slice(s.tables, func(i, j int) bool {
	return s.tables[i].ID() < s.tables[j].ID()
	})
	} else {
	// Sort tables by keys.
	sort.Slice(s.tables, func(i, j int) bool {
	return y.CompareKeys(s.tables[i].Smallest(), s.tables[j].Smallest()) < 0
	})
	}
	}

	// deleteTables remove tables idx0, ..., idx1-1.
	func (s levelHandler) deleteTables(toDel []table.Table) error {
	s.Lock() // s.Unlock() below

	toDelMap := make(map[uint64]struct{})
	for _, t := range toDel {
	toDelMap[t.ID()] = struct{}{}
	}

	// Make a copy as iterators might be keeping a slice of tables.
	var newTables []*table.Table
	for _, t := range s.tables {
	_, found := toDelMap[t.ID()]
	if !found {
	newTables = append(newTables, t)
	continue
	}
	s.totalSize -= t.Size()
	}
	s.tables = newTables

	s.Unlock() // Unlock s _before_ we DecrRef our tables, which can be slow.

	return decrRefs(toDel)
	}

	// replaceTables will replace tables[left:right] with newTables. Note this EXCLUDES tables[right].
	// You must call decr() to delete the old tables _after_ writing the update to the manifest.
	func (s levelHandler) replaceTables(toDel, toAdd []table.Table) error {
	// Need to re-search the range of tables in this level to be replaced as other goroutines might
	// be changing it as well. (They can't touch our tables, but if they add/remove other tables,
	// the indices get shifted around.)
	s.Lock() // We s.Unlock() below.

	toDelMap := make(map[uint64]struct{})
	for _, t := range toDel {
	toDelMap[t.ID()] = struct{}{}
	}
	var newTables []*table.Table
	for _, t := range s.tables {
	_, found := toDelMap[t.ID()]
	if !found {
	newTables = append(newTables, t)
	continue
	}
	s.totalSize -= t.Size()
	}

	// Increase totalSize first.
	for _, t := range toAdd {
	s.totalSize += t.Size()
	t.IncrRef()
	newTables = append(newTables, t)
	}

	// Assign tables.
	s.tables = newTables
	sort.Slice(s.tables, func(i, j int) bool {
	return y.CompareKeys(s.tables[i].Smallest(), s.tables[j].Smallest()) < 0
	})
	s.Unlock() // s.Unlock before we DecrRef tables -- that can be slow.
	return decrRefs(toDel)
	}

	// addTable adds toAdd table to levelHandler. Normally when we add tables to levelHandler, we sort
	// tables based on table.Smallest. This is required for correctness of the system. But in case of
	// stream writer this can be avoided. We can just add tables to levelHandler's table list
	// and after all addTable calls, we can sort table list(check sortTable method).
	// NOTE: levelHandler.sortTables() should be called after call addTable calls are done.
	func (s levelHandler) addTable(t table.Table) {
	s.Lock()
	defer s.Unlock()

	s.totalSize += t.Size() // Increase totalSize first.
	t.IncrRef()
	s.tables = append(s.tables, t)
	}

	// sortTables sorts tables of levelHandler based on table.Smallest.
	// Normally it should be called after all addTable calls.
	func (s *levelHandler) sortTables() {
	s.RLock()
	defer s.RUnlock()

	sort.Slice(s.tables, func(i, j int) bool {
	return y.CompareKeys(s.tables[i].Smallest(), s.tables[j].Smallest()) < 0
	})
	}

	func decrRefs(tables []*table.Table) error {
	for _, table := range tables {
	if err := table.DecrRef(); err != nil {
	return err
	}
	}
	return nil
	}

	func newLevelHandler(db DB, level int) levelHandler {
	return &levelHandler{
	level: level,
	strLevel: fmt.Sprintf("l%d", level),
	db: db,
	}
	}

	// tryAddLevel0Table returns true if ok and no stalling.
	func (s levelHandler) tryAddLevel0Table(t table.Table) bool {
	y.AssertTrue(s.level == 0)
	// Need lock as we may be deleting the first table during a level 0 compaction.
	s.Lock()
	defer s.Unlock()
	// Return false only if L0 is in memory and number of tables is more than number of
	// ZeroTableStall. For on disk L0, we should just add the tables to the level.
	if s.db.opt.KeepL0InMemory && len(s.tables) >= s.db.opt.NumLevelZeroTablesStall {
	return false
	}

	s.tables = append(s.tables, t)
	t.IncrRef()
	s.totalSize += t.Size()

	return true
	}

	func (s *levelHandler) numTables() int {
	s.RLock()
	defer s.RUnlock()
	return len(s.tables)
	}

	func (s *levelHandler) close() error {
	s.RLock()
	defer s.RUnlock()
	var err error
	for _, t := range s.tables {
	if closeErr := t.Close(); closeErr != nil && err == nil {
	err = closeErr
	}
	}
	return errors.Wrap(err, "levelHandler.close")
	}

	// getTableForKey acquires a read-lock to access s.tables. It returns a list of tableHandlers.
	func (s levelHandler) getTableForKey(key []byte) ([]table.Table, func() error) {
	s.RLock()
	defer s.RUnlock()

	if s.level == 0 {
	// For level 0, we need to check every table. Remember to make a copy as s.tables may change
	// once we exit this function, and we don't want to lock s.tables while seeking in tables.
	// CAUTION: Reverse the tables.
	out := make([]*table.Table, 0, len(s.tables))
	for i := len(s.tables) - 1; i >= 0; i-- {
	out = append(out, s.tables[i])
	s.tables[i].IncrRef()
	}
	return out, func() error {
	for _, t := range out {
	if err := t.DecrRef(); err != nil {
	return err
	}
	}
	return nil
	}
	}
	// For level >= 1, we can do a binary search as key range does not overlap.
	idx := sort.Search(len(s.tables), func(i int) bool {
	return y.CompareKeys(s.tables[i].Biggest(), key) >= 0
	})
	if idx >= len(s.tables) {
	// Given key is strictly > than every element we have.
	return nil, func() error { return nil }
	}
	tbl := s.tables[idx]
	tbl.IncrRef()
	return []*table.Table{tbl}, tbl.DecrRef
	}

	// get returns value for a given key or the key after that. If not found, return nil.
	func (s *levelHandler) get(key []byte) (y.ValueStruct, error) {
	tables, decr := s.getTableForKey(key)
	keyNoTs := y.ParseKey(key)

	hash := farm.Fingerprint64(keyNoTs)
	var maxVs y.ValueStruct
	for _, th := range tables {
	if th.DoesNotHave(hash) {
	y.NumLSMBloomHits.Add(s.strLevel, 1)
	continue
	}

	it := th.NewIterator(false)
	defer it.Close()

	y.NumLSMGets.Add(s.strLevel, 1)
	it.Seek(key)
	if !it.Valid() {
	continue
	}
	if y.SameKey(key, it.Key()) {
	if version := y.ParseTs(it.Key()); maxVs.Version < version {
	maxVs = it.ValueCopy()
	maxVs.Version = version
	}
	}
	}
	return maxVs, decr()
	}

	// appendIterators appends iterators to an array of iterators, for merging.
	// Note: This obtains references for the table handlers. Remember to close these iterators.
	func (s levelHandler) appendIterators(iters []y.Iterator, opt IteratorOptions) []y.Iterator {
	s.RLock()
	defer s.RUnlock()

	if s.level == 0 {
	// Remember to add in reverse order!
	// The newer table at the end of s.tables should be added first as it takes precedence.
	// Level 0 tables are not in key sorted order, so we need to consider them one by one.
	var out []*table.Table
	for _, t := range s.tables {
	if opt.pickTable(t) {
	out = append(out, t)
	}
	}
	return appendIteratorsReversed(iters, out, opt.Reverse)
	}

	tables := opt.pickTables(s.tables)
	if len(tables) == 0 {
	return iters
	}
	return append(iters, table.NewConcatIterator(tables, opt.Reverse))
	}

	type levelHandlerRLocked struct{}

	// overlappingTables returns the tables that intersect with key range. Returns a half-interval.
	// This function should already have acquired a read lock, and this is so important the caller must
	// pass an empty parameter declaring such.
	func (s *levelHandler) overlappingTables(_ levelHandlerRLocked, kr keyRange) (int, int) {
	if len(kr.left) == 0 \|\| len(kr.right) == 0 {
	return 0, 0
	}
	left := sort.Search(len(s.tables), func(i int) bool {
	return y.CompareKeys(kr.left, s.tables[i].Biggest()) <= 0
	})
	right := sort.Search(len(s.tables), func(i int) bool {
	return y.CompareKeys(kr.right, s.tables[i].Smallest()) < 0
	})
	return left, right
	}