第三节 Open()实现分析

Open()方法主要用来创建一个boltdb的DB对象,底层会执行新建或者打开存储数据的文件,当指定的文件不存在时, boltdb就会新建一个数据文件。否则的话,就直接加载指定的数据库文件内容。

值得注意的是,boltdb会根据Open时options传递的参数(ReadOnly)来判断到底加互斥锁还是共享锁:只读模式加共享锁,否则加互斥锁。

新建时: 会调用init()方法,内部主要是新建一个文件,然后第0页、第1页写入元数据信息;第2页写入freelist信息;第3页写入bucket leaf信息。并最终刷盘。

加载时: 会读取第0页内容,也就是元信息,然后对其进行校验(validate)。校验通过后,从元信息中获取pageSize;否则的话,使用操作系统默认的pagesize(一般4k)。

上述操作完成后,会通过mmap来映射数据。最后再根据磁盘页中的freelist数据初始化db的freelist字段。

// Open creates and opens a database at the given path.
// If the file does not exist then it will be created automatically.
// Passing in nil options will cause Bolt to open the database with the default options.
//
// The data file is opened (read-only when options.ReadOnly is set), locked,
// initialized if it is empty, memory-mapped, and finally its freelist page is
// loaded. If any step after the file has been opened fails, db.close() is
// called before the error is returned so that the partially opened database
// is not left behind.
func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
    var db = &DB{opened: true}

    // Set default options if no options are provided.
    if options == nil {
        options = DefaultOptions
    }
    db.NoGrowSync = options.NoGrowSync
    db.MmapFlags = options.MmapFlags

    // Set default values for later DB operations.
    db.MaxBatchSize = DefaultMaxBatchSize
    db.MaxBatchDelay = DefaultMaxBatchDelay
    db.AllocSize = DefaultAllocSize

    flag := os.O_RDWR
    if options.ReadOnly {
        flag = os.O_RDONLY
        db.readOnly = true
    }

    // Open data file and separate sync handler for metadata writes.
    db.path = path
    var err error
    // Open (or create) the database file.
    if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
        _ = db.close()
        return nil, err
    }

    // Lock file so that other processes using Bolt in read-write mode cannot
    // use the database  at the same time. This would cause corruption since
    // the two processes would write meta pages and free pages separately.
    // The database file is locked exclusively (only one process can grab the lock)
    // if !options.ReadOnly.
    // The database file is locked using the shared lock (more than one process may
    // hold a lock at the same time) otherwise (options.ReadOnly is set).

    // Read-only mode takes a shared lock; otherwise an exclusive lock is taken.
    if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil {
        _ = db.close()
        return nil, err
    }

    // Default values for test hooks
    db.ops.writeAt = db.file.WriteAt

    // Initialize the database if it doesn't exist.
    if info, err := db.file.Stat(); err != nil {
        // The file is already open and locked at this point, so close the DB
        // like every other error path in this function does.
        _ = db.close()
        return nil, err
    } else if info.Size() == 0 {
        // Initialize new files with meta pages.
        if err := db.init(); err != nil {
            // Same cleanup as above: do not leave the file open on failure.
            _ = db.close()
            return nil, err
        }
    } else {
        // Existing file: read the first meta page to determine the page size.
        // The buffer is 0x1000 (4096) bytes.
        var buf [0x1000]byte
        // NOTE(review): when ReadAt fails, db.pageSize is left at its zero
        // value and the error is not reported here -- confirm that the later
        // mmap/validate steps reject such a file.
        if _, err := db.file.ReadAt(buf[:], 0); err == nil {
            // Only the page size is taken from this page.
            m := db.pageInBuffer(buf[:], 0).meta()
            if err := m.validate(); err != nil {
                // If we can't read the page size, we can assume it's the same
                // as the OS -- since that's how the page size was chosen in the
                // first place.
                //
                // If the first page is invalid and this OS uses a different
                // page size than what the database was created with then we
                // are out of luck and cannot access the database.
                db.pageSize = os.Getpagesize()
            } else {
                db.pageSize = int(m.pageSize)
            }
        }
    }

    // Initialize page pool.
    db.pagePool = sync.Pool{
        New: func() interface{} {
            // Each pooled buffer is exactly one page long.
            return make([]byte, db.pageSize)
        },
    }

    // Memory map the data file.
    if err := db.mmap(options.InitialMmapSize); err != nil {
        _ = db.close()
        return nil, err
    }

    // Read in the freelist.
    db.freelist = newFreelist()
    // The meta page records which page holds the freelist (init sets it to
    // page 2 for a newly created file); load the in-memory freelist from it.
    db.freelist.read(db.page(db.meta().freelist))

    // Mark the database as opened and return.
    return db, nil
}


// init creates a new database file and initializes its meta pages.
//
// It builds the first four pages in one buffer -- two meta pages (ids 0 and
// 1), an empty freelist page (id 2) and an empty leaf page (id 3) -- writes
// the buffer at offset 0 through db.ops.writeAt and then calls fdatasync.
// The first error from the write or the sync is returned.
func (db *DB) init() error {
    // A new file adopts the OS page size.
    db.pageSize = os.Getpagesize()

    // One buffer holding the four initial pages.
    buf := make([]byte, db.pageSize*4)

    // Pages 0 and 1 both carry the meta data.
    for _, id := range []pgid{0, 1} {
        metaPage := db.pageInBuffer(buf, id)
        metaPage.id = id
        metaPage.flags = metaPageFlag

        // Fill in the meta record stored on this page.
        m := metaPage.meta()
        m.magic = magic
        m.version = version
        m.pageSize = uint32(db.pageSize)
        m.freelist = 2
        m.root = bucket{root: 3}
        m.pgid = 4
        m.txid = txid(id)
        // Assigned last, after every other field has been set.
        m.checksum = m.sum64()
    }

    // Page id 2: an empty freelist.
    freePage := db.pageInBuffer(buf, pgid(2))
    freePage.id = pgid(2)
    freePage.flags = freelistPageFlag
    freePage.count = 0

    // Page id 3: an empty leaf page, referenced above as the root bucket.
    leafPage := db.pageInBuffer(buf, pgid(3))
    leafPage.id = pgid(3)
    leafPage.flags = leafPageFlag
    leafPage.count = 0

    // Write all four pages to the data file in one call.
    if _, err := db.ops.writeAt(buf, 0); err != nil {
        return err
    }

    // Sync the data file after the write.
    if err := fdatasync(db); err != nil {
        return err
    }
    return nil
}

// page returns a *page that points into db.data at byte offset
// id * db.pageSize, i.e. the page with the given id in the mapped data.
func (db *DB) page(id pgid) *page {
    offset := pgid(db.pageSize) * id
    start := &db.data[offset]
    return (*page)(unsafe.Pointer(start))
}

// pageInBuffer returns a *page that points into b at byte offset
// id * db.pageSize, using the database's current page size.
func (db *DB) pageInBuffer(b []byte, id pgid) *page {
    offset := id * pgid(db.pageSize)
    return (*page)(unsafe.Pointer(&b[offset]))
}

// mmap opens the underlying memory-mapped file and initializes the meta references.
// minsz is the minimum size that the new mmap can be.
//
// It holds db.mmaplock for the whole call. An error is returned when the file
// cannot be stat'ed, when it is smaller than two pages, when mmapSize, munmap
// or the platform mmap call fails, or when both meta pages fail validation.
func (db *DB) mmap(minsz int) error {
    db.mmaplock.Lock()
    defer db.mmaplock.Unlock()

    info, err := db.file.Stat()
    if err != nil {
        return fmt.Errorf("mmap stat error: %s", err)
    }

    // The file must be at least two pages long (room for both meta pages).
    fileSize := int(info.Size())
    if fileSize < db.pageSize*2 {
        return fmt.Errorf("file size too small")
    }

    // Map at least minsz bytes, then round up to an allowed mmap size.
    wanted := fileSize
    if minsz > wanted {
        wanted = minsz
    }
    mapSize, err := db.mmapSize(wanted)
    if err != nil {
        return err
    }

    // Dereference all mmap references before unmapping.
    if db.rwtx != nil {
        db.rwtx.root.dereference()
    }

    // Drop the current mapping before creating the new one.
    if err := db.munmap(); err != nil {
        return err
    }

    // Memory-map the data file as a byte slice.
    if err := mmap(db, mapSize); err != nil {
        return err
    }

    // Save references to the two meta pages inside the new mapping.
    db.meta0 = db.page(0).meta()
    db.meta1 = db.page(1).meta()

    // Validate the meta pages. An error is returned only when both fail:
    // a single invalid meta page can be recovered from using the other one.
    firstErr := db.meta0.validate()
    secondErr := db.meta1.validate()
    if firstErr == nil || secondErr == nil {
        return nil
    }
    return firstErr
}

// mmapSize determines the appropriate size for the mmap given the current size
// of the database. The minimum size is 32KB and doubles until it reaches 1GB.
// Returns an error if the new mmap size is greater than the max allowed.
//
// Sizes above 1GB are rounded up to a multiple of maxMmapStep, then to a
// multiple of the page size, and finally capped at maxMapSize.
func (db *DB) mmapSize(size int) (int, error) {
    // Pick the smallest power of two between 1<<15 (32KB) and 1<<30 (1GB)
    // that is large enough for size.
    for shift := uint(15); shift <= 30; shift++ {
        if candidate := 1 << shift; size <= candidate {
            return candidate, nil
        }
    }

    // Beyond 1GB: reject requests above the maximum allowed.
    if size > maxMapSize {
        return 0, fmt.Errorf("mmap too large")
    }

    // Round up to the next multiple of the growth step.
    rounded := int64(size)
    step := int64(maxMmapStep)
    if rem := rounded % step; rem > 0 {
        rounded += step - rem
    }

    // Ensure that the mmap size is a multiple of the page size.
    pageBytes := int64(db.pageSize)
    if rounded%pageBytes != 0 {
        rounded = (rounded/pageBytes + 1) * pageBytes
    }

    // Rounding may have pushed the size past the maximum; clamp it.
    if rounded > maxMapSize {
        rounded = maxMapSize
    }

    return int(rounded), nil
}

results matching ""

    No results matching ""