Source code for zlmdb._lmdb_vendor.tool

#
# Copyright 2013 The py-lmdb authors, all rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted only as authorized by the OpenLDAP
# Public License.
#
# A copy of this license is available in the file LICENSE in the
# top-level directory of the distribution or, alternatively, at
# <http://www.OpenLDAP.org/license.html>.
#
# OpenLDAP is a registered trademark of the OpenLDAP Foundation.
#
# Individual files and/or contributed packages may be copyright by
# other parties and/or subject to additional restrictions.
#
# This work also contains materials derived from public sources.
#
# Additional information about OpenLDAP can be obtained at
# <http://www.openldap.org/>.
#

"""
Basic tools for working with LMDB.

    copy: Consistent high-speed backup of an environment.
        %prog copy -e source.lmdb target.lmdb

    copyfd: Consistent high-speed backup of an environment to stdout.
        %prog copyfd -e source.lmdb > target.lmdb/data.mdb

    drop: Delete one or more sub-databases.
        %prog drop db1

    dump: Dump one or more databases to disk in 'cdbmake' format.
        %prog dump [db1=file1.cdbmake db2=file2.cdbmake]

        If no databases are given, dumps the main database to 'main.cdbmake'.

    edit: Add/delete/replace values from a database.
        %prog edit --set key=value --set-file key=/path \\
                   --add key=value --add-file key=/path/to/file \\
                   --delete key

    get: Read one or more values from a database.
        %prog get [<key1> [<keyN> [..]]]

    readers: Display readers in the lock table
        %prog readers -e /path/to/db [-c]

        If -c is specified, clear stale readers.

    restore: Read one or more databases from disk in 'cdbmake' format.
        %prog restore db1=file1.cdbmake db2=file2.cdbmake

        The special db name ":main:" may be used to indicate the main DB.

    rewrite: Re-create an environment using MDB_APPEND
        %prog rewrite -e src.lmdb -E dst.lmdb [<db1> [<dbN> ..]]

        If no databases are given, rewrites only the main database.

    shell: Open interactive console with ENV set to the open environment.

    stat: Print environment statistics.

    warm: Read environment into page cache sequentially.

    watch: Show live environment statistics
"""

import array
import collections
import csv
import functools
import optparse
import os
import pprint
import signal
import string
import struct
import sys
import time
from io import BytesIO as StringIO
from typing import NoReturn

# zlmdb adaptation: vendored as zlmdb._lmdb_vendor (CFFI-only fork of py-lmdb).
import zlmdb._lmdb_vendor as lmdb


# Buffer size in bytes (10 MiB) passed to open() for dump/restore files.
BUF_SIZE = 10 * 1024 * 1024

# Environment opened by main(); the cmd_* functions assert it is set.
ENV: 'lmdb.Environment | None' = None

# Database handle opened by main() when --db is given; None otherwise.
DB = None

# How strings get encoded to and decoded from DB
ENCODING = 'utf-8'
def _to_bytes(s):
    """Return `s` as a bytes instance.

    The argument is first passed through ``str()`` and the resulting text
    is then encoded with the module-level ``ENCODING``.
    """
    text = str(s)
    return text.encode(ENCODING)
def isprint(c):
    """Tell whether the character `c` prints visibly.

    Returns ``True`` only for members of ``string.printable`` whose code
    point is above 16, which rules out characters that would move the
    printing position (tab, newline, carriage return, ...).
    """
    if c not in string.printable:
        return False
    return ord(c) > 16
def xxd(s):
    """Return a vaguely /usr/bin/xxd formatted representation of the
    bytestring `s`.

    Each output row covers up to 16 input bytes and consists of a 7-digit
    hexadecimal offset followed by ``:``, the bytes as hex digits grouped
    in pairs, and an ASCII column in which non-printable bytes are shown
    as ``.``. Every row, including the last, ends with a newline; empty
    input yields a single newline.

    The hex column of a short final row is padded to full width so the
    ASCII column always lines up, and the ASCII column is emitted for every
    row (including when the final row holds exactly one byte).

    :param s: any object accepted by ``bytes()`` (bytes, memoryview, ...).
    :returns: the formatted dump as ``str``.
    """
    data = bytes(s)
    # A full row is 8 groups of " xxxx" -> 40 characters.
    hex_width = 40
    rows = []
    for offset in range(0, len(data), 16):
        row = data[offset:offset + 16]
        pieces = []
        for i, byte in enumerate(row):
            if not (i % 2):
                pieces.append(' ')
            pieces.append('%02x' % (byte,))
        # 0x20..0x7e is exactly the set isprint() accepts for single bytes.
        text = ''.join(chr(b) if 0x20 <= b < 0x7f else '.' for b in row)
        rows.append('%07x:%s %s' % (offset,
                                    ''.join(pieces).ljust(hex_width),
                                    text))
    return '\n'.join(rows) + '\n'
def make_parser():
    """Build the ``optparse.OptionParser`` for the command-line tool.

    The usage text is the module docstring (if any) appended to a one-line
    synopsis. Global options come first, followed by one option group per
    command that takes command-specific options ("copy", "edit",
    "readers" and "watch").

    :returns: a configured ``optparse.OptionParser``.
    """
    parser = optparse.OptionParser()
    parser.prog = 'python -mlmdb'
    parser.usage = '%prog [options] <command>\n' + (__doc__ or '').rstrip()
    parser.add_option('-e', '--env', help='Environment file to open')
    parser.add_option('-d', '--db', help='Database to open (default: main)')
    # main() compares this option's value against the literal 'READ', so
    # the option takes an argument rather than acting as a flag.
    parser.add_option('-r', '--read',
                      help='Open environment read-only (pass the value READ)')
    parser.add_option('-S', '--map_size', type='int', default='10',
                      help='Map size in megabytes (default: 10)')
    parser.add_option('-s', '--use-single-file', action='store_true',
                      help='The database was created as a single file and '
                           'not a subdirectory')
    # FIXME: implement --all
    # parser.add_option('-a', '--all', action='store_true',
    #                   help='Make "dump" dump all databases')
    parser.add_option('-E', '--target_env',
                      help='Target environment file for "rewrite"')
    parser.add_option('-x', '--xxd', action='store_true',
                      help='Print values in xxd format')
    parser.add_option('-M', '--max-dbs', type='int', default=128,
                      help='Maximum open DBs (default: 128)')
    parser.add_option('--out-fd', type='int', default=1,
                      help='"copyfd" command target fd')

    group = parser.add_option_group('Options for "copy" command')
    group.add_option('--compact', action='store_true', default=False,
                     help='Perform compaction while copying.')

    group = parser.add_option_group('Options for "edit" command')
    group.add_option('--set', action='append',
                     help='List of key=value pairs to set.')
    group.add_option('--set-file', action='append',
                     help='List of key=path pairs to read from files.')
    group.add_option('--add', action='append',
                     help='List of key=value pairs to add.')
    group.add_option('--add-file', action='append',
                     help='List of key=path pairs to read from files.')
    group.add_option('--delete', action='append',
                     help='List of keys to delete.')

    group = parser.add_option_group('Options for "readers" command')
    group.add_option('-c', '--clean', action='store_true',
                     help='Clean stale readers? (default: no)')

    group = parser.add_option_group('Options for "watch" command')
    group.add_option('--csv', action='store_true',
                     help='Generate CSV instead of terminal output.')
    group.add_option('--interval', type='int', default=1,
                     help='Interval size (default: 1sec)')
    group.add_option('--window', type='int', default=10,
                     help='Average window size (default: 10)')
    return parser
def die(fmt, *args) -> NoReturn:
    """Report a fatal error on stderr and terminate with exit status 1.

    :param fmt: the message, or a %-style format string when `args` are
        given.
    :param args: optional values interpolated into `fmt`.
    :raises SystemExit: always, with code 1.
    """
    message = fmt % args if args else fmt
    sys.stderr.write('lmdb.tool: %s\n' % (message,))
    raise SystemExit(1)
def dump_cursor_to_fp(cursor, fp):
    """Write every (key, value) pair yielded by `cursor` to `fp`.

    Each record is emitted as ``+<klen>,<vlen>:<key>-><value>`` followed by
    a newline, and one extra newline terminates the stream.

    :param cursor: iterable of (key, value) pairs.
    :param fp: binary file object to write to.
    """
    emit = fp.write
    arrow = _to_bytes('->')
    newline = _to_bytes('\n')
    for key, value in cursor:
        header = '+%d,%d:' % (len(key), len(value))
        emit(_to_bytes(header))
        emit(key)
        emit(arrow)
        emit(value)
        emit(newline)
    emit(newline)
def db_map_from_args(args):
    """Turn ``name=path`` command-line arguments into a database map.

    Each argument is split at the first ``=``. The special name ``:main:``
    selects the main database and is stored under the key ``None``. With no
    arguments at all, the main database is mapped to ``main.cdbmake`` under
    the key ``':main:'``.

    Exits through ``die()`` when an argument has no ``=`` or when a
    database is named twice.

    :param args: list of ``name=path`` strings.
    :returns: dict mapping database name to ``(db handle, path)``.
    """
    assert ENV is not None
    mapping = {}
    for spec in args:
        name, eq, path = spec.partition('=')
        if not eq:
            die('DB specification missing "=": %r', spec)
        if name == ':main:':
            name = None
        if name in mapping:
            die('DB specified twice: %r', spec)
        handle = ENV.open_db(_to_bytes(name) if name else None)
        mapping[name] = (handle, path)

    if mapping:
        return mapping
    mapping[':main:'] = (ENV.open_db(None), 'main.cdbmake')
    return mapping
def cmd_copy(opts, args):
    """Implement the "copy" command: back the environment up to a new
    directory.

    Exactly one argument, the output directory, is required, and it must
    not exist yet; it is created with mode 0755 before ``ENV.copy()`` is
    called with the ``--compact`` setting.
    """
    assert ENV is not None
    if len(args) != 1:
        die('Please specify output directory (see --help)')

    (output_dir,) = args
    if os.path.exists(output_dir):
        die('Output directory %r already exists.', output_dir)

    os.makedirs(output_dir, 0o755)
    print('Running copy to %r....' % (output_dir,))
    ENV.copy(output_dir, compact=opts.compact)
def cmd_copyfd(opts, args):
    """Implement the "copyfd" command: copy the environment to an open
    file descriptor.

    The command accepts no positional arguments. The descriptor given by
    ``--out-fd`` is checked with ``os.fstat()`` before ``ENV.copyfd()`` is
    called, so a bad descriptor produces an error message via ``die()``.
    """
    assert ENV is not None
    if args:
        die('"copyfd" command takes no arguments (see --help)')

    target_fd = opts.out_fd
    try:
        os.fstat(target_fd)
    except OSError as exc:
        die('Bad --out-fd %d: %s', target_fd, exc)

    ENV.copyfd(target_fd)
def cmd_dump(opts, args):
    """Implement the "dump" command: write databases to files.

    The ``name=path`` arguments are resolved with ``db_map_from_args()``.
    All databases are dumped inside a single read transaction, each through
    ``dump_cursor_to_fp()`` into its own output file.
    """
    assert ENV is not None
    targets = db_map_from_args(args)
    with ENV.begin(buffers=True) as txn:
        for db, path in targets.values():
            with open(path, 'wb', BUF_SIZE) as out:
                print('Dumping to %r...' % (path,))
                dump_cursor_to_fp(txn.cursor(db=db), out)
def restore_cursor_from_fp(txn, fp, db):
    """Load records from `fp` into `db` and return how many were stored.

    The input must use the layout written by ``dump_cursor_to_fp()``:
    ``+<klen>,<dlen>:<key>-><data>`` plus a newline per record, with a lone
    newline terminating the stream. Malformed input aborts through
    ``die()`` with the 1-based number of the offending record.

    :param txn: write transaction; ``txn.put(key, data, db=db)`` is called
        once per record.
    :param fp: binary file object positioned at the first record.
    :param db: database handle passed through to ``txn.put()``.
    :returns: the number of records stored (the terminator line is not
        counted).
    """
    read = fp.read
    read1 = functools.partial(read, 1)

    def read_until(sep):
        # Stop at EOF as well as at `sep`: read() returns b'' forever once
        # the file is exhausted, so waiting only for `sep` would never end
        # on a truncated file. A short result then fails the int() parse
        # or the separator check below.
        collected = []
        for byte in iter(read1, b''):
            if byte == sep:
                break
            collected.append(byte)
        return b''.join(collected)

    stored = 0
    while True:
        # 1-based number of the record being parsed, for error messages.
        rec_nr = stored + 1
        plus = read(1)
        if plus == b'\n':
            break
        elif plus != b'+':
            die('bad or missing plus, line/record #%d', rec_nr)
        try:
            klen = int(read_until(b','), 10)
            dlen = int(read_until(b':'), 10)
        except ValueError:
            die('bad or missing length, line/record #%d', rec_nr)
        key = read(klen)
        if read(2) != b'->':
            die('bad or missing separator, line/record #%d', rec_nr)
        data = read(dlen)
        if (len(key) + len(data)) != (klen + dlen):
            die('short key or data, line/record #%d', rec_nr)
        if read(1) != b'\n':
            die('bad line ending, line/record #%d', rec_nr)
        txn.put(key, data, db=db)
        stored += 1
    return stored
def cmd_drop(opts, args):
    """Implement the "drop" command: delete one or more sub-databases.

    All names are validated before anything is opened or dropped, so a
    request that includes ``:main:`` is rejected as a whole. Previously the
    check ran only after ``open_db()`` had already been called with the
    name ``:main:`` and after earlier arguments had been dropped.

    :param args: names of the sub-databases to drop; at least one is
        required.
    """
    assert ENV is not None
    if not args:
        die('Must specify at least one sub-database (see --help)')
    if ':main:' in args:
        die('Cannot drop main DB')

    for name in args:
        db = ENV.open_db(_to_bytes(name))
        print('Dropping DB %r...' % (name,))
        with ENV.begin(write=True) as txn:
            txn.drop(db)
def cmd_readers(opts, args):
    """Implement the "readers" command: print the reader lock table.

    With ``--clean``, ``ENV.reader_check()`` is run first and the number it
    returns is reported as cleaned stale entries.
    """
    assert ENV is not None
    if opts.clean:
        cleaned = ENV.reader_check()
        print('Cleaned %d stale entries.' % (cleaned,))
    print(ENV.readers())
def cmd_restore(opts, args):
    """Implement the "restore" command: load databases from files.

    The ``name=path`` arguments are resolved with ``db_map_from_args()``.
    Every file is loaded inside one write transaction through
    ``restore_cursor_from_fp()``, and the count it returns is printed per
    file.
    """
    assert ENV is not None
    sources = db_map_from_args(args)
    with ENV.begin(buffers=True, write=True) as txn:
        for db, path in sources.values():
            with open(path, 'rb', BUF_SIZE) as src:
                print('Restoring from %r...' % (path,))
                count = restore_cursor_from_fp(txn, src, db)
                print('Loaded %d keys from %r' % (count, path))
def delta(hst):
    """Return the differences between consecutive elements of `hst`.

    The result has one element fewer than the input; an input with fewer
    than two elements gives an empty list.
    """
    samples = list(hst)
    return [later - earlier for earlier, later in zip(samples, samples[1:])]
[docs] SYS_BLOCK = '/sys/block'
[docs] def _find_diskstat(path): if not os.path.exists(SYS_BLOCK): return st = os.stat(path) devs = '%s:%s' % (st.st_dev >> 8, st.st_dev & 0xff) def maybe(rootpath): dpath = os.path.join(rootpath, 'dev') if os.path.exists(dpath): with open(dpath) as fp: if fp.read().strip() == devs: return os.path.join(rootpath, 'stat') for name in os.listdir(SYS_BLOCK): basepath = os.path.join(SYS_BLOCK, name) statpath = maybe(basepath) if statpath: return statpath for name in os.listdir(basepath): base2path = os.path.join(basepath, name) statpath = maybe(base2path) if statpath: return statpath
class DiskStatter:
    """Expose the counters of a block device ``stat`` file as attributes.

    The file is opened once and re-read on every ``refresh()``. The
    whitespace-separated integers it contains are assigned, in order, to
    the attribute names listed in ``FIELDS``. Columns beyond those named in
    ``FIELDS`` are ignored, so files with additional trailing counters can
    still be read.

    :param path: path of the stat file to read.
    """

    # Attribute names for the leading columns of the stat file, in order.
    FIELDS = (
        'reads',
        'reads_merged',
        'sectors_read',
        'read_ms',
        'writes',
        'writes_merged',
        'sectors_written',
        'write_ms',
        'io_count',
        'io_ms',
        'total_ms'
    )

    sectors_read: int
    sectors_written: int

    def __init__(self, path):
        self.fp = open(path)
        try:
            self.refresh()
        except Exception:
            # Do not leak the handle when the first read cannot be parsed.
            self.fp.close()
            raise

    def refresh(self):
        """Re-read the stat file and update the counter attributes."""
        self.fp.seek(0)
        values = self.fp.read().split()
        # zip() stops at the shorter sequence, so extra columns are skipped
        # instead of indexing past the end of FIELDS.
        vars(self).update((name, int(text))
                          for name, text in zip(self.FIELDS, values))

    def close(self):
        """Close the underlying stat file."""
        self.fp.close()
def cmd_watch(opts, args):
    """Implement the "watch" command: print live environment statistics.

    Once per ``--interval`` seconds, ``ENV.stat()`` and ``ENV.info()`` are
    sampled and one row is written to stdout, either as CSV (``--csv``) or
    as right-aligned columns. The "/s" columns show the average per-second
    change over the last ``--window`` samples. When a block device stat
    file is found for the environment path, sectors read/written per second
    are appended. The loop runs until interrupted with Ctrl-C.

    In terminal mode the header row is repeated whenever the terminal width
    changes and once per screenful. The repeat period is clamped to at
    least one row so a very small terminal height cannot cause a division
    by zero. A non-positive ``--interval`` is rejected up front, and the
    disk stat file is closed on exit.
    """
    assert ENV is not None
    if opts.interval <= 0:
        die('--interval must be a positive number of seconds')

    # Rebound on every sample; the column callables below read them through
    # their closures.
    info = {}
    stat = {}

    def window(func):
        """Wrap `func` so it reports its average change per second."""
        history = collections.deque()

        def windowfunc():
            history.append(func())
            if len(history) > opts.window:
                history.popleft()
            if len(history) <= 1:
                return 0
            n = sum(delta(history)) / float(len(history) - 1)
            return n / opts.interval
        return windowfunc

    def envmb():
        return (info['last_pgno'] * stat['psize']) / 1048576.

    # (format, heading, value callable) for every output column.
    cols = [
        ('%d', 'Depth', lambda: stat['depth']),
        ('%d', 'Branch', lambda: stat['branch_pages']),
        ('%d', 'Leaf', lambda: stat['leaf_pages']),
        ('%+d', 'Leaf/s', window(lambda: stat['leaf_pages'])),
        ('%d', 'Oflow', lambda: stat['overflow_pages']),
        ('%+d', 'Oflow/s', window(lambda: stat['overflow_pages'])),
        ('%d', 'Recs', lambda: stat['entries']),
        ('%+d', 'Recs/s', window(lambda: stat['entries'])),
        ('%d', 'Rdrs', lambda: info['num_readers']),
        ('%.2f', 'EnvMb', envmb),
        ('%+.2f', 'EnvMb/s', window(envmb)),
        ('%d', 'Txs', lambda: info['last_txnid']),
        ('%+.2f', 'Txs/s', window(lambda: info['last_txnid']))
    ]

    statter = None
    statpath = _find_diskstat(ENV.path())
    if statpath:
        statter = DiskStatter(statpath)
        cols += [
            ('%+d', 'SctRd/s', window(lambda: statter.sectors_read)),
            ('%+d', 'SctWr/s', window(lambda: statter.sectors_written)),
        ]

    term_width = 0
    widths = [len(head) for _, head, _ in cols]

    writer = None
    if opts.csv:
        writer = csv.writer(sys.stdout, quoting=csv.QUOTE_ALL)
        writer.writerow([head for _, head, _ in cols])

    cnt = 0
    try:
        while True:
            stat = ENV.stat()
            info = ENV.info()
            if statter:
                statter.refresh()

            vals = []
            for i, (fmt, head, func) in enumerate(cols):
                val = fmt % func()
                vals.append(val)
                widths[i] = max(widths[i], len(val))

            if writer is not None:
                writer.writerow(vals)
            else:
                # _TERM_WIDTH/_TERM_HEIGHT may change between iterations
                # (they are updated by the SIGWINCH handler).
                header_every = max(1, _TERM_HEIGHT - 2)
                if term_width != _TERM_WIDTH or not (cnt % header_every):
                    for i, (fmt, head, func) in enumerate(cols):
                        sys.stdout.write(head.rjust(widths[i] + 1))
                    sys.stdout.write('\n')
                    term_width = _TERM_WIDTH
                for i, val in enumerate(vals):
                    sys.stdout.write(val.rjust(widths[i] + 1))
                sys.stdout.write('\n')

            time.sleep(opts.interval)
            cnt += 1
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop watching.
        pass
    finally:
        if statter:
            statter.fp.close()
def cmd_warm(opts, args):
    """Implement the "warm" command: read the data file sequentially.

    The data file (``--env`` itself with ``--use-single-file``, otherwise
    ``data.mdb`` inside it) is read in 32 KiB chunks up to the offset
    ``psize * last_pgno`` taken from ``ENV.stat()``/``ENV.info()``. The
    elapsed time is then printed.

    The file is closed when done, and reading stops at end of file so a
    data file shorter than the computed offset cannot make the loop spin
    forever.
    """
    assert ENV is not None
    stat = ENV.stat()
    info = ENV.info()

    bufsize = 32768
    last_offset = stat['psize'] * info['last_pgno']
    buf = bytearray(bufsize)
    if opts.use_single_file:
        path = opts.env
    else:
        path = os.path.join(opts.env, 'data.mdb')

    t0 = time.time()
    with open(path, 'rb', bufsize) as fp:
        while fp.tell() < last_offset:
            # readinto() returns 0 at end of file.
            if not fp.readinto(buf):
                break
    print('Warmed %.2fmb in %dms' %
          (last_offset / 1048576., 1000 * (time.time() - t0)))
def cmd_rewrite(opts, args):
    """Implement the "rewrite" command: re-create databases in a new
    environment using appending puts.

    The target environment (``-E``) is opened with twice the source map
    size. Every database named in `args` is copied in key order with
    ``append=True``; ``:main:`` selects the main database, and with no
    arguments only the main database is copied. The target is synced and
    closed at the end.

    ``:main:`` is opened with a ``None`` name. It was previously passed
    through ``_to_bytes()``, which turned it into the literal name
    ``b'None'``. The target environment is also closed when copying fails.

    :param args: names of the databases to rewrite.
    """
    assert ENV is not None
    if not opts.target_env:
        die('Must specify target environment path with -E')

    src_info = ENV.info()
    target_env = lmdb.open(opts.target_env,
                           map_size=src_info['map_size'] * 2,
                           max_dbs=opts.max_dbs, sync=False,
                           writemap=True, map_async=True,
                           metasync=False)
    try:
        dbs = []
        for arg in args:
            db_key = None if arg == ':main:' else _to_bytes(arg)
            src_db = ENV.open_db(db_key)
            dst_db = target_env.open_db(db_key)
            dbs.append((arg, src_db, dst_db))

        if not dbs:
            dbs.append((':main:',
                        ENV.open_db(None),
                        target_env.open_db(None)))

        for name, src_db, dst_db in dbs:
            print('Writing %r...' % (name,))
            with target_env.begin(db=dst_db, write=True) as wtxn:
                with ENV.begin(db=src_db, buffers=True) as rtxn:
                    for key, value in rtxn.cursor():
                        # Copy out of the read buffers before storing.
                        wtxn.put(bytes(key), bytes(value), append=True)

        print('Syncing..')
        target_env.sync(True)
    finally:
        target_env.close()
def cmd_get(opts, args):
    """Implement the "get" command: print the values stored under the keys
    given in `args`.

    Keys without a value are reported as missing. When more than one key is
    requested, each value is preceded by a line naming its key. Values are
    printed via ``xxd()`` when ``--xxd`` is set and as ``bytes`` otherwise.
    """
    assert ENV is not None
    show_key = len(args) > 1
    with ENV.begin(buffers=True, db=DB) as txn:
        for name in args:
            found = txn.get(_to_bytes(name))
            if found is None:
                print('%r: missing' % (name,))
                continue
            if show_key:
                print('%r:' % (name,))
            print(xxd(found) if opts.xxd else bytes(found))
def cmd_edit(opts, args):
    """Implement the "edit" command: apply --add/--set/--delete and their
    file variants inside one write transaction.

    Operations run in a fixed order: ``--add``, ``--set``, ``--delete``,
    ``--add-file``, ``--set-file``. The "add" forms store with
    ``overwrite=False``; the "file" forms read the value from the path
    that follows the ``=``. Positional arguments are rejected.
    """
    assert ENV is not None
    if args:
        die('Edit command only takes options, not arguments (see --help)')

    equals = _to_bytes('=')

    def split_pairs(items):
        # Yield (left, right) byte strings split at the first '='.
        for item in items or []:
            left, _, right = _to_bytes(item).partition(equals)
            yield left, right

    with ENV.begin(write=True) as txn:
        cursor = txn.cursor(db=DB)

        for key, value in split_pairs(opts.add):
            cursor.put(key, value, overwrite=False)

        for key, value in split_pairs(opts.set):
            cursor.put(key, value)

        for name in opts.delete or []:
            txn.delete(_to_bytes(name), db=DB)

        for key, path in split_pairs(opts.add_file):
            with open(path, 'rb') as src:
                cursor.put(key, src.read(), overwrite=False)

        for key, path in split_pairs(opts.set_file):
            with open(path, 'rb') as src:
                cursor.put(key, src.read())
def cmd_shell(opts, args):
    """Implement the "shell" command: open an interactive console whose
    namespace is this module's globals (so ``ENV`` is available).

    ``readline`` is imported only for its side effect of enabling line
    editing in the console. It is optional, so platforms that do not ship
    the module still get a working shell.
    """
    import code
    try:
        import readline  # NOQA
    except ImportError:
        pass
    code.InteractiveConsole(globals()).interact()
def cmd_stat(opts, args):
    """Implement the "stat" command: pretty-print ``ENV.stat()`` followed
    by ``ENV.info()``."""
    assert ENV is not None
    for report in (ENV.stat, ENV.info):
        pprint.pprint(report())
[docs] def _get_term_width(default=(80, 25)): try: import fcntl # No fcntl on win32 import termios # No termios on win32 s = fcntl.ioctl(sys.stdout.fileno(), termios.TIOCGWINSZ, b'1234') height, width = struct.unpack('hh', s) return width, height except Exception: return default
def _on_sigwinch(*args):
    """Refresh the module-level ``_TERM_WIDTH`` and ``_TERM_HEIGHT`` from
    ``_get_term_width()``.

    Accepts and ignores any arguments so it can be installed as a signal
    handler as well as called directly.
    """
    global _TERM_WIDTH, _TERM_HEIGHT
    width, height = _get_term_width()
    _TERM_WIDTH = width
    _TERM_HEIGHT = height
def main(argv=None):
    """Command-line entry point.

    Parses `argv`, opens the environment named by ``--env`` into the global
    ``ENV`` (and the database named by ``--db`` into ``DB``), records the
    terminal size, and dispatches to the ``cmd_<name>`` function matching
    the first positional argument. The remaining positional arguments are
    passed on to that function.

    :param argv: argument list handed to ``parse_args()``.
    """
    global ENV, DB

    parser = make_parser()
    opts, args = parser.parse_args(argv)

    if not args:
        die('Please specify a command (see --help)')
    if not opts.env:
        die('Please specify environment (--env)')

    ENV = lmdb.open(opts.env,
                    map_size=opts.map_size * 1048576,
                    subdir=not opts.use_single_file,
                    max_dbs=opts.max_dbs,
                    create=False,
                    readonly=opts.read == 'READ')

    if opts.db:
        DB = ENV.open_db(_to_bytes(opts.db))

    if hasattr(signal, 'SIGWINCH'):  # Disable on win32.
        signal.signal(signal.SIGWINCH, _on_sigwinch)
    _on_sigwinch()

    command = args[0]
    handler = globals().get('cmd_' + command)
    if not handler:
        die('No such command: %r', command)
    handler(opts, args[1:])


if __name__ == '__main__':
    main(sys.argv[1:])