import sqlite3 # 使用内存数据库 con = sqlite3.connect(':memory:') # 创建a,b,c三个字段 cur = con.cursor() cur.execute('create table test (a char(256), b char(256), c char(256));') # 为字段a,b创建索引 cur.execute('create index a_index on test(a)') cur.execute('create index b_index on test(b)') # 插入一条数据 cur.execute('insert into test values(?, ?, ?)', (-1,0,1)) # 查询符合特定要求的数据 cur.execute('select * from test where a=? and b=?',(-1, 0))
print'-------sqlite3--------' import sqlite3 con = sqlite3.connect(':memory:') cur = con.cursor() cur.execute('create table test (a char(256), b char(256), c char(256));') cur.execute('create index a_index on test(a)') cur.execute('create index b_index on test(b)') test_sqlite_insert(cur, des='insert') test_sqlite_query(cur, des='query')
def__init__(self, path, protocol=pickle.HIGHEST_PROTOCOL, save_to_file=True, sqlite_compat=False): """protocol as defined in pickle / pickle. Defaults to the highest protocol available. For maximum compatibility use protocol = 0 """ self.path = path """The path of the database in the file system""" self.name = os.path.splitext(os.path.basename(path))[0] """The basename of the path, stripped of its extension""" self.protocol = protocol self.mode = None if path == ":memory:": save_to_file = False self.save_to_file = save_to_file self.sqlite_compat = sqlite_compat self.fields = [] """The list of the fields (does not include the internal fields __id__ and __version__)""" # if base exists, get field names if save_to_file and self.exists(): if protocol == 0: _in = open(self.path) # don't specify binary mode ! else: _in = open(self.path, 'rb') self.fields = pickle.load(_in)
defcreate(self, *fields, **kw): """ Create a new base with specified field names. Args: - \*fields (str): The field names to create. - mode (str): the mode used when creating the database. - if mode = 'create' : create a new base (the default value) - if mode = 'open' : open the existing base, ignore the fields - if mode = 'override' : erase the existing base and create a new one with the specified fields Returns: - the database (self). """ self.mode = kw.get("mode", 'create') if self.save_to_file and os.path.exists(self.path): ifnot os.path.isfile(self.path): raise IOError("%s exists and is not a file" % self.path) elif self.mode is'create': raise IOError("Base %s already exists" % self.path) elif self.mode == "open": return self.open() elif self.mode == "override": os.remove(self.path) else: raise ValueError("Invalid value given for 'open': '%s'" % open)
self.fields = [] self.default_values = {} for field in fields: if type(field) is dict: self.fields.append(field["name"]) self.default_values[field["name"]] = field.get("default", None) elif type(field) is tuple: self.fields.append(field[0]) self.default_values[field[0]] = field[1] else: self.fields.append(field) self.default_values[field] = None
defcreate_index(self, *fields): """ Create an index on the specified field names An index on a field is a mapping between the values taken by the field and the sorted list of the ids of the records whose field is equal to this value For each indexed field, an attribute of self is created, an instance of the class Index (see above). Its name it the field name, with the prefix _ to avoid name conflicts Args: - fields (list): the fields to index """ reset = False for f in fields: if f notin self.fields: raise NameError("%s is not a field name %s" % (f, self.fields)) # initialize the indices if self.mode == "open"and f in self.indices: continue reset = True self.indices[f] = {} for _id, record in self.records.items(): # use bisect to quickly insert the id in the list bisect.insort(self.indices[f].setdefault(record[f], []), _id) # create a new attribute of self, used to find the records # by this index setattr(self, '_' + f, Index(self, f)) if reset: self.commit()
可以看出,pydblite 在内存中维护了一个名为 records 的字典变量,用来存放一条条的数据。它的 key 是内部维护的 id,从 0 开始自增;而它的 value 则是用户插入的数据,为了后续查询和记录的方便,这里在每条数据中额外又加入了id和version。其次,内部维护的 indices 字典变量则是是个索引表,它的 key 是字段名,而 value 则是这样一个字典:其 key 是这个字段所有已知的值,value 是这个值所在的那条数据的 id。
举个例子,假设我们插入了“a=-1,b=0,c=1”和“a=0,b=1,c=2”两条数据,那么 records 和 indices 的内容会是这样的:
比方说现在我们想查找 a=0 的数据,那么就会在 indices 中找 key 为’a’的 value,即{-1: set([0]), 0: set([1])},然后在这里面找 key 为 0 的 value,即[1],由此我们直到了我们想要的这条数据它的 id 是 1(也可能会有多个);假设我们对数据还有其他要求比如 a=0,b=1,那么它会继续上述的查找过程,找到 a=0 和 b=1 分别对应的 ids,做交集,就得到了满足这两个条件的 ids,然后再到 records 里根据 ids 找到所有对应的数据。
明白了原理,我们再看看有什么可优化的地方:
数据结构,整体的 records 和 indeices 数据结构已经挺精简了,暂时不需要优化。其中的version可以不要,因为我们并不关注这个数据被修改了几次。其次是由于 indices 中最终的 ids 是个 list,在查询和插入的时候会比较慢,我们知道内部维护的 id 一定是唯一的,所以这里改成 set 会好一些。