sqlite 使用总结
SQLite官方文档 Appropriate Uses For SQLite 指出:
SQLite does not compete with client/server databases. SQLite competes with fopen().
SQLite的竞争对手是fopen
(相当于 python 中的 open
)。
1. 存储文本数据集
存在一个大规模的文本数据集, 以 (index/int, hash/str, content/str)的形式存储,使用方式仅是通过搜索 index 或 hash, 获取文本信息。此外, 该数据集的读取,都在一个进程中执行。
在这种特殊的应用场景中, 如果将数据集存储到诸如 mysql, mongo 等数据库中, 会造成资源的浪费。这时候, sqlite 便可以派上用场。
import logging
import os
import sqlite3
from collections import namedtuple
TextModel = namedtuple("TextModel", field_names=["index", "hash_id", "content"])
_cur_dir = os.path.dirname(__file__)
class SqliteTextDao(object):
def __init__(self, db_file: str, logger: logging.Logger = None):
self.db_file = db_file
self.logger = logger or logging.getLogger(self.__class__.__name__)
def search(self, index_list: [int]) -> [(int, str, str)]:
""" 搜索 """
conn = self._get_connect()
try:
args = list(set(index_list))
sql = "SELECT INDEX, HASHID, CONTENT from TEXTDB where ID IN ({})".format(",".join(["?" for _ in range(len(args))]))
cursor = conn.cursor().execute(sql, args)
return [(row[0], row[1], row[2]) for row in cursor]
except Exception as e:
self.logger.error(e)
def insert(self, hash_id: str, content: str, index: int):
""" 插入数据 """
conn = self._get_connect()
try:
sql = "INSERT INTO TEXTDB(INDEX, HASHID, CONTENT) VALUES (?, ?, ?)"
conn.cursor().execute(sql, (index, hash_id, content))
conn.commit()
except Exception as e:
self.logger.error(e)
return None
finally:
conn.close()
def _get_connect(self):
""" 创建模型 """
if not os.path.exists(self.db_file):
conn = sqlite3.connect(self.db_file)
c = conn.cursor()
c.execute('''CREATE TABLE TEXTDB
(INDEX INTEGER PRIMARY KEY,
HASHID CHAR(32) NOT NULL,
CONTENT TEXT);''')
conn.commit()
conn.close()
return sqlite3.connect(self.db_file)
def list_instance_by_index(self, index_list: [int]) -> [TextModel]:
"""
根据 index 返回 数据模型 列表
:param index_list:
:return:
:rtype: list of TextModel
"""
index_list = [int(_index) for _index in list(set(index_list))] # numpy 来源的 index 可能类型不一样
instance_dict = {}
for index, hash_id, content in self.search(index_list=index_list):
instance_dict[int(index)] = TextModel(index=int(index), hash_id=hash_id, content=content)
result_list = []
for index in index_list:
result_list.append(instance_dict.get(index))
return result_list
2. sqlite sql 用例参考
2.1 list instance
# uid in uid_list
"SELECT UID from DB1 where UID IN ({})".format(",".join(["?" for _ in range(len(uid_list))]))
# uid in uid_list && has_detail
"SELECT UID from DB1 where UID IN ({}) AND DETAIL != '' ".format(",".join(["?" for _ in range(len(uid_list))]))
# uid in uid_list && !has_detail
"SELECT UID from DB1 where UID IN ({}) AND DETAIL = '' ".format(",".join(["?" for _ in range(len(uid_list))]))
2.2 insert only uid not exists
# UID 是 CHAR(32) UNIQUE NOT NULL
'INSERT OR IGNORE INTO DB1 (ID, UID, VALUE) values (NULL, ?, ? ) '