百度/腾讯 ocr 试用
1. 百度 ocr
准备工作:
- 注册百度云账号, 创建应用, 获取 APP_ID, API_KEY, SECRET_KEY
- 安装 Python SDK (pip install baidu-aip), 然后执行 python -c "from aip import AipOcr; AipOcr" 确认安装成功.
代码示例:
import math
import os
import time
import unittest
import cv2
import numpy as np
from aip import AipOcr
class BaiduOCR(object):
    """Small wrapper around the Baidu AIP ``AipOcr`` client.

    Replace the three placeholders below with your own APP ID, API key (AK)
    and secret key (SK) before use.
    """

    APP_ID = '<APP_ID>'
    API_KEY = '<API_KEY>'
    SECRET_KEY = '<SECRET_KEY>'

    def __init__(self):
        """Create the ``AipOcr`` client from the class-level credentials."""
        self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)

    @staticmethod
    def get_image(image_path: str) -> bytes:
        """Return the raw bytes of the file at ``image_path``."""
        with open(image_path, "rb") as f:
            return f.read()

    @staticmethod
    def image_np2bytes(image_np: np.ndarray) -> bytes:
        """Encode an OpenCV image array as JPEG bytes, entirely in memory.

        The previous implementation wrote a fixed ``x.jpg`` into the current
        working directory and deleted it afterwards, which could overwrite
        and remove an unrelated file of that name, raced between concurrent
        callers, and failed in read-only directories.

        Args:
            image_np: image array as used by ``cv2`` (e.g. from ``cv2.imread``).

        Returns:
            The JPEG-encoded image.

        Raises:
            ValueError: if OpenCV reports that the encoding failed.
        """
        ok, encoded = cv2.imencode(".jpg", image_np)
        if not ok:
            raise ValueError("failed to encode image as JPEG")
        return encoded.tobytes()

    def ocr(self, image: bytes):
        """Send ``image`` to the client's ``general`` endpoint.

        Args:
            image: encoded image bytes (e.g. from :meth:`get_image` or
                :meth:`image_np2bytes`).

        Returns:
            Whatever ``AipOcr.general`` returns for the request.
        """
        # Option values are passed as the strings "true"/"false", not booleans.
        return self.client.general(image=image, options={
            "language_type": "CHN_ENG",
            "detect_direction": "false",
            "probability": "true",
        })
if __name__ == '__main__':
    # Demo: run OCR on a local sample image and print the raw response.
    image_path = "test.png"
    baidu = BaiduOCR()
    raw_image = BaiduOCR.get_image(image_path)
    print(baidu.ocr(image=raw_image))
2. 腾讯 OCR
准备工作: 在腾讯 AI 开放平台创建应用并开通 OCR 服务, 获取 app_id 和 app_key。
代码示例:
import base64
import hashlib
import json
import os
import random
import time
from urllib.parse import quote, urlencode
from urllib.request import Request, urlopen
# Directory containing this script file.
# NOTE(review): not referenced by any code visible in this sample.
_cur_dir = os.path.dirname(__file__)
def setParams(array, key, value):
    """Store ``value`` under ``key`` in ``array``, mutating it in place.

    Despite the parameter name, callers in this file pass a dict of request
    parameters. Returns ``None``.
    """
    array[key] = value
def genSignString(parser):
    """Compute the request signature for a parameter mapping.

    Every key except ``app_key`` is visited in sorted order and rendered as
    ``key=value&``, with the value stringified and fully percent-encoded
    (no characters are treated as safe). ``app_key=<app_key>`` is appended
    last, unencoded.

    Args:
        parser: mapping of request parameters; must contain a string
            ``app_key`` entry.

    Returns:
        The upper-case hexadecimal MD5 digest of the UTF-8 encoded string.
    """
    signed_names = (name for name in sorted(parser.keys()) if name != 'app_key')
    encoded_pairs = [
        "%s=%s&" % (name, quote(str(parser[name]), safe=""))
        for name in signed_names
    ]
    # The secret always goes last, regardless of where it would sort.
    payload = "".join(encoded_pairs) + 'app_key=' + parser['app_key']
    digest = hashlib.md5(payload.encode('utf-8'))
    return digest.hexdigest().upper()
class AiPlat(object):
    """Minimal client for the Tencent AI open platform HTTP endpoints.

    Each ``get*`` method builds a fresh parameter set, signs it with
    ``genSignString``, sends it form-encoded to the matching endpoint and
    returns the decoded JSON reply.
    """

    url_preffix = 'https://api.ai.qq.com/fcgi-bin/'

    def __init__(self, app_id, app_key):
        """Remember the credentials used to build and sign every request."""
        self.app_id = app_id
        self.app_key = app_key
        # Parameters of the most recent request; replaced on every call.
        self.data = {}

    def invoke(self, params):
        """Send ``params`` form-encoded to ``self.url`` and decode the reply.

        ``self.url`` must have been set beforehand (the ``get*`` methods do
        this). The encoded body is kept in ``self.url_data``.

        Returns:
            The parsed JSON response, or
            ``{'ret': -1, 'httpcode': -1, 'msg': 'system error'}`` if the
            request or the decoding fails for any reason (the exception is
            printed, not raised).
        """
        self.url_data = urlencode(params).encode('utf-8')
        req = Request(self.url, self.url_data)
        try:
            # Context manager closes the HTTP response even if read() fails.
            with urlopen(req) as rsp:
                str_rsp = rsp.read().decode('utf-8')
            return json.loads(str_rsp)
        except Exception as e:
            # Deliberate best-effort: report and return an error-shaped dict.
            print(e)
            return {'ret': -1, 'httpcode': -1, 'msg': "system error"}

    def _signed_request(self, path, extra):
        """Build, sign and send one request to ``url_preffix + path``.

        A new dict is created for every call. Reusing one dict across calls
        would feed the previous call's ``sign`` (and leftover fields such as
        ``image``) into the next signature and invalidate it.
        """
        self.url = self.url_preffix + path
        now = int(time.time())
        params = {
            'app_id': self.app_id,
            # NOTE(review): app_key is also sent in the request body, exactly
            # as the original code did - confirm the API expects that.
            'app_key': self.app_key,
            'time_stamp': now,
            # Timestamp plus a random offset, as getOcrGeneralocr always used.
            'nonce_str': now + random.randint(1000, 100000),
        }
        params.update(extra)
        params['sign'] = genSignString(params)
        self.data = params
        return self.invoke(params)

    def getOcrGeneralocr(self, image):
        """Call ``ocr/ocr_generalocr`` with ``image`` (raw bytes).

        The image is sent base64-encoded as text.
        """
        image_data = base64.b64encode(image).decode("utf-8")
        return self._signed_request('ocr/ocr_generalocr', {'image': image_data})

    def getNlpTextTrans(self, text, type):
        """Call ``nlp/nlp_texttrans`` with ``text`` and the given ``type``."""
        return self._signed_request('nlp/nlp_texttrans', {
            'text': text,
            'type': type,
        })

    def getAaiWxAsrs(self, chunk, speech_id, end_flag, format_id, rate, bits,
                     seq, chunk_len, cont_res):
        """Call ``aai/aai_wxasrs`` with one chunk of audio (raw bytes).

        The remaining arguments are forwarded under the request keys
        ``speech_id``, ``end``, ``format``, ``rate``, ``bits``, ``seq``,
        ``len`` and ``cont_res``.
        """
        # Decode to str: the signer applies str() to every value, and
        # str(bytes) would sign "b'...'" instead of the base64 text sent.
        speech_chunk = base64.b64encode(chunk).decode("utf-8")
        return self._signed_request('aai/aai_wxasrs', {
            'speech_chunk': speech_chunk,
            'speech_id': speech_id,
            'end': end_flag,
            'format': format_id,
            'rate': rate,
            'bits': bits,
            'seq': seq,
            'len': chunk_len,
            'cont_res': cont_res,
        })
class TencentAPI(object):
    """Convenience wrapper that runs general OCR through ``AiPlat``.

    Replace the two placeholders below with your own application id and key.
    """

    app_id = "<APPID>"
    app_key = "<APP_KEY>"

    def __init__(self):
        """Build the underlying ``AiPlat`` client from the class credentials."""
        self.api = AiPlat(app_id=self.app_id, app_key=self.app_key)

    @staticmethod
    def get_image(image_path: str) -> bytes:
        """Read the file at ``image_path`` and return its raw bytes."""
        with open(image_path, "rb") as stream:
            payload = stream.read()
        return payload

    def ocr(self, image: bytes):
        """Run general OCR on ``image`` and return the client's response."""
        response = self.api.getOcrGeneralocr(image)
        return response
if __name__ == '__main__':
    # Demo: run OCR on a local sample image and print the raw response.
    image_path = "test.png"
    tencent = TencentAPI()
    raw_image = TencentAPI.get_image(image_path)
    print(tencent.ocr(image=raw_image))