标签:验证码 text height [] 验证码识别 ram path pixel 识别
1.图片二值化:先将RGB图像转为灰度图再转为二值图
2.图片去噪
3.使用pytesseract(OCR)识别并“读取”嵌入图像中的文本
import pytesseract
from PIL import Image
def get_bin_table(threshold=170):
    """Build a 256-entry lookup table for binarizing 8-bit gray levels.

    :param threshold: gray levels strictly below this value map to 0;
        every other level maps to 1.
    :return: list of 256 ints (each 0 or 1), indexed by gray level.
    """
    return [0 if level < threshold else 1 for level in range(256)]
def type_trans(path):
    """Open an image and convert it to a two-level (mode ``'1'``) image.

    :param path: image location, passed straight to ``Image.open``.
    :return: the image obtained by converting to grayscale and then mapping
        every gray level through the table from ``get_bin_table()``.
    """
    # Open inside a context manager so the underlying file is closed as soon
    # as the grayscale copy has been produced.
    with Image.open(path) as source:
        gray = source.convert('L')  # convert to a grayscale image
    # Output mode '1' is a binary image; 'L' would be grayscale.
    return gray.point(get_bin_table(), '1')
def cut_noise(img):
    """Clear isolated non-white pixels from a binarized image, in place.

    For every pixel that is not on the image border, the 3x3 neighbourhood
    centred on it (the pixel itself included) is inspected. If at most two
    pixels in that neighbourhood differ from 1, the centre pixel is set to 1.

    :param img: object exposing ``size`` (width, height), ``getpixel((x, y))``
        and ``putpixel((x, y), value)``.
    :return: the same ``img`` object, after modification.
    """
    # NOTE(review): the comparison assumes background pixels read back as 1,
    # matching the 0/1 lookup table used for binarization - confirm for
    # images binarized some other way.
    width, height = img.size
    for x in range(1, width - 1):
        for y in range(1, height - 1):
            # Pixels are rewritten while the scan proceeds, so neighbourhoods
            # visited later already see the pixels cleared earlier.
            non_white = sum(
                1
                for nx in range(x - 1, x + 2)
                for ny in range(y - 1, y + 2)
                if img.getpixel((nx, ny)) != 1
            )
            if non_white <= 2:
                img.putpixel((x, y), 1)
    return img
# Binarize the captcha image, clear isolated noise pixels, then run OCR on it.
img = type_trans('image.jpg')
img = cut_noise(img)
text = pytesseract.image_to_string(img)
# Whitespace and punctuation characters to strip from the OCR output.
exclude_char_list = ' .:\\|\'\"?![],()~@#$%^&*_+-={};<>/¥'
text = ''.join(x for x in text if x not in exclude_char_list)
标签:验证码 text height [] 验证码识别 ram path pixel 识别
原文地址:https://www.cnblogs.com/frank-shen/p/10324164.html