Development Guide for a Receipt Recognition System Based on Python and OpenCV
Summary: This article explains in detail how to implement receipt recognition with Python and OpenCV, covering the key steps of image preprocessing, edge detection, contour extraction, and text localization and recognition, and provides a complete code implementation.
I. Background and Significance of Receipt Recognition
Receipt recognition is an important application of OCR (Optical Character Recognition), widely used in expense reimbursement, bank document processing, and tax filing. Traditional receipt recognition systems mostly rely on commercial OCR engines, which are costly and hard to customize. An open-source approach based on Python and OpenCV not only cuts development cost significantly, but also allows recognition accuracy to be improved through algorithmic tuning.
As the standard library of the computer vision field, OpenCV provides a rich set of image processing functions; combined with Python's concise syntax, it makes it possible to build a receipt recognition system quickly. The approach described here locates the receipt region precisely through image preprocessing, edge detection, and contour extraction, laying the groundwork for the subsequent text recognition.
II. Core Workflow of the Receipt Recognition System
1. Image Preprocessing
Receipt images often suffer from skew, uneven illumination, and background clutter, so preprocessing is essential. The main steps are:
Grayscale conversion: convert the color image to grayscale to reduce the amount of computation
import cv2

def rgb2gray(img_path):
    img = cv2.imread(img_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return gray
Gaussian blur: suppress high-frequency noise
def gaussian_blur(img):
    blurred = cv2.GaussianBlur(img, (5, 5), 0)
    return blurred
Adaptive thresholding: handle uneven illumination
def adaptive_threshold(img):
    thresh = cv2.adaptiveThreshold(img, 255,
                                   cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    return thresh
2. Edge Detection and Contour Extraction
The Canny edge detector extracts the receipt's edges effectively:
def canny_edge(img):
    edges = cv2.Canny(img, 50, 150)
    return edges
Morphological operations then clean up the edge map:
def morph_operations(img):
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    dilated = cv2.dilate(img, kernel, iterations=1)
    eroded = cv2.erode(dilated, kernel, iterations=1)
    return eroded
Contour extraction and filtering:
def find_contours(img):
    contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Keep only contours whose area exceeds the threshold
    min_area = 1000
    valid_contours = [cnt for cnt in contours if cv2.contourArea(cnt) > min_area]
    return valid_contours
3. Receipt Region Localization and Rectification
Contour analysis determines the receipt's position:
def locate_receipt(contours):
    # Assume the largest contour is the receipt region
    if not contours:
        return None
    target_cnt = max(contours, key=cv2.contourArea)
    # Get the axis-aligned bounding rectangle
    x, y, w, h = cv2.boundingRect(target_cnt)
    return (x, y, w, h)
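The bounding rectangle is enough for a rough crop, but the perspective correction in the next step needs the receipt's four corner points. The helper below is an illustrative sketch (not part of the original pipeline) of one common way to obtain them, approximating the largest contour with a quadrilateral via cv2.approxPolyDP; the complete implementation in Section III uses the same idea.

import cv2
import numpy as np

def approximate_corners(contour):
    # Approximate the contour with a polygon; for a receipt whose outline
    # dominates the image, this typically yields exactly four vertices
    epsilon = 0.02 * cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon, True)
    if len(approx) != 4:
        # Not a clean quadrilateral; the caller can fall back to the bounding rectangle
        return None
    return approx.reshape(4, 2).astype("float32")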
A perspective transform rectifies a skewed receipt:
import numpy as np

def perspective_transform(img, pts):
    # pts holds the receipt's four corner points,
    # ordered top-left, top-right, bottom-right, bottom-left
    rect = np.array(pts, dtype="float32")
    (tl, tr, br, bl) = rect
    # Compute the output image size
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(img, M, (maxWidth, maxHeight))
    return warped
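Note that perspective_transform assumes the corner points are already ordered top-left, top-right, bottom-right, bottom-left; the order_points helper in the complete implementation in Section III produces exactly this ordering from an unordered set of four points.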
4. Text Region Extraction and Recognition
Text lines are segmented with a projection profile:
def extract_text_lines(img):
    # Compute a row-wise projection profile (average intensity of each row)
    hist = cv2.reduce(img, 1, cv2.REDUCE_AVG).reshape(-1)
    # Find the split points between text lines
    threshold = hist.max() * 0.1
    lines = []
    start = 0
    for i in range(len(hist)):
        if hist[i] > threshold and (i == 0 or hist[i-1] <= threshold):
            start = i
        elif hist[i] <= threshold and i > 0 and hist[i-1] > threshold:
            lines.append((start, i))
    # Extract each text-line region
    text_lines = []
    for (start, end) in lines:
        roi = img[start:end, :]
        text_lines.append(roi)
    return text_lines
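The pipeline above produces cropped line images rather than text. To actually read them, the line images are typically passed to an OCR engine. The following is a minimal sketch, assuming the Tesseract engine and the pytesseract package are installed (and, for Chinese receipts, the chi_sim language data); it is not part of the original code above.

import pytesseract

def ocr_text_lines(text_lines, lang="chi_sim"):
    results = []
    for roi in text_lines:
        # The line images are white-on-black (THRESH_BINARY_INV); Tesseract
        # generally prefers dark text on a light background, so invert first.
        # --psm 7 tells Tesseract to treat the image as a single text line.
        text = pytesseract.image_to_string(255 - roi, lang=lang, config="--psm 7")
        results.append(text.strip())
    return results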
III. Complete Code Implementation
import cv2
import numpy as np

class ReceiptRecognizer:
    def __init__(self):
        pass

    def preprocess(self, img_path):
        img = cv2.imread(img_path)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        thresh = cv2.adaptiveThreshold(blurred, 255,
                                       cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, 11, 2)
        return thresh

    def detect_edges(self, img):
        edges = cv2.Canny(img, 50, 150)
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        dilated = cv2.dilate(edges, kernel, iterations=1)
        return dilated

    def find_receipt_contour(self, edges):
        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            return None
        target_cnt = max(contours, key=cv2.contourArea)
        return target_cnt

    def perspective_correction(self, img, contour):
        epsilon = 0.02 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        # Flatten the (N, 1, 2) result to (N, 2) so order_points can work with it;
        # the receipt outline is expected to approximate to exactly four vertices
        approx = approx.reshape(-1, 2).astype("float32")
        # Order the four vertices as (tl, tr, br, bl)
        rect = order_points(approx)
        # Perspective transform
        (tl, tr, br, bl) = rect
        widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
        widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
        maxWidth = max(int(widthA), int(widthB))
        heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
        heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
        maxHeight = max(int(heightA), int(heightB))
        dst = np.array([
            [0, 0],
            [maxWidth - 1, 0],
            [maxWidth - 1, maxHeight - 1],
            [0, maxHeight - 1]], dtype="float32")
        M = cv2.getPerspectiveTransform(rect, dst)
        warped = cv2.warpPerspective(img, M, (maxWidth, maxHeight))
        return warped

    def extract_text_regions(self, warped_img):
        # Compute a row-wise projection profile
        hist = cv2.reduce(warped_img, 1, cv2.REDUCE_AVG).reshape(-1)
        # Find the split points between text lines
        threshold = hist.max() * 0.1
        lines = []
        start = 0
        for i in range(len(hist)):
            if hist[i] > threshold and (i == 0 or hist[i-1] <= threshold):
                start = i
            elif hist[i] <= threshold and i > 0 and hist[i-1] > threshold:
                lines.append((start, i))
        # Extract each text-line region
        text_lines = []
        for (start, end) in lines:
            roi = warped_img[start:end, :]
            text_lines.append(roi)
        return text_lines

    def recognize(self, img_path):
        # 1. Preprocessing
        processed = self.preprocess(img_path)
        # 2. Edge detection
        edges = self.detect_edges(processed)
        # 3. Contour extraction
        contour = self.find_receipt_contour(edges)
        if contour is None:
            return None
        # 4. Perspective correction
        img = cv2.imread(img_path)
        warped = self.perspective_correction(img, contour)
        # 5. Text region extraction
        gray_warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray_warped, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        text_lines = self.extract_text_regions(binary)
        return text_lines


def order_points(pts):
    # Initialize the output array
    rect = np.zeros((4, 2), dtype="float32")
    # The top-left point has the smallest x+y sum, the bottom-right the largest
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]   # top-left
    rect[2] = pts[np.argmax(s)]   # bottom-right
    # The top-right point has the smallest y-x difference, the bottom-left the largest
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]  # top-right
    rect[3] = pts[np.argmax(diff)]  # bottom-left
    return rect


# Usage example
if __name__ == "__main__":
    recognizer = ReceiptRecognizer()
    text_lines = recognizer.recognize("receipt.jpg")
    if text_lines:
        for i, line in enumerate(text_lines):
            cv2.imwrite(f"line_{i}.png", line)
IV. Performance Optimization Suggestions
V. Extended Application Scenarios
- Expense reimbursement systems: automatically recognize invoice details and fill in reimbursement forms
- Bank document processing: recognize checks, remittance slips, and other financial instruments
- Logistics document management: automatically extract waybill numbers and shipper/consignee information
- Medical billing: recognize the various receipts required for medical insurance reimbursement
This approach implements the core functionality of receipt recognition with Python and OpenCV; developers can extend its features and optimize its performance to suit their own requirements. As computer vision techniques continue to advance, the accuracy and efficiency of receipt recognition systems will keep improving, providing solid support for digital transformation across industries.
