
Developing a Receipt Recognition System with Python and OpenCV

Author: 很酷cat · 2025-09-19 17:59

Summary: This article explains in detail how to implement receipt recognition with Python and OpenCV, covering the key steps of image preprocessing, edge detection, contour extraction, and text localization and recognition, and provides a complete code implementation.


I. Background and Significance of Receipt Recognition

Receipt recognition is a major application of OCR (optical character recognition), widely used in expense reimbursement, bank document processing, tax filing, and similar domains. Traditional receipt recognition systems mostly rely on commercial OCR engines, which are expensive and difficult to customize. An open-source approach based on Python and OpenCV not only reduces development cost significantly but also leaves room to improve recognition accuracy through algorithmic tuning.

OpenCV, the de facto standard library for computer vision, provides a rich set of image-processing functions; combined with Python's concise syntax, it allows a receipt recognition system to be built quickly. The approach described here locates the receipt region precisely through image preprocessing, edge detection, and contour extraction, laying the groundwork for the subsequent text recognition.

II. Core Pipeline of the Receipt Recognition System

1. Image Preprocessing

Receipt images often suffer from skew, uneven illumination, and background clutter, so preprocessing is critical. The main steps are listed below, followed by a short sketch that chains them together:

  • Grayscale conversion: convert the color image to grayscale to reduce the amount of computation

      import cv2

      def rgb2gray(img_path):
          # Read the image and convert it from BGR to a single-channel grayscale image
          img = cv2.imread(img_path)
          gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
          return gray
  • Gaussian blur: suppress high-frequency noise

      def gaussian_blur(img):
          # 5x5 Gaussian kernel; sigma is derived automatically from the kernel size
          blurred = cv2.GaussianBlur(img, (5, 5), 0)
          return blurred
  • Adaptive thresholding: compensate for uneven illumination

      def adaptive_threshold(img):
          # Block size 11, constant 2; inverted binary output (text becomes white)
          thresh = cv2.adaptiveThreshold(img, 255,
                                         cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                         cv2.THRESH_BINARY_INV, 11, 2)
          return thresh
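
The three helpers above are typically applied in sequence. A minimal composition sketch (the file name receipt.jpg is only a placeholder):

    # Preprocessing pipeline: grayscale -> Gaussian blur -> adaptive threshold
    gray = rgb2gray("receipt.jpg")      # placeholder path
    blurred = gaussian_blur(gray)
    binary = adaptive_threshold(blurred)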

2. Edge Detection and Contour Extraction

The Canny edge detector extracts the receipt's edges effectively:

    def canny_edge(img):
        # Hysteresis thresholds of 50/150 work well on binarized receipt images
        edges = cv2.Canny(img, 50, 150)
        return edges

Morphological operations then clean up the edge map:

    def morph_operations(img):
        # Dilate then erode (a closing) to bridge small gaps in the edges
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        dilated = cv2.dilate(img, kernel, iterations=1)
        eroded = cv2.erode(dilated, kernel, iterations=1)
        return eroded

Contour extraction and filtering:

    def find_contours(img):
        contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # Keep only contours whose area exceeds a minimum threshold
        min_area = 1000
        valid_contours = [cnt for cnt in contours if cv2.contourArea(cnt) > min_area]
        return valid_contours

3. Receipt Region Localization and Rectification

Contour analysis determines where the receipt lies in the image:

    def locate_receipt(contours):
        # Assume the largest contour corresponds to the receipt region
        if not contours:
            return None
        target_cnt = max(contours, key=cv2.contourArea)
        # Axis-aligned bounding rectangle of the receipt
        x, y, w, h = cv2.boundingRect(target_cnt)
        return (x, y, w, h)

A perspective transform then rectifies a skewed receipt:

    import numpy as np

    def perspective_transform(img, pts):
        # pts holds the four corner points of the receipt, ordered (tl, tr, br, bl)
        rect = np.array(pts, dtype="float32")
        (tl, tr, br, bl) = rect
        # Compute the output image size from the edge lengths
        widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
        widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
        maxWidth = max(int(widthA), int(widthB))
        heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
        heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
        maxHeight = max(int(heightA), int(heightB))
        dst = np.array([
            [0, 0],
            [maxWidth - 1, 0],
            [maxWidth - 1, maxHeight - 1],
            [0, maxHeight - 1]], dtype="float32")
        M = cv2.getPerspectiveTransform(rect, dst)
        warped = cv2.warpPerspective(img, M, (maxWidth, maxHeight))
        return warped
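
Note that locate_receipt only returns an axis-aligned box, while perspective_transform needs four corner points. One common way to obtain them is to approximate the largest contour with a quadrilateral. A minimal sketch, assuming the receipt's outline is roughly rectangular (contour_to_corners is a hypothetical helper, not part of the original code; the resulting corners still need to be sorted with order_points from the complete implementation in Section III):

    def contour_to_corners(contour):
        # Approximate the contour with a polygon; a clean receipt outline
        # usually yields exactly four vertices
        epsilon = 0.02 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        if len(approx) != 4:
            return None  # fall back to the bounding rectangle in this case
        return approx.reshape(4, 2).astype("float32")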

4. Text Region Extraction and Recognition

A projection-profile method splits the rectified receipt into text lines:

    def extract_text_lines(img):
        # Row-wise (horizontal) projection: average intensity of each row
        hist = cv2.reduce(img, 1, cv2.REDUCE_AVG).reshape(-1)
        # Find the start/end rows of each text line
        threshold = hist.max() * 0.1
        lines = []
        start = 0
        for i in range(len(hist)):
            if hist[i] > threshold and (i == 0 or hist[i-1] <= threshold):
                start = i
            elif hist[i] <= threshold and i > 0 and hist[i-1] > threshold:
                lines.append((start, i))
        # Close a line that runs to the bottom edge of the image
        if len(hist) > 0 and hist[-1] > threshold:
            lines.append((start, len(hist)))
        # Crop each detected text line
        text_lines = []
        for (start, end) in lines:
            roi = img[start:end, :]
            text_lines.append(roi)
        return text_lines
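
The pipeline above stops at line segmentation; the cropped line images are what a downstream OCR engine consumes. A minimal sketch of that hand-off, assuming the open-source Tesseract engine and the pytesseract package are installed with a Chinese language pack (neither is part of the OpenCV pipeline above):

    import cv2
    import pytesseract  # assumes the Tesseract engine is installed separately

    def recognize_lines(text_lines, lang="chi_sim"):
        # Run OCR on each segmented line image and collect the text
        results = []
        for line_img in text_lines:
            # Tesseract generally prefers dark text on a light background,
            # so invert the binarized (white-on-black) line first
            ocr_input = cv2.bitwise_not(line_img)
            text = pytesseract.image_to_string(ocr_input, lang=lang)
            results.append(text.strip())
        return results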

III. Complete Code Implementation

    import cv2
    import numpy as np


    def order_points(pts):
        # Sort four corner points into (top-left, top-right, bottom-right, bottom-left)
        rect = np.zeros((4, 2), dtype="float32")
        # The top-left point has the smallest x+y sum, the bottom-right the largest
        s = pts.sum(axis=1)
        rect[0] = pts[np.argmin(s)]     # top-left
        rect[2] = pts[np.argmax(s)]     # bottom-right
        # The top-right point has the smallest y-x difference, the bottom-left the largest
        diff = np.diff(pts, axis=1)
        rect[1] = pts[np.argmin(diff)]  # top-right
        rect[3] = pts[np.argmax(diff)]  # bottom-left
        return rect


    class ReceiptRecognizer:
        def __init__(self):
            pass

        def preprocess(self, img_path):
            img = cv2.imread(img_path)
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            blurred = cv2.GaussianBlur(gray, (5, 5), 0)
            thresh = cv2.adaptiveThreshold(blurred, 255,
                                           cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                           cv2.THRESH_BINARY_INV, 11, 2)
            return thresh

        def detect_edges(self, img):
            edges = cv2.Canny(img, 50, 150)
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
            dilated = cv2.dilate(edges, kernel, iterations=1)
            return dilated

        def find_receipt_contour(self, edges):
            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if not contours:
                return None
            target_cnt = max(contours, key=cv2.contourArea)
            return target_cnt

        def perspective_correction(self, img, contour):
            epsilon = 0.02 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)
            if len(approx) != 4:
                # Not a clean quadrilateral: fall back to the axis-aligned bounding box
                x, y, w, h = cv2.boundingRect(contour)
                return img[y:y + h, x:x + w]
            approx = approx.reshape(4, 2).astype("float32")
            # Order the four corners as (tl, tr, br, bl)
            rect = order_points(approx)
            (tl, tr, br, bl) = rect
            # Output size from the longer of each pair of opposite edges
            widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
            widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
            maxWidth = max(int(widthA), int(widthB))
            heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
            heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
            maxHeight = max(int(heightA), int(heightB))
            dst = np.array([
                [0, 0],
                [maxWidth - 1, 0],
                [maxWidth - 1, maxHeight - 1],
                [0, maxHeight - 1]], dtype="float32")
            # Perspective transform
            M = cv2.getPerspectiveTransform(rect, dst)
            warped = cv2.warpPerspective(img, M, (maxWidth, maxHeight))
            return warped

        def extract_text_regions(self, warped_img):
            # Row-wise (horizontal) projection profile
            hist = cv2.reduce(warped_img, 1, cv2.REDUCE_AVG).reshape(-1)
            # Find the start/end rows of each text line
            threshold = hist.max() * 0.1
            lines = []
            start = 0
            for i in range(len(hist)):
                if hist[i] > threshold and (i == 0 or hist[i-1] <= threshold):
                    start = i
                elif hist[i] <= threshold and i > 0 and hist[i-1] > threshold:
                    lines.append((start, i))
            # Close a line that runs to the bottom edge of the image
            if len(hist) > 0 and hist[-1] > threshold:
                lines.append((start, len(hist)))
            # Crop each detected text line
            text_lines = []
            for (start, end) in lines:
                roi = warped_img[start:end, :]
                text_lines.append(roi)
            return text_lines

        def recognize(self, img_path):
            # 1. Preprocessing
            processed = self.preprocess(img_path)
            # 2. Edge detection
            edges = self.detect_edges(processed)
            # 3. Contour extraction
            contour = self.find_receipt_contour(edges)
            if contour is None:
                return None
            # 4. Perspective correction
            img = cv2.imread(img_path)
            warped = self.perspective_correction(img, contour)
            # 5. Text line extraction
            gray_warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
            _, binary = cv2.threshold(gray_warped, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
            text_lines = self.extract_text_regions(binary)
            return text_lines


    # Usage example
    if __name__ == "__main__":
        recognizer = ReceiptRecognizer()
        text_lines = recognizer.recognize("receipt.jpg")
        if text_lines:
            for i, line in enumerate(text_lines):
                cv2.imwrite(f"line_{i}.png", line)

IV. Performance Optimization Suggestions

  1. Multi-scale processing: process the image at several scales to improve recognition of small receipts
  2. Parallel processing: use multiple threads to handle several receipt images at once (see the sketch after this list)
  3. Deep-learning fusion: combine a CNN-based detector for more accurate text localization
  4. Post-processing: add text-orientation correction, character segmentation, and similar refinement steps
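
A minimal sketch of item 2, assuming the ReceiptRecognizer class from Section III and a list of local image paths (the paths below are placeholders). OpenCV releases the GIL for most heavy operations, so even a simple thread pool gives a useful speed-up:

    from concurrent.futures import ThreadPoolExecutor

    def recognize_batch(image_paths, max_workers=4):
        # Each worker runs the full pipeline on one receipt image;
        # ReceiptRecognizer holds no state, so one instance can be shared
        recognizer = ReceiptRecognizer()
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            results = list(pool.map(recognizer.recognize, image_paths))
        # results[i] is the list of text-line images for image_paths[i] (or None)
        return results

    # Example: batch = recognize_batch(["receipt1.jpg", "receipt2.jpg"])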

V. Extended Application Scenarios

  1. Expense reimbursement systems: automatically recognize invoice fields and fill in reimbursement forms
  2. Bank document processing: recognize checks, remittance slips, and other financial instruments
  3. Logistics document management: automatically extract waybill numbers and shipper/consignee information
  4. Medical billing: recognize the various receipts required for medical insurance reimbursement

This solution implements the core of receipt recognition with Python and OpenCV; developers can extend its functionality and tune its performance to fit their own requirements. As computer vision techniques continue to advance, the accuracy and efficiency of receipt recognition systems will keep improving, providing solid support for digital transformation across industries.
