# [Python] listing copied from a web code viewer (header read: "view as plain text / copy code")
import ttkbootstrap as ttk
from ttkbootstrap.constants import *
from tkinter import filedialog
from PIL import Image, ImageTk
import ddddocr
import cv2
import numpy as np
import io
class OCRApp:
    """Desktop GUI that runs ddddocr on user-selected images.

    Three modes are offered through radio buttons:

    * ``ocr`` - recognise the text in one image, optionally restricted to a
      character range, and list the probability of each recognised character.
    * ``detection`` - draw numbered boxes around detected targets and list
      the centre point of each box.
    * ``slide`` - take two images and run either ``slide_match`` or
      ``slide_comparison`` on them.
    """

    # Previews are shrunk to fit inside this (width, height) box.
    _PREVIEW_SIZE = (600, 400)

    # File-dialog patterns are kept as tuples (one entry per extension): a
    # single ';'-joined string is only understood by the native Windows dialog.
    _IMAGE_PATTERNS = (
        "*.bmp", "*.jpg", "*.jpeg", "*.png", "*.tif", "*.tiff", "*.gif",
        "*.pcx", "*.tga", "*.exif", "*.fpx", "*.svg", "*.psd", "*.cdr",
        "*.pcd", "*.dxf", "*.ufo", "*.eps", "*.ai", "*.raw", "*.WMF",
        "*.webp", "*.avif", "*.apng",
    )
    _SECOND_IMAGE_PATTERNS = (
        "*.bmp", "*.jpg", "*.jpeg", "*.png", "*.tif", "*.tiff",
    )

    def __init__(self):
        """Build the window and its widgets, then create the ddddocr engines."""
        self.window = ttk.Window(themename="cosmo")
        self.window.title("验证码识别与目标检测系统")
        self.window.geometry("1850x1300")

        self.main_frame = ttk.Frame(self.window)
        self.main_frame.pack(fill=BOTH, expand=True, padx=20, pady=20)

        # The call order below is the top-to-bottom order on screen.
        self._build_buttons()
        self._build_mode_selector()
        self._build_ocr_settings()
        self._build_algorithm_selector()
        self._build_output_area()

        # One engine per task.
        self.ocr = ddddocr.DdddOcr()
        self.detector = ddddocr.DdddOcr(det=True, ocr=False)
        self.slide_detector = ddddocr.DdddOcr(det=False, ocr=False)
        # Range last handed to self.ocr.set_ranges(); None means unrestricted.
        self._applied_range = None

        # Paths of the images currently loaded.
        self.current_image_path = None
        self.second_image_path = None

    # ------------------------------------------------------------------
    # Widget construction
    # ------------------------------------------------------------------
    def _build_buttons(self):
        """Create the upload / second-upload / clear button row."""
        self.button_frame = ttk.Frame(self.main_frame)
        self.button_frame.pack(fill=X, pady=(0, 10))

        self.upload_btn = ttk.Button(
            self.button_frame,
            text="上传图片",
            command=self.upload_image,
            bootstyle=PRIMARY,
        )
        self.upload_btn.pack(side=LEFT, padx=(0, 10))

        # Created but not packed: it is only shown once a first image has
        # been loaded in slide mode (see handle_slide_mode).
        self.second_image_btn = ttk.Button(
            self.button_frame,
            text="上传第二张图片",
            command=self.upload_second_image,
            bootstyle=PRIMARY,
        )

        self.clear_btn = ttk.Button(
            self.button_frame,
            text="清空",
            command=self.clear_content,
            bootstyle=DANGER,
        )
        self.clear_btn.pack(side=LEFT)

    def _build_mode_selector(self):
        """Create the OCR / detection / slide mode radio buttons."""
        self.mode_frame = ttk.LabelFrame(self.main_frame, text="模式选择")
        self.mode_frame.pack(fill=X, pady=(0, 10))

        self.mode_var = ttk.StringVar(value="ocr")
        # trace_add replaces the deprecated trace('w', ...) spelling.
        self.mode_var.trace_add("write", self.on_mode_change)

        self.ocr_radio = ttk.Radiobutton(
            self.mode_frame, text="OCR模式",
            variable=self.mode_var, value="ocr",
        )
        self.ocr_radio.pack(side=LEFT, padx=10)

        self.detection_radio = ttk.Radiobutton(
            self.mode_frame, text="目标检测模式",
            variable=self.mode_var, value="detection",
        )
        self.detection_radio.pack(side=LEFT, padx=10)

        self.slide_radio = ttk.Radiobutton(
            self.mode_frame, text="滑块检测模式",
            variable=self.mode_var, value="slide",
        )
        self.slide_radio.pack(side=LEFT, padx=10)

    def _build_ocr_settings(self):
        """Create the character-range radio buttons and the custom-range entry."""
        self.ocr_settings_frame = ttk.LabelFrame(self.main_frame, text="OCR设置")
        self.ocr_settings_frame.pack(fill=X, pady=(0, 10))

        self.range_var = ttk.StringVar(value="default")
        # (label, value); numeric values are passed to set_ranges() as ints.
        ranges = [
            ("默认", "default"),
            ("纯数字(0-9)", "0"),
            ("小写字母(a-z)", "1"),
            ("大写字母(A-Z)", "2"),
            ("所有字母(a-z+A-Z)", "3"),
            ("小写字母+数字", "4"),
            ("大写字母+数字", "5"),
            ("所有字母+数字", "6"),
            ("自定义范围", "custom"),
        ]
        for text, value in ranges:
            ttk.Radiobutton(
                self.ocr_settings_frame,
                text=text,
                variable=self.range_var,
                value=value,
                command=self.on_range_change,
            ).pack(side=LEFT, padx=5)

        # Not packed here; on_range_change shows it when "custom" is selected.
        self.custom_range_frame = ttk.Frame(self.ocr_settings_frame)
        ttk.Label(self.custom_range_frame, text="自定义字符范围:").pack(side=LEFT, padx=5)
        self.custom_range_entry = ttk.Entry(self.custom_range_frame)
        self.custom_range_entry.pack(side=LEFT, fill=X, expand=True, padx=5)
        self.custom_range_entry.insert(0, "0123456789+-x/=")

    def _build_algorithm_selector(self):
        """Create the slide-algorithm radio buttons (shown only in slide mode)."""
        # Not packed here; on_mode_change shows it when slide mode is chosen.
        self.algorithm_frame = ttk.LabelFrame(self.main_frame, text="滑块检测算法")
        self.slide_algorithm = ttk.StringVar(value="algorithm1")

        self.algorithm1_radio = ttk.Radiobutton(
            self.algorithm_frame, text="算法1 (滑块匹配)",
            variable=self.slide_algorithm, value="algorithm1",
        )
        self.algorithm1_radio.pack(side=LEFT, padx=10)

        self.algorithm2_radio = ttk.Radiobutton(
            self.algorithm_frame, text="算法2 (图片比对)",
            variable=self.slide_algorithm, value="algorithm2",
        )
        self.algorithm2_radio.pack(side=LEFT, padx=10)

    def _build_output_area(self):
        """Create the result text box and the image preview label."""
        self.result_frame = ttk.LabelFrame(self.main_frame, text="识别结果")
        self.result_frame.pack(fill=X, pady=(0, 10))
        self.result_text = ttk.Text(self.result_frame, height=3)
        self.result_text.pack(fill=X, padx=5, pady=5)

        self.image_frame = ttk.LabelFrame(self.main_frame, text="图片预览")
        self.image_frame.pack(fill=BOTH, expand=True, pady=(0, 10))
        self.image_label = ttk.Label(self.image_frame)
        self.image_label.pack(expand=True)

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------
    def _show_preview(self, image):
        """Shrink a PIL image in place and display it in the preview label."""
        image.thumbnail(self._PREVIEW_SIZE)
        photo = ImageTk.PhotoImage(image)
        self.image_label.config(image=photo)
        # Keep a reference on the widget so the PhotoImage is not
        # garbage-collected while it is being displayed.
        self.image_label.image = photo

    def _show_error(self, error):
        """Replace the result text with an error message."""
        self.result_text.delete(1.0, END)
        self.result_text.insert(END, f"错误:{error}")

    @staticmethod
    def _resolve_range(range_value, custom_text):
        """Translate the range selection into a ``set_ranges`` argument.

        Args:
            range_value: value of the range radio group ("default",
                "custom" or a digit string).
            custom_text: content of the custom-range entry.

        Returns:
            ``None`` when no restriction is wanted ("default", or "custom"
            with an empty entry), the custom string for "custom", otherwise
            the digit string converted to ``int``.
        """
        if range_value == "default":
            return None
        if range_value == "custom":
            return custom_text or None
        return int(range_value)

    @staticmethod
    def _decode_probabilities(result):
        """Pick the most probable character of every step of an OCR result.

        Args:
            result: dict with a ``'charsets'`` list and a ``'probability'``
                list holding one list of scores (parallel to ``'charsets'``)
                per step.

        Returns:
            A list of ``(character, probability)`` tuples. Steps whose best
            entry is the empty string are dropped, so every probability stays
            paired with the character it belongs to.
        """
        charsets = result['charsets']
        pairs = []
        for step in result['probability']:
            best = max(step)
            char = charsets[step.index(best)]
            if char:
                pairs.append((char, best))
        return pairs

    def _apply_ocr_range(self):
        """Make the recogniser's character range match the current selection."""
        wanted = self._resolve_range(
            self.range_var.get(), self.custom_range_entry.get()
        )
        if wanted == getattr(self, "_applied_range", None):
            return
        if wanted is None:
            # A range given to set_ranges() stays on the recogniser.
            # NOTE(review): no public reset call is used here because none is
            # known to this code; a fresh instance lifts the restriction.
            self.ocr = ddddocr.DdddOcr()
        else:
            self.ocr.set_ranges(wanted)
        self._applied_range = wanted

    # ------------------------------------------------------------------
    # Callbacks
    # ------------------------------------------------------------------
    def on_mode_change(self, *args):
        """Variable-trace callback: show the settings panel of the new mode.

        Args:
            *args: trace arguments supplied by Tk; unused.
        """
        mode = self.mode_var.get()
        # Hide both optional panels, then show the one this mode needs
        # (detection mode needs neither).
        self.algorithm_frame.pack_forget()
        self.ocr_settings_frame.pack_forget()
        if mode == "slide":
            self.algorithm_frame.pack(after=self.mode_frame, fill=X, pady=(0, 10))
        elif mode == "ocr":
            self.ocr_settings_frame.pack(after=self.mode_frame, fill=X, pady=(0, 10))
        # Also hides the second-image button and forgets the loaded images.
        self.clear_content()

    def upload_image(self):
        """Ask for an image and process it according to the selected mode."""
        filetypes = [("图片文件", self._IMAGE_PATTERNS)]
        file_path = filedialog.askopenfilename(filetypes=filetypes)
        if not file_path:
            return
        self.current_image_path = file_path
        # UI boundary: any processing failure is shown in the result box.
        try:
            mode = self.mode_var.get()
            if mode == "ocr":
                self.process_ocr(file_path)
            elif mode == "detection":
                self.process_detection(file_path)
            elif mode == "slide":
                self.handle_slide_mode(file_path)
        except Exception as e:
            self._show_error(e)

    def process_ocr(self, file_path):
        """Preview the image, run OCR on it and print text plus probabilities.

        Args:
            file_path: path of the image to recognise.
        """
        with Image.open(file_path) as img:
            self._show_preview(img)

        self._apply_ocr_range()

        with open(file_path, "rb") as f:
            image_bytes = f.read()
        result = self.ocr.classification(image_bytes, probability=True)

        self.result_text.delete(1.0, END)
        if isinstance(result, dict):
            pairs = self._decode_probabilities(result)
            final_text = "".join(char for char, _ in pairs)
            self.result_text.insert(END, f"识别结果:{final_text}\n")
            self.result_text.insert(END, "字符概率:\n")
            for char, prob in pairs:
                self.result_text.insert(END, f"{char}: {prob*100:.2f}%\n")
        else:
            # Plain (non-probability) result.
            self.result_text.insert(END, f"识别结果:{result}")

    def process_detection(self, file_path):
        """Detect targets, preview them as numbered boxes and list their centres.

        Args:
            file_path: path of the image to analyse.

        Raises:
            ValueError: if OpenCV cannot decode the image data.
        """
        with open(file_path, "rb") as f:
            image_bytes = f.read()
        bboxes = self.detector.detection(image_bytes)

        # Decode the bytes already in memory instead of calling cv2.imread:
        # imread returns None for paths it cannot open (e.g. some non-ASCII
        # paths), which would only surface later as an obscure drawing error.
        im = cv2.imdecode(np.frombuffer(image_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)
        if im is None:
            raise ValueError(f"无法读取图片:{file_path}")

        for idx, (x1, y1, x2, y2) in enumerate(bboxes, 1):
            im = cv2.rectangle(im, (x1, y1), (x2, y2), color=(0, 255, 0), thickness=2)
            # Put the number above the box, or inside it when the box is too
            # close to the top edge.
            text_y = y1 - 5 if y1 - 5 > 5 else y1 + 20
            cv2.putText(im, str(idx), (x1, text_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)

        # OpenCV images are BGR; PIL expects RGB.
        im_rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        self._show_preview(Image.fromarray(im_rgb))

        self.result_text.delete(1.0, END)
        self.result_text.insert(END, f"检测到 {len(bboxes)} 个目标\n")
        self.result_text.insert(END, "点击顺序和坐标:\n")
        for idx, (x1, y1, x2, y2) in enumerate(bboxes, 1):
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            self.result_text.insert(END, f"{idx}. 点击坐标: ({center_x}, {center_y})\n")

    def handle_slide_mode(self, file_path):
        """Preview the first slide image and reveal the second-upload button.

        Args:
            file_path: path of the first image.
        """
        self.second_image_btn.pack(side=LEFT, padx=10)
        with Image.open(file_path) as img:
            self._show_preview(img)
        if self.second_image_path:
            self.process_slide_detection()

    def upload_second_image(self):
        """Ask for the second slide image and run the slide detection."""
        filetypes = [("图片文件", self._SECOND_IMAGE_PATTERNS)]
        file_path = filedialog.askopenfilename(filetypes=filetypes)
        if file_path:
            self.second_image_path = file_path
            self.process_slide_detection()

    def process_slide_detection(self):
        """Run the selected slide algorithm on the two loaded images.

        Does nothing until both image paths are set. Failures are reported
        in the result box rather than raised.
        """
        if not self.current_image_path or not self.second_image_path:
            return
        try:
            with open(self.current_image_path, 'rb') as f:
                first_bytes = f.read()
            with open(self.second_image_path, 'rb') as f:
                second_bytes = f.read()

            if self.slide_algorithm.get() == "algorithm1":
                # Algorithm 1: slider matching.
                result = self.slide_detector.slide_match(
                    first_bytes, second_bytes, simple_target=True
                )
                self.result_text.delete(1.0, END)
                self.result_text.insert(END, f"滑块匹配结果:{result}\n")
                self.result_text.insert(END, "注:如果滑块图有透明边框,需要自行估算修正值")
            else:
                # Algorithm 2: image comparison.
                result = self.slide_detector.slide_comparison(first_bytes, second_bytes)
                self.result_text.delete(1.0, END)
                self.result_text.insert(END, f"图片比对结果:{result}")
        except Exception as e:
            self._show_error(e)

    def clear_content(self):
        """Reset the preview, the result text and both stored image paths."""
        self.image_label.config(image="")
        # Drop the PhotoImage kept alive by _show_preview.
        self.image_label.image = None
        self.result_text.delete(1.0, END)
        self.current_image_path = None
        self.second_image_path = None
        self.second_image_btn.pack_forget()

    def on_range_change(self):
        """Show the custom-range entry only while "custom" is selected."""
        if self.range_var.get() == "custom":
            self.custom_range_frame.pack(fill=X, pady=5)
        else:
            self.custom_range_frame.pack_forget()

    def run(self):
        """Enter the Tk main loop; returns when the window is closed."""
        self.window.mainloop()
if __name__ == "__main__":
    # Script entry point: build the GUI, then block in its event loop.
    application = OCRApp()
    application.run()