1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
|
import base64
import datetime
import io
import json
import os
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
from typing import Dict, List

import pandas as pd
from PIL import Image, ImageTk
from volcenginesdkarkruntime import Ark
class CropWindow:
    """Toplevel window in which the user drags a rectangle to crop an image.

    The image is shown scaled down to fit ``DISPLAY_SIZE``; the rectangle
    drawn on that preview is mapped back to the coordinates of the
    full-size image before cropping.

    Args:
        parent: Tk widget that owns the new Toplevel.
        image_path: Path of the image file to open and crop.
        callback: Called with a single PIL image argument: the cropped
            image when the user confirms a selection, or the full original
            image when the user skips cropping.

    Raises:
        OSError: Propagated from ``Image.open`` when the file cannot be
            opened. The image is opened before any window is created, so
            no empty window is left behind in that case.
    """

    # Largest (width, height) at which the preview is drawn.
    DISPLAY_SIZE = (800, 600)

    def __init__(self, parent, image_path, callback):
        # Open the image first: if this fails no Toplevel has been created.
        self.original_image = Image.open(image_path)
        self.callback = callback

        self.top = tk.Toplevel(parent)
        self.top.title("裁切图片")

        # thumbnail() shrinks the copy in place while keeping aspect ratio.
        self.display_image = self.original_image.copy()
        self.display_image.thumbnail(self.DISPLAY_SIZE)

        # Factors that convert preview coordinates to original coordinates.
        self.scale_x = self.original_image.width / self.display_image.width
        self.scale_y = self.original_image.height / self.display_image.height

        self.canvas = tk.Canvas(
            self.top,
            width=self.display_image.width,
            height=self.display_image.height,
        )
        self.canvas.pack(expand=True)

        # Keep the PhotoImage on the instance so it is not garbage collected
        # while the canvas still displays it.
        self.photo = ImageTk.PhotoImage(self.display_image)
        self.canvas.create_image(0, 0, image=self.photo, anchor=tk.NW)

        # Selection state.
        self.start_x = None
        self.start_y = None
        self.rect = None
        self.crop_coords = None

        self.canvas.bind("<ButtonPress-1>", self.on_press)
        self.canvas.bind("<B1-Motion>", self.on_drag)
        self.canvas.bind("<ButtonRelease-1>", self.on_release)

        button_frame = ttk.Frame(self.top)
        button_frame.pack(pady=5)
        ttk.Button(button_frame, text="确认裁切", command=self.crop_and_close).pack(side=tk.LEFT, padx=5)
        ttk.Button(button_frame, text="跳过裁切", command=self.skip_crop).pack(side=tk.LEFT, padx=5)

    @staticmethod
    def _selection_to_crop_box(start, end, display_size, scale):
        """Convert a dragged preview rectangle into a crop box on the original.

        Args:
            start: ``(x, y)`` of the mouse press, in preview pixels.
            end: ``(x, y)`` of the mouse release, in preview pixels.
            display_size: ``(width, height)`` of the preview image.
            scale: ``(scale_x, scale_y)`` factors from preview to original.

        Returns:
            ``(left, upper, right, lower)`` in original-image pixels, or
            ``None`` when the selection has no area.
        """
        max_x, max_y = display_size
        scale_x, scale_y = scale
        # The mouse can leave the canvas during a drag; clamp to the preview.
        xs = sorted(min(max(value, 0), max_x) for value in (start[0], end[0]))
        ys = sorted(min(max(value, 0), max_y) for value in (start[1], end[1]))
        box = (
            int(xs[0] * scale_x),
            int(ys[0] * scale_y),
            int(xs[1] * scale_x),
            int(ys[1] * scale_y),
        )
        if box[2] <= box[0] or box[3] <= box[1]:
            return None
        return box

    def on_press(self, event):
        """Start a new selection at the pressed point and discard the old one."""
        self.start_x = event.x
        self.start_y = event.y
        # The rectangle is removed from the canvas, so the stored crop box
        # must be dropped too; otherwise an invisible selection would be used.
        self.crop_coords = None
        if self.rect:
            self.canvas.delete(self.rect)
            self.rect = None

    def on_drag(self, event):
        """Redraw the red selection rectangle while the mouse is dragged."""
        if self.start_x is None:
            return
        if self.rect:
            self.canvas.delete(self.rect)
        self.rect = self.canvas.create_rectangle(
            self.start_x, self.start_y,
            event.x, event.y,
            outline='red'
        )

    def on_release(self, event):
        """Store the finished selection as a crop box in original coordinates.

        A selection without area (for example a plain click) leaves
        ``crop_coords`` set to ``None``.
        """
        if self.start_x is None:
            return
        self.crop_coords = self._selection_to_crop_box(
            (self.start_x, self.start_y),
            (event.x, event.y),
            self.display_image.size,
            (self.scale_x, self.scale_y),
        )

    def crop_and_close(self):
        """Crop to the current selection, hand the result to the callback, close.

        When there is no selection the window is closed without invoking
        the callback.
        """
        if self.crop_coords:
            cropped_image = self.original_image.crop(self.crop_coords)
            self.callback(cropped_image)
        self.top.destroy()

    def skip_crop(self):
        """Hand the uncropped original image to the callback and close."""
        self.callback(self.original_image)
        self.top.destroy()
class ImageProcessorApp:
    """Tk application that extracts fields from images and exports them to Excel.

    The user loads images, optionally crops them, edits the list of fields
    to extract and picks an output directory. Processing sends every image
    to one Ark chat-completion endpoint, sends that reply to a second
    endpoint to be reorganised into records, and writes all records to a
    timestamped ``.xlsx`` file.

    Args:
        root: The Tk root window the application builds its widgets in.
    """

    # Environment variable consulted first for the Ark API key.
    API_KEY_ENV_VAR = "ARK_API_KEY"
    # NOTE(review): this credential is committed to source control. It is kept
    # only as a fallback so existing setups keep working; rotate it and rely
    # on the environment variable instead.
    _FALLBACK_API_KEY = "1c477866-b675-423e-b205-3c285fb15925"

    # Endpoint that receives the prompt together with the image.
    EXTRACT_MODEL = "ep-20250118110159-n2vsp"
    # Endpoint that receives the text-only reorganisation prompt.
    ORGANIZE_MODEL = "ep-20250117235826-zpdh8"

    # Bounding box used when scaling images for the central preview.
    MAIN_DISPLAY_SIZE = (600, 400)
    # Bounding box used when scaling images for the thumbnail strip.
    THUMBNAIL_SIZE = (100, 100)

    def __init__(self, root):
        self.root = root
        self.root.title("图片表格提取器")
        self.root.geometry("800x600")

        api_key = os.environ.get(self.API_KEY_ENV_VAR) or self._FALLBACK_API_KEY
        self.client = Ark(api_key=api_key)

        # Each entry is {"filepath": str, "image": PIL image}.
        self.images: List[Dict] = []
        self.current_crop_index = 0
        self.pending_images = []

        # Fields to extract and the directory the Excel file is written to.
        self.fields = ["姓名", "车次", "出发地", "目的地", "车票价格"]
        self.excel_output_path = ""

        # Index into self.images of the image shown in the central preview.
        self.current_display_index = 0
        self.setup_ui()

    # ------------------------------------------------------------------ UI

    def setup_ui(self):
        """Build the widget tree: thumbnails and preview left, settings right."""
        self.main_frame = ttk.Frame(self.root)
        self.main_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        self.main_panel = ttk.PanedWindow(self.main_frame, orient=tk.HORIZONTAL)
        self.main_panel.pack(fill=tk.BOTH, expand=True)

        # Left side gets the larger weight of the two main panes.
        self.left_panel = ttk.PanedWindow(self.main_panel, orient=tk.HORIZONTAL)
        self.main_panel.add(self.left_panel, weight=4)

        self._build_thumbnail_strip()
        self._build_display_area()
        self._build_settings_panel()

    def _build_thumbnail_strip(self):
        """Create the scrollable thumbnail column inside the left pane."""
        self.thumbnail_frame = ttk.LabelFrame(self.left_panel, text="已上传图片")
        self.left_panel.add(self.thumbnail_frame, weight=1)

        self.thumb_canvas = tk.Canvas(self.thumbnail_frame, width=150)
        self.thumb_scrollbar = ttk.Scrollbar(
            self.thumbnail_frame, orient=tk.VERTICAL, command=self.thumb_canvas.yview
        )
        self.thumb_canvas.configure(yscrollcommand=self.thumb_scrollbar.set)
        self.thumb_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        self.thumb_canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

        # The thumbnails live in a frame embedded in the canvas so that the
        # canvas scrollbar can scroll them.
        self.thumbnail_container = ttk.Frame(self.thumb_canvas)
        self.thumb_canvas.create_window((0, 0), window=self.thumbnail_container, anchor=tk.NW)

    def _build_display_area(self):
        """Create the toolbar, the central preview label and the counter."""
        self.display_frame = ttk.Frame(self.left_panel)
        self.left_panel.add(self.display_frame, weight=3)

        nav_frame = ttk.Frame(self.display_frame)
        nav_frame.pack(fill=tk.X, pady=5)
        toolbar = (
            ("上一张", self.prev_image),
            ("下一张", self.next_image),
            ("选择图片", self.select_images),
            ("裁剪图片", self.crop_current_image),
            ("处理图片", self.process_images),
            ("清空图片", self.clear_images),
        )
        for caption, handler in toolbar:
            ttk.Button(nav_frame, text=caption, command=handler).pack(side=tk.LEFT, padx=5)

        self.main_image_label = ttk.Label(self.display_frame)
        self.main_image_label.pack(fill=tk.BOTH, expand=True)

        self.image_counter_label = ttk.Label(self.display_frame, text="")
        self.image_counter_label.pack(pady=5)

    def _build_settings_panel(self):
        """Create the right pane with the field list and export-path inputs."""
        self.settings_frame = ttk.Frame(self.main_panel)
        self.main_panel.add(self.settings_frame, weight=1)

        field_settings = ttk.LabelFrame(self.settings_frame, text="字段设置")
        field_settings.pack(fill=tk.X, pady=5, padx=5)
        ttk.Label(field_settings, text="提取字段(一行一个):").pack(padx=5, pady=2)
        self.fields_text = tk.Text(field_settings, height=5, width=30)
        self.fields_text.pack(padx=5, pady=2)
        self.fields_text.insert('1.0', "\n".join(self.fields))
        ttk.Button(field_settings, text="更新字段", command=self.update_fields).pack(padx=5, pady=5)

        path_settings = ttk.LabelFrame(self.settings_frame, text="导出设置")
        path_settings.pack(fill=tk.X, pady=5, padx=5)
        ttk.Label(path_settings, text="Excel导出路径:").pack(padx=5, pady=2)
        self.path_var = tk.StringVar()
        ttk.Entry(path_settings, textvariable=self.path_var, width=30).pack(padx=5, pady=2)
        ttk.Button(path_settings, text="选择路径", command=self.select_output_path).pack(padx=5, pady=5)

    # ------------------------------------------------------- image browsing

    def clear_images(self):
        """Remove every loaded image and reset both views."""
        self.images.clear()
        self.current_display_index = 0
        self.refresh_thumbnail_view()
        self.update_main_display()

    def refresh_thumbnail_view(self):
        """Rebuild the thumbnail strip from ``self.images``."""
        for widget in self.thumbnail_container.winfo_children():
            widget.destroy()

        total = len(self.images)
        for i, img_info in enumerate(self.images):
            frame = ttk.Frame(self.thumbnail_container)
            frame.pack(pady=5, padx=5)

            thumb = img_info["image"].copy()
            thumb.thumbnail(self.THUMBNAIL_SIZE)
            photo = ImageTk.PhotoImage(thumb)

            thumb_frame = ttk.Frame(frame)
            thumb_frame.pack()

            ttk.Label(thumb_frame, text=f"{i + 1}/{total}").pack()
            label = ttk.Label(thumb_frame, image=photo)
            # Keep a reference on the widget so the PhotoImage is not
            # garbage collected while it is displayed.
            label.image = photo
            label.pack()

            # idx=i binds the current index; a plain closure would see the
            # final value of i for every button.
            ttk.Button(thumb_frame, text="×", width=2,
                       command=lambda idx=i: self.remove_image(idx)).pack()
            label.bind('<Button-1>', lambda e, idx=i: self.show_image(idx))

        # Recompute the scrollable area now that the children have changed.
        self.thumbnail_container.update_idletasks()
        self.thumb_canvas.configure(scrollregion=self.thumb_canvas.bbox("all"))

    def remove_image(self, index):
        """Remove the image at ``index``; out-of-range indexes are ignored.

        The preview keeps showing the same image when an earlier entry is
        removed; if the shown image itself was the last entry, the preview
        moves to the new last entry.
        """
        if not 0 <= index < len(self.images):
            return
        self.images.pop(index)
        if index < self.current_display_index:
            # Everything after the removed entry moved down by one.
            self.current_display_index -= 1
        elif self.current_display_index >= len(self.images):
            self.current_display_index = max(0, len(self.images) - 1)
        self.refresh_thumbnail_view()
        self.update_main_display()

    def show_image(self, index):
        """Show the image at ``index`` in the preview; ignore bad indexes."""
        if 0 <= index < len(self.images):
            self.current_display_index = index
            self.update_main_display()

    def update_main_display(self):
        """Redraw the central preview and the "n of m" counter."""
        if not self.images:
            self.main_image_label.configure(image='')
            # Drop the reference to the last PhotoImage so it can be freed.
            self.main_image_label.image = None
            self.image_counter_label.configure(text="")
            return

        img = self.images[self.current_display_index]["image"].copy()
        img.thumbnail(self.MAIN_DISPLAY_SIZE)
        photo = ImageTk.PhotoImage(img)
        self.main_image_label.configure(image=photo)
        # Keep a reference so the PhotoImage outlives this method.
        self.main_image_label.image = photo

        self.image_counter_label.configure(
            text=f"当前第 {self.current_display_index + 1} 张,共 {len(self.images)} 张"
        )

    def prev_image(self):
        """Show the previous image, wrapping from the first to the last."""
        if self.images:
            self.current_display_index = (self.current_display_index - 1) % len(self.images)
            self.update_main_display()

    def next_image(self):
        """Show the next image, wrapping from the last to the first."""
        if self.images:
            self.current_display_index = (self.current_display_index + 1) % len(self.images)
            self.update_main_display()

    def select_images(self):
        """Ask the user for image files and append the ones that load."""
        filepaths = filedialog.askopenfilenames(
            title="选择图片",
            filetypes=[("Image files", "*.png *.jpg *.jpeg *.gif *.bmp")]
        )
        if not filepaths:
            return

        for filepath in filepaths:
            try:
                image = Image.open(filepath)
                # Image.open is lazy; load() reads the pixel data now so a
                # corrupt file is reported here rather than at display time.
                image.load()
            except Exception as e:
                messagebox.showerror("错误", f"无法加载图片 {filepath}: {str(e)}")
            else:
                self.images.append({
                    "filepath": filepath,
                    "image": image
                })

        self.refresh_thumbnail_view()
        if self.images:
            self.show_image(len(self.images) - 1)

    # ------------------------------------------------------------- settings

    @staticmethod
    def _parse_fields(text):
        """Return the non-blank lines of ``text``, each stripped of whitespace."""
        return [line.strip() for line in text.split('\n') if line.strip()]

    def update_fields(self):
        """Replace the field list with the contents of the text box.

        An empty list is rejected with a warning and the previous fields
        are kept, because the extraction prompt needs at least one field.
        """
        parsed = self._parse_fields(self.fields_text.get('1.0', tk.END))
        if not parsed:
            messagebox.showwarning("警告", "请至少输入一个字段!")
            return
        self.fields = parsed
        messagebox.showinfo("成功", "字段已更新!")

    def select_output_path(self):
        """Ask the user for the export directory and remember it."""
        folder_path = filedialog.askdirectory(title="选择Excel导出目录")
        if folder_path:
            self.excel_output_path = folder_path
            self.path_var.set(folder_path)

    # ----------------------------------------------------------- processing

    @staticmethod
    def _encode_png_base64(img):
        """Return ``img`` encoded as PNG and then base64, as a ``str``.

        Images with an alpha channel or palette transparency are converted
        to RGB before saving.
        """
        has_alpha = img.mode in ('RGBA', 'LA') or (
            img.mode == 'P' and 'transparency' in img.info
        )
        if has_alpha:
            img = img.convert('RGB')
        buffered = io.BytesIO()
        img.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode('utf-8')

    @staticmethod
    def _parse_json_reply(text):
        """Decode a model reply as JSON.

        Besides plain JSON this accepts a reply wrapped in a Markdown code
        fence and a reply with prose around a single JSON object or array.

        Raises:
            json.JSONDecodeError: If no JSON value can be decoded.
        """
        cleaned = (text or "").strip()
        if cleaned.startswith("```"):
            # Drop the opening fence line (``` or ```json) ...
            newline = cleaned.find("\n")
            cleaned = cleaned[newline + 1:] if newline != -1 else cleaned[3:]
            # ... and the closing fence.
            cleaned = cleaned.rstrip()
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
            cleaned = cleaned.strip()
        try:
            return json.loads(cleaned)
        except json.JSONDecodeError:
            # Fall back to the outermost {...} or [...] span in the text.
            openers = [pos for pos in (cleaned.find("{"), cleaned.find("[")) if pos != -1]
            if not openers:
                raise
            start = min(openers)
            closer = "}" if cleaned[start] == "{" else "]"
            end = cleaned.rfind(closer)
            if end <= start:
                raise
            return json.loads(cleaned[start:end + 1])

    @staticmethod
    def _normalize_records(parsed, fields):
        """Coerce decoded JSON into a list of row dicts.

        A single dict is treated as one record. Non-dict list items are
        dropped. Every record gets an empty string for each field it lacks.

        Raises:
            ValueError: If ``parsed`` is neither a dict nor a list.
        """
        if isinstance(parsed, dict):
            parsed = [parsed]
        if not isinstance(parsed, list):
            raise ValueError("模型返回的数据不是记录列表")
        records = []
        for item in parsed:
            if not isinstance(item, dict):
                continue
            record = {field: "" for field in fields}
            record.update(item)
            records.append(record)
        return records

    @staticmethod
    def _build_extract_prompt(fields):
        """Return the prompt sent together with the image."""
        return (
            f"请仔细分析这张图片,提取以下字段的信息并以JSON格式返回:{', '.join(fields)}。\n"
            "注意事项:\n"
            "1. 图片中包含手写体文字,请特别注意:\n"
            " - 仔细辨识每个手写字符,包括潦草字迹\n"
            " - 注意辨识英文字母、数字和特殊符号\n"
            " - 对于不确定的字符,尝试从上下文推断\n"
            "2. 关于涂抹和修改的处理:\n"
            " - 如有涂抹内容,优先识别涂抹处周围或上方的新增内容\n"
            " - 对于部分涂抹但仍可见的内容,尝试识别可见部分\n"
            " - 注意识别批注、更正或添加的内容\n"
            "3. 特殊情况处理:\n"
            " - 对于缩写或简写,尽可能还原完整内容\n"
            " - 注意识别表格边缘或角落的信息\n"
            " - 对于模糊不清的内容,根据上下文推断\n"
            "4. 如果某字段信息确实无法识别或不存在,返回空字符串\n"
            "5. 优先提取最新的、有效的信息,忽略已被明确删除的内容"
        )

    @staticmethod
    def _build_organize_prompt(raw_result, fields):
        """Return the text-only prompt that asks for a JSON array of records."""
        return (
            "我需要你帮我整理以下数据到规范的格式。这些数据来自图片识别结果,需要被整理成独立的记录。\n"
            "原始数据如下:\n"
            f"{json.dumps(raw_result, ensure_ascii=False, indent=2)}\n\n"
            "请你将数据处理成列表格式,每个元素都是一个字典,对应一条完整的记录。\n"
            "处理要求:\n"
            "1. 数据拆分和整理:\n"
            " - 如果任何字段中包含多个值,将其拆分成多个独立记录\n"
            " - 确保拆分后的每条记录包含所有必要字段:"
            f"{', '.join(fields)}\n"
            " - 保持各字段之间的对应关系\n"
            "2. 特殊内容处理:\n"
            " - 统一处理英文大小写(如缩写、代码等)\n"
            " - 规范化数字格式(如价格、编号等)\n"
            " - 处理可能的重复信息\n"
            "3. 数据完整性:\n"
            " - 返回JSON格式,确保每个字段都存在\n"
            " - 对无法识别的信息使用空字符串\n"
            " - 保持数据的一致性和完整性\n"
            "4. 上下文关联:\n"
            " - 利用上下文补充可能缺失的信息\n"
            " - 确保关联字段之间的逻辑性\n"
            " - 处理可能的交叉引用\n"
            "5. 数据清理:\n"
            " - 移除多余的空格和特殊字符\n"
            " - 统一格式化字符串\n"
            " - 确保数据的规范性\n"
            "请直接返回JSON数组,数组中的每个元素都应该是包含所有必要字段的字典。不要包含任何其他解释文字。"
        )

    def _extract_records(self, img):
        """Run both requests for one image.

        Returns:
            A list of record dicts, or ``None`` when either response
            contains no usable choice.
        """
        img_str = self._encode_png_base64(img)
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": self._build_extract_prompt(self.fields)
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{img_str}"
                        }
                    }
                ]
            }
        ]
        response = self.client.chat.completions.create(
            model=self.EXTRACT_MODEL,
            messages=messages
        )
        if not (response.choices and response.choices[0]):
            return None
        raw_result = self._parse_json_reply(response.choices[0].message.content)

        organize_response = self.client.chat.completions.create(
            model=self.ORGANIZE_MODEL,
            messages=[
                {
                    "role": "user",
                    "content": self._build_organize_prompt(raw_result, self.fields)
                }
            ]
        )
        if not (organize_response.choices and organize_response.choices[0]):
            return None
        organized = self._parse_json_reply(organize_response.choices[0].message.content)
        return self._normalize_records(organized, self.fields)

    def _export_to_excel(self, all_data):
        """Write ``all_data`` to a timestamped workbook in the export directory."""
        try:
            df = pd.DataFrame(all_data)
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            output_file = os.path.join(self.excel_output_path, f"提取结果_{timestamp}.xlsx")
            df.to_excel(output_file, index=False)
            messagebox.showinfo("成功", f"数据已保存到:{output_file}")
        except Exception as e:
            messagebox.showerror("错误", f"保存Excel文件时出错:{str(e)}")

    def process_images(self):
        """Extract records from every loaded image and export them to Excel.

        Requests run synchronously in the calling thread. A failure on one
        image is reported in a dialog and the remaining images are still
        processed.
        """
        if not self.images:
            messagebox.showwarning("警告", "请先选择图片!")
            return

        # The path entry is editable, so honour a path typed by hand as well
        # as one chosen through the directory dialog.
        typed_path = self.path_var.get().strip()
        if typed_path:
            self.excel_output_path = typed_path
        if not self.excel_output_path:
            messagebox.showwarning("警告", "请先选择Excel导出路径!")
            return

        all_data = []
        for img_info in self.images:
            try:
                organized_results = self._extract_records(img_info['image'])
            except Exception as e:
                messagebox.showerror("错误", f"处理图片时出错:{str(e)}")
                continue
            if organized_results is None:
                continue
            all_data.extend(organized_results)
            print(f"处理完成图片:{img_info['filepath']}")
            print(f"整理后的结果:{organized_results}")

        if all_data:
            self._export_to_excel(all_data)

    # ------------------------------------------------------------- cropping

    def crop_current_image(self):
        """Open a crop window for the image currently shown in the preview.

        The crop window re-opens the file at the entry's ``filepath``.
        """
        if not self.images or self.current_display_index >= len(self.images):
            messagebox.showwarning("警告", "没有可裁剪的图片!")
            return

        target = self.images[self.current_display_index]

        def apply_crop(cropped):
            # Find the entry by identity when the crop finishes: the user may
            # have navigated or removed images while the window was open, so
            # the index at that time can differ from the index now.
            for idx, entry in enumerate(self.images):
                if entry is target:
                    self.on_crop_complete(cropped, idx)
                    return

        try:
            CropWindow(self.root, target['filepath'], apply_crop)
        except OSError as e:
            messagebox.showerror("错误", f"无法加载图片 {target['filepath']}: {str(e)}")

    def on_crop_complete(self, cropped_image, index):
        """Store ``cropped_image`` at ``index`` and refresh both views."""
        if 0 <= index < len(self.images):
            self.images[index]['image'] = cropped_image
            self.refresh_thumbnail_view()
            self.update_main_display()
def main():
    """Create the Tk root window, build the application in it and run the event loop."""
    root = tk.Tk()
    # No local name is needed for the application object: it is not used
    # again here, and the widgets it creates are attached to root.
    ImageProcessorApp(root)
    root.mainloop()
# Start the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|