该代码主要用于拆分清单中不同县部门的部分,后来优化了一下,使其可以适用于不同表格,也算比较实用,不同于方方格子的拆分是,方方格子的拆分适用于单元格内内容固定,该脚本逻辑是“只要包含某个关键词就拆分入其中”,并且会将未拆分过的单独列出,方便检查。
import tkinter as tk
from tkinterdnd2 import DND_FILES, TkinterDnD
from tkinter import messagebox, ttk
import openpyxl
from openpyxl import Workbook, load_workbook
from openpyxl.utils import column_index_from_string, get_column_letter
from copy import copy
import os
import threading
import concurrent.futures
import time
# 定义拆分列号识别的关键词
KEYWORDS = ["县级指导部门", "上级部门(单位)", "承接部门"]
# 全局变量存储自动识别结果
auto_headers = {}
auto_split_cols = {}
# 创建主窗口
root = TkinterDnD.Tk()
root.title("履职清单拆分器 智能识别版 v1.3 by杨海")
root.geometry("600x700")
# 创建Canvas和Scrollbar
canvas = tk.Canvas(root)
scrollbar = tk.Scrollbar(root, orient="vertical", command=canvas.yview)
canvas.configure(yscrollcommand=scrollbar.set)
# 创建一个Frame来放置所有组件
main_frame = tk.Frame(canvas)
# 将main_frame添加到canvas中
canvas.create_window((0, 0), window=main_frame, anchor="nw")
# 绑定滚动事件
def on_mousewheel(event):
canvas.yview_scroll(int(-1*(event.delta/120)), "units")
canvas.bind_all("<MouseWheel>", on_mousewheel)
# 放置canvas和scrollbar
canvas.pack(side="left", fill="both", expand=True)
scrollbar.pack(side="right", fill="y")
# 更新canvas的滚动区域
def update_scrollregion(event):
canvas.configure(scrollregion=canvas.bbox("all"))
main_frame.bind("<Configure>", update_scrollregion)
# 拖拽区域
drop_frame = tk.Frame(main_frame, bg="lightgray", relief="raised", bd=2)
drop_frame.pack(pady=20, padx=20, fill="x")
drop_label = tk.Label(drop_frame, text="拖拽 Excel 文件到此处", bg="lightgray", font=("黑体", 12), height=5)
drop_label.pack(expand=True)
file_path = None
def on_drop(event):
global file_path
file_path = event.data.strip('{}')
drop_label.config(text=f"已上传文件: {os.path.basename(file_path)}")
load_sheets()
update_columns_entries()
status_label.config(text="请勾选需要拆分的表格")
drop_label.drop_target_register(DND_FILES)
drop_label.dnd_bind("<<Drop>>", on_drop)
# 工作表选择区域
sheets_frame = tk.LabelFrame(main_frame, text="选择需要拆分的工作表", font=("黑体", 11), padx=10, pady=10)
sheets_frame.pack(pady=10, padx=20, fill="both")
sheets_vars = []
def load_sheets():
global file_path, sheets_vars, auto_headers, auto_split_cols
if not file_path: return
try:
wb = load_workbook(file_path)
sheets = wb.sheetnames
for w in sheets_frame.winfo_children(): w.destroy()
sheets_vars.clear()
auto_headers.clear()
auto_split_cols.clear()
for sheet in sheets:
var = tk.BooleanVar()
chk = tk.Checkbutton(sheets_frame, text=sheet, variable=var, font=("黑体", 10), command=update_columns_entries)
chk.pack(anchor='w')
sheets_vars.append((sheet, var))
ws = wb[sheet]
# 自动识别表头行数
header_rows = None
for r in range(1, ws.max_row+1):
cell = ws.cell(r,1)
if isinstance(cell.value, (int,float)):
header_rows = r-1
break
auto_headers[sheet] = header_rows
# 自动识别拆分列
split_col = None
found_cell = None
if header_rows:
for c in range(1, ws.max_column+1):
for r in range(1, header_rows+1):
v = ws.cell(r,c).value
if v and any(k in str(v) for k in KEYWORDS):
split_col = get_column_letter(c)
found_cell = (r, split_col)
break
if split_col: break
auto_split_cols[sheet] = (split_col, found_cell) if split_col else None
except Exception as e:
messagebox.showerror("错误", f"无法加载 Excel 文件: {e}")
# 列和表头输入区域
columns_frame = tk.LabelFrame(main_frame, text="设置拆分参数", font=("黑体", 11), padx=10, pady=10)
columns_frame.pack(pady=10, padx=20, fill="both")
columns_entries = {}
headers_entries = {}
def update_columns_entries():
for w in columns_frame.winfo_children(): w.destroy()
selected = [s for s,v in sheets_vars if v.get()]
if not selected:
tk.Label(columns_frame, text="请勾选需要拆分的表格", font=("黑体",10,"italic")).pack(pady=20)
status_label.config(text="请勾选需要拆分的表格")
else:
status_label.config(text="请确认输入并点击确认")
for sheet in selected:
sf = tk.Frame(columns_frame, relief="groove", bd=1); sf.pack(pady=5, fill="x")
tk.Label(sf, text=f"工作表: {sheet}", font=("黑体",10,"bold")).pack(anchor="w", pady=2)
# 拆分列
cf = tk.Frame(sf); cf.pack(fill="x", pady=2)
tk.Label(cf, text="拆分列号 (如 A, B):", font=("黑体",10)).pack(side="left")
ec = tk.Entry(cf, width=5, font=("黑体",10)); ec.pack(side="left", padx=5)
auto = auto_split_cols.get(sheet)
if auto:
sc, fc = auto
ec.insert(0, sc)
txt = f"(智能识别: {sc} 列"
if fc: txt += f",基于{fc[0]}行)"
else: txt += ")"
tk.Label(cf, text=txt, font=("黑体",10), fg="green").pack(side="left", padx=5)
else:
tk.Label(cf, text="(智能识别失败)", font=("黑体",10,"italic"), fg="red").pack(side="left", padx=5)
# 表头行数
hf = tk.Frame(sf); hf.pack(fill="x", pady=2)
tk.Label(hf, text="表头行数:", font=("黑体",10)).pack(side="left")
eh = tk.Entry(hf, width=5, font=("黑体",10)); eh.pack(side="left", padx=5)
ah = auto_headers.get(sheet)
if ah is not None:
eh.insert(0,str(ah))
tk.Label(hf, text=f"(智能识别: {ah} 行)", font=("黑体",10), fg="green").pack(side="left", padx=5)
else:
tk.Label(hf, text="(智能识别失败)", font=("黑体",10,"italic"), fg="red").pack(side="left", padx=5)
columns_entries[sheet] = ec
headers_entries[sheet] = eh
# 关键词输入区域
keywords_frame = tk.LabelFrame(main_frame, text="输入拆分关键词", font=("黑体",11), padx=10, pady=10)
keywords_frame.pack(pady=10, padx=20, fill="x")
tk.Label(keywords_frame, text="关键词(以 @ 分隔):", font=("黑体",10)).pack()
keywords_entry = tk.Entry(keywords_frame, font=("黑体",10))
keywords_entry.pack(fill="x")
add_serial_var = tk.BooleanVar()
add_serial_check = tk.Checkbutton(keywords_frame, text="在拆分后的文件名前添加序号", variable=add_serial_var, font=("黑体",10))
add_serial_check.pack(pady=5)
# 保存方式选择区域
save_mode_frame = tk.LabelFrame(main_frame, text="选择保存方式", font=("黑体",11), padx=10, pady=10)
save_mode_frame.pack(pady=10, padx=20, fill="x")
save_mode_var = tk.StringVar(value="separate")
tk.Radiobutton(save_mode_frame, text="每个拆分结果保存为一个工作簿", variable=save_mode_var, value="separate", font=("黑体",10)).pack(anchor="w")
tk.Radiobutton(save_mode_frame, text="所有拆分结果保存在一个工作簿中", variable=save_mode_var, value="combined", font=("黑体",10)).pack(anchor="w")
# 线程数输入区域
threads_frame = tk.LabelFrame(main_frame, text="拆分线程数", font=("黑体",11), padx=10, pady=10)
threads_frame.pack(pady=10, padx=20, fill="x")
tk.Label(threads_frame, text="线程数(默认智能判断):", font=("黑体",10)).pack(side="left")
threads_entry = tk.Entry(threads_frame, width=5, font=("黑体",10))
threads_entry.pack(side="left", padx=5)
default_threads = os.cpu_count() or 1
threads_entry.insert(0, str(default_threads))
tk.Label(threads_frame, text=f"(智能判断: {default_threads} 线程)", font=("黑体",10), fg="green").pack(side="left", padx=5)
# 进度条区域
progress_frame = tk.Frame(main_frame)
progress_frame.pack(pady=10)
progress_var = tk.DoubleVar()
progress_bar = ttk.Progressbar(progress_frame, variable=progress_var, maximum=100)
progress_bar.pack(side="left", fill="x", expand=True)
progress_label = tk.Label(progress_frame, text="0%", font=("黑体", 10))
progress_label.pack(side="left", padx=5)
# 按钮和状态
button_frame = tk.Frame(main_frame); button_frame.pack(pady=10)
confirm_button = tk.Button(button_frame, text="确认拆分", command=lambda: process(), font=("黑体",10), bg="lightgreen")
confirm_button.pack(side="left", padx=5)
status_label = tk.Label(main_frame, text="请拖拽 Excel 文件到此处", font=("黑体",10,"italic"), fg="gray")
status_label.pack(pady=10)
def process():
global file_path
if not file_path:
messagebox.showerror("错误", "未上传文件")
return
selected = [s for s,v in sheets_vars if v.get()]
if not selected:
messagebox.showerror("错误", "未选择任何工作表")
return
# 读取用户输入参数
columns = {}
headers = {}
for sheet in selected:
try:
col = column_index_from_string(columns_entries[sheet].get().strip().upper())
columns[sheet] = col
except:
messagebox.showerror("错误", f"{sheet} 列号无效")
return
try:
hr = int(headers_entries[sheet].get().strip())
headers[sheet] = hr
except:
messagebox.showerror("错误", f"{sheet} 表头行数无效")
return
kws = [k.strip() for k in keywords_entry.get().split("@") if k.strip()]
if not kws:
messagebox.showerror("错误", "未输入关键词")
return
# 读取线程数
try:
threads = int(threads_entry.get().strip())
if threads < 1:
threads = default_threads
except:
threads = default_threads
# 禁用确认按钮,防止重复点击
confirm_button.config(state="disabled")
# 启动线程
threading.Thread(target=process_thread, args=(selected, columns, headers, kws, threads)).start()
def process_thread(selected, columns, headers, kws, threads):
try:
# 准备输出
result_dir = "拆分结果"
os.makedirs(result_dir, exist_ok=True)
wb = load_workbook(file_path)
split_rows = {s:set() for s in selected}
total_keywords = len(kws)
completed_keywords = 0
lock = threading.Lock()
add_serial = add_serial_var.get()
save_mode = save_mode_var.get()
if save_mode == "combined":
new_wb = Workbook()
if new_wb.sheetnames: new_wb.remove(new_wb[new_wb.sheetnames[0]])
for idx, keyword in enumerate(kws, start=1):
for sheet in selected:
original = wb[sheet]
ws_name = f"{keyword}_{sheet}"
if add_serial:
ws_name = f"{idx:02d}_{ws_name}"
new_ws = new_wb.create_sheet(title=ws_name)
hr = headers[sheet]
col_num = columns[sheet]
# 复制列宽
for c in range(1, original.max_column+1):
letter = get_column_letter(c)
wd = original.column_dimensions[letter].width
if wd: new_ws.column_dimensions[letter].width = wd
# 复制“顶端标题行”合并和内容
for merge in original.merged_cells.ranges:
if merge.min_row<=hr and merge.max_row<=hr:
new_ws.merge_cells(
start_row=merge.min_row, start_column=merge.min_col,
end_row=merge.max_row, end_column=merge.max_col
)
for r in range(1, hr+1):
for c in range(1, original.max_column+1):
oc = original.cell(r,c)
nc = new_ws.cell(r,c,value=oc.value)
if oc.has_style:
nc.font = copy(oc.font)
nc.border = copy(oc.border)
nc.fill = copy(oc.fill)
nc.number_format = copy(oc.number_format)
nc.protection = copy(oc.protection)
nc.alignment = copy(oc.alignment)
rh = original.row_dimensions[r].height
if rh: new_ws.row_dimensions[r].height = rh
# 复制打印及页面设置
new_ws.page_setup = copy(original.page_setup)
new_ws.page_margins = copy(original.page_margins)
new_ws.print_options = copy(original.print_options)
new_ws.print_title_rows = original.print_title_rows
new_ws.print_title_cols = original.print_title_cols
# 按块复制数据行并保持合并
merged_regions = [
mr for mr in original.merged_cells.ranges
if mr.min_row > hr
]
row_to_merge = {}
for mr in merged_regions:
for r in range(mr.min_row, mr.max_row+1):
row_to_merge[r] = mr
orig_to_new = {}
next_row = hr + 1
has_data = False
r = hr + 1
while r <= original.max_row:
mr = row_to_merge.get(r)
if mr and r != mr.min_row:
r += 1
continue
if mr:
region = list(range(mr.min_row, mr.max_row+1))
check_r = mr.min_row
else:
region = [r]
check_r = r
val = original.cell(check_r, col_num).value
if val and str(keyword) in str(val):
for orow in region:
for c in range(1, original.max_column+1):
oc = original.cell(orow,c)
nc = new_ws.cell(row=next_row, column=c, value=oc.value)
if oc.has_style:
nc.font = copy(oc.font)
nc.border = copy(oc.border)
nc.fill = copy(oc.fill)
nc.number_format = copy(oc.number_format)
nc.protection = copy(oc.protection)
nc.alignment = copy(oc.alignment)
orig_to_new[orow] = next_row
rh = original.row_dimensions[orow].height
if rh: new_ws.row_dimensions[next_row].height = rh
next_row += 1
has_data = True
r = region[-1] + 1
# 重现数据区的合并单元格
for mr in merged_regions:
if all(rr in orig_to_new for rr in range(mr.min_row, mr.max_row+1)):
start_n = orig_to_new[mr.min_row]
end_n = orig_to_new[mr.max_row]
new_ws.merge_cells(
start_row=start_n, start_column=mr.min_col,
end_row=end_n, end_column=mr.max_col
)
if not has_data:
new_wb.remove(new_ws)
else:
with lock:
split_rows[sheet].update(orig_to_new.keys())
with lock:
completed_keywords += 1
progress = (completed_keywords / total_keywords) * 100
root.after(0, update_progress, progress)
if new_wb.sheetnames:
new_wb.save(os.path.join(result_dir, "combined.xlsx"))
else:
def split_keyword(keyword, idx):
nonlocal completed_keywords
new_wb = Workbook()
if new_wb.sheetnames: new_wb.remove(new_wb[new_wb.sheetnames[0]])
for sheet in selected:
original = wb[sheet]
new_ws = new_wb.create_sheet(title=sheet)
hr = headers[sheet]
col_num = columns[sheet]
# 复制列宽
for c in range(1, original.max_column+1):
letter = get_column_letter(c)
wd = original.column_dimensions[letter].width
if wd: new_ws.column_dimensions[letter].width = wd
# 复制“顶端标题行”合并和内容
for merge in original.merged_cells.ranges:
if merge.min_row<=hr and merge.max_row<=hr:
new_ws.merge_cells(
start_row=merge.min_row, start_column=merge.min_col,
end_row=merge.max_row, end_column=merge.max_col
)
for r in range(1, hr+1):
for c in range(1, original.max_column+1):
oc = original.cell(r,c)
nc = new_ws.cell(r,c,value=oc.value)
if oc.has_style:
nc.font = copy(oc.font)
nc.border = copy(oc.border)
nc.fill = copy(oc.fill)
nc.number_format = copy(oc.number_format)
nc.protection = copy(oc.protection)
nc.alignment = copy(oc.alignment)
rh = original.row_dimensions[r].height
if rh: new_ws.row_dimensions[r].height = rh
# 复制打印及页面设置
new_ws.page_setup = copy(original.page_setup)
new_ws.page_margins = copy(original.page_margins)
new_ws.print_options = copy(original.print_options)
new_ws.print_title_rows = original.print_title_rows
new_ws.print_title_cols = original.print_title_cols
# 按块复制数据行并保持合并
merged_regions = [
mr for mr in original.merged_cells.ranges
if mr.min_row > hr
]
row_to_merge = {}
for mr in merged_regions:
for r in range(mr.min_row, mr.max_row+1):
row_to_merge[r] = mr
orig_to_new = {}
next_row = hr + 1
has_data = False
r = hr + 1
while r <= original.max_row:
mr = row_to_merge.get(r)
if mr and r != mr.min_row:
r += 1
continue
if mr:
region = list(range(mr.min_row, mr.max_row+1))
check_r = mr.min_row
else:
region = [r]
check_r = r
val = original.cell(check_r, col_num).value
if val and str(keyword) in str(val):
for orow in region:
for c in range(1, original.max_column+1):
oc = original.cell(orow,c)
nc = new_ws.cell(row=next_row, column=c, value=oc.value)
if oc.has_style:
nc.font = copy(oc.font)
nc.border = copy(oc.border)
nc.fill = copy(oc.fill)
nc.number_format = copy(oc.number_format)
nc.protection = copy(oc.protection)
nc.alignment = copy(oc.alignment)
orig_to_new[orow] = next_row
rh = original.row_dimensions[orow].height
if rh: new_ws.row_dimensions[next_row].height = rh
next_row += 1
has_data = True
r = region[-1] + 1
# 重现数据区的合并单元格
for mr in merged_regions:
if all(rr in orig_to_new for rr in range(mr.min_row, mr.max_row+1)):
start_n = orig_to_new[mr.min_row]
end_n = orig_to_new[mr.max_row]
new_ws.merge_cells(
start_row=start_n, start_column=mr.min_col,
end_row=end_n, end_column=mr.max_col
)
if not has_data:
new_wb.remove(new_ws)
else:
with lock:
split_rows[sheet].update(orig_to_new.keys())
if new_wb.sheetnames:
safe = keyword.replace("/","_").replace("\\","_")
if add_serial:
safe = f"{idx:02d}_{safe}"
new_wb.save(os.path.join(result_dir, f"{safe}.xlsx"))
with lock:
completed_keywords += 1
progress = (completed_keywords / total_keywords) * 100
root.after(0, update_progress, progress)
with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
executor.map(lambda x: split_keyword(x[1], x[0]+1), enumerate(kws))
# 处理未拆分行
unsplit_list = []
for sheet in selected:
ws = wb[sheet]
hr = headers[sheet]
all_rows = set(range(hr+1, ws.max_row+1))
left = all_rows - split_rows[sheet]
if left:
unsplit_list.append((sheet, left))
if unsplit_list:
uwb = Workbook()
if uwb.sheetnames: uwb.remove(uwb[uwb.sheetnames[0]])
for sheet,left in unsplit_list:
orig = wb[sheet]
nws = uwb.create_sheet(title=f"{sheet}_unsplit")
hr = headers[sheet]
# 复制列宽 & 顶端标题
for c in range(1, orig.max_column+1):
lt = get_column_letter(c)
wd = orig.column_dimensions[lt].width
if wd: nws.column_dimensions[lt].width = wd
for mr in orig.merged_cells.ranges:
if mr.min_row<=hr and mr.max_row<=hr:
nws.merge_cells(
start_row=mr.min_row, start_column=mr.min_col,
end_row=mr.max_row, end_column=mr.max_col
)
for r in range(1,hr+1):
for c in range(1, orig.max_column+1):
oc = orig.cell(r,c)
nc = nws.cell(r,c,value=oc.value)
if oc.has_style:
nc.font = copy(oc.font)
nc.border = copy(oc.border)
nc.fill = copy(oc.fill)
nc.number_format = copy(oc.number_format)
nc.protection = copy(oc.protection)
nc.alignment = copy(oc.alignment)
rh = orig.row_dimensions[r].height
if rh: nws.row_dimensions[r].height = rh
# 复制打印设置
nws.page_setup = copy(orig.page_setup)
nws.page_margins = copy(orig.page_margins)
nws.print_options = copy(orig.print_options)
nws.print_title_rows = orig.print_title_rows
nws.print_title_cols = orig.print_title_cols
# 复制未拆分数据行及合并
mrs = [mr for mr in orig.merged_cells.ranges if mr.min_row>hr]
r2new = {}
nr = hr + 1
for r in sorted(left):
for c in range(1, orig.max_column+1):
oc = orig.cell(r,c)
nc = nws.cell(row=nr, column=c, value=oc.value)
if oc.has_style:
nc.font = copy(oc.font)
nc.border = copy(oc.border)
nc.fill = copy(oc.fill)
nc.number_format = copy(oc.number_format)
nc.protection = copy(oc.protection)
nc.alignment = copy(oc.alignment)
r2new[r] = nr
rh = orig.row_dimensions[r].height
if rh: nws.row_dimensions[nr].height = rh
nr += 1
for mr in mrs:
if all(r in r2new for r in range(mr.min_row, mr.max_row+1)):
nws.merge_cells(
start_row=r2new[mr.min_row], start_column=mr.min_col,
end_row=r2new[mr.max_row], end_column=mr.max_col
)
uwb.save(os.path.join(result_dir, "unsplit.xlsx"))
root.after(0, messagebox.showwarning, "警告", "存在未拆分内容,请检查 unsplit.xlsx")
else:
root.after(0, messagebox.showinfo, "信息", "所有行均已拆分")
except Exception as e:
root.after(0, messagebox.showerror, "错误", f"拆分过程中出错: {e}")
finally:
root.after(0, confirm_button.config, {"state": "normal"})
root.after(0, update_progress, 0)
def update_progress(value):
progress_var.set(value)
progress_label.config(text=f"{value:.1f}%")
# 运行 GUI
root.mainloop()