线上智能沙盘VS线下手工做账效果对比统计工具

一、实际应用场景描述

场景背景
某财经职业技术学院会计系正在进行教学改革，引入了智能会计综合实训平台（线上沙盘）替代传统的手工做账实验室。学期末，系主任需要科学评估两种教学模式的优劣，为下学年的课程设置提供数据支撑。

核心任务
1. 数据采集：分别收集线上沙盘和线下手工做账的实训考核数据
2. 多维度对比：从准确性、效率、成本、错误类型等维度进行量化分析
3. 效果评判：自动生成对比报告，识别两种模式的优劣势
4. 决策支持：为教学模式选择提供数据化建议

二、引入痛点
1. 评价主观性强：传统评价依赖教师主观印象，“线上看起来效率高”，但不知道具体好多少
2. 数据孤岛：线上线下成绩分散在不同系统，难以统一分析
3. 维度单一：只看最终分数，忽略了过程数据和错误模式的分析
4. 反馈滞后：手工统计分析耗时过长，影响教学改进时效性

三、核心逻辑讲解
本工具采用“多维度加权评分 + 统计显著性检验”的综合分析方法。

1. 数据采集层 (Data Collection)
- 标准化接口读取线上线下实训数据
- 自动清洗异常值和缺失数据
- 统一评分标准和时间基准

2. 维度分析层 (Dimension Analysis)
- 准确性维度：分录正确率、报表准确率、异常处理能力
- 效率维度：完成时长、单位任务耗时、响应速度
- 成本维度：耗材成本、设备折旧、人力投入
- 质量维度：错误类型分布、修正频次、学习曲线

3. 统计评判层 (Statistical Evaluation)
- 计算各项指标的均值、标准差、置信区间
- 使用t检验判断差异显著性
- 生成综合评价指数

四、代码模块化实现

项目结构
```
accounting_comparison_tool/
├── main.py              # 主程序入口
├── config.py            # 配置参数
├── data_loader.py       # 数据加载与清洗
├── analyzer.py          # 核心分析逻辑
├── visualizer.py        # 可视化图表生成
├── reporter.py          # 报告生成器
├── requirements.txt     # 依赖库
└── README.md            # 使用说明
```

1. config.py (配置文件)
```python
# config.py

# 评分权重配置
WEIGHT_CONFIG = {
    "accuracy_weight": 0.4,     # 准确性权重
    "efficiency_weight": 0.3,   # 效率权重
    "cost_weight": 0.2,         # 成本权重
    "quality_weight": 0.1,      # 质量权重
}

# 评价标准阈值
THRESHOLDS = {
    "excellent_accuracy": 95,       # 优秀准确率阈值
    "good_efficiency_ratio": 1.2,   # 效率提升比例阈值
    "acceptable_cost_ratio": 0.8,   # 成本控制比例阈值
}

# 科目权重（用于准确性计算）
SUBJECT_WEIGHTS = {
    "basic_accounting": 0.3,
    "financial_statements": 0.4,
    "tax_accounting": 0.3,
}
```

2.
# data_loader.py (数据加载器)
# data_loader.py
import pandas as pd
import numpy as np
from datetime import datetime
import warnings

# Silences every warning for the whole process, as in the original listing.
warnings.filterwarnings("ignore")


class DataLoader:
    """Load and sanity-check the online-sandbox and offline-bookkeeping data.

    Each loader reads one Excel sheet, tags every row with a ``mode`` column
    ("online" or "offline") and adds a ``completion_time_minutes`` column
    derived from the sheet's ``completion_time`` column.
    """

    def __init__(self):
        # Both stay ``None`` until the matching ``load_*`` call succeeds.
        self.online_data = None
        self.offline_data = None

    def load_online_data(self, file_path):
        """Load the online sandbox sheet from *file_path*.

        Returns:
            True on success. False if reading or pre-processing failed; in
            that case ``self.online_data`` is left untouched.
        """
        frame = self._read_dataset(file_path, "online", "线上")
        if frame is None:
            return False
        self.online_data = frame
        return True

    def load_offline_data(self, file_path):
        """Load the offline manual-bookkeeping sheet from *file_path*.

        Returns:
            True on success. False if reading or pre-processing failed; in
            that case ``self.offline_data`` is left untouched.
        """
        frame = self._read_dataset(file_path, "offline", "线下")
        if frame is None:
            return False
        self.offline_data = frame
        return True

    def _read_dataset(self, file_path, mode, label):
        """Read one Excel sheet and add the ``mode`` / minutes columns.

        Args:
            file_path: Path handed to ``pandas.read_excel``.
            mode: Value written into the ``mode`` column.
            label: Dataset name used in the console messages.

        Returns:
            The prepared DataFrame, or ``None`` when anything failed (the
            failure is printed, not raised, matching the loaders' boolean
            contract).
        """
        try:
            frame = pd.read_excel(file_path)
            print(f"✅ 成功加载{label}数据: {len(frame)} 条记录")
            # Pre-processing: a missing ``completion_time`` column is reported
            # as a load failure as well.
            frame["mode"] = mode
            frame["completion_time_minutes"] = self._convert_to_minutes(
                frame["completion_time"]
            )
        except Exception as e:  # best-effort loader: report and signal failure
            print(f"❌ {label}数据加载失败: {e}")
            return None
        return frame

    def _convert_to_minutes(self, time_obj):
        """Convert a ``completion_time`` Series to minutes.

        If the first element is a string, the values are parsed as datetimes
        and the result is the gap between consecutive rows in minutes, with
        the first row (which has no predecessor) filled with 120. Any other
        Series, including an empty one, is returned unchanged.

        NOTE(review): the string branch measures row-to-row differences, not
        a per-row duration - confirm this is the intended meaning.
        """
        if time_obj.empty:
            # Nothing to inspect; ``iloc[0]`` would raise IndexError here.
            return time_obj
        if isinstance(time_obj.iloc[0], str):
            times = pd.to_datetime(time_obj)
            durations = times.diff().dt.total_seconds() / 60
            return durations.fillna(120)  # default of 120 minutes
        # Already numeric: use as-is.
        return time_obj

    def get_combined_data(self):
        """Return both datasets stacked, whichever one is loaded, or ``None``."""
        if self.online_data is not None and self.offline_data is not None:
            return pd.concat(
                [self.online_data, self.offline_data], ignore_index=True
            )
        if self.online_data is not None:
            return self.online_data
        if self.offline_data is not None:
            return self.offline_data
        return None

    def validate_data_quality(self):
        """Return a list of human-readable data-quality issues.

        For every loaded dataset this reports the total number of null cells
        and, when a ``score`` column exists, the number of scores below 0 or
        above 100. Datasets that are not loaded are skipped.
        """
        issues = []
        for dataset, name in [(self.online_data, "线上"), (self.offline_data, "线下")]:
            if dataset is None:
                continue
            # Null cells across all columns.
            null_total = dataset.isnull().sum().sum()
            if null_total > 0:
                issues.append(f"{name}数据存在 {null_total} 个空值")
            # Scores outside the 0-100 range.
            if "score" in dataset.columns:
                out_of_range = (dataset["score"] < 0) | (dataset["score"] > 100)
                outliers = len(dataset[out_of_range])
                if outliers > 0:
                    issues.append(f"{name}数据存在 {outliers} 个异常分数")
        return issues

# 3.
# analyzer.py (核心分析器)
# analyzer.py
import pandas as pd
import numpy as np
from scipy import stats
from config import WEIGHT_CONFIG, THRESHOLDS, SUBJECT_WEIGHTS


class AccountingAnalyzer:
    """Compare the "online" and "offline" rows of a combined result table.

    ``data`` is a DataFrame with a ``mode`` column holding "online" or
    "offline". The analysis methods read the ``score`` and
    ``completion_time_minutes`` columns and, when present, the
    ``<subject>_score``, ``material_cost``, ``equipment_cost`` and
    ``labor_cost`` columns.

    NOTE: several message strings below start with a space; a leading symbol
    appears to have been lost in the source listing, so they are reproduced
    exactly as found.
    """

    _MODES = ("online", "offline")
    _COST_COLUMNS = ("material_cost", "equipment_cost", "labor_cost")

    def __init__(self, data):
        self.data = data
        self.results = {}

    def _rows_for(self, mode):
        """Return the rows of ``self.data`` whose ``mode`` equals *mode*."""
        return self.data[self.data["mode"] == mode]

    @staticmethod
    def _unit_interval(value):
        """Clamp *value* to [0, 1]; NaN (no usable data) counts as 0."""
        if pd.isna(value):
            return 0.0
        return max(0.0, min(float(value), 1.0))

    def analyze_accuracy(self):
        """Analyse the accuracy dimension.

        Returns:
            dict with, per mode that has rows, ``<mode>_avg_score`` and
            ``<mode>_std_score``, plus ``<mode>_<subject>_score`` for every
            subject in ``SUBJECT_WEIGHTS`` whose column exists and has data.
            When both modes have at least two non-null scores it also holds
            ``significance_p_value`` (independent two-sample t-test) and
            ``significant_difference`` (p < 0.05).
        """
        accuracy_results = {}
        for mode in self._MODES:
            mode_data = self._rows_for(mode)
            if len(mode_data) == 0:
                continue
            # Overall score statistics.
            accuracy_results[f"{mode}_avg_score"] = mode_data["score"].mean()
            accuracy_results[f"{mode}_std_score"] = mode_data["score"].std()
            # Per-subject averages (only the subject names are needed here).
            for subject in SUBJECT_WEIGHTS:
                column = f"{subject}_score"
                if column not in mode_data.columns:
                    continue
                subject_scores = mode_data[column].dropna()
                if len(subject_scores) > 0:
                    accuracy_results[f"{mode}_{subject}_score"] = subject_scores.mean()

        # Significance test, only when both modes produced an average.
        if "online_avg_score" in accuracy_results and "offline_avg_score" in accuracy_results:
            online_scores = self._rows_for("online")["score"].dropna()
            offline_scores = self._rows_for("offline")["score"].dropna()
            if len(online_scores) > 1 and len(offline_scores) > 1:
                _, p_value = stats.ttest_ind(online_scores, offline_scores)
                accuracy_results["significance_p_value"] = p_value
                accuracy_results["significant_difference"] = p_value < 0.05
        return accuracy_results

    def analyze_efficiency(self):
        """Analyse the efficiency dimension.

        Returns:
            dict with, per mode that has rows, ``<mode>_avg_time`` and
            ``<mode>_std_time``, and ``<mode>_efficiency_ratio`` (mean score
            divided by mean minutes) when the mean time is positive. When both
            modes are present and the offline mean time is positive it also
            holds ``time_improvement_percent``.
        """
        efficiency_results = {}
        for mode in self._MODES:
            mode_data = self._rows_for(mode)
            if len(mode_data) == 0:
                continue
            avg_time = mode_data["completion_time_minutes"].mean()
            efficiency_results[f"{mode}_avg_time"] = avg_time
            efficiency_results[f"{mode}_std_time"] = mode_data["completion_time_minutes"].std()
            # Score per minute; skipped for a zero/NaN mean time so the ratio
            # never becomes inf or NaN.
            if avg_time > 0:
                efficiency_results[f"{mode}_efficiency_ratio"] = (
                    mode_data["score"].mean() / avg_time
                )

        # Relative time saved by the online mode, measured against offline.
        if "online_avg_time" in efficiency_results and "offline_avg_time" in efficiency_results:
            baseline = efficiency_results["offline_avg_time"]
            if baseline > 0:
                efficiency_results["time_improvement_percent"] = (
                    (baseline - efficiency_results["online_avg_time"]) / baseline * 100
                )
        return efficiency_results

    def analyze_costs(self):
        """Analyse the cost dimension.

        Returns:
            dict with, per mode that has rows, ``<mode>_avg_<cost column>``
            for each cost column present and ``<mode>_total_cost`` (their
            sum, 0 when no cost column exists). ``cost_saving_percent`` is
            added when both totals exist and the offline total is positive.
            A mode without rows gets no total, so no saving is reported
            against data that does not exist.
        """
        cost_results = {}
        for mode in self._MODES:
            mode_data = self._rows_for(mode)
            if len(mode_data) == 0:
                continue
            total = 0
            # Direct costs (material, equipment) and indirect cost (labour).
            for column in self._COST_COLUMNS:
                if column in mode_data.columns:
                    average = mode_data[column].mean()
                    cost_results[f"{mode}_avg_{column}"] = average
                    total = total + average
            cost_results[f"{mode}_total_cost"] = total

        # Share of the offline cost saved by the online mode.
        if "online_total_cost" in cost_results and "offline_total_cost" in cost_results:
            offline_total = cost_results["offline_total_cost"]
            if offline_total > 0:
                cost_results["cost_saving_percent"] = (
                    (offline_total - cost_results["online_total_cost"]) / offline_total * 100
                )
        return cost_results

    def calculate_comprehensive_score(self, accuracy, efficiency, cost):
        """Combine the three dimension results into one score for the online mode.

        Args:
            accuracy: Result of ``analyze_accuracy``.
            efficiency: Result of ``analyze_efficiency``.
            cost: Result of ``analyze_costs``.

        Returns:
            The weighted score, rounded to two decimals.

        NOTE(review): only the accuracy, efficiency and cost weights from
        ``WEIGHT_CONFIG`` are applied; ``quality_weight`` is not used here, so
        the maximum depends on the sum of those three weights.
        """
        # Each component is normalised to the 0-1 range.
        acc_score = self._unit_interval(accuracy.get("online_avg_score", 0) / 100)
        # An efficiency ratio of 2.0 is treated as a full score.
        eff_score = self._unit_interval(efficiency.get("online_efficiency_ratio", 0) / 2.0)

        # Cost benefit: lower cost scores higher. Without comparable totals
        # the component keeps its default of 1.0.
        cost_score = 1.0
        if "online_total_cost" in cost and "offline_total_cost" in cost:
            if cost["offline_total_cost"] > 0:
                cost_ratio = cost["online_total_cost"] / cost["offline_total_cost"]
                # Half the offline cost earns the full score; clamping keeps
                # ratios below 0.5 from pushing the component above 1.0.
                cost_score = self._unit_interval(1.0 - (cost_ratio - 0.5) * 2)

        comprehensive_score = (
            acc_score * WEIGHT_CONFIG["accuracy_weight"]
            + eff_score * WEIGHT_CONFIG["efficiency_weight"]
            + cost_score * WEIGHT_CONFIG["cost_weight"]
        ) * 100
        return round(comprehensive_score, 2)

    def run_full_analysis(self):
        """Run every dimension analysis and store/return the combined results.

        Returns:
            dict with keys ``accuracy``, ``efficiency``, ``cost``,
            ``comprehensive_score`` and ``summary``; also kept on
            ``self.results``.
        """
        print(" 开始多维度对比分析...")
        accuracy_analysis = self.analyze_accuracy()
        efficiency_analysis = self.analyze_efficiency()
        cost_analysis = self.analyze_costs()
        comprehensive_score = self.calculate_comprehensive_score(
            accuracy_analysis, efficiency_analysis, cost_analysis
        )
        self.results = {
            "accuracy": accuracy_analysis,
            "efficiency": efficiency_analysis,
            "cost": cost_analysis,
            "comprehensive_score": comprehensive_score,
            "summary": self._generate_summary(
                accuracy_analysis, efficiency_analysis, cost_analysis
            ),
        }
        return self.results

    def _generate_summary(self, accuracy, efficiency, cost):
        """Turn the dimension results into a list of one-line conclusions.

        A dimension contributes a line only when the keys it needs are present.
        """
        summary = []

        # Accuracy: differences under 2 points count as a tie.
        if "online_avg_score" in accuracy and "offline_avg_score" in accuracy:
            online_acc = accuracy["online_avg_score"]
            offline_acc = accuracy["offline_avg_score"]
            diff = online_acc - offline_acc
            if abs(diff) < 2:
                summary.append(" 准确性方面两种模式表现相当")
            elif diff > 0:
                summary.append(f" 线上模式准确性更高 ({online_acc:.1f} vs {offline_acc:.1f})")
            else:
                summary.append(f" 线下模式准确性更高 ({offline_acc:.1f} vs {online_acc:.1f})")

        # Efficiency: more than 10% is "significant", any positive gain is "slight".
        if "time_improvement_percent" in efficiency:
            improvement = efficiency["time_improvement_percent"]
            if improvement > 10:
                summary.append(f"⚡ 线上模式效率显著提升 {improvement:.1f}%")
            elif improvement > 0:
                summary.append(f"⏱️ 线上模式效率略有提升 {improvement:.1f}%")
            else:
                summary.append("⏰ 效率方面两种模式基本持平")

        # Cost: more than 20% is a large saving, any positive saving is moderate.
        if "cost_saving_percent" in cost:
            saving = cost["cost_saving_percent"]
            if saving > 20:
                summary.append(f" 线上模式大幅降低成本 {saving:.1f}%")
            elif saving > 0:
                summary.append(f" 线上模式适度降低成本 {saving:.1f}%")
            else:
                summary.append(" 成本方面线下模式更具优势")
        return summary

# 4.
# visualizer.py (可视化模块)
# visualizer.py
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from matplotlib import rcParams

# Fonts used for the Chinese labels; keep the minus sign renderable with them.
rcParams["font.sans-serif"] = ["SimHei", "Arial Unicode MS"]
rcParams["axes.unicode_minus"] = False


class ResultVisualizer:
    """Draw comparison charts from the analysis results and the raw data.

    ``results`` is a dict with ``accuracy``, ``efficiency`` and ``cost`` dicts
    and a ``comprehensive_score`` number. ``data`` is a DataFrame with ``mode``
    ("online"/"offline") and ``score`` columns.
    """

    def __init__(self, results, data):
        self.results = results
        self.data = data
        self.setup_style()

    def setup_style(self):
        """Apply the chart style and colour palette."""
        try:
            plt.style.use("seaborn-v0_8-whitegrid")
        except OSError:
            # Matplotlib releases before 3.6 ship this style under the
            # un-prefixed name.
            plt.style.use("seaborn-whitegrid")
        sns.set_palette("husl")

    @staticmethod
    def _annotate_bars(ax, bars, values, fmt, offset):
        """Write each value above its bar, *offset* data units higher."""
        for bar, value in zip(bars, values):
            ax.text(
                bar.get_x() + bar.get_width() / 2,
                bar.get_height() + offset,
                fmt.format(value),
                ha="center",
                va="bottom",
            )

    @staticmethod
    def _relative_cost_score(own_cost, other_cost):
        """Return ``max(0, 1 - own_cost / other_cost)``.

        Returns 0.0 when *other_cost* is not positive, so a zero or missing
        baseline cannot cause a division by zero.
        """
        if not other_cost > 0:
            return 0.0
        return max(0, 1.0 - own_cost / other_cost)

    def plot_comparison_chart(self, save_path=None):
        """Draw the 2x2 comparison figure and show it.

        The panels are: average score, average completion time and average
        total cost as bar charts, plus a radar chart of normalised indicators.

        Args:
            save_path: When given, the figure is also saved there (300 dpi).
        """
        accuracy = self.results["accuracy"]
        efficiency = self.results["efficiency"]
        cost = self.results["cost"]

        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle("线上智能沙盘 VS 线下手工做账 效果对比分析", fontsize=16, fontweight="bold")
        modes = ["线上", "线下"]

        # Accuracy comparison.
        ax1 = axes[0, 0]
        accuracy_scores = [
            accuracy.get("online_avg_score", 0),
            accuracy.get("offline_avg_score", 0),
        ]
        bars1 = ax1.bar(modes, accuracy_scores, color=["#3498db", "#e74c3c"], alpha=0.8)
        ax1.set_title("平均准确率对比", fontweight="bold")
        ax1.set_ylabel("分数")
        ax1.set_ylim(0, 100)
        self._annotate_bars(ax1, bars1, accuracy_scores, "{:.1f}", 1)

        # Completion-time comparison.
        ax2 = axes[0, 1]
        time_data = [
            efficiency.get("online_avg_time", 0),
            efficiency.get("offline_avg_time", 0),
        ]
        bars2 = ax2.bar(modes, time_data, color=["#2ecc71", "#f39c12"], alpha=0.8)
        ax2.set_title("平均完成时间对比 (分钟)", fontweight="bold")
        ax2.set_ylabel("时间(分钟)")
        self._annotate_bars(ax2, bars2, time_data, "{:.0f}", 1)

        # Total-cost comparison.
        ax3 = axes[1, 0]
        cost_data = [
            cost.get("online_total_cost", 0),
            cost.get("offline_total_cost", 0),
        ]
        bars3 = ax3.bar(modes, cost_data, color=["#9b59b6", "#34495e"], alpha=0.8)
        ax3.set_title("平均总成本对比", fontweight="bold")
        ax3.set_ylabel("成本(元)")
        self._annotate_bars(ax3, bars3, cost_data, "{:.0f}", 0.1)

        # Radar chart: the angles below are polar angles, so the Cartesian
        # axes created by ``subplots`` is swapped for a polar one.
        axes[1, 1].remove()
        ax4 = fig.add_subplot(2, 2, 4, projection="polar")
        categories = ["准确性", "效率", "成本效益", "综合评分"]
        online_values = [
            accuracy.get("online_avg_score", 0) / 100,
            min(efficiency.get("online_efficiency_ratio", 0) / 2.0, 1.0),
            self._relative_cost_score(
                cost.get("online_total_cost", 0), cost.get("offline_total_cost", 1)
            ),
            self.results["comprehensive_score"] / 100,
        ]
        offline_values = [
            accuracy.get("offline_avg_score", 0) / 100,
            min(efficiency.get("offline_efficiency_ratio", 0) / 2.0, 1.0),
            self._relative_cost_score(
                cost.get("offline_total_cost", 0), cost.get("online_total_cost", 1)
            ),
            0.6,  # offline comprehensive score is assumed to be 60
        ]

        angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
        # Repeat the first point so each outline closes on itself.
        online_values += online_values[:1]
        offline_values += offline_values[:1]
        angles += angles[:1]

        ax4.plot(angles, online_values, "o-", linewidth=2, label="线上", color="#3498db")
        ax4.fill(angles, online_values, alpha=0.25, color="#3498db")
        ax4.plot(angles, offline_values, "o-", linewidth=2, label="线下", color="#e74c3c")
        ax4.fill(angles, offline_values, alpha=0.25, color="#e74c3c")
        ax4.set_xticks(angles[:-1])
        ax4.set_xticklabels(categories)
        ax4.set_ylim(0, 1)
        ax4.set_title("综合能力雷达图", fontweight="bold")
        ax4.legend()
        ax4.grid(True)

        plt.tight_layout()
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches="tight")
            print(f" 图表已保存至: {save_path}")
        plt.show()

    def _plot_score_hist(self, ax, mode, color, title):
        """Draw the score histogram (with a mean marker) for one mode on *ax*."""
        scores = self.data[self.data["mode"] == mode]["score"].dropna()
        if len(scores) > 0:
            ax.hist(scores, bins=20, alpha=0.7, color=color, edgecolor="black")
            ax.axvline(
                scores.mean(),
                color="red",
                linestyle="--",
                label=f"平均值: {scores.mean():.1f}",
            )
            ax.legend()
        ax.set_title(title)
        ax.set_xlabel("分数")
        ax.set_ylabel("频次")

    def plot_distribution_plot(self, save_path=None):
        """Draw side-by-side score histograms for both modes and show them.

        Args:
            save_path: When given, the figure is also saved there (300 dpi).
        """
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
        fig.suptitle("成绩分布对比", fontsize=14, fontweight="bold")
        self._plot_score_hist(ax1, "online", "#3498db", "线上模式成绩分布")
        self._plot_score_hist(ax2, "offline", "#e74c3c", "线下模式成绩分布")
        plt.tight_layout()
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches="tight")
        plt.show()


# NOTE(review): the reporter.py listing that follows is cut off in the source
# (it stops in the middle of a dict inside _create_overview_sheet) and runs
# straight into the article footer. It is kept verbatim on the next line, not
# repaired, until the complete listing is available.
# 5. reporter.py (报告生成器)# reporter.pyimport pandas as pdfrom datetime import datetimeclass ReportGenerator:def __init__(self, results, data):self.results resultsself.data datadef generate_excel_report(self, filename会计实训效果对比报告.xlsx):生成Excel格式报告with pd.ExcelWriter(filename, engineopenpyxl) as writer:# 概览表self._create_overview_sheet(writer)# 详细数据表self._create_detailed_sheet(writer)# 建议表self._create_recommendations_sheet(writer)print(f Excel报告已生成: {filename})def _create_overview_sheet(self, writer):创建概览工作表overview_data []# 准确性数据if online_avg_score in self.results[accuracy]:overview_data.append({维度: 准确性,指标: 平均分数,线上模式: f{self.results[accuracy][online_avg_score]:.1f},线下模式: f{self.results[accuracy][offline_avg_score]:.1f},差异: f{self.results[accuracy][online_avg_score] - self.results[accuracy][offline_avg_score]:.1f}})# 效率数据if online_avg_time in self.results[efficiency]:overview_data.append({维度: 效率,指标: 平均时间(分钟),线上模式: f{self.results[efficiency][online_avg_time]:.0f},线下模式: f{self.results[efficiency][offline_avg_time]:.0f},差异: f{self.results[efficiency][online_avg_time] - self.results[efficiency][offline_avg_time]:.0f}})# 成本数据if online_total_cost in self.results[cost]:overview_data.append({维度: 成本,指标利用AI解决实际问题如果你觉得这个工具好用欢迎关注长安牧笛