第23讲:项目实战 - 财务智能助手开发实现

第23讲:项目实战 - 财务智能助手开发实现

实现财务智能助手的核心功能,包括发票识别、报表生成和数据处理。

一、项目结构

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
finance_assistant/
├── __init__.py
├── main.py # 入口
├── config.py # 配置
├── models/ # 数据模型
│ ├── __init__.py
│ ├── invoice.py # 发票模型
│ └── report.py # 报表模型
├── services/ # 业务服务
│ ├── __init__.py
│ ├── ocr_service.py # OCR服务
│ ├── invoice_service.py
│ └── report_service.py
├── utils/ # 工具函数
│ ├── __init__.py
│ ├── db.py # 数据库
│ └── excel.py # Excel处理
└── templates/ # 报表模板
├── balance_sheet.xlsx
└── income_statement.xlsx

二、核心代码实现

2.1 发票识别服务

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# services/invoice_service.py
from typing import Dict, List
import re
from datetime import datetime

class InvoiceService:
"""发票服务"""

def __init__(self, ocr_service, db):
self.ocr = ocr_service
self.db = db

def recognize_invoice(self, image_path: str) -> Dict:
"""识别发票"""
# 1. OCR识别
raw_text = self.ocr.recognize(image_path)

# 2. 提取结构化信息
invoice_info = self._extract_invoice_info(raw_text)

# 3. 验证发票
validation = self._validate_invoice(invoice_info)
invoice_info['validation'] = validation

# 4. 保存到数据库
self.db.save_invoice(invoice_info)

return invoice_info

def _extract_invoice_info(self, text: str) -> Dict:
"""提取发票信息"""
info = {
'invoice_code': self._extract_pattern(text, r'发票代码[::]\s*(\d{12})'),
'invoice_number': self._extract_pattern(text, r'发票号码[::]\s*(\d{8,20})'),
'date': self._extract_date(text),
'buyer_name': self._extract_buyer(text),
'buyer_tax_id': self._extract_pattern(text, r'购买方.*纳税人识别号[::]\s*([A-Z0-9]+)'),
'seller_name': self._extract_seller(text),
'seller_tax_id': self._extract_pattern(text, r'销售方.*纳税人识别号[::]\s*([A-Z0-9]+)'),
'amount': self._extract_amount(text),
'tax': self._extract_tax(text),
'total': self._extract_total(text),
'items': self._extract_items(text)
}
return info

def _extract_pattern(self, text: str, pattern: str) -> str:
"""正则提取"""
match = re.search(pattern, text, re.DOTALL)
return match.group(1).strip() if match else None

def _extract_date(self, text: str) -> str:
"""提取日期"""
patterns = [
r'(\d{4}年\d{1,2}月\d{1,2}日)',
r'(\d{4}-\d{2}-\d{2})',
r'(\d{4}/\d{2}/\d{2})'
]
for pattern in patterns:
match = re.search(pattern, text)
if match:
return match.group(1)
return None

def _validate_invoice(self, info: Dict) -> Dict:
"""验证发票"""
errors = []

if not info['invoice_code']:
errors.append("缺少发票代码")
if not info['invoice_number']:
errors.append("缺少发票号码")
if not info['amount']:
errors.append("缺少金额信息")

# 检查是否重复
if self.db.check_duplicate(info.get('invoice_code'), info.get('invoice_number')):
errors.append("发票已存在")

return {
'is_valid': len(errors) == 0,
'errors': errors
}

def query_invoices(self, start_date: str = None, end_date: str = None,
buyer_name: str = None) -> List[Dict]:
"""查询发票"""
return self.db.query_invoices(start_date, end_date, buyer_name)

def get_statistics(self, month: str) -> Dict:
"""获取月度统计"""
return self.db.get_invoice_statistics(month)

2.2 报表生成服务

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# services/report_service.py
from openpyxl import load_workbook
from typing import Dict
import os

class ReportService:
"""报表服务"""

def __init__(self, db, template_dir: str):
self.db = db
self.template_dir = template_dir

def generate_balance_sheet(self, year: int, month: int) -> str:
"""生成资产负债表"""
# 1. 加载模板
template_path = os.path.join(self.template_dir, 'balance_sheet.xlsx')
wb = load_workbook(template_path)
ws = wb.active

# 2. 获取数据
data = self.db.get_balance_sheet_data(year, month)

# 3. 填充数据
self._fill_balance_sheet(ws, data)

# 4. 保存
output_path = f"资产负债表_{year}{month}月.xlsx"
wb.save(output_path)

return output_path

def _fill_balance_sheet(self, ws, data: Dict):
"""填充资产负债表"""
# 流动资产
ws['B5'] = data.get('cash', 0)
ws['B6'] = data.get('receivables', 0)
ws['B7'] = data.get('inventory', 0)

# 非流动资产
ws['B12'] = data.get('fixed_assets', 0)
ws['B13'] = data.get('intangible_assets', 0)

# 流动负债
ws['E5'] = data.get('short_term_loans', 0)
ws['E6'] = data.get('payables', 0)

# 所有者权益
ws['E15'] = data.get('paid_in_capital', 0)
ws['E16'] = data.get('retained_earnings', 0)

def generate_income_statement(self, year: int, month: int) -> str:
"""生成利润表"""
template_path = os.path.join(self.template_dir, 'income_statement.xlsx')
wb = load_workbook(template_path)
ws = wb.active

data = self.db.get_income_data(year, month)

# 填充数据
ws['B3'] = data.get('revenue', 0)
ws['B4'] = data.get('cost', 0)
ws['B6'] = data.get('gross_profit', 0)
ws['B8'] = data.get('operating_expenses', 0)
ws['B10'] = data.get('operating_profit', 0)
ws['B16'] = data.get('net_profit', 0)

output_path = f"利润表_{year}{month}月.xlsx"
wb.save(output_path)

return output_path

2.3 Skill 主类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# main.py
from typing import Dict

class FinanceAssistantSkill:
"""财务智能助手 Skill"""

def __init__(self):
self.db = Database()
self.ocr_service = OCRService()
self.invoice_service = InvoiceService(self.ocr_service, self.db)
self.report_service = ReportService(self.db, './templates')

def handle_message(self, message: str, context: Dict) -> str:
"""处理用户消息"""
intent = self._classify_intent(message)

if intent == 'recognize_invoice':
return "请上传发票图片,我将为您识别发票信息。"

elif intent == 'generate_report':
return self._handle_report_request(message)

elif intent == 'query_invoice':
return self._handle_invoice_query(message)

elif intent == 'statistics':
return self._handle_statistics(message)

else:
return self._handle_qa(message)

def handle_file(self, file_path: str, context: Dict) -> str:
"""处理文件上传"""
# 识别发票
result = self.invoice_service.recognize_invoice(file_path)

if result['validation']['is_valid']:
return f"""
发票识别成功!
- 发票代码:{result.get('invoice_code')}
- 发票号码:{result.get('invoice_number')}
- 开票日期:{result.get('date')}
- 销售方:{result.get('seller_name')}
- 金额:¥{result.get('amount')}
- 税额:¥{result.get('tax')}
- 价税合计:¥{result.get('total')}
""".strip()
else:
errors = '\n'.join(result['validation']['errors'])
return f"发票识别完成,但存在以下问题:\n{errors}"

def _classify_intent(self, message: str) -> str:
"""意图分类"""
keywords = {
'recognize_invoice': ['识别发票', '录入发票', '发票识别'],
'generate_report': ['生成报表', '报表', '资产负债表', '利润表'],
'query_invoice': ['查询发票', '查找发票', '发票查询'],
'statistics': ['统计', '汇总', '分析']
}

for intent, words in keywords.items():
if any(word in message for word in words):
return intent

return 'qa'

def _handle_report_request(self, message: str) -> str:
"""处理报表请求"""
import re

# 提取年月
year_match = re.search(r'(\d{4})年', message)
month_match = re.search(r'(\d{1,2})月', message)

year = int(year_match.group(1)) if year_match else 2024
month = int(month_match.group(1)) if month_match else 1

if '资产负债' in message:
report_path = self.report_service.generate_balance_sheet(year, month)
return f"资产负债表已生成:{report_path}"

elif '利润' in message:
report_path = self.report_service.generate_income_statement(year, month)
return f"利润表已生成:{report_path}"

else:
return "请指定要生成的报表类型(资产负债表/利润表)"

def _handle_invoice_query(self, message: str) -> str:
"""处理发票查询"""
# 简化实现,实际应提取查询条件
invoices = self.invoice_service.query_invoices(limit=10)

if not invoices:
return "未找到发票记录"

result = "最近的发票记录:\n"
for inv in invoices:
result += f"- {inv.get('date')} {inv.get('seller_name')} ¥{inv.get('amount')}\n"

return result

def _handle_statistics(self, message: str) -> str:
"""处理统计请求"""
import re

month_match = re.search(r'(\d{4})年(\d{1,2})月', message)
if month_match:
year = int(month_match.group(1))
month = int(month_match.group(2))
month_str = f"{year}-{month:02d}"
else:
month_str = datetime.now().strftime('%Y-%m')

stats = self.invoice_service.get_statistics(month_str)

return f"""
{month_str} 发票统计:
- 发票数量:{stats.get('count', 0)}
- 总金额:¥{stats.get('total_amount', 0):,.2f}
- 总税额:¥{stats.get('total_tax', 0):,.2f}
- 价税合计:¥{stats.get('total_with_tax', 0):,.2f}
""".strip()

def _handle_qa(self, message: str) -> str:
"""处理问答"""
# 简单的问答响应
faq = {
'怎么使用': '您可以上传发票图片让我识别,或者让我生成财务报表。',
'支持哪些功能': '目前支持发票识别、报表生成、发票查询和统计分析。',
'数据安全吗': '您的数据仅保存在本地,不会上传到云端。'
}

for key, value in faq.items():
if key in message:
return value

return "抱歉,我不太理解您的问题。您可以问"怎么使用"或"支持哪些功能"。"

三、部署配置

3.1 Coze 配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# coze_config.yaml
bot_name: "财务智能助手"
description: 自动化财务处理助手,支持发票识别、报表生成等功能

skills:
- name: recognize_invoice
description: "识别发票信息"

- name: generate_report
description: "生成财务报表"

- name: query_invoice
description: "查询发票记录"

plugins:
- ocr
- excel

四、测试验证

4.1 单元测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# tests/test_invoice.py
import unittest
from services.invoice_service import InvoiceService

class TestInvoiceService(unittest.TestCase):

def setUp(self):
self.service = InvoiceService(MockOCR(), MockDB())

def test_extract_invoice_code(self):
text = "发票代码:123456789012"
result = self.service._extract_pattern(text, r'发票代码[::]\s*(\d{12})')
self.assertEqual(result, '123456789012')

def test_validate_invoice(self):
info = {
'invoice_code': '123456789012',
'invoice_number': '12345678',
'amount': '1000.00'
}
validation = self.service._validate_invoice(info)
self.assertTrue(validation['is_valid'])

五、总结

通过本项目,我们实践了:

  • 完整的 Skill 开发流程
  • OCR 在财务场景的应用
  • Excel 报表自动生成
  • 对话式交互设计

六、下节预告

下一讲我们将开始 HR 智能助手项目 的需求分析。


加入学习群

👉 加入AI编程学习交流群

点击加入


本讲是《AI Skills 从入门到实践》系列课程的第23讲。

🎓 AI 编程实战课程

想系统学习 AI 编程?程序员晚枫的 AI 编程实战课 帮你从零上手!