第25讲:项目实战 - HR智能助手开发实现

实现HR智能助手的核心功能,包括简历解析、考勤管理和薪资计算。

一、项目结构

hr_assistant/
├── __init__.py
├── main.py  # 入口
├── config.py  # 配置
├── models/  # 数据模型
│   ├── __init__.py
│   ├── resume.py  # 简历模型
│   ├── attendance.py  # 考勤模型
│   └── employee.py  # 员工模型
├── services/  # 业务服务
│   ├── __init__.py
│   ├── resume_service.py
│   ├── attendance_service.py
│   └── payroll_service.py
├── utils/  # 工具函数
│   ├── __init__.py
│   ├── db.py
│   └── excel.py
└── templates/  # 模板
    ├── payroll_template.xlsx
    └── attendance_template.xlsx

二、核心代码实现

2.1 简历解析服务

# services/resume_service.py
from typing import Dict, List
import re

class ResumeService:
    """Parse résumé files into structured records and search stored résumés.

    The OCR service, NLP service and database handle are injected. This class
    calls ``extract_from_pdf`` / ``extract_from_word`` / ``recognize`` on the
    OCR service and ``save_resume`` / ``get_all_resumes`` on the database.
    The NLP service is stored but not used by any method here.
    """

    # Skill vocabulary scanned for by ``_extract_skills``; the order of this
    # tuple is the order of the returned list.
    SKILL_KEYWORDS = (
        'Python', 'Java', 'JavaScript', 'C++', 'Go', 'Rust',
        'MySQL', 'PostgreSQL', 'MongoDB', 'Redis',
        'Linux', 'Docker', 'Kubernetes', 'AWS', 'Azure',
        '机器学习', '深度学习', '数据分析', '人工智能',
        '项目管理', '团队协作', '敏捷开发',
    )

    # 2-4 CJK characters at the very start of the (left-stripped) text.
    _NAME_RE = re.compile(r'^([\u4e00-\u9fa5]{2,4})')
    # Mainland mobile number; the look-arounds stop it from matching inside a
    # longer digit run such as an 18-digit ID number.
    _PHONE_RE = re.compile(r'(?<!\d)1[3-9]\d{9}(?!\d)')
    # re.ASCII keeps ``\w`` from swallowing CJK text glued to the address.
    _EMAIL_RE = re.compile(r'[\w.-]+@[\w.-]+\.\w+', re.ASCII)
    # A 19xx/20xx year within a few characters of "出生" on the same line.
    # Non-greedy and bounded so that a later phone number on the line cannot
    # be mistaken for the birth year.
    _BIRTH_RE = re.compile(
        r'(?<!\d)((?:19|20)\d{2})年.{0,12}?出生'
        r'|出生.{0,12}?(?<!\d)((?:19|20)\d{2})'
    )
    _EDU_RE = re.compile(
        r'(\d{4}[./]\d{1,2}[-~至](?:\d{4}[./]\d{1,2}|至今))\s*([\u4e00-\u9fa5]+大学|[\u4e00-\u9fa5]+学院).*?([\u4e00-\u9fa5]+)(?:专业|系)',
        re.DOTALL,
    )
    # NOTE(review): the ``[^\n]+`` alternative is very permissive and tends to
    # swallow the rest of the line as the company name; kept unchanged because
    # the expected résumé layout is not known here.
    _EXP_RE = re.compile(
        r'(\d{4}[./]\d{1,2}[-~至](?:\d{4}[./]\d{1,2}|至今))\s*([\u4e00-\u9fa5]+公司|[^\n]+).*?([\u4e00-\u9fa5]+)',
        re.DOTALL,
    )

    def __init__(self, ocr_service, nlp_service, db):
        """Store the injected collaborators."""
        self.ocr = ocr_service
        self.nlp = nlp_service
        self.db = db

    def parse_resume(self, file_path: str) -> Dict:
        """Extract text from *file_path*, parse it, persist and return the record.

        Returns:
            A dict with the basic-info keys (``name``, ``phone``, ``email``,
            ``gender``, ``age``) plus ``education``, ``experience``,
            ``skills`` and ``raw_text``. The same dict is passed to
            ``db.save_resume``.
        """
        # Treat a failed extraction (``None``) as empty text so the parsers
        # below do not crash on it.
        text = self._extract_text(file_path) or ''

        resume_data = {
            **self._parse_basic_info(text),
            'education': self._parse_education(text),
            'experience': self._parse_experience(text),
            'skills': self._extract_skills(text),
            'raw_text': text,
        }
        self.db.save_resume(resume_data)
        return resume_data

    def _extract_text(self, file_path: str) -> str:
        """Pick the OCR entry point from the file extension (case-insensitive)."""
        lowered = file_path.lower()
        if lowered.endswith('.pdf'):
            return self.ocr.extract_from_pdf(file_path)
        if lowered.endswith(('.doc', '.docx')):
            return self.ocr.extract_from_word(file_path)
        # Anything else is handed to generic recognition.
        return self.ocr.recognize(file_path)

    def _parse_basic_info(self, text: str) -> Dict:
        """Extract name, phone, email, gender and age; missing fields are ``None``."""
        from datetime import date

        # Extracted text often starts with whitespace; the name is expected
        # to be the first thing after it.
        name_match = self._NAME_RE.match(text.lstrip())
        phone_match = self._PHONE_RE.search(text)
        email_match = self._EMAIL_RE.search(text)

        # Gender is only looked for near the top of the document.
        opening = text[:100]
        if '男' in opening:
            gender = '男'
        elif '女' in opening:
            gender = '女'
        else:
            gender = None

        age = None
        birth_match = self._BIRTH_RE.search(text)
        if birth_match:
            birth_year = int(birth_match.group(1) or birth_match.group(2))
            current_year = date.today().year
            # A birth year in the future is a misparse, not an age.
            if birth_year <= current_year:
                age = current_year - birth_year

        return {
            'name': name_match.group(1) if name_match else None,
            'phone': phone_match.group() if phone_match else None,
            'email': email_match.group() if email_match else None,
            'gender': gender,
            'age': age,
        }

    def _parse_education(self, text: str) -> List[Dict]:
        """Return ``period`` / ``school`` / ``major`` dicts for each education entry."""
        return [
            {
                'period': match.group(1),
                'school': match.group(2),
                'major': match.group(3),
            }
            for match in self._EDU_RE.finditer(text)
        ]

    def _parse_experience(self, text: str) -> List[Dict]:
        """Return ``period`` / ``company`` / ``position`` dicts for each work entry."""
        return [
            {
                'period': match.group(1),
                'company': match.group(2),
                'position': match.group(3),
            }
            for match in self._EXP_RE.finditer(text)
        ]

    def _extract_skills(self, text: str) -> List[str]:
        """Return the entries of ``SKILL_KEYWORDS`` mentioned in *text*.

        ASCII keywords are matched case-insensitively and must not be
        embedded in a longer run of Latin letters, so "MongoDB" does not
        count as "Go" and "JavaScript" does not count as "Java". Non-ASCII
        keywords are matched as plain substrings.
        """
        found_skills = []
        for skill in self.SKILL_KEYWORDS:
            if skill.isascii():
                pattern = r'(?<![A-Za-z])' + re.escape(skill) + r'(?![A-Za-z])'
                mentioned = re.search(pattern, text, re.IGNORECASE) is not None
            else:
                mentioned = skill in text
            if mentioned:
                found_skills.append(skill)
        return found_skills

    def search_resumes(self, keywords: List[str], min_score: int = 60) -> List[Dict]:
        """Return stored résumés scoring at least *min_score*, best match first.

        Each result is a shallow copy of the stored record with a
        ``matching_score`` key added, so the records returned by the
        database are left untouched.
        """
        results = []
        for resume in self.db.get_all_resumes():
            score = self._calculate_match_score(resume, keywords)
            if score >= min_score:
                results.append({**resume, 'matching_score': score})

        results.sort(key=lambda item: item['matching_score'], reverse=True)
        return results

    def _calculate_match_score(self, resume: Dict, keywords: List[str]) -> int:
        """Return the percentage (0-100) of *keywords* found in the résumé text.

        The score is proportional to the share of keywords matched, so a
        query with one or two keywords can still reach the default search
        threshold. An empty keyword list scores 0.
        """
        if not keywords:
            return 0
        # ``raw_text`` may be missing or None on stored records.
        haystack = (resume.get('raw_text') or '').lower()
        hits = sum(1 for keyword in keywords if keyword.lower() in haystack)
        return round(100 * hits / len(keywords))

2.2 考勤管理服务

# services/attendance_service.py
from typing import Dict, List
from datetime import datetime, timedelta
import pandas as pd

class AttendanceService:
    """Import attendance spreadsheets, flag anomalies and build monthly reports.

    The database handle is injected; this class calls ``save_attendance`` and
    ``get_attendance_by_month`` on it. Working hours are instance attributes
    in ``HH:MM`` form and may be overridden after construction.
    """

    def __init__(self, db):
        """Store the database handle and the default 09:00-18:00 working hours."""
        self.db = db
        self.work_start_time = "09:00"
        self.work_end_time = "18:00"

    def import_attendance(self, file_path: str, month: str) -> Dict:
        """Load an attendance workbook, clean it, detect anomalies and persist it.

        Returns:
            A dict with ``total_records`` (rows kept after cleaning),
            ``anomalies`` (list of anomaly dicts) and ``anomaly_count``.
        """
        df = self._clean_attendance_data(pd.read_excel(file_path))
        anomalies = self._detect_anomalies(df)
        self.db.save_attendance(df, month)

        return {
            'total_records': len(df),
            'anomalies': anomalies,
            'anomaly_count': len(anomalies),
        }

    @staticmethod
    def _parse_clock(value):
        """Convert one clock-in/out cell to a Timestamp on 1900-01-01, or NaT.

        Accepts time-like objects (anything with ``hour`` and ``minute``,
        e.g. ``datetime.time`` or a Timestamp) as well as ``HH:MM`` and
        ``HH:MM:SS`` strings. Anything else becomes NaT.
        """
        if pd.isna(value):
            return pd.NaT
        if hasattr(value, 'hour') and hasattr(value, 'minute'):
            return pd.Timestamp(
                year=1900, month=1, day=1,
                hour=value.hour, minute=value.minute,
                second=getattr(value, 'second', 0),
            )
        text = str(value).strip()
        for fmt in ('%H:%M', '%H:%M:%S'):
            try:
                return pd.Timestamp(datetime.strptime(text, fmt))
            except ValueError:
                continue
        return pd.NaT

    def _clean_attendance_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Drop rows without employee ID or date and normalise date/time columns.

        The input frame is not modified; a cleaned copy is returned.
        """
        # Copy so the column assignments below never write into a view of
        # the caller's frame.
        df = df.dropna(subset=['员工ID', '日期']).copy()
        df['日期'] = pd.to_datetime(df['日期'])

        for col in ('上班时间', '下班时间'):
            if col in df.columns:
                # Parse cell by cell: one column can mix strings and
                # time objects depending on how each cell was formatted.
                df[col] = pd.to_datetime(df[col].map(self._parse_clock))

        return df

    @staticmethod
    def _normalize_clock(value: str) -> str:
        """Return *value* (``H:MM`` or ``HH:MM``) as zero-padded ``HH:MM``."""
        return datetime.strptime(value, '%H:%M').strftime('%H:%M')

    def _detect_anomalies(self, df: pd.DataFrame) -> List[Dict]:
        """Return late (迟到), early-leave (早退) and missing-punch (缺卡) records.

        Comparison is at minute granularity against ``work_start_time`` and
        ``work_end_time``. Each anomaly is a dict with ``employee_id``,
        ``date`` (``YYYY-MM-DD``), ``type`` and ``detail``.
        """
        # Zero-pad the configured hours so the string comparison below is
        # also correct for values such as "9:00".
        start = self._normalize_clock(self.work_start_time)
        end = self._normalize_clock(self.work_end_time)

        anomalies = []

        def record(row, kind, detail):
            anomalies.append({
                'employee_id': row['员工ID'],
                'date': row['日期'].strftime('%Y-%m-%d'),
                'type': kind,
                'detail': detail,
            })

        for _, row in df.iterrows():
            clock_in = row.get('上班时间')
            clock_out = row.get('下班时间')
            has_in = pd.notna(clock_in)
            has_out = pd.notna(clock_out)

            if has_in:
                check_in = clock_in.strftime('%H:%M')
                if check_in > start:
                    record(row, '迟到', f"上班时间 {check_in}")

            if has_out:
                check_out = clock_out.strftime('%H:%M')
                if check_out < end:
                    record(row, '早退', f"下班时间 {check_out}")

            if not (has_in and has_out):
                record(row, '缺卡', '缺少打卡记录')

        return anomalies

    def generate_monthly_report(self, month: str) -> str:
        """Build the monthly statistics workbook and return its path."""
        data = self.db.get_attendance_by_month(month)
        stats = self._calculate_statistics(data)

        output_path = f"考勤报表_{month}.xlsx"
        self._export_report(stats, output_path)
        return output_path

    def _export_report(self, stats: Dict, output_path: str) -> None:
        """Write per-employee statistics to an Excel file, one row per employee."""
        report = pd.DataFrame.from_dict(stats, orient='index')
        report.index.name = '员工ID'
        report.to_excel(output_path)

    def _calculate_statistics(self, data: pd.DataFrame) -> Dict:
        """Count attendance statuses per employee.

        Expects *data* to carry ``员工ID`` and ``状态`` columns.

        Returns:
            ``{employee_id: {...}}`` where each value holds ``total_days``,
            ``late_count``, ``early_leave_count``, ``absent_count``,
            ``leave_count`` and ``normal_days``.
        """
        stats = {}
        # One pass per employee group instead of re-filtering the whole
        # frame for every employee and every status.
        for employee_id, emp_data in data.groupby('员工ID', sort=False):
            counts = emp_data['状态'].value_counts()
            stats[employee_id] = {
                'total_days': len(emp_data),
                'late_count': int(counts.get('迟到', 0)),
                'early_leave_count': int(counts.get('早退', 0)),
                'absent_count': int(counts.get('缺勤', 0)),
                'leave_count': int(counts.get('请假', 0)),
                'normal_days': int(counts.get('正常', 0)),
            }
        return stats

2.3 薪资计算服务

# services/payroll_service.py
from typing import Dict, List
from decimal import Decimal, ROUND_HALF_UP

class PayrollService:
    """Compute monthly salary, insurance contributions and income tax.

    The database handle is injected; this class calls ``get_employee``,
    ``get_attendance``, ``save_salary_record`` and ``get_salary_record`` on
    it. Arithmetic is done in ``Decimal``; amounts are converted to ``float``
    only in the returned dictionaries.
    """

    # Smallest currency unit used when rounding amounts.
    _CENT = Decimal('0.01')
    # NOTE(review): average paid days per month used to price one absent day.
    # This is an assumed policy value - confirm against the company's rules.
    WORK_DAYS_PER_MONTH = Decimal('21.75')

    def __init__(self, db):
        """Store the database handle and the tax parameters."""
        self.db = db
        # Monthly tax-free threshold.
        self.tax_threshold = Decimal('5000')
        # Rows are (lower bound, upper bound, rate, quick deduction).
        self.tax_brackets = [
            (Decimal('0'), Decimal('3000'), Decimal('0.03'), Decimal('0')),
            (Decimal('3000'), Decimal('12000'), Decimal('0.10'), Decimal('210')),
            (Decimal('12000'), Decimal('25000'), Decimal('0.20'), Decimal('1410')),
            (Decimal('25000'), Decimal('35000'), Decimal('0.25'), Decimal('2660')),
            (Decimal('35000'), Decimal('55000'), Decimal('0.30'), Decimal('4410')),
            (Decimal('55000'), Decimal('80000'), Decimal('0.35'), Decimal('7160')),
            (Decimal('80000'), Decimal('999999999'), Decimal('0.45'), Decimal('15160'))
        ]

    @classmethod
    def _round(cls, amount: Decimal) -> Decimal:
        """Round *amount* to cents, half up."""
        return amount.quantize(cls._CENT, rounding=ROUND_HALF_UP)

    @staticmethod
    def _to_decimal(value) -> Decimal:
        """Convert a number to ``Decimal`` via ``str`` to avoid float artefacts."""
        return Decimal(str(value))

    def calculate_salary(self, employee_id: str, month: str) -> Dict:
        """Compute, persist and return the salary breakdown for one employee.

        Returns:
            A dict with ``employee_id``, ``month``, ``base_salary``,
            ``allowances``, ``deductions``, ``insurance``, ``tax`` and
            ``net_salary``; the same dict is passed to
            ``db.save_salary_record``.

        Raises:
            ValueError: if the database returns no employee record.
        """
        employee = self.db.get_employee(employee_id)
        if employee is None:
            raise ValueError(f"unknown employee: {employee_id!r}")
        attendance = self.db.get_attendance(employee_id, month)

        base_salary = self._to_decimal(employee['base_salary'])

        deductions = self._calculate_deductions(base_salary, attendance)
        allowances = self._calculate_allowances(employee)
        insurance = self._calculate_insurance(base_salary)

        # The helper dicts carry floats; convert back to Decimal before
        # mixing them with Decimal amounts (Decimal - float raises TypeError).
        pre_tax = (
            base_salary
            + self._to_decimal(allowances['total'])
            - self._to_decimal(deductions['total'])
            - self._to_decimal(insurance['total'])
        )

        taxable_income = max(pre_tax - self.tax_threshold, Decimal('0'))
        # Round the tax first so that tax and net salary add up to the cent.
        tax = self._round(self._calculate_tax(taxable_income))
        net_salary = self._round(pre_tax - tax)

        salary_detail = {
            'employee_id': employee_id,
            'month': month,
            'base_salary': float(base_salary),
            'allowances': allowances,
            'deductions': deductions,
            'insurance': insurance,
            'tax': float(tax),
            'net_salary': float(net_salary),
        }

        self.db.save_salary_record(salary_detail)
        return salary_detail

    def _calculate_allowances(self, employee: Dict) -> Dict:
        """Sum the employee's allowances.

        NOTE(review): assumes an optional ``employee['allowances']`` mapping
        of label -> amount; the employee schema is not visible here - confirm.
        With no such key the total is 0.

        Returns:
            The rounded allowance items plus a ``total`` key, all floats.
        """
        items = employee.get('allowances') or {}
        rounded = {
            label: self._round(self._to_decimal(amount))
            for label, amount in items.items()
        }
        result = {label: float(amount) for label, amount in rounded.items()}
        result['total'] = float(sum(rounded.values(), Decimal('0')))
        return result

    def _calculate_deductions(self, base_salary: Decimal, attendance) -> Dict:
        """Compute attendance-based deductions.

        NOTE(review): placeholder policy - deducts one day's pay
        (``base_salary / WORK_DAYS_PER_MONTH``) per absent day, reading the
        day count from ``attendance['absent_count']`` when *attendance* is a
        dict. The shape of the attendance record is not visible here; with
        any other shape nothing is deducted. Confirm both with the data
        layer and HR policy.

        Returns:
            A dict with ``absence`` and ``total``, both floats.
        """
        absent_days = 0
        if isinstance(attendance, dict):
            absent_days = attendance.get('absent_count') or 0

        daily_pay = base_salary / self.WORK_DAYS_PER_MONTH
        absence = self._round(daily_pay * self._to_decimal(absent_days))
        return {'absence': float(absence), 'total': float(absence)}

    def _calculate_tax(self, taxable_income: Decimal) -> Decimal:
        """Return the (unrounded) income tax for a monthly taxable income.

        Uses ``income * rate - quick_deduction`` from the matching bracket.
        The last bracket is treated as open-ended, so incomes above its
        nominal upper bound are still taxed at the top rate. Negative
        incomes yield 0.
        """
        brackets = self.tax_brackets
        for low, high, rate, quick_deduction in brackets:
            if low <= taxable_income <= high:
                return taxable_income * rate - quick_deduction

        if brackets and taxable_income > brackets[-1][1]:
            _, _, rate, quick_deduction = brackets[-1]
            return taxable_income * rate - quick_deduction
        return Decimal('0')

    def _calculate_insurance(self, base_salary: Decimal) -> Dict:
        """Compute the employee's social-insurance and housing-fund contributions.

        The rates are the simplified ones used by this service (pension 8%,
        medical 2%, unemployment 0.5%, housing fund 7%); real rates depend
        on local policy. Each item is rounded half up to cents and ``total``
        is the sum of the rounded items, so the parts always add up.
        """
        rates = (
            ('pension', Decimal('0.08')),
            ('medical', Decimal('0.02')),
            ('unemployment', Decimal('0.005')),
            ('housing_fund', Decimal('0.07')),
        )
        rounded = {name: self._round(base_salary * rate) for name, rate in rates}

        result = {name: float(amount) for name, amount in rounded.items()}
        result['total'] = float(sum(rounded.values(), Decimal('0')))
        return result

    def generate_payslip(self, employee_id: str, month: str) -> str:
        """Return the payslip file path for one employee and month.

        NOTE(review): the Excel generation itself is not implemented in this
        block; only the output path is built and returned.
        """
        salary = self.db.get_salary_record(employee_id, month)
        employee = self.db.get_employee(employee_id)

        output_path = f"工资条_{employee['name']}_{month}.xlsx"
        # ... Excel generation code goes here (would consume ``salary``).

        return output_path

三、Skill 主类

# main.py
class HRAssistantSkill:
    """Chat-facing entry point that routes messages and uploads to the HR services.

    ``handle_message`` classifies a text message by keyword and remembers the
    intent in the conversation context; ``handle_file`` uses that remembered
    intent to decide what to do with an uploaded file.
    """

    # Trigger phrases per intent; the first intent with a matching phrase wins.
    INTENT_KEYWORDS = {
        'parse_resume': ['解析简历', '识别简历', '简历解析'],
        'import_attendance': ['导入考勤', '上传考勤', '考勤数据'],
        'calculate_salary': ['计算薪资', '算工资', '薪资计算'],
        'search_resume': ['搜索简历', '查找候选人', '人才搜索'],
    }

    def __init__(self):
        """Create the database handle and the three services sharing it."""
        self.db = Database()
        self.resume_service = ResumeService(OCRService(), NLPService(), self.db)
        self.attendance_service = AttendanceService(self.db)
        self.payroll_service = PayrollService(self.db)

    def handle_message(self, message: str, context: Dict) -> str:
        """Classify *message*, record the intent in *context* and reply.

        The intent is stored under ``context['last_intent']`` so that a
        following ``handle_file`` call knows what the upload is for.
        """
        intent = self._classify_intent(message)
        if context is not None:
            context['last_intent'] = intent

        if intent == 'parse_resume':
            return "请上传简历文件,我将为您解析简历信息。"
        if intent == 'import_attendance':
            return "请上传考勤数据Excel文件。"
        if intent == 'calculate_salary':
            return self._handle_salary_calculation(message)
        if intent == 'search_resume':
            return self._handle_resume_search(message)
        return self._handle_qa(message)

    def handle_file(self, file_path: str, context: Dict) -> str:
        """Process an uploaded file according to ``context['last_intent']``.

        For attendance imports the month comes from ``context['month']`` and
        defaults to the current month in ``YYYY-MM`` form.
        """
        intent = context.get('last_intent')

        if intent == 'parse_resume':
            result = self.resume_service.parse_resume(file_path)
            return self._format_resume_result(result)

        if intent == 'import_attendance':
            from datetime import datetime

            # Only fall back to "now" when no month was supplied.
            month = context.get('month') or datetime.now().strftime('%Y-%m')
            result = self.attendance_service.import_attendance(file_path, month)
            return "\n".join([
                "考勤数据导入完成!",
                f"- 总记录数:{result['total_records']}",
                f"- 异常记录:{result['anomaly_count']}",
            ])

        return "文件已收到,请告诉我您想做什么?"

    def _classify_intent(self, message: str) -> str:
        """Return the first intent whose trigger phrase occurs in *message*, else ``'qa'``."""
        for intent, phrases in self.INTENT_KEYWORDS.items():
            if any(phrase in message for phrase in phrases):
                return intent
        return 'qa'

    def _handle_salary_calculation(self, message: str) -> str:
        """Run a salary calculation for the employee and month named in *message*.

        NOTE(review): the expected message layout is an assumption - a month
        written as ``YYYY-MM`` / ``YYYY/MM`` / ``YYYY年MM`` plus an ASCII
        employee ID token, e.g. "计算薪资 E001 2024-05". Confirm the real
        employee-ID format.
        """
        import re

        prompt = "请提供员工ID和月份,例如:计算薪资 E001 2024-05"

        month_match = re.search(r'(?<!\d)(\d{4})[-/年](\d{1,2})(?!\d)', message)
        if not month_match:
            return prompt
        month = f"{int(month_match.group(1)):04d}-{int(month_match.group(2)):02d}"

        # Look for the employee ID in what is left once the month is removed.
        remainder = message[:month_match.start()] + ' ' + message[month_match.end():]
        id_match = re.search(r'[A-Za-z0-9_-]+', remainder)
        if not id_match:
            return prompt
        employee_id = id_match.group()

        detail = self.payroll_service.calculate_salary(employee_id, month)
        return (
            f"员工 {employee_id} {month} 薪资计算完成:"
            f"实发工资 {detail['net_salary']:.2f},个税 {detail['tax']:.2f}"
        )

    def _handle_resume_search(self, message: str) -> str:
        """Search stored résumés using the words left after the trigger phrase."""
        import re

        query = message
        for phrase in self.INTENT_KEYWORDS['search_resume']:
            query = query.replace(phrase, ' ')
        keywords = [word for word in re.split(r'[\s,,、;;]+', query) if word]
        if not keywords:
            return "请告诉我要搜索的关键词,例如:搜索简历 Python Docker"

        matches = self.resume_service.search_resumes(keywords)
        if not matches:
            return "没有找到匹配的简历。"

        lines = [f"找到 {len(matches)} 份匹配的简历:"]
        # Keep the reply short: list only the five best matches.
        for resume in matches[:5]:
            lines.append(
                f"- {resume.get('name')}(匹配度 {resume.get('matching_score')})"
            )
        return "\n".join(lines)

    def _handle_qa(self, message: str) -> str:
        """Reply to messages that match no intent with a capability overview."""
        return "\n".join([
            "我可以帮您:",
            "- 解析简历",
            "- 导入考勤",
            "- 计算薪资",
            "- 搜索简历",
        ])

    @staticmethod
    def _format_education(education) -> str:
        """Render education entries (``period`` / ``school`` / ``major``), one per line."""
        if not education:
            return "- 无"
        return "\n".join(
            f"- {item.get('period')} {item.get('school')} {item.get('major')}"
            for item in education
        )

    @staticmethod
    def _format_experience(experience) -> str:
        """Render work entries (``period`` / ``company`` / ``position``), one per line."""
        if not experience:
            return "- 无"
        return "\n".join(
            f"- {item.get('period')} {item.get('company')} {item.get('position')}"
            for item in experience
        )

    def _format_resume_result(self, result: Dict) -> str:
        """Render a parsed résumé dict as a multi-line text summary."""
        return "\n".join([
            "简历解析完成!",
            "",
            "基本信息:",
            f"- 姓名:{result.get('name')}",
            f"- 电话:{result.get('phone')}",
            f"- 邮箱:{result.get('email')}",
            f"- 性别:{result.get('gender')}",
            f"- 年龄:{result.get('age')}",
            "",
            "教育经历:",
            self._format_education(result.get('education') or []),
            "",
            "工作经历:",
            self._format_experience(result.get('experience') or []),
            "",
            # ``skills`` may be missing or None on partial results.
            f"技能:{', '.join(result.get('skills') or [])}",
        ])

四、总结

通过本项目,我们实践了:

  • 简历解析和信息提取
  • 考勤数据处理和异常检测
  • 薪资计算和个税处理
  • 多模块协同工作

五、下节预告

下一讲我们将进行项目回顾与总结。


加入学习群

👉 加入AI编程学习交流群

点击加入


本讲是《AI Skills 从入门到实践》系列课程的第25讲。

🎓 AI 编程实战课程

想系统学习 AI 编程?程序员晚枫的 AI 编程实战课 帮你从零上手!