1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
import re
from datetime import date
from typing import Dict, List
class ResumeService:
    """Extract structured data from resume files and search stored resumes.

    The collaborators are injected and are only used through the calls made
    in this class:

    * ``ocr_service``: ``extract_from_pdf``, ``extract_from_word`` and
      ``recognize`` (each receives the file path).
    * ``db``: ``save_resume`` and ``get_all_resumes``.
    * ``nlp_service``: stored on ``self.nlp``; no method of this class uses it.
    """

    # Canonical skill names reported by ``_extract_skills``, in report order.
    _SKILL_KEYWORDS = (
        'Python', 'Java', 'JavaScript', 'C++', 'Go', 'Rust',
        'MySQL', 'PostgreSQL', 'MongoDB', 'Redis',
        'Linux', 'Docker', 'Kubernetes', 'AWS', 'Azure',
        '机器学习', '深度学习', '数据分析', '人工智能',
        '项目管理', '团队协作', '敏捷开发',
    )
    # Extra spellings that count as the canonical skill. "Golang" is listed
    # because boundary-aware matching no longer finds "Go" inside it.
    _SKILL_ALIASES = {'Go': ('Golang',)}

    # Date range such as "2018.09-2022.06", "2018/9~2022/6" or "2020.09-至今".
    # The trailing "今" alternative makes "2020.09至今" match too: there the
    # "至" is consumed as the separator, so only "今" is left for the end.
    _PERIOD = r'\d{4}[./]\d{1,2}[-~至](?:\d{4}[./]\d{1,2}|至今|今)'
    _EDUCATION_RE = re.compile(
        r'(' + _PERIOD + r')'
        r'\s*([\u4e00-\u9fa5]+大学|[\u4e00-\u9fa5]+学院)'
        r'.*?([\u4e00-\u9fa5]+)(?:专业|系)',
        re.DOTALL,
    )
    _EXPERIENCE_RE = re.compile(
        r'(' + _PERIOD + r')'
        r'\s*([\u4e00-\u9fa5]+公司|[^\n]+)'
        r'.*?([\u4e00-\u9fa5]+)',
        re.DOTALL,
    )

    _POINTS_PER_KEYWORD = 20
    _MAX_SCORE = 100
    # Birth years outside [_MIN_BIRTH_YEAR, current year] are treated as noise.
    _MIN_BIRTH_YEAR = 1900

    def __init__(self, ocr_service, nlp_service, db):
        """Store the injected OCR, NLP and database collaborators."""
        self.ocr = ocr_service
        self.nlp = nlp_service
        self.db = db

    def parse_resume(self, file_path: str) -> Dict:
        """Parse the resume at *file_path*, persist it and return the record.

        The returned dict holds the basic-info keys (``name``, ``phone``,
        ``email``, ``gender``, ``age``) plus ``education``, ``experience``,
        ``skills`` and ``raw_text``. The same dict is passed to
        ``db.save_resume`` before it is returned.
        """
        # A failed extraction may yield None; normalise it so the regex-based
        # parsers below receive a string.
        text = self._extract_text(file_path) or ''

        resume_data = {
            **self._parse_basic_info(text),
            'education': self._parse_education(text),
            'experience': self._parse_experience(text),
            'skills': self._extract_skills(text),
            'raw_text': text,
        }
        self.db.save_resume(resume_data)
        return resume_data

    def _extract_text(self, file_path: str) -> str:
        """Return the text of *file_path* using the extractor for its type.

        ``.pdf`` and ``.doc``/``.docx`` go to the dedicated extractors; every
        other file is sent to ``ocr.recognize``. The extension is compared
        case-insensitively so ``CV.PDF`` is not routed to ``ocr.recognize``.
        """
        suffix_source = file_path.lower()
        if suffix_source.endswith('.pdf'):
            return self.ocr.extract_from_pdf(file_path)
        if suffix_source.endswith(('.doc', '.docx')):
            return self.ocr.extract_from_word(file_path)
        return self.ocr.recognize(file_path)

    def _parse_basic_info(self, text: str) -> Dict:
        """Extract name, phone, e-mail, gender and age from *text*.

        Every key is always present; a value is ``None`` when it cannot be
        found (or, for ``age``, when the birth year is implausible).
        """
        info = {}

        # The name is taken to be 2-4 Chinese characters at the very start.
        name_match = re.search(r'^([\u4e00-\u9fa5]{2,4})', text)
        info['name'] = name_match.group(1) if name_match else None

        # Digit lookarounds stop the pattern from matching inside a longer
        # number such as an 18-digit ID number.
        phone_match = re.search(r'(?<!\d)1[3-9]\d{9}(?!\d)', text)
        info['phone'] = phone_match.group() if phone_match else None

        # re.ASCII keeps \w from absorbing Chinese characters that directly
        # touch the address (e.g. "邮箱a@b.com电话").
        email_match = re.search(r'[\w.-]+@[\w.-]+\.\w+', text, re.ASCII)
        info['email'] = email_match.group() if email_match else None

        # Gender is only looked for in the first 100 characters.
        header = text[:100]
        if '男' in header:
            info['gender'] = '男'
        elif '女' in header:
            info['gender'] = '女'
        else:
            info['gender'] = None

        # The second alternative is non-greedy so that the first four-digit
        # run after "出生" is used, not the last one on the line.
        birth_match = re.search(r'(\d{4})年.*出生|出生.*?(\d{4})', text)
        info['age'] = None
        if birth_match:
            birth_year = int(birth_match.group(1) or birth_match.group(2))
            current_year = date.today().year
            if self._MIN_BIRTH_YEAR <= birth_year <= current_year:
                info['age'] = current_year - birth_year

        return info

    def _parse_education(self, text: str) -> List[Dict]:
        """Return education entries found in *text*.

        Each entry is a dict with ``period``, ``school`` (a name ending in
        "大学" or "学院") and ``major`` (the Chinese text directly before
        "专业" or "系").
        """
        return [
            {
                'period': match.group(1),
                'school': match.group(2),
                'major': match.group(3),
            }
            for match in self._EDUCATION_RE.finditer(text)
        ]

    def _parse_experience(self, text: str) -> List[Dict]:
        """Return work-experience entries found in *text*.

        Each entry is a dict with ``period``, ``company`` and ``position``.
        """
        # NOTE(review): any dated line matches this pattern, so education
        # lines are reported here as well -- confirm whether callers rely on
        # that before filtering them out.
        return [
            {
                'period': match.group(1),
                'company': match.group(2),
                'position': match.group(3),
            }
            for match in self._EXPERIENCE_RE.finditer(text)
        ]

    @staticmethod
    def _skill_regex(spelling: str) -> str:
        """Build the regex source used to look for one skill spelling."""
        escaped = re.escape(spelling)
        if all(ord(ch) < 128 for ch in spelling):
            # ASCII names must not be glued to other Latin letters, otherwise
            # "Go" is found in "MongoDB" and "Java" in "JavaScript". A digit
            # may follow ("Python3", "Java8"). \b is not used because Chinese
            # characters count as word characters.
            return r'(?<![A-Za-z0-9])' + escaped + r'(?![A-Za-z])'
        # Chinese keywords have no word boundaries; use plain containment.
        return escaped

    def _extract_skills(self, text: str) -> List[str]:
        """Return the known skills mentioned in *text*.

        Matching is case-insensitive. The result uses the canonical names and
        follows the order of the keyword list, not the order in the text.
        """
        if not text:
            return []

        found_skills = []
        for skill in self._SKILL_KEYWORDS:
            spellings = (skill,) + self._SKILL_ALIASES.get(skill, ())
            if any(
                re.search(self._skill_regex(spelling), text, re.IGNORECASE)
                for spelling in spellings
            ):
                found_skills.append(skill)
        return found_skills

    def search_resumes(self, keywords: List[str], min_score: int = 60) -> List[Dict]:
        """Return stored resumes scoring at least *min_score*, best first.

        Each result is a copy of the stored record with an added
        ``matching_score`` key; the records returned by the database are
        left unmodified.
        """
        results = []
        for resume in self.db.get_all_resumes():
            score = self._calculate_match_score(resume, keywords)
            if score >= min_score:
                results.append(dict(resume, matching_score=score))

        results.sort(key=lambda item: item['matching_score'], reverse=True)
        return results

    def _calculate_match_score(self, resume: Dict, keywords: List[str]) -> int:
        """Score *resume* against *keywords*.

        Each keyword found in ``raw_text`` (case-insensitive substring match)
        adds 20 points; the total is capped at 100.
        """
        # raw_text may be missing or stored as None; lower-case it only once.
        haystack = (resume.get('raw_text') or '').lower()
        hits = sum(1 for keyword in keywords if keyword.lower() in haystack)
        return min(hits * self._POINTS_PER_KEYWORD, self._MAX_SCORE)
|