"""Core processing module: core helpers for processing file content."""
import os
import re
import yaml
from ObjectiveC.oc_custom import custom_util
from ObjectiveC.oc_function import e_print_annotation

# Regular expressions are compiled once at module level to avoid recompiling
# them on every call. They are grouped by purpose so each table can be
# maintained and extended on its own.
#
# NOTE: a literal parenthesis inside a pattern must be written as ``\(`` or
# ``\)``. A bare ``$`` is an end-of-string anchor and is only used where the
# pattern really has to end there.

# Alternation of the scalar / struct type names recognised for local variables.
_BASIC_TYPE_ALTERNATION = (
    r'(?:BOOL|NSInteger|NSUInteger|CGFloat|int|float|double|long|short|char|size_t'
    r'|NSTimeInterval|CGPoint|CGSize|CGRect|NSRange|UIEdgeInsets)'
)

# "#define NAME" prefix shared by the single-line macro patterns; group 1 is NAME.
_DEFINE_NAME = r'#define\s+([a-zA-Z_][a-zA-Z0-9_]*)'

# 1. Basic patterns
_BASIC_PATTERNS = {
    # "-/+ (ReturnType) ... { body }" with at most one level of nested braces.
    'method_body': re.compile(
        r'[-\+]\s*\([^)]*\)\s*[^{]*\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}', re.DOTALL),
    'block_content': re.compile(r'\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}', re.DOTALL),
    'variable_name': re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$'),
    # "-" or "+" followed by the "(" that opens the return type.
    'method_start': re.compile(r'^\s*[-\+]\s*\('),
}

# 2. Local-variable patterns
_LOCAL_PATTERNS = {
    'basic_types': re.compile(_BASIC_TYPE_ALTERNATION + r'\s+(\w+)\s*[=;]'),
    'pointer_types': re.compile(r'([A-Z]\w+)\s*\*\s*(\w+)\s*[=;]'),
    'block_param': re.compile(r'\^[^{]*\(\s*\w+\s+(\w+)\s*\)'),
    'block_var': re.compile(r'__block\s+\w+\s+(\w+)\s*='),
    'for_loop': re.compile(r'for\s*\(\s*(?:NSInteger|NSUInteger|int)\s+(\w+)\s*='),
    # Additional block-parameter shapes
    'block_internal_pointer': re.compile(r'\^[^{]*\(\s*\w+\s*\*\s*(\w+)\s*\)'),
    'block_internal_type': re.compile(r'\^[^{]*\(\s*[A-Z]\w+\s+(\w+)\s*\)'),
    # Supplementary patterns covering more declaration shapes
    'pointer_no_assign': re.compile(r'([A-Z]\w+)\s*\*\s*(\w+)\s*;'),
    'pointer_method_call': re.compile(r'([A-Z]\w+)\s*\*\s*(\w+)\s*=\s*\['),
    'pointer_property': re.compile(r'([A-Z]\w+)\s*\*\s*(\w+)\s*=\s*\w+\.'),
    'basic_no_assign': re.compile(_BASIC_TYPE_ALTERNATION + r'\s+(\w+)\s*;'),
    'complex_init': re.compile(r'(\w+)\s*\*\s*(\w+)\s*=\s*\[\['),
    'error_param': re.compile(r'error:\s*&\s*(\w+)'),
    'generic_pointer': re.compile(r'([A-Z]\w+)\s*<[^>]*>\s*\*\s*(\w+)\s*='),
    # More involved shapes
    'complex_method_call': re.compile(
        r'(\w+)\s*\*\s*(\w+)\s*=\s*\[\w+\s+\w+[^;]*error:\s*&\w+[^;]*\]'),
    'long_type_method': re.compile(r'([A-Z][a-zA-Z0-9_]{10,})\s+(\w+)\s*=\s*\['),
    'long_type_general': re.compile(r'([A-Z]\w{10,})\s+(\w+)\s*='),
    'basic_method_call': re.compile(r'([A-Z]\w+)\s+(\w+)\s*=\s*\['),
    'unsigned_signed': re.compile(
        r'(?:unsigned|signed)\s+(?:int|long|short|char)\s+(\w+)\s*[=;]'),
    # Extra for-loop shapes
    'for_nsuinteger': re.compile(r'for\s*\(\s*NSUInteger\s+(\w+)\s*='),
    'for_in_loop': re.compile(r'for\s*\(\s*(\w+)\s+\*\s*(\w+)\s+in\s+'),
    # Block parameters inside a method signature, e.g. "(void (^)(Type *name))"
    'method_sig_void_pointer': re.compile(
        r'\(void\s*\(\s*\^\s*\)\s*\([^)]*\*\s*(\w+)\s*\)\s*\)'),
    'method_sig_void_type': re.compile(
        r'\(void\s*\(\s*\^\s*\)\s*\([^)]*\s+(\w+)\s*\)\s*\)'),
    'method_sig_return_pointer': re.compile(
        r'\(\s*\w+\s*\(\s*\^\s*\)\s*\([^)]*\*\s*(\w+)\s*\)\s*\)'),
    'method_sig_return_type': re.compile(
        r'\(\s*\w+\s*\(\s*\^\s*\)\s*\([^)]*\s+(\w+)\s*\)\s*\)'),
    # Variable declarations inside a block body
    'block_var_pointer': re.compile(r'\^[^{]*\{[^}]*?(\w+)\s*\*\s*(\w+)\s*='),
    'block_var_basic': re.compile(
        r'\^[^{]*\{[^}]*?(?:NSInteger|NSUInteger|CGFloat|BOOL|int|float|double|id)\s+(\w+)\s*='),
    'block_var_simple': re.compile(r'(?:\^|\{)[^}]*?(\w+)\s+(\w+)\s*='),
}

# 3. Static-variable patterns
_STATIC_PATTERNS = {
    'basic': re.compile(r'static\s+\w+\s+(\w+)\s*[;=]'),
    'pointer': re.compile(r'static\s+\w+\s*\*\s*(\w+)\s*[;=]'),
    'const_basic': re.compile(r'static\s+const\s+\w+\s+(\w+)\s*[;=]'),
}

# 4. Constant (extern) patterns
_CONST_PATTERNS = {
    'extern_basic': re.compile(r'extern\s+\w+\s+(\w+)\s*[;=]'),
    'extern_pointer': re.compile(r'extern\s+\w+\s*\*\s*(\w+)\s*[;=]'),
    'extern_const': re.compile(r'extern\s+const\s+\w+\s*\*?\s*(\w+)\s*[;=]'),
    # Generic pointer constant: extern NSArray<...> *varName;
    'extern_generic': re.compile(r'extern\s+([A-Z]\w+)\s*<[^>]*>\s*\*\s*(\w+)\s*[;=]'),
}

# 5. Macro-definition patterns (group 1 is always the macro name)
_DEFINE_PATTERNS = {
    # Macro whose body may continue over backslash-terminated lines.
    'multiline': re.compile(
        r'#define\s+(\w+)(?:\([^)]*\))?\s+[^\\]*(?:\\\s*\n[^\\]*)*', re.MULTILINE),
    'function_macro': re.compile(_DEFINE_NAME + r'\s*\([^)]*\)\s+.*'),
    'string_macro': re.compile(_DEFINE_NAME + r'\s+@?"[^"]*"'),
    'number_macro': re.compile(_DEFINE_NAME + r'\s+[-+]?\d+(?:\.\d+)?[fFlL]?(?:\s|$)'),
    'paren_expr': re.compile(_DEFINE_NAME + r'\s+\([^)]+\)'),
    'bit_operation': re.compile(_DEFINE_NAME + r'\s+\([^)]*<<[^)]*\)'),
    'conditional': re.compile(_DEFINE_NAME + r'\s+.*\?.*:.*'),
    'type_cast': re.compile(_DEFINE_NAME + r'\s+\(\([^)]+\).*\)'),
    'function_call': re.compile(_DEFINE_NAME + r'\s+[a-zA-Z_][a-zA-Z0-9_]*\([^)]*\)'),
    'compound_expr': re.compile(_DEFINE_NAME + r'\s+.*[+\-*/].*'),
    'array_struct': re.compile(_DEFINE_NAME + r'\s+\{[^}]*\}'),
    'special_symbol': re.compile(_DEFINE_NAME + r'\s+[^\w\s].*'),
    'attribute': re.compile(_DEFINE_NAME + r'\s+__[a-zA-Z_]+.*'),
    'stringify': re.compile(_DEFINE_NAME + r'\s+#[a-zA-Z_].*'),
    'concatenate': re.compile(_DEFINE_NAME + r'\s+.*##.*'),
    'multi_identifier': re.compile(_DEFINE_NAME + r'\s+[a-zA-Z_][a-zA-Z0-9_.]*'),
    # "$" is a genuine anchor here: the macro name is the last thing in the text.
    'empty': re.compile(_DEFINE_NAME + r'\s*$'),
    'basic_value': re.compile(_DEFINE_NAME + r'\s+\S.*'),
    'general': re.compile(_DEFINE_NAME + r'(?:\s.*)?'),
}

# 6.
# Merged view of every pattern table (kept for backward compatibility).
# Entries from the non-basic tables are prefixed with the table name,
# e.g. 'local_for_loop', 'static_basic', 'const_extern_basic', 'define_general'.
_COMPILED_PATTERNS = {
    **_BASIC_PATTERNS,
    **{f'local_{k}': v for k, v in _LOCAL_PATTERNS.items()},
    **{f'static_{k}': v for k, v in _STATIC_PATTERNS.items()},
    **{f'const_{k}': v for k, v in _CONST_PATTERNS.items()},
    **{f'define_{k}': v for k, v in _DEFINE_PATTERNS.items()},
}

# Start of an Objective-C method declaration: "-" or "+" followed by "(".
_METHOD_DECL_START = re.compile(r'^\s*[-\+]\s*\(')

# The "@property (...)" attribute list of a property declaration.
_PROPERTY_DECL = re.compile(r'(@property\s*\([^)]*\))')

# Annotations of the form KEYWORD(...) that are removed from property lines.
_API_ANNOTATION_KEYWORDS = (
    'API_DEPRECATED',
    'API_DEPRECATED_WITH_REPLACEMENT',
    'API_AVAILABLE',
    'API_UNAVAILABLE',
    'NS_AVAILABLE',
    'NS_DEPRECATED',
    'NS_SWIFT_NAME',
    '__attribute__',
    '__OSX_AVAILABLE_STARTING',
    '__OSX_AVAILABLE_BUT_DEPRECATED',
)

# Bare identifier annotations that are removed from property lines.
_SIMPLE_ANNOTATION_PATTERNS = (
    r'\b(?:NS_DESIGNATED_INITIALIZER|NS_UNAVAILABLE|DEPRECATED_ATTRIBUTE)\b',
    r'\b(?:__deprecated|CF_RETURNS_RETAINED|CF_RETURNS_NOT_RETAINED)\b',
    r'\b(?:__TVOS_AVAILABLE|__WATCHOS_AVAILABLE)\b',
)

# Scalar types recognised by prefix in "@property(...) TYPE name;" declarations.
_BASIC_PROPERTY_TYPES = (
    'BOOL', 'CGFloat', 'NSInteger', 'NSUInteger', 'int', 'float', 'double',
    'NSTimeInterval',
)


def clean_content(content):
    """Run the shared clean-up passes over *content* and return the result.

    Applies ``e_print_annotation.search_annotation_and_delete`` and
    ``e_print_annotation.search_print_and_delete`` (presumably comment and
    print/log removal -- confirm against that module), then drops every
    empty Objective-C string literal ``@""``.
    """
    content = e_print_annotation.search_annotation_and_delete(content)
    content = e_print_annotation.search_print_and_delete(content)
    content = content.replace('@""', '')
    return content


def get_describe_words():
    """Return the descriptor words used to strip availability annotations.

    The list is read from the ``PropertyDescribeWord`` key of
    ``<parent of this file's directory>/oc_function/a_yaml.yaml``. If the
    file cannot be read or parsed (the error is printed), or the key is
    missing, a built-in fallback list is returned instead.
    """
    try:
        yaml_path = os.path.join(os.path.dirname(os.path.dirname(__file__)),
                                 "oc_function", "a_yaml.yaml")
        with open(yaml_path, 'r', encoding='utf-8') as yaml_file:
            # NOTE(review): FullLoader can construct some Python objects; if
            # this file only holds plain data, yaml.safe_load would be safer.
            yaml_data = yaml.load(yaml_file.read(), Loader=yaml.FullLoader)
            if yaml_data and isinstance(yaml_data, dict) and 'PropertyDescribeWord' in yaml_data:
                return yaml_data['PropertyDescribeWord']
    except Exception as e:
        # Best effort: report the problem and fall back to the defaults below.
        print(f"读取描述词列表时出错: {str(e)}")

    # Common descriptor words used as a fallback
    return [
        "__OSX_AVAILABLE_STARTING", "__OSX_AVAILABLE_BUT_DEPRECATED",
        "NS_AVAILABLE", "NS_DEPRECATED", "API_AVAILABLE", "API_UNAVAILABLE",
        "NS_SWIFT_NAME", "__TVOS_AVAILABLE", "__WATCHOS_AVAILABLE",
        "NS_DESIGNATED_INITIALIZER", "NS_UNAVAILABLE", "DEPRECATED_ATTRIBUTE",
        "__deprecated", "__attribute__", "CF_RETURNS_RETAINED",
        "CF_RETURNS_NOT_RETAINED"
    ]


# Remove property / interface declarations
def remove_property_declaration(content):
    """Return *content* without ``@property`` and ``@interface`` declarations.

    Removes, in order: every ``@property (...) ...;`` statement, every
    ``@interface Name ... @end`` section, and any remaining stand-alone
    ``@interface Name`` line that has no instance-variable block.
    """
    content = re.sub(r'@property\s*\([^)]*\)[^;]*;', '', content, flags=re.MULTILINE)
    # Non-greedy up to the first "@end" so method bodies are not swallowed.
    content = re.sub(r'@interface\s+\w+[^@]*?@end', '', content, flags=re.DOTALL)
    content = re.sub(r'@interface\s+\w+[^{@\n]*\n', '', content, flags=re.MULTILINE)
    return content


def _remove_balanced_parentheses(text, keyword):
    """Remove every ``keyword(...)`` from *text*, honouring nested parentheses.

    If the parentheses after a keyword never balance, everything from that
    keyword to the end of *text* is dropped.
    """
    opener = re.compile(rf'{re.escape(keyword)}\s*\(')
    result = text
    while True:
        match = opener.search(result)
        if not match:
            break
        start = match.start()
        depth = 1
        pos = match.end()  # first character after the opening "("
        while pos < len(result) and depth > 0:
            if result[pos] == '(':
                depth += 1
            elif result[pos] == ')':
                depth -= 1
            pos += 1
        if depth == 0:
            # Balanced: cut out the whole annotation.
            result = result[:start] + result[pos:]
        else:
            # Unbalanced: drop the rest of the text.
            result = result[:start]
            break
    return result


def _compact_property_decl(line):
    """Remove all whitespace inside the ``@property (...)`` part of *line*."""
    decl_match = _PROPERTY_DECL.search(line)
    if decl_match:
        prop_decl = decl_match.group(1)
        line = line.replace(prop_decl, re.sub(r'\s+', '', prop_decl))
    return line


def _normalize_plain_property(line):
    """Rebuild a property line without ``*`` or ``^`` as ``@property(...) TYPE NAME;``."""
    decl_match = _PROPERTY_DECL.search(line)
    if not decl_match:
        return line
    prop_decl = decl_match.group(1)
    clean_prop_decl = re.sub(r'\s+', '', prop_decl)
    # Everything after the attribute list: type and property name.
    remaining = line[line.index(prop_decl) + len(prop_decl):].strip()

    # Known scalar type, e.g. "@property(...) BOOL isEnabled;"
    for type_name in _BASIC_PROPERTY_TYPES:
        if remaining.startswith(type_name):
            name_part = remaining[len(type_name):].strip()
            return f"{clean_prop_decl} {type_name} {name_part.split(';')[0]};"

    # Custom type separated by whitespace, e.g. "@property(...) CustomType name;"
    parts = remaining.split()
    if len(parts) >= 2:
        type_part = parts[0]
        name_part = ' '.join(parts[1:]).split(';')[0]
        return f"{clean_prop_decl} {type_part} {name_part};"

    # No whitespace between type and name (e.g. "TransactionStatusxxpk_status"):
    # split where a capitalised run is followed by a lower-case start.
    type_name_match = re.search(r'([A-Z][a-zA-Z0-9]+)([a-z][a-zA-Z0-9_]*)', remaining)
    if type_name_match:
        type_part = type_name_match.group(1)
        name_part = type_name_match.group(2).split(';')[0]
        return f"{clean_prop_decl} {type_part} {name_part};"
    return line


def _normalize_property_line(line):
    """Bring one ``@property`` line into a canonical, easily matched form."""
    # 1. Strip KEYWORD(...) annotations, including nested parentheses.
    for keyword in _API_ANNOTATION_KEYWORDS:
        line = _remove_balanced_parentheses(line, keyword)
    # 2. Strip bare identifier annotations.
    for pattern in _SIMPLE_ANNOTATION_PATTERNS:
        line = re.sub(pattern, '', line)
    # 3. Collapse whitespace and make sure the line ends with ";".
    line = re.sub(r'\s+', ' ', line).strip()
    if not line.rstrip().endswith(';'):
        line = line.rstrip() + ';'

    # Drop generics: everything from the first "<" to the last ">".
    while '<' in line and '>' in line:
        start = line.find('<')
        end = line.rfind('>')
        if start < end:
            line = line[:start] + line[end + 1:]
        else:
            # Mismatched brackets: stop instead of looping forever.
            break

    if '^' in line:
        # Block-typed property: keep the "(^name)(params)" structure.
        line = _compact_property_decl(line)
        line = re.sub(r'\(\s*\^\s*', '(^ ', line)
        line = re.sub(r'\)\s*\(', ')(', line)
        line = re.sub(r'\s+', ' ', line)
        line = re.sub(r'\s+;', ';', line)
    elif '*' in line:
        # Pointer property: isolate "*" first, then tidy the surrounding spaces.
        line = line.replace('*', ' * ')
        line = _compact_property_decl(line)
        line = re.sub(r'(\w+)\s+\*', r'\1 *', line)
        line = re.sub(r'\s+', ' ', line)
        line = re.sub(r'\(\s+', '(', line)
        line = re.sub(r'\s+\)', ')', line)
        line = re.sub(r'\s+;', ';', line)
    else:
        # Neither "*" nor "^": the space between type and name must survive.
        line = _normalize_plain_property(line)
    return line


def process_property_content(content):
    """Extract property names and custom accessors from ``@property`` lines.

    Args:
        content (str): Source text to scan.

    Returns:
        tuple: ``(sorted_properties, property_custom_accessors)`` where
        ``sorted_properties`` is the de-duplicated list of property names
        (names starting with ``_`` sorted last) and
        ``property_custom_accessors`` maps a property name to
        ``(setter_name, getter_name)``; either entry is ``None`` when the
        declaration has no ``setter=`` / ``getter=`` attribute.
    """
    content = clean_content(content)

    # Normalise every property line so the simple patterns below suffice.
    content = '\n'.join(
        _normalize_property_line(line) if '@property' in line else line
        for line in content.split('\n')
    )

    property_patterns = [
        # Pointer property: @property(...) Type * propertyName;
        r'@property\([^)]*\)[^*;]*\*\s*(\w+)\s*;',
        # Plain property: @property(...) Type propertyName;
        r'@property\([^)]*\)\s+\w+\s+(\w+)\s*;',
        # Block property: @property(...) returnType(^blockName)(...);
        r'@property\([^)]*\)[^(]*\(\s*\^\s*(\w+)\s*\)(?:\([^)]*\))?\s*;',
    ]

    # Custom setter / getter per property
    property_custom_accessors = {}
    for line in content.split('\n'):
        if '@property' not in line:
            continue
        prop_name = None
        for pattern in property_patterns:
            match = re.search(pattern, line)
            if match:
                prop_name = match.group(1)
                break
        if not prop_name:
            continue
        custom_setter = re.search(r'setter\s*=\s*(\w+)', line)
        custom_getter = re.search(r'getter\s*=\s*(\w+)', line)
        property_custom_accessors[prop_name] = (
            custom_setter.group(1) if custom_setter else None,
            custom_getter.group(1) if custom_getter else None,
        )

    # Collect names with every pattern over the whole text.
    all_properties = []
    for pattern in property_patterns:
        all_properties.extend(name for name in re.findall(pattern, content) if name)

    # De-duplicate; names without a leading underscore come first.
    sorted_properties = sorted(set(all_properties), key=lambda x: (x.startswith('_'), x))
    return sorted_properties, property_custom_accessors


def process_instance_variables_content(content):
    """Extract instance-variable names from ``@interface ... { ... }`` blocks.

    Matches interface / class-extension headers with an optional
    ``(Category)`` part and an optional ``<Protocol>`` list, then scans each
    line of the brace block. Returns the de-duplicated names, with names
    starting with ``_`` sorted last.
    """
    content = clean_content(content)

    ivar_block_pattern = (
        r'@interface\s+\w+\s*(?:\(\s*\w*\s*\))?\s*(?:<[^>]*>)?\s*\{([^}]*)\}'
    )
    blocks = re.findall(ivar_block_pattern, content, re.DOTALL)

    # "Type name;" -- one or more type tokens, then the complete variable name.
    type_var_pattern = r'(?:[\w*]+\s+)+([\w]+(?:_[\w]+)*)\s*;'
    # Fallbacks for lines without a recognisable type.
    var_patterns = [
        r'\b([\w]+(?:_[\w]+)*)\s*;',   # plain "name;"
        r'([\w]+(?:_[\w]+)*)\s*,',     # names in "a, b, c;" lists
    ]

    all_ivars = []
    for block in blocks:
        for line in block.strip().split('\n'):
            line = line.strip()
            if not line:
                continue
            # Drop a trailing // comment.
            line = re.sub(r'//.*$', '', line)

            typed_names = re.findall(type_var_pattern, line)
            all_ivars.extend(typed_names)
            if typed_names:
                # A typed declaration was found; skip the looser fallbacks.
                continue
            for pattern in var_patterns:
                all_ivars.extend(re.findall(pattern, line))

    return sorted(set(all_ivars), key=lambda x: (x.startswith('_'), x))


def process_local_variables_content(content):
    """Extract local-variable names (method bodies, blocks, loops) from *content*."""
    content = clean_content(content)
    content = remove_property_declaration(content)
    all_variables = []
    all_variables.extend(_extract_local_variables_fast(content))
    return _filter_and_sort_variables(all_variables)


def process_constants_content(content):
    """Extract static variables, extern constants and ``#define`` macro names."""
    content = clean_content(content)
    content = remove_property_declaration(content)
    all_variables = []
    all_variables.extend(_extract_static_variables_fast(content))
    all_variables.extend(_extract_const_variables_fast(content))
    # Macro definitions
    all_variables.extend(extract_define_macros(content))
    return _filter_and_sort_variables(all_variables)


def _filter_and_sort_variables(variables):
    """De-duplicate *variables* and keep only valid identifiers.

    First-occurrence order is preserved; despite the name, no sorting is
    performed.
    """
    if not variables:
        return []
    var_pattern = _COMPILED_PATTERNS['variable_name']
    # dict.fromkeys de-duplicates while keeping insertion order.
    return [var for var in dict.fromkeys(variables) if var and var_pattern.match(var)]


def _looks_like_type_name(name):
    """Return True for names that are probably types: all upper-case, or
    capitalised and longer than 8 characters."""
    return name.isupper() or (name[0].isupper() and len(name) > 8)


def _names_from_matches(matches):
    """Return the variable name of each ``findall`` result.

    Multi-group patterns yield tuples whose last group is the variable name;
    single-group patterns yield the name itself.
    """
    return [match[-1] if isinstance(match, tuple) else match for match in matches]


def _collect_filtered_names(patterns, content):
    """Apply each regex in *patterns* to *content* and return the captured
    names that do not look like type names."""
    names = []
    for pattern in patterns:
        for name in _names_from_matches(re.findall(pattern, content)):
            if name and not _looks_like_type_name(name):
                names.append(name)
    return names


# Pre-compiled local-variable patterns, in the order their results are appended.
_LOCAL_PATTERN_KEYS = (
    'local_basic_types',              # scalar types
    'local_pointer_types',            # pointer types
    'local_block_param',              # block parameters
    'local_block_internal_pointer',
    'local_block_internal_type',
    'local_block_var',                # __block variables
    'local_pointer_no_assign',        # pointer declared without assignment
    'local_pointer_method_call',      # assigned from a message send
    'local_pointer_property',         # assigned from a property access
    'local_basic_no_assign',          # scalar declared without assignment
    'local_complex_init',             # [[Class alloc] init]
    'local_error_param',              # error:&name
    'local_generic_pointer',          # Type<...> *name
    'local_complex_method_call',
    'local_long_type_method',         # long type names
    'local_long_type_general',
    'local_basic_method_call',
    'local_unsigned_signed',          # unsigned / signed types
    'local_for_loop',                 # for-loop counters
    'local_for_nsuinteger',
    'local_for_in_loop',              # for-in loop variable
    'local_method_sig_void_pointer',  # block parameters in method signatures
    'local_method_sig_void_type',
    'local_method_sig_return_pointer',
    'local_method_sig_return_type',
    'local_block_var_pointer',        # declarations inside a block body
    'local_block_var_basic',
    'local_block_var_simple',
)

# Generic declaration shapes keyed on a capitalised type name.
_UNIVERSAL_LOCAL_PATTERNS = (
    # 1. TypeName *varName = value;
    r'([A-Z][a-zA-Z0-9_]*)\s*\*\s*(\w+)\s*=',
    # 2. TypeName *varName;
    r'([A-Z][a-zA-Z0-9_]*)\s*\*\s*(\w+)\s*;',
    # 3. TypeName *varName = [method];
    r'([A-Z][a-zA-Z0-9_]*)\s*\*\s*(\w+)\s*=\s*\[',
    # 4. TypeName *varName = obj.property;
    r'([A-Z][a-zA-Z0-9_]*)\s*\*\s*(\w+)\s*=\s*\w+\.',
    # 5. TypeName<...> *varName = value;
    r'([A-Z][a-zA-Z0-9_]*)\s*<[^>]*>\s*\*\s*(\w+)\s*=',
    # 6. TypeName varName = [method];
    r'([A-Z][a-zA-Z0-9_]*)\s+(\w+)\s*=\s*\[',
    # 7. TypeName *varName = [[Class alloc] init];
    r'([A-Z][a-zA-Z0-9_]*)\s*\*\s*(\w+)\s*=\s*\[\[',
    # 8. TypeName *varName = [method error:&error];
    r'([A-Z][a-zA-Z0-9_]*)\s*\*\s*(\w+)\s*=\s*\[.*error:',
    # 9. Very long type names (e.g. system framework types)
    r'([A-Z][a-zA-Z0-9_]{8,})\s*\*?\s*(\w+)\s*[=;]',
    # 10. Protocol-qualified id: id<Protocol> varName
    r'id\s*<[^>]*>\s*(\w+)\s*[=;]',
)


def _extract_local_variables_fast(content):
    """Collect local-variable name candidates from the whole of *content*.

    Static declarations are removed first so they are not reported as
    locals. The result may contain duplicates; callers de-duplicate.
    """
    filtered_content = re.sub(r'^\s*static\s+[^;]+;', '', content, flags=re.MULTILINE)
    filtered_content = re.sub(r'static\s+[^=;]+[=;][^;]*;', '', filtered_content)

    variables = []
    for key in _LOCAL_PATTERN_KEYS:
        matches = _COMPILED_PATTERNS[key].findall(filtered_content)
        variables.extend(_names_from_matches(matches))

    # Block parameter with a nullability qualifier: ^(Type * _Nonnull varName)
    variables.extend(re.findall(
        r'\^[^{]*\(\s*\w+\s*\*\s*_\w+\s+(\w+)\s*\)', filtered_content))

    # Generic shapes; single-group results (pattern 10) are kept as well.
    variables.extend(_collect_filtered_names(_UNIVERSAL_LOCAL_PATTERNS, filtered_content))
    return variables


# Generic static-variable shapes.
_UNIVERSAL_STATIC_PATTERNS = (
    # 1. static TypeName *varName;  /  static TypeName *varName = value;
    r'static\s+([A-Z][a-zA-Z0-9_]*)\s*\*\s*(\w+)\s*[;=]',
    # 2. static const TypeName *varName;
    r'static\s+const\s+([A-Z][a-zA-Z0-9_]*)\s*\*\s*(\w+)\s*[;=]',
    # 3. static TypeName * const varName;
    r'static\s+([A-Z][a-zA-Z0-9_]*)\s*\*\s*const\s+(\w+)\s*[;=]',
    # 4. static TypeName varName;
    r'static\s+([A-Z][a-zA-Z0-9_]*)\s+(\w+)\s*[;=]',
    # 5. Static variable with a long type name
    r'static\s+([A-Z][a-zA-Z0-9_]{8,})\s*\*?\s*(\w+)\s*[;=]',
    # 6. Static variable with a leading underscore
    r'static\s+\w+\s+(_\w+)\s*[;=]',
)


def _extract_static_variables_fast(content):
    """Collect static-variable name candidates from *content* (may contain duplicates)."""
    variables = []
    variables.extend(_COMPILED_PATTERNS['static_basic'].findall(content))
    variables.extend(_COMPILED_PATTERNS['static_pointer'].findall(content))
    variables.extend(_COMPILED_PATTERNS['static_const_basic'].findall(content))
    variables.extend(_collect_filtered_names(_UNIVERSAL_STATIC_PATTERNS, content))
    return variables


# Generic extern-constant shapes.
_UNIVERSAL_EXTERN_PATTERNS = (
    # 1. extern TypeName *const varName;
    r'extern\s+([A-Z][a-zA-Z0-9_]*)\s*\*\s*const\s+(\w+)\s*[;=]',
    # 2. extern const TypeName *varName;
    r'extern\s+const\s+([A-Z][a-zA-Z0-9_]*)\s*\*\s*(\w+)\s*[;=]',
    # 3. extern TypeName varName;
    r'extern\s+([A-Z][a-zA-Z0-9_]*)\s+(\w+)\s*[;=]',
    # 4. extern constant with a long type name
    r'extern\s+([A-Z][a-zA-Z0-9_]{8,})\s*\*?\s*(\w+)\s*[;=]',
    # 5. extern scalar constant
    r'extern\s+(?:BOOL|NSInteger|NSUInteger|CGFloat|int|float|double)\s+(\w+)\s*[;=]',
)


def _extract_const_variables_fast(content):
    """Collect ``extern`` constant name candidates from *content* (may contain duplicates)."""
    variables = []
    variables.extend(_COMPILED_PATTERNS['const_extern_basic'].findall(content))
    variables.extend(_COMPILED_PATTERNS['const_extern_pointer'].findall(content))
    variables.extend(_COMPILED_PATTERNS['const_extern_const'].findall(content))
    variables.extend(_names_from_matches(
        _COMPILED_PATTERNS['const_extern_generic'].findall(content)))
    variables.extend(_collect_filtered_names(_UNIVERSAL_EXTERN_PATTERNS, content))
    return variables


# Single-line macro patterns, in the order they are applied.
_SINGLE_LINE_DEFINE_KEYS = (
    'define_function_macro',    # 1. function-like macro
    'define_string_macro',      # 2. string macro
    'define_number_macro',      # 3. numeric macro
    'define_paren_expr',        # 4. parenthesised expression
    'define_bit_operation',     # 5. bit shift
    'define_conditional',       # 6. conditional expression
    'define_type_cast',         # 7. type cast
    'define_function_call',     # 8. function call
    'define_compound_expr',     # 9. compound expression
    'define_array_struct',      # 10. array / struct initialiser
    'define_special_symbol',    # 11. value starting with a symbol
    'define_attribute',         # 12. attribute macro
    'define_stringify',         # 13. stringification
    'define_concatenate',       # 14. token concatenation
    'define_multi_identifier',  # 15. identifier value
    'define_empty',             # 16. empty macro
    'define_basic_value',       # 17. any value
    'define_general',           # 18. catch-all
)


def extract_define_macros(content):
    """Extract the names of ``#define`` macros from *content*.

    Args:
        content (str): Source text to scan.

    Returns:
        list: Macro names in match order: the multi-line pattern first, then
        each single-line pattern in turn. Duplicates are not removed here.
    """
    define_constants = list(_COMPILED_PATTERNS['define_multiline'].findall(content))
    for pattern_key in _SINGLE_LINE_DEFINE_KEYS:
        matches = _COMPILED_PATTERNS[pattern_key].findall(content)
        define_constants.extend(match for match in matches if match)
    return define_constants


def _strip_describe_words(text, describe_words):
    """Cut *text* at the first descriptor word (in list order) that occurs in it."""
    for word in describe_words:
        if word in text:
            return text.split(word)[0].strip()
    return text


def _is_valid_method_name(method_name):
    """Return True if *method_name* is an identifier that is not only underscores."""
    if not method_name:
        return False
    if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', method_name):
        return False
    if method_name.replace('_', '') == '':
        return False
    return True


def _merge_method_declarations(content, describe_words):
    """Join multi-line method declarations into single lines.

    Returns *content* with each method declaration collapsed onto one line
    and availability annotations cut from the line that starts it.
    """
    processed_lines = []
    current_method = ""
    in_method_declaration = False

    for line in content.split('\n'):
        stripped_line = line.strip()

        if _METHOD_DECL_START.match(line):
            # A new declaration starts; flush an unfinished previous one.
            if current_method:
                processed_lines.append(current_method)
            cleaned_line = _strip_describe_words(stripped_line, describe_words)
            current_method = cleaned_line
            # Completeness is judged on the original text as well: cutting an
            # annotation also cuts the trailing ";" or "{", and the
            # declaration must not then swallow the following lines.
            if (cleaned_line.endswith((';', '{'))
                    or stripped_line.endswith((';', '{'))):
                processed_lines.append(current_method)
                current_method = ""
                in_method_declaration = False
            else:
                in_method_declaration = True
        elif in_method_declaration:
            declaration_ends = (
                stripped_line.endswith((';', '{'))
                or stripped_line.startswith(('@', '#'))
                or not stripped_line  # a blank line also ends the declaration
            )
            if declaration_ends:
                if stripped_line:
                    current_method += " " + stripped_line
                current_method = re.sub(r'\s+', ' ', current_method).strip()
                processed_lines.append(current_method)
                current_method = ""
                in_method_declaration = False
                processed_lines.append(line)
            else:
                # Still inside the declaration: keep merging.
                current_method += " " + stripped_line
        else:
            processed_lines.append(line)

    # Flush the last declaration.
    if current_method:
        current_method = re.sub(r'\s+', ' ', current_method).strip()
        processed_lines.append(current_method)

    return '\n'.join(processed_lines)


def _selector_parts(method_line):
    """Return the selector keywords of one method declaration line.

    ``- (void)foo:(int)a bar:(int)b;`` yields ``['foo', 'bar']``.
    """
    # Skip the "-/+ (ReturnType)" prefix and keep the signature.
    signature_match = re.search(r'[-\+]\s*\([^)]*\)\s*(.+)', method_line)
    if not signature_match:
        return []
    signature = signature_match.group(1).strip()
    # Drop a trailing ";" or "{".
    signature = re.sub(r'[;{]\s*$', '', signature).strip()
    if not signature:
        return []

    # Case 1: no parameters -- the first word is the method name.
    if ':' not in signature:
        method_name = signature.split()[0]
        return [method_name] if _is_valid_method_name(method_name) else []

    # Case 2: parameters -- the word before each ":" is a selector keyword.
    # The final chunk is skipped because the loop expects only a parameter there.
    method_parts = []
    for index, part in enumerate(signature.split(':')[:-1]):
        part = part.strip()
        if index == 0:
            method_name = part.split()[0] if part else ""
            if method_name and _is_valid_method_name(method_name):
                method_parts.append(method_name)
            continue
        # Later chunks look like "(Type)param keyword"; take the last word.
        words = part.split()
        if words:
            candidate = re.sub(r'[()]+', '', words[-1])
            if candidate and _is_valid_method_name(candidate) and len(candidate) > 1:
                method_parts.append(candidate)
    return method_parts


def process_methods_content(content):
    """Extract Objective-C method-name parts (selector keywords) from *content*.

    Returns:
        list: Unique names longer than one character, sorted alphabetically
        with names starting with ``set`` placed last.
    """
    content = clean_content(content)
    # Descriptor words used to cut availability annotations from declarations.
    describe_words = get_describe_words()

    # Step 1: merge multi-line declarations and strip annotations.
    processed_content = _merge_method_declarations(content, describe_words)

    # Step 2: collect every selector keyword from each declaration line.
    all_methods = []
    for line in processed_content.split('\n'):
        line = line.strip()
        if _METHOD_DECL_START.match(line):
            all_methods.extend(_selector_parts(line))

    # Step 3: de-duplicate, filter and sort ("set..." names last).
    filtered_methods = [
        method for method in set(all_methods)
        if len(method) > 1 and _is_valid_method_name(method)
    ]
    filtered_methods.sort(key=lambda x: (x.startswith('set'), x))
    return filtered_methods


# Dedicated handling of C function content
def process_c_method_content(content):
    """Extract C function names; supports extern-style declarations and multi-line functions."""
    content = clean_content(content)

    # Blacklist of system functions
    SYSTEM_FUNCTIONS = {
        'printf', 'sprintf', 'snprintf', 'fprintf', 'scanf', 'strlen', 'strcpy',
        'strcat', 'strcmp', 'malloc', 'calloc', 'realloc', 'free', 'memcpy',
        'memset', 'memmove', 'memcmp', 'fopen', 'fclose', 'fread', 'fwrite',
        'fseek', 'ftell', 'rewind', 'dladdr', 'dlerror', 'NSLog', 'CFRelease',
        'CFRetain'
    }

    # Blacklist of C keywords and common literals
    C_KEYWORDS = {
        'if', 'else', 'for', 'while', 'do', 'switch', 'case', 'default', 'break',
        'continue', 'return', 'goto', 'sizeof', 'typedef', 'struct', 'union',
        'enum', 'const', 'volatile', 'static', 'extern', 'auto', 'register',
        'inline', 'restrict', 'signed', 'unsigned', 'true', 'false', 'NULL',
        'YES', 'NO', 'nil', 'Nil', 'TRUE', 'FALSE'
    }

    def is_valid_c_function_name(name):
        """Return a truthy value if *name* is acceptable as a C function name."""
        # Names that are commonly matched by mistake
        invalid_names = {
            'F', 'OF', 'EXPORT', 'EXTERN', 'ZEXTERN', 'ZEXPORT', 'd', 'n', 'c',
            'x', 'y', 'm', 'a', 'b', 'e', 'i', 'o', 'u', 'int', 'void', 'char',
            'float', 'double', 'long', 'short', 'const', 'static', 'extern',
            'inline'
        }
        return (name and
                len(name) > 2 and  # at least three characters
                re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name) and
                name not in C_KEYWORDS and
                name not in invalid_names and
                name.replace('_', '') != '' and
                (not name.isupper() or len(name) > 4) and  # short all-caps words are rejected
                not re.match(r'^[a-z]$', name))  # single lower-case letter

    def clean_api_annotations(text):
        """Remove API availability annotations from *text* and collapse whitespace."""
        api_patterns = [
            r'API_AVAILABLE\([^)]*\)',
            r'API_DEPRECATED\([^)]*\)',
            r'API_UNAVAILABLE\([^)]*\)',
            r'CF_RETURNS_RETAINED',
            r'CF_RETURNS_NOT_RETAINED',
            r'NS_AVAILABLE\([^)]*\)',
            r'__deprecated',
            r'__attribute__\([^)]*\)',
        ]
        for pattern in api_patterns:
            text = re.sub(pattern, '', text)
        return re.sub(r'\s+', ' ', text).strip()
def extract_from_patterns(): """使用正则模式提取函数定义""" functions = [] # 首先清理API注解 cleaned_content = clean_api_annotations(content) # 移除Objective-C方法体，避免提取方法内的函数调用 # 匹配 - (returnType)methodName { ... } 或 + (returnType)methodName { ... } # 改进：使用更精确的大括号匹配 def remove_objc_methods(text): result = [] lines = text.split('\n') i = 0 while i < len(lines): line = lines[i] # 检测Objective-C方法开始 if re.match(r'^\s*[-\+]\s*$', line.strip()): # 跳过整个方法，包括多行方法签名和方法体 brace_count = 0 method_started = False while i < len(lines): current_line = lines[i] # 查找方法体开始 if '{' in current_line: method_started = True brace_count += current_line.count('{') if method_started: brace_count -= current_line.count('}') i += 1 # 方法体结束 if method_started and brace_count <= 0: break else: result.append(line) i += 1 return '\n'.join(result) cleaned_content = remove_objc_methods(cleaned_content) # 1. 处理ZLIB格式的函数声明：ZEXTERN type ZEXPORT functionName OF((params)); # 支持多行声明 zlib_pattern = r'ZEXTERN\s+\w+\s+ZEXPORT\s+(\w+)\s+OF\s*\(\([^)]*$\)\s*;' zlib_matches = re.findall(zlib_pattern, cleaned_content, re.DOTALL | re.MULTILINE) functions.extend([match for match in zlib_matches if is_valid_c_function_name(match) and len(match) > 2]) # 2. 处理多行函数声明 - 先合并分行的函数声明 # 匹配从函数声明开始到分号结束的多行内容 multiline_functions = re.findall( r'(?:CG_EXTERN|CF_EXPORT|UIKIT_EXTERN|FOUNDATION_EXPORT|extern|static|inline)?\s*' r'(?:const\s+)?(?:unsigned\s+)?' r'(?:void|char|short|int|long|float|double|bool|BOOL|NSInteger|NSUInteger|CGFloat|' r'[A-Z][a-zA-Z0-9_]*(?:Ref|Type)?)\s*\*{0,2}\s*' r'(\w+)\s*$[^;]*$\s*;', cleaned_content, re.DOTALL | re.MULTILINE ) functions.extend([match for match in multiline_functions if is_valid_c_function_name(match)]) # 3. 
基本类型函数声明 basic_types = r'(?:void|char|short|int|long|float|double|bool|BOOL|NSInteger|NSUInteger|CGFloat|CGPoint|CGSize|CGRect|NSTimeInterval)' patterns = [ # 外部声明：CG_EXTERN, CF_EXPORT 等 rf'(?:CG_EXTERN|CF_EXPORT|UIKIT_EXTERN|FOUNDATION_EXPORT)\s+(?:const\s+)?{basic_types}\s*\*{{0,2}}\s*(\w+)\s*$', # 静态函数 rf'static\s+(?:const\s+)?(?:unsigned\s+)?{basic_types}\s*\*{{0,2}}\s*(\w+)\s*\(', # 普通extern函数 rf'extern\s+(?:const\s+)?(?:unsigned\s+)?{basic_types}\s*\*{{0,2}}\s*(\w+)\s*\(', # 内联函数 rf'inline\s+(?:const\s+)?(?:unsigned\s+)?{basic_types}\s*\*{{0,2}}\s*(\w+)\s*\(', # 普通函数 rf'(?:^|\n)\s*(?:const\s+)?(?:unsigned\s+)?{basic_types}\s*\*{{0,2}}\s*(\w+)\s*\(', # 函数指针typedef r'typedef\s+\w+\s*\(\s*\*\s*(\w+)\s*$', ] # 5. 新增：函数定义模式（带大括号的函数体） function_definition_patterns = [ # BOOL functionName(void) { 或 BOOL functionName() { rf'(?:^|\s)(?:const\s+)?(?:unsigned\s+)?{basic_types}\s*\*{{0,2}}\s*(\w+)\s*$[^)]*$\s*\{{', # static BOOL functionName(void) { rf'(?:^|\s)static\s+(?:const\s+)?(?:unsigned\s+)?{basic_types}\s*\*{{0,2}}\s*(\w+)\s*$[^)]*$\s*\{{', # extern BOOL functionName(void) { rf'(?:^|\s)extern\s+(?:const\s+)?(?:unsigned\s+)?{basic_types}\s*\*{{0,2}}\s*(\w+)\s*$[^)]*$\s*\{{', # inline BOOL functionName(void) { rf'(?:^|\s)inline\s+(?:const\s+)?(?:unsigned\s+)?{basic_types}\s*\*{{0,2}}\s*(\w+)\s*$[^)]*$\s*\{{', ] for pattern in patterns: matches = re.findall(pattern, cleaned_content, re.MULTILINE) functions.extend([match for match in matches if is_valid_c_function_name(match)]) # 处理新增的函数定义模式 for pattern in function_definition_patterns: matches = re.findall(pattern, cleaned_content, re.MULTILINE) functions.extend([match for match in matches if is_valid_c_function_name(match)]) # 4. 自定义类型函数声明（如 CGContextRef, CFStringRef 等） custom_type_patterns = [ # CG_EXTERN CustomType functionName(...) r'(?:CG_EXTERN|CF_EXPORT|UIKIT_EXTERN|FOUNDATION_EXPORT)\s+([A-Z][a-zA-Z0-9_]*(?:Ref|Type)?)\s*\*{0,2}\s*(\w+)\s*$', # extern CustomType functionName(...) 
r'extern\s+([A-Z][a-zA-Z0-9_]*(?:Ref|Type)?)\s*\*{0,2}\s*(\w+)\s*\(', # static CustomType functionName(...) r'static\s+([A-Z][a-zA-Z0-9_]*(?:Ref|Type)?)\s*\*{0,2}\s*(\w+)\s*\(', # CustomType functionName(...) - 普通自定义类型 r'(?:^|\n)\s*([A-Z][a-zA-Z0-9_]*(?:Ref|Type)?)\s*\*{0,2}\s*(\w+)\s*\(', ] # 6. 自定义类型函数定义（带大括号的函数体） custom_type_definition_patterns = [ # CustomType functionName(...) { r'(?:^|\s)([A-Z][a-zA-Z0-9_]*(?:Ref|Type)?)\s*\*{0,2}\s*(\w+)\s*\([^)]*$\s*\{', # static CustomType functionName(...) { r'(?:^|\s)static\s+([A-Z][a-zA-Z0-9_]*(?:Ref|Type)?)\s*\*{0,2}\s*(\w+)\s*$[^)]*$\s*\{', # extern CustomType functionName(...) { r'(?:^|\s)extern\s+([A-Z][a-zA-Z0-9_]*(?:Ref|Type)?)\s*\*{0,2}\s*(\w+)\s*$[^)]*$\s*\{', ] for pattern in custom_type_patterns: matches = re.findall(pattern, cleaned_content, re.MULTILINE) for match in matches: if len(match) >= 2: func_name = match[1] if isinstance(match, tuple) else match if is_valid_c_function_name(func_name): functions.append(func_name) # 处理自定义类型函数定义模式 for pattern in custom_type_definition_patterns: matches = re.findall(pattern, cleaned_content, re.MULTILINE) for match in matches: if len(match) >= 2: func_name = match[1] if isinstance(match, tuple) else match if is_valid_c_function_name(func_name): functions.append(func_name) return functions def extract_function_calls(): """提取独立的函数调用（避免误匹配点语法和Objective-C方法内调用）""" functions = [] # 只处理全局范围的函数调用，排除Objective-C方法内的调用 lines = content.split('\n') in_objc_method = False brace_count = 0 for line in lines: line_stripped = line.strip() # 检测Objective-C方法开始 if re.match(r'^\s*[-\+]\s*$', line_stripped): in_objc_method = True # 检查是否在同一行有开始大括号 if '{' in line: brace_count = line.count('{') - line.count('}') continue # 如果在Objective-C方法内，跟踪大括号 if in_objc_method: if '{' in line or '}' in line: brace_count += line.count('{') - line.count('}') if brace_count <= 0: in_objc_method = False brace_count = 0 continue # 只在全局范围内提取函数调用 if not in_objc_method and not line_stripped.startswith('//') and '.' 
not in line_stripped: # 匹配行首的函数调用 matches = re.findall(r'^\s*(\w+)\s*\([^)]*$\s*;', line_stripped) for match in matches: if (is_valid_c_function_name(match) and match not in SYSTEM_FUNCTIONS and len(match) > 2): functions.append(match) return functions # 收集所有函数名 all_functions = [] all_functions.extend(extract_from_patterns()) all_functions.extend(extract_function_calls()) # 去重并排序 unique_functions = list(dict.fromkeys(all_functions)) # 保持顺序的去重 unique_functions.sort(key=lambda x: (x.startswith('set'), x)) return unique_functions def process_enums_content(content): """ 专门用于处理和提取枚举内容的函数，包括枚举类型名和所有枚举值支持可用性标注清理，在一个函数内完整处理 """ # 统一的内容清理 content = clean_content(content) # 获取描述词列表，用于清理可用性标注 describe_words = get_describe_words() all_enums = [] # 内联函数：清理可用性标注 def clean_annotations(text): """清理文本中的可用性标注""" if not text: return text # 清理常见的可用性标注模式 patterns = [ r'__OSX_AVAILABLE_STARTING$[^)]*$', r'NS_ENUM_AVAILABLE$[^)]*$', r'NS_AVAILABLE$[^)]*$', r'API_AVAILABLE$[^)]*$', r'__deprecated', r'__attribute__$[^)]*$' ] for pattern in patterns: text = re.sub(pattern, '', text) # 清理描述词 for word in describe_words: if word in text: word_pattern = re.escape(word) + r'(?:$[^)]*$)?' 
text = re.sub(word_pattern, '', text) # 清理多余的空白字符 text = re.sub(r'\s+', ' ', text) return text.strip() # 内联函数：从枚举体中提取所有枚举值 def extract_enum_values_inline(enum_body): """内联函数：从枚举体中提取枚举值""" enum_values = [] if not enum_body: return enum_values # 清理枚举体内容，移除注释 enum_body = re.sub(r'//.*$', '', enum_body, flags=re.MULTILINE) enum_body = re.sub(r'/\*.*?\*/', '', enum_body, flags=re.DOTALL) enum_body = enum_body.strip() # 清理可用性标注 enum_body = clean_annotations(enum_body) # 移除多余的空白字符和换行 enum_body = re.sub(r'\s+', ' ', enum_body) # 使用更简单但更可靠的方法：先按逗号分割，然后处理每个部分 # 处理可能的嵌套括号情况 parts = [] current_part = "" paren_count = 0 bracket_count = 0 for char in enum_body: if char == '(': paren_count += 1 elif char == ')': paren_count -= 1 elif char == '[': bracket_count += 1 elif char == ']': bracket_count -= 1 elif char == ',' and paren_count == 0 and bracket_count == 0: if current_part.strip(): parts.append(current_part.strip()) current_part = "" continue current_part += char # 添加最后一个部分 if current_part.strip(): parts.append(current_part.strip()) # 从每个部分提取枚举值名称 for part in parts: part = part.strip() if not part: continue # 再次清理可用性标注（针对单个枚举值） part = clean_annotations(part) if not part: continue # 提取枚举值名称（去掉赋值部分） # 支持各种赋值格式：EnumValue, EnumValue = 1, EnumValue = 1 << 2, etc. value_match = re.match(r'^(\w+)(?:\s*=.*)?$', part, re.DOTALL) if value_match: enum_value = value_match.group(1).strip() if (enum_value and re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', enum_value) and len(enum_value) > 0): # 允许单字符枚举值 enum_values.append(enum_value) return enum_values # 1. 处理 NS_ENUM 和 NS_OPTIONS 枚举定义 # 使用更精确的正则表达式，支持多行匹配、嵌套大括号和可用性标注 # 修复：支持 unsigned long, unsigned int 等多词类型 ns_enum_pattern = r'typedef\s+(NS_ENUM|NS_OPTIONS)\s*$\s*([^,]+),\s*(\w+)\s*$\s*\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}[^;]*;?' 
ns_enum_matches = re.findall(ns_enum_pattern, content, re.DOTALL) for _, _, enum_name, enum_body in ns_enum_matches: # 清理枚举类型名的可用性标注 enum_name = clean_annotations(enum_name.strip()) if enum_name: # 添加枚举类型名 all_enums.append(enum_name) # 提取所有枚举值 enum_values = extract_enum_values_inline(enum_body) all_enums.extend(enum_values) # 2. 处理传统 C 枚举定义 c_enum_patterns = [ # typedef enum EnumName { ... } EnumName; (r'typedef\s+enum\s+(\w+)\s*\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}\s*(\w+)[^;]*;', 'named_typedef'), # typedef enum { ... } EnumName; (r'typedef\s+enum\s*\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}\s*(\w+)[^;]*;', 'anonymous_typedef'), # typedef enum : NSUInteger { ... } EnumName; (r'typedef\s+enum\s*:\s*\w+\s*\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}\s*(\w+)[^;]*;', 'typed_enum'), # enum EnumName { ... }; (r'enum\s+(\w+)\s*\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}[^;]*;?', 'simple_enum') ] for pattern, pattern_type in c_enum_patterns: matches = re.findall(pattern, content, re.DOTALL) for match in matches: if pattern_type == 'named_typedef': enum_name1, enum_body, enum_name2 = match enum_name1 = clean_annotations(enum_name1) enum_name2 = clean_annotations(enum_name2) if enum_name1: all_enums.append(enum_name1) if enum_name2 and enum_name2 != enum_name1: all_enums.append(enum_name2) else: if pattern_type == 'simple_enum': enum_name, enum_body = match else: # anonymous_typedef, typed_enum enum_body, enum_name = match enum_name = clean_annotations(enum_name) if enum_name: all_enums.append(enum_name) # 提取枚举值 enum_values = extract_enum_values_inline(enum_body) all_enums.extend(enum_values) # 去重并过滤 unique_enums = [] seen_enums = set() for enum_name in all_enums: if (enum_name and enum_name not in seen_enums and len(enum_name) > 1 and re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', enum_name)): unique_enums.append(enum_name) seen_enums.add(enum_name) return unique_enums def process_delegates_content(content): """专门用于处理和提取代理内容的函数""" # 统一的内容清理 content = clean_content(content) all_delegates = [] # 1. 
匹配 @protocol 协议定义 protocol_patterns = [ r'@protocol\s+(\w+)\s*<[^>]*>', # @protocol DelegateName r'@protocol\s+(\w+)\s*;', # @protocol DelegateName; r'@protocol\s+(\w+)\s*\n', # @protocol DelegateName (换行) r'@protocol\s+(\w+)\s*$' # @protocol DelegateName (行尾) ] for pattern in protocol_patterns: matches = re.findall(pattern, content, re.MULTILINE) all_delegates.extend(matches) # 去重并过滤 unique_delegates = [] for delegate_name in set(all_delegates): if (delegate_name and len(delegate_name) > 2 and re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', delegate_name)): unique_delegates.append(delegate_name) return unique_delegates def process_blocks_content(content): """专门用于处理和提取Block内容的函数""" # 统一的内容清理 content = clean_content(content) all_blocks = [] # 1. 匹配 typedef Block 定义 typedef_block_patterns = [ # typedef void(^BlockTypeName)(NSString *param); r'typedef\s+\w+\s*$\s*\^\s*(\w+)\s*$\s*$[^)]*$\s*;', # typedef NSString *(^BlockTypeName)(NSInteger param); r'typedef\s+\w+\s*\*\s*$\s*\^\s*(\w+)\s*$\s*$[^)]*$\s*;', # typedef id(^BlockTypeName)(void); r'typedef\s+id\s*$\s*\^\s*(\w+)\s*$\s*$[^)]*$\s*;', # typedef BOOL(^BlockTypeName)(NSError *error); r'typedef\s+BOOL\s*$\s*\^\s*(\w+)\s*$\s*$[^)]*$\s*;' ] for pattern in typedef_block_patterns: matches = re.findall(pattern, content) all_blocks.extend(matches) # 2. 匹配属性中的 Block 定义 # @property (nonatomic, copy) void(^blockName)(NSString *param); property_block_patterns = [ r'@property\s*$[^)]*$\s*\w+\s*$\s*\^\s*(\w+)\s*$\s*$[^)]*$\s*;', r'@property\s*$[^)]*$\s*\w+\s*\*\s*$\s*\^\s*(\w+)\s*$\s*$[^)]*$\s*;', r'@property\s*$[^)]*$\s*id\s*$\s*\^\s*(\w+)\s*$\s*$[^)]*$\s*;', r'@property\s*$[^)]*$\s*BOOL\s*$\s*\^\s*(\w+)\s*$\s*$[^)]*$\s*;' ] for pattern in property_block_patterns: matches = re.findall(pattern, content) all_blocks.extend(matches) # 3. 
匹配方法参数中的 Block # - (void)methodWithBlock:(void(^)(NSString *))blockName; method_block_patterns = [ r':\s*\w+\s*$\s*\^\s*$\s*$\s*[^)]*\s*$\s*(\w+)', r':\s*\w+\s*\*\s*$\s*\^\s*$\s*$\s*[^)]*\s*$\s*(\w+)', r':\s*id\s*$\s*\^\s*$\s*$\s*[^)]*\s*$\s*(\w+)', r':\s*BOOL\s*$\s*\^\s*$\s*$\s*[^)]*\s*$\s*(\w+)' ] for pattern in method_block_patterns: matches = re.findall(pattern, content) all_blocks.extend(matches) # 4. 匹配变量声明中的 Block # void(^blockName)(NSString *param) = ^(NSString *param) { ... }; variable_block_patterns = [ r'\w+\s*$\s*\^\s*(\w+)\s*$\s*$[^)]*$\s*=', r'\w+\s*\*\s*$\s*\^\s*(\w+)\s*$\s*$[^)]*$\s*=', r'id\s*$\s*\^\s*(\w+)\s*$\s*$[^)]*$\s*=', r'BOOL\s*$\s*\^\s*(\w+)\s*$\s*$[^)]*$\s*=' ] for pattern in variable_block_patterns: matches = re.findall(pattern, content) all_blocks.extend(matches) # 去重并过滤 unique_blocks = [] for block_name in set(all_blocks): if (block_name and len(block_name) > 1 and re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', block_name)): unique_blocks.append(block_name) return unique_blocks def process_class_name_content(content): """专门用于处理和提取类名函数""" content = clean_content(content) all_class_names = [] # 1. 匹配 @interface 类定义 interface_patterns = [ r'@interface\s+(\w+)\s*', # @interface ClassName r'@interface\s+(\w+)\s*<[^>]*>', # @interface ClassName r'@interface\s+(\w+)\s*$' # @interface ClassName (行尾) r'@interface\s+(\w+)\s*:\s*(\w+)\s*$' # @interface ClassName : SuperClassName (行尾) r'@interface\s+(\w+)\s*:\s*(\w+)\s*<[^>]*>\s*$' # @interface ClassName : SuperClassName (行尾) ] for pattern in interface_patterns: matches = re.findall(pattern, content) all_class_names.extend(matches) return all_class_names