xGblob 18397 #!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
最终 SDK 包 demo 工程清理器

用途：
1. 清理旧的 ../OSFrameworks、../OSResources、../CNFrameworks、../CNResources 引用
   （包括 PBXGroup 和 PBXFileSystemSynchronizedRootGroup 两种形式）
2. 清理旧的 SDK 组引用（默认组名：SDK）
3. 清理主 framework / bundle / middleware .a 的旧 fileRef、buildFile、build phase 引用
4. 按块级精确删除 FRAMEWORK_SEARCH_PATHS / HEADER_SEARCH_PATHS / LIBRARY_SEARCH_PATHS
   中包含目标路径的条目

此脚本采用 "按块定位 + 按 ID 清除" 策略，避免误删正常行。
"""

import os
import re
import shutil
import sys
from typing import Iterable, List, Set, Tuple


# Relative paths of the legacy framework/resource folders. FinalSDKProjectCleaner.clean
# removes every synchronized root group and PBXGroup whose `path` equals one of these.
LEGACY_PATHS = {
    "../OSFrameworks",
    "../OSResources",
    "../CNFrameworks",
    "../CNResources",
}

# Build-setting keys whose values FinalSDKProjectCleaner._clean_search_paths scans.
SEARCH_PATH_KEYS = [
    "FRAMEWORK_SEARCH_PATHS",
    "HEADER_SEARCH_PATHS",
    "LIBRARY_SEARCH_PATHS",
]


class FinalSDKProjectCleaner:
    def __init__(self, pbxproj_path: str):
        self.pbxproj_path = pbxproj_path
        if not os.path.exists(pbxproj_path):
            raise FileNotFoundError(f"项目文件不存在: {pbxproj_path}")

    def clean(self, sdk_name: str, sdk_rel_path: str = "../SDK", group_name: str = "SDK", exported_libs: Iterable[str] = ()):  # noqa: E501
        print("🧹 开始清理最终 SDK 包中的旧引用...")
        print(f"📄 pbxproj: {self.pbxproj_path}")
        print(f"📦 SDK 名称: {sdk_name}")
        print(f"📂 SDK 相对路径: {sdk_rel_path}")
        print(f"📁 组名: {group_name}")

        backup_path = f"{self.pbxproj_path}.backup_final_sdk_clean"
        shutil.copy2(self.pbxproj_path, backup_path)
        print(f"✅ 已创建备份: {backup_path}")

        with open(self.pbxproj_path, "r", encoding="utf-8") as f:
            content = f.read()

        try:
            exported_libs = [name.strip() for name in exported_libs if str(name).strip()]
            target_files = [f"{sdk_name}.framework", f"{sdk_name}.bundle", *exported_libs]
            sdk_folder_name = os.path.basename(sdk_rel_path.replace("\\", "/").rstrip("/"))

            removed_ids: Set[str] = set()

            content, ids = self._remove_sync_root_groups(content, LEGACY_PATHS)
            removed_ids |= ids

            content, ids = self._remove_pbx_groups_by_path(content, LEGACY_PATHS)
            removed_ids |= ids

            content, ids = self._remove_pbx_groups_by_name_or_path(content, group_name, sdk_folder_name, sdk_rel_path)
            removed_ids |= ids

            # Recursively remove sub-groups whose IDs were collected as children of deleted groups
            content, ids = self._remove_sub_groups_recursively(content, removed_ids)
            removed_ids |= ids

            # Remove orphaned PBXFileReference entries whose IDs were children of deleted groups
            content, ids = self._remove_file_references_by_ids(content, removed_ids)
            removed_ids |= ids

            content, ids = self._remove_file_references(content, target_files)
            removed_ids |= ids

            content, ids, build_file_ids_by_ref = self._remove_build_files(content, removed_ids | set())
            removed_ids |= ids
            # Also remove build files for any of the target_files (regardless)
            content, ids = self._remove_build_files_by_target_names(content, target_files)
            removed_ids |= ids

            # Strip references to removed IDs anywhere (children / files / synchronizedGroups / etc.)
            content = self._strip_id_references(content, removed_ids)

            content = self._clean_search_paths(content, sdk_folder_name)

            if not self._validate(content):
                raise RuntimeError("清理后的 pbxproj 结构校验失败")

            with open(self.pbxproj_path, "w", encoding="utf-8") as f:
                f.write(content)

            print("✅ 旧引用清理完成")
            return True
        except Exception as e:
            print(f"❌ 清理失败: {e}")
            shutil.copy2(backup_path, self.pbxproj_path)
            print("✅ 已恢复备份文件")
            return False

    # ---------- helpers ----------

    @staticmethod
    def _strip_quotes(value: str):
        value = value.strip()
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            return value[1:-1]
        return value

    def _find_block(self, content: str, start_idx: int) -> Tuple[int, int]:
        """Given index pointing to `{`, return (start, end_after_matching_brace_and_semicolon)."""
        depth = 0
        i = start_idx
        n = len(content)
        in_string = False
        while i < n:
            ch = content[i]
            if ch == '"' and (i == 0 or content[i - 1] != "\\"):
                in_string = not in_string
            elif not in_string:
                if ch == "{":
                    depth += 1
                elif ch == "}":
                    depth -= 1
                    if depth == 0:
                        # include trailing `;`
                        j = i + 1
                        while j < n and content[j] in " \t":
                            j += 1
                        if j < n and content[j] == ";":
                            j += 1
                        # include trailing newline
                        while j < n and content[j] == "\n":
                            j += 1
                            break
                        return start_idx, j
            i += 1
        return start_idx, n

    def _iter_object_blocks(self, content: str, isa_name: str):
        """Yield (id, full_match_start, block_start, block_end, block_text) for each top-level object of given isa."""
        pattern = re.compile(
            r"^\s*([A-F0-9]{24}) /\* (.*?) \*/ = \{[^{}]*?isa = " + re.escape(isa_name) + r";",
            re.MULTILINE,
        )
        for m in pattern.finditer(content):
            obj_id = m.group(1)
            comment = m.group(2)
            # find opening brace position
            brace_idx = content.index("{", m.start())
            # find beginning of line
            line_start = content.rfind("\n", 0, m.start()) + 1
            _, end = self._find_block(content, brace_idx)
            yield obj_id, comment, line_start, end, content[line_start:end]

    def _remove_blocks(self, content: str, spans: List[Tuple[int, int]]) -> str:
        if not spans:
            return content
        spans = sorted(set(spans), key=lambda s: s[0], reverse=True)
        for start, end in spans:
            content = content[:start] + content[end:]
        return content

    # ---------- group / file / build-file removers ----------

    def _remove_sync_root_groups(self, content: str, paths: Iterable[str]):
        print("🗂️ 清理旧的 PBXFileSystemSynchronizedRootGroup...")
        removed: Set[str] = set()
        spans: List[Tuple[int, int]] = []
        paths = set(paths)
        for obj_id, comment, s, e, text in self._iter_object_blocks(content, "PBXFileSystemSynchronizedRootGroup"):
            path_match = re.search(r"path = ([^;]+);", text)
            if not path_match:
                continue
            raw = self._strip_quotes(path_match.group(1))
            if raw in paths:
                print(f"  - 删除同步组: {raw} ({comment})")
                removed.add(obj_id)
                spans.append((s, e))
        content = self._remove_blocks(content, spans)
        return content, removed

    def _remove_pbx_groups_by_path(self, content: str, paths: Iterable[str]):
        print("📂 清理按路径匹配的旧 PBXGroup...")
        removed: Set[str] = set()
        spans: List[Tuple[int, int]] = []
        paths = set(paths)
        for obj_id, comment, s, e, text in self._iter_object_blocks(content, "PBXGroup"):
            path_match = re.search(r"path = ([^;]+);", text)
            if not path_match:
                continue
            raw = self._strip_quotes(path_match.group(1))
            if raw in paths:
                print(f"  - 删除 PBXGroup: {raw} ({comment})")
                removed.add(obj_id)
                spans.append((s, e))
                # also collect child ids to prune later (children lines)
                for child_id in re.findall(r"([A-F0-9]{24}) /\*", text):
                    if child_id != obj_id:
                        removed.add(child_id)
        content = self._remove_blocks(content, spans)
        return content, removed

    def _remove_pbx_groups_by_name_or_path(self, content: str, group_name: str, folder_name: str, sdk_rel_path: str):
        print("📂 清理旧的 SDK PBXGroup (按名称或路径)...")
        removed: Set[str] = set()
        spans: List[Tuple[int, int]] = []
        candidates = {group_name, folder_name, sdk_rel_path}
        for obj_id, comment, s, e, text in self._iter_object_blocks(content, "PBXGroup"):
            name_match = re.search(r"name = ([^;]+);", text)
            path_match = re.search(r"path = ([^;]+);", text)
            raw_name = self._strip_quotes(name_match.group(1)) if name_match else ""
            raw_path = self._strip_quotes(path_match.group(1)) if path_match else ""
            comment_name = self._strip_quotes(comment)
            hits = {raw_name, raw_path, comment_name} & candidates
            if hits:
                print(f"  - 删除 SDK 组: name={raw_name}, path={raw_path}, comment={comment_name}")
                removed.add(obj_id)
                spans.append((s, e))
                for child_id in re.findall(r"([A-F0-9]{24}) /\*", text):
                    if child_id != obj_id:
                        removed.add(child_id)
        content = self._remove_blocks(content, spans)
        return content, removed

    def _remove_sub_groups_recursively(self, content: str, parent_ids: Set[str]):
        """Recursively remove PBXGroup entries whose ID is in parent_ids, collecting all descendant IDs."""
        print("📂 递归清理子组...")
        all_removed: Set[str] = set()
        pending_ids = set(parent_ids)
        while True:
            found_spans: List[Tuple[int, int]] = []
            new_child_ids: Set[str] = set()
            for obj_id, comment, s, e, text in self._iter_object_blocks(content, "PBXGroup"):
                if obj_id in pending_ids:
                    print(f"  - 删除子组: {comment} ({obj_id})")
                    found_spans.append((s, e))
                    all_removed.add(obj_id)
                    for child_id in re.findall(r"([A-F0-9]{24}) /\*", text):
                        if child_id != obj_id and child_id not in all_removed:
                            new_child_ids.add(child_id)
                            all_removed.add(child_id)
            content = self._remove_blocks(content, found_spans)
            if not new_child_ids:
                break
            pending_ids = new_child_ids
        return content, all_removed

    def _remove_file_references_by_ids(self, content: str, ids: Set[str]):
        """Remove PBXFileReference entries whose ID is in the given set (orphaned children of deleted groups)."""
        if not ids:
            return content, set()
        print("📄 清理已删除组的子项 PBXFileReference（防止 Recovered References）...")
        removed: Set[str] = set()
        pattern = re.compile(
            r"^(\s*)([A-F0-9]{24}) /\* ([^*]+?) \*/ = \{isa = PBXFileReference;[^}]*\};\s*$",
            re.MULTILINE,
        )

        def replacer(match):
            ref_id = match.group(2)
            name = match.group(3).strip()
            if ref_id in ids:
                removed.add(ref_id)
                print(f"  - 删除孤儿文件引用: {name} ({ref_id})")
                return ""
            return match.group(0)

        content = pattern.sub(replacer, content)
        return content, removed

    def _remove_file_references(self, content: str, file_names: List[str]):
        print("📄 清理旧的文件引用...")
        removed: Set[str] = set()
        if not file_names:
            return content, removed
        name_set = set(file_names)
        pattern = re.compile(
            r"^(\s*)([A-F0-9]{24}) /\* ([^*]+?) \*/ = \{isa = PBXFileReference;[^}]*\};\s*$",
            re.MULTILINE,
        )

        def replacer(match):
            name = match.group(3).strip()
            if name in name_set:
                removed.add(match.group(2))
                print(f"  - 删除文件引用: {name}")
                return ""
            return match.group(0)

        content = pattern.sub(replacer, content)
        return content, removed

    def _remove_build_files(self, content: str, file_ref_ids: Set[str]):
        """Remove PBXBuildFile entries whose fileRef is in the given set."""
        print("🔧 清理与已删除 fileRef 关联的构建文件...")
        removed: Set[str] = set()
        mapping = {}
        if not file_ref_ids:
            return content, removed, mapping
        pattern = re.compile(
            r"^\s*([A-F0-9]{24}) /\* ([^*]+?) \*/ = \{isa = PBXBuildFile; fileRef = ([A-F0-9]{24}) [^}]*\};\s*$",
            re.MULTILINE,
        )

        def replacer(match):
            build_id = match.group(1)
            desc = match.group(2).strip()
            ref_id = match.group(3)
            if ref_id in file_ref_ids:
                removed.add(build_id)
                mapping[ref_id] = build_id
                print(f"  - 删除构建文件: {desc}")
                return ""
            return match.group(0)

        content = pattern.sub(replacer, content)
        return content, removed, mapping

    def _remove_build_files_by_target_names(self, content: str, file_names: List[str]):
        print("🔧 再次按文件名清理残留的 PBXBuildFile...")
        removed: Set[str] = set()
        if not file_names:
            return content, removed
        name_re = "|".join(re.escape(n) for n in file_names)
        pattern = re.compile(
            rf"^\s*([A-F0-9]{{24}}) /\* ({name_re}) in (Frameworks|Resources|Sources) \*/ = \{{isa = PBXBuildFile;[^}}]*\}};\s*$",
            re.MULTILINE,
        )

        def replacer(match):
            removed.add(match.group(1))
            print(f"  - 删除构建文件: {match.group(2)} in {match.group(3)}")
            return ""

        content = pattern.sub(replacer, content)
        return content, removed

    def _strip_id_references(self, content: str, ids: Set[str]):
        if not ids:
            return content
        print("🧽 清理 children / build phases / synchronizedGroups 中的旧引用...")
        for item_id in ids:
            pattern = re.compile(rf"^\s*{re.escape(item_id)} /\* [^*]* \*/,\s*$", re.MULTILINE)
            content = pattern.sub("", content)
        return content

    # ---------- search path block cleaner ----------

    def _clean_search_paths(self, content: str, sdk_folder_name: str):
        print("🔍 清理搜索路径（按块精确处理）...")
        bad_tokens = [
            "../OSFrameworks",
            "../OSResources",
            "../CNFrameworks",
            "../CNResources",
            f"/{sdk_folder_name}",
        ]

        for key in SEARCH_PATH_KEYS:
            # Multi-line: KEY = ( ... );
            multi_pattern = re.compile(rf"(\s*){re.escape(key)} = \((.*?)\);", re.DOTALL)

            def multi_replacer(m):
                indent = m.group(1)
                body = m.group(2)
                items = [line for line in body.split("\n")]
                kept = []
                for line in items:
                    stripped = line.strip().rstrip(",").strip()
                    if not stripped:
                        kept.append(line)
                        continue
                    if any(tok in stripped for tok in bad_tokens):
                        print(f"  - 删除 {key} 条目: {stripped}")
                        continue
                    kept.append(line)
                new_body = "\n".join(kept)
                return f"{indent}{key} = ({new_body});"

            content = multi_pattern.sub(multi_replacer, content)

            # Single-line: KEY = "...";
            single_pattern = re.compile(rf"(\s*){re.escape(key)} = \"([^\"]*)\";\s*\n")

            def single_replacer(m):
                value = m.group(2)
                if any(tok in value for tok in bad_tokens):
                    print(f"  - 删除 {key} 单值: {value}")
                    return ""
                return m.group(0)

            content = single_pattern.sub(single_replacer, content)

        return content

    def _validate(self, content: str):
        print("🔍 校验 pbxproj 结构...")
        required_sections = [
            "/* Begin PBXFileReference section */",
            "/* End PBXFileReference section */",
            "/* Begin PBXGroup section */",
            "/* End PBXGroup section */",
            "/* Begin XCBuildConfiguration section */",
            "/* End XCBuildConfiguration section */",
        ]
        for section in required_sections:
            if section not in content:
                print(f"❌ 缺少必要节: {section}")
                return False
        if content.count("{") != content.count("}"):
            print("❌ 花括号数量不匹配")
            return False
        if not re.search(r"rootObject = [A-F0-9]{24}", content):
            print("❌ 缺少 rootObject")
            return False
        print("✅ pbxproj 结构校验通过")
        return True


def main():
    """Command-line entry point.

    Positional arguments: the project.pbxproj path, the SDK name, then
    optionally the SDK relative path (default ``../SDK``), the group name
    (default ``SDK``) and a comma-separated list of exported ``.a`` names.
    Exits with status 0 when cleaning succeeds and 1 otherwise, including when
    fewer than two arguments are given.
    """
    argv = sys.argv
    if len(argv) < 3:
        print("用法: python custom_pbxproj_clean.py <project.pbxproj路径> <SDK名称> [SDK相对路径] [组名] [导出.a逗号列表]")
        sys.exit(1)

    def arg_or(position, fallback):
        # optional positional argument with a default
        return argv[position] if len(argv) > position else fallback

    pbxproj_path, sdk_name = argv[1], argv[2]
    sdk_rel_path = arg_or(3, "../SDK")
    group_name = arg_or(4, "SDK")
    exported_libs = []
    for piece in arg_or(5, "").split(","):
        if piece.strip():
            exported_libs.append(piece.strip())

    cleaner = FinalSDKProjectCleaner(pbxproj_path)
    succeeded = cleaner.clean(
        sdk_name,
        sdk_rel_path=sdk_rel_path,
        group_name=group_name,
        exported_libs=exported_libs,
    )
    sys.exit(0 if succeeded else 1)


if __name__ == "__main__":
    main()
o