#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
compare_manifests.py

Read three filesystem.manifest files, take the union of the packages they
list, and write a Markdown table containing a per-distribution check mark
plus each package's Priority, Section and Description.

Progress is logged in detail, and the `apt show` calls are issued from a
thread pool to speed things up.

Fix: the ":<suffix>" part of a package name (e.g. "foo:amd64") is stripped
automatically.
"""

import concurrent.futures
import logging
import subprocess
import sys
import time

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S"
)

# Column order of the distributions; also the order of the CLI arguments.
DISTROS = ("Ubuntu", "Zorin", "Anduin")


def _empty_info():
    """Return a fresh info dict with every field blank."""
    return {"Priority": "", "Section": "", "Description": ""}


def parse_manifest(path):
    """Return the set of package names listed in the manifest at *path*.

    Blank lines and lines starting with '#' are skipped. The package name is
    the first whitespace-separated token of a line, with everything from the
    first ':' onwards removed.

    If the file cannot be read or decoded, the error is logged and the
    process exits with status 1.
    """
    pkgs = set()
    logging.info("Parsing manifest: %s", path)
    try:
        with open(path, encoding="utf-8") as f:
            for line in f:
                fields = line.split()
                if not fields or fields[0].startswith("#"):
                    continue
                pkg = fields[0].partition(":")[0]
                # A token such as ":amd64" would leave an empty name; there
                # is nothing useful to look up for it.
                if pkg:
                    pkgs.add(pkg)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: undecodable bytes
        # (UnicodeDecodeError) or an invalid path string.
        logging.error("Error reading manifest %s: %s", path, e)
        sys.exit(1)
    logging.info("Found %d unique packages in %s", len(pkgs), path)
    return pkgs


def parse_apt_show(output):
    """Extract Priority, Section and Description from `apt show` output.

    Only the first record is considered. The description consists of the
    synopsis line plus the first paragraph of the extended description,
    joined with single spaces.

    Returns a dict with the keys "Priority", "Section" and "Description";
    fields that are not present are empty strings.
    """
    priority = ""
    section = ""
    desc_lines = []
    in_desc = False
    for line in output.splitlines():
        if in_desc:
            text = line.strip()
            # Continuation lines are indented. The description's first
            # paragraph ends at a blank line (end of record), at a " ."
            # paragraph separator, or at the next non-indented field.
            if not text or text == "." or not line[:1].isspace():
                break
            desc_lines.append(text)
        elif line.startswith("Priority:"):
            priority = line.split(":", 1)[1].strip()
        elif line.startswith("Section:"):
            section = line.split(":", 1)[1].strip()
        elif line.startswith("Description:"):
            synopsis = line.split(":", 1)[1].strip()
            if synopsis:
                desc_lines.append(synopsis)
            in_desc = True
    return {
        "Priority": priority,
        "Section": section,
        "Description": " ".join(desc_lines),
    }


def get_pkg_info(pkg):
    """Run `apt show pkg` and return its Priority, Section and Description.

    Returns a dict with the keys "Priority", "Section" and "Description".
    If apt cannot be started, times out (30 s) or exits with a non-zero
    status, all three fields are empty strings.
    """
    logging.debug("Fetching apt info for package: %s", pkg)
    try:
        p = subprocess.run(
            ["apt", "show", pkg],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            encoding="utf-8",
            # Do not let one undecodable byte abort the whole lookup.
            errors="replace",
            timeout=30,
        )
    except subprocess.TimeoutExpired:
        logging.warning("Timeout fetching info for %s", pkg)
        return _empty_info()
    except OSError as e:
        # e.g. the apt executable is not installed on this machine.
        logging.warning("Could not run apt for %s: %s", pkg, e)
        return _empty_info()

    if p.returncode != 0:
        logging.debug("apt show failed for %s", pkg)
        return _empty_info()

    info = parse_apt_show(p.stdout)
    logging.debug(
        "Obtained info for %s: priority=%s, section=%s",
        pkg, info["Priority"], info["Section"],
    )
    return info


def format_row(pkg, sets, info):
    """Return one Markdown table row (newline-terminated) for *pkg*.

    *sets* maps each name in DISTROS to the set of packages of that
    distribution; *info* is a dict as returned by get_pkg_info(). A "√" is
    emitted for every distribution that contains the package. Pipe characters
    in the description are escaped so they do not break the table.
    """
    marks = " | ".join("√" if pkg in sets[name] else "" for name in DISTROS)
    desc = info["Description"].replace("|", "\\|")
    return (
        f"| {pkg} | {marks} |"
        f" {info['Priority']} | {info['Section']} | {desc} |\n"
    )


def main():
    """Compare the three manifests given on the command line.

    Expects exactly three arguments (the Ubuntu, Zorin and Anduin manifest
    paths, in that order) and writes the result table to "comp_result.md" in
    the current directory.
    """
    if len(sys.argv) != 4:
        print(
            f"Usage: {sys.argv[0]} "
            "<ubuntu.manifest> <zorin.manifest> <anduin.manifest>",
            file=sys.stderr,
        )
        sys.exit(1)

    start_time = time.monotonic()
    paths = dict(zip(DISTROS, sys.argv[1:4]))

    # Parse the manifests
    sets = {name: parse_manifest(path) for name, path in paths.items()}
    all_pkgs = sorted(set().union(*sets.values()))
    total = len(all_pkgs)
    logging.info("Total unique packages to process: %d", total)

    # Fetch apt information on a thread pool
    pkg_infos = {}
    if total:
        # ThreadPoolExecutor rejects max_workers=0, so the pool is only
        # created when there is at least one package to look up.
        max_workers = min(10, total)
        logging.info("Starting ThreadPoolExecutor with %d workers", max_workers)
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_pkg = {
                executor.submit(get_pkg_info, pkg): pkg for pkg in all_pkgs
            }
            completed = concurrent.futures.as_completed(future_to_pkg)
            for idx, future in enumerate(completed, 1):
                pkg = future_to_pkg[future]
                try:
                    pkg_infos[pkg] = future.result()
                except Exception as e:
                    # Top-level boundary: one failing lookup must not abort
                    # the whole report, so log it and emit blank fields.
                    logging.error("Error processing %s: %s", pkg, e)
                    pkg_infos[pkg] = _empty_info()
                else:
                    logging.info("[%d/%d] Processed %s", idx, total, pkg)

    # Write the Markdown table
    out_path = "comp_result.md"
    logging.info("Writing results to %s", out_path)
    with open(out_path, "w", encoding="utf-8") as out:
        # Column order: check marks -> Priority -> Section -> Description
        out.write("| Package | Ubuntu | Zorin | Anduin | Priority | Section | Description |\n")
        out.write("|---------|--------|-------|--------|----------|---------|-------------|\n")
        out.writelines(
            format_row(pkg, sets, pkg_infos.get(pkg) or _empty_info())
            for pkg in all_pkgs
        )

    elapsed = time.monotonic() - start_time
    logging.info("Completed in %.2fs", elapsed)


if __name__ == "__main__":
    main()