#!/bin/bash
# Upgrades OpenClaw to a requested version in numbered, resumable stages.
#
# Usage: $0 --blueprint-id <id> --version <version>
#
# State files written under /etc/lighthouse:
#   upgrade_status     PROGRESSING | FAILED | COMPLETED:<version>
#   upgrade_stage      number of the last completed stage (used to resume)
#   upgrade_timer.log  elapsed seconds per stage
#   npm_registry       cached URL of the fastest npm registry probe

# Abort on the first unhandled command failure.
set -e

# Upgrade timeout in seconds (30 minutes).
UPGRADE_TIMEOUT=1800

# Stop the background timeout watchdog if it is still running.
# Globals: WATCHDOG_PID (read)
# Returns: always 0
cleanup_watchdog() {
  # No watchdog was started (or its PID was never recorded).
  [ -n "$WATCHDOG_PID" ] || return 0
  # Signal 0 only probes for existence; an exited watchdog is left alone.
  kill -0 "$WATCHDOG_PID" 2>/dev/null || return 0
  kill "$WATCHDOG_PID" 2>/dev/null || true
}

# EXIT handler: always stop the watchdog; if the script is exiting with a
# non-zero code while the status file still reads PROGRESSING, record FAILED.
cleanup_exit() {
  local exit_code=$?   # must be read before any other command overwrites $?
  cleanup_watchdog
  # A clean exit leaves the status file untouched.
  [ "$exit_code" -ne 0 ] || return 0
  local recorded
  recorded=$(cat /etc/lighthouse/upgrade_status 2>/dev/null || echo "")
  case "$recorded" in
    PROGRESSING) echo "FAILED" > /etc/lighthouse/upgrade_status ;;
  esac
}

trap cleanup_exit EXIT

# ---- Command-line handling ----
# Exactly two option/value pairs are expected, i.e. four words in total.
usage_text="Usage: $0 --blueprint-id <id> --version <version>"

if [ $# -ne 4 ]; then
  echo "$usage_text"
  exit 1
fi

BLUEPRINT_ID=""
VERSION=""

# Consume the arguments two at a time: an option name followed by its value.
until [[ $# -eq 0 ]]; do
  if [[ "$1" == "--blueprint-id" ]]; then
    BLUEPRINT_ID="$2"
  elif [[ "$1" == "--version" ]]; then
    VERSION="$2"
  else
    echo "Unknown option: $1"
    echo "$usage_text"
    exit 1
  fi
  shift 2
done

# Both values are mandatory and must be non-empty.
[ -n "$BLUEPRINT_ID" ] || {
  echo "Error: --blueprint-id is required"
  exit 1
}

[ -n "$VERSION" ] || {
  echo "Error: --version is required"
  exit 1
}

# ========== Infrastructure ==========

mkdir -p /etc/lighthouse
SCRIPT_START=$SECONDS
STAGE_FILE="/etc/lighthouse/upgrade_stage"
TIMER_LOG="/etc/lighthouse/upgrade_timer.log"
# Number of the last completed stage; 0 means a fresh run.
CURRENT_STAGE=$(cat "$STAGE_FILE" 2>/dev/null || echo "0")
# An empty or corrupted stage file would make every later integer comparison
# fail with "integer expression expected"; treat it as a fresh run instead.
if [[ "$CURRENT_STAGE" =~ ^[0-9]+$ ]]; then
  # Force base 10 so a value such as "08" is not parsed as invalid octal.
  CURRENT_STAGE=$((10#$CURRENT_STAGE))
else
  CURRENT_STAGE=0
fi

# Reset the timer log on a fresh run; append to it when resuming.
if [ "$CURRENT_STAGE" -eq 0 ]; then
  echo "" > "$TIMER_LOG"
fi

echo "=== OpenClaw Upgrade ==="
echo "Blueprint ID: $BLUEPRINT_ID"
echo "Version: $VERSION"
if [ "$CURRENT_STAGE" -gt 0 ]; then
  echo "从阶段 $((CURRENT_STAGE + 1)) 断点续跑"
fi

# Install a boot-time guard service so that an interrupted upgrade (e.g. a
# reboot mid-run) cannot leave the status stuck at PROGRESSING.
cat > /etc/systemd/system/openclaw-upgrade-guard.service << 'GUARD'
[Unit]
Description=OpenClaw upgrade status guard - reset stale PROGRESSING to FAILED on boot
After=local-fs.target

[Service]
Type=oneshot
ExecStart=/bin/bash -c 'if [ -f /etc/lighthouse/upgrade_status ] && [ "$(cat /etc/lighthouse/upgrade_status)" = "PROGRESSING" ]; then echo "FAILED" > /etc/lighthouse/upgrade_status; fi'
RemainAfterExit=no

[Install]
WantedBy=multi-user.target
GUARD
systemctl daemon-reload
systemctl enable openclaw-upgrade-guard.service 2>/dev/null || true

# Record that an upgrade is in progress.
echo "PROGRESSING" > /etc/lighthouse/upgrade_status

# Start the timeout watchdog: a background subshell that, once the timeout
# elapses, marks the upgrade FAILED and terminates the main script.
(
  # The sleep runs in the background and is waited on. If the subshell were
  # blocked in a foreground sleep, killing the subshell would orphan the
  # sleep, which would keep running (and keep the inherited stdout/stderr
  # open) for the rest of the timeout. With `wait`, the TERM trap fires
  # immediately and takes the sleep down with it.
  trap 'kill "$sleep_pid" 2>/dev/null; exit 0' TERM
  sleep "$UPGRADE_TIMEOUT" &
  sleep_pid=$!
  wait "$sleep_pid"
  echo "FAILED" > /etc/lighthouse/upgrade_status
  echo "Upgrade timed out after ${UPGRADE_TIMEOUT}s, aborting..."
  kill -TERM $$ 2>/dev/null || true
) &
WATCHDOG_PID=$!

# ========== Registry speed test ==========

# Cache file holding the probe URL of the fastest npm registry found so far.
REGISTRY_FILE="/etc/lighthouse/npm_registry"

# Measure how long a single fetch of a URL takes.
# Arguments: $1 - URL to fetch
# Outputs:   elapsed milliseconds on stdout, or 99999 when curl fails;
#            curl's own diagnostics stay on stderr
test_url_speed() {
  local url=$1
  local start end rc=0
  start=$(date +%s%N)
  # curl's error text must not reach stdout: callers capture stdout and
  # compare it as an integer, so it may contain nothing but the number.
  curl -sS --connect-timeout 3 --max-time 5 \
    -o /dev/null "${url}" || rc=$?
  end=$(date +%s%N)
  if [ "$rc" -ne 0 ]; then
    echo "99999"
    return
  fi
  # Nanoseconds to milliseconds.
  echo $(( (end - start) / 1000000 ))
}

# Pick the fastest URL from a list of candidates and cache the choice.
# Usage:     select_fastest <label> <cache_file> <url1> <url2> ...
# Outputs:   the chosen URL on stdout (nothing if every candidate failed);
#            progress messages on stderr
# Side effects: writes the chosen URL to <cache_file>
select_fastest() {
  local label=$1 cache_file=$2
  shift 2

  # A cached choice short-circuits the measurement.
  local cached
  cached=$(cat "$cache_file" 2>/dev/null || echo "")
  if [ -n "$cached" ]; then
    echo "$cached"
    return
  fi

  echo "Testing ${label} speed..." >&2
  local best_url="" best_time=99999
  # Keep the loop variables local so they do not leak into the caller's scope.
  local url t
  for url in "$@"; do
    t=$(test_url_speed "$url") || t=99999
    echo "  $url => ${t}ms" >&2
    # Ignore anything that is not a plain integer; feeding it to -lt would
    # only produce an "integer expression expected" error.
    [[ "$t" =~ ^[0-9]+$ ]] || continue
    if [ "$t" -lt "$best_time" ]; then
      best_time=$t
      best_url=$url
    fi
  done

  # 99999 is the failure sentinel, so only a strictly faster result counts.
  if [ -n "$best_url" ] && [ "$best_time" -lt 99999 ]; then
    echo "Selected: $best_url (${best_time}ms)" >&2
    echo "$best_url" > "$cache_file"
    echo "$best_url"
  fi
}

# ========== 环境加载辅助函数 ==========

# Load nvm and restore the previously selected registry; called at the start
# of each stage so a resumed run gets the same environment.
# Globals: REGISTRY_FILE (read); exports NVM_DIR, PNPM_HOME, PATH, CI,
#          XDG_RUNTIME_DIR, npm_config_registry
load_env() {
  NVM_DIR="$HOME/.nvm"
  export NVM_DIR
  if [ -s "$NVM_DIR/nvm.sh" ]; then
    \. "$NVM_DIR/nvm.sh"
  fi

  # Restore the pnpm global bin directory.
  if [ -z "$PNPM_HOME" ]; then
    if [ -d "$HOME/.local/share/pnpm" ]; then
      PNPM_HOME="$HOME/.local/share/pnpm"
      PATH="$PNPM_HOME:$PATH"
      export PNPM_HOME PATH
    fi
  fi

  # Without a TTY (e.g. under TAT) disable pnpm's progress bar and prompts,
  # which could otherwise hang the run.
  [ -t 1 ] || export CI=true

  # Restore the systemd user session (the gateway needs `systemctl --user`).
  XDG_RUNTIME_DIR=/run/user/$(id -u)
  export XDG_RUNTIME_DIR
  loginctl enable-linger $(whoami) 2>/dev/null || true

  # The cache stores the probe URL; strip the probe path to get the registry.
  local registry_probe
  registry_probe=$(cat "$REGISTRY_FILE" 2>/dev/null || echo "")
  [ -z "$registry_probe" ] || export npm_config_registry="${registry_probe%%/openclaw/latest}"

  # Drop bash's command path cache so binaries moved by an uninstall or
  # reinstall are looked up again.
  hash -r 2>/dev/null || true
}

# ========== 断点续跑引擎 ==========

# Run one numbered stage unless an earlier run already completed it.
# Globals:   CURRENT_STAGE (read), TIMER_LOG (appended), STAGE_FILE (written)
# Arguments: $1 - stage number
#            $2 - display name
#            $3... - command (and arguments) implementing the stage
run_stage() {
  local stage_no=$1 stage_name=$2
  shift 2

  if [ "$CURRENT_STAGE" -ge "$stage_no" ]; then
    echo "跳过阶段 $stage_no: $stage_name（已完成）"
    return 0
  fi

  printf '\n=== 阶段 %s: %s ===\n' "$stage_no" "$stage_name"

  local began=$SECONDS
  "$@"

  # Reached only when the stage command succeeded: log its duration and
  # persist the stage number as the resume point.
  echo "${stage_name}=$((SECONDS - began))s" >> "$TIMER_LOG"
  echo "$stage_no" > "$STAGE_FILE"
}

# ========== 阶段函数 ==========

# Stage 1: prepare the host - system packages, Node tooling (nvm/npm/pnpm),
# npm registry choice, git URL rewriting, systemd user session, login
# profile entries and swap.
# Globals: REGISTRY_FILE (read); exports NVM_DIR, PNPM_HOME, PATH,
#          npm_config_*, XDG_RUNTIME_DIR; sets SWAP_SIZE, AVAILABLE_SPACE,
#          SWAP_FILE, SWAP_CREATED
# Exits 1 when jq, git or npm is unavailable, or when swap cannot be created.
stage_env_setup() {
  # Install base dependencies.
  echo "Installing dependencies..."
  if command -v apt-get &>/dev/null; then
    apt-get -o DPkg::Lock::Timeout=120 update -y
    apt-get -o DPkg::Lock::Timeout=120 install -y jq git curl unzip python3 python3-pip ffmpeg
    pip install playwright --break-system-packages 2>/dev/null || pip install playwright || true
  else
    yum install -y jq git curl unzip python3 python3-pip ffmpeg 2>/dev/null || true
    pip install playwright 2>/dev/null || pip3 install playwright 2>/dev/null || true
  fi

  # Verify that the critical dependencies really got installed.
  if ! command -v jq &>/dev/null; then
    echo "Error: jq installation failed"
    exit 1
  fi
  if ! command -v git &>/dev/null; then
    echo "Error: git installation failed"
    exit 1
  fi

  # Load the nvm environment.
  export NVM_DIR="$HOME/.nvm"
  [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"

  # npm must be available from here on.
  if ! command -v npm &>/dev/null; then
    echo "Error: npm not found. Please install Node.js 22+ first:"
    echo "  curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash"
    echo "  source ~/.nvm/nvm.sh"
    echo "  nvm install 22"
    exit 1
  fi

  # Create the nvm "current" symlink used by the openclaw gateway's PATH
  # detection; older images may not have it.
  # -n is required: without it, an existing "current" symlink is followed and
  # the new link is created inside the old version directory instead of
  # replacing "current" itself.
  if [ -n "$NVM_DIR" ] && command -v node &>/dev/null; then
    ln -sfn "$NVM_DIR/versions/node/$(node -v)" "$NVM_DIR/current"
  fi

  # Install pnpm (used to speed up plugin dependency installation).
  if ! command -v pnpm &>/dev/null; then
    echo "Installing pnpm..."
    npm install -g pnpm
  fi

  # Make sure the pnpm global bin directory exists and is on PATH.
  if [ -z "$PNPM_HOME" ]; then
    pnpm setup
    export PNPM_HOME="$HOME/.local/share/pnpm"
    export PATH="$PNPM_HOME:$PATH"
  fi

  # On Ubuntu pnpm defaults to hardlink mode, which makes the openclaw web UI
  # return 404; force copy mode.
  pnpm config set package-import-method copy -g

  # Prune the pnpm store (a dirty cache can make installs hang).
  pnpm store prune 2>/dev/null || true

  # Tune network parameters so slow registry metadata requests do not stall.
  # Environment variables are used so the user's global config is untouched.
  export npm_config_fetch_retries=3
  export npm_config_fetch_timeout=120000

  # Choose the fastest npm registry.
  local best_reg
  best_reg=$(select_fastest "npm registry" "$REGISTRY_FILE" \
    "https://registry.npmjs.org/openclaw/latest" \
    "https://mirrors.cloud.tencent.com/npm/openclaw/latest")
  if [ -n "$best_reg" ]; then
    # Strip the probe path suffix, keeping only the registry root.
    export npm_config_registry="${best_reg%%/openclaw/latest}"
  fi

  # Make git use HTTPS instead of SSH (avoids SSH key permission problems).
  git config --global --unset-all url."https://github.com/".insteadOf 2>/dev/null || true
  git config --global url."https://github.com/".insteadOf "ssh://git@github.com/"
  git config --global --add url."https://github.com/".insteadOf "git@github.com:"

  # Set up the systemd user session so the gateway service can run.
  echo "Setting up systemd user session..."
  loginctl enable-linger $(whoami) 2>/dev/null || true
  export XDG_RUNTIME_DIR=/run/user/$(id -u)
  mkdir -p "$XDG_RUNTIME_DIR"

  # Persist the environment in /etc/profile so later SSH logins get it too.
  if ! grep -q 'XDG_RUNTIME_DIR' /etc/profile; then
    cat >> /etc/profile << 'PROFILE'
loginctl enable-linger $(whoami) 2>/dev/null || true
export XDG_RUNTIME_DIR=/run/user/$(id -u)
mkdir -p "$XDG_RUNTIME_DIR" 2>/dev/null || true
PROFILE
  fi

  if ! grep -q 'PNPM_HOME' /etc/profile; then
    cat >> /etc/profile << 'PROFILE'
export PNPM_HOME="$HOME/.local/share/pnpm"
export PATH="$PNPM_HOME:$PATH"
PROFILE
  fi

  # Persist ~/.local/bin on PATH (skillhub's install directory).
  if ! grep -q '\.local/bin' /etc/profile; then
    cat >> /etc/profile << 'PROFILE'
export PATH="$HOME/.local/bin:$PATH"
PROFILE
  fi

  # Ensure there is enough swap before installing anything, to avoid OOM.
  echo "Checking swap..."
  SWAP_SIZE=$(free -m | awk '/^Swap:/ {print $2}')
  if [ "$SWAP_SIZE" -lt 4000 ]; then
    echo "Current swap: ${SWAP_SIZE}MB, need more swap..."

    # Free disk space on / in MB.
    AVAILABLE_SPACE=$(df -m / | awk 'NR==2 {print $4}')
    echo "Available disk space: ${AVAILABLE_SPACE}MB"

    SWAP_FILE="/swapfile"
    SWAP_CREATED=false

    # An existing swap file is switched off and removed first.
    if [ -f "$SWAP_FILE" ]; then
      echo "Removing existing swap file..."
      swapoff "$SWAP_FILE" 2>/dev/null || true
      rm -f "$SWAP_FILE"
    fi

    # Try an 8G swap file first.
    if [ "$AVAILABLE_SPACE" -gt 9000 ]; then
      echo "Creating 8G swap file..."
      if fallocate -l 8G "$SWAP_FILE" 2>/dev/null || dd if=/dev/zero of="$SWAP_FILE" bs=1M count=8192 status=progress; then
        SWAP_CREATED=true
      fi
    # Fall back to 4G when space is tighter.
    elif [ "$AVAILABLE_SPACE" -gt 5000 ]; then
      echo "Not enough space for 8G, creating 4G swap file..."
      if fallocate -l 4G "$SWAP_FILE" 2>/dev/null || dd if=/dev/zero of="$SWAP_FILE" bs=1M count=4096 status=progress; then
        SWAP_CREATED=true
      fi
    else
      echo "Error: Not enough disk space for swap (need at least 5GB free)"
      exit 1
    fi

    if [ "$SWAP_CREATED" = true ]; then
      chmod 600 "$SWAP_FILE"
      mkswap "$SWAP_FILE"
      swapon "$SWAP_FILE"

      # Persist the swap configuration across reboots.
      if ! grep -q "$SWAP_FILE" /etc/fstab; then
        echo "Adding swap to /etc/fstab for persistence..."
        echo "$SWAP_FILE none swap sw 0 0" >> /etc/fstab
      fi

      echo "Swap created and enabled: $(free -m | awk '/^Swap:/ {print $2}')MB"
    else
      echo "Error: Failed to create swap file"
      exit 1
    fi
  else
    echo "Swap is sufficient: ${SWAP_SIZE}MB"
  fi

  # Set swappiness (kept in line with the image).
  if ! grep -q 'vm.swappiness' /etc/sysctl.conf; then
    echo 'vm.swappiness=50' >> /etc/sysctl.conf
    sysctl -p 2>/dev/null || true
  fi
}

# Stage 2: install or update the openclaw package via pnpm and place a
# forwarding wrapper next to the node binary.
# Globals: VERSION (read), PNPM_HOME (read); sets CURRENT_VERSION
# Returns: 0 when already at the target version, and also when a pnpm update
#          fails (the existing version is deliberately kept).
stage_install_openclaw() {
  load_env

  # Nothing to do when the installed version already matches the target.
  CURRENT_VERSION=$(openclaw --version 2>/dev/null | head -n1 || echo "")
  if [ "$CURRENT_VERSION" = "$VERSION" ]; then
    echo "Already at version $VERSION, skipping openclaw upgrade"
    return 0
  fi

  # Decide between updating an existing install and a fresh install.
  if command -v pnpm &>/dev/null && pnpm list -g openclaw 2>/dev/null | grep -q openclaw; then
    # pnpm already manages openclaw: try an update; on failure keep the old
    # version and carry on.
    echo "Updating openclaw to $VERSION via pnpm..."
    if ! timeout --kill-after=10 300 env CI=true pnpm update -g "openclaw@$VERSION"; then
      echo "Warning: update failed, keeping current version"
      return 0
    fi
  else
    # pnpm has no openclaw yet: fresh install.
    echo "Installing openclaw@$VERSION..."
    timeout --kill-after=10 300 env CI=true pnpm install -g "openclaw@$VERSION"

    # Only after a successful install, remove legacy installs (the old
    # clawdbot package name and the old npm-based install).
    echo "Cleaning up legacy installations..."
    npm uninstall -g clawdbot &>/dev/null || true
    npm uninstall -g openclaw &>/dev/null || true
    pnpm uninstall -g clawdbot &>/dev/null || true
  fi

  # Remove stale files or symlinks possibly left in /usr/local/bin/.
  rm -f /usr/local/bin/openclaw /usr/local/bin/clawdbot

  # Drop bash's command path cache (the old openclaw path may be gone).
  hash -r 2>/dev/null || true

  local pnpm_bin="${PNPM_HOME:-$HOME/.local/share/pnpm}"
  if [ -x "$pnpm_bin/openclaw" ]; then
    # Resolve node with the `command -v` builtin rather than `which`: `which`
    # is not installed everywhere, and an empty result would turn into "."
    # via dirname, writing the wrapper into the current directory.
    local node_path node_bin
    node_path=$(command -v node 2>/dev/null || true)
    if [ -n "$node_path" ]; then
      node_bin=$(dirname "$node_path")
      if [ -d "$node_bin" ]; then
        # Wrapper beside node that forwards to the pnpm-installed openclaw.
        cat > "$node_bin/openclaw" << NVMWRAPPER
#!/bin/sh
exec "$pnpm_bin/openclaw" "\$@"
NVMWRAPPER
        chmod +x "$node_bin/openclaw"
      fi
    fi
  fi
}

# Stage 3: stop running openclaw processes, remove the managed plugins and
# their config references, migrate legacy config shapes, then run
# `openclaw doctor --fix`.
# Globals: HOME (read), WATCHDOG_PID (read); exports XDG_RUNTIME_DIR
stage_doctor() {
  # Stop the gateway service first so it does not race with doctor on the
  # config file.
  export XDG_RUNTIME_DIR=/run/user/$(id -u)
  systemctl --user stop openclaw-gateway 2>/dev/null || true

  # Terminate leftover openclaw processes. A bare `pkill -f openclaw` matches
  # full command lines, so it would also kill this script, its watchdog or
  # the shell that launched it whenever their command line mentions
  # "openclaw" (for example through the script's own file name). Those PIDs
  # are collected here and skipped.
  local protected=" $$ ${WATCHDOG_PID:-0} "
  local ancestor=$PPID
  while [ -n "$ancestor" ] && [ "$ancestor" -gt 1 ] 2>/dev/null; do
    protected+="$ancestor "
    ancestor=$(ps -o ppid= -p "$ancestor" 2>/dev/null | tr -d '[:space:]')
  done
  local victim
  for victim in $(pgrep -f "openclaw" 2>/dev/null); do
    case "$protected" in
      *" $victim "*) continue ;;
    esac
    kill "$victim" 2>/dev/null || true
  done
  sleep 2

  load_env

  # Plugins managed by this script: they are wiped here and reinstalled in a
  # later stage.
  local MANAGED_PLUGINS=("qqbot" "openclaw-qqbot" "ddingtalk" "wecom" "adp-openclaw" "yuanbao" "openclaw-plugin-yuanbao" "openclaw-weixin")

  # Only the managed plugin directories are removed; third-party plugins the
  # user installed are kept.
  echo "Cleaning managed plugin directories..."
  local plugin_id
  for plugin_id in "${MANAGED_PLUGINS[@]}"; do
    rm -rf "$HOME/.openclaw/extensions/${plugin_id}"
    rm -rf "$HOME/.clawdbot/extensions/${plugin_id}"
  done

  # Clean plugin references in the config files (managed plugins only;
  # third-party plugins and their configuration are preserved).
  local cfg
  for cfg in "$HOME/.openclaw/openclaw.json" "$HOME/.clawdbot/clawdbot.json"; do
    if [ -f "$cfg" ]; then
      # Step 1: migrate the wecom V1 flat format (root-level token) to the V2
      # bot format.
      if jq -e '.channels.wecom.token and ((.channels.wecom.bot or .channels.wecom.agent or .channels.wecom.accounts) | not)' "$cfg" > /dev/null 2>&1; then
        jq '.channels.wecom = {
          enabled: (.channels.wecom.enabled // true),
          bot: { token: .channels.wecom.token,
                 encodingAESKey: .channels.wecom.encodingAESKey,
                 streamPlaceholderContent: (.channels.wecom.streamPlaceholderContent // "正在思考..."),
                 welcomeText: (.channels.wecom.welcomeText // "你好！我是 AI 助手"),
                 dm: (.channels.wecom.dm // {"policy": "open"}) }
        }' "$cfg" > "$cfg.tmp" && mv "$cfg.tmp" "$cfg"
      fi

      # Step 2: rename the dingtalk channel to ddingtalk.
      if jq -e '.channels.dingtalk and (.channels.ddingtalk | not)' "$cfg" > /dev/null 2>&1; then
        jq '.channels.ddingtalk = .channels.dingtalk | del(.channels.dingtalk)' "$cfg" > "$cfg.tmp" && mv "$cfg.tmp" "$cfg"
      fi

      # Step 3: drop only the managed plugins' entries/installs, keeping
      # third-party plugin configuration and all channels.
      jq '
        .plugins.entries |= (del(.qqbot, ."openclaw-qqbot", .ddingtalk, .wecom, ."adp-openclaw", .yuanbao, ."openclaw-plugin-yuanbao", ."openclaw-weixin"))
        | .plugins.installs |= (del(.qqbot, ."openclaw-qqbot", .ddingtalk, .wecom, ."adp-openclaw", .yuanbao, ."openclaw-plugin-yuanbao", ."openclaw-weixin"))
        | if .plugins.allow then .plugins.allow |= map(select(. != "qqbot" and . != "skillhub")) else . end
      ' "$cfg" > "$cfg.tmp" && mv "$cfg.tmp" "$cfg"
    fi
  done

  # Run doctor (migrates ~/.clawdbot to ~/.openclaw, including agents/auth
  # data); a failure here is tolerated.
  echo "Running doctor..."
  openclaw doctor --fix --yes || true

  # Make sure the extensions directory exists after doctor has run.
  mkdir -p "$HOME/.openclaw/extensions"

  # Remove leftover legacy config files and directories.
  rm -f "$HOME/.openclaw/clawdbot.json"*
  rm -f "$HOME/.clawdbot/clawdbot.json"*
  rm -rf "$HOME/.clawdbot"

  echo ""
  echo "New version: $(openclaw --version)"
}

# Stage 4: register the openclaw gateway service.
# Returns: the exit status of `openclaw gateway install`.
stage_gateway() {
  load_env
  printf '%s\n' "Installing gateway service..."
  openclaw gateway install
}

# Stage 5: install the clawhub CLI globally with pnpm, bounded to 300s.
# Returns: the exit status of the timeout-wrapped install.
stage_clawhub() {
  load_env
  printf '%s\n' "Installing clawhub..."
  # CI=true keeps pnpm non-interactive.
  local -a install_cmd=(env CI=true pnpm install -g clawhub)
  timeout --kill-after=10 300 "${install_cmd[@]}"
}

# Stage 6: install the managed plugins into ~/.openclaw/extensions and
# register them in openclaw.json.
# Each plugin is downloaded with `npm pack`, unpacked, given its production
# dependencies with pnpm, and then recorded in the config.
# Exits 1 when a plugin tarball cannot be downloaded from either registry.
stage_plugins() {
  load_env

  echo "Installing plugins..."

  # Enable the official built-in feishu plugin.
  echo "Enabling official feishu plugin..."
  openclaw plugins enable feishu || true

  # Plugin table, one "spec|id" row each: spec is the install argument, id is
  # the identifier used under .plugins in the config.
  local -a PLUGIN_SPECS=(
    "@tencent-connect/openclaw-qqbot@1.6.3|openclaw-qqbot"
    "@largezhou/ddingtalk|ddingtalk"
    "@mocrane/wecom|wecom"
    "adp-openclaw|adp-openclaw"
    "openclaw-plugin-yuanbao@1.0.3|openclaw-plugin-yuanbao"
    "@tencent-weixin/openclaw-weixin|openclaw-weixin"
  )

  local cfg="$HOME/.openclaw/openclaw.json"
  local entry spec id

  # Pass 1: inspect every plugin and clean up incomplete installs.
  # All cleanup must finish before any further openclaw command runs, because
  # openclaw validates the whole config at startup: a single dirty entry (a
  # config record pointing at a missing directory) makes every command fail.
  local -a PLUGINS_TO_INSTALL=()
  for entry in "${PLUGIN_SPECS[@]}"; do
    spec="${entry%%|*}"
    id="${entry##*|}"
    local plugin_dir="$HOME/.openclaw/extensions/${id}"

    local has_install_record=false
    if [ -f "$cfg" ] && command -v jq &>/dev/null; then
      # Values are passed with --arg rather than spliced into the filter
      # text, so unusual characters cannot alter the jq program.
      if jq -e --arg id "$id" '.plugins.installs[$id]' "$cfg" > /dev/null 2>&1; then
        has_install_record=true
      fi
    fi

    if [ "$has_install_record" = true ] && [ -d "$plugin_dir" ]; then
      echo "Plugin $id already installed, skipping"
    else
      echo "Plugin $id needs install, cleaning up residue..."
      rm -rf "$plugin_dir"
      if [ -f "$cfg" ] && command -v jq &>/dev/null; then
        jq --arg id "$id" 'del(.plugins.entries[$id]) | del(.plugins.installs[$id])' "$cfg" > "$cfg.tmp" && mv "$cfg.tmp" "$cfg"
      fi
      PLUGINS_TO_INSTALL+=("$entry")
    fi
  done

  # Pass 2: download with npm pack, then install dependencies with
  # `pnpm install --prod`. npm pack can fetch remote packages (pnpm pack
  # cannot), while pnpm installs dependencies faster.
  for entry in "${PLUGINS_TO_INSTALL[@]}"; do
    spec="${entry%%|*}"
    id="${entry##*|}"
    local plugin_dir="$HOME/.openclaw/extensions/${id}"
    local tmp_dir
    tmp_dir=$(mktemp -d /tmp/openclaw-plugin-XXXXXX)

    echo "Installing $spec..."

    # Download the tarball with npm pack; if the configured mirror fails,
    # retry against the official registry.
    local tgz_file
    tgz_file=$(cd "$tmp_dir" && timeout --kill-after=10 120 npm pack "$spec" 2>/dev/null | tail -n1)
    if [ -z "$tgz_file" ] || [ ! -f "$tmp_dir/$tgz_file" ]; then
      echo "Mirror failed, retrying with official registry..."
      tgz_file=$(cd "$tmp_dir" && timeout --kill-after=10 120 npm pack "$spec" --registry https://registry.npmjs.org 2>/dev/null | tail -n1)
    fi
    if [ -z "$tgz_file" ] || [ ! -f "$tmp_dir/$tgz_file" ]; then
      echo "Error: failed to download $spec"
      rm -rf "$tmp_dir"
      exit 1
    fi

    # Unpack into the plugin directory, dropping the tarball's top-level
    # directory.
    mkdir -p "$plugin_dir"
    tar -xzf "$tmp_dir/$tgz_file" -C "$plugin_dir" --strip-components=1
    rm -rf "$tmp_dir"

    # Install production dependencies with pnpm, only if there are any.
    if jq -e '.dependencies | length > 0' "$plugin_dir/package.json" > /dev/null 2>&1; then
      echo "Installing dependencies for $id..."
      (cd "$plugin_dir" && timeout --kill-after=10 300 env CI=true pnpm install --prod)
    fi

    # Read the installed version from the package manifest.
    local version
    version=$(jq -r '.version // "unknown"' "$plugin_dir/package.json")

    # Record the plugin in the config (mirrors what `openclaw plugins
    # install` would write).
    if [ -f "$cfg" ] && command -v jq &>/dev/null; then
      jq --arg id "$id" \
         --arg spec "$spec" \
         --arg path "$plugin_dir" \
         --arg ver "$version" \
         --arg ts "$(date -u +%Y-%m-%dT%H:%M:%S.000Z)" \
         '.plugins.entries[$id] = {"enabled": true}
          | .plugins.installs[$id] = {"source": "npm", "spec": $spec, "installPath": $path, "version": $ver, "installedAt": $ts}' \
         "$cfg" > "$cfg.tmp" && mv "$cfg.tmp" "$cfg"
    fi

    echo "Plugin $id@$version installed"
  done
}

# Stage 7: install optional skills plus the agent-browser runtime
# dependencies (Chromium, shared libraries, CJK fonts).
# Everything here is best-effort: errexit is switched off for the duration
# of the stage and restored at the end.
# Globals: sets SKILLS_DIR, SKILLS_BASE_URL, LOCK_FILE; defines the helper
#          functions manual_install_skill and install_skill
stage_skills() {
  # Skills are optional; no failure here may block the upgrade.
  set +e
  load_env

  echo "Installing skills..."

  SKILLS_DIR="$HOME/.openclaw/workspace/skills"
  mkdir -p "$SKILLS_DIR"

  # Install the skillhub CLI (domestic mirror, replaces clawhub).
  echo "Installing skillhub..."
  export PATH="$HOME/.local/bin:$PATH"
  rm -rf  ~/.openclaw/extensions/skillhub/index.ts
  rm -rf  ~/.openclaw/extensions/skillhub/openclaw.plugin.json
  timeout --kill-after=10 120 bash -c 'curl -fsSL https://skillhub-1388575217.cos.ap-guangzhou.myqcloud.com/install/install.sh | bash' || true

  SKILLS_BASE_URL="https://wry-manatee-359.convex.site/api/v1/download"
  LOCK_FILE="$HOME/.openclaw/workspace/.clawhub/lock.json"

  # Download a skill archive directly from convex (not rate limited), unpack
  # it into SKILLS_DIR and record it in the lock file.
  # Arguments: $1 - skill slug
  # Returns:   0 on success, 1 when the download or extraction fails
  manual_install_skill() {
    local skill_name="$1"
    local download_url="${SKILLS_BASE_URL}?slug=${skill_name}"
    local target_dir="${SKILLS_DIR}/${skill_name}"

    echo "[${skill_name}] Installing via manual download..."

    # Use a private, unpredictable scratch directory instead of a fixed /tmp
    # path, and remove it on every exit path.
    local tmp_dir
    tmp_dir=$(mktemp -d /tmp/skill_install.XXXXXX) || {
      echo "[${skill_name}] Download failed!"
      return 1
    }
    local headers_file="${tmp_dir}/${skill_name}_headers.txt"
    local zip_file="${tmp_dir}/${skill_name}.zip"

    mkdir -p "$target_dir" "$(dirname "$LOCK_FILE")"

    if ! curl -fSL --max-time 60 -D "$headers_file" -o "$zip_file" "$download_url"; then
      echo "[${skill_name}] Download failed!"
      rm -rf "$tmp_dir"
      return 1
    fi

    # The version is taken from the Content-Disposition file name, expected
    # to look like "<slug>-<version>.zip"; it stays "unknown" otherwise.
    local filename
    filename=$(grep -i 'Content-Disposition' "$headers_file" | sed -n 's/.*filename=["]*\([^"]*\)["]*\r*/\1/p')
    local version="unknown"
    if [ -n "$filename" ]; then
      version=$(echo "$filename" | sed "s/^${skill_name}-//;s/\.zip$//")
    fi

    if ! unzip -o "$zip_file" -d "$target_dir"; then
      echo "[${skill_name}] Extract failed!"
      rm -rf "$tmp_dir"
      return 1
    fi

    rm -rf "$tmp_dir"

    # Installation timestamp in milliseconds since the epoch.
    local installed_at
    installed_at=$(date +%s%3N)
    if [ -f "$LOCK_FILE" ]; then
      jq --arg name "$skill_name" \
         --arg ver "$version" \
         --argjson ts "$installed_at" \
         '.skills[$name] = {"version": $ver, "installedAt": $ts}' \
         "$LOCK_FILE" > "$LOCK_FILE.tmp" && mv "$LOCK_FILE.tmp" "$LOCK_FILE"
    else
      jq -n --arg name "$skill_name" \
            --arg ver "$version" \
            --argjson ts "$installed_at" \
            '{"version": 1, "skills": {($name): {"version": $ver, "installedAt": $ts}}}' \
            > "$LOCK_FILE"
    fi

    echo "[${skill_name}] Installed v${version}"
  }

  # Install one skill, trying in order: skillhub, manual download, clawhub.
  # Arguments: $1 - skill slug
  # Returns:   0 as soon as one method succeeds, 1 when all of them fail
  install_skill() {
    local slug="$1"
    # Preferred: skillhub (domestic COS source).
    if command -v skillhub >/dev/null 2>&1 && timeout --kill-after=10 120 skillhub --dir "$SKILLS_DIR" install --force "$slug"; then
      return 0
    fi
    echo "skillhub failed for $slug, falling back to manual download..."
    # Fallback: direct download from convex (not rate limited).
    if manual_install_skill "$slug"; then
      return 0
    fi
    echo "manual download failed for $slug, falling back to clawhub..."
    # Last resort: clawhub, retried with a delay to stay under rate limits.
    if command -v clawhub >/dev/null 2>&1; then
      local attempt=1 max_retries=3 retry_delay=10
      while [ $attempt -le $max_retries ]; do
        echo "clawhub install $slug (attempt $attempt/$max_retries)..."
        if timeout --kill-after=10 120 clawhub install --force "$slug"; then
          return 0
        fi
        echo "clawhub failed, waiting ${retry_delay}s before retry..."
        sleep $retry_delay
        attempt=$((attempt + 1))
      done
    fi
    echo "Error: all install methods failed for $slug" >&2
    return 1
  }

  install_skill "tavily-search" || true
  install_skill "summarize" || true
  install_skill "agent-browser" || true
  install_skill "find-skills" || true
  install_skill "github" || true
  install_skill "obsidian" || true
  install_skill "weather" || true

  # Extra dependencies for agent-browser (global package, Playwright browser,
  # CJK fonts).
  echo "Installing agent-browser dependencies..."
  timeout --kill-after=10 300 env CI=true pnpm install -g agent-browser || true

  # Install the Chromium build that Playwright needs.
  local pw_dir="$HOME/.cache/ms-playwright/chromium-1208"
  if [ -f "$pw_dir/INSTALLATION_COMPLETE" ]; then
    echo "Chromium already installed, skipping"
  else
    local cos_url="https://blueprint-mirrors-1325194254.cos.ap-guangzhou.myqcloud.com/openclaw/chromium-1208.tar.gz"

    mkdir -p "$HOME/.cache/ms-playwright"
    echo "Installing Chromium from COS..."
    if ! curl -fSL --max-time 120 "$cos_url" | tar -xzf - -C "$HOME/.cache/ms-playwright/"; then
      echo "COS download failed, falling back to official CDN..."
      yes | timeout --kill-after=10 300 agent-browser install --with-deps || true
    fi
  fi

  # Install system dependencies (shared libraries Chromium needs at runtime
  # plus CJK fonts).
  echo "Installing system dependencies..."
  if command -v apt-get &>/dev/null; then
    timeout --kill-after=10 300 apt-get -o DPkg::Lock::Timeout=120 install -y libxcb-shm0 libx11-xcb1 libx11-6 libxcb1 libxext6 libxrandr2 \
      libxcomposite1 libxcursor1 libxdamage1 libxfixes3 libxi6 libgtk-3-0 libpangocairo-1.0-0 \
      libpango-1.0-0 libatk1.0-0 libcairo-gobject2 libcairo2 libgdk-pixbuf-2.0-0 libxrender1 \
      libasound2t64 libfreetype6 libfontconfig1 libdbus-1-3 libnss3 libnspr4 libatk-bridge2.0-0 \
      libdrm2 libxkbcommon0 libatspi2.0-0 libcups2 libxshmfence1 libgbm1 libglib2.0-0 \
      fonts-noto-cjk 2>/dev/null || true
  fi
  dnf install -y google-noto-sans-cjk-fonts 2>/dev/null || yum install -y google-noto-sans-cjk-fonts 2>/dev/null || true

  # Restore errexit for the remaining stages.
  set -e
}

# Stage 8: publish command wrappers, record the blueprint id, apply the
# final config adjustments and restart the gateway.
# Globals: BLUEPRINT_ID (read), PNPM_HOME (read), HOME (read)
stage_finalize() {
  load_env

  # Write wrapper scripts into /usr/local/bin/ (the pnpm global bin directory
  # is not on the default PATH).
  echo "Creating symlinks..."
  local pnpm_bin="${PNPM_HOME:-$HOME/.local/share/pnpm}"
  local cmd
  if [ -x "$pnpm_bin/openclaw" ]; then
    for cmd in openclaw clawdbot; do
      cat > "/usr/local/bin/$cmd" << WRAPPER
#!/bin/sh
export XDG_RUNTIME_DIR=\${XDG_RUNTIME_DIR:-/run/user/\$(id -u)}
exec "$pnpm_bin/openclaw" "\$@"
WRAPPER
      chmod +x "/usr/local/bin/$cmd"
    done
  fi

  echo "$BLUEPRINT_ID" > /etc/lighthouse/blueprint_id

  # All config edits go through "$cfg.tmp" beside the config file (same
  # filesystem, no shared fixed name in /tmp) and are skipped when the config
  # file does not exist.
  local cfg="$HOME/.openclaw/openclaw.json"
  if [ -f "$cfg" ] && command -v jq &>/dev/null; then
    # After the upgrade, raise tools.profile from messaging to full (unlocks
    # the complete tool permissions).
    local current_profile
    current_profile=$(jq -r '.tools.profile // empty' "$cfg" 2>/dev/null)
    if [ "$current_profile" = "messaging" ]; then
      echo "Upgrading tools.profile from messaging to full..."
      jq '.tools.profile = "full"' "$cfg" > "$cfg.tmp" && mv "$cfg.tmp" "$cfg"
    fi

    jq '.gateway.mode = "local"' "$cfg" > "$cfg.tmp" && mv "$cfg.tmp" "$cfg"

    # Make sure openclaw-weixin is on the plugins.allow list (only when such
    # a list exists and does not contain it yet).
    jq 'if .plugins.allow and (["openclaw-weixin"] - .plugins.allow | length > 0) then .plugins.allow += ["openclaw-weixin"] else . end' "$cfg" > "$cfg.tmp" && mv "$cfg.tmp" "$cfg"
  fi

  # Restart the gateway so it loads the latest config (plugins, tools.profile
  # and so on may have changed in earlier stages).
  echo "Restarting gateway..."
  systemctl --user restart openclaw-gateway 2>/dev/null || true
}

# ========== Run the stages ==========

# One row per stage, in execution order: "<number>|<display name>|<function>".
STAGE_PLAN=(
  "1|环境准备|stage_env_setup"
  "2|OpenClaw安装|stage_install_openclaw"
  "3|Doctor修复|stage_doctor"
  "4|Gateway安装|stage_gateway"
  "5|Clawhub安装|stage_clawhub"
  "6|插件安装|stage_plugins"
  "7|Skills安装|stage_skills"
  "8|收尾|stage_finalize"
)

for plan_row in "${STAGE_PLAN[@]}"; do
  IFS='|' read -r plan_no plan_label plan_fn <<< "$plan_row"
  run_stage "$plan_no" "$plan_label" "$plan_fn"
done

# ========== All stages done ==========

# Stop the watchdog first, so leftover child processes (e.g. under TAT)
# cannot cause a false timeout.
cleanup_watchdog

# Clear the resume marker and the registry cache, then publish success.
rm -f "$STAGE_FILE"
rm -f "$REGISTRY_FILE"
echo "COMPLETED:$VERSION" > /etc/lighthouse/upgrade_status

TOTAL=$((SECONDS - SCRIPT_START))
echo "总耗时=${TOTAL}s" >> "$TIMER_LOG"
printf '\n=== Upgrade Complete in %ss ===\n' "$TOTAL"
