k8s_mig

k8s_mig

需要将老k8s集群(k8sv1.19.16 CONTAINER-RUNTIME:docker://20.10.24,apiVersion:apps/v1beta2) 命名空间prod里面所有业务,迁移到新k8sv1.24.6 CONTAINER-RUNTIME:docker://25.0.5(已部署完成)上
以下资源是否包含所有需要考虑的完整资源
deployments,statefulsets,daemonsets,services,configmaps,ingresses,jobs,cronjobs,serviceaccounts,roles,rolebindings,networkpolicies,secret,pvc
以上资源在新集群的apply正确顺序应该是什么

迁移

  1. 运行导出脚本
  2. 运行清洗脚本

清洗

# 使用 yq 或自定义脚本清理 YAML
cat prod-backup-raw.yaml | yq eval '
  del(.items[].status) |
  del(.items[].metadata.uid) |
  del(.items[].metadata.resourceVersion) |
  del(.items[].metadata.creationTimestamp) |
  del(.items[].metadata.selfLink) |
  del(.items[].metadata.annotations."kubectl.kubernetes.io/last-applied-configuration") |
  del(.items[].spec.clusterIP) |  # 让新集群重新分配
  del(.items[].spec.nodeName)     # 移除节点绑定
' - > prod-clean.yaml

# 检查并更新 API 版本
# - extensions/v1beta1 Ingress → networking.k8s.io/v1
# - apps/v1beta1/Deployment → apps/v1
# - batch/v1beta1 CronJob → batch/v1 (1.21+)

参考命令

查看所有deploy涉及的镜像文件
kubectl -n prod get deployments -o jsonpath='{range .items[*]}{range .spec.template.spec.containers[*]}{.image}{"\n"}{end}{end}' | sort | uniq
提取仓库地址
kubectl -n prod get pods -o jsonpath='{range .items[*]}{range .spec.containers[*]}{.image}{"\n"}{end}{end}' | \
  awk -F '/' '{if (NF>1) print $1; else print "docker.io"}' | \
  sort | uniq

1. 查看prod命名空间的镜像拉取密钥(Secret)
kubectl -n prod get secrets | grep dockerconfigjson

2. 查看具体密钥的仓库配置(替换为实际Secret名称)
kubectl -n prod get secret harbor-dianbai-secret -o jsonpath='{.data.\.dockerconfigjson}' | base64 -d
3. 登录仓库
docker login harbor-dianbai.igo.com -u 用户名 -p 密码
4. 拉取镜像
cat prod_mir/mir.txt | grep -v "^$" | while read img; do echo "处理镜像:$img"; tar_name=$(echo $img | sed 's/[\/:.]/_/g').tar; if docker pull $img; then docker save $img -o prod_mir/$tar_name; echo "✅ $img 保存成功 → prod_mir/$tar_name"; else echo "❌ $img 拉取失败!"; fi; done
5. 配置仓库
5.1 docker仓库
登录后,Docker 会生成认证文件 ~/.docker/config.json,需将其同步到 /etc/docker/ 目录(全局生效)
docker login harbor-dianbai.igo.com -u 仓库用户名 -p 仓库密码 
mkdir -p /etc/docker
cp ~/.docker/config.json /etc/docker/
systemctl restart docker
 验证(无需登录即可拉取镜像)
docker pull harbor-dianbai.igo.com/mom/6x575i8_prod:r116

5.2 K8s 配置镜像拉取密钥
Docker 登录仅对节点生效,K8s Pod 调度时需要集群级的镜像拉取密钥(ImagePullSecret),才能让 Pod 拉取私有仓库镜像
先建立个密钥
kubectl create secret docker-registry harbor-secret \
  --namespace=prod \
  --docker-server=harbor-dianbai.igo.com \
  --docker-username=仓库用户名 \
  --docker-password=仓库密码 \
  --docker-email=admin@example.com  # 邮箱可任意填写
创建命名空间默认拉取密钥
kubectl patch serviceaccount default -n prod -p '{"imagePullSecrets": [{"name": "harbor-secret"}]}'
注意:imagePullSecrets 是命名空间级别的配置,并不存在真正的"集群默认密钥"。
修改 kube-system 的 default ServiceAccount 只影响 kube-system 内使用该 SA 的 Pod;
如需所有命名空间生效,需要在每个命名空间分别创建 Secret 并 patch 对应的 ServiceAccount(可用脚本或 kyverno 等工具自动同步)。

导出脚本

#!/bin/bash
#===============================================================================
#
#          FILE: k8s-export.sh
#
#         USAGE: ./k8s-export.sh [NAMESPACE]
#
#   DESCRIPTION: Export Kubernetes namespace resources into a structured tree.
#                - Native resources grouped per type, one file per instance
#                - CRD instances kept under a separate crd/ directory, only
#                  for CRDs actually used in the target namespace
#                - Writes a manifest (list.txt) with counts and name lists
#                - Execution log saved to out<date>.log
#
#       OPTIONS: NAMESPACE - target namespace, defaults to "prod"
#  REQUIREMENTS: kubectl, with a current context that can reach the source
#                cluster
#         NOTES: Cluster-scoped resources (PV/SC) and system-generated
#                objects (e.g. endpoints) are intentionally excluded.
#       VERSION: 1.1
#       CREATED: 2024
#===============================================================================

set -euo pipefail

# --- configuration -----------------------------------------------------------
NAMESPACE="${1:-prod}"
OUTPUT_DIR="./${NAMESPACE}_out"
RESOURCES_DIR="$OUTPUT_DIR/resources"
DATE_STR=$(date '+%Y%m%d_%H%M%S')
LOG_FILE="$OUTPUT_DIR/out${DATE_STR}.log"

# ANSI colors for terminal output
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Namespaced, user-managed resource types to export (whitespace-separated;
# consumed via unquoted word-splitting in the export loops below).
RESOURCES="
    configmaps
    secrets
    serviceaccounts
    roles
    rolebindings
    persistentvolumeclaims
    deployments
    statefulsets
    daemonsets
    jobs
    cronjobs
    services
    ingresses
    networkpolicies
    poddisruptionbudgets
    limitranges
    resourcequotas
    horizontalpodautoscalers
"

# --- logging helpers ---------------------------------------------------------

# log MESSAGE...
# Print MESSAGE to stdout and append the same text to $LOG_FILE.
log() {
    printf '%s\n' "$*"
    printf '%s\n' "$*" >> "$LOG_FILE"
}

# log_color COLOR MESSAGE...
# Print MESSAGE to stdout wrapped in the given ANSI COLOR escape; the plain
# (uncolored) text is appended to $LOG_FILE.
# Fix: the original used `echo -e "${color}$@${NC}"`, which concatenates a
# string with "$@" (ShellCheck SC2145) and relies on non-portable `echo -e`;
# printf with %b expands the stored escape sequences safely.
log_color() {
    local color=$1
    shift
    printf '%b%s%b\n' "$color" "$*" "$NC"
    printf '%s\n' "$*" >> "$LOG_FILE"
}

# --- output tree & log bootstrap ---------------------------------------------
mkdir -p "$OUTPUT_DIR/crd"

# Seed the log file: the first redirection truncates, the group writes the
# header lines in one pass.
{
    echo "========================================"
    echo "K8s资源导出工具 - 执行日志"
    echo "命名空间: $NAMESPACE"
    echo "输出目录: $OUTPUT_DIR"
    echo "开始时间: $(date '+%Y-%m-%d %H:%M:%S')"
    echo "========================================"
    echo ""
} > "$LOG_FILE"

# Console banner (mirrored into the log by the helpers).
log_color "$GREEN" "========================================"
log_color "$GREEN" "K8s资源导出工具"
log "命名空间: $NAMESPACE"
log "输出目录: $OUTPUT_DIR"
log "日志文件: $LOG_FILE"
log_color "$GREEN" "========================================"
log ""

# Header of the manifest file (list.txt).
{
    echo "# 资源清单 - 命名空间: $NAMESPACE"
    echo "# 生成时间: $(date '+%Y-%m-%d %H:%M:%S')"
    echo ""
} > "$OUTPUT_DIR/list.txt"

# --- [1/2] export native K8s resources ---------------------------------------
log_color "$YELLOW" "[1/2] 导出原生K8s资源..."
for res in $RESOURCES; do
    # List instance names; unknown/empty resource types are skipped silently.
    names=$(kubectl get "$res" -n "$NAMESPACE" --ignore-not-found --no-headers 2>/dev/null | awk '{print $1}' || true)

    if [ -z "$names" ]; then
        continue
    fi

    # One directory per resource type.
    res_dir="$RESOURCES_DIR/$res"
    mkdir -p "$res_dir"

    count=0
    name_list=""
    for name in $names; do
        # An object can vanish between the list and the get. The original
        # left this unguarded: under `set -e` a single failed get (stderr
        # suppressed!) aborted the whole export. Log the miss and continue.
        if ! kubectl get "$res" "$name" -n "$NAMESPACE" -o yaml > "$res_dir/${name}.yaml" 2>/dev/null; then
            log_color "$YELLOW" "  ! 跳过 $res/$name(导出失败)"
            rm -f "$res_dir/${name}.yaml"   # drop the empty/partial file
            continue
        fi
        ((count++)) || true
        name_list="$name_list $name"
    done

    # Manifest line format: "<type>: <count> name1 name2 ..."
    echo "$res: $count$name_list" >> "$OUTPUT_DIR/list.txt"
    log_color "$GREEN" "  ✓ $res: $count个实例$name_list"
done

# --- [2/2] export CRD instances (only those used in the namespace) -----------
log ""
log_color "$YELLOW" "[2/2] 导出CRD资源..."
crd_count=0
for crd in $(kubectl get crd --no-headers 2>/dev/null | awk '{print $1}'); do
    # Directory name: plural without the API group (e.g. "certificates").
    short=$(echo "$crd" | cut -d. -f1)

    # Query with the FULL name (<plural>.<group>). The original queried the
    # truncated short name, which is ambiguous when it collides with a
    # built-in type or with another CRD sharing the same plural.
    names=$(kubectl get "$crd" -n "$NAMESPACE" --ignore-not-found --no-headers 2>/dev/null | awk '{print $1}' || true)

    if [ -z "$names" ]; then
        continue
    fi

    # One subdirectory per CRD type.
    crd_type_dir="$OUTPUT_DIR/crd/$short"
    mkdir -p "$crd_type_dir"

    instance_count=0
    name_list=""
    for name in $names; do
        # Guarded so a single failed get cannot abort the script under
        # `set -e` (same rationale as the native-resource loop).
        kubectl get "$crd" "$name" -n "$NAMESPACE" -o yaml > "$crd_type_dir/${name}.yaml" 2>/dev/null || true
        ((instance_count++)) || true
        name_list="$name_list $name"
    done

    echo "crd/$short: $instance_count$name_list" >> "$OUTPUT_DIR/list.txt"
    log_color "$GREEN" "  ✓ CRD $short: $instance_count个实例$name_list"
    ((crd_count++)) || true
done

# --- summary -----------------------------------------------------------------
log ""
log_color "$GREEN" "========================================"
log "导出完成!"
log "目录结构:"
log "  $OUTPUT_DIR/"
log "  ├── list.txt          # 资源清单"
log "  ├── out${DATE_STR}.log    # 执行日志"
log "  ├── resources/        # 原生K8s资源"
for res in $RESOURCES; do
    # `[ -d ... ]` never writes to stderr; the original's `2>/dev/null`
    # on the test command was a no-op and has been removed.
    if [ -d "$RESOURCES_DIR/$res" ]; then
        log "  │   └── $res/"
    fi
done
if [ "$crd_count" -gt 0 ]; then
    log "  └── crd/              # CRD资源 ($crd_count种类型)"
fi
log ""
log "资源清单: $OUTPUT_DIR/list.txt"
log "执行日志: $LOG_FILE"
log_color "$GREEN" "========================================"

# Record the finish time in the log.
echo "" >> "$LOG_FILE"
echo "结束时间: $(date '+%Y-%m-%d %H:%M:%S')" >> "$LOG_FILE"

清洗脚本

#!/bin/bash
#===============================================================================
#
#          FILE: k8s-clean.sh
#
#         USAGE: ./k8s-clean.sh [EXPORT_DIR]
#
#   DESCRIPTION: Sanitize exported K8s resources: strip cluster-specific
#                fields and auto-convert deprecated API versions.
#                - Reads the resource files produced by the export script
#                - Writes cleaned copies to a new directory; originals are
#                  never modified
#                - Converts extensions/v1beta1 Ingress to networking.k8s.io/v1
#
#       OPTIONS: EXPORT_DIR - export directory path, defaults to ./prod_out
#  REQUIREMENTS: yq (mikefarah YAML processor)
#         NOTES: Cleaned output goes to ${EXPORT_DIR}_cleaned
#       VERSION: 1.2
#       CREATED: 2024
#===============================================================================

set -euo pipefail

# --- configuration -----------------------------------------------------------
INPUT_DIR="${1:-./prod_out}"
OUTPUT_DIR="${INPUT_DIR}_cleaned"
DATE_STR=$(date '+%Y%m%d_%H%M%S')
LOG_FILE="${OUTPUT_DIR}/clean${DATE_STR}.log"

# Fail fast when yq is missing — every transformation below depends on it.
if ! command -v yq &> /dev/null; then
    echo "错误: 需要安装 yq (https://github.com/mikefarah/yq)"
    exit 1
fi

# ANSI colors for terminal output
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
BLUE='\033[0;34m'
NC='\033[0m'

# --- logging helpers ---------------------------------------------------------

# log MESSAGE...
# Print MESSAGE to stdout and best-effort append it to $LOG_FILE (the log
# directory may not exist yet when the earliest messages fire, hence the
# deliberate `|| true`).
log() {
    printf '%s\n' "$*"
    printf '%s\n' "$*" >> "$LOG_FILE" 2>/dev/null || true
}

# log_color COLOR MESSAGE...
# Print MESSAGE in the given ANSI COLOR on stdout; append the plain text to
# $LOG_FILE. Fix: the original `echo -e "${color}$@${NC}"` concatenated a
# string with "$@" (ShellCheck SC2145) and relied on non-portable `echo -e`.
log_color() {
    local color=$1
    shift
    printf '%b%s%b\n' "$color" "$*" "$NC"
    printf '%s\n' "$*" >> "$LOG_FILE" 2>/dev/null || true
}

# --- output dir & log bootstrap ----------------------------------------------
mkdir -p "$OUTPUT_DIR"

# Seed the log file: the first redirection truncates, the group writes the
# header lines in one pass.
{
    echo "========================================"
    echo "K8s资源清洗工具 - 执行日志"
    echo "输入目录: $INPUT_DIR"
    echo "输出目录: $OUTPUT_DIR"
    echo "开始时间: $(date '+%Y-%m-%d %H:%M:%S')"
    echo "========================================"
    echo ""
} > "$LOG_FILE"

# Console banner (mirrored into the log by the helpers).
log_color "$GREEN" "========================================"
log "K8s资源清洗工具"
log "输入: $INPUT_DIR"
log "输出: $OUTPUT_DIR"
log "日志: $LOG_FILE"
log_color "$GREEN" "========================================"
log ""

# BASE_CLEAN: yq v4 expression applied to every resource file. Strips
# cluster-generated metadata (uid/resourceVersion/creationTimestamp/...),
# Rancher bookkeeping annotations, the entire .status subtree, and node
# bindings so the manifest can be re-applied on a fresh cluster.
# The '#' lines INSIDE the single-quoted string below are yq expression
# comments, parsed (and ignored) by yq itself — not by the shell.
# NOTE(review): yq's del() treats a quoted key literally, so
# "lifecycle.cattle.io/create.*" only deletes an annotation with that exact
# name — the ".*" is NOT a wildcard. If wildcard deletion of the
# lifecycle.cattle.io/create-* annotations is intended, this needs a
# with_entries(... | test(...)) filter instead — TODO confirm.
BASE_CLEAN='
    # metadata集群特定字段
    del(.metadata.uid) |
    del(.metadata.resourceVersion) |
    del(.metadata.creationTimestamp) |
    del(.metadata.selfLink) |
    del(.metadata.managedFields) |
    del(.metadata.generation) |
    del(.metadata.annotations."kubectl.kubernetes.io/last-applied-configuration") |

    # Rancher特定注解
    del(.metadata.annotations."field.cattle.io/publicEndpoints") |
    del(.metadata.annotations."cattle.io/timestamp") |
    del(.metadata.annotations."field.cattle.io/ports") |
    del(.metadata.annotations."field.cattle.io/ingressId") |
    del(.metadata.annotations."field.cattle.io/serviceId") |
    del(.metadata.annotations."lifecycle.cattle.io/create.*") |
    del(.metadata.annotations."deployment.kubernetes.io/revision") |
    del(.metadata.annotations."deployment.kubernetes.io/revision-history") |
    del(.metadata.annotations."deployment.kubernetes.io/desired-replicas") |
    del(.metadata.annotations."deployment.kubernetes.io/max-replicas") |

    # 状态字段
    del(.status) |

    # spec中的运行时信息
    del(.spec.nodeName) |
    del(.spec.template.spec.nodeName) |

    # template中的metadata清理(针对Deployment/StatefulSet/DaemonSet/Job等)
    del(.spec.template.metadata.creationTimestamp) |
    del(.spec.template.metadata.uid) |
    del(.spec.template.metadata.resourceVersion) |

    # template中的Rancher注解
    del(.spec.template.metadata.annotations."cattle.io/timestamp") |
    del(.spec.template.metadata.annotations."field.cattle.io/ports") |
    del(.spec.template.metadata.annotations."field.cattle.io/publicEndpoints") |
    del(.spec.template.metadata.annotations."field.cattle.io/ingressId") |
    del(.spec.template.metadata.annotations."field.cattle.io/serviceId")
'

# convert_ingress INPUT_FILE OUTPUT_FILE
# Clean an Ingress manifest; when it still uses extensions/v1beta1, also
# convert it to networking.k8s.io/v1:
#   - backend {serviceName, servicePort} -> backend.service.{name, port}
#   - servicePort keeps its meaning: integer (or all-digit string) ports
#     become port.number, NAMED ports become port.name — the original ran
#     `tonumber` unconditionally, which crashed on named ports like "http"
#   - pathType defaults to Prefix where missing (required in v1)
#   - the kubernetes.io/ingress.class annotation is promoted to
#     spec.ingressClassName BEFORE being removed — the original deleted the
#     annotation first and then read it, so the class was always reset to
#     the "nginx" fallback
convert_ingress() {
    local input_file=$1
    local output_file=$2

    # Fast path: already on a modern API version — base cleaning only.
    if ! grep -q "apiVersion: extensions/v1beta1" "$input_file" 2>/dev/null; then
        yq eval "$BASE_CLEAN" "$input_file" > "$output_file"
        return 0
    fi

    log_color "$YELLOW" "    检测到旧版Ingress API,自动转换为 networking.k8s.io/v1"

    yq eval '
        .apiVersion = "networking.k8s.io/v1" |

        '"$BASE_CLEAN"' |

        # Rewrite every rule backend into the v1 shape.
        with(.spec.rules;
            . |= map(
                with(.http.paths;
                    . |= map(
                        .backend.servicePort as $port |
                        .backend = {
                            "service": {
                                "name": .backend.serviceName,
                                "port": (
                                    if ($port | tag) == "!!int" then
                                        {"number": $port}
                                    elif ($port | test("^[0-9]+$")) then
                                        {"number": ($port | tonumber)}
                                    else
                                        {"name": $port}
                                    end
                                )
                            }
                        } |

                        # pathType is mandatory in v1; default to Prefix.
                        .pathType = (.pathType // "Prefix")
                    )
                )
            )
        ) |

        # Promote the legacy annotation to spec.ingressClassName (fallback
        # "nginx"), then drop the annotation.
        .spec.ingressClassName = (.metadata.annotations."kubernetes.io/ingress.class" // "nginx") |
        del(.metadata.annotations."kubernetes.io/ingress.class")
    ' "$input_file" > "$output_file"
}

# clean_file INPUT_FILE OUTPUT_FILE RESOURCE_TYPE
# Apply BASE_CLEAN plus per-type cleanup to INPUT_FILE and write the result
# to OUTPUT_FILE. Returns non-zero when yq fails on the file.
clean_file() {
    local input_file=$1
    local output_file=$2
    local resource_type=$3

    # Ingress may additionally need an API-version conversion.
    if [[ "$resource_type" == "ingresses" ]]; then
        if convert_ingress "$input_file" "$output_file"; then
            return 0
        else
            return 1
        fi
    fi

    local clean_expr="$BASE_CLEAN"

    case "$resource_type" in
        services)
            # Let the target cluster allocate fresh cluster IPs / IP families.
            clean_expr="$clean_expr | del(.spec.clusterIP) | del(.spec.clusterIPs) | del(.spec.ipFamily) | del(.spec.ipFamilies)"
            ;;
        persistentvolumeclaims)
            # Drop the old PV binding so the claim can re-bind in the new cluster.
            clean_expr="$clean_expr | del(.spec.volumeName) | del(.metadata.annotations.\"pv.kubernetes.io/bind-completed\") | del(.metadata.annotations.\"pv.kubernetes.io/bound-by-controller\")"
            ;;
        jobs)
            # Fix: exported Jobs carry a controller-generated .spec.selector
            # and controller-uid/job-name pod labels; re-applying them as-is
            # is rejected by the API server (immutable selector / uid
            # mismatch), so strip them and let the controller regenerate.
            clean_expr="$clean_expr | del(.spec.selector) | del(.spec.template.metadata.labels.\"controller-uid\") | del(.spec.template.metadata.labels.\"job-name\")"
            ;;
        cronjobs)
            # CronJobs nest the pod template one level deeper (jobTemplate).
            clean_expr="$clean_expr | del(.spec.jobTemplate.spec.template.metadata.creationTimestamp) | del(.spec.jobTemplate.spec.template.metadata.uid) | del(.spec.jobTemplate.spec.template.metadata.resourceVersion)"
            ;;
        *)
            # Workloads, configmaps/secrets, CRDs: base cleaning is enough
            # (workload templates are already handled inside BASE_CLEAN).
            ;;
    esac

    # NB: yq stderr is deliberately discarded; failures are surfaced by the
    # caller via the return status.
    if yq eval "$clean_expr" "$input_file" > "$output_file" 2>/dev/null; then
        return 0
    else
        return 1
    fi
}

# --- pass 1: native K8s resources --------------------------------------------
total_files=0
success_files=0
converted_ingress=0

if [ -d "$INPUT_DIR/resources" ]; then
    log_color "$YELLOW" "[1/2] 清洗原生K8s资源..."

    for type_dir in "$INPUT_DIR/resources"/*/; do
        [ -d "$type_dir" ] || continue

        res_name=$(basename "$type_dir")
        mkdir -p "$OUTPUT_DIR/resources/$res_name"

        file_count=0
        for src in "$type_dir"/*.yaml; do
            [ -f "$src" ] || continue

            filename=$(basename "$src")
            dst="$OUTPUT_DIR/resources/$res_name/$filename"

            if clean_file "$src" "$dst" "$res_name"; then
                success_files=$((success_files + 1))
                # Count Ingresses that actually went through API conversion
                # (detected on the ORIGINAL file, before cleaning).
                if [[ "$res_name" == "ingresses" ]] && grep -q "apiVersion: extensions/v1beta1" "$src" 2>/dev/null; then
                    converted_ingress=$((converted_ingress + 1))
                fi
            else
                log_color "$RED" "  ✗ 失败: $res_name/$filename"
            fi
            total_files=$((total_files + 1))
            file_count=$((file_count + 1))
        done

        if [ "$file_count" -gt 0 ]; then
            log_color "$GREEN" "  ✓ $res_name: $file_count个文件"
        fi
    done
fi

# --- pass 2: CRD instances ---------------------------------------------------
if [ -d "$INPUT_DIR/crd" ]; then
    log ""
    log_color "$YELLOW" "[2/2] 清洗CRD资源..."

    for type_dir in "$INPUT_DIR/crd"/*/; do
        [ -d "$type_dir" ] || continue

        crd_name=$(basename "$type_dir")
        mkdir -p "$OUTPUT_DIR/crd/$crd_name"

        file_count=0
        for src in "$type_dir"/*.yaml; do
            [ -f "$src" ] || continue

            filename=$(basename "$src")
            dst="$OUTPUT_DIR/crd/$crd_name/$filename"

            # CRDs take the generic cleaning path inside clean_file.
            if clean_file "$src" "$dst" "crd"; then
                success_files=$((success_files + 1))
            else
                log_color "$RED" "  ✗ 失败: crd/$crd_name/$filename"
            fi
            total_files=$((total_files + 1))
            file_count=$((file_count + 1))
        done

        if [ "$file_count" -gt 0 ]; then
            log_color "$GREEN" "  ✓ $crd_name: $file_count个文件"
        fi
    done
fi

# --- wrap-up -----------------------------------------------------------------
# Carry the manifest over so the cleaned tree is self-describing.
if [ -f "$INPUT_DIR/list.txt" ]; then
    cp "$INPUT_DIR/list.txt" "$OUTPUT_DIR/list.txt"
    log ""
    log "已复制 list.txt"
fi

log ""
log_color "$GREEN" "========================================"
log "清洗完成!"
log "总文件: $total_files, 成功: $success_files"
if [ "$converted_ingress" -gt 0 ]; then
    log_color "$BLUE" "API转换: $converted_ingress个Ingress从extensions/v1beta1转为networking.k8s.io/v1"
fi
log "输出目录: $OUTPUT_DIR"
log ""
log "清洗内容:"
log "  [系统字段] uid/resourceVersion/creationTimestamp/selfLink/managedFields/generation/status"
log "  [Rancher] field.cattle.io/*, cattle.io/timestamp, deployment.kubernetes.io/revision"
log "  [Service] clusterIP/clusterIPs"
log "  [PVC] volumeName"
log "  [Ingress] 自动转换extensions/v1beta1→networking.k8s.io/v1, backend格式转换"
log ""
log "原文件保留: $INPUT_DIR (未修改)"
log_color "$GREEN" "========================================"

# Record the finish time in the log.
{
    echo ""
    echo "结束时间: $(date '+%Y-%m-%d %H:%M:%S')"
} >> "$LOG_FILE"

igozhang 2021