2024-05-18

Linux自动化部署：构建高效运维流水线

自动化部署是现代DevOps实践的核心组成部分，通过自动化工具和流程，可以显著提升部署效率、减少人为错误、确保部署一致性。本文将深入探讨Linux环境下的自动化部署方案和最佳实践。

Shell脚本自动化部署

1. 基础部署脚本

#!/bin/bash

# 应用自动化部署脚本

# Deployment settings. Every value can be overridden from the environment
# (for example `APP_VERSION=1.2.0 <script>`), so releasing a new version no
# longer requires editing this file. The defaults are the original values.
APP_NAME="${APP_NAME:-myapp}"
APP_VERSION="${APP_VERSION:-1.0.0}"
DEPLOY_DIR="${DEPLOY_DIR:-/opt/apps}"
BACKUP_DIR="${BACKUP_DIR:-/opt/backups}"
LOG_FILE="${LOG_FILE:-/var/log/deploy.log}"

# Print a timestamped message to stdout and append it to the log file.
# Arguments: $1 - message text
# Globals:   LOG_FILE (read) - path of the file the message is appended to
function log() {
    # The path is quoted so a log file name containing spaces or glob
    # characters is treated as one file instead of being word-split.
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}

# Report a fatal error through log() and terminate the script with status 1.
# Arguments: $1 - description of the failure
error_exit() {
    local reason="$1"

    log "ERROR: ${reason}"
    exit 1
}

# Copy the currently deployed application into a timestamped directory under
# $BACKUP_DIR. Does nothing (apart from logging) when no deployment exists.
# Globals: APP_NAME, DEPLOY_DIR, BACKUP_DIR (read)
# Exits through error_exit when the backup directory cannot be created or
# the copy fails.
function backup_current_version() {
    log "备份当前版本..."

    if [ -d "$DEPLOY_DIR/$APP_NAME" ]; then
        # Declared separately so the assignment does not hide a failing
        # command substitution behind the exit status of `local`.
        local backup_name
        backup_name="${APP_NAME}_$(date +%Y%m%d_%H%M%S)"

        # Quoted and checked: an unquoted path with spaces created the wrong
        # directories, and the failure only surfaced later in cp.
        mkdir -p "$BACKUP_DIR" || error_exit "无法创建备份目录: $BACKUP_DIR"

        # -a keeps ownership, modes and symlinks, so a restored backup has
        # the same permissions as the deployment it was taken from.
        cp -a "$DEPLOY_DIR/$APP_NAME" "$BACKUP_DIR/$backup_name" || error_exit "备份失败"
        log "备份完成: $BACKUP_DIR/$backup_name"
    fi
}

# Download the release archive for $APP_NAME/$APP_VERSION to
# /tmp/${APP_NAME}-${APP_VERSION}.tar.gz.
# Globals: APP_NAME, APP_VERSION (read)
# Exits through error_exit when the download fails or yields an empty file.
function download_new_version() {
    log "下载新版本 $APP_VERSION..."

    local download_url="https://releases.example.com/${APP_NAME}-${APP_VERSION}.tar.gz"
    local temp_file="/tmp/${APP_NAME}-${APP_VERSION}.tar.gz"

    if ! wget -O "$temp_file" "$download_url"; then
        # `wget -O` creates the target before transferring, so a failed
        # download leaves an empty or partial file behind; remove it.
        rm -f -- "$temp_file"
        error_exit "下载失败"
    fi

    # -s (exists and is non-empty) instead of -f: a zero-byte archive would
    # pass an existence check and only fail later during extraction.
    # NOTE(review): this is still not a real integrity check; verifying a
    # published checksum would be stronger if the release server offers one.
    if [ ! -s "$temp_file" ]; then
        rm -f -- "$temp_file"
        error_exit "下载文件不存在或为空"
    fi

    log "下载完成: $temp_file"
}

# Unpack the downloaded archive and replace the live deployment with it.
# Expects /tmp/${APP_NAME}-${APP_VERSION}.tar.gz to contain a top-level
# directory named $APP_NAME.
# Globals: APP_NAME, APP_VERSION, DEPLOY_DIR (read)
# Exits through error_exit when extraction or installation fails.
function deploy_new_version() {
    log "部署新版本..."

    local temp_file="/tmp/${APP_NAME}-${APP_VERSION}.tar.gz"
    local temp_dir
    local bin_file

    # A fresh private directory avoids stale files from an earlier run and
    # the predictable /tmp name used previously.
    temp_dir=$(mktemp -d "/tmp/${APP_NAME}-deploy.XXXXXX") || error_exit "创建临时目录失败"

    if ! tar -xzf "$temp_file" -C "$temp_dir"; then
        rm -rf -- "$temp_dir"
        error_exit "解压失败"
    fi

    # Verify the payload BEFORE stopping the service and deleting the live
    # copy; otherwise a malformed archive leaves the host with no application.
    if [ ! -d "$temp_dir/$APP_NAME" ]; then
        rm -rf -- "$temp_dir"
        error_exit "部署失败: 压缩包中缺少 $APP_NAME 目录"
    fi

    # Best effort: on a first deployment the unit may not exist yet.
    systemctl stop "$APP_NAME" 2>/dev/null

    mkdir -p "$DEPLOY_DIR" || error_exit "部署失败"
    # :? aborts instead of expanding to "/" if either variable is empty.
    rm -rf -- "${DEPLOY_DIR:?}/${APP_NAME:?}"
    mv "$temp_dir/$APP_NAME" "$DEPLOY_DIR/" || error_exit "部署失败"

    chown -R app:app "$DEPLOY_DIR/$APP_NAME" \
        || log "WARNING: 设置属主失败: $DEPLOY_DIR/$APP_NAME"

    # The glob must sit outside the quotes; quoted, chmod received a literal
    # "*" and no file was ever made executable.
    for bin_file in "$DEPLOY_DIR/$APP_NAME"/bin/*; do
        [ -e "$bin_file" ] || continue
        chmod +x "$bin_file"
    done

    rm -rf -- "$temp_dir" "$temp_file"

    log "部署完成"
}

# Start the application's systemd unit, enable it at boot and confirm that it
# is still active five seconds later.
# Globals: APP_NAME (read) - name of the systemd unit
# Exits through error_exit when the unit fails to start or is not active
# after the wait.
function start_service() {
    log "启动服务..."

    # The unit name is quoted so it always reaches systemctl as one argument.
    systemctl start "$APP_NAME" || error_exit "服务启动失败"

    # Enabling at boot does not affect the running service, so a failure is
    # reported but does not abort the deployment.
    systemctl enable "$APP_NAME" || log "WARNING: 无法设置服务开机自启: $APP_NAME"

    # Give the process a moment to crash if it is going to.
    sleep 5

    if systemctl is-active --quiet "$APP_NAME"; then
        log "服务启动成功"
    else
        error_exit "服务启动失败"
    fi
}

# Poll an HTTP health endpoint until it answers successfully.
# Arguments (all optional, defaults are the original hard-coded values):
#   $1 - URL to probe            (default http://localhost:8080/health)
#   $2 - maximum number of tries (default 30)
#   $3 - seconds between tries   (default 10)
# Returns 0 as soon as one probe succeeds; exits through error_exit when
# every attempt fails.
function health_check() {
    local health_url="${1:-http://localhost:8080/health}"
    local max_attempts="${2:-30}"
    local interval="${3:-10}"
    local attempt

    log "执行健康检查..."

    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
        # --max-time bounds each probe so a hung connection cannot stall the
        # loop indefinitely.
        if curl -f -s --max-time 10 "$health_url" > /dev/null; then
            log "健康检查通过"
            return 0
        fi

        log "健康检查失败，重试 $attempt/$max_attempts"

        # No point in waiting after the final attempt.
        if (( attempt < max_attempts )); then
            sleep "$interval"
        fi
    done

    error_exit "健康检查失败"
}

# Deployment entry point: runs backup, download, install, service start and
# health check in that order, logging a banner before and after.
main() {
    local step

    log "开始部署 $APP_NAME v$APP_VERSION"

    for step in \
        backup_current_version \
        download_new_version \
        deploy_new_version \
        start_service \
        health_check; do
        "$step"
    done

    log "部署完成！"
}

# Run the deployment. Command-line arguments are not forwarded to main.
main

2. 回滚脚本

#!/bin/bash

# 应用回滚脚本

# Settings read by the rollback functions: application (and systemd unit)
# name, where the live copy and the backups are kept, and the log file.
APP_NAME=myapp
DEPLOY_DIR=/opt/apps
BACKUP_DIR=/opt/backups
LOG_FILE=/var/log/rollback.log

# Print a timestamped message to stdout and append it to the log file.
# Arguments: $1 - message text
# Globals:   LOG_FILE (read) - path of the file the message is appended to
function log() {
    # Quoted so a log path containing spaces is not split into several files.
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}

# Replace the live deployment with a named backup.
# Arguments: $1 - name of a directory under $BACKUP_DIR
# Globals:   APP_NAME, BACKUP_DIR, DEPLOY_DIR (read)
# The current deployment is moved aside to
# "$DEPLOY_DIR/${APP_NAME}_rollback_<timestamp>" rather than deleted. If the
# restore fails, that copy is moved back and the script exits with status 1.
# Also exits with status 1 when no version is given, the backup does not
# exist, or the service cannot be started afterwards.
function rollback_to_version() {
    local backup_version=$1

    if [ -z "$backup_version" ]; then
        log "请指定要回滚的版本"
        list_available_backups
        exit 1
    fi

    local backup_path="$BACKUP_DIR/$backup_version"

    if [ ! -d "$backup_path" ]; then
        log "备份版本不存在: $backup_path"
        exit 1
    fi

    log "回滚到版本: $backup_version"

    local live_path="$DEPLOY_DIR/$APP_NAME"
    local aside_path=""

    # A stop failure (e.g. the unit is not installed) does not prevent the
    # files from being restored, so it is only reported.
    systemctl stop "$APP_NAME" || log "WARNING: 停止服务失败，继续回滚"

    # Keep the current version so the rollback itself can be undone.
    if [ -d "$live_path" ]; then
        aside_path="${live_path}_rollback_$(date +%Y%m%d_%H%M%S)"
        if ! mv "$live_path" "$aside_path"; then
            log "ERROR: 无法移走当前版本: $live_path"
            systemctl start "$APP_NAME"
            exit 1
        fi
    fi

    # -a preserves ownership and modes of the backed-up files.
    if ! cp -a "$backup_path" "$live_path"; then
        log "ERROR: 恢复备份失败: $backup_path"
        # Discard any partial copy, then put the previous version back so the
        # host is not left without an application.
        rm -rf -- "${DEPLOY_DIR:?}/${APP_NAME:?}"
        if [ -n "$aside_path" ]; then
            mv "$aside_path" "$live_path"
        fi
        systemctl start "$APP_NAME"
        exit 1
    fi

    if ! systemctl start "$APP_NAME"; then
        log "ERROR: 服务启动失败"
        exit 1
    fi

    log "回滚完成"
}

# Print the names of the backup directories under $BACKUP_DIR whose name
# contains $APP_NAME, one per line. These are the values accepted by
# rollback_to_version.
# Globals: APP_NAME, BACKUP_DIR (read)
# Returns 0 when at least one backup was listed, 1 otherwise.
function list_available_backups() {
    log "可用的备份版本:"

    local entry
    local found=1

    # A glob replaces `ls | grep`: it copes with spaces in the path and only
    # matches on the entry name, not on owner or date columns.
    for entry in "$BACKUP_DIR"/*"$APP_NAME"*; do
        # Skips the unexpanded pattern when nothing matches, and plain files,
        # which rollback_to_version would reject anyway.
        [ -d "$entry" ] || continue
        printf '%s\n' "${entry##*/}"
        found=0
    done

    return "$found"
}

# Entry point: without arguments list the available backups; otherwise roll
# back to the backup named by the first argument.
if [ $# -eq 0 ]; then
    list_available_backups
else
    # Quoted so a backup name is passed through as a single word.
    rollback_to_version "$1"
fi

Ansible自动化部署

1. Ansible Playbook

---
# Application deployment playbook.
# Select the release with `-e version=<tag>`; without it "latest" is used.
- name: Deploy Application
  hosts: app_servers
  become: yes
  vars:
    app_name: myapp
    app_version: "{{ version | default('latest') }}"
    deploy_dir: /opt/apps
    app_user: app
    app_group: app

  tasks:
    # The user task assigns this as the primary group, so it has to exist
    # before the user is created.
    - name: Create application group
      group:
        name: "{{ app_group }}"
        system: yes

    - name: Create application user
      user:
        name: "{{ app_user }}"
        group: "{{ app_group }}"
        system: yes
        shell: /bin/bash
        home: "{{ deploy_dir }}"

    # Registered before "Create directories" so the result tells whether a
    # previous deployment existed, not whether this run just created the
    # directory.
    - name: Check for an existing deployment
      stat:
        path: "{{ deploy_dir }}/{{ app_name }}"
      register: app_dir_stat

    - name: Create directories
      file:
        path: "{{ item }}"
        state: directory
        owner: "{{ app_user }}"
        group: "{{ app_group }}"
        mode: '0755'
      loop:
        - "{{ deploy_dir }}"
        - "{{ deploy_dir }}/{{ app_name }}"
        - /var/log/{{ app_name }}
        - /etc/{{ app_name }}

    # Errors are ignored because the unit does not exist on a first run.
    - name: Stop application service
      systemd:
        name: "{{ app_name }}"
        state: stopped
      ignore_errors: yes

    # The original condition referenced `ansible_stat`, which no task
    # registers; it now uses the result registered above.
    - name: Backup current version
      archive:
        path: "{{ deploy_dir }}/{{ app_name }}"
        dest: "{{ deploy_dir }}/{{ app_name }}_backup_{{ ansible_date_time.epoch }}.tar.gz"
      when: app_dir_stat.stat.exists

    - name: Download application package
      get_url:
        url: "https://releases.example.com/{{ app_name }}-{{ app_version }}.tar.gz"
        dest: "/tmp/{{ app_name }}-{{ app_version }}.tar.gz"
        mode: '0644'

    - name: Extract application
      unarchive:
        src: "/tmp/{{ app_name }}-{{ app_version }}.tar.gz"
        dest: "{{ deploy_dir }}"
        owner: "{{ app_user }}"
        group: "{{ app_group }}"
        remote_src: yes

    - name: Install application configuration
      template:
        src: "{{ app_name }}.conf.j2"
        dest: "/etc/{{ app_name }}/{{ app_name }}.conf"
        owner: "{{ app_user }}"
        group: "{{ app_group }}"
        mode: '0644'
      notify: restart application

    - name: Install systemd service
      template:
        src: "{{ app_name }}.service.j2"
        dest: "/etc/systemd/system/{{ app_name }}.service"
        mode: '0644'
      notify:
        - reload systemd
        - restart application

    - name: Start and enable application
      systemd:
        name: "{{ app_name }}"
        state: started
        enabled: yes
        daemon_reload: yes

    - name: Wait for application to start
      wait_for:
        port: 8080
        host: "{{ ansible_default_ipv4.address }}"
        delay: 10
        timeout: 60

    # `until` makes the retry loop explicit; some Ansible releases ignore
    # `retries` when no `until` condition is given.
    - name: Health check
      uri:
        url: "http://{{ ansible_default_ipv4.address }}:8080/health"
        method: GET
        status_code: 200
      register: health_result
      until: health_result.status == 200
      retries: 5
      delay: 10

  handlers:
    - name: reload systemd
      systemd:
        daemon_reload: yes

    - name: restart application
      systemd:
        name: "{{ app_name }}"
        state: restarted

2. Ansible配置文件

# ansible.cfg
# Project-level Ansible settings: inventory location, logging and SSH tuning.
[defaults]
inventory = inventory/hosts
# NOTE(review): with host key checking off, SSH host keys are not verified,
# which permits man-in-the-middle attacks. Confirm this is acceptable outside
# of test environments.
host_key_checking = False
retry_files_enabled = False
log_path = /var/log/ansible.log

[ssh_connection]
# Reuse one SSH connection per host for up to 60 seconds.
ssh_args = -o ControlMaster=auto -o ControlPersist=60s
pipelining = True

# inventory/hosts
# Static inventory: three application servers, one database server and one
# load balancer, each addressed by a fixed IP.
[app_servers]
app1.example.com ansible_host=192.168.1.10
app2.example.com ansible_host=192.168.1.11
app3.example.com ansible_host=192.168.1.12

# Connection settings applied to every host in app_servers.
[app_servers:vars]
ansible_user=deploy
ansible_ssh_private_key_file=~/.ssh/deploy_key

[database_servers]
db1.example.com ansible_host=192.168.1.20

[load_balancers]
lb1.example.com ansible_host=192.168.1.30

Docker容器化部署

1. Dockerfile

# Multi-stage build: compile in a builder stage, ship only runtime files.
FROM node:16-alpine AS builder

WORKDIR /app
COPY package*.json ./
# Install ALL dependencies here. Build tooling normally lives in
# devDependencies, so installing production-only packages before
# `npm run build` makes the build fail.
RUN npm ci

COPY . .
RUN npm run build
# Remove devDependencies after the build so the node_modules directory copied
# into the final image contains runtime packages only.
RUN npm prune --omit=dev

# Production image
FROM node:16-alpine AS production

# Create the group first and add the user to it; without -G the user is not a
# member of the group named in the --chown options below.
RUN addgroup -g 1001 -S nodejs \
    && adduser -S nextjs -u 1001 -G nodejs

WORKDIR /app

# Copy build output and runtime dependencies from the builder stage.
COPY --from=builder --chown=nextjs:nodejs /app/dist ./dist
COPY --from=builder --chown=nextjs:nodejs /app/node_modules ./node_modules
COPY --from=builder --chown=nextjs:nodejs /app/package.json ./package.json

# Run as the unprivileged user.
USER nextjs

EXPOSE 3000

CMD ["npm", "start"]

2. Docker Compose部署

# docker-compose.yml
# Application stack: Node app, PostgreSQL, Redis and an nginx front end on a
# shared bridge network.
version: '3.8'

services:
  app:
    build:
      context: .
      dockerfile: Dockerfile
    image: myapp:${VERSION:-latest}
    container_name: myapp
    restart: unless-stopped
    ports:
      - "3000:3000"
    environment:
      - NODE_ENV=production
      - DATABASE_URL=${DATABASE_URL}
      - REDIS_URL=${REDIS_URL}
    volumes:
      - ./logs:/app/logs
      - ./uploads:/app/uploads
    networks:
      - app-network
    depends_on:
      - database
      - redis
    healthcheck:
      # The alpine-based Node image does not ship curl, so a curl probe marks
      # the container unhealthy forever; BusyBox wget is always present.
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Failures during start-up do not count against `retries`.
      start_period: 20s

  database:
    image: postgres:13-alpine
    container_name: myapp-db
    restart: unless-stopped
    environment:
      - POSTGRES_DB=${DB_NAME}
      - POSTGRES_USER=${DB_USER}
      - POSTGRES_PASSWORD=${DB_PASSWORD}
    volumes:
      - db_data:/var/lib/postgresql/data
      # Read-only: the container only needs to read the init script.
      - ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro
    networks:
      - app-network

  redis:
    image: redis:6-alpine
    container_name: myapp-redis
    restart: unless-stopped
    command: redis-server --appendonly yes
    volumes:
      - redis_data:/data
    networks:
      - app-network

  nginx:
    image: nginx:alpine
    container_name: myapp-nginx
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
    volumes:
      # Configuration and certificates are mounted read-only.
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
      - ./ssl:/etc/nginx/ssl:ro
    networks:
      - app-network
    depends_on:
      - app

volumes:
  db_data:
  redis_data:

networks:
  app-network:
    driver: bridge

3. 容器部署脚本

#!/bin/bash

# Docker容器部署脚本

# Deployment parameters. The image tag is taken from the first command-line
# argument and falls back to "latest".
VERSION="${1:-latest}"
APP_NAME='myapp'
COMPOSE_FILE='docker-compose.yml'
ENV_FILE='.env.production'

# Build and (re)start the compose stack, then wait for a healthy container.
# Globals: APP_NAME, VERSION, ENV_FILE, COMPOSE_FILE (read)
# Returns 0 once `docker-compose ps` reports a healthy container. Exits with
# status 1 when the env file is missing, the build or start fails, or no
# container becomes healthy within about 60 seconds.
function deploy_containers() {
    echo "部署容器应用 $APP_NAME:$VERSION"

    # Check the environment file.
    if [ ! -f "$ENV_FILE" ]; then
        echo "环境配置文件不存在: $ENV_FILE"
        exit 1
    fi

    # The env file was verified but never handed to compose, which reads only
    # ".env" by default; pass it explicitly. An array keeps every argument
    # intact even if a path contains spaces.
    local -a compose=(docker-compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE")
    local attempt

    # Best effort: pulling can fail for the locally built image, which is
    # not necessarily published to a registry.
    "${compose[@]}" pull || echo "WARNING: 部分镜像拉取失败，继续部署" >&2

    # Build BEFORE stopping anything, so a broken build leaves the running
    # stack untouched.
    if ! VERSION="$VERSION" "${compose[@]}" build app; then
        echo "镜像构建失败，已保留当前运行的容器" >&2
        exit 1
    fi

    # Stop the old containers.
    "${compose[@]}" down

    # Start the new containers.
    if ! VERSION="$VERSION" "${compose[@]}" up -d; then
        echo "容器启动失败" >&2
        "${compose[@]}" logs
        exit 1
    fi

    # Poll instead of one fixed sleep: the first health probe may not have
    # completed after 30 seconds, which made the single check unreliable.
    echo "等待服务启动..."
    for (( attempt = 1; attempt <= 12; attempt++ )); do
        sleep 5
        # Matches both "Up (healthy)" and "Up 10 seconds (healthy)".
        if "${compose[@]}" ps | grep -Eq 'Up.*\(healthy\)'; then
            echo "部署成功！"
            return 0
        fi
    done

    echo "部署失败，检查容器状态:"
    "${compose[@]}" ps
    "${compose[@]}" logs
    exit 1
}

# Reclaim disk space after a deployment by running `docker image prune -f`
# followed by `docker system prune -f`.
cleanup_old_images() {
    local scope

    echo "清理旧镜像..."
    for scope in image system; do
        docker "$scope" prune -f
    done
}

# Deploy first, then prune. deploy_containers exits the script on its failure
# paths, so the cleanup only runs after a successful deployment.
deploy_containers
cleanup_old_images

CI/CD流水线集成

1. GitLab CI配置

# .gitlab-ci.yml
# Pipeline: run tests, build and push the image, deploy to staging
# automatically and to production on manual approval.
stages:
  - test
  - build
  - deploy

variables:
  DOCKER_REGISTRY: registry.example.com
  APP_NAME: myapp

test:
  stage: test
  image: node:16-alpine
  script:
    - npm ci
    - npm run test
    - npm run lint
  coverage: '/Lines\s*:\s*(\d+\.?\d*)%/'
  artifacts:
    reports:
      coverage_report:
        coverage_format: cobertura
        path: coverage/cobertura-coverage.xml

build:
  stage: build
  image: docker:20.10.16
  services:
    - docker:20.10.16-dind
  before_script:
    # Feed the password on stdin: with `-p` it appears in the process list
    # and docker prints a warning about insecure usage.
    - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"
  script:
    - docker build -t $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA .
    - docker push $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
    - docker tag $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA $CI_REGISTRY_IMAGE:latest
    - docker push $CI_REGISTRY_IMAGE:latest
  only:
    - main

deploy_staging:
  stage: deploy
  image: alpine:latest
  before_script:
    - apk add --no-cache openssh-client
    - eval $(ssh-agent -s)
    - echo "$SSH_PRIVATE_KEY" | tr -d '\r' | ssh-add -
    - mkdir -p ~/.ssh
    - chmod 700 ~/.ssh
  script:
    # NOTE(review): StrictHostKeyChecking=no skips host key verification;
    # consider a pinned known_hosts entry instead.
    - ssh -o StrictHostKeyChecking=no deploy@staging.example.com
      "cd /opt/apps &&
       docker pull $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA &&
       VERSION=$CI_COMMIT_SHA docker-compose up -d"
  environment:
    name: staging
    url: https://staging.example.com
  only:
    - main

deploy_production:
  stage: deploy
  image: alpine:latest
  before_script:
    - apk add --no-cache openssh-client ansible
    - eval $(ssh-agent -s)
    - echo "$SSH_PRIVATE_KEY" | tr -d '\r' | ssh-add -
  script:
    - ansible-playbook -i inventory/production deploy.yml
      --extra-vars "version=$CI_COMMIT_SHA"
  environment:
    name: production
    url: https://example.com
  # Production releases need a manual trigger.
  when: manual
  only:
    - main

2. 部署监控脚本

#!/bin/bash

# 部署监控脚本

APP_NAME="myapp"
MONITOR_LOG="/var/log/deploy-monitor.log"
# The webhook URL is a credential, so take it from the environment when it is
# set; the placeholder is kept as the fallback.
SLACK_WEBHOOK="${SLACK_WEBHOOK:-https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK}"

# Post a deployment notification to the Slack webhook.
# Arguments: $1 - status; "failed" selects the "danger" colour, anything
#                 else uses "good"
#            $2 - message text
# Globals:   SLACK_WEBHOOK (read)
# Returns 0 when curl succeeds, 1 otherwise (a warning goes to stderr).
function send_notification() {
    local status=$1
    local message=$2
    local color="good"

    if [ "$status" = "failed" ]; then
        color="danger"
    fi

    # Escape backslashes, double quotes and newlines so an arbitrary message
    # cannot break the JSON document. Backslashes go first so the escapes
    # added afterwards are not doubled.
    local escaped=${message//\\/\\\\}
    escaped=${escaped//\"/\\\"}
    escaped=${escaped//$'\n'/\\n}

    local payload
    payload=$(printf '{"attachments":[{"color":"%s","title":"部署通知","text":"%s","ts":%s}]}' \
        "$color" "$escaped" "$(date +%s)")

    # --max-time keeps a hanging Slack request from blocking the caller.
    if ! curl -sS --max-time 10 -X POST -H 'Content-type: application/json' \
        --data "$payload" \
        "$SLACK_WEBHOOK" > /dev/null; then
        echo "WARNING: 发送通知失败" >&2
        return 1
    fi
}

# Poll the application's health endpoint until it responds or a timeout
# elapses, logging to $MONITOR_LOG and notifying through send_notification.
# Arguments: $1 - deployment identifier used in log lines and notifications
#            $2 - timeout in seconds            (optional, default 1800)
#            $3 - seconds between health probes (optional, default 30)
#            $4 - health URL (optional, default http://localhost:8080/health)
# Globals:   MONITOR_LOG (read)
# Returns 0 after a successful probe; exits the script with status 1 on
# timeout.
function monitor_deployment() {
    local deployment_id=$1
    local timeout_secs="${2:-1800}"
    local poll_interval="${3:-30}"
    local health_url="${4:-http://localhost:8080/health}"
    # Declared apart from the assignments so a failing `date` is not masked
    # by the exit status of `local`.
    local start_time current_time elapsed

    start_time=$(date +%s)

    # Redirect targets are quoted: unquoted, a log path containing spaces is
    # rejected by bash as an ambiguous redirect and nothing is written.
    echo "[$(date)] 开始监控部署: $deployment_id" >> "$MONITOR_LOG"

    while true; do
        current_time=$(date +%s)
        elapsed=$((current_time - start_time))

        # Timeout check.
        if [ "$elapsed" -gt "$timeout_secs" ]; then
            send_notification "failed" "部署超时: $deployment_id"
            echo "[$(date)] 部署超时: $deployment_id" >> "$MONITOR_LOG"
            exit 1
        fi

        # Probe the health endpoint; --max-time bounds a hung request.
        if curl -f -s --max-time 10 "$health_url" > /dev/null; then
            send_notification "success" "部署成功: $deployment_id"
            echo "[$(date)] 部署成功: $deployment_id" >> "$MONITOR_LOG"
            break
        fi

        sleep "$poll_interval"
    done
}

# Entry point: a deployment id is required as the first argument.
if [ $# -eq 0 ]; then
    # Usage errors belong on stderr.
    echo "用法: $0 <deployment_id>" >&2
    exit 1
fi

# Quoted so the id is passed through as a single word.
monitor_deployment "$1"

最佳实践与总结

自动化部署原则

幂等性：多次执行产生相同结果
可回滚：支持快速回滚到上一版本
零停机：蓝绿部署或滚动更新
可监控：完整的部署日志和监控

部署策略

蓝绿部署：维护两套环境，快速切换
滚动更新：逐步替换实例，保持服务可用
金丝雀发布：小流量验证，逐步扩大范围

安全考虑

权限控制：最小权限原则
密钥管理：使用密钥管理系统
网络隔离：部署网络与生产网络隔离
审计日志：记录所有部署操作

结语

Linux自动化部署是提升运维效率的关键技术。通过合理选择工具、设计流程、建立监控，可以构建稳定可靠的自动化部署体系，为业务快速迭代提供强有力的技术保障。

编外计划 - 日志

To be, or not to be — that is the question.

Linux自动化部署：构建高效运维流水线

Shell脚本自动化部署

1. 基础部署脚本

2. 回滚脚本

Ansible自动化部署

1. Ansible Playbook

2. Ansible配置文件

Docker容器化部署

1. Dockerfile

2. Docker Compose部署

3. 容器部署脚本

CI/CD流水线集成

1. GitLab CI配置

2. 部署监控脚本

最佳实践与总结

自动化部署原则

部署策略

安全考虑

结语