automated terminal push

This commit is contained in:
lenape
2025-07-16 01:11:58 +00:00
parent 727b3af1a1
commit 2d3302c588
8 changed files with 1327 additions and 252 deletions

Jenkinsfile vendored

@@ -49,6 +49,12 @@ pipeline {
// Enterprise settings
TF_IN_AUTOMATION = 'true'
TF_INPUT = 'false'
// Ansible configuration
ANSIBLE_HOST_KEY_CHECKING = 'False'
// Fix: Use relative path without leading slash
ANSIBLE_CONFIG = './ansible/ansible.cfg'
// Fix: Define log group as variable to avoid shell interpolation issues
ECS_LOG_GROUP = "/ecs/nvhi-atsila-cluster"
}
stages {
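Since ANSIBLE_CONFIG points at a workspace-relative path, it only resolves when the shell step runs from the workspace root. A quick guard along these lines (a sketch, not part of this commit) fails fast if the path does not resolve:

# Sketch: sanity-check the relative ANSIBLE_CONFIG before any Ansible step
# (assumes the step runs from the Jenkins workspace root)
echo "Workspace: $(pwd)"
test -f "${ANSIBLE_CONFIG}" || { echo "ansible.cfg not found at ${ANSIBLE_CONFIG}"; exit 1; }
ansible --version | grep 'config file'   # confirms which config Ansible actually loaded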
@@ -165,7 +171,7 @@ pipeline {
echo " • Commit: ${gitCommit.take(8)}"
echo " • Author: ${gitAuthor}"
echo " • Container Registry: ECR (AWS-native, secure)"
echo " • Architecture: SSM-based ECS access (secure, keyless)"
echo " • Architecture: Ansible-based deployment (enterprise security)"
echo " • Security Model: Principle of Least Privilege"
echo " • Timestamp: ${new Date()}"
echo "🔄 DEPLOYMENT TYPE CONFIRMATION: ${env.DEPLOYMENT_TYPE}"
@@ -178,7 +184,7 @@ pipeline {
"git_author": "${gitAuthor}",
"infrastructure_files_changed": "${infrastructureFiles}",
"container_registry": "ECR",
"architecture": "ssm_based_ecs_access",
"architecture": "ansible_based_deployment",
"security_model": "principle_of_least_privilege",
"timestamp": "${new Date()}"
}"""
@@ -379,7 +385,7 @@ pipeline {
echo "🔍 DEPLOYMENT: Force parameter = ${params.FORCE_INFRASTRUCTURE_DEPLOY}"
echo "🔍 DEPLOYMENT: Deployment type = ${env.DEPLOYMENT_TYPE}"
echo "🚨 SECURITY NOTICE: Infrastructure deployment requested"
echo "🏗️ ARCHITECTURE: Deploying ECS Cluster with SSM access (secure, keyless)"
echo "🏗️ ARCHITECTURE: Deploying ECS Cluster with Ansible-based deployment (enterprise security)"
echo "🔐 In production: This would require infrastructure-admin role"
echo "🚀 Attempting infrastructure deployment..."
@@ -473,197 +479,270 @@ pipeline {
}
}
stage('Configure & Deploy Application') {
stage('Configure & Deploy Application with Ansible') {
when {
not { expression { env.DEPLOYMENT_TYPE == "DESTROY" } }
}
parallel {
stage('Configure EC2 Instance via SSM') {
when {
expression {
def hasInstances = false
try {
def instanceId = sh(
script: """
cd terraform && terraform output -raw ecs_instance_id 2>/dev/null || echo ''
""",
returnStdout: true
).trim()
hasInstances = (instanceId != "" && instanceId != "null")
} catch (Exception e) {
echo "⚠️ No instances to configure: ${e.getMessage()}"
}
return hasInstances
}
steps {
script {
echo "🚀 ENTERPRISE: Deploying with Ansible (replacing SSM approach)"
// Get infrastructure details from Terraform
def instanceId = ""
def publicIp = ""
def executionRoleArn = ""
try {
instanceId = sh(
script: "cd terraform && terraform output -raw ecs_instance_id",
returnStdout: true
).trim()
publicIp = sh(
script: "cd terraform && terraform output -raw ecs_instance_public_ip",
returnStdout: true
).trim()
executionRoleArn = sh(
script: "cd terraform && terraform output -raw ecs_task_execution_role_arn",
returnStdout: true
).trim()
echo "📍 Target Instance: ${instanceId} (${publicIp})"
echo "🔧 Execution Role: ${executionRoleArn}"
} catch (Exception e) {
echo "⚠️ Could not get all Terraform outputs: ${e.getMessage()}"
echo "⚠️ Some outputs may be missing, continuing with available data..."
}
steps {
withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: env.AWS_CRED_ID]]) {
script {
echo "🔧 ENTERPRISE: Configuring EC2 instance via SSM (no SSH required)"
def instanceId = ""
def ec2_ip = ""
try {
sh "test -d terraform || (echo 'Terraform directory not found' && exit 1)"
instanceId = sh(
script: """
cd terraform && terraform output -raw ecs_instance_id
""",
returnStdout: true
).trim()
ec2_ip = sh(
script: """
cd terraform && terraform output -raw ecs_instance_public_ip
""",
returnStdout: true
).trim()
} catch (Exception e) {
echo "⚠️ Could not get instance details: ${e.getMessage()}"
echo "⚠️ Skipping SSM configuration - no instances available"
return
// Create Ansible working directory and files
sh "mkdir -p ansible/group_vars"
// Fix: Create inventory with safer path handling
def inventoryContent = """[inventory_hosts]
ec2-instance ansible_host=${publicIp} ansible_user=ec2-user
[inventory_hosts:vars]
ansible_ssh_private_key_file=~/.ssh/id_rsa
ansible_ssh_common_args='-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -o ServerAliveInterval=60'
ansible_python_interpreter=/usr/bin/python3
ansible_connection=ssh
ansible_ssh_retries=3
aws_region=${AWS_REGION}
"""
writeFile file: 'ansible/hosts', text: inventoryContent
// Fix: Create Ansible config with safer paths
def ansibleConfig = """[defaults]
inventory = hosts
host_key_checking = False
retry_files_enabled = False
gathering = smart
stdout_callback = yaml
timeout = 30
log_path = ./ansible.log
[ssh_connection]
ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ConnectTimeout=10
pipelining = True
"""
writeFile file: 'ansible/ansible.cfg', text: ansibleConfig
// Fix: Create group variables with safer variable handling
def groupVarsContent = """---
ecs_cluster_name: ${TF_VAR_cluster_name}
service_name: ${TF_VAR_cluster_name}-service
task_family: ${TF_VAR_cluster_name}-task
container_name: ${ECR_REPO}
aws_region: ${AWS_REGION}
container_port: 8080
"""
writeFile file: 'ansible/group_vars/all.yml', text: groupVarsContent
// Test connectivity and execute deployment
withCredentials([
[$class: 'AmazonWebServicesCredentialsBinding',
credentialsId: env.AWS_CRED_ID,
accessKeyVariable: 'AWS_ACCESS_KEY_ID',
secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']
]) {
// Fix: Use safer shell command construction
sh """
cd ansible
# Set environment variables
export AWS_DEFAULT_REGION="${AWS_REGION}"
export ANSIBLE_HOST_KEY_CHECKING=False
export ANSIBLE_CONFIG="./ansible.cfg"
# Wait for SSH connectivity
echo "🔍 Testing SSH connectivity to ${publicIp}..."
timeout 120 bash -c 'while ! nc -z ${publicIp} 22; do echo "Waiting for SSH..."; sleep 5; done'
# Install Python dependencies if needed
pip3 install --user boto3 botocore jq > /dev/null 2>&1 || true
# Test Ansible connectivity
echo "🔍 Testing Ansible connectivity..."
ansible inventory_hosts -m ping -i hosts -v
if [ \$? -ne 0 ]; then
echo "❌ Ansible connectivity failed"
echo "Debugging SSH connection..."
ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 ec2-user@${publicIp} 'echo "SSH test successful"' || {
echo "SSH connection failed"
exit 1
}
echo "📍 Target Instance: ${instanceId} (${ec2_ip})"
echo "⏳ Waiting for SSM agent to be ready..."
timeout(time: 10, unit: 'MINUTES') {
waitUntil {
def ssmStatus = sh(
script: """
aws ssm describe-instance-information --filters "Key=InstanceIds,Values=${instanceId}" --region ${AWS_REGION} --query 'InstanceInformationList[0].PingStatus' --output text 2>/dev/null || echo 'Offline'
""",
returnStdout: true
).trim()
if (ssmStatus == "Online") {
echo "✅ SSM agent is online"
return true
} else {
echo "⏳ SSM agent status: ${ssmStatus}, waiting..."
sleep(30)
return false
}
exit 1
fi
echo "✅ Connectivity test passed"
# Execute main deployment playbook
echo "🚀 Starting deployment..."
ansible-playbook configure_ecs.yml \\
-i hosts \\
-e "app_version=${IMAGE_TAG}" \\
-e "aws_account_id=${AWS_ACCOUNT_ID}" \\
-e "aws_region=${AWS_REGION}" \\
-e "task_execution_role_arn=${executionRoleArn}" \\
--timeout 600 \\
-v
"""
}
// Final verification
echo "🔍 Running final verification..."
sh """
echo "Testing application endpoint..."
for i in {1..10}; do
if curl -f -s "http://${publicIp}:8080/health"; then
echo "✅ Application health check passed"
break
else
echo "⏳ Health check attempt \$i/10..."
sleep 10
fi
done
"""
}
}
post {
success {
script {
def publicIp = sh(
script: "cd terraform && terraform output -raw ecs_instance_public_ip",
returnStdout: true
).trim()
echo """
========================================
🎉 DEPLOYMENT SUCCESSFUL!
========================================
Application URL: http://${publicIp}:8080
Health Endpoint: http://${publicIp}:8080/health
Version: ${IMAGE_TAG}
Deployment Method: Ansible (Enterprise Security)
========================================
"""
}
// Archive deployment artifacts
archiveArtifacts artifacts: 'ansible/ansible.log', allowEmptyArchive: true
}
failure {
echo "❌ DEPLOYMENT FAILED - Gathering debug information..."
script {
// Fix: Use environment variable for log group to avoid shell interpolation issues
sh """
echo "=== ANSIBLE DEBUG INFORMATION ==="
cat ansible/ansible.log 2>/dev/null || echo "No Ansible log available"
echo "=== ECS SERVICE STATUS ==="
aws ecs describe-services \\
--cluster "${TF_VAR_cluster_name}" \\
--services "${TF_VAR_cluster_name}-service" \\
--region "${AWS_REGION}" \\
--query 'services[0].{Status:status,Running:runningCount,Pending:pendingCount,Events:events[0:3]}' \\
--output json 2>/dev/null || echo "Could not get ECS service status"
echo "=== ECS CLUSTER STATUS ==="
aws ecs describe-clusters \\
--clusters "${TF_VAR_cluster_name}" \\
--region "${AWS_REGION}" \\
--query 'clusters[0].{Status:status,ActiveInstances:activeContainerInstancesCount,Tasks:runningTasksCount}' \\
--output json 2>/dev/null || echo "Could not get ECS cluster status"
echo "=== RECENT CONTAINER LOGS ==="
# Fix: Use environment variable for log group name
LATEST_STREAM=\$(aws logs describe-log-streams \\
--log-group-name "${ECS_LOG_GROUP}" \\
--region "${AWS_REGION}" \\
--order-by LastEventTime \\
--descending \\
--max-items 1 \\
--query 'logStreams[0].logStreamName' \\
--output text 2>/dev/null)
if [ "\$LATEST_STREAM" != "None" ] && [ "\$LATEST_STREAM" != "" ]; then
echo "Latest log stream: \$LATEST_STREAM"
aws logs get-log-events \\
--log-group-name "${ECS_LOG_GROUP}" \\
--log-stream-name "\$LATEST_STREAM" \\
--region "${AWS_REGION}" \\
--start-from-head \\
--query 'events[-20:].[timestamp,message]' \\
--output table 2>/dev/null || echo "Could not retrieve logs"
else
echo "No log streams found"
fi
"""
}
// Offer rollback option
script {
try {
timeout(time: 5, unit: 'MINUTES') {
def rollbackChoice = input(
message: 'Deployment failed. Would you like to rollback to the previous version?',
parameters: [
choice(choices: ['No', 'Yes'], description: 'Rollback?', name: 'ROLLBACK')
]
)
if (rollbackChoice == 'Yes') {
echo "🔄 Initiating automatic rollback..."
withCredentials([
[$class: 'AmazonWebServicesCredentialsBinding',
credentialsId: env.AWS_CRED_ID,
accessKeyVariable: 'AWS_ACCESS_KEY_ID',
secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']
]) {
sh """
cd ansible
ansible-playbook rollback.yml \\
-e auto_rollback=true \\
-v
"""
}
}
echo "🔧 Running configuration commands via SSM..."
sh """
# Install or update Docker if needed
aws ssm send-command \\
--instance-ids ${instanceId} \\
--document-name "AWS-RunShellScript" \\
--parameters 'commands=["sudo yum update -y && sudo yum install -y docker && sudo systemctl start docker && sudo systemctl enable docker"]' \\
--region ${AWS_REGION} \\
--comment "Installing Docker on ECS instance"
# Wait for command to complete
sleep 60
# Configure ECS agent
aws ssm send-command \\
--instance-ids ${instanceId} \\
--document-name "AWS-RunShellScript" \\
--parameters 'commands=["echo ECS_CLUSTER=${TF_VAR_cluster_name} | sudo tee -a /etc/ecs/ecs.config","sudo systemctl restart ecs"]' \\
--region ${AWS_REGION} \\
--comment "Configuring ECS agent"
"""
echo "✅ ENTERPRISE: EC2 instance configured via SSM"
}
} catch (Exception e) {
echo "Rollback prompt timed out or was cancelled"
}
}
}
stage('Deploy ECS Service') {
steps {
withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: env.AWS_CRED_ID]]) {
script {
echo "🚀 DEPLOYMENT: Deploying application to ECS cluster"
// Create task definition
def executionRoleArn = ""
try {
executionRoleArn = sh(
script: 'cd terraform && terraform output -raw ecs_task_execution_role_arn',
returnStdout: true
).trim()
} catch (Exception e) {
echo "⚠️ Could not get execution role ARN: ${e.getMessage()}"
echo "⚠️ Task definition will be created without execution role"
}
def taskDefinition = """
{
"family": "${TF_VAR_cluster_name}-task",
"networkMode": "bridge",
"requiresCompatibilities": ["EC2"],
"memory": "512",
"cpu": "256"${executionRoleArn ? ",\n \"executionRoleArn\": \"${executionRoleArn}\"" : ""},
"containerDefinitions": [
{
"name": "${ECR_REPO}",
"image": "${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}:${IMAGE_TAG}",
"memory": 512,
"cpu": 256,
"essential": true,
"portMappings": [
{
"containerPort": 8080,
"hostPort": 8080,
"protocol": "tcp"
}
],
"healthCheck": {
"command": ["CMD-SHELL", "curl -f http://localhost:8080/health || exit 1"],
"interval": 30,
"timeout": 5,
"retries": 3,
"startPeriod": 60
},
"logConfiguration": {
"logDriver": "awslogs",
"options": {
"awslogs-group": "/ecs/${TF_VAR_cluster_name}",
"awslogs-region": "${AWS_REGION}",
"awslogs-stream-prefix": "ecs"
}
}
}
]
}
"""
writeFile file: 'task-definition.json', text: taskDefinition
sh """
# Create CloudWatch log group if it doesn't exist
aws logs create-log-group --log-group-name /ecs/${TF_VAR_cluster_name} --region ${AWS_REGION} || echo "Log group already exists"
# Register task definition
aws ecs register-task-definition \\
--cli-input-json file://task-definition.json \\
--region ${AWS_REGION}
# Check if service exists
if aws ecs describe-services --cluster ${TF_VAR_cluster_name} --services ${TF_VAR_cluster_name}-service --region ${AWS_REGION} --query 'services[0].status' --output text 2>/dev/null | grep -q 'ACTIVE'; then
echo "✅ Service exists, updating..."
aws ecs update-service \\
--cluster ${TF_VAR_cluster_name} \\
--service ${TF_VAR_cluster_name}-service \\
--task-definition ${TF_VAR_cluster_name}-task \\
--desired-count 1 \\
--force-new-deployment \\
--region ${AWS_REGION}
else
echo "✅ Creating new service..."
aws ecs create-service \\
--cluster ${TF_VAR_cluster_name} \\
--service-name ${TF_VAR_cluster_name}-service \\
--task-definition ${TF_VAR_cluster_name}-task \\
--desired-count 1 \\
--region ${AWS_REGION}
fi
"""
echo "✅ DEPLOYMENT: ECS service deployment initiated"
}
}
}
always {
// Cleanup temporary files
sh """
rm -f ansible/hosts 2>/dev/null || true
rm -f ansible/ansible.cfg 2>/dev/null || true
rm -f ansible/group_vars/all.yml 2>/dev/null || true
"""
}
}
}
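The deploy stage registers a task definition that points at ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}:${IMAGE_TAG}. A pre-flight check that the tag actually exists in ECR (a sketch, not in this commit, reusing the pipeline's existing variables) fails fast instead of waiting for the service to flap:

# Sketch: verify the image tag exists in ECR before registering the task definition
aws ecr describe-images \
  --repository-name "${ECR_REPO}" \
  --image-ids imageTag="${IMAGE_TAG}" \
  --region "${AWS_REGION}" \
  --query 'imageDetails[0].imagePushedAt' --output text \
  || { echo "Image ${ECR_REPO}:${IMAGE_TAG} not found in ECR"; exit 1; }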
@@ -675,64 +754,76 @@ pipeline {
steps {
withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: env.AWS_CRED_ID]]) {
script {
echo "🔍 VERIFICATION: Checking deployment status..."
echo "🔍 VERIFICATION: Running comprehensive validation..."
timeout(time: 15, unit: 'MINUTES') {
waitUntil {
def serviceStatus = sh(
script: """
aws ecs describe-services \\
--cluster ${TF_VAR_cluster_name} \\
--services ${TF_VAR_cluster_name}-service \\
--region ${AWS_REGION} \\
--query 'services[0].deployments[0].status' \\
--output text 2>/dev/null || echo 'UNKNOWN'
""",
returnStdout: true
).trim()
def runningCount = sh(
script: """
aws ecs describe-services \\
--cluster ${TF_VAR_cluster_name} \\
--services ${TF_VAR_cluster_name}-service \\
--region ${AWS_REGION} \\
--query 'services[0].runningCount' \\
--output text 2>/dev/null || echo '0'
""",
returnStdout: true
).trim()
echo "Service Status: ${serviceStatus}, Running Tasks: ${runningCount}"
if (serviceStatus == "STEADY" && runningCount.toInteger() > 0) {
echo "✅ Service deployment completed successfully"
return true
} else {
echo "⏳ Waiting for service to stabilize..."
sleep(30)
return false
}
}
}
def publicIp = sh(
script: "cd terraform && terraform output -raw ecs_instance_public_ip",
returnStdout: true
).trim()
// Get application URL
def appUrl = ""
try {
appUrl = sh(
script: """
cd terraform && terraform output -raw ecs_instance_public_ip 2>/dev/null || echo 'unavailable'
""",
returnStdout: true
).trim()
// Fix: Use safer URL construction and environment variables
sh """
echo "=== APPLICATION HEALTH CHECK ==="
curl -f -v "http://${publicIp}:8080/health"
if (appUrl != "unavailable" && appUrl != "") {
echo "🌐 APPLICATION URL: http://${appUrl}:8080"
currentBuild.description = "${currentBuild.description} | URL: http://${appUrl}:8080"
}
} catch (Exception e) {
echo "⚠️ Could not determine application URL: ${e.getMessage()}"
}
echo "=== ECS SERVICE VALIDATION ==="
aws ecs describe-services \\
--cluster "${TF_VAR_cluster_name}" \\
--services "${TF_VAR_cluster_name}-service" \\
--region "${AWS_REGION}" \\
--query 'services[0].{Status:status,TaskDefinition:taskDefinition,Running:runningCount,Desired:desiredCount}' \\
--output table
echo "=== CONTAINER HEALTH CHECK ==="
# Check if containers are healthy
RUNNING_TASKS=\$(aws ecs list-tasks \\
--cluster "${TF_VAR_cluster_name}" \\
--service-name "${TF_VAR_cluster_name}-service" \\
--desired-status RUNNING \\
--region "${AWS_REGION}" \\
--query 'taskArns' \\
--output text)
if [ -n "\$RUNNING_TASKS" ]; then
aws ecs describe-tasks \\
--cluster "${TF_VAR_cluster_name}" \\
--tasks \$RUNNING_TASKS \\
--region "${AWS_REGION}" \\
--query 'tasks[0].containers[0].{Name:name,Status:lastStatus,Health:healthStatus}' \\
--output table
fi
echo "=== LOG VALIDATION ==="
# Check for any errors in recent logs
LATEST_STREAM=\$(aws logs describe-log-streams \\
--log-group-name "${ECS_LOG_GROUP}" \\
--region "${AWS_REGION}" \\
--order-by LastEventTime \\
--descending \\
--max-items 1 \\
--query 'logStreams[0].logStreamName' \\
--output text 2>/dev/null)
if [ "\$LATEST_STREAM" != "None" ] && [ "\$LATEST_STREAM" != "" ]; then
ERROR_COUNT=\$(aws logs get-log-events \\
--log-group-name "${ECS_LOG_GROUP}" \\
--log-stream-name "\$LATEST_STREAM" \\
--region "${AWS_REGION}" \\
--query 'events[?contains(message, \`ERROR\`) || contains(message, \`FATAL\`) || contains(message, \`Exception\`)].message' \\
--output text | wc -l)
if [ "\$ERROR_COUNT" -gt 0 ]; then
echo "⚠️ Found \$ERROR_COUNT potential errors in logs - please review"
else
echo "✅ No errors found in recent application logs"
fi
fi
echo "✅ All validation checks completed successfully"
"""
// Update build description with URL
currentBuild.description = "${currentBuild.description} | URL: http://${publicIp}:8080"
echo "✅ VERIFICATION: Deployment verification completed"
}
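The failure handler and the verification stage both repeat the same pattern against ECS_LOG_GROUP: find the newest log stream, then pull its last events. Distilled into a helper it looks roughly like this (a sketch; the pipeline inlines the equivalent calls):

# Sketch: tail the most recent CloudWatch stream for an ECS log group
tail_ecs_logs() {
  local log_group="$1" region="$2" stream
  stream=$(aws logs describe-log-streams \
    --log-group-name "${log_group}" --region "${region}" \
    --order-by LastEventTime --descending --max-items 1 \
    --query 'logStreams[0].logStreamName' --output text 2>/dev/null)
  if [ -z "${stream}" ] || [ "${stream}" = "None" ]; then
    echo "No log streams found in ${log_group}"
    return 0
  fi
  aws logs get-log-events \
    --log-group-name "${log_group}" --log-stream-name "${stream}" \
    --region "${region}" --start-from-head \
    --query 'events[-20:].[timestamp,message]' --output table
}
# Usage: tail_ecs_logs "/ecs/nvhi-atsila-cluster" "us-east-2"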

ansible/ansible.cfg Normal file

@@ -0,0 +1,19 @@
[defaults]
inventory = hosts
host_key_checking = False
retry_files_enabled = False
gathering = smart
fact_caching = memory
stdout_callback = yaml
stderr_callback = yaml
timeout = 30
log_path = ./ansible.log
nocows = 1
[ssh_connection]
ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ConnectTimeout=10
pipelining = True
control_path = /tmp/ansible-ssh-%%h-%%p-%%r
[inventory]
enable_plugins = host_list, script, auto, yaml, ini
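To confirm this ansible.cfg is the one Ansible actually discovers, and to see only the settings that deviate from defaults, ansible-config can be run from the same directory (a sketch):

# Sketch: run from the ansible/ directory so the local ansible.cfg is picked up
cd ansible
ansible-config view                  # prints the ansible.cfg that was loaded
ansible-config dump --only-changed   # lists settings that differ from Ansible defaults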

ansible/configure_ecs.yml

@@ -1,49 +1,493 @@
---
- name: Configure EC2 for ECS Cluster
- name: Configure and Deploy ECS Application (Enterprise Security)
hosts: inventory_hosts
become: yes
# DO NOT use blanket root access
become: no
gather_facts: yes
vars:
ecs_cluster_name: "nvhi-atsila-cluster"
service_name: "nvhi-atsila-cluster-service"
task_family: "nvhi-atsila-cluster-task"
container_name: "nvhi-atsila-microservice"
app_version: "{{ app_version | default('latest') }}"
aws_region: "{{ aws_region | default('us-east-2') }}"
log_group: "/ecs/{{ ecs_cluster_name }}"
# Security: Use dedicated service account
ecs_user: "ecs-user"
ecs_group: "ecs-group"
pre_tasks:
- name: Validate required variables
assert:
that:
- ecs_cluster_name is defined
- aws_region is defined
- aws_account_id is defined
- task_execution_role_arn is defined
fail_msg: "Required variables missing. Check app_version, aws_account_id, task_execution_role_arn"
tags: [validation]
- name: Test connectivity
ping:
tags: [validation]
# Security: Create dedicated service account
- name: Create ECS service group
group:
name: "{{ ecs_group }}"
state: present
become: yes
become_user: root
tags: [security, users]
- name: Create ECS service user
user:
name: "{{ ecs_user }}"
group: "{{ ecs_group }}"
system: yes
shell: /bin/bash
home: /home/{{ ecs_user }}
create_home: yes
state: present
become: yes
become_user: root
tags: [security, users]
- name: Add ECS user to docker group
user:
name: "{{ ecs_user }}"
groups: docker
append: yes
become: yes
become_user: root
tags: [security, users]
tasks:
- name: Update all packages
# Infrastructure Setup - Only escalate when necessary
- name: Update system packages
yum:
name: '*'
state: latest
update_cache: yes
become: yes
become_user: root
async: 300
poll: 0
register: yum_update
tags: [infrastructure]
- name: Install Docker
yum:
name: docker
state: present
- name: Wait for package update to complete
async_status:
jid: "{{ yum_update.ansible_job_id }}"
register: update_result
until: update_result.finished
retries: 30
delay: 10
tags: [infrastructure]
- name: Install ECS init
- name: Install required packages
yum:
name: ecs-init
name:
- docker
- ecs-init
- curl
- wget
- jq
state: present
become: yes
become_user: root
retries: 3
delay: 5
tags: [infrastructure]
# Security: Configure Docker securely
- name: Create Docker configuration directory
file:
path: /etc/docker
state: directory
mode: '0755'
owner: root
group: root
become: yes
become_user: root
tags: [infrastructure, security]
- name: Configure Docker daemon securely
copy:
dest: /etc/docker/daemon.json
content: |
{
"log-driver": "json-file",
"log-opts": {
"max-size": "100m",
"max-file": "3"
},
"live-restore": true,
"userland-proxy": false,
"no-new-privileges": true
}
mode: '0644'
owner: root
group: root
become: yes
become_user: root
notify: restart docker
tags: [infrastructure, security]
- name: Start and enable Docker
service:
systemd:
name: docker
state: started
enabled: true
daemon_reload: true
become: yes
become_user: root
register: docker_service
tags: [infrastructure]
- name: Verify Docker is running
command: docker info
register: docker_check
failed_when: docker_check.rc != 0
retries: 3
delay: 5
changed_when: false
# Security: Run as regular user (ECS user is in docker group)
become: yes
become_user: "{{ ecs_user }}"
tags: [infrastructure, validation]
# Security: Create ECS directory with proper permissions
- name: Create ECS config directory
file:
path: /etc/ecs
state: directory
mode: '0755'
owner: root
group: "{{ ecs_group }}"
become: yes
become_user: root
tags: [infrastructure, security]
- name: Write ECS config file
- name: Configure ECS agent
copy:
dest: /etc/ecs/ecs.config
content: |
ECS_CLUSTER={{ ecs_cluster_name }}
ECS_AVAILABLE_LOGGING_DRIVERS=["json-file","awslogs"]
ECS_ENABLE_TASK_IAM_ROLE=true
ECS_ENABLE_CONTAINER_METADATA=true
ECS_CONTAINER_STOP_TIMEOUT=30s
# Security: Disable privileged containers by default
ECS_DISABLE_PRIVILEGED=true
# Security: Enable AppArmor/SELinux support
ECS_SELINUX_CAPABLE=true
ECS_APPARMOR_CAPABLE=true
mode: '0640' # Security: More restrictive permissions
owner: root
group: "{{ ecs_group }}" # Security: Group ownership for ECS
backup: yes
become: yes
become_user: root
notify: restart ecs
tags: [infrastructure, security]
# Security: Configure ECS agent service with proper user
- name: Create ECS service override directory
file:
path: /etc/systemd/system/ecs.service.d
state: directory
mode: '0755'
owner: root
group: root
become: yes
become_user: root
tags: [infrastructure, security]
- name: Configure ECS service security settings
copy:
dest: /etc/systemd/system/ecs.service.d/security.conf
content: |
[Service]
# Security: Additional hardening
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
PrivateTmp=true
# Allow access to ECS directories
ReadWritePaths=/var/lib/ecs /var/log/ecs /etc/ecs
mode: '0644'
owner: root
group: root
become: yes
become_user: root
notify:
- reload systemd
- restart ecs
tags: [infrastructure, security]
- name: Start and enable ECS agent
service:
systemd:
name: ecs
state: started
enabled: true
enabled: true
daemon_reload: true
become: yes
become_user: root
tags: [infrastructure]
- name: Wait for ECS agent to register
shell: |
count=0
while [ $count -lt 30 ]; do
instances=$(aws ecs list-container-instances --cluster {{ ecs_cluster_name }} --region {{ aws_region }} --query 'length(containerInstanceArns)' --output text 2>/dev/null || echo "0")
if [ "$instances" != "0" ] && [ "$instances" != "None" ]; then
echo "ECS agent registered successfully"
exit 0
fi
echo "Waiting for ECS agent registration (attempt $((count+1))/30)..."
sleep 10
count=$((count+1))
done
echo "ECS agent failed to register"
exit 1
environment:
AWS_DEFAULT_REGION: "{{ aws_region }}"
delegate_to: localhost
run_once: true
# Security: Run AWS CLI as regular user with proper AWS credentials
become: no
tags: [infrastructure]
# Application Deployment - No root required
- name: Create CloudWatch log group
shell: |
aws logs create-log-group --log-group-name "{{ log_group }}" --region {{ aws_region }} 2>/dev/null || echo "Log group exists"
aws logs put-retention-policy --log-group-name "{{ log_group }}" --retention-in-days 7 --region {{ aws_region }} 2>/dev/null || echo "Retention policy exists"
environment:
AWS_DEFAULT_REGION: "{{ aws_region }}"
delegate_to: localhost
run_once: true
# Security: No root required for AWS API calls
become: no
tags: [deployment]
# Security: Create temp file in user's home directory
- name: Create task definition file
copy:
dest: "/tmp/task-definition-{{ ansible_date_time.epoch }}.json"
content: |
{
"family": "{{ task_family }}",
"executionRoleArn": "{{ task_execution_role_arn }}",
"networkMode": "bridge",
"requiresCompatibilities": ["EC2"],
"cpu": "256",
"memory": "512",
"containerDefinitions": [
{
"name": "{{ container_name }}",
"image": "{{ aws_account_id }}.dkr.ecr.{{ aws_region }}.amazonaws.com/{{ container_name }}:{{ app_version }}",
"cpu": 256,
"memory": 512,
"essential": true,
"user": "1000:1000",
"readonlyRootFilesystem": true,
"portMappings": [
{
"containerPort": 8080,
"hostPort": 8080,
"protocol": "tcp"
}
],
"logConfiguration": {
"logDriver": "awslogs",
"options": {
"awslogs-group": "{{ log_group }}",
"awslogs-region": "{{ aws_region }}",
"awslogs-stream-prefix": "ecs"
}
},
"healthCheck": {
"command": [
"CMD-SHELL",
"curl -f http://localhost:8080/health || exit 1"
],
"interval": 30,
"timeout": 5,
"retries": 3,
"startPeriod": 60
},
"tmpfs": [
{
"containerPath": "/tmp",
"size": 100
}
],
"mountPoints": [],
"volumesFrom": []
}
]
}
mode: '0644'
# Security: File owned by current user, not root
owner: "{{ ansible_user | default(ansible_ssh_user) }}"
group: "{{ ansible_user | default(ansible_ssh_user) }}"
delegate_to: localhost
run_once: true
# Security: No root required
become: no
register: task_def_file
tags: [deployment, security]
- name: Register task definition
shell: |
aws ecs register-task-definition \
--cli-input-json file://{{ task_def_file.dest }} \
--region {{ aws_region }} \
--output json
environment:
AWS_DEFAULT_REGION: "{{ aws_region }}"
delegate_to: localhost
run_once: true
# Security: No root required for AWS API calls
become: no
register: task_registration
tags: [deployment]
- name: Update ECS service
shell: |
aws ecs update-service \
--cluster {{ ecs_cluster_name }} \
--service {{ service_name }} \
--task-definition {{ task_family }} \
--desired-count 1 \
--force-new-deployment \
--region {{ aws_region }} \
--output json
environment:
AWS_DEFAULT_REGION: "{{ aws_region }}"
delegate_to: localhost
run_once: true
# Security: No root required
become: no
register: service_update
tags: [deployment]
- name: Wait for service deployment to complete
shell: |
echo "Waiting for service to stabilize..."
count=0
while [ $count -lt 30 ]; do
service_status=$(aws ecs describe-services \
--cluster {{ ecs_cluster_name }} \
--services {{ service_name }} \
--region {{ aws_region }} \
--query 'services[0]' \
--output json 2>/dev/null)
if [ $? -eq 0 ]; then
running=$(echo "$service_status" | jq -r '.runningCount // 0')
pending=$(echo "$service_status" | jq -r '.pendingCount // 0')
echo "Running: $running, Pending: $pending"
if [ "$running" -ge "1" ] && [ "$pending" -eq "0" ]; then
echo "Service deployment completed successfully"
exit 0
fi
fi
echo "Waiting for deployment completion (attempt $((count+1))/30)..."
sleep 20
count=$((count+1))
done
echo "Service deployment did not complete within expected time"
exit 1
environment:
AWS_DEFAULT_REGION: "{{ aws_region }}"
delegate_to: localhost
run_once: true
# Security: No root required
become: no
tags: [deployment]
# Health Verification - No root required
- name: Wait for application health check
uri:
url: "http://{{ ansible_default_ipv4.address }}:8080/health"
method: GET
timeout: 10
status_code: 200
register: health_check
until: health_check.status == 200
retries: 10
delay: 15
# Security: No root required for HTTP requests
become: no
tags: [verification]
- name: Display deployment summary
debug:
msg: |
========================================
🎉 SECURE DEPLOYMENT COMPLETED
========================================
Cluster: {{ ecs_cluster_name }}
Service: {{ service_name }}
Task Family: {{ task_family }}
Image Version: {{ app_version }}
Instance IP: {{ ansible_default_ipv4.address }}
Health Status: HEALTHY
Security: Non-root containers, least privilege
Application URL: http://{{ ansible_default_ipv4.address }}:8080
========================================
tags: [reporting]
handlers:
- name: reload systemd
systemd:
daemon_reload: yes
become: yes
become_user: root
- name: restart docker
systemd:
name: docker
state: restarted
become: yes
become_user: root
- name: restart ecs
systemd:
name: ecs
state: restarted
daemon_reload: true
become: yes
become_user: root
post_tasks:
- name: Cleanup temporary files
file:
path: "{{ item }}"
state: absent
loop:
- "/tmp/task-definition-{{ ansible_date_time.epoch }}.json"
delegate_to: localhost
# Security: No root required for cleanup
become: no
tags: [cleanup]
# Security: Audit log
- name: Log deployment action
lineinfile:
path: /var/log/ecs-deployments.log
line: "{{ ansible_date_time.iso8601 }} - Deployment v{{ app_version }} by {{ ansible_user | default('unknown') }} from {{ ansible_env.SSH_CLIENT.split()[0] | default('unknown') }}"
create: yes
mode: '0644'
owner: root
group: "{{ ecs_group }}"
become: yes
become_user: root
tags: [audit, security]
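The playbook hand-rolls its stabilization loop with describe-services plus jq. The AWS CLI also ships a built-in waiter that covers the same service-stability condition; it could stand in for the loop if preferred (a sketch, not what the playbook currently uses):

# Sketch: built-in alternative to the polling loop above
aws ecs wait services-stable \
  --cluster nvhi-atsila-cluster \
  --services nvhi-atsila-cluster-service \
  --region us-east-2
# the waiter polls describe-services (15s interval, up to 40 attempts)
# and exits non-zero if the service never stabilizes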

ansible/group_vars/all.yml Normal file

@@ -0,0 +1,33 @@
---
# Global variables for all environments
# These can be overridden by host-specific variables or command line
# ECS Configuration
ecs_cluster_name: nvhi-atsila-cluster
service_name: nvhi-atsila-cluster-service
task_family: nvhi-atsila-cluster-task
container_name: nvhi-atsila-microservice
# AWS Configuration
aws_region: us-east-2
container_port: 8080
health_check_path: /health
# Connection Settings
ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -o ServerAliveInterval=60'
ansible_ssh_retries: 3
ansible_timeout: 30
# Application Settings
app_port: 8080
health_check_timeout: 10
health_check_retries: 10
health_check_delay: 15
# Deployment Settings
deployment_timeout: 600
service_stabilization_retries: 30
service_stabilization_delay: 20
# Logging
log_retention_days: 7
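Everything in this file is a default; extra vars passed with -e (as the Jenkins stage does) take precedence over group_vars, so a one-off override looks like the following sketch (the account ID and role ARN are placeholders, not values from this repo):

# Sketch: override group_vars defaults at run time; -e extra vars win over group_vars
ansible-playbook configure_ecs.yml -i hosts \
  -e app_version=v1.2.3 \
  -e aws_account_id=123456789012 \
  -e task_execution_role_arn=arn:aws:iam::123456789012:role/example-exec-role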

ansible/hosts

@@ -1,2 +1,14 @@
[inventory_hosts]
# overwritten dynamically by Jenkins with the EC2 public IP
# This file will be dynamically generated by Jenkins
# Format: hostname ansible_host=IP_ADDRESS ansible_user=USERNAME
[inventory_hosts:vars]
# SSH connection settings
ansible_ssh_private_key_file=~/.ssh/id_rsa
ansible_ssh_common_args='-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -o ServerAliveInterval=60'
ansible_python_interpreter=/usr/bin/python3
ansible_connection=ssh
ansible_ssh_retries=3
# AWS configuration
aws_region=us-east-2
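Because Jenkins regenerates this inventory on every run, it can be worth rendering the generated file exactly as Ansible will see it before any play consumes it (a sketch):

# Sketch: inspect the generated inventory, including the :vars block
ansible-inventory -i ansible/hosts --graph
ansible-inventory -i ansible/hosts --list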

ansible/rollback.yml Normal file

@@ -0,0 +1,147 @@
---
- name: Rollback ECS Service
hosts: localhost
connection: local
gather_facts: false
vars:
ecs_cluster_name: "nvhi-atsila-cluster"
service_name: "nvhi-atsila-cluster-service"
task_family: "nvhi-atsila-cluster-task"
aws_region: "us-east-2"
tasks:
- name: Get current service task definition
shell: |
aws ecs describe-services \
--cluster {{ ecs_cluster_name }} \
--services {{ service_name }} \
--region {{ aws_region }} \
--query 'services[0].taskDefinition' \
--output text
register: current_task_def
environment:
AWS_DEFAULT_REGION: "{{ aws_region }}"
- name: Extract current revision number
set_fact:
current_revision: "{{ current_task_def.stdout.split(':')[-1] | int }}"
- name: Calculate rollback revision
set_fact:
rollback_revision: "{{ (current_revision | int) - 1 }}"
when: rollback_revision is not defined
- name: Validate rollback revision
fail:
msg: "Cannot rollback - target revision {{ rollback_revision }} is invalid (must be >= 1)"
when: (rollback_revision | int) < 1
- name: Display rollback information
debug:
msg: |
=================================
ROLLBACK INFORMATION
=================================
Service: {{ service_name }}
Cluster: {{ ecs_cluster_name }}
Current Revision: {{ current_revision }}
Target Revision: {{ rollback_revision }}
=================================
- name: Confirm rollback (interactive)
pause:
prompt: |
WARNING: You are about to rollback the ECS service!
Service: {{ service_name }}
From: {{ task_family }}:{{ current_revision }}
To: {{ task_family }}:{{ rollback_revision }}
Do you want to continue? (yes/no)
register: rollback_confirm
when: auto_rollback is not defined
- name: Set automatic confirmation
set_fact:
rollback_confirm:
user_input: "yes"
when: auto_rollback is defined and auto_rollback
- name: Execute rollback
shell: |
aws ecs update-service \
--cluster {{ ecs_cluster_name }} \
--service {{ service_name }} \
--task-definition {{ task_family }}:{{ rollback_revision }} \
--force-new-deployment \
--region {{ aws_region }} \
--output json
environment:
AWS_DEFAULT_REGION: "{{ aws_region }}"
when: rollback_confirm.user_input | lower == 'yes'
register: rollback_result
- name: Wait for rollback completion
shell: |
echo "Waiting for rollback to complete..."
count=0
while [ $count -lt 20 ]; do
service_status=$(aws ecs describe-services \
--cluster {{ ecs_cluster_name }} \
--services {{ service_name }} \
--region {{ aws_region }} \
--query 'services[0]' \
--output json 2>/dev/null)
if [ $? -eq 0 ]; then
running=$(echo "$service_status" | jq -r '.runningCount // 0')
pending=$(echo "$service_status" | jq -r '.pendingCount // 0')
echo "Running: $running, Pending: $pending"
if [ "$running" -ge "1" ] && [ "$pending" -eq "0" ]; then
echo "Rollback completed successfully"
exit 0
fi
fi
echo "Waiting for rollback completion (attempt $((count+1))/20)..."
sleep 15
count=$((count+1))
done
echo "WARNING: Rollback may not have completed within expected time"
exit 1
environment:
AWS_DEFAULT_REGION: "{{ aws_region }}"
when: rollback_confirm.user_input | lower == 'yes'
- name: Verify rollback status
shell: |
aws ecs describe-services \
--cluster {{ ecs_cluster_name }} \
--services {{ service_name }} \
--region {{ aws_region }} \
--query 'services[0].{TaskDefinition:taskDefinition,RunningCount:runningCount,Status:status}' \
--output table
environment:
AWS_DEFAULT_REGION: "{{ aws_region }}"
when: rollback_confirm.user_input | lower == 'yes'
register: final_status
- name: Display rollback results
debug:
msg: |
========================================
🔄 ROLLBACK COMPLETED
========================================
Service: {{ service_name }}
Rolled back to: {{ task_family }}:{{ rollback_revision }}
Status: Check output above
========================================
when: rollback_confirm.user_input | lower == 'yes'
- name: Rollback cancelled
debug:
msg: "Rollback operation was cancelled by user"
when: rollback_confirm.user_input | lower != 'yes'
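The play defaults to rolling back to the current revision minus one, but rollback_revision can be supplied explicitly. Listing the ACTIVE revisions first makes it easier to pick a known-good target (a sketch; revision 12 is illustrative, not taken from this environment):

# Sketch: choose an explicit rollback target instead of current-minus-one
aws ecs list-task-definitions \
  --family-prefix nvhi-atsila-cluster-task \
  --status ACTIVE --sort DESC --region us-east-2
# then run the play non-interactively against that revision
ansible-playbook rollback.yml -e rollback_revision=12 -e auto_rollback=true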

ansible/setup-ansible.sh Normal file

@@ -0,0 +1,220 @@
#!/bin/bash
# Enterprise Ansible Setup and Test Script
# This script sets up the Ansible environment and runs tests
set -e
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Function to print colored output
print_status() {
echo -e "${BLUE}[INFO]${NC} $1"
}
print_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
print_warning() {
echo -e "${YELLOW}[WARNING]${NC} $1"
}
print_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
# Check if we're in the right directory
if [ ! -d "ansible" ]; then
print_error "ansible directory not found. Please run this script from your project root."
exit 1
fi
cd ansible
print_status "Setting up Enterprise Ansible environment..."
# Create necessary directories
print_status "Creating directory structure..."
mkdir -p group_vars
mkdir -p templates
mkdir -p roles
mkdir -p inventories/production
mkdir -p inventories/staging
# Install Python dependencies
print_status "Installing Python dependencies..."
pip3 install --user boto3 botocore jmespath > /dev/null 2>&1 || {
print_warning "Could not install Python dependencies. Install manually: pip3 install boto3 botocore jmespath"
}
# Check Ansible installation
if ! command -v ansible &> /dev/null; then
print_error "Ansible not found. Please install Ansible first:"
echo " Ubuntu/Debian: sudo apt update && sudo apt install ansible"
echo " RHEL/CentOS: sudo yum install ansible"
echo " macOS: brew install ansible"
exit 1
fi
ANSIBLE_VERSION=$(ansible --version | head -n1)
print_success "Found: $ANSIBLE_VERSION"
# Check AWS CLI
if ! command -v aws &> /dev/null; then
print_error "AWS CLI not found. Please install AWS CLI first."
exit 1
fi
AWS_VERSION=$(aws --version)
print_success "Found: $AWS_VERSION"
# Validate configuration files
print_status "Validating Ansible configuration files..."
# Check if main playbook exists
if [ ! -f "configure_ecs.yml" ]; then
print_error "configure_ecs.yml not found!"
exit 1
fi
# Validate YAML syntax
if command -v yamllint &> /dev/null; then
print_status "Checking YAML syntax..."
yamllint configure_ecs.yml || print_warning "YAML syntax issues found (non-critical)"
else
print_warning "yamllint not found. Install with: pip3 install yamllint"
fi
# Validate Ansible playbook syntax
print_status "Validating Ansible playbook syntax..."
ansible-playbook configure_ecs.yml --syntax-check || {
print_error "Ansible syntax validation failed!"
exit 1
}
print_success "Ansible syntax validation passed"
# Test functions
test_connectivity() {
local ip=$1
if [ -z "$ip" ]; then
print_error "No IP address provided for connectivity test"
return 1
fi
print_status "Testing connectivity to $ip..."
# Test SSH connectivity
if timeout 10 bash -c "nc -z $ip 22" &>/dev/null; then
print_success "SSH port (22) is reachable"
else
print_error "SSH port (22) is not reachable"
return 1
fi
# Test Ansible ping
if ansible inventory_hosts -m ping -i hosts &>/dev/null; then
print_success "Ansible connectivity test passed"
else
print_error "Ansible connectivity test failed"
return 1
fi
return 0
}
# Create a test inventory for validation
create_test_inventory() {
local ip=${1:-"127.0.0.1"}
print_status "Creating test inventory with IP: $ip"
cat > hosts_test << EOF
[inventory_hosts]
test-instance ansible_host=$ip ansible_user=ec2-user
[inventory_hosts:vars]
ansible_ssh_private_key_file=~/.ssh/id_rsa
ansible_ssh_common_args='-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10'
ansible_python_interpreter=/usr/bin/python3
ansible_connection=ssh
aws_region=us-east-2
EOF
}
# Main execution
print_status "Ansible Enterprise Setup Complete!"
echo
echo "Available operations:"
echo " 1. Test connectivity (requires EC2 IP)"
echo " 2. Run simple deployment test"
echo " 3. Validate all playbooks"
echo " 4. Show configuration summary"
echo
# Interactive mode
if [ "$1" == "--interactive" ]; then
echo -n "Enter operation number (1-4): "
read -r operation
case $operation in
1)
echo -n "Enter EC2 instance IP: "
read -r ec2_ip
create_test_inventory "$ec2_ip"
if test_connectivity "$ec2_ip"; then
print_success "Connectivity test passed!"
else
print_error "Connectivity test failed!"
fi
;;
2)
echo -n "Enter EC2 instance IP: "
read -r ec2_ip
create_test_inventory "$ec2_ip"
print_status "Running simple deployment test..."
ansible-playbook simple-deploy.yml -i hosts_test -v
;;
3)
print_status "Validating all playbooks..."
for playbook in *.yml; do
if [ -f "$playbook" ]; then
print_status "Validating $playbook..."
ansible-playbook "$playbook" --syntax-check
fi
done
print_success "All playbooks validated!"
;;
4)
print_status "Configuration Summary:"
echo " - Working Directory: $(pwd)"
echo " - Ansible Version: $(ansible --version | head -n1)"
echo " - AWS CLI Version: $(aws --version 2>&1)"
echo " - Available Playbooks:"
ls -la *.yml 2>/dev/null | awk '{print " - " $9}' || echo " - None found"
echo " - Python Dependencies:"
python3 -c "import boto3, botocore; print(' - boto3: ' + boto3.__version__); print(' - botocore: ' + botocore.__version__)" 2>/dev/null || echo " - Not installed"
;;
*)
print_error "Invalid operation number"
;;
esac
fi
# Cleanup
if [ -f "hosts_test" ]; then
rm -f hosts_test
fi
print_success "Setup script completed!"
echo
echo "Next steps:"
echo " 1. Update your Jenkins pipeline with the new Ansible integration"
echo " 2. Test with: ./setup-ansible.sh --interactive"
echo " 3. Run deployment: ansible-playbook configure_ecs.yml -i hosts -v"
echo
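The script expects to be launched from the repository root (it checks for the ansible/ directory before cd-ing into it), so typical usage is a sketch like:

# Sketch: run from the repository root
chmod +x ansible/setup-ansible.sh
./ansible/setup-ansible.sh               # dependency checks plus playbook syntax validation
./ansible/setup-ansible.sh --interactive # menu-driven connectivity and deployment tests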

ansible/simple-deploy.yml Normal file

@@ -0,0 +1,109 @@
---
- name: Simple ECS Configuration Test
hosts: inventory_hosts
become: yes
gather_facts: yes
vars:
ecs_cluster_name: "nvhi-atsila-cluster"
tasks:
- name: Test connectivity
ping:
tags: [test]
- name: Check system information
debug:
msg: |
System: {{ ansible_distribution }} {{ ansible_distribution_version }}
Hostname: {{ ansible_hostname }}
IP: {{ ansible_default_ipv4.address }}
tags: [info]
- name: Update system packages
yum:
name: '*'
state: latest
update_cache: yes
async: 300
poll: 0
register: yum_update
tags: [packages]
- name: Wait for package update
async_status:
jid: "{{ yum_update.ansible_job_id }}"
register: update_result
until: update_result.finished
retries: 30
delay: 10
tags: [packages]
- name: Install Docker and ECS components
yum:
name:
- docker
- ecs-init
- curl
- jq
state: present
tags: [install]
- name: Start Docker service
systemd:
name: docker
state: started
enabled: true
daemon_reload: true
tags: [services]
- name: Verify Docker is working
command: docker --version
register: docker_version
changed_when: false
tags: [verify]
- name: Create ECS configuration directory
file:
path: /etc/ecs
state: directory
mode: '0755'
tags: [config]
- name: Write ECS configuration
copy:
dest: /etc/ecs/ecs.config
content: |
ECS_CLUSTER={{ ecs_cluster_name }}
ECS_AVAILABLE_LOGGING_DRIVERS=["json-file","awslogs"]
ECS_ENABLE_TASK_IAM_ROLE=true
mode: '0644'
backup: yes
notify: restart ecs
tags: [config]
- name: Start ECS agent
systemd:
name: ecs
state: started
enabled: true
daemon_reload: true
tags: [services]
- name: Display configuration summary
debug:
msg: |
========================================
✅ SIMPLE CONFIGURATION COMPLETED
========================================
Docker Version: {{ docker_version.stdout }}
ECS Cluster: {{ ecs_cluster_name }}
Instance IP: {{ ansible_default_ipv4.address }}
========================================
tags: [summary]
handlers:
- name: restart ecs
systemd:
name: ecs
state: restarted
daemon_reload: true
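After either playbook configures the ECS agent, the quickest on-instance confirmation that it actually joined the cluster is the agent's local introspection endpoint (a sketch; run on the EC2 instance itself):

# Sketch: confirm the ECS agent registered with the expected cluster (run on the instance)
curl -s http://localhost:51678/v1/metadata | jq '{Cluster, ContainerInstanceArn, Version}'
# the agent listens on 127.0.0.1:51678; a populated ContainerInstanceArn means registration succeeded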