automated terminal push
Jenkinsfile (vendored): 565 lines changed
@@ -49,6 +49,12 @@ pipeline {
    // Enterprise settings
    TF_IN_AUTOMATION = 'true'
    TF_INPUT = 'false'
    // Ansible configuration
    ANSIBLE_HOST_KEY_CHECKING = 'False'
    // Fix: Use relative path without leading slash
    ANSIBLE_CONFIG = './ansible/ansible.cfg'
    // Fix: Define log group as variable to avoid shell interpolation issues
    ECS_LOG_GROUP = "/ecs/nvhi-atsila-cluster"
  }

  stages {
@@ -165,7 +171,7 @@ pipeline {
    echo " • Commit: ${gitCommit.take(8)}"
    echo " • Author: ${gitAuthor}"
    echo " • Container Registry: ECR (AWS-native, secure)"
    echo " • Architecture: SSM-based ECS access (secure, keyless)"
    echo " • Architecture: Ansible-based deployment (enterprise security)"
    echo " • Security Model: Principle of Least Privilege"
    echo " • Timestamp: ${new Date()}"
    echo "🔄 DEPLOYMENT TYPE CONFIRMATION: ${env.DEPLOYMENT_TYPE}"
@@ -178,7 +184,7 @@ pipeline {
    "git_author": "${gitAuthor}",
    "infrastructure_files_changed": "${infrastructureFiles}",
    "container_registry": "ECR",
    "architecture": "ssm_based_ecs_access",
    "architecture": "ansible_based_deployment",
    "security_model": "principle_of_least_privilege",
    "timestamp": "${new Date()}"
}"""
@@ -379,7 +385,7 @@ pipeline {
    echo "🔍 DEPLOYMENT: Force parameter = ${params.FORCE_INFRASTRUCTURE_DEPLOY}"
    echo "🔍 DEPLOYMENT: Deployment type = ${env.DEPLOYMENT_TYPE}"
    echo "🚨 SECURITY NOTICE: Infrastructure deployment requested"
    echo "🏗️ ARCHITECTURE: Deploying ECS Cluster with SSM access (secure, keyless)"
    echo "🏗️ ARCHITECTURE: Deploying ECS Cluster with Ansible-based deployment (enterprise security)"
    echo "🔐 In production: This would require infrastructure-admin role"
    echo "🚀 Attempting infrastructure deployment..."

@@ -473,197 +479,270 @@ pipeline {
      }
    }

    stage('Configure & Deploy Application') {
    stage('Configure & Deploy Application with Ansible') {
      when {
        not { expression { env.DEPLOYMENT_TYPE == "DESTROY" } }
      }
      parallel {
        stage('Configure EC2 Instance via SSM') {
          when {
            expression {
              def hasInstances = false
              try {
                def instanceId = sh(
                  script: """
                    cd terraform && terraform output -raw ecs_instance_id 2>/dev/null || echo ''
                  """,
                  returnStdout: true
                ).trim()
                hasInstances = (instanceId != "" && instanceId != "null")
              } catch (Exception e) {
                echo "⚠️ No instances to configure: ${e.getMessage()}"
              }
              return hasInstances
            }
          steps {
            script {
              echo "🚀 ENTERPRISE: Deploying with Ansible (replacing SSM approach)"

              // Get infrastructure details from Terraform
              def instanceId = ""
              def publicIp = ""
              def executionRoleArn = ""

              try {
                instanceId = sh(
                  script: "cd terraform && terraform output -raw ecs_instance_id",
                  returnStdout: true
                ).trim()

                publicIp = sh(
                  script: "cd terraform && terraform output -raw ecs_instance_public_ip",
                  returnStdout: true
                ).trim()

                executionRoleArn = sh(
                  script: "cd terraform && terraform output -raw ecs_task_execution_role_arn",
                  returnStdout: true
                ).trim()

                echo "📍 Target Instance: ${instanceId} (${publicIp})"
                echo "🔧 Execution Role: ${executionRoleArn}"
              } catch (Exception e) {
                echo "⚠️ Could not get all Terraform outputs: ${e.getMessage()}"
                echo "⚠️ Some outputs may be missing, continuing with available data..."
              }
          steps {
            withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: env.AWS_CRED_ID]]) {
              script {
                echo "🔧 ENTERPRISE: Configuring EC2 instance via SSM (no SSH required)"
                def instanceId = ""
                def ec2_ip = ""
                try {
                  sh "test -d terraform || (echo 'Terraform directory not found' && exit 1)"
                  instanceId = sh(
                    script: """
                      cd terraform && terraform output -raw ecs_instance_id
                    """,
                    returnStdout: true
                  ).trim()
                  ec2_ip = sh(
                    script: """
                      cd terraform && terraform output -raw ecs_instance_public_ip
                    """,
                    returnStdout: true
                  ).trim()
                } catch (Exception e) {
                  echo "⚠️ Could not get instance details: ${e.getMessage()}"
                  echo "⚠️ Skipping SSM configuration - no instances available"
                  return

              // Create Ansible working directory and files
              sh "mkdir -p ansible/group_vars"

              // Fix: Create inventory with safer path handling
              def inventoryContent = """[inventory_hosts]
ec2-instance ansible_host=${publicIp} ansible_user=ec2-user

[inventory_hosts:vars]
ansible_ssh_private_key_file=~/.ssh/id_rsa
ansible_ssh_common_args='-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -o ServerAliveInterval=60'
ansible_python_interpreter=/usr/bin/python3
ansible_connection=ssh
ansible_ssh_retries=3
aws_region=${AWS_REGION}
"""
              writeFile file: 'ansible/hosts', text: inventoryContent

              // Fix: Create Ansible config with safer paths
              def ansibleConfig = """[defaults]
inventory = hosts
host_key_checking = False
retry_files_enabled = False
gathering = smart
stdout_callback = yaml
timeout = 30
log_path = ./ansible.log

[ssh_connection]
ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ConnectTimeout=10
pipelining = True
"""
              writeFile file: 'ansible/ansible.cfg', text: ansibleConfig

              // Fix: Create group variables with safer variable handling
              def groupVarsContent = """---
ecs_cluster_name: ${TF_VAR_cluster_name}
service_name: ${TF_VAR_cluster_name}-service
task_family: ${TF_VAR_cluster_name}-task
container_name: ${ECR_REPO}
aws_region: ${AWS_REGION}
container_port: 8080
"""
              writeFile file: 'ansible/group_vars/all.yml', text: groupVarsContent

              // Test connectivity and execute deployment
              withCredentials([
                [$class: 'AmazonWebServicesCredentialsBinding',
                 credentialsId: env.AWS_CRED_ID,
                 accessKeyVariable: 'AWS_ACCESS_KEY_ID',
                 secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']
              ]) {
                // Fix: Use safer shell command construction
                sh """
                  cd ansible

                  # Set environment variables
                  export AWS_DEFAULT_REGION="${AWS_REGION}"
                  export ANSIBLE_HOST_KEY_CHECKING=False
                  export ANSIBLE_CONFIG="./ansible.cfg"

                  # Wait for SSH connectivity
                  echo "🔍 Testing SSH connectivity to ${publicIp}..."
                  timeout 120 bash -c 'while ! nc -z ${publicIp} 22; do echo "Waiting for SSH..."; sleep 5; done'

                  # Install Python dependencies if needed
                  pip3 install --user boto3 botocore jq > /dev/null 2>&1 || true

                  # Test Ansible connectivity
                  echo "🔍 Testing Ansible connectivity..."
                  ansible inventory_hosts -m ping -i hosts -v

                  if [ \$? -ne 0 ]; then
                    echo "❌ Ansible connectivity failed"
                    echo "Debugging SSH connection..."
                    ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 ec2-user@${publicIp} 'echo "SSH test successful"' || {
                      echo "SSH connection failed"
                      exit 1
                    }
                echo "📍 Target Instance: ${instanceId} (${ec2_ip})"
                echo "⏳ Waiting for SSM agent to be ready..."
                timeout(time: 10, unit: 'MINUTES') {
                  waitUntil {
                    def ssmStatus = sh(
                      script: """
                        aws ssm describe-instance-information --filters "Key=InstanceIds,Values=${instanceId}" --region ${AWS_REGION} --query 'InstanceInformationList[0].PingStatus' --output text 2>/dev/null || echo 'Offline'
                      """,
                      returnStdout: true
                    ).trim()
                    if (ssmStatus == "Online") {
                      echo "✅ SSM agent is online"
                      return true
                    } else {
                      echo "⏳ SSM agent status: ${ssmStatus}, waiting..."
                      sleep(30)
                      return false
                    }
                    exit 1
                  fi

                  echo "✅ Connectivity test passed"

                  # Execute main deployment playbook
                  echo "🚀 Starting deployment..."
                  ansible-playbook configure_ecs.yml \\
                    -i hosts \\
                    -e "app_version=${IMAGE_TAG}" \\
                    -e "aws_account_id=${AWS_ACCOUNT_ID}" \\
                    -e "aws_region=${AWS_REGION}" \\
                    -e "task_execution_role_arn=${executionRoleArn}" \\
                    --timeout 600 \\
                    -v
                """
              }

              // Final verification
              echo "🔍 Running final verification..."
              sh """
                echo "Testing application endpoint..."
                for i in {1..10}; do
                  if curl -f -s "http://${publicIp}:8080/health"; then
                    echo "✅ Application health check passed"
                    break
                  else
                    echo "⏳ Health check attempt \$i/10..."
                    sleep 10
                  fi
                done
              """
            }
          }

          post {
            success {
              script {
                def publicIp = sh(
                  script: "cd terraform && terraform output -raw ecs_instance_public_ip",
                  returnStdout: true
                ).trim()

                echo """
========================================
🎉 DEPLOYMENT SUCCESSFUL!
========================================
Application URL: http://${publicIp}:8080
Health Endpoint: http://${publicIp}:8080/health
Version: ${IMAGE_TAG}
Deployment Method: Ansible (Enterprise Security)
========================================
"""
              }

              // Archive deployment artifacts
              archiveArtifacts artifacts: 'ansible/ansible.log', allowEmptyArchive: true
            }

            failure {
              echo "❌ DEPLOYMENT FAILED - Gathering debug information..."

              script {
                // Fix: Use environment variable for log group to avoid shell interpolation issues
                sh """
                  echo "=== ANSIBLE DEBUG INFORMATION ==="
                  cat ansible/ansible.log 2>/dev/null || echo "No Ansible log available"

                  echo "=== ECS SERVICE STATUS ==="
                  aws ecs describe-services \\
                    --cluster "${TF_VAR_cluster_name}" \\
                    --services "${TF_VAR_cluster_name}-service" \\
                    --region "${AWS_REGION}" \\
                    --query 'services[0].{Status:status,Running:runningCount,Pending:pendingCount,Events:events[0:3]}' \\
                    --output json 2>/dev/null || echo "Could not get ECS service status"

                  echo "=== ECS CLUSTER STATUS ==="
                  aws ecs describe-clusters \\
                    --clusters "${TF_VAR_cluster_name}" \\
                    --region "${AWS_REGION}" \\
                    --query 'clusters[0].{Status:status,ActiveInstances:activeContainerInstancesCount,Tasks:runningTasksCount}' \\
                    --output json 2>/dev/null || echo "Could not get ECS cluster status"

                  echo "=== RECENT CONTAINER LOGS ==="
                  # Fix: Use environment variable for log group name
                  LATEST_STREAM=\$(aws logs describe-log-streams \\
                    --log-group-name "${ECS_LOG_GROUP}" \\
                    --region "${AWS_REGION}" \\
                    --order-by LastEventTime \\
                    --descending \\
                    --max-items 1 \\
                    --query 'logStreams[0].logStreamName' \\
                    --output text 2>/dev/null)

                  if [ "\$LATEST_STREAM" != "None" ] && [ "\$LATEST_STREAM" != "" ]; then
                    echo "Latest log stream: \$LATEST_STREAM"
                    aws logs get-log-events \\
                      --log-group-name "${ECS_LOG_GROUP}" \\
                      --log-stream-name "\$LATEST_STREAM" \\
                      --region "${AWS_REGION}" \\
                      --start-from-head \\
                      --query 'events[-20:].[timestamp,message]' \\
                      --output table 2>/dev/null || echo "Could not retrieve logs"
                  else
                    echo "No log streams found"
                  fi
                """
              }

              // Offer rollback option
              script {
                try {
                  timeout(time: 5, unit: 'MINUTES') {
                    def rollbackChoice = input(
                      message: 'Deployment failed. Would you like to rollback to the previous version?',
                      parameters: [
                        choice(choices: ['No', 'Yes'], description: 'Rollback?', name: 'ROLLBACK')
                      ]
                    )

                    if (rollbackChoice == 'Yes') {
                      echo "🔄 Initiating automatic rollback..."
                      withCredentials([
                        [$class: 'AmazonWebServicesCredentialsBinding',
                         credentialsId: env.AWS_CRED_ID,
                         accessKeyVariable: 'AWS_ACCESS_KEY_ID',
                         secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']
                      ]) {
                        sh """
                          cd ansible
                          ansible-playbook rollback.yml \\
                            -e auto_rollback=true \\
                            -v
                        """
                      }
                    }

                echo "🔧 Running configuration commands via SSM..."
                sh """
                  # Install or update Docker if needed
                  aws ssm send-command \\
                    --instance-ids ${instanceId} \\
                    --document-name "AWS-RunShellScript" \\
                    --parameters 'commands=["sudo yum update -y && sudo yum install -y docker && sudo systemctl start docker && sudo systemctl enable docker"]' \\
                    --region ${AWS_REGION} \\
                    --comment "Installing Docker on ECS instance"

                  # Wait for command to complete
                  sleep 60

                  # Configure ECS agent
                  aws ssm send-command \\
                    --instance-ids ${instanceId} \\
                    --document-name "AWS-RunShellScript" \\
                    --parameters 'commands=["echo ECS_CLUSTER=${TF_VAR_cluster_name} | sudo tee -a /etc/ecs/ecs.config","sudo systemctl restart ecs"]' \\
                    --region ${AWS_REGION} \\
                    --comment "Configuring ECS agent"
                """
                echo "✅ ENTERPRISE: EC2 instance configured via SSM"
                  }
                } catch (Exception e) {
                  echo "Rollback prompt timed out or was cancelled"
                }
              }
            }
          }

        stage('Deploy ECS Service') {
          steps {
            withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: env.AWS_CRED_ID]]) {
              script {
                echo "🚀 DEPLOYMENT: Deploying application to ECS cluster"

                // Create task definition
                def executionRoleArn = ""
                try {
                  executionRoleArn = sh(
                    script: 'cd terraform && terraform output -raw ecs_task_execution_role_arn',
                    returnStdout: true
                  ).trim()
                } catch (Exception e) {
                  echo "⚠️ Could not get execution role ARN: ${e.getMessage()}"
                  echo "⚠️ Task definition will be created without execution role"
                }

                def taskDefinition = """
{
  "family": "${TF_VAR_cluster_name}-task",
  "networkMode": "bridge",
  "requiresCompatibilities": ["EC2"],
  "memory": "512",
  "cpu": "256"${executionRoleArn ? ",\n  \"executionRoleArn\": \"${executionRoleArn}\"" : ""},
  "containerDefinitions": [
    {
      "name": "${ECR_REPO}",
      "image": "${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}:${IMAGE_TAG}",
      "memory": 512,
      "cpu": 256,
      "essential": true,
      "portMappings": [
        {
          "containerPort": 8080,
          "hostPort": 8080,
          "protocol": "tcp"
        }
      ],
      "healthCheck": {
        "command": ["CMD-SHELL", "curl -f http://localhost:8080/health || exit 1"],
        "interval": 30,
        "timeout": 5,
        "retries": 3,
        "startPeriod": 60
      },
      "logConfiguration": {
        "logDriver": "awslogs",
        "options": {
          "awslogs-group": "/ecs/${TF_VAR_cluster_name}",
          "awslogs-region": "${AWS_REGION}",
          "awslogs-stream-prefix": "ecs"
        }
      }
    }
  ]
}
"""

                writeFile file: 'task-definition.json', text: taskDefinition

                sh """
                  # Create CloudWatch log group if it doesn't exist
                  aws logs create-log-group --log-group-name /ecs/${TF_VAR_cluster_name} --region ${AWS_REGION} || echo "Log group already exists"

                  # Register task definition
                  aws ecs register-task-definition \\
                    --cli-input-json file://task-definition.json \\
                    --region ${AWS_REGION}

                  # Check if service exists
                  if aws ecs describe-services --cluster ${TF_VAR_cluster_name} --services ${TF_VAR_cluster_name}-service --region ${AWS_REGION} --query 'services[0].status' --output text 2>/dev/null | grep -q 'ACTIVE'; then
                    echo "✅ Service exists, updating..."
                    aws ecs update-service \\
                      --cluster ${TF_VAR_cluster_name} \\
                      --service ${TF_VAR_cluster_name}-service \\
                      --task-definition ${TF_VAR_cluster_name}-task \\
                      --desired-count 1 \\
                      --force-new-deployment \\
                      --region ${AWS_REGION}
                  else
                    echo "✅ Creating new service..."
                    aws ecs create-service \\
                      --cluster ${TF_VAR_cluster_name} \\
                      --service-name ${TF_VAR_cluster_name}-service \\
                      --task-definition ${TF_VAR_cluster_name}-task \\
                      --desired-count 1 \\
                      --region ${AWS_REGION}
                  fi
                """

                echo "✅ DEPLOYMENT: ECS service deployment initiated"
              }
            }
          }
          always {
            // Cleanup temporary files
            sh """
              rm -f ansible/hosts 2>/dev/null || true
              rm -f ansible/ansible.cfg 2>/dev/null || true
              rm -f ansible/group_vars/all.yml 2>/dev/null || true
            """
          }
        }
      }
@@ -675,64 +754,76 @@ pipeline {
      steps {
        withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: env.AWS_CRED_ID]]) {
          script {
            echo "🔍 VERIFICATION: Checking deployment status..."
            echo "🔍 VERIFICATION: Running comprehensive validation..."

            timeout(time: 15, unit: 'MINUTES') {
              waitUntil {
                def serviceStatus = sh(
                  script: """
                    aws ecs describe-services \\
                      --cluster ${TF_VAR_cluster_name} \\
                      --services ${TF_VAR_cluster_name}-service \\
                      --region ${AWS_REGION} \\
                      --query 'services[0].deployments[0].status' \\
                      --output text 2>/dev/null || echo 'UNKNOWN'
                  """,
                  returnStdout: true
                ).trim()

                def runningCount = sh(
                  script: """
                    aws ecs describe-services \\
                      --cluster ${TF_VAR_cluster_name} \\
                      --services ${TF_VAR_cluster_name}-service \\
                      --region ${AWS_REGION} \\
                      --query 'services[0].runningCount' \\
                      --output text 2>/dev/null || echo '0'
                  """,
                  returnStdout: true
                ).trim()

                echo "Service Status: ${serviceStatus}, Running Tasks: ${runningCount}"

                if (serviceStatus == "STEADY" && runningCount.toInteger() > 0) {
                  echo "✅ Service deployment completed successfully"
                  return true
                } else {
                  echo "⏳ Waiting for service to stabilize..."
                  sleep(30)
                  return false
                }
              }
            }
            def publicIp = sh(
              script: "cd terraform && terraform output -raw ecs_instance_public_ip",
              returnStdout: true
            ).trim()

            // Get application URL
            def appUrl = ""
            try {
              appUrl = sh(
                script: """
                  cd terraform && terraform output -raw ecs_instance_public_ip 2>/dev/null || echo 'unavailable'
                """,
                returnStdout: true
              ).trim()
            // Fix: Use safer URL construction and environment variables
            sh """
              echo "=== APPLICATION HEALTH CHECK ==="
              curl -f -v "http://${publicIp}:8080/health"

              if (appUrl != "unavailable" && appUrl != "") {
                echo "🌐 APPLICATION URL: http://${appUrl}:8080"
                currentBuild.description = "${currentBuild.description} | URL: http://${appUrl}:8080"
              }
            } catch (Exception e) {
              echo "⚠️ Could not determine application URL: ${e.getMessage()}"
            }
              echo "=== ECS SERVICE VALIDATION ==="
              aws ecs describe-services \\
                --cluster "${TF_VAR_cluster_name}" \\
                --services "${TF_VAR_cluster_name}-service" \\
                --region "${AWS_REGION}" \\
                --query 'services[0].{Status:status,TaskDefinition:taskDefinition,Running:runningCount,Desired:desiredCount}' \\
                --output table

              echo "=== CONTAINER HEALTH CHECK ==="
              # Check if containers are healthy
              RUNNING_TASKS=\$(aws ecs list-tasks \\
                --cluster "${TF_VAR_cluster_name}" \\
                --service-name "${TF_VAR_cluster_name}-service" \\
                --desired-status RUNNING \\
                --region "${AWS_REGION}" \\
                --query 'taskArns' \\
                --output text)

              if [ -n "\$RUNNING_TASKS" ]; then
                aws ecs describe-tasks \\
                  --cluster "${TF_VAR_cluster_name}" \\
                  --tasks \$RUNNING_TASKS \\
                  --region "${AWS_REGION}" \\
                  --query 'tasks[0].containers[0].{Name:name,Status:lastStatus,Health:healthStatus}' \\
                  --output table
              fi

              echo "=== LOG VALIDATION ==="
              # Check for any errors in recent logs
              LATEST_STREAM=\$(aws logs describe-log-streams \\
                --log-group-name "${ECS_LOG_GROUP}" \\
                --region "${AWS_REGION}" \\
                --order-by LastEventTime \\
                --descending \\
                --max-items 1 \\
                --query 'logStreams[0].logStreamName' \\
                --output text 2>/dev/null)

              if [ "\$LATEST_STREAM" != "None" ] && [ "\$LATEST_STREAM" != "" ]; then
                ERROR_COUNT=\$(aws logs get-log-events \\
                  --log-group-name "${ECS_LOG_GROUP}" \\
                  --log-stream-name "\$LATEST_STREAM" \\
                  --region "${AWS_REGION}" \\
                  --query 'events[?contains(message, \`ERROR\`) || contains(message, \`FATAL\`) || contains(message, \`Exception\`)].message' \\
                  --output text | wc -l)

                if [ "\$ERROR_COUNT" -gt 0 ]; then
                  echo "⚠️ Found \$ERROR_COUNT potential errors in logs - please review"
                else
                  echo "✅ No errors found in recent application logs"
                fi
              fi

              echo "✅ All validation checks completed successfully"
            """

            // Update build description with URL
            currentBuild.description = "${currentBuild.description} | URL: http://${publicIp}:8080"

            echo "✅ VERIFICATION: Deployment verification completed"
          }

ansible/ansible.cfg (new file, 19 lines)
@@ -0,0 +1,19 @@
[defaults]
inventory = hosts
host_key_checking = False
retry_files_enabled = False
gathering = smart
fact_caching = memory
stdout_callback = yaml
stderr_callback = yaml
timeout = 30
log_path = ./ansible.log
nocows = 1

[ssh_connection]
ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ConnectTimeout=10
pipelining = True
control_path = /tmp/ansible-ssh-%%h-%%p-%%r

[inventory]
enable_plugins = host_list, script, auto, yaml, ini
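The pipeline points ANSIBLE_CONFIG at this file before invoking Ansible. A minimal usage sketch, assuming the repo layout above and mirroring the commands the Jenkinsfile already runs:

    cd ansible
    export ANSIBLE_CONFIG=./ansible.cfg    # force this cfg regardless of Ansible's lookup order
    ansible inventory_hosts -m ping -i hosts -v
    ansible-playbook configure_ecs.yml -i hosts -v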
ansible/configure_ecs.yml
@@ -1,49 +1,493 @@
---
- name: Configure EC2 for ECS Cluster
- name: Configure and Deploy ECS Application (Enterprise Security)
  hosts: inventory_hosts
  become: yes

  # DO NOT use blanket root access
  become: no
  gather_facts: yes
  vars:
    ecs_cluster_name: "nvhi-atsila-cluster"
    service_name: "nvhi-atsila-cluster-service"
    task_family: "nvhi-atsila-cluster-task"
    container_name: "nvhi-atsila-microservice"
    app_version: "{{ app_version | default('latest') }}"
    aws_region: "{{ aws_region | default('us-east-2') }}"
    log_group: "/ecs/{{ ecs_cluster_name }}"
    # Security: Use dedicated service account
    ecs_user: "ecs-user"
    ecs_group: "ecs-group"

  pre_tasks:
    - name: Validate required variables
      assert:
        that:
          - ecs_cluster_name is defined
          - aws_region is defined
          - aws_account_id is defined
          - task_execution_role_arn is defined
        fail_msg: "Required variables missing. Check app_version, aws_account_id, task_execution_role_arn"
      tags: [validation]

    - name: Test connectivity
      ping:
      tags: [validation]

    # Security: Create dedicated service account
    - name: Create ECS service group
      group:
        name: "{{ ecs_group }}"
        state: present
      become: yes
      become_user: root
      tags: [security, users]

    - name: Create ECS service user
      user:
        name: "{{ ecs_user }}"
        group: "{{ ecs_group }}"
        system: yes
        shell: /bin/bash
        home: /home/{{ ecs_user }}
        create_home: yes
        state: present
      become: yes
      become_user: root
      tags: [security, users]

    - name: Add ECS user to docker group
      user:
        name: "{{ ecs_user }}"
        groups: docker
        append: yes
      become: yes
      become_user: root
      tags: [security, users]

  tasks:
    - name: Update all packages
    # Infrastructure Setup - Only escalate when necessary
    - name: Update system packages
      yum:
        name: '*'
        state: latest
        update_cache: yes
      become: yes
      become_user: root
      async: 300
      poll: 0
      register: yum_update
      tags: [infrastructure]

    - name: Install Docker
      yum:
        name: docker
        state: present
    - name: Wait for package update to complete
      async_status:
        jid: "{{ yum_update.ansible_job_id }}"
      register: update_result
      until: update_result.finished
      retries: 30
      delay: 10
      tags: [infrastructure]

    - name: Install ECS init
    - name: Install required packages
      yum:
        name: ecs-init
        name:
          - docker
          - ecs-init
          - curl
          - wget
          - jq
        state: present
      become: yes
      become_user: root
      retries: 3
      delay: 5
      tags: [infrastructure]

    # Security: Configure Docker securely
    - name: Create Docker configuration directory
      file:
        path: /etc/docker
        state: directory
        mode: '0755'
        owner: root
        group: root
      become: yes
      become_user: root
      tags: [infrastructure, security]

    - name: Configure Docker daemon securely
      copy:
        dest: /etc/docker/daemon.json
        content: |
          {
            "log-driver": "json-file",
            "log-opts": {
              "max-size": "100m",
              "max-file": "3"
            },
            "live-restore": true,
            "userland-proxy": false,
            "no-new-privileges": true
          }
        mode: '0644'
        owner: root
        group: root
      become: yes
      become_user: root
      notify: restart docker
      tags: [infrastructure, security]

    - name: Start and enable Docker
      service:
      systemd:
        name: docker
        state: started
        enabled: true
        daemon_reload: true
      become: yes
      become_user: root
      register: docker_service
      tags: [infrastructure]

    - name: Verify Docker is running
      command: docker info
      register: docker_check
      failed_when: docker_check.rc != 0
      retries: 3
      delay: 5
      changed_when: false
      # Security: Run as regular user (ECS user is in docker group)
      become: yes
      become_user: "{{ ecs_user }}"
      tags: [infrastructure, validation]

    # Security: Create ECS directory with proper permissions
    - name: Create ECS config directory
      file:
        path: /etc/ecs
        state: directory
        mode: '0755'
        owner: root
        group: "{{ ecs_group }}"
      become: yes
      become_user: root
      tags: [infrastructure, security]

    - name: Write ECS config file
    - name: Configure ECS agent
      copy:
        dest: /etc/ecs/ecs.config
        content: |
          ECS_CLUSTER={{ ecs_cluster_name }}
          ECS_AVAILABLE_LOGGING_DRIVERS=["json-file","awslogs"]
          ECS_ENABLE_TASK_IAM_ROLE=true
          ECS_ENABLE_CONTAINER_METADATA=true
          ECS_CONTAINER_STOP_TIMEOUT=30s
          # Security: Disable privileged containers by default
          ECS_DISABLE_PRIVILEGED=true
          # Security: Enable AppArmor/SELinux support
          ECS_SELINUX_CAPABLE=true
          ECS_APPARMOR_CAPABLE=true
        mode: '0640'  # Security: More restrictive permissions
        owner: root
        group: "{{ ecs_group }}"  # Security: Group ownership for ECS
        backup: yes
      become: yes
      become_user: root
      notify: restart ecs
      tags: [infrastructure, security]

    # Security: Configure ECS agent service with proper user
    - name: Create ECS service override directory
      file:
        path: /etc/systemd/system/ecs.service.d
        state: directory
        mode: '0755'
        owner: root
        group: root
      become: yes
      become_user: root
      tags: [infrastructure, security]

    - name: Configure ECS service security settings
      copy:
        dest: /etc/systemd/system/ecs.service.d/security.conf
        content: |
          [Service]
          # Security: Additional hardening
          NoNewPrivileges=true
          ProtectSystem=strict
          ProtectHome=true
          PrivateTmp=true
          # Allow access to ECS directories
          ReadWritePaths=/var/lib/ecs /var/log/ecs /etc/ecs
        mode: '0644'
        owner: root
        group: root
      become: yes
      become_user: root
      notify:
        - reload systemd
        - restart ecs
      tags: [infrastructure, security]

    - name: Start and enable ECS agent
      service:
      systemd:
        name: ecs
        state: started
        enabled: true
        daemon_reload: true
      become: yes
      become_user: root
      tags: [infrastructure]

    - name: Wait for ECS agent to register
      shell: |
        count=0
        while [ $count -lt 30 ]; do
          instances=$(aws ecs list-container-instances --cluster {{ ecs_cluster_name }} --region {{ aws_region }} --query 'length(containerInstanceArns)' --output text 2>/dev/null || echo "0")
          if [ "$instances" != "0" ] && [ "$instances" != "None" ]; then
            echo "ECS agent registered successfully"
            exit 0
          fi
          echo "Waiting for ECS agent registration (attempt $((count+1))/30)..."
          sleep 10
          count=$((count+1))
        done
        echo "ECS agent failed to register"
        exit 1
      environment:
        AWS_DEFAULT_REGION: "{{ aws_region }}"
      delegate_to: localhost
      run_once: true
      # Security: Run AWS CLI as regular user with proper AWS credentials
      become: no
      tags: [infrastructure]

    # Application Deployment - No root required
    - name: Create CloudWatch log group
      shell: |
        aws logs create-log-group --log-group-name "{{ log_group }}" --region {{ aws_region }} 2>/dev/null || echo "Log group exists"
        aws logs put-retention-policy --log-group-name "{{ log_group }}" --retention-in-days 7 --region {{ aws_region }} 2>/dev/null || echo "Retention policy exists"
      environment:
        AWS_DEFAULT_REGION: "{{ aws_region }}"
      delegate_to: localhost
      run_once: true
      # Security: No root required for AWS API calls
      become: no
      tags: [deployment]

    # Security: Create temp file in user's home directory
    - name: Create task definition file
      copy:
        dest: "/tmp/task-definition-{{ ansible_date_time.epoch }}.json"
        content: |
          {
            "family": "{{ task_family }}",
            "executionRoleArn": "{{ task_execution_role_arn }}",
            "networkMode": "bridge",
            "requiresCompatibilities": ["EC2"],
            "cpu": "256",
            "memory": "512",
            "containerDefinitions": [
              {
                "name": "{{ container_name }}",
                "image": "{{ aws_account_id }}.dkr.ecr.{{ aws_region }}.amazonaws.com/{{ container_name }}:{{ app_version }}",
                "cpu": 256,
                "memory": 512,
                "essential": true,
                "user": "1000:1000",
                "readonlyRootFilesystem": true,
                "portMappings": [
                  {
                    "containerPort": 8080,
                    "hostPort": 8080,
                    "protocol": "tcp"
                  }
                ],
                "logConfiguration": {
                  "logDriver": "awslogs",
                  "options": {
                    "awslogs-group": "{{ log_group }}",
                    "awslogs-region": "{{ aws_region }}",
                    "awslogs-stream-prefix": "ecs"
                  }
                },
                "healthCheck": {
                  "command": [
                    "CMD-SHELL",
                    "curl -f http://localhost:8080/health || exit 1"
                  ],
                  "interval": 30,
                  "timeout": 5,
                  "retries": 3,
                  "startPeriod": 60
                },
                "tmpfs": [
                  {
                    "containerPath": "/tmp",
                    "size": 100
                  }
                ],
                "mountPoints": [],
                "volumesFrom": []
              }
            ]
          }
        mode: '0644'
        # Security: File owned by current user, not root
        owner: "{{ ansible_user | default(ansible_ssh_user) }}"
        group: "{{ ansible_user | default(ansible_ssh_user) }}"
      delegate_to: localhost
      run_once: true
      # Security: No root required
      become: no
      register: task_def_file
      tags: [deployment, security]

    - name: Register task definition
      shell: |
        aws ecs register-task-definition \
          --cli-input-json file://{{ task_def_file.dest }} \
          --region {{ aws_region }} \
          --output json
      environment:
        AWS_DEFAULT_REGION: "{{ aws_region }}"
      delegate_to: localhost
      run_once: true
      # Security: No root required for AWS API calls
      become: no
      register: task_registration
      tags: [deployment]

    - name: Update ECS service
      shell: |
        aws ecs update-service \
          --cluster {{ ecs_cluster_name }} \
          --service {{ service_name }} \
          --task-definition {{ task_family }} \
          --desired-count 1 \
          --force-new-deployment \
          --region {{ aws_region }} \
          --output json
      environment:
        AWS_DEFAULT_REGION: "{{ aws_region }}"
      delegate_to: localhost
      run_once: true
      # Security: No root required
      become: no
      register: service_update
      tags: [deployment]

    - name: Wait for service deployment to complete
      shell: |
        echo "Waiting for service to stabilize..."
        count=0
        while [ $count -lt 30 ]; do
          service_status=$(aws ecs describe-services \
            --cluster {{ ecs_cluster_name }} \
            --services {{ service_name }} \
            --region {{ aws_region }} \
            --query 'services[0]' \
            --output json 2>/dev/null)

          if [ $? -eq 0 ]; then
            running=$(echo "$service_status" | jq -r '.runningCount // 0')
            pending=$(echo "$service_status" | jq -r '.pendingCount // 0')

            echo "Running: $running, Pending: $pending"

            if [ "$running" -ge "1" ] && [ "$pending" -eq "0" ]; then
              echo "Service deployment completed successfully"
              exit 0
            fi
          fi

          echo "Waiting for deployment completion (attempt $((count+1))/30)..."
          sleep 20
          count=$((count+1))
        done

        echo "Service deployment did not complete within expected time"
        exit 1
      environment:
        AWS_DEFAULT_REGION: "{{ aws_region }}"
      delegate_to: localhost
      run_once: true
      # Security: No root required
      become: no
      tags: [deployment]

    # Health Verification - No root required
    - name: Wait for application health check
      uri:
        url: "http://{{ ansible_default_ipv4.address }}:8080/health"
        method: GET
        timeout: 10
        status_code: 200
      register: health_check
      until: health_check.status == 200
      retries: 10
      delay: 15
      # Security: No root required for HTTP requests
      become: no
      tags: [verification]

    - name: Display deployment summary
      debug:
        msg: |
          ========================================
          🎉 SECURE DEPLOYMENT COMPLETED
          ========================================
          Cluster: {{ ecs_cluster_name }}
          Service: {{ service_name }}
          Task Family: {{ task_family }}
          Image Version: {{ app_version }}
          Instance IP: {{ ansible_default_ipv4.address }}
          Health Status: HEALTHY
          Security: Non-root containers, least privilege
          Application URL: http://{{ ansible_default_ipv4.address }}:8080
          ========================================
      tags: [reporting]

  handlers:
    - name: reload systemd
      systemd:
        daemon_reload: yes
      become: yes
      become_user: root

    - name: restart docker
      systemd:
        name: docker
        state: restarted
      become: yes
      become_user: root

    - name: restart ecs
      systemd:
        name: ecs
        state: restarted
        daemon_reload: true
      become: yes
      become_user: root

  post_tasks:
    - name: Cleanup temporary files
      file:
        path: "{{ item }}"
        state: absent
      loop:
        - "/tmp/task-definition-{{ ansible_date_time.epoch }}.json"
      delegate_to: localhost
      # Security: No root required for cleanup
      become: no
      tags: [cleanup]

    # Security: Audit log
    - name: Log deployment action
      lineinfile:
        path: /var/log/ecs-deployments.log
        line: "{{ ansible_date_time.iso8601 }} - Deployment v{{ app_version }} by {{ ansible_user | default('unknown') }} from {{ ansible_env.SSH_CLIENT.split()[0] | default('unknown') }}"
        create: yes
        mode: '0644'
        owner: root
        group: "{{ ecs_group }}"
      become: yes
      become_user: root
      tags: [audit, security]
ansible/group_vars/all.yml (new file, 33 lines)
@@ -0,0 +1,33 @@
---
# Global variables for all environments
# These can be overridden by host-specific variables or command line

# ECS Configuration
ecs_cluster_name: nvhi-atsila-cluster
service_name: nvhi-atsila-cluster-service
task_family: nvhi-atsila-cluster-task
container_name: nvhi-atsila-microservice

# AWS Configuration
aws_region: us-east-2
container_port: 8080
health_check_path: /health

# Connection Settings
ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -o ServerAliveInterval=60'
ansible_ssh_retries: 3
ansible_timeout: 30

# Application Settings
app_port: 8080
health_check_timeout: 10
health_check_retries: 10
health_check_delay: 15

# Deployment Settings
deployment_timeout: 600
service_stabilization_retries: 30
service_stabilization_delay: 20

# Logging
log_retention_days: 7
ansible/hosts
@@ -1,2 +1,14 @@
[inventory_hosts]
# overwritten dynamically by Jenkins with the EC2 public IP
# This file will be dynamically generated by Jenkins
# Format: hostname ansible_host=IP_ADDRESS ansible_user=USERNAME

[inventory_hosts:vars]
# SSH connection settings
ansible_ssh_private_key_file=~/.ssh/id_rsa
ansible_ssh_common_args='-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -o ServerAliveInterval=60'
ansible_python_interpreter=/usr/bin/python3
ansible_connection=ssh
ansible_ssh_retries=3

# AWS configuration
aws_region=us-east-2
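For reference, the line Jenkins writes into [inventory_hosts] follows the format comment above; a hypothetical example (203.0.113.10 is a documentation-range placeholder, not a real instance):

    ec2-instance ansible_host=203.0.113.10 ansible_user=ec2-user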
ansible/rollback.yml (new file, 147 lines)
@@ -0,0 +1,147 @@
---
- name: Rollback ECS Service
  hosts: localhost
  connection: local
  gather_facts: false
  vars:
    ecs_cluster_name: "nvhi-atsila-cluster"
    service_name: "nvhi-atsila-cluster-service"
    task_family: "nvhi-atsila-cluster-task"
    aws_region: "us-east-2"

  tasks:
    - name: Get current service task definition
      shell: |
        aws ecs describe-services \
          --cluster {{ ecs_cluster_name }} \
          --services {{ service_name }} \
          --region {{ aws_region }} \
          --query 'services[0].taskDefinition' \
          --output text
      register: current_task_def
      environment:
        AWS_DEFAULT_REGION: "{{ aws_region }}"

    - name: Extract current revision number
      set_fact:
        current_revision: "{{ current_task_def.stdout.split(':')[-1] | int }}"

    - name: Calculate rollback revision
      set_fact:
        rollback_revision: "{{ (current_revision | int) - 1 }}"
      when: rollback_revision is not defined

    - name: Validate rollback revision
      fail:
        msg: "Cannot rollback - target revision {{ rollback_revision }} is invalid (must be >= 1)"
      when: (rollback_revision | int) < 1

    - name: Display rollback information
      debug:
        msg: |
          =================================
          ROLLBACK INFORMATION
          =================================
          Service: {{ service_name }}
          Cluster: {{ ecs_cluster_name }}
          Current Revision: {{ current_revision }}
          Target Revision: {{ rollback_revision }}
          =================================

    - name: Confirm rollback (interactive)
      pause:
        prompt: |
          WARNING: You are about to rollback the ECS service!

          Service: {{ service_name }}
          From: {{ task_family }}:{{ current_revision }}
          To: {{ task_family }}:{{ rollback_revision }}

          Do you want to continue? (yes/no)
      register: rollback_confirm
      when: auto_rollback is not defined

    - name: Set automatic confirmation
      set_fact:
        rollback_confirm:
          user_input: "yes"
      when: auto_rollback is defined and auto_rollback

    - name: Execute rollback
      shell: |
        aws ecs update-service \
          --cluster {{ ecs_cluster_name }} \
          --service {{ service_name }} \
          --task-definition {{ task_family }}:{{ rollback_revision }} \
          --force-new-deployment \
          --region {{ aws_region }} \
          --output json
      environment:
        AWS_DEFAULT_REGION: "{{ aws_region }}"
      when: rollback_confirm.user_input | lower == 'yes'
      register: rollback_result

    - name: Wait for rollback completion
      shell: |
        echo "Waiting for rollback to complete..."
        count=0
        while [ $count -lt 20 ]; do
          service_status=$(aws ecs describe-services \
            --cluster {{ ecs_cluster_name }} \
            --services {{ service_name }} \
            --region {{ aws_region }} \
            --query 'services[0]' \
            --output json 2>/dev/null)

          if [ $? -eq 0 ]; then
            running=$(echo "$service_status" | jq -r '.runningCount // 0')
            pending=$(echo "$service_status" | jq -r '.pendingCount // 0')

            echo "Running: $running, Pending: $pending"

            if [ "$running" -ge "1" ] && [ "$pending" -eq "0" ]; then
              echo "Rollback completed successfully"
              exit 0
            fi
          fi

          echo "Waiting for rollback completion (attempt $((count+1))/20)..."
          sleep 15
          count=$((count+1))
        done

        echo "WARNING: Rollback may not have completed within expected time"
        exit 1
      environment:
        AWS_DEFAULT_REGION: "{{ aws_region }}"
      when: rollback_confirm.user_input | lower == 'yes'

    - name: Verify rollback status
      shell: |
        aws ecs describe-services \
          --cluster {{ ecs_cluster_name }} \
          --services {{ service_name }} \
          --region {{ aws_region }} \
          --query 'services[0].{TaskDefinition:taskDefinition,RunningCount:runningCount,Status:status}' \
          --output table
      environment:
        AWS_DEFAULT_REGION: "{{ aws_region }}"
      when: rollback_confirm.user_input | lower == 'yes'
      register: final_status

    - name: Display rollback results
      debug:
        msg: |
          ========================================
          🔄 ROLLBACK COMPLETED
          ========================================
          Service: {{ service_name }}
          Rolled back to: {{ task_family }}:{{ rollback_revision }}
          Status: Check output above
          ========================================
      when: rollback_confirm.user_input | lower == 'yes'

    - name: Rollback cancelled
      debug:
        msg: "Rollback operation was cancelled by user"
      when: rollback_confirm.user_input | lower != 'yes'
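The playbook supports both interactive and unattended use: the Jenkins failure handler runs it with auto_rollback=true, and because the "Calculate rollback revision" task is guarded by "when: rollback_revision is not defined", a specific target revision can also be pinned on the command line (the revision number below is illustrative):

    ansible-playbook rollback.yml -e auto_rollback=true -v
    ansible-playbook rollback.yml -e rollback_revision=7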
ansible/setup-ansible.sh (new file, 220 lines)
@@ -0,0 +1,220 @@
#!/bin/bash

# Enterprise Ansible Setup and Test Script
# This script sets up the Ansible environment and runs tests

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Function to print colored output
print_status() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

print_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

print_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

print_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# Check if we're in the right directory
if [ ! -d "ansible" ]; then
    print_error "ansible directory not found. Please run this script from your project root."
    exit 1
fi

cd ansible

print_status "Setting up Enterprise Ansible environment..."

# Create necessary directories
print_status "Creating directory structure..."
mkdir -p group_vars
mkdir -p templates
mkdir -p roles
mkdir -p inventories/production
mkdir -p inventories/staging

# Install Python dependencies
print_status "Installing Python dependencies..."
pip3 install --user boto3 botocore jmespath > /dev/null 2>&1 || {
    print_warning "Could not install Python dependencies. Install manually: pip3 install boto3 botocore jmespath"
}

# Check Ansible installation
if ! command -v ansible &> /dev/null; then
    print_error "Ansible not found. Please install Ansible first:"
    echo "  Ubuntu/Debian: sudo apt update && sudo apt install ansible"
    echo "  RHEL/CentOS:   sudo yum install ansible"
    echo "  macOS:         brew install ansible"
    exit 1
fi

ANSIBLE_VERSION=$(ansible --version | head -n1)
print_success "Found: $ANSIBLE_VERSION"

# Check AWS CLI
if ! command -v aws &> /dev/null; then
    print_error "AWS CLI not found. Please install AWS CLI first."
    exit 1
fi

AWS_VERSION=$(aws --version)
print_success "Found: $AWS_VERSION"

# Validate configuration files
print_status "Validating Ansible configuration files..."

# Check if main playbook exists
if [ ! -f "configure_ecs.yml" ]; then
    print_error "configure_ecs.yml not found!"
    exit 1
fi

# Validate YAML syntax
if command -v yamllint &> /dev/null; then
    print_status "Checking YAML syntax..."
    yamllint configure_ecs.yml || print_warning "YAML syntax issues found (non-critical)"
else
    print_warning "yamllint not found. Install with: pip3 install yamllint"
fi

# Validate Ansible playbook syntax
print_status "Validating Ansible playbook syntax..."
ansible-playbook configure_ecs.yml --syntax-check || {
    print_error "Ansible syntax validation failed!"
    exit 1
}
print_success "Ansible syntax validation passed"

# Test functions
test_connectivity() {
    local ip=$1
    if [ -z "$ip" ]; then
        print_error "No IP address provided for connectivity test"
        return 1
    fi

    print_status "Testing connectivity to $ip..."

    # Test SSH connectivity
    if timeout 10 bash -c "nc -z $ip 22" &>/dev/null; then
        print_success "SSH port (22) is reachable"
    else
        print_error "SSH port (22) is not reachable"
        return 1
    fi

    # Test Ansible ping against the generated test inventory
    # (hosts_test, written by create_test_inventory, not the static hosts file)
    if ansible inventory_hosts -m ping -i hosts_test &>/dev/null; then
        print_success "Ansible connectivity test passed"
    else
        print_error "Ansible connectivity test failed"
        return 1
    fi

    return 0
}

# Create a test inventory for validation
create_test_inventory() {
    local ip=${1:-"127.0.0.1"}

    print_status "Creating test inventory with IP: $ip"

    cat > hosts_test << EOF
[inventory_hosts]
test-instance ansible_host=$ip ansible_user=ec2-user

[inventory_hosts:vars]
ansible_ssh_private_key_file=~/.ssh/id_rsa
ansible_ssh_common_args='-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10'
ansible_python_interpreter=/usr/bin/python3
ansible_connection=ssh
aws_region=us-east-2
EOF
}

|
||||
# Main execution
|
||||
print_status "Ansible Enterprise Setup Complete!"
|
||||
echo
|
||||
echo "Available operations:"
|
||||
echo " 1. Test connectivity (requires EC2 IP)"
|
||||
echo " 2. Run simple deployment test"
|
||||
echo " 3. Validate all playbooks"
|
||||
echo " 4. Show configuration summary"
|
||||
echo
|
||||
|
||||
# Interactive mode
|
||||
if [ "$1" == "--interactive" ]; then
|
||||
echo -n "Enter operation number (1-4): "
|
||||
read -r operation
|
||||
|
||||
case $operation in
|
||||
1)
|
||||
echo -n "Enter EC2 instance IP: "
|
||||
read -r ec2_ip
|
||||
create_test_inventory "$ec2_ip"
|
||||
if test_connectivity "$ec2_ip"; then
|
||||
print_success "Connectivity test passed!"
|
||||
else
|
||||
print_error "Connectivity test failed!"
|
||||
fi
|
||||
;;
|
||||
2)
|
||||
echo -n "Enter EC2 instance IP: "
|
||||
read -r ec2_ip
|
||||
create_test_inventory "$ec2_ip"
|
||||
print_status "Running simple deployment test..."
|
||||
ansible-playbook simple-deploy.yml -i hosts_test -v
|
||||
;;
|
||||
3)
|
||||
print_status "Validating all playbooks..."
|
||||
for playbook in *.yml; do
|
||||
if [ -f "$playbook" ]; then
|
||||
print_status "Validating $playbook..."
|
||||
ansible-playbook "$playbook" --syntax-check
|
||||
fi
|
||||
done
|
||||
print_success "All playbooks validated!"
|
||||
;;
|
||||
4)
|
||||
print_status "Configuration Summary:"
|
||||
echo " - Working Directory: $(pwd)"
|
||||
echo " - Ansible Version: $(ansible --version | head -n1)"
|
||||
echo " - AWS CLI Version: $(aws --version 2>&1)"
|
||||
echo " - Available Playbooks:"
|
||||
ls -la *.yml 2>/dev/null | awk '{print " - " $9}' || echo " - None found"
|
||||
echo " - Python Dependencies:"
|
||||
python3 -c "import boto3, botocore; print(' - boto3: ' + boto3.__version__); print(' - botocore: ' + botocore.__version__)" 2>/dev/null || echo " - Not installed"
|
||||
;;
|
||||
*)
|
||||
print_error "Invalid operation number"
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
# Cleanup
|
||||
if [ -f "hosts_test" ]; then
|
||||
rm -f hosts_test
|
||||
fi
|
||||
|
||||
print_success "Setup script completed!"
|
||||
echo
|
||||
echo "Next steps:"
|
||||
echo " 1. Update your Jenkins pipeline with the new Ansible integration"
|
||||
echo " 2. Test with: ./setup-ansible.sh --interactive"
|
||||
echo " 3. Run deployment: ansible-playbook configure_ecs.yml -i hosts -v"
|
||||
echo
|
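Typical invocations of this helper, both of which are code paths in the script above:

    ./setup-ansible.sh                 # non-interactive: validate tooling, config files, playbook syntax
    ./setup-ansible.sh --interactive   # menu-driven: connectivity test, simple deploy, validation, summary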
ansible/simple-deploy.yml (new file, 109 lines)
@@ -0,0 +1,109 @@
---
- name: Simple ECS Configuration Test
  hosts: inventory_hosts
  become: yes
  gather_facts: yes
  vars:
    ecs_cluster_name: "nvhi-atsila-cluster"

  tasks:
    - name: Test connectivity
      ping:
      tags: [test]

    - name: Check system information
      debug:
        msg: |
          System: {{ ansible_distribution }} {{ ansible_distribution_version }}
          Hostname: {{ ansible_hostname }}
          IP: {{ ansible_default_ipv4.address }}
      tags: [info]

    - name: Update system packages
      yum:
        name: '*'
        state: latest
        update_cache: yes
      async: 300
      poll: 0
      register: yum_update
      tags: [packages]

    - name: Wait for package update
      async_status:
        jid: "{{ yum_update.ansible_job_id }}"
      register: update_result
      until: update_result.finished
      retries: 30
      delay: 10
      tags: [packages]

    - name: Install Docker and ECS components
      yum:
        name:
          - docker
          - ecs-init
          - curl
          - jq
        state: present
      tags: [install]

    - name: Start Docker service
      systemd:
        name: docker
        state: started
        enabled: true
        daemon_reload: true
      tags: [services]

    - name: Verify Docker is working
      command: docker --version
      register: docker_version
      changed_when: false
      tags: [verify]

    - name: Create ECS configuration directory
      file:
        path: /etc/ecs
        state: directory
        mode: '0755'
      tags: [config]

    - name: Write ECS configuration
      copy:
        dest: /etc/ecs/ecs.config
        content: |
          ECS_CLUSTER={{ ecs_cluster_name }}
          ECS_AVAILABLE_LOGGING_DRIVERS=["json-file","awslogs"]
          ECS_ENABLE_TASK_IAM_ROLE=true
        mode: '0644'
        backup: yes
      notify: restart ecs
      tags: [config]

    - name: Start ECS agent
      systemd:
        name: ecs
        state: started
        enabled: true
        daemon_reload: true
      tags: [services]

    - name: Display configuration summary
      debug:
        msg: |
          ========================================
          ✅ SIMPLE CONFIGURATION COMPLETED
          ========================================
          Docker Version: {{ docker_version.stdout }}
          ECS Cluster: {{ ecs_cluster_name }}
          Instance IP: {{ ansible_default_ipv4.address }}
          ========================================
      tags: [summary]

  handlers:
    - name: restart ecs
      systemd:
        name: ecs
        state: restarted
        daemon_reload: true