automated terminal push

This commit is contained in:
lenape
2025-07-16 01:11:58 +00:00
parent 727b3af1a1
commit 2d3302c588
8 changed files with 1327 additions and 252 deletions

Jenkinsfile (vendored) — 565 lines changed [View File]

@@ -49,6 +49,12 @@ pipeline {
// Enterprise settings
TF_IN_AUTOMATION = 'true'
TF_INPUT = 'false'
// Ansible configuration
ANSIBLE_HOST_KEY_CHECKING = 'False'
// Fix: Use relative path without leading slash
ANSIBLE_CONFIG = './ansible/ansible.cfg'
// Fix: Define log group as variable to avoid shell interpolation issues
ECS_LOG_GROUP = "/ecs/nvhi-atsila-cluster"
}
stages {
@@ -165,7 +171,7 @@ pipeline {
echo " • Commit: ${gitCommit.take(8)}"
echo " • Author: ${gitAuthor}"
echo " • Container Registry: ECR (AWS-native, secure)"
echo " • Architecture: SSM-based ECS access (secure, keyless)"
echo " • Architecture: Ansible-based deployment (enterprise security)"
echo " • Security Model: Principle of Least Privilege"
echo " • Timestamp: ${new Date()}"
echo "🔄 DEPLOYMENT TYPE CONFIRMATION: ${env.DEPLOYMENT_TYPE}"
@@ -178,7 +184,7 @@ pipeline {
"git_author": "${gitAuthor}",
"infrastructure_files_changed": "${infrastructureFiles}",
"container_registry": "ECR",
"architecture": "ssm_based_ecs_access",
"architecture": "ansible_based_deployment",
"security_model": "principle_of_least_privilege",
"timestamp": "${new Date()}"
}"""
@@ -379,7 +385,7 @@ pipeline {
echo "🔍 DEPLOYMENT: Force parameter = ${params.FORCE_INFRASTRUCTURE_DEPLOY}"
echo "🔍 DEPLOYMENT: Deployment type = ${env.DEPLOYMENT_TYPE}"
echo "🚨 SECURITY NOTICE: Infrastructure deployment requested"
echo "🏗️ ARCHITECTURE: Deploying ECS Cluster with SSM access (secure, keyless)"
echo "🏗️ ARCHITECTURE: Deploying ECS Cluster with Ansible-based deployment (enterprise security)"
echo "🔐 In production: This would require infrastructure-admin role"
echo "🚀 Attempting infrastructure deployment..."
@@ -473,197 +479,270 @@ pipeline {
}
}
stage('Configure & Deploy Application') {
stage('Configure & Deploy Application with Ansible') {
when {
not { expression { env.DEPLOYMENT_TYPE == "DESTROY" } }
}
parallel {
stage('Configure EC2 Instance via SSM') {
when {
expression {
def hasInstances = false
try {
def instanceId = sh(
script: """
cd terraform && terraform output -raw ecs_instance_id 2>/dev/null || echo ''
""",
returnStdout: true
).trim()
hasInstances = (instanceId != "" && instanceId != "null")
} catch (Exception e) {
echo "⚠️ No instances to configure: ${e.getMessage()}"
}
return hasInstances
}
steps {
script {
echo "🚀 ENTERPRISE: Deploying with Ansible (replacing SSM approach)"
// Get infrastructure details from Terraform
def instanceId = ""
def publicIp = ""
def executionRoleArn = ""
try {
instanceId = sh(
script: "cd terraform && terraform output -raw ecs_instance_id",
returnStdout: true
).trim()
publicIp = sh(
script: "cd terraform && terraform output -raw ecs_instance_public_ip",
returnStdout: true
).trim()
executionRoleArn = sh(
script: "cd terraform && terraform output -raw ecs_task_execution_role_arn",
returnStdout: true
).trim()
echo "📍 Target Instance: ${instanceId} (${publicIp})"
echo "🔧 Execution Role: ${executionRoleArn}"
} catch (Exception e) {
echo "⚠️ Could not get all Terraform outputs: ${e.getMessage()}"
echo "⚠️ Some outputs may be missing, continuing with available data..."
}
steps {
withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: env.AWS_CRED_ID]]) {
script {
echo "🔧 ENTERPRISE: Configuring EC2 instance via SSM (no SSH required)"
def instanceId = ""
def ec2_ip = ""
try {
sh "test -d terraform || (echo 'Terraform directory not found' && exit 1)"
instanceId = sh(
script: """
cd terraform && terraform output -raw ecs_instance_id
""",
returnStdout: true
).trim()
ec2_ip = sh(
script: """
cd terraform && terraform output -raw ecs_instance_public_ip
""",
returnStdout: true
).trim()
} catch (Exception e) {
echo "⚠️ Could not get instance details: ${e.getMessage()}"
echo "⚠️ Skipping SSM configuration - no instances available"
return
// Create Ansible working directory and files
sh "mkdir -p ansible/group_vars"
// Fix: Create inventory with safer path handling
def inventoryContent = """[inventory_hosts]
ec2-instance ansible_host=${publicIp} ansible_user=ec2-user
[inventory_hosts:vars]
ansible_ssh_private_key_file=~/.ssh/id_rsa
ansible_ssh_common_args='-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -o ServerAliveInterval=60'
ansible_python_interpreter=/usr/bin/python3
ansible_connection=ssh
ansible_ssh_retries=3
aws_region=${AWS_REGION}
"""
writeFile file: 'ansible/hosts', text: inventoryContent
// Fix: Create Ansible config with safer paths
def ansibleConfig = """[defaults]
inventory = hosts
host_key_checking = False
retry_files_enabled = False
gathering = smart
stdout_callback = yaml
timeout = 30
log_path = ./ansible.log
[ssh_connection]
ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ConnectTimeout=10
pipelining = True
"""
writeFile file: 'ansible/ansible.cfg', text: ansibleConfig
// Fix: Create group variables with safer variable handling
def groupVarsContent = """---
ecs_cluster_name: ${TF_VAR_cluster_name}
service_name: ${TF_VAR_cluster_name}-service
task_family: ${TF_VAR_cluster_name}-task
container_name: ${ECR_REPO}
aws_region: ${AWS_REGION}
container_port: 8080
"""
writeFile file: 'ansible/group_vars/all.yml', text: groupVarsContent
// Test connectivity and execute deployment
withCredentials([
[$class: 'AmazonWebServicesCredentialsBinding',
credentialsId: env.AWS_CRED_ID,
accessKeyVariable: 'AWS_ACCESS_KEY_ID',
secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']
]) {
// Fix: Use safer shell command construction
sh """
cd ansible
# Set environment variables
export AWS_DEFAULT_REGION="${AWS_REGION}"
export ANSIBLE_HOST_KEY_CHECKING=False
export ANSIBLE_CONFIG="./ansible.cfg"
# Wait for SSH connectivity
echo "🔍 Testing SSH connectivity to ${publicIp}..."
timeout 120 bash -c 'while ! nc -z ${publicIp} 22; do echo "Waiting for SSH..."; sleep 5; done'
# Install Python dependencies if needed
pip3 install --user boto3 botocore jq > /dev/null 2>&1 || true
# Test Ansible connectivity
echo "🔍 Testing Ansible connectivity..."
ansible inventory_hosts -m ping -i hosts -v
if [ \$? -ne 0 ]; then
echo "❌ Ansible connectivity failed"
echo "Debugging SSH connection..."
ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 ec2-user@${publicIp} 'echo "SSH test successful"' || {
echo "SSH connection failed"
exit 1
}
echo "📍 Target Instance: ${instanceId} (${ec2_ip})"
echo "⏳ Waiting for SSM agent to be ready..."
timeout(time: 10, unit: 'MINUTES') {
waitUntil {
def ssmStatus = sh(
script: """
aws ssm describe-instance-information --filters "Key=InstanceIds,Values=${instanceId}" --region ${AWS_REGION} --query 'InstanceInformationList[0].PingStatus' --output text 2>/dev/null || echo 'Offline'
""",
returnStdout: true
).trim()
if (ssmStatus == "Online") {
echo "✅ SSM agent is online"
return true
} else {
echo "⏳ SSM agent status: ${ssmStatus}, waiting..."
sleep(30)
return false
}
exit 1
fi
echo "✅ Connectivity test passed"
# Execute main deployment playbook
echo "🚀 Starting deployment..."
ansible-playbook configure_ecs.yml \\
-i hosts \\
-e "app_version=${IMAGE_TAG}" \\
-e "aws_account_id=${AWS_ACCOUNT_ID}" \\
-e "aws_region=${AWS_REGION}" \\
-e "task_execution_role_arn=${executionRoleArn}" \\
--timeout 600 \\
-v
"""
}
// Final verification
echo "🔍 Running final verification..."
sh """
echo "Testing application endpoint..."
for i in {1..10}; do
if curl -f -s "http://${publicIp}:8080/health"; then
echo "✅ Application health check passed"
break
else
echo "⏳ Health check attempt \$i/10..."
sleep 10
fi
done
"""
}
}
post {
success {
script {
def publicIp = sh(
script: "cd terraform && terraform output -raw ecs_instance_public_ip",
returnStdout: true
).trim()
echo """
========================================
🎉 DEPLOYMENT SUCCESSFUL!
========================================
Application URL: http://${publicIp}:8080
Health Endpoint: http://${publicIp}:8080/health
Version: ${IMAGE_TAG}
Deployment Method: Ansible (Enterprise Security)
========================================
"""
}
// Archive deployment artifacts
archiveArtifacts artifacts: 'ansible/ansible.log', allowEmptyArchive: true
}
failure {
echo "❌ DEPLOYMENT FAILED - Gathering debug information..."
script {
// Fix: Use environment variable for log group to avoid shell interpolation issues
sh """
echo "=== ANSIBLE DEBUG INFORMATION ==="
cat ansible/ansible.log 2>/dev/null || echo "No Ansible log available"
echo "=== ECS SERVICE STATUS ==="
aws ecs describe-services \\
--cluster "${TF_VAR_cluster_name}" \\
--services "${TF_VAR_cluster_name}-service" \\
--region "${AWS_REGION}" \\
--query 'services[0].{Status:status,Running:runningCount,Pending:pendingCount,Events:events[0:3]}' \\
--output json 2>/dev/null || echo "Could not get ECS service status"
echo "=== ECS CLUSTER STATUS ==="
aws ecs describe-clusters \\
--clusters "${TF_VAR_cluster_name}" \\
--region "${AWS_REGION}" \\
--query 'clusters[0].{Status:status,ActiveInstances:activeContainerInstancesCount,Tasks:runningTasksCount}' \\
--output json 2>/dev/null || echo "Could not get ECS cluster status"
echo "=== RECENT CONTAINER LOGS ==="
# Fix: Use environment variable for log group name
LATEST_STREAM=\$(aws logs describe-log-streams \\
--log-group-name "${ECS_LOG_GROUP}" \\
--region "${AWS_REGION}" \\
--order-by LastEventTime \\
--descending \\
--max-items 1 \\
--query 'logStreams[0].logStreamName' \\
--output text 2>/dev/null)
if [ "\$LATEST_STREAM" != "None" ] && [ "\$LATEST_STREAM" != "" ]; then
echo "Latest log stream: \$LATEST_STREAM"
aws logs get-log-events \\
--log-group-name "${ECS_LOG_GROUP}" \\
--log-stream-name "\$LATEST_STREAM" \\
--region "${AWS_REGION}" \\
--start-from-head \\
--query 'events[-20:].[timestamp,message]' \\
--output table 2>/dev/null || echo "Could not retrieve logs"
else
echo "No log streams found"
fi
"""
}
// Offer rollback option
script {
try {
timeout(time: 5, unit: 'MINUTES') {
def rollbackChoice = input(
message: 'Deployment failed. Would you like to rollback to the previous version?',
parameters: [
choice(choices: ['No', 'Yes'], description: 'Rollback?', name: 'ROLLBACK')
]
)
if (rollbackChoice == 'Yes') {
echo "🔄 Initiating automatic rollback..."
withCredentials([
[$class: 'AmazonWebServicesCredentialsBinding',
credentialsId: env.AWS_CRED_ID,
accessKeyVariable: 'AWS_ACCESS_KEY_ID',
secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']
]) {
sh """
cd ansible
ansible-playbook rollback.yml \\
-e auto_rollback=true \\
-v
"""
}
}
echo "🔧 Running configuration commands via SSM..."
sh """
# Install or update Docker if needed
aws ssm send-command \\
--instance-ids ${instanceId} \\
--document-name "AWS-RunShellScript" \\
--parameters 'commands=["sudo yum update -y && sudo yum install -y docker && sudo systemctl start docker && sudo systemctl enable docker"]' \\
--region ${AWS_REGION} \\
--comment "Installing Docker on ECS instance"
# Wait for command to complete
sleep 60
# Configure ECS agent
aws ssm send-command \\
--instance-ids ${instanceId} \\
--document-name "AWS-RunShellScript" \\
--parameters 'commands=["echo ECS_CLUSTER=${TF_VAR_cluster_name} | sudo tee -a /etc/ecs/ecs.config","sudo systemctl restart ecs"]' \\
--region ${AWS_REGION} \\
--comment "Configuring ECS agent"
"""
echo "✅ ENTERPRISE: EC2 instance configured via SSM"
}
} catch (Exception e) {
echo "Rollback prompt timed out or was cancelled"
}
}
}
stage('Deploy ECS Service') {
steps {
withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: env.AWS_CRED_ID]]) {
script {
echo "🚀 DEPLOYMENT: Deploying application to ECS cluster"
// Create task definition
def executionRoleArn = ""
try {
executionRoleArn = sh(
script: 'cd terraform && terraform output -raw ecs_task_execution_role_arn',
returnStdout: true
).trim()
} catch (Exception e) {
echo "⚠️ Could not get execution role ARN: ${e.getMessage()}"
echo "⚠️ Task definition will be created without execution role"
}
def taskDefinition = """
{
"family": "${TF_VAR_cluster_name}-task",
"networkMode": "bridge",
"requiresCompatibilities": ["EC2"],
"memory": "512",
"cpu": "256"${executionRoleArn ? ",\n \"executionRoleArn\": \"${executionRoleArn}\"" : ""},
"containerDefinitions": [
{
"name": "${ECR_REPO}",
"image": "${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}:${IMAGE_TAG}",
"memory": 512,
"cpu": 256,
"essential": true,
"portMappings": [
{
"containerPort": 8080,
"hostPort": 8080,
"protocol": "tcp"
}
],
"healthCheck": {
"command": ["CMD-SHELL", "curl -f http://localhost:8080/health || exit 1"],
"interval": 30,
"timeout": 5,
"retries": 3,
"startPeriod": 60
},
"logConfiguration": {
"logDriver": "awslogs",
"options": {
"awslogs-group": "/ecs/${TF_VAR_cluster_name}",
"awslogs-region": "${AWS_REGION}",
"awslogs-stream-prefix": "ecs"
}
}
}
]
}
"""
writeFile file: 'task-definition.json', text: taskDefinition
sh """
# Create CloudWatch log group if it doesn't exist
aws logs create-log-group --log-group-name /ecs/${TF_VAR_cluster_name} --region ${AWS_REGION} || echo "Log group already exists"
# Register task definition
aws ecs register-task-definition \\
--cli-input-json file://task-definition.json \\
--region ${AWS_REGION}
# Check if service exists
if aws ecs describe-services --cluster ${TF_VAR_cluster_name} --services ${TF_VAR_cluster_name}-service --region ${AWS_REGION} --query 'services[0].status' --output text 2>/dev/null | grep -q 'ACTIVE'; then
echo "✅ Service exists, updating..."
aws ecs update-service \\
--cluster ${TF_VAR_cluster_name} \\
--service ${TF_VAR_cluster_name}-service \\
--task-definition ${TF_VAR_cluster_name}-task \\
--desired-count 1 \\
--force-new-deployment \\
--region ${AWS_REGION}
else
echo "✅ Creating new service..."
aws ecs create-service \\
--cluster ${TF_VAR_cluster_name} \\
--service-name ${TF_VAR_cluster_name}-service \\
--task-definition ${TF_VAR_cluster_name}-task \\
--desired-count 1 \\
--region ${AWS_REGION}
fi
"""
echo "✅ DEPLOYMENT: ECS service deployment initiated"
}
}
}
always {
// Cleanup temporary files
sh """
rm -f ansible/hosts 2>/dev/null || true
rm -f ansible/ansible.cfg 2>/dev/null || true
rm -f ansible/group_vars/all.yml 2>/dev/null || true
"""
}
}
}
@@ -675,64 +754,76 @@ pipeline {
steps {
withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: env.AWS_CRED_ID]]) {
script {
echo "🔍 VERIFICATION: Checking deployment status..."
echo "🔍 VERIFICATION: Running comprehensive validation..."
timeout(time: 15, unit: 'MINUTES') {
waitUntil {
def serviceStatus = sh(
script: """
aws ecs describe-services \\
--cluster ${TF_VAR_cluster_name} \\
--services ${TF_VAR_cluster_name}-service \\
--region ${AWS_REGION} \\
--query 'services[0].deployments[0].status' \\
--output text 2>/dev/null || echo 'UNKNOWN'
""",
returnStdout: true
).trim()
def runningCount = sh(
script: """
aws ecs describe-services \\
--cluster ${TF_VAR_cluster_name} \\
--services ${TF_VAR_cluster_name}-service \\
--region ${AWS_REGION} \\
--query 'services[0].runningCount' \\
--output text 2>/dev/null || echo '0'
""",
returnStdout: true
).trim()
echo "Service Status: ${serviceStatus}, Running Tasks: ${runningCount}"
if (serviceStatus == "STEADY" && runningCount.toInteger() > 0) {
echo "✅ Service deployment completed successfully"
return true
} else {
echo "⏳ Waiting for service to stabilize..."
sleep(30)
return false
}
}
}
def publicIp = sh(
script: "cd terraform && terraform output -raw ecs_instance_public_ip",
returnStdout: true
).trim()
// Get application URL
def appUrl = ""
try {
appUrl = sh(
script: """
cd terraform && terraform output -raw ecs_instance_public_ip 2>/dev/null || echo 'unavailable'
""",
returnStdout: true
).trim()
// Fix: Use safer URL construction and environment variables
sh """
echo "=== APPLICATION HEALTH CHECK ==="
curl -f -v "http://${publicIp}:8080/health"
if (appUrl != "unavailable" && appUrl != "") {
echo "🌐 APPLICATION URL: http://${appUrl}:8080"
currentBuild.description = "${currentBuild.description} | URL: http://${appUrl}:8080"
}
} catch (Exception e) {
echo "⚠️ Could not determine application URL: ${e.getMessage()}"
}
echo "=== ECS SERVICE VALIDATION ==="
aws ecs describe-services \\
--cluster "${TF_VAR_cluster_name}" \\
--services "${TF_VAR_cluster_name}-service" \\
--region "${AWS_REGION}" \\
--query 'services[0].{Status:status,TaskDefinition:taskDefinition,Running:runningCount,Desired:desiredCount}' \\
--output table
echo "=== CONTAINER HEALTH CHECK ==="
# Check if containers are healthy
RUNNING_TASKS=\$(aws ecs list-tasks \\
--cluster "${TF_VAR_cluster_name}" \\
--service-name "${TF_VAR_cluster_name}-service" \\
--desired-status RUNNING \\
--region "${AWS_REGION}" \\
--query 'taskArns' \\
--output text)
if [ -n "\$RUNNING_TASKS" ]; then
aws ecs describe-tasks \\
--cluster "${TF_VAR_cluster_name}" \\
--tasks \$RUNNING_TASKS \\
--region "${AWS_REGION}" \\
--query 'tasks[0].containers[0].{Name:name,Status:lastStatus,Health:healthStatus}' \\
--output table
fi
echo "=== LOG VALIDATION ==="
# Check for any errors in recent logs
LATEST_STREAM=\$(aws logs describe-log-streams \\
--log-group-name "${ECS_LOG_GROUP}" \\
--region "${AWS_REGION}" \\
--order-by LastEventTime \\
--descending \\
--max-items 1 \\
--query 'logStreams[0].logStreamName' \\
--output text 2>/dev/null)
if [ "\$LATEST_STREAM" != "None" ] && [ "\$LATEST_STREAM" != "" ]; then
ERROR_COUNT=\$(aws logs get-log-events \\
--log-group-name "${ECS_LOG_GROUP}" \\
--log-stream-name "\$LATEST_STREAM" \\
--region "${AWS_REGION}" \\
--query 'events[?contains(message, \`ERROR\`) || contains(message, \`FATAL\`) || contains(message, \`Exception\`)].message' \\
--output text | wc -l)
if [ "\$ERROR_COUNT" -gt 0 ]; then
echo "⚠️ Found \$ERROR_COUNT potential errors in logs - please review"
else
echo "✅ No errors found in recent application logs"
fi
fi
echo "✅ All validation checks completed successfully"
"""
// Update build description with URL
currentBuild.description = "${currentBuild.description} | URL: http://${publicIp}:8080"
echo "✅ VERIFICATION: Deployment verification completed"
}