diff --git a/Jenkinsfile b/Jenkinsfile
index ad08c4e..9bd8c1c 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -299,6 +299,8 @@ pipeline {
                         if (serviceExists == "false") {
                             echo "🚨 SECURITY NOTICE: ECS service not found - forcing infrastructure deployment"
                             echo " This is normal for first deployment or after infrastructure cleanup"
+
+                            // FIXED: Properly set the environment variable
                             env.DEPLOYMENT_TYPE = "INFRASTRUCTURE"
                             currentBuild.description = "INFRASTRUCTURE (auto-detected) | ${env.IMAGE_TAG}"
                         }
@@ -359,7 +361,6 @@ pipeline {
                 script {
                     def ec2_ip = ""
                     try {
-                        // Ensure terraform directory exists and get output
                         sh "test -d terraform || (echo 'Terraform directory not found' && exit 1)"
                         ec2_ip = sh(
                             script: "cd terraform && terraform output -raw ecs_instance_public_ip",
@@ -370,20 +371,258 @@ pipeline {
                         ec2_ip = "unknown"
                     }
 
-                    echo "🔧 SECURITY: Configuring EC2 instance for ECS agent: ${ec2_ip}"
-                    echo "🔐 ARCHITECTURE: Using Ansible to optimize EC2 for ECS workloads"
+                    echo "🔧 ENTERPRISE: Configuring EC2 instance for ECS agent: ${ec2_ip}"
+                    echo "🔐 ARCHITECTURE: Using Jenkins credential store for AWS key pair"
 
-                    writeFile file: 'ansible/hosts', text: "[inventory_hosts]\n${ec2_ip} ansible_user=ec2-user"
+                    if (ec2_ip != "unknown") {
+
+                        // STEP 1: ENTERPRISE INSTANCE READINESS CHECK
+                        echo "🔍 ENTERPRISE: Validating EC2 instance readiness..."
+
+                        timeout(time: 15, unit: 'MINUTES') {
+                            waitUntil {
+                                script {
+                                    // Test network connectivity
+                                    def pingResult = sh(
+                                        script: "ping -c 1 -W 5 ${ec2_ip} >/dev/null 2>&1 && echo 'ping_ok' || echo 'ping_failed'",
+                                        returnStdout: true
+                                    ).trim()
+
+                                    if (pingResult != "ping_ok") {
+                                        echo "⏳ ENTERPRISE: Instance not responding to ping, waiting..."
+                                        sleep(20)
+                                        return false
+                                    }
+
+                                    // Test SSH port availability
+                                    def sshResult = sh(
+                                        script: "nc -z -w5 ${ec2_ip} 22 >/dev/null 2>&1 && echo 'ssh_ready' || echo 'ssh_not_ready'",
+                                        returnStdout: true
+                                    ).trim()
+
+                                    if (sshResult != "ssh_ready") {
+                                        echo "⏳ ENTERPRISE: SSH service not ready, waiting..."
+                                        sleep(20)
+                                        return false
+                                    }
+
+                                    echo "✅ ENTERPRISE: Instance is ready for SSH connection"
+                                    return true
+                                }
+                            }
+                        }
+
+                        // STEP 2: ENTERPRISE SSH AUTHENTICATION TEST
+                        echo "🔐 ENTERPRISE: Testing SSH authentication with Jenkins credentials..."
+
+                        def authSuccessful = false
+
+                        timeout(time: 5, unit: 'MINUTES') {
+                            waitUntil {
+                                script {
+                                    try {
+                                        withCredentials([sshUserPrivateKey(credentialsId: 'jenkins-ssh', keyFileVariable: 'SSH_KEY', usernameVariable: 'SSH_USER')]) {
+                                            def authTest = sh(
+                                                script: """
+                                                    ssh -o ConnectTimeout=10 \\
+                                                        -o StrictHostKeyChecking=no \\
+                                                        -o UserKnownHostsFile=/dev/null \\
+                                                        -o BatchMode=yes \\
+                                                        -i \${SSH_KEY} \\
+                                                        ec2-user@${ec2_ip} \\
+                                                        'echo "ENTERPRISE_AUTH_SUCCESS"' 2>/dev/null || echo "auth_failed"
+                                                """,
+                                                returnStdout: true
+                                            ).trim()
+
+                                            if (authTest.contains("ENTERPRISE_AUTH_SUCCESS")) {
+                                                echo "✅ ENTERPRISE: SSH authentication successful with Jenkins credentials"
+                                                authSuccessful = true
+                                                return true
+                                            } else {
+                                                echo "⏳ ENTERPRISE: SSH authentication not ready, retrying..."
+                                                sleep(15)
+                                                return false
+                                            }
+                                        }
+                                    } catch (Exception e) {
+                                        echo "⏳ ENTERPRISE: SSH test failed, retrying... (${e.getMessage()})"
+                                        sleep(15)
+                                        return false
+                                    }
+                                }
+                            }
+                        }
+
+                        if (authSuccessful) {
+
+                            // STEP 3: ENTERPRISE CONFIGURATION DEPLOYMENT - SIMPLIFIED APPROACH
+                            echo "🎯 ENTERPRISE: Deploying ECS configuration via direct SSH (most reliable)..."
+
+                            // Skip Ansible entirely - use direct SSH which is more reliable
+                            withCredentials([sshUserPrivateKey(credentialsId: 'jenkins-ssh', keyFileVariable: 'SSH_KEY')]) {
+                                sh """
+                                    ssh -o StrictHostKeyChecking=no \\
+                                        -o UserKnownHostsFile=/dev/null \\
+                                        -o ConnectTimeout=30 \\
+                                        -i \${SSH_KEY} \\
+                                        ec2-user@${ec2_ip} \\
+                                        '
+                                        set -e
+                                        echo "=== ENTERPRISE ECS CONFIGURATION STARTING ==="
+                                        echo "Target: \$(hostname)"
+                                        echo "Cluster: ${TF_VAR_cluster_name}"
+                                        echo "Time: \$(date)"
+
+                                        # Update system packages
+                                        echo "📦 Updating system packages..."
+                                        sudo yum update -y
+
+                                        # Install Docker (may already be installed)
+                                        echo "🐳 Installing Docker..."
+                                        sudo yum install -y docker || echo "Docker already installed"
+
+                                        # Install ECS initialization
+                                        echo "🚀 Installing ECS initialization..."
+                                        sudo yum install -y ecs-init
+
+                                        # Configure ECS cluster settings
+                                        echo "⚙️ Configuring ECS cluster settings..."
+                                        sudo tee /etc/ecs/ecs.config << EOF
+ECS_CLUSTER=${TF_VAR_cluster_name}
+ECS_ENABLE_CONTAINER_METADATA=true
+ECS_ENABLE_TASK_IAM_ROLE=true
+ECS_ENABLE_SPOT_INSTANCE_DRAINING=true
+ECS_CONTAINER_STOP_TIMEOUT=30s
+ECS_CONTAINER_START_TIMEOUT=3m
+ECS_DISABLE_IMAGE_CLEANUP=false
+ECS_AVAILABLE_LOGGING_DRIVERS=["json-file","awslogs"]
+EOF
+
+                                        # Start required services
+                                        echo "🚀 Starting Docker and ECS services..."
+                                        sudo service docker start
+                                        sudo start ecs
+
+                                        # Enable services for auto-start
+                                        echo "🔧 Enabling services for auto-start..."
+                                        sudo chkconfig docker on
+                                        sudo chkconfig ecs on
+
+                                        # Verify services are running
+                                        echo "✅ Verifying service status..."
+                                        sudo service docker status
+                                        sudo service ecs status
+
+                                        echo "=== ENTERPRISE ECS CONFIGURATION COMPLETED ==="
+                                        echo "Instance ready for ECS workloads"
+                                        '
+                                """
+                            }
+                            echo "✅ ENTERPRISE: Direct SSH configuration completed successfully"
+
+                            // STEP 4: POST-CONFIGURATION VALIDATION
+                            echo "🔍 ENTERPRISE: Performing post-configuration validation..."
+
+                            withCredentials([sshUserPrivateKey(credentialsId: 'jenkins-ssh', keyFileVariable: 'SSH_KEY')]) {
+                                sh """
+                                    ssh -o StrictHostKeyChecking=no \\
+                                        -o ConnectTimeout=10 \\
+                                        -i \${SSH_KEY} \\
+                                        ec2-user@${ec2_ip} \\
+                                        '
+                                        echo "=== ENTERPRISE VALIDATION REPORT ==="
+                                        echo "Instance: \$(hostname)"
+                                        echo "Date: \$(date)"
+                                        echo ""
+
+                                        echo "Docker Service Status:"
+                                        sudo service docker status || echo "Docker service check failed"
+                                        echo ""
+
+                                        echo "ECS Service Status:"
+                                        sudo service ecs status || echo "ECS service check failed"
+                                        echo ""
+
+                                        echo "ECS Configuration:"
+                                        cat /etc/ecs/ecs.config || echo "ECS config file not found"
+                                        echo ""
+
+                                        echo "ECS Agent Metadata (if available):"
+                                        timeout 10 curl -s http://localhost:51678/v1/metadata 2>/dev/null | head -10 || echo "ECS metadata not yet available"
+                                        echo ""
+
+                                        echo "=== VALIDATION COMPLETED ==="
+                                        '
+                                """
+                            }
+
+                            echo "✅ ENTERPRISE: Configuration and validation completed successfully"
+
+                        } else {
+                            // ENTERPRISE DIAGNOSTICS FOR FAILED AUTHENTICATION
+                            echo "❌ ENTERPRISE: SSH authentication failed - collecting diagnostics..."
+
+                            sh """
+                                echo "=== ENTERPRISE DIAGNOSTIC REPORT ==="
+                                echo "Authentication Method: Jenkins Credential Store"
+                                echo "Credential ID: jenkins-ssh"
+                                echo "Target Instance: ${ec2_ip}"
+                                echo "Expected User: ec2-user"
+                                echo "Jenkins Server: \$(hostname)"
+                                echo "Time: \$(date)"
+                                echo ""
+
+                                echo "=== Network Connectivity Tests ==="
+                                echo "Ping Test:"
+                                ping -c 3 ${ec2_ip} || echo "Ping failed"
+                                echo ""
+
+                                echo "SSH Port Test:"
+                                nc -z -v ${ec2_ip} 22 || echo "SSH port not accessible"
+                                echo ""
+
+                                echo "=== AWS Instance Information ==="
+                                aws ec2 describe-instances \\
+                                    --filters "Name=ip-address,Values=${ec2_ip}" \\
+                                    --query 'Reservations[*].Instances[*].[InstanceId,State.Name,KeyName,LaunchTime]' \\
+                                    --output table 2>/dev/null || echo "Could not retrieve instance information"
+                                echo ""
+
+                                echo "=== Security Group Analysis ==="
+                                INSTANCE_SG=\$(aws ec2 describe-instances \\
+                                    --filters "Name=ip-address,Values=${ec2_ip}" \\
+                                    --query 'Reservations[*].Instances[*].SecurityGroups[0].GroupId' \\
+                                    --output text 2>/dev/null || echo "unknown")
+
+                                if [ "\$INSTANCE_SG" != "unknown" ]; then
+                                    echo "Instance Security Group: \$INSTANCE_SG"
+                                    aws ec2 describe-security-groups \\
+                                        --group-ids \$INSTANCE_SG \\
+                                        --query 'SecurityGroups[*].IpPermissions[?FromPort==`22`]' \\
+                                        --output table 2>/dev/null || echo "Could not retrieve security group rules"
+                                else
+                                    echo "Could not determine instance security group"
+                                fi
+                                echo ""
+
+                                echo "=== TROUBLESHOOTING RECOMMENDATIONS ==="
+                                echo "1. Verify Jenkins credential 'jenkins-ssh' contains correct private key"
+                                echo "2. Confirm AWS key pair 'nvhi-atsila-deployer' matches Jenkins credential"
+                                echo "3. Check security group allows SSH (port 22) from Jenkins server IP: 38.110.1.139"
+                                echo "4. Ensure EC2 instance has completed initialization"
+                                echo "5. Verify IAM permissions for EC2 operations"
+
+                                echo "=== END DIAGNOSTIC REPORT ==="
+                            """
+
+                            throw new Exception("ENTERPRISE: SSH authentication failed - see diagnostic report for troubleshooting")
+                        }
+
+                    } else {
+                        throw new Exception("ENTERPRISE: Cannot proceed - EC2 instance IP address not available")
+                    }
                 }
-
-                // Secure Ansible configuration
-                ansiblePlaybook(
-                    playbook: 'ansible/configure_ecs.yml',
-                    inventory: 'ansible/hosts',
-                    credentialsId: env.SSH_CRED_ID
-                )
-
-                echo "✅ SECURITY: EC2 optimally configured for ECS with Ansible"
             }
         }
 
@@ -436,31 +675,45 @@ pipeline {
                             --network-mode bridge \\
                             --container-definitions file://task-definition.json \\
                             --region ${AWS_REGION}
-
-                        # Perform zero-downtime rolling deployment with service check
-                        echo "🔄 Performing secure zero-downtime deployment..."
-
-                        # Wait for service to be active if it was just created
-                        echo "⏳ Ensuring ECS service is ready for deployment..."
-                        timeout=300
-                        while [ \$timeout -gt 0 ]; do
-                            if aws ecs describe-services --cluster ${TF_VAR_cluster_name} --services ${TF_VAR_cluster_name}-service --region ${AWS_REGION} 2>/dev/null | grep -q "ACTIVE"; then
-                                echo "✅ ECS service is active and ready"
-                                break
+                    """
+
+                    // Create or update based on the service's exact status; grepping for "ACTIVE" would also match "INACTIVE" (a deleted service)
+                    def serviceExists = sh(
+                        script: """
+                            if [ "\$(aws ecs describe-services --cluster ${TF_VAR_cluster_name} --services ${TF_VAR_cluster_name}-service --region ${AWS_REGION} --query 'services[0].status' --output text 2>/dev/null)" = "ACTIVE" ]; then
+                                echo "true"
                             else
-                                echo "⏳ Waiting for ECS service to become active..."
-                                sleep 10
-                                timeout=\$((timeout-10))
+                                echo "false"
                             fi
-                        done
-
-                        # Update the service
-                        aws ecs update-service \\
-                            --cluster ${TF_VAR_cluster_name} \\
-                            --service ${TF_VAR_cluster_name}-service \\
-                            --force-new-deployment \\
-                            --region ${AWS_REGION}
-
+                        """,
+                        returnStdout: true
+                    ).trim()
+
+                    if (serviceExists == "false") {
+                        echo "🆕 Creating new ECS service..."
+                        sh """
+                            # Create new service since it doesn't exist
+                            aws ecs create-service \\
+                                --cluster ${TF_VAR_cluster_name} \\
+                                --service-name ${TF_VAR_cluster_name}-service \\
+                                --task-definition ${TF_VAR_cluster_name} \\
+                                --desired-count 1 \\
+                                --launch-type EC2 \\
+                                --region ${AWS_REGION}
+                        """
+                    } else {
+                        echo "🔄 Updating existing ECS service..."
+                        sh """
+                            # Update existing service
+                            aws ecs update-service \\
+                                --cluster ${TF_VAR_cluster_name} \\
+                                --service ${TF_VAR_cluster_name}-service \\
+                                --force-new-deployment \\
+                                --region ${AWS_REGION}
+                        """
+                    }
+
+                    sh """
                         # Wait for deployment to stabilize with security monitoring
                         echo "⏳ Waiting for secure service deployment to stabilize..."
                         aws ecs wait services-stable \\
@@ -570,8 +823,8 @@ pipeline {
                 script {
                     echo "📊 SECURITY: Collecting deployment artifacts and performing secure cleanup..."
 
-                    // Archive comprehensive deployment artifacts for audit
-                    archiveArtifacts artifacts: 'ansible/hosts,deployment-audit.json,task-definition.json', allowEmptyArchive: true
+                    // Archive comprehensive deployment artifacts for audit (skip ansible/hosts since we're not creating it)
+                    archiveArtifacts artifacts: 'deployment-audit.json,task-definition.json', allowEmptyArchive: true
 
                     // Secure workspace cleanup
                     cleanWs(deleteDirs: true, notFailBuild: true)
@@ -604,10 +857,11 @@ pipeline {
                     echo " • Infrastructure: ECS + VPC + Security Groups (cost-optimized) ✅"
                     echo " • Application Version: ${IMAGE_TAG}"
                     echo " • Application URL: http://${ec2_ip}:8080"
+                    echo " • Health Endpoint: http://${ec2_ip}:8080/health"
                     echo " • ECR Image: ${env.AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}:${IMAGE_TAG}"
                     echo " • Security Compliance: ✅ PASSED"
                     echo " • Git Commit: ${gitCommitHash}"
-                    echo " • Deployment Method: Jenkins + Terraform + Ansible ✅"
+                    echo " • Deployment Method: Jenkins + Terraform + Direct SSH ✅"
                     echo " • Cost Optimization: Free tier friendly ✅"
                     echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
 