automated terminal push

This commit is contained in:
lenape
2025-07-14 03:15:21 +00:00
parent c13a8aacf9
commit f6a98772f4
3 changed files with 297 additions and 307 deletions

462
Jenkinsfile vendored
View File

@@ -19,8 +19,6 @@ pipeline {
TF_BACKEND_PREFIX = 'ecs/terraform.tfstate'
TF_DDB_TABLE = 'nvhi-atsila-locks'
SSH_CRED_ID = 'jenkins-ssh'
// Application variables
TF_VAR_cluster_name = 'nvhi-atsila-cluster'
TF_VAR_vpc_cidr = '10.0.0.0/16'
@@ -78,7 +76,7 @@ pipeline {
echo " • Commit: ${gitCommit.take(8)}"
echo " • Author: ${gitAuthor}"
echo " • Container Registry: ECR (AWS-native, secure)"
echo " • Architecture: Direct ECS access (appropriate for microservice demo)"
echo " • Architecture: SSM-based ECS access (secure, keyless)"
echo " • Security Model: Principle of Least Privilege"
echo " • Timestamp: ${new Date()}"
@@ -91,7 +89,7 @@ pipeline {
"git_author": "${gitAuthor}",
"infrastructure_files_changed": "${infrastructureFiles}",
"container_registry": "ECR",
"architecture": "direct_ecs_access",
"architecture": "ssm_based_ecs_access",
"security_model": "principle_of_least_privilege",
"timestamp": "${new Date()}"
}"""
@@ -326,7 +324,7 @@ pipeline {
dir('terraform') {
script {
echo "🚨 SECURITY NOTICE: Infrastructure deployment requested"
echo "🏗️ ARCHITECTURE: Deploying ECS Cluster with direct access (optimal for microservice demo)"
echo "🏗️ ARCHITECTURE: Deploying ECS Cluster with SSM access (secure, keyless)"
echo "🔐 In production: This would require infrastructure-admin role"
echo "🚀 Attempting infrastructure deployment..."
@@ -356,271 +354,155 @@ pipeline {
stage('Configure & Deploy Application') {
parallel {
stage('Configure EC2 Instance') {
stage('Configure EC2 Instance via SSM') {
steps {
script {
def ec2_ip = ""
try {
sh "test -d terraform || (echo 'Terraform directory not found' && exit 1)"
ec2_ip = sh(
script: "cd terraform && terraform output -raw ecs_instance_public_ip",
withCredentials([[
$class: 'AmazonWebServicesCredentialsBinding',
credentialsId: env.AWS_CRED_ID
]]) {
script {
echo "🔧 ENTERPRISE: Configuring EC2 instance via SSM (no SSH required)"
// Get instance ID from Terraform output
def instanceId = ""
def ec2_ip = ""
try {
sh "test -d terraform || (echo 'Terraform directory not found' && exit 1)"
instanceId = sh(
script: "cd terraform && terraform output -raw ecs_instance_id",
returnStdout: true
).trim()
ec2_ip = sh(
script: "cd terraform && terraform output -raw ecs_instance_public_ip",
returnStdout: true
).trim()
} catch (Exception e) {
echo "⚠️ Could not get instance details: ${e.getMessage()}"
throw new Exception("ENTERPRISE: Cannot proceed - instance details not available")
}
echo "📍 Target Instance: ${instanceId} (${ec2_ip})"
// Wait for SSM agent to be ready
echo "⏳ Waiting for SSM agent to be ready..."
timeout(time: 10, unit: 'MINUTES') {
waitUntil {
script {
def ssmStatus = sh(
script: """
aws ssm describe-instance-information \\
--filters "Key=InstanceIds,Values=${instanceId}" \\
--region ${AWS_REGION} \\
--query 'InstanceInformationList[0].PingStatus' \\
--output text 2>/dev/null || echo "Offline"
""",
returnStdout: true
).trim()
if (ssmStatus == "Online") {
echo "✅ SSM agent is online"
return true
} else {
echo "⏳ Waiting for SSM agent... (Status: ${ssmStatus})"
sleep(20)
return false
}
}
}
}
// Configure ECS agent via SSM
echo "🔧 Configuring ECS agent via SSM..."
def commandId = sh(
script: """
aws ssm send-command \\
--instance-ids ${instanceId} \\
--document-name "AWS-RunShellScript" \\
--parameters 'commands=[
"echo \\"=== ECS Configuration via SSM ===\\"",
"echo \\"Cluster: ${TF_VAR_cluster_name}\\"",
"echo \\"Time: \$(date)\\"",
"echo \\"Instance: \$(hostname)\\"",
"sudo systemctl status ecs --no-pager",
"sudo systemctl status docker --no-pager",
"curl -s http://localhost:51678/v1/metadata || echo \\"ECS agent not ready\\"",
"sudo systemctl restart ecs",
"sleep 15",
"sudo systemctl status ecs --no-pager",
"curl -s http://localhost:51678/v1/metadata || echo \\"ECS agent still starting\\"",
"echo \\"=== Configuration completed ===\\""
]' \\
--region ${AWS_REGION} \\
--output text \\
--query 'Command.CommandId'
""",
returnStdout: true
).trim()
} catch (Exception e) {
echo "⚠️ Could not get EC2 IP - terraform output failed: ${e.getMessage()}"
ec2_ip = "unknown"
}
echo "🔧 ENTERPRISE: Configuring EC2 instance for ECS agent: ${ec2_ip}"
echo "🔐 ARCHITECTURE: Using Jenkins credential store for AWS key pair"
if (ec2_ip != "unknown") {
// STEP 1: ENTERPRISE INSTANCE READINESS CHECK
echo "🔍 ENTERPRISE: Validating EC2 instance readiness..."
echo "📋 SSM Command ID: ${commandId}"
timeout(time: 15, unit: 'MINUTES') {
waitUntil {
script {
// Test network connectivity
def pingResult = sh(
script: "ping -c 1 -W 5 ${ec2_ip} >/dev/null 2>&1 && echo 'ping_ok' || echo 'ping_failed'",
returnStdout: true
).trim()
if (pingResult != "ping_ok") {
echo "⏳ ENTERPRISE: Instance not responding to ping, waiting..."
sleep(20)
return false
}
// Test SSH port availability
def sshResult = sh(
script: "nc -z -w5 ${ec2_ip} 22 >/dev/null 2>&1 && echo 'ssh_ready' || echo 'ssh_not_ready'",
returnStdout: true
).trim()
if (sshResult != "ssh_ready") {
echo "⏳ ENTERPRISE: SSH service not ready, waiting..."
sleep(20)
return false
}
echo "✅ ENTERPRISE: Instance is ready for SSH connection"
return true
}
}
}
// Wait for command completion.
// `aws ssm wait command-executed` exits non-zero when the command finishes in a
// failed state or when the waiter runs out of polling attempts. A plain `sh`
// step would abort the stage right here, before the command output, status and
// error output can be collected. Capture the exit code instead and let the
// explicit status check decide whether the stage fails.
echo "⏳ Waiting for SSM command completion..."
def waitExitCode = sh(
    script: """
        aws ssm wait command-executed \\
            --command-id ${commandId} \\
            --instance-id ${instanceId} \\
            --region ${AWS_REGION}
    """,
    returnStatus: true
)
if (waitExitCode != 0) {
    echo "⚠️ SSM waiter exited with code ${waitExitCode} - inspecting command status for details"
}
// STEP 2: ENTERPRISE SSH AUTHENTICATION TEST
echo "🔐 ENTERPRISE: Testing SSH authentication with Jenkins credentials..."
// Get command output
echo "📋 SSM Command Output:"
sh """
aws ssm get-command-invocation \\
--command-id ${commandId} \\
--instance-id ${instanceId} \\
--region ${AWS_REGION} \\
--query 'StandardOutputContent' \\
--output text
"""
def authSuccessful = false
// Check for any errors
def commandStatus = sh(
script: """
aws ssm get-command-invocation \\
--command-id ${commandId} \\
--instance-id ${instanceId} \\
--region ${AWS_REGION} \\
--query 'Status' \\
--output text
""",
returnStdout: true
).trim()
timeout(time: 5, unit: 'MINUTES') {
waitUntil {
script {
try {
withCredentials([sshUserPrivateKey(credentialsId: 'jenkins-ssh', keyFileVariable: 'SSH_KEY', usernameVariable: 'SSH_USER')]) {
def authTest = sh(
script: """
ssh -o ConnectTimeout=10 \\
-o StrictHostKeyChecking=no \\
-o UserKnownHostsFile=/dev/null \\
-o BatchMode=yes \\
-i \${SSH_KEY} \\
ec2-user@${ec2_ip} \\
'echo "ENTERPRISE_AUTH_SUCCESS"' 2>/dev/null || echo "auth_failed"
""",
returnStdout: true
).trim()
if (authTest.contains("ENTERPRISE_AUTH_SUCCESS")) {
echo "✅ ENTERPRISE: SSH authentication successful with Jenkins credentials"
authSuccessful = true
return true
} else {
echo "⏳ ENTERPRISE: SSH authentication not ready, retrying..."
sleep(15)
return false
}
}
} catch (Exception e) {
echo "⏳ ENTERPRISE: SSH test failed, retrying... (${e.getMessage()})"
sleep(15)
return false
}
}
}
}
if (authSuccessful) {
// STEP 3: ENTERPRISE CONFIGURATION DEPLOYMENT - SIMPLIFIED APPROACH
echo "🎯 ENTERPRISE: Deploying ECS configuration via direct SSH (most reliable)..."
// Skip Ansible entirely - use direct SSH which is more reliable
withCredentials([sshUserPrivateKey(credentialsId: 'jenkins-ssh', keyFileVariable: 'SSH_KEY')]) {
sh """
ssh -o StrictHostKeyChecking=no \\
-o UserKnownHostsFile=/dev/null \\
-o ConnectTimeout=30 \\
-i \${SSH_KEY} \\
ec2-user@${ec2_ip} \\
'
set -e
echo "=== ENTERPRISE ECS CONFIGURATION STARTING ==="
echo "Target: \$(hostname)"
echo "Cluster: ${TF_VAR_cluster_name}"
echo "Time: \$(date)"
# Update system packages
echo "📦 Updating system packages..."
sudo yum update -y
# Install Docker (may already be installed)
echo "🐳 Installing Docker..."
sudo yum install -y docker || echo "Docker already installed"
# Install ECS initialization
echo "🚀 Installing ECS initialization..."
sudo yum install -y ecs-init
# Configure ECS cluster settings
echo "⚙️ Configuring ECS cluster settings..."
sudo tee /etc/ecs/ecs.config << EOF
ECS_CLUSTER=${TF_VAR_cluster_name}
ECS_ENABLE_CONTAINER_METADATA=true
ECS_ENABLE_TASK_IAM_ROLE=true
ECS_ENABLE_SPOT_INSTANCE_DRAINING=true
ECS_CONTAINER_STOP_TIMEOUT=30s
ECS_CONTAINER_START_TIMEOUT=3m
ECS_DISABLE_IMAGE_CLEANUP=false
ECS_AVAILABLE_LOGGING_DRIVERS=["json-file","awslogs"]
EOF
# Start required services
echo "🚀 Starting Docker and ECS services..."
sudo service docker start
sudo start ecs
# Enable services for auto-start
echo "🔧 Enabling services for auto-start..."
sudo chkconfig docker on
sudo chkconfig ecs on
# Verify services are running
echo "✅ Verifying service status..."
sudo service docker status
sudo service ecs status
echo "=== ENTERPRISE ECS CONFIGURATION COMPLETED ==="
echo "Instance ready for ECS workloads"
'
"""
}
echo "✅ ENTERPRISE: Direct SSH configuration completed successfully"
// STEP 4: POST-CONFIGURATION VALIDATION
echo "🔍 ENTERPRISE: Performing post-configuration validation..."
withCredentials([sshUserPrivateKey(credentialsId: 'jenkins-ssh', keyFileVariable: 'SSH_KEY')]) {
sh """
ssh -o StrictHostKeyChecking=no \\
-o ConnectTimeout=10 \\
-i \${SSH_KEY} \\
ec2-user@${ec2_ip} \\
'
echo "=== ENTERPRISE VALIDATION REPORT ==="
echo "Instance: \$(hostname)"
echo "Date: \$(date)"
echo ""
echo "Docker Service Status:"
sudo service docker status || echo "Docker service check failed"
echo ""
echo "ECS Service Status:"
sudo service ecs status || echo "ECS service check failed"
echo ""
echo "ECS Configuration:"
cat /etc/ecs/ecs.config || echo "ECS config file not found"
echo ""
echo "ECS Agent Metadata (if available):"
timeout 10 curl -s http://localhost:51678/v1/metadata 2>/dev/null | head -10 || echo "ECS metadata not yet available"
echo ""
echo "=== VALIDATION COMPLETED ==="
'
"""
}
echo "✅ ENTERPRISE: Configuration and validation completed successfully"
} else {
// ENTERPRISE DIAGNOSTICS FOR FAILED AUTHENTICATION
echo "❌ ENTERPRISE: SSH authentication failed - collecting diagnostics..."
if (commandStatus != "Success") {
echo "❌ SSM Command failed with status: ${commandStatus}"
// Get error output
sh """
echo "=== ENTERPRISE DIAGNOSTIC REPORT ==="
echo "Authentication Method: Jenkins Credential Store"
echo "Credential ID: jenkins-ssh"
echo "Target Instance: ${ec2_ip}"
echo "Expected User: ec2-user"
echo "Jenkins Server: \$(hostname)"
echo "Time: \$(date)"
echo ""
echo "=== Network Connectivity Tests ==="
echo "Ping Test:"
ping -c 3 ${ec2_ip} || echo "Ping failed"
echo ""
echo "SSH Port Test:"
nc -z -v ${ec2_ip} 22 || echo "SSH port not accessible"
echo ""
echo "=== AWS Instance Information ==="
aws ec2 describe-instances \\
--filters "Name=ip-address,Values=${ec2_ip}" \\
--query 'Reservations[*].Instances[*].[InstanceId,State.Name,KeyName,LaunchTime]' \\
--output table 2>/dev/null || echo "Could not retrieve instance information"
echo ""
echo "=== Security Group Analysis ==="
INSTANCE_SG=\$(aws ec2 describe-instances \\
--filters "Name=ip-address,Values=${ec2_ip}" \\
--query 'Reservations[*].Instances[*].SecurityGroups[0].GroupId' \\
--output text 2>/dev/null || echo "unknown")
if [ "\$INSTANCE_SG" != "unknown" ]; then
echo "Instance Security Group: \$INSTANCE_SG"
aws ec2 describe-security-groups \\
--group-ids \$INSTANCE_SG \\
--query 'SecurityGroups[*].IpPermissions[*]' \\
--output table 2>/dev/null || echo "Could not retrieve security group rules"
else
echo "Could not determine instance security group"
fi
echo ""
echo "=== TROUBLESHOOTING RECOMMENDATIONS ==="
echo "1. Verify Jenkins credential 'jenkins-ssh' contains correct private key"
echo "2. Confirm AWS key pair 'nvhi-atsila-deployer' matches Jenkins credential"
echo "3. Check security group allows SSH (port 22) from Jenkins server IP: 38.110.1.139"
echo "4. Ensure EC2 instance has completed initialization"
echo "5. Verify IAM permissions for EC2 operations"
echo "=== END DIAGNOSTIC REPORT ==="
echo "Error Output:"
aws ssm get-command-invocation \\
--command-id ${commandId} \\
--instance-id ${instanceId} \\
--region ${AWS_REGION} \\
--query 'StandardErrorContent' \\
--output text
"""
throw new Exception("ENTERPRISE: SSH authentication failed - see diagnostic report for troubleshooting")
throw new Exception("SSM configuration command failed")
}
} else {
throw new Exception("ENTERPRISE: Cannot proceed - EC2 instance IP address not available")
echo "✅ ENTERPRISE: EC2 instance configured via SSM successfully"
echo """
🔐 SSM Session Manager Access:
To connect to the instance for troubleshooting:
aws ssm start-session \\
--target ${instanceId} \\
--region ${AWS_REGION}
Instance ID: ${instanceId}
Instance IP: ${ec2_ip}
"""
}
}
}
@@ -661,7 +543,7 @@ EOF
{"name": "GIT_COMMIT", "value": "${gitCommitHash}"},
{"name": "DEPLOYMENT_TIME", "value": "${new Date().format('yyyy-MM-dd HH:mm:ss')}"},
{"name": "CONTAINER_REGISTRY", "value": "ECR"},
{"name": "ARCHITECTURE", "value": "direct_ecs_access"}
{"name": "ARCHITECTURE", "value": "ssm_based_ecs_access"}
]
}]"""
@@ -677,15 +559,15 @@ EOF
--region ${AWS_REGION}
"""
// FIXED: Check if service exists and create/update accordingly
// Check if service exists and create/update accordingly
def serviceExists = sh(
script: '''
if aws ecs describe-services --cluster nvhi-atsila-cluster --services nvhi-atsila-cluster-service --region us-east-2 2>/dev/null | grep -q "ACTIVE"; then
script: """
if aws ecs describe-services --cluster ${TF_VAR_cluster_name} --services ${TF_VAR_cluster_name}-service --region ${AWS_REGION} 2>/dev/null | grep -q "ACTIVE"; then
echo "true"
else
echo "false"
fi
''',
""",
returnStdout: true
).trim()
@@ -713,14 +595,16 @@ EOF
"""
}
sh """
# Wait for deployment to stabilize with security monitoring
echo "⏳ Waiting for secure service deployment to stabilize..."
aws ecs wait services-stable \\
--cluster ${TF_VAR_cluster_name} \\
--services ${TF_VAR_cluster_name}-service \\
--region ${AWS_REGION}
"""
// Wait for deployment with better timeout handling
echo "⏳ Waiting for secure service deployment to stabilize..."
timeout(time: 10, unit: 'MINUTES') {
sh """
aws ecs wait services-stable \\
--cluster ${TF_VAR_cluster_name} \\
--services ${TF_VAR_cluster_name}-service \\
--region ${AWS_REGION}
"""
}
echo "✅ SECURITY: Application deployed successfully with ECR integration"
}
@@ -747,7 +631,7 @@ EOF
}
echo "🏥 SECURITY: Running health validation on http://${ec2_ip}:8080/health"
echo "🔗 ARCHITECTURE: Direct access appropriate for microservice demonstration"
echo "🔗 ARCHITECTURE: Direct access with SSM management (secure and efficient)"
if (ec2_ip != "unknown") {
timeout(time: 5, unit: 'MINUTES') {
@@ -804,7 +688,7 @@ EOF
echo "🛡️ SECURITY: Validating network security and access controls..."
echo " Testing only allowed ports are accessible"
echo " Verifying ECR integration working correctly"
echo " Confirming direct access security model"
echo " Confirming SSM-based access security model"
echo "✅ SECURITY: All smoke tests and security validations passed"
"""
@@ -823,7 +707,7 @@ EOF
script {
echo "📊 SECURITY: Collecting deployment artifacts and performing secure cleanup..."
// Archive comprehensive deployment artifacts for audit (skip ansible/hosts since we're not creating it)
// Archive comprehensive deployment artifacts for audit
archiveArtifacts artifacts: 'deployment-audit.json,task-definition.json', allowEmptyArchive: true
// Secure workspace cleanup
@@ -836,6 +720,7 @@ EOF
success {
script {
def ec2_ip = ""
def instanceId = ""
def gitCommitHash = ""
try {
sh "test -d terraform || echo 'Terraform directory not found'"
@@ -843,29 +728,39 @@ EOF
script: "cd terraform && terraform output -raw ecs_instance_public_ip 2>/dev/null || echo 'unknown'",
returnStdout: true
).trim()
instanceId = sh(
script: "cd terraform && terraform output -raw ecs_instance_id 2>/dev/null || echo 'unknown'",
returnStdout: true
).trim()
gitCommitHash = sh(script: 'git rev-parse HEAD 2>/dev/null || echo "unknown"', returnStdout: true).trim().take(8)
} catch (Exception e) {
ec2_ip = "unknown"
instanceId = "unknown"
gitCommitHash = "unknown"
}
echo "🎉 OPTIMAL ARCHITECTURE DEPLOYMENT SUCCESSFUL!"
echo "🎉 SSM-BASED SECURE DEPLOYMENT SUCCESSFUL!"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "📋 DEPLOYMENT SUMMARY (OPTIMIZED FOR INTERVIEW):"
echo "📋 DEPLOYMENT SUMMARY (SSM-OPTIMIZED FOR SECURITY):"
echo " • Container Registry: ECR (AWS-native, secure) ✅"
echo " • Architecture: Direct ECS access (appropriate for microservice) ✅"
echo " • Infrastructure: ECS + VPC + Security Groups (cost-optimized) ✅"
echo " • Architecture: SSM-based ECS access (keyless, secure) ✅"
echo " • Infrastructure: ECS + VPC + Security Groups (SSM-enabled) ✅"
echo " • Application Version: ${IMAGE_TAG}"
echo " • Application URL: http://${ec2_ip}:8080"
echo " • Health Endpoint: http://${ec2_ip}:8080/health"
echo " • ECR Image: ${env.AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}:${IMAGE_TAG}"
echo " • Security Compliance: ✅ PASSED"
echo " • Security Compliance: ✅ PASSED (No SSH keys required)"
echo " • Git Commit: ${gitCommitHash}"
echo " • Deployment Method: Jenkins + Terraform + Direct SSH ✅"
echo " • Deployment Method: Jenkins + Terraform + SSM ✅"
echo " • Instance Access: SSM Session Manager (${instanceId}) ✅"
echo " • Cost Optimization: Free tier friendly ✅"
echo ""
// Copy-paste helpers printed after a successful deployment.
// send-command is non-interactive: a following tail (`-f`) would never exit and
// would return no output until the execution timeout, so print a bounded tail.
// The region is passed explicitly so the hint works regardless of the caller's
// default CLI region, matching the start-session hint.
echo "🔐 SSM ACCESS COMMANDS:"
echo " • Connect to instance: aws ssm start-session --target ${instanceId} --region ${AWS_REGION}"
echo " • View logs: aws ssm send-command --instance-ids ${instanceId} --document-name AWS-RunShellScript --parameters 'commands=[\"tail -n 100 /var/log/ecs/ecs-agent.log\"]' --region ${AWS_REGION}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
currentBuild.description = "✅ ECR | Direct Access | ${IMAGE_TAG} | ${ec2_ip}"
currentBuild.description = "✅ SSM | ECR | ${IMAGE_TAG} | ${ec2_ip}"
}
}
@@ -875,6 +770,11 @@ EOF
echo "🔍 Check the logs for issues with ECR authentication or ECS deployment"
echo "💡 Security audit trail: deployment-audit.json"
echo "🔒 State backup available: secure-state-backup-${BUILD_NUMBER}.json"
echo ""
echo "🔧 SSM TROUBLESHOOTING:"
echo " • If instance is available, connect via: aws ssm start-session --target <INSTANCE_ID> --region ${AWS_REGION}"
echo " • Check SSM agent status on instance: sudo systemctl status amazon-ssm-agent"
echo " • Verify IAM permissions include AmazonSSMManagedInstanceCore policy"
currentBuild.description = "❌ Failed: ${env.DEPLOYMENT_TYPE} | ${env.STAGE_NAME}"
}

View File

@@ -57,20 +57,13 @@ resource "aws_route_table_association" "public" {
route_table_id = aws_route_table.public.id
}
# Security Group
# Security Group - Updated for SSM (removed SSH, kept application access)
resource "aws_security_group" "ecs_sg" {
name = "${var.cluster_name}-sg"
description = "Allow SSH & HTTP to ECS"
description = "Allow HTTP to ECS and HTTPS outbound for SSM/ECR"
vpc_id = aws_vpc.main.id
ingress {
description = "SSH from Jenkins"
from_port = 22
to_port = 22
protocol = "tcp"
cidr_blocks = [var.jenkins_ip_cidr]
}
# HTTP access for application
ingress {
description = "HTTP from anywhere"
from_port = 8080
@@ -79,11 +72,30 @@ resource "aws_security_group" "ecs_sg" {
cidr_blocks = ["0.0.0.0/0"]
}
# HTTPS outbound for SSM, ECR, and AWS services
egress {
description = "All outbound traffic"
from_port = 0
to_port = 0
protocol = "-1"
description = "HTTPS outbound for AWS services"
from_port = 443
to_port = 443
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
# HTTP outbound for package updates
egress {
description = "HTTP outbound for package updates"
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
# DNS resolution
egress {
description = "DNS resolution"
from_port = 53
to_port = 53
protocol = "udp"
cidr_blocks = ["0.0.0.0/0"]
}
@@ -92,7 +104,7 @@ resource "aws_security_group" "ecs_sg" {
}
}
# Key Pair
# Key Pair (keeping for compatibility, but not needed for SSM)
resource "aws_key_pair" "deployer" {
key_name = var.key_pair_name
public_key = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDDFBAOogBj/GHKXQs6FLROGQfXkZe2uKbRron0We7ZOLgt6e1bI7U8IMe+DIH250CHSi4R5DBYFQF5Bk1TkS5cgMtPIAb87vRUGI3sLs29DQA/kllYiZlQi9ejxcEz2+TRWn10Q/Kltlb6ESNLnnnTsIUUxKUeY3MKFFd+V13FleSVLGYondwPWYwD/XJ6a3VwSTJ1wFKO+lpKknSjDl2ZOgYpWFALPH+EwMlRGVMrUXAB604zqR1XOzYXAAWnhmmC9IGgCzU/5JnEgFyhfZbR3kpEH8SmSXahvdFZERp+3j9d3ROjchqnf0Z0zZ7vzX+G+jvzT/jGOkzH9tx0/OqIO9f47OFF8iUfZgUtJU1QGbepdsmQqognhxfJQfMZbVtKUw7zt+mzJz3A0XcRp7IwVHaqJ2QW2dpXi4UbWtejtZqROg6byWq2FpvFGNIT3eiKTf+EpCoOec6YGSrRQlj73Ob0+FhmsyQ6e8KKncaRYx38PqtnWsI3UnLtdKmEJmDBPI0ipxJzmKJKtb0vtJPVYvFEpgiXSwnDX883rAUQrXR/EhOMmbMwk7JSes6/GXH9rWN10JHh1/i1LLpl+rg6VyktFgVBHzVw++y29QSfFixeTvFkkTS5kl//CpKd1GDQb9ZBH6SPgkgOjmASPUo+p5e/NiN/SIBSpYpMjOKs7Q== jacques@Xochiquetzal"
@@ -140,12 +152,18 @@ resource "aws_iam_role" "ecs_instance_role" {
}
}
# IAM Role Policy Attachment
# IAM Role Policy Attachment for ECS
resource "aws_iam_role_policy_attachment" "ecs_instance_role_policy" {
role = aws_iam_role.ecs_instance_role.name
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
}
# Attach the AWS-managed AmazonSSMManagedInstanceCore policy to the ECS
# instance role so the instance can be managed through Systems Manager.
resource "aws_iam_role_policy_attachment" "ecs_instance_ssm_policy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
  role       = aws_iam_role.ecs_instance_role.name
}
# IAM Instance Profile
resource "aws_iam_instance_profile" "ecs_instance_profile" {
name = "${var.cluster_name}-ecs-instance-profile"
@@ -170,17 +188,11 @@ resource "aws_ecs_cluster" "main" {
}
}
# User data script for ECS instance
# User data script for ECS instance with SSM
locals {
user_data = base64encode(<<-EOF
#!/bin/bash
yum update -y
yum install -y ecs-init
echo ECS_CLUSTER=${var.cluster_name} >> /etc/ecs/ecs.config
service docker start
start ecs
EOF
)
user_data = base64encode(templatefile("${path.module}/user_data.sh", {
cluster_name = var.cluster_name
}))
}
# EC2 Instance for ECS
@@ -241,6 +253,11 @@ output "ecs_instance_public_ip" {
value = aws_instance.ecs_instance.public_ip
}
# EC2 instance ID of the ECS host, exposed for tooling that reads it via
# `terraform output -raw ecs_instance_id`.
output "ecs_instance_id" {
  value       = aws_instance.ecs_instance.id
  description = "Instance ID for SSM access"
}
output "ecs_cluster_name" {
description = "Name of the ECS cluster"
value = aws_ecs_cluster.main.name

73
terraform/user_data.sh Normal file
View File

@@ -0,0 +1,73 @@
#!/bin/bash
# EC2 bootstrap (user data) script for the ECS container instance.
#
# This file is rendered by Terraform's templatefile(); the cluster_name
# placeholder is substituted before the script reaches the instance.
# Because Terraform treats every dollar-brace sequence as a template
# interpolation, shell variables in this file are written WITHOUT braces
# ($var). Keep it that way when editing.
#
# What the script does, in order:
#   1. Mirrors all output to /var/log/user-data.log, syslog and the console.
#   2. Updates packages and makes sure the SSM agent is installed and running.
#   3. Installs ecs-init and writes /etc/ecs/ecs.config for the cluster.
#   4. Starts Docker and queues the ECS agent to start.
#   5. Prints a short service status report.

# Send stdout/stderr to the log file, syslog (tag "user-data") and the console.
exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1

echo "=== Starting EC2 User Data Script ==="
echo "Timestamp: $(date)"

# Look up the instance ID from the instance metadata service. Request an
# IMDSv2 session token first so the lookup also works when IMDSv2 is enforced;
# fall back to a plain (IMDSv1) request if no token could be obtained.
IMDS_BASE="http://169.254.169.254/latest"
IMDS_TOKEN=$(curl -s --max-time 5 -X PUT "$IMDS_BASE/api/token" \
  -H "X-aws-ec2-metadata-token-ttl-seconds: 60" || true)
if [ -n "$IMDS_TOKEN" ]; then
  INSTANCE_ID=$(curl -s --max-time 5 \
    -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" \
    "$IMDS_BASE/meta-data/instance-id" || true)
else
  INSTANCE_ID=$(curl -s --max-time 5 "$IMDS_BASE/meta-data/instance-id" || true)
fi
[ -n "$INSTANCE_ID" ] || INSTANCE_ID="unknown"
echo "Instance ID: $INSTANCE_ID"
echo "Cluster Name: ${cluster_name}"

# Update system
echo "=== Updating system packages ==="
yum update -y

# Install and configure SSM agent (should already be installed on Amazon Linux 2)
echo "=== Configuring SSM Agent ==="
yum install -y amazon-ssm-agent
systemctl enable amazon-ssm-agent
systemctl start amazon-ssm-agent

# Install ECS agent
echo "=== Installing ECS Agent ==="
yum install -y ecs-init

# Configure ECS cluster
echo "=== Configuring ECS Cluster ==="
# Make sure the target directory exists before writing the config file.
mkdir -p /etc/ecs
cat > /etc/ecs/ecs.config << EOF
ECS_CLUSTER=${cluster_name}
ECS_ENABLE_LOGGING=true
ECS_LOGLEVEL=info
ECS_ENABLE_CONTAINER_METADATA=true
ECS_ENABLE_TASK_IAM_ROLE=true
ECS_AVAILABLE_LOGGING_DRIVERS=["json-file","awslogs"]
ECS_CONTAINER_STOP_TIMEOUT=30s
ECS_CONTAINER_START_TIMEOUT=3m
ECS_DISABLE_IMAGE_CLEANUP=false
EOF

# Start Docker and ECS
echo "=== Starting Docker and ECS services ==="
systemctl enable docker
systemctl start docker
# The ECS unit is ordered after cloud-init's final stage, which is the stage
# running this very script. A blocking "systemctl start ecs" would therefore
# wait for this script to finish while the script waits for systemctl: a
# deadlock. --no-block queues the start job and returns immediately; the agent
# comes up as soon as cloud-init completes.
systemctl enable --now --no-block ecs

# Verify services
echo "=== Service Status Check ==="
echo "SSM Agent Status:"
systemctl status amazon-ssm-agent --no-pager || echo "SSM agent status check failed"
echo "Docker Status:"
systemctl status docker --no-pager || echo "Docker status check failed"
echo "ECS Status:"
# The agent has only been queued at this point, so report whether the unit is
# enabled instead of probing the agent's introspection endpoint, which cannot
# answer until after this script has exited.
systemctl is-enabled ecs || echo "ECS service is not enabled"
echo "ECS agent start has been queued; it will start once cloud-init finishes"

echo "=== User Data Script Completed ==="
echo "Timestamp: $(date)"