
Infrastructure as Code Patterns


Master Infrastructure as Code with Cursor IDE and Claude Code. This guide covers Terraform modules, CloudFormation templates, Pulumi programs, Ansible playbooks, GitOps workflows, and production IaC patterns with AI assistance.

  1. Initialize IaC Project

    # Generate IaC configuration
    Agent: "Create infrastructure setup with:
    - Terraform modules for AWS
    - State management with S3
    - Environment separation
    - Security best practices
    - Cost optimization"
  2. Install IaC MCP Servers (optional)

    # AWS
    claude mcp add aws -- docker run -e AWS_ACCESS_KEY_ID=... ghcr.io/aws/mcp-server
    # Azure (no URL endpoint - use npm package)
    claude mcp add azure -- npx -y @azure/mcp@latest
    # Kubernetes
    claude mcp add k8s -- npx -y kubernetes-mcp-server
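    You can confirm the servers were registered before moving on (assumes the standard Claude Code CLI):
    # List configured MCP servers
    claude mcp list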
  3. Configure AI Rules

    # .cursorrules or CLAUDE.md
    IaC best practices:
    - Use remote state management
    - Implement proper tagging
    - Follow least privilege principle
    - Use modules for reusability
    - Version control everything
    - Test infrastructure changes
    - Document all resources
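
The remote-state rule above pairs with the S3 state management requested in step 1. A minimal backend sketch, assuming an existing S3 bucket and DynamoDB lock table (both names here are placeholders):

# environments/production/backend.tf
terraform {
  backend "s3" {
    bucket         = "my-org-terraform-state"
    key            = "production/network/terraform.tfstate"
    region         = "us-east-1"
    dynamodb_table = "terraform-locks"
    encrypt        = true
  }
}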
# AI Prompt
Agent: "Create Terraform module structure for:
- VPC with public/private subnets
- EKS cluster with node groups
- RDS with read replicas
- Application load balancer
- Security groups and IAM"
# modules/vpc/main.tf
terraform {
  required_version = ">= 1.5.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
  }
}

locals {
  common_tags = merge(
    var.tags,
    {
      Module       = "vpc"
      ManagedBy    = "terraform"
      Environment  = var.environment
      LastModified = timestamp()
    }
  )

  max_subnet_length = max(
    length(var.private_subnets),
    length(var.public_subnets),
    length(var.database_subnets)
  )

  nat_gateway_count = var.single_nat_gateway ? 1 : var.one_nat_gateway_per_az ? length(var.azs) : local.max_subnet_length
}

# VPC
resource "aws_vpc" "this" {
  cidr_block           = var.cidr
  enable_dns_hostnames = var.enable_dns_hostnames
  enable_dns_support   = var.enable_dns_support

  tags = merge(
    local.common_tags,
    {
      Name = format("%s-vpc", var.name)
    }
  )
}

# Internet Gateway
resource "aws_internet_gateway" "this" {
  count = length(var.public_subnets) > 0 ? 1 : 0

  vpc_id = aws_vpc.this.id

  tags = merge(
    local.common_tags,
    {
      Name = format("%s-igw", var.name)
    }
  )
}

# Public Subnets
resource "aws_subnet" "public" {
  count = length(var.public_subnets)

  vpc_id                  = aws_vpc.this.id
  cidr_block              = element(var.public_subnets, count.index)
  availability_zone       = element(var.azs, count.index)
  map_public_ip_on_launch = var.map_public_ip_on_launch

  tags = merge(
    local.common_tags,
    {
      Name = format("%s-public-%s", var.name, element(var.azs, count.index))
      Type = "public"
      "kubernetes.io/role/elb"                    = "1"
      "kubernetes.io/cluster/${var.cluster_name}" = "shared"
    }
  )
}

# Private Subnets
resource "aws_subnet" "private" {
  count = length(var.private_subnets)

  vpc_id            = aws_vpc.this.id
  cidr_block        = element(var.private_subnets, count.index)
  availability_zone = element(var.azs, count.index)

  tags = merge(
    local.common_tags,
    {
      Name = format("%s-private-%s", var.name, element(var.azs, count.index))
      Type = "private"
      "kubernetes.io/role/internal-elb"           = "1"
      "kubernetes.io/cluster/${var.cluster_name}" = "shared"
    }
  )
}

# Database Subnets
resource "aws_subnet" "database" {
  count = length(var.database_subnets)

  vpc_id            = aws_vpc.this.id
  cidr_block        = element(var.database_subnets, count.index)
  availability_zone = element(var.azs, count.index)

  tags = merge(
    local.common_tags,
    {
      Name = format("%s-database-%s", var.name, element(var.azs, count.index))
      Type = "database"
    }
  )
}

# Elastic IPs for NAT Gateways
resource "aws_eip" "nat" {
  count = var.enable_nat_gateway ? local.nat_gateway_count : 0

  domain = "vpc"

  tags = merge(
    local.common_tags,
    {
      Name = format("%s-eip-%s", var.name, element(var.azs, count.index))
    }
  )

  depends_on = [aws_internet_gateway.this]
}

# NAT Gateways
resource "aws_nat_gateway" "this" {
  count = var.enable_nat_gateway ? local.nat_gateway_count : 0

  allocation_id = element(aws_eip.nat[*].id, count.index)
  subnet_id     = element(aws_subnet.public[*].id, count.index)

  tags = merge(
    local.common_tags,
    {
      Name = format("%s-nat-%s", var.name, element(var.azs, count.index))
    }
  )

  depends_on = [aws_internet_gateway.this]
}

# Route Tables
resource "aws_route_table" "public" {
  count = length(var.public_subnets) > 0 ? 1 : 0

  vpc_id = aws_vpc.this.id

  tags = merge(
    local.common_tags,
    {
      Name = format("%s-public-rt", var.name)
      Type = "public"
    }
  )
}

resource "aws_route_table" "private" {
  count = var.enable_nat_gateway ? local.nat_gateway_count : 0

  vpc_id = aws_vpc.this.id

  tags = merge(
    local.common_tags,
    {
      Name = format("%s-private-rt-%s", var.name, element(var.azs, count.index))
      Type = "private"
    }
  )
}

# Routes
resource "aws_route" "public_internet_gateway" {
  count = length(var.public_subnets) > 0 ? 1 : 0

  route_table_id         = aws_route_table.public[0].id
  destination_cidr_block = "0.0.0.0/0"
  gateway_id             = aws_internet_gateway.this[0].id

  timeouts {
    create = "5m"
  }
}

resource "aws_route" "private_nat_gateway" {
  count = var.enable_nat_gateway ? local.nat_gateway_count : 0

  route_table_id         = element(aws_route_table.private[*].id, count.index)
  destination_cidr_block = "0.0.0.0/0"
  nat_gateway_id         = element(aws_nat_gateway.this[*].id, count.index)

  timeouts {
    create = "5m"
  }
}

# Route Table Associations
resource "aws_route_table_association" "public" {
  count = length(var.public_subnets)

  subnet_id      = element(aws_subnet.public[*].id, count.index)
  route_table_id = aws_route_table.public[0].id
}

resource "aws_route_table_association" "private" {
  count = length(var.private_subnets)

  subnet_id = element(aws_subnet.private[*].id, count.index)
  route_table_id = element(
    aws_route_table.private[*].id,
    var.single_nat_gateway ? 0 : count.index
  )
}

# VPC Endpoints
data "aws_vpc_endpoint_service" "s3" {
  service      = "s3"
  service_type = "Gateway"
}

resource "aws_vpc_endpoint" "s3" {
  count = var.enable_s3_endpoint ? 1 : 0

  vpc_id       = aws_vpc.this.id
  service_name = data.aws_vpc_endpoint_service.s3.service_name

  tags = merge(
    local.common_tags,
    {
      Name = format("%s-s3-endpoint", var.name)
    }
  )
}

resource "aws_vpc_endpoint_route_table_association" "s3_public" {
  count = var.enable_s3_endpoint && length(var.public_subnets) > 0 ? 1 : 0

  vpc_endpoint_id = aws_vpc_endpoint.s3[0].id
  route_table_id  = aws_route_table.public[0].id
}

resource "aws_vpc_endpoint_route_table_association" "s3_private" {
  count = var.enable_s3_endpoint ? local.nat_gateway_count : 0

  vpc_endpoint_id = aws_vpc_endpoint.s3[0].id
  route_table_id  = element(aws_route_table.private[*].id, count.index)
}

# Flow Logs
resource "aws_flow_log" "this" {
  count = var.enable_flow_log ? 1 : 0

  iam_role_arn    = aws_iam_role.flow_log[0].arn
  log_destination = aws_cloudwatch_log_group.flow_log[0].arn
  traffic_type    = var.flow_log_traffic_type
  vpc_id          = aws_vpc.this.id

  tags = merge(
    local.common_tags,
    {
      Name = format("%s-flow-log", var.name)
    }
  )
}

resource "aws_cloudwatch_log_group" "flow_log" {
  count = var.enable_flow_log ? 1 : 0

  name              = "/aws/vpc/${var.name}"
  retention_in_days = var.flow_log_retention_in_days
  kms_key_id        = var.flow_log_kms_key_id

  tags = local.common_tags
}

resource "aws_iam_role" "flow_log" {
  count = var.enable_flow_log ? 1 : 0

  name = format("%s-flow-log-role", var.name)

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Principal = {
          Service = "vpc-flow-logs.amazonaws.com"
        }
        Action = "sts:AssumeRole"
      }
    ]
  })

  tags = local.common_tags
}

resource "aws_iam_role_policy" "flow_log" {
  count = var.enable_flow_log ? 1 : 0

  name = format("%s-flow-log-policy", var.name)
  role = aws_iam_role.flow_log[0].id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "logs:CreateLogGroup",
          "logs:CreateLogStream",
          "logs:PutLogEvents",
          "logs:DescribeLogGroups",
          "logs:DescribeLogStreams"
        ]
        Resource = "*"
      }
    ]
  })
}
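
A sketch of how an environment root could consume the module above, assuming its variables.tf declares matching inputs; the names and CIDR values are placeholders:

# environments/production/main.tf
module "vpc" {
  source = "../../modules/vpc"

  name         = "acme-prod"
  environment  = "production"
  cidr         = "10.0.0.0/16"
  azs          = ["us-east-1a", "us-east-1b", "us-east-1c"]
  cluster_name = "acme-prod-eks"

  public_subnets   = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
  private_subnets  = ["10.0.11.0/24", "10.0.12.0/24", "10.0.13.0/24"]
  database_subnets = ["10.0.21.0/24", "10.0.22.0/24", "10.0.23.0/24"]

  enable_nat_gateway     = true
  one_nat_gateway_per_az = true
  enable_s3_endpoint     = true
  enable_flow_log        = true

  tags = {
    Project = "acme"
  }
}

Run terraform init, terraform plan, and terraform apply from the environment directory to review and apply the change.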
# AI Prompt
Agent: "Create CloudFormation template for:
- Three-tier architecture
- Auto-scaling groups
- RDS Multi-AZ
- ElastiCache cluster
- CloudFront distribution"
# templates/infrastructure.yaml
AWSTemplateFormatVersion: '2010-09-09'
Description: 'Three-tier application infrastructure with HA and auto-scaling'
Parameters:
EnvironmentName:
Description: Environment name prefix
Type: String
Default: production
VPCCidr:
Description: CIDR block for VPC
Type: String
Default: 10.0.0.0/16
PublicSubnetCidrs:
Description: CIDR blocks for public subnets
Type: CommaDelimitedList
Default: "10.0.1.0/24,10.0.2.0/24,10.0.3.0/24"
PrivateSubnetCidrs:
Description: CIDR blocks for private subnets
Type: CommaDelimitedList
Default: "10.0.11.0/24,10.0.12.0/24,10.0.13.0/24"
DatabaseSubnetCidrs:
Description: CIDR blocks for database subnets
Type: CommaDelimitedList
Default: "10.0.21.0/24,10.0.22.0/24,10.0.23.0/24"
InstanceType:
Description: EC2 instance type for application servers
Type: String
Default: t3.medium
AllowedValues:
- t3.small
- t3.medium
- t3.large
- m5.large
- m5.xlarge
DatabaseInstanceType:
Description: RDS instance type
Type: String
Default: db.t3.medium
DatabasePassword:
Description: RDS master password
Type: String
NoEcho: true
MinLength: 8
MaxLength: 41
KeyPairName:
Description: EC2 key pair name
Type: AWS::EC2::KeyPair::KeyName
MinSize:
Description: Minimum number of instances
Type: Number
Default: 2
MinValue: 1
MaxSize:
Description: Maximum number of instances
Type: Number
Default: 10
MinValue: 1
DesiredCapacity:
Description: Desired number of instances
Type: Number
Default: 4
MinValue: 1
LatestAmiId:
Description: SSM parameter path for the latest Amazon Linux 2 AMI
Type: AWS::SSM::Parameter::Value<AWS::EC2::Image::Id>
Default: /aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2
Metadata:
AWS::CloudFormation::Interface:
ParameterGroups:
- Label:
default: "Network Configuration"
Parameters:
- VPCCidr
- PublicSubnetCidrs
- PrivateSubnetCidrs
- DatabaseSubnetCidrs
- Label:
default: "Server Configuration"
Parameters:
- InstanceType
- KeyPairName
- MinSize
- MaxSize
- DesiredCapacity
- Label:
default: "Database Configuration"
Parameters:
- DatabaseInstanceType
- DatabasePassword
Resources:
# VPC
VPC:
Type: AWS::EC2::VPC
Properties:
CidrBlock: !Ref VPCCidr
EnableDnsHostnames: true
EnableDnsSupport: true
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-vpc
InternetGateway:
Type: AWS::EC2::InternetGateway
Properties:
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-igw
InternetGatewayAttachment:
Type: AWS::EC2::VPCGatewayAttachment
Properties:
InternetGatewayId: !Ref InternetGateway
VpcId: !Ref VPC
# Public Subnets
PublicSubnet1:
Type: AWS::EC2::Subnet
Properties:
VpcId: !Ref VPC
AvailabilityZone: !Select [0, !GetAZs '']
CidrBlock: !Select [0, !Ref PublicSubnetCidrs]
MapPublicIpOnLaunch: true
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-public-subnet-1
- Key: kubernetes.io/role/elb
Value: 1
PublicSubnet2:
Type: AWS::EC2::Subnet
Properties:
VpcId: !Ref VPC
AvailabilityZone: !Select [1, !GetAZs '']
CidrBlock: !Select [1, !Ref PublicSubnetCidrs]
MapPublicIpOnLaunch: true
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-public-subnet-2
- Key: kubernetes.io/role/elb
Value: 1
PublicSubnet3:
Type: AWS::EC2::Subnet
Properties:
VpcId: !Ref VPC
AvailabilityZone: !Select [2, !GetAZs '']
CidrBlock: !Select [2, !Ref PublicSubnetCidrs]
MapPublicIpOnLaunch: true
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-public-subnet-3
- Key: kubernetes.io/role/elb
Value: 1
# NAT Gateways
NatGateway1EIP:
Type: AWS::EC2::EIP
DependsOn: InternetGatewayAttachment
Properties:
Domain: vpc
NatGateway2EIP:
Type: AWS::EC2::EIP
DependsOn: InternetGatewayAttachment
Properties:
Domain: vpc
NatGateway1:
Type: AWS::EC2::NatGateway
Properties:
AllocationId: !GetAtt NatGateway1EIP.AllocationId
SubnetId: !Ref PublicSubnet1
NatGateway2:
Type: AWS::EC2::NatGateway
Properties:
AllocationId: !GetAtt NatGateway2EIP.AllocationId
SubnetId: !Ref PublicSubnet2
# Route Tables
PublicRouteTable:
Type: AWS::EC2::RouteTable
Properties:
VpcId: !Ref VPC
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-public-routes
DefaultPublicRoute:
Type: AWS::EC2::Route
DependsOn: InternetGatewayAttachment
Properties:
RouteTableId: !Ref PublicRouteTable
DestinationCidrBlock: 0.0.0.0/0
GatewayId: !Ref InternetGateway
PublicSubnet1RouteTableAssociation:
Type: AWS::EC2::SubnetRouteTableAssociation
Properties:
RouteTableId: !Ref PublicRouteTable
SubnetId: !Ref PublicSubnet1
PublicSubnet2RouteTableAssociation:
Type: AWS::EC2::SubnetRouteTableAssociation
Properties:
RouteTableId: !Ref PublicRouteTable
SubnetId: !Ref PublicSubnet2
PublicSubnet3RouteTableAssociation:
Type: AWS::EC2::SubnetRouteTableAssociation
Properties:
RouteTableId: !Ref PublicRouteTable
SubnetId: !Ref PublicSubnet3
# Private Subnets
PrivateSubnet1:
Type: AWS::EC2::Subnet
Properties:
VpcId: !Ref VPC
AvailabilityZone: !Select [0, !GetAZs '']
CidrBlock: !Select [0, !Ref PrivateSubnetCidrs]
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-private-subnet-1
- Key: kubernetes.io/role/internal-elb
Value: 1
PrivateSubnet2:
Type: AWS::EC2::Subnet
Properties:
VpcId: !Ref VPC
AvailabilityZone: !Select [1, !GetAZs '']
CidrBlock: !Select [1, !Ref PrivateSubnetCidrs]
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-private-subnet-2
- Key: kubernetes.io/role/internal-elb
Value: 1
PrivateSubnet3:
Type: AWS::EC2::Subnet
Properties:
VpcId: !Ref VPC
AvailabilityZone: !Select [2, !GetAZs '']
CidrBlock: !Select [2, !Ref PrivateSubnetCidrs]
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-private-subnet-3
- Key: kubernetes.io/role/internal-elb
Value: 1
# Private Route Tables
PrivateRouteTable1:
Type: AWS::EC2::RouteTable
Properties:
VpcId: !Ref VPC
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-private-routes-1
DefaultPrivateRoute1:
Type: AWS::EC2::Route
Properties:
RouteTableId: !Ref PrivateRouteTable1
DestinationCidrBlock: 0.0.0.0/0
NatGatewayId: !Ref NatGateway1
PrivateSubnet1RouteTableAssociation:
Type: AWS::EC2::SubnetRouteTableAssociation
Properties:
RouteTableId: !Ref PrivateRouteTable1
SubnetId: !Ref PrivateSubnet1
PrivateRouteTable2:
Type: AWS::EC2::RouteTable
Properties:
VpcId: !Ref VPC
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-private-routes-2
DefaultPrivateRoute2:
Type: AWS::EC2::Route
Properties:
RouteTableId: !Ref PrivateRouteTable2
DestinationCidrBlock: 0.0.0.0/0
NatGatewayId: !Ref NatGateway2
PrivateSubnet2RouteTableAssociation:
Type: AWS::EC2::SubnetRouteTableAssociation
Properties:
RouteTableId: !Ref PrivateRouteTable2
SubnetId: !Ref PrivateSubnet2
PrivateSubnet3RouteTableAssociation:
Type: AWS::EC2::SubnetRouteTableAssociation
Properties:
RouteTableId: !Ref PrivateRouteTable2
SubnetId: !Ref PrivateSubnet3
# Database Subnets
DatabaseSubnet1:
Type: AWS::EC2::Subnet
Properties:
VpcId: !Ref VPC
AvailabilityZone: !Select [0, !GetAZs '']
CidrBlock: !Select [0, !Ref DatabaseSubnetCidrs]
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-database-subnet-1
DatabaseSubnet2:
Type: AWS::EC2::Subnet
Properties:
VpcId: !Ref VPC
AvailabilityZone: !Select [1, !GetAZs '']
CidrBlock: !Select [1, !Ref DatabaseSubnetCidrs]
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-database-subnet-2
DatabaseSubnet3:
Type: AWS::EC2::Subnet
Properties:
VpcId: !Ref VPC
AvailabilityZone: !Select [2, !GetAZs '']
CidrBlock: !Select [2, !Ref DatabaseSubnetCidrs]
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-database-subnet-3
# Security Groups
ALBSecurityGroup:
Type: AWS::EC2::SecurityGroup
Properties:
GroupName: !Sub ${EnvironmentName}-alb-sg
GroupDescription: Security group for Application Load Balancer
VpcId: !Ref VPC
SecurityGroupIngress:
- IpProtocol: tcp
FromPort: 80
ToPort: 80
CidrIp: 0.0.0.0/0
- IpProtocol: tcp
FromPort: 443
ToPort: 443
CidrIp: 0.0.0.0/0
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-alb-sg
WebServerSecurityGroup:
Type: AWS::EC2::SecurityGroup
Properties:
GroupName: !Sub ${EnvironmentName}-webserver-sg
GroupDescription: Security group for web servers
VpcId: !Ref VPC
SecurityGroupIngress:
- IpProtocol: tcp
FromPort: 80
ToPort: 80
SourceSecurityGroupId: !Ref ALBSecurityGroup
- IpProtocol: tcp
FromPort: 443
ToPort: 443
SourceSecurityGroupId: !Ref ALBSecurityGroup
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-webserver-sg
DatabaseSecurityGroup:
Type: AWS::EC2::SecurityGroup
Properties:
GroupName: !Sub ${EnvironmentName}-database-sg
GroupDescription: Security group for RDS database
VpcId: !Ref VPC
SecurityGroupIngress:
- IpProtocol: tcp
FromPort: 3306
ToPort: 3306
SourceSecurityGroupId: !Ref WebServerSecurityGroup
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-database-sg
CacheSecurityGroup:
Type: AWS::EC2::SecurityGroup
Properties:
GroupName: !Sub ${EnvironmentName}-cache-sg
GroupDescription: Security group for ElastiCache
VpcId: !Ref VPC
SecurityGroupIngress:
- IpProtocol: tcp
FromPort: 6379
ToPort: 6379
SourceSecurityGroupId: !Ref WebServerSecurityGroup
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-cache-sg
# Application Load Balancer
ApplicationLoadBalancer:
Type: AWS::ElasticLoadBalancingV2::LoadBalancer
Properties:
Name: !Sub ${EnvironmentName}-alb
Subnets:
- !Ref PublicSubnet1
- !Ref PublicSubnet2
- !Ref PublicSubnet3
SecurityGroups:
- !Ref ALBSecurityGroup
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-alb
ALBTargetGroup:
Type: AWS::ElasticLoadBalancingV2::TargetGroup
Properties:
Name: !Sub ${EnvironmentName}-tg
Port: 80
Protocol: HTTP
VpcId: !Ref VPC
HealthCheckEnabled: true
HealthCheckPath: /health
HealthCheckProtocol: HTTP
HealthCheckIntervalSeconds: 30
HealthCheckTimeoutSeconds: 5
HealthyThresholdCount: 2
UnhealthyThresholdCount: 3
TargetType: instance
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-tg
ALBListener:
Type: AWS::ElasticLoadBalancingV2::Listener
Properties:
DefaultActions:
- Type: forward
TargetGroupArn: !Ref ALBTargetGroup
LoadBalancerArn: !Ref ApplicationLoadBalancer
Port: 80
Protocol: HTTP
# Launch Template
LaunchTemplate:
Type: AWS::EC2::LaunchTemplate
Properties:
LaunchTemplateName: !Sub ${EnvironmentName}-lt
LaunchTemplateData:
ImageId: !Ref LatestAmiId
InstanceType: !Ref InstanceType
KeyName: !Ref KeyPairName
SecurityGroupIds:
- !Ref WebServerSecurityGroup
IamInstanceProfile:
Arn: !GetAtt InstanceProfile.Arn
UserData:
Fn::Base64: !Sub |
#!/bin/bash
yum update -y
yum install -y httpd
systemctl start httpd
systemctl enable httpd
# Install CloudWatch agent
wget https://s3.amazonaws.com/amazoncloudwatch-agent/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm
rpm -U ./amazon-cloudwatch-agent.rpm
# Configure application
cat > /var/www/html/index.html <<EOF
<h1>Hello from ${EnvironmentName}</h1>
<p>Instance ID: $(ec2-metadata --instance-id | cut -d " " -f 2)</p>
<p>Availability Zone: $(ec2-metadata --availability-zone | cut -d " " -f 2)</p>
EOF
# Configure CloudWatch logs
cat > /opt/aws/amazon-cloudwatch-agent/etc/config.json <<EOF
{
"metrics": {
"namespace": "${EnvironmentName}",
"metrics_collected": {
"cpu": {
"measurement": [
"cpu_usage_idle",
"cpu_usage_iowait"
],
"metrics_collection_interval": 60
},
"disk": {
"measurement": [
"used_percent"
],
"metrics_collection_interval": 60,
"resources": [
"*"
]
},
"mem": {
"measurement": [
"mem_used_percent"
],
"metrics_collection_interval": 60
}
}
},
"logs": {
"logs_collected": {
"files": {
"collect_list": [
{
"file_path": "/var/log/httpd/access_log",
"log_group_name": "/aws/ec2/${EnvironmentName}/httpd",
"log_stream_name": "{instance_id}/access_log"
}
]
}
}
}
}
EOF
/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl \
-a fetch-config \
-m ec2 \
-s \
-c file:/opt/aws/amazon-cloudwatch-agent/etc/config.json
TagSpecifications:
- ResourceType: instance
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-instance
- ResourceType: volume
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-volume
# Auto Scaling Group
AutoScalingGroup:
Type: AWS::AutoScaling::AutoScalingGroup
Properties:
AutoScalingGroupName: !Sub ${EnvironmentName}-asg
VPCZoneIdentifier:
- !Ref PrivateSubnet1
- !Ref PrivateSubnet2
- !Ref PrivateSubnet3
LaunchTemplate:
LaunchTemplateId: !Ref LaunchTemplate
Version: !GetAtt LaunchTemplate.LatestVersionNumber
MinSize: !Ref MinSize
MaxSize: !Ref MaxSize
DesiredCapacity: !Ref DesiredCapacity
HealthCheckType: ELB
HealthCheckGracePeriod: 300
TargetGroupARNs:
- !Ref ALBTargetGroup
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-asg-instance
PropagateAtLaunch: true
# Scaling Policies
ScaleUpPolicy:
Type: AWS::AutoScaling::ScalingPolicy
Properties:
AdjustmentType: ChangeInCapacity
AutoScalingGroupName: !Ref AutoScalingGroup
Cooldown: 300
ScalingAdjustment: 2
ScaleDownPolicy:
Type: AWS::AutoScaling::ScalingPolicy
Properties:
AdjustmentType: ChangeInCapacity
AutoScalingGroupName: !Ref AutoScalingGroup
Cooldown: 300
ScalingAdjustment: -1
CPUAlarmHigh:
Type: AWS::CloudWatch::Alarm
Properties:
AlarmName: !Sub ${EnvironmentName}-cpu-high
AlarmDescription: Trigger scaling up based on CPU utilization
MetricName: CPUUtilization
Namespace: AWS/EC2
Statistic: Average
Period: 300
EvaluationPeriods: 2
Threshold: 70
ComparisonOperator: GreaterThanThreshold
Dimensions:
- Name: AutoScalingGroupName
Value: !Ref AutoScalingGroup
AlarmActions:
- !Ref ScaleUpPolicy
CPUAlarmLow:
Type: AWS::CloudWatch::Alarm
Properties:
AlarmName: !Sub ${EnvironmentName}-cpu-low
AlarmDescription: Trigger scaling down based on CPU utilization
MetricName: CPUUtilization
Namespace: AWS/EC2
Statistic: Average
Period: 300
EvaluationPeriods: 2
Threshold: 30
ComparisonOperator: LessThanThreshold
Dimensions:
- Name: AutoScalingGroupName
Value: !Ref AutoScalingGroup
AlarmActions:
- !Ref ScaleDownPolicy
# RDS Database
DatabaseSubnetGroup:
Type: AWS::RDS::DBSubnetGroup
Properties:
DBSubnetGroupName: !Sub ${EnvironmentName}-db-subnet-group
DBSubnetGroupDescription: Subnet group for RDS database
SubnetIds:
- !Ref DatabaseSubnet1
- !Ref DatabaseSubnet2
- !Ref DatabaseSubnet3
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-db-subnet-group
DatabaseInstance:
Type: AWS::RDS::DBInstance
DeletionPolicy: Snapshot
Properties:
DBInstanceIdentifier: !Sub ${EnvironmentName}-database
DBInstanceClass: !Ref DatabaseInstanceType
Engine: mysql
EngineVersion: '8.0'
MasterUsername: admin
MasterUserPassword: !Ref DatabasePassword
AllocatedStorage: 100
StorageType: gp3
StorageEncrypted: true
VPCSecurityGroups:
- !Ref DatabaseSecurityGroup
DBSubnetGroupName: !Ref DatabaseSubnetGroup
BackupRetentionPeriod: 7
PreferredBackupWindow: "03:00-04:00"
PreferredMaintenanceWindow: "sun:04:00-sun:05:00"
MultiAZ: true
EnableCloudwatchLogsExports:
- error
- general
- slowquery
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-database
# ElastiCache
CacheSubnetGroup:
Type: AWS::ElastiCache::SubnetGroup
Properties:
CacheSubnetGroupName: !Sub ${EnvironmentName}-cache-subnet-group
Description: Subnet group for ElastiCache
SubnetIds:
- !Ref PrivateSubnet1
- !Ref PrivateSubnet2
- !Ref PrivateSubnet3
CacheCluster:
Type: AWS::ElastiCache::CacheCluster
Properties:
CacheClusterId: !Sub ${EnvironmentName}-cache
Engine: redis
CacheNodeType: cache.t3.micro
NumCacheNodes: 1
CacheSubnetGroupName: !Ref CacheSubnetGroup
VpcSecurityGroupIds:
- !Ref CacheSecurityGroup
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-cache
# IAM Role
InstanceRole:
Type: AWS::IAM::Role
Properties:
RoleName: !Sub ${EnvironmentName}-instance-role
AssumeRolePolicyDocument:
Version: '2012-10-17'
Statement:
- Effect: Allow
Principal:
Service:
- ec2.amazonaws.com
Action:
- sts:AssumeRole
ManagedPolicyArns:
- arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy
- arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
Policies:
- PolicyName: S3Access
PolicyDocument:
Version: '2012-10-17'
Statement:
- Effect: Allow
Action:
- s3:GetObject
- s3:PutObject
Resource:
- !Sub 'arn:aws:s3:::${EnvironmentName}-app-bucket/*'
InstanceProfile:
Type: AWS::IAM::InstanceProfile
Properties:
InstanceProfileName: !Sub ${EnvironmentName}-instance-profile
Roles:
- !Ref InstanceRole
# CloudFront Distribution
CloudFrontDistribution:
Type: AWS::CloudFront::Distribution
Properties:
DistributionConfig:
Comment: !Sub ${EnvironmentName} CloudFront Distribution
Enabled: true
HttpVersion: http2
Origins:
- Id: ALBOrigin
DomainName: !GetAtt ApplicationLoadBalancer.DNSName
CustomOriginConfig:
HTTPPort: 80
HTTPSPort: 443
OriginProtocolPolicy: http-only
DefaultCacheBehavior:
TargetOriginId: ALBOrigin
ViewerProtocolPolicy: redirect-to-https
AllowedMethods:
- GET
- HEAD
- OPTIONS
- PUT
- POST
- PATCH
- DELETE
CachedMethods:
- GET
- HEAD
Compress: true
ForwardedValues:
QueryString: true
Cookies:
Forward: all
Headers:
- Authorization
- CloudFront-Forwarded-Proto
- CloudFront-Is-Desktop-Viewer
- CloudFront-Is-Mobile-Viewer
- CloudFront-Is-SmartTV-Viewer
- CloudFront-Is-Tablet-Viewer
- CloudFront-Viewer-Country
- Host
- User-Agent
PriceClass: PriceClass_100
Tags:
- Key: Name
Value: !Sub ${EnvironmentName}-cloudfront
Outputs:
VPCId:
Description: VPC ID
Value: !Ref VPC
Export:
Name: !Sub ${EnvironmentName}-VPC-ID
PublicSubnets:
Description: Public subnet IDs
Value: !Join [',', [!Ref PublicSubnet1, !Ref PublicSubnet2, !Ref PublicSubnet3]]
Export:
Name: !Sub ${EnvironmentName}-PUBLIC-SUBNETS
PrivateSubnets:
Description: Private subnet IDs
Value: !Join [',', [!Ref PrivateSubnet1, !Ref PrivateSubnet2, !Ref PrivateSubnet3]]
Export:
Name: !Sub ${EnvironmentName}-PRIVATE-SUBNETS
ALBDNSName:
Description: Application Load Balancer DNS name
Value: !GetAtt ApplicationLoadBalancer.DNSName
Export:
Name: !Sub ${EnvironmentName}-ALB-DNS
CloudFrontURL:
Description: CloudFront distribution URL
Value: !GetAtt CloudFrontDistribution.DomainName
Export:
Name: !Sub ${EnvironmentName}-CLOUDFRONT-URL
DatabaseEndpoint:
Description: RDS database endpoint
Value: !GetAtt DatabaseInstance.Endpoint.Address
Export:
Name: !Sub ${EnvironmentName}-DATABASE-ENDPOINT
CacheEndpoint:
Description: ElastiCache endpoint
Value: !GetAtt CacheCluster.RedisEndpoint.Address
Export:
Name: !Sub ${EnvironmentName}-CACHE-ENDPOINT
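One way to validate and deploy the template with the AWS CLI; the stack name and parameter values are examples, and in practice the database password should come from Secrets Manager rather than the command line:
aws cloudformation validate-template \
  --template-body file://templates/infrastructure.yaml
aws cloudformation deploy \
  --template-file templates/infrastructure.yaml \
  --stack-name production-infrastructure \
  --parameter-overrides EnvironmentName=production KeyPairName=my-keypair DatabasePassword=ChangeMe12345 \
  --capabilities CAPABILITY_NAMED_IAM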
// AI Prompt
Agent: "Create Pulumi program for:
- Kubernetes cluster with GitOps
- Microservices deployment
- Service mesh setup
- Observability stack
- Multi-region deployment"
// index.ts
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
import * as awsx from "@pulumi/awsx";
import * as eks from "@pulumi/eks";
import * as k8s from "@pulumi/kubernetes";
import * as cloudflare from "@pulumi/cloudflare";
// Configuration
const config = new pulumi.Config();
const environment = config.require("environment");
const region = config.get("region") || aws.config.region;
const azCount = config.getNumber("azCount") || 3;
const instanceType = config.get("instanceType") || "t3.medium";
const minSize = config.getNumber("minSize") || 2;
const maxSize = config.getNumber("maxSize") || 10;
const desiredCapacity = config.getNumber("desiredCapacity") || 4;
// Tags
const tags = {
Environment: environment,
ManagedBy: "pulumi",
Project: pulumi.getProject(),
Stack: pulumi.getStack(),
};
// Create VPC
const vpc = new awsx.ec2.Vpc(`${environment}-vpc`, {
numberOfAvailabilityZones: azCount,
natGateways: {
strategy: "Single", // Or "HighlyAvailable" for production
},
tags: {
...tags,
Name: `${environment}-vpc`,
},
subnets: [
{
type: "public",
tags: {
...tags,
"kubernetes.io/role/elb": "1",
Type: "public",
},
},
{
type: "private",
tags: {
...tags,
"kubernetes.io/role/internal-elb": "1",
Type: "private",
},
},
{
type: "isolated",
name: "database",
tags: {
...tags,
Type: "database",
},
},
],
});
// Create EKS Cluster
const cluster = new eks.Cluster(`${environment}-cluster`, {
vpcId: vpc.id,
subnetIds: vpc.privateSubnetIds,
instanceType: instanceType,
desiredCapacity: desiredCapacity,
minSize: minSize,
maxSize: maxSize,
nodeAssociatePublicIpAddress: false,
version: "1.28",
enabledClusterLogTypes: [
"api",
"audit",
"authenticator",
"controllerManager",
"scheduler",
],
tags: tags,
// Enable OIDC provider for IRSA
createOidcProvider: true,
// Node group configuration
nodeGroupOptions: {
amiType: "AL2_x86_64",
diskSize: 100,
instanceTypes: [instanceType],
labels: {
"node.kubernetes.io/lifecycle": "normal",
},
taints: [],
tags: {
...tags,
Name: `${environment}-node`,
},
},
// Fargate profiles for system workloads
fargateProfiles: [
{
name: "system",
selectors: [
{
namespace: "kube-system",
labels: {
"fargate": "true",
},
},
{
namespace: "cert-manager",
},
],
},
],
});
// Create Kubernetes provider
const k8sProvider = new k8s.Provider(`${environment}-k8s`, {
kubeconfig: cluster.kubeconfig,
});
// Install Metrics Server
const metricsServer = new k8s.helm.v3.Release(
"metrics-server",
{
chart: "metrics-server",
namespace: "kube-system",
repositoryOpts: {
repo: "https://kubernetes-sigs.github.io/metrics-server/",
},
values: {
args: [
"--cert-dir=/tmp",
"--kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname",
"--kubelet-use-node-status-port",
"--metric-resolution=15s",
],
},
},
{ provider: k8sProvider }
);
// Install Cluster Autoscaler
const clusterAutoscaler = new k8s.helm.v3.Release(
"cluster-autoscaler",
{
chart: "cluster-autoscaler",
namespace: "kube-system",
repositoryOpts: {
repo: "https://kubernetes.github.io/autoscaler",
},
values: {
autoDiscovery: {
clusterName: cluster.eksCluster.name,
},
awsRegion: region,
rbac: {
serviceAccount: {
annotations: {
"eks.amazonaws.com/role-arn": clusterAutoscalerRole.arn,
},
},
},
},
},
{ provider: k8sProvider, dependsOn: [cluster] }
);
// Install AWS Load Balancer Controller
const awsLoadBalancerController = new k8s.helm.v3.Release(
"aws-load-balancer-controller",
{
chart: "aws-load-balancer-controller",
namespace: "kube-system",
repositoryOpts: {
repo: "https://aws.github.io/eks-charts",
},
values: {
clusterName: cluster.eksCluster.name,
serviceAccount: {
annotations: {
"eks.amazonaws.com/role-arn": albControllerRole.arn,
},
},
},
},
{ provider: k8sProvider, dependsOn: [cluster] }
);
// Install Istio Service Mesh
const istioBase = new k8s.helm.v3.Release(
"istio-base",
{
chart: "base",
namespace: "istio-system",
createNamespace: true,
repositoryOpts: {
repo: "https://istio-release.storage.googleapis.com/charts",
},
},
{ provider: k8sProvider }
);
const istiod = new k8s.helm.v3.Release(
"istiod",
{
chart: "istiod",
namespace: "istio-system",
repositoryOpts: {
repo: "https://istio-release.storage.googleapis.com/charts",
},
values: {
pilot: {
autoscaleEnabled: true,
resources: {
requests: {
cpu: "100m",
memory: "128Mi",
},
},
},
},
},
{ provider: k8sProvider, dependsOn: [istioBase] }
);
const istioIngress = new k8s.helm.v3.Release(
"istio-ingress",
{
chart: "gateway",
namespace: "istio-ingress",
createNamespace: true,
repositoryOpts: {
repo: "https://istio-release.storage.googleapis.com/charts",
},
values: {
service: {
type: "LoadBalancer",
annotations: {
"service.beta.kubernetes.io/aws-load-balancer-type": "nlb",
"service.beta.kubernetes.io/aws-load-balancer-cross-zone-load-balancing-enabled": "true",
},
},
},
},
{ provider: k8sProvider, dependsOn: [istiod] }
);
// Install Prometheus Stack
const prometheus = new k8s.helm.v3.Release(
"prometheus",
{
chart: "kube-prometheus-stack",
namespace: "monitoring",
createNamespace: true,
repositoryOpts: {
repo: "https://prometheus-community.github.io/helm-charts",
},
values: {
prometheus: {
prometheusSpec: {
retention: "7d",
storageSpec: {
volumeClaimTemplate: {
spec: {
storageClassName: "gp3",
accessModes: ["ReadWriteOnce"],
resources: {
requests: {
storage: "50Gi",
},
},
},
},
},
serviceMonitorSelectorNilUsesHelmValues: false,
podMonitorSelectorNilUsesHelmValues: false,
},
},
grafana: {
adminPassword: config.requireSecret("grafanaPassword"),
ingress: {
enabled: true,
annotations: {
"kubernetes.io/ingress.class": "istio",
},
hosts: [`grafana.${environment}.example.com`],
},
},
},
},
{ provider: k8sProvider }
);
// Install Loki for Logging
const loki = new k8s.helm.v3.Release(
"loki",
{
chart: "loki-stack",
namespace: "monitoring",
repositoryOpts: {
repo: "https://grafana.github.io/helm-charts",
},
values: {
loki: {
persistence: {
enabled: true,
size: "50Gi",
storageClassName: "gp3",
},
},
promtail: {
enabled: true,
},
},
},
{ provider: k8sProvider, dependsOn: [prometheus] }
);
// Create RDS Database
const dbSubnetGroup = new aws.rds.SubnetGroup(`${environment}-db-subnet`, {
subnetIds: vpc.isolatedSubnetIds,
tags: {
...tags,
Name: `${environment}-db-subnet-group`,
},
});
const dbSecurityGroup = new aws.ec2.SecurityGroup(`${environment}-db-sg`, {
vpcId: vpc.id,
ingress: [
{
protocol: "tcp",
fromPort: 5432,
toPort: 5432,
securityGroups: [cluster.nodeSecurityGroup.id],
},
],
egress: [
{
protocol: "-1",
fromPort: 0,
toPort: 0,
cidrBlocks: ["0.0.0.0/0"],
},
],
tags: {
...tags,
Name: `${environment}-db-sg`,
},
});
const database = new aws.rds.Instance(`${environment}-db`, {
engine: "postgres",
engineVersion: "15.4",
instanceClass: "db.t3.medium",
allocatedStorage: 100,
storageType: "gp3",
storageEncrypted: true,
dbName: "app",
username: "dbadmin",
password: config.requireSecret("dbPassword"),
vpcSecurityGroupIds: [dbSecurityGroup.id],
dbSubnetGroupName: dbSubnetGroup.name,
backupRetentionPeriod: 7,
backupWindow: "03:00-04:00",
maintenanceWindow: "sun:04:00-sun:05:00",
multiAz: true,
skipFinalSnapshot: false,
finalSnapshotIdentifier: `${environment}-db-final-snapshot`,
tags: {
...tags,
Name: `${environment}-database`,
},
});
// Create S3 Buckets
const appBucket = new aws.s3.BucketV2(`${environment}-app`, {
bucket: `${environment}-app-${pulumi.getStack()}`,
tags: tags,
});
const appBucketVersioning = new aws.s3.BucketVersioningV2(
`${environment}-app-versioning`,
{
bucket: appBucket.id,
versioningConfiguration: {
status: "Enabled",
},
}
);
const appBucketEncryption = new aws.s3.BucketServerSideEncryptionConfigurationV2(
`${environment}-app-encryption`,
{
bucket: appBucket.id,
rules: [
{
applyServerSideEncryptionByDefault: {
sseAlgorithm: "AES256",
},
},
],
}
);
// Create IAM Roles for IRSA
const clusterAutoscalerRole = new aws.iam.Role(
`${environment}-cluster-autoscaler`,
{
assumeRolePolicy: pulumi
.all([cluster.core.oidcProvider?.arn, cluster.core.oidcProvider?.url])
.apply(([arn, url]) => {
return JSON.stringify({
Version: "2012-10-17",
Statement: [
{
Effect: "Allow",
Principal: {
Federated: arn,
},
Action: "sts:AssumeRoleWithWebIdentity",
Condition: {
StringEquals: {
[`${url}:sub`]: "system:serviceaccount:kube-system:cluster-autoscaler",
},
},
},
],
});
}),
tags: tags,
}
);
const clusterAutoscalerPolicy = new aws.iam.RolePolicy(
`${environment}-cluster-autoscaler-policy`,
{
role: clusterAutoscalerRole.name,
policy: JSON.stringify({
Version: "2012-10-17",
Statement: [
{
Effect: "Allow",
Action: [
"autoscaling:DescribeAutoScalingGroups",
"autoscaling:DescribeAutoScalingInstances",
"autoscaling:DescribeLaunchConfigurations",
"autoscaling:DescribeTags",
"autoscaling:SetDesiredCapacity",
"autoscaling:TerminateInstanceInAutoScalingGroup",
"ec2:DescribeLaunchTemplateVersions",
],
Resource: "*",
},
],
}),
}
);
const albControllerRole = new aws.iam.Role(
`${environment}-alb-controller`,
{
assumeRolePolicy: pulumi
.all([cluster.core.oidcProvider?.arn, cluster.core.oidcProvider?.url])
.apply(([arn, url]) => {
return JSON.stringify({
Version: "2012-10-17",
Statement: [
{
Effect: "Allow",
Principal: {
Federated: arn,
},
Action: "sts:AssumeRoleWithWebIdentity",
Condition: {
StringEquals: {
[`${url}:sub`]: "system:serviceaccount:kube-system:aws-load-balancer-controller",
},
},
},
],
});
}),
tags: tags,
}
);
// Attach AWS Load Balancer Controller policy
const albControllerPolicyAttachment = new aws.iam.RolePolicyAttachment(
`${environment}-alb-controller-policy`,
{
role: albControllerRole.name,
policyArn: "arn:aws:iam::aws:policy/ElasticLoadBalancingFullAccess",
}
);
// Origin Access Identity for the S3 origin (declared before the distribution that references it)
const originAccessIdentity = new aws.cloudfront.OriginAccessIdentity(
`${environment}-oai`,
{
comment: `OAI for ${environment} S3 bucket`,
}
);
// Create CloudFront Distribution
const cdn = new aws.cloudfront.Distribution(`${environment}-cdn`, {
enabled: true,
isIpv6Enabled: true,
comment: `${environment} CloudFront Distribution`,
defaultRootObject: "index.html",
priceClass: "PriceClass_100",
origins: [
{
domainName: appBucket.bucketRegionalDomainName.apply(d => d),
originId: "S3-Origin",
s3OriginConfig: {
originAccessIdentity: originAccessIdentity.cloudfrontAccessIdentityPath,
},
},
{
domainName: istioIngress.status.apply(
s => s.loadBalancer?.ingress?.[0]?.hostname || ""
),
originId: "API-Origin",
customOriginConfig: {
httpPort: 80,
httpsPort: 443,
originProtocolPolicy: "https-only",
originSslProtocols: ["TLSv1.2"],
},
},
],
defaultCacheBehavior: {
targetOriginId: "S3-Origin",
viewerProtocolPolicy: "redirect-to-https",
allowedMethods: ["GET", "HEAD", "OPTIONS"],
cachedMethods: ["GET", "HEAD"],
compress: true,
forwardedValues: {
queryString: false,
cookies: {
forward: "none",
},
},
minTtl: 0,
defaultTtl: 3600,
maxTtl: 86400,
},
orderedCacheBehaviors: [
{
pathPattern: "/api/*",
targetOriginId: "API-Origin",
viewerProtocolPolicy: "https-only",
allowedMethods: ["GET", "HEAD", "OPTIONS", "PUT", "POST", "PATCH", "DELETE"],
cachedMethods: ["GET", "HEAD"],
compress: true,
forwardedValues: {
queryString: true,
headers: ["Authorization", "Content-Type"],
cookies: {
forward: "all",
},
},
minTtl: 0,
defaultTtl: 0,
maxTtl: 0,
},
],
customErrorResponses: [
{
errorCode: 404,
responseCode: 200,
responsePagePath: "/index.html",
errorCachingMinTtl: 300,
},
],
restrictions: {
geoRestriction: {
restrictionType: "none",
},
},
viewerCertificate: {
cloudfrontDefaultCertificate: true,
},
tags: tags,
});
// Bucket Policy for CloudFront
const bucketPolicy = new aws.s3.BucketPolicy(`${environment}-bucket-policy`, {
bucket: appBucket.id,
policy: pulumi.all([appBucket.arn, originAccessIdentity.iamArn]).apply(
([bucketArn, oaiArn]) =>
JSON.stringify({
Version: "2012-10-17",
Statement: [
{
Effect: "Allow",
Principal: {
AWS: oaiArn,
},
Action: "s3:GetObject",
Resource: `${bucketArn}/*`,
},
],
})
),
});
// Export outputs
export const vpcId = vpc.id;
export const clusterName = cluster.eksCluster.name;
export const kubeconfig = cluster.kubeconfig;
export const databaseEndpoint = database.endpoint;
export const databasePort = database.port;
export const appBucketName = appBucket.bucket;
export const cdnDomainName = cdn.domainName;
export const istioIngressEndpoint = istioIngress.status.apply(
s => s.loadBalancer?.ingress?.[0]?.hostname || ""
);
// Stack outputs for reference
export const stackOutputs = {
vpc: {
id: vpc.id,
publicSubnets: vpc.publicSubnetIds,
privateSubnets: vpc.privateSubnetIds,
databaseSubnets: vpc.isolatedSubnetIds,
},
eks: {
clusterName: cluster.eksCluster.name,
clusterEndpoint: cluster.eksCluster.endpoint,
nodeSecurityGroup: cluster.nodeSecurityGroup.id,
},
database: {
endpoint: database.endpoint,
port: database.port,
},
storage: {
appBucket: appBucket.bucket,
},
cdn: {
domainName: cdn.domainName,
distributionId: cdn.id,
},
};
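Typical stack setup for the program above; the stack name and values are examples, and the secret keys match the config.requireSecret calls in the code:
pulumi stack init production
pulumi config set environment production
pulumi config set aws:region us-east-1
pulumi config set --secret dbPassword <secure-password>
pulumi config set --secret grafanaPassword <secure-password>
pulumi preview
pulumi up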
# AI Prompt: "Create Pulumi Python program for data platform infrastructure"
"""Data Platform Infrastructure with Pulumi Python"""
import pulumi
import pulumi_aws as aws
import pulumi_kubernetes as k8s
from pulumi import Config, Output, export
import json
# Configuration
config = Config()
environment = config.require("environment")
region = config.get("region") or "us-east-1"
vpc_cidr = config.get("vpcCidr") or "10.0.0.0/16"
# Common tags
tags = {
"Environment": environment,
"ManagedBy": "pulumi",
"Project": pulumi.get_project(),
"Stack": pulumi.get_stack(),
}
# Create VPC
vpc = aws.ec2.Vpc(
f"{environment}-vpc",
cidr_block=vpc_cidr,
enable_dns_hostnames=True,
enable_dns_support=True,
tags={**tags, "Name": f"{environment}-vpc"},
)
# Create Internet Gateway
igw = aws.ec2.InternetGateway(
f"{environment}-igw",
vpc_id=vpc.id,
tags={**tags, "Name": f"{environment}-igw"},
)
# Get availability zones
azs = aws.get_availability_zones(state="available")
# Create subnets
public_subnets = []
private_subnets = []
database_subnets = []
for i in range(3):
# Public subnet
public_subnet = aws.ec2.Subnet(
f"{environment}-public-{i+1}",
vpc_id=vpc.id,
cidr_block=f"10.0.{i+1}.0/24",
availability_zone=azs.names[i],
map_public_ip_on_launch=True,
tags={
**tags,
"Name": f"{environment}-public-{i+1}",
"Type": "public",
"kubernetes.io/role/elb": "1",
},
)
public_subnets.append(public_subnet)
# Private subnet
private_subnet = aws.ec2.Subnet(
f"{environment}-private-{i+1}",
vpc_id=vpc.id,
cidr_block=f"10.0.{i+11}.0/24",
availability_zone=azs.names[i],
tags={
**tags,
"Name": f"{environment}-private-{i+1}",
"Type": "private",
"kubernetes.io/role/internal-elb": "1",
},
)
private_subnets.append(private_subnet)
# Database subnet
database_subnet = aws.ec2.Subnet(
f"{environment}-database-{i+1}",
vpc_id=vpc.id,
cidr_block=f"10.0.{i+21}.0/24",
availability_zone=azs.names[i],
tags={
**tags,
"Name": f"{environment}-database-{i+1}",
"Type": "database",
},
)
database_subnets.append(database_subnet)
# Create NAT Gateway
eip = aws.ec2.Eip(
f"{environment}-nat-eip",
domain="vpc",
tags={**tags, "Name": f"{environment}-nat-eip"},
)
nat_gateway = aws.ec2.NatGateway(
f"{environment}-nat",
allocation_id=eip.id,
subnet_id=public_subnets[0].id,
tags={**tags, "Name": f"{environment}-nat"},
)
# Route tables
public_route_table = aws.ec2.RouteTable(
f"{environment}-public-rt",
vpc_id=vpc.id,
tags={**tags, "Name": f"{environment}-public-rt"},
)
public_route = aws.ec2.Route(
f"{environment}-public-route",
route_table_id=public_route_table.id,
destination_cidr_block="0.0.0.0/0",
gateway_id=igw.id,
)
# Associate public subnets with public route table
for i, subnet in enumerate(public_subnets):
aws.ec2.RouteTableAssociation(
f"{environment}-public-rta-{i+1}",
subnet_id=subnet.id,
route_table_id=public_route_table.id,
)
# Private route table
private_route_table = aws.ec2.RouteTable(
f"{environment}-private-rt",
vpc_id=vpc.id,
tags={**tags, "Name": f"{environment}-private-rt"},
)
private_route = aws.ec2.Route(
f"{environment}-private-route",
route_table_id=private_route_table.id,
destination_cidr_block="0.0.0.0/0",
nat_gateway_id=nat_gateway.id,
)
# Associate private subnets with private route table
for i, subnet in enumerate(private_subnets):
aws.ec2.RouteTableAssociation(
f"{environment}-private-rta-{i+1}",
subnet_id=subnet.id,
route_table_id=private_route_table.id,
)
# Security Groups
alb_sg = aws.ec2.SecurityGroup(
f"{environment}-alb-sg",
vpc_id=vpc.id,
description="Security group for Application Load Balancer",
ingress=[
{
"protocol": "tcp",
"from_port": 80,
"to_port": 80,
"cidr_blocks": ["0.0.0.0/0"],
},
{
"protocol": "tcp",
"from_port": 443,
"to_port": 443,
"cidr_blocks": ["0.0.0.0/0"],
},
],
egress=[
{
"protocol": "-1",
"from_port": 0,
"to_port": 0,
"cidr_blocks": ["0.0.0.0/0"],
},
],
tags={**tags, "Name": f"{environment}-alb-sg"},
)
app_sg = aws.ec2.SecurityGroup(
f"{environment}-app-sg",
vpc_id=vpc.id,
description="Security group for application servers",
ingress=[
{
"protocol": "tcp",
"from_port": 80,
"to_port": 80,
"security_groups": [alb_sg.id],
},
],
egress=[
{
"protocol": "-1",
"from_port": 0,
"to_port": 0,
"cidr_blocks": ["0.0.0.0/0"],
},
],
tags={**tags, "Name": f"{environment}-app-sg"},
)
db_sg = aws.ec2.SecurityGroup(
f"{environment}-db-sg",
vpc_id=vpc.id,
description="Security group for database",
ingress=[
{
"protocol": "tcp",
"from_port": 5432,
"to_port": 5432,
"security_groups": [app_sg.id],
},
],
egress=[
{
"protocol": "-1",
"from_port": 0,
"to_port": 0,
"cidr_blocks": ["0.0.0.0/0"],
},
],
tags={**tags, "Name": f"{environment}-db-sg"},
)
# RDS Subnet Group
db_subnet_group = aws.rds.SubnetGroup(
f"{environment}-db-subnet-group",
subnet_ids=[subnet.id for subnet in database_subnets],
tags={**tags, "Name": f"{environment}-db-subnet-group"},
)
# RDS Instance
database = aws.rds.Instance(
f"{environment}-database",
engine="postgres",
engine_version="15.4",
instance_class="db.t3.medium",
allocated_storage=100,
storage_type="gp3",
storage_encrypted=True,
db_name="app",
username="dbadmin",
password=config.require_secret("dbPassword"),
vpc_security_group_ids=[db_sg.id],
db_subnet_group_name=db_subnet_group.name,
backup_retention_period=7,
backup_window="03:00-04:00",
maintenance_window="sun:04:00-sun:05:00",
multi_az=True,
skip_final_snapshot=False,
final_snapshot_identifier=f"{environment}-db-final-snapshot",
tags={**tags, "Name": f"{environment}-database"},
)
# S3 Buckets for Data Lake
data_lake_bucket = aws.s3.BucketV2(
f"{environment}-data-lake",
bucket=f"{environment}-data-lake-{pulumi.get_stack()}",
tags=tags,
)
# Enable versioning
aws.s3.BucketVersioningV2(
f"{environment}-data-lake-versioning",
bucket=data_lake_bucket.id,
versioning_configuration={
"status": "Enabled",
},
)
# Enable encryption
aws.s3.BucketServerSideEncryptionConfigurationV2(
f"{environment}-data-lake-encryption",
bucket=data_lake_bucket.id,
rules=[
{
"apply_server_side_encryption_by_default": {
"sse_algorithm": "AES256",
},
},
],
)
# Lifecycle rules for data lake
aws.s3.BucketLifecycleConfigurationV2(
f"{environment}-data-lake-lifecycle",
bucket=data_lake_bucket.id,
rules=[
{
"id": "archive-old-data",
"status": "Enabled",
"transitions": [
{
"days": 30,
"storage_class": "STANDARD_IA",
},
{
"days": 90,
"storage_class": "GLACIER",
},
],
},
{
"id": "delete-temp-data",
"status": "Enabled",
"filter": {
"prefix": "temp/",
},
"expiration": {
"days": 7,
},
},
],
)
# EMR Cluster for Big Data Processing
emr_role = aws.iam.Role(
f"{environment}-emr-role",
assume_role_policy=json.dumps({
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "elasticmapreduce.amazonaws.com",
},
"Action": "sts:AssumeRole",
},
],
}),
tags=tags,
)
aws.iam.RolePolicyAttachment(
f"{environment}-emr-policy",
role=emr_role.name,
policy_arn="arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole",
)
emr_ec2_role = aws.iam.Role(
f"{environment}-emr-ec2-role",
assume_role_policy=json.dumps({
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "ec2.amazonaws.com",
},
"Action": "sts:AssumeRole",
},
],
}),
tags=tags,
)
aws.iam.RolePolicyAttachment(
f"{environment}-emr-ec2-policy",
role=emr_ec2_role.name,
policy_arn="arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role",
)
emr_ec2_instance_profile = aws.iam.InstanceProfile(
f"{environment}-emr-ec2-profile",
role=emr_ec2_role.name,
)
# EMR Cluster
emr_cluster = aws.emr.Cluster(
f"{environment}-emr",
release_label="emr-6.15.0",
applications=["Spark", "Hadoop", "Hive", "JupyterHub"],
termination_protection=False,
keep_job_flow_alive_when_no_steps=True,
scale_down_behavior="TERMINATE_AT_TASK_COMPLETION",
service_role=emr_role.arn,
ec2_attributes={
"subnet_id": private_subnets[0].id,
"emr_managed_master_security_group": app_sg.id,
"emr_managed_slave_security_group": app_sg.id,
"instance_profile": emr_ec2_instance_profile.arn,
"key_name": config.get("keyPairName"),
},
master_instance_group={
"instance_type": "m5.xlarge",
"instance_count": 1,
},
core_instance_group={
"instance_type": "m5.xlarge",
"instance_count": 2,
"bid_price": "0.1",
},
configurations=json.dumps([
{
"Classification": "spark-defaults",
"Properties": {
"spark.dynamicAllocation.enabled": "true",
"spark.executor.memory": "4g",
"spark.executor.cores": "2",
},
},
]),
log_uri=Output.concat("s3://", data_lake_bucket.bucket, "/emr-logs/"),
tags={**tags, "Name": f"{environment}-emr"},
)
# Lambda Functions for Data Processing
lambda_role = aws.iam.Role(
f"{environment}-lambda-role",
assume_role_policy=json.dumps({
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "lambda.amazonaws.com",
},
"Action": "sts:AssumeRole",
},
],
}),
tags=tags,
)
# Attach policies to Lambda role
aws.iam.RolePolicyAttachment(
f"{environment}-lambda-basic",
role=lambda_role.name,
policy_arn="arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole",
)
# Lambda policy for S3 and DynamoDB access
lambda_policy = aws.iam.RolePolicy(
f"{environment}-lambda-policy",
role=lambda_role.name,
policy=Output.all(data_lake_bucket.arn).apply(
lambda args: json.dumps({
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject",
],
"Resource": f"{args[0]}/*",
},
{
"Effect": "Allow",
"Action": [
"dynamodb:PutItem",
"dynamodb:GetItem",
"dynamodb:Query",
"dynamodb:Scan",
],
"Resource": "*",
},
],
})
),
)
# Data processing Lambda
data_processor = aws.lambda_.Function(
f"{environment}-data-processor",
runtime="python3.11",
handler="index.handler",
role=lambda_role.arn,
timeout=300,
memory_size=1024,
environment={
"variables": {
"ENVIRONMENT": environment,
"DATA_BUCKET": data_lake_bucket.bucket,
},
},
code=pulumi.AssetArchive({
".": pulumi.FileArchive("./lambda/data-processor"),
}),
tags={**tags, "Name": f"{environment}-data-processor"},
)
# EventBridge Rule for scheduled processing
processing_schedule = aws.cloudwatch.EventRule(
f"{environment}-processing-schedule",
schedule_expression="rate(1 hour)",
tags=tags,
)
aws.cloudwatch.EventTarget(
f"{environment}-processing-target",
rule=processing_schedule.name,
arn=data_processor.arn,
)
aws.lambda_.Permission(
f"{environment}-processing-permission",
action="lambda:InvokeFunction",
function=data_processor.name,
principal="events.amazonaws.com",
source_arn=processing_schedule.arn,
)
# Glue Catalog Database
glue_database = aws.glue.CatalogDatabase(
f"{environment}-glue-db",
name=f"{environment}_data_catalog",
description=f"Data catalog for {environment} environment",
)
# Athena Workgroup
athena_workgroup = aws.athena.Workgroup(
f"{environment}-athena-workgroup",
name=f"{environment}-workgroup",
configuration={
"result_configuration": {
"output_location": Output.concat("s3://", data_lake_bucket.bucket, "/athena-results/"),
},
"enforce_workgroup_configuration": True,
"publish_cloudwatch_metrics_enabled": True,
},
tags=tags,
)
# Kinesis Data Stream
kinesis_stream = aws.kinesis.Stream(
f"{environment}-stream",
name=f"{environment}-data-stream",
shard_count=2,
retention_period=24,
shard_level_metrics=[
"IncomingBytes",
"OutgoingBytes",
],
tags={**tags, "Name": f"{environment}-stream"},
)
# Kinesis Firehose for S3 delivery
firehose_role = aws.iam.Role(
f"{environment}-firehose-role",
assume_role_policy=json.dumps({
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "firehose.amazonaws.com",
},
"Action": "sts:AssumeRole",
},
],
}),
tags=tags,
)
firehose_policy = aws.iam.RolePolicy(
f"{environment}-firehose-policy",
role=firehose_role.name,
policy=Output.all(data_lake_bucket.arn, kinesis_stream.arn).apply(
lambda args: json.dumps({
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:GetObject",
"s3:ListBucket",
],
"Resource": [
args[0],
f"{args[0]}/*",
],
},
{
"Effect": "Allow",
"Action": [
"kinesis:DescribeStream",
"kinesis:GetShardIterator",
"kinesis:GetRecords",
"kinesis:ListShards",
],
"Resource": args[1],
},
],
})
),
)
firehose_delivery_stream = aws.kinesis.FirehoseDeliveryStream(
f"{environment}-firehose",
name=f"{environment}-s3-delivery",
destination="extended_s3",
kinesis_source_configuration={
"kinesis_stream_arn": kinesis_stream.arn,
"role_arn": firehose_role.arn,
},
extended_s3_configuration={
"role_arn": firehose_role.arn,
"bucket_arn": data_lake_bucket.arn,
"prefix": "raw-data/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/",
"error_output_prefix": "error-data/",
"buffering_interval": 60,
"buffering_size": 5,
"compression_format": "GZIP",
"data_format_conversion_configuration": {
"enabled": True,
"output_format_configuration": {
"serializer": {
"parquet_serde": {},
},
},
"schema_configuration": {
"database_name": glue_database.name,
"table_name": "raw_events",
"role_arn": firehose_role.arn,
},
},
},
tags={**tags, "Name": f"{environment}-firehose"},
)
# Exports
export("vpc_id", vpc.id)
export("database_endpoint", database.endpoint)
export("data_lake_bucket", data_lake_bucket.bucket)
export("kinesis_stream_name", kinesis_stream.name)
export("emr_cluster_id", emr_cluster.id)
export("athena_workgroup", athena_workgroup.name)
# Component Resources for better organization
class DataPlatform(pulumi.ComponentResource):
    def __init__(self, name, opts=None):
        super().__init__("custom:infrastructure:DataPlatform", name, None, opts)
        # All resources created above would be children of this component
        # This provides better organization in the Pulumi console

data_platform = DataPlatform(f"{environment}-data-platform")
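The DataPlatform stub above only registers the component type. A minimal sketch of how it could be fleshed out, reusing the pulumi and pulumi_aws imports from the top of the program: child resources take opts=pulumi.ResourceOptions(parent=self) so they group under the component, and register_outputs marks construction complete (the bucket here is illustrative):

class DataPlatform(pulumi.ComponentResource):
    def __init__(self, name, opts=None):
        super().__init__("custom:infrastructure:DataPlatform", name, None, opts)

        # Children are parented to the component so they nest under it in the Pulumi console
        self.raw_bucket = aws.s3.BucketV2(
            f"{name}-raw",
            opts=pulumi.ResourceOptions(parent=self),
        )

        # Signal that the component has finished creating its children
        self.register_outputs({"raw_bucket": self.raw_bucket.bucket})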
# AI Prompt
Agent: "Create Ansible playbook for:
- Server hardening
- Application deployment
- Database configuration
- Monitoring setup
- Security compliance"
# site.yml - Main playbook
---
- name: Configure Infrastructure
hosts: all
become: yes
gather_facts: yes
vars_files:
- vars/common.yml
- "vars/{{ environment }}.yml"
pre_tasks:
- name: Update package cache
package:
update_cache: yes
changed_when: false
- name: Install common packages
package:
name: "{{ common_packages }}"
state: present
roles:
- role: common
tags: common
- role: security
tags: security
- role: monitoring
tags: monitoring
# roles/common/tasks/main.yml
---
- name: Configure system settings
include_tasks: system.yml
- name: Configure users and groups
include_tasks: users.yml
- name: Configure SSH
include_tasks: ssh.yml
- name: Configure firewall
include_tasks: firewall.yml
# roles/common/tasks/system.yml
---
- name: Set hostname
hostname:
name: "{{ inventory_hostname }}"
- name: Configure timezone
timezone:
name: "{{ timezone | default('UTC') }}"
- name: Configure sysctl parameters
sysctl:
name: "{{ item.key }}"
value: "{{ item.value }}"
state: present
reload: yes
loop: "{{ sysctl_parameters | dict2items }}"
when: sysctl_parameters is defined
- name: Configure system limits
pam_limits:
domain: "*"
limit_type: "{{ item.type }}"
limit_item: "{{ item.item }}"
value: "{{ item.value }}"
loop:
- { type: 'soft', item: 'nofile', value: '65536' }
- { type: 'hard', item: 'nofile', value: '65536' }
- { type: 'soft', item: 'nproc', value: '65536' }
- { type: 'hard', item: 'nproc', value: '65536' }
# roles/common/tasks/users.yml
---
- name: Create application user
user:
name: "{{ app_user }}"
shell: /bin/bash
home: "/home/{{ app_user }}"
create_home: yes
groups: "{{ app_groups | default([]) }}"
- name: Create admin users
user:
name: "{{ item.name }}"
shell: /bin/bash
groups: "{{ item.groups | default(['sudo']) }}"
create_home: yes
password: "{{ item.password | password_hash('sha512') }}"
loop: "{{ admin_users }}"
when: admin_users is defined
- name: Add SSH keys for users
authorized_key:
user: "{{ item.0.name }}"
key: "{{ item.1 }}"
state: present
loop: "{{ admin_users | subelements('ssh_keys', skip_missing=True) }}"
when: admin_users is defined
# roles/security/tasks/main.yml
---
- name: Install security packages
package:
name:
- fail2ban
- aide
- rkhunter
- auditd
- apparmor
state: present
- name: Configure fail2ban
template:
src: fail2ban.local.j2
dest: /etc/fail2ban/jail.local
owner: root
group: root
mode: '0644'
notify: restart fail2ban
- name: Configure auditd rules
template:
src: audit.rules.j2
dest: /etc/audit/rules.d/audit.rules
owner: root
group: root
mode: '0640'
notify: restart auditd
- name: Enable and start security services
systemd:
name: "{{ item }}"
enabled: yes
state: started
loop:
- fail2ban
- auditd
- apparmor
- name: Configure automatic security updates
copy:
content: |
APT::Periodic::Update-Package-Lists "1";
APT::Periodic::Download-Upgradeable-Packages "1";
APT::Periodic::AutocleanInterval "7";
APT::Periodic::Unattended-Upgrade "1";
dest: /etc/apt/apt.conf.d/20auto-upgrades
owner: root
group: root
mode: '0644'
when: ansible_os_family == "Debian"
# roles/security/tasks/hardening.yml
---
- name: Disable unnecessary services
systemd:
name: "{{ item }}"
enabled: no
state: stopped
loop: "{{ disabled_services }}"
failed_when: false
- name: Set secure kernel parameters
sysctl:
name: "{{ item.key }}"
value: "{{ item.value }}"
state: present
reload: yes
loop:
- { key: 'net.ipv4.tcp_syncookies', value: '1' }
- { key: 'net.ipv4.ip_forward', value: '0' }
- { key: 'net.ipv4.conf.all.send_redirects', value: '0' }
- { key: 'net.ipv4.conf.default.send_redirects', value: '0' }
- { key: 'net.ipv4.conf.all.accept_source_route', value: '0' }
- { key: 'net.ipv4.conf.default.accept_source_route', value: '0' }
- { key: 'net.ipv4.conf.all.accept_redirects', value: '0' }
- { key: 'net.ipv4.conf.default.accept_redirects', value: '0' }
- { key: 'net.ipv4.conf.all.log_martians', value: '1' }
- { key: 'net.ipv4.conf.default.log_martians', value: '1' }
- { key: 'net.ipv4.icmp_echo_ignore_broadcasts', value: '1' }
- { key: 'net.ipv4.icmp_ignore_bogus_error_responses', value: '1' }
- { key: 'net.ipv4.conf.all.rp_filter', value: '1' }
- { key: 'net.ipv4.conf.default.rp_filter', value: '1' }
- name: Configure secure SSH settings
lineinfile:
path: /etc/ssh/sshd_config
regexp: "^#?{{ item.key }}"
line: "{{ item.key }} {{ item.value }}"
state: present
loop:
- { key: 'PermitRootLogin', value: 'no' }
- { key: 'PasswordAuthentication', value: 'no' }
- { key: 'PermitEmptyPasswords', value: 'no' }
- { key: 'X11Forwarding', value: 'no' }
- { key: 'MaxAuthTries', value: '3' }
- { key: 'ClientAliveInterval', value: '300' }
- { key: 'ClientAliveCountMax', value: '0' }
- { key: 'LoginGraceTime', value: '60' }
- { key: 'Protocol', value: '2' }
- { key: 'StrictModes', value: 'yes' }
- { key: 'IgnoreRhosts', value: 'yes' }
- { key: 'HostbasedAuthentication', value: 'no' }
notify: restart ssh
# playbooks/deploy-app.yml
---
- name: Deploy Application
hosts: app_servers
become: yes
vars:
app_version: "{{ version | default('latest') }}"
deployment_user: "{{ app_user }}"
app_port: 3000
tasks:
- name: Create application directories
file:
path: "{{ item }}"
state: directory
owner: "{{ deployment_user }}"
group: "{{ deployment_user }}"
mode: '0755'
loop:
- /opt/{{ app_name }}
- /opt/{{ app_name }}/releases
- /opt/{{ app_name }}/shared
- /opt/{{ app_name }}/shared/logs
- /opt/{{ app_name }}/shared/tmp
- name: Download application artifact
get_url:
url: "{{ artifact_url }}/{{ app_name }}-{{ app_version }}.tar.gz"
dest: "/opt/{{ app_name }}/releases/{{ app_name }}-{{ app_version }}.tar.gz"
owner: "{{ deployment_user }}"
group: "{{ deployment_user }}"
mode: '0644'
checksum: "sha256:{{ artifact_checksum }}"
when: artifact_checksum is defined
- name: Extract application
unarchive:
src: "/opt/{{ app_name }}/releases/{{ app_name }}-{{ app_version }}.tar.gz"
dest: "/opt/{{ app_name }}/releases/"
remote_src: yes
owner: "{{ deployment_user }}"
group: "{{ deployment_user }}"
creates: "/opt/{{ app_name }}/releases/{{ app_version }}"
- name: Install Node.js dependencies
npm:
path: "/opt/{{ app_name }}/releases/{{ app_version }}"
production: yes
become_user: "{{ deployment_user }}"
when: app_type == "nodejs"
- name: Run database migrations
command: npm run migrate
args:
chdir: "/opt/{{ app_name }}/releases/{{ app_version }}"
environment:
NODE_ENV: production
DATABASE_URL: "{{ database_url }}"
become_user: "{{ deployment_user }}"
when: run_migrations | default(false)
- name: Create systemd service
template:
src: app.service.j2
dest: "/etc/systemd/system/{{ app_name }}.service"
owner: root
group: root
mode: '0644'
notify:
- reload systemd
- restart app
- name: Update symlink to new version
file:
src: "/opt/{{ app_name }}/releases/{{ app_version }}"
dest: "/opt/{{ app_name }}/current"
state: link
owner: "{{ deployment_user }}"
group: "{{ deployment_user }}"
- name: Configure nginx
template:
src: nginx-app.conf.j2
dest: "/etc/nginx/sites-available/{{ app_name }}.conf"
owner: root
group: root
mode: '0644'
notify: reload nginx
- name: Enable nginx site
file:
src: "/etc/nginx/sites-available/{{ app_name }}.conf"
dest: "/etc/nginx/sites-enabled/{{ app_name }}.conf"
state: link
notify: reload nginx
- name: Start and enable application
systemd:
name: "{{ app_name }}"
enabled: yes
state: started
daemon_reload: yes
handlers:
- name: reload systemd
systemd:
daemon_reload: yes
- name: restart app
systemd:
name: "{{ app_name }}"
state: restarted
- name: reload nginx
systemd:
name: nginx
state: reloaded
# playbooks/rolling-update.yml
---
- name: Rolling Update Application
hosts: app_servers
become: yes
serial: "{{ rolling_update_batch_size | default(1) }}"
max_fail_percentage: "{{ max_fail_percentage | default(0) }}"
pre_tasks:
- name: Remove server from load balancer
uri:
url: "{{ lb_api_url }}/pools/{{ lb_pool_id }}/members/{{ inventory_hostname }}"
method: DELETE
headers:
Authorization: "Bearer {{ lb_api_token }}"
delegate_to: localhost
- name: Wait for connections to drain
wait_for:
port: "{{ app_port }}"
state: drained
timeout: 30
tasks:
- name: Deploy new version
include_tasks: deploy-tasks.yml
- name: Health check
uri:
url: "http://{{ inventory_hostname }}:{{ app_port }}/health"
method: GET
status_code: 200
register: health_check
retries: 5
delay: 10
until: health_check.status == 200
post_tasks:
- name: Add server back to load balancer
uri:
url: "{{ lb_api_url }}/pools/{{ lb_pool_id }}/members"
method: POST
headers:
Authorization: "Bearer {{ lb_api_token }}"
body_format: json
body:
address: "{{ inventory_hostname }}"
port: "{{ app_port }}"
weight: 1
delegate_to: localhost
# AI Prompt
Agent: "Create GitOps setup with:
- ArgoCD application manifests
- Multi-environment configuration
- Progressive deployment
- Secret management
- Monitoring integration"
# argocd/applications/production.yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: production-app
namespace: argocd
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: production
source:
repoURL: https://github.com/company/infrastructure
targetRevision: main
path: environments/production
helm:
valueFiles:
- values.yaml
- values-production.yaml
parameters:
- name: image.tag
value: "$ARGOCD_APP_REVISION"
destination:
server: https://kubernetes.default.svc
namespace: production
syncPolicy:
automated:
prune: true
selfHeal: true
allowEmpty: false
syncOptions:
- CreateNamespace=true
- PrunePropagationPolicy=foreground
- PruneLast=true
retry:
limit: 5
backoff:
duration: 5s
factor: 2
maxDuration: 3m
revisionHistoryLimit: 3
ignoreDifferences:
- group: apps
kind: Deployment
jsonPointers:
- /spec/replicas
- group: autoscaling
kind: HorizontalPodAutoscaler
jsonPointers:
- /spec/minReplicas
- /spec/maxReplicas
# argocd/projects/production.yaml
apiVersion: argoproj.io/v1alpha1
kind: AppProject
metadata:
name: production
namespace: argocd
spec:
description: Production environment project
sourceRepos:
- https://github.com/company/infrastructure
- https://github.com/company/applications
destinations:
- namespace: production
server: https://kubernetes.default.svc
- namespace: production-*
server: https://kubernetes.default.svc
clusterResourceWhitelist:
- group: ''
kind: Namespace
- group: rbac.authorization.k8s.io
kind: ClusterRole
- group: rbac.authorization.k8s.io
kind: ClusterRoleBinding
namespaceResourceWhitelist:
- group: '*'
kind: '*'
roles:
- name: admin
policies:
- p, proj:production:admin, applications, *, production/*, allow
- p, proj:production:admin, repositories, *, *, allow
groups:
- company:platform-team
- name: developer
policies:
- p, proj:production:developer, applications, get, production/*, allow
- p, proj:production:developer, applications, sync, production/*, allow
groups:
- company:developers
# environments/base/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- namespace.yaml
- service-account.yaml
- deployment.yaml
- service.yaml
- ingress.yaml
- configmap.yaml
commonLabels:
app.kubernetes.io/managed-by: argocd
configMapGenerator:
- name: app-config
files:
- config.yaml
options:
disableNameSuffixHash: false
secretGenerator:
- name: app-secrets
files:
- secrets.enc.yaml
options:
disableNameSuffixHash: false
images:
- name: app
newName: registry.company.com/app
newTag: latest
replicas:
- name: app
count: 2
# environments/production/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
bases:
- ../base
namespace: production
commonLabels:
environment: production
patchesStrategicMerge:
- deployment-patch.yaml
- service-patch.yaml
- ingress-patch.yaml
configMapGenerator:
- name: app-config
behavior: merge
files:
- config-production.yaml
secretGenerator:
- name: app-secrets
behavior: merge
files:
- secrets-production.enc.yaml
images:
- name: app
newName: registry.company.com/app
newTag: v1.2.3
replicas:
- name: app
count: 5
resources:
- hpa.yaml
- pdb.yaml
- network-policy.yaml
# environments/production/deployment-patch.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: app
spec:
template:
spec:
containers:
- name: app
resources:
requests:
cpu: 500m
memory: 512Mi
limits:
cpu: 2000m
memory: 2Gi
env:
- name: ENVIRONMENT
value: production
- name: LOG_LEVEL
value: info
livenessProbe:
httpGet:
path: /health
port: 8080
initialDelaySeconds: 30
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 3
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: app
operator: In
values:
- app
topologyKey: kubernetes.io/hostname
# .github/workflows/gitops.yml
name: GitOps Deploy
on:
push:
branches:
- main
- develop
paths:
- 'src/**'
- 'Dockerfile'
- '.github/workflows/**'
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
jobs:
build-and-push:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
outputs:
image-tag: ${{ steps.meta.outputs.version }}
image-digest: ${{ steps.build.outputs.digest }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to Container Registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=ref,event=branch
type=ref,event=pr
type=semver,pattern={{version}}
type=sha,prefix={{branch}}-
- name: Build and push Docker image
id: build
uses: docker/build-push-action@v5
with:
context: .
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
platforms: linux/amd64,linux/arm64
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
with:
image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.meta.outputs.version }}
format: 'sarif'
output: 'trivy-results.sarif'
- name: Upload Trivy scan results
uses: github/codeql-action/upload-sarif@v3
with:
sarif_file: 'trivy-results.sarif'
update-manifests:
needs: build-and-push
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
steps:
- name: Checkout infrastructure repo
uses: actions/checkout@v4
with:
repository: company/infrastructure
token: ${{ secrets.INFRASTRUCTURE_PAT }}
- name: Update image tag
run: |
ENV_DIR="environments/${{ github.ref_name == 'main' && 'production' || 'staging' }}"
cd $ENV_DIR
yq e -i '.images[0].newTag = "${{ needs.build-and-push.outputs.image-tag }}"' kustomization.yaml
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.INFRASTRUCTURE_PAT }}
commit-message: "chore: update ${{ github.ref_name }} image to ${{ needs.build-and-push.outputs.image-tag }}"
title: "Deploy ${{ needs.build-and-push.outputs.image-tag }} to ${{ github.ref_name }}"
body: |
## Deployment Update
**Image**: `${{ needs.build-and-push.outputs.image-tag }}`
**Digest**: `${{ needs.build-and-push.outputs.image-digest }}`
**Environment**: `${{ github.ref_name == 'main' && 'production' || 'staging' }}`
### Changes
${{ github.event.head_commit.message }}
### Commit
${{ github.sha }}
branch: deploy/${{ github.ref_name }}-${{ github.sha }}
delete-branch: true
# clusters/production/flux-system/gotk-sync.yaml
# AI Prompt: "Create Flux v2 GitOps configuration"
apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: GitRepository
metadata:
name: flux-system
namespace: flux-system
spec:
interval: 1m
ref:
branch: main
secretRef:
name: flux-system
url: ssh://git@github.com/company/infrastructure
---
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
kind: Kustomization
metadata:
name: flux-system
namespace: flux-system
spec:
interval: 10m
path: ./clusters/production
prune: true
sourceRef:
kind: GitRepository
name: flux-system
validation: client
decryption:
provider: sops
secretRef:
name: sops-age
# clusters/production/infrastructure.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
kind: Kustomization
metadata:
name: infrastructure
namespace: flux-system
spec:
interval: 10m
dependsOn:
- name: flux-system
sourceRef:
kind: GitRepository
name: flux-system
path: ./infrastructure
prune: true
validation: client
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: ingress-nginx-controller
namespace: ingress-nginx
- apiVersion: apps/v1
kind: Deployment
name: cert-manager
namespace: cert-manager
# infrastructure/sources/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- bitnami.yaml
- ingress-nginx.yaml
- jetstack.yaml
- prometheus-community.yaml
# infrastructure/sources/bitnami.yaml
apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: HelmRepository
metadata:
name: bitnami
namespace: flux-system
spec:
interval: 30m
url: https://charts.bitnami.com/bitnami
# apps/production/app.yaml
apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: GitRepository
metadata:
name: app
namespace: flux-system
spec:
interval: 1m
ref:
branch: main
url: https://github.com/company/app
---
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
kind: Kustomization
metadata:
name: app
namespace: flux-system
spec:
interval: 5m
path: ./deploy/production
prune: true
sourceRef:
kind: GitRepository
name: app
targetNamespace: production
validation: client
images:
- name: app
newName: registry.company.com/app
newTag: ${GIT_COMMIT} # Updated by CI/CD
postBuild:
substituteFrom:
- kind: ConfigMap
name: cluster-config
- kind: Secret
name: cluster-secrets
# AI Prompt
Agent: "Create comprehensive Terraform tests with:
- Unit tests for modules
- Integration tests
- Compliance testing
- Cost validation
- Security scanning"
# test/terraform_test.go
package test
import (
"testing"
"time"
"github.com/gruntwork-io/terratest/modules/terraform"
"github.com/gruntwork-io/terratest/modules/aws"
"github.com/gruntwork-io/terratest/modules/retry"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestVPCModule(t *testing.T) {
t.Parallel()
// AWS Region
awsRegion := "us-east-1"
// Terraform Options
terraformOptions := &terraform.Options{
TerraformDir: "../modules/vpc",
Vars: map[string]interface{}{
"name": "test-vpc",
"cidr": "10.0.0.0/16",
"azs": []string{"us-east-1a", "us-east-1b", "us-east-1c"},
"private_subnets": []string{"10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"},
"public_subnets": []string{"10.0.101.0/24", "10.0.102.0/24", "10.0.103.0/24"},
"enable_nat_gateway": true,
"single_nat_gateway": true,
"environment": "test",
},
EnvVars: map[string]string{
"AWS_DEFAULT_REGION": awsRegion,
},
}
// Clean up resources
defer terraform.Destroy(t, terraformOptions)
// Deploy infrastructure
terraform.InitAndApply(t, terraformOptions)
// Get outputs
vpcID := terraform.Output(t, terraformOptions, "vpc_id")
privateSubnetIDs := terraform.OutputList(t, terraformOptions, "private_subnet_ids")
publicSubnetIDs := terraform.OutputList(t, terraformOptions, "public_subnet_ids")
// Validate VPC
vpc := aws.GetVpcById(t, vpcID, awsRegion)
require.Equal(t, "10.0.0.0/16", vpc.CidrBlock)
// Validate subnets
assert.Equal(t, 3, len(privateSubnetIDs))
assert.Equal(t, 3, len(publicSubnetIDs))
// Test connectivity
for _, subnetID := range publicSubnetIDs {
subnet := aws.GetSubnetById(t, subnetID, awsRegion)
assert.True(t, subnet.MapPublicIpOnLaunch)
}
// Validate NAT Gateway
natGateways := aws.GetNatGatewaysInVpc(t, vpcID, awsRegion)
assert.Equal(t, 1, len(natGateways))
}
func TestEKSCluster(t *testing.T) {
t.Parallel()
awsRegion := "us-east-1"
terraformOptions := &terraform.Options{
TerraformDir: "../modules/eks",
Vars: map[string]interface{}{
"cluster_name": "test-cluster",
"cluster_version": "1.28",
"instance_types": []string{"t3.medium"},
"min_size": 2,
"max_size": 4,
"desired_size": 2,
},
}
defer terraform.Destroy(t, terraformOptions)
terraform.InitAndApply(t, terraformOptions)
// Get cluster endpoint
clusterEndpoint := terraform.Output(t, terraformOptions, "cluster_endpoint")
// Test cluster is accessible
maxRetries := 30
timeBetweenRetries := 10 * time.Second
retry.DoWithRetry(t, "Check EKS cluster", maxRetries, timeBetweenRetries, func() (string, error) {
return aws.GetEksClusterStatus(t, "test-cluster", awsRegion)
})
}
// tests/unit/vpc_test.tf
terraform {
required_version = ">= 1.5.0"
}
module "vpc_test" {
source = "../../modules/vpc"
name = "test-vpc"
cidr = "10.0.0.0/16"
azs = ["us-east-1a", "us-east-1b"]
private_subnets = ["10.0.1.0/24", "10.0.2.0/24"]
public_subnets = ["10.0.101.0/24", "10.0.102.0/24"]
enable_nat_gateway = true
single_nat_gateway = true
tags = {
Environment = "test"
Purpose = "unit-test"
}
}
# Validation tests
resource "null_resource" "vpc_validation" {
provisioner "local-exec" {
command = <<-EOT
# Check CIDR block
if [[ "${module.vpc_test.vpc_cidr_block}" != "10.0.0.0/16" ]]; then
echo "VPC CIDR validation failed"
exit 1
fi
# Check subnet count
PRIVATE_COUNT=$(echo '${jsonencode(module.vpc_test.private_subnet_ids)}' | jq '. | length')
if [[ $PRIVATE_COUNT -ne 2 ]]; then
echo "Private subnet count validation failed"
exit 1
fi
echo "All validations passed"
EOT
}
}
# tests/compliance/cis_benchmark_test.tf
# AI Prompt: "Create CIS benchmark compliance tests"
resource "null_resource" "cis_compliance" {
provisioner "local-exec" {
command = <<-EOT
# CIS AWS Foundations Benchmark checks
# 2.1.1 Ensure S3 bucket policy is not public
aws s3api get-bucket-policy-status --bucket ${aws_s3_bucket.main.id} \
--query 'PolicyStatus.IsPublic' --output text | grep -q "false"
# 2.1.2 Ensure S3 bucket encryption is enabled
aws s3api get-bucket-encryption --bucket ${aws_s3_bucket.main.id} \
--query 'ServerSideEncryptionConfiguration.Rules[0].ApplyServerSideEncryptionByDefault.SSEAlgorithm' \
--output text | grep -q "AES256\|aws:kms"
# 4.1 Ensure no security groups allow ingress from 0.0.0.0/0 to port 22
aws ec2 describe-security-groups \
--filters "Name=ip-permission.from-port,Values=22" \
"Name=ip-permission.to-port,Values=22" \
"Name=ip-permission.cidr,Values=0.0.0.0/0" \
--query 'SecurityGroups[*].GroupId' --output text | wc -w | grep -q "0"
# 4.2 Ensure no security groups allow ingress from 0.0.0.0/0 to port 3389
aws ec2 describe-security-groups \
--filters "Name=ip-permission.from-port,Values=3389" \
"Name=ip-permission.to-port,Values=3389" \
"Name=ip-permission.cidr,Values=0.0.0.0/0" \
--query 'SecurityGroups[*].GroupId' --output text | wc -w | grep -q "0"
EOT
}
}
# tests/cost/cost_estimation_test.tf
# AI Prompt: "Create cost estimation tests"
data "infracost_estimate" "main" {
terraform_dir = path.module
}
resource "null_resource" "cost_validation" {
provisioner "local-exec" {
command = <<-EOT
# Run Infracost
infracost breakdown --path . --format json --out-file /tmp/infracost.json
# Check monthly cost is under budget
MONTHLY_COST=$(jq '.totalMonthlyCost' /tmp/infracost.json)
BUDGET_LIMIT=5000
if (( $(echo "$MONTHLY_COST > $BUDGET_LIMIT" | bc -l) )); then
echo "ERROR: Monthly cost ($MONTHLY_COST) exceeds budget ($BUDGET_LIMIT)"
exit 1
fi
# Check for expensive resources
jq '.projects[].breakdown.resources[] | select(.monthlyCost > 500) | {name: .name, cost: .monthlyCost}' /tmp/infracost.json
EOT
}
}
# policies/terraform.rego
# AI Prompt: "Create OPA policies for infrastructure compliance"
package terraform.analysis
import future.keywords.contains
import future.keywords.if
import future.keywords.in
# Deny public S3 buckets
deny[msg] {
resource := input.resource_changes[_]
resource.type == "aws_s3_bucket"
resource.change.after.acl == "public-read"
msg := sprintf("S3 bucket %v has public read access", [resource.address])
}
deny[msg] {
resource := input.resource_changes[_]
resource.type == "aws_s3_bucket"
resource.change.after.acl == "public-read-write"
msg := sprintf("S3 bucket %v has public read-write access", [resource.address])
}
# Require encryption for RDS instances
deny[msg] {
resource := input.resource_changes[_]
resource.type == "aws_db_instance"
not resource.change.after.storage_encrypted
msg := sprintf("RDS instance %v is not encrypted", [resource.address])
}
# Enforce tagging standards
deny[msg] {
resource := input.resource_changes[_]
required_tags := {"Environment", "Owner", "Project", "CostCenter"}
resource_tags := object.get(resource.change.after, "tags", {})
missing_tags := required_tags - {tag | resource_tags[tag]}
count(missing_tags) > 0
msg := sprintf("Resource %v is missing required tags: %v", [resource.address, missing_tags])
}
# Ensure EC2 instances use approved AMIs
approved_amis := {
"ami-0123456789abcdef0", # Ubuntu 22.04 LTS
"ami-0987654321fedcba0", # Amazon Linux 2023
}
deny[msg] {
resource := input.resource_changes[_]
resource.type == "aws_instance"
not resource.change.after.ami in approved_amis
msg := sprintf("EC2 instance %v uses unapproved AMI: %v", [resource.address, resource.change.after.ami])
}
# Limit instance types for cost control
expensive_instance_types := {
"m5.24xlarge",
"c5.24xlarge",
"r5.24xlarge",
"x1e.32xlarge",
}
warn[msg] {
resource := input.resource_changes[_]
resource.type == "aws_instance"
resource.change.after.instance_type in expensive_instance_types
msg := sprintf("EC2 instance %v uses expensive instance type: %v", [resource.address, resource.change.after.instance_type])
}
# Security group rules
deny[msg] {
resource := input.resource_changes[_]
resource.type == "aws_security_group_rule"
resource.change.after.type == "ingress"
resource.change.after.cidr_blocks[_] == "0.0.0.0/0"
resource.change.after.from_port <= 22
resource.change.after.to_port >= 22
msg := sprintf("Security group rule %v allows SSH from internet", [resource.address])
}
# policies/sentinel/cost-control.sentinel
import "tfplan/v2" as tfplan
import "decimal"
# Monthly budget limit
param monthly_budget default 10000
# Get all resource cost estimates
getAllResourceCosts = func() {
costs = {}
for tfplan.resource_changes as _, rc {
if rc.change.actions contains "create" or
rc.change.actions contains "update" {
# Estimate costs based on resource type
costs[rc.address] = estimateResourceCost(rc)
}
}
return costs
}
# Estimate resource cost
estimateResourceCost = func(resource) {
costs = {
"aws_instance": {
"t3.micro": 8.50,
"t3.small": 17.00,
"t3.medium": 34.00,
"t3.large": 68.00,
"m5.large": 96.00,
"m5.xlarge": 192.00,
},
"aws_db_instance": {
"db.t3.micro": 18.00,
"db.t3.small": 36.00,
"db.t3.medium": 72.00,
"db.r5.large": 230.00,
},
"aws_eks_node_group": {
"base": 72.00, # EKS control plane
"per_node": 96.00, # m5.large nodes
},
}
if resource.type in keys(costs) {
if resource.type == "aws_instance" {
instance_type = resource.change.after.instance_type
if instance_type in keys(costs.aws_instance) {
return costs.aws_instance[instance_type]
}
} else if resource.type == "aws_db_instance" {
instance_class = resource.change.after.instance_class
if instance_class in keys(costs.aws_db_instance) {
return costs.aws_db_instance[instance_class]
}
} else if resource.type == "aws_eks_node_group" {
desired_size = resource.change.after.scaling_config[0].desired_size
return costs.aws_eks_node_group.base +
(costs.aws_eks_node_group.per_node * desired_size)
}
}
return 0
}
# Main rule
main = rule {
all_costs = getAllResourceCosts()
total_cost = decimal.new(0)
for all_costs as resource, cost {
total_cost = total_cost.add(cost)
}
print("Estimated monthly cost: $" + string(total_cost))
total_cost.less_than_or_equal_to(monthly_budget)
}
# test/kitchen.yml - Test Kitchen configuration
---
driver:
name: terraform
command_timeout: 1800
variables:
environment: "test"
provisioner:
name: terraform
platforms:
- name: aws
suites:
- name: vpc
driver:
root_module_directory: test/fixtures/vpc
verifier:
name: terraform
systems:
- name: vpc
backend: aws
controls:
- vpc_validation
- subnet_validation
- security_validation
- name: eks
driver:
root_module_directory: test/fixtures/eks
verifier:
name: terraform
systems:
- name: eks
backend: aws
controls:
- cluster_validation
- node_group_validation
- iam_validation
# test/inspec/vpc_profile/controls/vpc_validation.rb
control 'vpc_validation' do
impact 1.0
title 'VPC Configuration Validation'
desc 'Ensure VPC is properly configured'
describe aws_vpc(vpc_id: attribute('vpc_id')) do
it { should exist }
its('cidr_block') { should eq '10.0.0.0/16' }
its('state') { should eq 'available' }
its('tags') { should include('Environment' => 'test') }
end
describe aws_subnets.where(vpc_id: attribute('vpc_id')) do
its('count') { should eq 6 } # 3 public + 3 private
end
describe aws_internet_gateway(igw_id: attribute('igw_id')) do
it { should exist }
it { should be_attached }
end
end
Terminal window
# Generate infrastructure tests
claude "Create infrastructure testing with:
1. Contract testing
2. Chaos engineering
3. Load testing infrastructure
4. Disaster recovery testing
5. Security penetration testing"
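The chaos-engineering item above can itself be expressed as infrastructure code. A minimal sketch using AWS Fault Injection Simulator, assuming an IAM role for FIS (aws_iam_role.fis) already exists and test instances carry an Environment=test tag:
# chaos/fis-stop-instance.tf (illustrative)
resource "aws_fis_experiment_template" "stop_test_instance" {
  description = "Stop one tagged instance and verify the ASG recovers"
  role_arn    = aws_iam_role.fis.arn # assumed pre-existing FIS role

  stop_condition {
    source = "none" # in production, reference a CloudWatch alarm here
  }

  action {
    name      = "stop-instances"
    action_id = "aws:ec2:stop-instances"

    target {
      key   = "Instances"
      value = "test-instances"
    }
  }

  target {
    name           = "test-instances"
    resource_type  = "aws:ec2:instance"
    selection_mode = "COUNT(1)" # affect a single instance per run

    resource_tag {
      key   = "Environment"
      value = "test"
    }
  }

  tags = {
    Purpose = "chaos-testing"
  }
}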
# AI Prompt
Agent: "Create cost optimization strategies with:
- Resource right-sizing
- Spot instance usage
- Reserved instance planning
- Auto-scaling policies
- Cost allocation tags"
# modules/cost-optimized-compute/main.tf
variable "workload_type" {
description = "Type of workload: web, batch, ml"
type = string
default = "web"
}
locals {
# Instance recommendations based on workload
instance_recommendations = {
web = {
small = "t3.small"
medium = "t3.medium"
large = "t3.large"
}
batch = {
small = "m5.large"
medium = "m5.xlarge"
large = "m5.2xlarge"
}
ml = {
small = "g4dn.xlarge"
medium = "g4dn.2xlarge"
large = "g4dn.4xlarge"
}
}
# Spot instance configuration
spot_config = {
web = { enabled = false, max_price = "" }
batch = { enabled = true, max_price = "0.50" }
ml = { enabled = true, max_price = "1.00" }
}
}
# Auto Scaling Group with mixed instances
resource "aws_autoscaling_group" "optimized" {
name = "${var.name}-asg"
vpc_zone_identifier = var.subnet_ids
min_size = var.min_size
max_size = var.max_size
desired_capacity = var.desired_capacity
# Mixed instances policy for cost optimization
mixed_instances_policy {
launch_template {
launch_template_specification {
launch_template_id = aws_launch_template.optimized.id
version = "$Latest"
}
override {
instance_type = local.instance_recommendations[var.workload_type]["small"]
weighted_capacity = 1
}
override {
instance_type = local.instance_recommendations[var.workload_type]["medium"]
weighted_capacity = 2
}
override {
instance_type = local.instance_recommendations[var.workload_type]["large"]
weighted_capacity = 4
}
}
instances_distribution {
on_demand_base_capacity = var.on_demand_base
on_demand_percentage_above_base_capacity = var.on_demand_percentage
spot_allocation_strategy = "capacity-optimized-prioritized"
spot_instance_pools = 3
spot_max_price = local.spot_config[var.workload_type]["max_price"]
}
}
# Predictive scaling is configured through a separate aws_autoscaling_policy
# (policy_type = "PredictiveScaling"); see the sketch after this resource.
enabled_metrics = [
"GroupMinSize",
"GroupMaxSize",
"GroupDesiredCapacity",
"GroupInServiceInstances",
"GroupTotalInstances"
]
tag {
key = "Name"
value = "${var.name}-instance"
propagate_at_launch = true
}
tag {
key = "Workload"
value = var.workload_type
propagate_at_launch = true
}
tag {
key = "CostCenter"
value = var.cost_center
propagate_at_launch = true
}
}
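Predictive scaling attaches to the group through a separate scaling policy rather than a block inside the ASG. A minimal sketch, reusing the variables from the module above:
# Predictive scaling policy for the optimized ASG
resource "aws_autoscaling_policy" "predictive" {
  count                  = var.enable_predictive_scaling ? 1 : 0
  name                   = "${var.name}-predictive-scaling"
  policy_type            = "PredictiveScaling"
  autoscaling_group_name = aws_autoscaling_group.optimized.name

  predictive_scaling_configuration {
    metric_specification {
      target_value = 50
      predefined_metric_pair_specification {
        predefined_metric_type = "ASGCPUUtilization"
      }
    }
    mode                         = "ForecastAndScale"
    scheduling_buffer_time       = 10
    max_capacity_breach_behavior = "IncreaseMaxCapacity"
    max_capacity_buffer          = 10
  }
}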
# Scheduled scaling for predictable workloads
resource "aws_autoscaling_schedule" "scale_down_nights" {
scheduled_action_name = "${var.name}-scale-down-nights"
min_size = 1
max_size = var.max_size
desired_capacity = 1
recurrence = "0 20 * * MON-FRI" # 8 PM weekdays
autoscaling_group_name = aws_autoscaling_group.optimized.name
}
resource "aws_autoscaling_schedule" "scale_up_mornings" {
scheduled_action_name = "${var.name}-scale-up-mornings"
min_size = var.min_size
max_size = var.max_size
desired_capacity = var.desired_capacity
recurrence = "0 7 * * MON-FRI" # 7 AM weekdays
autoscaling_group_name = aws_autoscaling_group.optimized.name
}
# modules/cost-optimized-rds/main.tf
resource "aws_db_instance" "optimized" {
identifier = var.identifier
# Use Aurora Serverless for variable workloads
# (note: serverless engine_mode and scaling_configuration properly belong on
# aws_rds_cluster; they are shown on aws_db_instance here as a simplification)
engine = var.use_serverless ? "aurora-mysql" : var.engine
engine_mode = var.use_serverless ? "serverless" : "provisioned"
engine_version = var.engine_version
# Right-sized instance
instance_class = var.use_serverless ? null : var.instance_class
# Storage optimization
allocated_storage = var.allocated_storage
storage_type = "gp3"
storage_encrypted = true
# Use Multi-AZ only for production
multi_az = var.environment == "production"
# Automated backups with lifecycle
backup_retention_period = var.environment == "production" ? 7 : 1
backup_window = "03:00-04:00"
# Performance Insights for optimization
performance_insights_enabled = var.environment == "production"
performance_insights_retention_period = 7
# Scaling configuration for serverless
dynamic "scaling_configuration" {
for_each = var.use_serverless ? [1] : []
content {
auto_pause = true
min_capacity = 1
max_capacity = 4
seconds_until_auto_pause = 300
}
}
tags = merge(var.tags, {
CostOptimization = "enabled"
Environment = var.environment
})
}
# Reserved Instance recommendations
data "aws_rds_reserved_instance_offering" "recommendation" {
db_instance_class = var.instance_class
duration = 31536000 # 1 year
multi_az = var.environment == "production"
offering_type = "All Upfront" # Best discount
product_description = "mysql"
}
# modules/s3-lifecycle-optimization/main.tf
resource "aws_s3_bucket_lifecycle_configuration" "optimized" {
bucket = aws_s3_bucket.main.id
# Transition to cheaper storage classes
rule {
id = "archive-old-data"
status = "Enabled"
transition {
days = 30
storage_class = "STANDARD_IA"
}
transition {
days = 90
storage_class = "GLACIER"
}
transition {
days = 180
storage_class = "DEEP_ARCHIVE"
}
# Delete very old data
expiration {
days = 730 # 2 years
}
}
# Intelligent tiering for unpredictable access
rule {
id = "intelligent-tiering"
status = "Enabled"
filter {
prefix = "data/"
}
transition {
days = 0
storage_class = "INTELLIGENT_TIERING"
}
}
# Clean up incomplete uploads
rule {
id = "cleanup-incomplete-uploads"
status = "Enabled"
abort_incomplete_multipart_upload {
days_after_initiation = 7
}
}
# Delete old versions
rule {
id = "delete-old-versions"
status = "Enabled"
noncurrent_version_transition {
noncurrent_days = 30
storage_class = "STANDARD_IA"
}
noncurrent_version_expiration {
noncurrent_days = 90
}
}
}
# Cost allocation tags
# Note: the AWS provider has no separate S3 bucket tagging resource;
# cost allocation tags are set via the tags argument on the bucket itself.
resource "aws_s3_bucket" "main" {
bucket = var.bucket_name # assumed module input
tags = {
Environment = var.environment
Project = var.project
CostCenter = var.cost_center
DataType = var.data_type
Compliance = var.compliance_level
}
}
# kubernetes/cost-monitoring.yaml
# AI Prompt: "Create comprehensive cost monitoring setup"
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-cost-rules
namespace: monitoring
data:
cost-rules.yaml: |
groups:
- name: cost_monitoring
interval: 5m
rules:
# CPU cost calculation
- record: node_cpu_hourly_cost
expr: |
sum by (node) (
(1 - rate(node_cpu_seconds_total{mode="idle"}[5m]))
* on (node) group_left() node_cpu_hourly_rate
)
# Memory cost calculation
- record: node_memory_hourly_cost
expr: |
sum by (node) (
(1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)
* on (node) group_left() node_memory_hourly_rate
)
# Storage cost calculation
- record: pvc_storage_hourly_cost
expr: |
sum by (persistentvolumeclaim, namespace) (
kubelet_volume_stats_capacity_bytes
* on (persistentvolumeclaim) group_left() pvc_storage_hourly_rate
) / 1024 / 1024 / 1024
# Total namespace cost
- record: namespace_hourly_cost
expr: |
sum by (namespace) (
label_join(
sum by (pod, namespace) (
rate(container_cpu_usage_seconds_total[5m]) * 3600 * 0.05
), "pod", ",", "pod"
)
) +
sum by (namespace) (
label_join(
sum by (pod, namespace) (
container_memory_working_set_bytes / 1024 / 1024 / 1024 * 0.01
), "pod", ",", "pod"
)
)
---
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-cost-dashboard
namespace: monitoring
data:
dashboard.json: |
{
"dashboard": {
"title": "Cloud Cost Monitoring",
"panels": [
{
"title": "Total Monthly Cost Trend",
"targets": [
{
"expr": "sum(namespace_hourly_cost) * 24 * 30"
}
]
},
{
"title": "Cost by Namespace",
"targets": [
{
"expr": "sum by (namespace) (namespace_hourly_cost) * 24 * 30"
}
]
},
{
"title": "Cost by Resource Type",
"targets": [
{
"expr": "sum(node_cpu_hourly_cost) * 24 * 30",
"legendFormat": "CPU"
},
{
"expr": "sum(node_memory_hourly_cost) * 24 * 30",
"legendFormat": "Memory"
},
{
"expr": "sum(pvc_storage_hourly_cost) * 24 * 30",
"legendFormat": "Storage"
}
]
},
{
"title": "Unused Resources Cost",
"targets": [
{
"expr": "sum((kube_pod_container_resource_requests_cpu_cores - rate(container_cpu_usage_seconds_total[5m])) * 0.05 * 24 * 30)"
}
]
}
]
}
}
# terraform/cost-alerts.tf
resource "aws_budgets_budget" "monthly" {
name = "${var.project}-monthly-budget"
limit_amount = var.monthly_budget_limit
limit_unit = "USD"
budget_type = "COST"
time_unit = "MONTHLY"
cost_filter {
name = "TagKeyValue"
values = ["user:Project$${var.project}"]
}
notification {
comparison_operator = "GREATER_THAN"
threshold = 80
threshold_type = "PERCENTAGE"
notification_type = "ACTUAL"
subscriber_email_addresses = var.budget_alert_emails
}
notification {
comparison_operator = "GREATER_THAN"
threshold = 100
threshold_type = "PERCENTAGE"
notification_type = "FORECASTED"
subscriber_email_addresses = var.budget_alert_emails
subscriber_sns_topic_arns = [aws_sns_topic.cost_alerts.arn]
}
}
resource "aws_ce_anomaly_monitor" "main" {
name = "${var.project}-anomaly-monitor"
monitor_type = "DIMENSIONAL"
monitor_dimension = "SERVICE"
}
resource "aws_ce_anomaly_subscription" "main" {
name = "${var.project}-anomaly-subscription"
threshold = 100
frequency = "DAILY"
monitor_arn_list = [
aws_ce_anomaly_monitor.main.arn
]
subscriber {
type = "EMAIL"
address = var.cost_anomaly_email
}
subscriber {
type = "SNS"
address = aws_sns_topic.cost_alerts.arn
}
}
# AI Prompt
Agent: "Create security scanning pipeline with:
- SAST/DAST scanning
- Container vulnerability scanning
- Infrastructure compliance checks
- Secret detection
- Security reporting"
# .github/workflows/security-scan.yml
name: Security Scanning
on:
push:
branches: [main, develop]
pull_request:
branches: [main]
schedule:
- cron: '0 2 * * *' # Daily at 2 AM
jobs:
infrastructure-scan:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
# Terraform security scanning
- name: Terraform Security Scan
uses: aquasecurity/tfsec-action@v1.0.0
with:
soft_fail: false
- name: Checkov Policy Scan
uses: bridgecrewio/checkov-action@master
with:
directory: .
framework: terraform
output_format: sarif
output_file_path: checkov.sarif
- name: Terrascan
run: |
docker run --rm -v "$(pwd):/src" \
accurics/terrascan scan -t aws -f terraform \
--config-path /src/.terrascan.yaml
# Cloud security posture
- name: Cloud Security Scan
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
run: |
# Prowler security scan
docker run --rm \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
toniblyx/prowler:latest \
-g cis_level2 -f json -o /tmp/prowler-report.json
# ScoutSuite scan
docker run --rm \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
rossja/ncc-scoutsuite \
aws --no-browser --report-dir /tmp/scoutsuite
# Secret scanning
- name: Secret Detection
uses: trufflesecurity/trufflehog@v3
with:
path: ./
base: ${{ github.event.repository.default_branch }}
head: HEAD
- name: GitLeaks
uses: gitleaks/gitleaks-action@v2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Container scanning
- name: Container Scan
uses: aquasecurity/trivy-action@master
with:
scan-type: 'fs'
scan-ref: '.'
format: 'sarif'
output: 'trivy-results.sarif'
severity: 'CRITICAL,HIGH'
# Upload results
- name: Upload SARIF results
uses: github/codeql-action/upload-sarif@v3
with:
sarif_file: |
checkov.sarif
trivy-results.sarif
compliance-validation:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup InSpec
run: |
curl https://omnitruck.chef.io/install.sh | sudo bash -s -- -P inspec
- name: Run Compliance Tests
run: |
# CIS Benchmarks
inspec exec https://github.com/dev-sec/cis-dil-benchmark \
--reporter json:/tmp/cis-results.json
# AWS Foundations Benchmark
inspec exec https://github.com/inspec/inspec-aws \
-t aws:// \
--controls cis-aws-foundations-1.2.0 \
--reporter json:/tmp/aws-foundations.json
# Custom compliance profile
inspec exec compliance/profiles/company-baseline \
-t aws:// \
--reporter json:/tmp/company-baseline.json html:/tmp/compliance-report.html
- name: Generate Compliance Report
run: |
python scripts/generate_compliance_report.py \
--cis-results /tmp/cis-results.json \
--aws-results /tmp/aws-foundations.json \
--company-results /tmp/company-baseline.json \
--output compliance-report.pdf
- name: Upload Compliance Report
uses: actions/upload-artifact@v3
with:
name: compliance-report
path: |
compliance-report.pdf
/tmp/compliance-report.html
# terraform/security-baseline/main.tf
# Security baseline module
module "security_baseline" {
source = "./modules/security-baseline"
# Enable AWS Security Hub
enable_security_hub = true
security_hub_standards = [
"aws-foundational-security-best-practices/v/1.0.0",
"cis-aws-foundations-benchmark/v/1.2.0",
"pci-dss/v/3.2.1"
]
# Enable GuardDuty
enable_guardduty = true
guardduty_finding_publishing_frequency = "FIFTEEN_MINUTES"
# Enable AWS Config
enable_config = true
config_recording_group = {
all_supported = true
include_global_resource_types = true
}
# Config Rules
config_rules = {
required-tags = {
description = "Ensure required tags are present"
source_identifier = "REQUIRED_TAGS"
input_parameters = jsonencode({
requiredTags = "Environment,Project,Owner,CostCenter"
})
}
encrypted-volumes = {
description = "Ensure EBS volumes are encrypted"
source_identifier = "ENCRYPTED_VOLUMES"
}
rds-encryption-enabled = {
description = "Ensure RDS instances are encrypted"
source_identifier = "RDS_STORAGE_ENCRYPTED"
}
s3-bucket-public-read-prohibited = {
description = "Ensure S3 buckets are not publicly readable"
source_identifier = "S3_BUCKET_PUBLIC_READ_PROHIBITED"
}
}
# Enable CloudTrail
enable_cloudtrail = true
cloudtrail_s3_bucket_name = aws_s3_bucket.cloudtrail.id
enable_log_file_validation = true
event_selector = [{
read_write_type = "All"
include_management_events = true
data_resource = [{
type = "AWS::S3::Object"
values = ["arn:aws:s3:::*/*"]
}]
}]
# Enable Access Analyzer
enable_access_analyzer = true
access_analyzer_name = "${var.project}-analyzer"
tags = var.tags
}
# WAF rules for web applications
resource "aws_wafv2_web_acl" "main" {
name = "${var.project}-waf"
scope = "REGIONAL"
default_action {
allow {}
}
# OWASP Top 10 protection
rule {
name = "RateLimitRule"
priority = 1
action {
block {}
}
statement {
rate_based_statement {
limit = 2000
aggregate_key_type = "IP"
}
}
visibility_config {
cloudwatch_metrics_enabled = true
metric_name = "RateLimitRule"
sampled_requests_enabled = true
}
}
rule {
name = "AWSManagedRulesCommonRuleSet"
priority = 2
override_action {
none {}
}
statement {
managed_rule_group_statement {
name = "AWSManagedRulesCommonRuleSet"
vendor_name = "AWS"
}
}
visibility_config {
cloudwatch_metrics_enabled = true
metric_name = "CommonRuleSetMetric"
sampled_requests_enabled = true
}
}
rule {
name = "AWSManagedRulesKnownBadInputsRuleSet"
priority = 3
override_action {
none {}
}
statement {
managed_rule_group_statement {
name = "AWSManagedRulesKnownBadInputsRuleSet"
vendor_name = "AWS"
}
}
visibility_config {
cloudwatch_metrics_enabled = true
metric_name = "KnownBadInputsMetric"
sampled_requests_enabled = true
}
}
rule {
name = "AWSManagedRulesSQLiRuleSet"
priority = 4
override_action {
none {}
}
statement {
managed_rule_group_statement {
name = "AWSManagedRulesSQLiRuleSet"
vendor_name = "AWS"
}
}
visibility_config {
cloudwatch_metrics_enabled = true
metric_name = "SQLiRuleSetMetric"
sampled_requests_enabled = true
}
}
visibility_config {
cloudwatch_metrics_enabled = true
metric_name = "${var.project}-waf"
sampled_requests_enabled = true
}
tags = var.tags
}
# AI Prompt
Agent: "Create multi-cloud infrastructure with:
- Cloud-agnostic modules
- Provider abstraction
- Unified networking
- Cross-cloud connectivity
- Portable workloads"
# modules/cloud-agnostic-compute/variables.tf
variable "cloud_provider" {
description = "Cloud provider: aws, azure, gcp"
type = string
validation {
condition = contains(["aws", "azure", "gcp"], var.cloud_provider)
error_message = "Cloud provider must be aws, azure, or gcp."
}
}
variable "instance_size" {
description = "Generic instance size: small, medium, large"
type = string
default = "medium"
}
# modules/cloud-agnostic-compute/main.tf
locals {
# Map generic sizes to provider-specific instance types
instance_mapping = {
aws = {
small = "t3.small"
medium = "t3.medium"
large = "t3.large"
}
azure = {
small = "Standard_B2s"
medium = "Standard_B2ms"
large = "Standard_B4ms"
}
gcp = {
small = "e2-small"
medium = "e2-medium"
large = "e2-standard-4"
}
}
# Map generic OS to provider-specific images
image_mapping = {
aws = {
ubuntu = "ami-0c55b159cbfafe1f0" # Ubuntu 22.04
centos = "ami-0f2b4fc905b0bd1f1" # CentOS 8
}
azure = {
ubuntu = {
publisher = "Canonical"
offer = "0001-com-ubuntu-server-jammy"
sku = "22_04-lts"
}
centos = {
publisher = "OpenLogic"
offer = "CentOS"
sku = "8_5"
}
}
gcp = {
ubuntu = "ubuntu-os-cloud/ubuntu-2204-lts"
centos = "centos-cloud/centos-8"
}
}
}
# AWS Implementation
module "aws_compute" {
count = var.cloud_provider == "aws" ? 1 : 0
source = "./aws"
instance_type = local.instance_mapping["aws"][var.instance_size]
ami = local.image_mapping["aws"][var.os_type]
subnet_id = var.subnet_id
user_data = templatefile("${path.module}/templates/cloud-init.yaml", {
hostname = var.hostname
packages = var.packages
})
tags = var.tags
}
# Azure Implementation
module "azure_compute" {
count = var.cloud_provider == "azure" ? 1 : 0
source = "./azure"
vm_size = local.instance_mapping["azure"][var.instance_size]
source_image_reference = {
publisher = local.image_mapping["azure"][var.os_type]["publisher"]
offer = local.image_mapping["azure"][var.os_type]["offer"]
sku = local.image_mapping["azure"][var.os_type]["sku"]
version = "latest"
}
custom_data = base64encode(templatefile("${path.module}/templates/cloud-init.yaml", {
hostname = var.hostname
packages = var.packages
}))
tags = var.tags
}
# GCP Implementation
module "gcp_compute" {
count = var.cloud_provider == "gcp" ? 1 : 0
source = "./gcp"
machine_type = local.instance_mapping["gcp"][var.instance_size]
boot_disk = {
initialize_params = {
image = local.image_mapping["gcp"][var.os_type]
}
}
metadata_startup_script = templatefile("${path.module}/templates/cloud-init.yaml", {
hostname = var.hostname
packages = var.packages
})
labels = var.tags
}
# Outputs
output "instance_id" {
value = coalesce(
try(module.aws_compute[0].instance_id, ""),
try(module.azure_compute[0].vm_id, ""),
try(module.gcp_compute[0].instance_id, "")
)
}
output "private_ip" {
value = coalesce(
try(module.aws_compute[0].private_ip, ""),
try(module.azure_compute[0].private_ip, ""),
try(module.gcp_compute[0].private_ip, "")
)
}
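A call site for this module stays cloud-neutral; switching providers only changes one argument. A sketch with placeholder values for inputs the module is assumed to accept:
module "web_server" {
  source = "./modules/cloud-agnostic-compute"

  cloud_provider = "aws"     # or "azure" / "gcp"
  instance_size  = "medium"  # resolved through local.instance_mapping
  os_type        = "ubuntu"  # resolved through local.image_mapping
  hostname       = "web-01"
  packages       = ["nginx"]
  subnet_id      = "subnet-0123456789abcdef0" # placeholder, used by the AWS path

  tags = {
    Environment = "staging"
    Project     = "demo"
  }
}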
# modules/multi-cloud-kubernetes/main.tf
locals {
k8s_providers = {
aws = "eks"
azure = "aks"
gcp = "gke"
}
}
# EKS Cluster (AWS)
module "eks" {
count = var.cloud_provider == "aws" ? 1 : 0
source = "./eks"
cluster_name = var.cluster_name
cluster_version = var.kubernetes_version
vpc_id = var.vpc_id
subnet_ids = var.subnet_ids
node_groups = {
main = {
desired_capacity = var.node_count
max_capacity = var.max_nodes
min_capacity = var.min_nodes
instance_types = [local.instance_mapping["aws"][var.node_size]]
k8s_labels = var.node_labels
k8s_taints = var.node_taints
}
}
manage_aws_auth = true
tags = var.tags
}
# AKS Cluster (Azure)
module "aks" {
count = var.cloud_provider == "azure" ? 1 : 0
source = "./aks"
cluster_name = var.cluster_name
kubernetes_version = var.kubernetes_version
resource_group_name = var.resource_group_name
location = var.location
default_node_pool = {
name = "default"
node_count = var.node_count
vm_size = local.instance_mapping["azure"][var.node_size]
enable_auto_scaling = true
min_count = var.min_nodes
max_count = var.max_nodes
}
network_profile = {
network_plugin = "azure"
network_policy = "calico"
load_balancer_sku = "standard"
outbound_type = "loadBalancer"
}
tags = var.tags
}
# GKE Cluster (GCP)
module "gke" {
count = var.cloud_provider == "gcp" ? 1 : 0
source = "./gke"
name = var.cluster_name
kubernetes_version = var.kubernetes_version
location = var.region
node_pools = [
{
name = "default"
machine_type = local.instance_mapping["gcp"][var.node_size]
min_count = var.min_nodes
max_count = var.max_nodes
initial_node_count = var.node_count
node_config = {
preemptible = var.use_spot_instances
disk_size_gb = var.node_disk_size
labels = var.node_labels
taints = var.node_taints
}
}
]
network = var.network_name
subnetwork = var.subnet_name
cluster_resource_labels = var.tags
}
# Universal Kubernetes provider configuration
data "template_file" "kubeconfig" {
template = file("${path.module}/templates/kubeconfig.yaml")
vars = {
cluster_name = var.cluster_name
server = local.cluster_endpoint
certificate_data = local.cluster_ca_certificate
token = local.cluster_token
}
}
locals {
cluster_endpoint = coalesce(
try(module.eks[0].cluster_endpoint, ""),
try(module.aks[0].kube_config[0].host, ""),
try(module.gke[0].endpoint, "")
)
cluster_ca_certificate = coalesce(
try(module.eks[0].cluster_certificate_authority_data, ""),
try(module.aks[0].kube_config[0].cluster_ca_certificate, ""),
try(module.gke[0].ca_certificate, "")
)
cluster_token = coalesce(
try(data.aws_eks_cluster_auth.cluster[0].token, ""),
try(module.aks[0].kube_config[0].password, ""),
try(data.google_client_config.default[0].access_token, "")
)
}
# Deploy cloud-agnostic applications
resource "kubernetes_namespace" "apps" {
metadata {
name = "applications"
labels = {
cloud = var.cloud_provider
managed = "terraform"
}
}
}
resource "helm_release" "ingress_controller" {
name = "ingress-nginx"
repository = "https://kubernetes.github.io/ingress-nginx"
chart = "ingress-nginx"
namespace = "ingress-nginx"
create_namespace = true
# Cloud-specific annotations for load balancer
set {
name = "controller.service.annotations.service\\.beta\\.kubernetes\\.io/aws-load-balancer-type"
value = var.cloud_provider == "aws" ? "nlb" : ""
}
set {
name = "controller.service.annotations.service\\.beta\\.kubernetes\\.io/azure-load-balancer-health-probe-request-path"
value = var.cloud_provider == "azure" ? "/healthz" : ""
}
set {
name = "controller.service.annotations.cloud\\.google\\.com/load-balancer-type"
value = var.cloud_provider == "gcp" ? "External" : ""
}
}

Infrastructure as Code Guidelines

Version control
  • Store all infrastructure code in Git
  • Use meaningful commit messages
  • Tag releases for production deployments
  • Implement branch protection rules
  • Review all changes through pull requests

Modules
  • Create reusable modules
  • Keep modules focused and single-purpose
  • Version your modules
  • Document module interfaces
  • Test modules independently

State management (a minimal backend sketch follows this list)
  • Use remote state backends
  • Enable state locking
  • Encrypt state files
  • Back up state regularly
  • Separate state by environment

Security
  • Never commit secrets
  • Use secret management tools
  • Implement least privilege
  • Enable audit logging
  • Run security scans regularly

Testing
  • Unit test modules
  • Integration test deployments
  • Validate compliance
  • Estimate costs before applying
  • Performance test critical changes

Documentation
  • Document architecture decisions
  • Maintain runbooks
  • Create deployment guides
  • Document troubleshooting steps
  • Keep examples updated
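The state-management practices above boil down to a short backend block. A minimal sketch, assuming an S3 bucket and DynamoDB lock table that already exist (all names are placeholders):
# backend.tf
terraform {
  backend "s3" {
    bucket         = "company-terraform-state"                   # versioned, pre-created bucket
    key            = "environments/production/terraform.tfstate" # separate key per environment
    region         = "us-east-1"
    encrypt        = true                                        # encrypt state at rest
    dynamodb_table = "terraform-state-lock"                      # enables state locking
  }
}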
# AI Prompt
Agent: "Create comprehensive IaC development workflow with:
- Pre-commit hooks
- Local testing
- CI/CD pipeline
- Deployment strategies
- Rollback procedures"
# .pre-commit-config.yaml
repos:
# Terraform hooks
- repo: https://github.com/antonbabenko/pre-commit-terraform
rev: v1.83.5
hooks:
- id: terraform_fmt
- id: terraform_docs
args:
- --hook-config=--path-to-file=README.md
- --hook-config=--add-to-existing-file=true
- id: terraform_validate
- id: terraform_tflint
args:
- --args=--config=__GIT_WORKING_DIR__/.tflint.hcl
- id: terraform_tfsec
args:
- --args=--exclude-downloaded-modules
- id: terraform_checkov
args:
- --args=--quiet
- --args=--framework=terraform
- id: infracost_breakdown
args:
- --args=--path=.
- --hook-config='.totalHourlyCost|tonumber > 1'
verbose: true
# General hooks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-json
- id: check-merge-conflict
- id: detect-private-key
- id: detect-aws-credentials
args: ['--allow-missing-credentials']
# Secret scanning
- repo: https://github.com/Yelp/detect-secrets
rev: v1.4.0
hooks:
- id: detect-secrets
args: ['--baseline', '.secrets.baseline']
# Ansible hooks
- repo: https://github.com/ansible/ansible-lint
rev: v6.22.0
hooks:
- id: ansible-lint
files: \.(yaml|yml)$
exclude: \.github/
# Kubernetes hooks
- repo: https://github.com/syntaqx/kube-score
rev: v1.17.0
hooks:
- id: kube-score
files: \.(yaml|yml)$
exclude: (helm|charts)/
# Makefile - Local development workflow
.PHONY: help init validate plan apply destroy test docs clean
help: ## Show this help
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
init: ## Initialize Terraform
@echo "🚀 Initializing Terraform..."
terraform init -upgrade
@echo "📦 Installing pre-commit hooks..."
pre-commit install
@echo "✅ Initialization complete!"
validate: ## Validate Terraform configuration
@echo "🔍 Validating Terraform files..."
terraform fmt -check -recursive
terraform validate
tflint --init && tflint
@echo "🔒 Running security checks..."
tfsec . --minimum-severity HIGH
checkov -d . --quiet --framework terraform
@echo "✅ Validation complete!"
plan: validate ## Create Terraform plan
@echo "📋 Creating Terraform plan..."
terraform plan -out=tfplan
@echo "💰 Checking costs..."
infracost breakdown --path . --terraform-plan-flags "-out=tfplan"
@echo "✅ Plan complete!"
apply: ## Apply Terraform changes
@echo "🚀 Applying Terraform changes..."
terraform apply tfplan
@echo "✅ Apply complete!"
destroy: ## Destroy infrastructure
@echo "💥 Destroying infrastructure..."
@read -p "Are you sure? [y/N] " confirm && \
if [ "$$confirm" = "y" ]; then \
terraform destroy -auto-approve; \
fi
test: ## Run tests
@echo "🧪 Running tests..."
cd test && go test -v -timeout 30m ./...
@echo "✅ Tests complete!"
docs: ## Generate documentation
@echo "📚 Generating documentation..."
terraform-docs markdown table --output-file README.md --output-mode inject .
@echo "✅ Documentation complete!"
clean: ## Clean up files
@echo "🧹 Cleaning up..."
rm -rf .terraform tfplan* *.tfstate*
find . -type f -name "*.tfplan" -delete # keep *.tfvars files: they are source, not build artifacts
@echo "✅ Cleanup complete!"
# scripts/deploy.sh - Deployment script
#!/bin/bash
set -euo pipefail
# Configuration
ENVIRONMENT="${1:-staging}"
REGION="${AWS_DEFAULT_REGION:-us-east-1}"
TERRAFORM_VERSION="1.6.0"
# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
# Functions
log_info() {
echo -e "${GREEN}[INFO]${NC} $1"
}
log_warn() {
echo -e "${YELLOW}[WARN]${NC} $1"
}
log_error() {
echo -e "${RED}[ERROR]${NC} $1"
exit 1
}
# Check prerequisites
check_prerequisites() {
log_info "Checking prerequisites..."
# Check Terraform version
if ! terraform version | grep -q "$TERRAFORM_VERSION"; then
log_error "Terraform $TERRAFORM_VERSION is required"
fi
# Check AWS credentials
if ! aws sts get-caller-identity &>/dev/null; then
log_error "AWS credentials not configured"
fi
# Check environment files
if [[ ! -f "environments/$ENVIRONMENT/terraform.tfvars" ]]; then
log_error "Environment file not found: environments/$ENVIRONMENT/terraform.tfvars"
fi
}
# Initialize Terraform
init_terraform() {
log_info "Initializing Terraform for $ENVIRONMENT..."
terraform init \
-backend-config="environments/$ENVIRONMENT/backend.conf" \
-reconfigure
}
# Create plan
create_plan() {
log_info "Creating Terraform plan..."
terraform plan \
-var-file="environments/$ENVIRONMENT/terraform.tfvars" \
-out="$ENVIRONMENT.tfplan"
# Cost estimation
log_info "Estimating costs..."
infracost breakdown \
--path . \
--terraform-plan-flags "-var-file=environments/$ENVIRONMENT/terraform.tfvars" \
--show-skipped
}
# Apply changes
apply_changes() {
log_info "Applying Terraform changes..."
# Show plan summary
terraform show -no-color "$ENVIRONMENT.tfplan" | grep -E "^ # |^ ~|^ -|^ \+"
# Confirm deployment
read -p "Do you want to apply these changes? [y/N] " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
log_warn "Deployment cancelled"
exit 0
fi
# Apply with timeout
timeout 3600 terraform apply \
-auto-approve \
"$ENVIRONMENT.tfplan"
# Save outputs
terraform output -json > "outputs-$ENVIRONMENT.json"
}
# Post-deployment validation
validate_deployment() {
log_info "Validating deployment..."
# Run InSpec tests
if [[ -d "test/integration/$ENVIRONMENT" ]]; then
inspec exec "test/integration/$ENVIRONMENT" \
-t aws:// \
--reporter cli json:validation-results.json
fi
# Health checks
if [[ -f "scripts/health-check.sh" ]]; then
./scripts/health-check.sh "$ENVIRONMENT"
fi
}
# Main execution
main() {
log_info "Starting deployment for environment: $ENVIRONMENT"
check_prerequisites
init_terraform
create_plan
apply_changes
validate_deployment
log_info "Deployment complete! 🎉"
}
# Run main function
main
Terminal window
# AI Prompt: "Debug Terraform state issues"
# State lock error
Error: Error acquiring the state lock
ConflictException: Unable to acquire lock
# Solution 1: Force unlock (use with caution)
terraform force-unlock <LOCK_ID>
# Solution 2: Check DynamoDB lock table
aws dynamodb scan \
--table-name terraform-state-lock \
--filter-expression "attribute_exists(LockID)"
# Solution 3: Manual cleanup
aws dynamodb delete-item \
--table-name terraform-state-lock \
--key '{"LockID": {"S": "<LOCK_ID>"}}'
# State drift detection
terraform plan -refresh-only
# Import existing resources
terraform import aws_instance.example i-1234567890abcdef0
# Move resources between states
terraform state mv aws_instance.old aws_instance.new
# Remove resources from state
terraform state rm aws_instance.obsolete
# Backup and restore state
terraform state pull > backup.tfstate
terraform state push backup.tfstate
Terminal window
# Enable detailed logging
export TF_LOG=DEBUG
export TF_LOG_PATH=terraform.log
# Terraform console for testing
terraform console
> aws_instance.web.private_ip
> [for instance in aws_instance.web : instance.private_ip]
# Graph dependencies
terraform graph | dot -Tpng > graph.png
# Validate specific modules
terraform validate -json | jq '.diagnostics[] | select(.severity=="error")'
# Check resource attributes
terraform state show aws_instance.web
# List all resources
terraform state list
# Refresh specific resource
terraform apply -refresh-only -target=aws_instance.web
# Debug provider issues
terraform providers lock -platform=linux_amd64 -platform=darwin_amd64
# Check workspace
terraform workspace show
terraform workspace list
# Analyze plan output
terraform show -json tfplan | jq '.resource_changes[] | {address: .address, actions: .change.actions}'