# naviat
# 2/28/2018 - 5:57 PM
#
# ECS/ASG Startup Script

import boto3
import sys
import time

# Script param validation.
def validate_params():
    """Validate the command-line arguments in ``sys.argv``; exit on error.

    Expected arguments, matched case-insensitively:
      1. QA environment name: one of qa0 .. qa10.
      2. Command: ``start`` or ``stop``.
      3. Optional environment type: ``internal`` or ``external``.

    Any arguments beyond the third are ignored.

    Returns:
        None when the arguments are valid.

    Raises:
        SystemExit: with exit code 1, after printing the usage text, when an
            argument is missing or not one of the accepted values.
    """
    valid_env_names = ['qa{0}'.format(number) for number in range(11)]
    args = [arg.lower() for arg in sys.argv[1:]]

    # The argument count is checked first and the chain short-circuits, so a
    # missing argument yields the usage text instead of an IndexError.
    params_ok = (
        len(args) >= 2
        and args[0] in valid_env_names
        and args[1] in ('start', 'stop')
        and (len(args) < 3 or args[2] in ('internal', 'external'))
    )
    if params_ok:
        return

    # Single-argument print(...) behaves the same under the Python 2 print
    # statement and the Python 3 print function.
    # The env type is shown without a leading dash because only the bare
    # words 'internal' / 'external' pass the check above.
    print('Usage: python control_ecs_asg.py [qa-env-name] [start/stop] [internal/external]')
    print('')
    print('Please specify the QA environment name of an ECS ASG to modify (ex: qa1/qa2/qa3...),')
    print('along with a command (ex: start/stop) and optional flag for env type (ex: internal/external).')
    print('If no env type is specified, default of internal is assumed.')
    sys.exit(1)

# Find matching AutoScalingGroup by name prefix supplied and environment type.
def find_matching_asg(env_name, env_type):
    """Return the ECS Auto Scaling group belonging to a QA environment.

    The group is located by name prefix: ``<env_name>-cluster-ecsasg`` when
    ``env_type`` is ``'internal'``, and ``<env_name>-cluster-external-ecsasg``
    for any other ``env_type``. Group names are lower-cased before comparison.

    Uses the module-level ``autoscaling_client``.

    Args:
        env_name: Lower-case QA environment name (e.g. ``'qa1'``).
        env_type: ``'internal'`` or ``'external'``.

    Returns:
        The matching group's entry from the ``AutoScalingGroups`` list of the
        describe response. If several groups match, the last one listed wins.

    Raises:
        SystemExit: with exit code 1 when no group matches the prefix.
    """
    if env_type == 'internal':
        asg_prefix = '{0}-cluster-ecsasg'.format(env_name)
    else:
        asg_prefix = '{0}-cluster-external-ecsasg'.format(env_name)
    print('Searching all ECS ASGs with prefix: {0}...'.format(asg_prefix))

    # The describe call is paginated; walk every page so that a group beyond
    # the first page of results is not reported as missing.
    paginator = autoscaling_client.get_paginator('describe_auto_scaling_groups')
    matched_asg = None
    for page in paginator.paginate():
        for asg in page['AutoScalingGroups']:
            if asg['AutoScalingGroupName'].lower().startswith(asg_prefix):
                matched_asg = asg

    if not matched_asg:
        print('ERROR: No matching ECS ASG found with supplied prefix.')
        sys.exit(1)
    print('Found matching ECS ASG: {0}'.format(matched_asg['AutoScalingGroupName']))
    return matched_asg

def find_matching_cluster_arn(env_name, env_type):
    """Return the ARN of the ECS cluster belonging to a QA environment.

    The cluster is located by ARN suffix: ``cluster/<env_name>`` when
    ``env_type`` is ``'internal'``, and ``cluster/<env_name>-external`` for
    any other ``env_type``. ARNs are lower-cased before comparison.

    Uses the module-level ``ecs_client``.

    Args:
        env_name: Lower-case QA environment name (e.g. ``'qa1'``).
        env_type: ``'internal'`` or ``'external'``.

    Returns:
        The matching cluster ARN string. If several ARNs match, the last one
        listed wins.

    Raises:
        SystemExit: with exit code 1 when no cluster ARN matches the suffix.
    """
    if env_type == 'internal':
        cluster_suffix = 'cluster/{0}'.format(env_name)
    else:
        cluster_suffix = 'cluster/{0}-external'.format(env_name)
    print('Searching all ECS Clusters with suffix: {0}...'.format(cluster_suffix))

    # The list call is paginated; walk every page so that a cluster beyond
    # the first page of results is not reported as missing.
    paginator = ecs_client.get_paginator('list_clusters')
    matched_cluster_arn = None
    for page in paginator.paginate():
        for arn in page['clusterArns']:
            if arn.lower().endswith(cluster_suffix):
                matched_cluster_arn = arn

    if not matched_cluster_arn:
        print('ERROR: No matching ECS Cluster found with supplied suffix.')
        sys.exit(1)
    print('Found matching ECS Cluster: {0}'.format(matched_cluster_arn))
    return matched_cluster_arn

def wait_for_task_startup(cluster_arn, timeout_seconds=300, poll_interval_seconds=7):
    """Block until the cluster's running task count reaches its target.

    The target is the cluster's ``activeServicesCount``, read once before
    polling begins. This relies on each service running exactly one task, so
    the running task count equals the service count once everything is up.

    Uses the module-level ``ecs_client``.

    Args:
        cluster_arn: ARN of the ECS cluster to watch.
        timeout_seconds: How long to keep polling before giving up.
        poll_interval_seconds: Pause between consecutive polls.

    Returns:
        None once the running task count equals the target.

    Raises:
        SystemExit: with exit code 1 when the deadline passes first.
    """
    def describe_cluster():
        # Fetch a fresh description of the single cluster being watched.
        return ecs_client.describe_clusters(clusters=[cluster_arn])['clusters'][0]

    print('Waiting on all ECS tasks to start in cluster: {0}...'.format(cluster_arn))
    deadline = time.time() + timeout_seconds
    desired_task_count = describe_cluster()['activeServicesCount']

    while True:
        running_count = describe_cluster()['runningTasksCount']
        print('Currently running ECS tasks: {0}'.format(running_count))
        if running_count == desired_task_count:
            print('Successfully started all ECS tasks in cluster: {0}'.format(cluster_arn))
            return
        # The deadline is checked after the count, so at least one poll
        # always happens even with a zero timeout.
        if time.time() > deadline:
            break
        time.sleep(poll_interval_seconds)

    print('ERROR: Timed out while waiting for ECS tasks in cluster: {0}'.format(cluster_arn))
    sys.exit(1)

# Pause script to allow manual interrupt from user.
def pause_script_for_sigint(seconds_to_pause):
    """Announce the pending ASG command, then sleep so the user can abort.

    Args:
        seconds_to_pause: Number of seconds to sleep after the notice.
    """
    notice = 'Sending command to ASG in {0} seconds. To cancel, exit script with CTRL+C...'
    print(notice.format(seconds_to_pause))
    time.sleep(seconds_to_pause)

# Send update to AutoScalingGroup's min/max/desired counts.
def update_asg(asg, capacity_to_set):
    """Set an Auto Scaling group's min, max and desired size to one value.

    Uses the module-level ``autoscaling_client``.

    Args:
        asg: Mapping describing the group; only ``'AutoScalingGroupName'``
            is read.
        capacity_to_set: Value applied to MinSize, MaxSize and
            DesiredCapacity alike.
    """
    print('Sending command to update ASG now...')
    # All three sizing fields receive the same value.
    sizing = dict.fromkeys(('MinSize', 'MaxSize', 'DesiredCapacity'), capacity_to_set)
    autoscaling_client.update_auto_scaling_group(
        AutoScalingGroupName=asg['AutoScalingGroupName'],
        **sizing
    )
    print('Successfully updated ASG.')

# Seconds to wait before sending the start/stop command to the AutoScalingGroup.
seconds_to_pause = 10
# Value applied to the ASG's min/max/desired instance count on 'start'.
max_capacity = 3

# Exits with the usage text when the command line is invalid.
validate_params()

# Module-level AWS clients; the functions defined above read these globals.
autoscaling_client = boto3.client('autoscaling')
ecs_client = boto3.client('ecs')

# Normalised command-line arguments; the env type defaults to 'internal'.
env_name = sys.argv[1].lower()
asg_command = sys.argv[2].lower()
if len(sys.argv) > 3:
    env_type = sys.argv[3].lower()
else:
    env_type = 'internal'

# Resolve both AWS resources up front, before anything is modified.
matched_asg = find_matching_asg(env_name, env_type)
matched_ecs_cluster_arn = find_matching_cluster_arn(env_name, env_type)

# 'start' scales the group up to max_capacity; 'stop' scales it down to zero.
if asg_command == 'start':
    capacity_to_set = max_capacity
else:
    capacity_to_set = 0

pause_script_for_sigint(seconds_to_pause)
update_asg(matched_asg, capacity_to_set)

# Only a start needs to wait for the ECS tasks to come up.
if asg_command == 'start':
    wait_for_task_startup(matched_ecs_cluster_arn)
sys.exit(0)