Better EMR coverage and boto3 request/response handling

This revision includes:

- A handler for requests for which content-type is JSON (from boto3).

- A decorator (generate_boto3_response) to convert XML responses to
  JSON (for boto3). This way, existing response templates for boto can
  be shared for generating boto3 responses.

- Utility class/functions to use botocore's service specification data
  (accessible under botocore.data) for type casting, from query
  parameters to Python objects and XML to JSON.

- Updates to response handlers/models to cover more EMR endpoints and
  mockable parameters.
This commit is contained in:
Taro Sato 2016-09-21 20:59:19 -07:00
commit 7cd404808b
10 changed files with 2399 additions and 841 deletions

View file

@ -0,0 +1,73 @@
from __future__ import unicode_literals
import sure # noqa
from moto.core.responses import AWSServiceSpec
from moto.core.responses import flatten_json_request_body
def test_flatten_json_request_body():
    """Verify that flatten_json_request_body expands a nested JSON request
    body into the flat ``member.N`` / ``entry.N`` query-parameter form
    described by the botocore service spec for RunJobFlow."""
    spec = AWSServiceSpec('data/emr/2009-03-31/service-2.json').input_spec('RunJobFlow')
    body = {
        'Name': 'cluster',
        'Instances': {
            'Ec2KeyName': 'ec2key',
            'InstanceGroups': [
                {'InstanceRole': 'MASTER',
                 'InstanceType': 'm1.small'},
                {'InstanceRole': 'CORE',
                 'InstanceType': 'm1.medium'},
            ],
            'Placement': {'AvailabilityZone': 'us-east-1'},
        },
        'Steps': [
            {'HadoopJarStep': {
                'Properties': [
                    {'Key': 'k1', 'Value': 'v1'},
                    {'Key': 'k2', 'Value': 'v2'}
                ],
                'Args': ['arg1', 'arg2']}},
        ],
        'Configurations': [
            {'Classification': 'class',
             'Properties': {'propkey1': 'propkey1',
                            'propkey2': 'propkey2'}},
            {'Classification': 'anotherclass',
             'Properties': {'propkey3': 'propkey3'}},
        ]
    }

    flat = flatten_json_request_body('', body, spec)

    # Scalars (and scalars nested in structures) map straight through
    # to dotted keys.
    flat['Name'].should.equal(body['Name'])
    flat['Instances.Ec2KeyName'].should.equal(body['Instances']['Ec2KeyName'])
    flat['Instances.Placement.AvailabilityZone'].should.equal(
        body['Instances']['Placement']['AvailabilityZone'])

    # Lists of structures become 1-based ``member.N`` entries.
    for pos, group in enumerate(body['Instances']['InstanceGroups'], 1):
        prefix = 'Instances.InstanceGroups.member.{0}'.format(pos)
        flat[prefix + '.InstanceRole'].should.equal(group['InstanceRole'])
        flat[prefix + '.InstanceType'].should.equal(group['InstanceType'])

    for pos, raw_step in enumerate(body['Steps'], 1):
        prefix = 'Steps.member.{0}.HadoopJarStep'.format(pos)
        step = raw_step['HadoopJarStep']
        # Walk the flattened Properties members until the keys run out,
        # so the test also fails if extra members appear.
        member = 0
        while prefix + '.Properties.member.{0}.Key'.format(member + 1) in flat:
            member_key = prefix + '.Properties.member.{0}'.format(member + 1)
            flat[member_key + '.Key'].should.equal(step['Properties'][member]['Key'])
            flat[member_key + '.Value'].should.equal(step['Properties'][member]['Value'])
            member += 1
        member = 0
        while prefix + '.Args.member.{0}'.format(member + 1) in flat:
            flat[prefix + '.Args.member.{0}'.format(member + 1)].should.equal(step['Args'][member])
            member += 1

    # Maps become ``entry.N.key`` / ``entry.N.value`` pairs; rebuild each
    # Properties dict from the flat form and compare it to the input.
    for pos, config in enumerate(body['Configurations'], 1):
        flat['Configurations.member.{0}.Classification'.format(pos)].should.equal(
            config['Classification'])
        rebuilt = {}
        entry = 1
        keyfmt = 'Configurations.member.{0}.Properties.entry.{1}'
        key = keyfmt.format(pos, entry)
        while key + '.key' in flat:
            rebuilt[flat[key + '.key']] = flat[key + '.value']
            entry += 1
            key = keyfmt.format(pos, entry)
        rebuilt.should.equal(config['Properties'])

View file

@ -1,197 +1,111 @@
from __future__ import unicode_literals
import boto
from boto.emr.bootstrap_action import BootstrapAction
from boto.emr.instance_group import InstanceGroup
from boto.emr.step import StreamingStep
import six
import sure # noqa
from moto import mock_emr
from tests.helpers import requires_boto_gte
@mock_emr
def test_create_job_flow_in_multiple_regions():
step = StreamingStep(
name='My wordcount example',
mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
reducer='aggregate',
input='s3n://elasticmapreduce/samples/wordcount/input',
output='s3n://output_bucket/output/wordcount_output'
)
run_jobflow_args = dict(
job_flow_role='EMR_EC2_DefaultRole',
keep_alive=True,
log_uri='s3://some_bucket/jobflow_logs',
master_instance_type='c1.medium',
name='My jobflow',
num_instances=2,
service_role='EMR_DefaultRole',
slave_instance_type='c1.medium',
)
west1_conn = boto.emr.connect_to_region('us-east-1')
west1_job_id = west1_conn.run_jobflow(
name='us-east-1',
log_uri='s3://some_bucket/jobflow_logs',
master_instance_type='m1.medium',
slave_instance_type='m1.small',
steps=[step],
)
west2_conn = boto.emr.connect_to_region('eu-west-1')
west2_job_id = west2_conn.run_jobflow(
name='eu-west-1',
log_uri='s3://some_bucket/jobflow_logs',
master_instance_type='m1.medium',
slave_instance_type='m1.small',
steps=[step],
)
west1_job_flow = west1_conn.describe_jobflow(west1_job_id)
west1_job_flow.name.should.equal('us-east-1')
west2_job_flow = west2_conn.describe_jobflow(west2_job_id)
west2_job_flow.name.should.equal('eu-west-1')
input_instance_groups = [
InstanceGroup(1, 'MASTER', 'c1.medium', 'ON_DEMAND', 'master'),
InstanceGroup(3, 'CORE', 'c1.medium', 'ON_DEMAND', 'core'),
InstanceGroup(6, 'TASK', 'c1.large', 'SPOT', 'task-1', '0.07'),
InstanceGroup(10, 'TASK', 'c1.xlarge', 'SPOT', 'task-2', '0.05'),
]
@mock_emr
def test_create_job_flow():
def test_describe_cluster():
conn = boto.connect_emr()
step1 = StreamingStep(
name='My wordcount example',
mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
reducer='aggregate',
input='s3n://elasticmapreduce/samples/wordcount/input',
output='s3n://output_bucket/output/wordcount_output'
)
step2 = StreamingStep(
name='My wordcount example2',
mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
reducer='aggregate',
input='s3n://elasticmapreduce/samples/wordcount/input2',
output='s3n://output_bucket/output/wordcount_output2'
)
job_id = conn.run_jobflow(
name='My jobflow',
args = run_jobflow_args.copy()
args.update(dict(
api_params={
'Applications.member.1.Name': 'Spark',
'Applications.member.1.Version': '2.4.2',
'Configurations.member.1.Classification': 'yarn-site',
'Configurations.member.1.Properties.entry.1.key': 'someproperty',
'Configurations.member.1.Properties.entry.1.value': 'somevalue',
'Instances.EmrManagedMasterSecurityGroup': 'master-security-group',
'Instances.Ec2SubnetId': 'subnet-8be41cec',
},
availability_zone='us-east-2b',
ec2_keyname='mykey',
job_flow_role='EMR_EC2_DefaultRole',
keep_alive=False,
log_uri='s3://some_bucket/jobflow_logs',
master_instance_type='m1.medium',
slave_instance_type='m1.small',
steps=[step1, step2],
)
job_flow = conn.describe_jobflow(job_id)
job_flow.state.should.equal('STARTING')
job_flow.jobflowid.should.equal(job_id)
job_flow.name.should.equal('My jobflow')
job_flow.masterinstancetype.should.equal('m1.medium')
job_flow.slaveinstancetype.should.equal('m1.small')
job_flow.loguri.should.equal('s3://some_bucket/jobflow_logs')
job_flow.visibletoallusers.should.equal('False')
int(job_flow.normalizedinstancehours).should.equal(0)
job_step = job_flow.steps[0]
job_step.name.should.equal('My wordcount example')
job_step.state.should.equal('STARTING')
args = [arg.value for arg in job_step.args]
args.should.equal([
'-mapper',
's3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
'-reducer',
'aggregate',
'-input',
's3n://elasticmapreduce/samples/wordcount/input',
'-output',
's3n://output_bucket/output/wordcount_output',
])
job_step2 = job_flow.steps[1]
job_step2.name.should.equal('My wordcount example2')
job_step2.state.should.equal('PENDING')
args = [arg.value for arg in job_step2.args]
args.should.equal([
'-mapper',
's3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
'-reducer',
'aggregate',
'-input',
's3n://elasticmapreduce/samples/wordcount/input2',
'-output',
's3n://output_bucket/output/wordcount_output2',
])
@requires_boto_gte("2.8")
@mock_emr
def test_create_job_flow_with_new_params():
# Test that run_jobflow works with newer params
conn = boto.connect_emr()
conn.run_jobflow(
name='My jobflow',
log_uri='s3://some_bucket/jobflow_logs',
master_instance_type='m1.medium',
slave_instance_type='m1.small',
job_flow_role='some-role-arn',
steps=[],
)
@requires_boto_gte("2.8")
@mock_emr
def test_create_job_flow_visible_to_all_users():
conn = boto.connect_emr()
job_id = conn.run_jobflow(
name='My jobflow',
log_uri='s3://some_bucket/jobflow_logs',
steps=[],
service_role='EMR_DefaultRole',
visible_to_all_users=True,
)
job_flow = conn.describe_jobflow(job_id)
job_flow.visibletoallusers.should.equal('True')
))
cluster_id = conn.run_jobflow(**args)
input_tags = {'tag1': 'val1', 'tag2': 'val2'}
conn.add_tags(cluster_id, input_tags)
cluster = conn.describe_cluster(cluster_id)
cluster.applications[0].name.should.equal('Spark')
cluster.applications[0].version.should.equal('2.4.2')
cluster.autoterminate.should.equal('true')
@requires_boto_gte("2.8")
@mock_emr
def test_create_job_flow_with_instance_groups():
conn = boto.connect_emr()
# configurations appear not be supplied as attributes?
instance_groups = [InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07'),
InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07')]
job_id = conn.run_jobflow(
name='My jobflow',
log_uri='s3://some_bucket/jobflow_logs',
steps=[],
instance_groups=instance_groups
)
attrs = cluster.ec2instanceattributes
# AdditionalMasterSecurityGroups
# AdditionalSlaveSecurityGroups
attrs.ec2availabilityzone.should.equal(args['availability_zone'])
attrs.ec2keyname.should.equal(args['ec2_keyname'])
attrs.ec2subnetid.should.equal(args['api_params']['Instances.Ec2SubnetId'])
# EmrManagedMasterSecurityGroups
# EmrManagedSlaveSecurityGroups
attrs.iaminstanceprofile.should.equal(args['job_flow_role'])
# ServiceAccessSecurityGroup
job_flow = conn.describe_jobflow(job_id)
int(job_flow.instancecount).should.equal(12)
instance_group = job_flow.instancegroups[0]
int(instance_group.instancerunningcount).should.equal(6)
cluster.id.should.equal(cluster_id)
cluster.loguri.should.equal(args['log_uri'])
cluster.masterpublicdnsname.should.be.a(six.string_types)
cluster.name.should.equal(args['name'])
int(cluster.normalizedinstancehours).should.equal(0)
# cluster.release_label
cluster.shouldnt.have.property('requestedamiversion')
cluster.runningamiversion.should.equal('1.0.0')
# cluster.securityconfiguration
cluster.servicerole.should.equal(args['service_role'])
cluster.status.state.should.equal('TERMINATED')
cluster.status.statechangereason.message.should.be.a(six.string_types)
cluster.status.statechangereason.code.should.be.a(six.string_types)
cluster.status.timeline.creationdatetime.should.be.a(six.string_types)
# cluster.status.timeline.enddatetime.should.be.a(six.string_types)
# cluster.status.timeline.readydatetime.should.be.a(six.string_types)
dict((item.key, item.value) for item in cluster.tags).should.equal(input_tags)
cluster.terminationprotected.should.equal('false')
cluster.visibletoallusers.should.equal('true')
@mock_emr
def test_terminate_job_flow():
def test_describe_jobflows():
conn = boto.connect_emr()
job_id = conn.run_jobflow(
name='My jobflow',
log_uri='s3://some_bucket/jobflow_logs',
steps=[]
)
flow = conn.describe_jobflows()[0]
flow.state.should.equal('STARTING')
conn.terminate_jobflow(job_id)
flow = conn.describe_jobflows()[0]
flow.state.should.equal('TERMINATED')
@mock_emr
def test_describe_job_flows():
conn = boto.connect_emr()
job1_id = conn.run_jobflow(
name='My jobflow',
log_uri='s3://some_bucket/jobflow_logs',
steps=[]
)
job2_id = conn.run_jobflow(
name='My jobflow',
log_uri='s3://some_bucket/jobflow_logs',
steps=[]
)
job1_id = conn.run_jobflow(**run_jobflow_args)
job2_id = conn.run_jobflow(**run_jobflow_args)
jobs = conn.describe_jobflows()
jobs.should.have.length_of(2)
@ -205,252 +119,454 @@ def test_describe_job_flows():
@mock_emr
def test_add_steps_to_flow():
def test_describe_jobflow():
conn = boto.connect_emr()
args = run_jobflow_args.copy()
args.update(dict(
ami_version='3.8.1',
api_params={
#'Applications.member.1.Name': 'Spark',
#'Applications.member.1.Version': '2.4.2',
#'Configurations.member.1.Classification': 'yarn-site',
#'Configurations.member.1.Properties.entry.1.key': 'someproperty',
#'Configurations.member.1.Properties.entry.1.value': 'somevalue',
#'Instances.EmrManagedMasterSecurityGroup': 'master-security-group',
'Instances.Ec2SubnetId': 'subnet-8be41cec',
},
ec2_keyname='mykey',
hadoop_version='2.4.0',
step1 = StreamingStep(
name='My wordcount example',
mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
reducer='aggregate',
input='s3n://elasticmapreduce/samples/wordcount/input',
output='s3n://output_bucket/output/wordcount_output'
)
job_id = conn.run_jobflow(
name='My jobflow',
log_uri='s3://some_bucket/jobflow_logs',
steps=[step1]
)
keep_alive=True,
master_instance_type='c1.medium',
slave_instance_type='c1.medium',
num_instances=2,
availability_zone='us-west-2b',
job_flow_role='EMR_EC2_DefaultRole',
service_role='EMR_DefaultRole',
visible_to_all_users=True,
))
cluster_id = conn.run_jobflow(**args)
jf = conn.describe_jobflow(cluster_id)
jf.amiversion.should.equal(args['ami_version'])
jf.bootstrapactions.should.equal(None)
jf.creationdatetime.should.be.a(six.string_types)
jf.should.have.property('laststatechangereason')
jf.readydatetime.should.be.a(six.string_types)
jf.startdatetime.should.be.a(six.string_types)
jf.state.should.equal('WAITING')
jf.ec2keyname.should.equal(args['ec2_keyname'])
# Ec2SubnetId
jf.hadoopversion.should.equal(args['hadoop_version'])
int(jf.instancecount).should.equal(2)
for ig in jf.instancegroups:
ig.creationdatetime.should.be.a(six.string_types)
# ig.enddatetime.should.be.a(six.string_types)
ig.should.have.property('instancegroupid').being.a(six.string_types)
int(ig.instancerequestcount).should.equal(1)
ig.instancerole.should.be.within(['MASTER', 'CORE'])
int(ig.instancerunningcount).should.equal(1)
ig.instancetype.should.equal('c1.medium')
ig.laststatechangereason.should.be.a(six.string_types)
ig.market.should.equal('ON_DEMAND')
ig.name.should.be.a(six.string_types)
ig.readydatetime.should.be.a(six.string_types)
ig.startdatetime.should.be.a(six.string_types)
ig.state.should.equal('RUNNING')
jf.keepjobflowalivewhennosteps.should.equal('true')
jf.masterinstanceid.should.be.a(six.string_types)
jf.masterinstancetype.should.equal(args['master_instance_type'])
jf.masterpublicdnsname.should.be.a(six.string_types)
int(jf.normalizedinstancehours).should.equal(0)
jf.availabilityzone.should.equal(args['availability_zone'])
jf.slaveinstancetype.should.equal(args['slave_instance_type'])
jf.terminationprotected.should.equal('false')
jf.jobflowid.should.equal(cluster_id)
# jf.jobflowrole.should.equal(args['job_flow_role'])
jf.loguri.should.equal(args['log_uri'])
jf.name.should.equal(args['name'])
# jf.servicerole.should.equal(args['service_role'])
jf.steps.should.have.length_of(0)
list(i.value for i in jf.supported_products).should.equal([])
jf.visibletoallusers.should.equal('true')
@mock_emr
def test_list_clusters():
    """ListClusters should report both running and terminated clusters
    with the expected state and timeline fields."""
    conn = boto.connect_emr()

    # Launch two clusters, then terminate the second one so both the
    # WAITING and TERMINATED paths are covered.
    params = run_jobflow_args.copy()
    params['name'] = 'jobflow1'
    cluster1_id = conn.run_jobflow(**params)
    params['name'] = 'jobflow2'
    cluster2_id = conn.run_jobflow(**params)
    conn.terminate_jobflow(cluster2_id)

    clusters = conn.list_clusters().clusters
    clusters.should.have.length_of(2)

    expected = {
        cluster1_id: {
            'id': cluster1_id,
            'name': 'jobflow1',
            'normalizedinstancehours': 0,
            'state': 'WAITING'},
        cluster2_id: {
            'id': cluster2_id,
            'name': 'jobflow2',
            'normalizedinstancehours': 0,
            'state': 'TERMINATED'},
    }

    for cluster in clusters:
        want = expected[cluster.id]
        cluster.id.should.equal(want['id'])
        cluster.name.should.equal(want['name'])
        int(cluster.normalizedinstancehours).should.equal(want['normalizedinstancehours'])
        cluster.status.state.should.equal(want['state'])
        cluster.status.timeline.creationdatetime.should.be.a(six.string_types)
        # Only a terminated cluster carries an end timestamp.
        if want['state'] == 'TERMINATED':
            cluster.status.timeline.enddatetime.should.be.a(six.string_types)
        else:
            cluster.status.timeline.shouldnt.have.property('enddatetime')
        cluster.status.timeline.readydatetime.should.be.a(six.string_types)
@mock_emr
def test_run_jobflow():
conn = boto.connect_emr()
args = run_jobflow_args.copy()
job_id = conn.run_jobflow(**args)
job_flow = conn.describe_jobflow(job_id)
job_flow.state.should.equal('STARTING')
job_flow.state.should.equal('WAITING')
job_flow.jobflowid.should.equal(job_id)
job_flow.name.should.equal('My jobflow')
job_flow.loguri.should.equal('s3://some_bucket/jobflow_logs')
step2 = StreamingStep(
name='My wordcount example2',
mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
reducer='aggregate',
input='s3n://elasticmapreduce/samples/wordcount/input2',
output='s3n://output_bucket/output/wordcount_output2'
)
conn.add_jobflow_steps(job_id, [step2])
job_flow = conn.describe_jobflow(job_id)
job_step = job_flow.steps[0]
job_step.name.should.equal('My wordcount example')
job_step.state.should.equal('STARTING')
args = [arg.value for arg in job_step.args]
args.should.equal([
'-mapper',
's3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
'-reducer',
'aggregate',
'-input',
's3n://elasticmapreduce/samples/wordcount/input',
'-output',
's3n://output_bucket/output/wordcount_output',
])
job_step2 = job_flow.steps[1]
job_step2.name.should.equal('My wordcount example2')
job_step2.state.should.equal('PENDING')
args = [arg.value for arg in job_step2.args]
args.should.equal([
'-mapper',
's3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
'-reducer',
'aggregate',
'-input',
's3n://elasticmapreduce/samples/wordcount/input2',
'-output',
's3n://output_bucket/output/wordcount_output2',
])
job_flow.name.should.equal(args['name'])
job_flow.masterinstancetype.should.equal(args['master_instance_type'])
job_flow.slaveinstancetype.should.equal(args['slave_instance_type'])
job_flow.loguri.should.equal(args['log_uri'])
job_flow.visibletoallusers.should.equal('false')
int(job_flow.normalizedinstancehours).should.equal(0)
job_flow.steps.should.have.length_of(0)
@mock_emr
def test_create_instance_groups():
conn = boto.connect_emr()
def test_run_jobflow_in_multiple_regions():
regions = {}
for region in ['us-east-1', 'eu-west-1']:
conn = boto.emr.connect_to_region(region)
args = run_jobflow_args.copy()
args['name'] = region
cluster_id = conn.run_jobflow(**args)
regions[region] = {'conn': conn, 'cluster_id': cluster_id}
step1 = StreamingStep(
name='My wordcount example',
mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
reducer='aggregate',
input='s3n://elasticmapreduce/samples/wordcount/input',
output='s3n://output_bucket/output/wordcount_output'
)
job_id = conn.run_jobflow(
name='My jobflow',
log_uri='s3://some_bucket/jobflow_logs',
steps=[step1],
)
instance_group = InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07')
instance_group = conn.add_instance_groups(job_id, [instance_group])
instance_group_id = instance_group.instancegroupids
job_flow = conn.describe_jobflows()[0]
int(job_flow.instancecount).should.equal(6)
instance_group = job_flow.instancegroups[0]
instance_group.instancegroupid.should.equal(instance_group_id)
int(instance_group.instancerunningcount).should.equal(6)
instance_group.instancerole.should.equal('TASK')
instance_group.instancetype.should.equal('c1.medium')
instance_group.market.should.equal('SPOT')
instance_group.name.should.equal('spot-0.07')
instance_group.bidprice.should.equal('0.07')
@mock_emr
def test_modify_instance_groups():
conn = boto.connect_emr()
step1 = StreamingStep(
name='My wordcount example',
mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
reducer='aggregate',
input='s3n://elasticmapreduce/samples/wordcount/input',
output='s3n://output_bucket/output/wordcount_output'
)
job_id = conn.run_jobflow(
name='My jobflow',
log_uri='s3://some_bucket/jobflow_logs',
steps=[step1]
)
instance_group1 = InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07')
instance_group2 = InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07')
instance_group = conn.add_instance_groups(job_id, [instance_group1, instance_group2])
instance_group_ids = instance_group.instancegroupids.split(",")
job_flow = conn.describe_jobflows()[0]
int(job_flow.instancecount).should.equal(12)
instance_group = job_flow.instancegroups[0]
int(instance_group.instancerunningcount).should.equal(6)
conn.modify_instance_groups(instance_group_ids, [2, 3])
job_flow = conn.describe_jobflows()[0]
int(job_flow.instancecount).should.equal(5)
instance_group1 = [
group for group
in job_flow.instancegroups
if group.instancegroupid == instance_group_ids[0]
][0]
int(instance_group1.instancerunningcount).should.equal(2)
instance_group2 = [
group for group
in job_flow.instancegroups
if group.instancegroupid == instance_group_ids[1]
][0]
int(instance_group2.instancerunningcount).should.equal(3)
for region in regions.keys():
conn = regions[region]['conn']
jf = conn.describe_jobflow(regions[region]['cluster_id'])
jf.name.should.equal(region)
@requires_boto_gte("2.8")
@mock_emr
def test_set_visible_to_all_users():
def test_run_jobflow_with_new_params():
# Test that run_jobflow works with newer params
conn = boto.connect_emr()
conn.run_jobflow(**run_jobflow_args)
job_id = conn.run_jobflow(
name='My jobflow',
log_uri='s3://some_bucket/jobflow_logs',
steps=[],
visible_to_all_users=False,
)
@requires_boto_gte("2.8")
@mock_emr
def test_run_jobflow_with_visible_to_all_users():
conn = boto.connect_emr()
for expected in (True, False):
job_id = conn.run_jobflow(
visible_to_all_users=expected,
**run_jobflow_args
)
job_flow = conn.describe_jobflow(job_id)
job_flow.visibletoallusers.should.equal(str(expected).lower())
@requires_boto_gte("2.8")
@mock_emr
def test_run_jobflow_with_instance_groups():
input_groups = dict((g.name, g) for g in input_instance_groups)
conn = boto.connect_emr()
job_id = conn.run_jobflow(instance_groups=input_instance_groups,
**run_jobflow_args)
job_flow = conn.describe_jobflow(job_id)
job_flow.visibletoallusers.should.equal('False')
conn.set_visible_to_all_users(job_id, True)
job_flow = conn.describe_jobflow(job_id)
job_flow.visibletoallusers.should.equal('True')
conn.set_visible_to_all_users(job_id, False)
job_flow = conn.describe_jobflow(job_id)
job_flow.visibletoallusers.should.equal('False')
int(job_flow.instancecount).should.equal(sum(g.num_instances for g in input_instance_groups))
for instance_group in job_flow.instancegroups:
expected = input_groups[instance_group.name]
instance_group.should.have.property('instancegroupid')
int(instance_group.instancerunningcount).should.equal(expected.num_instances)
instance_group.instancerole.should.equal(expected.role)
instance_group.instancetype.should.equal(expected.type)
instance_group.market.should.equal(expected.market)
if hasattr(expected, 'bidprice'):
instance_group.bidprice.should.equal(expected.bidprice)
@requires_boto_gte("2.8")
@mock_emr
def test_set_termination_protection():
conn = boto.connect_emr()
job_id = conn.run_jobflow(
name='My jobflow',
log_uri='s3://some_bucket/jobflow_logs',
steps=[]
)
job_id = conn.run_jobflow(**run_jobflow_args)
job_flow = conn.describe_jobflow(job_id)
job_flow.terminationprotected.should.equal(u'None')
job_flow.terminationprotected.should.equal('false')
conn.set_termination_protection(job_id, True)
job_flow = conn.describe_jobflow(job_id)
job_flow.terminationprotected.should.equal('true')
conn.set_termination_protection(job_id, False)
job_flow = conn.describe_jobflow(job_id)
job_flow.terminationprotected.should.equal('false')
@requires_boto_gte("2.8")
@mock_emr
def test_list_clusters():
def test_set_visible_to_all_users():
conn = boto.connect_emr()
conn.run_jobflow(
name='My jobflow',
log_uri='s3://some_bucket/jobflow_logs',
steps=[],
)
args = run_jobflow_args.copy()
args['visible_to_all_users'] = False
job_id = conn.run_jobflow(**args)
job_flow = conn.describe_jobflow(job_id)
job_flow.visibletoallusers.should.equal('false')
summary = conn.list_clusters()
clusters = summary.clusters
clusters.should.have.length_of(1)
cluster = clusters[0]
cluster.name.should.equal("My jobflow")
cluster.normalizedinstancehours.should.equal('0')
cluster.status.state.should.equal("RUNNING")
conn.set_visible_to_all_users(job_id, True)
job_flow = conn.describe_jobflow(job_id)
job_flow.visibletoallusers.should.equal('true')
conn.set_visible_to_all_users(job_id, False)
job_flow = conn.describe_jobflow(job_id)
job_flow.visibletoallusers.should.equal('false')
@mock_emr
def test_describe_cluster():
def test_terminate_jobflow():
conn = boto.connect_emr()
job_id = conn.run_jobflow(
name='My jobflow',
log_uri='s3://some_bucket/jobflow_logs',
steps=[],
job_id = conn.run_jobflow(**run_jobflow_args)
flow = conn.describe_jobflows()[0]
flow.state.should.equal('WAITING')
conn.terminate_jobflow(job_id)
flow = conn.describe_jobflows()[0]
flow.state.should.equal('TERMINATED')
# testing multiple end points for each feature
@mock_emr
def test_bootstrap_actions():
bootstrap_actions = [
BootstrapAction(
name='bs1',
path='path/to/script',
bootstrap_action_args=['arg1', 'arg2']),
BootstrapAction(
name='bs2',
path='path/to/anotherscript',
bootstrap_action_args=[])
]
conn = boto.connect_emr()
cluster_id = conn.run_jobflow(
bootstrap_actions=bootstrap_actions,
**run_jobflow_args
)
cluster = conn.describe_cluster(job_id)
cluster.name.should.equal("My jobflow")
cluster.normalizedinstancehours.should.equal('0')
cluster.status.state.should.equal("RUNNING")
jf = conn.describe_jobflow(cluster_id)
for x, y in zip(jf.bootstrapactions, bootstrap_actions):
x.name.should.equal(y.name)
x.path.should.equal(y.path)
list(o.value for o in x.args).should.equal(y.args())
resp = conn.list_bootstrap_actions(cluster_id)
for i, y in enumerate(bootstrap_actions):
x = resp.actions[i]
x.name.should.equal(y.name)
x.scriptpath.should.equal(y.path)
list(arg.value for arg in x.args).should.equal(y.args())
@mock_emr
def test_cluster_tagging():
conn = boto.connect_emr()
job_id = conn.run_jobflow(
name='My jobflow',
log_uri='s3://some_bucket/jobflow_logs',
steps=[],
)
cluster_id = job_id
conn.add_tags(cluster_id, {"tag1": "val1", "tag2": "val2"})
def test_instance_groups():
    """Exercise instance groups across several EMR endpoints: RunJobFlow,
    AddInstanceGroups, DescribeJobFlow(s), ListInstanceGroups and
    ModifyInstanceGroups."""
    # Index the expected groups by name for lookup inside the assert loops.
    input_groups = dict((g.name, g) for g in input_instance_groups)
    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    # Drop the scalar instance parameters; instance_groups replaces them.
    for key in ['master_instance_type', 'slave_instance_type', 'num_instances']:
        del args[key]
    # Launch with only the first two groups (MASTER + CORE) ...
    args['instance_groups'] = input_instance_groups[:2]
    job_id = conn.run_jobflow(**args)
    jf = conn.describe_jobflow(job_id)
    base_instance_count = int(jf.instancecount)
    # ... then add the remaining groups after the cluster is up.
    conn.add_instance_groups(job_id, input_instance_groups[2:])
    jf = conn.describe_jobflow(job_id)
    # DescribeJobFlow view: total count and per-group attributes.
    int(jf.instancecount).should.equal(sum(g.num_instances for g in input_instance_groups))
    for x in jf.instancegroups:
        y = input_groups[x.name]
        if hasattr(y, 'bidprice'):
            x.bidprice.should.equal(y.bidprice)
        x.creationdatetime.should.be.a(six.string_types)
        # x.enddatetime.should.be.a(six.string_types)
        x.should.have.property('instancegroupid')
        int(x.instancerequestcount).should.equal(y.num_instances)
        x.instancerole.should.equal(y.role)
        int(x.instancerunningcount).should.equal(y.num_instances)
        x.instancetype.should.equal(y.type)
        x.laststatechangereason.should.be.a(six.string_types)
        x.market.should.equal(y.market)
        x.name.should.be.a(six.string_types)
        x.readydatetime.should.be.a(six.string_types)
        x.startdatetime.should.be.a(six.string_types)
        x.state.should.equal('RUNNING')
    # ListInstanceGroups view: same groups exposed with boto3-style fields.
    for x in conn.list_instance_groups(job_id).instancegroups:
        y = input_groups[x.name]
        if hasattr(y, 'bidprice'):
            x.bidprice.should.equal(y.bidprice)
        # Configurations
        # EbsBlockDevices
        # EbsOptimized
        x.should.have.property('id')
        x.instancegrouptype.should.equal(y.role)
        x.instancetype.should.equal(y.type)
        x.market.should.equal(y.market)
        x.name.should.equal(y.name)
        int(x.requestedinstancecount).should.equal(y.num_instances)
        int(x.runninginstancecount).should.equal(y.num_instances)
        # ShrinkPolicy
        x.status.state.should.equal('RUNNING')
        x.status.statechangereason.code.should.be.a(six.string_types)
        x.status.statechangereason.message.should.be.a(six.string_types)
        x.status.timeline.creationdatetime.should.be.a(six.string_types)
        # x.status.timeline.enddatetime.should.be.a(six.string_types)
        x.status.timeline.readydatetime.should.be.a(six.string_types)
    # Resize the two TASK groups (to 2 and 3 instances) and check that the
    # new counts are reflected in DescribeJobFlow.
    igs = dict((g.name, g) for g in jf.instancegroups)
    conn.modify_instance_groups(
        [igs['task-1'].instancegroupid, igs['task-2'].instancegroupid],
        [2, 3])
    jf = conn.describe_jobflow(job_id)
    int(jf.instancecount).should.equal(base_instance_count + 5)
    igs = dict((g.name, g) for g in jf.instancegroups)
    int(igs['task-1'].instancerunningcount).should.equal(2)
    int(igs['task-2'].instancerunningcount).should.equal(3)
@mock_emr
def test_steps():
    """Exercise step handling across DescribeJobFlow, AddJobFlowSteps,
    ListSteps and DescribeStep."""
    input_steps = [
        StreamingStep(
            name='My wordcount example',
            mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
            reducer='aggregate',
            input='s3n://elasticmapreduce/samples/wordcount/input',
            output='s3n://output_bucket/output/wordcount_output'),
        StreamingStep(
            name='My wordcount example2',
            mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
            reducer='aggregate',
            input='s3n://elasticmapreduce/samples/wordcount/input2',
            output='s3n://output_bucket/output/wordcount_output2')
    ]
    # TODO: implementation and test for cancel_steps
    conn = boto.connect_emr()
    # Launch with the first step only, then add the second afterwards.
    cluster_id = conn.run_jobflow(
        steps=[input_steps[0]],
        **run_jobflow_args)
    jf = conn.describe_jobflow(cluster_id)
    jf.steps.should.have.length_of(1)
    conn.add_jobflow_steps(cluster_id, [input_steps[1]])
    jf = conn.describe_jobflow(cluster_id)
    jf.steps.should.have.length_of(2)
    # DescribeJobFlow (boto/legacy) view of the steps.
    for step in jf.steps:
        step.actiononfailure.should.equal('TERMINATE_JOB_FLOW')
        # 8 args: -mapper/-reducer/-input/-output plus their four values.
        list(arg.value for arg in step.args).should.have.length_of(8)
        step.creationdatetime.should.be.a(six.string_types)
        # step.enddatetime.should.be.a(six.string_types)
        step.jar.should.equal('/home/hadoop/contrib/streaming/hadoop-streaming.jar')
        step.laststatechangereason.should.be.a(six.string_types)
        step.mainclass.should.equal('')
        step.name.should.be.a(six.string_types)
        # step.readydatetime.should.be.a(six.string_types)
        # step.startdatetime.should.be.a(six.string_types)
        step.state.should.be.within(['STARTING', 'PENDING'])
    # ListSteps / DescribeStep (boto3-style) view of the same steps.
    expected = dict((s.name, s) for s in input_steps)
    for x in conn.list_steps(cluster_id).steps:
        y = expected[x.name]
        # actiononfailure
        list(arg.value for arg in x.config.args).should.equal([
            '-mapper', y.mapper,
            '-reducer', y.reducer,
            '-input', y.input,
            '-output', y.output,
        ])
        x.config.jar.should.equal('/home/hadoop/contrib/streaming/hadoop-streaming.jar')
        x.config.mainclass.should.equal('')
        # properties
        x.should.have.property('id').should.be.a(six.string_types)
        x.name.should.equal(y.name)
        x.status.state.should.be.within(['STARTING', 'PENDING'])
        # x.status.statechangereason
        x.status.timeline.creationdatetime.should.be.a(six.string_types)
        # x.status.timeline.enddatetime.should.be.a(six.string_types)
        # x.status.timeline.startdatetime.should.be.a(six.string_types)
        # DescribeStep must agree with the ListSteps entry.
        x = conn.describe_step(cluster_id, x.id)
        list(arg.value for arg in x.config.args).should.equal([
            '-mapper', y.mapper,
            '-reducer', y.reducer,
            '-input', y.input,
            '-output', y.output,
        ])
        x.config.jar.should.equal('/home/hadoop/contrib/streaming/hadoop-streaming.jar')
        x.config.mainclass.should.equal('')
        # properties
        x.should.have.property('id').should.be.a(six.string_types)
        x.name.should.equal(y.name)
        x.status.state.should.be.within(['STARTING', 'PENDING'])
        # x.status.statechangereason
        x.status.timeline.creationdatetime.should.be.a(six.string_types)
        # x.status.timeline.enddatetime.should.be.a(six.string_types)
        # x.status.timeline.startdatetime.should.be.a(six.string_types)
@mock_emr
def test_tags():
input_tags = {"tag1": "val1", "tag2": "val2"}
conn = boto.connect_emr()
cluster_id = conn.run_jobflow(**run_jobflow_args)
conn.add_tags(cluster_id, input_tags)
cluster = conn.describe_cluster(cluster_id)
cluster.tags.should.have.length_of(2)
tags = dict((tag.key, tag.value) for tag in cluster.tags)
tags['tag1'].should.equal('val1')
tags['tag2'].should.equal('val2')
dict((t.key, t.value) for t in cluster.tags).should.equal(input_tags)
# Remove a tag
conn.remove_tags(cluster_id, ["tag1"])
conn.remove_tags(cluster_id, list(input_tags.keys()))
cluster = conn.describe_cluster(cluster_id)
cluster.tags.should.have.length_of(1)
tags = dict((tag.key, tag.value) for tag in cluster.tags)
tags['tag2'].should.equal('val2')
cluster.tags.should.have.length_of(0)

View file

@ -1,46 +1,586 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from copy import deepcopy
import boto3
import six
import sure # noqa
from botocore.exceptions import ClientError
from nose.tools import assert_raises
from moto import mock_emr
# Baseline keyword arguments reused (via deepcopy) across the boto3 EMR tests.
run_job_flow_args = {
    'Instances': {
        'InstanceCount': 3,
        'KeepJobFlowAliveWhenNoSteps': True,
        'MasterInstanceType': 'c3.medium',
        'Placement': {'AvailabilityZone': 'us-east-1a'},
        'SlaveInstanceType': 'c3.xlarge',
    },
    'JobFlowRole': 'EMR_EC2_DefaultRole',
    'LogUri': 's3://mybucket/log',
    'Name': 'cluster',
    'ServiceRole': 'EMR_DefaultRole',
    'VisibleToAllUsers': True,
}

# One instance group per role; the SPOT (TASK) groups also carry a bid price.
input_instance_groups = [
    {
        'InstanceCount': 1,
        'InstanceRole': 'MASTER',
        'InstanceType': 'c1.medium',
        'Market': 'ON_DEMAND',
        'Name': 'master',
    },
    {
        'InstanceCount': 3,
        'InstanceRole': 'CORE',
        'InstanceType': 'c1.medium',
        'Market': 'ON_DEMAND',
        'Name': 'core',
    },
    {
        'InstanceCount': 6,
        'InstanceRole': 'TASK',
        'InstanceType': 'c1.large',
        'Market': 'SPOT',
        'Name': 'task-1',
        'BidPrice': '0.07',
    },
    {
        'InstanceCount': 10,
        'InstanceRole': 'TASK',
        'InstanceType': 'c1.xlarge',
        'Market': 'SPOT',
        'Name': 'task-2',
        'BidPrice': '0.05',
    },
]
@mock_emr
def test_describe_cluster():
    """describe_cluster echoes back every mockable RunJobFlow parameter.

    The flattened diff had fused the removed test_run_job_flow body into
    this function (a second `def` line plus its call), which is a syntax
    error; the stale lines are dropped here.
    """
    client = boto3.client('emr', region_name='us-east-1')

    args = deepcopy(run_job_flow_args)
    args['Applications'] = [{'Name': 'Spark', 'Version': '2.4.2'}]
    args['Configurations'] = [
        {'Classification': 'yarn-site',
         'Properties': {'someproperty': 'somevalue'}}]
    args['Instances']['AdditionalMasterSecurityGroups'] = ['additional-master']
    args['Instances']['AdditionalSlaveSecurityGroups'] = ['additional-slave']
    args['Instances']['Ec2KeyName'] = 'mykey'
    args['Instances']['Ec2SubnetId'] = 'subnet-8be41cec'
    args['Instances']['EmrManagedMasterSecurityGroup'] = 'master-security-group'
    args['Instances']['EmrManagedSlaveSecurityGroup'] = 'slave-security-group'
    # Auto-terminate so the cluster is TERMINATED by the time we describe it.
    args['Instances']['KeepJobFlowAliveWhenNoSteps'] = False
    args['Instances']['ServiceAccessSecurityGroup'] = 'service-access-security-group'
    args['Tags'] = [{'Key': 'tag1', 'Value': 'val1'},
                    {'Key': 'tag2', 'Value': 'val2'}]

    cluster_id = client.run_job_flow(**args)['JobFlowId']

    cl = client.describe_cluster(ClusterId=cluster_id)['Cluster']
    cl['Applications'][0]['Name'].should.equal('Spark')
    cl['Applications'][0]['Version'].should.equal('2.4.2')
    cl['AutoTerminate'].should.equal(True)

    config = cl['Configurations'][0]
    config['Classification'].should.equal('yarn-site')
    config['Properties'].should.equal(args['Configurations'][0]['Properties'])

    attrs = cl['Ec2InstanceAttributes']
    attrs['AdditionalMasterSecurityGroups'].should.equal(args['Instances']['AdditionalMasterSecurityGroups'])
    attrs['AdditionalSlaveSecurityGroups'].should.equal(args['Instances']['AdditionalSlaveSecurityGroups'])
    attrs['Ec2AvailabilityZone'].should.equal('us-east-1a')
    attrs['Ec2KeyName'].should.equal(args['Instances']['Ec2KeyName'])
    attrs['Ec2SubnetId'].should.equal(args['Instances']['Ec2SubnetId'])
    attrs['EmrManagedMasterSecurityGroup'].should.equal(args['Instances']['EmrManagedMasterSecurityGroup'])
    attrs['EmrManagedSlaveSecurityGroup'].should.equal(args['Instances']['EmrManagedSlaveSecurityGroup'])
    attrs['IamInstanceProfile'].should.equal(args['JobFlowRole'])
    attrs['ServiceAccessSecurityGroup'].should.equal(args['Instances']['ServiceAccessSecurityGroup'])

    cl['Id'].should.equal(cluster_id)
    cl['LogUri'].should.equal(args['LogUri'])
    cl['MasterPublicDnsName'].should.be.a(six.string_types)
    cl['Name'].should.equal(args['Name'])
    cl['NormalizedInstanceHours'].should.equal(0)
    # cl['ReleaseLabel'].should.equal('emr-5.0.0')
    cl.shouldnt.have.key('RequestedAmiVersion')
    cl['RunningAmiVersion'].should.equal('1.0.0')
    # cl['SecurityConfiguration'].should.be.a(six.string_types)
    cl['ServiceRole'].should.equal(args['ServiceRole'])

    status = cl['Status']
    status['State'].should.equal('TERMINATED')
    # cluster['Status']['StateChangeReason']
    status['Timeline']['CreationDateTime'].should.be.a('datetime.datetime')
    # status['Timeline']['EndDateTime'].should.equal(datetime(2014, 1, 24, 2, 19, 46, tzinfo=pytz.utc))
    status['Timeline']['ReadyDateTime'].should.be.a('datetime.datetime')

    dict((t['Key'], t['Value']) for t in cl['Tags']).should.equal(
        dict((t['Key'], t['Value']) for t in args['Tags']))

    cl['TerminationProtected'].should.equal(False)
    cl['VisibleToAllUsers'].should.equal(True)
@mock_emr
def test_describe_job_flows():
    """describe_job_flows returns every flow, or only the ids requested."""
    client = boto3.client('emr', region_name='us-east-1')
    first_id = client.run_job_flow(**run_job_flow_args)['JobFlowId']
    second_id = client.run_job_flow(**run_job_flow_args)['JobFlowId']

    # With no filter, both job flows come back.
    client.describe_job_flows()['JobFlows'].should.have.length_of(2)

    # Filtering by id narrows the result to the single matching flow.
    for job_flow_id in (second_id, first_id):
        flows = client.describe_job_flows(JobFlowIds=[job_flow_id])['JobFlows']
        flows.should.have.length_of(1)
        flows[0]['JobFlowId'].should.equal(job_flow_id)
@mock_emr
def test_describe_job_flow():
    """Checks the DescribeJobFlows response fields for a mocked cluster,
    including execution status, instance attributes and instance groups."""
    client = boto3.client('emr', region_name='us-east-1')

    args = deepcopy(run_job_flow_args)
    args['AmiVersion'] = '3.8.1'
    args['Instances'].update(
        {'Ec2KeyName': 'ec2keyname',
         'Ec2SubnetId': 'subnet-8be41cec',
         'HadoopVersion': '2.4.0'})
    args['VisibleToAllUsers'] = True

    cluster_id = client.run_job_flow(**args)['JobFlowId']

    jf = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
    jf['AmiVersion'].should.equal(args['AmiVersion'])
    # No bootstrap actions were configured, so the key must be absent.
    jf.shouldnt.have.key('BootstrapActions')

    esd = jf['ExecutionStatusDetail']
    esd['CreationDateTime'].should.be.a('datetime.datetime')
    # esd['EndDateTime'].should.be.a('datetime.datetime')
    # esd['LastStateChangeReason'].should.be.a(six.string_types)
    esd['ReadyDateTime'].should.be.a('datetime.datetime')
    esd['StartDateTime'].should.be.a('datetime.datetime')
    # KeepJobFlowAliveWhenNoSteps is True, so the flow idles in WAITING.
    esd['State'].should.equal('WAITING')

    attrs = jf['Instances']
    attrs['Ec2KeyName'].should.equal(args['Instances']['Ec2KeyName'])
    attrs['Ec2SubnetId'].should.equal(args['Instances']['Ec2SubnetId'])
    attrs['HadoopVersion'].should.equal(args['Instances']['HadoopVersion'])
    attrs['InstanceCount'].should.equal(args['Instances']['InstanceCount'])
    for ig in attrs['InstanceGroups']:
        # ig['BidPrice']
        ig['CreationDateTime'].should.be.a('datetime.datetime')
        # ig['EndDateTime'].should.be.a('datetime.datetime')
        ig['InstanceGroupId'].should.be.a(six.string_types)
        ig['InstanceRequestCount'].should.be.a(int)
        ig['InstanceRole'].should.be.within(['MASTER', 'CORE'])
        ig['InstanceRunningCount'].should.be.a(int)
        ig['InstanceType'].should.be.within(['c3.medium', 'c3.xlarge'])
        # ig['LastStateChangeReason'].should.be.a(six.string_types)
        ig['Market'].should.equal('ON_DEMAND')
        ig['Name'].should.be.a(six.string_types)
        ig['ReadyDateTime'].should.be.a('datetime.datetime')
        ig['StartDateTime'].should.be.a('datetime.datetime')
        ig['State'].should.equal('RUNNING')
    attrs['KeepJobFlowAliveWhenNoSteps'].should.equal(True)
    # attrs['MasterInstanceId'].should.be.a(six.string_types)
    attrs['MasterInstanceType'].should.equal(args['Instances']['MasterInstanceType'])
    attrs['MasterPublicDnsName'].should.be.a(six.string_types)
    attrs['NormalizedInstanceHours'].should.equal(0)
    attrs['Placement']['AvailabilityZone'].should.equal(args['Instances']['Placement']['AvailabilityZone'])
    attrs['SlaveInstanceType'].should.equal(args['Instances']['SlaveInstanceType'])
    attrs['TerminationProtected'].should.equal(False)

    jf['JobFlowId'].should.equal(cluster_id)
    jf['JobFlowRole'].should.equal(args['JobFlowRole'])
    jf['LogUri'].should.equal(args['LogUri'])
    jf['Name'].should.equal(args['Name'])
    jf['ServiceRole'].should.equal(args['ServiceRole'])
    # No steps or supported products were configured on this flow.
    jf.shouldnt.have.key('Steps')
    jf.shouldnt.have.key('SupportedProducts')
    jf['VisibleToAllUsers'].should.equal(True)
@mock_emr
def test_list_clusters():
    """list_clusters summarizes both running and terminated clusters.

    The flattened diff had fused the removed single-cluster version of
    this test into the new one (a second run_job_flow call plus a
    contradictory length_of(1) assertion); the stale lines are dropped.
    """
    client = boto3.client('emr', region_name='us-east-1')

    args = deepcopy(run_job_flow_args)
    args['Name'] = 'jobflow1'
    cluster1_id = client.run_job_flow(**args)['JobFlowId']
    args['Name'] = 'jobflow2'
    cluster2_id = client.run_job_flow(**args)['JobFlowId']
    # Terminate the second cluster so both lifecycle states are listed.
    client.terminate_job_flows(JobFlowIds=[cluster2_id])

    summary = client.list_clusters()
    clusters = summary['Clusters']
    clusters.should.have.length_of(2)

    expected = {
        cluster1_id: {
            'Id': cluster1_id,
            'Name': 'jobflow1',
            'NormalizedInstanceHours': 0,
            'State': 'WAITING'},
        cluster2_id: {
            'Id': cluster2_id,
            'Name': 'jobflow2',
            'NormalizedInstanceHours': 0,
            'State': 'TERMINATED'},
    }
    for x in clusters:
        y = expected[x['Id']]
        x['Id'].should.equal(y['Id'])
        x['Name'].should.equal(y['Name'])
        x['NormalizedInstanceHours'].should.equal(y['NormalizedInstanceHours'])
        x['Status']['State'].should.equal(y['State'])
        x['Status']['Timeline']['CreationDateTime'].should.be.a('datetime.datetime')
        # Only a terminated cluster carries an EndDateTime.
        if y['State'] == 'TERMINATED':
            x['Status']['Timeline']['EndDateTime'].should.be.a('datetime.datetime')
        else:
            x['Status']['Timeline'].shouldnt.have.key('EndDateTime')
        x['Status']['Timeline']['ReadyDateTime'].should.be.a('datetime.datetime')
@mock_emr
def test_run_job_flow():
    """A freshly started job flow is WAITING and mirrors its input arguments."""
    client = boto3.client('emr', region_name='us-east-1')
    args = deepcopy(run_job_flow_args)
    job_flow_id = client.run_job_flow(**args)['JobFlowId']

    flow = client.describe_job_flows(JobFlowIds=[job_flow_id])['JobFlows'][0]
    flow['ExecutionStatusDetail']['State'].should.equal('WAITING')
    flow['JobFlowId'].should.equal(job_flow_id)
    flow['Name'].should.equal(args['Name'])
    flow['LogUri'].should.equal(args['LogUri'])
    flow['VisibleToAllUsers'].should.equal(args['VisibleToAllUsers'])

    instances = flow['Instances']
    instances['MasterInstanceType'].should.equal(args['Instances']['MasterInstanceType'])
    instances['SlaveInstanceType'].should.equal(args['Instances']['SlaveInstanceType'])
    instances['NormalizedInstanceHours'].should.equal(0)

    # No steps were submitted, so the key must be absent.
    flow.shouldnt.have.key('Steps')
@mock_emr
def test_run_job_flow_with_invalid_params():
    """Specifying both AmiVersion and ReleaseLabel raises ValidationException."""
    client = boto3.client('emr', region_name='us-east-1')
    args = deepcopy(run_job_flow_args)
    # AmiVersion and ReleaseLabel are mutually exclusive parameters.
    args['AmiVersion'] = '2.4'
    args['ReleaseLabel'] = 'emr-5.0.0'
    with assert_raises(ClientError) as e:
        client.run_job_flow(**args)
    e.exception.response['Error']['Code'].should.equal('ValidationException')
@mock_emr
def test_run_job_flow_in_multiple_regions():
    """Clusters created in different regions are tracked independently."""
    launched = {}
    for region_name in ['us-east-1', 'eu-west-1']:
        emr = boto3.client('emr', region_name=region_name)
        args = deepcopy(run_job_flow_args)
        # Name each cluster after its region so we can tell them apart.
        args['Name'] = region_name
        launched[region_name] = {
            'client': emr,
            'cluster_id': emr.run_job_flow(**args)['JobFlowId'],
        }

    for region_name in launched.keys():
        info = launched[region_name]
        resp = info['client'].describe_cluster(ClusterId=info['cluster_id'])
        resp['Cluster']['Name'].should.equal(region_name)
@mock_emr
def test_run_job_flow_with_new_params():
    """run_job_flow accepts the extended argument set and returns a flow id."""
    client = boto3.client('emr', region_name='us-east-1')
    client.run_job_flow(**run_job_flow_args).should.have.key('JobFlowId')
@mock_emr
def test_run_job_flow_with_visible_to_all_users():
    """VisibleToAllUsers round-trips through run_job_flow/describe_cluster."""
    client = boto3.client('emr', region_name='us-east-1')
    for visibility in (True, False):
        args = deepcopy(run_job_flow_args)
        args['VisibleToAllUsers'] = visibility
        cluster_id = client.run_job_flow(**args)['JobFlowId']
        described = client.describe_cluster(ClusterId=cluster_id)
        described['Cluster']['VisibleToAllUsers'].should.equal(visibility)
@mock_emr
def test_run_job_flow_with_instance_groups():
    """Instance groups given to run_job_flow appear in list_instance_groups."""
    expected_groups = {group['Name']: group for group in input_instance_groups}
    client = boto3.client('emr', region_name='us-east-1')
    args = deepcopy(run_job_flow_args)
    args['Instances'] = {'InstanceGroups': input_instance_groups}
    cluster_id = client.run_job_flow(**args)['JobFlowId']

    listed = client.list_instance_groups(ClusterId=cluster_id)['InstanceGroups']
    for group in listed:
        source = expected_groups[group['Name']]
        group.should.have.key('Id')
        group['RequestedInstanceCount'].should.equal(source['InstanceCount'])
        group['InstanceGroupType'].should.equal(source['InstanceRole'])
        group['InstanceType'].should.equal(source['InstanceType'])
        group['Market'].should.equal(source['Market'])
        # Only the SPOT groups in the fixture define a bid price.
        if 'BidPrice' in source:
            group['BidPrice'].should.equal(source['BidPrice'])
@mock_emr
def test_set_termination_protection():
    """set_termination_protection toggles the flag on and off again."""
    client = boto3.client('emr', region_name='us-east-1')
    args = deepcopy(run_job_flow_args)
    args['Instances']['TerminationProtected'] = False
    cluster_id = client.run_job_flow(**args)['JobFlowId']

    described = client.describe_cluster(ClusterId=cluster_id)
    described['Cluster']['TerminationProtected'].should.equal(False)

    for protected in (True, False):
        client.set_termination_protection(JobFlowIds=[cluster_id],
                                          TerminationProtected=protected)
        described = client.describe_cluster(ClusterId=cluster_id)
        described['Cluster']['TerminationProtected'].should.equal(protected)
@mock_emr
def test_set_visible_to_all_users():
    """set_visible_to_all_users flips cluster visibility both ways."""
    client = boto3.client('emr', region_name='us-east-1')
    args = deepcopy(run_job_flow_args)
    args['VisibleToAllUsers'] = False
    cluster_id = client.run_job_flow(**args)['JobFlowId']

    described = client.describe_cluster(ClusterId=cluster_id)
    described['Cluster']['VisibleToAllUsers'].should.equal(False)

    for visibility in (True, False):
        client.set_visible_to_all_users(JobFlowIds=[cluster_id],
                                        VisibleToAllUsers=visibility)
        described = client.describe_cluster(ClusterId=cluster_id)
        described['Cluster']['VisibleToAllUsers'].should.equal(visibility)
@mock_emr
def test_terminate_job_flows():
    """terminate_job_flows moves a WAITING cluster to TERMINATED."""
    client = boto3.client('emr', region_name='us-east-1')
    cluster_id = client.run_job_flow(**run_job_flow_args)['JobFlowId']

    state = client.describe_cluster(ClusterId=cluster_id)['Cluster']['Status']['State']
    state.should.equal('WAITING')

    client.terminate_job_flows(JobFlowIds=[cluster_id])

    state = client.describe_cluster(ClusterId=cluster_id)['Cluster']['Status']['State']
    state.should.equal('TERMINATED')
# The tests below exercise multiple EMR endpoints for each feature.
@mock_emr
def test_bootstrap_actions():
    """Bootstrap actions appear in both describe_job_flows and
    list_bootstrap_actions responses."""
    bootstrap_actions = [
        {'Name': 'bs1',
         'ScriptBootstrapAction': {
             'Args': ['arg1', 'arg2'],
             'Path': 'path/to/script'}},
        {'Name': 'bs2',
         'ScriptBootstrapAction': {
             'Path': 'path/to/anotherscript'}}
    ]

    client = boto3.client('emr', region_name='us-east-1')
    args = deepcopy(run_job_flow_args)
    args['BootstrapActions'] = bootstrap_actions
    cluster_id = client.run_job_flow(**args)['JobFlowId']

    # describe_job_flows embeds each full action config verbatim.
    job_flow = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
    for reported, configured in zip(job_flow['BootstrapActions'], bootstrap_actions):
        reported['BootstrapActionConfig'].should.equal(configured)

    # list_bootstrap_actions flattens the script path and args.
    listed = client.list_bootstrap_actions(ClusterId=cluster_id)
    for reported, configured in zip(listed['BootstrapActions'], bootstrap_actions):
        reported['Name'].should.equal(configured['Name'])
        if 'Args' in configured['ScriptBootstrapAction']:
            reported['Args'].should.equal(configured['ScriptBootstrapAction']['Args'])
        reported['ScriptPath'].should.equal(configured['ScriptBootstrapAction']['Path'])
@mock_emr
def test_instance_groups():
    """Exercises add/modify/list instance group endpoints end to end.

    Bug fix: the original guarded the BidPrice assertions with
    hasattr(y, 'BidPrice') on a plain dict (always False, so the checks
    never ran) and compared against the literal string 'BidPrice'
    instead of the configured value.
    """
    input_groups = dict((g['Name'], g) for g in input_instance_groups)

    client = boto3.client('emr', region_name='us-east-1')
    args = deepcopy(run_job_flow_args)
    # Launch with explicit instance groups instead of the master/slave shorthand.
    for key in ['MasterInstanceType', 'SlaveInstanceType', 'InstanceCount']:
        del args['Instances'][key]
    args['Instances']['InstanceGroups'] = input_instance_groups[:2]
    cluster_id = client.run_job_flow(**args)['JobFlowId']

    jf = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
    base_instance_count = jf['Instances']['InstanceCount']

    client.add_instance_groups(JobFlowId=cluster_id,
                               InstanceGroups=input_instance_groups[2:])

    jf = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
    jf['Instances']['InstanceCount'].should.equal(
        sum(g['InstanceCount'] for g in input_instance_groups))
    for x in jf['Instances']['InstanceGroups']:
        y = input_groups[x['Name']]
        if 'BidPrice' in y:
            x['BidPrice'].should.equal(y['BidPrice'])
        x['CreationDateTime'].should.be.a('datetime.datetime')
        # x['EndDateTime'].should.be.a('datetime.datetime')
        x.should.have.key('InstanceGroupId')
        x['InstanceRequestCount'].should.equal(y['InstanceCount'])
        x['InstanceRole'].should.equal(y['InstanceRole'])
        x['InstanceRunningCount'].should.equal(y['InstanceCount'])
        x['InstanceType'].should.equal(y['InstanceType'])
        # x['LastStateChangeReason'].should.equal(y['LastStateChangeReason'])
        x['Market'].should.equal(y['Market'])
        x['Name'].should.equal(y['Name'])
        x['ReadyDateTime'].should.be.a('datetime.datetime')
        x['StartDateTime'].should.be.a('datetime.datetime')
        x['State'].should.equal('RUNNING')

    groups = client.list_instance_groups(ClusterId=cluster_id)['InstanceGroups']
    for x in groups:
        y = input_groups[x['Name']]
        if 'BidPrice' in y:
            x['BidPrice'].should.equal(y['BidPrice'])
        # Configurations
        # EbsBlockDevices
        # EbsOptimized
        x.should.have.key('Id')
        x['InstanceGroupType'].should.equal(y['InstanceRole'])
        x['InstanceType'].should.equal(y['InstanceType'])
        x['Market'].should.equal(y['Market'])
        x['Name'].should.equal(y['Name'])
        x['RequestedInstanceCount'].should.equal(y['InstanceCount'])
        x['RunningInstanceCount'].should.equal(y['InstanceCount'])
        # ShrinkPolicy
        x['Status']['State'].should.equal('RUNNING')
        x['Status']['StateChangeReason']['Code'].should.be.a(six.string_types)
        # x['Status']['StateChangeReason']['Message'].should.be.a(six.string_types)
        x['Status']['Timeline']['CreationDateTime'].should.be.a('datetime.datetime')
        # x['Status']['Timeline']['EndDateTime'].should.be.a('datetime.datetime')
        x['Status']['Timeline']['ReadyDateTime'].should.be.a('datetime.datetime')

    igs = dict((g['Name'], g) for g in groups)
    client.modify_instance_groups(
        InstanceGroups=[
            {'InstanceGroupId': igs['task-1']['Id'],
             'InstanceCount': 2},
            {'InstanceGroupId': igs['task-2']['Id'],
             'InstanceCount': 3}])

    jf = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
    jf['Instances']['InstanceCount'].should.equal(base_instance_count + 5)
    igs = dict((g['Name'], g) for g in jf['Instances']['InstanceGroups'])
    igs['task-1']['InstanceRunningCount'].should.equal(2)
    igs['task-2']['InstanceRunningCount'].should.equal(3)
@mock_emr
def test_steps():
    """Steps supplied at launch and via add_job_flow_steps are visible
    through DescribeJobFlows, ListSteps and DescribeStep."""
    input_steps = [{
        'HadoopJarStep': {
            'Args': [
                'hadoop-streaming',
                '-files', 's3://elasticmapreduce/samples/wordcount/wordSplitter.py#wordSplitter.py',
                '-mapper', 'python wordSplitter.py',
                '-input', 's3://elasticmapreduce/samples/wordcount/input',
                '-output', 's3://output_bucket/output/wordcount_output',
                '-reducer', 'aggregate'
            ],
            'Jar': 'command-runner.jar',
        },
        'Name': 'My wordcount example',
    }, {
        'HadoopJarStep': {
            'Args': [
                'hadoop-streaming',
                '-files', 's3://elasticmapreduce/samples/wordcount/wordSplitter2.py#wordSplitter2.py',
                '-mapper', 'python wordSplitter2.py',
                '-input', 's3://elasticmapreduce/samples/wordcount/input2',
                '-output', 's3://output_bucket/output/wordcount_output2',
                '-reducer', 'aggregate'
            ],
            'Jar': 'command-runner.jar',
        },
        'Name': 'My wordcount example2',
    }]

    # TODO: implementation and test for cancel_steps
    client = boto3.client('emr', region_name='us-east-1')
    args = deepcopy(run_job_flow_args)
    # Submit the first step with the job flow, the second one afterwards.
    args['Steps'] = [input_steps[0]]
    cluster_id = client.run_job_flow(**args)['JobFlowId']

    jf = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
    jf['Steps'].should.have.length_of(1)

    client.add_job_flow_steps(JobFlowId=cluster_id, Steps=[input_steps[1]])

    jf = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
    jf['Steps'].should.have.length_of(2)
    for idx, (x, y) in enumerate(zip(jf['Steps'], input_steps)):
        x['ExecutionStatusDetail'].should.have.key('CreationDateTime')
        # x['ExecutionStatusDetail'].should.have.key('EndDateTime')
        # x['ExecutionStatusDetail'].should.have.key('LastStateChangeReason')
        # x['ExecutionStatusDetail'].should.have.key('StartDateTime')
        # Only the first step is STARTING; later submissions queue as PENDING.
        x['ExecutionStatusDetail']['State'].should.equal('STARTING' if idx == 0 else 'PENDING')
        x['StepConfig']['ActionOnFailure'].should.equal('TERMINATE_CLUSTER')
        x['StepConfig']['HadoopJarStep']['Args'].should.equal(y['HadoopJarStep']['Args'])
        x['StepConfig']['HadoopJarStep']['Jar'].should.equal(y['HadoopJarStep']['Jar'])
        if 'MainClass' in y['HadoopJarStep']:
            x['StepConfig']['HadoopJarStep']['MainClass'].should.equal(y['HadoopJarStep']['MainClass'])
        if 'Properties' in y['HadoopJarStep']:
            x['StepConfig']['HadoopJarStep']['Properties'].should.equal(y['HadoopJarStep']['Properties'])
        x['StepConfig']['Name'].should.equal(y['Name'])

    expected = dict((s['Name'], s) for s in input_steps)

    steps = client.list_steps(ClusterId=cluster_id)['Steps']
    steps.should.have.length_of(2)
    for x in steps:
        y = expected[x['Name']]
        x['ActionOnFailure'].should.equal('TERMINATE_CLUSTER')
        x['Config']['Args'].should.equal(y['HadoopJarStep']['Args'])
        x['Config']['Jar'].should.equal(y['HadoopJarStep']['Jar'])
        # x['Config']['MainClass'].should.equal(y['HadoopJarStep']['MainClass'])
        # Properties
        x['Id'].should.be.a(six.string_types)
        x['Name'].should.equal(y['Name'])
        x['Status']['State'].should.be.within(['STARTING', 'PENDING'])
        # StateChangeReason
        x['Status']['Timeline']['CreationDateTime'].should.be.a('datetime.datetime')
        # x['Status']['Timeline']['EndDateTime'].should.be.a('datetime.datetime')
        # x['Status']['Timeline']['StartDateTime'].should.be.a('datetime.datetime')
        # describe_step is expected to report the same data as list_steps.
        x = client.describe_step(ClusterId=cluster_id, StepId=x['Id'])['Step']
        x['ActionOnFailure'].should.equal('TERMINATE_CLUSTER')
        x['Config']['Args'].should.equal(y['HadoopJarStep']['Args'])
        x['Config']['Jar'].should.equal(y['HadoopJarStep']['Jar'])
        # x['Config']['MainClass'].should.equal(y['HadoopJarStep']['MainClass'])
        # Properties
        x['Id'].should.be.a(six.string_types)
        x['Name'].should.equal(y['Name'])
        x['Status']['State'].should.be.within(['STARTING', 'PENDING'])
        # StateChangeReason
        x['Status']['Timeline']['CreationDateTime'].should.be.a('datetime.datetime')
        # x['Status']['Timeline']['EndDateTime'].should.be.a('datetime.datetime')
        # x['Status']['Timeline']['StartDateTime'].should.be.a('datetime.datetime')
@mock_emr
def test_tags():
    """add_tags attaches tags to a cluster; remove_tags clears them all."""
    input_tags = [{'Key': 'newkey1', 'Value': 'newval1'},
                  {'Key': 'newkey2', 'Value': 'newval2'}]

    client = boto3.client('emr', region_name='us-east-1')
    cluster_id = client.run_job_flow(**run_job_flow_args)['JobFlowId']

    client.add_tags(ResourceId=cluster_id, Tags=input_tags)
    cluster = client.describe_cluster(ClusterId=cluster_id)['Cluster']
    cluster['Tags'].should.have.length_of(2)
    {t['Key']: t['Value'] for t in cluster['Tags']}.should.equal(
        {t['Key']: t['Value'] for t in input_tags})

    # Removing every key leaves the cluster with no Tags entry at all.
    client.remove_tags(ResourceId=cluster_id,
                       TagKeys=[t['Key'] for t in input_tags])
    cluster = client.describe_cluster(ClusterId=cluster_id)['Cluster']
    cluster.shouldnt.have.key('Tags')