Implement filters and pagers for some EMR end points
This commit is contained in:
parent
2e33e2be90
commit
484faa54c4
6 changed files with 342 additions and 115 deletions
|
|
@ -1,6 +1,9 @@
|
|||
from __future__ import unicode_literals
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
import boto
|
||||
import pytz
|
||||
from boto.emr.bootstrap_action import BootstrapAction
|
||||
from boto.emr.instance_group import InstanceGroup
|
||||
from boto.emr.step import StreamingStep
|
||||
|
|
@ -104,18 +107,53 @@ def test_describe_cluster():
|
|||
@mock_emr
|
||||
def test_describe_jobflows():
|
||||
conn = boto.connect_emr()
|
||||
job1_id = conn.run_jobflow(**run_jobflow_args)
|
||||
job2_id = conn.run_jobflow(**run_jobflow_args)
|
||||
args = run_jobflow_args.copy()
|
||||
expected = {}
|
||||
|
||||
for idx in range(400):
|
||||
cluster_name = 'cluster' + str(idx)
|
||||
args['name'] = cluster_name
|
||||
cluster_id = conn.run_jobflow(**args)
|
||||
expected[cluster_id] = {
|
||||
'id': cluster_id,
|
||||
'name': cluster_name,
|
||||
'state': 'WAITING'
|
||||
}
|
||||
|
||||
# need sleep since it appears the timestamp is always rounded to
|
||||
# the nearest second internally
|
||||
time.sleep(1)
|
||||
timestamp = datetime.now(pytz.utc)
|
||||
time.sleep(1)
|
||||
|
||||
for idx in range(400, 600):
|
||||
cluster_name = 'cluster' + str(idx)
|
||||
args['name'] = cluster_name
|
||||
cluster_id = conn.run_jobflow(**args)
|
||||
conn.terminate_jobflow(cluster_id)
|
||||
expected[cluster_id] = {
|
||||
'id': cluster_id,
|
||||
'name': cluster_name,
|
||||
'state': 'TERMINATED'
|
||||
}
|
||||
jobs = conn.describe_jobflows()
|
||||
jobs.should.have.length_of(2)
|
||||
jobs.should.have.length_of(512)
|
||||
|
||||
jobs = conn.describe_jobflows(jobflow_ids=[job2_id])
|
||||
jobs.should.have.length_of(1)
|
||||
jobs[0].jobflowid.should.equal(job2_id)
|
||||
for cluster_id, y in expected.items():
|
||||
resp = conn.describe_jobflows(jobflow_ids=[cluster_id])
|
||||
resp.should.have.length_of(1)
|
||||
resp[0].jobflowid.should.equal(cluster_id)
|
||||
|
||||
first_job = conn.describe_jobflow(job1_id)
|
||||
first_job.jobflowid.should.equal(job1_id)
|
||||
resp = conn.describe_jobflows(states=['WAITING'])
|
||||
resp.should.have.length_of(400)
|
||||
for x in resp:
|
||||
x.state.should.equal('WAITING')
|
||||
|
||||
resp = conn.describe_jobflows(created_before=timestamp)
|
||||
resp.should.have.length_of(400)
|
||||
|
||||
resp = conn.describe_jobflows(created_after=timestamp)
|
||||
resp.should.have.length_of(200)
|
||||
|
||||
|
||||
@mock_emr
|
||||
|
|
@ -204,43 +242,69 @@ def test_describe_jobflow():
|
|||
@mock_emr
|
||||
def test_list_clusters():
|
||||
conn = boto.connect_emr()
|
||||
|
||||
args = run_jobflow_args.copy()
|
||||
args['name'] = 'jobflow1'
|
||||
cluster1_id = conn.run_jobflow(**args)
|
||||
args['name'] = 'jobflow2'
|
||||
cluster2_id = conn.run_jobflow(**args)
|
||||
conn.terminate_jobflow(cluster2_id)
|
||||
expected = {}
|
||||
|
||||
summary = conn.list_clusters()
|
||||
clusters = summary.clusters
|
||||
clusters.should.have.length_of(2)
|
||||
for idx in range(40):
|
||||
cluster_name = 'jobflow' + str(idx)
|
||||
args['name'] = cluster_name
|
||||
cluster_id = conn.run_jobflow(**args)
|
||||
expected[cluster_id] = {
|
||||
'id': cluster_id,
|
||||
'name': cluster_name,
|
||||
'normalizedinstancehours': '0',
|
||||
'state': 'WAITING'
|
||||
}
|
||||
|
||||
expected = {
|
||||
cluster1_id: {
|
||||
'id': cluster1_id,
|
||||
'name': 'jobflow1',
|
||||
'normalizedinstancehours': 0,
|
||||
'state': 'WAITING'},
|
||||
cluster2_id: {
|
||||
'id': cluster2_id,
|
||||
'name': 'jobflow2',
|
||||
'normalizedinstancehours': 0,
|
||||
'state': 'TERMINATED'},
|
||||
}
|
||||
# need sleep since it appears the timestamp is always rounded to
|
||||
# the nearest second internally
|
||||
time.sleep(1)
|
||||
timestamp = datetime.now(pytz.utc)
|
||||
time.sleep(1)
|
||||
|
||||
for x in clusters:
|
||||
y = expected[x.id]
|
||||
x.id.should.equal(y['id'])
|
||||
x.name.should.equal(y['name'])
|
||||
int(x.normalizedinstancehours).should.equal(y['normalizedinstancehours'])
|
||||
x.status.state.should.equal(y['state'])
|
||||
x.status.timeline.creationdatetime.should.be.a(six.string_types)
|
||||
if y['state'] == 'TERMINATED':
|
||||
x.status.timeline.enddatetime.should.be.a(six.string_types)
|
||||
else:
|
||||
x.status.timeline.shouldnt.have.property('enddatetime')
|
||||
x.status.timeline.readydatetime.should.be.a(six.string_types)
|
||||
for idx in range(40, 70):
|
||||
cluster_name = 'jobflow' + str(idx)
|
||||
args['name'] = cluster_name
|
||||
cluster_id = conn.run_jobflow(**args)
|
||||
conn.terminate_jobflow(cluster_id)
|
||||
expected[cluster_id] = {
|
||||
'id': cluster_id,
|
||||
'name': cluster_name,
|
||||
'normalizedinstancehours': '0',
|
||||
'state': 'TERMINATED'
|
||||
}
|
||||
|
||||
args = {}
|
||||
while 1:
|
||||
resp = conn.list_clusters(**args)
|
||||
clusters = resp.clusters
|
||||
len(clusters).should.be.lower_than_or_equal_to(50)
|
||||
for x in clusters:
|
||||
y = expected[x.id]
|
||||
x.id.should.equal(y['id'])
|
||||
x.name.should.equal(y['name'])
|
||||
x.normalizedinstancehours.should.equal(y['normalizedinstancehours'])
|
||||
x.status.state.should.equal(y['state'])
|
||||
x.status.timeline.creationdatetime.should.be.a(six.string_types)
|
||||
if y['state'] == 'TERMINATED':
|
||||
x.status.timeline.enddatetime.should.be.a(six.string_types)
|
||||
else:
|
||||
x.status.timeline.shouldnt.have.property('enddatetime')
|
||||
x.status.timeline.readydatetime.should.be.a(six.string_types)
|
||||
if not hasattr(resp, 'marker'):
|
||||
break
|
||||
args = {'marker': resp.marker}
|
||||
|
||||
resp = conn.list_clusters(cluster_states=['TERMINATED'])
|
||||
resp.clusters.should.have.length_of(30)
|
||||
for x in resp.clusters:
|
||||
x.status.state.should.equal('TERMINATED')
|
||||
|
||||
resp = conn.list_clusters(created_before=timestamp)
|
||||
resp.clusters.should.have.length_of(40)
|
||||
|
||||
resp = conn.list_clusters(created_after=timestamp)
|
||||
resp.clusters.should.have.length_of(30)
|
||||
|
||||
|
||||
@mock_emr
|
||||
|
|
@ -516,7 +580,8 @@ def test_steps():
|
|||
|
||||
expected = dict((s.name, s) for s in input_steps)
|
||||
|
||||
for x in conn.list_steps(cluster_id).steps:
|
||||
steps = conn.list_steps(cluster_id).steps
|
||||
for x in steps:
|
||||
y = expected[x.name]
|
||||
# actiononfailure
|
||||
list(arg.value for arg in x.config.args).should.equal([
|
||||
|
|
@ -554,6 +619,17 @@ def test_steps():
|
|||
# x.status.timeline.enddatetime.should.be.a(six.string_types)
|
||||
# x.status.timeline.startdatetime.should.be.a(six.string_types)
|
||||
|
||||
@requires_boto_gte('2.39')
|
||||
def test_list_steps_with_states():
|
||||
# boto's list_steps prior to 2.39 has a bug that ignores
|
||||
# step_states argument.
|
||||
steps = conn.list_steps(cluster_id).steps
|
||||
step_id = steps[0].id
|
||||
steps = conn.list_steps(cluster_id, step_states=['STARTING']).steps
|
||||
steps.should.have.length_of(1)
|
||||
steps[0].id.should.equal(step_id)
|
||||
test_list_steps_with_states()
|
||||
|
||||
|
||||
@mock_emr
|
||||
def test_tags():
|
||||
|
|
|
|||
|
|
@ -1,8 +1,11 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
import time
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
|
||||
import boto3
|
||||
import pytz
|
||||
import six
|
||||
import sure # noqa
|
||||
from botocore.exceptions import ClientError
|
||||
|
|
@ -121,19 +124,54 @@ def test_describe_cluster():
|
|||
@mock_emr
|
||||
def test_describe_job_flows():
|
||||
client = boto3.client('emr', region_name='us-east-1')
|
||||
cluster1_id = client.run_job_flow(**run_job_flow_args)['JobFlowId']
|
||||
cluster2_id = client.run_job_flow(**run_job_flow_args)['JobFlowId']
|
||||
args = deepcopy(run_job_flow_args)
|
||||
expected = {}
|
||||
|
||||
for idx in range(400):
|
||||
cluster_name = 'cluster' + str(idx)
|
||||
args['Name'] = cluster_name
|
||||
cluster_id = client.run_job_flow(**args)['JobFlowId']
|
||||
expected[cluster_id] = {
|
||||
'Id': cluster_id,
|
||||
'Name': cluster_name,
|
||||
'State': 'WAITING'
|
||||
}
|
||||
|
||||
# need sleep since it appears the timestamp is always rounded to
|
||||
# the nearest second internally
|
||||
time.sleep(1)
|
||||
timestamp = datetime.now(pytz.utc)
|
||||
time.sleep(1)
|
||||
|
||||
for idx in range(400, 600):
|
||||
cluster_name = 'cluster' + str(idx)
|
||||
args['Name'] = cluster_name
|
||||
cluster_id = client.run_job_flow(**args)['JobFlowId']
|
||||
client.terminate_job_flows(JobFlowIds=[cluster_id])
|
||||
expected[cluster_id] = {
|
||||
'Id': cluster_id,
|
||||
'Name': cluster_name,
|
||||
'State': 'TERMINATED'
|
||||
}
|
||||
|
||||
resp = client.describe_job_flows()
|
||||
resp['JobFlows'].should.have.length_of(2)
|
||||
resp['JobFlows'].should.have.length_of(512)
|
||||
|
||||
resp = client.describe_job_flows(JobFlowIds=[cluster2_id])
|
||||
resp['JobFlows'].should.have.length_of(1)
|
||||
resp['JobFlows'][0]['JobFlowId'].should.equal(cluster2_id)
|
||||
for cluster_id, y in expected.items():
|
||||
resp = client.describe_job_flows(JobFlowIds=[cluster_id])
|
||||
resp['JobFlows'].should.have.length_of(1)
|
||||
resp['JobFlows'][0]['JobFlowId'].should.equal(cluster_id)
|
||||
|
||||
resp = client.describe_job_flows(JobFlowIds=[cluster1_id])
|
||||
resp['JobFlows'].should.have.length_of(1)
|
||||
resp['JobFlows'][0]['JobFlowId'].should.equal(cluster1_id)
|
||||
resp = client.describe_job_flows(JobFlowStates=['WAITING'])
|
||||
resp['JobFlows'].should.have.length_of(400)
|
||||
for x in resp['JobFlows']:
|
||||
x['ExecutionStatusDetail']['State'].should.equal('WAITING')
|
||||
|
||||
resp = client.describe_job_flows(CreatedBefore=timestamp)
|
||||
resp['JobFlows'].should.have.length_of(400)
|
||||
|
||||
resp = client.describe_job_flows(CreatedAfter=timestamp)
|
||||
resp['JobFlows'].should.have.length_of(200)
|
||||
|
||||
|
||||
@mock_emr
|
||||
|
|
@ -203,41 +241,69 @@ def test_describe_job_flow():
|
|||
def test_list_clusters():
|
||||
client = boto3.client('emr', region_name='us-east-1')
|
||||
args = deepcopy(run_job_flow_args)
|
||||
args['Name'] = 'jobflow1'
|
||||
cluster1_id = client.run_job_flow(**args)['JobFlowId']
|
||||
args['Name'] = 'jobflow2'
|
||||
cluster2_id = client.run_job_flow(**args)['JobFlowId']
|
||||
client.terminate_job_flows(JobFlowIds=[cluster2_id])
|
||||
expected = {}
|
||||
|
||||
summary = client.list_clusters()
|
||||
clusters = summary['Clusters']
|
||||
clusters.should.have.length_of(2)
|
||||
|
||||
expected = {
|
||||
cluster1_id: {
|
||||
'Id': cluster1_id,
|
||||
'Name': 'jobflow1',
|
||||
for idx in range(40):
|
||||
cluster_name = 'jobflow' + str(idx)
|
||||
args['Name'] = cluster_name
|
||||
cluster_id = client.run_job_flow(**args)['JobFlowId']
|
||||
expected[cluster_id] = {
|
||||
'Id': cluster_id,
|
||||
'Name': cluster_name,
|
||||
'NormalizedInstanceHours': 0,
|
||||
'State': 'WAITING'},
|
||||
cluster2_id: {
|
||||
'Id': cluster2_id,
|
||||
'Name': 'jobflow2',
|
||||
'NormalizedInstanceHours': 0,
|
||||
'State': 'TERMINATED'},
|
||||
}
|
||||
'State': 'WAITING'
|
||||
}
|
||||
|
||||
for x in clusters:
|
||||
y = expected[x['Id']]
|
||||
x['Id'].should.equal(y['Id'])
|
||||
x['Name'].should.equal(y['Name'])
|
||||
x['NormalizedInstanceHours'].should.equal(y['NormalizedInstanceHours'])
|
||||
x['Status']['State'].should.equal(y['State'])
|
||||
x['Status']['Timeline']['CreationDateTime'].should.be.a('datetime.datetime')
|
||||
if y['State'] == 'TERMINATED':
|
||||
x['Status']['Timeline']['EndDateTime'].should.be.a('datetime.datetime')
|
||||
else:
|
||||
x['Status']['Timeline'].shouldnt.have.key('EndDateTime')
|
||||
x['Status']['Timeline']['ReadyDateTime'].should.be.a('datetime.datetime')
|
||||
# need sleep since it appears the timestamp is always rounded to
|
||||
# the nearest second internally
|
||||
time.sleep(1)
|
||||
timestamp = datetime.now(pytz.utc)
|
||||
time.sleep(1)
|
||||
|
||||
for idx in range(40, 70):
|
||||
cluster_name = 'jobflow' + str(idx)
|
||||
args['Name'] = cluster_name
|
||||
cluster_id = client.run_job_flow(**args)['JobFlowId']
|
||||
client.terminate_job_flows(JobFlowIds=[cluster_id])
|
||||
expected[cluster_id] = {
|
||||
'Id': cluster_id,
|
||||
'Name': cluster_name,
|
||||
'NormalizedInstanceHours': 0,
|
||||
'State': 'TERMINATED'
|
||||
}
|
||||
|
||||
args = {}
|
||||
while 1:
|
||||
resp = client.list_clusters(**args)
|
||||
clusters = resp['Clusters']
|
||||
len(clusters).should.be.lower_than_or_equal_to(50)
|
||||
for x in clusters:
|
||||
y = expected[x['Id']]
|
||||
x['Id'].should.equal(y['Id'])
|
||||
x['Name'].should.equal(y['Name'])
|
||||
x['NormalizedInstanceHours'].should.equal(y['NormalizedInstanceHours'])
|
||||
x['Status']['State'].should.equal(y['State'])
|
||||
x['Status']['Timeline']['CreationDateTime'].should.be.a('datetime.datetime')
|
||||
if y['State'] == 'TERMINATED':
|
||||
x['Status']['Timeline']['EndDateTime'].should.be.a('datetime.datetime')
|
||||
else:
|
||||
x['Status']['Timeline'].shouldnt.have.key('EndDateTime')
|
||||
x['Status']['Timeline']['ReadyDateTime'].should.be.a('datetime.datetime')
|
||||
marker = resp.get('Marker')
|
||||
if marker is None:
|
||||
break
|
||||
args = {'Marker': marker}
|
||||
|
||||
resp = client.list_clusters(ClusterStates=['TERMINATED'])
|
||||
resp['Clusters'].should.have.length_of(30)
|
||||
for x in resp['Clusters']:
|
||||
x['Status']['State'].should.equal('TERMINATED')
|
||||
|
||||
resp = client.list_clusters(CreatedBefore=timestamp)
|
||||
resp['Clusters'].should.have.length_of(40)
|
||||
|
||||
resp = client.list_clusters(CreatedAfter=timestamp)
|
||||
resp['Clusters'].should.have.length_of(30)
|
||||
|
||||
|
||||
@mock_emr
|
||||
|
|
@ -567,6 +633,15 @@ def test_steps():
|
|||
# x['Status']['Timeline']['EndDateTime'].should.be.a('datetime.datetime')
|
||||
# x['Status']['Timeline']['StartDateTime'].should.be.a('datetime.datetime')
|
||||
|
||||
step_id = steps[0]['Id']
|
||||
steps = client.list_steps(ClusterId=cluster_id, StepIds=[step_id])['Steps']
|
||||
steps.should.have.length_of(1)
|
||||
steps[0]['Id'].should.equal(step_id)
|
||||
|
||||
steps = client.list_steps(ClusterId=cluster_id, StepStates=['STARTING'])['Steps']
|
||||
steps.should.have.length_of(1)
|
||||
steps[0]['Id'].should.equal(step_id)
|
||||
|
||||
|
||||
@mock_emr
|
||||
def test_tags():
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue