Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Stephan Huber 2019-12-23 08:38:53 +01:00
commit 0527e88d46
541 changed files with 75504 additions and 51429 deletions

View file

@ -1,56 +1,55 @@
from __future__ import unicode_literals
TABLE_INPUT = {
'Owner': 'a_fake_owner',
'Parameters': {
'EXTERNAL': 'TRUE',
},
'Retention': 0,
'StorageDescriptor': {
'BucketColumns': [],
'Compressed': False,
'InputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat',
'NumberOfBuckets': -1,
'OutputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat',
'Parameters': {},
'SerdeInfo': {
'Parameters': {
'serialization.format': '1'
},
'SerializationLibrary': 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
},
'SkewedInfo': {
'SkewedColumnNames': [],
'SkewedColumnValueLocationMaps': {},
'SkewedColumnValues': []
},
'SortColumns': [],
'StoredAsSubDirectories': False
},
'TableType': 'EXTERNAL_TABLE',
}
PARTITION_INPUT = {
# 'DatabaseName': 'dbname',
'StorageDescriptor': {
'BucketColumns': [],
'Columns': [],
'Compressed': False,
'InputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat',
'Location': 's3://.../partition=value',
'NumberOfBuckets': -1,
'OutputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat',
'Parameters': {},
'SerdeInfo': {
'Parameters': {'path': 's3://...', 'serialization.format': '1'},
'SerializationLibrary': 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'},
'SkewedInfo': {'SkewedColumnNames': [],
'SkewedColumnValueLocationMaps': {},
'SkewedColumnValues': []},
'SortColumns': [],
'StoredAsSubDirectories': False,
},
# 'TableName': 'source_table',
# 'Values': ['2018-06-26'],
}
from __future__ import unicode_literals
TABLE_INPUT = {
"Owner": "a_fake_owner",
"Parameters": {"EXTERNAL": "TRUE"},
"Retention": 0,
"StorageDescriptor": {
"BucketColumns": [],
"Compressed": False,
"InputFormat": "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
"NumberOfBuckets": -1,
"OutputFormat": "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
"Parameters": {},
"SerdeInfo": {
"Parameters": {"serialization.format": "1"},
"SerializationLibrary": "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
},
"SkewedInfo": {
"SkewedColumnNames": [],
"SkewedColumnValueLocationMaps": {},
"SkewedColumnValues": [],
},
"SortColumns": [],
"StoredAsSubDirectories": False,
},
"TableType": "EXTERNAL_TABLE",
}
PARTITION_INPUT = {
# 'DatabaseName': 'dbname',
"StorageDescriptor": {
"BucketColumns": [],
"Columns": [],
"Compressed": False,
"InputFormat": "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
"Location": "s3://.../partition=value",
"NumberOfBuckets": -1,
"OutputFormat": "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
"Parameters": {},
"SerdeInfo": {
"Parameters": {"path": "s3://...", "serialization.format": "1"},
"SerializationLibrary": "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
},
"SkewedInfo": {
"SkewedColumnNames": [],
"SkewedColumnValueLocationMaps": {},
"SkewedColumnValues": [],
},
"SortColumns": [],
"StoredAsSubDirectories": False,
},
# 'TableName': 'source_table',
# 'Values': ['2018-06-26'],
}

View file

@ -6,11 +6,7 @@ from .fixtures.datacatalog import TABLE_INPUT, PARTITION_INPUT
def create_database(client, database_name):
return client.create_database(
DatabaseInput={
'Name': database_name
}
)
return client.create_database(DatabaseInput={"Name": database_name})
def get_database(client, database_name):
@ -19,12 +15,13 @@ def get_database(client, database_name):
def create_table_input(database_name, table_name, columns=[], partition_keys=[]):
table_input = copy.deepcopy(TABLE_INPUT)
table_input['Name'] = table_name
table_input['PartitionKeys'] = partition_keys
table_input['StorageDescriptor']['Columns'] = columns
table_input['StorageDescriptor']['Location'] = 's3://my-bucket/{database_name}/{table_name}'.format(
database_name=database_name,
table_name=table_name
table_input["Name"] = table_name
table_input["PartitionKeys"] = partition_keys
table_input["StorageDescriptor"]["Columns"] = columns
table_input["StorageDescriptor"][
"Location"
] = "s3://my-bucket/{database_name}/{table_name}".format(
database_name=database_name, table_name=table_name
)
return table_input
@ -33,60 +30,43 @@ def create_table(client, database_name, table_name, table_input=None, **kwargs):
if table_input is None:
table_input = create_table_input(database_name, table_name, **kwargs)
return client.create_table(
DatabaseName=database_name,
TableInput=table_input
)
return client.create_table(DatabaseName=database_name, TableInput=table_input)
def update_table(client, database_name, table_name, table_input=None, **kwargs):
if table_input is None:
table_input = create_table_input(database_name, table_name, **kwargs)
return client.update_table(
DatabaseName=database_name,
TableInput=table_input,
)
return client.update_table(DatabaseName=database_name, TableInput=table_input)
def get_table(client, database_name, table_name):
return client.get_table(
DatabaseName=database_name,
Name=table_name
)
return client.get_table(DatabaseName=database_name, Name=table_name)
def get_tables(client, database_name):
return client.get_tables(
DatabaseName=database_name
)
return client.get_tables(DatabaseName=database_name)
def get_table_versions(client, database_name, table_name):
return client.get_table_versions(
DatabaseName=database_name,
TableName=table_name
)
return client.get_table_versions(DatabaseName=database_name, TableName=table_name)
def get_table_version(client, database_name, table_name, version_id):
return client.get_table_version(
DatabaseName=database_name,
TableName=table_name,
VersionId=version_id,
DatabaseName=database_name, TableName=table_name, VersionId=version_id
)
def create_partition_input(database_name, table_name, values=[], columns=[]):
root_path = 's3://my-bucket/{database_name}/{table_name}'.format(
database_name=database_name,
table_name=table_name
root_path = "s3://my-bucket/{database_name}/{table_name}".format(
database_name=database_name, table_name=table_name
)
part_input = copy.deepcopy(PARTITION_INPUT)
part_input['Values'] = values
part_input['StorageDescriptor']['Columns'] = columns
part_input['StorageDescriptor']['SerdeInfo']['Parameters']['path'] = root_path
part_input["Values"] = values
part_input["StorageDescriptor"]["Columns"] = columns
part_input["StorageDescriptor"]["SerdeInfo"]["Parameters"]["path"] = root_path
return part_input
@ -94,13 +74,13 @@ def create_partition(client, database_name, table_name, partiton_input=None, **k
if partiton_input is None:
partiton_input = create_partition_input(database_name, table_name, **kwargs)
return client.create_partition(
DatabaseName=database_name,
TableName=table_name,
PartitionInput=partiton_input
DatabaseName=database_name, TableName=table_name, PartitionInput=partiton_input
)
def update_partition(client, database_name, table_name, old_values=[], partiton_input=None, **kwargs):
def update_partition(
client, database_name, table_name, old_values=[], partiton_input=None, **kwargs
):
if partiton_input is None:
partiton_input = create_partition_input(database_name, table_name, **kwargs)
return client.update_partition(
@ -113,7 +93,5 @@ def update_partition(client, database_name, table_name, old_values=[], partiton_
def get_partition(client, database_name, table_name, values):
return client.get_partition(
DatabaseName=database_name,
TableName=table_name,
PartitionValues=values,
DatabaseName=database_name, TableName=table_name, PartitionValues=values
)

View file

@ -16,80 +16,82 @@ from . import helpers
@mock_glue
def test_create_database():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
helpers.create_database(client, database_name)
response = helpers.get_database(client, database_name)
database = response['Database']
database = response["Database"]
database.should.equal({'Name': database_name})
database.should.equal({"Name": database_name})
@mock_glue
def test_create_database_already_exists():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'cantcreatethisdatabasetwice'
client = boto3.client("glue", region_name="us-east-1")
database_name = "cantcreatethisdatabasetwice"
helpers.create_database(client, database_name)
with assert_raises(ClientError) as exc:
helpers.create_database(client, database_name)
exc.exception.response['Error']['Code'].should.equal('AlreadyExistsException')
exc.exception.response["Error"]["Code"].should.equal("AlreadyExistsException")
@mock_glue
def test_get_database_not_exits():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'nosuchdatabase'
client = boto3.client("glue", region_name="us-east-1")
database_name = "nosuchdatabase"
with assert_raises(ClientError) as exc:
helpers.get_database(client, database_name)
exc.exception.response['Error']['Code'].should.equal('EntityNotFoundException')
exc.exception.response['Error']['Message'].should.match('Database nosuchdatabase not found')
exc.exception.response["Error"]["Code"].should.equal("EntityNotFoundException")
exc.exception.response["Error"]["Message"].should.match(
"Database nosuchdatabase not found"
)
@mock_glue
def test_create_table():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
helpers.create_database(client, database_name)
table_name = 'myspecialtable'
table_name = "myspecialtable"
table_input = helpers.create_table_input(database_name, table_name)
helpers.create_table(client, database_name, table_name, table_input)
response = helpers.get_table(client, database_name, table_name)
table = response['Table']
table = response["Table"]
table['Name'].should.equal(table_input['Name'])
table['StorageDescriptor'].should.equal(table_input['StorageDescriptor'])
table['PartitionKeys'].should.equal(table_input['PartitionKeys'])
table["Name"].should.equal(table_input["Name"])
table["StorageDescriptor"].should.equal(table_input["StorageDescriptor"])
table["PartitionKeys"].should.equal(table_input["PartitionKeys"])
@mock_glue
def test_create_table_already_exists():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
helpers.create_database(client, database_name)
table_name = 'cantcreatethistabletwice'
table_name = "cantcreatethistabletwice"
helpers.create_table(client, database_name, table_name)
with assert_raises(ClientError) as exc:
helpers.create_table(client, database_name, table_name)
exc.exception.response['Error']['Code'].should.equal('AlreadyExistsException')
exc.exception.response["Error"]["Code"].should.equal("AlreadyExistsException")
@mock_glue
def test_get_tables():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
helpers.create_database(client, database_name)
table_names = ['myfirsttable', 'mysecondtable', 'mythirdtable']
table_names = ["myfirsttable", "mysecondtable", "mythirdtable"]
table_inputs = {}
for table_name in table_names:
@ -99,31 +101,33 @@ def test_get_tables():
response = helpers.get_tables(client, database_name)
tables = response['TableList']
tables = response["TableList"]
tables.should.have.length_of(3)
for table in tables:
table_name = table['Name']
table_name.should.equal(table_inputs[table_name]['Name'])
table['StorageDescriptor'].should.equal(table_inputs[table_name]['StorageDescriptor'])
table['PartitionKeys'].should.equal(table_inputs[table_name]['PartitionKeys'])
table_name = table["Name"]
table_name.should.equal(table_inputs[table_name]["Name"])
table["StorageDescriptor"].should.equal(
table_inputs[table_name]["StorageDescriptor"]
)
table["PartitionKeys"].should.equal(table_inputs[table_name]["PartitionKeys"])
@mock_glue
def test_get_table_versions():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
helpers.create_database(client, database_name)
table_name = 'myfirsttable'
table_name = "myfirsttable"
version_inputs = {}
table_input = helpers.create_table_input(database_name, table_name)
helpers.create_table(client, database_name, table_name, table_input)
version_inputs["1"] = table_input
columns = [{'Name': 'country', 'Type': 'string'}]
columns = [{"Name": "country", "Type": "string"}]
table_input = helpers.create_table_input(database_name, table_name, columns=columns)
helpers.update_table(client, database_name, table_name, table_input)
version_inputs["2"] = table_input
@ -134,174 +138,189 @@ def test_get_table_versions():
response = helpers.get_table_versions(client, database_name, table_name)
vers = response['TableVersions']
vers = response["TableVersions"]
vers.should.have.length_of(3)
vers[0]['Table']['StorageDescriptor']['Columns'].should.equal([])
vers[-1]['Table']['StorageDescriptor']['Columns'].should.equal(columns)
vers[0]["Table"]["StorageDescriptor"]["Columns"].should.equal([])
vers[-1]["Table"]["StorageDescriptor"]["Columns"].should.equal(columns)
for n, ver in enumerate(vers):
n = str(n + 1)
ver['VersionId'].should.equal(n)
ver['Table']['Name'].should.equal(table_name)
ver['Table']['StorageDescriptor'].should.equal(version_inputs[n]['StorageDescriptor'])
ver['Table']['PartitionKeys'].should.equal(version_inputs[n]['PartitionKeys'])
ver["VersionId"].should.equal(n)
ver["Table"]["Name"].should.equal(table_name)
ver["Table"]["StorageDescriptor"].should.equal(
version_inputs[n]["StorageDescriptor"]
)
ver["Table"]["PartitionKeys"].should.equal(version_inputs[n]["PartitionKeys"])
response = helpers.get_table_version(client, database_name, table_name, "3")
ver = response['TableVersion']
ver = response["TableVersion"]
ver['VersionId'].should.equal("3")
ver['Table']['Name'].should.equal(table_name)
ver['Table']['StorageDescriptor']['Columns'].should.equal(columns)
ver["VersionId"].should.equal("3")
ver["Table"]["Name"].should.equal(table_name)
ver["Table"]["StorageDescriptor"]["Columns"].should.equal(columns)
@mock_glue
def test_get_table_version_not_found():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
with assert_raises(ClientError) as exc:
helpers.get_table_version(client, database_name, 'myfirsttable', "20")
helpers.get_table_version(client, database_name, "myfirsttable", "20")
exc.exception.response['Error']['Code'].should.equal('EntityNotFoundException')
exc.exception.response['Error']['Message'].should.match('version', re.I)
exc.exception.response["Error"]["Code"].should.equal("EntityNotFoundException")
exc.exception.response["Error"]["Message"].should.match("version", re.I)
@mock_glue
def test_get_table_version_invalid_input():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
with assert_raises(ClientError) as exc:
helpers.get_table_version(client, database_name, 'myfirsttable', "10not-an-int")
helpers.get_table_version(client, database_name, "myfirsttable", "10not-an-int")
exc.exception.response['Error']['Code'].should.equal('InvalidInputException')
exc.exception.response["Error"]["Code"].should.equal("InvalidInputException")
@mock_glue
def test_get_table_not_exits():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
helpers.create_database(client, database_name)
with assert_raises(ClientError) as exc:
helpers.get_table(client, database_name, 'myfirsttable')
helpers.get_table(client, database_name, "myfirsttable")
exc.exception.response['Error']['Code'].should.equal('EntityNotFoundException')
exc.exception.response['Error']['Message'].should.match('Table myfirsttable not found')
exc.exception.response["Error"]["Code"].should.equal("EntityNotFoundException")
exc.exception.response["Error"]["Message"].should.match(
"Table myfirsttable not found"
)
@mock_glue
def test_get_table_when_database_not_exits():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'nosuchdatabase'
client = boto3.client("glue", region_name="us-east-1")
database_name = "nosuchdatabase"
with assert_raises(ClientError) as exc:
helpers.get_table(client, database_name, 'myfirsttable')
helpers.get_table(client, database_name, "myfirsttable")
exc.exception.response['Error']['Code'].should.equal('EntityNotFoundException')
exc.exception.response['Error']['Message'].should.match('Database nosuchdatabase not found')
exc.exception.response["Error"]["Code"].should.equal("EntityNotFoundException")
exc.exception.response["Error"]["Message"].should.match(
"Database nosuchdatabase not found"
)
@mock_glue
def test_delete_table():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
helpers.create_database(client, database_name)
table_name = 'myspecialtable'
table_name = "myspecialtable"
table_input = helpers.create_table_input(database_name, table_name)
helpers.create_table(client, database_name, table_name, table_input)
result = client.delete_table(DatabaseName=database_name, Name=table_name)
result['ResponseMetadata']['HTTPStatusCode'].should.equal(200)
result["ResponseMetadata"]["HTTPStatusCode"].should.equal(200)
# confirm table is deleted
with assert_raises(ClientError) as exc:
helpers.get_table(client, database_name, table_name)
exc.exception.response['Error']['Code'].should.equal('EntityNotFoundException')
exc.exception.response['Error']['Message'].should.match('Table myspecialtable not found')
exc.exception.response["Error"]["Code"].should.equal("EntityNotFoundException")
exc.exception.response["Error"]["Message"].should.match(
"Table myspecialtable not found"
)
@mock_glue
def test_batch_delete_table():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
helpers.create_database(client, database_name)
table_name = 'myspecialtable'
table_name = "myspecialtable"
table_input = helpers.create_table_input(database_name, table_name)
helpers.create_table(client, database_name, table_name, table_input)
result = client.batch_delete_table(DatabaseName=database_name, TablesToDelete=[table_name])
result['ResponseMetadata']['HTTPStatusCode'].should.equal(200)
result = client.batch_delete_table(
DatabaseName=database_name, TablesToDelete=[table_name]
)
result["ResponseMetadata"]["HTTPStatusCode"].should.equal(200)
# confirm table is deleted
with assert_raises(ClientError) as exc:
helpers.get_table(client, database_name, table_name)
exc.exception.response['Error']['Code'].should.equal('EntityNotFoundException')
exc.exception.response['Error']['Message'].should.match('Table myspecialtable not found')
exc.exception.response["Error"]["Code"].should.equal("EntityNotFoundException")
exc.exception.response["Error"]["Message"].should.match(
"Table myspecialtable not found"
)
@mock_glue
def test_get_partitions_empty():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
response = client.get_partitions(DatabaseName=database_name, TableName=table_name)
response['Partitions'].should.have.length_of(0)
response["Partitions"].should.have.length_of(0)
@mock_glue
def test_create_partition():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
values = ['2018-10-01']
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
values = ["2018-10-01"]
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
before = datetime.now(pytz.utc)
part_input = helpers.create_partition_input(database_name, table_name, values=values)
part_input = helpers.create_partition_input(
database_name, table_name, values=values
)
helpers.create_partition(client, database_name, table_name, part_input)
after = datetime.now(pytz.utc)
response = client.get_partitions(DatabaseName=database_name, TableName=table_name)
partitions = response['Partitions']
partitions = response["Partitions"]
partitions.should.have.length_of(1)
partition = partitions[0]
partition['TableName'].should.equal(table_name)
partition['StorageDescriptor'].should.equal(part_input['StorageDescriptor'])
partition['Values'].should.equal(values)
partition['CreationTime'].should.be.greater_than(before)
partition['CreationTime'].should.be.lower_than(after)
partition["TableName"].should.equal(table_name)
partition["StorageDescriptor"].should.equal(part_input["StorageDescriptor"])
partition["Values"].should.equal(values)
partition["CreationTime"].should.be.greater_than(before)
partition["CreationTime"].should.be.lower_than(after)
@mock_glue
def test_create_partition_already_exist():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
values = ['2018-10-01']
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
values = ["2018-10-01"]
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
@ -311,15 +330,15 @@ def test_create_partition_already_exist():
with assert_raises(ClientError) as exc:
helpers.create_partition(client, database_name, table_name, values=values)
exc.exception.response['Error']['Code'].should.equal('AlreadyExistsException')
exc.exception.response["Error"]["Code"].should.equal("AlreadyExistsException")
@mock_glue
def test_get_partition_not_found():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
values = ['2018-10-01']
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
values = ["2018-10-01"]
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
@ -327,14 +346,15 @@ def test_get_partition_not_found():
with assert_raises(ClientError) as exc:
helpers.get_partition(client, database_name, table_name, values)
exc.exception.response['Error']['Code'].should.equal('EntityNotFoundException')
exc.exception.response['Error']['Message'].should.match('partition')
exc.exception.response["Error"]["Code"].should.equal("EntityNotFoundException")
exc.exception.response["Error"]["Message"].should.match("partition")
@mock_glue
def test_batch_create_partition():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
@ -344,197 +364,221 @@ def test_batch_create_partition():
partition_inputs = []
for i in range(0, 20):
values = ["2018-10-{:2}".format(i)]
part_input = helpers.create_partition_input(database_name, table_name, values=values)
part_input = helpers.create_partition_input(
database_name, table_name, values=values
)
partition_inputs.append(part_input)
client.batch_create_partition(
DatabaseName=database_name,
TableName=table_name,
PartitionInputList=partition_inputs
PartitionInputList=partition_inputs,
)
after = datetime.now(pytz.utc)
response = client.get_partitions(DatabaseName=database_name, TableName=table_name)
partitions = response['Partitions']
partitions = response["Partitions"]
partitions.should.have.length_of(20)
for idx, partition in enumerate(partitions):
partition_input = partition_inputs[idx]
partition['TableName'].should.equal(table_name)
partition['StorageDescriptor'].should.equal(partition_input['StorageDescriptor'])
partition['Values'].should.equal(partition_input['Values'])
partition['CreationTime'].should.be.greater_than(before)
partition['CreationTime'].should.be.lower_than(after)
partition["TableName"].should.equal(table_name)
partition["StorageDescriptor"].should.equal(
partition_input["StorageDescriptor"]
)
partition["Values"].should.equal(partition_input["Values"])
partition["CreationTime"].should.be.greater_than(before)
partition["CreationTime"].should.be.lower_than(after)
@mock_glue
def test_batch_create_partition_already_exist():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
values = ['2018-10-01']
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
values = ["2018-10-01"]
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
helpers.create_partition(client, database_name, table_name, values=values)
partition_input = helpers.create_partition_input(database_name, table_name, values=values)
partition_input = helpers.create_partition_input(
database_name, table_name, values=values
)
response = client.batch_create_partition(
DatabaseName=database_name,
TableName=table_name,
PartitionInputList=[partition_input]
PartitionInputList=[partition_input],
)
response.should.have.key('Errors')
response['Errors'].should.have.length_of(1)
response['Errors'][0]['PartitionValues'].should.equal(values)
response['Errors'][0]['ErrorDetail']['ErrorCode'].should.equal('AlreadyExistsException')
response.should.have.key("Errors")
response["Errors"].should.have.length_of(1)
response["Errors"][0]["PartitionValues"].should.equal(values)
response["Errors"][0]["ErrorDetail"]["ErrorCode"].should.equal(
"AlreadyExistsException"
)
@mock_glue
def test_get_partition():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
values = [['2018-10-01'], ['2018-09-01']]
values = [["2018-10-01"], ["2018-09-01"]]
helpers.create_partition(client, database_name, table_name, values=values[0])
helpers.create_partition(client, database_name, table_name, values=values[1])
response = client.get_partition(DatabaseName=database_name, TableName=table_name, PartitionValues=values[1])
response = client.get_partition(
DatabaseName=database_name, TableName=table_name, PartitionValues=values[1]
)
partition = response['Partition']
partition = response["Partition"]
partition['TableName'].should.equal(table_name)
partition['Values'].should.equal(values[1])
partition["TableName"].should.equal(table_name)
partition["Values"].should.equal(values[1])
@mock_glue
def test_batch_get_partition():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
values = [['2018-10-01'], ['2018-09-01']]
values = [["2018-10-01"], ["2018-09-01"]]
helpers.create_partition(client, database_name, table_name, values=values[0])
helpers.create_partition(client, database_name, table_name, values=values[1])
partitions_to_get = [
{'Values': values[0]},
{'Values': values[1]},
]
response = client.batch_get_partition(DatabaseName=database_name, TableName=table_name, PartitionsToGet=partitions_to_get)
partitions_to_get = [{"Values": values[0]}, {"Values": values[1]}]
response = client.batch_get_partition(
DatabaseName=database_name,
TableName=table_name,
PartitionsToGet=partitions_to_get,
)
partitions = response['Partitions']
partitions = response["Partitions"]
partitions.should.have.length_of(2)
partition = partitions[1]
partition['TableName'].should.equal(table_name)
partition['Values'].should.equal(values[1])
partition["TableName"].should.equal(table_name)
partition["Values"].should.equal(values[1])
@mock_glue
def test_batch_get_partition_missing_partition():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
values = [['2018-10-01'], ['2018-09-01'], ['2018-08-01']]
values = [["2018-10-01"], ["2018-09-01"], ["2018-08-01"]]
helpers.create_partition(client, database_name, table_name, values=values[0])
helpers.create_partition(client, database_name, table_name, values=values[2])
partitions_to_get = [
{'Values': values[0]},
{'Values': values[1]},
{'Values': values[2]},
{"Values": values[0]},
{"Values": values[1]},
{"Values": values[2]},
]
response = client.batch_get_partition(DatabaseName=database_name, TableName=table_name, PartitionsToGet=partitions_to_get)
response = client.batch_get_partition(
DatabaseName=database_name,
TableName=table_name,
PartitionsToGet=partitions_to_get,
)
partitions = response['Partitions']
partitions = response["Partitions"]
partitions.should.have.length_of(2)
partitions[0]['Values'].should.equal(values[0])
partitions[1]['Values'].should.equal(values[2])
partitions[0]["Values"].should.equal(values[0])
partitions[1]["Values"].should.equal(values[2])
@mock_glue
def test_update_partition_not_found_moving():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
with assert_raises(ClientError) as exc:
helpers.update_partition(client, database_name, table_name, old_values=['0000-00-00'], values=['2018-10-02'])
helpers.update_partition(
client,
database_name,
table_name,
old_values=["0000-00-00"],
values=["2018-10-02"],
)
exc.exception.response['Error']['Code'].should.equal('EntityNotFoundException')
exc.exception.response['Error']['Message'].should.match('partition')
exc.exception.response["Error"]["Code"].should.equal("EntityNotFoundException")
exc.exception.response["Error"]["Message"].should.match("partition")
@mock_glue
def test_update_partition_not_found_change_in_place():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
values = ['2018-10-01']
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
values = ["2018-10-01"]
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
with assert_raises(ClientError) as exc:
helpers.update_partition(client, database_name, table_name, old_values=values, values=values)
helpers.update_partition(
client, database_name, table_name, old_values=values, values=values
)
exc.exception.response['Error']['Code'].should.equal('EntityNotFoundException')
exc.exception.response['Error']['Message'].should.match('partition')
exc.exception.response["Error"]["Code"].should.equal("EntityNotFoundException")
exc.exception.response["Error"]["Message"].should.match("partition")
@mock_glue
def test_update_partition_cannot_overwrite():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
values = [['2018-10-01'], ['2018-09-01']]
values = [["2018-10-01"], ["2018-09-01"]]
helpers.create_partition(client, database_name, table_name, values=values[0])
helpers.create_partition(client, database_name, table_name, values=values[1])
with assert_raises(ClientError) as exc:
helpers.update_partition(client, database_name, table_name, old_values=values[0], values=values[1])
helpers.update_partition(
client, database_name, table_name, old_values=values[0], values=values[1]
)
exc.exception.response['Error']['Code'].should.equal('AlreadyExistsException')
exc.exception.response["Error"]["Code"].should.equal("AlreadyExistsException")
@mock_glue
def test_update_partition():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
values = ['2018-10-01']
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
values = ["2018-10-01"]
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
@ -546,23 +590,27 @@ def test_update_partition():
table_name,
old_values=values,
values=values,
columns=[{'Name': 'country', 'Type': 'string'}],
columns=[{"Name": "country", "Type": "string"}],
)
response = client.get_partition(DatabaseName=database_name, TableName=table_name, PartitionValues=values)
partition = response['Partition']
response = client.get_partition(
DatabaseName=database_name, TableName=table_name, PartitionValues=values
)
partition = response["Partition"]
partition['TableName'].should.equal(table_name)
partition['StorageDescriptor']['Columns'].should.equal([{'Name': 'country', 'Type': 'string'}])
partition["TableName"].should.equal(table_name)
partition["StorageDescriptor"]["Columns"].should.equal(
[{"Name": "country", "Type": "string"}]
)
@mock_glue
def test_update_partition_move():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
values = ['2018-10-01']
new_values = ['2018-09-01']
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
values = ["2018-10-01"]
new_values = ["2018-09-01"]
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
@ -574,79 +622,86 @@ def test_update_partition_move():
table_name,
old_values=values,
values=new_values,
columns=[{'Name': 'country', 'Type': 'string'}],
columns=[{"Name": "country", "Type": "string"}],
)
with assert_raises(ClientError) as exc:
helpers.get_partition(client, database_name, table_name, values)
# Old partition shouldn't exist anymore
exc.exception.response['Error']['Code'].should.equal('EntityNotFoundException')
exc.exception.response["Error"]["Code"].should.equal("EntityNotFoundException")
response = client.get_partition(DatabaseName=database_name, TableName=table_name, PartitionValues=new_values)
partition = response['Partition']
response = client.get_partition(
DatabaseName=database_name, TableName=table_name, PartitionValues=new_values
)
partition = response["Partition"]
partition["TableName"].should.equal(table_name)
partition["StorageDescriptor"]["Columns"].should.equal(
[{"Name": "country", "Type": "string"}]
)
partition['TableName'].should.equal(table_name)
partition['StorageDescriptor']['Columns'].should.equal([{'Name': 'country', 'Type': 'string'}])
@mock_glue
def test_delete_partition():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
values = ['2018-10-01']
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
values = ["2018-10-01"]
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
part_input = helpers.create_partition_input(database_name, table_name, values=values)
part_input = helpers.create_partition_input(
database_name, table_name, values=values
)
helpers.create_partition(client, database_name, table_name, part_input)
client.delete_partition(
DatabaseName=database_name,
TableName=table_name,
PartitionValues=values,
DatabaseName=database_name, TableName=table_name, PartitionValues=values
)
response = client.get_partitions(DatabaseName=database_name, TableName=table_name)
partitions = response['Partitions']
partitions = response["Partitions"]
partitions.should.be.empty
@mock_glue
def test_delete_partition_bad_partition():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
values = ['2018-10-01']
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
values = ["2018-10-01"]
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
with assert_raises(ClientError) as exc:
client.delete_partition(
DatabaseName=database_name,
TableName=table_name,
PartitionValues=values,
DatabaseName=database_name, TableName=table_name, PartitionValues=values
)
exc.exception.response['Error']['Code'].should.equal('EntityNotFoundException')
exc.exception.response["Error"]["Code"].should.equal("EntityNotFoundException")
@mock_glue
def test_batch_delete_partition():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
partition_inputs = []
for i in range(0, 20):
values = ["2018-10-{:2}".format(i)]
part_input = helpers.create_partition_input(database_name, table_name, values=values)
part_input = helpers.create_partition_input(
database_name, table_name, values=values
)
partition_inputs.append(part_input)
client.batch_create_partition(
DatabaseName=database_name,
TableName=table_name,
PartitionInputList=partition_inputs
PartitionInputList=partition_inputs,
)
partition_values = [{"Values": p["Values"]} for p in partition_inputs]
@ -657,26 +712,29 @@ def test_batch_delete_partition():
PartitionsToDelete=partition_values,
)
response.should_not.have.key('Errors')
response.should_not.have.key("Errors")
@mock_glue
def test_batch_delete_partition_with_bad_partitions():
client = boto3.client('glue', region_name='us-east-1')
database_name = 'myspecialdatabase'
table_name = 'myfirsttable'
client = boto3.client("glue", region_name="us-east-1")
database_name = "myspecialdatabase"
table_name = "myfirsttable"
helpers.create_database(client, database_name)
helpers.create_table(client, database_name, table_name)
partition_inputs = []
for i in range(0, 20):
values = ["2018-10-{:2}".format(i)]
part_input = helpers.create_partition_input(database_name, table_name, values=values)
part_input = helpers.create_partition_input(
database_name, table_name, values=values
)
partition_inputs.append(part_input)
client.batch_create_partition(
DatabaseName=database_name,
TableName=table_name,
PartitionInputList=partition_inputs
PartitionInputList=partition_inputs,
)
partition_values = [{"Values": p["Values"]} for p in partition_inputs]
@ -691,9 +749,9 @@ def test_batch_delete_partition_with_bad_partitions():
PartitionsToDelete=partition_values,
)
response.should.have.key('Errors')
response['Errors'].should.have.length_of(3)
error_partitions = map(lambda x: x['PartitionValues'], response['Errors'])
['2018-11-01'].should.be.within(error_partitions)
['2018-11-02'].should.be.within(error_partitions)
['2018-11-03'].should.be.within(error_partitions)
response.should.have.key("Errors")
response["Errors"].should.have.length_of(3)
error_partitions = map(lambda x: x["PartitionValues"], response["Errors"])
["2018-11-01"].should.be.within(error_partitions)
["2018-11-02"].should.be.within(error_partitions)
["2018-11-03"].should.be.within(error_partitions)