Add create_table, get_table, and get_tables for the Glue Data Catalog

This commit is contained in:
TheDooner64 2018-07-26 17:05:09 -04:00
commit d988ee15fe
8 changed files with 235 additions and 18 deletions

View file

@ -0,0 +1 @@
from __future__ import unicode_literals

View file

@ -0,0 +1 @@
from __future__ import unicode_literals

View file

@ -0,0 +1,31 @@
from __future__ import unicode_literals
# Baseline Glue ``TableInput`` payload for an external, Parquet-backed table.
# The per-table keys ('Name', 'PartitionKeys', and the storage descriptor's
# 'Columns' and 'Location') are deliberately absent from this template.

# SerDe settings for the Hive Parquet serializer/deserializer.
_PARQUET_SERDE_INFO = {
    'Parameters': {
        'serialization.format': '1',
    },
    'SerializationLibrary': 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe',
}

# Skew information with every collection left empty.
_EMPTY_SKEWED_INFO = {
    'SkewedColumnNames': [],
    'SkewedColumnValueLocationMaps': {},
    'SkewedColumnValues': [],
}

# Storage layout: Parquet input/output formats, no bucketing, no sorting.
_STORAGE_DESCRIPTOR = {
    'BucketColumns': [],
    'Compressed': False,
    'InputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat',
    'NumberOfBuckets': -1,
    'OutputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat',
    'Parameters': {},
    'SerdeInfo': _PARQUET_SERDE_INFO,
    'SkewedInfo': _EMPTY_SKEWED_INFO,
    'SortColumns': [],
    'StoredAsSubDirectories': False,
}

TABLE_INPUT = {
    'Owner': 'a_fake_owner',
    'Parameters': {
        'EXTERNAL': 'TRUE',
    },
    'Retention': 0,
    'StorageDescriptor': _STORAGE_DESCRIPTOR,
    'TableType': 'EXTERNAL_TABLE',
}

View file

@ -0,0 +1,46 @@
from __future__ import unicode_literals
import copy
from .fixtures.datacatalog import TABLE_INPUT
def create_database(client, database_name):
    """Create a Glue database called ``database_name`` through ``client``.

    The request carries a ``DatabaseInput`` holding only the ``Name`` key.
    Returns whatever ``client.create_database`` returns.
    """
    database_input = {'Name': database_name}
    return client.create_database(DatabaseInput=database_input)
def get_database(client, database_name):
    """Look up the Glue database ``database_name`` and return the raw response."""
    response = client.get_database(Name=database_name)
    return response
def create_table_input(table_name, s3_location, columns=None, partition_keys=None):
    """Build a Glue ``TableInput`` dict for ``table_name`` stored at ``s3_location``.

    The result starts as a deep copy of ``TABLE_INPUT``, so the shared
    template is never modified, and then receives the per-table values.

    Args:
        table_name: Stored under the ``'Name'`` key.
        s3_location: Stored under ``['StorageDescriptor']['Location']``.
        columns: List stored under ``['StorageDescriptor']['Columns']``.
            Defaults to a fresh empty list.
        partition_keys: List stored under ``'PartitionKeys'``. Defaults to a
            fresh empty list.

    Returns:
        A new dict suitable for the ``TableInput`` argument of
        ``create_table``. Lists passed in explicitly are stored as-is (not
        copied).
    """
    # ``None`` stands in for "nothing supplied". A literal ``[]`` default is
    # created once at definition time and would be shared by every call, so
    # appending to one returned table input would leak into all later ones.
    if columns is None:
        columns = []
    if partition_keys is None:
        partition_keys = []

    table_input = copy.deepcopy(TABLE_INPUT)
    table_input['Name'] = table_name
    table_input['PartitionKeys'] = partition_keys

    storage_descriptor = table_input['StorageDescriptor']
    storage_descriptor['Columns'] = columns
    storage_descriptor['Location'] = s3_location
    return table_input
def create_table(client, database_name, table_name, table_input):
    """Create a table in ``database_name`` from ``table_input`` via ``client``.

    ``table_name`` is accepted but not used here; only ``database_name`` and
    ``table_input`` are forwarded to ``client.create_table``.
    Returns whatever ``client.create_table`` returns.
    """
    request = {
        'DatabaseName': database_name,
        'TableInput': table_input,
    }
    return client.create_table(**request)
def get_table(client, database_name, table_name):
    """Fetch ``table_name`` from ``database_name`` and return the raw response."""
    response = client.get_table(DatabaseName=database_name, Name=table_name)
    return response
def get_tables(client, database_name):
    """List the tables of ``database_name`` and return the raw response."""
    response = client.get_tables(DatabaseName=database_name)
    return response

View file

@ -6,27 +6,16 @@ import boto3
from botocore.client import ClientError
from moto import mock_glue
def create_database(client, database_name):
    """Ask ``client`` to create a Glue database named ``database_name``.

    Only the ``Name`` field of ``DatabaseInput`` is populated. The client's
    response is returned unchanged.
    """
    payload = {'Name': database_name}
    return client.create_database(DatabaseInput=payload)
def get_database(client, database_name):
    """Return the client's response for the database named ``database_name``."""
    result = client.get_database(Name=database_name)
    return result
from . import helpers
@mock_glue
def test_create_database():
    """A database created through the mocked Glue client can be fetched back by name."""
    client = boto3.client('glue', region_name='us-east-1')
    database_name = 'myspecialdatabase'

    # Create the database exactly once: a second create for the same name is
    # rejected with DatabaseAlreadyExistsException and would abort this test.
    helpers.create_database(client, database_name)

    response = helpers.get_database(client, database_name)
    database = response['Database']

    database.should.equal({'Name': database_name})
@ -35,10 +24,85 @@ def test_create_database():
@mock_glue
def test_create_database_already_exists():
    """Creating a database under a name that is already taken raises DatabaseAlreadyExistsException."""
    client = boto3.client('glue', region_name='us-east-1')
    database_name = 'cantcreatethisdatabasetwice'
    helpers.create_database(client, database_name)

    # Keep a single call inside the context manager: anything placed after
    # the raising call would never execute.
    with assert_raises(ClientError) as exc:
        helpers.create_database(client, database_name)

    exc.exception.response['Error']['Code'].should.equal('DatabaseAlreadyExistsException')
@mock_glue
def test_create_table():
    """A created table is returned by get_table with the name, storage descriptor and partition keys it was given."""
    glue = boto3.client('glue', region_name='us-east-1')
    db_name = 'myspecialdatabase'
    helpers.create_database(glue, db_name)

    tbl_name = 'myspecialtable'
    location = 's3://my-bucket/{database_name}/{table_name}'.format(
        table_name=tbl_name,
        database_name=db_name,
    )
    expected = helpers.create_table_input(tbl_name, location)
    helpers.create_table(glue, db_name, tbl_name, expected)

    actual = helpers.get_table(glue, db_name, tbl_name)['Table']

    # Compare only the fields that come from the table input.
    for key in ('Name', 'StorageDescriptor', 'PartitionKeys'):
        actual[key].should.equal(expected[key])
@mock_glue
def test_create_table_already_exists():
    """Creating the same table twice in one database raises TableAlreadyExistsException."""
    glue = boto3.client('glue', region_name='us-east-1')
    db_name = 'myspecialdatabase'
    helpers.create_database(glue, db_name)

    tbl_name = 'cantcreatethistabletwice'
    location = 's3://my-bucket/{database_name}/{table_name}'.format(
        table_name=tbl_name,
        database_name=db_name,
    )
    duplicate_input = helpers.create_table_input(tbl_name, location)

    # The first create succeeds; the identical second one must be rejected.
    helpers.create_table(glue, db_name, tbl_name, duplicate_input)
    with assert_raises(ClientError) as exc:
        helpers.create_table(glue, db_name, tbl_name, duplicate_input)

    error_code = exc.exception.response['Error']['Code']
    error_code.should.equal('TableAlreadyExistsException')
@mock_glue
def test_get_tables():
    """get_tables lists the three created tables, each matching the input it was created from."""
    glue = boto3.client('glue', region_name='us-east-1')
    db_name = 'myspecialdatabase'
    helpers.create_database(glue, db_name)

    # Remember each table input by name so the listing can be checked
    # regardless of the order in which the tables come back.
    expected_by_name = {}
    for name in ['myfirsttable', 'mysecondtable', 'mythirdtable']:
        location = 's3://my-bucket/{database_name}/{table_name}'.format(
            table_name=name,
            database_name=db_name,
        )
        expected = helpers.create_table_input(name, location)
        expected_by_name[name] = expected
        helpers.create_table(glue, db_name, name, expected)

    listed = helpers.get_tables(glue, db_name)['TableList']

    assert len(listed) == 3

    for table in listed:
        expected = expected_by_name[table['Name']]
        for key in ('Name', 'StorageDescriptor', 'PartitionKeys'):
            table[key].should.equal(expected[key])