Skip to content

Commit

Permalink
Add Python unit test to verify that field_ids are correctly filtered…
Browse files Browse the repository at this point in the history
… through to the Arrow schema in Python
  • Loading branch information
wesm committed Feb 12, 2020
1 parent c73b87d commit 2ebdd5e
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 4 deletions.
49 changes: 45 additions & 4 deletions python/pyarrow/tests/test_parquet.py
Expand Up @@ -3251,6 +3251,13 @@ def test_categorical_order_survives_roundtrip():
tm.assert_frame_equal(result, df)


def _simple_table_write_read(table):
    """Round-trip ``table`` through an in-memory Parquet file.

    The table is written with ``pq.write_table`` into a
    ``pa.BufferOutputStream``; the bytes collected by that stream are then
    wrapped in a ``pa.BufferReader`` and read back with ``pq.read_table``.
    The table that was read back is returned.
    """
    sink = pa.BufferOutputStream()
    pq.write_table(table, sink)
    return pq.read_table(pa.BufferReader(sink.getvalue()))


def test_dictionary_array_automatically_read():
# ARROW-3246

Expand All @@ -3269,16 +3276,50 @@ def test_dictionary_array_automatically_read():
dict_values))

table = pa.table([pa.chunked_array(chunks)], names=['f0'])
result = _simple_table_write_read(table)

assert_tables_equal(result, table)

# The only key in the metadata was the Arrow schema key
assert result.schema.metadata is None


def test_field_id_metadata():
# ARROW-7080
# Verifies that the Arrow schema exposed by a ParquetFile carries each
# field's Parquet field_id under the b'PARQUET::field_id' metadata key.
#
# NOTE(review): this listing is a rendered diff without +/- markers; the
# read_table / assert_tables_equal / "metadata is None" statements below
# may be lines removed from the preceding test rather than part of this
# function -- confirm against the actual file.
#
# Three columns: a plain int32, a list<int32> holding one empty list, and
# a binary column.
table = pa.table([pa.array([1], type='int32'),
pa.array([[]], type=pa.list_(pa.int32())),
pa.array([b'boo'], type='binary')],
['f0', 'f1', 'f2'])

# Write the table into an in-memory buffer and keep the resulting bytes.
bio = pa.BufferOutputStream()
pq.write_table(table, bio)
contents = bio.getvalue()
result = pq.read_table(pa.BufferReader(contents))

assert_tables_equal(result, table)
# Open the same bytes as a ParquetFile to inspect its Arrow schema.
pf = pq.ParquetFile(pa.BufferReader(contents))
schema = pf.schema_arrow

# The only key in the metadata was the Arrow schema key
assert result.schema.metadata is None
# Expected Parquet schema for reference
#
# required group field_id=0 schema {
# optional int32 field_id=1 f0;
# optional group field_id=2 f1 (List) {
# repeated group field_id=3 list {
# optional int32 field_id=4 item;
# }
# }
# optional binary field_id=5 f2;
# }

# Metadata key under which each field's id is looked up; values are the
# id rendered as bytes (b'1', b'2', ...).
field_name = b'PARQUET::field_id'
assert schema[0].metadata[field_name] == b'1'

list_field = schema[1]
assert list_field.metadata[field_name] == b'2'

# The list type's value field is expected to carry the id of the innermost
# "item" node (field_id=4); the intermediate repeated group (field_id=3)
# is not asserted anywhere in this test.
list_item_field = list_field.type.value_field
assert list_item_field.metadata[field_name] == b'4'

assert schema[2].metadata[field_name] == b'5'


@pytest.mark.pandas
Expand Down
4 changes: 4 additions & 0 deletions python/pyarrow/types.pxi
Expand Up @@ -253,6 +253,10 @@ cdef class ListType(DataType):
def __reduce__(self):
    """Support pickling: rebuild the type by calling ``list_`` on the value type."""
    constructor_args = (self.value_type,)
    return list_, constructor_args

@property
def value_field(self):
# Exposes the list type's child field (not only its type), so callers can
# reach attributes of the field itself such as its metadata.
# NOTE(review): pyarrow_wrap_field presumably converts the C++ field
# returned by list_type.value_field() into a Python Field -- confirm.
return pyarrow_wrap_field(self.list_type.value_field())

@property
def value_type(self):
"""
Expand Down

0 comments on commit 2ebdd5e

Please sign in to comment.