In [5]:
"""
Arrow array and table creation
"""

import pyarrow as pa
import pandas as pd

# Build an Arrow Array of integers straight from a plain Python list
int_array = pa.array([1, 2, 3, 4, 5])

# A Python None in the input becomes a null entry in the Arrow Array;
# this array holds strings instead of integers
str_array = pa.array(["hello", "world", None, "arrow"])

# Show both arrays, one after the other
print(int_array, str_array, sep="\n")

# Row-oriented records: one dict per row, every row carrying the same keys
data = [
    {"name": "John", "age": 30, "city": "New York"},
    {"name": "Jane", "age": 28, "city": "Paris"},
    {"name": "Doe", "age": 35, "city": "London"},
]

# Create an Arrow Table directly from the list of dicts.
# Table.from_pylist consumes the records as-is, so there is no need to build
# an intermediate pandas DataFrame only to convert it to Arrow again.
table = pa.Table.from_pylist(data)

print(table)

[
  1,
  2,
  3,
  4,
  5
]
[
  "hello",
  "world",
  null,
  "arrow"
]
pyarrow.Table
name: string
age: int64
city: string
----
name: [["John","Jane","Doe"]]
age: [[30,28,35]]
city: [["New York","Paris","London"]]


In [11]:
"""
Manipulating Arrow Arrays
"""

import pyarrow as pa

# Slicing Array: slice() takes (offset, length), not (start, stop) --
# this keeps 3 elements beginning at index 1
int_array = pa.array([10, 20, 30, 40, 50])
sliced_array = int_array.slice(offset=1, length=3)

print(sliced_array)

# Element-wise arithmetic through NumPy: export the Arrow data as a NumPy
# array, compute there, and wrap the result in an Arrow Array again.
# (Arrow-native kernels are also available in the pyarrow.compute module.)
numpy_array = int_array.to_numpy()
# Double every element with ordinary NumPy broadcasting
result_array = numpy_array * 2
# Back to Arrow
arrow_result_array = pa.array(result_array)

print(arrow_result_array)

# Filtering Table
import pyarrow.compute as pc

# NOTE: `table` is not defined in this cell -- it must already exist in the
# kernel (it is created by the "Arrow array and table creation" cell).
# Build a boolean mask that is true where age == 30, then keep those rows.
age_is_30 = pc.equal(table.column("age"), 30)
filtered_table = table.filter(age_is_30)
print(filtered_table)

# Serializing and Deserializing Arrow Tables
# The writer is used as a context manager so it is closed (and the file
# finalized) even if write_table raises, before the underlying file handle
# is closed by the outer `with`.
with open("data.arrow", "wb") as f:
    with pa.ipc.new_file(f, table.schema) as writer:
        writer.write_table(table)

# Read the file back; the reader is likewise released before the file closes
with open("data.arrow", "rb") as f:
    with pa.ipc.open_file(f) as reader:
        deserialized_table = reader.read_all()

# Sanity check: the round trip must give back the same schema and data
assert deserialized_table.equals(table), "Arrow IPC round trip changed the table"

# Arrow -> pandas: materialize the Arrow Table as a DataFrame
df = table.to_pandas()

# pandas -> Arrow: build a new Arrow Table from that DataFrame
arrow_table_from_df = pa.Table.from_pandas(df=df)

[
  20,
  30,
  40
]
[
  20,
  40,
  60,
  80,
  100
]
pyarrow.Table
name: string
age: int64
city: string
----
name: [["John"]]
age: [[30]]
city: [["New York"]]
