In [1]:
import os

import pyarrow
import pymapd
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [2]:
def cursor2df(cursor):
    """Materialise the rows of a DB-API style cursor as a pandas DataFrame.

    Column labels are taken from the ``name`` attribute of each entry in
    ``cursor.description``; the rows come from ``cursor.fetchall()``.
    """
    headers = []
    for column in cursor.description:
        headers.append(column.name)
    rows = cursor.fetchall()
    return pd.DataFrame(rows, columns=headers)

In [3]:
# Connection settings are read from the environment so real credentials never
# have to be committed with the notebook. The fallbacks are the values that
# were previously hardcoded here, so an unconfigured local setup behaves as before.
con = pymapd.connect(
    host=os.environ.get('OMNISCI_HOST', 'localhost'),
    user=os.environ.get('OMNISCI_USER', 'admin'),
    password=os.environ.get('OMNISCI_PASSWORD', 'HyperInteractive'),
)

In [4]:
# Report the server version. Note this reaches into the connection's
# private `_client` attribute rather than a public accessor.
con._client.get_version()

'5.2.0dev-20200329-cecceef8da'

## Array fields

In [5]:
# Drop any previous copy first so the cell can be re-run from a clean state,
# then create a table mixing an integer-array column with scalar columns.
drop_sql = '''
DROP TABLE IF EXISTS mytablearray;
'''
create_sql = '''
CREATE TABLE mytablearray (
    col_a INT[],
    col_b INT,
    col_c text,
    col_d double
);
'''
con.execute(drop_sql)
con.execute(create_sql)

<pymapd.cursor.Cursor at 0x7eff5a3b9c10>

In [6]:
# Sample frame whose first column holds variable-length integer lists,
# alongside plain int, string and float columns.
array_values = [[1, 2], [4, 5, 6, 7]]
df = pd.DataFrame(dict(a=array_values, b=[1, 2], c=['a', 'b'], d=[1.1, 2.2]))
df

Unnamed: 0,a,b,c,d
0,"[1, 2]",1,a,1.1
1,"[4, 5, 6, 7]",2,b,2.2


In [7]:
# Inspect the column dtypes; the list-valued column `a` is held as generic `object`.
df.dtypes

a     object
b      int64
c     object
d    float64
dtype: object

In [8]:
# Show only the list-valued column; selecting with a list keeps it a one-column DataFrame.
df.loc[:,['a']]

Unnamed: 0,a
0,"[1, 2]"
1,"[4, 5, 6, 7]"


In [9]:
# Load the frame into the table using pymapd's columnar method.
# NOTE(review): frame columns are a..d while the table has col_a..col_d —
# presumably matched by position; confirm against the pymapd docs.
con.load_table('mytablearray', df, method='columnar')

In [10]:
# Read everything back so the stored rows can be compared with the input frame.
cur = con.execute('select * from mytablearray')

In [11]:
# Render the query result as a DataFrame for display.
cursor2df(cur)

Unnamed: 0,col_a,col_b,col_c,col_d
0,"[1, 2]",1,a,1.1
1,"[4, 5, 6, 7]",2,b,2.2


### Tests

#### Test 1: single INT[] column, including empty arrays

In [12]:
table_name = 'mytablearray_test1'

# Build both statements up front, then recreate the table from scratch.
drop_sql = '''
DROP TABLE IF EXISTS {};
'''.format(table_name)
create_sql = '''
CREATE TABLE {} (
    ary INT[]
);
'''.format(table_name)

con.execute(drop_sql)
con.execute(create_sql)

<pymapd.cursor.Cursor at 0x7eff543f86d0>

In [13]:
# One record per row: populated arrays interleaved with two empty ones.
ary_values = ([2, 3, 4], [4444], [], [], [2, 3, 4])
df = pd.DataFrame([{'ary': values} for values in ary_values])
df

Unnamed: 0,ary
0,"[2, 3, 4]"
1,[4444]
2,[]
3,[]
4,"[2, 3, 4]"


In [14]:
# Load the frame into the freshly created table using the columnar method.
con.load_table(table_name, df, method='columnar')

In [15]:
# Round-trip check: what comes back from the table must equal what was loaded.
select_sql = 'select * from {}'.format(table_name)
cur = con.execute(select_sql)
round_tripped = cursor2df(cur)
pd.testing.assert_frame_equal(round_tripped, df)

#### Test 2: INT[] column plus a TEXT column, with an empty array

In [16]:
table_name = 'mytablearray_test2'

# Build both statements up front, then recreate the table from scratch.
drop_sql = '''
DROP TABLE IF EXISTS {};
'''.format(table_name)
create_sql = '''
CREATE TABLE {} (
    ary INT[],
    strtest TEXT
);
'''.format(table_name)

con.execute(drop_sql)
con.execute(create_sql)

<pymapd.cursor.Cursor at 0x7eff88661890>

In [17]:
# Every row shares the same string; only the array contents vary (one is empty).
ary_values = ([2, 3, 4], [2, 3], [4444], [], [2, 3, 4])
df = pd.DataFrame([{'ary': values, 'strtest': 'teststr'} for values in ary_values])
df

Unnamed: 0,ary,strtest
0,"[2, 3, 4]",teststr
1,"[2, 3]",teststr
2,[4444],teststr
3,[],teststr
4,"[2, 3, 4]",teststr


In [18]:
# Load the frame into the freshly created table using the columnar method.
con.load_table(table_name, df, method='columnar')

In [19]:
# Round-trip check: what comes back from the table must equal what was loaded.
select_sql = 'select * from {}'.format(table_name)
cur = con.execute(select_sql)
round_tripped = cursor2df(cur)
pd.testing.assert_frame_equal(round_tripped, df)

#### Test 3: INT[] column plus a TEXT column, with a NULL (None) array

In [20]:
table_name = 'mytablearray_test3'

# Build both statements up front, then recreate the table from scratch.
drop_sql = '''
DROP TABLE IF EXISTS {};
'''.format(table_name)
create_sql = '''
CREATE TABLE {} (
    ary INT[],
    strtest TEXT
);
'''.format(table_name)

con.execute(drop_sql)
con.execute(create_sql)

<pymapd.cursor.Cursor at 0x7eff88661090>

In [21]:
# Same layout as the previous test, but the fourth row carries None instead of an array.
ary_values = ([2, 3, 4], [2, 3], [4444], None, [2, 3, 4])
df = pd.DataFrame([{'ary': values, 'strtest': 'teststr'} for values in ary_values])
df

Unnamed: 0,ary,strtest
0,"[2, 3, 4]",teststr
1,"[2, 3]",teststr
2,[4444],teststr
3,,teststr
4,"[2, 3, 4]",teststr


In [22]:
# Load the frame (including its None array) using the columnar method.
con.load_table(table_name, df, method='columnar')

In [23]:
# Read the table back, show it, and require it to match the loaded frame.
select_sql = 'select * from {}'.format(table_name)
cur = con.execute(select_sql)
result = cursor2df(cur)
display(result)
pd.testing.assert_frame_equal(df, result)

Unnamed: 0,ary,strtest
0,"[2, 3, 4]",teststr
1,"[2, 3]",teststr
2,[4444],teststr
3,,teststr
4,"[2, 3, 4]",teststr


#### Test 4: single INT[] column with both an empty and a NULL array, loaded via `load_table_columnar`

In [24]:
table_name = 'mytablearray_test4'

# Recreate the target table from scratch.
drop_sql = '''
DROP TABLE IF EXISTS {};
'''.format(table_name)
create_sql = '''
CREATE TABLE {} (
  ary INTEGER[]
);
'''.format(table_name)
con.execute(drop_sql)
con.execute(create_sql)

# Rows cover populated arrays, an empty array and a missing (None) array.
ary_values = ([2, 3, 4], [4444], [], None, [2, 3, 4])
data = [{'ary': values} for values in ary_values]

df = pd.DataFrame(data)
# This test calls the columnar loader directly instead of going through load_table.
con.load_table_columnar(table_name, df)

In [25]:
# Read the table back, show it, and require it to match the loaded frame.
select_sql = 'select * from {}'.format(table_name)
cur = con.execute(select_sql)
result = cursor2df(cur)
display(result)
pd.testing.assert_frame_equal(df, result)

Unnamed: 0,ary
0,"[2, 3, 4]"
1,[4444]
2,[]
3,
4,"[2, 3, 4]"


## Geospatial fields

In [26]:
table_name = 'mytablegeo'

# Recreate a table whose first column is a POINT geometry, followed by scalar columns.
drop_sql = '''
DROP TABLE IF EXISTS {};
'''.format(table_name)
create_sql = '''
CREATE TABLE {} (
    col_a POINT,
    col_b INT,
    col_c text,
    col_d double
);
'''.format(table_name)

con.execute(drop_sql)
con.execute(create_sql)

<pymapd.cursor.Cursor at 0x7eff54413f90>

In [27]:
# Two sample rows: a shapely Point per row plus int, string and float columns.
points = [Point(0, 0), Point(1, 1)]
df = gpd.GeoDataFrame(dict(a=points, b=[1, 2], c=['a', 'b'], d=[1.1, 2.2]))
df

Unnamed: 0,a,b,c,d
0,POINT (0 0),1,a,1.1
1,POINT (1 1),2,b,2.2


In [28]:
# NOTE: loading the GeoDataFrame with the columnar method does not work, so the
# call is left disabled. The table therefore stays empty and the query below
# returns a frame with column headers only.
# con.load_table(table_name, df, method='columnar')

select_sql = 'select * from {}'.format(table_name)
cur = con.execute(select_sql)
df = cursor2df(cur)
df

Unnamed: 0,col_a,col_b,col_c,col_d
