In [1]:
import inspect
import os
import re
from contextlib import contextmanager
from warnings import catch_warnings, filterwarnings, warn

from pandas.io.sql import read_sql, to_sql
from sqlalchemy import create_engine, text
from sqlalchemy.event import listen
from sqlalchemy.exc import DatabaseError, ResourceClosedError
from sqlalchemy.pool import NullPool

In [2]:
# Connection URI for the scratch database. Supply it through the MYPANDAS_URI
# environment variable so real credentials are never saved with the notebook;
# the literal below is only a local-development fallback.
URI = os.environ.get("MYPANDAS_URI", "mysql://root:password@localhost/mypandas")

In [3]:
# NullPool turns connection pooling off: each engine.connect() opens a new
# DBAPI connection, and closing it really closes it instead of returning it
# to a pool.
engine = create_engine(URI, poolclass=NullPool)

In [4]:
engine

Engine(mysql://root:***@localhost/mypandas)

In [5]:
engine.name

'mysql'

In [6]:
# Starts out empty and is not referenced again in the cells shown here.
# NOTE(review): presumably meant to record which DataFrames have already been
# written to the database as tables — confirm, or drop this cell.
LOADED_TABLES = set()

In [7]:
# Open a connection for the experiments below. Nothing visible in this
# notebook closes it, so call _conn.close() once it is no longer needed.
_conn = engine.connect()

In [8]:
_conn

<sqlalchemy.engine.base.Connection at 0x144083be0>

In [9]:
# PostgreSQL only: make the session's temporary schema (alias `pg_temp`) the
# sole schema listed in search_path. The engine in this notebook is MySQL, so
# the branch is skipped here.
#
# Open question from the original author: don't the docs say pg_temp is always
# searched first anyway?
#   Yes, for *lookups* of relation and data type names. But search_path also
#   controls where objects are *created*: an unqualified CREATE TABLE lands in
#   the first valid schema named in the path.
#   NOTE(review): presumably that is the purpose here — tables written without
#   an explicit schema end up in the temporary schema and disappear with the
#   session — confirm against the mypandas implementation.
#
# References:
#   https://www.postgresql.org/docs/current/ddl-schemas.html#DDL-SCHEMAS-PATH
#   https://www.postgresql.org/docs/current/runtime-config-client.html
#
# From the PostgreSQL documentation on search_path:
#   This variable specifies the order in which schemas are searched when an
#   object (table, data type, function, etc.) is referenced by a simple name
#   with no schema specified. When there are objects of identical names in
#   different schemas, the one found first in the search path is used. An
#   object that is not in any of the schemas in the search path can only be
#   referenced by specifying its containing schema with a qualified (dotted)
#   name.
#
#   Likewise, the current session's temporary-table schema, pg_temp_nnn, is
#   always searched if it exists. It can be explicitly listed in the path by
#   using the alias pg_temp. If it is not listed in the path then it is
#   searched first (even before pg_catalog). However, the temporary schema is
#   only searched for relation (table, view, sequence, etc) and data type
#   names. It is never searched for function or operator names.
if engine.name == "postgresql":
    # Wrap the statement in text(): SQLAlchemy 2.0 no longer accepts a plain
    # string in Connection.execute().
    _conn.execute(text("set search_path to pg_temp"))

In [10]:
from mypandas.sqldf import extract_table_names

In [11]:
# Comma-separated FROM list (implicit cross join): the captured output below
# shows both table names being returned as a set.
QUERY = """
SELECT *
FROM apple, banana;
"""
extract_table_names(QUERY)

QUALNAME extract_table_names
OUTPUT {'banana', 'apple'}


{'apple', 'banana'}

In [12]:
from mypandas.sqldf import MyPandas

# Point at a second database. Credentials are taken from the LEETCODE_URI
# environment variable when it is set; the literal is only a local fallback.
# Note that this rebinds the notebook-level URI assigned in an earlier cell
# (the existing `engine` object is not affected).
URI = os.environ.get("LEETCODE_URI", "mysql://root:password@localhost/leetcode")
mpd = MyPandas(URI)
# Cross join of Purchases with itself. SELECT * returns both copies of every
# column, so the result carries duplicated column names (see output below).
QUERY = """
SELECT *
FROM Purchases p1, Purchases p2;
"""
# At the top level of a cell, locals() is the notebook's global namespace.
# NOTE(review): presumably mpd uses it to look up DataFrames named in the
# query — confirm against MyPandas.__call__.
df = mpd(QUERY, locals())

QUALNAME PandaSQL.__init__
OUTPUT None
QUALNAME PandaSQL.__call__
QUALNAME PandaSQL._init_connection
OUTPUT None
QUALNAME extract_table_names
OUTPUT {'Purchases'}
OUTPUT      user_id          time_stamp  amount  user_id          time_stamp  amount
0          4 2022-11-24 21:50:35    1260        1 2022-07-27 08:41:59    1059
1          5 2022-02-01 05:05:34    6756        1 2022-07-27 08:41:59    1059
2          4 2022-02-11 18:28:11    7134        1 2022-07-27 08:41:59    1059
3          2 2022-03-19 19:24:02    4678        1 2022-07-27 08:41:59    1059
4          4 2022-07-11 21:07:11    6443        1 2022-07-27 08:41:59    1059
..       ...                 ...     ...      ...                 ...     ...
139        3 2022-01-10 15:45:25    3149        4 2022-11-24 21:50:35    1260
140        6 2022-07-29 18:25:31    7636        4 2022-11-24 21:50:35    1260
141        1 2022-11-24 02:31:07    2632        4 2022-11-24 21:50:35    1260
142        6 2022-04-18 12:03:09    4523        4 

In [13]:
from mypandas import load_births
# Load the births sample frame through mypandas' loader; the display in the
# next cell shows `date` and `births` columns.
births = load_births()

In [14]:
births

Unnamed: 0,date,births
0,1975-01-01,265775
1,1975-02-01,241045
2,1975-03-01,268849
3,1975-04-01,247455
4,1975-05-01,254545
...,...,...
403,2012-08-01,359554
404,2012-09-01,361922
405,2012-10-01,347625
406,2012-11-01,320195


In [17]:
# Query the local `births` DataFrame by name.
# NOTE(review): the log below already reports "Table births already exists"
# and the execution counter jumps from 14 to 17, so the table appears to have
# been written by cells that are no longer in the notebook (or by a previous
# session). This cell may behave differently on a fresh kernel/database.
mpd('select * from births limit 5;', locals())

QUALNAME PandaSQL.__call__
QUALNAME PandaSQL._init_connection
OUTPUT None
QUALNAME extract_table_names
OUTPUT {'births'}
QUALNAME write_table
Table births already exists, ignoring!
OUTPUT None
OUTPUT         date  births
0 1975-01-01  265775
1 1975-02-01  241045
2 1975-03-01  268849
3 1975-04-01  247455
4 1975-05-01  254545


Unnamed: 0,date,births
0,1975-01-01,265775
1,1975-02-01,241045
2,1975-03-01,268849
3,1975-04-01,247455
4,1975-05-01,254545


In [18]:
import pandas as pd

# One-row frame whose `date` value matches the first row of `births`.
# This rebinds `df`, which previously held the Purchases query result.
df = pd.DataFrame([["1975-01-01", "bar"]], columns=["date", "foo"])

In [19]:
df

Unnamed: 0,date,foo
0,1975-01-01,bar


In [20]:
# Query the in-memory `df` frame by name. The log below shows a write_table
# call without the "already exists" notice seen for births, which suggests the
# frame was uploaded for this query.
mpd('select * from df;', locals())

QUALNAME PandaSQL.__call__
QUALNAME PandaSQL._init_connection
OUTPUT None
QUALNAME extract_table_names
OUTPUT {'df'}
QUALNAME write_table
OUTPUT None
OUTPUT          date  foo
0  1975-01-01  bar


Unnamed: 0,date,foo
0,1975-01-01,bar


In [21]:
# Same query as the earlier births cell, repeated after `df` was queried; the
# captured output is unchanged.
mpd('select * from births limit 5;', locals())

QUALNAME PandaSQL.__call__
QUALNAME PandaSQL._init_connection
OUTPUT None
QUALNAME extract_table_names
OUTPUT {'births'}
QUALNAME write_table
Table births already exists, ignoring!
OUTPUT None
OUTPUT         date  births
0 1975-01-01  265775
1 1975-02-01  241045
2 1975-03-01  268849
3 1975-04-01  247455
4 1975-05-01  254545


Unnamed: 0,date,births
0,1975-01-01,265775
1,1975-02-01,241045
2,1975-03-01,268849
3,1975-04-01,247455
4,1975-05-01,254545


In [22]:
# Join the two frames on their date columns. SELECT * keeps both `date`
# columns, so the result has a duplicated column name; the tabular rendering
# below labels the second one `date.1`.
mpd('select * from births join df on births.date = df.date;', locals())

QUALNAME PandaSQL.__call__
QUALNAME PandaSQL._init_connection
OUTPUT None
QUALNAME extract_table_names
OUTPUT {'births', 'df'}
QUALNAME write_table
Table births already exists, ignoring!
OUTPUT None
QUALNAME write_table
Table df already exists, ignoring!
OUTPUT None
OUTPUT         date  births        date  foo
0 1975-01-01  265775  1975-01-01  bar


Unnamed: 0,date,births,date.1,foo
0,1975-01-01,265775,1975-01-01,bar
