From 505401df33e0eedcf21c2e5c786a0f6db32ec698 Mon Sep 17 00:00:00 2001
From: David Lutterkort <lutter@watzmann.net>
Date: Tue, 21 Jan 2025 16:26:13 -0800
Subject: [PATCH 01/13] store: Make Layout.table a little easier to use
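
Accept a plain `&str` instead of a `&SqlName` so that callers can look
up a table with a string literal and do not have to construct a
`SqlName` first. A sketch of the change at a call site, taken from the
test below:

    // before: the name had to be converted into a SqlName
    let table = layout.table(&"thing".into());

    // after: a plain string is enough
    let table = layout.table("thing");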

---
 store/postgres/src/relational.rs           | 9 +++++----
 store/postgres/src/relational/ddl_tests.rs | 4 +---
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/store/postgres/src/relational.rs b/store/postgres/src/relational.rs
index d148060efc2..c5c929e189a 100644
--- a/store/postgres/src/relational.rs
+++ b/store/postgres/src/relational.rs
@@ -443,12 +443,13 @@ impl Layout {
         Ok(())
     }
 
-    /// Find the table with the provided `name`. The name must exactly match
-    /// the name of an existing table. No conversions of the name are done
-    pub fn table(&self, name: &SqlName) -> Option<&Table> {
+    /// Find the table with the provided `sql_name`. The name must exactly
+    /// match the name of an existing table. No conversions of the name are
+    /// done.
+    pub fn table(&self, sql_name: &str) -> Option<&Table> {
         self.tables
             .values()
-            .find(|table| &table.name == name)
+            .find(|table| &table.name == sql_name)
             .map(|rc| rc.as_ref())
     }
 
diff --git a/store/postgres/src/relational/ddl_tests.rs b/store/postgres/src/relational/ddl_tests.rs
index 86e9f232d49..bab910104bf 100644
--- a/store/postgres/src/relational/ddl_tests.rs
+++ b/store/postgres/src/relational/ddl_tests.rs
@@ -26,9 +26,7 @@ fn test_layout(gql: &str) -> Layout {
 #[test]
 fn table_is_sane() {
     let layout = test_layout(THING_GQL);
-    let table = layout
-        .table(&"thing".into())
-        .expect("failed to get 'thing' table");
+    let table = layout.table("thing").expect("failed to get 'thing' table");
     assert_eq!(SqlName::from("thing"), table.name);
     assert_eq!("Thing", table.object.as_str());
 

From f8743a7063965e9c36c0c90fc6be90272af68d6a Mon Sep 17 00:00:00 2001
From: Gustavo Inacio <gustavo@semiotic.ai>
Date: Tue, 7 May 2024 21:29:34 -0300
Subject: [PATCH 02/13] graph, store: Create database sql executor
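
Add an `execute_sql` method to the `QueryStore` trait that runs a
read-only SQL query against a deployment and returns each row as JSON.
The query is parsed with `sqlparser`, validated against an allow-list
of Postgres functions and against the deployment's tables, and then
rewritten: table references are redirected to CTEs over the
deployment's schema, and the statement is wrapped so that every row
comes back as a single `jsonb` column. Roughly, a user query such as

    SELECT token_in, SUM(amount_in) AS amount FROM swap GROUP BY token_in

is executed as (CTE prelude abbreviated, schema name hypothetical):

    WITH "swap" AS (SELECT ... FROM sgd1.swap ...)
    SELECT to_jsonb(sub.*) AS data FROM (
        SELECT token_in, SUM(amount_in) AS amount FROM "swap" GROUP BY token_in
    ) AS sub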

---
 Cargo.lock                               |  14 +
 Cargo.toml                               |   7 +-
 graph/src/components/store/traits.rs     |   4 +-
 graph/src/data/query/error.rs            |   3 +
 graph/src/data/store/mod.rs              |   3 +
 store/postgres/Cargo.toml                |   2 +
 store/postgres/src/deployment_store.rs   |  23 +-
 store/postgres/src/lib.rs                |   1 +
 store/postgres/src/query_store.rs        |  27 +-
 store/postgres/src/relational_queries.rs |  43 +++
 store/postgres/src/sql/constants.rs      | 435 +++++++++++++++++++++++
 store/postgres/src/sql/formatter.rs      | 101 ++++++
 store/postgres/src/sql/mod.rs            |  10 +
 store/postgres/src/sql/parser.rs         | 191 ++++++++++
 store/postgres/src/sql/validation.rs     | 290 +++++++++++++++
 15 files changed, 1145 insertions(+), 9 deletions(-)
 create mode 100644 store/postgres/src/sql/constants.rs
 create mode 100644 store/postgres/src/sql/formatter.rs
 create mode 100644 store/postgres/src/sql/mod.rs
 create mode 100644 store/postgres/src/sql/parser.rs
 create mode 100644 store/postgres/src/sql/validation.rs

diff --git a/Cargo.lock b/Cargo.lock
index c511d00601f..63ffc8446af 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2149,7 +2149,9 @@ dependencies = [
  "rand 0.8.5",
  "serde",
  "serde_json",
+ "sqlparser",
  "stable-hash 0.3.4",
+ "thiserror 1.0.61",
 ]
 
 [[package]]
@@ -4708,6 +4710,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "11a81a8cad9befe4cf1b9d2d4b9c6841c76f0882a3fec00d95133953c13b3d3d"
 dependencies = [
  "log",
+ "sqlparser_derive",
+]
+
+[[package]]
+name = "sqlparser_derive"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index a8193c3f0ed..b93c2107781 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -69,11 +69,8 @@ serde_derive = "1.0.125"
 serde_json = { version = "1.0", features = ["arbitrary_precision"] }
 serde_regex = "1.1.0"
 serde_yaml = "0.9.21"
-slog = { version = "2.7.0", features = [
-    "release_max_level_trace",
-    "max_level_trace",
-] }
-sqlparser = "0.46.0"
+slog = { version = "2.7.0", features = ["release_max_level_trace", "max_level_trace"] }
+sqlparser = { version = "0.46.0", features = ["visitor"] }
 strum = { version = "0.26", features = ["derive"] }
 syn = { version = "2.0.87", features = ["full"] }
 test-store = { path = "./store/test-store" }
diff --git a/graph/src/components/store/traits.rs b/graph/src/components/store/traits.rs
index 27cb3768e2c..bed1b3af548 100644
--- a/graph/src/components/store/traits.rs
+++ b/graph/src/components/store/traits.rs
@@ -15,7 +15,7 @@ use crate::components::transaction_receipt;
 use crate::components::versions::ApiVersion;
 use crate::data::query::Trace;
 use crate::data::store::ethereum::call;
-use crate::data::store::QueryObject;
+use crate::data::store::{QueryObject, SqlQueryObject};
 use crate::data::subgraph::{status, DeploymentFeatures};
 use crate::data::{query::QueryTarget, subgraph::schema::*};
 use crate::prelude::{DeploymentState, NodeId, QueryExecutionError, SubgraphName};
@@ -635,6 +635,8 @@ pub trait QueryStore: Send + Sync {
         query: EntityQuery,
     ) -> Result<(Vec<QueryObject>, Trace), QueryExecutionError>;
 
+    fn execute_sql(&self, sql: &str) -> Result<Vec<SqlQueryObject>, QueryExecutionError>;
+
     async fn is_deployment_synced(&self) -> Result<bool, Error>;
 
     async fn block_ptr(&self) -> Result<Option<BlockPtr>, StoreError>;
diff --git a/graph/src/data/query/error.rs b/graph/src/data/query/error.rs
index 65fc1bcd259..83c2f5ab8b2 100644
--- a/graph/src/data/query/error.rs
+++ b/graph/src/data/query/error.rs
@@ -72,6 +72,7 @@ pub enum QueryExecutionError {
     InvalidSubgraphManifest,
     ResultTooBig(usize, usize),
     DeploymentNotFound(String),
+    SqlError(String),
     IdMissing,
     IdNotString,
     ConstraintViolation(String),
@@ -133,6 +134,7 @@ impl QueryExecutionError {
             | IdMissing
             | IdNotString
             | ConstraintViolation(_) => false,
+            SqlError(_) => false,
         }
     }
 }
@@ -275,6 +277,7 @@ impl fmt::Display for QueryExecutionError {
             IdMissing => write!(f, "entity is missing an `id` attribute"),
             IdNotString => write!(f, "entity `id` attribute is not a string"),
             ConstraintViolation(msg) => write!(f, "internal constraint violated: {}", msg),
+            SqlError(e) => write!(f, "sql error: {}", e),
         }
     }
 }
diff --git a/graph/src/data/store/mod.rs b/graph/src/data/store/mod.rs
index c8786e9b473..cf464ff7b6b 100644
--- a/graph/src/data/store/mod.rs
+++ b/graph/src/data/store/mod.rs
@@ -1102,6 +1102,9 @@ pub struct QueryObject {
     pub entity: r::Object,
 }
 
+/// An object that is returned from a SQL query. It wraps an `r::Value`.
+pub struct SqlQueryObject(pub r::Value);
+
 impl CacheWeight for QueryObject {
     fn indirect_weight(&self) -> usize {
         self.parent.indirect_weight() + self.entity.indirect_weight()
diff --git a/store/postgres/Cargo.toml b/store/postgres/Cargo.toml
index 9a746646807..574926c7010 100644
--- a/store/postgres/Cargo.toml
+++ b/store/postgres/Cargo.toml
@@ -32,6 +32,8 @@ git-testament = "0.2.5"
 itertools = "0.13.0"
 hex = "0.4.3"
 pretty_assertions = "1.4.0"
+sqlparser = { workspace = true }
+thiserror = { workspace = true }
 
 [dev-dependencies]
 clap.workspace = true
diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs
index b148129d924..de342075d4d 100644
--- a/store/postgres/src/deployment_store.rs
+++ b/store/postgres/src/deployment_store.rs
@@ -12,8 +12,9 @@ use graph::components::store::{
     PruningStrategy, QueryPermit, StoredDynamicDataSource, VersionStats,
 };
 use graph::components::versions::VERSIONS;
+use graph::data::graphql::IntoValue;
 use graph::data::query::Trace;
-use graph::data::store::IdList;
+use graph::data::store::{IdList, SqlQueryObject};
 use graph::data::subgraph::{status, SPEC_VERSION_0_0_6};
 use graph::data_source::CausalityRegion;
 use graph::derive::CheapClone;
@@ -54,7 +55,7 @@ use crate::dynds::DataSourcesTable;
 use crate::primary::DeploymentId;
 use crate::relational::index::{CreateIndex, IndexList, Method};
 use crate::relational::{Layout, LayoutCache, SqlName, Table};
-use crate::relational_queries::FromEntityData;
+use crate::relational_queries::{FromEntityData, JSONData};
 use crate::{advisory_lock, catalog, retry};
 use crate::{connection_pool::ConnectionPool, detail};
 use crate::{dynds, primary::Site};
@@ -286,6 +287,24 @@ impl DeploymentStore {
         layout.query(&logger, conn, query)
     }
 
+    pub(crate) fn execute_sql(
+        &self,
+        conn: &mut PgConnection,
+        query: &str,
+    ) -> Result<Vec<SqlQueryObject>, QueryExecutionError> {
+        let query = diesel::sql_query(query);
+
+        // Execute the provided SQL query
+        let results = query
+            .load::<JSONData>(conn)
+            .map_err(|e| QueryExecutionError::SqlError(e.to_string()))?;
+
+        Ok(results
+            .into_iter()
+            .map(|e| SqlQueryObject(e.into_value()))
+            .collect::<Vec<_>>())
+    }
+
     fn check_intf_uniqueness(
         &self,
         conn: &mut PgConnection,
diff --git a/store/postgres/src/lib.rs b/store/postgres/src/lib.rs
index 759e8601313..713adfdb405 100644
--- a/store/postgres/src/lib.rs
+++ b/store/postgres/src/lib.rs
@@ -32,6 +32,7 @@ pub mod query_store;
 mod relational;
 mod relational_queries;
 mod retry;
+mod sql;
 mod store;
 mod store_events;
 mod subgraph_store;
diff --git a/store/postgres/src/query_store.rs b/store/postgres/src/query_store.rs
index 8fc2da822e4..f6b2a22712c 100644
--- a/store/postgres/src/query_store.rs
+++ b/store/postgres/src/query_store.rs
@@ -2,9 +2,10 @@ use std::collections::HashMap;
 use std::time::Instant;
 
 use crate::deployment_store::{DeploymentStore, ReplicaId};
+use crate::sql::Parser;
 use graph::components::store::{DeploymentId, QueryPermit, QueryStore as QueryStoreTrait};
 use graph::data::query::Trace;
-use graph::data::store::QueryObject;
+use graph::data::store::{QueryObject, SqlQueryObject};
 use graph::prelude::*;
 use graph::schema::{ApiSchema, InputSchema};
 
@@ -16,6 +17,7 @@ pub(crate) struct QueryStore {
     store: Arc<DeploymentStore>,
     chain_store: Arc<crate::ChainStore>,
     api_version: Arc<ApiVersion>,
+    sql_parser: Result<Parser, StoreError>,
 }
 
 impl QueryStore {
@@ -26,12 +28,16 @@ impl QueryStore {
         replica_id: ReplicaId,
         api_version: Arc<ApiVersion>,
     ) -> Self {
+        let sql_parser = store
+            .find_layout(site.clone())
+            .map(|layout| Parser::new(layout));
         QueryStore {
             site,
             replica_id,
             store,
             chain_store,
             api_version,
+            sql_parser,
         }
     }
 }
@@ -57,6 +63,25 @@ impl QueryStoreTrait for QueryStore {
             })
     }
 
+    fn execute_sql(
+        &self,
+        sql: &str,
+    ) -> Result<Vec<SqlQueryObject>, graph::prelude::QueryExecutionError> {
+        let mut conn = self
+            .store
+            .get_replica_conn(self.replica_id)
+            .map_err(|e| QueryExecutionError::SqlError(format!("SQL error: {}", e)))?;
+
+        let parser = self
+            .sql_parser
+            .as_ref()
+            .map_err(|e| QueryExecutionError::SqlError(format!("SQL error: {}", e)))?;
+
+        let sql = parser.parse_and_validate(sql)?;
+
+        self.store.execute_sql(&mut conn, &sql)
+    }
+
     /// Return true if the deployment with the given id is fully synced,
     /// and return false otherwise. Errors from the store are passed back up
     async fn is_deployment_synced(&self) -> Result<bool, Error> {
diff --git a/store/postgres/src/relational_queries.rs b/store/postgres/src/relational_queries.rs
index 19f9400c470..ed048699a1a 100644
--- a/store/postgres/src/relational_queries.rs
+++ b/store/postgres/src/relational_queries.rs
@@ -14,6 +14,8 @@ use diesel::sql_types::{Array, BigInt, Binary, Bool, Int8, Integer, Jsonb, Text,
 use diesel::QuerySource as _;
 use graph::components::store::write::{EntityWrite, RowGroup, WriteChunk};
 use graph::components::store::{Child as StoreChild, DerivedEntityQuery};
+
+use graph::data::graphql::IntoValue;
 use graph::data::store::{Id, IdType, NULL};
 use graph::data::store::{IdList, IdRef, QueryObject};
 use graph::data::value::{Object, Word};
@@ -439,6 +441,47 @@ pub fn parse_id(id_type: IdType, json: serde_json::Value) -> Result<Id, StoreErr
     }
 }
 
+#[derive(QueryableByName, Debug)]
+pub struct JSONData {
+    #[diesel(sql_type = Jsonb)]
+    pub data: serde_json::Value,
+}
+
+impl IntoValue for JSONData {
+    fn into_value(self) -> r::Value {
+        JSONData::to_value(self.data)
+    }
+}
+
+impl JSONData {
+    pub fn to_value(data: serde_json::Value) -> r::Value {
+        match data {
+            serde_json::Value::Null => r::Value::Null,
+            serde_json::Value::Bool(b) => r::Value::Boolean(b),
+            serde_json::Value::Number(n) => {
+                if let Some(i) = n.as_i64() {
+                    r::Value::Int(i)
+                } else {
+                    r::Value::Float(n.as_f64().unwrap())
+                }
+            }
+            serde_json::Value::String(s) => r::Value::String(s),
+            serde_json::Value::Array(vals) => {
+                let vals: Vec<_> = vals.into_iter().map(JSONData::to_value).collect::<Vec<_>>();
+                r::Value::List(vals)
+            }
+            serde_json::Value::Object(map) => {
+                let mut m = std::collections::BTreeMap::new();
+                for (k, v) in map {
+                    let value = JSONData::to_value(v);
+                    m.insert(Word::from(k), value);
+                }
+                r::Value::object(m)
+            }
+        }
+    }
+}
+
 /// Helper struct for retrieving entities from the database. With diesel, we
 /// can only run queries that return columns whose number and type are known
 /// at compile time. Because of that, we retrieve the actual data for an
diff --git a/store/postgres/src/sql/constants.rs b/store/postgres/src/sql/constants.rs
new file mode 100644
index 00000000000..b24f191f938
--- /dev/null
+++ b/store/postgres/src/sql/constants.rs
@@ -0,0 +1,435 @@
+use std::collections::HashSet;
+
+use lazy_static::lazy_static;
+use sqlparser::dialect::PostgreSqlDialect;
+
+lazy_static! {
+    pub(super) static ref ALLOWED_FUNCTIONS: HashSet<&'static str> = {
+       vec![
+            // Comparison Functions see https://www.postgresql.org/docs/14/functions-comparison.html#FUNCTIONS-COMPARISON-FUNC-TABLE
+            "num_nonnulls", // Number of non-null arguments
+            "num_nulls", // Number of null arguments
+
+            // Mathematical Functions see https://www.postgresql.org/docs/14/functions-math.html#FUNCTIONS-MATH-FUNC-TABLE
+            "abs", // Asolute value
+            "cbrt", // Cube root
+            "ceil", // Nearest integer greater than or equal to argument
+            "ceiling", // Nearest integer greater than or equal to argument
+            "degrees", // Converts radians to degrees
+            "div", // Integer quotient of y/x (truncates towards zero)
+            "exp", // Exponential (e raised to the given power)
+            "factorial", // Factorial
+            "floor", // Nearest integer less than or equal to argument
+            "gcd", // Greatest common divisor (the largest positive number that divides both inputs with no remainder); returns 0 if both inputs are zero; available for integer, bigint, and numeric
+            "lcm", // Least common multiple (the smallest strictly positive number that is an integral multiple of both inputs); returns 0 if either input is zero; available for integer, bigint, and numeric
+            "ln", // Natural logarithm
+            "log", // Base 10 logarithm
+            "log10", // Base 10 logarithm (same as log)
+            "mod", // Remainder of y/x; available for smallint, integer, bigint, and numeric
+            "pi", // Approximate value of π
+            "power", // a raised to the power of b
+            "radians", // Converts degrees to radians
+            "round", // Rounds to nearest integer. For numeric, ties are broken by rounding away from zero. For double precision, the tie-breaking behavior is platform dependent, but “round to nearest even” is the most common rule.
+            "scale", // Scale of the argument (the number of decimal digits in the fractional part)
+            "sign", // Sign of the argument (-1, 0, or +1)
+            "sqrt", // Square root
+            "trim_scale", // Reduces the value's scale (number of fractional decimal digits) by removing trailing zeroes
+            "trunc", // Truncates to integer (towards zero)
+            "width_bucket", // Returns the number of the bucket in which operand falls in a histogram having count equal-width buckets spanning the range low to high. Returns 0 or count+1 for an input outside that range.
+
+            // Random Functions see https://www.postgresql.org/docs/14/functions-math.html#FUNCTIONS-MATH-RANDOM-TABLE
+            "random", // Returns a random value in the range 0.0 <= x < 1.0
+            "setseed", // Sets the seed for subsequent random() calls; argument must be between -1.0 and 1.0, inclusive
+
+            // Trigonometric Functions see https://www.postgresql.org/docs/14/functions-math.html#FUNCTIONS-MATH-TRIG-TABLE
+            "acos", // Arc cosine, result in radians
+            "acosd", // Arc cosine, result in degrees
+            "asin", // Arc sine, result in radians
+            "asind", // Arc sine, result in degrees
+            "atan", // Arc tangent, result in radians
+            "atand", // Arc tangent, result in degrees
+            "atan2", // Arc tangent of y/x, result in radians
+            "atan2d", // Arc tangent of y/x, result in degrees
+            "cos", // Cosine, argument in radians
+            "cosd", // Cosine, argument in degrees
+            "cot", // Cotangent, argument in radians
+            "cotd", // Cotangent, argument in degrees
+            "sin", // Sine, argument in radians
+            "sind", // Sine, argument in degrees
+            "tan", // Tangent, argument in radians
+            "tand", // Tangent, argument in degrees
+
+            // Hyperbolic Functions see https://www.postgresql.org/docs/14/functions-math.html#FUNCTIONS-MATH-HYPERBOLIC-TABLE
+            "sinh", // Hyperbolic sine
+            "cosh", // Hyperbolic cosine
+            "tanh", // Hyperbolic tangent
+            "asinh", // Inverse hyperbolic sine
+            "acosh", // Inverse hyperbolic cosine
+            "atanh", // Inverse hyperbolic tangent
+
+            // String Functions see https://www.postgresql.org/docs/14/functions-string.html#FUNCTIONS-STRING-SQL
+            "bit_length", // Number of bits in string
+            "char_length", // Number of characters in string
+            "character_length", // Synonym for char_length
+            "lower", // Convert string to lower case
+            "normalize", // Convert string to specified Unicode normalization form
+            "octet_length", // Number of bytes in string
+            "overlay", // Replace substring
+            "position", // Location of specified substring
+            "substring", // Extract substring
+            "trim", // Remove leading and trailing characters
+            "upper", // Convert string to upper case
+
+            // Additional string functions see https://www.postgresql.org/docs/14/functions-string.html#FUNCTIONS-STRING-OTHER
+            "ascii", // Convert first character to its numeric code
+            "btrim", // Remove the longest string containing only characters from characters (a space by default) from the start and end of string
+            "chr", // Convert integer to character
+            "concat", // Concatenate strings
+            "concat_ws", // Concatenate with separator
+            "format", // Format arguments according to a format string
+            "initcap", // Convert first letter of each word to upper case and the rest to lower case
+            "left", // Extract substring
+            "length", // Number of characters in string
+            "lpad", // Pad string to length length by prepending the characters fill (a space by default)
+            "ltrim", // Remove the longest string containing only characters from characters (a space by default) from the start of string
+            "md5", // Compute MD5 hash
+            "parse_ident", // Split qualified_identifier into an array of identifiers, removing any quoting of individual identifiers
+            "quote_ident", // Returns the given string suitably quoted to be used as an identifier in an SQL statement string
+            "quote_literal", // Returns the given string suitably quoted to be used as a string literal in an SQL statement string
+            "quote_nullable", // Returns the given string suitably quoted to be used as a string literal in an SQL statement string; or, if the argument is null, returns NULL
+            "regexp_match", // Returns captured substrings resulting from the first match of a POSIX regular expression to the string
+            "regexp_matches", // Returns captured substrings resulting from the first match of a POSIX regular expression to the string, or multiple matches if the g flag is used
+            "regexp_replace", // Replaces substrings resulting from the first match of a POSIX regular expression, or multiple substring matches if the g flag is used
+            "regexp_split_to_array", // Splits string using a POSIX regular expression as the delimiter, producing an array of results
+            "regexp_split_to_table", // Splits string using a POSIX regular expression as the delimiter, producing a set of results
+            "repeat", // Repeats string the specified number of times
+            "replace", // Replaces all occurrences in string of substring from with substring to
+            "reverse", // Reverses the order of the characters in the string
+            "right", // Extract substring
+            "rpad", // Pad string to length length by appending the characters fill (a space by default)
+            "rtrim", // Remove the longest string containing only characters from characters (a space by default) from the end of string
+            "split_part", // Splits string at occurrences of delimiter and returns the n'th field (counting from one), or when n is negative, returns the |n|'th-from-last field
+            "strpos", // Returns first starting index of the specified substring within string, or zero if it's not present
+            "substr", // Extracts the substring of string starting at the start'th character, and extending for count characters if that is specified
+            "starts_with", // Returns true if string starts with prefix
+            "string_to_array", // Splits the string at occurrences of delimiter and forms the resulting fields into a text array
+            "string_to_table", // Splits the string at occurrences of delimiter and returns the resulting fields as a set of text rows
+            "to_ascii", // Converts string to ASCII from another encoding, which may be identified by name or number
+            "to_hex", // Converts the number to its equivalent hexadecimal representation
+            "translate", // Replaces each character in string that matches a character in the from set with the corresponding character in the to set
+            "unistr", // Evaluate escaped Unicode characters in the argument
+
+            // Binary String Functions see https://www.postgresql.org/docs/14/functions-binarystring.html#FUNCTIONS-BINARYSTRING-OTHER
+            "bit_count", // Number of bits set in the argument
+            "get_bit", // Extracts the n'th bit from string
+            "get_byte", // Extracts the n'th byte from string
+            "set_bit", // Sets the n'th bit in string to newvalue
+            "set_byte", // Sets the n'th byte in string to newvalue
+            "sha224", // Compute SHA-224 hash
+            "sha256", // Compute SHA-256 hash
+            "sha384", // Compute SHA-384 hash
+            "sha512", // Compute SHA-512 hash
+
+            // String conversion functions see https://www.postgresql.org/docs/14/functions-binarystring.html#FUNCTIONS-BINARYSTRING-CONVERSIONS
+            "convert", // Converts a binary string representing text in encoding src_encoding to a binary string in encoding dest_encoding
+            "convert_from", // Converts a binary string representing text in encoding src_encoding to text in the database encoding
+            "convert_to", // Converts a text string (in the database encoding) to a binary string encoded in encoding dest_encoding
+            "encode", // Encodes binary data into a textual representation
+            "decode", // Decodes binary data from a textual representation
+
+            // Formatting Functions see https://www.postgresql.org/docs/14/functions-formatting.html#FUNCTIONS-FORMATTING-TABLE
+            "to_char", // Converts number to a string according to the given format
+            "to_date", // Converts string to date
+            "to_number", // Converts string to number
+            "to_timestamp", // Converts string to timestamp with time zone
+
+            // Date/Time Functions see https://www.postgresql.org/docs/14/functions-datetime.html
+            "age", // Subtract arguments, producing a “symbolic” result that uses years and months, rather than just days
+            "clock_timestamp", // Current date and time (changes during statement execution)
+            "current_date", // Current date
+            "current_time", // Current time of day
+            "current_timestamp", // Current date and time (start of current transaction)
+            "date_bin", // Bin input into specified interval aligned with specified origin
+            "date_part", // Get subfield (equivalent to extract)
+            "date_trunc", // Truncate to specified precision
+            "extract", // Get subfield
+            "isfinite", // Test for finite date (not +/-infinity)
+            "justify_days", // Adjust interval so 30-day time periods are represented as months
+            "justify_hours", // Adjust interval so 24-hour time periods are represented as days
+            "justify_interval", // Adjust interval using justify_days and justify_hours, with additional sign adjustments
+            "localtime", // Current time of day
+            "localtimestamp", // Current date and time (start of current transaction)
+            "make_date", // Create date from year, month and day fields (negative years signify BC)
+            "make_interval", // Create interval from years, months, weeks, days, hours, minutes and seconds fields, each of which can default to zero
+            "make_time", // Create time from hour, minute and seconds fields
+            "make_timestamp", // Create timestamp from year, month, day, hour, minute and seconds fields (negative years signify BC)
+            "make_timestamptz", // Create timestamp with time zone from year, month, day, hour, minute and seconds fields (negative years signify BC).
+            "now", // Current date and time (start of current transaction)
+            "statement_timestamp", // Current date and time (start of current statement)
+            "timeofday", // Current date and time (like clock_timestamp, but as a text string)
+            "transaction_timestamp", // Current date and time (start of current transaction)
+
+            // Enum support functions see https://www.postgresql.org/docs/14/functions-enum.html#FUNCTIONS-ENUM-SUPPORT
+            "enum_first", // Returns the first value of an enum type
+            "enum_last", // Returns the last value of an enum type
+            "enum_range", // Returns a range of values of an enum type
+
+            // Geometric Functions see https://www.postgresql.org/docs/14/functions-geometry.html
+            "area", // Computes area
+            "center", // Computes center point
+            "diagonal", // Extracts box's diagonal as a line segment (same as lseg(box))
+            "diameter", // Computes diameter of circle
+            "height", // Computes vertical size of box
+            "isclosed", // Is path closed?
+            "isopen", // Is path open?
+            "length", // Computes the total length
+            "npoints", // Returns the number of points
+            "pclose", // Converts path to closed form
+            "popen", // Converts path to open form
+            "radius", // Computes radius of circle
+            "slope", // Computes slope of a line drawn through the two points
+            "width", // Computes horizontal size of box
+
+            // Geometric Type Conversion Functions see https://www.postgresql.org/docs/14/functions-geometry.html
+            "box", // Convert to a box
+            "circle", // Convert to a circle
+            "line", // Convert to a line
+            "lseg", // Convert to a line segment
+            "path", // Convert to a path
+            "point", // Convert to a point
+            "polygon", // Convert to a polygon
+
+            // IP Address Functions see https://www.postgresql.org/docs/14/functions-net.html
+            "abbrev", // Creates an abbreviated display format as text
+            "broadcast", // Computes the broadcast address for the address's network
+            "family", // Returns the address's family: 4 for IPv4, 6 for IPv6
+            "host", // Returns the IP address as text, ignoring the netmask
+            "hostmask", // Computes the host mask for the address's network
+            "inet_merge", // Computes the smallest network that includes both of the given networks
+            "inet_same_family", // Tests whether the addresses belong to the same IP family
+            "masklen", // Returns the netmask length in bits
+            "netmask", // Computes the network mask for the address's network
+            "network", // Returns the network part of the address, zeroing out whatever is to the right of the netmask
+            "set_masklen", // Sets the netmask length for an inet value. The address part does not change
+            "text", // Returns the unabbreviated IP address and netmask length as text
+
+            // MAC Address Functions see https://www.postgresql.org/docs/14/functions-net.html#MACADDR-FUNCTIONS-TABLE
+            "macaddr8_set7bit", //Sets the 7th bit of the address to one, creating what is known as modified EUI-64, for inclusion in an IPv6 address.
+
+            // Text Search Functions see https://www.postgresql.org/docs/14/functions-textsearch.html
+            "array_to_tsvector", // Converts an array of lexemes to a tsvector
+            "get_current_ts_config", // Returns the OID of the current default text search configuration (as set by default_text_search_config)
+            "numnode", // Returns the number of lexemes plus operators in the tsquery
+            "plainto_tsquery", // Converts text to a tsquery, normalizing words according to the specified or default configuration.
+            "phraseto_tsquery", // Converts text to a tsquery, normalizing words according to the specified or default configuration.
+            "websearch_to_tsquery", // Converts text to a tsquery, normalizing words according to the specified or default configuration.
+            "querytree", // Produces a representation of the indexable portion of a tsquery. A result that is empty or just T indicates a non-indexable query.
+            "setweight", // Assigns the specified weight to each element of the vector.
+            "strip", // Removes positions and weights from the tsvector.
+            "to_tsquery", // Converts text to a tsquery, normalizing words according to the specified or default configuration.
+            "to_tsvector", // Converts text to a tsvector, normalizing words according to the specified or default configuration.
+            "json_to_tsvector", // Selects each item in the JSON document that is requested by the filter and converts each one to a tsvector, normalizing words according to the specified or default configuration.
+            "jsonb_to_tsvector",// Selects each item in the JSON document that is requested by the filter and converts each one to a tsvector, normalizing words according to the specified or default configuration.
+            "ts_delete", // Removes any occurrence of the given lexeme from the vector.
+            "ts_filter", // Selects only elements with the given weights from the vector.
+            "ts_headline", // Displays, in an abbreviated form, the match(es) for the query in the document, which must be raw text not a tsvector.
+            "ts_rank", // Computes a score showing how well the vector matches the query. See Section 12.3.3 for details.
+            "ts_rank_cd", // Computes a score showing how well the vector matches the query, using a cover density algorithm. See Section 12.3.3 for details.
+            "ts_rewrite", // Replaces occurrences of target with substitute within the query. See Section
+            "tsquery_phrase", // Constructs a phrase query that searches for matches of query1 and query2 at successive lexemes (same as <-> operator).
+            "tsvector_to_array", // Converts a tsvector to an array of lexemes.
+
+            // Text search debugging functions see https://www.postgresql.org/docs/14/functions-textsearch.html#TEXTSEARCH-FUNCTIONS-DEBUG-TABLE
+            "ts_debug", // Extracts and normalizes tokens from the document according to the specified or default text search configuration, and returns information about how each token was processed. See Section 12.8.1 for details.
+            "ts_lexize", // Returns an array of replacement lexemes if the input token is known to the dictionary, or an empty array if the token is known to the dictionary but it is a stop word, or NULL if it is not a known word. See Section 12.8.3 for details.
+            "ts_parse", // Extracts tokens from the document using the named parser. See Section 12.8.2 for details.
+            "ts_token_type", // Returns a table that describes each type of token the named parser can recognize. See Section 12.8.2 for details.
+
+            // UUID Functions see https://www.postgresql.org/docs/14/functions-uuid.html
+            "gen_random_uuid", // Generate a version 4 (random) UUID
+
+            // XML Functions see https://www.postgresql.org/docs/14/functions-xml.html
+            "xmlcomment", // Creates an XML comment
+            "xmlconcat", // Concatenates XML values
+            "xmlelement", // Creates an XML element
+            "xmlforest", // Creates an XML forest (sequence) of elements
+            "xmlpi", // Creates an XML processing instruction
+            "xmlagg", // Concatenates the input values to the aggregate function call, much like xmlconcat does, except that concatenation occurs across rows rather than across expressions in a single row.
+            "xmlexists", // Evaluates an XPath 1.0 expression (the first argument), with the passed XML value as its context item.
+            "xml_is_well_formed", // Checks whether the argument is a well-formed XML document or fragment.
+            "xml_is_well_formed_content", // Checks whether the argument is a well-formed XML document or fragment, and that it contains no document type declaration.
+            "xml_is_well_formed_document", // Checks whether the argument is a well-formed XML document.
+            "xpath", // Evaluates the XPath 1.0 expression xpath (given as text) against the XML value xml.
+            "xpath_exists", // Evaluates the XPath 1.0 expression xpath (given as text) against the XML value xml, and returns true if the expression selects at least one node, otherwise false.
+            "xmltable", // Expands an XML value into a table whose columns match the rowtype defined by the function's parameter list.
+            "table_to_xml", // Converts a table to XML.
+            "cursor_to_xml", // Converts a cursor to XML.
+
+            // JSON and JSONB creation functions see https://www.postgresql.org/docs/14/functions-json.html#FUNCTIONS-JSON-CREATION-TABLE
+            "to_json", // Converts any SQL value to JSON.
+            "to_jsonb", // Converts any SQL value to JSONB.
+            "array_to_json", // Converts an SQL array to a JSON array.
+            "row_to_json", // Converts an SQL composite value to a JSON object.
+            "json_build_array", // Builds a possibly-heterogeneously-typed JSON array out of a variadic argument list.
+            "jsonb_build_array", // Builds a possibly-heterogeneously-typed JSON array out of a variadic argument list.
+            "json_build_object", // Builds a JSON object out of a variadic argument list.
+            "json_object", // Builds a JSON object out of a text array.
+            "jsonb_object", // Builds a JSONB object out of a text array.
+
+            // JSON and JSONB processing functions see https://www.postgresql.org/docs/14/functions-json.html#FUNCTIONS-JSON-PROCESSING-TABLE
+            "json_array_elements", // Expands the top-level JSON array into a set of JSON values.
+            "jsonb_array_elements", // Expands the top-level JSON array into a set of JSONB values.
+            "json_array_elements_text", // Expands the top-level JSON array into a set of text values.
+            "jsonb_array_elements_text", // Expands the top-level JSONB array into a set of text values.
+            "json_array_length", // Returns the number of elements in the top-level JSON array.
+            "jsonb_array_length", // Returns the number of elements in the top-level JSONB array.
+            "json_each", // Expands the top-level JSON object into a set of key/value pairs.
+            "jsonb_each", // Expands the top-level JSONB object into a set of key/value pairs.
+            "json_each_text", // Expands the top-level JSON object into a set of key/value pairs. The returned values will be of type text.
+            "jsonb_each_text", // Expands the top-level JSONB object into a set of key/value pairs. The returned values will be of type text.
+            "json_extract_path", // Extracts JSON sub-object at the specified path.
+            "jsonb_extract_path", // Extracts JSONB sub-object at the specified path.
+            "json_extract_path_text", // Extracts JSON sub-object at the specified path as text.
+            "jsonb_extract_path_text", // Extracts JSONB sub-object at the specified path as text.
+            "json_object_keys", // Returns the set of keys in the top-level JSON object.
+            "jsonb_object_keys", // Returns the set of keys in the top-level JSONB object.
+            "json_populate_record", // Expands the top-level JSON object to a row having the composite type of the base argument.
+            "jsonb_populate_record", // Expands the top-level JSON object to a row having the composite type of the base argument.
+            "json_populate_recordset", // Expands the top-level JSON array of objects to a set of rows having the composite type of the base argument.
+            "jsonb_populate_recordset", // Expands the top-level JSONB array of objects to a set of rows having the composite type of the base argument.
+            "json_to_record", // Expands the top-level JSON object to a row having the composite type defined by an AS clause.
+            "jsonb_to_record", // Expands the top-level JSONB object to a row having the composite type defined by an AS clause.
+            "json_to_recordset", // Expands the top-level JSON array of objects to a set of rows having the composite type defined by an AS clause.
+            "jsonb_to_recordset", // Expands the top-level JSONB array of objects to a set of rows having the composite type defined by an AS clause.
+            "json_strip_nulls", // Deletes all object fields that have null values from the given JSON value, recursively.
+            "jsonb_strip_nulls", // Deletes all object fields that have null values from the given JSONB value, recursively.
+            "jsonb_set", // Returns target with the item designated by path replaced by new_value, or with new_value added if create_if_missing is true (which is the default) and the item designated by path does not exist.
+            "jsonb_set_lax", // If new_value is not NULL, behaves identically to jsonb_set. Otherwise behaves according to the value of null_value_treatment which must be one of 'raise_exception', 'use_json_null', 'delete_key', or 'return_target'. The default is 'use_json_null'.
+            "jsonb_insert", //Returns target with new_value inserted.
+            "jsonb_path_exists", // Checks whether the JSON path returns any item for the specified JSON value.
+            "jsonb_path_match", // Returns the result of a JSON path predicate check for the specified JSON value.
+            "jsonb_path_query", // Returns all JSON items returned by the JSON path for the specified JSON value.
+            "jsonb_path_query_array", // Returns all JSON items returned by the JSON path for the specified JSON value, as a JSON array.
+            "jsonb_path_query_first", // Returns the first JSON item returned by the JSON path for the specified JSON value. Returns NULL if there are no results.
+            "jsonb_path_exists_tz", // Support comparisons of date/time values that require timezone-aware conversions.
+            "jsonb_path_match_tz", // Support comparisons of date/time values that require timezone-aware conversions.
+            "jsonb_path_query_tz", // Support comparisons of date/time values that require timezone-aware conversions.
+            "jsonb_path_query_array_tz", // Support comparisons of date/time values that require timezone-aware conversions.
+            "jsonb_path_query_first_tz", // Support comparisons of date/time values that require timezone-aware conversions.
+            "jsonb_pretty", // Converts the given JSON value to pretty-printed, indented text.
+            "json_typeof", // Returns the type of the top-level JSON value as a text string.
+            "jsonb_typeof", // Returns the type of the top-level JSONB value as a text string.
+
+            // Conditional Expressions see https://www.postgresql.org/docs/14/functions-conditional.html
+            "coalesce", // Return first non-null argument.
+            "nullif", // Return null if two arguments are equal, otherwise return the first argument.
+            "greatest", // Return greatest of a list of values.
+            "least", // Return smallest of a list of values.
+
+            // Array Functions https://www.postgresql.org/docs/14/functions-array.html#ARRAY-FUNCTIONS-TABLE
+            "array_append", // Appends an element to the end of an array (same as the || operator).
+            "array_cat", // Concatenates two arrays (same as the || operator).
+            "array_dims", // Returns a text representation of the array's dimensions.
+            "array_fill", // Returns an array filled with copies of the given value, having dimensions of the lengths specified by the second argument. The optional third argument supplies lower-bound values for each dimension (which default to all 1).
+            "array_length", // Returns the length of the requested array dimension. (Produces NULL instead of 0 for empty or missing array dimensions.)
+            "array_lower", // Returns the lower bound of the requested array dimension.
+            "array_ndims", // Returns the number of dimensions of the array.
+            "array_position", // Returns the subscript of the first occurrence of the second argument in the array, or NULL if it's not present.
+            "array_prepend", // Prepends an element to the beginning of an array (same as the || operator).
+            "array_remove", // Removes all elements equal to the given value from the array. The array must be one-dimensional. Comparisons are done using IS NOT DISTINCT FROM semantics, so it is possible to remove NULLs.
+            "array_replace", // Replaces each array element equal to the second argument with the third argument.
+            "array_to_string", // Converts each array element to its text representation, and concatenates those separated by the delimiter string. If null_string is given and is not NULL, then NULL array entries are represented by that string; otherwise, they are omitted.
+            "array_upper", // Returns the upper bound of the requested array dimension.
+            "cardinality", // Returns the total number of elements in the array, or 0 if the array is empty.
+            "trim_array", // Trims an array by removing the last n elements. If the array is multidimensional, only the first dimension is trimmed.
+            "unnest", // Expands an array into a set of rows. The array's elements are read out in storage order.
+
+            // Range Functions https://www.postgresql.org/docs/14/functions-range.html#RANGE-FUNCTIONS-TABLE
+            "lower", // Extracts the lower bound of the range (NULL if the range is empty or the lower bound is infinite).
+            "upper", // Extracts the upper bound of the range (NULL if the range is empty or the upper bound is infinite).
+            "isempty", // Is the range empty?
+            "lower_inc", // Is the range's lower bound inclusive?
+            "upper_inc", // Is the range's upper bound inclusive?
+            "lower_inf", // Is the range's lower bound infinite?
+            "upper_inf", // Is the range's upper bound infinite?
+            "range_merge", // Computes the smallest range that includes both of the given ranges.
+
+            // Multi-range Functions https://www.postgresql.org/docs/14/functions-range.html#MULTIRANGE-FUNCTIONS-TABLE
+            "multirange", // Returns a multirange containing just the given range.
+
+            // General purpose aggregate functions https://www.postgresql.org/docs/14/functions-aggregate.html#FUNCTIONS-AGGREGATE-TABLE
+            "array_agg", // Collects all the input values, including nulls, into an array.
+            "avg", // Computes the average (arithmetic mean) of all the non-null input values.
+            "bit_and", // Computes the bitwise AND of all non-null input values.
+            "bit_or", // Computes the bitwise OR of all non-null input values.
+            "bit_xor", // Computes the bitwise exclusive OR of all non-null input values. Can be useful as a checksum for an unordered set of values.
+            "bool_and", // Returns true if all non-null input values are true, otherwise false.
+            "bool_or", // Returns true if any non-null input value is true, otherwise false.
+            "count", // Computes the number of input rows.
+            "every", // This is the SQL standard's equivalent to bool_and.
+            "json_agg", // Collects all the input values, including nulls, into a JSON array. Values are converted to JSON as per to_json or to_jsonb.
+            "json_object_agg", // Collects all the key/value pairs into a JSON object. Key arguments are coerced to text; value arguments are converted as per to_json or to_jsonb. Values can be null, but not keys.
+            "max", // Computes the maximum of the non-null input values. Available for any numeric, string, date/time, or enum type, as well as inet, interval, money, oid, pg_lsn, tid, and arrays of any of these types.
+            "min", // Computes the minimum of the non-null input values. Available for any numeric, string, date/time, or enum type, as well as inet, interval, money, oid, pg_lsn, tid, and arrays of any of these types.
+            "range_agg", // Computes the union of the non-null input values.
+            "range_intersect_agg", // Computes the intersection of the non-null input values.
+            "string_agg", // Concatenates the non-null input values into a string. Each value after the first is preceded by the corresponding delimiter (if it's not null).
+            "sum", // Computes the sum of the non-null input values.
+            "xmlagg", // Concatenates the non-null XML input values.
+
+            // Statistical aggregate functions https://www.postgresql.org/docs/14/functions-aggregate.html#FUNCTIONS-AGGREGATE-STATISTICS-TABLE
+            "corr", // Computes the correlation coefficient.
+            "covar_pop", // Computes the population covariance.
+            "covar_samp", // Computes the sample covariance.
+            "regr_avgx", // Computes the average of the independent variable, sum(X)/N.
+            "regr_avgy", // Computes the average of the dependent variable, sum(Y)/N.
+            "regr_count", // Computes the number of rows in which both inputs are non-null.
+            "regr_intercept", // Computes the y-intercept of the least-squares-fit linear equation determined by the (X, Y) pairs.
+            "regr_r2", // Computes the square of the correlation coefficient.
+            "regr_slope", // Computes the slope of the least-squares-fit linear equation determined by the (X, Y) pairs.
+            "regr_sxx", // Computes the “sum of squares” of the independent variable, sum(X^2) - sum(X)^2/N.
+            "regr_sxy", // Computes the “sum of products” of independent times dependent variables, sum(X*Y) - sum(X) * sum(Y)/N.
+            "regr_syy", // Computes the “sum of squares” of the dependent variable, sum(Y^2) - sum(Y)^2/N.
+            "stddev", // This is a historical alias for stddev_samp.
+            "stddev_pop", // Computes the population standard deviation of the input values.
+            "stddev_samp", // Computes the sample standard deviation of the input values.
+            "variance", // This is a historical alias for var_samp.
+            "var_pop", // Computes the population variance of the input values (square of the population standard deviation).
+            "var_samp", // Computes the sample variance of the input values (square of the sample standard deviation).
+
+            // Ordered-set aggregate functions https://www.postgresql.org/docs/14/functions-aggregate.html#FUNCTIONS-AGGREGATE-ORDEREDSET-TABLE
+            "mode", // Computes the mode (most frequent value) of the input values.
+            "percentile_cont", // Computes the continuous percentile of the input values.
+            "percentile_disc", // Computes the discrete percentile of the input values.
+
+            // Hypothetical-set aggregate functions https://www.postgresql.org/docs/14/functions-aggregate.html#FUNCTIONS-AGGREGATE-HYPOTHETICAL-TABLE
+            "rank", // Computes the rank of the current row with gaps; same as row_number of its first peer.
+            "dense_rank", // Computes the rank of the current row without gaps; this function counts peer groups.
+            "percent_rank", // Computes the relative rank (percentile) of the current row: (rank - 1) / (total partition rows - 1).
+            "cume_dist", // Computes the relative rank of the current row: (number of partition rows preceding or peer with current row) / (total partition rows).
+
+            // Grouping set aggregate functions https://www.postgresql.org/docs/14/functions-aggregate.html#FUNCTIONS-AGGREGATE-GROUPINGSET-TABLE
+            "grouping", // Returns a bit mask indicating which GROUP BY expressions are not included in the current grouping set.
+
+            // Window functions https://www.postgresql.org/docs/14/functions-window.html#FUNCTIONS-WINDOW-TABLE
+            "row_number", // Number of the current row within its partition, counting from 1.
+            "ntile", // Integer ranging from 1 to the argument value, dividing the partition as equally as possible.
+            "lag", // Returns value evaluated at the row that is offset rows before the current row within the partition; if there is no such row, instead returns default (which must be of a type compatible with value).
+            "lead", // Returns value evaluated at the row that is offset rows after the current row within the partition; if there is no such row, instead returns default (which must be of a type compatible with value).
+            "first_value", // Returns value evaluated at the row that is the first row of the window frame.
+            "last_value", // Returns value evaluated at the row that is the last row of the window frame.
+            "nth_value", // Returns value evaluated at the row that is the n'th row of the window frame (counting from 1); returns NULL if there is no such row.
+
+            // Set returning functions https://www.postgresql.org/docs/14/functions-srf.html
+            "generate_series", // Expands range arguments into a set of rows.
+            "generate_subscripts", // Expands array arguments into a set of rows.
+
+            // Abbreviated syntax for common functions
+            "pow", // see power function
+            "date", // see to_date
+
+       ].into_iter().collect()
+    };
+}
+
+pub(super) static SQL_DIALECT: PostgreSqlDialect = PostgreSqlDialect {};
diff --git a/store/postgres/src/sql/formatter.rs b/store/postgres/src/sql/formatter.rs
new file mode 100644
index 00000000000..dff810aa6ea
--- /dev/null
+++ b/store/postgres/src/sql/formatter.rs
@@ -0,0 +1,101 @@
+use sqlparser::ast::{ObjectName, Statement, TableFactor, VisitMut, VisitorMut};
+use std::ops::ControlFlow;
+
+use super::Schema;
+
+pub struct Formatter<'a> {
+    prelude: &'a str,
+    schema: &'a Schema,
+}
+
+impl<'a> Formatter<'a> {
+    pub fn new(prelude: &'a str, schema: &'a Schema) -> Self {
+        Self { prelude, schema }
+    }
+
+    fn prepend_prefix_to_object_name_mut(&self, name: &mut ObjectName) {
+        let table_identifier = &mut name.0;
+        // Remove all but the last identifier.
+        table_identifier.drain(0..table_identifier.len() - 1);
+
+        // Ensure schema table names are quoted so they match the quoted CTE names generated in the prelude.
+        if let Some(table_name) = table_identifier.last_mut() {
+            if self.schema.contains_key(&table_name.value) {
+                table_name.quote_style = Some('"');
+            }
+        }
+    }
+
+    pub fn format(&mut self, statement: &mut Statement) -> String {
+        statement.visit(self);
+
+        format!(
+            "{} SELECT to_jsonb(sub.*) AS data FROM ( {} ) AS sub",
+            self.prelude, statement
+        )
+    }
+}
+
+impl VisitorMut for Formatter<'_> {
+    type Break = ();
+
+    fn pre_visit_table_factor(
+        &mut self,
+        table_factor: &mut TableFactor,
+    ) -> ControlFlow<Self::Break> {
+        if let TableFactor::Table { name, .. } = table_factor {
+            self.prepend_prefix_to_object_name_mut(name);
+        }
+        ControlFlow::Continue(())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::collections::HashSet;
+
+    use super::*;
+    use crate::sql::constants::SQL_DIALECT;
+    const CTE_PREFIX: &str = "WITH \"swap\" AS (
+            SELECT
+            id,
+            amount_in,
+            amount_out,
+            concat('0x', encode(token_in, 'hex')) AS token_in,
+            concat('0x', encode(token_out, 'hex')) AS token_out
+            FROM
+            sdg1.swap
+        )";
+
+    #[test]
+    fn format_sql() {
+        let mut schema = Schema::new();
+        schema.insert(
+            "swap".to_string(),
+            HashSet::from_iter(
+                ["id", "amount_in", "amount_out", "token_in", "token_out"]
+                    .into_iter()
+                    .map(|s| s.to_string()),
+            ),
+        );
+
+        let mut formatter = Formatter::new(CTE_PREFIX, &schema);
+
+        let sql = "SELECT token_in, SUM(amount_in) AS amount FROM unknown.swap GROUP BY token_in";
+
+        let mut statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql).unwrap();
+
+        let statement = statements.get_mut(0).unwrap();
+
+        let result = formatter.format(statement);
+
+        assert_eq!(
+            result,
+            format!(
+                "{} SELECT to_jsonb(sub.*) AS data FROM ( {} ) AS sub",
+                CTE_PREFIX,
+                "SELECT token_in, SUM(amount_in) AS amount FROM \"swap\" GROUP BY token_in"
+            )
+        );
+    }
+}
diff --git a/store/postgres/src/sql/mod.rs b/store/postgres/src/sql/mod.rs
new file mode 100644
index 00000000000..d3962ae968e
--- /dev/null
+++ b/store/postgres/src/sql/mod.rs
@@ -0,0 +1,10 @@
+mod constants;
+mod formatter;
+mod parser;
+mod validation;
+
+use std::collections::{HashMap, HashSet};
+
+pub(self) type Schema = HashMap<String, HashSet<String>>; // HashMap<Table, HashSet<Column>>
+
+pub use parser::Parser;
diff --git a/store/postgres/src/sql/parser.rs b/store/postgres/src/sql/parser.rs
new file mode 100644
index 00000000000..c449803fe86
--- /dev/null
+++ b/store/postgres/src/sql/parser.rs
@@ -0,0 +1,191 @@
+use super::{constants::SQL_DIALECT, formatter::Formatter, validation::Validator};
+use crate::relational::{ColumnType, Layout};
+use anyhow::{anyhow, Ok, Result};
+use graph::components::store::BLOCK_NUMBER_MAX;
+use itertools::Itertools;
+use std::sync::Arc;
+
+pub fn generate_table_prelude_from_layout(layout: &Layout) -> String {
+    let schema = &layout.catalog.site.namespace;
+    let ctes = layout
+        .tables
+        .iter()
+        .filter(|(entity, _)| !entity.is_poi())
+        .map(|(_, table)| {
+            let table_name = table.name.as_str();
+
+            let (block_column, filter) = if !table.immutable {
+                (
+                    "block_range",
+                    Some(format!(" WHERE \"block_range\" @> {}", BLOCK_NUMBER_MAX)),
+                )
+            } else {
+                ("block$", None)
+            };
+
+            let columns = table
+                .columns
+                .iter()
+                .map(|col| {
+                    if !col.is_list() && col.column_type == ColumnType::Bytes {
+                        format!(
+                            r#"concat('0x', encode("{}", 'hex')) AS "{}""#,
+                            col.name.as_str(),
+                            col.name.as_str()
+                        )
+                    } else {
+                        format!(r#""{}""#, col.name.as_str())
+                    }
+                })
+                .chain(std::iter::once(format!(r#""{}""#, block_column)))
+                .collect::<Vec<_>>()
+                .join(", ");
+            format!(
+                "\"{table_name}\" AS (SELECT {columns} FROM \"{schema}\".\"{table_name}\"{})",
+                filter.unwrap_or_default()
+            )
+        })
+        .sorted()
+        .collect::<Vec<_>>()
+        .join(",\n");
+    format!("WITH {ctes}")
+}
+
+pub struct Parser {
+    schema: super::Schema,
+    prelude: String,
+}
+
+impl Parser {
+    pub fn new(layout: Arc<Layout>) -> Self {
+        Self {
+            schema: layout
+                .tables
+                .iter()
+                .filter(|(entity, _)| !entity.is_poi())
+                .map(|(_, table)| {
+                    (
+                        table.name.to_string(),
+                        table
+                            .columns
+                            .iter()
+                            .map(|column| column.name.to_string())
+                            .collect(),
+                    )
+                })
+                .collect(),
+            prelude: generate_table_prelude_from_layout(&layout),
+        }
+    }
+
+    pub fn parse_and_validate(&self, sql: &str) -> Result<String> {
+        let mut statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql)?;
+
+        let mut validator = Validator::new(&self.schema);
+        validator.validate_statements(&statements)?;
+
+        let mut formatter = Formatter::new(&self.prelude, &self.schema);
+
+        let statement = statements
+            .get_mut(0)
+            .ok_or_else(|| anyhow!("No SQL statements found"))?;
+
+        let result = formatter.format(statement);
+
+        Ok(result)
+    }
+}
+
+#[cfg(test)]
+mod test {
+
+    use crate::layout_for_tests::{make_dummy_site, Catalog, Namespace};
+
+    use super::*;
+    use graph::{data::subgraph::DeploymentHash, schema::InputSchema};
+
+    const TEST_GQL: &str = "
+        type SwapMulti @entity(immutable: true) {
+            id: Bytes!
+            sender: Bytes! # address
+            amountsIn: [BigInt!]! # uint256[]
+            tokensIn: [Bytes!]! # address[]
+            amountsOut: [BigInt!]! # uint256[]
+            tokensOut: [Bytes!]! # address[]
+            referralCode: BigInt! # uint32
+            blockNumber: BigInt!
+            blockTimestamp: BigInt!
+            transactionHash: Bytes!
+        }
+
+        type Token @entity {
+            id: ID!
+            address: Bytes! # address
+            symbol: String!
+            name: String!
+            decimals: Int!
+        }
+    ";
+
+    const NAMESPACE: &str = "sgd0815";
+
+    const SQL_QUERY: &str = "
+        with tokens as (
+            select * from (values
+            ('0x0000000000000000000000000000000000000000','ETH','Ethereum',18),
+            ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48','USDC','USD Coin',6)
+            ) as t(address,symbol,name,decimals)
+        )
+
+        select
+        date,
+        t.symbol,
+        SUM(amount)/pow(10,t.decimals) as amount
+        from (select
+        date(to_timestamp(block_timestamp) at time zone 'utc') as date,
+        token,
+        amount
+        from swap_multi as sm
+        ,unnest(sm.amounts_in,sm.tokens_in) as smi(amount,token)
+        union all
+        select
+        date(to_timestamp(block_timestamp) at time zone 'utc') as date,
+        token,
+        amount
+        from sgd1.swap_multi as sm
+        ,unnest(sm.amounts_out,sm.tokens_out) as smo(amount,token)
+        ) as tp
+        inner join tokens as t on t.address = '0x' || encode(tp.token,'hex')
+        group by tp.date,t.symbol,t.decimals
+        order by tp.date desc ,amount desc
+
+        ";
+
+    fn test_layout() -> Layout {
+        let subgraph = DeploymentHash::new("subgraph").unwrap();
+        let schema =
+            InputSchema::parse_latest(TEST_GQL, subgraph.clone()).expect("Test schema invalid");
+        let namespace = Namespace::new(NAMESPACE.to_owned()).unwrap();
+        let site = Arc::new(make_dummy_site(subgraph, namespace, "anet".to_string()));
+        let catalog =
+            Catalog::for_tests(site.clone(), Default::default()).expect("Cannot create catalog");
+        Layout::new(site, &schema, catalog).expect("Failed to construct Layout")
+    }
+
+    #[test]
+    fn parse_sql() {
+        let parser = Parser::new(Arc::new(test_layout()));
+
+        let result = parser.parse_and_validate(SQL_QUERY);
+
+        assert!(result.is_ok());
+
+        let query = result.unwrap();
+
+        assert_eq!(
+            query,
+            r#"WITH "swap_multi" AS (SELECT concat('0x', encode("id", 'hex')) AS "id", concat('0x', encode("sender", 'hex')) AS "sender", "amounts_in", "tokens_in", "amounts_out", "tokens_out", "referral_code", "block_number", "block_timestamp", concat('0x', encode("transaction_hash", 'hex')) AS "transaction_hash", "block$" FROM "sgd0815"."swap_multi"),
+"token" AS (SELECT "id", concat('0x', encode("address", 'hex')) AS "address", "symbol", "name", "decimals", "block_range" FROM "sgd0815"."token" WHERE "block_range" @> 2147483647) SELECT to_jsonb(sub.*) AS data FROM ( WITH tokens AS (SELECT * FROM (VALUES ('0x0000000000000000000000000000000000000000', 'ETH', 'Ethereum', 18), ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48', 'USDC', 'USD Coin', 6)) AS t (address, symbol, name, decimals)) SELECT date, t.symbol, SUM(amount) / pow(10, t.decimals) AS amount FROM (SELECT date(to_timestamp(block_timestamp) AT TIME ZONE 'utc') AS date, token, amount FROM "swap_multi" AS sm, UNNEST(sm.amounts_in, sm.tokens_in) AS smi (amount, token) UNION ALL SELECT date(to_timestamp(block_timestamp) AT TIME ZONE 'utc') AS date, token, amount FROM "swap_multi" AS sm, UNNEST(sm.amounts_out, sm.tokens_out) AS smo (amount, token)) AS tp JOIN tokens AS t ON t.address = '0x' || encode(tp.token, 'hex') GROUP BY tp.date, t.symbol, t.decimals ORDER BY tp.date DESC, amount DESC ) AS sub"#
+        );
+    }
+}
diff --git a/store/postgres/src/sql/validation.rs b/store/postgres/src/sql/validation.rs
new file mode 100644
index 00000000000..674cee4cc5d
--- /dev/null
+++ b/store/postgres/src/sql/validation.rs
@@ -0,0 +1,290 @@
+use sqlparser::ast::{Expr, ObjectName, Query, SetExpr, Statement, TableFactor, Visit, Visitor};
+use std::result::Result;
+use std::{collections::HashSet, ops::ControlFlow};
+
+use super::{constants::ALLOWED_FUNCTIONS, Schema};
+
+#[derive(thiserror::Error, Debug, PartialEq)]
+pub enum Error {
+    #[error("Unknown or unsupported function {0}")]
+    UnknownFunction(String),
+    #[error("Multi statement is not supported.")]
+    MultiStatementUnSupported,
+    #[error("Only SELECT query is supported.")]
+    NotSelectQuery,
+    #[error("Unknown table {0}")]
+    UnknownTable(String),
+}
+
+pub struct Validator<'a> {
+    schema: &'a Schema,
+    ctes: HashSet<String>,
+}
+
+impl<'a> Validator<'a> {
+    pub fn new(schema: &'a Schema) -> Self {
+        Self {
+            schema,
+            ctes: Default::default(),
+        }
+    }
+
+    fn validate_function_name(&self, name: &ObjectName) -> ControlFlow<Error> {
+        let name = name.to_string().to_lowercase();
+        if ALLOWED_FUNCTIONS.contains(name.as_str()) {
+            ControlFlow::Continue(())
+        } else {
+            ControlFlow::Break(Error::UnknownFunction(name))
+        }
+    }
+
+    pub fn validate_statements(&mut self, statements: &Vec<Statement>) -> Result<(), Error> {
+        self.ctes.clear();
+
+        if statements.len() > 1 {
+            return Err(Error::MultiStatementUnSupported);
+        }
+
+        if let ControlFlow::Break(error) = statements.visit(self) {
+            return Err(error);
+        }
+
+        Ok(())
+    }
+
+    fn validate_table_name(&mut self, name: &ObjectName) -> ControlFlow<Error> {
+        if let Some(table_name) = name.0.last() {
+            let table_name = table_name.to_string().to_lowercase();
+            if !self.schema.contains_key(&table_name) && !self.ctes.contains(&table_name) {
+                return ControlFlow::Break(Error::UnknownTable(table_name));
+            }
+        }
+        ControlFlow::Continue(())
+    }
+}
+
+impl Visitor for Validator<'_> {
+    type Break = Error;
+
+    fn pre_visit_statement(&mut self, _statement: &Statement) -> ControlFlow<Self::Break> {
+        match _statement {
+            Statement::Query(_) => ControlFlow::Continue(()),
+            _ => ControlFlow::Break(Error::NotSelectQuery),
+        }
+    }
+
+    fn pre_visit_query(&mut self, _query: &Query) -> ControlFlow<Self::Break> {
+        // Add common table expressions to the set of known tables
+        if let Some(ref with) = _query.with {
+            self.ctes.extend(
+                with.cte_tables
+                    .iter()
+                    .map(|cte| cte.alias.name.value.to_lowercase()),
+            );
+        }
+
+        match *_query.body {
+            SetExpr::Update(_) | SetExpr::Insert(_) => ControlFlow::Break(Error::NotSelectQuery),
+            _ => ControlFlow::Continue(()),
+        }
+    }
+
+    /// Invoked for any table function in the AST.
+    /// See [TableFactor::Table.args](sqlparser::ast::TableFactor::Table::args) for more details on identifying a table function
+    fn pre_visit_table_factor(&mut self, table_factor: &TableFactor) -> ControlFlow<Self::Break> {
+        if let TableFactor::Table { name, args, .. } = table_factor {
+            if args.is_some() {
+                return self.validate_function_name(name);
+            } else {
+                return self.validate_table_name(name);
+            }
+        }
+        ControlFlow::Continue(())
+    }
+
+    /// Invoked for any function expressions that appear in the AST
+    fn pre_visit_expr(&mut self, _expr: &Expr) -> ControlFlow<Self::Break> {
+        if let Expr::Function(function) = _expr {
+            return self.validate_function_name(&function.name);
+        }
+        ControlFlow::Continue(())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::sql::constants::SQL_DIALECT;
+    use std::collections::{HashMap, HashSet};
+
+    fn validate(sql: &str) -> Result<(), Error> {
+        let statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql).unwrap();
+
+        let schema: Schema = HashMap::from([(
+            "swap".to_owned(),
+            HashSet::from([
+                "vid".to_owned(),
+                "block$".to_owned(),
+                "id".to_owned(),
+                "sender".to_owned(),
+                "input_amount".to_owned(),
+                "input_token".to_owned(),
+                "amount_out".to_owned(),
+                "output_token".to_owned(),
+                "slippage".to_owned(),
+                "referral_code".to_owned(),
+                "block_number".to_owned(),
+                "block_timestamp".to_owned(),
+                "transaction_hash".to_owned(),
+            ]),
+        )]);
+
+        let mut validator = Validator::new(&schema);
+
+        validator.validate_statements(&statements)
+    }
+
+    #[test]
+    fn test_function_blacklisted() {
+        let result = validate(
+            "
+            SELECT
+                input_token
+            FROM swap
+            WHERE '' = (
+                SELECT
+                    CAST(pg_sleep(5) AS text
+                )
+            )",
+        );
+        assert_eq!(result, Err(Error::UnknownFunction("pg_sleep".to_owned())));
+    }
+
+    #[test]
+    fn test_table_function_blacklisted() {
+        let result = validate(
+            "
+        SELECT
+            vid,
+            k.sname
+        FROM swap,
+        LATERAL(
+            SELECT
+                current_schemas as sname
+            FROM current_schemas(true)
+        ) as k",
+        );
+        assert_eq!(
+            result,
+            Err(Error::UnknownFunction("current_schemas".to_owned()))
+        );
+    }
+
+    #[test]
+    fn test_function_blacklisted_without_parentheses() {
+        let result = validate(
+            "
+            SELECT
+                input_token
+            FROM swap
+            WHERE '' = (
+                SELECT user
+            )",
+        );
+        assert_eq!(result, Err(Error::UnknownFunction("user".to_owned())));
+    }
+
+    #[test]
+    fn test_function_whitelisted() {
+        let result = validate(
+            "
+            SELECT
+                input_token,
+                SUM(input_amount) AS total_amount
+            FROM swap
+            GROUP BY input_token
+            HAVING SUM(input_amount) > 1000
+            ",
+        );
+        assert_eq!(result, Ok(()));
+    }
+
+    #[test]
+    fn test_function_unknown() {
+        let result = validate(
+            "
+            SELECT
+                input_token
+            FROM swap
+            WHERE '' = (
+                SELECT
+                    CAST(do_strange_math(amount_in) AS text
+                )
+            )",
+        );
+        assert_eq!(
+            result,
+            Err(Error::UnknownFunction("do_strange_math".to_owned()))
+        );
+    }
+
+    #[test]
+    fn test_not_select_ddl() {
+        let result = validate(
+            "
+            CREATE TABLE foo (id INT PRIMARY KEY);
+            ",
+        );
+        assert_eq!(result, Err(Error::NotSelectQuery));
+    }
+
+    #[test]
+    fn test_not_select_insert() {
+        let result = validate(
+            "
+            INSERT INTO foo VALUES (1);
+            ",
+        );
+        assert_eq!(result, Err(Error::NotSelectQuery));
+    }
+
+    #[test]
+    fn test_common_table_expression() {
+        let result = validate(
+            "
+            WITH foo AS (SELECT 1) SELECT * FROM foo;
+            ",
+        );
+        assert_eq!(result, Ok(()));
+    }
+
+    #[test]
+    fn test_common_table_expression_with_effect() {
+        let result = validate(
+            "
+            WITH foo AS (INSERT INTO target VALUES(1)) SELECT * FROM bar;
+            ",
+        );
+        assert_eq!(result, Err(Error::NotSelectQuery));
+    }
+
+    #[test]
+    fn test_no_multi_statement() {
+        let result = validate(
+            "
+            SELECT 1; SELECT 2;
+            ",
+        );
+        assert_eq!(result, Err(Error::MultiStatementUnSupported));
+    }
+
+    #[test]
+    fn test_table_unknown() {
+        let result = validate(
+            "
+            SELECT * FROM unknown_table;
+            ",
+        );
+        assert_eq!(result, Err(Error::UnknownTable("unknown_table".to_owned())));
+    }
+}

From 8b5061564bb0e99fb91527d6ec2fa56d58a116b0 Mon Sep 17 00:00:00 2001
From: David Lutterkort <lutter@watzmann.net>
Date: Tue, 21 Jan 2025 16:41:51 -0800
Subject: [PATCH 03/13] store: Use Layout for schema information

---
 store/postgres/src/sql/formatter.rs  | 38 ++++++++--------
 store/postgres/src/sql/mod.rs        | 25 +++++++++--
 store/postgres/src/sql/parser.rs     | 26 +++--------
 store/postgres/src/sql/validation.rs | 65 ++++++++++++++--------------
 4 files changed, 79 insertions(+), 75 deletions(-)

diff --git a/store/postgres/src/sql/formatter.rs b/store/postgres/src/sql/formatter.rs
index dff810aa6ea..0a2aafb6413 100644
--- a/store/postgres/src/sql/formatter.rs
+++ b/store/postgres/src/sql/formatter.rs
@@ -1,16 +1,16 @@
 use sqlparser::ast::{ObjectName, Statement, TableFactor, VisitMut, VisitorMut};
 use std::ops::ControlFlow;
 
-use super::Schema;
+use crate::relational::{Layout, SqlName};
 
 pub struct Formatter<'a> {
     prelude: &'a str,
-    schema: &'a Schema,
+    layout: &'a Layout,
 }
 
 impl<'a> Formatter<'a> {
-    pub fn new(prelude: &'a str, schema: &'a Schema) -> Self {
-        Self { prelude, schema }
+    pub fn new(prelude: &'a str, layout: &'a Layout) -> Self {
+        Self { prelude, layout }
     }
 
     fn prepend_prefix_to_object_name_mut(&self, name: &mut ObjectName) {
@@ -20,7 +20,8 @@ impl<'a> Formatter<'a> {
 
         // Ensure schema table names are quoted to match the CTEs generated in the prelude.
         if let Some(table_name) = table_identifier.last_mut() {
-            if self.schema.contains_key(&table_name.value) {
+            let sql_name = SqlName::verbatim(table_name.to_string());
+            if self.layout.table(&sql_name).is_some() {
                 table_name.quote_style = Some('"');
             }
         }
@@ -52,10 +53,19 @@ impl VisitorMut for Formatter<'_> {
 
 #[cfg(test)]
 mod test {
-    use std::collections::HashSet;
-
     use super::*;
-    use crate::sql::constants::SQL_DIALECT;
+    use crate::sql::{constants::SQL_DIALECT, test::make_layout};
+
+    const GQL: &str = "
+    type Swap @entity {
+        id: ID!
+        amountIn: BigDecimal!
+        amountOut: BigDecimal!
+        tokenIn: Bytes!
+        tokenOut: Bytes!
+    }
+    ";
+
     const CTE_PREFIX: &str = "WITH \"swap\" AS (
             SELECT
             id,
@@ -69,17 +79,9 @@ mod test {
 
     #[test]
     fn format_sql() {
-        let mut schema = Schema::new();
-        schema.insert(
-            "swap".to_string(),
-            HashSet::from_iter(
-                ["id", "amount_in", "amount_out", "token_in", "token_out"]
-                    .into_iter()
-                    .map(|s| s.to_string()),
-            ),
-        );
+        let layout = make_layout(GQL);
 
-        let mut formatter = Formatter::new(CTE_PREFIX, &schema);
+        let mut formatter = Formatter::new(CTE_PREFIX, &layout);
 
         let sql = "SELECT token_in, SUM(amount_in) AS amount FROM unknown.swap GROUP BY token_in";
 
diff --git a/store/postgres/src/sql/mod.rs b/store/postgres/src/sql/mod.rs
index d3962ae968e..cf655050753 100644
--- a/store/postgres/src/sql/mod.rs
+++ b/store/postgres/src/sql/mod.rs
@@ -3,8 +3,27 @@ mod formatter;
 mod parser;
 mod validation;
 
-use std::collections::{HashMap, HashSet};
+pub use parser::Parser;
 
-pub(self) type Schema = HashMap<String, HashSet<String>>; // HashMap<Table, HashSet<Column>>
+#[cfg(test)]
+mod test {
+    use std::{collections::BTreeSet, sync::Arc};
 
-pub use parser::Parser;
+    use graph::{prelude::DeploymentHash, schema::InputSchema};
+
+    use crate::{
+        catalog::Catalog,
+        primary::{make_dummy_site, Namespace},
+        relational::Layout,
+    };
+
+    pub(crate) fn make_layout(gql: &str) -> Layout {
+        let subgraph = DeploymentHash::new("Qmasubgraph").unwrap();
+        let schema = InputSchema::parse_latest(gql, subgraph.clone()).unwrap();
+        let namespace = Namespace::new("sgd0815".to_string()).unwrap();
+        let site = Arc::new(make_dummy_site(subgraph, namespace, "anet".to_string()));
+        let catalog = Catalog::for_tests(site.clone(), BTreeSet::new()).unwrap();
+        let layout = Layout::new(site, &schema, catalog).unwrap();
+        layout
+    }
+}
diff --git a/store/postgres/src/sql/parser.rs b/store/postgres/src/sql/parser.rs
index c449803fe86..7e972d96777 100644
--- a/store/postgres/src/sql/parser.rs
+++ b/store/postgres/src/sql/parser.rs
@@ -52,39 +52,23 @@ pub fn generate_table_prelude_from_layout(layout: &Layout) -> String {
 }
 
 pub struct Parser {
-    schema: super::Schema,
+    layout: Arc<Layout>,
     prelude: String,
 }
 
 impl Parser {
     pub fn new(layout: Arc<Layout>) -> Self {
-        Self {
-            schema: layout
-                .tables
-                .iter()
-                .filter(|(entity, _)| !entity.is_poi())
-                .map(|(_, table)| {
-                    (
-                        table.name.to_string(),
-                        table
-                            .columns
-                            .iter()
-                            .map(|column| column.name.to_string())
-                            .collect(),
-                    )
-                })
-                .collect(),
-            prelude: generate_table_prelude_from_layout(&layout),
-        }
+        let prelude = generate_table_prelude_from_layout(&layout);
+        Self { layout, prelude }
     }
 
     pub fn parse_and_validate(&self, sql: &str) -> Result<String> {
         let mut statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql)?;
 
-        let mut validator = Validator::new(&self.schema);
+        let mut validator = Validator::new(&self.layout);
         validator.validate_statements(&statements)?;
 
-        let mut formatter = Formatter::new(&self.prelude, &self.schema);
+        let mut formatter = Formatter::new(&self.prelude, &self.layout);
 
         let statement = statements
             .get_mut(0)
diff --git a/store/postgres/src/sql/validation.rs b/store/postgres/src/sql/validation.rs
index 674cee4cc5d..4d188e0cc38 100644
--- a/store/postgres/src/sql/validation.rs
+++ b/store/postgres/src/sql/validation.rs
@@ -2,7 +2,9 @@ use sqlparser::ast::{Expr, ObjectName, Query, SetExpr, Statement, TableFactor, V
 use std::result::Result;
 use std::{collections::HashSet, ops::ControlFlow};
 
-use super::{constants::ALLOWED_FUNCTIONS, Schema};
+use crate::relational::Layout;
+
+use super::constants::ALLOWED_FUNCTIONS;
 
 #[derive(thiserror::Error, Debug, PartialEq)]
 pub enum Error {
@@ -17,14 +19,14 @@ pub enum Error {
 }
 
 pub struct Validator<'a> {
-    schema: &'a Schema,
+    layout: &'a Layout,
     ctes: HashSet<String>,
 }
 
 impl<'a> Validator<'a> {
-    pub fn new(schema: &'a Schema) -> Self {
+    pub fn new(layout: &'a Layout) -> Self {
         Self {
-            schema,
+            layout,
             ctes: Default::default(),
         }
     }
@@ -54,9 +56,9 @@ impl<'a> Validator<'a> {
 
     fn validate_table_name(&mut self, name: &ObjectName) -> ControlFlow<Error> {
         if let Some(table_name) = name.0.last() {
-            let table_name = table_name.to_string().to_lowercase();
-            if !self.schema.contains_key(&table_name) && !self.ctes.contains(&table_name) {
-                return ControlFlow::Break(Error::UnknownTable(table_name));
+            let name = &table_name.value;
+            if self.layout.table(name).is_none() && !self.ctes.contains(name) {
+                return ControlFlow::Break(Error::UnknownTable(name.to_string()));
             }
         }
         ControlFlow::Continue(())
@@ -114,38 +116,35 @@ impl Visitor for Validator<'_> {
 #[cfg(test)]
 mod test {
     use super::*;
-    use crate::sql::constants::SQL_DIALECT;
-    use std::collections::{HashMap, HashSet};
+    use crate::sql::{constants::SQL_DIALECT, test::make_layout};
 
     fn validate(sql: &str) -> Result<(), Error> {
         let statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql).unwrap();
 
-        let schema: Schema = HashMap::from([(
-            "swap".to_owned(),
-            HashSet::from([
-                "vid".to_owned(),
-                "block$".to_owned(),
-                "id".to_owned(),
-                "sender".to_owned(),
-                "input_amount".to_owned(),
-                "input_token".to_owned(),
-                "amount_out".to_owned(),
-                "output_token".to_owned(),
-                "slippage".to_owned(),
-                "referral_code".to_owned(),
-                "block_number".to_owned(),
-                "block_timestamp".to_owned(),
-                "transaction_hash".to_owned(),
-            ]),
-        )]);
-
-        let mut validator = Validator::new(&schema);
+        const GQL: &str = "
+            type Swap @entity {
+                id: ID!
+                sender: Bytes!
+                inputAmount: BigDecimal!
+                inputToken: Bytes!
+                amountOut: BigDecimal!
+                outputToken: Bytes!
+                slippage: BigDecimal!
+                referralCode: String
+                blockNumber: Int!
+                blockTimestamp: Timestamp!
+                transactionHash: Bytes!
+            }";
+
+        let layout = make_layout(GQL);
+
+        let mut validator = Validator::new(&layout);
 
         validator.validate_statements(&statements)
     }
 
     #[test]
-    fn test_function_blacklisted() {
+    fn test_function_disallowed() {
         let result = validate(
             "
             SELECT
@@ -161,7 +160,7 @@ mod test {
     }
 
     #[test]
-    fn test_table_function_blacklisted() {
+    fn test_table_function_disallowed() {
         let result = validate(
             "
         SELECT
@@ -181,7 +180,7 @@ mod test {
     }
 
     #[test]
-    fn test_function_blacklisted_without_parentheses() {
+    fn test_function_disallowed_without_parentheses() {
         let result = validate(
             "
             SELECT
@@ -195,7 +194,7 @@ mod test {
     }
 
     #[test]
-    fn test_function_whitelisted() {
+    fn test_function_allowed() {
         let result = validate(
             "
             SELECT

From a2dca57777c0ea720bffddb18a315987a114c730 Mon Sep 17 00:00:00 2001
From: David Lutterkort <lutter@watzmann.net>
Date: Thu, 23 Jan 2025 21:07:59 -0800
Subject: [PATCH 04/13] store: Rewrite table names in from clauses

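Rewrite 'from table [as alias]' to 'from (select * from table) as alias',
where the inner table reference uses the table's fully qualified name. As
a rough sketch (assuming a deployment schema sgd0815, and ignoring the
final to_jsonb wrapping), a reference like

    select * from token as t

becomes

    SELECT * FROM (SELECT * FROM "sgd0815"."token") AS t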
---
 store/postgres/src/sql/formatter.rs  | 103 ---------------------------
 store/postgres/src/sql/mod.rs        |   1 -
 store/postgres/src/sql/parser.rs     | 102 ++++++--------------------
 store/postgres/src/sql/validation.rs |  78 ++++++++++++++------
 4 files changed, 78 insertions(+), 206 deletions(-)
 delete mode 100644 store/postgres/src/sql/formatter.rs

diff --git a/store/postgres/src/sql/formatter.rs b/store/postgres/src/sql/formatter.rs
deleted file mode 100644
index 0a2aafb6413..00000000000
--- a/store/postgres/src/sql/formatter.rs
+++ /dev/null
@@ -1,103 +0,0 @@
-use sqlparser::ast::{ObjectName, Statement, TableFactor, VisitMut, VisitorMut};
-use std::ops::ControlFlow;
-
-use crate::relational::{Layout, SqlName};
-
-pub struct Formatter<'a> {
-    prelude: &'a str,
-    layout: &'a Layout,
-}
-
-impl<'a> Formatter<'a> {
-    pub fn new(prelude: &'a str, layout: &'a Layout) -> Self {
-        Self { prelude, layout }
-    }
-
-    fn prepend_prefix_to_object_name_mut(&self, name: &mut ObjectName) {
-        let table_identifier = &mut name.0;
-        // remove all but the last identifier
-        table_identifier.drain(0..table_identifier.len() - 1);
-
-        // Ensure schema table names are quoted to match the CTEs generated in the prelude.
-        if let Some(table_name) = table_identifier.last_mut() {
-            let sql_name = SqlName::verbatim(table_name.to_string());
-            if self.layout.table(&sql_name).is_some() {
-                table_name.quote_style = Some('"');
-            }
-        }
-    }
-
-    pub fn format(&mut self, statement: &mut Statement) -> String {
-        statement.visit(self);
-
-        format!(
-            "{} SELECT to_jsonb(sub.*) AS data FROM ( {} ) AS sub",
-            self.prelude, statement
-        )
-    }
-}
-
-impl VisitorMut for Formatter<'_> {
-    type Break = ();
-
-    fn pre_visit_table_factor(
-        &mut self,
-        table_factor: &mut TableFactor,
-    ) -> ControlFlow<Self::Break> {
-        if let TableFactor::Table { name, .. } = table_factor {
-            self.prepend_prefix_to_object_name_mut(name);
-        }
-        ControlFlow::Continue(())
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::*;
-    use crate::sql::{constants::SQL_DIALECT, test::make_layout};
-
-    const GQL: &str = "
-    type Swap @entity {
-        id: ID!
-        amountIn: BigDecimal!
-        amountOut: BigDecimal!
-        tokenIn: Bytes!
-        tokenOut: Bytes!
-    }
-    ";
-
-    const CTE_PREFIX: &str = "WITH \"swap\" AS (
-            SELECT
-            id,
-            amount_in,
-            amount_out,
-            concat('0x',encode(token_in,'hex')) as token_in,
-            concat('0x',encode(token_out,'hex')) AS token_out
-            FROM
-            sdg1.swap
-        )";
-
-    #[test]
-    fn format_sql() {
-        let layout = make_layout(GQL);
-
-        let mut formatter = Formatter::new(CTE_PREFIX, &layout);
-
-        let sql = "SELECT token_in, SUM(amount_in) AS amount FROM unknown.swap GROUP BY token_in";
-
-        let mut statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql).unwrap();
-
-        let statement = statements.get_mut(0).unwrap();
-
-        let result = formatter.format(statement);
-
-        assert_eq!(
-            result,
-            format!(
-                "{} SELECT to_jsonb(sub.*) AS data FROM ( {} ) AS sub",
-                CTE_PREFIX,
-                "SELECT token_in, SUM(amount_in) AS amount FROM \"swap\" GROUP BY token_in"
-            )
-        );
-    }
-}
diff --git a/store/postgres/src/sql/mod.rs b/store/postgres/src/sql/mod.rs
index cf655050753..55917f854c4 100644
--- a/store/postgres/src/sql/mod.rs
+++ b/store/postgres/src/sql/mod.rs
@@ -1,5 +1,4 @@
 mod constants;
-mod formatter;
 mod parser;
 mod validation;
 
diff --git a/store/postgres/src/sql/parser.rs b/store/postgres/src/sql/parser.rs
index 7e972d96777..ded1eac9b45 100644
--- a/store/postgres/src/sql/parser.rs
+++ b/store/postgres/src/sql/parser.rs
@@ -1,92 +1,41 @@
-use super::{constants::SQL_DIALECT, formatter::Formatter, validation::Validator};
-use crate::relational::{ColumnType, Layout};
+use super::{constants::SQL_DIALECT, validation::Validator};
+use crate::relational::Layout;
 use anyhow::{anyhow, Ok, Result};
-use graph::components::store::BLOCK_NUMBER_MAX;
-use itertools::Itertools;
 use std::sync::Arc;
 
-pub fn generate_table_prelude_from_layout(layout: &Layout) -> String {
-    let schema = &layout.catalog.site.namespace;
-    let ctes = layout
-        .tables
-        .iter()
-        .filter(|(entity, _)| !entity.is_poi())
-        .map(|(_, table)| {
-            let table_name = table.name.as_str();
-
-            let (block_column, filter) = if !table.immutable {
-                (
-                    "block_range",
-                    Some(format!(" WHERE \"block_range\" @> {}", BLOCK_NUMBER_MAX)),
-                )
-            } else {
-                ("block$", None)
-            };
-
-            let columns = table
-                .columns
-                .iter()
-                .map(|col| {
-                    if !col.is_list() && col.column_type == ColumnType::Bytes {
-                        format!(
-                            r#"concat('0x', encode("{}", 'hex')) AS "{}""#,
-                            col.name.as_str(),
-                            col.name.as_str()
-                        )
-                    } else {
-                        format!(r#""{}""#, col.name.as_str())
-                    }
-                })
-                .chain(std::iter::once(format!(r#""{}""#, block_column)))
-                .collect::<Vec<_>>()
-                .join(", ");
-            format!(
-                "\"{table_name}\" AS (SELECT {columns} FROM \"{schema}\".\"{table_name}\"{})",
-                filter.unwrap_or_default()
-            )
-        })
-        .sorted()
-        .collect::<Vec<_>>()
-        .join(",\n");
-    format!("WITH {ctes}")
-}
-
 pub struct Parser {
     layout: Arc<Layout>,
-    prelude: String,
 }
 
 impl Parser {
     pub fn new(layout: Arc<Layout>) -> Self {
-        let prelude = generate_table_prelude_from_layout(&layout);
-        Self { layout, prelude }
+        Self { layout }
     }
 
     pub fn parse_and_validate(&self, sql: &str) -> Result<String> {
         let mut statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql)?;
 
         let mut validator = Validator::new(&self.layout);
-        validator.validate_statements(&statements)?;
-
-        let mut formatter = Formatter::new(&self.prelude, &self.layout);
+        validator.validate_statements(&mut statements)?;
 
         let statement = statements
             .get_mut(0)
             .ok_or_else(|| anyhow!("No SQL statements found"))?;
 
-        let result = formatter.format(statement);
-
-        Ok(result)
+        let sql = format!(
+            "select to_jsonb(sub.*) as data from ( {} ) as sub",
+            statement
+        );
+        Ok(sql)
     }
 }
 
 #[cfg(test)]
 mod test {
 
-    use crate::layout_for_tests::{make_dummy_site, Catalog, Namespace};
+    use crate::sql::test::make_layout;
 
     use super::*;
-    use graph::{data::subgraph::DeploymentHash, schema::InputSchema};
 
     const TEST_GQL: &str = "
         type SwapMulti @entity(immutable: true) {
@@ -111,8 +60,6 @@ mod test {
         }
     ";
 
-    const NAMESPACE: &str = "sgd0815";
-
     const SQL_QUERY: &str = "
         with tokens as (
             select * from (values
@@ -145,26 +92,15 @@ mod test {
 
         ";
 
-    fn test_layout() -> Layout {
-        let subgraph = DeploymentHash::new("subgraph").unwrap();
-        let schema =
-            InputSchema::parse_latest(TEST_GQL, subgraph.clone()).expect("Test schema invalid");
-        let namespace = Namespace::new(NAMESPACE.to_owned()).unwrap();
-        let site = Arc::new(make_dummy_site(subgraph, namespace, "anet".to_string()));
-        let catalog =
-            Catalog::for_tests(site.clone(), Default::default()).expect("Cannot create catalog");
-        Layout::new(site, &schema, catalog).expect("Failed to construct Layout")
+    fn parse_and_validate(sql: &str) -> Result<String, anyhow::Error> {
+        let parser = Parser::new(Arc::new(make_layout(TEST_GQL)));
+
+        parser.parse_and_validate(sql)
     }
 
     #[test]
     fn parse_sql() {
-        let parser = Parser::new(Arc::new(test_layout()));
-
-        let result = parser.parse_and_validate(SQL_QUERY);
-
-        assert!(result.is_ok());
-
-        let query = result.unwrap();
+        let query = parse_and_validate(SQL_QUERY).unwrap();
 
         assert_eq!(
             query,
@@ -172,4 +108,12 @@ mod test {
 "token" AS (SELECT "id", concat('0x', encode("address", 'hex')) AS "address", "symbol", "name", "decimals", "block_range" FROM "sgd0815"."token" WHERE "block_range" @> 2147483647) SELECT to_jsonb(sub.*) AS data FROM ( WITH tokens AS (SELECT * FROM (VALUES ('0x0000000000000000000000000000000000000000', 'ETH', 'Ethereum', 18), ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48', 'USDC', 'USD Coin', 6)) AS t (address, symbol, name, decimals)) SELECT date, t.symbol, SUM(amount) / pow(10, t.decimals) AS amount FROM (SELECT date(to_timestamp(block_timestamp) AT TIME ZONE 'utc') AS date, token, amount FROM "swap_multi" AS sm, UNNEST(sm.amounts_in, sm.tokens_in) AS smi (amount, token) UNION ALL SELECT date(to_timestamp(block_timestamp) AT TIME ZONE 'utc') AS date, token, amount FROM "swap_multi" AS sm, UNNEST(sm.amounts_out, sm.tokens_out) AS smo (amount, token)) AS tp JOIN tokens AS t ON t.address = '0x' || encode(tp.token, 'hex') GROUP BY tp.date, t.symbol, t.decimals ORDER BY tp.date DESC, amount DESC ) AS sub"#
         );
     }
+
+    #[test]
+    fn parse_simple_sql() {
+        let query =
+            parse_and_validate("select symbol, address from token where decimals > 10").unwrap();
+
+        println!("{}", query);
+    }
 }
diff --git a/store/postgres/src/sql/validation.rs b/store/postgres/src/sql/validation.rs
index 4d188e0cc38..6eb8606d128 100644
--- a/store/postgres/src/sql/validation.rs
+++ b/store/postgres/src/sql/validation.rs
@@ -1,10 +1,14 @@
-use sqlparser::ast::{Expr, ObjectName, Query, SetExpr, Statement, TableFactor, Visit, Visitor};
+use sqlparser::ast::{
+    Expr, Ident, ObjectName, Query, SetExpr, Statement, TableAlias, TableFactor, VisitMut,
+    VisitorMut,
+};
+use sqlparser::parser::Parser;
 use std::result::Result;
 use std::{collections::HashSet, ops::ControlFlow};
 
 use crate::relational::Layout;
 
-use super::constants::ALLOWED_FUNCTIONS;
+use super::constants::{ALLOWED_FUNCTIONS, SQL_DIALECT};
 
 #[derive(thiserror::Error, Debug, PartialEq)]
 pub enum Error {
@@ -40,7 +44,7 @@ impl<'a> Validator<'a> {
         }
     }
 
-    pub fn validate_statements(&mut self, statements: &Vec<Statement>) -> Result<(), Error> {
+    pub fn validate_statements(&mut self, statements: &mut Vec<Statement>) -> Result<(), Error> {
         self.ctes.clear();
 
         if statements.len() > 1 {
@@ -53,29 +57,19 @@ impl<'a> Validator<'a> {
 
         Ok(())
     }
-
-    fn validate_table_name(&mut self, name: &ObjectName) -> ControlFlow<Error> {
-        if let Some(table_name) = name.0.last() {
-            let name = &table_name.value;
-            if self.layout.table(name).is_none() && !self.ctes.contains(name) {
-                return ControlFlow::Break(Error::UnknownTable(name.to_string()));
-            }
-        }
-        ControlFlow::Continue(())
-    }
 }
 
-impl Visitor for Validator<'_> {
+impl VisitorMut for Validator<'_> {
     type Break = Error;
 
-    fn pre_visit_statement(&mut self, _statement: &Statement) -> ControlFlow<Self::Break> {
+    fn pre_visit_statement(&mut self, _statement: &mut Statement) -> ControlFlow<Self::Break> {
         match _statement {
             Statement::Query(_) => ControlFlow::Continue(()),
             _ => ControlFlow::Break(Error::NotSelectQuery),
         }
     }
 
-    fn pre_visit_query(&mut self, _query: &Query) -> ControlFlow<Self::Break> {
+    fn pre_visit_query(&mut self, _query: &mut Query) -> ControlFlow<Self::Break> {
         // Add common table expressions to the set of known tables
         if let Some(ref with) = _query.with {
             self.ctes.extend(
@@ -93,19 +87,57 @@ impl Visitor for Validator<'_> {
 
     /// Invoked for any table function in the AST.
     /// See [TableFactor::Table.args](sqlparser::ast::TableFactor::Table::args) for more details on identifying a table function
-    fn pre_visit_table_factor(&mut self, table_factor: &TableFactor) -> ControlFlow<Self::Break> {
-        if let TableFactor::Table { name, args, .. } = table_factor {
+    fn post_visit_table_factor(
+        &mut self,
+        table_factor: &mut TableFactor,
+    ) -> ControlFlow<Self::Break> {
+        if let TableFactor::Table {
+            name, args, alias, ..
+        } = table_factor
+        {
             if args.is_some() {
                 return self.validate_function_name(name);
-            } else {
-                return self.validate_table_name(name);
             }
+            let table = if let Some(table_name) = name.0.last() {
+                let name = &table_name.value;
+                let Some(table) = self.layout.table(name) else {
+                    if !self.ctes.contains(name) {
+                        return ControlFlow::Break(Error::UnknownTable(name.to_string()));
+                    } else {
+                        return ControlFlow::Continue(());
+                    }
+                };
+                table
+            } else {
+                return ControlFlow::Continue(());
+            };
+
+            // Change 'from table [as alias]' to 'from (select * from table) as alias'
+            let query = format!("select * from {}", table.qualified_name);
+            let Statement::Query(subquery) = Parser::parse_sql(&SQL_DIALECT, &query)
+                .unwrap()
+                .pop()
+                .unwrap()
+            else {
+                unreachable!();
+            };
+            let alias = alias.clone().or_else(|| {
+                Some(TableAlias {
+                    name: Ident::new(table.name.as_str()),
+                    columns: vec![],
+                })
+            });
+            *table_factor = TableFactor::Derived {
+                lateral: false,
+                subquery,
+                alias,
+            };
         }
         ControlFlow::Continue(())
     }
 
     /// Invoked for any function expressions that appear in the AST
-    fn pre_visit_expr(&mut self, _expr: &Expr) -> ControlFlow<Self::Break> {
+    fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow<Self::Break> {
         if let Expr::Function(function) = _expr {
             return self.validate_function_name(&function.name);
         }
@@ -119,7 +151,7 @@ mod test {
     use crate::sql::{constants::SQL_DIALECT, test::make_layout};
 
     fn validate(sql: &str) -> Result<(), Error> {
-        let statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql).unwrap();
+        let mut statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql).unwrap();
 
         const GQL: &str = "
             type Swap @entity {
@@ -140,7 +172,7 @@ mod test {
 
         let mut validator = Validator::new(&layout);
 
-        validator.validate_statements(&statements)
+        validator.validate_statements(&mut statements)
     }
 
     #[test]

From bb3850b742b971d42ac13b40430c0ab510c8ac60 Mon Sep 17 00:00:00 2001
From: David Lutterkort <lutter@watzmann.net>
Date: Fri, 24 Jan 2025 13:18:11 -0800
Subject: [PATCH 05/13] store: Revamp query rewriting

1. Do not use CTEs to inject a view of a table at a certain block. Instead,
   rewrite the 'from' clause (see the example below)
2. Do not turn bytea columns into string columns since that is hugely
   wasteful
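
For example, at the maximum block number the query

    select symbol, address from token where decimals > 10

is now rewritten to

    select to_jsonb(sub.*) as data from (
      SELECT symbol, address FROM (
        SELECT * FROM "sgd0815"."token" WHERE block_range @> 2147483647) AS token
      WHERE decimals > 10 ) as sub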
---
 store/postgres/src/query_store.rs    |  2 +-
 store/postgres/src/sql/parser.rs     | 16 ++++++++++++----
 store/postgres/src/sql/validation.rs | 22 +++++++++++++++++++---
 3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/store/postgres/src/query_store.rs b/store/postgres/src/query_store.rs
index f6b2a22712c..c0eeff16dab 100644
--- a/store/postgres/src/query_store.rs
+++ b/store/postgres/src/query_store.rs
@@ -30,7 +30,7 @@ impl QueryStore {
     ) -> Self {
         let sql_parser = store
             .find_layout(site.clone())
-            .map(|layout| Parser::new(layout));
+            .map(|layout| Parser::new(layout, BLOCK_NUMBER_MAX));
         QueryStore {
             site,
             replica_id,
diff --git a/store/postgres/src/sql/parser.rs b/store/postgres/src/sql/parser.rs
index ded1eac9b45..7ea0d74f107 100644
--- a/store/postgres/src/sql/parser.rs
+++ b/store/postgres/src/sql/parser.rs
@@ -1,21 +1,23 @@
 use super::{constants::SQL_DIALECT, validation::Validator};
 use crate::relational::Layout;
 use anyhow::{anyhow, Ok, Result};
+use graph::prelude::BlockNumber;
 use std::sync::Arc;
 
 pub struct Parser {
     layout: Arc<Layout>,
+    block: BlockNumber,
 }
 
 impl Parser {
-    pub fn new(layout: Arc<Layout>) -> Self {
-        Self { layout }
+    pub fn new(layout: Arc<Layout>, block: BlockNumber) -> Self {
+        Self { layout, block }
     }
 
     pub fn parse_and_validate(&self, sql: &str) -> Result<String> {
         let mut statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql)?;
 
-        let mut validator = Validator::new(&self.layout);
+        let mut validator = Validator::new(&self.layout, self.block);
         validator.validate_statements(&mut statements)?;
 
         let statement = statements
@@ -33,6 +35,8 @@ impl Parser {
 #[cfg(test)]
 mod test {
 
+    use graph::prelude::BLOCK_NUMBER_MAX;
+
     use crate::sql::test::make_layout;
 
     use super::*;
@@ -93,7 +97,7 @@ mod test {
         ";
 
     fn parse_and_validate(sql: &str) -> Result<String, anyhow::Error> {
-        let parser = Parser::new(Arc::new(make_layout(TEST_GQL)));
+        let parser = Parser::new(Arc::new(make_layout(TEST_GQL)), BLOCK_NUMBER_MAX);
 
         parser.parse_and_validate(sql)
     }
@@ -114,6 +118,10 @@ mod test {
         let query =
             parse_and_validate("select symbol, address from token where decimals > 10").unwrap();
 
+        assert_eq!(
+            query,
+            r#"select to_jsonb(sub.*) as data from ( SELECT symbol, address FROM (SELECT * FROM "sgd0815"."token" WHERE block_range @> 2147483647) AS token WHERE decimals > 10 ) as sub"#
+        );
         println!("{}", query);
     }
 }
diff --git a/store/postgres/src/sql/validation.rs b/store/postgres/src/sql/validation.rs
index 6eb8606d128..7639216aa0a 100644
--- a/store/postgres/src/sql/validation.rs
+++ b/store/postgres/src/sql/validation.rs
@@ -1,3 +1,4 @@
+use graph::prelude::BlockNumber;
 use sqlparser::ast::{
     Expr, Ident, ObjectName, Query, SetExpr, Statement, TableAlias, TableFactor, VisitMut,
     VisitorMut,
@@ -6,6 +7,7 @@ use sqlparser::parser::Parser;
 use std::result::Result;
 use std::{collections::HashSet, ops::ControlFlow};
 
+use crate::block_range::{BLOCK_COLUMN, BLOCK_RANGE_COLUMN};
 use crate::relational::Layout;
 
 use super::constants::{ALLOWED_FUNCTIONS, SQL_DIALECT};
@@ -25,13 +27,15 @@ pub enum Error {
 pub struct Validator<'a> {
     layout: &'a Layout,
     ctes: HashSet<String>,
+    block: BlockNumber,
 }
 
 impl<'a> Validator<'a> {
-    pub fn new(layout: &'a Layout) -> Self {
+    pub fn new(layout: &'a Layout, block: BlockNumber) -> Self {
         Self {
             layout,
             ctes: Default::default(),
+            block,
         }
     }
 
@@ -113,7 +117,17 @@ impl VisitorMut for Validator<'_> {
             };
 
             // Change 'from table [as alias]' to 'from (select * from table) as alias'
-            let query = format!("select * from {}", table.qualified_name);
+            let query = if table.immutable {
+                format!(
+                    "select * from {} where {} <= {}",
+                    table.qualified_name, BLOCK_COLUMN, self.block
+                )
+            } else {
+                format!(
+                    "select * from {} where {} @> {}",
+                    table.qualified_name, BLOCK_RANGE_COLUMN, self.block
+                )
+            };
             let Statement::Query(subquery) = Parser::parse_sql(&SQL_DIALECT, &query)
                 .unwrap()
                 .pop()
@@ -147,6 +161,8 @@ impl VisitorMut for Validator<'_> {
 
 #[cfg(test)]
 mod test {
+    use graph::prelude::BLOCK_NUMBER_MAX;
+
     use super::*;
     use crate::sql::{constants::SQL_DIALECT, test::make_layout};
 
@@ -170,7 +186,7 @@ mod test {
 
         let layout = make_layout(GQL);
 
-        let mut validator = Validator::new(&layout);
+        let mut validator = Validator::new(&layout, BLOCK_NUMBER_MAX);
 
         validator.validate_statements(&mut statements)
     }

From 82d9ec764ac5b4b426a262407450cdcd7c61a87c Mon Sep 17 00:00:00 2001
From: David Lutterkort <lutter@watzmann.net>
Date: Fri, 24 Jan 2025 14:40:25 -0800
Subject: [PATCH 06/13] store: Extract SQL parsing tests into a YAML file

That setup makes it much easier to add more tests that check that we scrub
dangerous constructs from SQL
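
Each case gives the input SQL and either the expected rewritten query
(ok) or a fragment of the expected error message (err), e.g.

    - sql: select symbol, address from token where decimals > 10
      ok: >
        select to_jsonb(sub.*) as data from (
          SELECT symbol, address FROM (
            SELECT * FROM "sgd0815"."token" WHERE block_range @> 2147483647) AS token
          WHERE decimals > 10 ) as sub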
---
 store/postgres/src/sql/parser.rs         | 160 ++++++++++++++---------
 store/postgres/src/sql/parser_tests.yaml |  55 ++++++++
 2 files changed, 153 insertions(+), 62 deletions(-)
 create mode 100644 store/postgres/src/sql/parser_tests.yaml

diff --git a/store/postgres/src/sql/parser.rs b/store/postgres/src/sql/parser.rs
index 7ea0d74f107..4d19fd444fa 100644
--- a/store/postgres/src/sql/parser.rs
+++ b/store/postgres/src/sql/parser.rs
@@ -34,25 +34,32 @@ impl Parser {
 
 #[cfg(test)]
 mod test {
+    use std::sync::Arc;
 
-    use graph::prelude::BLOCK_NUMBER_MAX;
+    use crate::sql::{parser::SQL_DIALECT, test::make_layout};
+    use graph::prelude::{lazy_static, serde_yaml, BLOCK_NUMBER_MAX};
+    use serde::{Deserialize, Serialize};
 
-    use crate::sql::test::make_layout;
+    use pretty_assertions::assert_eq;
 
-    use super::*;
+    use super::Parser;
 
     const TEST_GQL: &str = "
-        type SwapMulti @entity(immutable: true) {
+        type Swap @entity(immutable: true) {
             id: Bytes!
-            sender: Bytes! # address
-            amountsIn: [BigInt!]! # uint256[]
-            tokensIn: [Bytes!]! # address[]
-            amountsOut: [BigInt!]! # uint256[]
-            tokensOut: [Bytes!]! # address[]
-            referralCode: BigInt! # uint32
-            blockNumber: BigInt!
-            blockTimestamp: BigInt!
-            transactionHash: Bytes!
+            timestamp: BigInt!
+            pool: Bytes!
+            token0: Bytes!
+            token1: Bytes!
+            sender: Bytes!
+            recipient: Bytes!
+            origin: Bytes! # the EOA that initiated the txn
+            amount0: BigDecimal!
+            amount1: BigDecimal!
+            amountUSD: BigDecimal!
+            sqrtPriceX96: BigInt!
+            tick: BigInt!
+            logIndex: BigInt
         }
 
         type Token @entity {
@@ -64,64 +71,93 @@ mod test {
         }
     ";
 
-    const SQL_QUERY: &str = "
-        with tokens as (
-            select * from (values
-            ('0x0000000000000000000000000000000000000000','ETH','Ethereum',18),
-            ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48','USDC','USD Coin',6)
-            ) as t(address,symbol,name,decimals)
-        )
-
-        select
-        date,
-        t.symbol,
-        SUM(amount)/pow(10,t.decimals) as amount
-        from (select
-        date(to_timestamp(block_timestamp) at time zone 'utc') as date,
-        token,
-        amount
-        from swap_multi as sm
-        ,unnest(sm.amounts_in,sm.tokens_in) as smi(amount,token)
-        union all
-        select
-        date(to_timestamp(block_timestamp) at time zone 'utc') as date,
-        token,
-        amount
-        from sgd1.swap_multi as sm
-        ,unnest(sm.amounts_out,sm.tokens_out) as smo(amount,token)
-        ) as tp
-        inner join tokens as t on t.address = '0x' || encode(tp.token,'hex')
-        group by tp.date,t.symbol,t.decimals
-        order by tp.date desc ,amount desc
-
-        ";
-
     fn parse_and_validate(sql: &str) -> Result<String, anyhow::Error> {
         let parser = Parser::new(Arc::new(make_layout(TEST_GQL)), BLOCK_NUMBER_MAX);
 
         parser.parse_and_validate(sql)
     }
 
-    #[test]
-    fn parse_sql() {
-        let query = parse_and_validate(SQL_QUERY).unwrap();
+    #[derive(Debug, Serialize, Deserialize)]
+    struct TestCase {
+        name: Option<String>,
+        sql: String,
+        ok: Option<String>,
+        err: Option<String>,
+    }
 
-        assert_eq!(
-            query,
-            r#"WITH "swap_multi" AS (SELECT concat('0x', encode("id", 'hex')) AS "id", concat('0x', encode("sender", 'hex')) AS "sender", "amounts_in", "tokens_in", "amounts_out", "tokens_out", "referral_code", "block_number", "block_timestamp", concat('0x', encode("transaction_hash", 'hex')) AS "transaction_hash", "block$" FROM "sgd0815"."swap_multi"),
-"token" AS (SELECT "id", concat('0x', encode("address", 'hex')) AS "address", "symbol", "name", "decimals", "block_range" FROM "sgd0815"."token" WHERE "block_range" @> 2147483647) SELECT to_jsonb(sub.*) AS data FROM ( WITH tokens AS (SELECT * FROM (VALUES ('0x0000000000000000000000000000000000000000', 'ETH', 'Ethereum', 18), ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48', 'USDC', 'USD Coin', 6)) AS t (address, symbol, name, decimals)) SELECT date, t.symbol, SUM(amount) / pow(10, t.decimals) AS amount FROM (SELECT date(to_timestamp(block_timestamp) AT TIME ZONE 'utc') AS date, token, amount FROM "swap_multi" AS sm, UNNEST(sm.amounts_in, sm.tokens_in) AS smi (amount, token) UNION ALL SELECT date(to_timestamp(block_timestamp) AT TIME ZONE 'utc') AS date, token, amount FROM "swap_multi" AS sm, UNNEST(sm.amounts_out, sm.tokens_out) AS smo (amount, token)) AS tp JOIN tokens AS t ON t.address = '0x' || encode(tp.token, 'hex') GROUP BY tp.date, t.symbol, t.decimals ORDER BY tp.date DESC, amount DESC ) AS sub"#
-        );
+    impl TestCase {
+        fn fail(
+            &self,
+            name: &str,
+            msg: &str,
+            exp: impl std::fmt::Display,
+            actual: impl std::fmt::Display,
+        ) {
+            panic!(
+                "case {name} failed: {}\n  expected: {}\n  actual: {}",
+                msg, exp, actual
+            );
+        }
+
+        fn run(&self, num: usize) {
+            fn normalize(query: &str) -> String {
+                sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, query)
+                    .unwrap()
+                    .pop()
+                    .unwrap()
+                    .to_string()
+            }
+
+            let name = self
+                .name
+                .as_ref()
+                .map(|name| format!("{num} ({name})"))
+                .unwrap_or_else(|| num.to_string());
+            let result = parse_and_validate(&self.sql);
+
+            match (&self.ok, &self.err, result) {
+                (Some(expected), None, Ok(actual)) => {
+                    let actual = normalize(&actual);
+                    let expected = normalize(expected);
+                    assert_eq!(actual, expected, "case {} failed", name);
+                }
+                (None, Some(expected), Err(actual)) => {
+                    let actual = actual.to_string();
+                    if !actual.contains(expected) {
+                        self.fail(&name, "expected error message not found", expected, actual);
+                    }
+                }
+                (Some(_), Some(_), _) => {
+                    panic!("case {} has both ok and err", name);
+                }
+                (None, None, _) => {
+                    panic!("case {} has neither ok nor err", name)
+                }
+                (None, Some(exp), Ok(actual)) => {
+                    self.fail(&name, "expected an error", exp, actual);
+                }
+                (Some(exp), None, Err(actual)) => self.fail(&name, "expected success", exp, actual),
+            }
+        }
     }
 
-    #[test]
-    fn parse_simple_sql() {
-        let query =
-            parse_and_validate("select symbol, address from token where decimals > 10").unwrap();
+    lazy_static! {
+        static ref TESTS: Vec<TestCase> = {
+            let file = std::path::PathBuf::from_iter([
+                env!("CARGO_MANIFEST_DIR"),
+                "src",
+                "sql",
+                "parser_tests.yaml",
+            ]);
+            let tests = std::fs::read_to_string(file).unwrap();
+            serde_yaml::from_str(&tests).unwrap()
+        };
+    }
 
-        assert_eq!(
-            query,
-            r#"select to_jsonb(sub.*) as data from ( SELECT symbol, address FROM (SELECT * FROM "sgd0815"."token" WHERE block_range @> 2147483647) AS token WHERE decimals > 10 ) as sub"#
-        );
-        println!("{}", query);
+    #[test]
+    fn parse_sql() {
+        for (num, case) in TESTS.iter().enumerate() {
+            case.run(num);
+        }
     }
 }
diff --git a/store/postgres/src/sql/parser_tests.yaml b/store/postgres/src/sql/parser_tests.yaml
new file mode 100644
index 00000000000..11d2a976e21
--- /dev/null
+++ b/store/postgres/src/sql/parser_tests.yaml
@@ -0,0 +1,55 @@
+# Test cases for the SQL parser. Each test case has the following fields:
+#   name : an optional name for error messages
+#   sql  : the SQL query to parse
+#   ok   : the expected rewritten query
+#   err  : a part of the error message if parsing should fail
+# Exactly one of ok and err must be specified
+
+- sql: select symbol, address from token where decimals > 10
+  ok: >
+    select to_jsonb(sub.*) as data from (
+      SELECT symbol, address FROM (
+        SELECT * FROM "sgd0815"."token" WHERE block_range @> 2147483647) AS token
+      WHERE decimals > 10 ) as sub
+- sql: >
+        with tokens as (
+            select * from (values
+            ('0x0000000000000000000000000000000000000000','ETH','Ethereum',18),
+            ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48','USDC','USD Coin',6)
+            ) as t(address,symbol,name,decimals)
+        )
+
+        select date, t.symbol, SUM(amount)/pow(10,t.decimals) as amount
+          from (select
+                 date(to_timestamp(block_timestamp) at time zone 'utc') as date,
+                 token, amount
+                from swap as sm,
+                     unnest(sm.amounts_in,sm.tokens_in) as smi(amount,token)
+                union all
+                select
+                  date(to_timestamp(block_timestamp) at time zone 'utc') as date,
+                  token, amount
+                from swap as sm,
+                     unnest(sm.amounts_out,sm.tokens_out) as smo(amount,token)) as tp
+        inner join
+          tokens as t on t.address = tp.token
+        group by tp.date, t.symbol, t.decimals
+        order by tp.date desc, amount desc
+  ok: >
+    select to_jsonb(sub.*) as data from (
+      WITH tokens AS (
+        SELECT * FROM (
+          VALUES ('0x0000000000000000000000000000000000000000', 'ETH', 'Ethereum', 18),
+                 ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48', 'USDC', 'USD Coin', 6))
+          AS t (address, symbol, name, decimals))
+      SELECT date, t.symbol, SUM(amount) / pow(10, t.decimals) AS amount
+        FROM (SELECT date(to_timestamp(block_timestamp) AT TIME ZONE 'utc') AS date, token, amount
+                FROM (SELECT * FROM "sgd0815"."swap" WHERE block$ <= 2147483647) AS sm,
+                     UNNEST(sm.amounts_in, sm.tokens_in) AS smi (amount, token)
+              UNION ALL
+              SELECT date(to_timestamp(block_timestamp) AT TIME ZONE 'utc') AS date, token, amount
+                FROM (SELECT * FROM "sgd0815"."swap" WHERE block$ <= 2147483647) AS sm,
+                     UNNEST(sm.amounts_out, sm.tokens_out) AS smo (amount, token)) AS tp
+                  JOIN tokens AS t ON t.address = tp.token
+               GROUP BY tp.date, t.symbol, t.decimals
+               ORDER BY tp.date DESC, amount DESC ) as sub
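
For orientation, the runner above only touches four fields of `TestCase`. A minimal shape consistent with the YAML keys documented in `parser_tests.yaml` would be the following sketch; the actual definition lives earlier in the module and may carry extra detail:

```rust
// Sketch of the TestCase shape the YAML file deserializes into (via
// serde_yaml in the TESTS lazy_static). All fields except `sql` are
// optional, matching the checks in `run`.
#[derive(serde::Deserialize)]
struct TestCase {
    name: Option<String>,
    sql: String,
    ok: Option<String>,
    err: Option<String>,
}
```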

From 35bd6fe796c5c8b54c1761f8ed8b7b03390910e4 Mon Sep 17 00:00:00 2001
From: David Lutterkort <lutter@watzmann.net>
Date: Mon, 27 Jan 2025 17:19:59 -0800
Subject: [PATCH 07/13] graph, graphql, server: Add rudimentary SQL query
 support

---
 graph/src/components/graphql.rs | 10 ++++--
 graph/src/data/query/mod.rs     |  2 +-
 graph/src/data/query/query.rs   | 26 +++++++++++++-
 graph/src/data/store/mod.rs     |  1 +
 graphql/src/runner.rs           | 43 +++++++++++++++++++++-
 server/http/src/service.rs      | 63 +++++++++++++++++++++++++++++++--
 server/http/tests/server.rs     | 12 ++++++-
 7 files changed, 149 insertions(+), 8 deletions(-)

diff --git a/graph/src/components/graphql.rs b/graph/src/components/graphql.rs
index b5fc4273860..8d42cecb9d8 100644
--- a/graph/src/components/graphql.rs
+++ b/graph/src/components/graphql.rs
@@ -1,6 +1,7 @@
-use crate::data::query::QueryResults;
 use crate::data::query::{Query, QueryTarget};
-use crate::prelude::DeploymentHash;
+use crate::data::query::{QueryResults, SqlQueryReq};
+use crate::data::store::SqlQueryObject;
+use crate::prelude::{DeploymentHash, QueryExecutionError};
 
 use async_trait::async_trait;
 use std::sync::Arc;
@@ -28,6 +29,11 @@ pub trait GraphQlRunner: Send + Sync + 'static {
     ) -> QueryResults;
 
     fn metrics(&self) -> Arc<dyn GraphQLMetrics>;
+
+    async fn run_sql_query(
+        self: Arc<Self>,
+        req: SqlQueryReq,
+    ) -> Result<Vec<SqlQueryObject>, QueryExecutionError>;
 }
 
 pub trait GraphQLMetrics: Send + Sync + 'static {
diff --git a/graph/src/data/query/mod.rs b/graph/src/data/query/mod.rs
index 73a6f1fe220..407c2218525 100644
--- a/graph/src/data/query/mod.rs
+++ b/graph/src/data/query/mod.rs
@@ -6,6 +6,6 @@ mod trace;
 
 pub use self::cache_status::CacheStatus;
 pub use self::error::{QueryError, QueryExecutionError};
-pub use self::query::{Query, QueryTarget, QueryVariables};
+pub use self::query::{Query, QueryTarget, QueryVariables, SqlQueryMode, SqlQueryReq};
 pub use self::result::{LatestBlockInfo, QueryResult, QueryResults};
 pub use self::trace::Trace;
diff --git a/graph/src/data/query/query.rs b/graph/src/data/query/query.rs
index 2ca93f0cc43..5bb64a8a134 100644
--- a/graph/src/data/query/query.rs
+++ b/graph/src/data/query/query.rs
@@ -1,7 +1,8 @@
 use serde::de::Deserializer;
-use serde::Deserialize;
+use serde::{Deserialize, Serialize};
 use std::collections::{BTreeMap, HashMap};
 use std::convert::TryFrom;
+use std::hash::{DefaultHasher, Hash as _, Hasher as _};
 use std::ops::{Deref, DerefMut};
 use std::sync::Arc;
 
@@ -165,3 +166,26 @@ impl Query {
         }
     }
 }
+
+#[derive(Copy, Clone, Debug, Deserialize, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum SqlQueryMode {
+    Data,
+    Info,
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct SqlQueryReq {
+    pub deployment: DeploymentHash,
+    pub query: String,
+    pub mode: SqlQueryMode,
+}
+
+impl SqlQueryReq {
+    pub fn query_hash(&self) -> u64 {
+        let mut hasher = DefaultHasher::new();
+        self.deployment.hash(&mut hasher);
+        self.query.hash(&mut hasher);
+        hasher.finish()
+    }
+}
diff --git a/graph/src/data/store/mod.rs b/graph/src/data/store/mod.rs
index cf464ff7b6b..f52c70b7ce3 100644
--- a/graph/src/data/store/mod.rs
+++ b/graph/src/data/store/mod.rs
@@ -1103,6 +1103,7 @@ pub struct QueryObject {
 }
 
 /// An object that is returned from a SQL query. It wraps an `r::Value`
+#[derive(CacheWeight, Serialize)]
 pub struct SqlQueryObject(pub r::Value);
 
 impl CacheWeight for QueryObject {
diff --git a/graphql/src/runner.rs b/graphql/src/runner.rs
index 96f30e8bc9d..46169d2f678 100644
--- a/graphql/src/runner.rs
+++ b/graphql/src/runner.rs
@@ -4,12 +4,14 @@ use std::time::Instant;
 use crate::metrics::GraphQLMetrics;
 use crate::prelude::{QueryExecutionOptions, StoreResolver};
 use crate::query::execute_query;
+use graph::data::query::{CacheStatus, SqlQueryReq};
+use graph::data::store::SqlQueryObject;
 use graph::futures03::future;
-use graph::prelude::MetricsRegistry;
 use graph::prelude::{
     async_trait, o, CheapClone, DeploymentState, GraphQLMetrics as GraphQLMetricsTrait,
     GraphQlRunner as GraphQlRunnerTrait, Logger, Query, QueryExecutionError, ENV_VARS,
 };
+use graph::prelude::{ApiVersion, MetricsRegistry};
 use graph::{data::graphql::load_manager::LoadManager, prelude::QueryStoreManager};
 use graph::{
     data::query::{LatestBlockInfo, QueryResults, QueryTarget},
@@ -251,4 +253,43 @@ where
     fn metrics(&self) -> Arc<dyn GraphQLMetricsTrait> {
         self.graphql_metrics.clone()
     }
+
+    async fn run_sql_query(
+        self: Arc<Self>,
+        req: SqlQueryReq,
+    ) -> Result<Vec<SqlQueryObject>, QueryExecutionError> {
+        let store = self
+            .store
+            .query_store(QueryTarget::Deployment(
+                req.deployment.clone(),
+                ApiVersion::default(),
+            ))
+            .await?;
+
+        let query_hash = req.query_hash();
+        self.load_manager
+            .decide(
+                &store.wait_stats().map_err(QueryExecutionError::from)?,
+                store.shard(),
+                store.deployment_id(),
+                query_hash,
+                &req.query,
+            )
+            .to_result()?;
+
+        let query_start = Instant::now();
+        let result = store
+            .execute_sql(&req.query)
+            .map_err(|e| QueryExecutionError::from(e));
+
+        self.load_manager.record_work(
+            store.shard(),
+            store.deployment_id(),
+            query_hash,
+            query_start.elapsed(),
+            CacheStatus::Miss,
+        );
+
+        result
+    }
 }
diff --git a/server/http/src/service.rs b/server/http/src/service.rs
index 8e2237b86ff..c69e6428983 100644
--- a/server/http/src/service.rs
+++ b/server/http/src/service.rs
@@ -9,6 +9,8 @@ use graph::components::server::query::ServerResponse;
 use graph::components::server::query::ServerResult;
 use graph::components::versions::ApiVersion;
 use graph::data::query::QueryResult;
+use graph::data::query::SqlQueryMode;
+use graph::data::query::SqlQueryReq;
 use graph::data::subgraph::DeploymentHash;
 use graph::data::subgraph::SubgraphName;
 use graph::env::ENV_VARS;
@@ -21,6 +23,8 @@ use graph::hyper::{body::Body, header::HeaderValue};
 use graph::hyper::{Method, Request, Response, StatusCode};
 use graph::prelude::serde_json;
 use graph::prelude::serde_json::json;
+use graph::prelude::CacheWeight as _;
+use graph::prelude::QueryError;
 use graph::semver::VersionReq;
 use graph::slog::error;
 use graph::slog::Logger;
@@ -195,6 +199,51 @@ where
         Ok(result.as_http_response())
     }
 
+    async fn handle_sql_query<T: Body>(&self, request: Request<T>) -> ServerResult {
+        let body = request
+            .collect()
+            .await
+            .map_err(|_| ServerError::InternalError("Failed to read request body".into()))?
+            .to_bytes();
+        let sql_req: SqlQueryReq = serde_json::from_slice(&body)
+            .map_err(|e| ServerError::ClientError(format!("{}", e)))?;
+
+        let mode = sql_req.mode;
+        let result = self
+            .graphql_runner
+            .cheap_clone()
+            .run_sql_query(sql_req)
+            .await
+            .map_err(|e| ServerError::QueryError(QueryError::from(e)));
+
+        use SqlQueryMode::*;
+        let response_obj = match (result, mode) {
+            (Ok(result), Info) => {
+                json!({
+                    "count": result.len(),
+                    "bytes" : result.weight(),
+                })
+            }
+            (Ok(result), Data) => {
+                json!({
+                    "data": result,
+                })
+            }
+            (Err(e), _) => json!({
+                "error": e.to_string(),
+            }),
+        };
+
+        let response_str = serde_json::to_string(&response_obj).unwrap();
+
+        Ok(Response::builder()
+            .status(200)
+            .header(ACCESS_CONTROL_ALLOW_ORIGIN, "*")
+            .header(CONTENT_TYPE, "application/json")
+            .body(Full::from(response_str))
+            .unwrap())
+    }
+
     // Handles OPTIONS requests
     fn handle_graphql_options<T>(&self, _request: Request<T>) -> ServerResult {
         Ok(Response::builder()
@@ -327,7 +376,9 @@ where
                 let dest = format!("/{}/graphql", filtered_path);
                 self.handle_temp_redirect(dest)
             }
-
+            (Method::POST, &["subgraphs", "sql"] | &["subgraphs", "sql", ""]) => {
+                self.handle_sql_query(req).await
+            }
             (Method::POST, &["subgraphs", "id", subgraph_id]) => {
                 self.handle_graphql_query_by_id(subgraph_id.to_owned(), req)
                     .await
@@ -395,6 +446,7 @@ where
 
 #[cfg(test)]
 mod tests {
+    use graph::data::store::SqlQueryObject;
     use graph::data::value::{Object, Word};
     use graph::http_body_util::{BodyExt, Full};
     use graph::hyper::body::Bytes;
@@ -402,7 +454,7 @@ mod tests {
     use graph::hyper::{Method, Request, StatusCode};
     use graph::prelude::serde_json::json;
 
-    use graph::data::query::{QueryResults, QueryTarget};
+    use graph::data::query::{QueryResults, QueryTarget, SqlQueryReq};
     use graph::prelude::*;
 
     use crate::test_utils;
@@ -449,6 +501,13 @@ mod tests {
         fn metrics(&self) -> Arc<dyn GraphQLMetrics> {
             Arc::new(TestGraphQLMetrics)
         }
+
+        async fn run_sql_query(
+            self: Arc<Self>,
+            _req: SqlQueryReq,
+        ) -> Result<Vec<SqlQueryObject>, QueryExecutionError> {
+            unimplemented!()
+        }
     }
 
     #[tokio::test]
diff --git a/server/http/tests/server.rs b/server/http/tests/server.rs
index 3ad78138437..b027a73764c 100644
--- a/server/http/tests/server.rs
+++ b/server/http/tests/server.rs
@@ -1,4 +1,7 @@
-use graph::http::StatusCode;
+use graph::{
+    data::{query::SqlQueryReq, store::SqlQueryObject},
+    http::StatusCode,
+};
 use std::time::Duration;
 
 use graph::data::{
@@ -66,6 +69,13 @@ impl GraphQlRunner for TestGraphQlRunner {
     fn metrics(&self) -> Arc<dyn GraphQLMetrics> {
         Arc::new(TestGraphQLMetrics)
     }
+
+    async fn run_sql_query(
+        self: Arc<Self>,
+        _req: SqlQueryReq,
+    ) -> Result<Vec<SqlQueryObject>, QueryExecutionError> {
+        unimplemented!();
+    }
 }
 
 #[cfg(test)]
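
To make the new request flow concrete: `handle_sql_query` deserializes the POST body into `SqlQueryReq`, with `mode` snake_cased by the serde attribute on `SqlQueryMode`. A minimal sketch of that step, assuming `DeploymentHash` deserializes from its string form; the hash is a placeholder:

```rust
use graph::data::query::SqlQueryReq;
use graph::prelude::serde_json;

// Parse the kind of body a client would POST to /subgraphs/sql.
fn parse_body(body: &[u8]) -> serde_json::Result<SqlQueryReq> {
    serde_json::from_slice(body)
}

fn main() {
    // "QmSoMeThInG" is a placeholder deployment hash.
    let body = br#"{"deployment": "QmSoMeThInG",
                    "query": "select 1",
                    "mode": "data"}"#;
    let req = parse_body(body).expect("valid SQL query request");
    assert_eq!(req.query, "select 1");
}
```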

From bd5817f6546226eb49e6e83683ccd7fdfeffc0c0 Mon Sep 17 00:00:00 2001
From: David Lutterkort <lutter@watzmann.net>
Date: Mon, 27 Jan 2025 17:28:03 -0800
Subject: [PATCH 08/13] store: Restrict the selectable columns to actual
 attributes

---
 store/postgres/src/sql/parser_tests.yaml |  8 +++++---
 store/postgres/src/sql/validation.rs     | 12 +++++++++---
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/store/postgres/src/sql/parser_tests.yaml b/store/postgres/src/sql/parser_tests.yaml
index 11d2a976e21..d6eacaa1e2a 100644
--- a/store/postgres/src/sql/parser_tests.yaml
+++ b/store/postgres/src/sql/parser_tests.yaml
@@ -9,7 +9,7 @@
   ok: >
     select to_jsonb(sub.*) as data from (
       SELECT symbol, address FROM (
-        SELECT * FROM "sgd0815"."token" WHERE block_range @> 2147483647) AS token
+        SELECT id, address, symbol, name, decimals FROM "sgd0815"."token" WHERE block_range @> 2147483647) AS token
       WHERE decimals > 10 ) as sub
 - sql: >
         with tokens as (
@@ -44,11 +44,13 @@
           AS t (address, symbol, name, decimals))
       SELECT date, t.symbol, SUM(amount) / pow(10, t.decimals) AS amount
         FROM (SELECT date(to_timestamp(block_timestamp) AT TIME ZONE 'utc') AS date, token, amount
-                FROM (SELECT * FROM "sgd0815"."swap" WHERE block$ <= 2147483647) AS sm,
+                FROM (SELECT id, timestamp, pool, token_0, token_1, sender, recipient, origin, amount_0, amount_1, amount_usd, sqrt_price_x96, tick, log_index
+                        FROM "sgd0815"."swap" WHERE block$ <= 2147483647) AS sm,
                      UNNEST(sm.amounts_in, sm.tokens_in) AS smi (amount, token)
               UNION ALL
               SELECT date(to_timestamp(block_timestamp) AT TIME ZONE 'utc') AS date, token, amount
-                FROM (SELECT * FROM "sgd0815"."swap" WHERE block$ <= 2147483647) AS sm,
+                FROM (SELECT id, timestamp, pool, token_0, token_1, sender, recipient, origin, amount_0, amount_1, amount_usd, sqrt_price_x96, tick, log_index
+                        FROM "sgd0815"."swap" WHERE block$ <= 2147483647) AS sm,
                      UNNEST(sm.amounts_out, sm.tokens_out) AS smo (amount, token)) AS tp
                   JOIN tokens AS t ON t.address = tp.token
                GROUP BY tp.date, t.symbol, t.decimals
diff --git a/store/postgres/src/sql/validation.rs b/store/postgres/src/sql/validation.rs
index 7639216aa0a..d2370576c22 100644
--- a/store/postgres/src/sql/validation.rs
+++ b/store/postgres/src/sql/validation.rs
@@ -116,15 +116,21 @@ impl VisitorMut for Validator<'_> {
                 return ControlFlow::Continue(());
             };
 
-            // Change 'from table [as alias]' to 'from (select * from table) as alias'
+            // Change 'from table [as alias]' to 'from (select {columns} from table) as alias'
+            let columns = table
+                .columns
+                .iter()
+                .map(|column| column.name.as_str())
+                .collect::<Vec<_>>()
+                .join(", ");
             let query = if table.immutable {
                 format!(
-                    "select * from {} where {} <= {}",
+                    "select {columns} from {} where {} <= {}",
                     table.qualified_name, BLOCK_COLUMN, self.block
                 )
             } else {
                 format!(
-                    "select * from {} where {} @> {}",
+                    "select {columns} from {} where {} @> {}",
                     table.qualified_name, BLOCK_RANGE_COLUMN, self.block
                 )
             };
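
The effect of the column expansion, sketched on a hypothetical mutable table `foo` with attributes `id` and `owner` (schema name and block number are illustrative):

```rust
// Sketch of the rewrite above: instead of `select *`, only the table's
// actual attribute columns are projected, which hides system columns
// like vid or block_range.
let columns = ["id", "owner"].join(", ");
let query = format!(
    r#"select {columns} from "sgd0815"."foo" where block_range @> {}"#,
    2147483647
);
assert_eq!(
    query,
    r#"select id, owner from "sgd0815"."foo" where block_range @> 2147483647"#
);
```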

From 2b1435a76b2aea483fb12e256671b33586f9c0d4 Mon Sep 17 00:00:00 2001
From: David Lutterkort <lutter@watzmann.net>
Date: Tue, 28 Jan 2025 16:34:34 -0800
Subject: [PATCH 09/13] store: Defer wrapping of SQL queries until execution

The wrapping with to_jsonb is closely tied to how we run the query.
---
 store/postgres/src/deployment_store.rs   |  2 +
 store/postgres/src/sql/parser.rs         |  8 +---
 store/postgres/src/sql/parser_tests.yaml | 50 +++++++++++-------------
 3 files changed, 27 insertions(+), 33 deletions(-)

diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs
index de342075d4d..028df2b09f5 100644
--- a/store/postgres/src/deployment_store.rs
+++ b/store/postgres/src/deployment_store.rs
@@ -292,6 +292,8 @@ impl DeploymentStore {
         conn: &mut PgConnection,
         query: &str,
     ) -> Result<Vec<SqlQueryObject>, QueryExecutionError> {
+        let query = format!("select to_jsonb(sub.*) as data from ({}) as sub", query);
+
         let query = diesel::sql_query(query);
 
         // Execute the provided SQL query
diff --git a/store/postgres/src/sql/parser.rs b/store/postgres/src/sql/parser.rs
index 4d19fd444fa..a897c44e657 100644
--- a/store/postgres/src/sql/parser.rs
+++ b/store/postgres/src/sql/parser.rs
@@ -21,14 +21,10 @@ impl Parser {
         validator.validate_statements(&mut statements)?;
 
         let statement = statements
-            .get_mut(0)
+            .get(0)
             .ok_or_else(|| anyhow!("No SQL statements found"))?;
 
-        let sql = format!(
-            "select to_jsonb(sub.*) as data from ( {} ) as sub",
-            statement
-        );
-        Ok(sql)
+        Ok(statement.to_string())
     }
 }
 
diff --git a/store/postgres/src/sql/parser_tests.yaml b/store/postgres/src/sql/parser_tests.yaml
index d6eacaa1e2a..3650f0a3620 100644
--- a/store/postgres/src/sql/parser_tests.yaml
+++ b/store/postgres/src/sql/parser_tests.yaml
@@ -7,36 +7,32 @@
 
 - sql: select symbol, address from token where decimals > 10
   ok: >
-    select to_jsonb(sub.*) as data from (
       SELECT symbol, address FROM (
         SELECT id, address, symbol, name, decimals FROM "sgd0815"."token" WHERE block_range @> 2147483647) AS token
-      WHERE decimals > 10 ) as sub
+      WHERE decimals > 10
 - sql: >
-        with tokens as (
-            select * from (values
-            ('0x0000000000000000000000000000000000000000','ETH','Ethereum',18),
-            ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48','USDC','USD Coin',6)
-            ) as t(address,symbol,name,decimals)
-        )
-
-        select date, t.symbol, SUM(amount)/pow(10,t.decimals) as amount
-          from (select
-                 date(to_timestamp(block_timestamp) at time zone 'utc') as date,
-                 token, amount
-                from swap as sm,
-                     unnest(sm.amounts_in,sm.tokens_in) as smi(amount,token)
-                union all
-                select
-                  date(to_timestamp(block_timestamp) at time zone 'utc') as date,
-                  token, amount
-                from swap as sm,
-                     unnest(sm.amounts_out,sm.tokens_out) as smo(amount,token)) as tp
-        inner join
-          tokens as t on t.address = tp.token
-        group by tp.date, t.symbol, t.decimals
-        order by tp.date desc, amount desc
+      with tokens as (
+          select * from (values
+          ('0x0000000000000000000000000000000000000000','ETH','Ethereum',18),
+          ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48','USDC','USD Coin',6)
+          ) as t(address,symbol,name,decimals))
+      select date, t.symbol, SUM(amount)/pow(10,t.decimals) as amount
+        from (select
+               date(to_timestamp(block_timestamp) at time zone 'utc') as date,
+               token, amount
+              from swap as sm,
+                   unnest(sm.amounts_in,sm.tokens_in) as smi(amount,token)
+              union all
+              select
+                date(to_timestamp(block_timestamp) at time zone 'utc') as date,
+                token, amount
+              from swap as sm,
+                   unnest(sm.amounts_out,sm.tokens_out) as smo(amount,token)) as tp
+      inner join
+        tokens as t on t.address = tp.token
+      group by tp.date, t.symbol, t.decimals
+      order by tp.date desc, amount desc
   ok: >
-    select to_jsonb(sub.*) as data from (
       WITH tokens AS (
         SELECT * FROM (
           VALUES ('0x0000000000000000000000000000000000000000', 'ETH', 'Ethereum', 18),
@@ -54,4 +50,4 @@
                      UNNEST(sm.amounts_out, sm.tokens_out) AS smo (amount, token)) AS tp
                   JOIN tokens AS t ON t.address = tp.token
                GROUP BY tp.date, t.symbol, t.decimals
-               ORDER BY tp.date DESC, amount DESC ) as sub
+               ORDER BY tp.date DESC, amount DESC
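
With this change the parser returns the validated statement verbatim, and only `execute_sql` adds the `to_jsonb` wrapper. A sketch of the seam:

```rust
// Sketch: the validated statement stays plain SQL; execute_sql wraps it
// just before running it, so each row comes back as a single jsonb column.
let validated = r#"select symbol from "sgd0815"."token""#;
let wrapped = format!("select to_jsonb(sub.*) as data from ({}) as sub", validated);
assert_eq!(
    wrapped,
    r#"select to_jsonb(sub.*) as data from (select symbol from "sgd0815"."token") as sub"#
);
```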

From 84349bab663ede9bd550f7635a80458adde3ce3f Mon Sep 17 00:00:00 2001
From: David Lutterkort <lutter@watzmann.net>
Date: Tue, 28 Jan 2025 17:02:05 -0800
Subject: [PATCH 10/13] store: Move SQL validation tests to YAML test file

---
 store/postgres/src/sql/parser_tests.yaml |  95 +++++++++---
 store/postgres/src/sql/validation.rs     | 177 -----------------------
 2 files changed, 71 insertions(+), 201 deletions(-)

diff --git a/store/postgres/src/sql/parser_tests.yaml b/store/postgres/src/sql/parser_tests.yaml
index 3650f0a3620..9640b74177f 100644
--- a/store/postgres/src/sql/parser_tests.yaml
+++ b/store/postgres/src/sql/parser_tests.yaml
@@ -7,16 +7,16 @@
 
 - sql: select symbol, address from token where decimals > 10
   ok: >
-      SELECT symbol, address FROM (
-        SELECT id, address, symbol, name, decimals FROM "sgd0815"."token" WHERE block_range @> 2147483647) AS token
-      WHERE decimals > 10
+      select symbol, address from (
+        select id, address, symbol, name, decimals from "sgd0815"."token" where block_range @> 2147483647) as token
+      where decimals > 10
 - sql: >
       with tokens as (
           select * from (values
-          ('0x0000000000000000000000000000000000000000','ETH','Ethereum',18),
-          ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48','USDC','USD Coin',6)
+          ('0x0000000000000000000000000000000000000000','eth','ethereum',18),
+          ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48','usdc','usd coin',6)
           ) as t(address,symbol,name,decimals))
-      select date, t.symbol, SUM(amount)/pow(10,t.decimals) as amount
+      select date, t.symbol, sum(amount)/pow(10,t.decimals) as amount
         from (select
                date(to_timestamp(block_timestamp) at time zone 'utc') as date,
                token, amount
@@ -33,21 +33,68 @@
       group by tp.date, t.symbol, t.decimals
       order by tp.date desc, amount desc
   ok: >
-      WITH tokens AS (
-        SELECT * FROM (
-          VALUES ('0x0000000000000000000000000000000000000000', 'ETH', 'Ethereum', 18),
-                 ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48', 'USDC', 'USD Coin', 6))
-          AS t (address, symbol, name, decimals))
-      SELECT date, t.symbol, SUM(amount) / pow(10, t.decimals) AS amount
-        FROM (SELECT date(to_timestamp(block_timestamp) AT TIME ZONE 'utc') AS date, token, amount
-                FROM (SELECT id, timestamp, pool, token_0, token_1, sender, recipient, origin, amount_0, amount_1, amount_usd, sqrt_price_x96, tick, log_index
-                        FROM "sgd0815"."swap" WHERE block$ <= 2147483647) AS sm,
-                     UNNEST(sm.amounts_in, sm.tokens_in) AS smi (amount, token)
-              UNION ALL
-              SELECT date(to_timestamp(block_timestamp) AT TIME ZONE 'utc') AS date, token, amount
-                FROM (SELECT id, timestamp, pool, token_0, token_1, sender, recipient, origin, amount_0, amount_1, amount_usd, sqrt_price_x96, tick, log_index
-                        FROM "sgd0815"."swap" WHERE block$ <= 2147483647) AS sm,
-                     UNNEST(sm.amounts_out, sm.tokens_out) AS smo (amount, token)) AS tp
-                  JOIN tokens AS t ON t.address = tp.token
-               GROUP BY tp.date, t.symbol, t.decimals
-               ORDER BY tp.date DESC, amount DESC
+      with tokens as (
+        select * from (
+          values ('0x0000000000000000000000000000000000000000', 'eth', 'ethereum', 18),
+                 ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48', 'usdc', 'usd coin', 6))
+          as t (address, symbol, name, decimals))
+      select date, t.symbol, sum(amount) / pow(10, t.decimals) as amount
+        from (select date(to_timestamp(block_timestamp) at time zone 'utc') as date, token, amount
+                from (select id, timestamp, pool, token_0, token_1, sender, recipient, origin, amount_0, amount_1, amount_usd, sqrt_price_x96, tick, log_index
+                        from "sgd0815"."swap" where block$ <= 2147483647) as sm,
+                     unnest(sm.amounts_in, sm.tokens_in) as smi (amount, token)
+              union all
+              select date(to_timestamp(block_timestamp) at time zone 'utc') as date, token, amount
+                from (select id, timestamp, pool, token_0, token_1, sender, recipient, origin, amount_0, amount_1, amount_usd, sqrt_price_x96, tick, log_index
+                        from "sgd0815"."swap" where block$ <= 2147483647) as sm,
+                     unnest(sm.amounts_out, sm.tokens_out) as smo (amount, token)) as tp
+                  join tokens as t on t.address = tp.token
+               group by tp.date, t.symbol, t.decimals
+               order by tp.date desc, amount desc
+- name: pg_sleep forbidden
+  sql: select pool from swap where '' = (select cast(pg_sleep(5) as text))
+  err: Unknown or unsupported function pg_sleep
+- name: table functions forbidden
+  sql: >
+    select vid, k.sname
+      from swap,
+        lateral(select current_schemas as sname from current_schemas(true)) as k
+  err: Unknown or unsupported function current_schemas
+- name: function without parens forbidden
+  sql: select input_token from swap where '' = (select user)
+  err: Unknown or unsupported function user
+- name: aggregation allowed
+  sql: >
+    select token0, sum(amount0) as total_amount
+      from swap
+     group by token0
+    having sum(amount0) > 1000
+  ok: >
+    SELECT token0, sum(amount0) AS total_amount
+      FROM (SELECT id, timestamp, pool, token_0, token_1, sender, recipient, origin, amount_0, amount_1, amount_usd, sqrt_price_x96, tick, log_index
+              FROM "sgd0815"."swap" WHERE block$ <= 2147483647) AS swap
+     GROUP BY token0
+    HAVING sum(amount0) > 1000
+- name: arbitrary function forbidden
+  sql: >
+    select token0 from swap
+     where '' = (select cast(do_strange_math(amount_in) as text))
+  err: Unknown or unsupported function do_strange_math
+- name: create table forbidden
+  sql: create table foo (id int primary key);
+  err: Only SELECT query is supported
+- name: insert forbidden
+  sql: insert into foo values (1);
+  err: Only SELECT query is supported
+- name: CTE allowed
+  sql: with foo as (select 1) select * from foo
+  ok:  with foo as (select 1) select * from foo
+- name: CTE with insert forbidden
+  sql: with foo as (insert into target values(1)) select * from bar
+  err: Only SELECT query is supported
+- name: only single statement
+  sql: select 1; select 2;
+  err: Multi statement is not supported
+- name: unknown tables forbidden
+  sql: select * from unknown_table
+  err: Unknown table unknown_table
diff --git a/store/postgres/src/sql/validation.rs b/store/postgres/src/sql/validation.rs
index d2370576c22..4309793dc0f 100644
--- a/store/postgres/src/sql/validation.rs
+++ b/store/postgres/src/sql/validation.rs
@@ -164,180 +164,3 @@ impl VisitorMut for Validator<'_> {
         ControlFlow::Continue(())
     }
 }
-
-#[cfg(test)]
-mod test {
-    use graph::prelude::BLOCK_NUMBER_MAX;
-
-    use super::*;
-    use crate::sql::{constants::SQL_DIALECT, test::make_layout};
-
-    fn validate(sql: &str) -> Result<(), Error> {
-        let mut statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql).unwrap();
-
-        const GQL: &str = "
-            type Swap @entity {
-                id: ID!
-                sender: Bytes!
-                inputAmount: BigDecimal!
-                inputToken: Bytes!
-                amountOut: BigDecimal!
-                outputToken: Bytes!
-                slippage: BigDecimal!
-                referralCode: String
-                blockNumber: Int!
-                blockTimestamp: Timestamp!
-                transactionHash: Bytes!
-            }";
-
-        let layout = make_layout(GQL);
-
-        let mut validator = Validator::new(&layout, BLOCK_NUMBER_MAX);
-
-        validator.validate_statements(&mut statements)
-    }
-
-    #[test]
-    fn test_function_disallowed() {
-        let result = validate(
-            "
-            SELECT
-                input_token
-            FROM swap
-            WHERE '' = (
-                SELECT
-                    CAST(pg_sleep(5) AS text
-                )
-            )",
-        );
-        assert_eq!(result, Err(Error::UnknownFunction("pg_sleep".to_owned())));
-    }
-
-    #[test]
-    fn test_table_function_disallowed() {
-        let result = validate(
-            "
-        SELECT
-            vid,
-            k.sname
-        FROM swap,
-        LATERAL(
-            SELECT
-                current_schemas as sname
-            FROM current_schemas(true)
-        ) as k",
-        );
-        assert_eq!(
-            result,
-            Err(Error::UnknownFunction("current_schemas".to_owned()))
-        );
-    }
-
-    #[test]
-    fn test_function_disallowed_without_paranthesis() {
-        let result = validate(
-            "
-            SELECT
-                input_token
-            FROM swap
-            WHERE '' = (
-                SELECT user
-            )",
-        );
-        assert_eq!(result, Err(Error::UnknownFunction("user".to_owned())));
-    }
-
-    #[test]
-    fn test_function_allowed() {
-        let result = validate(
-            "
-            SELECT
-                input_token,
-                SUM(input_amount) AS total_amount
-            FROM swap
-            GROUP BY input_token
-            HAVING SUM(input_amount) > 1000
-            ",
-        );
-        assert_eq!(result, Ok(()));
-    }
-
-    #[test]
-    fn test_function_unknown() {
-        let result = validate(
-            "
-            SELECT
-                input_token
-            FROM swap
-            WHERE '' = (
-                SELECT
-                    CAST(do_strange_math(amount_in) AS text
-                )
-            )",
-        );
-        assert_eq!(
-            result,
-            Err(Error::UnknownFunction("do_strange_math".to_owned()))
-        );
-    }
-
-    #[test]
-    fn test_not_select_ddl() {
-        let result = validate(
-            "
-            CREATE TABLE foo (id INT PRIMARY KEY);
-            ",
-        );
-        assert_eq!(result, Err(Error::NotSelectQuery));
-    }
-
-    #[test]
-    fn test_not_select_insert() {
-        let result = validate(
-            "
-            INSERT INTO foo VALUES (1);
-            ",
-        );
-        assert_eq!(result, Err(Error::NotSelectQuery));
-    }
-
-    #[test]
-    fn test_common_table_expression() {
-        let result = validate(
-            "
-            WITH foo AS (SELECT 1) SELECT * FROM foo;
-            ",
-        );
-        assert_eq!(result, Ok(()));
-    }
-
-    #[test]
-    fn test_common_table_expression_with_effect() {
-        let result = validate(
-            "
-            WITH foo AS (INSERT INTO target VALUES(1)) SELECT * FROM bar;
-            ",
-        );
-        assert_eq!(result, Err(Error::NotSelectQuery));
-    }
-
-    #[test]
-    fn test_no_multi_statement() {
-        let result = validate(
-            "
-            SELECT 1; SELECT 2;
-            ",
-        );
-        assert_eq!(result, Err(Error::MultiStatementUnSupported));
-    }
-
-    #[test]
-    fn test_table_unknown() {
-        let result = validate(
-            "
-            SELECT * FROM unknown_table;
-            ",
-        );
-        assert_eq!(result, Err(Error::UnknownTable("unknown_table".to_owned())));
-    }
-}

From 133cadbaf1aa9551b86e372778fc7ab4720e7322 Mon Sep 17 00:00:00 2001
From: David Lutterkort <lutter@watzmann.net>
Date: Mon, 3 Feb 2025 15:16:18 -0800
Subject: [PATCH 11/13] store: Limit maximum execution time for SQL queries

---
 store/postgres/src/deployment_store.rs | 15 ++++++++++-----
 store/postgres/src/relational.rs       |  2 +-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs
index 028df2b09f5..057dcd88acd 100644
--- a/store/postgres/src/deployment_store.rs
+++ b/store/postgres/src/deployment_store.rs
@@ -54,7 +54,7 @@ use crate::detail::ErrorDetail;
 use crate::dynds::DataSourcesTable;
 use crate::primary::DeploymentId;
 use crate::relational::index::{CreateIndex, IndexList, Method};
-use crate::relational::{Layout, LayoutCache, SqlName, Table};
+use crate::relational::{Layout, LayoutCache, SqlName, Table, STATEMENT_TIMEOUT};
 use crate::relational_queries::{FromEntityData, JSONData};
 use crate::{advisory_lock, catalog, retry};
 use crate::{connection_pool::ConnectionPool, detail};
@@ -293,12 +293,17 @@ impl DeploymentStore {
         query: &str,
     ) -> Result<Vec<SqlQueryObject>, QueryExecutionError> {
         let query = format!("select to_jsonb(sub.*) as data from ({}) as sub", query);
-
         let query = diesel::sql_query(query);
 
-        // Execute the provided SQL query
-        let results = query
-            .load::<JSONData>(conn)
+        let results = conn
+            .transaction(|conn| {
+                if let Some(ref timeout_sql) = *STATEMENT_TIMEOUT {
+                    conn.batch_execute(timeout_sql)?;
+                }
+
+                // Execute the provided SQL query
+                query.load::<JSONData>(conn)
+            })
             .map_err(|e| QueryExecutionError::SqlError(e.to_string()))?;
 
         Ok(results
diff --git a/store/postgres/src/relational.rs b/store/postgres/src/relational.rs
index c5c929e189a..d4561f23771 100644
--- a/store/postgres/src/relational.rs
+++ b/store/postgres/src/relational.rs
@@ -95,7 +95,7 @@ pub const STRING_PREFIX_SIZE: usize = 256;
 pub const BYTE_ARRAY_PREFIX_SIZE: usize = 64;
 
 lazy_static! {
-    static ref STATEMENT_TIMEOUT: Option<String> = ENV_VARS
+    pub(crate) static ref STATEMENT_TIMEOUT: Option<String> = ENV_VARS
         .graphql
         .sql_statement_timeout
         .map(|duration| format!("set local statement_timeout={}", duration.as_millis()));
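
The query runs inside a transaction because `set local statement_timeout` is scoped to the enclosing transaction: the limit resets on commit or rollback and cannot leak to later queries on the same pooled connection. A sketch of the pattern with diesel; the helper name is hypothetical:

```rust
use diesel::connection::SimpleConnection;
use diesel::pg::PgConnection;
use diesel::{Connection, QueryResult};

// Hypothetical helper mirroring the pattern above: apply a
// transaction-local statement timeout, then run the closure inside the
// same transaction.
fn with_statement_timeout<T>(
    conn: &mut PgConnection,
    timeout_sql: Option<&str>,
    run: impl FnOnce(&mut PgConnection) -> QueryResult<T>,
) -> QueryResult<T> {
    conn.transaction(|conn| {
        if let Some(timeout_sql) = timeout_sql {
            // e.g. "set local statement_timeout=5000"
            conn.batch_execute(timeout_sql)?;
        }
        run(conn)
    })
}
```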

From 804a20818ce89369dba5c924b5952b980f261a03 Mon Sep 17 00:00:00 2001
From: David Lutterkort <lutter@watzmann.net>
Date: Mon, 3 Feb 2025 15:27:53 -0800
Subject: [PATCH 12/13] docs: Add some details on the SQL interface

---
 docs/implementation/sql-interface.md | 82 ++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 docs/implementation/sql-interface.md

diff --git a/docs/implementation/sql-interface.md b/docs/implementation/sql-interface.md
new file mode 100644
index 00000000000..6c8740d200e
--- /dev/null
+++ b/docs/implementation/sql-interface.md
@@ -0,0 +1,82 @@
+# SQL Queries
+
+**This interface is extremely experimental. There is no guarantee that this
+interface will ever be brought to production use. It is solely here to help
+evaluate the utility of such an interface.**
+
+SQL queries can be issued by posting a JSON document to
+`/subgraphs/sql`. The server responds with a JSON document that
+contains the records matching the query.
+
+The body of the request must contain the following keys:
+
+* `deployment`: the hash of the deployment against which the query should
+  be run
+* `query`: the SQL query
+* `mode`: either `info` or `data`. With `info`, only summary information
+  (the number of records and their size in bytes) is reported; with
+  `data`, the query result itself is sent in the response
+
+The SQL query can use all the tables of the given subgraph. Table and
+attribute names are snake-cased from their form in the GraphQL schema, so
+that data for `SomeDailyStuff` is stored in a table `some_daily_stuff`.
+
+The query can use fairly arbitrary SQL, including aggregations and most
+functions built into PostgreSQL.
+
+## Example
+
+For a subgraph whose schema defines an entity `Block`, the following query
+```json
+{
+    "query": "select number, hash, parent_hash, timestamp from block order by number desc limit 2",
+    "deployment": "QmSoMeThInG",
+    "mode": "data"
+}
+```
+
+might result in this response
+```json
+{
+  "data": [
+    {
+      "hash": "\\x5f91e535ee4d328725b869dd96f4c42059e3f2728dfc452c32e5597b28ce68d6",
+      "number": 5000,
+      "parent_hash": "\\x82e95c1ee3a98cd0646225b5ae6afc0b0229367b992df97aeb669c898657a4bb",
+      "timestamp": "2015-07-30T20:07:44+00:00"
+    },
+    {
+      "hash": "\\x82e95c1ee3a98cd0646225b5ae6afc0b0229367b992df97aeb669c898657a4bb",
+      "number": 4999,
+      "parent_hash": "\\x875c9a0f8215258c3b17fd5af5127541121cca1f594515aae4fbe5a7fbef8389",
+      "timestamp": "2015-07-30T20:07:36+00:00"
+    }
+  ]
+}
+```
+
+## Limitations/Ideas/Disclaimers
+
+Most of these are fairly easy to address:
+
+* queries must finish within `GRAPH_SQL_STATEMENT_TIMEOUT` (unlimited by
+  default)
+* queries are always executed at the subgraph head. It would be easy to add
+  a way to specify a block at which the query should be executed
+* the interface right now pretty much exposes the raw SQL schema for a
+  subgraph, though system columns like `vid` or `block_range` are made
+  inaccessible.
+* it is not possible to join across subgraphs, though it would be possible
+  to add that. Implementing it would require some additional plumbing that
+  hides the effects of sharding.
+* JSON as the response format is pretty terrible, and we should change that
+  to something that isn't so inefficient
+* the response contains data that's pretty raw; as the example shows,
+  binary data uses Postgres' notation for hex strings
+* because of how broad the supported SQL is, it is pretty easy to issue
+  queries that take a very long time. It will therefore not be hard to take
+  down a `graph-node`, especially when no query timeout is set
+
+Most importantly: while quite a bit of effort has been put into making this
+interface safe, in particular into making sure that it is not possible to
+write through it, there is no guarantee that this works without bugs.
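
A minimal client sketch for the endpoint described in the document above, assuming the `reqwest` crate (with the `blocking` and `json` features) and `serde_json`; the URL, port, and deployment hash are placeholders:

```rust
use serde_json::json;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Placeholder deployment hash and local graph-node HTTP port.
    let body = json!({
        "deployment": "QmSoMeThInG",
        "query": "select number, hash from block order by number desc limit 2",
        "mode": "data",
    });
    let resp: serde_json::Value = reqwest::blocking::Client::new()
        .post("http://localhost:8000/subgraphs/sql")
        .json(&body)
        .send()?
        .json()?;
    println!("{}", serde_json::to_string_pretty(&resp)?);
    Ok(())
}
```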

From ef3ee188e235271365af0f40d6db047debfe37a5 Mon Sep 17 00:00:00 2001
From: David Lutterkort <lutter@watzmann.net>
Date: Wed, 12 Mar 2025 11:57:22 +0000
Subject: [PATCH 13/13] store: Enforce existing GraphQL first/skip limits for
 SQL queries

---
 store/postgres/src/deployment_store.rs |  5 +-
 store/postgres/src/sql/parser.rs       |  7 ++-
 store/postgres/src/sql/validation.rs   | 72 +++++++++++++++++++++++---
 3 files changed, 73 insertions(+), 11 deletions(-)

diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs
index 057dcd88acd..a1736521191 100644
--- a/store/postgres/src/deployment_store.rs
+++ b/store/postgres/src/deployment_store.rs
@@ -292,7 +292,10 @@ impl DeploymentStore {
         conn: &mut PgConnection,
         query: &str,
     ) -> Result<Vec<SqlQueryObject>, QueryExecutionError> {
-        let query = format!("select to_jsonb(sub.*) as data from ({}) as sub", query);
+        let query = format!(
+            "select to_jsonb(sub.*) as data from ({}) as sub limit {}",
+            query, ENV_VARS.graphql.max_first
+        );
         let query = diesel::sql_query(query);
 
         let results = conn
diff --git a/store/postgres/src/sql/parser.rs b/store/postgres/src/sql/parser.rs
index a897c44e657..afdaef26292 100644
--- a/store/postgres/src/sql/parser.rs
+++ b/store/postgres/src/sql/parser.rs
@@ -1,7 +1,7 @@
 use super::{constants::SQL_DIALECT, validation::Validator};
 use crate::relational::Layout;
 use anyhow::{anyhow, Ok, Result};
-use graph::prelude::BlockNumber;
+use graph::{env::ENV_VARS, prelude::BlockNumber};
 use std::sync::Arc;
 
 pub struct Parser {
@@ -17,7 +17,10 @@ impl Parser {
     pub fn parse_and_validate(&self, sql: &str) -> Result<String> {
         let mut statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql)?;
 
-        let mut validator = Validator::new(&self.layout, self.block);
+        let max_offset = ENV_VARS.graphql.max_skip;
+        let max_limit = ENV_VARS.graphql.max_first;
+
+        let mut validator = Validator::new(&self.layout, self.block, max_limit, max_offset);
         validator.validate_statements(&mut statements)?;
 
         let statement = statements
diff --git a/store/postgres/src/sql/validation.rs b/store/postgres/src/sql/validation.rs
index 4309793dc0f..17e424bd947 100644
--- a/store/postgres/src/sql/validation.rs
+++ b/store/postgres/src/sql/validation.rs
@@ -1,7 +1,7 @@
 use graph::prelude::BlockNumber;
 use sqlparser::ast::{
-    Expr, Ident, ObjectName, Query, SetExpr, Statement, TableAlias, TableFactor, VisitMut,
-    VisitorMut,
+    Expr, Ident, ObjectName, Offset, Query, SetExpr, Statement, TableAlias, TableFactor, Value,
+    VisitMut, VisitorMut,
 };
 use sqlparser::parser::Parser;
 use std::result::Result;
@@ -22,20 +22,30 @@ pub enum Error {
     NotSelectQuery,
     #[error("Unknown table {0}")]
     UnknownTable(String),
+    #[error("Only constant numbers are supported for LIMIT and OFFSET.")]
+    UnsupportedLimitOffset,
+    #[error("The limit of {0} is greater than the maximum allowed limit of {1}.")]
+    UnsupportedLimit(u32, u32),
+    #[error("The offset of {0} is greater than the maximum allowed offset of {1}.")]
+    UnsupportedOffset(u32, u32),
 }
 
 pub struct Validator<'a> {
     layout: &'a Layout,
     ctes: HashSet<String>,
     block: BlockNumber,
+    max_limit: u32,
+    max_offset: u32,
 }
 
 impl<'a> Validator<'a> {
-    pub fn new(layout: &'a Layout, block: BlockNumber) -> Self {
+    pub fn new(layout: &'a Layout, block: BlockNumber, max_limit: u32, max_offset: u32) -> Self {
         Self {
             layout,
             ctes: Default::default(),
             block,
+            max_limit,
+            max_offset,
         }
     }
 
@@ -61,6 +71,45 @@ impl<'a> Validator<'a> {
 
         Ok(())
     }
+
+    pub fn validate_limit_offset(&mut self, query: &mut Query) -> ControlFlow<Error> {
+        let Query { limit, offset, .. } = query;
+
+        if let Some(limit) = limit {
+            match limit {
+                Expr::Value(Value::Number(s, _)) => match s.parse::<u32>() {
+                    Err(_) => return ControlFlow::Break(Error::UnsupportedLimitOffset),
+                    Ok(limit) => {
+                        if limit > self.max_limit {
+                            return ControlFlow::Break(Error::UnsupportedLimit(
+                                limit,
+                                self.max_limit,
+                            ));
+                        }
+                    }
+                },
+                _ => return ControlFlow::Break(Error::UnsupportedLimitOffset),
+            }
+        }
+
+        if let Some(Offset { value, .. }) = offset {
+            match value {
+                Expr::Value(Value::Number(s, _)) => match s.parse::<u32>() {
+                    Err(_) => return ControlFlow::Break(Error::UnsupportedLimitOffset),
+                    Ok(offset) => {
+                        if offset > self.max_offset {
+                            return ControlFlow::Break(Error::UnsupportedOffset(
+                                offset,
+                                self.max_offset,
+                            ));
+                        }
+                    }
+                },
+                _ => return ControlFlow::Break(Error::UnsupportedLimitOffset),
+            }
+        }
+        ControlFlow::Continue(())
+    }
 }
 
 impl VisitorMut for Validator<'_> {
@@ -73,9 +122,9 @@ impl VisitorMut for Validator<'_> {
         }
     }
 
-    fn pre_visit_query(&mut self, _query: &mut Query) -> ControlFlow<Self::Break> {
+    fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
         // Add common table expressions to the set of known tables
-        if let Some(ref with) = _query.with {
+        if let Some(ref with) = query.with {
             self.ctes.extend(
                 with.cte_tables
                     .iter()
@@ -83,10 +132,17 @@ impl VisitorMut for Validator<'_> {
             );
         }
 
-        match *_query.body {
-            SetExpr::Update(_) | SetExpr::Insert(_) => ControlFlow::Break(Error::NotSelectQuery),
-            _ => ControlFlow::Continue(()),
+        match *query.body {
+            SetExpr::Select(_) | SetExpr::Query(_) => { /* permitted */ }
+            SetExpr::SetOperation { .. } => { /* permitted */ }
+            SetExpr::Table(_) => { /* permitted */ }
+            SetExpr::Values(_) => { /* permitted */ }
+            SetExpr::Insert(_) | SetExpr::Update(_) => {
+                return ControlFlow::Break(Error::NotSelectQuery)
+            }
         }
+
+        self.validate_limit_offset(query)
     }
 
     /// Invoked for any table function in the AST.