Skip to content

Commit

Permalink
[YSQL] Improve exception handling (yugabyte/yugabyte-db#938) (#40)
Browse files Browse the repository at this point in the history
* Made exception handling/wrapping aware of additional cases
* Wrapped teardown in with-errors to prevent it from failing the test
  • Loading branch information
frozenspider committed Jul 18, 2019
1 parent 2f7e3d4 commit 3c83a3f
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 14 deletions.
4 changes: 2 additions & 2 deletions yugabyte/src/yugabyte/utils.clj
Expand Up @@ -11,9 +11,9 @@
m))

(defn pretty-datetime
"Pretty-prints given datetime as yyyy-MM-dd_HH:mm:sss.SSS"
"Pretty-prints given datetime as yyyy-MM-dd_HH:mm:ss.SSS"
[dt]
(let [dtf (SimpleDateFormat. "yyyy-MM-dd_HH:mm:sss.SSS")]
(let [dtf (SimpleDateFormat. "yyyy-MM-dd_HH:mm:ss.SSS")]
(.format dtf dt)))

(defn current-pretty-datetime
Expand Down
56 changes: 44 additions & 12 deletions yugabyte/src/yugabyte/ysql/client.clj
Expand Up @@ -148,7 +148,6 @@
(when-let [m (.getMessage e)]
(condp instance? e
java.sql.SQLTransactionRollbackException

{:type :fail, :error [:rollback m]}

; So far it looks like all SQL exceptions are wrapped in BatchUpdateException
Expand All @@ -160,7 +159,7 @@
{:type :info, :error [:batch-update m]})

org.postgresql.util.PSQLException
(condp re-find (.getMessage e)
(condp re-find m
#"(?i)Conflicts with [- a-z]+ transaction"
{:type :fail, :error [:conflicting-transaction m]}

Expand All @@ -171,6 +170,30 @@
#"(?i)Operation expired"
{:type :fail, :error [:operation-expired m]}

; Happens upon network partition,
; usually invoked upon RPC request timeout
#"(?i)Timed out after deadline expired"
{:type :info, :error [:timeout m]}

; Happens when tserver has been stopped,
; invoked from PG backend via ProcessInterrupts as a part of CHECK_FOR_INTERRUPTS macro
#"(?i)Terminating connection due to administrator command"
{:type :fail, :error [:conn-closed m]}

;
; PG driver-level errors
; Happens when client connection with yb-tserver has been disrupted
; (usually results in operation failure, but we can't guarantee that)
;

; Might happen at basically any stage
#"(?i)This connection has been closed"
{:type :info, :error [:conn-closed m]}

; Happens when there's a problem communicating with server
#"(?i)An I/O error occurred while sending to the backend"
{:type :info, :error [:data-sending-failed m]}

;
; Errors in test spec, do not suppress throwing
;
Expand All @@ -184,24 +207,32 @@
; Unknown (other) SQL error
{:type :info, :error [:psql-exception m]})

; Happens when with-conn macro detects a closed connection
clojure.lang.ExceptionInfo
(condp = (:type (ex-data e))
:conn-not-ready {:type :fail, :error :conn-not-ready}
nil)
(if-let [e2 (:rollback (ex-data e))]
; Process wrapped exception, if any - happens e.g. when tserver has been stopped
(exception-to-op e2)

; Happens when with-conn macro detects a closed connection
(condp = (:type (ex-data e))
:conn-not-ready {:type :fail, :error :conn-not-ready}
nil))

(condp re-find m
#"^timeout$"
{:type :info, :error :timeout}

#"timed out"
{:type :info, :error :timeout}

nil))))

(defn retryable?
"Whether given exception indicates that an operation can be retried"
[ex]
(let [op (exception-to-op ex) ; either {:type ... :error ...} or nil
op-str (str op)]
(re-find #"(?i)try again" op-str)))
(or (re-find #"(?i)try again" op-str)
(re-find #"(?i)restart read required" op-str))))

(defmacro once-per-cluster
"Runs the given code once per cluster. Requires an atomic boolean (set to false)
Expand Down Expand Up @@ -377,11 +408,12 @@
(once-per-cluster
~'teardown?
(info "Running teardown")
(with-timeout
(with-conn
[~'c ~'conn-wrapper]
(with-retry
(teardown-cluster! ~'inner-client ~'test ~'c ~'conn-wrapper))))))
(with-errors
(with-timeout
(with-conn
[~'c ~'conn-wrapper]
(with-retry
(teardown-cluster! ~'inner-client ~'test ~'c ~'conn-wrapper)))))))

(close! [~'this ~'test]
(rc/close! ~'conn-wrapper)))
Expand Down

0 comments on commit 3c83a3f

Please sign in to comment.