trishullab · amit9oct · Feb 27, 2025 · Feb 27, 2025 · Feb 27, 2025 · Feb 27, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,7 @@ requires = [
 build-backend = "hatchling.build"
 [project]
 name = "itp_interface"
-version = "1.1.5"
+version = "1.1.6"
 authors = [
   { name="Amitayush Thakur", email="amitayush@utexas.edu" },
 ]
@@ -42,8 +42,8 @@ dependencies = [
     "urllib3>=2.0.7",
     "mathlibtools==1.3.2",
     "pylspclient==0.0.3",
-    "protobuf==3.20.1",
-    "grpcio==1.51.3"
+    "protobuf==3.20.3",
+    "grpcio>=1.51.3"
 ]
 
 [project.urls]

diff --git a/src/data/test/lean4_proj/Lean4Proj/Basic.lean b/src/data/test/lean4_proj/Lean4Proj/Basic.lean
@@ -4,15 +4,30 @@ def hello := "world"
 
 theorem test (p q : Prop) (hp : p) (hq : q)
 : p ∧ q ∧ p := by
-    apply And.intro
-    exact hp
-    apply And.intro
-    exact hq
-    exact hp
+apply And.intro
+exact hp
+apply And.intro
+exact hq
+exact hp
 
 
 theorem test2 : p -> q -> p ∧ q ∧ p := fun hp hq => ⟨hp, ⟨hq, hp⟩⟩
 
+
+-- show a proof which uses calc
+theorem test_calc (n: Nat) : n^2 + 2*n + 1 = (n + 1)*(n + 1) := by
+calc
+    _ = n^2 + n*2 + 1 := by rw [Nat.mul_comm 2 n]
+    _ = n^2 + (n + n) + 1 := by rw [Nat.mul_two]
+    _ = n^2 + n + n + 1 := by rw [←Nat.add_assoc]
+    _ = n*n + n + n + 1 := by rw [Nat.pow_two]
+    _ = n*n + n*1 + n + 1 := by rw [Nat.mul_one n]
+    _ = n*(n + 1) + n + 1 := by rw [Nat.left_distrib n n 1]
+    _ = n*(n + 1) + (n + 1) := by rw [Nat.add_assoc]
+    _ = n*(n + 1) + 1*(n + 1) := by rw (config := { occs := .pos [2]}) [←Nat.mul_one (n + 1), Nat.mul_comm]
+    _ = (n + 1)*(n + 1) := by rw [Nat.right_distrib n 1 (n + 1)]
+done
+
 end Lean4Proj1
 
 namespace Lean4Proj2
@@ -21,11 +36,12 @@ example : p -> q -> p ∧ q ∧ p := fun hp hq => ⟨hp, ⟨hq, hp⟩⟩
 
 theorem test (p q : Prop) (hp : p) (hq : q)
 : p ∧ q ∧ p := by
-    apply And.intro
-    exact hp
-    apply And.intro
-    exact hq
-    exact hp
+apply And.intro
+exact hp
+apply And.intro
+exact hq
+exact hp
+done
 
 theorem test3 (p q : Prop) (hp : p) (hq : q)
 : p ∧ q ∧ p := by

diff --git a/src/itp_interface/rl/simple_proof_env.py b/src/itp_interface/rl/simple_proof_env.py
@@ -320,7 +320,7 @@ def _run_tactic(self, history_idx: int = None):
         assert len(tactics) > 0
         assert all([isinstance(tactic, str) for tactic in tactics])
         # Remove unnecessary spaces, newlines, and tabs
-        tactics = [tactic.strip() for tactic in tactics]
+        tactics = [tactic for tactic in tactics]
         original_tactics = copy.deepcopy(tactics)
         try:
             state, next_state, reward, done, env_info = self._run_tactics(tactics, state, action, env_info)

diff --git a/src/itp_interface/tools/dynamic_lean4_proof_exec.py b/src/itp_interface/tools/dynamic_lean4_proof_exec.py
@@ -87,7 +87,7 @@ def goal_description_compare(description1: str, descripton2: str) -> int:
             return -1
 
 
-    def __init__(self, coq_context_helper: Lean3ContextHelper, project_folder: str = None, proof_file: str = None, instruction_iter: typing.Optional[str] = None, use_hammer: bool = False, timeout_in_seconds: int = 60, use_human_readable_proof_context: bool = True, suppress_error_log: bool = True, context_type: ContextType = ContextType.NoContext, keep_local_context = False):
+    def __init__(self, coq_context_helper: Lean3ContextHelper, project_folder: str = None, proof_file: str = None, instruction_iter: typing.Optional[str] = None, use_hammer: bool = False, timeout_in_seconds: int = 60, use_human_readable_proof_context: bool = True, suppress_error_log: bool = True, context_type: ContextType = ContextType.NoContext, keep_local_context = False, enforce_qed: bool = False):
         assert proof_file is None or os.path.exists(proof_file), f"Proof file {proof_file} does not exist"
         assert coq_context_helper is not None, "coq_context_helper must not be None"
         self.proof_file = proof_file
@@ -97,7 +97,7 @@ def __init__(self, coq_context_helper: Lean3ContextHelper, project_folder: str =
         self.run_state = DynamicProofExecutor.RunState()
         self.logger = None
         self.lean_context_helper = coq_context_helper
-        super().__init__(project_root=project_folder, proof_step_iter=self.tactic_switch_iterator, use_hammer=use_hammer, timeout_in_sec=timeout_in_seconds, use_human_readable_proof_context=use_human_readable_proof_context, suppress_error_log=suppress_error_log, keep_local_context=keep_local_context)
+        super().__init__(project_root=project_folder, proof_step_iter=self.tactic_switch_iterator, use_hammer=use_hammer, timeout_in_sec=timeout_in_seconds, use_human_readable_proof_context=use_human_readable_proof_context, suppress_error_log=suppress_error_log, keep_local_context=keep_local_context, enforce_qed=enforce_qed)
 
     def __enter__(self):
         self.lean_context_helper.__enter__()

diff --git a/src/itp_interface/tools/lean4_sync_executor.py b/src/itp_interface/tools/lean4_sync_executor.py
@@ -61,6 +61,7 @@ def __init__(self,
         enable_search: bool = False, 
         namespaces: Optional[List[str]] = None, 
         keep_local_context: bool = False,
+        enforce_qed: bool = False,
         logger: Optional[logging.Logger] = None):
         assert proof_step_iter is None or isinstance(proof_step_iter, Iterator), \
             "proof_step_iter must be an iterator"
@@ -91,6 +92,8 @@ def __init__(self,
         self.suppress_error_log = suppress_error_log
         self.process_interace : ProcessInterface = None
         self.execution_complete = False
+        self._enforce_qed = enforce_qed
+        self._ready_to_accept_proof = not self._enforce_qed
         self._max_memory_in_mib = 40000 # 40 GiB is needed for mathlib to work seemlessly
         self._lines_executed = []
         self.proof_context : ProofContext = None
@@ -638,7 +641,7 @@ def _run_file_on_lean_server(self, timeout_in_sec: int, only_env_update: bool =
         response = self.process_interace.read_response(timeout_in_sec)
         relevant_msgs = []
         self._parse_response(0, response, relevant_msgs)
-        self._update_proof_context(0, response, relevant_msgs, only_env_update)
+        self._update_proof_context(0, response, relevant_msgs, only_env_update, force_ready_to_accept_proof = True)
         return response
 
     def _add_last_tactic(self, idx: int, stmt: str):
@@ -808,7 +811,7 @@ def _parse_response(self, idx, response, relevant_messages = [], dont_update_err
                 f"Check the error message: {self.lean_error_messages}")
         return has_cnt == 1 and has_at_least_one_unfocussed_goal > 0 and has_new_errors == 0
 
-    def _update_proof_context(self, idx, response, relevant_messages, only_env_update = False):
+    def _update_proof_context(self, idx, response, relevant_messages, only_env_update = False, force_ready_to_accept_proof = False):
         if response is None:
             raise ValueError(f"Response is None cannot update proof context for line number {idx}")
         if 'env' in response:
@@ -856,7 +859,7 @@ def _update_proof_context(self, idx, response, relevant_messages, only_env_updat
             self.lean_error_messages = []
             proof_running = proof_running or goal_text is not None
         if error_messages is None:
-            assert proof_running, f"Proof is not running but no error message is present, response:\n{response}, \nstmt: \n{stmt}, \nlemma: \n{self.curr_lemma_name}, \nlemma_stmt: \n{self.curr_lemma}, \nline_num: \n{self.line_num}"
+            assert proof_running, f"Proof is not running but no error message is present, response:\n{response}, \nlemma: \n{self.curr_lemma_name}, \nlemma_stmt: \n{self.curr_lemma}, \nline_num: \n{self.line_num}"
             self._proof_running = proof_running
             if self._proof_running:
                 proof_state_idx = None
@@ -880,12 +883,15 @@ def _update_proof_context(self, idx, response, relevant_messages, only_env_updat
                 self._update_proof_state_idx(proof_state_idx)
                 self.proof_context = self._parse_proof_context(proof_goals)
                 if self.proof_context == ProofContext.empty():
-                    self._proof_running = False
-                    self.proof_context = None
-                    self.curr_lemma = None
-                    self.curr_lemma_name = None
-                    self._clear_tacitcs()
-                    self._env_idx_last_thm = self._last_env_idx
+                    if self._ready_to_accept_proof or force_ready_to_accept_proof:
+                        self._proof_running = False
+                        self.proof_context = None
+                        self.curr_lemma = None
+                        self.curr_lemma_name = None
+                        self._clear_tacitcs()
+                        self._env_idx_last_thm = self._last_env_idx
+                    if self._enforce_qed and not force_ready_to_accept_proof:
+                        self._ready_to_accept_proof = True # Wait for another 'done' to enforce qed
             else:
                 self.proof_context = None
 
@@ -898,6 +904,19 @@ def _run_stmt_on_lean_server(self, idx : int, stmt: str, theorem_started: bool =
             # We don't need to run the empty statements. This should be treated as a failed proof step
             self.lean_error_messages = ["There is no tactic in the statement, it is just empty line or whitespace"]
             return
+        elif self.proof_context == ProofContext.empty() and \
+            self._proof_running and \
+            stmt != "done":
+            self.lean_error_messages = [
+            "The proof is about to finish, please use 'done' to finish the proof."]
+            return
+        elif stmt == "done" and \
+            self._proof_running and \
+            self.proof_context != ProofContext.empty():
+            self.lean_error_messages = [
+                "The proof is not finished, please complete the proof before using 'done'."]
+            return
+
         proof_should_run = False
         if theorem_started or (not self._proof_running and self._stmt_has_lemma(idx, stmt, do_full_check = True)):
             proof_should_run = self._theorem_started

diff --git a/src/itp_interface/tools/proof_exec_callback.py b/src/itp_interface/tools/proof_exec_callback.py
@@ -37,7 +37,8 @@ def __init__(self,
                 keep_local_context: bool = False,
                 setup_cmds: typing.List[str] = [],
                 port: typing.Optional[int] = None,
-                enable_search: bool = True):
+                enable_search: bool = True,
+                enforce_qed: bool = False):
         self.project_folder = project_folder
         self.file_path = file_path
         self.language = language
@@ -53,6 +54,7 @@ def __init__(self,
         self.setup_cmds = setup_cmds
         self.port = port
         self.enable_search = enable_search
+        self.enforce_qed = enforce_qed
         pass
 
     def get_proof_executor(self) -> typing.Union[DynamicCoqProofExecutor, DynamicLeanProofExecutor, DynamicLean4ProofExecutor, DynamicIsabelleProofExecutor]:
@@ -65,9 +67,9 @@ def get_proof_executor(self) -> typing.Union[DynamicCoqProofExecutor, DynamicLea
             lean_context_helper = Lean3ContextHelper(search_exec, self.search_depth, logger=self.logger)
             return DynamicLeanProofExecutor(lean_context_helper, self.project_folder, self.file_path, context_type=DynamicLeanProofExecutor.ContextType.NoContext, use_hammer=self.use_hammer, timeout_in_seconds=self.timeout_in_secs, suppress_error_log=self.suppress_error_log, use_human_readable_proof_context=self.use_human_readable_proof_context, keep_local_context=self.keep_local_context)
         elif self.language == ProofAction.Language.LEAN4:
-            search_exec = Lean4SyncExecutor(self.project_folder, self.prefix, self.file_path, use_hammer=self.use_hammer, timeout_in_sec=self.timeout_in_secs, suppress_error_log=self.suppress_error_log, use_human_readable_proof_context=self.use_human_readable_proof_context, enable_search=self.always_use_retrieval, keep_local_context=self.keep_local_context)
+            search_exec = Lean4SyncExecutor(self.project_folder, self.prefix, self.file_path, use_hammer=self.use_hammer, timeout_in_sec=self.timeout_in_secs, suppress_error_log=self.suppress_error_log, use_human_readable_proof_context=self.use_human_readable_proof_context, enable_search=self.always_use_retrieval, keep_local_context=self.keep_local_context, enforce_qed=self.enforce_qed)
             lean4_context_helper = Lean4ContextHelper(search_exec, self.search_depth, logger=self.logger)
-            return DynamicLean4ProofExecutor(lean4_context_helper, self.project_folder, self.file_path, context_type=DynamicLeanProofExecutor.ContextType.NoContext, use_hammer=self.use_hammer, timeout_in_seconds=self.timeout_in_secs, suppress_error_log=self.suppress_error_log, use_human_readable_proof_context=self.use_human_readable_proof_context, keep_local_context=self.keep_local_context)
+            return DynamicLean4ProofExecutor(lean4_context_helper, self.project_folder, self.file_path, context_type=DynamicLeanProofExecutor.ContextType.NoContext, use_hammer=self.use_hammer, timeout_in_seconds=self.timeout_in_secs, suppress_error_log=self.suppress_error_log, use_human_readable_proof_context=self.use_human_readable_proof_context, keep_local_context=self.keep_local_context, enforce_qed=self.enforce_qed)
         elif self.language == ProofAction.Language.ISABELLE:
             search_exec = IsabelleExecutor(self.project_folder, self.file_path, use_hammer=self.use_hammer, timeout_in_sec=self.timeout_in_secs, suppress_error_log=self.suppress_error_log, use_human_readable_proof_context=self.use_human_readable_proof_context, port=self.port)
             isabelle_context_helper = IsabelleContextHelper(search_exec, self.search_depth, logger=self.logger)

diff --git a/src/test/simple_data_gen_test.py b/src/test/simple_data_gen_test.py
@@ -40,7 +40,7 @@ def test_proof_step_data_gen(self):
         print(dirs)
         last_dir = dirs[-1]
         train_data = os.path.join(".log/data_generation/benchmark/simple_benchmark_lean", last_dir, "train")
-        data_gen_file = os.path.join(train_data, "local_data_0000000016.json")
+        data_gen_file = os.path.join(train_data, "local_data_0000000025.json")
         print("Data Gen File:", data_gen_file)
         with open(data_gen_file, "r") as f:
             print(f.read())