Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/pyob_service.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ concurrency:

on:
schedule:
- cron: '0 * * * *'
- cron: '0 */6 * * *'
workflow_dispatch:

jobs:
Expand Down
64 changes: 34 additions & 30 deletions src/pyob/core_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,15 +446,19 @@ def on_chunk():

def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> str:
"""
SOTA LLM Orchestrator:
- Rotates Gemini Keys
- Pivots to GitHub Models (Phi-4) in the cloud
- Enforces mandatory bucket refills (60s) on empty responses
- Prevents high-frequency API spam
Elite LLM Orchestrator (v0.3.2):
- Prevents high-frequency Machine Gun API spam.
- Immediate Cloud Pivot (Gemini -> GitHub Models).
- Enforces mandatory 60s bucket refills on cloud failure.
- Multi-tier wait logic for rate-limit protection.
"""
attempts = 0
is_cloud = os.environ.get("GITHUB_ACTIONS") == "true"

logger.info(
f"📊 Engine check: Found {len(self.key_cooldowns)} Gemini API keys."
)

while True:
key = None
now = time.time()
Expand All @@ -465,53 +469,53 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s
logger.info(
f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}"
)
response_text = self._stream_single_llm(
prompt, key=key, context=context
)
elif is_cloud:
logger.warning(
"⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)..."
)
response_text = self._stream_single_llm(
prompt, key=None, context=context
)
logger.warning("⏳ Gemini keys limited. Using GitHub Models (Phi-4)...")
else:
logger.info("🏠 Using Local Ollama Engine...")
response_text = self._stream_single_llm(
prompt, key=None, context=context
)

if response_text.startswith("ERROR_CODE_429"):
if key:
self.key_cooldowns[key] = time.time() + 1200
logger.warning(f"⚠️ Key {key[-4:]} rate-limited (429). Rotating...")
else:
logger.warning(
"🚫 GitHub Models rate-limited. Sleeping 2 minutes..."
)
time.sleep(120)
attempts += 1
continue
response_text = self._stream_single_llm(prompt, key=key, context=context)

if is_cloud and (
not response_text or response_text.startswith("ERROR_CODE_")
):
if key is not None:
if "429" in response_text:
self.key_cooldowns[key] = time.time() + 1200
logger.warning(f"⚠️ Key {key[-4:]} rate-limited. Pivoting...")

logger.warning(
"☁️ Gemini failed/limited. Pivoting to GitHub Models (Phi-4) immediately..."
"☁️ Gemini blipped/limited. Pivoting to GitHub Models (Phi-4) immediately..."
)
response_text = self._stream_single_llm(
prompt, key=None, context=context
)

if not response_text or response_text.startswith("ERROR_CODE_"):
wait_time = 60
logger.warning(
f"⚠️ All Cloud Engines failed. Sleeping {wait_time}s to refill tokens..."
f"⚠️ All Cloud Engines exhausted. Sleeping {wait_time}s to refill tokens..."
)
time.sleep(wait_time)
attempts += 1
continue

if response_text.startswith("ERROR_CODE_429"):
if key:
self.key_cooldowns[key] = time.time() + 1200
logger.warning(f"⚠️ Key {key[-4:]} rate-limited (429). Rotating...")
else:
logger.warning(
"🚫 GitHub Models rate-limited. Sleeping 2 minutes..."
)
time.sleep(120)
attempts += 1
continue

if is_cloud and key:
logger.info("⏳ Rotating keys... (10s anti-spam breather)")
time.sleep(10)

if not response_text or response_text.startswith("ERROR_CODE_"):
logger.warning("⚠️ Generic LLM error. Retrying in 10s...")
time.sleep(10)
Expand Down