In [1]:
# !pip install --upgrade pip
# !pip install open-interpreter crewai gradio langchain langchain-community
# !pip install --upgrade open-interpreter
# !pip install SpeechRecognition
# !pip install pyaudio
# !pip install sounddevice wavio SpeechRecognition gTTS pygame

In [2]:
import os
from crewai import Agent, Task, Crew, Process
from interpreter import interpreter
from langchain.tools import tool
from langchain_openai import ChatOpenAI

# For audio
import speech_recognition as sr
import sounddevice as sd
import wavio
import numpy as np
from gtts import gTTS
import pygame

pygame 2.5.2 (SDL 2.28.2, Python 3.11.0)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
# # 1. Configuration and Tools
# llm = ChatOpenAI(model="gpt-4-turbo-preview")
# interpreter.auto_run = True
# interpreter.llm.model = "openai/gpt-4-turbo-preview"

# class CLITool:
#     @tool("Executor")
#     def execute_cli_command(command: str):
#         """Create and Execute code using Open Interpreter."""
#         result = interpreter.chat(command)
#         return result

# # 2. Creating an Agent for CLI tasks
# cli_agent = Agent(
#     role='Software Engineer',
#     goal='Always use Executor Tool. Ability to perform CLI operations, write programs and execute using Executor Tool',
#     backstory='Expert in command line operations, creating and executing code.',
#     tools=[CLITool.execute_cli_command],
#     verbose=True,
#     llm=llm 
# )

# # 3. Defining a Task for CLI operations
# cli_task = Task(
#     description='Open my Mozilla web browser and navigate to wikipedia.',
#     agent=cli_agent,
#     tools=[CLITool.execute_cli_command]
# )

# # 4. Creating a Crew with CLI focus
# cli_crew = Crew(
#     agents=[cli_agent],
#     tasks=[cli_task],
#     process=Process.sequential,
#     manager_llm=llm
# )

# # 5. Run the Crew
# result = cli_crew.kickoff()
# print(result)

In [4]:
# Load the OpenAI API key from the local `openai_key` module and export it via
# the environment before the ChatOpenAI client below is constructed.
from openai_key import key
os.environ["OPENAI_API_KEY"] = key

# Configuration and Tools
# Chat model handed to the crewAI agent (and to each Crew as manager_llm).
llm = ChatOpenAI(model="gpt-4-turbo-preview")
# NOTE(review): auto_run = True presumably lets Open Interpreter execute the
# code it generates without asking for confirmation. Since the commands here
# originate from speech recognition, confirm that this is intended.
interpreter.auto_run = True
# Model used by Open Interpreter itself; configured separately from `llm`.
interpreter.llm.model = "openai/gpt-4-turbo-preview"

# Namespace class holding the single tool the agent uses to run commands.
class CLITool:
    # Registered under the tool name "Executor" through langchain's @tool.
    # NOTE(review): @tool presumably uses the docstring below as the tool
    # description shown to the LLM, so treat it as runtime text -- confirm
    # before rewording it.
    @tool("Executor")
    def execute_cli_command(command: str):
        """Create and Execute code using Open Interpreter."""
        # Forward the command text to Open Interpreter and hand back whatever
        # interpreter.chat returns, unmodified.
        result = interpreter.chat(command)
        return result

# The one crewAI agent that carries out spoken tasks. Its only tool is
# CLITool.execute_cli_command, and it is driven by the `llm` chat model.
# The role/goal/backstory strings are passed to Agent as-is.
cli_agent = Agent(
    role='Software Engineer',
    goal='Always use Executor Tool. Ability to perform CLI operations, write programs and execute using Executor Tool',
    backstory='Expert in command line operations, creating and executing code.',
    tools=[CLITool.execute_cli_command],
    verbose=True,
    llm=llm 
)

# Function to record audio for a short duration and return the file path
def record_audio(duration=5, fs=44100, filename="temp_recording.wav"):
    """Capture audio from the default input device and save it as a WAV file.

    Blocks until the whole recording has been captured.

    Args:
        duration: Length of the recording in seconds.
        fs: Sample rate in Hz.
        filename: Path of the WAV file to write.

    Returns:
        The ``filename`` that was written.
    """
    print("Listening...")
    frame_count = int(duration * fs)
    samples = sd.rec(frame_count, samplerate=fs, channels=2, dtype=np.int16)
    # Wait for the capture started by sd.rec to finish before saving.
    sd.wait()
    # 16-bit samples -> 2 bytes per sample on disk.
    wavio.write(filename, samples, fs, sampwidth=2)
    return filename

# Function to recognize speech from an audio file using speech_recognition
def recognize_speech_from_file(recognizer, audio_file):
    """Transcribe an audio file using the recognizer's Google backend.

    Args:
        recognizer: Object providing ``record`` and ``recognize_google``
            (``continuous_listen`` passes an ``sr.Recognizer``).
        audio_file: Path of the audio file to read.

    Returns:
        The recognized text, or ``None`` when the audio could not be
        understood or the recognition request failed. Both failure cases
        are reported on stdout rather than raised.
    """
    with sr.AudioFile(audio_file) as source:
        audio_data = recognizer.record(source)

    try:
        transcript = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        print("Could not understand audio")
        return None
    except sr.RequestError as err:
        print(f"Could not request results; {err}")
        return None

    print(f"Recognized: {transcript}")
    return transcript

# Function to speak text using gTTS and pygame
def speak_text(text):
    """Speak ``text`` aloud via gTTS (synthesis) and pygame (playback).

    The speech is synthesized into a temporary MP3 in the working directory,
    played to completion, and the file is removed afterwards -- also when
    synthesis or playback raises.

    Args:
        text: What to say. Non-string values are converted with ``str()``.
            ``None`` and empty/whitespace-only text are skipped silently
            instead of failing inside gTTS.

    Returns:
        None.
    """
    message = "" if text is None else str(text)
    if not message.strip():
        # Nothing to say; avoid a synthesis call that cannot succeed.
        return

    filename = 'temp_speech.mp3'
    try:
        gTTS(text=message, lang='en').save(filename)
        pygame.mixer.init()
        pygame.mixer.music.load(filename)
        pygame.mixer.music.play()
        # One clock for the whole wait loop; tick(10) polls ~10 times/second.
        clock = pygame.time.Clock()
        while pygame.mixer.music.get_busy():
            clock.tick(10)
    finally:
        # Release the file handle held by the mixer before deleting the file;
        # otherwise the removal can fail on platforms that lock open files.
        if pygame.mixer.get_init():
            pygame.mixer.music.unload()
        if os.path.exists(filename):
            os.remove(filename)

def _run_cli_task(command_text):
    """Build a one-task crew for ``command_text``, run it, and return the result."""
    cli_task = Task(
        description=command_text,
        agent=cli_agent,
        tools=[CLITool.execute_cli_command]
    )
    cli_crew = Crew(
        agents=[cli_agent],
        tasks=[cli_task],
        process=Process.sequential,
        manager_llm=llm
    )
    return cli_crew.kickoff()


def continuous_listen():
    """Run the voice-command loop: record, transcribe, and dispatch tasks.

    The loop records 5-second clips forever and reacts to two spoken phrases
    (matched case-insensitively as substrings of the transcript):

    * "start task" -- clear the buffer and begin collecting utterances.
    * "stop task"  -- (only while collecting) join the collected utterances
      with ", ", run them as a single crew task, then print and speak the
      result.

    Any other recognized utterance is appended to the buffer while collecting
    and ignored otherwise. The function never returns normally; it ends only
    when an exception (e.g. ``KeyboardInterrupt``) propagates out.
    """
    recognizer = sr.Recognizer()
    command_buffer = []
    listening_for_command = False

    while True:
        audio_filename = record_audio(duration=5)  # Record for short durations
        try:
            speech_text = recognize_speech_from_file(recognizer, audio_filename)
        finally:
            # Clean up the temporary recording even if transcription raises.
            if os.path.exists(audio_filename):
                os.remove(audio_filename)

        if not speech_text:
            continue

        spoken = speech_text.lower()

        if "start task" in spoken:
            listening_for_command = True
            command_buffer = []
            speak_text('Listening for tasks.')
            print("Start capturing tasks...")
            continue

        if "stop task" in spoken and listening_for_command:
            listening_for_command = False
            command_text = ", ".join(command_buffer)
            print(f"End capturing commands. Command: {command_text}")
            speak_text(f"Processing command: {command_text}")
            if command_text:
                result = _run_cli_task(command_text)
                print(result)
                # kickoff() may return a non-str result object depending on
                # the crewAI version; the TTS step needs plain text.
                speak_text(str(result))
            command_buffer = []
            continue

        if listening_for_command:
            command_buffer.append(speech_text)

# Start the voice loop as soon as this cell runs. The loop has no exit
# condition, so it keeps going until the kernel is interrupted.
# The __main__ guard below is left disabled for notebook use.
# if __name__ == "__main__":
continuous_listen()

Listening...
Could not understand audio
Listening...
Recognized: hello
Listening...
Could not understand audio
Listening...
Recognized: start Task
Start capturing tasks...
Listening...
Recognized: tell me what files are on my desktop
Listening...
Could not understand audio
Listening...
Recognized: stop tasks
End capturing commands. Command: tell me what files are on my desktop


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```
Thought: Do I need to use a tool? Yes
Action: Executor
Action Input: ls ~/Desktop[0m














[36;1m[1;3m[{'role': 'user', 'type': 'message', 'content': 'ls ~/Desktop'}, {'role': 'assistant', 'type': 'code', 'format': 'bash', 'content': '\nls ~/Desktop\n'}, {'role': 'computer', 'type': 'console', 'format': 'output', 'content': '\n\ncrewai\nopenai_key\nUntitled.ipynb\nvideos\n\n'}, {'role': 'assistant', 'type': 'message', 'content': "The output from thels command you see lists the contents of your Desktop directory. It shows you have four items:\n\n-crewai`: This could be a directory or a file related to the work you mentioned earlier.\n-openai_key`: Likely a file containing an API key for OpenAI services.\n-Untitled.ipynb`: A Jupyter notebook file, often used for Python programming and data analysis.\n-videos`: This might be a directory containing video files.\n\nIf there's a specific task or operation you want to perform on any of these items, or if there's something else you have in mind, please let me know how I can assist you further!"}][0m[32;1m[1;3mDo I need to use a

KeyboardInterrupt: 