# An example of LLM prompting for programming

In [1]:
# !CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir
# !pip install langchain

%reload_ext dotenv
%dotenv


## Setup LLM

### Local LLM

In [2]:
# import os
# import langchain
# from langchain.callbacks.manager import CallbackManager
# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# from langchain.chains import LLMChain

# # Use llama.cpp
# from langchain.llms import LlamaCpp

# n_gpu_layers = 1  # Metal set to 1 is enough.
# n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.

# # Make sure the model path is correct for your system!
# # llama_model = "/ws/llm_models/llama-2-7b-chat.q6_K.gguf"
# llama_model = "/ws/llm_models/codellama-7b-instruct.Q6_K.gguf"
# llm = LlamaCpp(
#     model_path=llama_model,
#     n_gpu_layers=n_gpu_layers,
#     n_batch=n_batch,
#     f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
#     temperature=0.0,
#     max_tokens=1024,
#     n_ctx=4096,
#     # top_p=1,
#     streaming=True,
#     callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
#     verbose=True,
# )


Check env -- PPLX endpoint: https://api.perplexity.ai local endpoint: http://localhost:6412/v1 proxy: socks5://localhost:1081


### OpenAI Compatibles

In [5]:
# Config OpenAI client
import os
import openai
from langchain.chat_models import ChatOpenAI
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

print('Check env -- PPLX endpoint:', os.getenv("PPLX_API_ENDPOINT"), \
    'local endpoint:', os.getenv("LOCAL_API_ENDPOINT"), \
    'proxy:', os.getenv('HTTP_PROXY'))

# Perplexity https://docs.perplexity.ai/reference/post_chat_completions
openai.api_key = os.getenv("PPLX_API_KEY")
openai.api_base = os.getenv("PPLX_API_ENDPOINT")
model_name = "codellama-34b-instruct" # llama-2-70b-chat, llama-2-13b-chat, codellama-34b-instruct, and mistral-7b-instruct.

# Local LLMs
# openai.api_key = os.getenv("LOCAL_API_KEY")
# openai.api_base = os.getenv("LOCAL_API_ENDPOINT")
# model_name = ""

# Azure
# openai.api_type = 'azure'
# openai.api_version = '2023-05-15' # this may change in the future
# model_name = ''

# os.environ["OPENAI_API_TYPE"] = openai.api_type
# os.environ["OPENAI_API_BASE"] = openai.api_base
# os.environ["OPENAI_API_KEY"] = openai.api_key
# os.environ["OPENAI_API_VERSION"] = openai.api_version


callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = ChatOpenAI(
    openai_api_key=openai.api_key,
    openai_api_base=openai.api_base,
    model=model_name,
    max_tokens=500,
    temperature=0.0,
    verbose=True,
    streaming=True,
    callback_manager=callback_manager,
)
openai.debug = True

messages = [
    {
        "role": "system",
        "content": (
            "You are an artificial intelligence assistant and you need to "
            "engage in a helpful, detailed, polite conversation with a user."
        ),
    },
    {
        "role": "user",
        "content": (
            "Count to 100, with a comma between each number and no newlines. "
            "E.g., 1, 2, 3, ..."
        ),
    },
]

# demo chat completion with streaming
# response_stream = openai.ChatCompletion.create(
#     api_base=openai.api_base,
#     api_key=openai.api_key,
#     model=model_name,
#     messages=messages,
#     stream=True,
# )
# for response in response_stream:
#     print(response)


Check env -- PPLX endpoint: https://api.perplexity.ai local endpoint: http://localhost:6412/v1 proxy: socks5://localhost:1081


## Setup QA

In [4]:
from langchain.memory import VectorStoreRetrieverMemory
from langchain.chains import ConversationChain

memory = ConversationSummaryBufferMemory(llm=llm,
                                         memory_key="history",
                                         return_messages=False,
                                         )
chat = ConversationChain(llm=llm,
                         memory=memory,
                         )


## Build Self-testing Code with a LLM

### Sets the Context

In [6]:
llm.predict("Whoe are you?")


I am LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner. I am trained on a massive dataset of text from the internet and can generate human-like responses to a wide range of topics and questions. I can be used to create chatbots, virtual assistants, and other applications that require natural language understanding and generation capabilities..

'I am LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner. I am trained on a massive dataset of text from the internet and can generate human-like responses to a wide range of topics and questions. I can be used to create chatbots, virtual assistants, and other applications that require natural language understanding and generation capabilities..'

In [6]:
qa("""The current system is an online whiteboard system. Tech stack: typescript, react, redux, konvajs and react-konva. And vitest, react testing library for model, view model and related hooks, cypress component tests for view.

All codes should be written in the tech stack mentioned above. Requirements should be implemented as react components in the MVVM architecture pattern.

There are 2 types of view model in the system.

    1. Shared view model. View model that represents states shared among local and remote users.

    2. Local view model. View model that represents states only applicable to local user

Here are the common implementation strategy:

    1. Shared view model is implemented as Redux store slice. Tested in vitest.

    2. Local view model is implemented as React component props or states(by useState hook), unless for global local view model, which is also implemented as Redux store slice. Tested in vitest.

    3. Hooks are used as the major view helpers to retrieve data from shared view model. For most the case, it will use ‘createSelector’ and ‘useSelector’ for memorization. Tested in vitest and react testing library.

    4. Don’t dispatch action directly to change the states of shared view model, use an encapsulated view model interface instead. In the interface, each redux action is mapped to a method. Tested in vitest.

    5. View is consist of konva shapes, and implemented as react component via react-konva. Tested in cypress component tests

Here are certain patterns should be followed when implement and test the component

    1. When write test, use describe instead of test

    2. Data-driven tests are preferred.

    3. When test the view component, fake view model via the view model interface

Awareness Layer Requirement:

Display other users’ awareness info(cursor, name and online information) on the whiteboard.

AC1: Don’t display local user

AC2: When remote user changes cursor location, display the change in animation.

Provide an overall solution following the guidance mentioned above. Hint, keep all awareness information in a Konva layer, and an awareness info component to render cursor, and name. Don’t generate code. Describe the solution, and breaking the solution down as a task list based on the guidance mentioned above. And we will refer this task list as our master plan.
""")


1. Implement the shared view model as a Redux store slice and test it using vitest.
2. Create a local view model component for displaying awareness information, which should include cursor position, name, and online status. Test this component using vitest.
3. Develop an awareness info component that renders the cursor and name. This component should be implemented as a React component and tested using cypress component tests.
4. Integrate the local view model component with the awareness info component to display the cursor position, name, and online status for remote users.
5. Implement the shared view model interface to encapsulate the Redux actions for changing the state of the shared view model. Test this interface using vitest.
6. Develop a view component that renders the Konva shapes on the whiteboard. This component should be implemented as a React component and tested using cypress component tests.
7. Integrate the local view model component with the awareness info component a



NotImplementedError: get_num_tokens_from_messages() is not presently implemented for model cl100k_base.See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.