Skip to content

Commit 89101a6

Browse files
committed
Initial commit
1 parent 13e1435 commit 89101a6

File tree

11 files changed

+343
-19
lines changed

11 files changed

+343
-19
lines changed

.dev.vars.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
OPENAI_API_KEY="YOUR-API-KEY"

README.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Client Side Tool Calling with the OpenAI WebRTC Realtime API
2+
3+
This project is a [Cloudflare Workers](https://developers.cloudflare.com) app using [Hono](https://honojs.dev) to relay the [OpenAI Realtime API](https://platform.openai.com/docs/api-reference/realtime) over WebRTC. The main files are just [static assets](https://developers.cloudflare.com/workers/static-assets/).
4+
5+
[<img src="https://img.youtube.com/vi/TcOytsfva0o/0.jpg">](https://youtu.be/TcOytsfva0o "Client Side Tool Calling with the OpenAI WebRTC Realtime API")
6+
7+
8+
## Develop
9+
10+
Copy [.dev.vars.example](./.dev.vars.example) to `.dev.vars` and fill in your OpenAI API key.
11+
12+
Install your dependencies
13+
14+
```bash
15+
npm install
16+
```
17+
18+
Run local server
19+
20+
```bash
21+
npm run dev
22+
```
23+
24+
## Deploy
25+
26+
Upload your secret
27+
28+
```bash
29+
npx wrangler secret put OPENAI_API_KEY
30+
```
31+
32+
```bash
33+
npm run deploy
34+
```
35+
36+
The hand is a [HiWonder AI Hand](https://www.hiwonder.com/products/aihand?variant=41022039654487). AI and I reverse-engineered the mobile app to make it work over Bluetooth; see [the code in hand.js](./public/hand.js).

package-lock.json

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,8 @@
1515
"typescript": "^5.5.2",
1616
"vitest": "2.1.8",
1717
"wrangler": "^3.60.3"
18+
},
19+
"dependencies": {
20+
"hono": "^4.6.13"
1821
}
1922
}

public/hand.js

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
const UART_SERVICE_UUID = "6e400001-b5a3-f393-e0a9-e50e24dcca9e";
2+
const UART_TX_UUID = "6e400003-b5a3-f393-e0a9-e50e24dcca9e"; // TX characteristic for micro:bit v2
3+
4+
class Hand {
5+
constructor() {
6+
this.bluetoothDevice = undefined;
7+
this.uartService = undefined;
8+
}
9+
10+
async connect() {
11+
try {
12+
this.bluetoothDevice = await navigator.bluetooth.requestDevice({
13+
filters: [{ namePrefix: "BBC" }], // Filter for devices with names starting with "BBC"
14+
optionalServices: [UART_SERVICE_UUID],
15+
});
16+
17+
const server = await this.bluetoothDevice.gatt.connect();
18+
const service = await server.getPrimaryService(UART_SERVICE_UUID);
19+
this.uartService = await service.getCharacteristic(UART_TX_UUID);
20+
21+
console.log("Connected to hand!");
22+
} catch (error) {
23+
console.error("Failed to connect to hand:", error);
24+
alert("Failed to connect to hand. Please try again.");
25+
}
26+
}
27+
28+
async sendCommand(actionNumber) {
29+
if (this.uartService === undefined) {
30+
console.error("Not connected to micro:bit");
31+
alert("Please connect to Hand first!");
32+
return;
33+
}
34+
try {
35+
// Convert action number to hex and pad to two characters
36+
const hexAction = actionNumber.toString(16).toUpperCase().padStart(2, "0");
37+
const command = `CMD|0F|${hexAction}|$`;
38+
const encoder = new TextEncoder();
39+
await this.uartService.writeValue(encoder.encode(command));
40+
console.log(`Command sent: ${command}`);
41+
} catch (error) {
42+
console.error("Failed to send command:", error);
43+
alert("Failed to send command to Yorick.");
44+
}
45+
}
46+
}

public/index.html

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
6+
<title>Just a website</title>
7+
<link rel="stylesheet" href="/styles.css">
8+
<script src="/hand.js"></script>
9+
<script src="/script.js"></script>
10+
</head>
11+
<body>
12+
<div class="content">
13+
<h1>This is a plain old website</h1>
14+
<p>This is just a plain website that is using plain old JavaScript</p>
15+
<button onclick="talkToTheHand()">Talk to the hand</button>
16+
</div>
17+
<footer>
18+
<p>Built with 🧡 on <a href="https://developers.cloudflare.com">Cloudflare Workers</a> and the <a href="https://platform.openai.com/docs/api-reference/realtime">OpenAI Realtime API</a></p>
19+
</footer>
20+
</body>
21+
</html>

public/script.js

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
// Single shared connection to the robot hand.
const hand = new Hand();

/**
 * Click handler for the "Talk to the hand" button: starts the Bluetooth
 * connection and logs how it went.
 */
function talkToTheHand() {
  const onReady = () => console.log('Hand is ready');
  const onFailure = (err) => console.error(err);

  hand.connect().then(onReady).catch(onFailure);
}
9+
10+
const fns = {
11+
getPageHTML: () => {
12+
return { success: true, html: document.documentElement.outerHTML };
13+
},
14+
changeBackgroundColor: ({ color }) => {
15+
document.body.style.backgroundColor = color;
16+
return { success: true, color };
17+
},
18+
changeTextColor: ({ color }) => {
19+
document.body.style.color = color;
20+
return { success: true, color };
21+
},
22+
showFingers: async ({ numberOfFingers }) => {
23+
await hand.sendCommand(numberOfFingers);
24+
return { success: true, numberOfFingers };
25+
},
26+
};
27+
28+
// WebRTC connection used for the realtime session.
const peerConnection = new RTCPeerConnection();

// Play every inbound track through a fresh <audio> element appended to the page.
peerConnection.ontrack = function handleInboundTrack(event) {
  const audioEl = document.createElement('audio');
  audioEl.srcObject = event.streams[0];
  audioEl.controls = true;
  audioEl.autoplay = true;
  document.body.appendChild(audioEl);
};

// Data channel that carries the JSON events exchanged with the session.
const dataChannel = peerConnection.createDataChannel('response');
40+
41+
function configureData() {
42+
console.log('Configuring data channel');
43+
const event = {
44+
type: 'session.update',
45+
session: {
46+
modalities: ['text', 'audio'],
47+
// Provide the tools. Note they match the keys in the `fns` object above
48+
tools: [
49+
{
50+
type: 'function',
51+
name: 'changeBackgroundColor',
52+
description: 'Changes the background color of a web page',
53+
parameters: {
54+
type: 'object',
55+
properties: {
56+
color: { type: 'string', description: 'A hex value of the color' },
57+
},
58+
},
59+
},
60+
{
61+
type: 'function',
62+
name: 'changeTextColor',
63+
description: 'Changes the text color of a web page',
64+
parameters: {
65+
type: 'object',
66+
properties: {
67+
color: { type: 'string', description: 'A hex value of the color' },
68+
},
69+
},
70+
},
71+
{
72+
type: 'function',
73+
name: 'showFingers',
74+
description: 'Controls a robot hand to show a specific number of fingers',
75+
parameters: {
76+
type: 'object',
77+
properties: {
78+
numberOfFingers: { type: 'string', description: 'Values 1 through 5 of the number of fingers to hold up' },
79+
},
80+
},
81+
},
82+
{
83+
type: 'function',
84+
name: 'getPageHTML',
85+
description: 'Gets the HTML for the current page',
86+
},
87+
],
88+
},
89+
};
90+
dataChannel.send(JSON.stringify(event));
91+
}
92+
93+
// As soon as the data channel opens, register the tools with the session.
dataChannel.addEventListener('open', (ev) => {
  console.log('Opening data channel', ev);
  configureData();
});

// Example of the event handled below:
// {
//   "type": "response.function_call_arguments.done",
//   "event_id": "event_Ad2gt864G595umbCs2aF9",
//   "response_id": "resp_Ad2griUWUjsyeLyAVtTtt",
//   "item_id": "item_Ad2gsxA84w9GgEvFwW1Ex",
//   "output_index": 1,
//   "call_id": "call_PG12S5ER7l7HrvZz",
//   "name": "get_weather",
//   "arguments": "{\"location\":\"Portland, Oregon\"}"
// }

// Run a local function when a function call completes, then report the
// outcome back over the data channel.
dataChannel.addEventListener('message', async (ev) => {
  let msg;
  try {
    msg = JSON.parse(ev.data);
  } catch (error) {
    console.error('Ignoring data channel message that is not valid JSON', error);
    return;
  }

  // Handle function calls only
  if (msg.type !== 'response.function_call_arguments.done') {
    return;
  }

  // Own-property check: a plain `fns[msg.name]` lookup would also resolve
  // inherited names such as "constructor" or "toString".
  if (!Object.prototype.hasOwnProperty.call(fns, msg.name)) {
    console.warn(`No local function named ${msg.name}`);
    return;
  }
  const fn = fns[msg.name];

  let result;
  try {
    console.log(`Calling local function ${msg.name} with ${msg.arguments}`);
    // Treat missing/empty arguments as "no arguments" instead of a parse error.
    const args = msg.arguments ? JSON.parse(msg.arguments) : {};
    result = await fn(args);
  } catch (error) {
    // Report the failure as the function output so the call is still answered.
    console.error(`Local function ${msg.name} failed`, error);
    result = { success: false, error: String(error) };
  }
  console.log('result', result);

  // Let OpenAI know that the function has been called and share its output
  const event = {
    type: 'conversation.item.create',
    item: {
      type: 'function_call_output',
      call_id: msg.call_id, // call_id from the function_call message
      output: JSON.stringify(result), // result of the function
    },
  };
  dataChannel.send(JSON.stringify(event));

  // Adding the output to the conversation does not start a new response on
  // its own; request one so the assistant can use the result.
  dataChannel.send(JSON.stringify({ type: 'response.create' }));
});
132+
133+
/**
 * Captures the microphone, attaches it to the peer connection and exchanges
 * SDP with the `/rtc-connect` relay to establish the WebRTC session.
 *
 * Rejects when microphone access is denied, the relay answers with a non-2xx
 * status, or any WebRTC step fails.
 */
async function startRealtimeSession() {
  // Capture microphone
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

  // Add microphone to PeerConnection
  stream.getTracks().forEach((track) => peerConnection.addTransceiver(track, { direction: 'sendrecv' }));

  const offer = await peerConnection.createOffer();
  // Wait until the offer is applied locally before sending it anywhere.
  await peerConnection.setLocalDescription(offer);

  // Send WebRTC Offer to Workers Realtime WebRTC API Relay
  const response = await fetch('/rtc-connect', {
    method: 'POST',
    body: offer.sdp,
    headers: {
      'Content-Type': 'application/sdp',
    },
  });
  if (!response.ok) {
    // An error page is not SDP; fail here instead of inside setRemoteDescription.
    throw new Error(`Relay responded with status ${response.status}`);
  }
  const answer = await response.text();

  // Accept answer from Realtime WebRTC API
  await peerConnection.setRemoteDescription({
    sdp: answer,
    type: 'answer',
  });
}

startRealtimeSession().catch((error) => {
  console.error('Failed to start realtime session:', error);
});

public/styles.css

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/* Ensure the body takes up the full height */
2+
body {
3+
margin: 0;
4+
display: flex;
5+
flex-direction: column;
6+
min-height: 100vh;
7+
}
8+
9+
/* Main content should expand to fill available space */
10+
.content {
11+
flex: 1;
12+
}
13+
14+
/* Sticky footer styling */
15+
footer {
16+
background-color: #333;
17+
color: #fff;
18+
text-align: center;
19+
padding: 10px;
20+
position: sticky;
21+
bottom: 0;
22+
}
23+
24+
footer a {
25+
color: #fff;
26+
}

src/index.ts

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,36 @@
1-
/**
2-
* Welcome to Cloudflare Workers! This is your first worker.
3-
*
4-
* - Run `npm run dev` in your terminal to start a development server
5-
* - Open a browser tab at http://localhost:8787/ to see your worker in action
6-
* - Run `npm run deploy` to publish your worker
7-
*
8-
* Bind resources to your worker in `wrangler.toml`. After adding bindings, a type definition for the
9-
* `Env` object can be regenerated with `npm run cf-typegen`.
10-
*
11-
* Learn more at https://developers.cloudflare.com/workers/
12-
*/
1+
import { Hono } from 'hono';
132

14-
export default {
15-
async fetch(request, env, ctx): Promise<Response> {
16-
return new Response('Hello World!');
17-
},
18-
} satisfies ExportedHandler<Env>;
3+
const app = new Hono<{ Bindings: Env }>();

/** Instructions passed to the Realtime API when a session is created. */
const DEFAULT_INSTRUCTIONS = `You are helpful and have some tools installed.

In the tools you have the ability to control a robot hand.
`;

/**
 * Relays a WebRTC SDP offer to the OpenAI Realtime API and returns the SDP
 * answer, so the API key never leaves the Worker.
 *
 * Responds with 400 when the request carries no offer and with 502 when the
 * upstream API rejects the request.
 */
app.post('/rtc-connect', async (c) => {
  const body = await c.req.text();
  if (body.trim() === '') {
    // Nothing to relay; do not spend an upstream call on an empty offer.
    return c.text('Missing SDP offer in request body', 400);
  }

  const url = new URL('https://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01');
  url.searchParams.set('instructions', DEFAULT_INSTRUCTIONS);
  url.searchParams.set('voice', 'ash');

  const response = await fetch(url.toString(), {
    method: 'POST',
    body,
    headers: {
      Authorization: `Bearer ${c.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/sdp',
    },
  });

  if (!response.ok) {
    // Keep the upstream detail in the Worker logs; tell the client only that
    // the upstream call failed and with which status.
    const detail = await response.text();
    console.error(`OpenAI API error: ${response.status}`, detail);
    return c.text(`OpenAI API error: ${response.status}`, 502);
  }

  const sdp = await response.text();
  return c.body(sdp, {
    headers: {
      'Content-Type': 'application/sdp',
    },
  });
});
35+
36+
export default app;

worker-configuration.d.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
// Generated by Wrangler
2-
// After adding bindings to `wrangler.toml`, regenerate this interface via `npm run cf-typegen`
1+
// Generated by Wrangler by running `wrangler types`
2+
33
interface Env {
4+
OPENAI_API_KEY: string;
45
}

0 commit comments

Comments
 (0)