diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index f30cda55..8b19f7f7 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -5,9 +5,9 @@
 name: Build
 on:
   push:
-    branches: [next, main]
+    branches: [next, main, dev-1.0]
   pull_request:
-    branches: [next, main]
+    branches: [next, main, dev-1.0]
 
 jobs:
   reuse:
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index df780e4f..961f7727 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -5,9 +5,9 @@
 name: Test
 on:
   push:
-    branches: [next, main]
+    branches: [next, main, dev-1.0]
   pull_request:
-    branches: [next, main]
+    branches: [next, main, dev-1.0]
 
 jobs:
   build:
diff --git a/.gitignore b/.gitignore
index 327fa1ea..c085243d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -185,3 +185,6 @@ docs
 
 # direnv
 .direnv
+
+# vscode workspace config
+agents-js.code-workspace
\ No newline at end of file
diff --git a/agents/src/index.ts b/agents/src/index.ts
index 131ea032..1bec48b4 100644
--- a/agents/src/index.ts
+++ b/agents/src/index.ts
@@ -18,6 +18,7 @@ import * as pipeline from './pipeline/index.js';
 import * as stt from './stt/index.js';
 import * as tokenize from './tokenize/index.js';
 import * as tts from './tts/index.js';
+import * as voice from './voice/index.js';
 
 export * from './vad.js';
 export * from './plugin.js';
@@ -31,4 +32,4 @@ export * from './audio.js';
 export * from './transcription.js';
 export * from './inference_runner.js';
 
-export { cli, stt, tts, llm, pipeline, multimodal, tokenize, metrics, ipc };
+export { cli, stt, tts, llm, pipeline, multimodal, tokenize, metrics, ipc, voice };
diff --git a/agents/src/stream/deferred_stream.ts b/agents/src/stream/deferred_stream.ts
new file mode 100644
index 00000000..305f5efe
--- /dev/null
+++ b/agents/src/stream/deferred_stream.ts
@@ -0,0 +1,27 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { type ReadableStream } from 'node:stream/web';
+import { IdentityTransform } from './identity_transform.js';
+
+/** A readable stream whose upstream source is attached later through `setSource`. */
+export class DeferredReadableStream<T> {
+  private transform: IdentityTransform<T>;
+
+  get stream() {
+    return this.transform.readable;
+  }
+
+  constructor() {
+    this.transform = new IdentityTransform<T>();
+  }
+
+  /** Call once the actual source is ready. Throws if a source is already attached. */
+  setSource(source: ReadableStream<T>) {
+    if (this.transform.writable.locked) {
+      throw new Error('Stream is already locked');
+    }
+    // A failed pipe already errors `stream`; catch so it is not also an unhandled rejection.
+    source.pipeTo(this.transform.writable).catch(() => undefined);
+  }
+}
diff --git a/agents/src/stream/identity_transform.ts b/agents/src/stream/identity_transform.ts
new file mode 100644
index 00000000..cb83f091
--- /dev/null
+++ b/agents/src/stream/identity_transform.ts
@@ -0,0 +1,12 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { TransformStream } from 'node:stream/web';
+
+/** TransformStream that re-emits every chunk it receives unchanged. */
+export class IdentityTransform<T> extends TransformStream<T, T> {
+  constructor() {
+    // The transformer only forwards: each written chunk is enqueued on the readable side.
+    super({ transform: (item, ctrl) => ctrl.enqueue(item) });
+  }
+}
diff --git a/agents/src/stt/stream_adapter.ts b/agents/src/stt/stream_adapter.ts
index 0368ff3b..279de653 100644
--- a/agents/src/stt/stream_adapter.ts
+++ b/agents/src/stt/stream_adapter.ts
@@ -53,11 +53,14 @@ export class StreamAdapterWrapper extends SpeechStream {
 
   async #run() {
     const forwardInput = async () => {
-      for await (const input of this.input) {
-        if (input === SpeechStream.FLUSH_SENTINEL) {
+      while (true) {
+        const { done, value } = await this.inputReader.read();
+        if (done) break;
+
+        if (value === SpeechStream.FLUSH_SENTINEL) {
           this.#vadStream.flush();
         } else {
-          this.#vadStream.pushFrame(input);
+          this.#vadStream.pushFrame(value);
         }
       }
       this.#vadStream.endInput();
@@ -67,10 +70,10 @@ export class StreamAdapterWrapper extends SpeechStream {
       for await (const ev of this.#vadStream) {
         switch (ev.type) {
           case VADEventType.START_OF_SPEECH:
-            this.output.put({ type: SpeechEventType.START_OF_SPEECH });
+            this.outputWriter.write({ type: SpeechEventType.START_OF_SPEECH });
             break;
           case VADEventType.END_OF_SPEECH:
-            this.output.put({ type: SpeechEventType.END_OF_SPEECH });
+            this.outputWriter.write({ type: SpeechEventType.END_OF_SPEECH });
 
             try {
               const event = await this.#stt.recognize(ev.frames);
@@ -78,7 +81,7 @@ export class StreamAdapterWrapper extends SpeechStream {
                 continue;
               }
 
-              this.output.put(event);
+              this.outputWriter.write(event);
               break;
             } catch (error) {
               let logger = log();
diff --git a/agents/src/stt/stt.ts b/agents/src/stt/stt.ts
index 42868bfe..ec11f196 100644
--- a/agents/src/stt/stt.ts
+++ b/agents/src/stt/stt.ts
@@ -4,9 +4,16 @@
 import type { AudioFrame } from '@livekit/rtc-node';
 import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
 import { EventEmitter } from 'node:events';
+import type {
+  ReadableStream,
+  ReadableStreamDefaultReader,
+  WritableStreamDefaultWriter,
+} from 'node:stream/web';
+import { log } from '../log.js';
 import type { STTMetrics } from '../metrics/base.js';
+import { DeferredReadableStream } from '../stream/deferred_stream.js';
+import { IdentityTransform } from '../stream/identity_transform.js';
 import type { AudioBuffer } from '../utils.js';
-import { AsyncIterableQueue } from '../utils.js';
 
 /** Indicates start/middle/end of speech */
 export enum SpeechEventType {
@@ -137,80 +144,138 @@ export abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCal
  */
 export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {
   protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
-  protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();
-  protected output = new AsyncIterableQueue<SpeechEvent>();
-  protected queue = new AsyncIterableQueue<SpeechEvent>();
-  abstract label: string;
+  protected input = new IdentityTransform<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();
+  protected output = new IdentityTransform<SpeechEvent>();
+
+  protected inputReader: ReadableStreamDefaultReader<
+    AudioFrame | typeof SpeechStream.FLUSH_SENTINEL
+  >;
+  protected outputWriter: WritableStreamDefaultWriter<SpeechEvent>;
   protected closed = false;
+  protected inputClosed = false;
+  abstract label: string;
   #stt: STT;
+  private deferredInputStream: DeferredReadableStream<AudioFrame>;
+  private logger = log();
+  private inputWriter: WritableStreamDefaultWriter<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>;
+  private outputReader: ReadableStreamDefaultReader<SpeechEvent>;
+  private metricsStream: ReadableStream<SpeechEvent>;
 
   constructor(stt: STT) {
     this.#stt = stt;
+    this.deferredInputStream = new DeferredReadableStream<AudioFrame>();
+
+    this.inputWriter = this.input.writable.getWriter();
+    this.inputReader = this.input.readable.getReader();
+    this.outputWriter = this.output.writable.getWriter();
+
+    const [outputStream, metricsStream] = this.output.readable.tee();
+    this.metricsStream = metricsStream;
+    this.outputReader = outputStream.getReader();
+
+    this.pumpDeferredStream();
     this.monitorMetrics();
   }
 
+  /**
+   * Reads from the deferred input stream and forwards chunks to the input writer.
+   *
+   * Note: we can't just do this.deferredInputStream.stream.pipeTo(this.input.writable)
+   * because the inputWriter locks the this.input.writable stream. All writes must go through
+   * the inputWriter.
+   */
+  private async pumpDeferredStream() {
+    const reader = this.deferredInputStream.stream.getReader();
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        await this.inputWriter.write(value);
+      }
+    } catch (e) {
+      // Started un-awaited by the constructor, so rethrowing = unhandled rejection: log only.
+      this.logger.error(`Error pumping deferred stream: ${e}`);
+    } finally {
+      reader.releaseLock();
+    }
+  }
+
   protected async monitorMetrics() {
     const startTime = process.hrtime.bigint();
+    const metricsReader = this.metricsStream.getReader();
+
+    while (true) {
+      const { done, value } = await metricsReader.read();
+      if (done) {
+        break;
+      }
+
+      if (value.type !== SpeechEventType.RECOGNITION_USAGE) continue;
 
-    for await (const event of this.queue) {
-      this.output.put(event);
-      if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;
       const duration = process.hrtime.bigint() - startTime;
       const metrics: STTMetrics = {
         timestamp: Date.now(),
-        requestId: event.requestId!,
+        requestId: value.requestId!,
         duration: Math.trunc(Number(duration / BigInt(1000000))),
         label: this.label,
-        audioDuration: event.recognitionUsage!.audioDuration,
+        audioDuration: value.recognitionUsage!.audioDuration,
         streamed: true,
       };
       this.#stt.emit(SpeechEventType.METRICS_COLLECTED, metrics);
     }
-    this.output.close();
   }
 
-  /** Push an audio frame to the STT */
+  updateInputStream(audioStream: ReadableStream<AudioFrame>) {
+    this.deferredInputStream.setSource(audioStream);
+  }
+
+  /** @deprecated Use `updateInputStream` instead */
   pushFrame(frame: AudioFrame) {
-    if (this.input.closed) {
+    // TODO: remove this method in future version
+    if (this.inputClosed) {
       throw new Error('Input is closed');
     }
     if (this.closed) {
       throw new Error('Stream is closed');
     }
-    this.input.put(frame);
+    this.inputWriter.write(frame);
   }
 
   /** Flush the STT, causing it to process all pending text */
   flush() {
-    if (this.input.closed) {
+    if (this.inputClosed) {
       throw new Error('Input is closed');
     }
     if (this.closed) {
       throw new Error('Stream is closed');
     }
-    this.input.put(SpeechStream.FLUSH_SENTINEL);
+    this.inputWriter.write(SpeechStream.FLUSH_SENTINEL);
   }
 
   /** Mark the input as ended and forbid additional pushes */
   endInput() {
-    if (this.input.closed) {
+    if (this.inputClosed) {
       throw new Error('Input is closed');
     }
     if (this.closed) {
       throw new Error('Stream is closed');
     }
-    this.input.close();
+    this.inputClosed = true;
+    this.inputWriter.close();
   }
 
-  next(): Promise<IteratorResult<SpeechEvent>> {
-    return this.output.next();
+  async next(): Promise<IteratorResult<SpeechEvent>> {
+    return this.outputReader.read().then(({ done, value }) => {
+      if (done) {
+        return { done: true, value: undefined };
+      }
+      return { done: false, value };
+    });
   }
 
   /** Close both the input and output of the STT stream */
   close() {
-    this.input.close();
-    this.queue.close();
-    this.output.close();
+    if (!this.inputClosed && !this.closed) this.inputWriter.close(); // writable is locked
     this.closed = true;
   }
 
diff --git a/agents/src/utils.ts b/agents/src/utils.ts
index 5ec97e5f..d8171137 100644
--- a/agents/src/utils.ts
+++ b/agents/src/utils.ts
@@ -117,14 +117,14 @@ export class Queue<T> {
 }
 
 /** @internal */
-export class Future {
-  #await: Promise<void>;
-  #resolvePromise!: () => void;
+export class Future<T = void> {
+  #await: Promise<T>;
+  #resolvePromise!: (value: T) => void;
   #rejectPromise!: (error: Error) => void;
   #done: boolean = false;
 
   constructor() {
-    this.#await = new Promise<void>((resolve, reject) => {
+    this.#await = new Promise<T>((resolve, reject) => {
       this.#resolvePromise = resolve;
       this.#rejectPromise = reject;
     });
@@ -138,9 +138,9 @@ export class Future {
     return this.#done;
   }
 
-  resolve() {
+  resolve(value: T) {
     this.#done = true;
-    this.#resolvePromise();
+    this.#resolvePromise(value);
   }
 
   reject(error: Error) {
diff --git a/agents/src/vad.ts b/agents/src/vad.ts
index 766bae8b..2e135df3 100644
--- a/agents/src/vad.ts
+++ b/agents/src/vad.ts
@@ -4,8 +4,15 @@
 import type { AudioFrame } from '@livekit/rtc-node';
 import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
 import { EventEmitter } from 'node:events';
+import type {
+  ReadableStream,
+  ReadableStreamDefaultReader,
+  WritableStreamDefaultWriter,
+} from 'node:stream/web';
+import { log } from './log.js';
 import type { VADMetrics } from './metrics/base.js';
-import { AsyncIterableQueue } from './utils.js';
+import { DeferredReadableStream } from './stream/deferred_stream.js';
+import { IdentityTransform } from './stream/identity_transform.js';
 
 export enum VADEventType {
   START_OF_SPEECH,
@@ -77,25 +84,70 @@ export abstract class VAD extends (EventEmitter as new () => TypedEmitter<VADCal
 
 export abstract class VADStream implements AsyncIterableIterator<VADEvent> {
   protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
-  protected input = new AsyncIterableQueue<AudioFrame | typeof VADStream.FLUSH_SENTINEL>();
-  protected queue = new AsyncIterableQueue<VADEvent>();
-  protected output = new AsyncIterableQueue<VADEvent>();
+  protected input = new IdentityTransform<AudioFrame | typeof VADStream.FLUSH_SENTINEL>();
+  protected output = new IdentityTransform<VADEvent>();
+  protected inputWriter: WritableStreamDefaultWriter<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;
+  protected inputReader: ReadableStreamDefaultReader<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;
+  protected outputWriter: WritableStreamDefaultWriter<VADEvent>;
+  protected outputReader: ReadableStreamDefaultReader<VADEvent>;
   protected closed = false;
+  protected inputClosed = false;
+
   #vad: VAD;
   #lastActivityTime = BigInt(0);
+  private logger = log();
+  private deferredInputStream: DeferredReadableStream<AudioFrame>;
 
+  private metricsStream: ReadableStream<VADEvent>;
   constructor(vad: VAD) {
     this.#vad = vad;
+    this.deferredInputStream = new DeferredReadableStream<AudioFrame>();
+
+    this.inputWriter = this.input.writable.getWriter();
+    this.inputReader = this.input.readable.getReader();
+    this.outputWriter = this.output.writable.getWriter();
+
+    const [outputStream, metricsStream] = this.output.readable.tee();
+    this.metricsStream = metricsStream;
+    this.outputReader = outputStream.getReader();
+
+    this.pumpDeferredStream();
     this.monitorMetrics();
   }
 
+  /**
+   * Reads from the deferred input stream and forwards chunks to the input writer.
+   *
+   * Note: we can't just do this.deferredInputStream.stream.pipeTo(this.input.writable)
+   * because the inputWriter locks the this.input.writable stream. All writes must go through
+   * the inputWriter.
+   */
+  private async pumpDeferredStream() {
+    const reader = this.deferredInputStream.stream.getReader();
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        await this.inputWriter.write(value);
+      }
+    } catch (e) {
+      // Started un-awaited by the constructor, so rethrowing = unhandled rejection: log only.
+      this.logger.error(`Error pumping deferred stream: ${e}`);
+    } finally {
+      reader.releaseLock();
+    }
+  }
+
   protected async monitorMetrics() {
     let inferenceDurationTotal = 0;
     let inferenceCount = 0;
-
-    for await (const event of this.queue) {
-      this.output.put(event);
-      switch (event.type) {
+    const metricsReader = this.metricsStream.getReader();
+    while (true) {
+      const { done, value } = await metricsReader.read();
+      if (done) {
+        break;
+      }
+      switch (value.type) {
         case VADEventType.START_OF_SPEECH:
           inferenceCount++;
           if (inferenceCount >= 1 / this.#vad.capabilities.updateInterval) {
@@ -119,47 +171,56 @@ export abstract class VADStream implements AsyncIterableIterator<VADEvent> {
           break;
       }
     }
-    this.output.close();
   }
 
+  updateInputStream(audioStream: ReadableStream<AudioFrame>) {
+    this.deferredInputStream.setSource(audioStream);
+  }
+
+  /** @deprecated Use `updateInputStream` instead */
   pushFrame(frame: AudioFrame) {
-    if (this.input.closed) {
+    // TODO(AJS-395): remove this method
+    if (this.inputClosed) {
       throw new Error('Input is closed');
     }
     if (this.closed) {
       throw new Error('Stream is closed');
     }
-    this.input.put(frame);
+    this.inputWriter.write(frame);
   }
 
   flush() {
-    if (this.input.closed) {
+    if (this.inputClosed) {
       throw new Error('Input is closed');
     }
     if (this.closed) {
       throw new Error('Stream is closed');
     }
-    this.input.put(VADStream.FLUSH_SENTINEL);
+    this.inputWriter.write(VADStream.FLUSH_SENTINEL);
   }
 
   endInput() {
-    if (this.input.closed) {
+    if (this.inputClosed) {
       throw new Error('Input is closed');
     }
     if (this.closed) {
       throw new Error('Stream is closed');
     }
-    this.input.close();
+    this.inputClosed = true;
+    this.inputWriter.close(); // input.writable is locked by inputWriter; close through it
   }
 
-  next(): Promise<IteratorResult<VADEvent>> {
-    return this.output.next();
+  async next(): Promise<IteratorResult<VADEvent>> {
+    return this.outputReader.read().then(({ done, value }) => {
+      if (done) {
+        return { done: true, value: undefined };
+      }
+      return { done: false, value };
+    });
   }
 
   close() {
-    this.input.close();
-    this.queue.close();
-    this.output.close();
+    if (!this.inputClosed && !this.closed) this.inputWriter.close(); // writable is locked
     this.closed = true;
   }
 
diff --git a/agents/src/voice/agent.ts b/agents/src/voice/agent.ts
new file mode 100644
index 00000000..d29d8b35
--- /dev/null
+++ b/agents/src/voice/agent.ts
@@ -0,0 +1,128 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+/* eslint-disable @typescript-eslint/no-unused-vars */
+
+/* eslint-disable @typescript-eslint/no-explicit-any */
+import type { AudioFrame } from '@livekit/rtc-node';
+import { ReadableStream } from 'node:stream/web';
+import type { ChatChunk, ChatMessage, LLM } from '../llm/index.js';
+import { ChatContext } from '../llm/index.js';
+import { StreamAdapter as STTStreamAdapter } from '../stt/index.js';
+import type { STT, SpeechEvent } from '../stt/index.js';
+import type { TTS } from '../tts/index.js';
+import type { VAD } from '../vad.js';
+import type { AgentActivity } from './agent_activity.js';
+
+export class Agent {
+  private instructions: string;
+  private chatCtx: ChatContext;
+  private tools: any; // TODO(shubhra): add type
+  private turnDetection: any; // TODO(shubhra): add type
+  private stt: STT | undefined;
+  private vad: VAD | undefined;
+  private llm: LLM | any;
+  private tts: TTS | undefined;
+
+  /** @internal */
+  agentActivity?: AgentActivity;
+
+  constructor(
+    instructions: string,
+    chatCtx?: ChatContext,
+    tools?: any, // TODO(shubhra): add type
+    turnDetection?: any, // TODO(shubhra): add type
+    stt?: STT,
+    vad?: VAD,
+    llm?: LLM | any,
+    tts?: TTS,
+    allowInterruptions?: boolean,
+  ) {
+    this.instructions = instructions;
+    this.chatCtx = chatCtx || new ChatContext();
+    this.tools = tools;
+    this.turnDetection = turnDetection;
+    this.stt = stt;
+    this.vad = vad;
+    this.llm = llm;
+    this.tts = tts;
+    this.agentActivity = undefined; // TODO(shubhra): add type
+  }
+
+  async onEnter(): Promise<void> {}
+
+  async onExit(): Promise<void> {}
+
+  async transcriptionNode(
+    text: ReadableStream<string>,
+    modelSettings: any, // TODO(shubhra): add type
+  ): Promise<ReadableStream<string> | null> {
+    return null;
+  }
+
+  async onUserTurnCompleted(chatCtx: ChatContext, newMessage: ChatMessage): Promise<void> {}
+
+  async sttNode(
+    audio: ReadableStream<AudioFrame>,
+    modelSettings: any, // TODO(shubhra): add type
+  ): Promise<ReadableStream<SpeechEvent | string> | null> {
+    return Agent.default.sttNode(this, audio, modelSettings);
+  }
+
+  async llmNode(
+    chatCtx: ChatContext,
+    tools: Array<any>, // TODO(shubhra): add type
+    modelSettings: any, // TODO(shubhra): add type
+  ): Promise<ReadableStream<ChatChunk | string> | null> {
+    return null;
+  }
+
+  async ttsNode(
+    text: ReadableStream<string>,
+    modelSettings: any, // TODO(shubhra): add type
+  ): Promise<ReadableStream<AudioFrame> | null> {
+    return null;
+  }
+
+  // realtime_audio_output_node
+
+  getActivityOrThrow(): AgentActivity {
+    if (!this.agentActivity) {
+      throw new Error('Agent activity not found');
+    }
+    return this.agentActivity;
+  }
+
+  /** Default node implementations; `sttNode` above delegates to `Agent.default.sttNode`. */
+  static default = {
+    async sttNode(
+      agent: Agent,
+      audio: ReadableStream<AudioFrame>,
+      modelSettings: any, // TODO(shubhra): add type
+    ): Promise<ReadableStream<SpeechEvent | string> | null> {
+      const activity = agent.getActivityOrThrow();
+      let wrappedStt = activity.stt;
+      if (!wrappedStt.capabilities.streaming) {
+        if (!agent.vad) {
+          throw new Error(
+            'STT does not support streaming, add a VAD to the AgentTask/VoiceAgent to enable streaming',
+          );
+        }
+        wrappedStt = new STTStreamAdapter(wrappedStt, agent.vad);
+      }
+
+      const stream = wrappedStt.stream();
+      stream.updateInputStream(audio);
+      return new ReadableStream({
+        async start(controller) {
+          for await (const event of stream) {
+            controller.enqueue(event);
+          }
+          // Close once the STT stream ends so downstream readers observe `done`.
+          controller.close();
+        },
+      });
+    },
+  };
+}
diff --git a/agents/src/voice/agent_activity.ts b/agents/src/voice/agent_activity.ts
new file mode 100644
index 00000000..89d913ea
--- /dev/null
+++ b/agents/src/voice/agent_activity.ts
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import type { AudioFrame } from '@livekit/rtc-node';
+import type { ReadableStream } from 'node:stream/web';
+import { log } from '../log.js';
+import type { STT, SpeechEvent } from '../stt/stt.js';
+import type { VADEvent } from '../vad.js';
+import type { Agent } from './agent.js';
+import type { AgentSession } from './agent_session.js';
+import {
+  AudioRecognition,
+  type EndOfTurnInfo,
+  type RecognitionHooks,
+} from './audio_recognition.js';
+
+export class AgentActivity implements RecognitionHooks {
+  private started = false;
+  private audioRecognition?: AudioRecognition;
+  private logger = log();
+  private turnDetectionMode?: string;
+
+  agent: Agent;
+  agentSession: AgentSession;
+
+  constructor(agent: Agent, agentSession: AgentSession) {
+    this.agent = agent;
+    this.agentSession = agentSession;
+  }
+
+  async start(): Promise<void> {
+    this.agent.agentActivity = this;
+    this.audioRecognition = new AudioRecognition(
+      this,
+      this.agentSession.vad,
+      // Arrow function preserves the Agent context
+      (...args) => this.agent.sttNode(...args),
+      this.turnDetectionMode === 'manual',
+    );
+    this.audioRecognition.start();
+    this.started = true;
+
+    // TODO(shubhra): Add turn detection mode
+  }
+
+  get stt(): STT {
+    // TODO(shubhra): Allow components to be defined in Agent class
+    return this.agentSession.stt;
+  }
+
+  updateAudioInput(audioStream: ReadableStream<AudioFrame>): void {
+    this.audioRecognition?.setInputAudioStream(audioStream);
+  }
+
+  onStartOfSpeech(ev: VADEvent): void {
+    this.logger.info('Start of speech', ev);
+  }
+
+  onEndOfSpeech(ev: VADEvent): void {
+    this.logger.info('End of speech', ev);
+  }
+
+  // eslint-disable-next-line @typescript-eslint/no-unused-vars
+  onVADInferenceDone(ev: VADEvent): void {
+    // TODO(AJS-40): Implement this
+  }
+
+  onInterimTranscript(ev: SpeechEvent): void {
+    this.logger.info('Interim transcript', ev);
+  }
+
+  onFinalTranscript(ev: SpeechEvent): void {
+    this.logger.info(`Final transcript ${ev.alternatives?.[0]?.text ?? ''}`); // may be absent
+  }
+
+  onEndOfTurn(ev: EndOfTurnInfo): void {
+    this.logger.info('End of turn', ev);
+  }
+}
diff --git a/agents/src/voice/agent_session.ts b/agents/src/voice/agent_session.ts
new file mode 100644
index 00000000..b3dc43e7
--- /dev/null
+++ b/agents/src/voice/agent_session.ts
@@ -0,0 +1,84 @@
+// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import type { AudioFrame, AudioSource, Room } from '@livekit/rtc-node';
+import type { ReadableStream } from 'node:stream/web';
+import { log } from '../log.js';
+import type { AgentState } from '../pipeline/index.js';
+import type { STT } from '../stt/index.js';
+import type { VAD } from '../vad.js';
+import type { Agent } from './agent.js';
+import { AgentActivity } from './agent_activity.js';
+import type { UserState } from './events.js';
+import { RoomIO } from './room_io.js';
+
+export class AgentSession {
+  vad: VAD;
+  stt: STT;
+
+  private agent?: Agent;
+  private activity?: AgentActivity;
+  private nextActivity?: AgentActivity;
+  private started = false;
+  private userState: UserState = 'listening';
+  private agentState: AgentState = 'initializing';
+
+  private roomIO?: RoomIO;
+  private logger = log();
+
+  /** @internal */
+  audioInput?: ReadableStream<AudioFrame>;
+  /** @internal */
+  audioOutput?: AudioSource;
+
+  constructor(vad: VAD, stt: STT) {
+    this.vad = vad;
+    this.stt = stt;
+  }
+
+  async start(agent: Agent, room: Room): Promise<void> {
+    if (this.started) {
+      return;
+    }
+
+    this.agent = agent;
+
+    if (this.agent) {
+      await this.updateActivity(this.agent);
+    }
+
+    // TODO(AJS-38): update with TTS sample rate and num channels
+    this.roomIO = new RoomIO(this, room, 0, 0);
+    this.roomIO.start();
+
+    if (this.audioInput) {
+      this.activity?.updateAudioInput(this.audioInput);
+    }
+
+    this.logger.debug('AgentSession started');
+    this.started = true;
+  }
+
+  private async updateActivity(agent: Agent): Promise<void> {
+    this.nextActivity = new AgentActivity(agent, this);
+
+    // TODO(shubhra): Drain and close the old activity
+
+    this.activity = this.nextActivity;
+    this.nextActivity = undefined;
+
+    if (this.activity) {
+      await this.activity.start();
+    }
+  }
+
+  /** @internal */
+  _updateAgentState(state: AgentState) {
+    this.agentState = state;
+  }
+
+  /** @internal */
+  _updateUserState(state: UserState) {
+    this.userState = state;
+  }
+}
diff --git a/agents/src/voice/audio_recognition.ts b/agents/src/voice/audio_recognition.ts
new file mode 100644
index 00000000..88699e6b
--- /dev/null
+++ b/agents/src/voice/audio_recognition.ts
@@ -0,0 +1,151 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import type { AudioFrame } from '@livekit/rtc-node';
+import { ReadableStream } from 'node:stream/web';
+import { log } from '../log.js';
+import { DeferredReadableStream } from '../stream/deferred_stream.js';
+import { type SpeechEvent, SpeechEventType } from '../stt/stt.js';
+import { type VAD, type VADEvent, VADEventType } from '../vad.js';
+import type { STTNode } from './io.js';
+
+export interface EndOfTurnInfo {
+  newTranscript: string;
+  transcriptionDelay: number;
+  endOfUtteranceDelay: number;
+}
+
+export interface RecognitionHooks {
+  onStartOfSpeech: (ev: VADEvent) => void;
+  onEndOfSpeech: (ev: VADEvent) => void;
+  onVADInferenceDone: (ev: VADEvent) => void;
+  onInterimTranscript: (ev: SpeechEvent) => void;
+  onFinalTranscript: (ev: SpeechEvent) => void;
+  onEndOfTurn: (info: EndOfTurnInfo) => void;
+}
+
+export class AudioRecognition {
+  private deferredInputStream: DeferredReadableStream<AudioFrame>;
+  private vadStreamProcessor?: Promise<void>;
+  private sttStreamProcessor?: Promise<void>;
+  private logger = log();
+  private lastLanguage?: string;
+  private lastFinalTranscriptTime = 0;
+  private audioTranscript = '';
+  private audioInterimTranscript = '';
+  private lastSpeakingTime = 0;
+  private userTurnCommitted = false;
+  private speaking = false;
+  constructor(
+    private hooks: RecognitionHooks,
+    private vad: VAD,
+    private stt: STTNode,
+    private manualTurnDetection = false,
+  ) {
+    this.deferredInputStream = new DeferredReadableStream<AudioFrame>();
+  }
+
+  async start() {
+    const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();
+    this.vadStreamProcessor = this.vadTask(vadInputStream).catch((err) => {
+      this.logger.error(`Error in VAD task: ${err}`);
+    });
+    this.sttStreamProcessor = this.sttTask(sttInputStream).catch((err) => {
+      this.logger.error(`Error in STT task: ${err}`);
+    });
+  }
+
+  /** Routes an STT event to the hooks and, for final transcripts, updates turn state. */
+  private async onSTTEvent(ev: SpeechEvent) {
+    // TODO(AJS-30) ignore stt event if user turn already committed and EOU task is done
+    // or it's an interim transcript
+    switch (ev.type) {
+      case SpeechEventType.FINAL_TRANSCRIPT:
+        this.hooks.onFinalTranscript(ev);
+        const transcript = ev.alternatives?.[0]?.text;
+        this.lastLanguage = ev.alternatives?.[0]?.language;
+
+        if (!transcript) return;
+
+        this.logger.debug('received user transcript', {
+          user_transcript: transcript,
+          language: this.lastLanguage,
+        });
+
+        this.lastFinalTranscriptTime = Date.now();
+        this.audioTranscript += ` ${transcript}`;
+        this.audioTranscript = this.audioTranscript.trim();
+        this.audioInterimTranscript = '';
+
+        // No VAD: vadTask is the only other writer of lastSpeakingTime, so stamp it here.
+        if (!this.speaking && !this.vad) {
+          this.lastSpeakingTime = Date.now();
+        }
+
+        if (!this.manualTurnDetection || this.userTurnCommitted) {
+          this.hooks.onEndOfTurn({
+            newTranscript: transcript,
+            transcriptionDelay: this.lastFinalTranscriptTime - this.lastSpeakingTime,
+            // Elapsed time since the user was last speaking.
+            endOfUtteranceDelay: Date.now() - this.lastSpeakingTime,
+          });
+        }
+
+        break;
+      case SpeechEventType.INTERIM_TRANSCRIPT:
+        this.hooks.onInterimTranscript(ev);
+        this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';
+        break;
+    }
+  }
+
+  private async sttTask(inputStream: ReadableStream<AudioFrame>) {
+    const sttStream = await this.stt(inputStream, {});
+    if (sttStream === null) {
+      return;
+    }
+    if (sttStream instanceof ReadableStream) {
+      const reader = sttStream.getReader();
+      while (true) {
+        const { done, value: ev } = await reader.read();
+        if (done) {
+          break;
+        }
+        if (typeof ev === 'string') {
+          throw new Error('STT node must yield SpeechEvent');
+        } else {
+          await this.onSTTEvent(ev);
+        }
+      }
+      reader.releaseLock();
+      sttStream.cancel();
+    }
+  }
+
+  private async vadTask(inputStream: ReadableStream<AudioFrame>) {
+    const vadStream = this.vad.stream();
+    vadStream.updateInputStream(inputStream);
+
+    for await (const ev of vadStream) {
+      switch (ev.type) {
+        case VADEventType.START_OF_SPEECH:
+          this.hooks.onStartOfSpeech(ev);
+          this.speaking = true;
+          break;
+        case VADEventType.INFERENCE_DONE:
+          this.hooks.onVADInferenceDone(ev);
+          break;
+        case VADEventType.END_OF_SPEECH:
+          this.hooks.onEndOfSpeech(ev);
+          this.speaking = false;
+          // when VAD fires END_OF_SPEECH, it already waited for the silence_duration
+          this.lastSpeakingTime = Date.now() - ev.silenceDuration;
+          break;
+      }
+    }
+  }
+
+  setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {
+    this.deferredInputStream.setSource(audioStream);
+  }
+}
diff --git a/agents/src/voice/events.ts b/agents/src/voice/events.ts
new file mode 100644
index 00000000..4710b5ef
--- /dev/null
+++ b/agents/src/voice/events.ts
@@ -0,0 +1,5 @@
+// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+export type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking'; // 'initializing' is agent-only
+export type UserState = 'idle' | 'thinking' | 'listening' | 'speaking'; // 'idle' is user-only
diff --git a/agents/src/voice/index.ts b/agents/src/voice/index.ts
new file mode 100644
index 00000000..7e20ed18
--- /dev/null
+++ b/agents/src/voice/index.ts
@@ -0,0 +1,5 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+export { Agent } from './agent.js';
+export { AgentSession } from './agent_session.js';
diff --git a/agents/src/voice/io.ts b/agents/src/voice/io.ts
new file mode 100644
index 00000000..0a79bf0a
--- /dev/null
+++ b/agents/src/voice/io.ts
@@ -0,0 +1,11 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import type { AudioFrame } from '@livekit/rtc-node';
+import type { ReadableStream } from 'node:stream/web';
+import type { SpeechEvent } from '../stt/stt.js';
+
+export type STTNode = (
+  audio: ReadableStream<AudioFrame>, // stream of audio frames handed to the node
+  modelSettings: any, // TODO(shubhra): add type
+) => Promise<ReadableStream<SpeechEvent | string> | null>; // resolves to an output stream, or null for none
diff --git a/agents/src/voice/room_io.ts b/agents/src/voice/room_io.ts
new file mode 100644
index 00000000..5d60bbe0
--- /dev/null
+++ b/agents/src/voice/room_io.ts
@@ -0,0 +1,96 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import type { AudioFrame, Room } from '@livekit/rtc-node';
+import {
+  AudioSource,
+  AudioStream,
+  LocalAudioTrack,
+  type LocalTrackPublication,
+  type RemoteTrack,
+  RoomEvent,
+  TrackKind,
+  TrackPublishOptions,
+  TrackSource,
+} from '@livekit/rtc-node';
+import type { ReadableStream } from 'node:stream/web';
+import { log } from '../log.js';
+import { DeferredReadableStream } from '../stream/deferred_stream.js';
+import type { AgentSession } from './agent_session.js';
+
+/**
+ * Connects an AgentSession to a LiveKit Room: audio from subscribed remote audio tracks
+ * is fed to the session as its input, and an AudioSource published as a local track is
+ * handed to the session as its output.
+ */
+export class RoomIO {
+  private agentSession: AgentSession;
+  private participantAudioInputStream: ReadableStream<AudioFrame>;
+  private logger = log();
+
+  private room: Room;
+
+  // placeholder stream whose real source is attached once a remote audio track is subscribed
+  private _deferredAudioInputStream = new DeferredReadableStream<AudioFrame>();
+  private audioSource: AudioSource;
+  private publication?: LocalTrackPublication;
+
+  /**
+   * @param agentSession - session that receives the audio input stream and output source
+   * @param room - room whose `TrackSubscribed` events are observed
+   * @param sampleRate - sample rate passed to the output AudioSource
+   * @param numChannels - channel count passed to the output AudioSource
+   */
+  constructor(agentSession: AgentSession, room: Room, sampleRate: number, numChannels: number) {
+    this.agentSession = agentSession;
+    this.room = room;
+    this.participantAudioInputStream = this._deferredAudioInputStream.stream;
+    this.audioSource = new AudioSource(sampleRate, numChannels);
+
+    this.setupEventListeners();
+  }
+
+  private setupEventListeners() {
+    this.room.on(RoomEvent.TrackSubscribed, this.onTrackSubscribed);
+  }
+
+  // NOTE(review): nothing in this class calls cleanup() yet, so the listener stays registered
+  private cleanup() {
+    this.room.off(RoomEvent.TrackSubscribed, this.onTrackSubscribed);
+  }
+
+  // arrow-function property so `this` stays bound when passed to room.on/off
+  private onTrackSubscribed = (track: RemoteTrack) => {
+    if (track.kind === TrackKind.KIND_AUDIO) {
+      this._deferredAudioInputStream.setSource(
+        new AudioStream(track, {
+          // TODO(AJS-41) remove hardcoded sample rate
+          sampleRate: 16000,
+          numChannels: 1,
+        }),
+      );
+    }
+  };
+
+  // publishes the output AudioSource as a local track; does nothing if localParticipant is unset
+  private async publishTrack() {
+    const track = LocalAudioTrack.createAudioTrack('roomio_audio', this.audioSource);
+    this.publication = await this.room.localParticipant?.publishTrack(
+      track,
+      new TrackPublishOptions({ source: TrackSource.SOURCE_MICROPHONE }),
+    );
+  }
+
+  /**
+   * Starts publishing the output track and wires the input stream and output source into
+   * the session. Publishing runs in the background; a failure is logged rather than left
+   * as an unhandled promise rejection.
+   */
+  start() {
+    this.publishTrack().catch((error: unknown) => {
+      this.logger.error({ err: error }, 'failed to publish the agent audio track');
+    });
+    this.agentSession.audioInput = this.participantAudioInputStream;
+    this.agentSession.audioOutput = this.audioSource;
+  }
+}
diff --git a/examples/src/basic_agent.ts b/examples/src/basic_agent.ts
new file mode 100644
index 00000000..12f5c56b
--- /dev/null
+++ b/examples/src/basic_agent.ts
@@ -0,0 +1,33 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import {
+  type JobContext,
+  type JobProcess,
+  WorkerOptions,
+  cli,
+  defineAgent,
+  voice,
+} from '@livekit/agents';
+import * as deepgram from '@livekit/agents-plugin-deepgram';
+import * as silero from '@livekit/agents-plugin-silero';
+import { fileURLToPath } from 'node:url';
+
+export default defineAgent({
+  prewarm: async (proc: JobProcess) => {
+    proc.userData.vad = await silero.VAD.load(); // load the Silero VAD and store it in userData
+  },
+  entry: async (ctx: JobContext) => {
+    const agent = new voice.Agent('test');
+    await ctx.connect();
+    const participant = await ctx.waitForParticipant();
+    console.log('participant joined: ', participant.identity);
+
+    const vad = ctx.proc.userData.vad! as silero.VAD; // the value stored by prewarm above
+
+    const session = new voice.AgentSession(vad, new deepgram.STT());
+    session.start(agent, ctx.room); // NOTE(review): not awaited — confirm start() is synchronous
+  },
+});
+
+cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) }));
diff --git a/plugins/cartesia/package.json b/plugins/cartesia/package.json
index 67adcf2b..38f3ef06 100644
--- a/plugins/cartesia/package.json
+++ b/plugins/cartesia/package.json
@@ -33,7 +33,7 @@
     "@livekit/agents": "workspace:^x",
     "@livekit/agents-plugin-openai": "workspace:^x",
     "@livekit/agents-plugins-test": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11",
+    "@livekit/rtc-node": "^0.13.12",
     "@microsoft/api-extractor": "^7.35.0",
     "@types/ws": "^8.5.10",
     "tsup": "^8.3.5",
@@ -44,6 +44,6 @@
   },
   "peerDependencies": {
     "@livekit/agents": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11"
+    "@livekit/rtc-node": "^0.13.12"
   }
 }
diff --git a/plugins/deepgram/package.json b/plugins/deepgram/package.json
index 0ef1e09a..0dc9d0aa 100644
--- a/plugins/deepgram/package.json
+++ b/plugins/deepgram/package.json
@@ -33,7 +33,7 @@
     "@livekit/agents": "workspace:^x",
     "@livekit/agents-plugin-silero": "workspace:^x",
     "@livekit/agents-plugins-test": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11",
+    "@livekit/rtc-node": "^0.13.12",
     "@microsoft/api-extractor": "^7.35.0",
     "@types/ws": "^8.5.10",
     "tsup": "^8.3.5",
@@ -44,6 +44,6 @@
   },
   "peerDependencies": {
     "@livekit/agents": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11"
+    "@livekit/rtc-node": "^0.13.12"
   }
 }
diff --git a/plugins/deepgram/src/stt.ts b/plugins/deepgram/src/stt.ts
index 560b26a7..5fe838da 100644
--- a/plugins/deepgram/src/stt.ts
+++ b/plugins/deepgram/src/stt.ts
@@ -125,7 +125,6 @@ export class SpeechStream extends stt.SpeechStream {
   constructor(stt: STT, opts: STTOptions) {
     super(stt);
     this.#opts = opts;
-    this.closed = false;
     this.#audioEnergyFilter = new AudioEnergyFilter();
 
     this.#run();
@@ -134,7 +133,7 @@ export class SpeechStream extends stt.SpeechStream {
   async #run(maxRetry = 32) {
     let retries = 0;
     let ws: WebSocket;
-    while (!this.input.closed) {
+    while (!this.inputClosed) {
       const streamURL = new URL(API_BASE_URL_V1);
       const params = {
         model: this.#opts.model,
@@ -193,7 +192,7 @@ export class SpeechStream extends stt.SpeechStream {
       }
     }
 
-    this.closed = true;
+    this.close();
   }
 
   updateOptions(opts: Partial<STTOptions>) {
@@ -222,7 +221,10 @@ export class SpeechStream extends stt.SpeechStream {
         samples100Ms,
       );
 
-      for await (const data of this.input) {
+      while (true) {
+        const { done, value: data } = await this.inputReader.read();
+        if (done) break;
+
         let frames: AudioFrame[];
         if (data === SpeechStream.FLUSH_SENTINEL) {
           frames = stream.flush();
@@ -270,7 +272,7 @@ export class SpeechStream extends stt.SpeechStream {
                 // It's also possible we receive a transcript without a SpeechStarted event.
                 if (this.#speaking) return;
                 this.#speaking = true;
-                this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
+                this.outputWriter.write({ type: stt.SpeechEventType.START_OF_SPEECH });
                 break;
               }
               // see this page:
@@ -288,16 +290,16 @@ export class SpeechStream extends stt.SpeechStream {
                 if (alternatives[0] && alternatives[0].text) {
                   if (!this.#speaking) {
                     this.#speaking = true;
-                    this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
+                    this.outputWriter.write({ type: stt.SpeechEventType.START_OF_SPEECH });
                   }
 
                   if (isFinal) {
-                    this.queue.put({
+                    this.outputWriter.write({
                       type: stt.SpeechEventType.FINAL_TRANSCRIPT,
                       alternatives: [alternatives[0], ...alternatives.slice(1)],
                     });
                   } else {
-                    this.queue.put({
+                    this.outputWriter.write({
                       type: stt.SpeechEventType.INTERIM_TRANSCRIPT,
                       alternatives: [alternatives[0], ...alternatives.slice(1)],
                     });
@@ -309,7 +311,7 @@ export class SpeechStream extends stt.SpeechStream {
                 // a non-empty transcript (deepgram doesn't have a SpeechEnded event)
                 if (isEndpoint && this.#speaking) {
                   this.#speaking = false;
-                  this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });
+                  this.outputWriter.write({ type: stt.SpeechEventType.END_OF_SPEECH });
                 }
 
                 break;
diff --git a/plugins/elevenlabs/package.json b/plugins/elevenlabs/package.json
index 758b4d75..7520d5b0 100644
--- a/plugins/elevenlabs/package.json
+++ b/plugins/elevenlabs/package.json
@@ -33,7 +33,7 @@
     "@livekit/agents": "workspace:^x",
     "@livekit/agents-plugin-openai": "workspace:^x",
     "@livekit/agents-plugins-test": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11",
+    "@livekit/rtc-node": "^0.13.12",
     "@microsoft/api-extractor": "^7.35.0",
     "@types/ws": "^8.5.10",
     "tsup": "^8.3.5",
@@ -44,6 +44,6 @@
   },
   "peerDependencies": {
     "@livekit/agents": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11"
+    "@livekit/rtc-node": "^0.13.12"
   }
 }
diff --git a/plugins/neuphonic/package.json b/plugins/neuphonic/package.json
index 1277156d..aafe1aa3 100644
--- a/plugins/neuphonic/package.json
+++ b/plugins/neuphonic/package.json
@@ -33,7 +33,7 @@
     "@livekit/agents": "workspace:^x",
     "@livekit/agents-plugin-openai": "workspace:^x",
     "@livekit/agents-plugins-test": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11",
+    "@livekit/rtc-node": "^0.13.12",
     "@microsoft/api-extractor": "^7.35.0",
     "@types/ws": "^8.5.10",
     "tsup": "^8.3.5",
@@ -44,6 +44,6 @@
   },
   "peerDependencies": {
     "@livekit/agents": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11"
+    "@livekit/rtc-node": "^0.13.12"
   }
 }
diff --git a/plugins/openai/package.json b/plugins/openai/package.json
index 16cc6a89..0bc110ef 100644
--- a/plugins/openai/package.json
+++ b/plugins/openai/package.json
@@ -33,7 +33,7 @@
     "@livekit/agents": "workspace:^x",
     "@livekit/agents-plugin-silero": "workspace:^x",
     "@livekit/agents-plugins-test": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11",
+    "@livekit/rtc-node": "^0.13.12",
     "@microsoft/api-extractor": "^7.35.0",
     "@types/ws": "^8.5.10",
     "tsup": "^8.3.5",
@@ -46,6 +46,6 @@
   },
   "peerDependencies": {
     "@livekit/agents": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11"
+    "@livekit/rtc-node": "^0.13.12"
   }
 }
diff --git a/plugins/resemble/package.json b/plugins/resemble/package.json
index 3bd876b5..75aa1d28 100644
--- a/plugins/resemble/package.json
+++ b/plugins/resemble/package.json
@@ -33,7 +33,7 @@
     "@livekit/agents": "workspace:^",
     "@livekit/agents-plugin-openai": "workspace:^",
     "@livekit/agents-plugins-test": "workspace:^",
-    "@livekit/rtc-node": "^0.13.11",
+    "@livekit/rtc-node": "^0.13.12",
     "@microsoft/api-extractor": "^7.35.0",
     "@types/ws": "^8.5.10",
     "tsup": "^8.3.5",
@@ -44,6 +44,6 @@
   },
   "peerDependencies": {
     "@livekit/agents": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11"
+    "@livekit/rtc-node": "^0.13.12"
   }
 }
diff --git a/plugins/silero/package.json b/plugins/silero/package.json
index ad93456b..f960af4f 100644
--- a/plugins/silero/package.json
+++ b/plugins/silero/package.json
@@ -31,7 +31,7 @@
   },
   "devDependencies": {
     "@livekit/agents": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11",
+    "@livekit/rtc-node": "^0.13.12",
     "@microsoft/api-extractor": "^7.35.0",
     "@types/ws": "^8.5.10",
     "onnxruntime-common": "^1.19.2",
@@ -44,6 +44,6 @@
   },
   "peerDependencies": {
     "@livekit/agents": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11"
+    "@livekit/rtc-node": "^0.13.12"
   }
 }
diff --git a/plugins/silero/src/vad.ts b/plugins/silero/src/vad.ts
index 6d38462b..1e42aef5 100644
--- a/plugins/silero/src/vad.ts
+++ b/plugins/silero/src/vad.ts
@@ -157,7 +157,12 @@ export class VADStream extends baseStream {
       // used to avoid drift when the sampleRate ratio is not an integer
       let inputCopyRemainingFrac = 0.0;
 
-      for await (const frame of this.input) {
+      while (true) {
+        const { done, value: frame } = await this.inputReader.read();
+        if (done) {
+          break;
+        }
+
         if (typeof frame === 'symbol') {
           continue; // ignore flush sentinel for now
         }
@@ -255,7 +260,7 @@ export class VADStream extends baseStream {
             pubSilenceDuration += inferenceDuration;
           }
 
-          this.queue.put({
+          this.outputWriter.write({
             type: VADEventType.INFERENCE_DONE,
             samplesIndex: pubCurrentSample,
             timestamp: pubTimestamp,
@@ -309,7 +314,7 @@ export class VADStream extends baseStream {
               pubSilenceDuration = 0;
               pubSpeechDuration = speechThresholdDuration;
 
-              this.queue.put({
+              this.outputWriter.write({
                 type: VADEventType.START_OF_SPEECH,
                 samplesIndex: pubCurrentSample,
                 timestamp: pubTimestamp,
@@ -336,7 +341,7 @@ export class VADStream extends baseStream {
               pubSpeechDuration = 0;
               pubSilenceDuration = silenceThresholdDuration;
 
-              this.queue.put({
+              this.outputWriter.write({
                 type: VADEventType.END_OF_SPEECH,
                 samplesIndex: pubCurrentSample,
                 timestamp: pubTimestamp,
diff --git a/plugins/test/package.json b/plugins/test/package.json
index 5437414e..eae6cf6d 100644
--- a/plugins/test/package.json
+++ b/plugins/test/package.json
@@ -28,7 +28,7 @@
   },
   "devDependencies": {
     "@livekit/agents": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11",
+    "@livekit/rtc-node": "^0.13.12",
     "@types/node": "^22.5.5",
     "tsup": "^8.3.5",
     "typescript": "^5.0.0"
@@ -40,6 +40,6 @@
   },
   "peerDependencies": {
     "@livekit/agents": "workspace:^x",
-    "@livekit/rtc-node": "^0.13.11"
+    "@livekit/rtc-node": "^0.13.12"
   }
 }
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 24dfbb43..881fedc0 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -107,7 +107,7 @@ importers:
     devDependencies:
       '@livekit/rtc-node':
         specifier: ^0.13.12
-        version: 0.13.12
+        version: 0.13.13
       '@microsoft/api-extractor':
         specifier: ^7.35.0
         version: 7.43.7(@types/node@22.5.5)
@@ -146,7 +146,7 @@ importers:
         version: link:../plugins/silero
       '@livekit/rtc-node':
         specifier: ^0.13.11
-        version: 0.13.11
+        version: 0.13.13
       livekit-server-sdk:
         specifier: ^2.9.2
         version: 2.9.2
@@ -180,8 +180,8 @@ importers:
         specifier: workspace:^x
         version: link:../test
       '@livekit/rtc-node':
-        specifier: ^0.13.11
-        version: 0.13.11
+        specifier: ^0.13.12
+        version: 0.13.13
       '@microsoft/api-extractor':
         specifier: ^7.35.0
         version: 7.43.7(@types/node@22.5.5)
@@ -211,8 +211,8 @@ importers:
         specifier: workspace:^x
         version: link:../test
       '@livekit/rtc-node':
-        specifier: ^0.13.11
-        version: 0.13.11
+        specifier: ^0.13.12
+        version: 0.13.13
       '@microsoft/api-extractor':
         specifier: ^7.35.0
         version: 7.43.7(@types/node@22.5.5)
@@ -242,8 +242,8 @@ importers:
         specifier: workspace:^x
         version: link:../test
       '@livekit/rtc-node':
-        specifier: ^0.13.11
-        version: 0.13.11
+        specifier: ^0.13.12
+        version: 0.13.13
       '@microsoft/api-extractor':
         specifier: ^7.35.0
         version: 7.43.7(@types/node@22.5.5)
@@ -298,8 +298,8 @@ importers:
         specifier: workspace:^x
         version: link:../test
       '@livekit/rtc-node':
-        specifier: ^0.13.11
-        version: 0.13.11
+        specifier: ^0.13.12
+        version: 0.13.13
       '@microsoft/api-extractor':
         specifier: ^7.35.0
         version: 7.43.7(@types/node@22.5.5)
@@ -335,8 +335,8 @@ importers:
         specifier: workspace:^x
         version: link:../test
       '@livekit/rtc-node':
-        specifier: ^0.13.11
-        version: 0.13.11
+        specifier: ^0.13.12
+        version: 0.13.13
       '@microsoft/api-extractor':
         specifier: ^7.35.0
         version: 7.43.7(@types/node@22.5.5)
@@ -366,8 +366,8 @@ importers:
         specifier: workspace:^
         version: link:../test
       '@livekit/rtc-node':
-        specifier: ^0.13.11
-        version: 0.13.11
+        specifier: ^0.13.12
+        version: 0.13.13
       '@microsoft/api-extractor':
         specifier: ^7.35.0
         version: 7.43.7(@types/node@22.5.5)
@@ -394,8 +394,8 @@ importers:
         specifier: workspace:^x
         version: link:../../agents
       '@livekit/rtc-node':
-        specifier: ^0.13.11
-        version: 0.13.11
+        specifier: ^0.13.12
+        version: 0.13.13
       '@microsoft/api-extractor':
         specifier: ^7.35.0
         version: 7.43.7(@types/node@22.5.5)
@@ -428,8 +428,8 @@ importers:
         specifier: workspace:^x
         version: link:../../agents
       '@livekit/rtc-node':
-        specifier: ^0.13.11
-        version: 0.13.11
+        specifier: ^0.13.12
+        version: 0.13.13
       '@types/node':
         specifier: ^22.5.5
         version: 22.5.5
@@ -1329,72 +1329,38 @@ packages:
   '@livekit/protocol@1.29.1':
     resolution: {integrity: sha512-OhxXTZlyM5f4ydnAq1p5azzzOtKWmIoCSVtyVj9rgE42zQI5JM1rR9pubVRZovouGSvEDSJx9yL4Js2IlIyM1Q==}
 
-  '@livekit/rtc-node-darwin-arm64@0.13.11':
-    resolution: {integrity: sha512-XqbVUW5rVrRdVzxUI3+f8K6A1bnzAXytbCmPx7YiGOXVNRCV1kC84R7fap7OgrgN/rAtObhyYK882xdJVG/BYA==}
+  '@livekit/rtc-node-darwin-arm64@0.13.13':
+    resolution: {integrity: sha512-iyjWwgr7JKTHa+YX1aCiKPT91Zk/snnBWOrWIJz9qq9X3cDvKVFjxOpJF1wVKPHBWE1dwDmNpSry/tltiUilZQ==}
     engines: {node: '>= 10'}
     cpu: [arm64]
     os: [darwin]
 
-  '@livekit/rtc-node-darwin-arm64@0.13.12':
-    resolution: {integrity: sha512-R13dfyNc3CwIAmmTo3rMO6d5HRddkzwjZ7RRkak8uYUMORMBcIrNZIOSAdRDo6kylLDvD7dzVXBQeqHVP/XBsg==}
-    engines: {node: '>= 10'}
-    cpu: [arm64]
-    os: [darwin]
-
-  '@livekit/rtc-node-darwin-x64@0.13.11':
-    resolution: {integrity: sha512-UFe9Lp+7Z8UZcJq2oOH8+6nCKWlX0PVorB4jeCRZuVa4QL2PL1CcGvo9/kNNw5aA25AkPUgDjMXj2WbfEPNMKA==}
-    engines: {node: '>= 10'}
-    cpu: [x64]
-    os: [darwin]
-
-  '@livekit/rtc-node-darwin-x64@0.13.12':
-    resolution: {integrity: sha512-fStjTQID0N4c6nX3Ii8JuADd1doP8hcXF0kcCQ7yG+yBZQxKpVQ9r66yxplxJSR0bpUd3NPf71GFbAVfWr+7eg==}
+  '@livekit/rtc-node-darwin-x64@0.13.13':
+    resolution: {integrity: sha512-MmmDIUO85D4Mj1vQqVhTBXoP64kfd4HMeTGI8mPxsqlzoj4M80/N0um/dW+in5RuwvMZ6AMHgVKuWK4nKxf7CQ==}
     engines: {node: '>= 10'}
     cpu: [x64]
     os: [darwin]
 
-  '@livekit/rtc-node-linux-arm64-gnu@0.13.11':
-    resolution: {integrity: sha512-GuJtl1nJhJnFEMI9plJqlIJ0BJCWuynJzbhhD7Yd/Zuw/NYzzzIf+wQ2mIZ0Zk9/EUV4oMYJqacJiZXvOvUQ3A==}
-    engines: {node: '>= 10'}
-    cpu: [arm64]
-    os: [linux]
-
-  '@livekit/rtc-node-linux-arm64-gnu@0.13.12':
-    resolution: {integrity: sha512-QZQQk1lXSGSXXR6Bw1W+nbuSJL2m3OkhTQIvpvF0iz12Q5Ck3PVHEIgGoJbPB1qTiIyRDjKTGsZyJt6XJZJtsA==}
+  '@livekit/rtc-node-linux-arm64-gnu@0.13.13':
+    resolution: {integrity: sha512-Jl3Wm0qDyiag+20BDIHbV6Gk50VgSffxyTWYxw1Ecns1UZCVlB+1V5U7O6dcnCe/Wk/n/RxR8sFtTpvbNudnbw==}
     engines: {node: '>= 10'}
     cpu: [arm64]
     os: [linux]
 
-  '@livekit/rtc-node-linux-x64-gnu@0.13.11':
-    resolution: {integrity: sha512-Zi7Elg29JSmDzikxL2Q9YAAka2Khi7GwYHYBv69W6XXHqz3MN4wtnUGShclmqC7aITkHF0tVNLHdexFmMc3trA==}
+  '@livekit/rtc-node-linux-x64-gnu@0.13.13':
+    resolution: {integrity: sha512-B/SgbeBRobpA5LqmDEoBJHpRXePpoF4RO4F0zJf9BdkDhOR0j77p6hD0ZiOuPTRoBzUqukpsTszp+lZnHoNmiA==}
     engines: {node: '>= 10'}
     cpu: [x64]
     os: [linux]
 
-  '@livekit/rtc-node-linux-x64-gnu@0.13.12':
-    resolution: {integrity: sha512-EoP2EhdASL9ZqpkDzjtrVvae/Jv9ATH4vKHseVFOveHfw5gSOirFFwqeoibERUgAzHaV1qfA3AmZlS/ErG+iBw==}
-    engines: {node: '>= 10'}
-    cpu: [x64]
-    os: [linux]
-
-  '@livekit/rtc-node-win32-x64-msvc@0.13.11':
-    resolution: {integrity: sha512-NYemYGbc271SFv+ttYaNvEKLWmwkJqn988xOasq+lWd31kuhj0krR2cThM07HKhgoQwOfdyIcSV87b0mjPzb/A==}
-    engines: {node: '>= 10'}
-    cpu: [x64]
-    os: [win32]
-
-  '@livekit/rtc-node-win32-x64-msvc@0.13.12':
-    resolution: {integrity: sha512-O4M5s1c/DvP9PLc1ac5cDdp9YKZOmnPLHmDG5hmHFzzrnT9YT+1PNXkvl7xDIG7YUHTSlwJyIdMvQ6CGmQkY3w==}
+  '@livekit/rtc-node-win32-x64-msvc@0.13.13':
+    resolution: {integrity: sha512-ygVYV4eHczs3QdaW/p0ADhhm7InUDhFaCYk8OzzIn056ZibZPXzvPizCThZqs8VsDniA01MraZF3qhZZb8IyRg==}
     engines: {node: '>= 10'}
     cpu: [x64]
     os: [win32]
 
-  '@livekit/rtc-node@0.13.11':
-    resolution: {integrity: sha512-yq9uNRK+cdee0W6w0HPMSjTHovUteY4t4ZFrTdmpNt7fg/VxaJkdpXaG7cg8t+RX0pBT/NHskSQ4WFrqumezZg==}
-    engines: {node: '>= 18'}
-
-  '@livekit/rtc-node@0.13.12':
-    resolution: {integrity: sha512-WjfCrTS2tqQgsP250GzgrSsCXq507p6V9FwXR3djv2AYrBWEXal3FwhXIo5LqKQhL17HP1XxBmupVPbN6HV5nQ==}
+  '@livekit/rtc-node@0.13.13':
+    resolution: {integrity: sha512-aB6i46dLWX+nj7VdkirZho2XemcinlJvb0YY1RZj1EZTkNsmKVS6BUDmYk0njLqr5862jazkxGPwG6Zmh3e2kw==}
     engines: {node: '>= 18'}
 
   '@livekit/typed-emitter@3.0.0':
@@ -5214,51 +5180,22 @@ snapshots:
     dependencies:
       '@bufbuild/protobuf': 1.10.0
 
-  '@livekit/rtc-node-darwin-arm64@0.13.11':
-    optional: true
-
-  '@livekit/rtc-node-darwin-arm64@0.13.12':
-    optional: true
-
-  '@livekit/rtc-node-darwin-x64@0.13.11':
+  '@livekit/rtc-node-darwin-arm64@0.13.13':
     optional: true
 
-  '@livekit/rtc-node-darwin-x64@0.13.12':
+  '@livekit/rtc-node-darwin-x64@0.13.13':
     optional: true
 
-  '@livekit/rtc-node-linux-arm64-gnu@0.13.11':
+  '@livekit/rtc-node-linux-arm64-gnu@0.13.13':
     optional: true
 
-  '@livekit/rtc-node-linux-arm64-gnu@0.13.12':
+  '@livekit/rtc-node-linux-x64-gnu@0.13.13':
     optional: true
 
-  '@livekit/rtc-node-linux-x64-gnu@0.13.11':
+  '@livekit/rtc-node-win32-x64-msvc@0.13.13':
     optional: true
 
-  '@livekit/rtc-node-linux-x64-gnu@0.13.12':
-    optional: true
-
-  '@livekit/rtc-node-win32-x64-msvc@0.13.11':
-    optional: true
-
-  '@livekit/rtc-node-win32-x64-msvc@0.13.12':
-    optional: true
-
-  '@livekit/rtc-node@0.13.11':
-    dependencies:
-      '@bufbuild/protobuf': 1.10.0
-      '@livekit/mutex': 1.1.1
-      '@livekit/typed-emitter': 3.0.0
-      pino: 9.6.0
-      pino-pretty: 13.0.0
-    optionalDependencies:
-      '@livekit/rtc-node-darwin-arm64': 0.13.11
-      '@livekit/rtc-node-darwin-x64': 0.13.11
-      '@livekit/rtc-node-linux-arm64-gnu': 0.13.11
-      '@livekit/rtc-node-linux-x64-gnu': 0.13.11
-      '@livekit/rtc-node-win32-x64-msvc': 0.13.11
-
-  '@livekit/rtc-node@0.13.12':
+  '@livekit/rtc-node@0.13.13':
     dependencies:
       '@bufbuild/protobuf': 1.10.0
       '@livekit/mutex': 1.1.1
@@ -5266,11 +5203,11 @@ snapshots:
       pino: 9.6.0
       pino-pretty: 13.0.0
     optionalDependencies:
-      '@livekit/rtc-node-darwin-arm64': 0.13.12
-      '@livekit/rtc-node-darwin-x64': 0.13.12
-      '@livekit/rtc-node-linux-arm64-gnu': 0.13.12
-      '@livekit/rtc-node-linux-x64-gnu': 0.13.12
-      '@livekit/rtc-node-win32-x64-msvc': 0.13.12
+      '@livekit/rtc-node-darwin-arm64': 0.13.13
+      '@livekit/rtc-node-darwin-x64': 0.13.13
+      '@livekit/rtc-node-linux-arm64-gnu': 0.13.13
+      '@livekit/rtc-node-linux-x64-gnu': 0.13.13
+      '@livekit/rtc-node-win32-x64-msvc': 0.13.13
 
   '@livekit/typed-emitter@3.0.0': {}