Skip to content

Gary/llmobs java sdk integration #8519

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 20 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ private HttpUrl getAgentlessUrl(Intake intake) {

public enum Intake {
API("api", "v2", Config::isCiVisibilityAgentlessEnabled, Config::getCiVisibilityAgentlessUrl),
LLMOBS_API("api", "v2", Config::isLlmObsAgentlessEnabled, Config::getLlMObsAgentlessUrl),
LOGS(
"http-intake.logs",
"v2",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import datadog.trace.api.config.GeneralConfig;
import datadog.trace.api.config.IastConfig;
import datadog.trace.api.config.JmxFetchConfig;
import datadog.trace.api.config.LlmObsConfig;
import datadog.trace.api.config.ProfilingConfig;
import datadog.trace.api.config.RemoteConfigConfig;
import datadog.trace.api.config.TraceInstrumentationConfig;
Expand All @@ -41,6 +42,7 @@
import datadog.trace.bootstrap.instrumentation.api.AgentTracer;
import datadog.trace.bootstrap.instrumentation.api.AgentTracer.TracerAPI;
import datadog.trace.bootstrap.instrumentation.api.ProfilingContextIntegration;
import datadog.trace.bootstrap.instrumentation.api.WriterConstants;
import datadog.trace.bootstrap.instrumentation.jfr.InstrumentationBasedProfiling;
import datadog.trace.util.AgentTaskScheduler;
import datadog.trace.util.AgentThreadFactory.AgentThread;
Expand Down Expand Up @@ -109,7 +111,9 @@ private enum AgentFeature {
EXCEPTION_REPLAY(DebuggerConfig.EXCEPTION_REPLAY_ENABLED, false),
CODE_ORIGIN(TraceInstrumentationConfig.CODE_ORIGIN_FOR_SPANS_ENABLED, false),
DATA_JOBS(GeneralConfig.DATA_JOBS_ENABLED, false),
AGENTLESS_LOG_SUBMISSION(GeneralConfig.AGENTLESS_LOG_SUBMISSION_ENABLED, false);
AGENTLESS_LOG_SUBMISSION(GeneralConfig.AGENTLESS_LOG_SUBMISSION_ENABLED, false),
LLMOBS(LlmObsConfig.LLMOBS_ENABLED, false),
LLMOBS_AGENTLESS(LlmObsConfig.LLMOBS_AGENTLESS_ENABLED, false);

private final String configKey;
private final String systemProp;
Expand Down Expand Up @@ -156,6 +160,8 @@ public boolean isEnabledByDefault() {
private static boolean iastFullyDisabled;
private static boolean cwsEnabled = false;
private static boolean ciVisibilityEnabled = false;
private static boolean llmObsEnabled = false;
private static boolean llmObsAgentlessEnabled = false;
private static boolean usmEnabled = false;
private static boolean telemetryEnabled = true;
private static boolean dynamicInstrumentationEnabled = false;
Expand Down Expand Up @@ -292,6 +298,25 @@ public static void start(
exceptionReplayEnabled = isFeatureEnabled(AgentFeature.EXCEPTION_REPLAY);
codeOriginEnabled = isFeatureEnabled(AgentFeature.CODE_ORIGIN);
agentlessLogSubmissionEnabled = isFeatureEnabled(AgentFeature.AGENTLESS_LOG_SUBMISSION);
llmObsEnabled = isFeatureEnabled(AgentFeature.LLMOBS);
// The agentless flag must be resolved here as well; otherwise it keeps its initial value
// (false) and the agentless writer selection below can never take effect.
llmObsAgentlessEnabled = isFeatureEnabled(AgentFeature.LLMOBS_AGENTLESS);

// Set up the writers when LLM Observability is enabled to accommodate both APM and LLMObs spans.
if (llmObsEnabled) {
  // The two writer configurations are mutually exclusive. They are selected with if/else
  // because setSystemPropertyDefault presumably only applies a value when none is set yet
  // (NOTE(review): confirm), in which case a second call for the same property is a no-op.
  if (llmObsAgentlessEnabled) {
    // agentless: use the API (intake) writer only
    setSystemPropertyDefault(
        propertyNameToSystemPropertyName(TracerConfig.WRITER_TYPE),
        WriterConstants.DD_INTAKE_WRITER_TYPE);
  } else {
    // for LLMObs spans, use the agent proxy by default; APM spans will use the agent writer
    setSystemPropertyDefault(
        propertyNameToSystemPropertyName(TracerConfig.WRITER_TYPE),
        WriterConstants.MULTI_WRITER_TYPE
            + ":"
            + WriterConstants.DD_INTAKE_WRITER_TYPE
            + ","
            + WriterConstants.DD_AGENT_WRITER_TYPE);
  }
}

patchJPSAccess(inst);

Expand Down Expand Up @@ -599,6 +624,7 @@ public void execute() {

maybeStartAppSec(scoClass, sco);
maybeStartCiVisibility(instrumentation, scoClass, sco);
maybeStartLLMObs(instrumentation, scoClass, sco);
// start debugger before remote config to subscribe to it before starting to poll
maybeStartDebugger(instrumentation, scoClass, sco);
maybeStartRemoteConfig(scoClass, sco);
Expand Down Expand Up @@ -954,6 +980,24 @@ private static void maybeStartCiVisibility(Instrumentation inst, Class<?> scoCla
}
}

/**
 * Starts the LLM Observability subsystem if the feature flag is on.
 *
 * <p>The subsystem class is looked up reflectively through the agent class loader and its static
 * {@code start(Instrumentation, <scoClass>)} method is invoked. Any failure is logged as a
 * warning and otherwise ignored; the begin/end startup events are emitted either way.
 *
 * @param inst the instrumentation handle passed to the subsystem
 * @param scoClass the class used as the second parameter type of the {@code start} method
 * @param sco the object passed as the second argument to {@code start}
 */
private static void maybeStartLLMObs(Instrumentation inst, Class<?> scoClass, Object sco) {
  if (!llmObsEnabled) {
    return;
  }

  StaticEventLogger.begin("LLM Observability");
  try {
    final Method startMethod =
        AGENT_CLASSLOADER
            .loadClass("datadog.trace.llmobs.LLMObsSystem")
            .getMethod("start", Instrumentation.class, scoClass);
    // static method, hence the null receiver
    startMethod.invoke(null, inst, sco);
  } catch (final Throwable failure) {
    log.warn("Not starting LLM Observability subsystem", failure);
  }
  StaticEventLogger.end("LLM Observability");
}

private static void maybeInstallLogsIntake(Class<?> scoClass, Object sco) {
if (agentlessLogSubmissionEnabled) {
StaticEventLogger.begin("Logs Intake");
Expand Down
2 changes: 1 addition & 1 deletion dd-java-agent/agent-jmxfetch/integrations-core
43 changes: 43 additions & 0 deletions dd-java-agent/agent-llmobs/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Build script for the dd-java-agent/agent-llmobs module.

// Puts the Kotlin Gradle plugin on the build script classpath; its version is read from the
// `libs` version catalog.
buildscript {
repositories {
mavenCentral()
}

dependencies {
classpath group: 'org.jetbrains.kotlin', name: 'kotlin-gradle-plugin', version: libs.versions.kotlin.get()
}
}

// Shadow plugin for the `shadowJar` task configured below, plus test fixtures support for the
// `testFixturesApi` dependencies.
plugins {
id 'com.github.johnrengelman.shadow'
id 'java-test-fixtures'
}

// Shared build logic from the root project's gradle/ directory.
apply from: "$rootDir/gradle/java.gradle"
apply from: "$rootDir/gradle/version.gradle"
apply from: "$rootDir/gradle/test-with-kotlin.gradle"

// Coverage minimums are set to zero for this module.
// NOTE(review): presumably this disables coverage enforcement while the module is new — confirm
// and raise once tests exist.
minimumBranchCoverage = 0.0
minimumInstructionCoverage = 0.0

dependencies {
// Logging API, exposed to consumers of this module.
api libs.slf4j
// JCTools concurrent queues.
implementation libs.jctools

// Sibling projects this module is compiled against.
implementation project(':communication')
implementation project(':components:json')
implementation project(':internal-api')

testImplementation project(":utils:test-utils")

// Projects exported through this module's test fixtures.
testFixturesApi project(':dd-java-agent:testing')
testFixturesApi project(':utils:test-utils')
}

// Applies the `deps.excludeShared` dependency filter to the shadow jar.
// NOTE(review): presumably this keeps dependencies already shipped by the agent out of this
// jar — confirm against the definition of `deps.excludeShared`.
shadowJar {
dependencies deps.excludeShared
}

// The plain (non-shadow) jar is published with the 'unbundled' classifier.
jar {
archiveClassifier = 'unbundled'
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
package datadog.trace.llmobs;

import static datadog.trace.util.AgentThreadFactory.AgentThread.LLMOBS_EVALS_PROCESSOR;
import static datadog.trace.util.AgentThreadFactory.THREAD_JOIN_TIMOUT_MS;
import static datadog.trace.util.AgentThreadFactory.newAgentThread;

import com.squareup.moshi.JsonAdapter;
import com.squareup.moshi.Moshi;
import datadog.communication.ddagent.DDAgentFeaturesDiscovery;
import datadog.communication.ddagent.SharedCommunicationObjects;
import datadog.communication.http.HttpRetryPolicy;
import datadog.communication.http.OkHttpUtils;
import datadog.trace.api.Config;
import datadog.trace.llmobs.domain.LLMObsEval;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import okhttp3.Headers;
import okhttp3.HttpUrl;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import org.jctools.queues.MpscBlockingConsumerArrayQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Background worker that collects {@link LLMObsEval} instances from producers and submits them
 * in batches as eval metrics over HTTP.
 *
 * <p>Producers hand evals over through {@link #addToQueue(LLMObsEval)}; a single agent thread
 * drains the queue, buffers the evals and periodically posts them either directly to the intake
 * (agentless mode) or through the agent's EVP proxy endpoint.
 */
public class EvalProcessingWorker implements AutoCloseable {

  private static final String EVAL_METRIC_API_DOMAIN = "api";
  private static final String EVAL_METRIC_API_PATH = "api/intake/llm-obs/v1/eval-metric";

  private static final String EVP_SUBDOMAIN_HEADER_NAME = "X-Datadog-EVP-Subdomain";
  private static final String DD_API_KEY_HEADER_NAME = "DD-API-KEY";

  private static final Logger log = LoggerFactory.getLogger(EvalProcessingWorker.class);

  private final MpscBlockingConsumerArrayQueue<LLMObsEval> queue;
  private final Thread serializerThread;

  /**
   * Creates the worker; the processing thread is not started until {@link #start()} is called.
   *
   * @param capacity capacity of the hand-over queue between producers and the worker thread
   * @param flushInterval maximum time between two submission attempts
   * @param timeUnit unit of {@code flushInterval}
   * @param sco shared communication objects; its agent URL is used when not running agentless
   * @param config tracer configuration (agentless flag, site, API key)
   * @throws IllegalArgumentException if agentless submission is enabled and no API key is
   *     configured
   */
  public EvalProcessingWorker(
      final int capacity,
      final long flushInterval,
      final TimeUnit timeUnit,
      final SharedCommunicationObjects sco,
      Config config) {
    this.queue = new MpscBlockingConsumerArrayQueue<>(capacity);

    Headers headers;
    HttpUrl submissionUrl;
    if (config.isLlmObsAgentlessEnabled()) {
      String apiKey = config.getApiKey();
      if (apiKey == null || apiKey.isEmpty()) {
        log.error("Agentless eval metric submission requires an API key");
      }
      if (apiKey == null) {
        // Headers.of rejects null values anyway; fail here with an actionable message instead.
        throw new IllegalArgumentException("Agentless eval metric submission requires an API key");
      }
      submissionUrl =
          HttpUrl.get(
              "https://"
                  + EVAL_METRIC_API_DOMAIN
                  + "."
                  + config.getSite()
                  + "/"
                  + EVAL_METRIC_API_PATH);
      headers = Headers.of(DD_API_KEY_HEADER_NAME, apiKey);
    } else {
      submissionUrl =
          HttpUrl.get(
              sco.agentUrl.toString()
                  + DDAgentFeaturesDiscovery.V2_EVP_PROXY_ENDPOINT
                  + EVAL_METRIC_API_PATH);
      headers = Headers.of(EVP_SUBDOMAIN_HEADER_NAME, EVAL_METRIC_API_DOMAIN);
    }

    EvalSerializingHandler serializingHandler =
        new EvalSerializingHandler(queue, flushInterval, timeUnit, submissionUrl, headers);
    this.serializerThread = newAgentThread(LLMOBS_EVALS_PROCESSOR, serializingHandler);
  }

  /** Starts the background processing thread. */
  public void start() {
    this.serializerThread.start();
  }

  /**
   * Offers an eval to the worker without blocking.
   *
   * @param eval the eval to submit
   * @return {@code true} if the eval was accepted, {@code false} if the queue rejected it
   */
  public boolean addToQueue(final LLMObsEval eval) {
    return queue.offer(eval);
  }

  /** Interrupts the processing thread and waits a bounded time for it to finish. */
  @Override
  public void close() {
    serializerThread.interrupt();
    try {
      serializerThread.join(THREAD_JOIN_TIMOUT_MS);
    } catch (InterruptedException e) {
      // Preserve the caller's interrupt status instead of silently dropping it.
      Thread.currentThread().interrupt();
    }
  }

  /** Drains the eval queue, buffers the evals and posts them in batches. */
  public static class EvalSerializingHandler implements Runnable {

    private static final Logger log = LoggerFactory.getLogger(EvalSerializingHandler.class);
    private static final int FLUSH_THRESHOLD = 50;
    // Bounds for how long a single queue poll may block when no evals are available.
    private static final long MIN_POLL_NANOS = TimeUnit.MILLISECONDS.toNanos(1);
    private static final long MAX_POLL_NANOS = TimeUnit.MILLISECONDS.toNanos(100);

    private final MpscBlockingConsumerArrayQueue<LLMObsEval> queue;
    private final long ticksRequiredToFlush;
    private final long pollTimeoutNanos;
    private long lastTicks;
    // True after a failed submission: the retained batch is then retried on the flush interval
    // only, so an already-full buffer does not trigger a retry on every loop iteration.
    private boolean retryPending;

    private final Moshi moshi;
    private final JsonAdapter<LLMObsEval.Request> evalJsonAdapter;
    private final OkHttpClient httpClient;
    private final HttpRetryPolicy.Factory retryPolicyFactory;
    private final HttpUrl submissionUrl;
    private final Headers headers;

    // NOTE(review): a batch that fails to submit is retained for the next attempt, so this list
    // can grow without bound while the backend stays unreachable — consider capping it.
    private final List<LLMObsEval> buffer = new ArrayList<>();

    /**
     * @param queue queue the evals are consumed from
     * @param flushInterval maximum time between two submission attempts
     * @param timeUnit unit of {@code flushInterval}
     * @param submissionUrl URL the batches are posted to
     * @param headers headers added to every submission request
     */
    public EvalSerializingHandler(
        final MpscBlockingConsumerArrayQueue<LLMObsEval> queue,
        final long flushInterval,
        final TimeUnit timeUnit,
        final HttpUrl submissionUrl,
        final Headers headers) {
      this.queue = queue;
      this.moshi = new Moshi.Builder().add(LLMObsEval.class, new LLMObsEval.Adapter()).build();

      this.evalJsonAdapter = moshi.adapter(LLMObsEval.Request.class);
      this.httpClient = new OkHttpClient();
      this.retryPolicyFactory = new HttpRetryPolicy.Factory(5, 100, 2.0, true);
      this.submissionUrl = submissionUrl;
      this.headers = headers;

      this.lastTicks = System.nanoTime();
      this.ticksRequiredToFlush = timeUnit.toNanos(flushInterval);
      // Never block longer than the flush interval, so time-based flushes stay on schedule.
      this.pollTimeoutNanos =
          Math.max(MIN_POLL_NANOS, Math.min(ticksRequiredToFlush, MAX_POLL_NANOS));

      log.debug("starting eval metric serializer, url={}", submissionUrl);
    }

    @Override
    public void run() {
      try {
        runDutyCycle();
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
      }
      log.debug(
          "eval processor worker exited. submitting evals stopped. unsubmitted evals left: {}",
          !queuesAreEmpty() || !buffer.isEmpty());
    }

    /** Consumes and flushes until the current thread is interrupted. */
    private void runDutyCycle() throws InterruptedException {
      Thread thread = Thread.currentThread();
      while (!thread.isInterrupted()) {
        consumeBatch();
        flushIfNecessary();
      }
    }

    /**
     * Moves all currently queued evals into the buffer. Blocks for a bounded time when the queue
     * is empty so the worker thread does not spin.
     */
    private void consumeBatch() throws InterruptedException {
      LLMObsEval first = queue.poll(pollTimeoutNanos, TimeUnit.NANOSECONDS);
      if (first == null) {
        return;
      }
      buffer.add(first);
      queue.drain(buffer::add, queue.size());
    }

    /**
     * Posts the buffered evals when a flush is due. The buffer is cleared only after a successful
     * response; on failure the evals are kept and retried on the next interval-based flush.
     */
    protected void flushIfNecessary() {
      if (buffer.isEmpty() || !shouldFlush()) {
        return;
      }

      String reqBod = evalJsonAdapter.toJson(new LLMObsEval.Request(this.buffer));
      RequestBody requestBody =
          RequestBody.create(okhttp3.MediaType.parse("application/json"), reqBod);
      Request request =
          new Request.Builder().headers(headers).url(submissionUrl).post(requestBody).build();

      try (okhttp3.Response response =
          OkHttpUtils.sendWithRetries(httpClient, retryPolicyFactory, request)) {
        if (response.isSuccessful()) {
          log.debug("successfully flushed evaluation request with {} evals", this.buffer.size());
          this.buffer.clear();
          retryPending = false;
        } else {
          retryPending = true;
          log.error(
              "Could not submit eval metrics (HTTP code {}){}",
              response.code(),
              response.body() != null ? ": " + response.body().string() : "");
        }
      } catch (Exception e) {
        retryPending = true;
        log.error("Could not submit eval metrics", e);
      }
    }

    /**
     * Decides whether a submission attempt is due: either the flush interval has elapsed, or
     * enough evals are buffered and no failed batch is awaiting its retry. Resets the interval
     * timer when it returns {@code true}.
     */
    private boolean shouldFlush() {
      long now = System.nanoTime();
      boolean intervalElapsed = now - lastTicks > ticksRequiredToFlush;
      // The queue has just been drained into the buffer, so the buffer holds the pending count.
      boolean thresholdReached = !retryPending && buffer.size() >= FLUSH_THRESHOLD;
      if (intervalElapsed || thresholdReached) {
        lastTicks = now;
        return true;
      }
      return false;
    }

    /** @return {@code true} if no evals are waiting in the hand-over queue */
    protected boolean queuesAreEmpty() {
      return queue.isEmpty();
    }
  }
}
Loading
Loading