PySymGym · gsvgit · Apr 8, 2025 · Apr 4, 2025 · Apr 4, 2025 · Apr 4, 2025
diff --git a/VSharp.API/VSharp.cs b/VSharp.API/VSharp.cs
@@ -191,10 +191,10 @@ private static Statistics StartExploration(
                     stopOnCoverageAchieved: 100,
                     randomSeed: options.RandomSeed,
                     stepsLimit: options.StepsLimit,
-                    aiAgentTrainingOptions: options.AIAgentTrainingOptions == null ? FSharpOption<AIAgentTrainingOptions>.None : FSharpOption<AIAgentTrainingOptions>.Some(options.AIAgentTrainingOptions),
+                    aiOptions: options.AIOptions == null ? FSharpOption<AIOptions>.None : FSharpOption<AIOptions>.Some(options.AIOptions),
                     pathToModel: options.PathToModel == null ? FSharpOption<string>.None : FSharpOption<string>.Some(options.PathToModel),
-                    useGPU: options.UseGPU == null ? FSharpOption<bool>.None : FSharpOption<bool>.Some(options.UseGPU),
-                    optimize: options.Optimize == null ? FSharpOption<bool>.None : FSharpOption<bool>.Some(options.Optimize)
+                    useGPU: options.UseGPU,
+                    optimize: options.Optimize
                     );
 
             var fuzzerOptions =

diff --git a/VSharp.API/VSharpOptions.cs b/VSharp.API/VSharpOptions.cs
@@ -113,7 +113,7 @@
     public readonly bool ReleaseBranches = DefaultReleaseBranches;
     public readonly int RandomSeed = DefaultRandomSeed;
     public readonly uint StepsLimit = DefaultStepsLimit;
-    public readonly AIAgentTrainingOptions AIAgentTrainingOptions = null;
+    public readonly AIOptions? AIOptions = null;
     public readonly string PathToModel = DefaultPathToModel;
     public readonly bool UseGPU = false;
     public readonly bool Optimize = false;
@@ -133,7 +133,7 @@
     /// <param name="releaseBranches">If true and timeout is specified, a part of allotted time in the end is given to execute remaining states without branching.</param>
     /// <param name="randomSeed">Fixed seed for random operations. Used if greater than or equal to zero.</param>
     /// <param name="stepsLimit">Number of symbolic machine steps to stop execution after. Zero value means no limit.</param>
-    /// <param name="aiAgentTrainingOptions">Settings for AI searcher training.</param>
+    /// <param name="aiOptions">Settings for AI searcher training.</param>
     /// <param name="pathToModel">Path to ONNX file with model to use in AI searcher.</param>
     /// <param name="useGPU">Specifies whether the ONNX execution session should use a CUDA-enabled GPU.</param>
     /// <param name="optimize">Enabling options like parallel execution and various graph transformations to enhance performance of ONNX.</param>
@@ -150,7 +150,7 @@
         bool releaseBranches = DefaultReleaseBranches,
         int randomSeed = DefaultRandomSeed,
         uint stepsLimit = DefaultStepsLimit,
-        AIAgentTrainingOptions aiAgentTrainingOptions = null,
+        AIOptions? aiOptions = null,
         string pathToModel = DefaultPathToModel,
         bool useGPU = false,
         bool optimize = false)
@@ -167,7 +167,7 @@
         ReleaseBranches = releaseBranches;
         RandomSeed = randomSeed;
         StepsLimit = stepsLimit;
-        AIAgentTrainingOptions = aiAgentTrainingOptions;
+        AIOptions = aiOptions;
         PathToModel = pathToModel;
         UseGPU = useGPU;
         Optimize = optimize;

diff --git a/VSharp.Explorer/AISearcher.fs b/VSharp.Explorer/AISearcher.fs
@@ -2,20 +2,31 @@ namespace VSharp.Explorer
 
 open System.Collections.Generic
 open Microsoft.ML.OnnxRuntime
+open System
+open System.Text
+open System.Text.Json
 open VSharp
 open VSharp.IL.Serializer
 open VSharp.ML.GameServer.Messages
 
-type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentTrainingOptions>) =
+type AIMode =
+    | Runner
+    | TrainingSendModel
+    | TrainingSendEachStep
+
+
+type internal AISearcher(oracle: Oracle, aiAgentTrainingMode: Option<AIAgentTrainingMode>) =
     let stepsToSwitchToAI =
-        match aiAgentTrainingOptions with
+        match aiAgentTrainingMode with
         | None -> 0u<step>
-        | Some options -> options.stepsToSwitchToAI
+        | Some(SendModel options) -> options.aiAgentTrainingOptions.stepsToSwitchToAI
+        | Some(SendEachStep options) -> options.aiAgentTrainingOptions.stepsToSwitchToAI
 
     let stepsToPlay =
-        match aiAgentTrainingOptions with
+        match aiAgentTrainingMode with
         | None -> 0u<step>
-        | Some options -> options.stepsToPlay
+        | Some(SendModel options) -> options.aiAgentTrainingOptions.stepsToPlay
+        | Some(SendEachStep options) -> options.aiAgentTrainingOptions.stepsToPlay
 
     let mutable lastCollectedStatistics = Statistics()
     let mutable defaultSearcherSteps = 0u<step>
@@ -25,14 +36,17 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
     let mutable incorrectPredictedStateId = false
 
     let defaultSearcher =
-        match aiAgentTrainingOptions with
-        | None -> BFSSearcher() :> IForwardSearcher
-        | Some options ->
-            match options.defaultSearchStrategy with
+        let pickSearcher =
+            function
             | BFSMode -> BFSSearcher() :> IForwardSearcher
             | DFSMode -> DFSSearcher() :> IForwardSearcher
             | x -> failwithf $"Unexpected default searcher {x}. DFS and BFS supported for now."
 
+        match aiAgentTrainingMode with
+        | None -> BFSSearcher() :> IForwardSearcher
+        | Some(SendModel options) -> pickSearcher options.aiAgentTrainingOptions.aiBaseOptions.defaultSearchStrategy
+        | Some(SendEachStep options) -> pickSearcher options.aiAgentTrainingOptions.aiBaseOptions.defaultSearchStrategy
+
     let mutable stepsPlayed = 0u<step>
 
     let isInAIMode () =
@@ -41,59 +55,6 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
     let q = ResizeArray<_>()
     let availableStates = HashSet<_>()
 
-    let updateGameState (delta: GameState) =
-        match gameState with
-        | None -> gameState <- Some delta
-        | Some s ->
-            let updatedBasicBlocks = delta.GraphVertices |> Array.map (fun b -> b.Id) |> HashSet
-            let updatedStates = delta.States |> Array.map (fun s -> s.Id) |> HashSet
-
-            let vertices =
-                s.GraphVertices
-                |> Array.filter (fun v -> updatedBasicBlocks.Contains v.Id |> not)
-                |> ResizeArray<_>
-
-            vertices.AddRange delta.GraphVertices
-
-            let edges =
-                s.Map
-                |> Array.filter (fun e -> updatedBasicBlocks.Contains e.VertexFrom |> not)
-                |> ResizeArray<_>
-
-            edges.AddRange delta.Map
-            let activeStates = vertices |> Seq.collect (fun v -> v.States) |> HashSet
-
-            let states =
-                let part1 =
-                    s.States
-                    |> Array.filter (fun s -> activeStates.Contains s.Id && (not <| updatedStates.Contains s.Id))
-                    |> ResizeArray<_>
-
-                part1.AddRange delta.States
-
-                part1.ToArray()
-                |> Array.map (fun s ->
-                    State(
-                        s.Id,
-                        s.Position,
-                        s.PathCondition,
-                        s.VisitedAgainVertices,
-                        s.VisitedNotCoveredVerticesInZone,
-                        s.VisitedNotCoveredVerticesOutOfZone,
-                        s.StepWhenMovedLastTime,
-                        s.InstructionsVisitedInCurrentBlock,
-                        s.History,
-                        s.Children |> Array.filter activeStates.Contains
-                    ))
-
-            let pathConditionVertices =
-                ResizeArray<PathConditionVertex> s.PathConditionVertices
-
-            pathConditionVertices.AddRange delta.PathConditionVertices
-
-            gameState <-
-                Some
-                <| GameState(vertices.ToArray(), states, pathConditionVertices.ToArray(), edges.ToArray())
 
 
     let init states =
@@ -128,15 +89,18 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
         for bb in state._history do
             bb.Key.AssociatedStates.Remove state |> ignore
 
-    let inTrainMode = aiAgentTrainingOptions.IsSome
-
+    let aiMode =
+        match aiAgentTrainingMode with
+        | Some(SendEachStep _) -> TrainingSendEachStep
+        | Some(SendModel _) -> TrainingSendModel
+        | None -> Runner
     let pick selector =
         if useDefaultSearcher then
             defaultSearcherSteps <- defaultSearcherSteps + 1u<step>
 
             if Seq.length availableStates > 0 then
                 let gameStateDelta = collectGameStateDelta ()
-                updateGameState gameStateDelta
+                gameState <- AISearcher.updateGameState gameStateDelta gameState
                 let statistics = computeStatistics gameState.Value
                 Application.applicationGraphDelta.Clear()
                 lastCollectedStatistics <- statistics
@@ -149,7 +113,7 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
             Some(Seq.head availableStates)
         else
             let gameStateDelta = collectGameStateDelta ()
-            updateGameState gameStateDelta
+            gameState <- AISearcher.updateGameState gameStateDelta gameState
             let statistics = computeStatistics gameState.Value
 
             if isInAIMode () then
@@ -158,14 +122,18 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
 
             Application.applicationGraphDelta.Clear()
 
-            if inTrainMode && stepsToPlay = stepsPlayed then
+            if stepsToPlay = stepsPlayed then
                 None
             else
                 let toPredict =
-                    if inTrainMode && stepsPlayed > 0u<step> then
-                        gameStateDelta
-                    else
-                        gameState.Value
+                    match aiMode with
+                    | TrainingSendEachStep
+                    | TrainingSendModel ->
+                        if stepsPlayed > 0u<step> then
+                            gameStateDelta
+                        else
+                            gameState.Value
+                    | Runner -> gameState.Value
 
                 let stateId = oracle.Predict toPredict
                 afterFirstAIPeek <- true
@@ -179,13 +147,77 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
                     incorrectPredictedStateId <- true
                     oracle.Feedback(Feedback.IncorrectPredictedStateId stateId)
                     None
+    static member updateGameState (delta: GameState) (gameState: Option<GameState>) =
+        match gameState with
+        | None -> Some delta
+        | Some s ->
+            let updatedBasicBlocks = delta.GraphVertices |> Array.map (fun b -> b.Id) |> HashSet
+            let updatedStates = delta.States |> Array.map (fun s -> s.Id) |> HashSet
+
+            let vertices =
+                s.GraphVertices
+                |> Array.filter (fun v -> updatedBasicBlocks.Contains v.Id |> not)
+                |> ResizeArray<_>
+
+            vertices.AddRange delta.GraphVertices
+
+            let edges =
+                s.Map
+                |> Array.filter (fun e -> updatedBasicBlocks.Contains e.VertexFrom |> not)
+                |> ResizeArray<_>
+
+            edges.AddRange delta.Map
+            let activeStates = vertices |> Seq.collect (fun v -> v.States) |> HashSet
+
+            let states =
+                let part1 =
+                    s.States
+                    |> Array.filter (fun s -> activeStates.Contains s.Id && (not <| updatedStates.Contains s.Id))
+                    |> ResizeArray<_>
+
+                part1.AddRange delta.States
+
+                part1.ToArray()
+                |> Array.map (fun s ->
+                    State(
+                        s.Id,
+                        s.Position,
+                        s.PathCondition,
+                        s.VisitedAgainVertices,
+                        s.VisitedNotCoveredVerticesInZone,
+                        s.VisitedNotCoveredVerticesOutOfZone,
+                        s.StepWhenMovedLastTime,
+                        s.InstructionsVisitedInCurrentBlock,
+                        s.History,
+                        s.Children |> Array.filter activeStates.Contains
+                    ))
+
+            let pathConditionVertices = ResizeArray<PathConditionVertex> s.PathConditionVertices
 
-    new(pathToONNX: string, useGPU: bool, optimize: bool) =
+            pathConditionVertices.AddRange delta.PathConditionVertices
+
+            Some
+            <| GameState(vertices.ToArray(), states, pathConditionVertices.ToArray(), edges.ToArray())
+
+    static member convertOutputToJson (output: IDisposableReadOnlyCollection<OrtValue>) =
+        seq { 0 .. output.Count - 1 }
+        |> Seq.map (fun i -> output[i].GetTensorDataAsSpan<float32>().ToArray())
+
+
+
+    new
+        (
+            pathToONNX: string,
+            useGPU: bool,
+            optimize: bool,
+            aiAgentTrainingModelOptions: Option<AIAgentTrainingModelOptions>
+        ) =
         let numOfVertexAttributes = 7
         let numOfStateAttributes = 7
         let numOfHistoryEdgeAttributes = 2
 
-        let createOracle (pathToONNX: string) =
+
+        let createOracleRunner (pathToONNX: string, aiAgentTrainingModelOptions: Option<AIAgentTrainingModelOptions>) =
             let sessionOptions =
                 if useGPU then
                     SessionOptions.MakeSessionOptionWithCudaProvider(0)
@@ -199,10 +231,21 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
                 sessionOptions.GraphOptimizationLevel <- GraphOptimizationLevel.ORT_ENABLE_BASIC
 
             let session = new InferenceSession(pathToONNX, sessionOptions)
+
             let runOptions = new RunOptions()
             let feedback (x: Feedback) = ()
 
-            let predict (gameState: GameState) =
+            let mutable stepsPlayed = 0
+            let mutable currentGameState = None
+
+            let predict (gameStateOrDelta: GameState) =
+                let _ =
+                    match aiAgentTrainingModelOptions with
+                    | Some _ when not (stepsPlayed = 0) ->
+                        currentGameState <- AISearcher.updateGameState gameStateOrDelta currentGameState
+                    | _ -> currentGameState <- Some gameStateOrDelta
+
+                let gameState = currentGameState.Value
                 let stateIds = Dictionary<uint<stateId>, int>()
                 let verticesIds = Dictionary<uint<basicBlockGlobalId>, int>()
 
@@ -243,7 +286,7 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
                             let j = i * numOfStateAttributes
                             attributes.[j] <- float32 v.Position
                             // TODO: Support path condition
-                            // attributes.[j + 1] <- float32 v.PathConditionSize 
+                            // attributes.[j + 1] <- float32 v.PathConditionSize
                             attributes.[j + 2] <- float32 v.VisitedAgainVertices
                             attributes.[j + 3] <- float32 v.VisitedNotCoveredVerticesInZone
                             attributes.[j + 4] <- float32 v.VisitedNotCoveredVerticesOutOfZone
@@ -350,14 +393,30 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
                     res
 
                 let output = session.Run(runOptions, networkInput, session.OutputNames)
+
+                let _ =
+                    match aiAgentTrainingModelOptions with
+                    | Some aiAgentOptions ->
+                        aiAgentOptions.stepSaver (
+                            AIGameStep(gameState = gameStateOrDelta, output = AISearcher.convertOutputToJson output)
+                        )
+                    | None -> ()
+
+                stepsPlayed <- stepsPlayed + 1
+
                 let weighedStates = output[0].GetTensorDataAsSpan<float32>().ToArray()
 
                 let id = weighedStates |> Array.mapi (fun i v -> i, v) |> Array.maxBy snd |> fst
                 stateIds |> Seq.find (fun kvp -> kvp.Value = id) |> (fun x -> x.Key)
 
             Oracle(predict, feedback)
 
-        AISearcher(createOracle pathToONNX, None)
+        let aiAgentTrainingOptions =
+            match aiAgentTrainingModelOptions with
+            | Some aiAgentTrainingModelOptions -> Some(SendModel aiAgentTrainingModelOptions)
+            | None -> None
+
+        AISearcher(createOracleRunner (pathToONNX, aiAgentTrainingModelOptions), aiAgentTrainingOptions)
 
     interface IForwardSearcher with
         override x.Init states = init states