Skip to content

Streaming JsonSerializer buffers all content of skipped JSON properties. #96559

@krwq

Description

@krwq

When reading large JSON files whose contents are not fully described by the target model, the serializer may run out of memory (OOM) instead of skipping over the unused properties.

Here is code that generates the repro; it also shows that deserializing into a fully defined model does not cause the OOM:

using System;
using System.Collections.Generic;
using System.IO;
using System.Text.Json;

// Path of the JSON document used by the repro. It is generated on the first run
// and reused on later runs.
string fileName = "foo.json";

// Lower bound for the generated file size: 5 * 1024^3 bytes.
const long minFileSize = 5L * 1024 * 1024 * 1024;

if (!File.Exists(fileName))
{
    // First run: create the file, fill it, then rewind and run the repro on the same stream.
    using FileStream created = new FileStream(fileName, FileMode.CreateNew);

    Console.Write("Generating large JSON file... ");
    GenerateLargeJson(created, minFileSize); // increase as needed
    Console.WriteLine("Done");

    created.Position = 0;
    Repro(created);
}
else
{
    // The file already exists: run the repro directly against it.
    using FileStream existing = new FileStream(fileName, FileMode.Open);
    Repro(existing);
}

// Deserializes the stream twice: first into FooBarClass, then, after rewinding,
// into FooClass. The second pass is the failing case being reported.
void Repro(FileStream fs)
{
    // Prints the banner, deserializes the stream into TModel (result discarded),
    // then prints "Done".
    void Attempt<TModel>(string banner)
    {
        Console.Write(banner);
        _ = JsonSerializer.Deserialize<TModel>(fs);
        Console.WriteLine("Done");
    }

    Attempt<FooBarClass>("Deserializing (expected to work)... ");

    // Rewind so the second pass reads the same document from the start.
    fs.Seek(0, SeekOrigin.Begin);

    // The trailing "Done" of this attempt is never reached; the process prints "Out of memory."
    Attempt<FooClass>("Deserializing (repro)... ");
}


// Writes a single JSON object to fs with the shape
//   { "Foo": "FooValue", "Bars": { "<name>": { "Bar": "<value>" }, ... } }
// and keeps appending entries to "Bars", in batches of 1000, until the stream
// length reaches at least minSize bytes.
//
// fs:      destination stream; it is left open for the caller.
// minSize: minimum stream length, in bytes, at which generation stops. At least
//          one batch is always written, even when minSize is zero or negative.
void GenerateLargeJson(FileStream fs, long minSize)
{
    // Utf8JsonWriter is IDisposable; the using declaration releases its internal
    // buffers when the method exits. Disposing the writer does not close fs.
    using Utf8JsonWriter writer = new Utf8JsonWriter(fs);
    writer.WriteStartObject(); // root

    writer.WriteString("Foo", "FooValue");

    writer.WritePropertyName("Bars");
    writer.WriteStartObject();

    // Running index across batches so every generated property name is distinct.
    long propIdx = 0;
    do
    {
        for (int i = 0; i < 1000; i++, propIdx++)
        {
            writer.WritePropertyName($"PropertyWithSomeQuiteLongishName{propIdx:000000000}");
            writer.WriteStartObject();

            writer.WriteString("Bar", $"BarValue{propIdx:000000000}SomethingNotTooShortSoWeDontNeedToWriteThatManyTimes");

            writer.WriteEndObject();
        }

        // Push the batch to the stream so fs.Length reflects what has been written so far.
        writer.Flush();

    } while (fs.Length < minSize);

    writer.WriteEndObject(); // Bars

    writer.WriteEndObject(); // root

    // Explicit flush so the document is complete on the stream before returning.
    writer.Flush();
}

// Partial model used for the failing deserialization in Repro: it declares only
// "Foo", so the "Bars" property written by GenerateLargeJson has no matching member.
class FooClass
{
    // Matches the root-level "Foo" string property of the generated document.
    public string Foo { get; set; }
}

// Not needed for repro:
// Full model used for the first deserialization in Repro (the one labelled
// "expected to work"): it declares both root-level properties written by GenerateLargeJson.
class FooBarClass
{
    // Matches the root-level "Foo" string property of the generated document.
    public string Foo { get; set; }
    // Matches the root-level "Bars" object; each generated property name becomes a key.
    public Dictionary<string, BarClass> Bars { get; set; }
}

// Value type of FooBarClass.Bars: models one of the nested objects that
// GenerateLargeJson writes under "Bars".
class BarClass
{
    // Matches the "Bar" string property of each nested object.
    public string Bar { get; set; }
}

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions