Description
If I include both reference navigations and collection navigations in a query and execute with AsSplitQuery
then joins to the reference navigations are present in all of the collection queries. This doesn't appear to be necessary and can slow down the query significantly if there are multiple reference navigations.
Repro:
using System;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
{
using var context = new MyContext();
if (context.Database.EnsureCreated())
{
var blogType = new BlogType { Type = "Development" };
var blog = new Blog { Name = "EF", BlogType = blogType };
var post = new Post { Title = "Split Query", Blog = blog };
context.AddRange(blogType, blog, post);
context.SaveChanges();
}
}
{
using var context = new MyContext();
var result = context.Blogs
.Include(x => x.BlogType)
.Include(x => x.Posts)
.AsSplitQuery()
.ToList();
}
public class MyContext : DbContext
{
public DbSet<Blog> Blogs { get; set; }
protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
=> optionsBuilder
.UseSqlServer("Server=.;Database=Split;Trusted_Connection=True;Encrypt=False")
.LogTo(Console.WriteLine, LogLevel.Information);
}
public class Blog
{
public int Id { get; set; }
public string Name { get; set; }
public int BlogTypeId { get; set; }
public BlogType BlogType { get; set; }
public ICollection<Post> Posts { get; set; }
}
public class Post
{
public int Id { get; set; }
public string Title { get; set; }
public int BlogId { get; set; }
public Blog Blog { get; set; }
}
public class BlogType
{
public int Id { get; set; }
public string Type { get; set; }
public ICollection<Blog> Blogs { get; set; }
}
Produces this SQL:
SELECT [b].[Id], [b].[BlogTypeId], [b].[Name], [b0].[Id], [b0].[Type]
FROM [Blogs] AS [b]
INNER JOIN [BlogType] AS [b0] ON [b].[BlogTypeId] = [b0].[Id]
ORDER BY [b].[Id], [b0].[Id]
SELECT [p].[Id], [p].[BlogId], [p].[Title], [b].[Id], [b0].[Id]
FROM [Blogs] AS [b]
INNER JOIN [BlogType] AS [b0] ON [b].[BlogTypeId] = [b0].[Id]
INNER JOIN [Post] AS [p] ON [b].[Id] = [p].[BlogId]
ORDER BY [b].[Id], [b0].[Id]
I believe the second query could simply be:
SELECT [p].[Id], [p].[BlogId], [p].[Title], [b].[Id]
FROM [Blogs] AS [b]
INNER JOIN [Post] AS [p] ON [b].[Id] = [p].[BlogId]
ORDER BY [b].[Id]
The reduced joins, and reduced fields selected and in the order by, would improve the query plan. I don't think the BlogType ID is required to match the Posts up to the Blog.
Note that in this example, I am just using a single reference navigation, but when multiple are included, joins to all of them are repeated in each collection query and this begins to really impact the performance.
Same behaviour also occurs in non-entity projection and split query.