Skip to content

Commit b3f5d3c

Browse files
author
David Heinemeier Hansson
committed
Merge pull request rails#20933 from siadat/relations-in-batches-using-ids
in_batches using ids
2 parents 52c1464 + 25cee1f commit b3f5d3c

File tree

6 files changed

+413
-12
lines changed

6 files changed

+413
-12
lines changed

activerecord/CHANGELOG.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,22 @@
1+
* Add `ActiveRecord::Relation#in_batches` to work with records and relations
2+
in batches.
3+
4+
Available options are `of` (batch size), `load`, `begin_at`, and `end_at`.
5+
6+
Examples:
7+
8+
Person.in_batches.each_record(&:party_all_night!)
9+
Person.in_batches.update_all(awesome: true)
10+
Person.in_batches.delete_all
11+
Person.in_batches.each do |relation|
12+
relation.delete_all
13+
sleep 10 # Throttles the delete queries
14+
end
15+
16+
Closes #20933.
17+
18+
*Sina Siadat*
19+
120
* Added methods for PostgreSQL geometric data types to use in migrations
221

322
Example:

activerecord/lib/active_record/querying.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ module Querying
66
delegate :find_or_create_by, :find_or_create_by!, :find_or_initialize_by, to: :all
77
delegate :find_by, :find_by!, to: :all
88
delegate :destroy, :destroy_all, :delete, :delete_all, :update, :update_all, to: :all
9-
delegate :find_each, :find_in_batches, to: :all
9+
delegate :find_each, :find_in_batches, :in_batches, to: :all
1010
delegate :select, :group, :order, :except, :reorder, :limit, :offset, :joins, :or,
1111
:where, :rewhere, :preload, :eager_load, :includes, :from, :lock, :readonly,
1212
:having, :create_with, :uniq, :distinct, :references, :none, :unscope, to: :all

activerecord/lib/active_record/relation.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,13 @@ def inspect
667667
"#<#{self.class.name} [#{entries.join(', ')}]>"
668668
end
669669

670+
protected
671+
672+
def load_records(records)
673+
@records = records
674+
@loaded = true
675+
end
676+
670677
private
671678

672679
def exec_queries

activerecord/lib/active_record/relation/batches.rb

Lines changed: 89 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
require "active_record/relation/batches/batch_enumerator"
2+
13
module ActiveRecord
24
module Batches
35
# Looping through a collection of records from the database
@@ -122,24 +124,102 @@ def find_in_batches(begin_at: nil, end_at: nil, batch_size: 1000, start: nil)
122124
end
123125
end
124126

127+
in_batches(of: batch_size, begin_at: begin_at, end_at: end_at, load: true) do |batch|
128+
yield batch.to_a
129+
end
130+
end
131+
132+
# Yields ActiveRecord::Relation objects to work with a batch of records.
133+
#
134+
# Person.where("age > 21").in_batches do |relation|
135+
# relation.delete_all
136+
# sleep(10) # Throttle the delete queries
137+
# end
138+
#
139+
# If you do not provide a block to #in_batches, it will return a
140+
# BatchEnumerator which is enumerable.
141+
#
142+
# Person.in_batches.each_with_index do |relation, batch_index|
143+
# puts "Processing relation ##{batch_index}"
144+
# relation.delete_all
145+
# end
146+
#
147+
# Examples of calling methods on the returned BatchEnumerator object:
148+
#
149+
# Person.in_batches.delete_all
150+
# Person.in_batches.update_all(awesome: true)
151+
# Person.in_batches.each_record(&:party_all_night!)
152+
#
153+
# ==== Options
154+
# * <tt>:of</tt> - Specifies the size of the batch. Defaults to 1000.
155+
# * <tt>:load</tt> - Specifies if the relation should be loaded. Defaults to false.
156+
# * <tt>:begin_at</tt> - Specifies the primary key value to start from, inclusive of the value.
157+
# * <tt>:end_at</tt> - Specifies the primary key value to end at, inclusive of the value.
158+
#
159+
# This is especially useful if you want to work with the
160+
# ActiveRecord::Relation object instead of the array of records, or if
161+
# you want multiple workers dealing with the same processing queue. You can
162+
# make worker 1 handle all the records between id 1 and 9,999 and worker 2
163+
# handle from 10,000 and beyond (by setting the +:begin_at+ and +:end_at+
164+
# options on each worker).
165+
#
166+
# # Let's process all records from id 2000 onwards, in batches of 2000
167+
# Person.in_batches(of: 2000, begin_at: 2000).update_all(awesome: true)
168+
#
169+
# An example of calling the +where+ query method on the relation:
170+
#
171+
# Person.in_batches.each do |relation|
172+
# relation.update_all('age = age + 1')
173+
# relation.where('age > 21').update_all(should_party: true)
174+
# relation.where('age <= 21').delete_all
175+
# end
176+
#
177+
# NOTE: If you are going to iterate through each record, you should call
178+
# #each_record on the returned BatchEnumerator:
179+
#
180+
# Person.in_batches.each_record(&:party_all_night!)
181+
#
182+
# NOTE: It's not possible to set the order. That is automatically set to
183+
# ascending on the primary key ("id ASC") to make the batch ordering
184+
# consistent. Therefore the primary key must be orderable, e.g. an integer
185+
# or a string.
186+
#
187+
# NOTE: You can't set the limit either, that's used to control the batch
188+
# sizes.
189+
def in_batches(of: 1000, begin_at: nil, end_at: nil, load: false)
190+
relation = self
191+
unless block_given?
192+
return BatchEnumerator.new(of: of, begin_at: begin_at, end_at: end_at, relation: self)
193+
end
194+
125195
if logger && (arel.orders.present? || arel.taken.present?)
126196
logger.warn("Scoped order and limit are ignored, it's forced to be batch order and batch size")
127197
end
128198

129-
relation = relation.reorder(batch_order).limit(batch_size)
199+
relation = relation.reorder(batch_order).limit(of)
130200
relation = apply_limits(relation, begin_at, end_at)
131-
records = relation.to_a
201+
batch_relation = relation
202+
203+
loop do
204+
if load
205+
records = batch_relation.to_a
206+
ids = records.map(&:id)
207+
yielded_relation = self.where(primary_key => ids)
208+
yielded_relation.load_records(records)
209+
else
210+
ids = batch_relation.pluck(primary_key)
211+
yielded_relation = self.where(primary_key => ids)
212+
end
132213

133-
while records.any?
134-
records_size = records.size
135-
primary_key_offset = records.last.id
136-
raise "Primary key not included in the custom select clause" unless primary_key_offset
214+
break if ids.empty?
137215

138-
yield records
216+
primary_key_offset = ids.last
217+
raise ArgumentError.new("Primary key not included in the custom select clause") unless primary_key_offset
139218

140-
break if records_size < batch_size
219+
yield yielded_relation
141220

142-
records = relation.where(table[primary_key].gt(primary_key_offset)).to_a
221+
break if ids.length < of
222+
batch_relation = relation.where(table[primary_key].gt(primary_key_offset))
143223
end
144224
end
145225

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
module ActiveRecord
2+
module Batches
3+
class BatchEnumerator
4+
include Enumerable
5+
6+
def initialize(of: 1000, begin_at: nil, end_at: nil, relation:) #:nodoc:
7+
@of = of
8+
@relation = relation
9+
@begin_at = begin_at
10+
@end_at = end_at
11+
end
12+
13+
# Looping through a collection of records from the database (using the
14+
# +all+ method, for example) is very inefficient since it will try to
15+
# instantiate all the objects at once.
16+
#
17+
# In that case, batch processing methods allow you to work with the
18+
# records in batches, thereby greatly reducing memory consumption.
19+
#
20+
# Person.in_batches.each_record do |person|
21+
# person.do_awesome_stuff
22+
# end
23+
#
24+
# Person.where("age > 21").in_batches(of: 10).each_record do |person|
25+
# person.party_all_night!
26+
# end
27+
#
28+
# If you do not provide a block to #each_record, it will return an Enumerator
29+
# for chaining with other methods:
30+
#
31+
# Person.in_batches.each_record.with_index do |person, index|
32+
# person.award_trophy(index + 1)
33+
# end
34+
def each_record
35+
return to_enum(:each_record) unless block_given?
36+
37+
@relation.to_enum(:in_batches, of: @of, begin_at: @begin_at, end_at: @end_at, load: true).each do |relation|
38+
relation.to_a.each { |record| yield record }
39+
end
40+
end
41+
42+
# Delegates #delete_all, #update_all, #destroy_all methods to each batch.
43+
#
44+
# Person.in_batches.delete_all
44+
# Person.in_batches.destroy_all('age < 10')
45+
# Person.in_batches.update_all('age = age + 1')
47+
[:delete_all, :update_all, :destroy_all].each do |method|
48+
define_method(method) do |*args, &block|
49+
@relation.to_enum(:in_batches, of: @of, begin_at: @begin_at, end_at: @end_at, load: false).each do |relation|
50+
relation.send(method, *args, &block)
51+
end
52+
end
53+
end
54+
55+
# Yields an ActiveRecord::Relation object for each batch of records.
56+
#
57+
# Person.in_batches.each do |relation|
58+
# relation.update_all(awesome: true)
59+
# end
60+
def each
61+
enum = @relation.to_enum(:in_batches, of: @of, begin_at: @begin_at, end_at: @end_at, load: false)
62+
return enum.each { |relation| yield relation } if block_given?
63+
enum
64+
end
65+
end
66+
end
67+
end

0 commit comments

Comments
 (0)