Skip to content
This repository was archived by the owner on Aug 4, 2023. It is now read-only.

Commit ac56962

Browse files
committed
feat: add incremental back-off in case of APM Server issues
1 parent 38e32e3 commit ac56962

File tree

4 files changed

+284
-31
lines changed

4 files changed

+284
-31
lines changed

index.js

Lines changed: 88 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ const util = require('util')
44
const os = require('os')
55
const parseUrl = require('url').parse
66
const zlib = require('zlib')
7-
const Writable = require('readable-stream').Writable
7+
const {Writable, PassThrough} = require('readable-stream')
88
const pump = require('pump')
99
const eos = require('end-of-stream')
1010
const streamToBuffer = require('fast-stream-to-buffer')
@@ -48,6 +48,20 @@ function Client (opts) {
4848
Writable.call(this, opts)
4949

5050
const errorproxy = (err) => {
51+
this._errors++
52+
53+
const retryIndex = this._errors === 0 ? 0 : this._errors - 1
54+
const backoff = (Math.min(retryIndex, 6) ** 2) * 1000
55+
56+
if (backoff > 0) {
57+
this._chopper.resetTimer(-1) // disable timer to prepare for back-off mode
58+
this._backoffTimer = setTimeout(() => {
59+
this._backoffTimer = null
60+
this._chopper.resetTimer(this._chopperTime)
61+
if (this._backoffCallback) this._backoffCallback()
62+
}, backoff)
63+
}
64+
5165
if (this._destroyed === false) this.emit('error', err)
5266
}
5367

@@ -57,6 +71,11 @@ function Client (opts) {
5771

5872
this._received = 0 // number of events given to the client for reporting
5973
this.sent = 0 // number of events written to the socket
74+
this._errors = 0 // number of requests that resulted in an error (dropped connection, non-2xx etc)
75+
this._chopperSize = opts.time // needed to set highWatermark on buffer if we enter back-off mode
76+
this._chopperTime = opts.time // needed to restore the normal time if we ever enter back-off mode
77+
this._backoffTimer = null
78+
this._backoffCallback = null
6079
this._active = false
6180
this._destroyed = false
6281
this._onflushed = null
@@ -169,39 +188,15 @@ function onStream (opts, client, onerror) {
169188
const requestOpts = getRequestOptions(opts, client._agent)
170189

171190
return function (stream, next) {
172-
const onerrorproxy = (err) => {
173-
stream.removeListener('error', onerrorproxy)
174-
compressor.removeListener('error', onerrorproxy)
175-
req.removeListener('error', onerrorproxy)
176-
stream.destroy()
177-
onerror(err)
178-
}
179-
180191
client._active = true
181192

182-
const req = client._transport.request(requestOpts, onResult(onerror))
193+
let buffer, req
183194
const compressor = zlib.createGzip()
184195

185-
// Mointor streams for errors so that we can make sure to destory the
186-
// output stream as soon as that occurs
187196
stream.on('error', onerrorproxy)
188197
compressor.on('error', onerrorproxy)
189-
req.on('error', onerrorproxy)
190198

191-
req.on('socket', function (socket) {
192-
// Sockets will automatically be unreffed by the HTTP agent when they are
193-
// not in use by an HTTP request, but as we're keeping the HTTP request
194-
// open, we need to unref the socket manually
195-
socket.unref()
196-
})
197-
198-
if (Number.isFinite(serverTimeout)) {
199-
req.setTimeout(serverTimeout, function () {
200-
req.abort()
201-
})
202-
}
203-
204-
pump(stream, compressor, req, function () {
199+
pump(stream, compressor, requestProxy(requestOpts, onResult(client, onerror)), function () {
205200
// This function is technically called with an error, but because we
206201
// manually attach error listeners on all the streams in the pipeline
207202
// above, we can safely ignore it.
@@ -244,10 +239,72 @@ function onStream (opts, client, onerror) {
244239
const metadata = getMetadata(opts)
245240
truncate.metadata(metadata, opts)
246241
stream.write(ndjson.serialize({metadata}))
242+
243+
// Under normal opperation, just make a request and return it. If
244+
// instructed to back off, make a temporary buffer to hold data until the
245+
// request can be made
246+
function requestProxy (opts, onresponse) {
247+
if (client._backoffTimer) {
248+
buffer = new PassThrough({highWaterMark: client._chopperSize * 2}) // twice as large to allow overflow
249+
buffer.on('error', onerrorproxy)
250+
251+
eos(stream, function () {
252+
client._backoffCallback = null
253+
if (client._backoffTimer) {
254+
// drop all data - back-off still in effect
255+
compressor.destroy()
256+
buffer.destroy()
257+
}
258+
})
259+
260+
client._backoffCallback = function () {
261+
client._backoffCallback = null
262+
req = makeRequest(opts, onresponse)
263+
buffer.pipe(req)
264+
}
265+
266+
return buffer
267+
} else {
268+
return makeRequest(opts, onresponse)
269+
}
270+
}
271+
272+
function makeRequest (opts, onresponse) {
273+
const req = client._transport.request(opts, onresponse)
274+
275+
req.on('error', onerrorproxy)
276+
277+
req.on('socket', function (socket) {
278+
// Sockets will automatically be unreffed by the HTTP agent when they are
279+
// not in use by an HTTP request, but as we're keeping the HTTP request
280+
// open, we need to unref the socket manually
281+
socket.unref()
282+
})
283+
284+
if (Number.isFinite(serverTimeout)) {
285+
req.setTimeout(serverTimeout, function () {
286+
req.abort()
287+
})
288+
}
289+
290+
return req
291+
}
292+
293+
// This function is attached to the error event of the different streams so
294+
// that we can make sure to destroy the output stream as soon as an error
295+
// occurs
296+
function onerrorproxy (err) {
297+
stream.removeListener('error', onerrorproxy)
298+
compressor.removeListener('error', onerrorproxy)
299+
if (buffer) buffer.removeListener('error', onerrorproxy)
300+
if (req) req.removeListener('error', onerrorproxy)
301+
stream.destroy()
302+
onerror(err)
303+
}
247304
}
248305
}
249306

250-
function onResult (onerror) {
307+
function onResult (client, onerror) {
251308
return streamToBuffer.onStream(function (err, buf, res) {
252309
if (err) return onerror(err)
253310
if (res.statusCode < 200 || res.statusCode > 299) {
@@ -261,6 +318,9 @@ function onResult (onerror) {
261318
}
262319
}
263320
onerror(err)
321+
} else {
322+
client._errors = 0
323+
client._chopper.resetTimer(client._chopperTime)
264324
}
265325
})
266326
}

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"fast-stream-to-buffer": "^1.0.0",
2222
"pump": "^3.0.0",
2323
"readable-stream": "^2.3.6",
24-
"stream-chopper": "^1.1.1",
24+
"stream-chopper": "^2.1.1",
2525
"unicode-byte-truncate": "^1.0.0"
2626
},
2727
"devDependencies": {

test/backoff.js

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
'use strict'
2+
3+
const crypto = require('crypto')
4+
const test = require('tape')
5+
const utils = require('./lib/utils')
6+
7+
const APMServer = utils.APMServer
8+
const processReq = utils.processReq
9+
const assertReq = utils.assertReq
10+
const assertMetadata = utils.assertMetadata
11+
const assertEvent = utils.assertEvent
12+
13+
const testCases = [
14+
{
15+
name: 'only one error',
16+
expectedData: [
17+
assertMetadata,
18+
assertEvent({span: {req: 2}}),
19+
assertMetadata,
20+
assertEvent({span: {req: 3}}),
21+
assertMetadata,
22+
assertEvent({span: {req: 4}})
23+
],
24+
requests: [
25+
[0, true], // no back-off in effect, request fails
26+
[0, false], // back-off in effect, request succeeds
27+
[0, false], // no back-off in effect, request succeeds
28+
[0, false] // no back-off in effect, request succeeds
29+
]
30+
},
31+
{
32+
name: 'only two errors',
33+
expectedData: [
34+
assertMetadata,
35+
assertEvent({span: {req: 3}}),
36+
assertMetadata,
37+
assertEvent({span: {req: 4}}),
38+
assertMetadata,
39+
assertEvent({span: {req: 5}})
40+
],
41+
requests: [
42+
[0, true], // no back-off in effect, request fails
43+
[0, true], // back-off in effect, request fails
44+
[1, false], // back-off in effect, request succeeds
45+
[0, false], // no back-off in effect, request succeeds
46+
[0, false] // no back-off in effect, request succeeds
47+
]
48+
},
49+
{
50+
name: 'top out at full back-off',
51+
expectedData: [
52+
assertMetadata,
53+
assertEvent({span: {req: 9}}),
54+
assertMetadata,
55+
assertEvent({span: {req: 10}}),
56+
assertMetadata,
57+
assertEvent({span: {req: 11}})
58+
],
59+
requests: [
60+
[0, true], // no back-off in effect, request fails
61+
[0, true], // back-off in effect, request fails
62+
[1, true], // back-off in effect, request fails
63+
[4, true], // back-off in effect, request fails
64+
[9, true], // back-off in effect, request fails
65+
[16, true], // back-off in effect, request fails
66+
[25, true], // back-off in effect, request fails
67+
[36, true], // back-off in effect, request fails
68+
[36, false], // back-off in effect, request succeeds
69+
[0, false], // no back-off in effect, request succeeds
70+
[0, false] // no back-off in effect, request succeeds
71+
]
72+
}
73+
]
74+
75+
testCases.forEach(function ({name, expectedData, requests}) {
76+
test('backoff delays - ' + name, function (t) {
77+
let reqNo = 0
78+
let start, client
79+
80+
const server = APMServer(function (req, res) {
81+
const diff = Date.now() - start
82+
const backoffTime = requests[reqNo - 1][0] * 1000
83+
t.ok(diff > backoffTime && diff < backoffTime + 200, `should delay request between ${backoffTime} and ${backoffTime + 200}ms (was delayed ${diff}ms)`)
84+
85+
if (requests[reqNo - 1][1] === true) {
86+
res.writeHead(500)
87+
res.end()
88+
} else {
89+
assertReq(t, req)
90+
req = processReq(req)
91+
req.on('data', function (obj) {
92+
expectedData.shift()(t, obj)
93+
})
94+
req.on('end', function () {
95+
res.end()
96+
if (reqNo < 4) {
97+
setTimeout(makeReq, 10)
98+
} else {
99+
t.equal(expectedData.length, 0, 'should have seen all expected data')
100+
server.close()
101+
t.end()
102+
}
103+
})
104+
}
105+
}).client({time: 1000}, function (_client) {
106+
client = _client
107+
let emittedErrors = 0
108+
109+
client.on('error', function (err) {
110+
emittedErrors++
111+
if (requests[reqNo - 1][1] === true) {
112+
t.equal(err.message, 'Unexpected response code from APM Server: 500', 'client should emit error')
113+
t.equal(client._errors, emittedErrors, 'client error count should have been incremented to ' + emittedErrors)
114+
makeReq()
115+
} else {
116+
t.error(err)
117+
}
118+
})
119+
120+
makeReq()
121+
})
122+
123+
function makeReq () {
124+
client.sendSpan({req: ++reqNo})
125+
start = Date.now()
126+
client.flush()
127+
}
128+
})
129+
})
130+
131+
test('backoff - dropping data', function (t) {
132+
let start, timer
133+
let reqNo = 0
134+
const backoffTimes = [0, 0, 1, 0]
135+
136+
const server = APMServer(function (req, res) {
137+
const diff = Date.now() - start
138+
const backoffTime = backoffTimes.shift() * 1000
139+
t.ok(diff > backoffTime && diff < backoffTime + 200, `should delay request between ${backoffTime} and ${backoffTime + 200}ms (was delayed ${diff}ms)`)
140+
141+
req = processReq(req)
142+
req.on('data', function (obj) {
143+
if ('metadata' in obj) return
144+
t.equal(obj.span.req, reqNo, 'event belongs to expected request no ' + reqNo)
145+
t.equal(obj.span.ok, true, 'expected the event to get sent')
146+
})
147+
req.on('end', function () {
148+
if (reqNo <= 2) {
149+
res.writeHead(500)
150+
res.end()
151+
} else {
152+
clearTimeout(timer)
153+
res.end()
154+
server.close()
155+
t.end()
156+
}
157+
})
158+
}).client({size: 256, time: 500}, function (client) {
159+
client.on('error', function (err) {
160+
if (reqNo === 1) {
161+
t.equal(err.message, 'Unexpected response code from APM Server: 500', 'client should emit error')
162+
t.equal(client._errors, 1, 'client error count should have been incremented to 1')
163+
164+
client.sendSpan({req: ++reqNo, ok: true, filler: crypto.randomBytes(32).toString('hex')})
165+
start = Date.now()
166+
client.flush()
167+
} else if (reqNo === 2) {
168+
t.equal(err.message, 'Unexpected response code from APM Server: 500', 'client should emit error')
169+
t.equal(client._errors, 2, 'client error count should have been incremented to 2')
170+
171+
reqNo++
172+
start = Date.now()
173+
174+
// these will be dropped because they are too big to be cached before the backoff
175+
client.sendSpan({req: reqNo, ok: false, filler: crypto.randomBytes(32).toString('hex')}) // will not overflow
176+
client.sendSpan({req: reqNo, ok: false, filler: crypto.randomBytes(32).toString('hex')}) // will trigger overflow
177+
178+
// this will be the first to get through after the backoff
179+
client.sendSpan({req: reqNo, ok: true, filler: crypto.randomBytes(32).toString('hex')})
180+
181+
timer = setTimeout(function () {
182+
t.fail('took too long')
183+
}, 2000)
184+
} else {
185+
t.error(err)
186+
}
187+
})
188+
189+
client.sendSpan({req: ++reqNo, ok: true, filler: crypto.randomBytes(32).toString('hex')})
190+
start = Date.now()
191+
client.flush()
192+
})
193+
})

test/lib/utils.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ function assertReq (t, req) {
6666
assertReq.asserts = 7
6767

6868
function assertMetadata (t, obj) {
69-
t.deepEqual(Object.keys(obj), ['metadata'])
69+
t.deepEqual(Object.keys(obj), ['metadata'], 'should receive metadata')
7070
const metadata = obj.metadata
7171
t.deepEqual(Object.keys(metadata), ['service', 'process', 'system'])
7272
const service = metadata.service
@@ -92,7 +92,7 @@ function assertMetadata (t, obj) {
9292
t.ok(Array.isArray(_process.argv), 'process.title should be an array')
9393
t.ok(_process.argv.length >= 2, 'process.title should contain at least two elements')
9494
t.ok(/\/node$/.test(_process.argv[0]), `process.argv[0] should match /\\/node$/ (was: ${_process.argv[0]})`)
95-
const regex = /(\/test\/(test|truncate|lib\/unref-client)\.js|node_modules\/\.bin\/tape)$/
95+
const regex = /(\/test\/(test|backoff|truncate|lib\/unref-client)\.js|node_modules\/\.bin\/tape)$/
9696
t.ok(regex.test(_process.argv[1]), `process.argv[1] should match ${regex} (was: ${_process.argv[1]})"`)
9797
const system = metadata.system
9898
t.ok(typeof system.hostname, 'string')

0 commit comments

Comments
 (0)