Skip to content

Show poll-failure details #868

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions assets/l10n/app_en.arb
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,18 @@
"message": {"type": "String", "example": "Invalid format"}
}
},
"errorConnectingToServerShort": "Error connecting to Zulip. Retrying…",
"@errorConnectingToServerShort": {
"description": "Short error message for a generic unknown error connecting to the server."
},
"errorConnectingToServerDetails": "Error connecting to Zulip at {serverUrl}. Will retry:\n\n{error}",
"@errorConnectingToServerDetails": {
"description": "Dialog error message for a generic unknown error connecting to the server with details.",
"placeholders": {
"serverUrl": {"type": "String", "example": "http://example.com/"},
"error": {"type": "String", "example": "Invalid format"}
}
},
"errorSharingFailed": "Sharing failed",
"@errorSharingFailed": {
"description": "Error message when sharing a message failed."
Expand Down
52 changes: 49 additions & 3 deletions lib/model/store.dart
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import '../notifications/receive.dart';
import 'autocomplete.dart';
import 'database.dart';
import 'emoji.dart';
import 'localizations.dart';
import 'message.dart';
import 'message_list.dart';
import 'recent_dm_conversations.dart';
Expand Down Expand Up @@ -912,10 +913,28 @@ class UpdateMachine {
}());
}

/// This controls when we start to report transient errors to the user when
/// polling.
///
/// At the 6th failure, the expected time elapsed since the first failure
/// will be 1.55 seocnds.
static const transientFailureCountNotifyThreshold = 5;

void poll() async {
assert(!_disposed);

BackoffMachine? backoffMachine;
int accumulatedTransientFailureCount = 0;

/// This only reports transient errors after reaching
/// a pre-defined threshold of retries.
void maybeReportTransientError(String? message, {String? details}) {
accumulatedTransientFailureCount++;
if (accumulatedTransientFailureCount > transientFailureCountNotifyThreshold) {
reportErrorToUserBriefly(message, details: details);
}
}

while (true) {
if (_debugLoopSignal != null) {
await _debugLoopSignal!.future;
Expand All @@ -935,6 +954,8 @@ class UpdateMachine {
if (_disposed) return;

store.isLoading = true;
final localizations = GlobalLocalizations.zulipLocalizations;
final serverUrl = store.connection.realmUrl.origin;
switch (e) {
case ZulipApiException(code: 'BAD_EVENT_QUEUE_ID'):
assert(debugLog('Lost event queue for $store. Replacing…'));
Expand All @@ -943,18 +964,40 @@ class UpdateMachine {
debugLog('… Event queue replaced.');
return;

case Server5xxException() || NetworkException():
case Server5xxException():
assert(debugLog('Transient error polling event queue for $store: $e\n'
'Backing off, then will retry…'));
maybeReportTransientError(
localizations.errorConnectingToServerShort,
details: localizations.errorConnectingToServerDetails(
serverUrl, e.toString()));
await (backoffMachine ??= BackoffMachine()).wait();
assert(debugLog('… Backoff wait complete, retrying poll.'));
continue;

case NetworkException():
assert(debugLog('Transient error polling event queue for $store: $e\n'
'Backing off, then will retry…'));
// TODO tell user if transient polling errors persist
if (e.cause is! SocketException) {
// Heuristic check to only report interesting errors to the user.
// A [SocketException] is common when the app returns from sleep.
maybeReportTransientError(
localizations.errorConnectingToServerShort,
details: localizations.errorConnectingToServerDetails(
serverUrl, e.toString()));
}
await (backoffMachine ??= BackoffMachine()).wait();
assert(debugLog('… Backoff wait complete, retrying poll.'));
continue;

default:
assert(debugLog('Error polling event queue for $store: $e\n'
'Backing off and retrying even though may be hopeless…'));
// TODO tell user on non-transient error in polling
// TODO(#186): Handle unrecoverable failures
reportErrorToUserBriefly(
localizations.errorConnectingToServerShort,
details: localizations.errorConnectingToServerDetails(
serverUrl, e.toString()));
await (backoffMachine ??= BackoffMachine()).wait();
assert(debugLog('… Backoff wait complete, retrying poll.'));
continue;
Expand All @@ -978,6 +1021,9 @@ class UpdateMachine {
// and failures, the successes themselves should space out the requests.
backoffMachine = null;
store.isLoading = false;
// Dismiss existing errors, if any.
reportErrorToUserBriefly(null);
accumulatedTransientFailureCount = 0;

final events = result.events;
for (final event in events) {
Expand Down
85 changes: 85 additions & 0 deletions test/model/store_test.dart
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import 'dart:async';
import 'dart:io';

import 'package:checks/checks.dart';
import 'package:fake_async/fake_async.dart';
import 'package:flutter/foundation.dart';
import 'package:http/http.dart' as http;
import 'package:test/scaffolding.dart';
Expand All @@ -12,6 +14,7 @@ import 'package:zulip/api/route/messages.dart';
import 'package:zulip/api/route/realm.dart';
import 'package:zulip/model/message_list.dart';
import 'package:zulip/model/narrow.dart';
import 'package:zulip/log.dart';
import 'package:zulip/model/store.dart';
import 'package:zulip/notifications/receive.dart';

Expand Down Expand Up @@ -652,6 +655,88 @@ void main() {
test('retries on MalformedServerResponseException', () {
checkRetry(() => connection.prepare(httpStatus: 200, body: 'nonsense'));
});

group('report error', () {
String? lastReportedError;
String? takeLastReportedError() {
final result = lastReportedError;
lastReportedError = null;
return result;
}

/// This is an alternative to [ZulipApp]'s implementation of
/// [reportErrorToUserBriefly] for testing.
Future<void> logAndReportErrorToUserBriefly(String? message, {
String? details,
}) async {
if (message == null) return;
lastReportedError = '$message\n$details';
}

Future<void> prepare() async {
reportErrorToUserBriefly = logAndReportErrorToUserBriefly;
addTearDown(() => reportErrorToUserBriefly = defaultReportErrorToUserBriefly);

await prepareStore(lastEventId: 1);
updateMachine.debugPauseLoop();
updateMachine.poll();
}

void pollAndFail(FakeAsync async) {
updateMachine.debugAdvanceLoop();
async.elapse(Duration.zero);
checkLastRequest(lastEventId: 1);
check(store).isLoading.isTrue();
}

test('report non-transient errors', () => awaitFakeAsync((async) async {
await prepare();

connection.prepare(httpStatus: 400, json: {
'result': 'error', 'code': 'BAD_REQUEST', 'msg': 'Bad request'});
pollAndFail(async);
check(takeLastReportedError()).isNotNull().startsWith(
"Error connecting to Zulip. Retrying…\n"
"Error connecting to Zulip at");
}));

test('report transient errors', () => awaitFakeAsync((async) async {
await prepare();

// There should be no user visible error messages during these retries.
for (int i = 0; i < UpdateMachine.transientFailureCountNotifyThreshold; i++) {
connection.prepare(httpStatus: 500, body: 'splat');
pollAndFail(async);
check(takeLastReportedError()).isNull();
// This skips the pending polling backoff.
async.flushTimers();
}

connection.prepare(httpStatus: 500, body: 'splat');
pollAndFail(async);
check(takeLastReportedError()).isNotNull().startsWith(
"Error connecting to Zulip. Retrying…\n"
"Error connecting to Zulip at");
}));

test('ignore boring errors', () => awaitFakeAsync((async) async {
await prepare();

for (int i = 0; i < UpdateMachine.transientFailureCountNotifyThreshold; i++) {
connection.prepare(exception: const SocketException('failed'));
pollAndFail(async);
check(takeLastReportedError()).isNull();
// This skips the pending polling backoff.
async.flushTimers();
}

connection.prepare(exception: const SocketException('failed'));
pollAndFail(async);
// Normally we start showing user visible error messages for transient
// errors after enough number of retries.
check(takeLastReportedError()).isNull();
}));
});
});

group('UpdateMachine.registerNotificationToken', () {
Expand Down