Skip to content

Commit 4df93f7

Browse files
authored
Highlight images during text-to-speech (#382)
1 parent f5cd4b9 commit 4df93f7

File tree

3 files changed

+45
-53
lines changed

3 files changed

+45
-53
lines changed

readium/navigator/src/main/java/org/readium/r2/navigator/media3/tts/TtsNavigator.kt

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ public class TtsNavigator<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
6868
?: ttsEngineProvider.createEmptyPreferences()
6969

7070
val contentIterator =
71-
TtsContentIterator(publication, tokenizerFactory, initialLocator)
71+
TtsUtteranceIterator(publication, tokenizerFactory, initialLocator)
7272

7373
val ttsEngine =
7474
ttsEngineProvider.createEngine(publication, actualInitialPreferences)
@@ -279,28 +279,24 @@ public class TtsNavigator<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
279279
private fun TtsPlayer.Utterance.toPosition(): Location {
280280
val currentLink = publication.readingOrder[position.resourceIndex]
281281

282-
val utteranceHighlight = publication
282+
val utteranceLocator = publication
283283
.locatorFromLink(currentLink)!!
284284
.copy(
285285
locations = position.locations,
286-
text = Locator.Text(
287-
highlight = text,
288-
before = position.textBefore,
289-
after = position.textAfter
290-
)
286+
text = position.text
291287
)
292288

293-
val tokenHighlight = range
294-
?.let { utteranceHighlight.copy(text = utteranceHighlight.text.substring(it)) }
289+
val tokenLocator = range
290+
?.let { utteranceLocator.copy(text = utteranceLocator.text.substring(it)) }
295291

296292
return Location(
297293
href = Href(currentLink.href),
298-
textBefore = position.textBefore,
299-
textAfter = position.textAfter,
294+
textBefore = position.text.before,
295+
textAfter = position.text.after,
300296
utterance = text,
301297
range = range,
302-
utteranceLocator = utteranceHighlight,
303-
tokenLocator = tokenHighlight
298+
utteranceLocator = utteranceLocator,
299+
tokenLocator = tokenLocator
304300
)
305301
}
306302
}

readium/navigator/src/main/java/org/readium/r2/navigator/media3/tts/TtsPlayer.kt

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@ import org.readium.r2.shared.extensions.tryOrNull
2424
import org.readium.r2.shared.publication.Locator
2525

2626
/**
27-
* Plays the content from a [TtsContentIterator] with a [TtsEngine].
27+
* Plays the content from a [TtsUtteranceIterator] with a [TtsEngine].
2828
*/
2929
@ExperimentalReadiumApi
3030
internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
3131
E : TtsEngine.Error, V : TtsEngine.Voice> private constructor(
3232
private val engineFacade: TtsEngineFacade<S, P, E, V>,
33-
private val contentIterator: TtsContentIterator,
33+
private val contentIterator: TtsUtteranceIterator,
3434
initialWindow: UtteranceWindow,
3535
initialPreferences: P
3636
) : Configurable<S, P> {
@@ -40,7 +40,7 @@ internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
4040
suspend operator fun <S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
4141
E : TtsEngine.Error, V : TtsEngine.Voice> invoke(
4242
engine: TtsEngine<S, P, E, V>,
43-
contentIterator: TtsContentIterator,
43+
contentIterator: TtsUtteranceIterator,
4444
initialPreferences: P
4545
): TtsPlayer<S, P, E, V>? {
4646
val initialContext = tryOrNull { contentIterator.startContext() }
@@ -59,23 +59,23 @@ internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
5959
)
6060
}
6161

62-
private suspend fun TtsContentIterator.startContext(): UtteranceWindow? {
63-
val previousUtterance = previousUtterance()
64-
val currentUtterance = nextUtterance()
62+
private suspend fun TtsUtteranceIterator.startContext(): UtteranceWindow? {
63+
val previousUtterance = previous()
64+
val currentUtterance = next()
6565

6666
val startWindow = if (currentUtterance != null) {
6767
UtteranceWindow(
6868
previousUtterance = previousUtterance,
6969
currentUtterance = currentUtterance,
70-
nextUtterance = nextUtterance(),
70+
nextUtterance = next(),
7171
ended = false
7272
)
7373
} else {
7474
val actualCurrentUtterance = previousUtterance ?: return null
75-
val actualPreviousUtterance = previousUtterance()
75+
val actualPreviousUtterance = previous()
7676

7777
// Go back to the end of the iterator.
78-
nextUtterance()
78+
next()
7979

8080
UtteranceWindow(
8181
previousUtterance = actualPreviousUtterance,
@@ -129,15 +129,14 @@ internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
129129
data class Position(
130130
val resourceIndex: Int,
131131
val locations: Locator.Locations,
132-
val textBefore: String?,
133-
val textAfter: String?
132+
val text: Locator.Text
134133
)
135134
}
136135

137136
private data class UtteranceWindow(
138-
val previousUtterance: TtsContentIterator.Utterance?,
139-
val currentUtterance: TtsContentIterator.Utterance,
140-
val nextUtterance: TtsContentIterator.Utterance?,
137+
val previousUtterance: TtsUtteranceIterator.Utterance?,
138+
val currentUtterance: TtsUtteranceIterator.Utterance,
139+
val nextUtterance: TtsUtteranceIterator.Utterance?,
141140
val ended: Boolean = false
142141
)
143142

@@ -411,19 +410,19 @@ internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
411410
val previousUtterance =
412411
try {
413412
// Get previously currentUtterance once more
414-
contentIterator.previousUtterance()
413+
contentIterator.previous()
415414

416415
// Get previously previousUtterance once more
417-
contentIterator.previousUtterance()
416+
contentIterator.previous()
418417

419418
// Get new previous utterance
420-
val previousUtterance = contentIterator.previousUtterance()
419+
val previousUtterance = contentIterator.previous()
421420

422421
// Go to currentUtterance position
423-
contentIterator.nextUtterance()
422+
contentIterator.next()
424423

425424
// Go to nextUtterance position
426-
contentIterator.nextUtterance()
425+
contentIterator.next()
427426

428427
previousUtterance
429428
} catch (e: Exception) {
@@ -448,7 +447,7 @@ internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
448447
}
449448

450449
val nextUtterance = try {
451-
contentIterator.nextUtterance()
450+
contentIterator.next()
452451
} catch (e: Exception) {
453452
onContentError(e)
454453
return
@@ -498,8 +497,8 @@ internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
498497
playContinuous()
499498
}
500499

501-
private suspend fun speakUtterance(utterance: TtsContentIterator.Utterance): E? =
502-
engineFacade.speak(utterance.text, utterance.language, ::onRangeChanged)
500+
private suspend fun speakUtterance(utterance: TtsUtteranceIterator.Utterance): E? =
501+
engineFacade.speak(utterance.utterance, utterance.language, ::onRangeChanged)
503502

504503
private fun onEngineError(error: E) {
505504
playbackMutable.value = playbackMutable.value.copy(
@@ -535,15 +534,14 @@ internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
535534
private fun isPlaying() =
536535
playbackMutable.value.playWhenReady && playback.value.state == State.Ready
537536

538-
private fun TtsContentIterator.Utterance.ttsPlayerUtterance(): Utterance =
537+
private fun TtsUtteranceIterator.Utterance.ttsPlayerUtterance(): Utterance =
539538
Utterance(
540-
text = text,
539+
text = utterance,
541540
range = null,
542541
position = Utterance.Position(
543542
resourceIndex = resourceIndex,
544543
locations = locations,
545-
textAfter = textAfter,
546-
textBefore = textBefore
544+
text = text
547545
)
548546
)
549547
}

readium/navigator/src/main/java/org/readium/r2/navigator/media3/tts/TtsContentIterator.kt renamed to readium/navigator/src/main/java/org/readium/r2/navigator/media3/tts/TtsUtteranceIterator.kt

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,16 @@ import org.readium.r2.shared.util.tokenizer.TextTokenizer
2424
*
2525
* Not thread-safe.
2626
*/
27-
internal class TtsContentIterator(
27+
internal class TtsUtteranceIterator(
2828
private val publication: Publication,
2929
private val tokenizerFactory: (language: Language?) -> TextTokenizer,
3030
initialLocator: Locator?
3131
) {
3232
data class Utterance(
33+
val utterance: String,
3334
val resourceIndex: Int,
3435
val locations: Locator.Locations,
35-
val text: String,
36-
val textBefore: String?,
37-
val textAfter: String?,
36+
val text: Locator.Text,
3837
val language: Language?
3938
)
4039

@@ -109,14 +108,14 @@ internal class TtsContentIterator(
109108
/**
110109
* Advances to the previous item and returns it, or null if we reached the beginning.
111110
*/
112-
suspend fun previousUtterance(): Utterance? =
113-
nextUtterance(Direction.Backward)
111+
suspend fun previous(): Utterance? =
112+
next(Direction.Backward)
114113

115114
/**
116115
* Advances to the next item and returns it, or null if we reached the end.
117116
*/
118-
suspend fun nextUtterance(): Utterance? =
119-
nextUtterance(Direction.Forward)
117+
suspend fun next(): Utterance? =
118+
next(Direction.Forward)
120119

121120
private enum class Direction {
122121
Forward, Backward;
@@ -126,10 +125,10 @@ internal class TtsContentIterator(
126125
* Gets the next utterance in the given [direction], or null when reaching the beginning or the
127126
* end.
128127
*/
129-
private suspend fun nextUtterance(direction: Direction): Utterance? {
128+
private suspend fun next(direction: Direction): Utterance? {
130129
val utterance = utterances.nextIn(direction)
131130
if (utterance == null && loadNextUtterances(direction)) {
132-
return nextUtterance(direction)
131+
return next(direction)
133132
}
134133
return utterance
135134
}
@@ -167,7 +166,7 @@ internal class TtsContentIterator(
167166
*/
168167
private fun Content.Element.tokenize(): List<Content.Element> {
169168
val contentTokenizer = TextContentTokenizer(
170-
language = this@TtsContentIterator.language,
169+
language = this@TtsUtteranceIterator.language,
171170
textTokenizerFactory = tokenizerFactory,
172171
overrideContentLanguage = overrideContentLanguage
173172
)
@@ -187,11 +186,10 @@ internal class TtsContentIterator(
187186
?: throw IllegalStateException("Content Element cannot be found in readingOrder.")
188187

189188
return Utterance(
189+
utterance = text,
190190
resourceIndex = resourceIndex,
191191
locations = locator.locations,
192-
text = text,
193-
textBefore = locator.text.before,
194-
textAfter = locator.text.after,
192+
text = locator.text,
195193
language = language
196194
)
197195
}

0 commit comments

Comments
 (0)