diff --git a/frontend/src/components/Editor.jsx b/frontend/src/components/Editor.jsx
index ac25046..5cb550d 100644
--- a/frontend/src/components/Editor.jsx
+++ b/frontend/src/components/Editor.jsx
@@ -27,6 +27,16 @@ const CustomImage = Image.extend({
},
}).configure({ allowBase64: false, inline: false })
+// ── ReadingMark — transient highlight that trails the TTS playhead ────────
+// UI-only: it declares no parse rules, so it is never rebuilt from HTML.
+const ReadingMark = Mark.create({
+  name: 'readingWord',
+  inclusive: false,
+  parseHTML() {
+    return []
+  },
+  renderHTML(props) {
+    const attrs = mergeAttributes({ class: 'reading-word' }, props.HTMLAttributes)
+    // Rendered as <span class="reading-word"> wrapping the marked content
+    return ['span', attrs, 0]
+  },
+})
+
// ── LintMark — transient mark for spelling / grammar underlines ────────────
const LintMark = Mark.create({
name: 'lintError',
@@ -89,12 +99,12 @@ function clearLintMarks(editor) {
editor.view.dispatch(tr)
}
-// Strip lint marks from the JSON before saving (they're UI-only)
+// Strip transient UI marks (lint, reading playhead) from JSON before saving
function stripLintMarks(node) {
if (!node) return node
const n = { ...node }
if (n.marks) {
- n.marks = n.marks.filter(m => m.type !== 'lintError')
+ n.marks = n.marks.filter(m => m.type !== 'lintError' && m.type !== 'readingWord')
if (!n.marks.length) delete n.marks
}
if (n.content) n.content = n.content.map(stripLintMarks)
@@ -114,7 +124,7 @@ const Editor = forwardRef(function Editor(
const lintStatusRef = useRef('idle')
const lintDebounce = useRef(null)
const runLintRef = useRef(null) // always holds latest runLint, safe to call from onUpdate closure
- const applyingLints = useRef(false) // true while we're dispatching mark transactions — skip onUpdate
+ const applyingLints = useRef(false) // true while dispatching mark transactions — suppresses onUpdate
const editor = useEditor({
extensions: [
@@ -122,6 +132,7 @@ const Editor = forwardRef(function Editor(
Underline,
CustomImage,
LintMark,
+ ReadingMark,
Placeholder.configure({ placeholder: 'Begin your story here…' }),
CharacterCount,
TextAlign.configure({ types: ['heading', 'paragraph'] }),
@@ -221,6 +232,42 @@ const Editor = forwardRef(function Editor(
// Clean up the debounce timer on unmount
useEffect(() => () => clearTimeout(lintDebounce.current), [])
+ // ── Reading-word mark (TTS playhead highlight) ────────────────────────────
+ // Called by Toolbar when an utterance starts (sentence-level highlight) and
+ // on each word-boundary event. Both operations are wrapped in applyingLints
+ // so the resulting onUpdate dispatch is silently ignored — the same guard
+ // that protects lint mark dispatches also covers these.
+  // Move the reading highlight to the document range [from, to).
+  // Every existing readingWord mark is removed and the new one is added in a
+  // single transaction. `from` / `to` are absolute ProseMirror positions.
+  const applyReadingMark = useCallback((from, to) => {
+    if (!editor) return
+    const mt = editor.state.schema.marks.readingWord
+    if (!mt) return
+    const { tr, doc } = editor.state
+    const docEnd = doc.content.size
+    // The caller's positions were computed when reading started; clamp them so
+    // a document that has shrunk since then cannot yield an out-of-range span.
+    const start = Math.max(0, Math.min(from, docEnd))
+    const end = Math.min(to, docEnd)
+    // One ranged removeMark drops the mark everywhere — no per-node walk needed
+    tr.removeMark(0, docEnd, mt)
+    if (start < end) tr.addMark(start, end, mt.create())
+    if (!tr.docChanged) return
+    // Highlight moves are not user edits: keep them off the undo stack
+    tr.setMeta('addToHistory', false)
+    applyingLints.current = true
+    try {
+      editor.view.dispatch(tr)
+    } finally {
+      // Release the guard even if dispatch throws; otherwise applyingLints
+      // would stay true after the failure
+      applyingLints.current = false
+    }
+  }, [editor])
+
+  // Remove the reading highlight from the whole document.
+  // Does nothing when no readingWord mark is present.
+  const clearReadingMark = useCallback(() => {
+    if (!editor) return
+    const mt = editor.state.schema.marks.readingWord
+    if (!mt) return
+    const { tr, doc } = editor.state
+    // One ranged removeMark drops the mark everywhere — no per-node walk needed
+    tr.removeMark(0, doc.content.size, mt)
+    if (!tr.docChanged) return // nothing highlighted — skip the empty dispatch
+    // Clearing the highlight is not a user edit: keep it off the undo stack
+    tr.setMeta('addToHistory', false)
+    applyingLints.current = true
+    try {
+      editor.view.dispatch(tr)
+    } finally {
+      // Release the guard even if dispatch throws; otherwise applyingLints
+      // would stay true after the failure
+      applyingLints.current = false
+    }
+  }, [editor])
+
// ── Popover on clicking a lint mark ──────────────────────────────────────
function handleEditorClick(e) {
if (!editor) return
@@ -281,6 +328,8 @@ const Editor = forwardRef(function Editor(
onLint={runLint}
lintStatus={lintStatus}
lintCount={lintCount}
+ applyReadingMark={applyReadingMark}
+ clearReadingMark={clearReadingMark}
/>
diff --git a/frontend/src/components/Toolbar.jsx b/frontend/src/components/Toolbar.jsx
index 59a53e2..8bfd187 100644
--- a/frontend/src/components/Toolbar.jsx
+++ b/frontend/src/components/Toolbar.jsx
@@ -1,6 +1,73 @@
import { useRef, useState, useEffect, useCallback } from 'react'
-export default function Toolbar({ editor, onImageUpload, fontSize, onFontSizeChange, onLint, lintStatus, lintCount }) {
+// ── TTS position utilities ─────────────────────────────────────────────────
+
+// Flatten the document range [from, to) into a plain string plus a parallel
+// array mapping every character back to its absolute (doc-level) position.
+//
+// Characters that have no document position of their own get `null` in posMap:
+//   '\n'  inserted at each block boundary
+//   ' '   standing in for a non-text inline leaf, so the words on either side
+//         of it are not fused into one
+//
+// Returns { text, posMap } where text.length === posMap.length.
+function buildReadingMap(doc, from, to) {
+  const chars = []
+  const posMap = []
+  doc.nodesBetween(from, to, (node, pos) => {
+    if (node.isText) {
+      // Keep only the slice of this text node that lies inside [from, to)
+      const first = Math.max(0, from - pos)
+      const last = Math.min(node.text.length, to - pos)
+      for (let i = first; i < last; i++) {
+        chars.push(node.text[i])
+        posMap.push(pos + i)
+      }
+      return false // text nodes have no children
+    }
+    if (node.isBlock) {
+      // Separate blocks with a single newline — never a leading or doubled one
+      if (chars.length > 0 && chars[chars.length - 1] !== '\n') {
+        chars.push('\n')
+        posMap.push(null)
+      }
+    } else if (node.isLeaf) {
+      // Inline leaf with no text: emit a space so neighbouring words stay apart
+      chars.push(' ')
+      posMap.push(null)
+    }
+  })
+  return { text: chars.join(''), posMap }
+}
+
+// Resolve the word under `charIdx` (an index into text/posMap) to its
+// ProseMirror span [pmFrom, pmTo).
+//
+// A word is a run of Unicode letters, combining marks, digits or '_', so
+// accented and non-Latin words are matched whole instead of being cut at the
+// first non-ASCII character. Matching is per UTF-16 code unit, so letters
+// outside the Basic Multilingual Plane are not recognised.
+//
+// Returns { pmFrom: null, pmTo: null } when charIdx is not an in-range integer
+// or does not sit on a word character.
+function wordAtIndex(text, posMap, charIdx) {
+  const isWordChar = (ch) => /[\p{L}\p{M}\p{N}_]/u.test(ch)
+  const inRange = Number.isInteger(charIdx) && charIdx >= 0 && charIdx < text.length
+  if (!inRange || !isWordChar(text[charIdx])) return { pmFrom: null, pmTo: null }
+  // Expand backward to word start
+  let start = charIdx
+  while (start > 0 && isWordChar(text[start - 1])) start--
+  // Expand forward to word end (exclusive)
+  let end = charIdx + 1
+  while (end < text.length && isWordChar(text[end])) end++
+  // Map character span → PM positions, skipping entries with no position
+  let pmFrom = null
+  let pmTo = null
+  for (let i = start; i < end; i++) {
+    if (posMap[i] === null) continue
+    if (pmFrom === null) pmFrom = posMap[i]
+    pmTo = posMap[i] + 1
+  }
+  return { pmFrom, pmTo }
+}
+
+// Return the PM span [pmFrom, pmTo) covering a whole chunk — used for the
+// sentence-level fallback highlight.
+//
+// `chunk` is { text, startOffset }, where startOffset indexes into `text`.
+// Trailing punctuation/whitespace is trimmed so the highlight ends on the last
+// word character. Word characters are Unicode letters, combining marks, digits
+// and '_', so a sentence ending in an accented letter keeps that letter.
+// Both fields are null when the chunk maps to no document position.
+function chunkPmRange(text, posMap, chunk) {
+  const isWordChar = (ch) => /[\p{L}\p{M}\p{N}_]/u.test(ch)
+  const start = chunk.startOffset
+  // Never look past the end of `text`, whatever length the chunk claims
+  let end = Math.min(start + chunk.text.length, text.length)
+  while (end > start && !isWordChar(text[end - 1])) end--
+  let pmFrom = null
+  let pmTo = null
+  for (let i = start; i < end && i < posMap.length; i++) {
+    if (posMap[i] === null) continue
+    if (pmFrom === null) pmFrom = posMap[i]
+    pmTo = posMap[i] + 1
+  }
+  return { pmFrom, pmTo }
+}
+
+// ──────────────────────────────────────────────────────────────────────────
+
+export default function Toolbar({ editor, onImageUpload, fontSize, onFontSizeChange, onLint, lintStatus, lintCount, applyReadingMark, clearReadingMark }) {
const fileRef = useRef()
const [isReading, setIsReading] = useState(false)
const isReadingRef = useRef(false) // ref so closure in next() always sees current value
@@ -98,20 +165,27 @@ export default function Toolbar({ editor, onImageUpload, fontSize, onFontSizeCha
function startReading() {
if (!('speechSynthesis' in window)) return
- // Grab text from cursor (or selection start) to end of document
const { from } = editor.state.selection
- const end = editor.state.doc.content.size
- const text = editor.state.doc.textBetween(from, end, '\n', ' ').trim()
- if (!text) return
+ const end = editor.state.doc.content.size
- // Split into sentences — Chrome stops an utterance after ~15 s if it's too long
- const chunks = (
- text.match(/[^.!?…]+[.!?…]*['"'"]?\s*/g)
- ?.map(s => s.trim())
- .filter(Boolean)
- ) || [text]
+ // Build char array + PM-position map so boundary events can pinpoint words
+ const { text, posMap } = buildReadingMap(editor.state.doc, from, end)
- speechSynthesis.cancel() // clear any leftover utterance
+ // Split into sentence-sized chunks (Chrome drops utterances > ~15 s).
+ // Track each chunk's start offset in `text` so onboundary charIndex can be
+ // translated back to an absolute position in the posMap.
+ const chunks = []
+ let m
+ const re = /[^.!?…]+[.!?…]*['"'"]?\s*/g
+ while ((m = re.exec(text)) !== null) {
+ if (m[0].trim()) chunks.push({ text: m[0], startOffset: m.index })
+ }
+ if (!chunks.length) {
+ if (text.trim()) chunks.push({ text, startOffset: 0 })
+ else return
+ }
+
+ speechSynthesis.cancel()
isReadingRef.current = true
setIsReading(true)
@@ -122,13 +196,56 @@ export default function Toolbar({ editor, onImageUpload, fontSize, onFontSizeCha
if (!isReadingRef.current || idx >= chunks.length) {
isReadingRef.current = false
setIsReading(false)
+ clearReadingMark?.()
return
}
- const u = new SpeechSynthesisUtterance(chunks[idx++])
+ const chunk = chunks[idx++]
+ const u = new SpeechSynthesisUtterance(chunk.text)
if (chosenVoice) u.voice = chosenVoice
- u.rate = ttsRateRef.current
- u.onend = next
- u.onerror = () => { isReadingRef.current = false; setIsReading(false) }
+ u.rate = ttsRateRef.current
+
+ // ── Sentence-level highlight (reliable fallback) ──
+ // onstart fires on every utterance in every browser/voice combination.
+ // Highlight the whole sentence immediately so there's always visible
+ // tracking, even when word-boundary events aren't available.
+ u.onstart = () => {
+ const { pmFrom, pmTo } = chunkPmRange(text, posMap, chunk)
+ if (pmFrom !== null) {
+ applyReadingMark?.(pmFrom, pmTo)
+ requestAnimationFrame(() => {
+ editor.view.dom
+ .querySelector('.reading-word')
+ ?.scrollIntoView({ behavior: 'smooth', block: 'center' })
+ })
+ }
+ }
+
+ // ── Word-level highlight (best-effort via boundary events) ──
+ // Many Linux voices / Firefox don't fire onboundary; when they do,
+ // this overrides the sentence highlight with a tighter word highlight.
+ u.onboundary = (e) => {
+ if (e.name === 'sentence') return
+ const textIdx = chunk.startOffset + (e.charIndex ?? 0)
+ const { pmFrom, pmTo } = wordAtIndex(text, posMap, textIdx)
+ if (pmFrom !== null) {
+ applyReadingMark?.(pmFrom, pmTo)
+ requestAnimationFrame(() => {
+ editor.view.dom
+ .querySelector('.reading-word')
+ ?.scrollIntoView({ behavior: 'smooth', block: 'center' })
+ })
+ }
+ }
+
+ u.onend = next
+      u.onerror = (e) => {
+        // 'interrupted' (cut off mid-speech) and 'canceled' (dropped from the
+        // queue before it started) are both caused by speechSynthesis.cancel(),
+        // not by a real failure. Leave the reading state alone in that case so
+        // a session started right after the cancel() is not reset by the
+        // previous utterance's error event.
+        if (e.error === 'interrupted' || e.error === 'canceled') return
+        isReadingRef.current = false
+        setIsReading(false)
+        clearReadingMark?.()
+      }
speechSynthesis.speak(u)
}
next()
@@ -138,6 +255,7 @@ export default function Toolbar({ editor, onImageUpload, fontSize, onFontSizeCha
isReadingRef.current = false
setIsReading(false)
speechSynthesis.cancel()
+ clearReadingMark?.()
}
if (!editor) return null
diff --git a/frontend/src/styles/index.css b/frontend/src/styles/index.css
index bcf51d3..19e09f2 100644
--- a/frontend/src/styles/index.css
+++ b/frontend/src/styles/index.css
@@ -1479,6 +1479,16 @@ button { cursor: pointer; font-family: inherit; }
50% { opacity: 0.45; }
}
+/* ── Read-aloud word highlight ────────────────────────── */
+
+/* Rendered by ReadingMark — tracks the TTS playhead word by word, or a whole sentence at a time when the voice emits no word-boundary events */
+.reading-word {
+ background: rgba(251, 191, 36, 0.35); /* warm amber */
+ border-radius: 2px;
+ outline: 1px solid rgba(251, 191, 36, 0.55);
+ outline-offset: 1px;
+}
+
/* ── Spell / Grammar Check ────────────────────────────── */
/* Wavy underlines on lint-marked text */