// ── ReadingMark — transient mark that follows the TTS playhead ────────────
// Wraps the currently highlighted range in <span class="reading-word">.
const ReadingMark = Mark.create({
  name: 'readingWord',

  inclusive: false,

  // No parse rules: this mark is transient UI only and is never restored
  // from HTML.
  parseHTML: () => [],

  renderHTML: ({ HTMLAttributes }) => {
    // Caller-supplied attributes are merged with the fixed highlight class.
    const attrs = mergeAttributes({ class: 'reading-word' }, HTMLAttributes)
    return ['span', attrs, 0]
  },
})
// ── Reading-word mark (TTS playhead highlight) ────────────────────────────
// Both callbacks dispatch mark-only transactions. They raise `applyingLints`
// around the dispatch so the resulting onUpdate is ignored, and they tag the
// transaction with `addToHistory: false` so playhead movement never ends up
// on the undo stack.

/**
 * Move the reading highlight to the document range [from, to).
 *
 * Any existing `readingWord` mark is removed first, so at most one range is
 * highlighted at a time. Positions are clamped to the current document size;
 * if the clamped range is empty only the old highlight is cleared.
 *
 * @param {number} from Absolute start position (inclusive).
 * @param {number} to   Absolute end position (exclusive).
 */
const applyReadingMark = useCallback((from, to) => {
  if (!editor) return
  const markType = editor.state.schema.marks.readingWord
  if (!markType) return

  const { tr } = editor.state
  const docSize = tr.doc.content.size

  // A single ranged removeMark strips the mark wherever it currently sits.
  tr.removeMark(0, docSize, markType)

  // The caller's positions may be stale if the document changed while audio
  // was playing; out-of-range positions would make addMark throw.
  const start = Math.max(0, Math.min(from, docSize))
  const end = Math.max(0, Math.min(to, docSize))
  if (start < end) tr.addMark(start, end, markType.create())

  // Nothing to clear and nothing to add — skip the no-op dispatch.
  if (!tr.docChanged) return

  tr.setMeta('addToHistory', false)
  applyingLints.current = true
  try {
    editor.view.dispatch(tr)
  } finally {
    // Always lower the guard, even if dispatch throws, so later real edits
    // are not silently ignored by onUpdate.
    applyingLints.current = false
  }
}, [editor])

/**
 * Remove the reading highlight from the whole document.
 * Does nothing when no `readingWord` mark is present.
 */
const clearReadingMark = useCallback(() => {
  if (!editor) return
  const markType = editor.state.schema.marks.readingWord
  if (!markType) return

  const { tr } = editor.state
  tr.removeMark(0, tr.doc.content.size, markType)
  if (!tr.docChanged) return

  tr.setMeta('addToHistory', false)
  applyingLints.current = true
  try {
    editor.view.dispatch(tr)
  } finally {
    applyingLints.current = false
  }
}, [editor])
// ── TTS position utilities ─────────────────────────────────────────────────

/**
 * Flatten the document range [from, to) into plain text plus a parallel map
 * from every character back to its absolute (doc-level) ProseMirror position.
 *
 * - Text characters map to their own document position.
 * - A '\n' with posMap entry `null` is inserted when a block node is entered
 *   after some text has already been collected.
 * - A ' ' with posMap entry `null` is inserted for inline leaf nodes (for
 *   example a hard break) so the words on either side are not fused together.
 *
 * @param {object} doc  Document node; only `nodesBetween` is used.
 * @param {number} from Start position (inclusive).
 * @param {number} to   End position (exclusive).
 * @returns {{ text: string, posMap: Array<number|null> }}
 *   `text[i]` sits at document position `posMap[i]`; separators map to `null`.
 */
function buildReadingMap(doc, from, to) {
  const chars = []
  const posMap = []

  doc.nodesBetween(from, to, (node, pos) => {
    if (node.isText) {
      const content = node.text
      // Clip the node to the requested range once instead of testing every
      // character individually.
      const first = Math.max(0, from - pos)
      const last = Math.min(content.length, to - pos)
      for (let i = first; i < last; i++) {
        chars.push(content[i])
        posMap.push(pos + i)
      }
      return false // text nodes have no children
    }

    const prev = chars.length > 0 ? chars[chars.length - 1] : null
    if (prev === null) return undefined // never start the text with a separator

    if (node.isBlock) {
      // Newline separator between blocks (collapsed when one is already there)
      if (prev !== '\n') {
        chars.push('\n')
        posMap.push(null)
      }
    } else if (node.isLeaf && !/\s/.test(prev)) {
      // Inline leaf without text: keep neighbouring words apart
      chars.push(' ')
      posMap.push(null)
    }
    return undefined // keep descending into children
  })

  return { text: chars.join(''), posMap }
}
// Letters, combining marks, digits and underscore in any script. Plain `\w`
// is ASCII-only and would cut "café" down to "caf".
const WORD_CHAR = /[\p{L}\p{M}\p{N}_]/u
// Apostrophes that count as part of a word when surrounded by word characters
const APOSTROPHES = "'\u2019"

// True when `ch` is a single word character (false for undefined / '').
function isWordChar(ch) {
  return typeof ch === 'string' && ch !== '' && WORD_CHAR.test(ch)
}

// True when text[i] belongs to a word: a word character, or an apostrophe
// flanked by word characters on both sides (as in "don't").
function isInWord(text, i) {
  const ch = text[i]
  if (isWordChar(ch)) return true
  if (typeof ch !== 'string' || !APOSTROPHES.includes(ch)) return false
  return i > 0 && isWordChar(text[i - 1]) && isWordChar(text[i + 1])
}

// Translate the character span [start, end) into a PM [pmFrom, pmTo) range,
// skipping separator entries (null) and never reading past the end of posMap.
function spanToPmRange(posMap, start, end) {
  let pmFrom = null
  let pmTo = null
  const stop = Math.min(end, posMap.length)
  for (let i = Math.max(0, start); i < stop; i++) {
    const pos = posMap[i]
    if (pos == null) continue
    if (pmFrom === null) pmFrom = pos
    pmTo = pos + 1
  }
  return { pmFrom, pmTo }
}

/**
 * Find the word containing `charIdx` and return its ProseMirror span.
 *
 * @param {string} text                 Flat text the index refers to.
 * @param {Array<number|null>} posMap   Parallel map: posMap[i] is the document
 *                                      position of text[i], or null.
 * @param {number} charIdx              Index into text / posMap.
 * @returns {{ pmFrom: number|null, pmTo: number|null }}
 *   Half-open range [pmFrom, pmTo), or both null when `charIdx` is out of
 *   range or does not fall inside a word.
 */
function wordAtIndex(text, posMap, charIdx) {
  const inRange = Number.isInteger(charIdx) && charIdx >= 0 && charIdx < text.length
  if (!inRange || !isInWord(text, charIdx)) return { pmFrom: null, pmTo: null }

  // Expand backward to word start
  let start = charIdx
  while (start > 0 && isInWord(text, start - 1)) start--

  // Expand forward to word end (exclusive)
  let end = charIdx + 1
  while (end < text.length && isInWord(text, end)) end++

  return spanToPmRange(posMap, start, end)
}

/**
 * Return the ProseMirror span covering a whole chunk (sentence-level fallback
 * highlight). Trailing punctuation / whitespace is trimmed so the range ends
 * at the last word character of the chunk.
 *
 * @param {string} text                 Flat text the chunk was cut from.
 * @param {Array<number|null>} posMap   Parallel position map for `text`.
 * @param {{ text: string, startOffset: number }} chunk
 *   The chunk's text and its start offset within `text`.
 * @returns {{ pmFrom: number|null, pmTo: number|null }}
 *   Half-open range, or both null when the chunk contains no mapped word
 *   characters.
 */
function chunkPmRange(text, posMap, chunk) {
  const start = chunk.startOffset
  let end = Math.min(start + chunk.text.length, text.length)
  while (end > start && !isWordChar(text[end - 1])) end--
  return spanToPmRange(posMap, start, end)
}
editor.state.doc.textBetween(from, end, '\n', ' ').trim() - if (!text) return + const end = editor.state.doc.content.size - // Split into sentences — Chrome stops an utterance after ~15 s if it's too long - const chunks = ( - text.match(/[^.!?…]+[.!?…]*['"'"]?\s*/g) - ?.map(s => s.trim()) - .filter(Boolean) - ) || [text] + // Build char array + PM-position map so boundary events can pinpoint words + const { text, posMap } = buildReadingMap(editor.state.doc, from, end) - speechSynthesis.cancel() // clear any leftover utterance + // Split into sentence-sized chunks (Chrome drops utterances > ~15 s). + // Track each chunk's start offset in `text` so onboundary charIndex can be + // translated back to an absolute position in the posMap. + const chunks = [] + let m + const re = /[^.!?…]+[.!?…]*['"'"]?\s*/g + while ((m = re.exec(text)) !== null) { + if (m[0].trim()) chunks.push({ text: m[0], startOffset: m.index }) + } + if (!chunks.length) { + if (text.trim()) chunks.push({ text, startOffset: 0 }) + else return + } + + speechSynthesis.cancel() isReadingRef.current = true setIsReading(true) @@ -122,13 +196,56 @@ export default function Toolbar({ editor, onImageUpload, fontSize, onFontSizeCha if (!isReadingRef.current || idx >= chunks.length) { isReadingRef.current = false setIsReading(false) + clearReadingMark?.() return } - const u = new SpeechSynthesisUtterance(chunks[idx++]) + const chunk = chunks[idx++] + const u = new SpeechSynthesisUtterance(chunk.text) if (chosenVoice) u.voice = chosenVoice - u.rate = ttsRateRef.current - u.onend = next - u.onerror = () => { isReadingRef.current = false; setIsReading(false) } + u.rate = ttsRateRef.current + + // ── Sentence-level highlight (reliable fallback) ── + // onstart fires on every utterance in every browser/voice combination. + // Highlight the whole sentence immediately so there's always visible + // tracking, even when word-boundary events aren't available. 
+ u.onstart = () => { + const { pmFrom, pmTo } = chunkPmRange(text, posMap, chunk) + if (pmFrom !== null) { + applyReadingMark?.(pmFrom, pmTo) + requestAnimationFrame(() => { + editor.view.dom + .querySelector('.reading-word') + ?.scrollIntoView({ behavior: 'smooth', block: 'center' }) + }) + } + } + + // ── Word-level highlight (best-effort via boundary events) ── + // Many Linux voices / Firefox don't fire onboundary; when they do, + // this overrides the sentence highlight with a tighter word highlight. + u.onboundary = (e) => { + if (e.name === 'sentence') return + const textIdx = chunk.startOffset + (e.charIndex ?? 0) + const { pmFrom, pmTo } = wordAtIndex(text, posMap, textIdx) + if (pmFrom !== null) { + applyReadingMark?.(pmFrom, pmTo) + requestAnimationFrame(() => { + editor.view.dom + .querySelector('.reading-word') + ?.scrollIntoView({ behavior: 'smooth', block: 'center' }) + }) + } + } + + u.onend = next + u.onerror = (e) => { + // 'interrupted' just means cancel() was called — not a real error + if (e.error !== 'interrupted') { + isReadingRef.current = false + setIsReading(false) + clearReadingMark?.() + } + } speechSynthesis.speak(u) } next() @@ -138,6 +255,7 @@ export default function Toolbar({ editor, onImageUpload, fontSize, onFontSizeCha isReadingRef.current = false setIsReading(false) speechSynthesis.cancel() + clearReadingMark?.() } if (!editor) return null diff --git a/frontend/src/styles/index.css b/frontend/src/styles/index.css index bcf51d3..19e09f2 100644 --- a/frontend/src/styles/index.css +++ b/frontend/src/styles/index.css @@ -1479,6 +1479,16 @@ button { cursor: pointer; font-family: inherit; } 50% { opacity: 0.45; } } +/* ── Read-aloud word highlight ────────────────────────── */ + +/* Rendered by ReadingMark — tracks the TTS playhead word by word */ +.reading-word { + background: rgba(251, 191, 36, 0.35); /* warm amber */ + border-radius: 2px; + outline: 1px solid rgba(251, 191, 36, 0.55); + outline-offset: 1px; +} + /* ── Spell / 
Grammar Check ────────────────────────────── */ /* Wavy underlines on lint-marked text */