feat: read-aloud highlights and scrolls to current sentence/word
- Add ReadingMark TipTap extension (transient, never saved to DB) that renders the active TTS passage as <span class='reading-word'>
- Build a char→ProseMirror position map on read-start so boundary events can pinpoint exact document positions
- Use onstart (fires on every utterance/voice) for reliable sentence-level highlight; onboundary overrides with word-level when the voice supports it
- Auto-scroll the highlighted span into view (smooth, centred) on each update
- Strip readingWord marks from JSON alongside lintError before saving
- Guard all mark dispatches with applyingLints flag to suppress spurious saves and lint re-checks

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
6682810c00
commit
4db65151c8
@ -27,6 +27,16 @@ const CustomImage = Image.extend({
|
|||||||
},
|
},
|
||||||
}).configure({ allowBase64: false, inline: false })
|
}).configure({ allowBase64: false, inline: false })
|
||||||
|
|
||||||
|
// ── ReadingMark — transient mark that follows the TTS playhead ────────────
|
||||||
|
const ReadingMark = Mark.create({
  name: 'readingWord',
  inclusive: false,

  // No parse rules: the mark is transient UI and is never restored from HTML.
  parseHTML() {
    return []
  },

  // Rendered as <span class="reading-word">, merged with any incoming attributes.
  renderHTML({ HTMLAttributes }) {
    const spanAttrs = mergeAttributes({ class: 'reading-word' }, HTMLAttributes)
    return ['span', spanAttrs, 0]
  },
})
|
||||||
|
|
||||||
// ── LintMark — transient mark for spelling / grammar underlines ────────────
|
// ── LintMark — transient mark for spelling / grammar underlines ────────────
|
||||||
const LintMark = Mark.create({
|
const LintMark = Mark.create({
|
||||||
name: 'lintError',
|
name: 'lintError',
|
||||||
@ -89,12 +99,12 @@ function clearLintMarks(editor) {
|
|||||||
editor.view.dispatch(tr)
|
editor.view.dispatch(tr)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Strip lint marks from the JSON before saving (they're UI-only)
|
// Strip transient UI marks (lint, reading playhead) from JSON before saving
|
||||||
function stripLintMarks(node) {
|
function stripLintMarks(node) {
|
||||||
if (!node) return node
|
if (!node) return node
|
||||||
const n = { ...node }
|
const n = { ...node }
|
||||||
if (n.marks) {
|
if (n.marks) {
|
||||||
n.marks = n.marks.filter(m => m.type !== 'lintError')
|
n.marks = n.marks.filter(m => m.type !== 'lintError' && m.type !== 'readingWord')
|
||||||
if (!n.marks.length) delete n.marks
|
if (!n.marks.length) delete n.marks
|
||||||
}
|
}
|
||||||
if (n.content) n.content = n.content.map(stripLintMarks)
|
if (n.content) n.content = n.content.map(stripLintMarks)
|
||||||
@ -114,7 +124,7 @@ const Editor = forwardRef(function Editor(
|
|||||||
const lintStatusRef = useRef('idle')
|
const lintStatusRef = useRef('idle')
|
||||||
const lintDebounce = useRef(null)
|
const lintDebounce = useRef(null)
|
||||||
const runLintRef = useRef(null) // always holds latest runLint, safe to call from onUpdate closure
|
const runLintRef = useRef(null) // always holds latest runLint, safe to call from onUpdate closure
|
||||||
const applyingLints = useRef(false) // true while we're dispatching mark transactions — skip onUpdate
|
const applyingLints = useRef(false) // true while dispatching mark transactions — suppresses onUpdate
|
||||||
|
|
||||||
const editor = useEditor({
|
const editor = useEditor({
|
||||||
extensions: [
|
extensions: [
|
||||||
@ -122,6 +132,7 @@ const Editor = forwardRef(function Editor(
|
|||||||
Underline,
|
Underline,
|
||||||
CustomImage,
|
CustomImage,
|
||||||
LintMark,
|
LintMark,
|
||||||
|
ReadingMark,
|
||||||
Placeholder.configure({ placeholder: 'Begin your story here…' }),
|
Placeholder.configure({ placeholder: 'Begin your story here…' }),
|
||||||
CharacterCount,
|
CharacterCount,
|
||||||
TextAlign.configure({ types: ['heading', 'paragraph'] }),
|
TextAlign.configure({ types: ['heading', 'paragraph'] }),
|
||||||
@ -221,6 +232,42 @@ const Editor = forwardRef(function Editor(
|
|||||||
// Clean up the debounce timer on unmount
|
// Clean up the debounce timer on unmount
|
||||||
useEffect(() => () => clearTimeout(lintDebounce.current), [])
|
useEffect(() => () => clearTimeout(lintDebounce.current), [])
|
||||||
|
|
||||||
|
// ── Reading-word mark (TTS playhead highlight) ────────────────────────────
|
||||||
|
// Called by Toolbar on utterance start and on each word-boundary event. Both operations are wrapped
|
||||||
|
// in applyingLints so the resulting onUpdate dispatch is silently ignored —
|
||||||
|
// the same guard that protects lint mark dispatches also covers these.
|
||||||
|
/**
 * Move the TTS playhead highlight to the document range [from, to).
 *
 * Removes every existing `readingWord` mark and adds a new one over the
 * requested range in a single transaction.
 *
 * @param {number} from - Absolute ProseMirror start position.
 * @param {number} to - Absolute ProseMirror end position (exclusive).
 */
const applyReadingMark = useCallback((from, to) => {
  // TTS callbacks can fire late; ignore them once the editor is gone.
  if (!editor || editor.isDestroyed) return
  const mt = editor.state.schema.marks.readingWord
  if (!mt) return

  // Callers compute positions from a map built when reading started, so the
  // document may have changed since: clamp to the current document bounds.
  const docSize = editor.state.doc.content.size
  const start = Math.max(0, Math.min(from, docSize))
  const end = Math.max(0, Math.min(to, docSize))

  const { tr } = editor.state
  // removeMark accepts a MarkType and strips every mark of that type in range.
  tr.removeMark(0, docSize, mt)
  if (start < end) tr.addMark(start, end, mt.create())
  // Transient highlight: keep it out of the undo stack (meta honoured by
  // prosemirror-history; harmless if no history plugin is installed).
  tr.setMeta('addToHistory', false)

  applyingLints.current = true
  try {
    editor.view.dispatch(tr)
  } finally {
    // Always release the guard, otherwise one failed dispatch would keep
    // onUpdate suppressed for the rest of the session.
    applyingLints.current = false
  }
}, [editor])
|
||||||
|
|
||||||
|
/**
 * Remove the TTS playhead highlight from the whole document.
 *
 * Dispatched under the `applyingLints` guard, like the other mark-only
 * transactions in this component.
 */
const clearReadingMark = useCallback(() => {
  // TTS callbacks can fire late; ignore them once the editor is gone.
  if (!editor || editor.isDestroyed) return
  const mt = editor.state.schema.marks.readingWord
  if (!mt) return

  const { tr } = editor.state
  // removeMark accepts a MarkType and strips every mark of that type in range.
  tr.removeMark(0, editor.state.doc.content.size, mt)
  // Transient highlight: keep it out of the undo stack (meta honoured by
  // prosemirror-history; harmless if no history plugin is installed).
  tr.setMeta('addToHistory', false)

  applyingLints.current = true
  try {
    editor.view.dispatch(tr)
  } finally {
    // Always release the guard, otherwise one failed dispatch would keep
    // onUpdate suppressed for the rest of the session.
    applyingLints.current = false
  }
}, [editor])
|
||||||
|
|
||||||
// ── Popover on clicking a lint mark ──────────────────────────────────────
|
// ── Popover on clicking a lint mark ──────────────────────────────────────
|
||||||
function handleEditorClick(e) {
|
function handleEditorClick(e) {
|
||||||
if (!editor) return
|
if (!editor) return
|
||||||
@ -281,6 +328,8 @@ const Editor = forwardRef(function Editor(
|
|||||||
onLint={runLint}
|
onLint={runLint}
|
||||||
lintStatus={lintStatus}
|
lintStatus={lintStatus}
|
||||||
lintCount={lintCount}
|
lintCount={lintCount}
|
||||||
|
applyReadingMark={applyReadingMark}
|
||||||
|
clearReadingMark={clearReadingMark}
|
||||||
/>
|
/>
|
||||||
<div className="editor-wrap" onClick={handleEditorClick}>
|
<div className="editor-wrap" onClick={handleEditorClick}>
|
||||||
<EditorContent editor={editor} className="editor-body" spellCheck={false} />
|
<EditorContent editor={editor} className="editor-body" spellCheck={false} />
|
||||||
|
|||||||
@ -1,6 +1,73 @@
|
|||||||
import { useRef, useState, useEffect, useCallback } from 'react'
|
import { useRef, useState, useEffect, useCallback } from 'react'
|
||||||
|
|
||||||
export default function Toolbar({ editor, onImageUpload, fontSize, onFontSizeChange, onLint, lintStatus, lintCount }) {
|
// ── TTS position utilities ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Build a flat character array + parallel ProseMirror-position map for the
|
||||||
|
// document range [from, to). Positions are absolute (doc-level).
|
||||||
|
// Block boundaries get a '\n' character with posMap entry = null.
|
||||||
|
/**
 * Flatten the document range [from, to) into speakable text plus a parallel
 * position map.
 *
 * @param {object} doc - ProseMirror document node (must provide `nodesBetween`).
 * @param {number} from - Absolute start position (inclusive).
 * @param {number} to - Absolute end position (exclusive).
 * @returns {{ text: string, posMap: Array<number|null> }} `posMap[i]` is the
 *   absolute document position of `text[i]`, or `null` for synthetic
 *   separator characters that have no position in the document.
 */
function buildReadingMap(doc, from, to) {
  const chars = []
  const posMap = []

  // Append a synthetic character that does not exist in the document.
  const pushSeparator = (ch) => {
    chars.push(ch)
    posMap.push(null)
  }

  doc.nodesBetween(from, to, (node, pos) => {
    if (node.isText) {
      // Clip the text node to the requested range up front.
      const first = Math.max(0, from - pos)
      const last = Math.min(node.text.length, to - pos)
      for (let i = first; i < last; i++) {
        chars.push(node.text[i])
        posMap.push(pos + i)
      }
      return false // text nodes have no children
    }

    if (node.isBlock) {
      // One newline between blocks; never a leading or doubled separator.
      if (chars.length > 0 && chars[chars.length - 1] !== '\n') pushSeparator('\n')
      return undefined
    }

    // Inline leaf (e.g. a hard break): emit a space so the words on either
    // side are not fused into one, matching textBetween's ' ' leaf text.
    if (node.isLeaf && chars.length > 0) pushSeparator(' ')
    return undefined
  })

  return { text: chars.join(''), posMap }
}
|
||||||
|
|
||||||
|
// Given charIdx (an index into text/posMap), return the PM [from, to) span
|
||||||
|
// of the word that contains that character.
|
||||||
|
/**
 * Find the document span of the word containing `text[charIdx]`.
 *
 * A word is a run of Unicode letters, combining marks, digits or underscores
 * (tested per UTF-16 unit), plus an apostrophe that sits between two such
 * characters, so "don't" is one word and "café" is not cut at the accent.
 *
 * @param {string} text - Flattened reading text.
 * @param {Array<number|null>} posMap - Document position of each `text` char,
 *   or `null` for synthetic separators.
 * @param {number} charIdx - Index into `text` / `posMap`.
 * @returns {{ pmFrom: number|null, pmTo: number|null }} Half-open document
 *   range of the word, or both `null` when `charIdx` is out of range or not
 *   on a word character.
 */
function wordAtIndex(text, posMap, charIdx) {
  const wordChar = /[\p{L}\p{M}\p{N}_]/u

  // True when text[i] belongs to a word (bounds-safe).
  const isWordAt = (i) => {
    const ch = text[i]
    if (typeof ch !== 'string') return false
    if (wordChar.test(ch)) return true
    if (ch !== "'" && ch !== '’') return false
    // An apostrophe only counts when flanked by word characters on both sides.
    const prev = text[i - 1]
    const next = text[i + 1]
    return typeof prev === 'string' && typeof next === 'string' &&
      wordChar.test(prev) && wordChar.test(next)
  }

  if (!Number.isInteger(charIdx) || !isWordAt(charIdx)) {
    return { pmFrom: null, pmTo: null }
  }

  // Expand outward from charIdx to the word's [start, end) bounds.
  let start = charIdx
  while (isWordAt(start - 1)) start--
  let end = charIdx + 1
  while (isWordAt(end)) end++

  // Map the character span to document positions, skipping synthetic chars.
  let pmFrom = null
  let pmTo = null
  for (let i = start; i < end && i < posMap.length; i++) {
    const p = posMap[i]
    if (p === null || p === undefined) continue
    if (pmFrom === null) pmFrom = p
    pmTo = p + 1
  }
  return { pmFrom, pmTo }
}
|
||||||
|
|
||||||
|
// Return the PM span for an entire chunk (used for sentence-level fallback highlight).
|
||||||
|
// Trims trailing punctuation/whitespace so the highlight ends at the last real word.
|
||||||
|
/**
 * Document span covering one spoken chunk, for the sentence-level highlight.
 *
 * Trailing characters that are not Unicode letters, combining marks, digits
 * or underscores are trimmed, so the highlight ends on the last real word.
 *
 * @param {string} text - Flattened reading text.
 * @param {Array<number|null>} posMap - Document position of each `text` char,
 *   or `null` for synthetic separators.
 * @param {{ text: string, startOffset: number }} chunk - The chunk and its
 *   start index within `text`.
 * @returns {{ pmFrom: number|null, pmTo: number|null }} Half-open document
 *   range, or both `null` when the chunk contains no mappable word character.
 */
function chunkPmRange(text, posMap, chunk) {
  const wordChar = /[\p{L}\p{M}\p{N}_]/u
  const start = chunk.startOffset

  // Never read past the end of the text or the position map.
  let end = Math.min(start + chunk.text.length, text.length, posMap.length)
  while (end > start && !wordChar.test(text[end - 1])) end--

  let pmFrom = null
  let pmTo = null
  for (let i = Math.max(0, start); i < end; i++) {
    const p = posMap[i]
    if (p === null || p === undefined) continue // synthetic separator
    if (pmFrom === null) pmFrom = p
    pmTo = p + 1
  }
  return { pmFrom, pmTo }
}
|
||||||
|
|
||||||
|
// ──────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export default function Toolbar({ editor, onImageUpload, fontSize, onFontSizeChange, onLint, lintStatus, lintCount, applyReadingMark, clearReadingMark }) {
|
||||||
const fileRef = useRef()
|
const fileRef = useRef()
|
||||||
const [isReading, setIsReading] = useState(false)
|
const [isReading, setIsReading] = useState(false)
|
||||||
const isReadingRef = useRef(false) // ref so closure in next() always sees current value
|
const isReadingRef = useRef(false) // ref so closure in next() always sees current value
|
||||||
@ -98,20 +165,27 @@ export default function Toolbar({ editor, onImageUpload, fontSize, onFontSizeCha
|
|||||||
function startReading() {
|
function startReading() {
|
||||||
if (!('speechSynthesis' in window)) return
|
if (!('speechSynthesis' in window)) return
|
||||||
|
|
||||||
// Grab text from cursor (or selection start) to end of document
|
|
||||||
const { from } = editor.state.selection
|
const { from } = editor.state.selection
|
||||||
const end = editor.state.doc.content.size
|
const end = editor.state.doc.content.size
|
||||||
const text = editor.state.doc.textBetween(from, end, '\n', ' ').trim()
|
|
||||||
if (!text) return
|
|
||||||
|
|
||||||
// Split into sentences — Chrome stops an utterance after ~15 s if it's too long
|
// Build char array + PM-position map so boundary events can pinpoint words
|
||||||
const chunks = (
|
const { text, posMap } = buildReadingMap(editor.state.doc, from, end)
|
||||||
text.match(/[^.!?…]+[.!?…]*['"'"]?\s*/g)
|
|
||||||
?.map(s => s.trim())
|
|
||||||
.filter(Boolean)
|
|
||||||
) || [text]
|
|
||||||
|
|
||||||
speechSynthesis.cancel() // clear any leftover utterance
|
// Split into sentence-sized chunks (Chrome drops utterances > ~15 s).
|
||||||
|
// Track each chunk's start offset in `text` so onboundary charIndex can be
|
||||||
|
// translated back to an absolute position in the posMap.
|
||||||
|
const chunks = []
|
||||||
|
let m
|
||||||
|
const re = /[^.!?…]+[.!?…]*['"'"]?\s*/g
|
||||||
|
while ((m = re.exec(text)) !== null) {
|
||||||
|
if (m[0].trim()) chunks.push({ text: m[0], startOffset: m.index })
|
||||||
|
}
|
||||||
|
if (!chunks.length) {
|
||||||
|
if (text.trim()) chunks.push({ text, startOffset: 0 })
|
||||||
|
else return
|
||||||
|
}
|
||||||
|
|
||||||
|
speechSynthesis.cancel()
|
||||||
isReadingRef.current = true
|
isReadingRef.current = true
|
||||||
setIsReading(true)
|
setIsReading(true)
|
||||||
|
|
||||||
@ -122,13 +196,56 @@ export default function Toolbar({ editor, onImageUpload, fontSize, onFontSizeCha
|
|||||||
if (!isReadingRef.current || idx >= chunks.length) {
|
if (!isReadingRef.current || idx >= chunks.length) {
|
||||||
isReadingRef.current = false
|
isReadingRef.current = false
|
||||||
setIsReading(false)
|
setIsReading(false)
|
||||||
|
clearReadingMark?.()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
const u = new SpeechSynthesisUtterance(chunks[idx++])
|
const chunk = chunks[idx++]
|
||||||
|
const u = new SpeechSynthesisUtterance(chunk.text)
|
||||||
if (chosenVoice) u.voice = chosenVoice
|
if (chosenVoice) u.voice = chosenVoice
|
||||||
u.rate = ttsRateRef.current
|
u.rate = ttsRateRef.current
|
||||||
|
|
||||||
|
// ── Sentence-level highlight (reliable fallback) ──
|
||||||
|
// onstart fires on every utterance in every browser/voice combination.
|
||||||
|
// Highlight the whole sentence immediately so there's always visible
|
||||||
|
// tracking, even when word-boundary events aren't available.
|
||||||
|
u.onstart = () => {
|
||||||
|
const { pmFrom, pmTo } = chunkPmRange(text, posMap, chunk)
|
||||||
|
if (pmFrom !== null) {
|
||||||
|
applyReadingMark?.(pmFrom, pmTo)
|
||||||
|
requestAnimationFrame(() => {
|
||||||
|
editor.view.dom
|
||||||
|
.querySelector('.reading-word')
|
||||||
|
?.scrollIntoView({ behavior: 'smooth', block: 'center' })
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Word-level highlight (best-effort via boundary events) ──
|
||||||
|
// Many Linux voices / Firefox don't fire onboundary; when they do,
|
||||||
|
// this overrides the sentence highlight with a tighter word highlight.
|
||||||
|
u.onboundary = (e) => {
|
||||||
|
if (e.name === 'sentence') return
|
||||||
|
const textIdx = chunk.startOffset + (e.charIndex ?? 0)
|
||||||
|
const { pmFrom, pmTo } = wordAtIndex(text, posMap, textIdx)
|
||||||
|
if (pmFrom !== null) {
|
||||||
|
applyReadingMark?.(pmFrom, pmTo)
|
||||||
|
requestAnimationFrame(() => {
|
||||||
|
editor.view.dom
|
||||||
|
.querySelector('.reading-word')
|
||||||
|
?.scrollIntoView({ behavior: 'smooth', block: 'center' })
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
u.onend = next
|
u.onend = next
|
||||||
u.onerror = () => { isReadingRef.current = false; setIsReading(false) }
|
u.onerror = (e) => {
|
||||||
|
// 'interrupted' just means cancel() was called — not a real error
|
||||||
|
if (e.error !== 'interrupted') {
|
||||||
|
isReadingRef.current = false
|
||||||
|
setIsReading(false)
|
||||||
|
clearReadingMark?.()
|
||||||
|
}
|
||||||
|
}
|
||||||
speechSynthesis.speak(u)
|
speechSynthesis.speak(u)
|
||||||
}
|
}
|
||||||
next()
|
next()
|
||||||
@ -138,6 +255,7 @@ export default function Toolbar({ editor, onImageUpload, fontSize, onFontSizeCha
|
|||||||
isReadingRef.current = false
|
isReadingRef.current = false
|
||||||
setIsReading(false)
|
setIsReading(false)
|
||||||
speechSynthesis.cancel()
|
speechSynthesis.cancel()
|
||||||
|
clearReadingMark?.()
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!editor) return null
|
if (!editor) return null
|
||||||
|
|||||||
@ -1479,6 +1479,16 @@ button { cursor: pointer; font-family: inherit; }
|
|||||||
50% { opacity: 0.45; }
|
50% { opacity: 0.45; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ── Read-aloud word highlight ────────────────────────── */
|
||||||
|
|
||||||
|
/* Rendered by ReadingMark — tracks the TTS playhead sentence by sentence, or word by word when the voice reports word boundaries */
|
||||||
|
.reading-word {
  /* warm amber fill with a slightly stronger amber ring around it */
  border-radius: 2px;
  background: rgba(251, 191, 36, 0.35);
  outline-width: 1px;
  outline-style: solid;
  outline-color: rgba(251, 191, 36, 0.55);
  outline-offset: 1px;
}
|
||||||
|
|
||||||
/* ── Spell / Grammar Check ────────────────────────────── */
|
/* ── Spell / Grammar Check ────────────────────────────── */
|
||||||
|
|
||||||
/* Wavy underlines on lint-marked text */
|
/* Wavy underlines on lint-marked text */
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user