import { action, computed, IReactionDisposer, makeObservable, observable, ObservableMap, reaction, runInAction } from 'mobx'; import { observer } from 'mobx-react'; import * as Pdfjs from 'pdfjs-dist'; import { GlobalWorkerOptions } from 'pdfjs-dist/build/pdf.mjs'; import * as PDFJSViewer from 'pdfjs-dist/web/pdf_viewer.mjs'; import * as React from 'react'; import ReactLoading from 'react-loading'; import { addStyleSheet, addStyleSheetRule, clearStyleSheetRules, ClientUtils, returnAll, returnFalse, returnNone, returnZero, smoothScroll } from '../../../ClientUtils'; import { CreateLinkToActiveAudio, Doc, DocListCast, Opt } from '../../../fields/Doc'; import { DocData, Height } from '../../../fields/DocSymbols'; import { Id } from '../../../fields/FieldSymbols'; import { InkTool } from '../../../fields/InkField'; import { Cast, NumCast, StrCast } from '../../../fields/Types'; import { TraceMobx } from '../../../fields/util'; import { emptyFunction, numberRange, unimplementedFunction } from '../../../Utils'; import { DocUtils } from '../../documents/DocUtils'; import { SnappingManager } from '../../util/SnappingManager'; import { Transform } from '../../util/Transform'; import { MarqueeOptionsMenu } from '../collections/collectionFreeForm'; import { CollectionFreeFormView } from '../collections/collectionFreeForm/CollectionFreeFormView'; import { MarqueeAnnotator } from '../MarqueeAnnotator'; import { DocumentView } from '../nodes/DocumentView'; import { FieldViewProps } from '../nodes/FieldView'; import { FocusViewOptions } from '../nodes/FocusViewOptions'; import { LinkInfo } from '../nodes/LinkDocPreview'; import { PDFBox } from '../nodes/PDFBox'; import { ObservableReactComponent } from '../ObservableReactComponent'; import { StyleProp } from '../StyleProp'; import { AnchorMenu } from './AnchorMenu'; import { Annotation } from './Annotation'; import { GPTPopup } from './GPTPopup/GPTPopup'; import './PDFViewer.scss'; import { DocumentViewProps } from 
'../nodes/DocumentContentsView';

if (window?.Worker) GlobalWorkerOptions.workerSrc = 'files/node_modules/pdfjs-dist/build/pdf.worker.min.mjs'; // npm start/etc use copyfiles to copy the worker from the pdfjs-dist package to the public folder
export * from 'pdfjs-dist/build/pdf.mjs';

/**
 * Props consumed by PDFViewer on top of the generic FieldViewProps.
 */
interface IViewerProps extends FieldViewProps {
    pdfBox: PDFBox; // owning box; asked for an anchor after scrolling pauses and for its screen-to-local transform
    Doc: Doc;
    dataDoc: Doc; // annotations are read from dataDoc[fieldKey + '_annotations']
    layoutDoc: Doc; // holds the scroll position (_layout_scrollTop) and zoom (_freeform_scale)
    fieldKey: string;
    pdf: Pdfjs.PDFDocumentProxy; // already-loaded pdf.js document
    url: string;
    sidebarAddDoc: (doc: Doc | Doc[], sidebarKey?: string | undefined) => boolean;
    loaded: (p: { width: number; height: number }, pages: number) => void; // called with the first page's viewport and the page count
    // eslint-disable-next-line no-use-before-define
    setPdfViewer: (view: PDFViewer) => void;
    anchorMenuClick?: () => undefined | ((anchor: Doc) => void);
    crop: (region: Doc | undefined, addCrop?: boolean) => Doc | undefined;
}

/**
 * One candidate produced by the fuzzy text search.
 */
interface FuzzySearchResult {
    pageIndex: number; // zero-based page the text was found on
    matchIndex: number; // index of the text item (or reconstructed paragraph) within that page
    text: string; // the matched text as extracted from the page
    score?: number; // similarity in [0, 1]; higher is better
    isParagraph?: boolean; // true when `text` is a reconstructed paragraph rather than a single text item
}

/**
 * Handles rendering and virtualization of the pdf
 */
@observer
export class PDFViewer extends ObservableReactComponent<IViewerProps> {
    static _annotationStyle = addStyleSheet().sheet;

    constructor(props: IViewerProps) {
        super(props);
        makeObservable(this);
    }

    @observable _pageSizes: { width: number; height: number }[] = []; // per-page viewports, measured at scale = devicePixelRatio
    @observable _savedAnnotations = new ObservableMap<number, HTMLDivElement[]>();
    @observable _textSelecting = true;
    @observable _showWaiting = true; // cleared when pdf.js reports 'pagerendered'
    @observable Index: number = -1; // cursor used by prevAnnotation / nextAnnotation
    @observable private _loading = false;
    @observable private _fuzzySearchEnabled = true;
    @observable private _fuzzySearchResults: FuzzySearchResult[] = [];
    @observable private _currentFuzzyMatchIndex = 0;

    private _pdfViewer!: PDFJSViewer.PDFViewer;
    private _styleRule: number | undefined; // stylesheet rule for making hyperlinks clickable
    private _retries = 0; // number of times tried to create the PDF viewer
    private _setPreviewCursor: undefined | ((x: number, y: number, drag: boolean, hide: boolean, doc: Opt<Doc>) => void);
private _marqueeref = React.createRef<MarqueeAnnotator>();
private _annotationLayer: React.RefObject<HTMLDivElement> = React.createRef();
private _disposers: { [name: string]: IReactionDisposer } = {};
private _viewer: React.RefObject<HTMLDivElement> = React.createRef();
_mainCont: React.RefObject<HTMLDivElement> = React.createRef();
private _selectionText: string = '';
private _selectionContent: DocumentFragment | undefined;
private _downX: number = 0;
private _downY: number = 0;
private _lastSearch = false;
private _viewerIsSetup = false;
private _ignoreScroll = false;
private _initialScroll: { loc: Opt<number>; easeFunc: 'linear' | 'ease' | undefined } | undefined;
private _forcedScroll = true;
_getAnchor: (savedAnnotations: Opt<ObservableMap<number, HTMLDivElement[]>>, addAsAnnotation: boolean) => Opt<Doc> = () => undefined;

selectionText = () => this._selectionText;
selectionContent = () => this._selectionContent;

@observable isAnnotating = false;

/**
 * Annotation docs stored under `<fieldKey>_annotations` on the data doc, narrowed by the
 * child filters supplied through props.
 */
@computed get allAnnotations() {
    const annotationKey = this._props.fieldKey + '_annotations'; // key where data is stored
    return DocUtils.FilterDocs(DocListCast(this._props.dataDoc[annotationKey]), this._props.childFilters(), this._props.childFiltersByRanges());
}

/** The subset of annotations flagged with `text_inlineAnnotations`. */
@computed get inlineTextAnnotations() {
    return this.allAnnotations.filter(a => a.text_inlineAnnotations);
}

/**
 * Starts viewer setup and installs the reactions that keep the viewer in sync with
 * auto-height, selection and the current-page field.
 */
componentDidMount() {
    runInAction(() => {
        this._showWaiting = true;
    });
    this.setupPdfJsViewer();

    // the container is only meant to scroll vertically: undo any horizontal scroll immediately
    this._mainCont.current?.addEventListener('scroll', e => {
        (e.target as HTMLElement).scrollLeft = 0;
    });

    this._disposers.layout_autoHeight = reaction(
        () => this._props.layoutDoc._layout_autoHeight,
        layoutAutoHeight => {
            if (layoutAutoHeight) {
                const nativeHeight = NumCast(this._props.Doc[this._props.fieldKey + '_nativeHeight']);
                this._props.layoutDoc._nativeHeight = nativeHeight;
                this._props.setHeight?.(nativeHeight * (this._props.NativeDimScaling?.() || 1));
            }
        }
    );

    this._disposers.selected = reaction(
        () => this._props.isSelected(),
        () => DocumentView.Selected().length === 1 && this.setupPdfJsViewer(),
        { fireImmediately: true }
    );

    this._disposers.curPage = reaction(
        () => Cast(this._props.Doc._layout_curPage, 'number', null),
        page => page !== undefined && page !== this._pdfViewer?.currentPageNumber && this.gotoPage(page),
        { fireImmediately: true }
    );
}

/**
 * Disposes every reaction and releases everything this component registered outside of React:
 * the document-level copy and pointerup listeners, the pending scroll timer and any running
 * smooth scroll.
 */
componentWillUnmount = () => {
    Object.values(this._disposers).forEach(disposer => disposer?.());
    document.removeEventListener('copy', this.copy, true);
    document.removeEventListener('pointerup', this.onSelectEnd); // registered by onPointerDown while text is being selected
    this._scrollTimer && clearTimeout(this._scrollTimer); // otherwise onScroll's timer fires after unmount
    this._scrollStopper?.();
};

/**
 * Document-level 'copy' handler. When this view's content is active it puts the selected text
 * on the clipboard and, if an anchor can be created for the selection, the anchor's id under
 * the 'dash/pdfAnchor' type.
 */
copy = (e: ClipboardEvent) => {
    if (this._props.isContentActive() && e.clipboardData) {
        e.clipboardData.setData('text/plain', this._selectionText);
        const anchor = this._getAnchor(undefined, false);
        if (anchor) {
            anchor.textCopied = true;
            e.clipboardData.setData('dash/pdfAnchor', anchor[DocData][Id]);
        }
        e.preventDefault();
        e.stopPropagation();
    }
};

/** Sum of all page heights divided by the current device pixel ratio. */
@computed get _scrollHeight() {
    return this._pageSizes.reduce((size, page) => size + page.height, 0) / window.devicePixelRatio;
}

/**
 * Measures every page once (viewport at scale = devicePixelRatio), reports the first page's
 * size and the page count through `loaded`, then creates the pdf.js viewer.
 * Does nothing when page sizes are already known.
 */
initialLoad = () => {
    if (this._pageSizes.length !== 0) return;
    const devicePixelRatio = window.devicePixelRatio;
    document.documentElement?.style.setProperty('--devicePixelRatio', devicePixelRatio.toString()); // set so that css can use this to adjust various PDFJs divs
    const viewports = numberRange(this._props.pdf.numPages).map(i => this._props.pdf.getPage(i + 1).then(page => page.getViewport({ scale: devicePixelRatio })));
    Promise.all(viewports)
        .then(
            action(pages => {
                this._pageSizes = pages;
                this._props.loaded(pages[0], this._props.pdf.numPages);
                this.createPdfViewer();
            })
        )
        // a page that fails to load must not surface as an unhandled promise rejection
        .catch(err => console.error('PDFViewer: unable to measure pdf pages', err));
};

_scrollStopper: undefined | (() => void);

// scrolls to focus on a nested annotation document. if this is part a link preview then it will jump to the scroll location,
// otherwise it will scroll smoothly.
scrollFocus = (doc: Doc, scrollTop: number, options: FocusViewOptions) => {
    const mainCont = this._mainCont.current;
    let focusSpeed: Opt<number>; // only set when a smooth scroll is actually started
    if (doc !== this._props.Doc && mainCont) {
        const windowHeight = this._props.PanelHeight() / (this._props.NativeDimScaling?.() || 1);
        const scrollTo = ClientUtils.scrollIntoView(scrollTop, doc[Height](), NumCast(this._props.layoutDoc._layout_scrollTop), windowHeight, windowHeight * 0.1, this._scrollHeight);
        if (scrollTo !== undefined && scrollTo !== this._props.layoutDoc._layout_scrollTop) {
            if (!this._pdfViewer) {
                // viewer not created yet: remember the target so pagesinit can apply it
                this._initialScroll = { loc: scrollTo, easeFunc: options.easeFunc };
            } else if (!options.instant) {
                focusSpeed = options.zoomTime ?? 500;
                this._scrollStopper = smoothScroll(focusSpeed, mainCont, scrollTo, options.easeFunc, this._scrollStopper);
            } else {
                this._mainCont.current?.scrollTo({ top: Math.abs(scrollTo || 0) });
            }
        }
    } else {
        this._initialScroll = { loc: NumCast(this._props.layoutDoc._layout_scrollTop), easeFunc: options.easeFunc };
    }
    return focusSpeed;
};

/** Forwards to the `crop` callback supplied through props. */
crop = (region: Doc | undefined, addCrop?: boolean) => this._props.crop(region, addCrop);

/**
 * One-time setup: registers this viewer through `setPdfViewer` and kicks off the initial page
 * measurement. Subsequent calls are no-ops.
 */
@action
setupPdfJsViewer = () => {
    if (this._viewerIsSetup) return;
    this._viewerIsSetup = true;
    this._showWaiting = true;
    this._props.setPdfViewer(this);
    this.initialLoad();
};

/**
 * Handler for the pdf.js 'pagesinit' event. Installs the reactions that push the layout doc's
 * scale and scroll position into the viewer, then applies any scroll request that arrived
 * before the viewer existed.
 */
pagesinit = () => {
    document.removeEventListener('pagesinit', this.pagesinit);
    // only consulted by the immediate first run of the scroll reaction below; cleared right after
    let quickScroll: { loc?: string; easeFunc?: 'ease' | 'linear' } | undefined = {
        loc: this._initialScroll ? this._initialScroll.loc?.toString() : '',
        easeFunc: this._initialScroll ? this._initialScroll.easeFunc : undefined,
    };

    // if pagesinit fires again, don't leak the reactions created by the previous run
    this._disposers.scale?.();
    this._disposers.scroll?.();

    this._disposers.scale = reaction(
        () => NumCast(this._props.layoutDoc._freeform_scale, 1),
        scale => {
            this._pdfViewer.currentScaleValue = scale + '';
        },
        { fireImmediately: true }
    );
    this._disposers.scroll = reaction(
        () => Math.abs(NumCast(this._props.Doc._layout_scrollTop)),
        pos => {
            if (!this._ignoreScroll) {
                this._showWaiting && this.setupPdfJsViewer();
                const viewTrans = quickScroll?.loc ?? StrCast(this._props.Doc._viewTransition);
                const durationMiliStr = viewTrans.match(/([0-9]*)ms/);
                const durationSecStr = viewTrans.match(/([0-9.]*)s/);
                const duration = durationMiliStr ? Number(durationMiliStr[1]) : durationSecStr ? Number(durationSecStr[1]) * 1000 : 0;
                this._forcedScroll = true; // makes onScroll ignore the scroll events this programmatic scroll produces
                if (duration) {
                    setTimeout(
                        () => {
                            this._mainCont.current && (this._scrollStopper = smoothScroll(duration, this._mainCont.current, pos, this._initialScroll?.easeFunc ?? 'ease', this._scrollStopper));
                            setTimeout(() => {
                                this._forcedScroll = false;
                            }, duration);
                        },
                        this._mainCont.current ? 0 : 250
                    ); // wait for mainCont and try again to scroll
                } else {
                    this._mainCont.current?.scrollTo({ top: pos });
                    this._forcedScroll = false;
                }
            }
        },
        { fireImmediately: true }
    );
    quickScroll = undefined;
    if (this._initialScroll !== undefined && this._mainCont.current) {
        this._mainCont.current?.scrollTo({ top: Math.abs(this._initialScroll?.loc || 0) });
        this._initialScroll = undefined;
    }
};

/**
 * Creates the pdf.js viewer (event bus, link service, find controller) inside the main
 * container and loads the document into it. If the container is not mounted yet, retries once
 * a second, up to 5 times.
 */
createPdfViewer() {
    if (!this._mainCont.current) {
        // bcz: I don't think this is ever triggered or needed
        console.log('PDFViewer- I guess we got here');
        if (this._retries < 5) {
            this._retries++;
            console.log('PDFViewer- retry num:' + this._retries);
            setTimeout(() => this.createPdfViewer(), 1000);
        }
        return;
    }
    // remove first so that repeated calls never register the copy handler twice
    document.removeEventListener('copy', this.copy, true);
    document.addEventListener('copy', this.copy, true);
    const eventBus = new PDFJSViewer.EventBus();
    eventBus._on('pagesinit', this.pagesinit);
    eventBus._on('pagerendered',action(() => (this._showWaiting = false))); // prettier-ignore
    const pdfLinkService = new PDFJSViewer.PDFLinkService({ eventBus });
    const pdfFindController = new PDFJSViewer.PDFFindController({ linkService: pdfLinkService, eventBus });
    this._pdfViewer = new PDFJSViewer.PDFViewer({
        container: this._mainCont.current,
        viewer: this._viewer.current || undefined,
        linkService: pdfLinkService,
        findController: pdfFindController,
        eventBus,
    });
    pdfLinkService.setViewer(this._pdfViewer);
    pdfLinkService.setDocument(this._props.pdf, null);
    this._pdfViewer.setDocument(this._props.pdf);
}

/**
 * Annotations ordered by their `y` field. Sorts a copy: `allAnnotations` is a computed value
 * and must not be reordered in place.
 */
private annotationsByY = () => [...this.allAnnotations].sort((a, b) => NumCast(a.y) - NumCast(b.y));

/** Steps the annotation cursor back by one (not below 0) and scrolls to that annotation. */
@action
prevAnnotation = () => {
    this.Index = Math.max(this.Index - 1, 0);
    this.scrollToAnnotation(this.annotationsByY()[this.Index]);
};

/** Steps the annotation cursor forward by one (not past the last annotation) and scrolls to it. */
@action
nextAnnotation = () => {
    this.Index = Math.min(this.Index + 1, this.allAnnotations.length - 1);
    this.scrollToAnnotation(this.annotationsByY()[this.Index]);
};

/** Scrolls page `p` (1-based, clamped to the known page range) into view. */
@action
gotoPage = (p: number) => {
    this._pdfViewer?.scrollPageIntoView({ pageNumber: Math.min(Math.max(1, p), this._pageSizes.length) });
};

/** Scrolls to the given annotation and highlights it; ignores a missing annotation. */
@action
scrollToAnnotation = (scrollToAnnotation: Doc) => {
    if (scrollToAnnotation) {
        this.scrollFocus(scrollToAnnotation, NumCast(scrollToAnnotation.y), { zoomTime: 500 });
        Doc.linkFollowHighlight(scrollToAnnotation);
    }
};

@observable private _scrollTimer: NodeJS.Timeout | undefined = undefined;

/**
 * Scroll handler for the main container. Mirrors user-initiated scrolling into the layout doc
 * and, once scrolling has paused for 200ms, links the current position to any active audio.
 */
onScroll = () => {
    if (this._mainCont.current && !this._forcedScroll) {
        this._ignoreScroll = true; // the pdf scrolled, so we need to tell the Doc to scroll but we don't want the doc to then try to set the PDF scroll pos (which would interfere with the smooth scroll animation)
        if (!LinkInfo.Instance?.LinkInfo) {
            this._props.layoutDoc._layout_scrollTop = this._mainCont.current.scrollTop;
        }
        this._ignoreScroll = false;
        this._scrollTimer && clearTimeout(this._scrollTimer); // wait until a scrolling pause, then create an anchor to audio
        this._scrollTimer = setTimeout(() => {
            CreateLinkToActiveAudio(() => this._props.pdfBox.getAnchor(true)!, false);
            this._scrollTimer = undefined;
        }, 200);
    }
};

// get the page index that the vertical offset passed in is on
getPageFromScroll = (vOffset: number) => {
    let index = 0;
    let currOffset = vOffset;
    // subtract whole page heights until the remaining offset falls inside the current page
    while (index < this._pageSizes.length && this._pageSizes[index] && currOffset - this._pageSizes[index].height > 0) {
        currOffset -= this._pageSizes[index++].height;
    }
    return index;
};

// Normalize text by removing extra spaces,
// punctuation, and converting to lowercase
/**
 * Canonical form used for fuzzy comparison: lower-cased, every character that is neither a
 * word character nor whitespace replaced by a space, whitespace runs collapsed, ends trimmed.
 */
private normalizeText(text: string): string {
    return text
        .toLowerCase()
        .replace(/[^\w\s]/g, ' ') // punctuation first: it turns into spaces that still need collapsing
        .replace(/\s+/g, ' ')
        .trim();
}

/**
 * Similarity of two strings after normalization, from 0 (unrelated) to 1 (identical).
 *
 * When either string is longer than 50 characters, word-chunk containment and shared-word
 * ratio are tried first (both capped at 0.9). Otherwise, or if those find nothing, plain
 * containment is scored by length ratio, and strings under 100 characters fall back to a
 * Levenshtein-based score.
 */
private computeSimilarity(str1: string, str2: string): number {
    const s1 = this.normalizeText(str1);
    const s2 = this.normalizeText(str2);
    if (s1 === s2) return 1;
    if (s1.length === 0 || s2.length === 0) return 0;

    if (s1.length > 50 || s2.length > 50) {
        const longerText = s1.length > s2.length ? s1 : s2;
        const shorterText = s1.length > s2.length ? s2 : s1;

        // does any run of up to 5 consecutive words of the shorter text appear in the longer one?
        const words = shorterText.split(' ');
        const chunkSize = Math.min(5, Math.floor(words.length / 2));
        if (chunkSize > 0) {
            let maxChunkMatch = 0;
            for (let i = 0; i <= words.length - chunkSize; i++) {
                const chunk = words.slice(i, i + chunkSize).join(' ');
                if (longerText.includes(chunk)) {
                    maxChunkMatch = Math.max(maxChunkMatch, chunk.length / shorterText.length);
                }
            }
            if (maxChunkMatch > 0.2) {
                return Math.min(0.9, maxChunkMatch + 0.3); // Boost the score, max 0.9
            }
        }

        // share of distinct words (longer than 2 characters) the two texts have in common
        const words1 = new Set(s1.split(' '));
        const words2 = new Set(s2.split(' '));
        let commonWords = 0;
        for (const word of words1) {
            if (word.length > 2 && words2.has(word)) commonWords++;
        }
        const overlapRatio = commonWords / Math.min(words1.size, words2.size);
        if (overlapRatio > 0.4) {
            return Math.min(0.9, overlapRatio); // For long text, a lower match can still be significant
        }
    }

    // one string contains the other: score by how much of the longer one is covered
    if (s1.includes(s2) || s2.includes(s1)) {
        return (0.8 * Math.min(s1.length, s2.length)) / Math.max(s1.length, s2.length);
    }

    if (s1.length < 100 && s2.length < 100) {
        // Levenshtein distance, keeping only the previous row of the table
        let prevRow = Array.from({ length: s2.length + 1 }, (_, j) => j);
        for (let i = 1; i <= s1.length; i++) {
            const row = [i];
            for (let j = 1; j <= s2.length; j++) {
                const cost = s1[i - 1] === s2[j - 1] ? 0 : 1;
                row[j] = Math.min(
                    prevRow[j] + 1, // deletion
                    row[j - 1] + 1, // insertion
                    prevRow[j - 1] + cost // substitution
                );
            }
            prevRow = row;
        }
        return 1 - prevRow[s2.length] / Math.max(s1.length, s2.length);
    }
    return 0;
}

private _fuzzySearchQuery = ''; // the query the current fuzzy results (or in-flight scan) belong to
private _fuzzySearchRun = 0; // bumped for every scan so that a superseded scan can stop itself
private _fuzzyHighlightTimers: ReturnType<typeof setTimeout>[] = []; // delayed chunk highlights of the current match

/** Dispatches a pdf.js 'find' for `query`, highlighting all hits and searching forward. */
private dispatchFind(query: string, phraseSearch: boolean = true): void {
    this._pdfViewer?.eventBus.dispatch('find', { query, phraseSearch, highlightAll: true, findPrevious: false });
}

/** Text of a pdf.js text-content entry, or '' for entries that carry no string. */
private textOfItem(item: unknown): string {
    const str = (item as { str?: unknown } | null | undefined)?.str;
    return typeof str === 'string' ? str : '';
}

/** Overlapping phrases of up to 5 words (stepping 2 words at a time) taken from a normalized query. */
private extractKeyPhrases(normalizedSearch: string): string[] {
    const words = normalizedSearch.split(' ');
    const phrases: string[] = [];
    for (let i = 0; i < words.length - 2; i += 2) {
        phrases.push(words.slice(i, i + Math.min(5, words.length - i)).join(' '));
    }
    return phrases;
}

/**
 * Groups a page's text items into lines (items whose y positions differ by at most 5 units)
 * and additionally joins the first few adjacent lines into larger passages.
 * NOTE(review): assumes transform[4] / transform[5] are the item's x / y in PDF user space
 * with y growing upward, hence the descending-y sort for reading order — confirm against pdf.js.
 */
private reconstructParagraphs(items: readonly unknown[]): string[] {
    const MAX_PARAGRAPH_LENGTH = 1000; // Limit paragraph size to avoid overflows
    const MAX_COMBINED_PARAGRAPHS = 5; // Limit the number of paragraph combinations

    const placed = items
        .map(item => {
            const transform = (item as { transform?: unknown } | null | undefined)?.transform;
            return Array.isArray(transform) ? { text: this.textOfItem(item), x: Number(transform[4]) || 0, y: Number(transform[5]) || 0 } : undefined;
        })
        .filter((p): p is { text: string; x: number; y: number } => p !== undefined) // entries without a position carry no text
        .sort((a, b) => (Math.abs(a.y - b.y) < 5 ? a.x - b.x : b.y - a.y)); // top-to-bottom, then left-to-right

    const paragraphs: string[] = [];
    let currentY: number | undefined;
    let currentParagraph = '';
    for (const { text, y } of placed) {
        // start a new line on the first item, on a y jump, or when the current one would grow too long
        if (currentY === undefined || Math.abs(y - currentY) > 5 || currentParagraph.length + text.length > MAX_PARAGRAPH_LENGTH) {
            if (currentParagraph) paragraphs.push(currentParagraph.trim());
            currentParagraph = text;
            currentY = y;
        } else {
            currentParagraph += ' ' + text;
        }
    }
    if (currentParagraph) paragraphs.push(currentParagraph.trim());

    // Also create overlapping larger paragraphs for better context, but limit size
    const combinedCount = Math.min(paragraphs.length - 1, MAX_COMBINED_PARAGRAPHS);
    for (let i = 0; i < combinedCount; i++) {
        if (paragraphs[i].length + paragraphs[i + 1].length < MAX_PARAGRAPH_LENGTH) {
            paragraphs.push(paragraphs[i] + ' ' + paragraphs[i + 1]);
        }
    }
    return paragraphs;
}

/**
 * Scans the pdf's text, page by page in batches of 10, for text similar to `searchString` and
 * navigates to the best hit. Falls back to the built-in pdf.js find when nothing scores above
 * the threshold.
 *
 * Results are collected locally and committed to observable state in one step, so a scan that
 * has been superseded by a newer one (or by a clear) never publishes anything.
 *
 * @param searchString text to look for
 * @param bwd when true, start at the last (lowest-scoring) result instead of the first
 * @param thresholdOverride similarity threshold to use instead of the length-based default;
 *        set by the single lowered-threshold retry, which also prevents further retries
 * @returns true when a match was navigated to or a fallback find was dispatched
 */
private async performFuzzySearch(searchString: string, bwd?: boolean, thresholdOverride?: number): Promise<boolean> {
    if (!this._pdfViewer || !searchString.trim()) return false;
    const normalizedSearch = this.normalizeText(searchString);
    if (!normalizedSearch) return false; // the query was nothing but punctuation

    const run = ++this._fuzzySearchRun;
    const superseded = () => run !== this._fuzzySearchRun;
    runInAction(() => {
        this._fuzzySearchResults = [];
    });
    const results: FuzzySearchResult[] = [];
    const byScoreDesc = (a: FuzzySearchResult, b: FuzzySearchResult) => (b.score || 0) - (a.score || 0);

    // Adjust threshold based on text length - more lenient for longer text
    let similarityThreshold = 0.6;
    if (searchString.length > 100) similarityThreshold = 0.35;
    else if (searchString.length > 50) similarityThreshold = 0.45;
    if (thresholdOverride !== undefined) similarityThreshold = thresholdOverride;
    console.log(`Using similarity threshold: ${similarityThreshold} for query length: ${searchString.length}`);

    // For longer queries, also look for partial matches
    const searchWords = normalizedSearch.split(' ').filter(w => w.length > 3);
    const isLongQuery = searchWords.length > 5;

    // Track best match for debugging
    let bestMatchScore = 0;
    let bestMatchText = '';

    // Fallback strategy: extract key phrases for very long search queries
    const keyPhrases = searchString.length > 200 ? this.extractKeyPhrases(normalizedSearch) : [];
    if (searchString.length > 200) console.log(`Using ${keyPhrases.length} key phrases for long search text`);

    const totalPages = this._pageSizes.length;
    const BATCH_SIZE = 10; // Process 10 pages at a time
    console.log(`Searching all ${totalPages} pages in batches of ${BATCH_SIZE}`);

    for (let batchStart = 0; batchStart < totalPages; batchStart += BATCH_SIZE) {
        const batchEnd = Math.min(batchStart + BATCH_SIZE, totalPages);
        console.log(`Processing pages ${batchStart + 1} to ${batchEnd} of ${totalPages}`);

        for (let pageIndex = batchStart; pageIndex < batchEnd; pageIndex++) {
            try {
                const page = await this._props.pdf.getPage(pageIndex + 1);
                const textContent = await page.getTextContent();
                if (superseded()) return false;
                const items: readonly unknown[] = textContent.items;

                // For long text, try to reconstruct paragraphs first
                let paragraphs: string[] = [];
                if (isLongQuery) {
                    try {
                        paragraphs = this.reconstructParagraphs(items);
                    } catch (paragraphError) {
                        console.warn('Error during paragraph reconstruction:', paragraphError);
                        // Continue with individual items if paragraph reconstruction fails
                    }
                }

                if (keyPhrases.length > 0) {
                    // For extremely long search texts, score paragraphs by how many key phrases they contain
                    paragraphs.forEach((paragraph, matchIndex) => {
                        let matchingPhrases = 0;
                        let bestPhraseScore = 0;
                        for (const phrase of keyPhrases) {
                            const similarity = this.computeSimilarity(paragraph, phrase);
                            if (similarity > 0.7) matchingPhrases++;
                            bestPhraseScore = Math.max(bestPhraseScore, similarity);
                        }
                        // If multiple key phrases match, this is likely a good result
                        if (matchingPhrases > 1 || bestPhraseScore > 0.8) {
                            results.push({ pageIndex, matchIndex, text: paragraph, score: 0.7 + matchingPhrases * 0.05, isParagraph: true });
                        }
                    });
                    // Also check each item directly
                    items.forEach((item, matchIndex) => {
                        const text = this.textOfItem(item);
                        if (!text.trim()) return;
                        for (const phrase of keyPhrases) {
                            const similarity = this.computeSimilarity(text, phrase);
                            if (similarity > 0.7) {
                                results.push({ pageIndex, matchIndex, text, score: similarity, isParagraph: false });
                                break; // One matching phrase is enough for direct items
                            }
                        }
                    });
                } else {
                    // Check both individual items and reconstructed paragraphs against the whole query
                    const candidates = [
                        ...items.map((item, idx) => ({ idx, text: this.textOfItem(item), isParagraph: false })),
                        ...paragraphs.filter(p => p.length < 5000).map((text, idx) => ({ idx, text, isParagraph: true })),
                    ];
                    for (const candidate of candidates) {
                        if (!candidate.text.trim() || candidate.text.length > 5000) continue;
                        const similarity = this.computeSimilarity(candidate.text, normalizedSearch);
                        if (similarity > bestMatchScore) {
                            bestMatchScore = similarity;
                            bestMatchText = candidate.text.substring(0, 100);
                        }
                        if (similarity > similarityThreshold) {
                            results.push({ pageIndex, matchIndex: candidate.idx, text: candidate.text, score: similarity, isParagraph: candidate.isParagraph });
                        }
                    }
                }
            } catch (error) {
                console.error(`Error extracting text from page ${pageIndex + 1}:`, error);
                // Continue with other pages even if one fails
            }
        }

        // Stop early once a batch has produced matches that are good enough
        if (results.length > 0) {
            results.sort(byScoreDesc);
            const topScore = results[0].score;
            if (topScore && topScore > 0.8) {
                console.log(`Found excellent match (score: ${topScore.toFixed(2)}) - stopping early`);
                break;
            }
            if (results.length >= 3 && results.every(r => r.score && r.score > 0.6)) {
                console.log(`Found ${results.length} good matches - stopping early`);
                break;
            }
        }

        if (batchEnd < totalPages) {
            // Give the browser a moment to breathe and release memory
            await new Promise(resolve => setTimeout(resolve, 1));
            if (superseded()) return false;
        }
    }
    if (superseded()) return false;

    // If no results with advanced search, try standard search with key terms
    if (results.length === 0 && searchWords.length > 3) {
        // Find the most distinctive words (longer words are often more specific)
        const distinctiveWords = searchWords
            .filter(w => w.length > 4)
            .sort((a, b) => b.length - a.length)
            .slice(0, 3);
        if (distinctiveWords.length > 0) {
            console.log(`Falling back to standard search with distinctive term: ${distinctiveWords[0]}`);
            this.dispatchFind(distinctiveWords[0], false);
            return true;
        }
    }

    console.log(`Best match (${bestMatchScore.toFixed(2)}): "${bestMatchText}"`);
    console.log(`Found ${results.length} matches above threshold ${similarityThreshold}`);

    if (results.length > 0) {
        results.sort(byScoreDesc);
        // Navigate to the first/last result based on direction
        runInAction(() => {
            this._fuzzySearchResults = results;
            this._currentFuzzyMatchIndex = bwd ? results.length - 1 : 0;
        });
        this.navigateToFuzzyMatch(this._currentFuzzyMatchIndex);
        return true;
    }

    // A near miss: retry exactly once with the threshold lowered just below the best score seen.
    if (thresholdOverride === undefined && bestMatchScore > similarityThreshold * 0.7) {
        console.log(`Lowering threshold to ${bestMatchScore * 0.9} and retrying search`);
        return this.performFuzzySearch(searchString, bwd, bestMatchScore * 0.9);
    }

    // Ultimate fallback: standard PDF.js search with a few words from the middle of the query
    const words = normalizedSearch.split(' ');
    const middleIndex = Math.floor(words.length / 2);
    const searchPhrase = words.slice(Math.max(0, middleIndex - 1), Math.min(words.length, middleIndex + 2)).join(' ');
    console.log(`Falling back to standard search with phrase: ${searchPhrase}`);
    this.dispatchFind(searchPhrase);
    return true;
}

/**
 * Scrolls to the page of fuzzy result `index` and highlights it through the pdf.js find
 * machinery. Out-of-range indices are ignored.
 */
private navigateToFuzzyMatch(index: number): void {
    // delayed highlights queued for a previously shown match must not fire over this one
    this._fuzzyHighlightTimers.forEach(timer => clearTimeout(timer));
    this._fuzzyHighlightTimers = [];
    if (index < 0 || index >= this._fuzzySearchResults.length) return;

    const match = this._fuzzySearchResults[index];
    console.log(`Navigating to match: ${match.text.substring(0, 50)}... (score: ${match.score?.toFixed(2) || 'unknown'})`);

    // Scroll to the page containing the match
    this._pdfViewer.scrollPageIntoView({ pageNumber: match.pageIndex + 1 });

    const words = match.text.split(/\s+/);
    if (match.isParagraph && words.length > 5) {
        // Paragraphs are highlighted as overlapping 5-word chunks (step 3); more than 5 words
        // always yields at least one chunk.
        const chunks: string[] = [];
        for (let i = 0; i < words.length - 4; i += 3) {
            chunks.push(words.slice(i, i + 5).join(' '));
        }
        // Highlight the first chunk immediately, the rest with small delays to avoid conflicts
        this.dispatchFind(chunks[0]);
        this._fuzzyHighlightTimers = chunks.slice(1).map((chunk, i) => setTimeout(() => this.dispatchFind(chunk), (i + 1) * 100));
        return;
    }

    // Standard highlighting for non-paragraph matches or short text
    if (!this._pdfViewer.findController) return;
    if (match.text.length > 50) {
        // For longer text, highlight a 5-word phrase taken from the middle of the text
        let phraseToHighlight = match.text;
        if (words.length >= 5) {
            const middleIndex = Math.floor(words.length / 2);
            phraseToHighlight = words.slice(middleIndex - 2, middleIndex + 3).join(' ');
        }
        console.log(`Highlighting phrase: "${phraseToHighlight}"`);
        this.dispatchFind(phraseToHighlight);
    } else {
        // For shorter text, use the entire match
        this.dispatchFind(match.text);
    }
}

/** Advances to the next fuzzy result, wrapping around. Returns false when there are none. */
private nextFuzzyMatch(): boolean {
    const count = this._fuzzySearchResults.length;
    if (count === 0) return false;
    this._currentFuzzyMatchIndex = (this._currentFuzzyMatchIndex + 1) % count;
    this.navigateToFuzzyMatch(this._currentFuzzyMatchIndex);
    return true;
}

/** Steps back to the previous fuzzy result, wrapping around. Returns false when there are none. */
private prevFuzzyMatch(): boolean {
    const count = this._fuzzySearchResults.length;
    if (count === 0) return false;
    this._currentFuzzyMatchIndex = (this._currentFuzzyMatchIndex - 1 + count) % count;
    this.navigateToFuzzyMatch(this._currentFuzzyMatchIndex);
    return true;
}

/**
 * Search entry point.
 *  - `clear`: drops fuzzy results (cancelling any running scan) and closes the pdf.js find bar.
 *  - empty `searchString`: steps to the previous/next annotation instead.
 *  - fuzzy mode: repeating the same query steps through its results; a new query starts a scan.
 *  - otherwise: dispatches a standard pdf.js phrase search.
 *
 * @returns always true, except when stepping through fuzzy results, where it is that step's result
 */
@action
search = (searchString: string, bwd?: boolean, clear: boolean = false) => {
    if (clear) {
        this._fuzzySearchRun++; // invalidates a scan that is still running
        this._fuzzySearchQuery = '';
        this._fuzzyHighlightTimers.forEach(timer => clearTimeout(timer));
        this._fuzzyHighlightTimers = [];
        this._fuzzySearchResults = [];
        this._pdfViewer?.eventBus.dispatch('findbarclose', {});
        return true;
    }
    if (!searchString) {
        bwd ? this.prevAnnotation() : this.nextAnnotation();
        return true;
    }

    if (this._fuzzySearchEnabled) {
        // results are only reusable for the query that produced them
        if (searchString === this._fuzzySearchQuery && this._fuzzySearchResults.length > 0) {
            return bwd ? this.prevFuzzyMatch() : this.nextFuzzyMatch();
        }
        this._fuzzySearchQuery = searchString;
        this.performFuzzySearch(searchString, bwd).catch(err => console.error('PDFViewer: fuzzy search failed', err));
        return true;
    }

    // Use original PDF.js search
    const findOpts = {
        caseSensitive: false,
        findPrevious: bwd,
        highlightAll: true,
        phraseSearch: true,
        query: searchString,
    };
    if (this._pdfViewer?.pageViewsReady) {
        this._pdfViewer?.eventBus.dispatch('find', { ...findOpts, type: 'again' });
    } else if (this._mainCont.current) {
        const executeFind = () => this._pdfViewer?.eventBus.dispatch('find', findOpts);
        // once: repeated searches before the pages are ready must not pile up listeners
        this._mainCont.current.addEventListener('pagesloaded', executeFind, { once: true });
        this._mainCont.current.addEventListener('pagerendered', executeFind, { once: true });
    }
    return true;
};

/** Toggles fuzzy search mode and returns the new state. */
@action
toggleFuzzySearch = (): boolean => {
    this._fuzzySearchEnabled = !this._fuzzySearchEnabled;
    return this._fuzzySearchEnabled;
};

/**
 * Pointer-down on the pdf. Records the press position, then either starts a marquee (press
 * outside the text layer) or lets the browser select text (press inside it). Does nothing
 * beyond recording the position while the pdf is zoomed.
 */
@action
onPointerDown = (e: React.PointerEvent): void => {
    // if alt+left click, drag and annotate
    this._downX = e.clientX;
    this._downY = e.clientY;
    if ((this._props.Doc._freeform_scale || 1) !== 1) return;
    if ((e.button !== 0 || e.altKey) && this._props.isContentActive()) {
        this._setPreviewCursor?.(e.clientX, e.clientY, true, false, this._props.Doc);
    }
    if (!e.altKey && e.button === 0 && this._props.isContentActive() && Doc.ActiveTool !== InkTool.Ink) {
        this._props.select(false);
        MarqueeAnnotator.clearAnnotations(this._savedAnnotations);
        this.isAnnotating = true;
        const target = e.target as HTMLElement;
        if (e.target && (target.className.includes('endOfContent') || (target.parentElement?.className !== 'textLayer' && target.parentElement?.parentElement?.className !== 'textLayer'))) {
            this._textSelecting = false;
        } else {
            // if textLayer is hit, then we select text instead of using a marquee so clear out the marquee.
            setTimeout(() => this._marqueeref.current?.onTerminateSelection(), 100); // bcz: hack .. anchor menu is setup within MarqueeAnnotator so we need to at least create the marqueeAnnotator even though we aren't using it.
            this._styleRule = addStyleSheetRule(PDFViewer._annotationStyle, 'htmlAnnotation', { 'pointer-events': 'none' });
            document.addEventListener('pointerup', this.onSelectEnd);
        }
        this._marqueeref.current?.onInitiateSelection([e.clientX, e.clientY]);
    }
};

/** Ends a marquee interaction and returns the viewer to text-selecting mode. */
@action
finishMarquee = (/* x?: number, y?: number */) => {
    AnchorMenu.Instance.makeLabels = unimplementedFunction;
    this._getAnchor = AnchorMenu.Instance?.GetAnchor;
    this.isAnnotating = false;
    this._marqueeref.current?.onTerminateSelection();
    this._textSelecting = true;
};

/**
 * Document-level pointerup handler installed while text is being selected. Turns a range
 * selection into preview annotations and wires the anchor menu and GPT popup to this view.
 */
@action
onSelectEnd = (e: PointerEvent): void => {
    this._getAnchor = AnchorMenu.Instance?.GetAnchor;
    this.isAnnotating = false;
    clearStyleSheetRules(PDFViewer._annotationStyle);
    this._props.select(false);
    document.removeEventListener('pointerup', this.onSelectEnd);

    const sel = window.getSelection();
    if (sel) {
        AnchorMenu.Instance.setSelectedText(sel.toString());
        AnchorMenu.Instance.setLocation(NumCast(this._props.layoutDoc.x), NumCast(this._props.layoutDoc.y));
    }
    if (sel?.type === 'Range') {
        this.createTextAnnotation(sel, sel.getRangeAt(0));
        AnchorMenu.Instance.jumpTo(e.clientX, e.clientY);
    }

    GPTPopup.Instance.setSidebarFieldKey('data_sidebar');
    GPTPopup.Instance.addDoc = this._props.sidebarAddDoc;
    // allows for creating collection
    AnchorMenu.Instance.addToCollection = this._props.DocumentView?.()._props.addDocument;
    AnchorMenu.Instance.makeLabels = unimplementedFunction;
    AnchorMenu.Instance.AddDrawingAnnotation = this.addDrawingAnnotation;
};

/** Offsets a drawing's y by the doc's `layout_scrollTop` and adds it through `addDocument`. */
addDrawingAnnotation = (drawing: Doc) => {
    // drawing.x = this._props.pdfBox.ScreenToLocalBoxXf().TranslateX
    // const scaleX = this._mainCont.current.offsetWidth / boundingRect.width;
    drawing.y = NumCast(drawing.y) + NumCast(this._props.Doc.layout_scrollTop);
    this._props.addDocument?.(drawing);
};

/**
 * Creates one preview annotation box per client rect of the selected range, remembers the
 * selection's text and content, then clears the browser selection.
 */
@action
createTextAnnotation = (sel: Selection, selRange: Range) => {
    const mainCont = this._mainCont.current;
    if (mainCont) {
        // the rects are measured with this rotation applied; it is removed again below
        mainCont.style.transform = `rotate(${NumCast(this._props.pdfBox.ScreenToLocalBoxXf().RotateDeg)}deg)`;
        const boundingRect = mainCont.getBoundingClientRect();
        const clientRects = selRange.getClientRects();
        // factors that take screen distances onto the pdf div (the same for every rect)
        const scaleX = mainCont.offsetWidth / boundingRect.width;
        const scaleY = mainCont.offsetHeight / boundingRect.height;
        const pdfScale = NumCast(this._props.layoutDoc._freeform_scale, 1);
        for (let i = 0; i < clientRects.length; i++) {
            const rect = clientRects.item(i);
            if (rect && rect?.width && rect.width < mainCont.clientWidth / this._props.ScreenToLocalTransform().Scale) {
                const annoBox = document.createElement('div');
                annoBox.className = 'marqueeAnnotator-annotationBox';
                // transforms the positions from screen onto the pdf div
                annoBox.style.left = (((rect.left - boundingRect.left) * scaleX) / pdfScale).toString() + 'px';
                annoBox.style.top = (((rect.top - boundingRect.top) * scaleY) / pdfScale + mainCont.scrollTop).toString() + 'px';
                annoBox.style.width = ((rect.width * scaleX) / pdfScale).toString() + 'px';
                annoBox.style.height = ((rect.height * scaleY) / pdfScale).toString() + 'px';
                this._annotationLayer.current && MarqueeAnnotator.previewNewAnnotation(this._savedAnnotations, this._annotationLayer.current, annoBox, this.getPageFromScroll(rect.top));
            }
        }
        mainCont.style.transform = '';
    }
    this._selectionContent = selRange.cloneContents();
    this._selectionText = this._selectionContent?.textContent || '';

    // clear selection
    if (sel.empty) {
        // Chrome
        sel.empty();
    } else if (sel.removeAllRanges) {
        // Firefox
        sel.removeAllRanges();
    }
};

/**
 * Click on the pdf: stops any running smooth scroll and, when the pointer has moved less than
 * the drag threshold since pointer-down, positions the preview cursor.
 */
onClick = (e: React.MouseEvent) => {
    this._scrollStopper?.();
    const stayedPut = Math.abs(e.clientX - this._downX) < ClientUtils.DRAG_THRESHOLD && Math.abs(e.clientY - this._downY) < ClientUtils.DRAG_THRESHOLD;
    if (this._setPreviewCursor && e.button === 0 && stayedPut) {
        this._setPreviewCursor(e.clientX, e.clientY, false, false, this._props.Doc);
    }
    // e.stopPropagation(); // bcz: not sure why this was here. We need to allow the DocumentView to get clicks to process doubleClicks
};

/** Stores the callback used to position the preview cursor. */
setPreviewCursor = (func?: (x: number, y: number, drag: boolean, hide: boolean, doc: Opt<Doc>) => void) => {
    this._setPreviewCursor = func;
};

/**
 * Ctrl+wheel zooms the pdf between 1x and 10x and stores the scale on the layout doc. Wheel
 * events are kept from propagating whenever the content is active.
 */
@action
onZoomWheel = (e: React.WheelEvent) => {
    if (!this._props.isContentActive()) return;
    e.stopPropagation();
    if (!e.ctrlKey || !this._pdfViewer) return; // nothing to zoom before the viewer has been created
    const parsedScale = Number(this._pdfViewer.currentScaleValue);
    // currentScaleValue is not guaranteed to be numeric; fall back to the numeric scale
    const curScale = Number.isFinite(parsedScale) ? parsedScale : this._pdfViewer.currentScale;
    const newScale = Math.max(1, Math.min(10, curScale - (curScale * e.deltaY) / 1000));
    this._pdfViewer.currentScaleValue = newScale + '';
    this._props.layoutDoc._freeform_scale = newScale;
};

/** 'all' while the content is active and the marquee options menu is hidden, otherwise 'none'. */
pointerEvents = () =>
    this._props.isContentActive() && !MarqueeOptionsMenu.Instance.isShown()
        ? 'all' //
        : 'none';

@computed get annotationLayer() {
    const inlineAnnos = this.inlineTextAnnotations.sort((a, b) => NumCast(a.y) - NumCast(b.y)).filter(anno => !anno.hidden);
    return (
{inlineAnnos.map(anno => ( ))}
);
    }

    /** Returns the stored `_scrollHeight`. */
    getScrollHeight = () => this._scrollHeight;

    /** Screen-to-local transform translated vertically by the layout scroll offset (0 when the main container is not mounted). */
    scrollXf = () => {
        const localXf = this._props.ScreenToLocalTransform();
        const scrollOffset = this._mainCont.current ? NumCast(this._props.layoutDoc._layout_scrollTop) / 1.333 : 0;
        return localXf.translate(0, scrollOffset);
    };

    /** scrollXf scaled by the inverse of layoutDoc._freeform_scale (default 1). */
    overlayTransform = () => {
        const freeformScale = NumCast(this._props.layoutDoc._freeform_scale, 1);
        return this.scrollXf().scale(1 / freeformScale);
    };

    /** Panel width divided by the native-dimension scaling (1 when unavailable or 0). */
    panelWidth = () => {
        const width = this._props.PanelWidth();
        return width / (this._props.NativeDimScaling?.() || 1);
    };

    /** Panel height divided by the native-dimension scaling (1 when unavailable or 0). */
    panelHeight = () => {
        const height = this._props.PanelHeight();
        return height / (this._props.NativeDimScaling?.() || 1);
    };

    /** Child filters plus the transparent-background filter. */
    transparentFilter = () => {
        const filters = [...this._props.childFilters()];
        filters.push(ClientUtils.TransparentBackgroundFilter);
        return filters;
    };

    /** Child filters plus the no-drag filter, plus the opaque-background filter unless embedding is possible on active content. */
    opaqueFilter = () => {
        const filters = [...this._props.childFilters(), ClientUtils.noDragDocsFilter];
        const skipOpaque = SnappingManager.CanEmbed && this._props.isContentActive();
        if (!skipOpaque) filters.push(ClientUtils.OpaqueBackgroundFilter);
        return filters;
    };

    /**
     * Style provider for child annotations. Overrides only the pointer-events property:
     * 'none' for inline text annotations or inactive content, 'visiblePainted' for svg
     * layouts without a LayoutTemplateString; everything else defers to the styleProvider prop.
     */
    childStyleProvider = (doc: Doc | undefined, props: Opt, property: string) => {
        const inherited = () => this._props.styleProvider?.(doc, props, property);
        if (!(doc instanceof Doc) || property !== StyleProp.PointerEvents) return inherited();

        if (this.inlineTextAnnotations.includes(doc) || this._props.isContentActive() === false) return 'none';
        if (doc.layout_isSvg && !props?.LayoutTemplateString) return 'visiblePainted';
        return inherited();
    };

    /** 'none' only when isContentActive() is exactly false, otherwise 'all'. */
    childPointerEvents = () => (this._props.isContentActive() === false ? 'none' : 'all');

    renderAnnotations = (childFilters: () => string[], mixBlendMode?: 'hard-light' | 'multiply', display?: string) => (
);

    /**
     * Overlay for annotations that pass the transparent filter, rendered with 'multiply'
     * blending; null when no annotation passes. The display argument is 'none' while
     * embedding is possible on active content.
     */
    @computed get overlayTransparentAnnotations() {
        const annotations = DocListCast(this._props.dataDoc[this._props.fieldKey + '_annotations']);
        const transparentChildren = DocUtils.FilterDocs(annotations, this.transparentFilter(), []);
        if (!transparentChildren.length) return null;

        const display = SnappingManager.CanEmbed && this._props.isContentActive() ? 'none' : undefined;
        return this.renderAnnotations(this.transparentFilter, 'multiply', display);
    }

    /** Overlay for the opaque filter; uses 'hard-light' blending when any annotation has a mixBlendMode. */
    @computed get overlayOpaqueAnnotations() {
        const blendMode = this.allAnnotations.some(anno => anno.mixBlendMode) ? 'hard-light' : undefined;
        return this.renderAnnotations(this.opaqueFilter, blendMode);
    }

    @computed get overlayLayer() {
        return (
{this.overlayTransparentAnnotations} {this.overlayOpaqueAnnotations}
); } @computed get pdfViewerDiv() { return
;
    }

    /** Accessor for the map of saved (preview) annotations. */
    savedAnnotations = () => this._savedAnnotations;

    /**
     * Forwards to the optional addDocument prop.
     * @returns the prop's result, or false when no addDocument prop was supplied
     *          (previously a non-null assertion made that case throw)
     */
    addDocumentWrapper = (doc: Doc | Doc[]) => this._props.addDocument?.(doc) ?? false;

    /**
     * Screen-to-contents transform of the PDF box's DocumentView scaled by PDF_TO_CSS_UNITS,
     * or the identity transform when no DocumentView is available.
     */
    screenToMarqueeXf = () => this.props.pdfBox.DocumentView?.()?.screenToContentsTransform().scale(Pdfjs.PixelsPerInch.PDF_TO_CSS_UNITS) ?? Transform.Identity();

    render() {
        TraceMobx();
        return (
600 ? Doc.NativeHeight(this._props.Doc) : `100%`, }}> {this.pdfViewerDiv} {this.annotationLayer} {this.overlayLayer} {this._showWaiting ? : null} {!this._mainCont.current || !this._annotationLayer.current || !this.props.pdfBox.DocumentView ? null : ( Pdfjs.PixelsPerInch.PDF_TO_CSS_UNITS} annotationLayerScrollTop={NumCast(this._props.Doc._layout_scrollTop)} addDocument={this.addDocumentWrapper} docView={this.props.pdfBox.DocumentView} screenTransform={this.screenToMarqueeXf} finishMarquee={this.finishMarquee} savedAnnotations={this.savedAnnotations} selectionText={this.selectionText} annotationLayer={this._annotationLayer.current} marqueeContainer={this._mainCont.current} anchorMenuCrop={this.crop} /> )}
{this._loading ? (
) : null}
); } }