Diffstat (limited to 'src/client/views/nodes/VideoBox.tsx')
-rw-r--r--  src/client/views/nodes/VideoBox.tsx  47
1 file changed, 47 insertions(+), 0 deletions(-)
diff --git a/src/client/views/nodes/VideoBox.tsx b/src/client/views/nodes/VideoBox.tsx
index b3cb0e1db..f994bdbb5 100644
--- a/src/client/views/nodes/VideoBox.tsx
+++ b/src/client/views/nodes/VideoBox.tsx
@@ -30,6 +30,7 @@ import { StyleProp } from '../StyleProp';
import { DocumentView } from './DocumentView';
import { FieldView, FieldViewProps } from './FieldView';
import { FocusViewOptions } from './FocusViewOptions';
+import { gptImageLabel } from '../../apis/gpt/GPT';
import './VideoBox.scss';
/**
@@ -109,6 +110,52 @@ export class VideoBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
        return this._videoRef;
    }
+    autoTag = async () => {
+        if (this.Document.$tags_chat) return;
+        try {
+            if (!this.player) throw new Error('Video element not available.');
+
+            // 1) Extract a frame at the video's midpoint
+            const videoDuration = this.player.duration; // NaN until the video's metadata has loaded
+            const snapshotTime = videoDuration / 2;
+
+            // Seek the video element to the midpoint and wait for the seek to complete
+            await new Promise<void>(resolve => {
+                const onSeeked = () => {
+                    this.player!.removeEventListener('seeked', onSeeked);
+                    resolve();
+                };
+                this.player!.addEventListener('seeked', onSeeked);
+                this.player!.currentTime = snapshotTime;
+            });
+
+            // 2) Draw the frame onto a canvas and get a base64 representation
+            const canvas = document.createElement('canvas');
+            canvas.width = this.player.videoWidth;
+            canvas.height = this.player.videoHeight;
+            const ctx = canvas.getContext('2d');
+            if (!ctx) throw new Error('Failed to create canvas context.');
+            ctx.drawImage(this.player, 0, 0, canvas.width, canvas.height);
+            const base64Image = canvas.toDataURL('image/png'); // throws if the video source taints the canvas (no CORS)
+
+            // 3) Send the image data to GPT for classification and descriptive tags
+            const label = await gptImageLabel(
+                base64Image,
+                `Classify this video frame as either a PERSON or LANDSCAPE.
+                 Then provide five additional descriptive tags (single words) separated by spaces.
+                 Finally, add one detailed summary phrase using underscores.`
+            ).then(raw => raw.trim().toUpperCase());
+
+            // 4) Store the uppercased GPT labels plus an ASPECT_<width/height> tag in the Document's tags
+            const aspect = this.player!.videoWidth / (this.player!.videoHeight || 1);
+            this.Document.$tags_chat = new List<string>([...label.split(/\s+/), `ASPECT_${aspect}`]);
+            // 5) Turn on tag display in layout
+            this.Document._layout_showTags = true;
+        } catch (err) {
+            console.error('Video autoTag failed:', err);
+        }
+    };
+
    componentDidMount() {
        this.unmounting = false;
        this._props.setContentViewBox?.(this); // this tells the DocumentView that this VideoBox is the "content" of the document. this allows the DocumentView to indirectly call getAnchor() on the VideoBox when making a link.