From 131f3fa7d1f1a42be58de63ee54fecd20e6d9fbf Mon Sep 17 00:00:00 2001 From: ab Date: Thu, 1 Aug 2019 16:29:56 -0400 Subject: key words successfully extracted --- src/client/cognitive_services/CognitiveServices.ts | 50 +++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) (limited to 'src/client/cognitive_services') diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index c118d91d3..8aac20084 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -19,7 +19,8 @@ export type Rectangle = { top: number, left: number, width: number, height: numb export enum Service { ComputerVision = "vision", Face = "face", - Handwriting = "handwriting" + Handwriting = "handwriting", + Text = "text" } export enum Confidence { @@ -211,4 +212,51 @@ export namespace CognitiveServices { } + export namespace Text { + export const Manager: APIManager = { + converter: (data: string) => { + return JSON.stringify({ + documents: [{ + id: 1, + language: "en", + text: data + }] + }); + }, + requester: async (apiKey: string, body: string, service: Service) => { + let serverAddress = "https://eastus.api.cognitive.microsoft.com"; + let endpoint = serverAddress + "/text/analytics/v2.1/keyPhrases"; + let sampleBody = { + "documents": [ + { + "language": "en", + "id": 1, + "text": "Hello world. This is some input text that I love." + } + ] + }; + let actualBody = body; + const options = { + uri: endpoint, + body: actualBody, + headers: { + 'Content-Type': 'application/json', + 'Ocp-Apim-Subscription-Key': apiKey + } + + }; + console.log("requested!"); + return request.post(options); + }, + analyzer: async (target: Doc, keys: string[], data: string, converter: Converter) => { + let results = await ExecuteQuery(Service.Text, Manager, data); + console.log(results); + converter(results); + //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); + console.log("analyzed!"); + return null; + } + }; + } + } \ No newline at end of file -- cgit v1.2.3-70-g09d2 From 38317d4a3687e02a727693b569dfc5980db81854 Mon Sep 17 00:00:00 2001 From: ab Date: Mon, 5 Aug 2019 16:15:59 -0400 Subject: recommender --- package.json | 1 + src/client/cognitive_services/CognitiveServices.ts | 3 ++- src/client/util/Recommender.tsx | 22 ++++++++++++++++++++++ src/client/views/nodes/ImageBox.tsx | 8 +++++--- 4 files changed, 30 insertions(+), 4 deletions(-) create mode 100644 src/client/util/Recommender.tsx (limited to 'src/client/cognitive_services') diff --git a/package.json b/package.json index d469f0c49..9012ff1f7 100644 --- a/package.json +++ b/package.json @@ -214,6 +214,7 @@ "typescript-collections": "^1.3.2", "url-loader": "^1.1.2", "uuid": "^3.3.2", + "word2vec": "^1.1.4", "xoauth2": "^1.2.0", "youtube": "^0.1.0" } diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 8aac20084..6afd2571a 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -251,8 +251,9 @@ export namespace CognitiveServices { analyzer: async (target: Doc, keys: string[], data: string, converter: Converter) => { let results = await ExecuteQuery(Service.Text, Manager, data); console.log(results); - converter(results); + let keyterms = converter(results); //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); + target[keys[0]] = keyterms; console.log("analyzed!"); return null; } diff --git a/src/client/util/Recommender.tsx b/src/client/util/Recommender.tsx new file mode 100644 index 000000000..88d0c85f5 --- /dev/null +++ b/src/client/util/Recommender.tsx @@ -0,0 +1,22 @@ +import React = require("react"); +import { observer } from "mobx-react"; +import { observable, runInAction } from "mobx"; +var w2v = require('word2vec'); + +@observer +export default class Recommender extends React.Component { + + /*** + * Converts text to n-dimensional vector using pretrained word2vec model + */ + text_to_vec(text: string) { + + } + + render() { + return ( +
Recommender System!!!
+ ) + } + +} \ No newline at end of file diff --git a/src/client/views/nodes/ImageBox.tsx b/src/client/views/nodes/ImageBox.tsx index c12fae8e9..7388b532c 100644 --- a/src/client/views/nodes/ImageBox.tsx +++ b/src/client/views/nodes/ImageBox.tsx @@ -245,12 +245,14 @@ export class ImageBox extends DocComponent(ImageD let data = StrCast(this.dataDoc.title); console.log(data); let converter = (results: any) => { + let keyterms = new List(); results.documents.forEach((doc: any) => { - console.log(doc.keyPhrases); + let keyPhrases = doc.keyPhrases; + keyPhrases.map((kp: string) => keyterms.push(kp)); }); - return new Doc(); + return keyterms; }; - CognitiveServices.Text.Manager.analyzer(this.extensionDoc, ["key words", "key word strings"], data, converter); + CognitiveServices.Text.Manager.analyzer(this.extensionDoc, ["key words"], data, converter); } generateMetadata = (threshold: Confidence = Confidence.Excellent) => { -- cgit v1.2.3-70-g09d2 From 06b59a4ec2f1871846696da22928fc7d54ae02d6 Mon Sep 17 00:00:00 2001 From: ab Date: Tue, 6 Aug 2019 16:01:57 -0400 Subject: word2vec is functional --- package.json | 8 +++++- src/client/cognitive_services/CognitiveServices.ts | 18 ++++++++++++- src/client/views/nodes/ImageBox.tsx | 8 ++++-- src/server/Recommender.ts | 30 ++++++++++++++++++---- src/server/index.ts | 19 ++++++++++---- webpack.config.js | 5 +++- 6 files changed, 73 insertions(+), 15 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/package.json b/package.json index 9012ff1f7..44d5287bd 100644 --- a/package.json +++ b/package.json @@ -3,6 +3,12 @@ "version": "1.0.0", "description": "", "main": "index.js", + "browser": { + "child_process": false + }, + "node": { + "child_process": "empty" + }, "scripts": { "start": "cross-env NODE_OPTIONS=--max_old_space_size=4096 ts-node-dev -- src/server/index.ts", "debug": "cross-env NODE_OPTIONS=--max_old_space_size=8192 ts-node-dev --inspect -- src/server/index.ts", @@ -218,4 +224,4 @@ "xoauth2": "^1.2.0", "youtube": "^0.1.0" } -} +} \ No newline at end of file diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 6afd2571a..863236b60 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -6,6 +6,8 @@ import { RouteStore } from "../../server/RouteStore"; import { Utils } from "../../Utils"; import { InkData } from "../../new_fields/InkField"; import { UndoManager } from "../util/UndoManager"; +import requestPromise = require("request-promise"); +import { List } from "../../new_fields/List"; type APIManager = { converter: BodyConverter, requester: RequestExecutor, analyzer: AnalysisApplier }; type RequestExecutor = (apiKey: string, body: string, service: Service) => Promise; @@ -255,9 +257,23 @@ export namespace CognitiveServices { //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); target[keys[0]] = keyterms; console.log("analyzed!"); + vectorize(keyterms); return null; - } + }, + + }; + function vectorize(keyterms: any) { + console.log("vectorizing..."); + keyterms = ["father", "king"]; + let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; + requestPromise.post(args).then((value) => { + value.forEach((wordvec: any) => { + console.log(wordvec.word); + }); + }); + } + } } \ No newline at end of file diff --git a/src/client/views/nodes/ImageBox.tsx b/src/client/views/nodes/ImageBox.tsx index 7388b532c..161226c0d 100644 --- a/src/client/views/nodes/ImageBox.tsx +++ b/src/client/views/nodes/ImageBox.tsx @@ -1,5 +1,5 @@ import { library } from '@fortawesome/fontawesome-svg-core'; -import { faImage, faFileAudio, faPaintBrush, faAsterisk } from '@fortawesome/free-solid-svg-icons'; +import { faImage, faFileAudio, faPaintBrush, faAsterisk, faBrain } from '@fortawesome/free-solid-svg-icons'; import { action, observable, computed, runInAction } from 'mobx'; import { observer } from "mobx-react"; import Lightbox from 'react-image-lightbox'; @@ -31,12 +31,14 @@ import { faEye } from '@fortawesome/free-regular-svg-icons'; import { ComputedField } from '../../../new_fields/ScriptField'; import { CompileScript } from '../../util/Scripting'; import { thisExpression } from 'babel-types'; +import { Recommender } from '../../../server/Recommender'; +import requestPromise = require('request-promise'); var requestImageSize = require('../../util/request-image-size'); var path = require('path'); const { Howl } = require('howler'); -library.add(faImage, faEye, faPaintBrush); +library.add(faImage, faEye, faPaintBrush, faBrain); library.add(faFileAudio, faAsterisk); @@ -253,6 +255,8 @@ export class ImageBox extends DocComponent(ImageD return keyterms; }; CognitiveServices.Text.Manager.analyzer(this.extensionDoc, ["key words"], data, converter); + // request recommender + //fetch(Utils.prepend("/recommender"), { body: body, method: "POST", headers: { "content-type": "application/json" } }).then((value) => console.log(value)); } generateMetadata = (threshold: Confidence = Confidence.Excellent) => { diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index 6d9ca6486..3c71f3aa1 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -3,25 +3,27 @@ var w2v = require('word2vec'); export class Recommender { private _model: any; + static Instance: Recommender; constructor() { console.log("creating recommender..."); + Recommender.Instance = this; } - public loadModel(): Promise { + private loadModel(): Promise { let self = this; return new Promise(res => { w2v.loadModel("./node_modules/word2vec/vectors.txt", function (err: any, model: any) { - console.log(err); - console.log(model); self._model = model; - console.log(model.similarity('father', 'mother')); res(model); }); }); } - public testModel() { + public async testModel() { + if (!this._model) { + await this.loadModel(); + } if (this._model) { let similarity = this._model.similarity('father', 'mother'); console.log(similarity); @@ -30,4 +32,22 @@ export class Recommender { console.log("model not found :("); } } + + public async testInstance(text: string) { + if (!this._model) { + await this.loadModel(); + } + console.log(text); + } + + public async vectorize(text: string[]) { + if (!this._model) { + await this.loadModel(); + } + if (this._model) { + let word_vecs = this._model.getVectors(text); + console.log(word_vecs[0]); + return word_vecs; + } + } } diff --git a/src/server/index.ts b/src/server/index.ts index dda5a870a..67087fc1f 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -52,11 +52,6 @@ const probe = require("probe-image-size"); var SolrNode = require('solr-node'); var shell = require('shelljs'); -let recommender = new Recommender(); -recommender.loadModel().then(() => { - recommender.testModel(); -}); - const download = (url: string, dest: fs.PathLike) => request.get(url).pipe(fs.createWriteStream(dest)); let youtubeApiKey: string; YoutubeApi.readApiKey((apiKey: string) => youtubeApiKey = apiKey); @@ -651,6 +646,20 @@ app.use(RouteStore.corsProxy, (req, res) => { }).pipe(res); }); +//// + +let recommender = new Recommender(); +recommender.testModel(); + +app.post("/recommender", async (req, res) => { + let keyphrases = req.body.keyphrases; + let wordvecs = await recommender.vectorize(keyphrases); + res.send(wordvecs); +}); + + +///// + app.get(RouteStore.delete, (req, res) => { if (release) { res.send("no"); diff --git a/webpack.config.js b/webpack.config.js index 5e0a6a883..6a14dfcda 100644 --- a/webpack.config.js +++ b/webpack.config.js @@ -93,5 +93,8 @@ module.exports = { warnings: true, errors: true } - } + }, + externals: [ + 'child_process' + ] }; \ No newline at end of file -- cgit v1.2.3-70-g09d2 From 17b27d3575d3f91f461262e5ad72a457238d198a Mon Sep 17 00:00:00 2001 From: ab Date: Wed, 7 Aug 2019 16:28:51 -0400 Subject: correlation matrix completed --- package.json | 2 +- src/client/ClientRecommender.ts | 101 +++++++++++++++++++++ src/client/cognitive_services/CognitiveServices.ts | 22 +++-- src/client/views/MainView.tsx | 7 ++ .../collectionFreeForm/CollectionFreeFormView.tsx | 19 +++- src/client/views/nodes/ImageBox.tsx | 14 +-- src/server/Recommender.ts | 27 +++++- 7 files changed, 166 insertions(+), 26 deletions(-) create mode 100644 src/client/ClientRecommender.ts (limited to 'src/client/cognitive_services') diff --git a/package.json b/package.json index 44d5287bd..ebb0c35a8 100644 --- a/package.json +++ b/package.json @@ -224,4 +224,4 @@ "xoauth2": "^1.2.0", "youtube": "^0.1.0" } -} \ No newline at end of file +} diff --git a/src/client/ClientRecommender.ts b/src/client/ClientRecommender.ts new file mode 100644 index 000000000..7ff79ab50 --- /dev/null +++ b/src/client/ClientRecommender.ts @@ -0,0 +1,101 @@ +import { Doc } from "../new_fields/Doc"; +import { StrCast } from "../new_fields/Types"; +import { List } from "../new_fields/List"; +import { CognitiveServices } from "./cognitive_services/CognitiveServices"; + + +var assert = require('assert'); + +export class ClientRecommender { + + static Instance: ClientRecommender; + private docVectors: Set; + + constructor() { + //console.log("creating client recommender..."); + ClientRecommender.Instance = this; + this.docVectors = new Set(); + } + + + /*** + * Computes the cosine similarity between two vectors in Euclidean space. + */ + + private distance(vector1: number[], vector2: number[]) { + assert(vector1.length === vector2.length, "Vectors are not the same length"); + var dotproduct = 0; + var mA = 0; + var mB = 0; + for (let i = 0; i < vector1.length; i++) { // here you missed the i++ + dotproduct += (vector1[i] * vector2[i]); + mA += (vector1[i] * vector1[i]); + mB += (vector2[i] * vector2[i]); + } + mA = Math.sqrt(mA); + mB = Math.sqrt(mB); + var similarity = (dotproduct) / ((mA) * (mB)); // here you needed extra brackets + return similarity; + } + + /*** + * Computes the mean of a set of vectors + */ + + public mean(paragraph: Set) { + const n = 200; + const num_words = paragraph.size; + let meanVector = new Array(n).fill(0); // mean vector + paragraph.forEach((wordvec: number[]) => { + for (let i = 0; i < n; i++) { + meanVector[i] += wordvec[i]; + } + }); + meanVector = meanVector.map(x => x / num_words); + this.addToDocSet(meanVector); + return meanVector; + } + + private addToDocSet(vector: number[]) { + if (this.docVectors) { + this.docVectors.add(vector); + } + } + + /*** + * Uses Cognitive Services to extract keywords from a document + */ + + public async extractText(dataDoc: Doc, extDoc: Doc) { + let data = StrCast(dataDoc.title); + //console.log(data); + let converter = (results: any) => { + let keyterms = new List(); + results.documents.forEach((doc: any) => { + let keyPhrases = doc.keyPhrases; + keyPhrases.map((kp: string) => keyterms.push(kp)); + }); + return keyterms; + }; + await CognitiveServices.Text.Manager.analyzer(extDoc, ["key words"], data, converter); + } + + /*** + * Creates distance matrix for all Documents analyzed + */ + + public createDistanceMatrix(documents: Set = this.docVectors) { + const documents_list = Array.from(documents); + const n = documents_list.length; + var matrix = new Array(n).fill(0).map(() => new Array(n).fill(0)); + for (let i = 0; i < n; i++) { + var doc1 = documents_list[i]; + for (let j = 0; j < n; j++) { + var doc2 = documents_list[j]; + matrix[i][j] = this.distance(doc1, doc2); + } + } + return matrix; + } + +} \ No newline at end of file diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 863236b60..cc366abc2 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -8,6 +8,7 @@ import { InkData } from "../../new_fields/InkField"; import { UndoManager } from "../util/UndoManager"; import requestPromise = require("request-promise"); import { List } from "../../new_fields/List"; +import { ClientRecommender } from "../ClientRecommender"; type APIManager = { converter: BodyConverter, requester: RequestExecutor, analyzer: AnalysisApplier }; type RequestExecutor = (apiKey: string, body: string, service: Service) => Promise; @@ -257,20 +258,21 @@ export namespace CognitiveServices { //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); target[keys[0]] = keyterms; console.log("analyzed!"); - vectorize(keyterms); - return null; - }, - - + await vectorize(keyterms); + } }; - function vectorize(keyterms: any) { + async function vectorize(keyterms: any) { console.log("vectorizing..."); - keyterms = ["father", "king"]; + //keyterms = ["father", "king"]; let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; - requestPromise.post(args).then((value) => { - value.forEach((wordvec: any) => { - console.log(wordvec.word); + await requestPromise.post(args).then(async (wordvecs) => { + var vectorValues = new Set(); + wordvecs.forEach((wordvec: any) => { + //console.log(wordvec.word); + vectorValues.add(wordvec.values as number[]); }); + ClientRecommender.Instance.mean(vectorValues); + //console.log(vectorValues.size); }); } diff --git a/src/client/views/MainView.tsx b/src/client/views/MainView.tsx index 2ecf5fd85..97964166a 100644 --- a/src/client/views/MainView.tsx +++ b/src/client/views/MainView.tsx @@ -39,6 +39,7 @@ import { FilterBox } from './search/FilterBox'; import { CollectionTreeView } from './collections/CollectionTreeView'; import { ClientUtils } from '../util/ClientUtils'; import { SchemaHeaderField, RandomPastel } from '../../new_fields/SchemaHeaderField'; +//import { DocumentManager } from '../util/DocumentManager'; @observer export class MainView extends React.Component { @@ -435,6 +436,12 @@ export class MainView extends React.Component { ; } + // clusterDocuments = () => { + // DocumentManager.Instance.DocumentViews(); + // } + + + @action diff --git a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx index 29f9b1429..9344b43d2 100644 --- a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx +++ b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx @@ -1,6 +1,6 @@ import { library } from "@fortawesome/fontawesome-svg-core"; import { faEye } from "@fortawesome/free-regular-svg-icons"; -import { faCompass, faCompressArrowsAlt, faExpandArrowsAlt, faPaintBrush, faTable, faUpload } from "@fortawesome/free-solid-svg-icons"; +import { faCompass, faCompressArrowsAlt, faExpandArrowsAlt, faPaintBrush, faTable, faUpload, faBrain } from "@fortawesome/free-solid-svg-icons"; import { action, computed } from "mobx"; import { observer } from "mobx-react"; import { Doc, DocListCastAsync, HeightSym, WidthSym } from "../../../../new_fields/Doc"; @@ -37,8 +37,9 @@ import "./CollectionFreeFormView.scss"; import { MarqueeView } from "./MarqueeView"; import React = require("react"); import v5 = require("uuid/v5"); +import { ClientRecommender } from "../../../ClientRecommender"; -library.add(faEye, faTable, faPaintBrush, faExpandArrowsAlt, faCompressArrowsAlt, faCompass, faUpload); +library.add(faEye, faTable, faPaintBrush, faExpandArrowsAlt, faCompressArrowsAlt, faCompass, faUpload, faBrain); export const panZoomSchema = createSchema({ panX: "number", @@ -596,6 +597,20 @@ export class CollectionFreeFormView extends CollectionSubView(PanZoomDocument) { input.click(); } }); + ContextMenu.Instance.addItem({ + description: "Recommender System", + event: async () => { + new ClientRecommender(); + let activedocs = this.getActiveDocuments(); + await Promise.all(activedocs.map((doc: Doc) => { + console.log(StrCast(doc.title)); + const extdoc = doc.data_ext as Doc; + return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc); + })); + console.log(ClientRecommender.Instance.createDistanceMatrix()); + }, + icon: "brain" + }); } diff --git a/src/client/views/nodes/ImageBox.tsx b/src/client/views/nodes/ImageBox.tsx index 161226c0d..660772c0e 100644 --- a/src/client/views/nodes/ImageBox.tsx +++ b/src/client/views/nodes/ImageBox.tsx @@ -31,7 +31,7 @@ import { faEye } from '@fortawesome/free-regular-svg-icons'; import { ComputedField } from '../../../new_fields/ScriptField'; import { CompileScript } from '../../util/Scripting'; import { thisExpression } from 'babel-types'; -import { Recommender } from '../../../server/Recommender'; +//import { Recommender } from '../../../server/Recommender'; import requestPromise = require('request-promise'); var requestImageSize = require('../../util/request-image-size'); var path = require('path'); @@ -244,17 +244,7 @@ export class ImageBox extends DocComponent(ImageD } extractText = () => { - let data = StrCast(this.dataDoc.title); - console.log(data); - let converter = (results: any) => { - let keyterms = new List(); - results.documents.forEach((doc: any) => { - let keyPhrases = doc.keyPhrases; - keyPhrases.map((kp: string) => keyterms.push(kp)); - }); - return keyterms; - }; - CognitiveServices.Text.Manager.analyzer(this.extensionDoc, ["key words"], data, converter); + //Recommender.Instance.extractText(this.dataDoc, this.extensionDoc); // request recommender //fetch(Utils.prepend("/recommender"), { body: body, method: "POST", headers: { "content-type": "application/json" } }).then((value) => console.log(value)); } diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index 3c71f3aa1..ea59703c3 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -1,4 +1,10 @@ +//import { Doc } from "../new_fields/Doc"; +//import { StrCast } from "../new_fields/Types"; +//import { List } from "../new_fields/List"; +//import { CognitiveServices } from "../client/cognitive_services/CognitiveServices"; + var w2v = require('word2vec'); +var assert = require('assert'); export class Recommender { @@ -10,6 +16,10 @@ export class Recommender { Recommender.Instance = this; } + /*** + * Loads pre-trained model from word2vec + */ + private loadModel(): Promise { let self = this; return new Promise(res => { @@ -20,6 +30,10 @@ export class Recommender { }); } + /*** + * Testing + */ + public async testModel() { if (!this._model) { await this.loadModel(); @@ -33,6 +47,10 @@ export class Recommender { } } + /*** + * Tests if instance exists + */ + public async testInstance(text: string) { if (!this._model) { await this.loadModel(); @@ -40,14 +58,21 @@ export class Recommender { console.log(text); } + /*** + * Uses model to convert words to vectors + */ + public async vectorize(text: string[]) { if (!this._model) { await this.loadModel(); } if (this._model) { let word_vecs = this._model.getVectors(text); - console.log(word_vecs[0]); return word_vecs; } } + + + + } -- cgit v1.2.3-70-g09d2 From 9dd2a31b72e5e527e2dae3b68f856ab8da879e93 Mon Sep 17 00:00:00 2001 From: ab Date: Mon, 12 Aug 2019 16:41:23 -0400 Subject: documentation --- package.json | 2 +- src/client/ClientRecommender.tsx | 18 +++++----- src/client/cognitive_services/CognitiveServices.ts | 42 ++++++++++++---------- src/client/util/SearchUtil.ts | 13 ++++--- .../collectionFreeForm/CollectionFreeFormView.tsx | 1 + src/server/Recommender.ts | 1 + 6 files changed, 45 insertions(+), 32 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/package.json b/package.json index 1e2c74411..1c7a10ac8 100644 --- a/package.json +++ b/package.json @@ -225,4 +225,4 @@ "xoauth2": "^1.2.0", "youtube": "^0.1.0" } -} \ No newline at end of file +} diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index ddaa8a7fc..63f85c737 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -75,13 +75,15 @@ export class ClientRecommender extends React.Component { const n = 200; const num_words = paragraph.size; let meanVector = new Array(n).fill(0); // mean vector - paragraph.forEach((wordvec: number[]) => { - for (let i = 0; i < n; i++) { - meanVector[i] += wordvec[i]; - } - }); - meanVector = meanVector.map(x => x / num_words); - this.addToDocSet(meanVector); + if (num_words > 0) { // check to see if paragraph actually was vectorized + paragraph.forEach((wordvec: number[]) => { + for (let i = 0; i < n; i++) { + meanVector[i] += wordvec[i]; + } + }); + meanVector = meanVector.map(x => x / num_words); + this.addToDocSet(meanVector); + } return meanVector; } @@ -106,7 +108,7 @@ export class ClientRecommender extends React.Component { }); return keyterms; }; - await CognitiveServices.Text.Manager.analyzer(extDoc, ["key words"], data, converter); + await CognitiveServices.Text.Appliers.analyzer(extDoc, ["key words"], data, converter); } /*** diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 954a05585..75d0760ed 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -258,32 +258,38 @@ export namespace CognitiveServices { }; console.log("requested!"); return request.post(options); - }, - analyzer: async (target: Doc, keys: string[], data: string, converter: Converter) => { - let results = await ExecuteQuery(Service.Text, Manager, data); + } + }; + + export namespace Appliers { + + export async function vectorize(keyterms: any) { + console.log("vectorizing..."); + //keyterms = ["father", "king"]; + let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; + await requestPromise.post(args).then(async (wordvecs) => { + var vectorValues = new Set(); + wordvecs.forEach((wordvec: any) => { + //console.log(wordvec.word); + vectorValues.add(wordvec.values as number[]); + }); + ClientRecommender.Instance.mean(vectorValues); + //console.log(vectorValues.size); + }); + } + + export const analyzer = async (target: Doc, keys: string[], data: string, converter: Converter) => { + let results = await ExecuteQuery(Service.Text, Manager, data); console.log(results); let keyterms = converter(results); //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); target[keys[0]] = keyterms; console.log("analyzed!"); await vectorize(keyterms); - } - }; - async function vectorize(keyterms: any) { - console.log("vectorizing..."); - //keyterms = ["father", "king"]; - let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; - await requestPromise.post(args).then(async (wordvecs) => { - var vectorValues = new Set(); - wordvecs.forEach((wordvec: any) => { - //console.log(wordvec.word); - vectorValues.add(wordvec.values as number[]); - }); - ClientRecommender.Instance.mean(vectorValues); - //console.log(vectorValues.size); - }); + }; } } + } \ No newline at end of file diff --git a/src/client/util/SearchUtil.ts b/src/client/util/SearchUtil.ts index 3a3ba1803..1fce995d7 100644 --- a/src/client/util/SearchUtil.ts +++ b/src/client/util/SearchUtil.ts @@ -82,18 +82,21 @@ export namespace SearchUtil { const query = "*"; let response = await rp.get(Utils.prepend('/search'), { qs: { - query + q: query } }); - let res: string[] = JSON.parse(response); - const fields = await DocServer.GetRefFields(res); + let result: IdSearchResult = JSON.parse(response); + const { ids, numFound, highlighting } = result; + const docMap = await DocServer.GetRefFields(ids); const docs: Doc[] = []; - for (const id of res) { - const field = fields[id]; + for (const id of ids) { + const field = docMap[id]; if (field instanceof Doc) { docs.push(field); } } return docs; + // const docs = ids.map((id: string) => docMap[id]).filter((doc: any) => doc instanceof Doc); + // return docs as Doc[]; } } diff --git a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx index e9791df4e..d1e8031fd 100644 --- a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx +++ b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx @@ -894,6 +894,7 @@ export class CollectionFreeFormView extends CollectionSubView(PanZoomDocument) { let activedocs = this.getActiveDocuments(); let allDocs = await SearchUtil.GetAllDocs(); allDocs.forEach(doc => console.log(doc.title)); + // clears internal representation of documents as vectors ClientRecommender.Instance.reset_docs(); await Promise.all(activedocs.map((doc: Doc) => { //console.log(StrCast(doc.title)); diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index d175b67c7..1c95d7ea4 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -70,6 +70,7 @@ export class Recommender { } if (this._model) { let word_vecs = this._model.getVectors(text); + return word_vecs; } } -- cgit v1.2.3-70-g09d2 From e0bfe978e029268b3901b5d098f946b1a6fc7d0d Mon Sep 17 00:00:00 2001 From: ab Date: Thu, 29 Aug 2019 18:43:32 -0400 Subject: ui fixes, datadoc resolved --- src/client/ClientRecommender.tsx | 83 +++++++++++++++------- src/client/cognitive_services/CognitiveServices.ts | 24 ++++--- src/client/views/MainView.tsx | 1 + src/client/views/Recommendations.tsx | 21 ++++-- .../collectionFreeForm/CollectionFreeFormView.tsx | 21 ------ src/client/views/nodes/DocumentView.tsx | 26 ++++--- src/client/views/nodes/ImageBox.tsx | 16 ----- 7 files changed, 104 insertions(+), 88 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index 63f85c737..a6d1a32b3 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -1,5 +1,5 @@ import { Doc } from "../new_fields/Doc"; -import { StrCast } from "../new_fields/Types"; +import { StrCast, Cast } from "../new_fields/Types"; import { List } from "../new_fields/List"; import { CognitiveServices } from "./cognitive_services/CognitiveServices"; import React = require("react"); @@ -8,30 +8,42 @@ import { observable, action, computed, reaction } from "mobx"; var assert = require('assert'); import "./ClientRecommender.scss"; import { JSXElement } from "babel-types"; +import { ToPlainText, RichTextField } from "../new_fields/RichTextField"; export interface RecommenderProps { title: string; } +export interface RecommenderDocument { + actualDoc: Doc; + vectorDoc: number[]; + score: number; +} + @observer export class ClientRecommender extends React.Component { static Instance: ClientRecommender; - private docVectors: Set; + private mainDoc?: RecommenderDocument; + private docVectors: Set = new Set(); @observable private corr_matrix = [[0, 0], [0, 0]]; constructor(props: RecommenderProps) { //console.log("creating client recommender..."); super(props); if (!ClientRecommender.Instance) ClientRecommender.Instance = this; - this.docVectors = new Set(); - //this.corr_matrix = [[0, 0], [0, 0]]; + ClientRecommender.Instance.docVectors = new Set(); + //ClientRecommender.Instance.corr_matrix = [[0, 0], [0, 0]]; } @action public reset_docs() { - this.docVectors = new Set(); - this.corr_matrix = [[0, 0], [0, 0]]; + ClientRecommender.Instance.docVectors = new Set(); + ClientRecommender.Instance.corr_matrix = [[0, 0], [0, 0]]; + } + + public deleteDocs() { + console.log("deleting previews..."); } /*** @@ -67,11 +79,24 @@ export class ClientRecommender extends React.Component { } } + public computeSimilarities() { + ClientRecommender.Instance.docVectors.forEach((doc: RecommenderDocument) => { + if (ClientRecommender.Instance.mainDoc) { + const distance = ClientRecommender.Instance.distance(ClientRecommender.Instance.mainDoc.vectorDoc, doc.vectorDoc, "euclidian"); + doc.score = distance; + } + } + ); + let doclist = Array.from(ClientRecommender.Instance.docVectors); + doclist.sort((a: RecommenderDocument, b: RecommenderDocument) => a.score - b.score); + return doclist; + } + /*** * Computes the mean of a set of vectors */ - public mean(paragraph: Set) { + public mean(paragraph: Set, dataDoc: Doc, mainDoc: boolean) { const n = 200; const num_words = paragraph.size; let meanVector = new Array(n).fill(0); // mean vector @@ -82,14 +107,16 @@ export class ClientRecommender extends React.Component { } }); meanVector = meanVector.map(x => x / num_words); - this.addToDocSet(meanVector); + const internalDoc: RecommenderDocument = { actualDoc: dataDoc, vectorDoc: meanVector, score: 0 }; + if (mainDoc) ClientRecommender.Instance.mainDoc = internalDoc; + ClientRecommender.Instance.addToDocSet(internalDoc); } return meanVector; } - private addToDocSet(vector: number[]) { - if (this.docVectors) { - this.docVectors.add(vector); + private addToDocSet(internalDoc: RecommenderDocument) { + if (ClientRecommender.Instance.docVectors) { + ClientRecommender.Instance.docVectors.add(internalDoc); } } @@ -97,9 +124,11 @@ export class ClientRecommender extends React.Component { * Uses Cognitive Services to extract keywords from a document */ - public async extractText(dataDoc: Doc, extDoc: Doc) { - let data = StrCast(dataDoc.title); - //console.log(data); + public async extractText(dataDoc: Doc, extDoc: Doc, mainDoc: boolean = false) { + let fielddata = Cast(dataDoc.data, RichTextField); + let data: string; + fielddata ? data = fielddata[ToPlainText]() : data = ""; + console.log(data); let converter = (results: any) => { let keyterms = new List(); results.documents.forEach((doc: any) => { @@ -108,7 +137,7 @@ export class ClientRecommender extends React.Component { }); return keyterms; }; - await CognitiveServices.Text.Appliers.analyzer(extDoc, ["key words"], data, converter); + await CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc); } /*** @@ -116,7 +145,7 @@ export class ClientRecommender extends React.Component { */ @action - public createDistanceMatrix(documents: Set = this.docVectors) { + public createDistanceMatrix(documents: Set = ClientRecommender.Instance.docVectors) { const documents_list = Array.from(documents); const n = documents_list.length; var matrix = new Array(n).fill(0).map(() => new Array(n).fill(0)); @@ -124,22 +153,22 @@ export class ClientRecommender extends React.Component { var doc1 = documents_list[i]; for (let j = 0; j < n; j++) { var doc2 = documents_list[j]; - matrix[i][j] = this.distance(doc1, doc2, "euclidian"); + matrix[i][j] = ClientRecommender.Instance.distance(doc1.vectorDoc, doc2.vectorDoc, "euclidian"); } } - this.corr_matrix = matrix; + ClientRecommender.Instance.corr_matrix = matrix; return matrix; } @computed private get generateRows() { - const n = this.corr_matrix.length; + const n = ClientRecommender.Instance.corr_matrix.length; let rows: JSX.Element[] = []; for (let i = 0; i < n; i++) { let children: JSX.Element[] = []; for (let j = 0; j < n; j++) { - //let cell = React.createElement("td", this.corr_matrix[i][j]); - let cell = {this.corr_matrix[i][j].toFixed(4)}; + //let cell = React.createElement("td", ClientRecommender.Instance.corr_matrix[i][j]); + let cell = {ClientRecommender.Instance.corr_matrix[i][j].toFixed(4)}; children.push(cell); } //let row = React.createElement("tr", { children: children, key: i }); @@ -151,22 +180,22 @@ export class ClientRecommender extends React.Component { render() { return (
-

{this.props.title ? this.props.title : "hello"}

+

{ClientRecommender.Instance.props.title ? ClientRecommender.Instance.props.title : "hello"}

{/* - - + + - - + +
{this.corr_matrix[0][0].toFixed(4)}{this.corr_matrix[0][1].toFixed(4)}{ClientRecommender.Instance.corr_matrix[0][0].toFixed(4)}{ClientRecommender.Instance.corr_matrix[0][1].toFixed(4)}
{this.corr_matrix[1][0].toFixed(4)}{this.corr_matrix[1][1].toFixed(4)}{ClientRecommender.Instance.corr_matrix[1][0].toFixed(4)}{ClientRecommender.Instance.corr_matrix[1][1].toFixed(4)}
*/} - {this.generateRows} + {ClientRecommender.Instance.generateRows}
); diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 75d0760ed..874ee433d 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -263,29 +263,35 @@ export namespace CognitiveServices { export namespace Appliers { - export async function vectorize(keyterms: any) { + export async function vectorize(keyterms: any, dataDoc: Doc, mainDoc: boolean = false) { console.log("vectorizing..."); //keyterms = ["father", "king"]; let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; await requestPromise.post(args).then(async (wordvecs) => { - var vectorValues = new Set(); - wordvecs.forEach((wordvec: any) => { - //console.log(wordvec.word); - vectorValues.add(wordvec.values as number[]); - }); - ClientRecommender.Instance.mean(vectorValues); + if (wordvecs.length > 0) { + console.log("successful vectorization!"); + var vectorValues = new Set(); + wordvecs.forEach((wordvec: any) => { + //console.log(wordvec.word); + vectorValues.add(wordvec.values as number[]); + }); + ClientRecommender.Instance.mean(vectorValues, dataDoc, mainDoc); + } // adds document to internal doc set + else { + console.log("unsuccessful :( word(s) not in vocabulary"); + } //console.log(vectorValues.size); }); } - export const analyzer = async (target: Doc, keys: string[], data: string, converter: Converter) => { + export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: Converter, mainDoc: boolean = false) => { let results = await ExecuteQuery(Service.Text, Manager, data); console.log(results); let keyterms = converter(results); //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); target[keys[0]] = keyterms; console.log("analyzed!"); - await vectorize(keyterms); + await vectorize(keyterms, dataDoc, mainDoc); }; } diff --git a/src/client/views/MainView.tsx b/src/client/views/MainView.tsx index 57eb30439..3a5795077 100644 --- a/src/client/views/MainView.tsx +++ b/src/client/views/MainView.tsx @@ -204,6 +204,7 @@ export class MainView extends React.Component { const targets = document.elementsFromPoint(e.x, e.y); if (targets && targets.length && targets[0].className.toString().indexOf("contextMenu") === -1) { ContextMenu.Instance.closeMenu(); + Recommendations.Instance.closeMenu(); } }); diff --git a/src/client/views/Recommendations.tsx b/src/client/views/Recommendations.tsx index 8569996b3..cf1974c69 100644 --- a/src/client/views/Recommendations.tsx +++ b/src/client/views/Recommendations.tsx @@ -10,8 +10,10 @@ import { returnFalse, emptyFunction, returnEmptyString, returnOne } from "../../ import { Transform } from "../util/Transform"; import { ObjectField } from "../../new_fields/ObjectField"; import { DocumentView } from "./nodes/DocumentView"; -import { DocumentType } from "../documents/Documents"; - +import { DocumentType } from '../documents/DocumentTypes'; +import { ClientRecommender } from "../ClientRecommender"; +import { DocServer } from "../DocServer"; +import { Id } from "../../new_fields/FieldSymbols"; export interface RecProps { documents: { preview: Doc, similarity: number }[]; @@ -28,6 +30,7 @@ export class Recommendations extends React.Component<{}> { @observable private _width: number = 0; @observable private _height: number = 0; @observable private _documents: { preview: Doc, score: number }[] = []; + private previewDocs: Doc[] = []; constructor(props: {}) { super(props); @@ -52,7 +55,8 @@ export class Recommendations extends React.Component<{}> { let returnXDimension = () => 50; let returnYDimension = () => 50; let scale = () => returnXDimension() / NumCast(renderDoc.nativeWidth, returnXDimension()); - let newRenderDoc = Doc.MakeDelegate(renderDoc); /// newRenderDoc -> renderDoc -> render"data"Doc -> TextProt + //let scale = () => 1; + //let newRenderDoc = Doc.MakeDelegate(renderDoc); /// newRenderDoc -> renderDoc -> render"data"Doc -> TextProt const docview =
{/* onPointerDown={action(() => { this._useIcons = !this._useIcons; @@ -62,7 +66,7 @@ export class Recommendations extends React.Component<{}> { onPointerLeave={action(() => this._displayDim = 50)} > */} { ContentScaling={scale} />
; - const data = renderDoc.data; - if (data instanceof ObjectField) newRenderDoc.data = ObjectField.MakeCopy(data); - newRenderDoc.preview = true; + // const data = renderDoc.data; + // if (data instanceof ObjectField) newRenderDoc.data = ObjectField.MakeCopy(data); + // newRenderDoc.preview = true; + // this.previewDocs.push(newRenderDoc); return docview; } @@ -92,6 +97,8 @@ export class Recommendations extends React.Component<{}> { @action closeMenu = () => { this._display = false; + this.previewDocs.forEach(doc => DocServer.DeleteDocument(doc[Id])); + this.previewDocs = []; } @action diff --git a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx index 2d4775070..3cef93383 100644 --- a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx +++ b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx @@ -857,27 +857,6 @@ export class CollectionFreeFormView extends CollectionSubView(PanZoomDocument) { input.click(); } }); - ContextMenu.Instance.addItem({ - description: "Recommender System", - event: async () => { - // if (!ClientRecommender.Instance) new ClientRecommender({ title: "Client Recommender" }); - let activedocs = this.getActiveDocuments(); - let allDocs = await SearchUtil.GetAllDocs(); - allDocs.forEach(doc => console.log(doc.title)); - // clears internal representation of documents as vectors - ClientRecommender.Instance.reset_docs(); - await Promise.all(allDocs.map((doc: Doc) => { - console.log(StrCast(doc.title)); - if (doc.type === DocumentType.IMG) { - console.log(doc.title); - const extdoc = doc.data_ext as Doc; - return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc); - } - })); - console.log(ClientRecommender.Instance.createDistanceMatrix()); - }, - icon: "brain" - }); layoutItems.push({ description: `${this.fitToBox ? "Unset" : "Set"} Fit To Container`, event: this.fitToContainer, icon: !this.fitToBox ? "expand-arrows-alt" : "compress-arrows-alt" }); layoutItems.push({ description: "reset view", event: () => { this.props.Document.panX = this.props.Document.panY = 0; this.props.Document.scale = 1; }, icon: "compress-arrows-alt" }); layoutItems.push({ diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index 2a6e91272..f708a7a3a 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -648,21 +648,31 @@ export class DocumentView extends DocComponent(Docu if (!ClientRecommender.Instance) new ClientRecommender({ title: "Client Recommender" }); let documents: Doc[] = []; let allDocs = await SearchUtil.GetAllDocs(); - allDocs.forEach(doc => console.log(doc.title)); + //allDocs.forEach(doc => console.log(doc.title)); // clears internal representation of documents as vectors ClientRecommender.Instance.reset_docs(); await Promise.all(allDocs.map((doc: Doc) => { - if (doc.type === DocumentType.IMG) { - console.log(StrCast(doc.title)); - documents.push(doc); - const extdoc = doc.data_ext as Doc; - return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc); + let mainDoc: boolean = false; + const dataDoc = Doc.GetDataDoc(doc); + if (doc.type === DocumentType.TEXT) { + if (dataDoc === Doc.GetDataDoc(this.props.Document)) { + mainDoc = true; + console.log(StrCast(doc.title)); + } + if (!documents.includes(dataDoc)) { + documents.push(dataDoc); + const extdoc = doc.data_ext as Doc; + return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc, mainDoc); + } } })); console.log(ClientRecommender.Instance.createDistanceMatrix()); + const doclist = ClientRecommender.Instance.computeSimilarities(); let recDocs: { preview: Doc, score: number }[] = []; - for (let i = 0; i < documents.length; i++) { - recDocs.push({ preview: documents[i], score: i }); + // tslint:disable-next-line: prefer-for-of + for (let i = 0; i < doclist.length; i++) { + console.log(doclist[i].score); + recDocs.push({ preview: doclist[i].actualDoc, score: doclist[i].score }); } Recommendations.Instance.addDocuments(recDocs); Recommendations.Instance.displayRecommendations(e.pageX + 100, e.pageY); diff --git a/src/client/views/nodes/ImageBox.tsx b/src/client/views/nodes/ImageBox.tsx index ec35465eb..d94e92847 100644 --- a/src/client/views/nodes/ImageBox.tsx +++ b/src/client/views/nodes/ImageBox.tsx @@ -240,22 +240,6 @@ export class ImageBox extends DocComponent(ImageD } } - extractText = async () => { - //let activedocs = this.getActiveDocuments(); - let allDocs = await SearchUtil.GetAllDocs(); - allDocs.forEach(doc => console.log(doc.title)); - // clears internal representation of documents as vectors - ClientRecommender.Instance.reset_docs(); - await Promise.all(allDocs.map((doc: Doc) => { - //console.log(StrCast(doc.title)); - if (doc.type === DocumentType.IMG) { - const extdoc = doc.data_ext as Doc; - return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc); - } - })); - console.log(ClientRecommender.Instance.createDistanceMatrix()); - } - generateMetadata = (threshold: Confidence = Confidence.Excellent) => { let converter = (results: any) => { let tagDoc = new Doc; -- cgit v1.2.3-70-g09d2 From a1c2afe27c75354d4365a79ea202eca94516069e Mon Sep 17 00:00:00 2001 From: ab Date: Wed, 4 Sep 2019 09:52:39 -0400 Subject: stopwords, frequency, proto arxiv --- package.json | 2 + src/client/ClientRecommender.tsx | 75 ++++++++++++++++++++-- src/client/cognitive_services/CognitiveServices.ts | 13 ++-- src/client/util/TooltipTextMenu.scss | 2 +- src/client/views/nodes/DocumentView.tsx | 1 + src/server/Recommender.ts | 29 +++++++++ src/server/index.ts | 1 + 7 files changed, 113 insertions(+), 10 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/package.json b/package.json index ec5af93b1..d4b5bdab6 100644 --- a/package.json +++ b/package.json @@ -118,6 +118,7 @@ "@types/youtube": "0.0.38", "adm-zip": "^0.4.13", "archiver": "^3.0.3", + "arxiv-api-node": "0.0.2", "async": "^2.6.2", "babel-runtime": "^6.26.0", "bcrypt-nodejs": "0.0.3", @@ -218,6 +219,7 @@ "socket.io-client": "^2.2.0", "solr-node": "^1.2.1", "standard-http-error": "^2.0.1", + "stopword": "^0.3.3", "typescript-collections": "^1.3.2", "url-loader": "^1.1.2", "uuid": "^3.3.2", diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index 9953700cc..66f0ae745 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -6,6 +6,7 @@ import React = require("react"); import { observer } from "mobx-react"; import { observable, action, computed, reaction } from "mobx"; var assert = require('assert'); +var sw = require('stopword'); import "./ClientRecommender.scss"; import { JSXElement } from "babel-types"; import { ToPlainText, RichTextField } from "../new_fields/RichTextField"; @@ -130,20 +131,86 @@ export class ClientRecommender extends React.Component { let data: string; fielddata ? data = fielddata[ToPlainText]() : data = ""; console.log(data); - let converter = (results: any) => { + let converter = (results: any, data: string) => { let keyterms = new List(); + let keyterms_counted = new List(); results.documents.forEach((doc: any) => { let keyPhrases = doc.keyPhrases; keyPhrases.map((kp: string) => { - const words = kp.split(" "); - words.forEach((word) => keyterms.push(word)); + const frequency = this.countFrequencies(kp, data); + let words = kp.split(" "); // separates phrase into words + words = this.removeStopWords(words); + words.forEach((word) => { + keyterms.push(word); + for (let i = 0; i < frequency; i++) { + keyterms_counted.push(word); + } + }); + }); + }); + return { keyterms: keyterms, keyterms_counted: keyterms_counted }; + }; + let test = (results: any, data: string) => { + results.documents.forEach((doc: any) => { + let kps = doc.keyPhrases; + kps.map((kp: string) => { + this.countFrequencies(kp, data); }); }); - return keyterms; }; await CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc); } + private countFrequencies(keyphrase: string, paragraph: string) { + let data = paragraph.split(" "); + let kp_array = keyphrase.split(" "); + let num_keywords = kp_array.length; + let par_length = data.length; + let frequency = 0; + // console.log("Paragraph: ", data); + // console.log("Keyphrases:", kp_array); + for (let i = 0; i <= par_length - num_keywords; i++) { + const window = data.slice(i, i + num_keywords); + if (JSON.stringify(window) === JSON.stringify(kp_array)) { + frequency++; + } + } + return frequency; + } + + private removeStopWords(word_array: string[]) { + //console.log(sw.removeStopwords(word_array)); + return sw.removeStopwords(word_array); + } + + /** + * Request to the arXiv server for ML articles. + */ + + arxivrequest = async (query: string) => { + let xhttp = new XMLHttpRequest(); + let serveraddress = "http://export.arxiv.org/api" + let endpoint = serveraddress + "/query?search_query=all:" + query + "&start=0&max_results=5"; + let promisified = (resolve: any, reject: any) => { + xhttp.onreadystatechange = function () { + if (this.readyState === 4) { + let result = xhttp.response; + switch (this.status) { + case 200: + console.log(result); + return resolve(result); + case 400: + default: + return reject(result); + } + } + }; + xhttp.open("GET", endpoint, true); + xhttp.send(); + }; + return new Promise(promisified); + } + /*** * Creates distance matrix for all Documents analyzed */ diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 874ee433d..eb1dd5197 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -15,6 +15,7 @@ type RequestExecutor = (apiKey: string, body: string, service: Service) => Promi type AnalysisApplier = (target: Doc, relevantKeys: string[], data: D, ...args: any) => any; type BodyConverter = (data: D) => string; type Converter = (results: any) => Field; +type TextConverter = (results: any, data: string) => { keyterms: Field, keyterms_counted: Field }; export type Tag = { name: string, confidence: number }; export type Rectangle = { top: number, left: number, width: number, height: number }; @@ -263,7 +264,7 @@ export namespace CognitiveServices { export namespace Appliers { - export async function vectorize(keyterms: any, dataDoc: Doc, mainDoc: boolean = false) { + export async function vectorize(keyterms: any, dataDoc: Doc, mainDoc: boolean = false, data: string) { console.log("vectorizing..."); //keyterms = ["father", "king"]; let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; @@ -284,15 +285,17 @@ export namespace CognitiveServices { }); } - export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: Converter, mainDoc: boolean = false) => { + export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, mainDoc: boolean = false) => { let results = await ExecuteQuery(Service.Text, Manager, data); console.log(results); - let keyterms = converter(results); + let keyterms = converter(results, data); //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); - target[keys[0]] = keyterms; + target[keys[0]] = keyterms.keyterms; console.log("analyzed!"); - await vectorize(keyterms, dataDoc, mainDoc); + await vectorize(keyterms.keyterms_counted, dataDoc, mainDoc, data); }; + + // export async function countFrequencies() } } diff --git a/src/client/util/TooltipTextMenu.scss b/src/client/util/TooltipTextMenu.scss index ebf833dbe..ab6cee763 100644 --- a/src/client/util/TooltipTextMenu.scss +++ b/src/client/util/TooltipTextMenu.scss @@ -351,5 +351,5 @@ .dragger{ color: #eee; - margin-left: 5px; + margin: 5px; } \ No newline at end of file diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index d51c90b61..a1e64f1c5 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -651,6 +651,7 @@ export class DocumentView extends DocComponent(Docu // allDocs.forEach(doc => console.log(doc.title)); // clears internal representation of documents as vectors ClientRecommender.Instance.reset_docs(); + ClientRecommender.Instance.arxivrequest("electrons"); await Promise.all(allDocs.map((doc: Doc) => { let mainDoc: boolean = false; const dataDoc = Doc.GetDataDoc(doc); diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index 781974208..efb5fbbbf 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -5,6 +5,9 @@ var w2v = require('word2vec'); var assert = require('assert'); +var arxivapi = require('arxiv-api-node'); +import requestPromise = require("request-promise"); + export class Recommender { @@ -75,6 +78,32 @@ export class Recommender { } } + public async arxivRequest(query: string) { + // let xhttp = new XMLHttpRequest(); + // let serveraddress = "http://export.arxiv.org/api/query?search_query=all:electron&start=0&max_results=1"; + // let promisified = (resolve: any, reject: any) => { + // xhttp.onreadystatechange = function () { + // if (this.readyState === 4) { + // let result = xhttp.response; + // switch (this.status) { + // case 200: + // console.log(result); + // return resolve(result); + // case 400: + // default: + // return reject(result); + // } + // } + // }; + // xhttp.open("GET", serveraddress, true); + // xhttp.send(); + // }; + // return new Promise(promisified); + + let res = await arxivapi.query("all:electrons"); + console.log(res); + } + diff --git a/src/server/index.ts b/src/server/index.ts index e1ecc4ac0..24ffc466f 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -688,6 +688,7 @@ app.use(RouteStore.corsProxy, (req, res) => { let recommender = new Recommender(); recommender.testModel(); +recommender.arxivRequest("Triangle-GAN"); app.post("/recommender", async (req, res) => { let keyphrases = req.body.keyphrases; -- cgit v1.2.3-70-g09d2 From 3f63b49ccc1ebcbeac64443702826ea4597200f3 Mon Sep 17 00:00:00 2001 From: Abdullah Ahmed Date: Mon, 16 Sep 2019 14:27:36 -0400 Subject: ui changes --- src/client/ClientRecommender.tsx | 39 ++++++++++++---------- src/client/cognitive_services/CognitiveServices.ts | 4 +-- src/client/views/nodes/DocumentView.tsx | 26 +++++++++++++-- 3 files changed, 46 insertions(+), 23 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index 0e344dae9..217c89297 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -16,6 +16,12 @@ export interface RecommenderProps { title: string; } +/** + * actualDoc: datadoc + * vectorDoc: mean vector of text + * score: similarity score to main doc + */ + export interface RecommenderDocument { actualDoc: Doc; vectorDoc: number[]; @@ -28,6 +34,8 @@ export class ClientRecommender extends React.Component { static Instance: ClientRecommender; private mainDoc?: RecommenderDocument; private docVectors: Set = new Set(); + private highKP: string[] = []; + @observable private corr_matrix = [[0, 0], [0, 0]]; constructor(props: RecommenderProps) { @@ -45,10 +53,6 @@ export class ClientRecommender extends React.Component { ClientRecommender.Instance.corr_matrix = [[0, 0], [0, 0]]; } - public deleteDocs() { - console.log("deleting previews..."); - } - /*** * Computes the cosine similarity between two vectors in Euclidean space. */ @@ -82,6 +86,10 @@ export class ClientRecommender extends React.Component { } } + /** + * Returns list of {doc, similarity (to main doc)} in increasing score + */ + public computeSimilarities() { ClientRecommender.Instance.docVectors.forEach((doc: RecommenderDocument) => { if (ClientRecommender.Instance.mainDoc) { @@ -127,14 +135,14 @@ export class ClientRecommender extends React.Component { * Uses Cognitive Services to extract keywords from a document */ - public async extractText(dataDoc: Doc, extDoc: Doc, mainDoc: boolean = false) { + public async extractText(dataDoc: Doc, extDoc: Doc, internal: boolean = true, mainDoc: boolean = false) { let fielddata = Cast(dataDoc.data, RichTextField); let data: string; fielddata ? data = fielddata[ToPlainText]() : data = ""; let converter = (results: any, data: string) => { - let keyterms = new List(); - let keyterms_counted = new List(); - let highKP: string[] = [""]; + let keyterms = new List(); // raw keywords + let keyterms_counted = new List(); // keywords, where each keyword is repeated as + let highKP: string[] = [""]; // most frequent let high = 0; results.documents.forEach((doc: any) => { let keyPhrases = doc.keyPhrases; @@ -143,13 +151,12 @@ export class ClientRecommender extends React.Component { if (frequency > high) { high = frequency; highKP = [kp]; - } else if (frequency === high) { highKP.push(kp); } let words = kp.split(" "); // separates phrase into words - words = this.removeStopWords(words); + words = this.removeStopWords(words); // removes stop words if they appear in phrases words.forEach((word) => { keyterms.push(word); for (let i = 0; i < frequency; i++) { @@ -158,16 +165,12 @@ export class ClientRecommender extends React.Component { }); }); }); + this.highKP = highKP; console.log(highKP); this.sendRequest(highKP); return { keyterms: keyterms, keyterms_counted: keyterms_counted }; }; - await CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc); - } - - private findImportantKPs(keyterms_counted: string[], paragraph: string) { - let imporantSet = new Set(); - + await CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc, internal); } private countFrequencies(keyphrase: string, paragraph: string) { @@ -180,7 +183,7 @@ export class ClientRecommender extends React.Component { // console.log("Keyphrases:", kp_array); for (let i = 0; i <= par_length - num_keywords; i++) { const window = data.slice(i, i + num_keywords); - if (JSON.stringify(window) === JSON.stringify(kp_array)) { + if (JSON.stringify(window).toLowerCase() === JSON.stringify(kp_array).toLowerCase() || kp_array.every(val => window.includes(val))) { frequency++; } } @@ -194,7 +197,7 @@ export class ClientRecommender extends React.Component { private async sendRequest(keywords: string[]) { let query = ""; - keywords.forEach((kp: string) => query += kp); + keywords.forEach((kp: string) => query += " " + kp); await this.arxivrequest(query); } diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index eb1dd5197..8a58355a8 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -285,14 +285,14 @@ export namespace CognitiveServices { }); } - export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, mainDoc: boolean = false) => { + export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, mainDoc: boolean = false, internal: boolean = true) => { let results = await ExecuteQuery(Service.Text, Manager, data); console.log(results); let keyterms = converter(results, data); //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); target[keys[0]] = keyterms.keyterms; console.log("analyzed!"); - await vectorize(keyterms.keyterms_counted, dataDoc, mainDoc, data); + if (internal) await vectorize(keyterms.keyterms_counted, dataDoc, mainDoc, data); }; // export async function countFrequencies() diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index 48a059b8e..f8b9cd0f1 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -642,11 +642,23 @@ export class DocumentView extends DocComponent(Docu a.click(); } }); - cm.addItem({ - description: "Recommender System", + let recommender_subitems: ContextMenuProps[] = []; + + recommender_subitems.push({ + description: "Internal recommendations", event: () => this.recommender(e), icon: "brain" }); + + recommender_subitems.push({ + description: "External recommendations", + event: () => this.externalRecommendation(e), + icon: "brain" + }); + + cm.addItem({ description: "Recommender System", subitems: recommender_subitems, icon: "brain" }); + + cm.addItem({ description: "Delete", event: this.deleteClicked, icon: "trash" }); type User = { email: string, userDocumentId: string }; let usersMenu: ContextMenuProps[] = []; @@ -729,7 +741,7 @@ export class DocumentView extends DocComponent(Docu if (!documents.includes(dataDoc)) { documents.push(dataDoc); const extdoc = doc.data_ext as Doc; - return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc, mainDoc); + return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc, true, mainDoc); } } })); @@ -743,6 +755,14 @@ export class DocumentView extends DocComponent(Docu Recommendations.Instance.displayRecommendations(e.pageX + 100, e.pageY); } + externalRecommendation = async (e: React.MouseEvent) => { + if (!ClientRecommender.Instance) new ClientRecommender({ title: "Client Recommender" }); + ClientRecommender.Instance.reset_docs(); + const doc = Doc.GetDataDoc(this.props.Document); + const extdoc = doc.data_ext as Doc; + return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc, false); + } + onPointerEnter = (e: React.PointerEvent): void => { Doc.BrushDoc(this.props.Document); }; onPointerLeave = (e: React.PointerEvent): void => { Doc.UnBrushDoc(this.props.Document); }; -- cgit v1.2.3-70-g09d2 From 19375927c677ad6c99c77d0c7dac17fe7a2712a9 Mon Sep 17 00:00:00 2001 From: ab Date: Mon, 16 Sep 2019 15:26:36 -0400 Subject: beginning to handle external interactions --- src/client/ClientRecommender.tsx | 26 +++++++++++++--------- src/client/cognitive_services/CognitiveServices.ts | 12 ++++++---- src/client/views/Recommendations.tsx | 6 ++++- .../views/collections/CollectionSchemaCells.tsx | 16 ++++++++++++- src/client/views/nodes/DocumentView.tsx | 7 +++++- 5 files changed, 49 insertions(+), 18 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index 217c89297..551047df0 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -139,7 +139,7 @@ export class ClientRecommender extends React.Component { let fielddata = Cast(dataDoc.data, RichTextField); let data: string; fielddata ? data = fielddata[ToPlainText]() : data = ""; - let converter = (results: any, data: string) => { + let converter = async (results: any, data: string) => { let keyterms = new List(); // raw keywords let keyterms_counted = new List(); // keywords, where each keyword is repeated as let highKP: string[] = [""]; // most frequent @@ -167,10 +167,10 @@ export class ClientRecommender extends React.Component { }); this.highKP = highKP; console.log(highKP); - this.sendRequest(highKP); - return { keyterms: keyterms, keyterms_counted: keyterms_counted }; + const values = await this.sendRequest(highKP); + return { keyterms: keyterms, keyterms_counted: keyterms_counted, values }; }; - await CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc, internal); + return CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc, internal); } private countFrequencies(keyphrase: string, paragraph: string) { @@ -198,7 +198,9 @@ export class ClientRecommender extends React.Component { private async sendRequest(keywords: string[]) { let query = ""; keywords.forEach((kp: string) => query += " " + kp); - await this.arxivrequest(query); + return new Promise(resolve => { + this.arxivrequest(query).then(resolve); + }); } /** @@ -207,7 +209,7 @@ export class ClientRecommender extends React.Component { arxivrequest = async (query: string) => { let xhttp = new XMLHttpRequest(); - let serveraddress = "http://export.arxiv.org/api" + let serveraddress = "http://export.arxiv.org/api"; let endpoint = serveraddress + "/query?search_query=all:" + query + "&start=0&max_results=1"; let promisified = (resolve: any, reject: any) => { xhttp.onreadystatechange = function () { @@ -217,20 +219,22 @@ export class ClientRecommender extends React.Component { console.log(xml); switch (this.status) { case 200: + let title: string = "Title"; + let url: string = "Url"; //console.log(result); if (xml) { let titles = xml.getElementsByTagName("title"); if (titles && titles.length > 1) { - let text = titles[1].childNodes[0].nodeValue; - console.log(text); + title = titles[1].childNodes[0].nodeValue!; + console.log(title); } let ids = xml.getElementsByTagName("id"); if (ids && ids.length > 1) { - let text = ids[1].childNodes[0].nodeValue; - console.log(text); + url = ids[1].childNodes[0].nodeValue!; + console.log(url); } } - return resolve(result); + return resolve({ title, url }); case 400: default: return reject(result); diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 8a58355a8..baafb63a1 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -15,7 +15,7 @@ type RequestExecutor = (apiKey: string, body: string, service: Service) => Promi type AnalysisApplier = (target: Doc, relevantKeys: string[], data: D, ...args: any) => any; type BodyConverter = (data: D) => string; type Converter = (results: any) => Field; -type TextConverter = (results: any, data: string) => { keyterms: Field, keyterms_counted: Field }; +type TextConverter = (results: any, data: string) => Promise<{ keyterms: Field, keyterms_counted: Field, values: any }>; export type Tag = { name: string, confidence: number }; export type Rectangle = { top: number, left: number, width: number, height: number }; @@ -288,11 +288,15 @@ export namespace CognitiveServices { export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, mainDoc: boolean = false, internal: boolean = true) => { let results = await ExecuteQuery(Service.Text, Manager, data); console.log(results); - let keyterms = converter(results, data); + let { keyterms, values, keyterms_counted } = await converter(results, data); //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); - target[keys[0]] = keyterms.keyterms; + target[keys[0]] = keyterms; console.log("analyzed!"); - if (internal) await vectorize(keyterms.keyterms_counted, dataDoc, mainDoc, data); + if (internal) { + await vectorize(keyterms_counted, dataDoc, mainDoc, data); + } else { + return values; + } }; // export async function countFrequencies() diff --git a/src/client/views/Recommendations.tsx b/src/client/views/Recommendations.tsx index ff6e66492..b7b1d84d0 100644 --- a/src/client/views/Recommendations.tsx +++ b/src/client/views/Recommendations.tsx @@ -158,11 +158,15 @@ export class RecommendationsBox extends React.Component { // } // let style = { left: this.pageX, top: this.pageY }; //const transform = "translate(" + (NumCast(this.props.node.x) + 350) + "px, " + NumCast(this.props.node.y) + "px" + let title = StrCast((this.props.Document.sourceDoc as Doc).title); + if (title.length > 15) { + title = title.substring(0, 15) + "..."; + } return ( // { this._width = r.offset.width; this._height = r.offset.height; })}> // {({ measureRef }) => (
-

Recommendations

+

Recommendations for "{title}"

{DocListCast(this.props.Document.data).map(doc => { return (
diff --git a/src/client/views/collections/CollectionSchemaCells.tsx b/src/client/views/collections/CollectionSchemaCells.tsx index 9c26a08f0..bf8c4b6f7 100644 --- a/src/client/views/collections/CollectionSchemaCells.tsx +++ b/src/client/views/collections/CollectionSchemaCells.tsx @@ -27,6 +27,7 @@ import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'; import { SchemaHeaderField } from "../../../new_fields/SchemaHeaderField"; import { KeyCodes } from "../../northstar/utils/KeyCodes"; import { undoBatch } from "../../util/UndoManager"; +import { List } from "lodash"; library.add(faExpand); @@ -86,10 +87,23 @@ export class CollectionSchemaCell extends React.Component { } @action - onPointerDown = (e: React.PointerEvent): void => { + onPointerDown = async (e: React.PointerEvent): Promise => { this.props.changeFocusedCellByIndex(this.props.row, this.props.col); this.props.setPreviewDoc(this.props.rowProps.original); + const data = await DocListCastAsync(this.props.Document.data); + if (data) { + let url: string; + if (url = StrCast(data[0].href)) { + try { + new URL(url); + const temp = window.open(url)!; + temp.blur(); + window.focus(); + } catch { } + } + } + // this._isEditing = true; // this.props.setIsEditing(true); diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index 2ae71f1da..a034bc1f4 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -45,6 +45,7 @@ import { RecommendationsBox } from '../Recommendations'; import { SearchUtil } from '../../util/SearchUtil'; import { ClientRecommender } from '../../ClientRecommender'; import { DocumentType } from '../../documents/DocumentTypes'; +import { SchemaHeaderField } from '../../../new_fields/SchemaHeaderField'; const JsxParser = require('react-jsx-parser').default; //TODO Why does this need to be imported like this? library.add(fa.faBrain); @@ -774,7 +775,11 @@ export class DocumentView extends DocComponent(Docu ClientRecommender.Instance.reset_docs(); const doc = Doc.GetDataDoc(this.props.Document); const extdoc = doc.data_ext as Doc; - return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc, false); + const values = await ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc, false); + const headers = [new SchemaHeaderField("title"), new SchemaHeaderField("href")]; + const body = Docs.Create.FreeformDocument([], { title: values.title }); + body.href = values.url; + CollectionDockingView.Instance.AddRightSplit(Docs.Create.SchemaDocument(headers, [body], { title: `Showing External Recommendations for "${StrCast(doc.title)}"` }), undefined); } onPointerEnter = (e: React.PointerEvent): void => { Doc.BrushDoc(this.props.Document); }; -- cgit v1.2.3-70-g09d2 From 77be33c927b52f93e862b868321f79c59bfc050d Mon Sep 17 00:00:00 2001 From: ab Date: Sat, 28 Sep 2019 17:18:08 -0400 Subject: tensorflow model, server posting fix needed --- package.json | 4 +- src/client/cognitive_services/CognitiveServices.ts | 5 +- src/server/Recommender.ts | 105 +++++++++++---------- src/server/index.ts | 8 +- 4 files changed, 68 insertions(+), 54 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/package.json b/package.json index 422f20e79..12c1e7637 100644 --- a/package.json +++ b/package.json @@ -56,6 +56,8 @@ "@hig/flyout": "^1.0.3", "@hig/theme-context": "^2.1.3", "@hig/theme-data": "^2.3.3", + "@tensorflow-models/universal-sentence-encoder": "^1.2.0", + "@tensorflow/tfjs": "^1.2.9", "@trendmicro/react-dropdown": "^1.3.0", "@types/adm-zip": "^0.4.32", "@types/animejs": "^2.0.2", @@ -229,4 +231,4 @@ "xoauth2": "^1.2.0", "youtube": "^0.1.0" } -} \ No newline at end of file +} diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index baafb63a1..b23441552 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -269,7 +269,7 @@ export namespace CognitiveServices { //keyterms = ["father", "king"]; let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; await requestPromise.post(args).then(async (wordvecs) => { - if (wordvecs.length > 0) { + if (wordvecs.shape[0] > 0) { console.log("successful vectorization!"); var vectorValues = new Set(); wordvecs.forEach((wordvec: any) => { @@ -282,7 +282,8 @@ export namespace CognitiveServices { console.log("unsuccessful :( word(s) not in vocabulary"); } //console.log(vectorValues.size); - }); + } + ); } export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, mainDoc: boolean = false, internal: boolean = true) => { diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index d47257550..d014ba344 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -7,6 +7,8 @@ var w2v = require('word2vec'); var assert = require('assert'); var arxivapi = require('arxiv-api-node'); import requestPromise = require("request-promise"); +import * as use from '@tensorflow-models/universal-sentence-encoder'; +import { Tensor } from "@tensorflow/tfjs-core/dist/tensor"; //http://gnuwin32.sourceforge.net/packages/make.htm @@ -15,12 +17,31 @@ export class Recommender { private _model: any; static Instance: Recommender; private dimension: number = 0; + private choice: string = ""; constructor() { console.log("creating recommender..."); Recommender.Instance = this; } + /*** + * Loads pre-trained model from TF + */ + + public async loadTFModel() { + let self = this; + return new Promise(res => { + use.load().then(model => { + self.choice = "TF"; + self._model = model; + self.dimension = 512; + res(model); + }); + } + + ); + } + /*** * Loads pre-trained model from word2vec */ @@ -29,6 +50,7 @@ export class Recommender { let self = this; return new Promise(res => { w2v.loadModel("./node_modules/word2vec/examples/fixtures/vectors.txt", function (err: any, model: any) { + self.choice = "WV"; self._model = model; self.dimension = model.size; res(model); @@ -42,40 +64,56 @@ export class Recommender { public async testModel() { if (!this._model) { - await this.loadModel(); + await this.loadTFModel(); } if (this._model) { - let similarity = this._model.similarity('father', 'mother'); - console.log(similarity); + if (this.choice === "WV") { + let similarity = this._model.similarity('father', 'mother'); + console.log(similarity); + } + else if (this.choice === "TF") { + const model = this._model as use.UniversalSentenceEncoder; + // Embed an array of sentences. + const sentences = [ + 'Hello.', + 'How are you?' + ]; + const embeddings = await this.vectorize(sentences); + if (embeddings) embeddings.print(true /*verbose*/); + // model.embed(sentences).then(embeddings => { + // // `embeddings` is a 2D tensor consisting of the 512-dimensional embeddings for each sentence. + // // So in this example `embeddings` has the shape [2, 512]. + // embeddings.print(true /* verbose */); + // }); + } } else { console.log("model not found :("); } } - /*** - * Tests if instance exists - */ - - public async testInstance(text: string) { - if (!this._model) { - await this.loadModel(); - } - console.log(text); - } - /*** * Uses model to convert words to vectors */ - public async vectorize(text: string[]) { + public async vectorize(text: string[]): Promise { if (!this._model) { - await this.loadModel(); + await this.loadTFModel(); } if (this._model) { - let word_vecs = this._model.getVectors(text); - - return word_vecs; + if (this.choice === "WV") { + let word_vecs = this._model.getVectors(text); + return word_vecs; + } + else if (this.choice === "TF") { + const model = this._model as use.UniversalSentenceEncoder; + return new Promise(res => { + model.embed(text).then(embeddings => { + res(embeddings); + }); + }); + + } } } @@ -95,33 +133,4 @@ export class Recommender { console.log("phrased!!!"); } - public async arxivRequest(query: string) { - // let xhttp = new XMLHttpRequest(); - // let serveraddress = "http://export.arxiv.org/api/query?search_query=all:electron&start=0&max_results=1"; - // let promisified = (resolve: any, reject: any) => { - // xhttp.onreadystatechange = function () { - // if (this.readyState === 4) { - // let result = xhttp.response; - // switch (this.status) { - // case 200: - // console.log(result); - // return resolve(result); - // case 400: - // default: - // return reject(result); - // } - // } - // }; - // xhttp.open("GET", serveraddress, true); - // xhttp.send(); - // }; - // return new Promise(promisified); - - let res = await arxivapi.query("all:electrons"); - console.log(res); - } - - - - } diff --git a/src/server/index.ts b/src/server/index.ts index e7d49579d..ac803a253 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -688,13 +688,15 @@ app.use(RouteStore.corsProxy, (req, res) => { let recommender = new Recommender(); recommender.testModel(); -recommender.trainModel(); -recommender.arxivRequest("Triangle-GAN"); app.post("/recommender", async (req, res) => { let keyphrases = req.body.keyphrases; let wordvecs = await recommender.vectorize(keyphrases); - res.send(wordvecs); + let embedding: number[][] = []; + if (wordvecs && wordvecs.array()) { + wordvecs.array().then(array => embedding = array as number[][]); + } + res.send(embedding); }); -- cgit v1.2.3-70-g09d2 From a44e12f4625caca5d75a456f0ba1ab977149ae6e Mon Sep 17 00:00:00 2001 From: ab Date: Thu, 3 Oct 2019 12:50:33 -0400 Subject: data sync --- src/client/cognitive_services/CognitiveServices.ts | 4 ++-- src/server/index.ts | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index b23441552..7c660c347 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -269,12 +269,12 @@ export namespace CognitiveServices { //keyterms = ["father", "king"]; let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; await requestPromise.post(args).then(async (wordvecs) => { - if (wordvecs.shape[0] > 0) { + if (wordvecs.length > 0) { console.log("successful vectorization!"); var vectorValues = new Set(); wordvecs.forEach((wordvec: any) => { //console.log(wordvec.word); - vectorValues.add(wordvec.values as number[]); + vectorValues.add(wordvec as number[]); }); ClientRecommender.Instance.mean(vectorValues, dataDoc, mainDoc); } // adds document to internal doc set diff --git a/src/server/index.ts b/src/server/index.ts index ac803a253..49957775d 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -692,9 +692,9 @@ recommender.testModel(); app.post("/recommender", async (req, res) => { let keyphrases = req.body.keyphrases; let wordvecs = await recommender.vectorize(keyphrases); - let embedding: number[][] = []; - if (wordvecs && wordvecs.array()) { - wordvecs.array().then(array => embedding = array as number[][]); + let embedding: Float32Array = new Float32Array(); + if (wordvecs && wordvecs.dataSync()) { + embedding = wordvecs.dataSync() as Float32Array; } res.send(embedding); }); -- cgit v1.2.3-70-g09d2 From 1c75007a5b12224009d0ccfd531e7406b0792760 Mon Sep 17 00:00:00 2001 From: Abdullah Ahmed Date: Mon, 7 Oct 2019 11:02:20 -0400 Subject: idk --- package.json | 2 +- src/client/ClientRecommender.tsx | 11 ++++++++--- src/client/cognitive_services/CognitiveServices.ts | 7 ++++--- src/new_fields/FieldSymbols.ts | 3 ++- src/new_fields/RichTextField.ts | 6 +++++- 5 files changed, 20 insertions(+), 9 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/package.json b/package.json index 2868c79a1..fbed088fd 100644 --- a/package.json +++ b/package.json @@ -235,4 +235,4 @@ "xoauth2": "^1.2.0", "youtube": "^0.1.0" } -} \ No newline at end of file +} diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index 14af0a69b..b4a496563 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -10,7 +10,8 @@ var sw = require('stopword'); var FeedParser = require('feedparser'); import "./ClientRecommender.scss"; import { JSXElement } from "babel-types"; -import { ToPlainText, RichTextField } from "../new_fields/RichTextField"; +import { RichTextField } from "../new_fields/RichTextField"; +import { ToPlainText } from "../new_fields/FieldSymbols"; export interface RecommenderProps { title: string; @@ -166,15 +167,19 @@ export class ClientRecommender extends React.Component { }); }); this.highKP = highKP; - console.log(highKP); + //console.log(highKP); const kts_counted = new List(); keyterms_counted.forEach(kt => kts_counted.push(kt.toLowerCase())); const values = await this.sendRequest(highKP); return { keyterms: keyterms, keyterms_counted: kts_counted, values }; }; - return CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc, internal); + if (data != "") { + return CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc, internal); + } + return; } + private countFrequencies(keyphrase: string, paragraph: string) { let data = paragraph.split(" "); let kp_array = keyphrase.split(" "); diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 7c660c347..e391b98d2 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -269,12 +269,13 @@ export namespace CognitiveServices { //keyterms = ["father", "king"]; let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; await requestPromise.post(args).then(async (wordvecs) => { - if (wordvecs.length > 0) { + if (wordvecs) { + let indices = Object.keys(wordvecs); console.log("successful vectorization!"); var vectorValues = new Set(); - wordvecs.forEach((wordvec: any) => { + indices.forEach((ind: any) => { //console.log(wordvec.word); - vectorValues.add(wordvec as number[]); + vectorValues.add(wordvecs[ind]); }); ClientRecommender.Instance.mean(vectorValues, dataDoc, mainDoc); } // adds document to internal doc set diff --git a/src/new_fields/FieldSymbols.ts b/src/new_fields/FieldSymbols.ts index b5b3aa588..8e5161dac 100644 --- a/src/new_fields/FieldSymbols.ts +++ b/src/new_fields/FieldSymbols.ts @@ -7,4 +7,5 @@ export const Id = Symbol("Id"); export const OnUpdate = Symbol("OnUpdate"); export const Parent = Symbol("Parent"); export const Copy = Symbol("Copy"); -export const ToScriptString = Symbol("ToScriptString"); \ No newline at end of file +export const ToScriptString = Symbol("ToScriptString"); +export const ToPlainText = Symbol("ToPlainText"); \ No newline at end of file diff --git a/src/new_fields/RichTextField.ts b/src/new_fields/RichTextField.ts index d2f76c969..390045ee1 100644 --- a/src/new_fields/RichTextField.ts +++ b/src/new_fields/RichTextField.ts @@ -1,7 +1,7 @@ import { ObjectField } from "./ObjectField"; import { serializable } from "serializr"; import { Deserializable } from "../client/util/SerializationHelper"; -import { Copy, ToScriptString } from "./FieldSymbols"; +import { Copy, ToScriptString, ToPlainText } from "./FieldSymbols"; import { scriptingGlobal } from "../client/util/Scripting"; @scriptingGlobal @@ -23,4 +23,8 @@ export class RichTextField extends ObjectField { return `new RichTextField("${this.Data}")`; } + [ToPlainText]() { + return this.Data; + } + } \ No newline at end of file -- cgit v1.2.3-70-g09d2 From b2434c6f938bf464c8c18c2e6f9d9c171a1aee7e Mon Sep 17 00:00:00 2001 From: Abdullah Ahmed Date: Wed, 9 Oct 2019 16:10:16 -0400 Subject: changed way vectors are processed --- src/client/ClientRecommender.tsx | 14 ++++++++++++-- src/client/cognitive_services/CognitiveServices.ts | 8 ++++---- src/client/util/SearchUtil.ts | 2 +- src/client/views/Recommendations.tsx | 17 +++++++++-------- 4 files changed, 26 insertions(+), 15 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index b4a496563..9ce7df366 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -126,6 +126,14 @@ export class ClientRecommender extends React.Component { return meanVector; } + public processVector(vector: number[], dataDoc: Doc, mainDoc: boolean) { + if (vector.length > 0) { + const internalDoc: RecommenderDocument = { actualDoc: dataDoc, vectorDoc: vector, score: 0 }; + if (mainDoc) ClientRecommender.Instance.mainDoc = internalDoc; + ClientRecommender.Instance.addToDocSet(internalDoc); + } + } + private addToDocSet(internalDoc: RecommenderDocument) { if (ClientRecommender.Instance.docVectors) { ClientRecommender.Instance.docVectors.add(internalDoc); @@ -149,6 +157,7 @@ export class ClientRecommender extends React.Component { let keyPhrases = doc.keyPhrases; keyPhrases.map((kp: string) => { const frequency = this.countFrequencies(kp, data); + keyterms.push(kp); if (frequency > high) { high = frequency; highKP = [kp]; @@ -159,7 +168,7 @@ export class ClientRecommender extends React.Component { let words = kp.split(" "); // separates phrase into words words = this.removeStopWords(words); // removes stop words if they appear in phrases words.forEach((word) => { - keyterms.push(word); + //keyterms.push(word); for (let i = 0; i < frequency; i++) { keyterms_counted.push(word); } @@ -170,7 +179,8 @@ export class ClientRecommender extends React.Component { //console.log(highKP); const kts_counted = new List(); keyterms_counted.forEach(kt => kts_counted.push(kt.toLowerCase())); - const values = await this.sendRequest(highKP); + let values = ""; + if (!internal) values = await this.sendRequest(highKP); return { keyterms: keyterms, keyterms_counted: kts_counted, values }; }; if (data != "") { diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index e391b98d2..c138c68b7 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -272,12 +272,12 @@ export namespace CognitiveServices { if (wordvecs) { let indices = Object.keys(wordvecs); console.log("successful vectorization!"); - var vectorValues = new Set(); + var vectorValues = new List(); indices.forEach((ind: any) => { //console.log(wordvec.word); - vectorValues.add(wordvecs[ind]); + vectorValues.push(wordvecs[ind]); }); - ClientRecommender.Instance.mean(vectorValues, dataDoc, mainDoc); + ClientRecommender.Instance.processVector(vectorValues, dataDoc, mainDoc); } // adds document to internal doc set else { console.log("unsuccessful :( word(s) not in vocabulary"); @@ -295,7 +295,7 @@ export namespace CognitiveServices { target[keys[0]] = keyterms; console.log("analyzed!"); if (internal) { - await vectorize(keyterms_counted, dataDoc, mainDoc, data); + await vectorize(keyterms, dataDoc, mainDoc, data); } else { return values; } diff --git a/src/client/util/SearchUtil.ts b/src/client/util/SearchUtil.ts index d65ec3f40..fb3189069 100644 --- a/src/client/util/SearchUtil.ts +++ b/src/client/util/SearchUtil.ts @@ -127,7 +127,7 @@ export namespace SearchUtil { }); let result: IdSearchResult = JSON.parse(response); const { ids, numFound, highlighting } = result; - console.log(ids.length); + //console.log(ids.length); const docMap = await DocServer.GetRefFields(ids); const docs: Doc[] = []; for (const id of ids) { diff --git a/src/client/views/Recommendations.tsx b/src/client/views/Recommendations.tsx index c44dfc032..5dc62105d 100644 --- a/src/client/views/Recommendations.tsx +++ b/src/client/views/Recommendations.tsx @@ -77,25 +77,26 @@ export class RecommendationsBox extends React.Component { onPointerEnter={action(() => this._displayDim = this._useIcons ? 50 : Number(SEARCH_THUMBNAIL_SIZE))} onPointerLeave={action(() => this._displayDim = 50)} > */}
; @@ -170,16 +171,16 @@ export class RecommendationsBox extends React.Component { {DocListCast(this.props.Document.data).map(doc => { return (
- + {/* {this.DocumentIcon(doc)} - + */} {NumCast(doc.score).toFixed(4)}
DocumentManager.Instance.jumpToDocument(doc, false)}>
-
DocUtils.MakeLink(this.props.Document.sourceDoc as Doc, doc, undefined, "User Selected Link", "Generated from Recommender", undefined)}> + {/*
DocUtils.MakeLink({ doc: this.props.Document.sourceDoc as Doc }, { doc: doc }, "User Selected Link", "Generated from Recommender", undefined)}> -
+
*/}
); })} -- cgit v1.2.3-70-g09d2 From 1f8bf407ef49aab33294c3e7393718606dfa65dd Mon Sep 17 00:00:00 2001 From: Abdullah Ahmed Date: Fri, 11 Oct 2019 17:40:09 -0400 Subject: fixes + refactoring --- src/client/ClientRecommender.tsx | 95 ++++++---------------- src/client/cognitive_services/CognitiveServices.ts | 6 +- src/client/views/Recommendations.tsx | 22 ++--- src/server/Recommender.ts | 2 +- 4 files changed, 37 insertions(+), 88 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index 9ce7df366..bc1cd139c 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -35,7 +35,6 @@ export class ClientRecommender extends React.Component { static Instance: ClientRecommender; private mainDoc?: RecommenderDocument; private docVectors: Set = new Set(); - private highKP: string[] = []; @observable private corr_matrix = [[0, 0], [0, 0]]; @@ -94,7 +93,7 @@ export class ClientRecommender extends React.Component { public computeSimilarities() { ClientRecommender.Instance.docVectors.forEach((doc: RecommenderDocument) => { if (ClientRecommender.Instance.mainDoc) { - const distance = ClientRecommender.Instance.distance(ClientRecommender.Instance.mainDoc.vectorDoc, doc.vectorDoc, "euclidian"); + const distance = ClientRecommender.Instance.distance(ClientRecommender.Instance.mainDoc.vectorDoc, doc.vectorDoc, "cosine"); doc.score = distance; } } @@ -151,17 +150,21 @@ export class ClientRecommender extends React.Component { let converter = async (results: any, data: string) => { let keyterms = new List(); // raw keywords let keyterms_counted = new List(); // keywords, where each keyword is repeated as - let highKP: string[] = [""]; // most frequent + let kp_string: string = ""; + let highKP: string[] = [""]; // most frequent keyphrase let high = 0; results.documents.forEach((doc: any) => { let keyPhrases = doc.keyPhrases; keyPhrases.map((kp: string) => { const frequency = this.countFrequencies(kp, data); keyterms.push(kp); + kp_string += kp + ", "; + // replaces highKP with new one if (frequency > high) { high = frequency; highKP = [kp]; } + // appends to current highKP phrase else if (frequency === high) { highKP.push(kp); } @@ -175,13 +178,11 @@ export class ClientRecommender extends React.Component { }); }); }); - this.highKP = highKP; - //console.log(highKP); const kts_counted = new List(); keyterms_counted.forEach(kt => kts_counted.push(kt.toLowerCase())); let values = ""; if (!internal) values = await this.sendRequest(highKP); - return { keyterms: keyterms, keyterms_counted: kts_counted, values }; + return { keyterms: keyterms, keyterms_counted: kts_counted, values, kp_string: [kp_string] }; }; if (data != "") { return CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc, internal); @@ -189,6 +190,10 @@ export class ClientRecommender extends React.Component { return; } + /** + * + * Counts frequencies of keyphrase in paragraph. + */ private countFrequencies(keyphrase: string, paragraph: string) { let data = paragraph.split(" "); @@ -196,8 +201,7 @@ export class ClientRecommender extends React.Component { let num_keywords = kp_array.length; let par_length = data.length; let frequency = 0; - // console.log("Paragraph: ", data); - // console.log("Keyphrases:", kp_array); + // slides keyphrase windows across paragraph and checks if it matches with corresponding paragraph slice for (let i = 0; i <= par_length - num_keywords; i++) { const window = data.slice(i, i + num_keywords); if (JSON.stringify(window).toLowerCase() === JSON.stringify(kp_array).toLowerCase() || kp_array.every(val => window.includes(val))) { @@ -207,11 +211,21 @@ export class ClientRecommender extends React.Component { return frequency; } + /** + * + * Removes stopwords from list of strings representing a sentence + */ + private removeStopWords(word_array: string[]) { //console.log(sw.removeStopwords(word_array)); return sw.removeStopwords(word_array); } + /** + * + * API for sending arXiv request. + */ + private async sendRequest(keywords: string[]) { let query = ""; keywords.forEach((kp: string) => query += " " + kp); @@ -221,13 +235,14 @@ export class ClientRecommender extends React.Component { } /** - * Request to the arXiv server for ML articles. + * Actual request to the arXiv server for ML articles. */ arxivrequest = async (query: string) => { let xhttp = new XMLHttpRequest(); let serveraddress = "http://export.arxiv.org/api"; - let endpoint = serveraddress + "/query?search_query=all:" + query + "&start=0&max_results=5"; + const maxresults = 5; + let endpoint = serveraddress + "/query?search_query=all:" + query + "&start=0&max_results=" + maxresults.toString(); let promisified = (resolve: any, reject: any) => { xhttp.onreadystatechange = function () { if (this.readyState === 4) { @@ -243,7 +258,7 @@ export class ClientRecommender extends React.Component { let titles = xml.getElementsByTagName("title"); let counter = 1; if (titles && titles.length > 1) { - while (counter <= 5) { + while (counter <= maxresults) { const title = titles[counter].childNodes[0].nodeValue!; console.log(title) title_vals.push(title); @@ -253,7 +268,7 @@ export class ClientRecommender extends React.Component { let ids = xml.getElementsByTagName("id"); counter = 1; if (ids && ids.length > 1) { - while (counter <= 5) { + while (counter <= maxresults) { const url = ids[counter].childNodes[0].nodeValue!; console.log(url); url_vals.push(url); @@ -280,64 +295,8 @@ export class ClientRecommender extends React.Component { console.log(text); } - /*** - * Creates distance matrix for all Documents analyzed - */ - - @action - public createDistanceMatrix(documents: Set = ClientRecommender.Instance.docVectors) { - const documents_list = Array.from(documents); - const n = documents_list.length; - var matrix = new Array(n).fill(0).map(() => new Array(n).fill(0)); - for (let i = 0; i < n; i++) { - var doc1 = documents_list[i]; - for (let j = 0; j < n; j++) { - var doc2 = documents_list[j]; - matrix[i][j] = ClientRecommender.Instance.distance(doc1.vectorDoc, doc2.vectorDoc, "euclidian"); - } - } - ClientRecommender.Instance.corr_matrix = matrix; - return matrix; - } - - @computed - private get generateRows() { - const n = ClientRecommender.Instance.corr_matrix.length; - let rows: JSX.Element[] = []; - for (let i = 0; i < n; i++) { - let children: JSX.Element[] = []; - for (let j = 0; j < n; j++) { - //let cell = React.createElement("td", ClientRecommender.Instance.corr_matrix[i][j]); - let cell = {ClientRecommender.Instance.corr_matrix[i][j].toFixed(4)}; - children.push(cell); - } - //let row = React.createElement("tr", { children: children, key: i }); - let row = {children}; - rows.push(row); - } - return rows; - } - render() { return (
-

{ClientRecommender.Instance.props.title ? ClientRecommender.Instance.props.title : "hello"}

- {/* - - - - - - - - - - -
{ClientRecommender.Instance.corr_matrix[0][0].toFixed(4)}{ClientRecommender.Instance.corr_matrix[0][1].toFixed(4)}
{ClientRecommender.Instance.corr_matrix[1][0].toFixed(4)}{ClientRecommender.Instance.corr_matrix[1][1].toFixed(4)}
*/} - - - {ClientRecommender.Instance.generateRows} - -
); } diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index c138c68b7..eb088763d 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -15,7 +15,7 @@ type RequestExecutor = (apiKey: string, body: string, service: Service) => Promi type AnalysisApplier = (target: Doc, relevantKeys: string[], data: D, ...args: any) => any; type BodyConverter = (data: D) => string; type Converter = (results: any) => Field; -type TextConverter = (results: any, data: string) => Promise<{ keyterms: Field, keyterms_counted: Field, values: any }>; +type TextConverter = (results: any, data: string) => Promise<{ keyterms: Field, keyterms_counted: Field, values: any, kp_string: string[] }>; export type Tag = { name: string, confidence: number }; export type Rectangle = { top: number, left: number, width: number, height: number }; @@ -290,12 +290,12 @@ export namespace CognitiveServices { export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, mainDoc: boolean = false, internal: boolean = true) => { let results = await ExecuteQuery(Service.Text, Manager, data); console.log(results); - let { keyterms, values, keyterms_counted } = await converter(results, data); + let { keyterms, values, keyterms_counted, kp_string } = await converter(results, data); //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); target[keys[0]] = keyterms; console.log("analyzed!"); if (internal) { - await vectorize(keyterms, dataDoc, mainDoc, data); + await vectorize(kp_string, dataDoc, mainDoc, data); } else { return values; } diff --git a/src/client/views/Recommendations.tsx b/src/client/views/Recommendations.tsx index 5dc62105d..f965d655b 100644 --- a/src/client/views/Recommendations.tsx +++ b/src/client/views/Recommendations.tsx @@ -70,15 +70,9 @@ export class RecommendationsBox extends React.Component { newRenderDoc.height = NumCast(this.props.Document.documentIconHeight); newRenderDoc.autoHeight = false; const docview =
- {/* onPointerDown={action(() => { - this._useIcons = !this._useIcons; - this._displayDim = this._useIcons ? 50 : Number(SEARCH_THUMBNAIL_SIZE); - })} - onPointerEnter={action(() => this._displayDim = this._useIcons ? 50 : Number(SEARCH_THUMBNAIL_SIZE))} - onPointerLeave={action(() => this._displayDim = 50)} > */} { ContentScaling={scale} />
; - // const data = renderDoc.data; - // if (data instanceof ObjectField) newRenderDoc.data = ObjectField.MakeCopy(data); - // newRenderDoc.preview = true; - // this.previewDocs.push(newRenderDoc); return docview; } @@ -171,16 +161,16 @@ export class RecommendationsBox extends React.Component { {DocListCast(this.props.Document.data).map(doc => { return (
- {/* + {this.DocumentIcon(doc)} - */} + {NumCast(doc.score).toFixed(4)}
DocumentManager.Instance.jumpToDocument(doc, false)}>
- {/*
DocUtils.MakeLink({ doc: this.props.Document.sourceDoc as Doc }, { doc: doc }, "User Selected Link", "Generated from Recommender", undefined)}> +
DocUtils.MakeLink({ doc: this.props.Document.sourceDoc as Doc }, { doc: doc }, "User Selected Link", "Generated from Recommender", undefined)}> -
*/} +
); })} diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index d014ba344..d974d7ef6 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -17,7 +17,7 @@ export class Recommender { private _model: any; static Instance: Recommender; private dimension: number = 0; - private choice: string = ""; + private choice: string = ""; // Tensorflow or Word2Vec constructor() { console.log("creating recommender..."); -- cgit v1.2.3-70-g09d2 From 814838063b6bbdf8dc813eb601de8da6b4ae0320 Mon Sep 17 00:00:00 2001 From: Abdullah Ahmed Date: Sat, 12 Oct 2019 16:00:04 -0400 Subject: refactor --- src/client/ClientRecommender.tsx | 74 +++++++++++----------- src/client/cognitive_services/CognitiveServices.ts | 15 ++--- src/client/views/nodes/DocumentView.tsx | 2 +- 3 files changed, 45 insertions(+), 46 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index bc1cd139c..0e1e8175a 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -36,7 +36,7 @@ export class ClientRecommender extends React.Component { private mainDoc?: RecommenderDocument; private docVectors: Set = new Set(); - @observable private corr_matrix = [[0, 0], [0, 0]]; + @observable private corr_matrix = [[0, 0], [0, 0]]; // for testing constructor(props: RecommenderProps) { //console.log("creating client recommender..."); @@ -90,16 +90,21 @@ export class ClientRecommender extends React.Component { * Returns list of {doc, similarity (to main doc)} in increasing score */ - public computeSimilarities() { + public computeSimilarities(distance_metric: string) { ClientRecommender.Instance.docVectors.forEach((doc: RecommenderDocument) => { if (ClientRecommender.Instance.mainDoc) { - const distance = ClientRecommender.Instance.distance(ClientRecommender.Instance.mainDoc.vectorDoc, doc.vectorDoc, "cosine"); + const distance = ClientRecommender.Instance.distance(ClientRecommender.Instance.mainDoc.vectorDoc, doc.vectorDoc, distance_metric); doc.score = distance; } } ); let doclist = Array.from(ClientRecommender.Instance.docVectors); - doclist.sort((a: RecommenderDocument, b: RecommenderDocument) => a.score - b.score); + if (distance_metric == "euclidian") { + doclist.sort((a: RecommenderDocument, b: RecommenderDocument) => a.score - b.score); + } + else { + doclist.sort((a: RecommenderDocument, b: RecommenderDocument) => b.score - a.score); + } return doclist; } @@ -107,8 +112,8 @@ export class ClientRecommender extends React.Component { * Computes the mean of a set of vectors */ - public mean(paragraph: Set, dataDoc: Doc, mainDoc: boolean) { - const n = 200; + public mean(paragraph: Set) { + const n = 512; const num_words = paragraph.size; let meanVector = new Array(n).fill(0); // mean vector if (num_words > 0) { // check to see if paragraph actually was vectorized @@ -118,23 +123,20 @@ export class ClientRecommender extends React.Component { } }); meanVector = meanVector.map(x => x / num_words); - const internalDoc: RecommenderDocument = { actualDoc: dataDoc, vectorDoc: meanVector, score: 0 }; - if (mainDoc) ClientRecommender.Instance.mainDoc = internalDoc; - ClientRecommender.Instance.addToDocSet(internalDoc); } return meanVector; } - public processVector(vector: number[], dataDoc: Doc, mainDoc: boolean) { + public processVector(vector: number[], dataDoc: Doc, isMainDoc: boolean) { if (vector.length > 0) { const internalDoc: RecommenderDocument = { actualDoc: dataDoc, vectorDoc: vector, score: 0 }; - if (mainDoc) ClientRecommender.Instance.mainDoc = internalDoc; - ClientRecommender.Instance.addToDocSet(internalDoc); + ClientRecommender.Instance.addToDocSet(internalDoc, isMainDoc); } } - private addToDocSet(internalDoc: RecommenderDocument) { + private addToDocSet(internalDoc: RecommenderDocument, isMainDoc: boolean) { if (ClientRecommender.Instance.docVectors) { + if (isMainDoc) ClientRecommender.Instance.mainDoc = internalDoc; ClientRecommender.Instance.docVectors.add(internalDoc); } } @@ -143,22 +145,25 @@ export class ClientRecommender extends React.Component { * Uses Cognitive Services to extract keywords from a document */ - public async extractText(dataDoc: Doc, extDoc: Doc, internal: boolean = true, mainDoc: boolean = false) { + public async extractText(dataDoc: Doc, extDoc: Doc, internal: boolean = true, isMainDoc: boolean = false) { let fielddata = Cast(dataDoc.data, RichTextField); let data: string; fielddata ? data = fielddata[ToPlainText]() : data = ""; let converter = async (results: any, data: string) => { let keyterms = new List(); // raw keywords - let keyterms_counted = new List(); // keywords, where each keyword is repeated as - let kp_string: string = ""; + // let keyterms_counted = new List(); // keywords, where each keyword is repeated. input to w2v + let kp_string: string = ""; // keywords concatenated into a string. input into TF let highKP: string[] = [""]; // most frequent keyphrase let high = 0; results.documents.forEach((doc: any) => { let keyPhrases = doc.keyPhrases; keyPhrases.map((kp: string) => { - const frequency = this.countFrequencies(kp, data); keyterms.push(kp); - kp_string += kp + ", "; + const frequency = this.countFrequencies(kp, data); // frequency of keyphrase in paragraph + kp_string += kp + ", "; // ensures that if frequency is 0 for some reason kp is still added + for (let i = 0; i < frequency - 1; i++) { + kp_string += kp + ", "; // weights repeated keywords higher + } // replaces highKP with new one if (frequency > high) { high = frequency; @@ -168,24 +173,25 @@ export class ClientRecommender extends React.Component { else if (frequency === high) { highKP.push(kp); } - let words = kp.split(" "); // separates phrase into words - words = this.removeStopWords(words); // removes stop words if they appear in phrases - words.forEach((word) => { - //keyterms.push(word); - for (let i = 0; i < frequency; i++) { - keyterms_counted.push(word); - } - }); + // let words = kp.split(" "); // separates phrase into words + // words = this.removeStopWords(words); // removes stop words if they appear in phrases + // words.forEach((word) => { + // for (let i = 0; i < frequency; i++) { + // keyterms_counted.push(word); + // } + // }); }); }); - const kts_counted = new List(); - keyterms_counted.forEach(kt => kts_counted.push(kt.toLowerCase())); + // const kts_counted = new List(); + // keyterms_counted.forEach(kt => kts_counted.push(kt.toLowerCase())); + if (kp_string.length > 2) kp_string = kp_string.substring(0, kp_string.length - 2); + console.log("kp string: ", kp_string); let values = ""; if (!internal) values = await this.sendRequest(highKP); - return { keyterms: keyterms, keyterms_counted: kts_counted, values, kp_string: [kp_string] }; + return { keyterms: keyterms, external_recommendations: values, kp_string: [kp_string] }; }; if (data != "") { - return CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc, internal); + return CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, isMainDoc, internal); } return; } @@ -196,7 +202,7 @@ export class ClientRecommender extends React.Component { */ private countFrequencies(keyphrase: string, paragraph: string) { - let data = paragraph.split(" "); + let data = paragraph.split(/ |\n/); // splits by new lines and spaces let kp_array = keyphrase.split(" "); let num_keywords = kp_array.length; let par_length = data.length; @@ -289,12 +295,6 @@ export class ClientRecommender extends React.Component { return new Promise(promisified); } - processArxivResult = (result: any) => { - var xmlDoc = result as XMLDocument; - let text = xmlDoc.getElementsByTagName("title")[0].childNodes[0].nodeValue; - console.log(text); - } - render() { return (
); diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index eb088763d..48519f916 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -15,7 +15,7 @@ type RequestExecutor = (apiKey: string, body: string, service: Service) => Promi type AnalysisApplier = (target: Doc, relevantKeys: string[], data: D, ...args: any) => any; type BodyConverter = (data: D) => string; type Converter = (results: any) => Field; -type TextConverter = (results: any, data: string) => Promise<{ keyterms: Field, keyterms_counted: Field, values: any, kp_string: string[] }>; +type TextConverter = (results: any, data: string) => Promise<{ keyterms: Field, external_recommendations: any, kp_string: string[] }>; export type Tag = { name: string, confidence: number }; export type Rectangle = { top: number, left: number, width: number, height: number }; @@ -264,7 +264,7 @@ export namespace CognitiveServices { export namespace Appliers { - export async function vectorize(keyterms: any, dataDoc: Doc, mainDoc: boolean = false, data: string) { + export async function vectorize(keyterms: any, dataDoc: Doc, mainDoc: boolean = false) { console.log("vectorizing..."); //keyterms = ["father", "king"]; let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; @@ -287,17 +287,16 @@ export namespace CognitiveServices { ); } - export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, mainDoc: boolean = false, internal: boolean = true) => { + export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, isMainDoc: boolean = false, internal: boolean = true) => { let results = await ExecuteQuery(Service.Text, Manager, data); console.log(results); - let { keyterms, values, keyterms_counted, kp_string } = await converter(results, data); - //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); + let { keyterms, external_recommendations, kp_string } = await converter(results, data); target[keys[0]] = keyterms; - console.log("analyzed!"); if (internal) { - await vectorize(kp_string, dataDoc, mainDoc, data); + //await vectorize([data], dataDoc, isMainDoc); + await vectorize(kp_string, dataDoc, isMainDoc); } else { - return values; + return external_recommendations; } }; diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index 070b1f426..ab2717eed 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -602,7 +602,7 @@ export class DocumentView extends DocComponent(Docu } } })); - const doclist = ClientRecommender.Instance.computeSimilarities(); + const doclist = ClientRecommender.Instance.computeSimilarities("cosine"); let recDocs: { preview: Doc, score: number }[] = []; // tslint:disable-next-line: prefer-for-of for (let i = 0; i < doclist.length; i++) { -- cgit v1.2.3-70-g09d2 From 46f25c9a781783350a7c1d76eefb4e066b2cac83 Mon Sep 17 00:00:00 2001 From: ab Date: Thu, 17 Oct 2019 11:57:58 -0400 Subject: optimize tf --- package.json | 4 +++- src/client/cognitive_services/CognitiveServices.ts | 1 + src/server/Recommender.ts | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'src/client/cognitive_services') diff --git a/package.json b/package.json index fbed088fd..f138e7a79 100644 --- a/package.json +++ b/package.json @@ -58,6 +58,7 @@ "@hig/theme-data": "^2.3.3", "@tensorflow-models/universal-sentence-encoder": "^1.2.0", "@tensorflow/tfjs": "^1.2.9", + "@tensorflow/tfjs-node": "^1.2.11", "@trendmicro/react-dropdown": "^1.3.0", "@types/adm-zip": "^0.4.32", "@types/animejs": "^2.0.2", @@ -122,8 +123,8 @@ "@types/youtube": "0.0.38", "adm-zip": "^0.4.13", "archiver": "^3.0.3", - "arxiv-api-node": "0.0.2", "array-batcher": "^1.1.3", + "arxiv-api-node": "0.0.2", "async": "^2.6.2", "babel-runtime": "^6.26.0", "bcrypt-nodejs": "0.0.3", @@ -173,6 +174,7 @@ "mobx-utils": "^5.4.0", "mongodb": "^3.1.13", "mongoose": "^5.6.4", + "node-pre-gyp": "^0.13.0", "node-sass": "^4.12.0", "nodemailer": "^5.1.1", "nodemon": "^1.18.10", diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 48519f916..b0e9138a4 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -267,6 +267,7 @@ export namespace CognitiveServices { export async function vectorize(keyterms: any, dataDoc: Doc, mainDoc: boolean = false) { console.log("vectorizing..."); //keyterms = ["father", "king"]; + let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; await requestPromise.post(args).then(async (wordvecs) => { if (wordvecs) { diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index d974d7ef6..aaed09999 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -9,6 +9,7 @@ var arxivapi = require('arxiv-api-node'); import requestPromise = require("request-promise"); import * as use from '@tensorflow-models/universal-sentence-encoder'; import { Tensor } from "@tensorflow/tfjs-core/dist/tensor"; +require('@tensorflow/tfjs-node'); //http://gnuwin32.sourceforge.net/packages/make.htm -- cgit v1.2.3-70-g09d2 From 1676ddcf9f4da6f9926e2e48eea4302ae9f2efe5 Mon Sep 17 00:00:00 2001 From: ab Date: Fri, 25 Oct 2019 16:21:45 -0400 Subject: images show up, react errors --- src/client/ClientRecommender.tsx | 78 ++++++++++++---------- src/client/cognitive_services/CognitiveServices.ts | 5 +- src/client/views/RecommendationsBox.tsx | 2 +- 3 files changed, 47 insertions(+), 38 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index 97efedd89..ae2413d1d 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -1,4 +1,4 @@ -import { Doc } from "../new_fields/Doc"; +import { Doc, FieldResult } from "../new_fields/Doc"; import { StrCast, Cast } from "../new_fields/Types"; import { List } from "../new_fields/List"; import { CognitiveServices } from "./cognitive_services/CognitiveServices"; @@ -147,47 +147,55 @@ export class ClientRecommender extends React.Component { */ public async extractText(dataDoc: Doc, extDoc: Doc, internal: boolean = true, isMainDoc: boolean = false, image: boolean = false) { - let fielddata = Cast(dataDoc.data, RichTextField); - if (image && extDoc.generatedTags) { - console.log(Cast(extDoc.generatedTags, listSpec("string"))); + let data: string = ""; + let taglist: FieldResult> = undefined; + if (image && extDoc.generatedTags) { // TODO: Automatically generate tags. Need to ask Sam about this. + taglist = Cast(extDoc.generatedTags, listSpec("string")); + taglist!.forEach(tag => { + data += tag + ", "; + }); + } + else { + let fielddata = Cast(dataDoc.data, RichTextField); + fielddata ? data = fielddata[ToPlainText]() : data = ""; } - let data: string; - fielddata ? data = fielddata[ToPlainText]() : data = ""; - let converter = async (results: any, data: string) => { + + let converter = async (results: any, data: string, isImage: boolean = false) => { let keyterms = new List(); // raw keywords // let keyterms_counted = new List(); // keywords, where each keyword is repeated. input to w2v let kp_string: string = ""; // keywords*frequency concatenated into a string. input into TF let highKP: string[] = [""]; // most frequent keyphrase let high = 0; - results.documents.forEach((doc: any) => { - let keyPhrases = doc.keyPhrases; - keyPhrases.map((kp: string) => { - keyterms.push(kp); - const frequency = this.countFrequencies(kp, data); // frequency of keyphrase in paragraph - kp_string += kp + ", "; // ensures that if frequency is 0 for some reason kp is still added - for (let i = 0; i < frequency - 1; i++) { - kp_string += kp + ", "; // weights repeated keywords higher - } - // replaces highKP with new one - if (frequency > high) { - high = frequency; - highKP = [kp]; - } - // appends to current highKP phrase - else if (frequency === high) { - highKP.push(kp); - } - // let words = kp.split(" "); // separates phrase into words - // words = this.removeStopWords(words); // removes stop words if they appear in phrases - // words.forEach((word) => { - // for (let i = 0; i < frequency; i++) { - // keyterms_counted.push(word); - // } - // }); + + if (isImage) { + kp_string = data; + if (taglist) { + keyterms = taglist; + highKP = [taglist[0]]; + } + } + else { // text processing + results.documents.forEach((doc: any) => { + let keyPhrases = doc.keyPhrases; + keyPhrases.map((kp: string) => { + keyterms.push(kp); + const frequency = this.countFrequencies(kp, data); // frequency of keyphrase in paragraph + kp_string += kp + ", "; // ensures that if frequency is 0 for some reason kp is still added + for (let i = 0; i < frequency - 1; i++) { + kp_string += kp + ", "; // weights repeated keywords higher + } + // replaces highKP with new one + if (frequency > high) { + high = frequency; + highKP = [kp]; + } + // appends to current highKP phrase + else if (frequency === high) { + highKP.push(kp); + } + }); }); - }); - // const kts_counted = new List(); - // keyterms_counted.forEach(kt => kts_counted.push(kt.toLowerCase())); + } if (kp_string.length > 2) kp_string = kp_string.substring(0, kp_string.length - 2); console.log("kp string: ", kp_string); let values = ""; diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index b0e9138a4..d496b442e 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -9,6 +9,7 @@ import { UndoManager } from "../util/UndoManager"; import requestPromise = require("request-promise"); import { List } from "../../new_fields/List"; import { ClientRecommender } from "../ClientRecommender"; +import { ImageBox } from "../views/nodes/ImageBox"; type APIManager = { converter: BodyConverter, requester: RequestExecutor }; type RequestExecutor = (apiKey: string, body: string, service: Service) => Promise; @@ -288,12 +289,12 @@ export namespace CognitiveServices { ); } - export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, isMainDoc: boolean = false, internal: boolean = true) => { + export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, isMainDoc: boolean = false, isInternal: boolean = true) => { let results = await ExecuteQuery(Service.Text, Manager, data); console.log(results); let { keyterms, external_recommendations, kp_string } = await converter(results, data); target[keys[0]] = keyterms; - if (internal) { + if (isInternal) { //await vectorize([data], dataDoc, isMainDoc); await vectorize(kp_string, dataDoc, isMainDoc); } else { diff --git a/src/client/views/RecommendationsBox.tsx b/src/client/views/RecommendationsBox.tsx index 3938a8690..c50550bef 100644 --- a/src/client/views/RecommendationsBox.tsx +++ b/src/client/views/RecommendationsBox.tsx @@ -143,7 +143,7 @@ export class RecommendationsBox extends React.Component { // return y; // } - render() { + render() { //TODO: Invariant violation: max depth exceeded error. Occurs when images are rendered. // if (!this._display) { // return null; // } -- cgit v1.2.3-70-g09d2 From c3720ee2e2828d112b6ad34a7b6106d3c5f27878 Mon Sep 17 00:00:00 2001 From: ab Date: Wed, 20 Nov 2019 16:28:39 -0500 Subject: bing integrated --- src/client/ClientRecommender.tsx | 41 ++++++---------- src/client/cognitive_services/CognitiveServices.ts | 54 +++++++++++++++++++++- src/client/views/nodes/DocumentView.tsx | 23 +++++++-- src/server/index.ts | 3 +- 4 files changed, 88 insertions(+), 33 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index a37434c0d..83ca48590 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -191,7 +191,7 @@ export class ClientRecommender extends React.Component { * Uses Cognitive Services to extract keywords from a document */ - public async extractText(dataDoc: Doc, extDoc: Doc, internal: boolean = true, isMainDoc: boolean = false, image: boolean = false) { + public async extractText(dataDoc: Doc, extDoc: Doc, internal: boolean = true, api: string = "bing", isMainDoc: boolean = false, image: boolean = false) { let data: string = ""; let taglist: FieldResult> = undefined; if (image) { @@ -265,7 +265,7 @@ export class ClientRecommender extends React.Component { highKP = [sorted_keywords[0].text]; } }); - values = await this.sendRequest(highKP, "bing"); + values = await this.sendRequest(highKP, api); } return { keyterms: keyterms, external_recommendations: values, kp_string: [kp_string] }; }; @@ -320,38 +320,27 @@ export class ClientRecommender extends React.Component { }); } else if (api === "bing") { - await this.bingWebSearch(query); - } - else { return new Promise(resolve => { - this.arxivrequest(query).then(resolve); + this.bingWebSearch(query).then(resolve); }); } + else { + console.log("no api specified :("); + } } bingWebSearch = async (query: string) => { - https.get({ - hostname: 'api.cognitive.microsoft.com', - path: '/bing/v5.0/search?q=' + encodeURIComponent(query), - headers: { 'Ocp-Apim-Subscription-Key': process.env.BING }, - }, (res: any) => { - let body = ''; - res.on('data', (part: any) => body += part); - res.on('end', () => { - for (var header in res.headers) { - if (header.startsWith("bingapis-") || header.startsWith("x-msedge-")) { - console.log(header + ": " + res.headers[header]) - } - } - console.log('\nJSON Response:\n'); - console.dir(JSON.parse(body), { colors: false, depth: null }); - }) - res.on('error', (e: any) => { - console.log('Error: ' + e.message); - throw e; + const converter = async (results: any) => { + let title_vals: string[] = []; + let url_vals: string[] = []; + results.webPages.value.forEach((doc: any) => { + title_vals.push(doc.name); + url_vals.push(doc.url); }); - }); + return { title_vals, url_vals }; + }; + return CognitiveServices.BingSearch.Appliers.analyzer(query, converter); } /** diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index d496b442e..641747207 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -17,6 +17,7 @@ type AnalysisApplier = (target: Doc, relevantKeys: string[], data: D, ...args type BodyConverter = (data: D) => string; type Converter = (results: any) => Field; type TextConverter = (results: any, data: string) => Promise<{ keyterms: Field, external_recommendations: any, kp_string: string[] }>; +type BingConverter = (results: any) => Promise<{ title_vals: string[], url_vals: string[] }>; export type Tag = { name: string, confidence: number }; export type Rectangle = { top: number, left: number, width: number, height: number }; @@ -25,7 +26,8 @@ export enum Service { ComputerVision = "vision", Face = "face", Handwriting = "handwriting", - Text = "text" + Text = "text", + Bing = "bing" } export enum Confidence { @@ -225,6 +227,56 @@ export namespace CognitiveServices { } + export namespace BingSearch { + export const Manager: APIManager = { + converter: (data: string) => { + return data; + }, + requester: async (apiKey: string, query: string) => { + let xhttp = new XMLHttpRequest(); + let serverAddress = "https://api.cognitive.microsoft.com"; + let endpoint = serverAddress + '/bing/v5.0/search?q=' + encodeURIComponent(query); + let promisified = (resolve: any, reject: any) => { + xhttp.onreadystatechange = function () { + if (this.readyState === 4) { + let result = xhttp.responseText; + switch (this.status) { + case 200: + return resolve(result); + case 400: + default: + return reject(result); + } + } + }; + + if (apiKey) { + xhttp.open("GET", endpoint, true); + xhttp.setRequestHeader('Ocp-Apim-Subscription-Key', apiKey); + xhttp.setRequestHeader('Content-Type', 'application/json'); + xhttp.send(); + } + else { + console.log("API key for BING unavailable"); + } + }; + return new Promise(promisified); + } + + }; + + export namespace Appliers { + export const analyzer = async (query: string, converter: BingConverter) => { + let results = await ExecuteQuery(Service.Bing, Manager, query); + console.log(results); + const { title_vals, url_vals } = await converter(results); + return { title_vals, url_vals }; + }; + } + + } + + export namespace Text { export const Manager: APIManager = { converter: (data: string) => { diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index c6ad2f9d7..2455c320d 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -518,9 +518,22 @@ export class DocumentView extends DocComponent(Docu icon: "brain" }); + let ext_recommender_subitems: ContextMenuProps[] = []; + + ext_recommender_subitems.push({ + description: "arXiv", + event: () => this.externalRecommendation(e, "arxiv"), + icon: "brain" + }); + ext_recommender_subitems.push({ + description: "Bing", + event: () => this.externalRecommendation(e, "bing"), + icon: "brain" + }); + recommender_subitems.push({ description: "External recommendations", - event: () => this.externalRecommendation(e), + subitems: ext_recommender_subitems, icon: "brain" }); @@ -590,7 +603,7 @@ export class DocumentView extends DocComponent(Docu if (!documents.includes(dataDoc)) { documents.push(dataDoc); const extdoc = doc.data_ext as Doc; - return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc, true, isMainDoc); + return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc, true, "", isMainDoc); } } if (doc.type === DocumentType.IMG) { @@ -600,7 +613,7 @@ export class DocumentView extends DocComponent(Docu if (!documents.includes(dataDoc)) { documents.push(dataDoc); const extdoc = doc.data_ext as Doc; - return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc, true, isMainDoc, true); + return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc, true, "", isMainDoc, true); } } })); @@ -628,12 +641,12 @@ export class DocumentView extends DocComponent(Docu // RecommendationsBox.Instance.displayRecommendations(e.pageX + 100, e.pageY); } - externalRecommendation = async (e: React.MouseEvent) => { + externalRecommendation = async (e: React.MouseEvent, api: string) => { if (!ClientRecommender.Instance) new ClientRecommender({ title: "Client Recommender" }); ClientRecommender.Instance.reset_docs(); const doc = Doc.GetDataDoc(this.props.Document); const extdoc = doc.data_ext as Doc; - const values = await ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc, false); + const values = await ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc, false, api); const headers = [new SchemaHeaderField("title"), new SchemaHeaderField("href")]; let bodies: Doc[] = []; const titles = values.title_vals; diff --git a/src/server/index.ts b/src/server/index.ts index 45fc7fc07..050ab8755 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -674,7 +674,8 @@ const ServicesApiKeyMap = new Map([ ["face", process.env.FACE], ["vision", process.env.VISION], ["handwriting", process.env.HANDWRITING], - ["text", process.env.TEXT] + ["text", process.env.TEXT], + ["bing", process.env.BING] ]); addSecureRoute({ -- cgit v1.2.3-70-g09d2 From 13d4d9873c2a1d5a4f02a79ce269cec01638e009 Mon Sep 17 00:00:00 2001 From: ab Date: Wed, 20 Nov 2019 18:51:30 -0500 Subject: commenting --- src/client/ClientRecommender.tsx | 52 +++++++++++++--------- src/client/cognitive_services/CognitiveServices.ts | 5 +-- 2 files changed, 33 insertions(+), 24 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index 83ca48590..d2e2330b5 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -140,6 +140,10 @@ export class ClientRecommender extends React.Component { return meanVector; } + /*** + * Processes sentence vector as Recommender Document, adds to Doc Set. + */ + public processVector(vector: number[], dataDoc: Doc, isMainDoc: boolean) { if (vector.length > 0) { const internalDoc: RecommenderDocument = { actualDoc: dataDoc, vectorDoc: vector, score: 0 }; @@ -147,6 +151,10 @@ export class ClientRecommender extends React.Component { } } + /*** + * Adds to Doc set. Updates mainDoc (one clicked) if necessary. + */ + private addToDocSet(internalDoc: RecommenderDocument, isMainDoc: boolean) { if (ClientRecommender.Instance.docVectors) { if (isMainDoc) ClientRecommender.Instance.mainDoc = internalDoc; @@ -192,6 +200,7 @@ export class ClientRecommender extends React.Component { */ public async extractText(dataDoc: Doc, extDoc: Doc, internal: boolean = true, api: string = "bing", isMainDoc: boolean = false, image: boolean = false) { + // STEP 1. Consolidate data of document. Depends on type of document. let data: string = ""; let taglist: FieldResult> = undefined; if (image) { @@ -207,14 +216,17 @@ export class ClientRecommender extends React.Component { let fielddata = Cast(dataDoc.data, RichTextField); fielddata ? data = fielddata[ToPlainText]() : data = ""; } + + // STEP 2. Upon receiving response from Text Cognitive Services, do additional processing on keywords. + // Currently we are still using Cognitive Services for internal recommendations, but in the future this might not be necessary. + let converter = async (results: any, data: string, isImage: boolean = false) => { let keyterms = new List(); // raw keywords - // let keyterms_counted = new List(); // keywords, where each keyword is repeated. input to w2v let kp_string: string = ""; // keywords*frequency concatenated into a string. input into TF let highKP: string[] = [""]; // most frequent keyphrase let high = 0; - if (isImage) { + if (isImage) { // no keyphrase processing necessary kp_string = data; if (taglist) { keyterms = taglist; @@ -223,7 +235,7 @@ export class ClientRecommender extends React.Component { } else { // text processing results.documents.forEach((doc: any) => { - let keyPhrases = doc.keyPhrases; + let keyPhrases = doc.keyPhrases; // returned by Cognitive Services keyPhrases.map((kp: string) => { keyterms.push(kp); const frequency = this.countFrequencies(kp, data); // frequency of keyphrase in paragraph @@ -243,9 +255,11 @@ export class ClientRecommender extends React.Component { }); }); } - if (kp_string.length > 2) kp_string = kp_string.substring(0, kp_string.length - 2); - console.log("kp string: ", kp_string); - let values = ""; + if (kp_string.length > 2) kp_string = kp_string.substring(0, kp_string.length - 2); // strips extra comma and space if there are a lot of keywords + console.log("kp_string: ", kp_string); + + let ext_recs = ""; + // Pushing keyword extraction to IBM for external recommendations. Should shift to internal eventually. if (!internal) { const parameters: any = { 'language': 'en', @@ -265,10 +279,14 @@ export class ClientRecommender extends React.Component { highKP = [sorted_keywords[0].text]; } }); - values = await this.sendRequest(highKP, api); + ext_recs = await this.sendRequest(highKP, api); } - return { keyterms: keyterms, external_recommendations: values, kp_string: [kp_string] }; + + // keyterms: list for extDoc, kp_string: input to TF, ext_recs: {titles, urls} of retrieved results from highKP query + return { keyterms: keyterms, external_recommendations: ext_recs, kp_string: [kp_string] }; }; + + // STEP 3: Start recommendation pipeline. Branches off into internal and external in Cognitive Services if (data !== "") { return CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, isMainDoc, internal); } @@ -296,16 +314,6 @@ export class ClientRecommender extends React.Component { return frequency; } - /** - * - * Removes stopwords from list of strings representing a sentence - */ - - private removeStopWords(word_array: string[]) { - //console.log(sw.removeStopwords(word_array)); - return sw.removeStopwords(word_array); - } - /** * * API for sending arXiv request. @@ -330,6 +338,10 @@ export class ClientRecommender extends React.Component { } + /** + * Request to Bing API. Most of code is in Cognitive Services. + */ + bingWebSearch = async (query: string) => { const converter = async (results: any) => { let title_vals: string[] = []; @@ -357,7 +369,7 @@ export class ClientRecommender extends React.Component { if (this.readyState === 4) { let result = xhttp.response; let xml = xhttp.responseXML; - console.log(xml); + console.log("arXiv Result: ", xml); switch (this.status) { case 200: let title_vals: string[] = []; @@ -369,7 +381,6 @@ export class ClientRecommender extends React.Component { if (titles && titles.length > 1) { while (counter <= maxresults) { const title = titles[counter].childNodes[0].nodeValue!; - console.log(title) title_vals.push(title); counter++; } @@ -379,7 +390,6 @@ export class ClientRecommender extends React.Component { if (ids && ids.length > 1) { while (counter <= maxresults) { const url = ids[counter].childNodes[0].nodeValue!; - console.log(url); url_vals.push(url); counter++; } diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 641747207..becd38a99 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -268,7 +268,7 @@ export namespace CognitiveServices { export namespace Appliers { export const analyzer = async (query: string, converter: BingConverter) => { let results = await ExecuteQuery(Service.Bing, Manager, query); - console.log(results); + console.log("Bing results: ", results); const { title_vals, url_vals } = await converter(results); return { title_vals, url_vals }; }; @@ -310,7 +310,6 @@ export namespace CognitiveServices { } }; - console.log("requested!"); return request.post(options); } }; @@ -343,7 +342,7 @@ export namespace CognitiveServices { export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, isMainDoc: boolean = false, isInternal: boolean = true) => { let results = await ExecuteQuery(Service.Text, Manager, data); - console.log(results); + console.log("Cognitive Services keyphrases: ", results); let { keyterms, external_recommendations, kp_string } = await converter(results, data); target[keys[0]] = keyterms; if (isInternal) { -- cgit v1.2.3-70-g09d2 From a231b597d70f8ee9e435a86b00020c93691dc97d Mon Sep 17 00:00:00 2001 From: ab Date: Sat, 23 Nov 2019 16:06:30 -0500 Subject: keyphrase query ui --- src/client/ClientRecommender.tsx | 2 + src/client/cognitive_services/CognitiveServices.ts | 49 ++++++++++++++++++++++ src/client/views/KeyphraseQueryView.scss | 8 ++++ src/client/views/KeyphraseQueryView.tsx | 30 +++++++++++++ .../views/collections/CollectionViewChromes.scss | 2 +- src/client/views/nodes/DocumentView.tsx | 38 ++++++++++------- 6 files changed, 112 insertions(+), 17 deletions(-) create mode 100644 src/client/views/KeyphraseQueryView.scss create mode 100644 src/client/views/KeyphraseQueryView.tsx (limited to 'src/client/cognitive_services') diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index d2e2330b5..b384a8ebe 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -17,6 +17,7 @@ import { listSpec } from "../new_fields/Schema"; import { Identified } from "./Network"; import { ComputedField } from "../new_fields/ScriptField"; import { ImageField } from "../new_fields/URLField"; +import { KeyphraseQueryView } from "./views/KeyphraseQueryView"; export interface RecommenderProps { title: string; @@ -279,6 +280,7 @@ export class ClientRecommender extends React.Component { highKP = [sorted_keywords[0].text]; } }); + let kpqv = new KeyphraseQueryView({ keyphrases: ["hello"] }); ext_recs = await this.sendRequest(highKP, api); } diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index becd38a99..356cf52ca 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -276,6 +276,55 @@ export namespace CognitiveServices { } + export namespace HathiTrust { + export const Manager: APIManager = { + converter: (data: string) => { + return data; + }, + requester: async (apiKey: string, query: string) => { + let xhttp = new XMLHttpRequest(); + let serverAddress = "https://babel.hathitrust.org/cgi/htd/​"; + let endpoint = serverAddress + '/bing/v5.0/search?q=' + encodeURIComponent(query); + let promisified = (resolve: any, reject: any) => { + xhttp.onreadystatechange = function () { + if (this.readyState === 4) { + let result = xhttp.responseText; + switch (this.status) { + case 200: + return resolve(result); + case 400: + default: + return reject(result); + } + } + }; + + if (apiKey) { + xhttp.open("GET", endpoint, true); + xhttp.setRequestHeader('Ocp-Apim-Subscription-Key', apiKey); + xhttp.setRequestHeader('Content-Type', 'application/json'); + xhttp.send(); + } + else { + console.log("API key for BING unavailable"); + } + }; + return new Promise(promisified); + } + + }; + + export namespace Appliers { + export const analyzer = async (query: string, converter: BingConverter) => { + let results = await ExecuteQuery(Service.Bing, Manager, query); + console.log("Bing results: ", results); + const { title_vals, url_vals } = await converter(results); + return { title_vals, url_vals }; + }; + } + + } + export namespace Text { export const Manager: APIManager = { diff --git a/src/client/views/KeyphraseQueryView.scss b/src/client/views/KeyphraseQueryView.scss new file mode 100644 index 000000000..ac715e5e7 --- /dev/null +++ b/src/client/views/KeyphraseQueryView.scss @@ -0,0 +1,8 @@ +.fading { + animation: fanOut 1s +} + +@keyframes fanOut { + from {opacity: 0;} + to {opacity: 1;} +} \ No newline at end of file diff --git a/src/client/views/KeyphraseQueryView.tsx b/src/client/views/KeyphraseQueryView.tsx new file mode 100644 index 000000000..1955399f9 --- /dev/null +++ b/src/client/views/KeyphraseQueryView.tsx @@ -0,0 +1,30 @@ +import { observer } from "mobx-react"; +import React = require("react"); +import "./KeyphraseQueryView.scss"; + +// tslint:disable-next-line: class-name +export interface KP_Props { + keyphrases: string[]; +} + +@observer +export class KeyphraseQueryView extends React.Component{ + constructor(props: KP_Props) { + super(props); + console.log("FIRST KEY PHRASE: ", props.keyphrases[0]); + } + + render() { + return ( +
+

Select queries to send:

+ {this.props.keyphrases.map((kp: string) => { + setTimeout(() => { + return (

{kp}

); + }, 1000); + + })} +
+ ); + } +} \ No newline at end of file diff --git a/src/client/views/collections/CollectionViewChromes.scss b/src/client/views/collections/CollectionViewChromes.scss index 64411b5fe..1889a192c 100644 --- a/src/client/views/collections/CollectionViewChromes.scss +++ b/src/client/views/collections/CollectionViewChromes.scss @@ -247,7 +247,7 @@ display:flex; flex-direction: row; width: 150px; - margin: auto 0 auto auto; + margin: auto auto auto auto; } .react-autosuggest__container { diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index 2455c320d..b82486109 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -46,6 +46,7 @@ import { RecommendationsBox } from '../RecommendationsBox'; import { SearchUtil } from '../../util/SearchUtil'; import { ClientRecommender } from '../../ClientRecommender'; import { SchemaHeaderField } from '../../../new_fields/SchemaHeaderField'; +import { KeyphraseQueryView } from '../KeyphraseQueryView'; library.add(fa.faBrain); library.add(fa.faEdit); @@ -117,6 +118,7 @@ export class DocumentView extends DocComponent(Docu private _hitTemplateDrag = false; private _mainCont = React.createRef(); private _dropDisposer?: DragManager.DragDropDisposer; + private _showKPQuery: boolean = false; public get ContentDiv() { return this._mainCont.current; } @computed get active() { return SelectionManager.IsSelected(this) || this.props.parentActive(); } @@ -657,6 +659,7 @@ export class DocumentView extends DocComponent(Docu bodies.push(body); } CollectionDockingView.AddRightSplit(Docs.Create.SchemaDocument(headers, bodies, { title: `Showing External Recommendations for "${StrCast(doc.title)}"` }), undefined); + this._showKPQuery = true; } onPointerEnter = (e: React.PointerEvent): void => { Doc.BrushDoc(this.props.Document); }; @@ -807,22 +810,25 @@ export class DocumentView extends DocComponent(Docu const highlightColors = ["transparent", "maroon", "maroon", "yellow", "magenta", "cyan", "orange"]; const highlightStyles = ["solid", "dashed", "solid", "solid", "solid", "solid", "solid", "solid"]; let highlighting = fullDegree && this.layoutDoc.type !== DocumentType.FONTICON && this.layoutDoc.viewType !== CollectionViewType.Linear; - return
Doc.BrushDoc(this.props.Document)} onPointerLeave={e => Doc.UnBrushDoc(this.props.Document)} - style={{ - transition: this.Document.isAnimating !== undefined ? ".5s linear" : StrCast(this.Document.transition), - pointerEvents: this.Document.isBackground && !this.isSelected() ? "none" : "all", - color: StrCast(this.Document.color), - outline: highlighting && !borderRounding ? `${highlightColors[fullDegree]} ${highlightStyles[fullDegree]} ${localScale}px` : "solid 0px", - border: highlighting && borderRounding ? `${highlightStyles[fullDegree]} ${highlightColors[fullDegree]} ${localScale}px` : undefined, - background: this.layoutDoc.type === DocumentType.FONTICON || this.layoutDoc.viewType === CollectionViewType.Linear ? undefined : backgroundColor, - width: animwidth, - height: animheight, - transform: `scale(${this.layoutDoc.fitWidth ? 1 : this.props.ContentScaling()})`, - opacity: this.Document.opacity - }} > - {this.innards} + return
+
Doc.BrushDoc(this.props.Document)} onPointerLeave={e => Doc.UnBrushDoc(this.props.Document)} + style={{ + transition: this.Document.isAnimating !== undefined ? ".5s linear" : StrCast(this.Document.transition), + pointerEvents: this.Document.isBackground && !this.isSelected() ? "none" : "all", + color: StrCast(this.Document.color), + outline: highlighting && !borderRounding ? `${highlightColors[fullDegree]} ${highlightStyles[fullDegree]} ${localScale}px` : "solid 0px", + border: highlighting && borderRounding ? `${highlightStyles[fullDegree]} ${highlightColors[fullDegree]} ${localScale}px` : undefined, + background: this.layoutDoc.type === DocumentType.FONTICON || this.layoutDoc.viewType === CollectionViewType.Linear ? undefined : backgroundColor, + width: animwidth, + height: animheight, + transform: `scale(${this.layoutDoc.fitWidth ? 1 : this.props.ContentScaling()})`, + opacity: this.Document.opacity + }} > + {this.innards} +
+ {this._showKPQuery ? : undefined}
; } } -- cgit v1.2.3-70-g09d2 From 07141291bee793955d7061f4e479942d7aceda67 Mon Sep 17 00:00:00 2001 From: ab Date: Wed, 4 Dec 2019 16:08:12 -0500 Subject: user queries stuff --- src/client/ClientRecommender.tsx | 13 +++++++++--- src/client/cognitive_services/CognitiveServices.ts | 2 +- src/client/views/KeyphraseQueryView.tsx | 21 +++++++++++-------- src/client/views/nodes/DocumentView.tsx | 24 +++++++++++++++++----- 4 files changed, 43 insertions(+), 17 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index b384a8ebe..83aed0204 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -45,6 +45,7 @@ export class ClientRecommender extends React.Component { static Instance: ClientRecommender; private mainDoc?: RecommenderDocument; private docVectors: Set = new Set(); + public _queries: string[] = []; @observable private corr_matrix = [[0, 0], [0, 0]]; // for testing @@ -277,11 +278,17 @@ export class ClientRecommender extends React.Component { const sorted_keywords = response.result.keywords; if (sorted_keywords.length > 0) { console.log("IBM keyphrase", sorted_keywords[0]); - highKP = [sorted_keywords[0].text]; + highKP = []; + for (let i = 0; i < 5; i++) { + if (sorted_keywords[i]) { + highKP.push(sorted_keywords[i].text); + } + } + keyterms = new List(highKP); } }); - let kpqv = new KeyphraseQueryView({ keyphrases: ["hello"] }); - ext_recs = await this.sendRequest(highKP, api); + //let kpqv = new KeyphraseQueryView({ keyphrases: ["hello"] }); + ext_recs = await this.sendRequest([highKP[0]], api); } // keyterms: list for extDoc, kp_string: input to TF, ext_recs: {titles, urls} of retrieved results from highKP query diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 356cf52ca..94532aaaa 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -398,7 +398,7 @@ export namespace CognitiveServices { //await vectorize([data], dataDoc, isMainDoc); await vectorize(kp_string, dataDoc, isMainDoc); } else { - return external_recommendations; + return { recs: external_recommendations, keyterms: keyterms }; } }; diff --git a/src/client/views/KeyphraseQueryView.tsx b/src/client/views/KeyphraseQueryView.tsx index 1955399f9..a9dafc4a4 100644 --- a/src/client/views/KeyphraseQueryView.tsx +++ b/src/client/views/KeyphraseQueryView.tsx @@ -4,7 +4,7 @@ import "./KeyphraseQueryView.scss"; // tslint:disable-next-line: class-name export interface KP_Props { - keyphrases: string[]; + keyphrases: string; } @observer @@ -15,15 +15,20 @@ export class KeyphraseQueryView extends React.Component{ } render() { + let kps = this.props.keyphrases.toString(); + let keyterms = this.props.keyphrases.split(','); return (
-

Select queries to send:

- {this.props.keyphrases.map((kp: string) => { - setTimeout(() => { - return (

{kp}

); - }, 1000); - - })} +
Select queries to send:
+
+ {keyterms.map((kp: string) => { + //return (

{"-" + kp}

); + return (

); + })} +
); } diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index 079df9cf4..91618491c 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -1,6 +1,6 @@ import { library } from '@fortawesome/fontawesome-svg-core'; import * as fa from '@fortawesome/free-solid-svg-icons'; -import { action, computed, runInAction, trace } from "mobx"; +import { action, computed, runInAction, trace, observable } from "mobx"; import { observer } from "mobx-react"; import * as rp from "request-promise"; import { Doc, DocListCast, DocListCastAsync, Opt } from "../../../new_fields/Doc"; @@ -119,6 +119,7 @@ export class DocumentView extends DocComponent(Docu private _mainCont = React.createRef(); private _dropDisposer?: DragManager.DragDropDisposer; private _showKPQuery: boolean = false; + private _queries: string = ""; public get ContentDiv() { return this._mainCont.current; } @computed get active() { return SelectionManager.IsSelected(this) || this.props.parentActive(); } @@ -643,16 +644,28 @@ export class DocumentView extends DocComponent(Docu // RecommendationsBox.Instance.displayRecommendations(e.pageX + 100, e.pageY); } + @action externalRecommendation = async (e: React.MouseEvent, api: string) => { if (!ClientRecommender.Instance) new ClientRecommender({ title: "Client Recommender" }); ClientRecommender.Instance.reset_docs(); const doc = Doc.GetDataDoc(this.props.Document); const extdoc = doc.data_ext as Doc; - const values = await ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc, false, api); + const recs_and_kps = await ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc, false, api); + let recs: any; + let kps: any; + if (recs_and_kps) { + recs = recs_and_kps.recs; + kps = recs_and_kps.keyterms; + } + else { + console.log("recommender system failed :("); + return; + } + console.log("ibm keyterms: ", kps.toString()); const headers = [new SchemaHeaderField("title"), new SchemaHeaderField("href")]; let bodies: Doc[] = []; - const titles = values.title_vals; - const urls = values.url_vals; + const titles = recs.title_vals; + const urls = recs.url_vals; for (let i = 0; i < 5; i++) { const body = Docs.Create.FreeformDocument([], { title: titles[i] }); body.href = urls[i]; @@ -660,6 +673,7 @@ export class DocumentView extends DocComponent(Docu } CollectionDockingView.AddRightSplit(Docs.Create.SchemaDocument(headers, bodies, { title: `Showing External Recommendations for "${StrCast(doc.title)}"` }), undefined); this._showKPQuery = true; + this._queries = kps.toString(); } onPointerEnter = (e: React.PointerEvent): void => { Doc.BrushDoc(this.props.Document); }; @@ -828,7 +842,7 @@ export class DocumentView extends DocComponent(Docu }} > {this.innards}
- {this._showKPQuery ? : undefined} + {this._showKPQuery ? : undefined}
; } } -- cgit v1.2.3-70-g09d2 From eb1ce559cefedd3666a57ea00594013fbc3b3692 Mon Sep 17 00:00:00 2001 From: Stanley Yip Date: Fri, 31 Jan 2020 14:23:59 -0500 Subject: pull --- src/client/cognitive_services/CognitiveServices.ts | 10 ++++++++- src/client/views/GestureOverlay.tsx | 24 +++++++++++++++------- 2 files changed, 26 insertions(+), 8 deletions(-) (limited to 'src/client/cognitive_services') diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 9e2ceac62..62308f056 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -47,7 +47,8 @@ export namespace CognitiveServices { let results: any; try { results = await manager.requester(apiKey, manager.converter(data), service).then(json => JSON.parse(json)); - } catch { + } catch (e) { + throw e; results = undefined; } return results; @@ -193,6 +194,13 @@ export namespace CognitiveServices { batch.end(); }; + export const InterpretStrokes = async (strokes: InkData[]) => { + let results = await ExecuteQuery(Service.Handwriting, Manager, strokes); + if (results) { + results.recognitionUnits && (results = results.recognitionUnits); + } + return results; + } } export interface AzureStrokeData { diff --git a/src/client/views/GestureOverlay.tsx b/src/client/views/GestureOverlay.tsx index 580c53a37..e25647e69 100644 --- a/src/client/views/GestureOverlay.tsx +++ b/src/client/views/GestureOverlay.tsx @@ -6,7 +6,7 @@ import { computed, observable, action, runInAction, IReactionDisposer, reaction import { GestureUtils } from "../../pen-gestures/GestureUtils"; import { InteractionUtils } from "../util/InteractionUtils"; import { InkingControl } from "./InkingControl"; -import { InkTool } from "../../new_fields/InkField"; +import { InkTool, InkData } from "../../new_fields/InkField"; import { Doc } from "../../new_fields/Doc"; import { LinkManager } from "../util/LinkManager"; import { DocUtils } from "../documents/Documents"; @@ -19,6 +19,7 @@ import { Utils, emptyPath, emptyFunction, returnFalse, returnOne, returnEmptyStr import { DocumentView } from "./nodes/DocumentView"; import { Transform } from "../util/Transform"; import { DocumentContentsView } from "./nodes/DocumentContentsView"; +import { CognitiveServices } from "../cognitive_services/CognitiveServices"; @observer export default class GestureOverlay extends Touchable { @@ -34,6 +35,7 @@ export default class GestureOverlay extends Touchable { @observable private _thumbY?: number; @observable private _pointerY?: number; @observable private _points: { X: number, Y: number }[] = []; + @observable private _strokes: InkData[] = []; @observable private _palette?: JSX.Element; @observable private _clipboardDoc?: JSX.Element; @@ -364,7 +366,10 @@ export default class GestureOverlay extends Touchable { if (this.Tool !== ToolglassTools.None && xInGlass && yInGlass) { switch (this.Tool) { case ToolglassTools.InkToText: - break; + this._strokes.push(this._points); + this._points = []; + console.log(CognitiveServices.Inking.Appliers.InterpretStrokes([this._points])); + return; } } else { @@ -420,8 +425,10 @@ export default class GestureOverlay extends Touchable { } @computed get svgBounds() { - const xs = this._points.map(p => p.X); - const ys = this._points.map(p => p.Y); + const sxs = this._strokes.reduce((acc, curr) => acc.concat(...curr.map(p => p.X)), new Array()); + const xs = this._points.map(p => p.X).concat(sxs); + const sys = this._strokes.reduce((acc, curr) => acc.concat(...curr.map(p => p.Y)), new Array()); + const ys = this._points.map(p => p.Y).concat(sys); const right = Math.max(...xs); const left = Math.min(...xs); const bottom = Math.max(...ys); @@ -429,8 +436,8 @@ export default class GestureOverlay extends Touchable { return { right: right, left: left, bottom: bottom, top: top, width: right - left, height: bottom - top }; } - @computed get currentStroke() { - if (this._points.length <= 1) { + @computed get currentStrokes() { + if (this._points.length <= 1 && this._strokes.length <= 1) { return (null); } @@ -438,6 +445,7 @@ export default class GestureOverlay extends Touchable { return ( + {this._strokes.map(l => InteractionUtils.CreatePolyline(l, B.left, B.top, this.Color, this.Width))} {InteractionUtils.CreatePolyline(this._points, B.left, B.top, this.Color, this.Width)} ); @@ -447,7 +455,7 @@ export default class GestureOverlay extends Touchable { return [ this.props.children, this._palette, - this.currentStroke + this.currentStrokes ]; } @@ -511,6 +519,8 @@ export default class GestureOverlay extends Touchable { } } +// export class + export enum ToolglassTools { InkToText = "inktotext", None = "none", -- cgit v1.2.3-70-g09d2