From a8a3af0193cc423fc7b0cff4ca9ff1a9074a7998 Mon Sep 17 00:00:00 2001 From: ab Date: Mon, 5 Aug 2019 18:15:36 -0400 Subject: promises etc --- src/server/Recommender.ts | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 src/server/Recommender.ts (limited to 'src/server/Recommender.ts') diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts new file mode 100644 index 000000000..6d9ca6486 --- /dev/null +++ b/src/server/Recommender.ts @@ -0,0 +1,33 @@ +var w2v = require('word2vec'); + +export class Recommender { + + private _model: any; + + constructor() { + console.log("creating recommender..."); + } + + public loadModel(): Promise { + let self = this; + return new Promise(res => { + w2v.loadModel("./node_modules/word2vec/vectors.txt", function (err: any, model: any) { + console.log(err); + console.log(model); + self._model = model; + console.log(model.similarity('father', 'mother')); + res(model); + }); + }); + } + + public testModel() { + if (this._model) { + let similarity = this._model.similarity('father', 'mother'); + console.log(similarity); + } + else { + console.log("model not found :("); + } + } +} -- cgit v1.2.3-70-g09d2 From 06b59a4ec2f1871846696da22928fc7d54ae02d6 Mon Sep 17 00:00:00 2001 From: ab Date: Tue, 6 Aug 2019 16:01:57 -0400 Subject: word2vec is functional --- package.json | 8 +++++- src/client/cognitive_services/CognitiveServices.ts | 18 ++++++++++++- src/client/views/nodes/ImageBox.tsx | 8 ++++-- src/server/Recommender.ts | 30 ++++++++++++++++++---- src/server/index.ts | 19 ++++++++++---- webpack.config.js | 5 +++- 6 files changed, 73 insertions(+), 15 deletions(-) (limited to 'src/server/Recommender.ts') diff --git a/package.json b/package.json index 9012ff1f7..44d5287bd 100644 --- a/package.json +++ b/package.json @@ -3,6 +3,12 @@ "version": "1.0.0", "description": "", "main": "index.js", + "browser": { + "child_process": false + }, + "node": { + "child_process": "empty" + }, "scripts": { "start": "cross-env NODE_OPTIONS=--max_old_space_size=4096 ts-node-dev -- src/server/index.ts", "debug": "cross-env NODE_OPTIONS=--max_old_space_size=8192 ts-node-dev --inspect -- src/server/index.ts", @@ -218,4 +224,4 @@ "xoauth2": "^1.2.0", "youtube": "^0.1.0" } -} +} \ No newline at end of file diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 6afd2571a..863236b60 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -6,6 +6,8 @@ import { RouteStore } from "../../server/RouteStore"; import { Utils } from "../../Utils"; import { InkData } from "../../new_fields/InkField"; import { UndoManager } from "../util/UndoManager"; +import requestPromise = require("request-promise"); +import { List } from "../../new_fields/List"; type APIManager = { converter: BodyConverter, requester: RequestExecutor, analyzer: AnalysisApplier }; type RequestExecutor = (apiKey: string, body: string, service: Service) => Promise; @@ -255,9 +257,23 @@ export namespace CognitiveServices { //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); target[keys[0]] = keyterms; console.log("analyzed!"); + vectorize(keyterms); return null; - } + }, + + }; + function vectorize(keyterms: any) { + console.log("vectorizing..."); + keyterms = ["father", "king"]; + let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; + requestPromise.post(args).then((value) => { + value.forEach((wordvec: any) => { + console.log(wordvec.word); + }); + }); + } + } } \ No newline at end of file diff --git a/src/client/views/nodes/ImageBox.tsx b/src/client/views/nodes/ImageBox.tsx index 7388b532c..161226c0d 100644 --- a/src/client/views/nodes/ImageBox.tsx +++ b/src/client/views/nodes/ImageBox.tsx @@ -1,5 +1,5 @@ import { library } from '@fortawesome/fontawesome-svg-core'; -import { faImage, faFileAudio, faPaintBrush, faAsterisk } from '@fortawesome/free-solid-svg-icons'; +import { faImage, faFileAudio, faPaintBrush, faAsterisk, faBrain } from '@fortawesome/free-solid-svg-icons'; import { action, observable, computed, runInAction } from 'mobx'; import { observer } from "mobx-react"; import Lightbox from 'react-image-lightbox'; @@ -31,12 +31,14 @@ import { faEye } from '@fortawesome/free-regular-svg-icons'; import { ComputedField } from '../../../new_fields/ScriptField'; import { CompileScript } from '../../util/Scripting'; import { thisExpression } from 'babel-types'; +import { Recommender } from '../../../server/Recommender'; +import requestPromise = require('request-promise'); var requestImageSize = require('../../util/request-image-size'); var path = require('path'); const { Howl } = require('howler'); -library.add(faImage, faEye, faPaintBrush); +library.add(faImage, faEye, faPaintBrush, faBrain); library.add(faFileAudio, faAsterisk); @@ -253,6 +255,8 @@ export class ImageBox extends DocComponent(ImageD return keyterms; }; CognitiveServices.Text.Manager.analyzer(this.extensionDoc, ["key words"], data, converter); + // request recommender + //fetch(Utils.prepend("/recommender"), { body: body, method: "POST", headers: { "content-type": "application/json" } }).then((value) => console.log(value)); } generateMetadata = (threshold: Confidence = Confidence.Excellent) => { diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index 6d9ca6486..3c71f3aa1 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -3,25 +3,27 @@ var w2v = require('word2vec'); export class Recommender { private _model: any; + static Instance: Recommender; constructor() { console.log("creating recommender..."); + Recommender.Instance = this; } - public loadModel(): Promise { + private loadModel(): Promise { let self = this; return new Promise(res => { w2v.loadModel("./node_modules/word2vec/vectors.txt", function (err: any, model: any) { - console.log(err); - console.log(model); self._model = model; - console.log(model.similarity('father', 'mother')); res(model); }); }); } - public testModel() { + public async testModel() { + if (!this._model) { + await this.loadModel(); + } if (this._model) { let similarity = this._model.similarity('father', 'mother'); console.log(similarity); @@ -30,4 +32,22 @@ export class Recommender { console.log("model not found :("); } } + + public async testInstance(text: string) { + if (!this._model) { + await this.loadModel(); + } + console.log(text); + } + + public async vectorize(text: string[]) { + if (!this._model) { + await this.loadModel(); + } + if (this._model) { + let word_vecs = this._model.getVectors(text); + console.log(word_vecs[0]); + return word_vecs; + } + } } diff --git a/src/server/index.ts b/src/server/index.ts index dda5a870a..67087fc1f 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -52,11 +52,6 @@ const probe = require("probe-image-size"); var SolrNode = require('solr-node'); var shell = require('shelljs'); -let recommender = new Recommender(); -recommender.loadModel().then(() => { - recommender.testModel(); -}); - const download = (url: string, dest: fs.PathLike) => request.get(url).pipe(fs.createWriteStream(dest)); let youtubeApiKey: string; YoutubeApi.readApiKey((apiKey: string) => youtubeApiKey = apiKey); @@ -651,6 +646,20 @@ app.use(RouteStore.corsProxy, (req, res) => { }).pipe(res); }); +//// + +let recommender = new Recommender(); +recommender.testModel(); + +app.post("/recommender", async (req, res) => { + let keyphrases = req.body.keyphrases; + let wordvecs = await recommender.vectorize(keyphrases); + res.send(wordvecs); +}); + + +///// + app.get(RouteStore.delete, (req, res) => { if (release) { res.send("no"); diff --git a/webpack.config.js b/webpack.config.js index 5e0a6a883..6a14dfcda 100644 --- a/webpack.config.js +++ b/webpack.config.js @@ -93,5 +93,8 @@ module.exports = { warnings: true, errors: true } - } + }, + externals: [ + 'child_process' + ] }; \ No newline at end of file -- cgit v1.2.3-70-g09d2 From 17b27d3575d3f91f461262e5ad72a457238d198a Mon Sep 17 00:00:00 2001 From: ab Date: Wed, 7 Aug 2019 16:28:51 -0400 Subject: correlation matrix completed --- package.json | 2 +- src/client/ClientRecommender.ts | 101 +++++++++++++++++++++ src/client/cognitive_services/CognitiveServices.ts | 22 +++-- src/client/views/MainView.tsx | 7 ++ .../collectionFreeForm/CollectionFreeFormView.tsx | 19 +++- src/client/views/nodes/ImageBox.tsx | 14 +-- src/server/Recommender.ts | 27 +++++- 7 files changed, 166 insertions(+), 26 deletions(-) create mode 100644 src/client/ClientRecommender.ts (limited to 'src/server/Recommender.ts') diff --git a/package.json b/package.json index 44d5287bd..ebb0c35a8 100644 --- a/package.json +++ b/package.json @@ -224,4 +224,4 @@ "xoauth2": "^1.2.0", "youtube": "^0.1.0" } -} \ No newline at end of file +} diff --git a/src/client/ClientRecommender.ts b/src/client/ClientRecommender.ts new file mode 100644 index 000000000..7ff79ab50 --- /dev/null +++ b/src/client/ClientRecommender.ts @@ -0,0 +1,101 @@ +import { Doc } from "../new_fields/Doc"; +import { StrCast } from "../new_fields/Types"; +import { List } from "../new_fields/List"; +import { CognitiveServices } from "./cognitive_services/CognitiveServices"; + + +var assert = require('assert'); + +export class ClientRecommender { + + static Instance: ClientRecommender; + private docVectors: Set; + + constructor() { + //console.log("creating client recommender..."); + ClientRecommender.Instance = this; + this.docVectors = new Set(); + } + + + /*** + * Computes the cosine similarity between two vectors in Euclidean space. + */ + + private distance(vector1: number[], vector2: number[]) { + assert(vector1.length === vector2.length, "Vectors are not the same length"); + var dotproduct = 0; + var mA = 0; + var mB = 0; + for (let i = 0; i < vector1.length; i++) { // here you missed the i++ + dotproduct += (vector1[i] * vector2[i]); + mA += (vector1[i] * vector1[i]); + mB += (vector2[i] * vector2[i]); + } + mA = Math.sqrt(mA); + mB = Math.sqrt(mB); + var similarity = (dotproduct) / ((mA) * (mB)); // here you needed extra brackets + return similarity; + } + + /*** + * Computes the mean of a set of vectors + */ + + public mean(paragraph: Set) { + const n = 200; + const num_words = paragraph.size; + let meanVector = new Array(n).fill(0); // mean vector + paragraph.forEach((wordvec: number[]) => { + for (let i = 0; i < n; i++) { + meanVector[i] += wordvec[i]; + } + }); + meanVector = meanVector.map(x => x / num_words); + this.addToDocSet(meanVector); + return meanVector; + } + + private addToDocSet(vector: number[]) { + if (this.docVectors) { + this.docVectors.add(vector); + } + } + + /*** + * Uses Cognitive Services to extract keywords from a document + */ + + public async extractText(dataDoc: Doc, extDoc: Doc) { + let data = StrCast(dataDoc.title); + //console.log(data); + let converter = (results: any) => { + let keyterms = new List(); + results.documents.forEach((doc: any) => { + let keyPhrases = doc.keyPhrases; + keyPhrases.map((kp: string) => keyterms.push(kp)); + }); + return keyterms; + }; + await CognitiveServices.Text.Manager.analyzer(extDoc, ["key words"], data, converter); + } + + /*** + * Creates distance matrix for all Documents analyzed + */ + + public createDistanceMatrix(documents: Set = this.docVectors) { + const documents_list = Array.from(documents); + const n = documents_list.length; + var matrix = new Array(n).fill(0).map(() => new Array(n).fill(0)); + for (let i = 0; i < n; i++) { + var doc1 = documents_list[i]; + for (let j = 0; j < n; j++) { + var doc2 = documents_list[j]; + matrix[i][j] = this.distance(doc1, doc2); + } + } + return matrix; + } + +} \ No newline at end of file diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 863236b60..cc366abc2 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -8,6 +8,7 @@ import { InkData } from "../../new_fields/InkField"; import { UndoManager } from "../util/UndoManager"; import requestPromise = require("request-promise"); import { List } from "../../new_fields/List"; +import { ClientRecommender } from "../ClientRecommender"; type APIManager = { converter: BodyConverter, requester: RequestExecutor, analyzer: AnalysisApplier }; type RequestExecutor = (apiKey: string, body: string, service: Service) => Promise; @@ -257,20 +258,21 @@ export namespace CognitiveServices { //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); target[keys[0]] = keyterms; console.log("analyzed!"); - vectorize(keyterms); - return null; - }, - - + await vectorize(keyterms); + } }; - function vectorize(keyterms: any) { + async function vectorize(keyterms: any) { console.log("vectorizing..."); - keyterms = ["father", "king"]; + //keyterms = ["father", "king"]; let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; - requestPromise.post(args).then((value) => { - value.forEach((wordvec: any) => { - console.log(wordvec.word); + await requestPromise.post(args).then(async (wordvecs) => { + var vectorValues = new Set(); + wordvecs.forEach((wordvec: any) => { + //console.log(wordvec.word); + vectorValues.add(wordvec.values as number[]); }); + ClientRecommender.Instance.mean(vectorValues); + //console.log(vectorValues.size); }); } diff --git a/src/client/views/MainView.tsx b/src/client/views/MainView.tsx index 2ecf5fd85..97964166a 100644 --- a/src/client/views/MainView.tsx +++ b/src/client/views/MainView.tsx @@ -39,6 +39,7 @@ import { FilterBox } from './search/FilterBox'; import { CollectionTreeView } from './collections/CollectionTreeView'; import { ClientUtils } from '../util/ClientUtils'; import { SchemaHeaderField, RandomPastel } from '../../new_fields/SchemaHeaderField'; +//import { DocumentManager } from '../util/DocumentManager'; @observer export class MainView extends React.Component { @@ -435,6 +436,12 @@ export class MainView extends React.Component { ; } + // clusterDocuments = () => { + // DocumentManager.Instance.DocumentViews(); + // } + + + @action diff --git a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx index 29f9b1429..9344b43d2 100644 --- a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx +++ b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx @@ -1,6 +1,6 @@ import { library } from "@fortawesome/fontawesome-svg-core"; import { faEye } from "@fortawesome/free-regular-svg-icons"; -import { faCompass, faCompressArrowsAlt, faExpandArrowsAlt, faPaintBrush, faTable, faUpload } from "@fortawesome/free-solid-svg-icons"; +import { faCompass, faCompressArrowsAlt, faExpandArrowsAlt, faPaintBrush, faTable, faUpload, faBrain } from "@fortawesome/free-solid-svg-icons"; import { action, computed } from "mobx"; import { observer } from "mobx-react"; import { Doc, DocListCastAsync, HeightSym, WidthSym } from "../../../../new_fields/Doc"; @@ -37,8 +37,9 @@ import "./CollectionFreeFormView.scss"; import { MarqueeView } from "./MarqueeView"; import React = require("react"); import v5 = require("uuid/v5"); +import { ClientRecommender } from "../../../ClientRecommender"; -library.add(faEye, faTable, faPaintBrush, faExpandArrowsAlt, faCompressArrowsAlt, faCompass, faUpload); +library.add(faEye, faTable, faPaintBrush, faExpandArrowsAlt, faCompressArrowsAlt, faCompass, faUpload, faBrain); export const panZoomSchema = createSchema({ panX: "number", @@ -596,6 +597,20 @@ export class CollectionFreeFormView extends CollectionSubView(PanZoomDocument) { input.click(); } }); + ContextMenu.Instance.addItem({ + description: "Recommender System", + event: async () => { + new ClientRecommender(); + let activedocs = this.getActiveDocuments(); + await Promise.all(activedocs.map((doc: Doc) => { + console.log(StrCast(doc.title)); + const extdoc = doc.data_ext as Doc; + return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc); + })); + console.log(ClientRecommender.Instance.createDistanceMatrix()); + }, + icon: "brain" + }); } diff --git a/src/client/views/nodes/ImageBox.tsx b/src/client/views/nodes/ImageBox.tsx index 161226c0d..660772c0e 100644 --- a/src/client/views/nodes/ImageBox.tsx +++ b/src/client/views/nodes/ImageBox.tsx @@ -31,7 +31,7 @@ import { faEye } from '@fortawesome/free-regular-svg-icons'; import { ComputedField } from '../../../new_fields/ScriptField'; import { CompileScript } from '../../util/Scripting'; import { thisExpression } from 'babel-types'; -import { Recommender } from '../../../server/Recommender'; +//import { Recommender } from '../../../server/Recommender'; import requestPromise = require('request-promise'); var requestImageSize = require('../../util/request-image-size'); var path = require('path'); @@ -244,17 +244,7 @@ export class ImageBox extends DocComponent(ImageD } extractText = () => { - let data = StrCast(this.dataDoc.title); - console.log(data); - let converter = (results: any) => { - let keyterms = new List(); - results.documents.forEach((doc: any) => { - let keyPhrases = doc.keyPhrases; - keyPhrases.map((kp: string) => keyterms.push(kp)); - }); - return keyterms; - }; - CognitiveServices.Text.Manager.analyzer(this.extensionDoc, ["key words"], data, converter); + //Recommender.Instance.extractText(this.dataDoc, this.extensionDoc); // request recommender //fetch(Utils.prepend("/recommender"), { body: body, method: "POST", headers: { "content-type": "application/json" } }).then((value) => console.log(value)); } diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index 3c71f3aa1..ea59703c3 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -1,4 +1,10 @@ +//import { Doc } from "../new_fields/Doc"; +//import { StrCast } from "../new_fields/Types"; +//import { List } from "../new_fields/List"; +//import { CognitiveServices } from "../client/cognitive_services/CognitiveServices"; + var w2v = require('word2vec'); +var assert = require('assert'); export class Recommender { @@ -10,6 +16,10 @@ export class Recommender { Recommender.Instance = this; } + /*** + * Loads pre-trained model from word2vec + */ + private loadModel(): Promise { let self = this; return new Promise(res => { @@ -20,6 +30,10 @@ export class Recommender { }); } + /*** + * Testing + */ + public async testModel() { if (!this._model) { await this.loadModel(); @@ -33,6 +47,10 @@ export class Recommender { } } + /*** + * Tests if instance exists + */ + public async testInstance(text: string) { if (!this._model) { await this.loadModel(); @@ -40,14 +58,21 @@ export class Recommender { console.log(text); } + /*** + * Uses model to convert words to vectors + */ + public async vectorize(text: string[]) { if (!this._model) { await this.loadModel(); } if (this._model) { let word_vecs = this._model.getVectors(text); - console.log(word_vecs[0]); return word_vecs; } } + + + + } -- cgit v1.2.3-70-g09d2 From 17f28e393e0c24fcace33a3ecd5564bc766fe685 Mon Sep 17 00:00:00 2001 From: ab Date: Thu, 8 Aug 2019 13:00:51 -0400 Subject: fuck you react --- src/client/ClientRecommender.ts | 101 -------------- src/client/ClientRecommender.tsx | 148 +++++++++++++++++++++ .../collectionFreeForm/CollectionFreeFormView.tsx | 4 +- src/server/Recommender.ts | 2 + 4 files changed, 153 insertions(+), 102 deletions(-) delete mode 100644 src/client/ClientRecommender.ts create mode 100644 src/client/ClientRecommender.tsx (limited to 'src/server/Recommender.ts') diff --git a/src/client/ClientRecommender.ts b/src/client/ClientRecommender.ts deleted file mode 100644 index 7ff79ab50..000000000 --- a/src/client/ClientRecommender.ts +++ /dev/null @@ -1,101 +0,0 @@ -import { Doc } from "../new_fields/Doc"; -import { StrCast } from "../new_fields/Types"; -import { List } from "../new_fields/List"; -import { CognitiveServices } from "./cognitive_services/CognitiveServices"; - - -var assert = require('assert'); - -export class ClientRecommender { - - static Instance: ClientRecommender; - private docVectors: Set; - - constructor() { - //console.log("creating client recommender..."); - ClientRecommender.Instance = this; - this.docVectors = new Set(); - } - - - /*** - * Computes the cosine similarity between two vectors in Euclidean space. - */ - - private distance(vector1: number[], vector2: number[]) { - assert(vector1.length === vector2.length, "Vectors are not the same length"); - var dotproduct = 0; - var mA = 0; - var mB = 0; - for (let i = 0; i < vector1.length; i++) { // here you missed the i++ - dotproduct += (vector1[i] * vector2[i]); - mA += (vector1[i] * vector1[i]); - mB += (vector2[i] * vector2[i]); - } - mA = Math.sqrt(mA); - mB = Math.sqrt(mB); - var similarity = (dotproduct) / ((mA) * (mB)); // here you needed extra brackets - return similarity; - } - - /*** - * Computes the mean of a set of vectors - */ - - public mean(paragraph: Set) { - const n = 200; - const num_words = paragraph.size; - let meanVector = new Array(n).fill(0); // mean vector - paragraph.forEach((wordvec: number[]) => { - for (let i = 0; i < n; i++) { - meanVector[i] += wordvec[i]; - } - }); - meanVector = meanVector.map(x => x / num_words); - this.addToDocSet(meanVector); - return meanVector; - } - - private addToDocSet(vector: number[]) { - if (this.docVectors) { - this.docVectors.add(vector); - } - } - - /*** - * Uses Cognitive Services to extract keywords from a document - */ - - public async extractText(dataDoc: Doc, extDoc: Doc) { - let data = StrCast(dataDoc.title); - //console.log(data); - let converter = (results: any) => { - let keyterms = new List(); - results.documents.forEach((doc: any) => { - let keyPhrases = doc.keyPhrases; - keyPhrases.map((kp: string) => keyterms.push(kp)); - }); - return keyterms; - }; - await CognitiveServices.Text.Manager.analyzer(extDoc, ["key words"], data, converter); - } - - /*** - * Creates distance matrix for all Documents analyzed - */ - - public createDistanceMatrix(documents: Set = this.docVectors) { - const documents_list = Array.from(documents); - const n = documents_list.length; - var matrix = new Array(n).fill(0).map(() => new Array(n).fill(0)); - for (let i = 0; i < n; i++) { - var doc1 = documents_list[i]; - for (let j = 0; j < n; j++) { - var doc2 = documents_list[j]; - matrix[i][j] = this.distance(doc1, doc2); - } - } - return matrix; - } - -} \ No newline at end of file diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx new file mode 100644 index 000000000..2344ee490 --- /dev/null +++ b/src/client/ClientRecommender.tsx @@ -0,0 +1,148 @@ +import { Doc } from "../new_fields/Doc"; +import { StrCast } from "../new_fields/Types"; +import { List } from "../new_fields/List"; +import { CognitiveServices } from "./cognitive_services/CognitiveServices"; +import React = require("react"); +import { observer } from "mobx-react"; +import { observable, action, computed, reaction } from "mobx"; +var assert = require('assert'); +import Table from 'react-bootstrap/Table'; + +export interface RecommenderProps { + title: string; +} + +@observer +export class ClientRecommender extends React.Component { + + static Instance: ClientRecommender; + private docVectors: Set; + private corr_matrix = observable([[0, 0], [0, 0]]); + @observable private firstnum = 0; + //@observable private corr_matrix: number[][] = [[0, 0], [0, 0]]; + + constructor(props: RecommenderProps) { + //console.log("creating client recommender..."); + super(props); + ClientRecommender.Instance = this; + this.docVectors = new Set(); + //this.corr_matrix = [[0, 0], [0, 0]]; + } + + + /*** + * Computes the cosine similarity between two vectors in Euclidean space. + */ + + private distance(vector1: number[], vector2: number[]) { + assert(vector1.length === vector2.length, "Vectors are not the same length"); + var dotproduct = 0; + var mA = 0; + var mB = 0; + for (let i = 0; i < vector1.length; i++) { // here you missed the i++ + dotproduct += (vector1[i] * vector2[i]); + mA += (vector1[i] * vector1[i]); + mB += (vector2[i] * vector2[i]); + } + mA = Math.sqrt(mA); + mB = Math.sqrt(mB); + var similarity = (dotproduct) / ((mA) * (mB)); // here you needed extra brackets + return similarity; + } + + /*** + * Computes the mean of a set of vectors + */ + + public mean(paragraph: Set) { + const n = 200; + const num_words = paragraph.size; + let meanVector = new Array(n).fill(0); // mean vector + paragraph.forEach((wordvec: number[]) => { + for (let i = 0; i < n; i++) { + meanVector[i] += wordvec[i]; + } + }); + meanVector = meanVector.map(x => x / num_words); + this.addToDocSet(meanVector); + return meanVector; + } + + private addToDocSet(vector: number[]) { + if (this.docVectors) { + this.docVectors.add(vector); + } + } + + /*** + * Uses Cognitive Services to extract keywords from a document + */ + + public async extractText(dataDoc: Doc, extDoc: Doc) { + let data = StrCast(dataDoc.title); + //console.log(data); + let converter = (results: any) => { + let keyterms = new List(); + results.documents.forEach((doc: any) => { + let keyPhrases = doc.keyPhrases; + keyPhrases.map((kp: string) => keyterms.push(kp)); + }); + return keyterms; + }; + await CognitiveServices.Text.Manager.analyzer(extDoc, ["key words"], data, converter); + } + + /*** + * Creates distance matrix for all Documents analyzed + */ + + @action + public createDistanceMatrix(documents: Set = this.docVectors) { + //this.corr_matrix[0][0] = 500; + this.firstnum = 500; + const documents_list = Array.from(documents); + const n = documents_list.length; + var matrix = new Array(n).fill(0).map(() => new Array(n).fill(0)); + for (let i = 0; i < n; i++) { + var doc1 = documents_list[i]; + for (let j = 0; j < n; j++) { + var doc2 = documents_list[j]; + matrix[i][j] = this.distance(doc1, doc2); + } + } + //this.corr_matrix = matrix; + + return matrix; + } + + @computed get first_num() { + return this.firstnum; + } + + dumb_reaction = reaction( + () => this.first_num, + number => { + console.log("number has changed", number); + this.forceUpdate(); + } + ); + + render() { + return (
+

{this.props.title ? this.props.title : "hello"}

+ + + + + + + + + + + +
{this.first_num}{this.corr_matrix[0][1]}
{this.corr_matrix[1][0]}{this.corr_matrix[1][1]}
+
); + } + +} \ No newline at end of file diff --git a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx index 9344b43d2..5259b9b49 100644 --- a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx +++ b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx @@ -523,6 +523,7 @@ export class CollectionFreeFormView extends CollectionSubView(PanZoomDocument) { super.setCursorPosition(this.getTransform().transformPoint(e.clientX, e.clientY)); } + @action onContextMenu = (e: React.MouseEvent) => { let layoutItems: ContextMenuProps[] = []; layoutItems.push({ @@ -600,7 +601,7 @@ export class CollectionFreeFormView extends CollectionSubView(PanZoomDocument) { ContextMenu.Instance.addItem({ description: "Recommender System", event: async () => { - new ClientRecommender(); + // if (!ClientRecommender.Instance) new ClientRecommender({ title: "Client Recommender" }); let activedocs = this.getActiveDocuments(); await Promise.all(activedocs.map((doc: Doc) => { console.log(StrCast(doc.title)); @@ -715,6 +716,7 @@ class CollectionFreeFormViewPannableContents extends React.Component otherwise, reactions won't fire return
{this.props.children} +
; } } \ No newline at end of file diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index ea59703c3..d175b67c7 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -10,6 +10,7 @@ export class Recommender { private _model: any; static Instance: Recommender; + private dimension: number = 0; constructor() { console.log("creating recommender..."); @@ -25,6 +26,7 @@ export class Recommender { return new Promise(res => { w2v.loadModel("./node_modules/word2vec/vectors.txt", function (err: any, model: any) { self._model = model; + self.dimension = model.size; res(model); }); }); -- cgit v1.2.3-70-g09d2 From 9dd2a31b72e5e527e2dae3b68f856ab8da879e93 Mon Sep 17 00:00:00 2001 From: ab Date: Mon, 12 Aug 2019 16:41:23 -0400 Subject: documentation --- package.json | 2 +- src/client/ClientRecommender.tsx | 18 +++++----- src/client/cognitive_services/CognitiveServices.ts | 42 ++++++++++++---------- src/client/util/SearchUtil.ts | 13 ++++--- .../collectionFreeForm/CollectionFreeFormView.tsx | 1 + src/server/Recommender.ts | 1 + 6 files changed, 45 insertions(+), 32 deletions(-) (limited to 'src/server/Recommender.ts') diff --git a/package.json b/package.json index 1e2c74411..1c7a10ac8 100644 --- a/package.json +++ b/package.json @@ -225,4 +225,4 @@ "xoauth2": "^1.2.0", "youtube": "^0.1.0" } -} \ No newline at end of file +} diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index ddaa8a7fc..63f85c737 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -75,13 +75,15 @@ export class ClientRecommender extends React.Component { const n = 200; const num_words = paragraph.size; let meanVector = new Array(n).fill(0); // mean vector - paragraph.forEach((wordvec: number[]) => { - for (let i = 0; i < n; i++) { - meanVector[i] += wordvec[i]; - } - }); - meanVector = meanVector.map(x => x / num_words); - this.addToDocSet(meanVector); + if (num_words > 0) { // check to see if paragraph actually was vectorized + paragraph.forEach((wordvec: number[]) => { + for (let i = 0; i < n; i++) { + meanVector[i] += wordvec[i]; + } + }); + meanVector = meanVector.map(x => x / num_words); + this.addToDocSet(meanVector); + } return meanVector; } @@ -106,7 +108,7 @@ export class ClientRecommender extends React.Component { }); return keyterms; }; - await CognitiveServices.Text.Manager.analyzer(extDoc, ["key words"], data, converter); + await CognitiveServices.Text.Appliers.analyzer(extDoc, ["key words"], data, converter); } /*** diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 954a05585..75d0760ed 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -258,32 +258,38 @@ export namespace CognitiveServices { }; console.log("requested!"); return request.post(options); - }, - analyzer: async (target: Doc, keys: string[], data: string, converter: Converter) => { - let results = await ExecuteQuery(Service.Text, Manager, data); + } + }; + + export namespace Appliers { + + export async function vectorize(keyterms: any) { + console.log("vectorizing..."); + //keyterms = ["father", "king"]; + let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; + await requestPromise.post(args).then(async (wordvecs) => { + var vectorValues = new Set(); + wordvecs.forEach((wordvec: any) => { + //console.log(wordvec.word); + vectorValues.add(wordvec.values as number[]); + }); + ClientRecommender.Instance.mean(vectorValues); + //console.log(vectorValues.size); + }); + } + + export const analyzer = async (target: Doc, keys: string[], data: string, converter: Converter) => { + let results = await ExecuteQuery(Service.Text, Manager, data); console.log(results); let keyterms = converter(results); //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); target[keys[0]] = keyterms; console.log("analyzed!"); await vectorize(keyterms); - } - }; - async function vectorize(keyterms: any) { - console.log("vectorizing..."); - //keyterms = ["father", "king"]; - let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; - await requestPromise.post(args).then(async (wordvecs) => { - var vectorValues = new Set(); - wordvecs.forEach((wordvec: any) => { - //console.log(wordvec.word); - vectorValues.add(wordvec.values as number[]); - }); - ClientRecommender.Instance.mean(vectorValues); - //console.log(vectorValues.size); - }); + }; } } + } \ No newline at end of file diff --git a/src/client/util/SearchUtil.ts b/src/client/util/SearchUtil.ts index 3a3ba1803..1fce995d7 100644 --- a/src/client/util/SearchUtil.ts +++ b/src/client/util/SearchUtil.ts @@ -82,18 +82,21 @@ export namespace SearchUtil { const query = "*"; let response = await rp.get(Utils.prepend('/search'), { qs: { - query + q: query } }); - let res: string[] = JSON.parse(response); - const fields = await DocServer.GetRefFields(res); + let result: IdSearchResult = JSON.parse(response); + const { ids, numFound, highlighting } = result; + const docMap = await DocServer.GetRefFields(ids); const docs: Doc[] = []; - for (const id of res) { - const field = fields[id]; + for (const id of ids) { + const field = docMap[id]; if (field instanceof Doc) { docs.push(field); } } return docs; + // const docs = ids.map((id: string) => docMap[id]).filter((doc: any) => doc instanceof Doc); + // return docs as Doc[]; } } diff --git a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx index e9791df4e..d1e8031fd 100644 --- a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx +++ b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx @@ -894,6 +894,7 @@ export class CollectionFreeFormView extends CollectionSubView(PanZoomDocument) { let activedocs = this.getActiveDocuments(); let allDocs = await SearchUtil.GetAllDocs(); allDocs.forEach(doc => console.log(doc.title)); + // clears internal representation of documents as vectors ClientRecommender.Instance.reset_docs(); await Promise.all(activedocs.map((doc: Doc) => { //console.log(StrCast(doc.title)); diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index d175b67c7..1c95d7ea4 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -70,6 +70,7 @@ export class Recommender { } if (this._model) { let word_vecs = this._model.getVectors(text); + return word_vecs; } } -- cgit v1.2.3-70-g09d2 From 72a867862313aa2063e346a777d6b73e69b4c0ae Mon Sep 17 00:00:00 2001 From: Abdullah Ahmed Date: Wed, 28 Aug 2019 21:24:01 -0400 Subject: path change --- src/server/Recommender.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/server/Recommender.ts') diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index 1c95d7ea4..781974208 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -24,7 +24,7 @@ export class Recommender { private loadModel(): Promise { let self = this; return new Promise(res => { - w2v.loadModel("./node_modules/word2vec/vectors.txt", function (err: any, model: any) { + w2v.loadModel("./node_modules/word2vec/examples/fixtures/vectors.txt", function (err: any, model: any) { self._model = model; self.dimension = model.size; res(model); -- cgit v1.2.3-70-g09d2 From a1c2afe27c75354d4365a79ea202eca94516069e Mon Sep 17 00:00:00 2001 From: ab Date: Wed, 4 Sep 2019 09:52:39 -0400 Subject: stopwords, frequency, proto arxiv --- package.json | 2 + src/client/ClientRecommender.tsx | 75 ++++++++++++++++++++-- src/client/cognitive_services/CognitiveServices.ts | 13 ++-- src/client/util/TooltipTextMenu.scss | 2 +- src/client/views/nodes/DocumentView.tsx | 1 + src/server/Recommender.ts | 29 +++++++++ src/server/index.ts | 1 + 7 files changed, 113 insertions(+), 10 deletions(-) (limited to 'src/server/Recommender.ts') diff --git a/package.json b/package.json index ec5af93b1..d4b5bdab6 100644 --- a/package.json +++ b/package.json @@ -118,6 +118,7 @@ "@types/youtube": "0.0.38", "adm-zip": "^0.4.13", "archiver": "^3.0.3", + "arxiv-api-node": "0.0.2", "async": "^2.6.2", "babel-runtime": "^6.26.0", "bcrypt-nodejs": "0.0.3", @@ -218,6 +219,7 @@ "socket.io-client": "^2.2.0", "solr-node": "^1.2.1", "standard-http-error": "^2.0.1", + "stopword": "^0.3.3", "typescript-collections": "^1.3.2", "url-loader": "^1.1.2", "uuid": "^3.3.2", diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index 9953700cc..66f0ae745 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -6,6 +6,7 @@ import React = require("react"); import { observer } from "mobx-react"; import { observable, action, computed, reaction } from "mobx"; var assert = require('assert'); +var sw = require('stopword'); import "./ClientRecommender.scss"; import { JSXElement } from "babel-types"; import { ToPlainText, RichTextField } from "../new_fields/RichTextField"; @@ -130,20 +131,86 @@ export class ClientRecommender extends React.Component { let data: string; fielddata ? data = fielddata[ToPlainText]() : data = ""; console.log(data); - let converter = (results: any) => { + let converter = (results: any, data: string) => { let keyterms = new List(); + let keyterms_counted = new List(); results.documents.forEach((doc: any) => { let keyPhrases = doc.keyPhrases; keyPhrases.map((kp: string) => { - const words = kp.split(" "); - words.forEach((word) => keyterms.push(word)); + const frequency = this.countFrequencies(kp, data); + let words = kp.split(" "); // separates phrase into words + words = this.removeStopWords(words); + words.forEach((word) => { + keyterms.push(word); + for (let i = 0; i < frequency; i++) { + keyterms_counted.push(word); + } + }); + }); + }); + return { keyterms: keyterms, keyterms_counted: keyterms_counted }; + }; + let test = (results: any, data: string) => { + results.documents.forEach((doc: any) => { + let kps = doc.keyPhrases; + kps.map((kp: string) => { + this.countFrequencies(kp, data); }); }); - return keyterms; }; await CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc); } + private countFrequencies(keyphrase: string, paragraph: string) { + let data = paragraph.split(" "); + let kp_array = keyphrase.split(" "); + let num_keywords = kp_array.length; + let par_length = data.length; + let frequency = 0; + // console.log("Paragraph: ", data); + // console.log("Keyphrases:", kp_array); + for (let i = 0; i <= par_length - num_keywords; i++) { + const window = data.slice(i, i + num_keywords); + if (JSON.stringify(window) === JSON.stringify(kp_array)) { + frequency++; + } + } + return frequency; + } + + private removeStopWords(word_array: string[]) { + //console.log(sw.removeStopwords(word_array)); + return sw.removeStopwords(word_array); + } + + /** + * Request to the arXiv server for ML articles. + */ + + arxivrequest = async (query: string) => { + let xhttp = new XMLHttpRequest(); + let serveraddress = "http://export.arxiv.org/api" + let endpoint = serveraddress + "/query?search_query=all:" + query + "&start=0&max_results=5"; + let promisified = (resolve: any, reject: any) => { + xhttp.onreadystatechange = function () { + if (this.readyState === 4) { + let result = xhttp.response; + switch (this.status) { + case 200: + console.log(result); + return resolve(result); + case 400: + default: + return reject(result); + } + } + }; + xhttp.open("GET", endpoint, true); + xhttp.send(); + }; + return new Promise(promisified); + } + /*** * Creates distance matrix for all Documents analyzed */ diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 874ee433d..eb1dd5197 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -15,6 +15,7 @@ type RequestExecutor = (apiKey: string, body: string, service: Service) => Promi type AnalysisApplier = (target: Doc, relevantKeys: string[], data: D, ...args: any) => any; type BodyConverter = (data: D) => string; type Converter = (results: any) => Field; +type TextConverter = (results: any, data: string) => { keyterms: Field, keyterms_counted: Field }; export type Tag = { name: string, confidence: number }; export type Rectangle = { top: number, left: number, width: number, height: number }; @@ -263,7 +264,7 @@ export namespace CognitiveServices { export namespace Appliers { - export async function vectorize(keyterms: any, dataDoc: Doc, mainDoc: boolean = false) { + export async function vectorize(keyterms: any, dataDoc: Doc, mainDoc: boolean = false, data: string) { console.log("vectorizing..."); //keyterms = ["father", "king"]; let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; @@ -284,15 +285,17 @@ export namespace CognitiveServices { }); } - export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: Converter, mainDoc: boolean = false) => { + export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, mainDoc: boolean = false) => { let results = await ExecuteQuery(Service.Text, Manager, data); console.log(results); - let keyterms = converter(results); + let keyterms = converter(results, data); //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); - target[keys[0]] = keyterms; + target[keys[0]] = keyterms.keyterms; console.log("analyzed!"); - await vectorize(keyterms, dataDoc, mainDoc); + await vectorize(keyterms.keyterms_counted, dataDoc, mainDoc, data); }; + + // export async function countFrequencies() } } diff --git a/src/client/util/TooltipTextMenu.scss b/src/client/util/TooltipTextMenu.scss index ebf833dbe..ab6cee763 100644 --- a/src/client/util/TooltipTextMenu.scss +++ b/src/client/util/TooltipTextMenu.scss @@ -351,5 +351,5 @@ .dragger{ color: #eee; - margin-left: 5px; + margin: 5px; } \ No newline at end of file diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index d51c90b61..a1e64f1c5 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -651,6 +651,7 @@ export class DocumentView extends DocComponent(Docu // allDocs.forEach(doc => console.log(doc.title)); // clears internal representation of documents as vectors ClientRecommender.Instance.reset_docs(); + ClientRecommender.Instance.arxivrequest("electrons"); await Promise.all(allDocs.map((doc: Doc) => { let mainDoc: boolean = false; const dataDoc = Doc.GetDataDoc(doc); diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index 781974208..efb5fbbbf 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -5,6 +5,9 @@ var w2v = require('word2vec'); var assert = require('assert'); +var arxivapi = require('arxiv-api-node'); +import requestPromise = require("request-promise"); + export class Recommender { @@ -75,6 +78,32 @@ export class Recommender { } } + public async arxivRequest(query: string) { + // let xhttp = new XMLHttpRequest(); + // let serveraddress = "http://export.arxiv.org/api/query?search_query=all:electron&start=0&max_results=1"; + // let promisified = (resolve: any, reject: any) => { + // xhttp.onreadystatechange = function () { + // if (this.readyState === 4) { + // let result = xhttp.response; + // switch (this.status) { + // case 200: + // console.log(result); + // return resolve(result); + // case 400: + // default: + // return reject(result); + // } + // } + // }; + // xhttp.open("GET", serveraddress, true); + // xhttp.send(); + // }; + // return new Promise(promisified); + + let res = await arxivapi.query("all:electrons"); + console.log(res); + } + diff --git a/src/server/index.ts b/src/server/index.ts index e1ecc4ac0..24ffc466f 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -688,6 +688,7 @@ app.use(RouteStore.corsProxy, (req, res) => { let recommender = new Recommender(); recommender.testModel(); +recommender.arxivRequest("Triangle-GAN"); app.post("/recommender", async (req, res) => { let keyphrases = req.body.keyphrases; -- cgit v1.2.3-70-g09d2 From f8eabfdbf30e13bc792ddf321d15e325c9da4944 Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Wed, 11 Sep 2019 16:54:33 -0400 Subject: make command url linked --- src/server/Recommender.ts | 1 + 1 file changed, 1 insertion(+) (limited to 'src/server/Recommender.ts') diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index efb5fbbbf..d762da5b2 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -8,6 +8,7 @@ var assert = require('assert'); var arxivapi = require('arxiv-api-node'); import requestPromise = require("request-promise"); +//http://gnuwin32.sourceforge.net/packages/make.htm export class Recommender { -- cgit v1.2.3-70-g09d2 From 7a01cf9c12f850c0c1d9c278452df88dd55845aa Mon Sep 17 00:00:00 2001 From: ab Date: Sat, 21 Sep 2019 16:54:56 -0400 Subject: parsed 1m file --- package.json | 2 +- sentence_parser.py | 7 +++++++ src/client/views/nodes/DocumentView.tsx | 1 - src/server/Recommender.ts | 16 ++++++++++++++++ src/server/index.ts | 1 + 5 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 sentence_parser.py (limited to 'src/server/Recommender.ts') diff --git a/package.json b/package.json index 3486f752b..422f20e79 100644 --- a/package.json +++ b/package.json @@ -229,4 +229,4 @@ "xoauth2": "^1.2.0", "youtube": "^0.1.0" } -} +} \ No newline at end of file diff --git a/sentence_parser.py b/sentence_parser.py new file mode 100644 index 000000000..a07b25f6d --- /dev/null +++ b/sentence_parser.py @@ -0,0 +1,7 @@ +def parse_text(file_path): + with open(file_path, encoding='utf8',mode='r+') as f: + lines = f.readlines() + print(len(lines)) + print(lines[1][1]) + +parse_text("eng_news-typical_2016_10K-sentences.txt") \ No newline at end of file diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index a80eafde2..c383163e8 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -785,7 +785,6 @@ export class DocumentView extends DocComponent(Docu body.href = urls[i]; bodies.push(body); } - CollectionDockingView.Instance.AddRightSplit(Docs.Create.SchemaDocument(headers, bodies, { title: `Showing External Recommendations for "${StrCast(doc.title)}"` }), undefined); } diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index d762da5b2..d47257550 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -79,6 +79,22 @@ export class Recommender { } } + public async trainModel() { + console.log("phrasing..."); + w2v.word2vec("./node_modules/word2vec/examples/eng_news-typical_2016_1M-sentences.txt", './node_modules/word2vec/examples/my_phrases.txt', { + cbow: 1, + size: 200, + window: 8, + negative: 25, + hs: 0, + sample: 1e-4, + threads: 20, + iter: 200, + minCount: 2 + }); + console.log("phrased!!!"); + } + public async arxivRequest(query: string) { // let xhttp = new XMLHttpRequest(); // let serveraddress = "http://export.arxiv.org/api/query?search_query=all:electron&start=0&max_results=1"; diff --git a/src/server/index.ts b/src/server/index.ts index 24ffc466f..e7d49579d 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -688,6 +688,7 @@ app.use(RouteStore.corsProxy, (req, res) => { let recommender = new Recommender(); recommender.testModel(); +recommender.trainModel(); recommender.arxivRequest("Triangle-GAN"); app.post("/recommender", async (req, res) => { -- cgit v1.2.3-70-g09d2 From 77be33c927b52f93e862b868321f79c59bfc050d Mon Sep 17 00:00:00 2001 From: ab Date: Sat, 28 Sep 2019 17:18:08 -0400 Subject: tensorflow model, server posting fix needed --- package.json | 4 +- src/client/cognitive_services/CognitiveServices.ts | 5 +- src/server/Recommender.ts | 105 +++++++++++---------- src/server/index.ts | 8 +- 4 files changed, 68 insertions(+), 54 deletions(-) (limited to 'src/server/Recommender.ts') diff --git a/package.json b/package.json index 422f20e79..12c1e7637 100644 --- a/package.json +++ b/package.json @@ -56,6 +56,8 @@ "@hig/flyout": "^1.0.3", "@hig/theme-context": "^2.1.3", "@hig/theme-data": "^2.3.3", + "@tensorflow-models/universal-sentence-encoder": "^1.2.0", + "@tensorflow/tfjs": "^1.2.9", "@trendmicro/react-dropdown": "^1.3.0", "@types/adm-zip": "^0.4.32", "@types/animejs": "^2.0.2", @@ -229,4 +231,4 @@ "xoauth2": "^1.2.0", "youtube": "^0.1.0" } -} \ No newline at end of file +} diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index baafb63a1..b23441552 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -269,7 +269,7 @@ export namespace CognitiveServices { //keyterms = ["father", "king"]; let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; await requestPromise.post(args).then(async (wordvecs) => { - if (wordvecs.length > 0) { + if (wordvecs.shape[0] > 0) { console.log("successful vectorization!"); var vectorValues = new Set(); wordvecs.forEach((wordvec: any) => { @@ -282,7 +282,8 @@ export namespace CognitiveServices { console.log("unsuccessful :( word(s) not in vocabulary"); } //console.log(vectorValues.size); - }); + } + ); } export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, mainDoc: boolean = false, internal: boolean = true) => { diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index d47257550..d014ba344 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -7,6 +7,8 @@ var w2v = require('word2vec'); var assert = require('assert'); var arxivapi = require('arxiv-api-node'); import requestPromise = require("request-promise"); +import * as use from '@tensorflow-models/universal-sentence-encoder'; +import { Tensor } from "@tensorflow/tfjs-core/dist/tensor"; //http://gnuwin32.sourceforge.net/packages/make.htm @@ -15,12 +17,31 @@ export class Recommender { private _model: any; static Instance: Recommender; private dimension: number = 0; + private choice: string = ""; constructor() { console.log("creating recommender..."); Recommender.Instance = this; } + /*** + * Loads pre-trained model from TF + */ + + public async loadTFModel() { + let self = this; + return new Promise(res => { + use.load().then(model => { + self.choice = "TF"; + self._model = model; + self.dimension = 512; + res(model); + }); + } + + ); + } + /*** * Loads pre-trained model from word2vec */ @@ -29,6 +50,7 @@ export class Recommender { let self = this; return new Promise(res => { w2v.loadModel("./node_modules/word2vec/examples/fixtures/vectors.txt", function (err: any, model: any) { + self.choice = "WV"; self._model = model; self.dimension = model.size; res(model); @@ -42,40 +64,56 @@ export class Recommender { public async testModel() { if (!this._model) { - await this.loadModel(); + await this.loadTFModel(); } if (this._model) { - let similarity = this._model.similarity('father', 'mother'); - console.log(similarity); + if (this.choice === "WV") { + let similarity = this._model.similarity('father', 'mother'); + console.log(similarity); + } + else if (this.choice === "TF") { + const model = this._model as use.UniversalSentenceEncoder; + // Embed an array of sentences. + const sentences = [ + 'Hello.', + 'How are you?' + ]; + const embeddings = await this.vectorize(sentences); + if (embeddings) embeddings.print(true /*verbose*/); + // model.embed(sentences).then(embeddings => { + // // `embeddings` is a 2D tensor consisting of the 512-dimensional embeddings for each sentence. + // // So in this example `embeddings` has the shape [2, 512]. + // embeddings.print(true /* verbose */); + // }); + } } else { console.log("model not found :("); } } - /*** - * Tests if instance exists - */ - - public async testInstance(text: string) { - if (!this._model) { - await this.loadModel(); - } - console.log(text); - } - /*** * Uses model to convert words to vectors */ - public async vectorize(text: string[]) { + public async vectorize(text: string[]): Promise { if (!this._model) { - await this.loadModel(); + await this.loadTFModel(); } if (this._model) { - let word_vecs = this._model.getVectors(text); - - return word_vecs; + if (this.choice === "WV") { + let word_vecs = this._model.getVectors(text); + return word_vecs; + } + else if (this.choice === "TF") { + const model = this._model as use.UniversalSentenceEncoder; + return new Promise(res => { + model.embed(text).then(embeddings => { + res(embeddings); + }); + }); + + } } } @@ -95,33 +133,4 @@ export class Recommender { console.log("phrased!!!"); } - public async arxivRequest(query: string) { - // let xhttp = new XMLHttpRequest(); - // let serveraddress = "http://export.arxiv.org/api/query?search_query=all:electron&start=0&max_results=1"; - // let promisified = (resolve: any, reject: any) => { - // xhttp.onreadystatechange = function () { - // if (this.readyState === 4) { - // let result = xhttp.response; - // switch (this.status) { - // case 200: - // console.log(result); - // return resolve(result); - // case 400: - // default: - // return reject(result); - // } - // } - // }; - // xhttp.open("GET", serveraddress, true); - // xhttp.send(); - // }; - // return new Promise(promisified); - - let res = await arxivapi.query("all:electrons"); - console.log(res); - } - - - - } diff --git a/src/server/index.ts b/src/server/index.ts index e7d49579d..ac803a253 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -688,13 +688,15 @@ app.use(RouteStore.corsProxy, (req, res) => { let recommender = new Recommender(); recommender.testModel(); -recommender.trainModel(); -recommender.arxivRequest("Triangle-GAN"); app.post("/recommender", async (req, res) => { let keyphrases = req.body.keyphrases; let wordvecs = await recommender.vectorize(keyphrases); - res.send(wordvecs); + let embedding: number[][] = []; + if (wordvecs && wordvecs.array()) { + wordvecs.array().then(array => embedding = array as number[][]); + } + res.send(embedding); }); -- cgit v1.2.3-70-g09d2 From 1f8bf407ef49aab33294c3e7393718606dfa65dd Mon Sep 17 00:00:00 2001 From: Abdullah Ahmed Date: Fri, 11 Oct 2019 17:40:09 -0400 Subject: fixes + refactoring --- src/client/ClientRecommender.tsx | 95 ++++++---------------- src/client/cognitive_services/CognitiveServices.ts | 6 +- src/client/views/Recommendations.tsx | 22 ++--- src/server/Recommender.ts | 2 +- 4 files changed, 37 insertions(+), 88 deletions(-) (limited to 'src/server/Recommender.ts') diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index 9ce7df366..bc1cd139c 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -35,7 +35,6 @@ export class ClientRecommender extends React.Component { static Instance: ClientRecommender; private mainDoc?: RecommenderDocument; private docVectors: Set = new Set(); - private highKP: string[] = []; @observable private corr_matrix = [[0, 0], [0, 0]]; @@ -94,7 +93,7 @@ export class ClientRecommender extends React.Component { public computeSimilarities() { ClientRecommender.Instance.docVectors.forEach((doc: RecommenderDocument) => { if (ClientRecommender.Instance.mainDoc) { - const distance = ClientRecommender.Instance.distance(ClientRecommender.Instance.mainDoc.vectorDoc, doc.vectorDoc, "euclidian"); + const distance = ClientRecommender.Instance.distance(ClientRecommender.Instance.mainDoc.vectorDoc, doc.vectorDoc, "cosine"); doc.score = distance; } } @@ -151,17 +150,21 @@ export class ClientRecommender extends React.Component { let converter = async (results: any, data: string) => { let keyterms = new List(); // raw keywords let keyterms_counted = new List(); // keywords, where each keyword is repeated as - let highKP: string[] = [""]; // most frequent + let kp_string: string = ""; + let highKP: string[] = [""]; // most frequent keyphrase let high = 0; results.documents.forEach((doc: any) => { let keyPhrases = doc.keyPhrases; keyPhrases.map((kp: string) => { const frequency = this.countFrequencies(kp, data); keyterms.push(kp); + kp_string += kp + ", "; + // replaces highKP with new one if (frequency > high) { high = frequency; highKP = [kp]; } + // appends to current highKP phrase else if (frequency === high) { highKP.push(kp); } @@ -175,13 +178,11 @@ export class ClientRecommender extends React.Component { }); }); }); - this.highKP = highKP; - //console.log(highKP); const kts_counted = new List(); keyterms_counted.forEach(kt => kts_counted.push(kt.toLowerCase())); let values = ""; if (!internal) values = await this.sendRequest(highKP); - return { keyterms: keyterms, keyterms_counted: kts_counted, values }; + return { keyterms: keyterms, keyterms_counted: kts_counted, values, kp_string: [kp_string] }; }; if (data != "") { return CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc, internal); @@ -189,6 +190,10 @@ export class ClientRecommender extends React.Component { return; } + /** + * + * Counts frequencies of keyphrase in paragraph. + */ private countFrequencies(keyphrase: string, paragraph: string) { let data = paragraph.split(" "); @@ -196,8 +201,7 @@ export class ClientRecommender extends React.Component { let num_keywords = kp_array.length; let par_length = data.length; let frequency = 0; - // console.log("Paragraph: ", data); - // console.log("Keyphrases:", kp_array); + // slides keyphrase windows across paragraph and checks if it matches with corresponding paragraph slice for (let i = 0; i <= par_length - num_keywords; i++) { const window = data.slice(i, i + num_keywords); if (JSON.stringify(window).toLowerCase() === JSON.stringify(kp_array).toLowerCase() || kp_array.every(val => window.includes(val))) { @@ -207,11 +211,21 @@ export class ClientRecommender extends React.Component { return frequency; } + /** + * + * Removes stopwords from list of strings representing a sentence + */ + private removeStopWords(word_array: string[]) { //console.log(sw.removeStopwords(word_array)); return sw.removeStopwords(word_array); } + /** + * + * API for sending arXiv request. + */ + private async sendRequest(keywords: string[]) { let query = ""; keywords.forEach((kp: string) => query += " " + kp); @@ -221,13 +235,14 @@ export class ClientRecommender extends React.Component { } /** - * Request to the arXiv server for ML articles. + * Actual request to the arXiv server for ML articles. */ arxivrequest = async (query: string) => { let xhttp = new XMLHttpRequest(); let serveraddress = "http://export.arxiv.org/api"; - let endpoint = serveraddress + "/query?search_query=all:" + query + "&start=0&max_results=5"; + const maxresults = 5; + let endpoint = serveraddress + "/query?search_query=all:" + query + "&start=0&max_results=" + maxresults.toString(); let promisified = (resolve: any, reject: any) => { xhttp.onreadystatechange = function () { if (this.readyState === 4) { @@ -243,7 +258,7 @@ export class ClientRecommender extends React.Component { let titles = xml.getElementsByTagName("title"); let counter = 1; if (titles && titles.length > 1) { - while (counter <= 5) { + while (counter <= maxresults) { const title = titles[counter].childNodes[0].nodeValue!; console.log(title) title_vals.push(title); @@ -253,7 +268,7 @@ export class ClientRecommender extends React.Component { let ids = xml.getElementsByTagName("id"); counter = 1; if (ids && ids.length > 1) { - while (counter <= 5) { + while (counter <= maxresults) { const url = ids[counter].childNodes[0].nodeValue!; console.log(url); url_vals.push(url); @@ -280,64 +295,8 @@ export class ClientRecommender extends React.Component { console.log(text); } - /*** - * Creates distance matrix for all Documents analyzed - */ - - @action - public createDistanceMatrix(documents: Set = ClientRecommender.Instance.docVectors) { - const documents_list = Array.from(documents); - const n = documents_list.length; - var matrix = new Array(n).fill(0).map(() => new Array(n).fill(0)); - for (let i = 0; i < n; i++) { - var doc1 = documents_list[i]; - for (let j = 0; j < n; j++) { - var doc2 = documents_list[j]; - matrix[i][j] = ClientRecommender.Instance.distance(doc1.vectorDoc, doc2.vectorDoc, "euclidian"); - } - } - ClientRecommender.Instance.corr_matrix = matrix; - return matrix; - } - - @computed - private get generateRows() { - const n = ClientRecommender.Instance.corr_matrix.length; - let rows: JSX.Element[] = []; - for (let i = 0; i < n; i++) { - let children: JSX.Element[] = []; - for (let j = 0; j < n; j++) { - //let cell = React.createElement("td", ClientRecommender.Instance.corr_matrix[i][j]); - let cell = {ClientRecommender.Instance.corr_matrix[i][j].toFixed(4)}; - children.push(cell); - } - //let row = React.createElement("tr", { children: children, key: i }); - let row = {children}; - rows.push(row); - } - return rows; - } - render() { return (
-

{ClientRecommender.Instance.props.title ? ClientRecommender.Instance.props.title : "hello"}

- {/* - - - - - - - - - - -
{ClientRecommender.Instance.corr_matrix[0][0].toFixed(4)}{ClientRecommender.Instance.corr_matrix[0][1].toFixed(4)}
{ClientRecommender.Instance.corr_matrix[1][0].toFixed(4)}{ClientRecommender.Instance.corr_matrix[1][1].toFixed(4)}
*/} - - - {ClientRecommender.Instance.generateRows} - -
); } diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index c138c68b7..eb088763d 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -15,7 +15,7 @@ type RequestExecutor = (apiKey: string, body: string, service: Service) => Promi type AnalysisApplier = (target: Doc, relevantKeys: string[], data: D, ...args: any) => any; type BodyConverter = (data: D) => string; type Converter = (results: any) => Field; -type TextConverter = (results: any, data: string) => Promise<{ keyterms: Field, keyterms_counted: Field, values: any }>; +type TextConverter = (results: any, data: string) => Promise<{ keyterms: Field, keyterms_counted: Field, values: any, kp_string: string[] }>; export type Tag = { name: string, confidence: number }; export type Rectangle = { top: number, left: number, width: number, height: number }; @@ -290,12 +290,12 @@ export namespace CognitiveServices { export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, mainDoc: boolean = false, internal: boolean = true) => { let results = await ExecuteQuery(Service.Text, Manager, data); console.log(results); - let { keyterms, values, keyterms_counted } = await converter(results, data); + let { keyterms, values, keyterms_counted, kp_string } = await converter(results, data); //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); target[keys[0]] = keyterms; console.log("analyzed!"); if (internal) { - await vectorize(keyterms, dataDoc, mainDoc, data); + await vectorize(kp_string, dataDoc, mainDoc, data); } else { return values; } diff --git a/src/client/views/Recommendations.tsx b/src/client/views/Recommendations.tsx index 5dc62105d..f965d655b 100644 --- a/src/client/views/Recommendations.tsx +++ b/src/client/views/Recommendations.tsx @@ -70,15 +70,9 @@ export class RecommendationsBox extends React.Component { newRenderDoc.height = NumCast(this.props.Document.documentIconHeight); newRenderDoc.autoHeight = false; const docview =
- {/* onPointerDown={action(() => { - this._useIcons = !this._useIcons; - this._displayDim = this._useIcons ? 50 : Number(SEARCH_THUMBNAIL_SIZE); - })} - onPointerEnter={action(() => this._displayDim = this._useIcons ? 50 : Number(SEARCH_THUMBNAIL_SIZE))} - onPointerLeave={action(() => this._displayDim = 50)} > */} { ContentScaling={scale} />
; - // const data = renderDoc.data; - // if (data instanceof ObjectField) newRenderDoc.data = ObjectField.MakeCopy(data); - // newRenderDoc.preview = true; - // this.previewDocs.push(newRenderDoc); return docview; } @@ -171,16 +161,16 @@ export class RecommendationsBox extends React.Component { {DocListCast(this.props.Document.data).map(doc => { return (
- {/* + {this.DocumentIcon(doc)} - */} + {NumCast(doc.score).toFixed(4)}
DocumentManager.Instance.jumpToDocument(doc, false)}>
- {/*
DocUtils.MakeLink({ doc: this.props.Document.sourceDoc as Doc }, { doc: doc }, "User Selected Link", "Generated from Recommender", undefined)}> +
DocUtils.MakeLink({ doc: this.props.Document.sourceDoc as Doc }, { doc: doc }, "User Selected Link", "Generated from Recommender", undefined)}> -
*/} +
); })} diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index d014ba344..d974d7ef6 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -17,7 +17,7 @@ export class Recommender { private _model: any; static Instance: Recommender; private dimension: number = 0; - private choice: string = ""; + private choice: string = ""; // Tensorflow or Word2Vec constructor() { console.log("creating recommender..."); -- cgit v1.2.3-70-g09d2 From 46f25c9a781783350a7c1d76eefb4e066b2cac83 Mon Sep 17 00:00:00 2001 From: ab Date: Thu, 17 Oct 2019 11:57:58 -0400 Subject: optimize tf --- package.json | 4 +++- src/client/cognitive_services/CognitiveServices.ts | 1 + src/server/Recommender.ts | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'src/server/Recommender.ts') diff --git a/package.json b/package.json index fbed088fd..f138e7a79 100644 --- a/package.json +++ b/package.json @@ -58,6 +58,7 @@ "@hig/theme-data": "^2.3.3", "@tensorflow-models/universal-sentence-encoder": "^1.2.0", "@tensorflow/tfjs": "^1.2.9", + "@tensorflow/tfjs-node": "^1.2.11", "@trendmicro/react-dropdown": "^1.3.0", "@types/adm-zip": "^0.4.32", "@types/animejs": "^2.0.2", @@ -122,8 +123,8 @@ "@types/youtube": "0.0.38", "adm-zip": "^0.4.13", "archiver": "^3.0.3", - "arxiv-api-node": "0.0.2", "array-batcher": "^1.1.3", + "arxiv-api-node": "0.0.2", "async": "^2.6.2", "babel-runtime": "^6.26.0", "bcrypt-nodejs": "0.0.3", @@ -173,6 +174,7 @@ "mobx-utils": "^5.4.0", "mongodb": "^3.1.13", "mongoose": "^5.6.4", + "node-pre-gyp": "^0.13.0", "node-sass": "^4.12.0", "nodemailer": "^5.1.1", "nodemon": "^1.18.10", diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 48519f916..b0e9138a4 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -267,6 +267,7 @@ export namespace CognitiveServices { export async function vectorize(keyterms: any, dataDoc: Doc, mainDoc: boolean = false) { console.log("vectorizing..."); //keyterms = ["father", "king"]; + let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; await requestPromise.post(args).then(async (wordvecs) => { if (wordvecs) { diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index d974d7ef6..aaed09999 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -9,6 +9,7 @@ var arxivapi = require('arxiv-api-node'); import requestPromise = require("request-promise"); import * as use from '@tensorflow-models/universal-sentence-encoder'; import { Tensor } from "@tensorflow/tfjs-core/dist/tensor"; +require('@tensorflow/tfjs-node'); //http://gnuwin32.sourceforge.net/packages/make.htm -- cgit v1.2.3-70-g09d2