diff options
author | ab <abdullah_ahmed@brown.edu> | 2019-08-07 16:28:51 -0400 |
---|---|---|
committer | ab <abdullah_ahmed@brown.edu> | 2019-08-07 16:28:51 -0400 |
commit | 17b27d3575d3f91f461262e5ad72a457238d198a (patch) | |
tree | 98e6c0a9f5a8f140ff6239a27ebc6f518c87280b | |
parent | 06b59a4ec2f1871846696da22928fc7d54ae02d6 (diff) |
correlation matrix completed
-rw-r--r-- | package.json | 2 | ||||
-rw-r--r-- | src/client/ClientRecommender.ts | 101 | ||||
-rw-r--r-- | src/client/cognitive_services/CognitiveServices.ts | 22 | ||||
-rw-r--r-- | src/client/views/MainView.tsx | 7 | ||||
-rw-r--r-- | src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx | 19 | ||||
-rw-r--r-- | src/client/views/nodes/ImageBox.tsx | 14 | ||||
-rw-r--r-- | src/server/Recommender.ts | 27 |
7 files changed, 166 insertions, 26 deletions
diff --git a/package.json b/package.json index 44d5287bd..ebb0c35a8 100644 --- a/package.json +++ b/package.json @@ -224,4 +224,4 @@ "xoauth2": "^1.2.0", "youtube": "^0.1.0" } -}
\ No newline at end of file +} diff --git a/src/client/ClientRecommender.ts b/src/client/ClientRecommender.ts new file mode 100644 index 000000000..7ff79ab50 --- /dev/null +++ b/src/client/ClientRecommender.ts @@ -0,0 +1,101 @@ +import { Doc } from "../new_fields/Doc"; +import { StrCast } from "../new_fields/Types"; +import { List } from "../new_fields/List"; +import { CognitiveServices } from "./cognitive_services/CognitiveServices"; + + +var assert = require('assert'); + +export class ClientRecommender { + + static Instance: ClientRecommender; + private docVectors: Set<number[]>; + + constructor() { + //console.log("creating client recommender..."); + ClientRecommender.Instance = this; + this.docVectors = new Set<number[]>(); + } + + + /*** + * Computes the cosine similarity between two vectors in Euclidean space. + */ + + private distance(vector1: number[], vector2: number[]) { + assert(vector1.length === vector2.length, "Vectors are not the same length"); + var dotproduct = 0; + var mA = 0; + var mB = 0; + for (let i = 0; i < vector1.length; i++) { // here you missed the i++ + dotproduct += (vector1[i] * vector2[i]); + mA += (vector1[i] * vector1[i]); + mB += (vector2[i] * vector2[i]); + } + mA = Math.sqrt(mA); + mB = Math.sqrt(mB); + var similarity = (dotproduct) / ((mA) * (mB)); // here you needed extra brackets + return similarity; + } + + /*** + * Computes the mean of a set of vectors + */ + + public mean(paragraph: Set<number[]>) { + const n = 200; + const num_words = paragraph.size; + let meanVector = new Array<number>(n).fill(0); // mean vector + paragraph.forEach((wordvec: number[]) => { + for (let i = 0; i < n; i++) { + meanVector[i] += wordvec[i]; + } + }); + meanVector = meanVector.map(x => x / num_words); + this.addToDocSet(meanVector); + return meanVector; + } + + private addToDocSet(vector: number[]) { + if (this.docVectors) { + this.docVectors.add(vector); + } + } + + /*** + * Uses Cognitive Services to extract keywords from a document + */ + + public async extractText(dataDoc: Doc, extDoc: Doc) { + let data = StrCast(dataDoc.title); + //console.log(data); + let converter = (results: any) => { + let keyterms = new List<string>(); + results.documents.forEach((doc: any) => { + let keyPhrases = doc.keyPhrases; + keyPhrases.map((kp: string) => keyterms.push(kp)); + }); + return keyterms; + }; + await CognitiveServices.Text.Manager.analyzer(extDoc, ["key words"], data, converter); + } + + /*** + * Creates distance matrix for all Documents analyzed + */ + + public createDistanceMatrix(documents: Set<number[]> = this.docVectors) { + const documents_list = Array.from(documents); + const n = documents_list.length; + var matrix = new Array(n).fill(0).map(() => new Array(n).fill(0)); + for (let i = 0; i < n; i++) { + var doc1 = documents_list[i]; + for (let j = 0; j < n; j++) { + var doc2 = documents_list[j]; + matrix[i][j] = this.distance(doc1, doc2); + } + } + return matrix; + } + +}
\ No newline at end of file diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 863236b60..cc366abc2 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -8,6 +8,7 @@ import { InkData } from "../../new_fields/InkField"; import { UndoManager } from "../util/UndoManager"; import requestPromise = require("request-promise"); import { List } from "../../new_fields/List"; +import { ClientRecommender } from "../ClientRecommender"; type APIManager<D> = { converter: BodyConverter<D>, requester: RequestExecutor, analyzer: AnalysisApplier }; type RequestExecutor = (apiKey: string, body: string, service: Service) => Promise<string>; @@ -257,20 +258,21 @@ export namespace CognitiveServices { //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); target[keys[0]] = keyterms; console.log("analyzed!"); - vectorize(keyterms); - return null; - }, - - + await vectorize(keyterms); + } }; - function vectorize(keyterms: any) { + async function vectorize(keyterms: any) { console.log("vectorizing..."); - keyterms = ["father", "king"]; + //keyterms = ["father", "king"]; let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; - requestPromise.post(args).then((value) => { - value.forEach((wordvec: any) => { - console.log(wordvec.word); + await requestPromise.post(args).then(async (wordvecs) => { + var vectorValues = new Set<number[]>(); + wordvecs.forEach((wordvec: any) => { + //console.log(wordvec.word); + vectorValues.add(wordvec.values as number[]); }); + ClientRecommender.Instance.mean(vectorValues); + //console.log(vectorValues.size); }); } diff --git a/src/client/views/MainView.tsx b/src/client/views/MainView.tsx index 2ecf5fd85..97964166a 100644 --- a/src/client/views/MainView.tsx +++ b/src/client/views/MainView.tsx @@ -39,6 +39,7 @@ import { FilterBox } from './search/FilterBox'; import { CollectionTreeView } from './collections/CollectionTreeView'; import { ClientUtils } from '../util/ClientUtils'; import { SchemaHeaderField, RandomPastel } from '../../new_fields/SchemaHeaderField'; +//import { DocumentManager } from '../util/DocumentManager'; @observer export class MainView extends React.Component { @@ -435,6 +436,12 @@ export class MainView extends React.Component { </div >; } + // clusterDocuments = () => { + // DocumentManager.Instance.DocumentViews(); + // } + + + @action diff --git a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx index 29f9b1429..9344b43d2 100644 --- a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx +++ b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx @@ -1,6 +1,6 @@ import { library } from "@fortawesome/fontawesome-svg-core"; import { faEye } from "@fortawesome/free-regular-svg-icons"; -import { faCompass, faCompressArrowsAlt, faExpandArrowsAlt, faPaintBrush, faTable, faUpload } from "@fortawesome/free-solid-svg-icons"; +import { faCompass, faCompressArrowsAlt, faExpandArrowsAlt, faPaintBrush, faTable, faUpload, faBrain } from "@fortawesome/free-solid-svg-icons"; import { action, computed } from "mobx"; import { observer } from "mobx-react"; import { Doc, DocListCastAsync, HeightSym, WidthSym } from "../../../../new_fields/Doc"; @@ -37,8 +37,9 @@ import "./CollectionFreeFormView.scss"; import { MarqueeView } from "./MarqueeView"; import React = require("react"); import v5 = require("uuid/v5"); +import { ClientRecommender } from "../../../ClientRecommender"; -library.add(faEye, faTable, faPaintBrush, faExpandArrowsAlt, faCompressArrowsAlt, faCompass, faUpload); +library.add(faEye, faTable, faPaintBrush, faExpandArrowsAlt, faCompressArrowsAlt, faCompass, faUpload, faBrain); export const panZoomSchema = createSchema({ panX: "number", @@ -596,6 +597,20 @@ export class CollectionFreeFormView extends CollectionSubView(PanZoomDocument) { input.click(); } }); + ContextMenu.Instance.addItem({ + description: "Recommender System", + event: async () => { + new ClientRecommender(); + let activedocs = this.getActiveDocuments(); + await Promise.all(activedocs.map((doc: Doc) => { + console.log(StrCast(doc.title)); + const extdoc = doc.data_ext as Doc; + return ClientRecommender.Instance.extractText(doc, extdoc ? extdoc : doc); + })); + console.log(ClientRecommender.Instance.createDistanceMatrix()); + }, + icon: "brain" + }); } diff --git a/src/client/views/nodes/ImageBox.tsx b/src/client/views/nodes/ImageBox.tsx index 161226c0d..660772c0e 100644 --- a/src/client/views/nodes/ImageBox.tsx +++ b/src/client/views/nodes/ImageBox.tsx @@ -31,7 +31,7 @@ import { faEye } from '@fortawesome/free-regular-svg-icons'; import { ComputedField } from '../../../new_fields/ScriptField'; import { CompileScript } from '../../util/Scripting'; import { thisExpression } from 'babel-types'; -import { Recommender } from '../../../server/Recommender'; +//import { Recommender } from '../../../server/Recommender'; import requestPromise = require('request-promise'); var requestImageSize = require('../../util/request-image-size'); var path = require('path'); @@ -244,17 +244,7 @@ export class ImageBox extends DocComponent<FieldViewProps, ImageDocument>(ImageD } extractText = () => { - let data = StrCast(this.dataDoc.title); - console.log(data); - let converter = (results: any) => { - let keyterms = new List<string>(); - results.documents.forEach((doc: any) => { - let keyPhrases = doc.keyPhrases; - keyPhrases.map((kp: string) => keyterms.push(kp)); - }); - return keyterms; - }; - CognitiveServices.Text.Manager.analyzer(this.extensionDoc, ["key words"], data, converter); + //Recommender.Instance.extractText(this.dataDoc, this.extensionDoc); // request recommender //fetch(Utils.prepend("/recommender"), { body: body, method: "POST", headers: { "content-type": "application/json" } }).then((value) => console.log(value)); } diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index 3c71f3aa1..ea59703c3 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -1,4 +1,10 @@ +//import { Doc } from "../new_fields/Doc"; +//import { StrCast } from "../new_fields/Types"; +//import { List } from "../new_fields/List"; +//import { CognitiveServices } from "../client/cognitive_services/CognitiveServices"; + var w2v = require('word2vec'); +var assert = require('assert'); export class Recommender { @@ -10,6 +16,10 @@ export class Recommender { Recommender.Instance = this; } + /*** + * Loads pre-trained model from word2vec + */ + private loadModel(): Promise<any> { let self = this; return new Promise(res => { @@ -20,6 +30,10 @@ export class Recommender { }); } + /*** + * Testing + */ + public async testModel() { if (!this._model) { await this.loadModel(); @@ -33,6 +47,10 @@ export class Recommender { } } + /*** + * Tests if instance exists + */ + public async testInstance(text: string) { if (!this._model) { await this.loadModel(); @@ -40,14 +58,21 @@ export class Recommender { console.log(text); } + /*** + * Uses model to convert words to vectors + */ + public async vectorize(text: string[]) { if (!this._model) { await this.loadModel(); } if (this._model) { let word_vecs = this._model.getVectors(text); - console.log(word_vecs[0]); return word_vecs; } } + + + + } |