lzy 1 tahun lalu
induk
melakukan
2534a2c707

+ 84 - 1
FilmDraw-app/src/app/tab3/tab3.page.html

@@ -53,8 +53,91 @@
       </ion-card-content>
     </ion-card>
   </section>
+</ion-content>
+  
 
+<ion-content>
+  <ion-segment [value]="tab"  (ionChange)="tabChange($event)">
+    <ion-segment-button value="vector">
+      <ion-label>向量提取</ion-label>
+    </ion-segment-button>
+    <ion-segment-button value="retrive">
+      <ion-label>记忆召回</ion-label>
+    </ion-segment-button>
+    <ion-segment-button value="story">
+      <ion-label>知识问答</ion-label>
+    </ion-segment-button>
+  </ion-segment>
+  @if(tab=="vector"){
+    <ion-list>
+      @for(file of fileList;track file.title){
+        <ion-item>
+          <ion-label>标题:{{file?.title}}</ion-label>
+          <ion-button (click)="preview(file)">预览</ion-button>
+          <ion-button (click)="loader(file)">加载</ion-button>
+        </ion-item>
+        @if(storyMap[file?.url]){
+          <ion-item>
+            加载:正文{{storyMap[file?.url]?.content?.length}}字符 
+            <ion-button (click)="splitter(storyMap[file?.url])">分割</ion-button>
+          </ion-item>
+        }
+        @if(storyMap[file?.url]?.docList?.length){
+          <div style="display: flex;flex-direction: column;">
+            <ion-button (click)="embedings(storyMap[file?.url])">嵌入</ion-button>
+            分割:文本块{{storyMap[file?.url]?.docList?.length}}个
+            @for(doc of storyMap[file?.url]?.docList;track doc){
+              <p>行{{doc.metadata?.loc?.lines?.from}}至行{{doc.metadata?.loc?.lines?.to}}</p>
+              <p>{{doc.pageContent}}</p>
+            }
+          </div>
+        }
+      }
+    </ion-list>
   
+  }
+
+  @if(tab=="retrive"){
+   <ion-list>
+    <ion-item>
+      <ion-textarea [value]="userInput" (ionChange)="inputChange($event)" label="用户输入" placeholder="请输入您的问题"></ion-textarea>
+    </ion-item>
+    <ion-item>
+      <ion-button (click)="retriveFrontEnd()">前端检索</ion-button>
+      <ion-button (click)="retriveBackEnd()">后端检索</ion-button>
+    </ion-item>
+    @if(searchDocList?.length){
+      @for(doc of searchDocList;track doc){
+        <ion-item>
+          相似度:{{doc.similarity}} 内容:{{doc.pageContent}}
+        </ion-item>
+      }
+    }
+   </ion-list>
+  }
+
+  @if(tab=="story"){
+    <ion-list>
+      <ion-item>
+        <ion-textarea [value]="messageInput" (ionChange)="messageChange($event)" label="用户输入" placeholder="请输入您的问题"></ion-textarea>
+      </ion-item>
+      <ion-item>
+        <ion-button (click)="sendMessage()">发送消息</ion-button>
+      </ion-item>
+      @if(messageResult){
+        <ion-item>
+          @if(!isComplete){
+            {{messageResult?.content}}
+          }
+          @if(isComplete){
+            <fm-markdown-preview [content]="messageResult?.content"></fm-markdown-preview>
+          }
+        </ion-item>
+      }
+      
+     </ion-list>
+  }
+</ion-content>
 
 
 <!-- <ion-header [translucent]="true">
@@ -72,4 +155,4 @@
   <h1>示例:门诊问诊的智能体示例(ChatPanel组件)</h1>
   <ion-button (click)="opendazi()">追剧搭子</ion-button>
 
-</ion-content> -->
+ -->

+ 119 - 4
FilmDraw-app/src/app/tab3/tab3.page.ts

@@ -1,4 +1,4 @@
-import { Component, contentChild } from '@angular/core';
+import { Component, OnInit } from '@angular/core';
 import { Router } from '@angular/router';
 import { CommonModule } from '@angular/common';
 import { IonHeader, IonToolbar, IonTitle, IonContent, ModalController, IonButton,
@@ -13,6 +13,12 @@ import { ChatPanelOptions, FmChatModalInput, FmodeChat, FmodeChatMessage, openCh
 import { CloudObject, CloudQuery, CloudUser } from 'src/lib/ncloud';
 import { openUserLoginModal } from 'src/lib/user/modal-user-login/modal-user-login.component';
 
+    
+import { IonSegment,IonSegmentButton,IonTextarea } from "@ionic/angular/standalone";
+import { AgentStory, EmbedQuery, RetriveAllDocument } from 'src/lib/story';
+import { CloudApi } from 'src/lib/ncloud';
+import { FmodeChatCompletion,MarkdownPreviewModule } from 'fmode-ng';
+
 @Component({
   selector: 'app-tab3',
   templateUrl: 'tab3.page.html',
@@ -25,11 +31,16 @@ import { openUserLoginModal } from 'src/lib/user/modal-user-login/modal-user-log
     FmChatModalInput, 
     // ModalAudioMessageComponent
     CommonModule, IonCard, IonCardHeader, IonCardTitle, IonCardContent,
-  IonList, IonItem, IonLabel, IonAvatar, IonInput, IonIcon, IonThumbnail,
-
+    IonList, IonItem, IonLabel, IonAvatar, IonInput, IonIcon, IonThumbnail,
+    IonSegment, IonSegmentButton, IonTextarea, MarkdownPreviewModule
   ]
 })
-export class Tab3Page {
+export class Tab3Page implements OnInit{
+
+  /** Active segment of the page; the template switches on "vector", "retrive" and "story". */
+  tab:string = "vector"
+
+  /** Handles the segment's ionChange event by adopting the newly selected value. */
+  tabChange(ev:any){
+    const selected = ev.detail.value
+    this.tab = selected
+  }
 
   constructor(
     private modalCtrl:ModalController,
@@ -252,10 +263,114 @@ export class Tab3Page {
 
 
 
+    /**
+   * Document loading, splitting and vector storage
+   */
+    fileList:Array<any> = [
+      {
+        title:`市委办公厅 市政府办公厅印发《关于服务保障“抓防控促发展”落实“人才生态37条”的补充意见》的通知`,
+        tags:["杭州","人才政策"],
+        url:`https://app.fmode.cn/dev/jxnu/case/2020%E5%B9%B4%E6%9D%AD%E5%B7%9E%E5%B8%82%E4%BA%BA%E6%89%8D37%E6%9D%A1.docx`
+      },
+      {
+        title:"杭州市余杭区服务保障高层次人才创新创业政策实施细则",
+        tags:["杭州","余杭","人才政策","创新创业","双创"],
+        url:"https://app.fmode.cn/dev/jxnu/case/2022年杭州余杭.docx"
+      }
+    ]
+    // Loaded AgentStory instances, keyed by the file's url (filled by loader)
+    storyMap:any = {}
+    // Opens the file's url in a new browsing context ("_blank")
+    preview(file:any){
+      window.open(file.url,"_blank")
+    }
+    /**
+     * Builds an AgentStory for the given file entry, loads its text from file.url
+     * and caches the story in storyMap under that url.
+     */
+    async loader(file:any){
+      const story = new AgentStory(file);
+      const sourceUrl = file.url;
+      await story.loader(sourceUrl);
+      console.log(story);
+      this.storyMap[sourceUrl] = story;
+    }
+    /** Delegates to story.splitter() without options and waits for it to finish. */
+    async splitter(story:AgentStory){
+      await story.splitter();
+    }
+    /** Delegates to story.embedings() and waits for it to finish. */
+    async embedings(story:AgentStory){
+      await story.embedings()
+    }
+  
+    /**
+     * Text vector retrieval
+     */
+    userInput:string = "";
+    searchDocList:Array<any> = []
+    /** Keeps userInput in sync with the textarea's ionChange event. */
+    inputChange(ev:any){
+      const { value } = ev.detail
+      this.userInput = value
+    }
+    /**
+     * Front-end retrieval: embeds userInput in the browser and ranks the chunks of
+     * every loaded story against it with RetriveAllDocument.
+     *
+     * Only chunks that already carry a vector512 are ranked. Chunks that were split
+     * but not embedded yet have no vector to compare against, and passing them on
+     * makes the similarity computation throw.
+     */
+    async retriveFrontEnd(){
+      let storyList = Object.values(this.storyMap);
+      let docList:Array<any> = storyList
+        .flatMap((story:any)=>story?.docList || [])
+        .filter((doc:any)=>Array.isArray(doc?.vector512))
+      let vector512 = await EmbedQuery(this.userInput)
+      console.log(this.userInput,vector512)
+      console.log(docList)
+      this.searchDocList = RetriveAllDocument(vector512,docList)
+      console.log("searchDocList",this.searchDocList)
+    }
+  
+    /**
+     * Back-end retrieval: embeds userInput locally, posts the vector to the
+     * "agent/retrive" cloud endpoint and shows whatever list comes back.
+     */
+    async retriveBackEnd(){
+      const queryVector = await EmbedQuery(this.userInput)
+      console.log(this.userInput,queryVector)
+
+      const response = await new CloudApi().fetch("agent/retrive",{ vector512:queryVector })
+      console.log(response)
+      this.searchDocList = response?.data || response;
+    }
+  
+    /**
+     * Knowledge-base Q&A
+     */
+    messageInput:string = "";
+    messageResult:any;
+    isComplete:boolean = false;
+    /** Keeps messageInput in sync with the textarea's ionChange event. */
+    messageChange(ev:any){
+      const { value } = ev.detail
+      this.messageInput = value
+    }
+    /**
+     * Knowledge-base Q&A: retrieves chunks related to messageInput from the
+     * "agent/retrive" endpoint, places the first ten into the prompt and streams
+     * the completion into messageResult. isComplete flips to true once a streamed
+     * message reports `complete`.
+     */
+    async sendMessage(){
+      // Reset state left over from a previous answer; otherwise isComplete stays true
+      // and the next answer is never shown as plain text while it streams in.
+      this.isComplete = false;
+      this.messageResult = undefined;
+
+      // RAG retrieval must embed the question typed on this tab (messageInput),
+      // not userInput, which belongs to the retrieval tab.
+      let vector512 = await EmbedQuery(this.messageInput)
+      let api = new CloudApi()
+      let result = await api.fetch("agent/retrive",{
+        vector512:vector512
+      })
+      let retrieved = result?.data || result;
+      // Guard against a non-list response so slice/map below cannot throw
+      this.searchDocList = Array.isArray(retrieved) ? retrieved : [];
+      let top10Doc = this.searchDocList.slice(0,10);
+      let docInsertion = `# 知识库文档\n${top10Doc.map(item=>item.pageContent).join("\n")}`
+
+      let PromptTemplate = `
+      ${docInsertion}
+      您是一名专业的人力资源顾问,帮助应届大学生解答当地政策问题,从政策及知识库文档中中寻找学生所需,并给予建议。
+      以下是学生的问题:${this.messageInput}
+     `
+      let completion = new FmodeChatCompletion([
+        {role:"system",content:""},
+        {role:"user",content:PromptTemplate}
+        ])
+        completion.sendCompletion().subscribe((message:any)=>{
+            this.messageResult = message
+            if(message?.complete){
+              this.isComplete = true;
+            }
+        })
+    }
+
+
   ngOnInit() {
     // 生命周期:页面加载后,运行搭子和角色列表,加载函数
     this.loadFilmPartnerList()
     this.loadFilmRoleList()
+    this.loader(this.fileList[0]);
   }
 
   // 创建用于数据列表存储的属性

+ 31 - 2
FilmDraw-app/src/lib/ncloud.ts

@@ -1,4 +1,3 @@
-
 //CloudObject.ts
 export class CloudObject {
     className: string;
@@ -131,7 +130,7 @@ export class CloudQuery {
         return json || {};
     }
 
-    async find() {
+    async find():Promise<Array<CloudObject>> {
         let url = `http://dev.fmode.cn:1337/parse/classes/${this.className}?`;
 
         if (Object.keys(this.whereOptions).length) {
@@ -396,4 +395,34 @@ export class CloudPost extends CloudObject {
     async deletePost() {
         return await this.destroy(); // 调用父类的 destroy 方法
     }
+}
+
+//CloudApi.ts
+/**
+ * Minimal client for the JSON endpoints under `https://dev.fmode.cn/api/`.
+ */
+export class CloudApi{
+    /**
+     * Sends a JSON request to `${host}/api/${path}` and resolves to the parsed JSON reply.
+     *
+     * @param path    endpoint path appended after `/api/`
+     * @param body    payload to JSON-encode; takes precedence over `options.body`
+     * @param options optional overrides: HTTP `method` (defaults to "POST") and a fallback `body`
+     * @returns the parsed JSON response
+     */
+    async fetch(path:string,body:any,options?:{
+        method?:string
+        body?:any
+    }){
+
+        let method = options?.method || "POST"
+        let reqOpts:any =  {
+            headers: {
+                "x-parse-application-id": "dev",
+                "Content-Type": "application/json"
+            },
+            method: method,
+            mode: "cors",
+            credentials: "omit"
+        }
+        // fetch() rejects GET/HEAD requests that carry a body, so only attach one otherwise
+        let allowsBody = !["GET","HEAD"].includes(method.toUpperCase())
+        let payload = body || options?.body
+        if(allowsBody && payload){
+            reqOpts.body = JSON.stringify(payload);
+        }
+        let host = `https://dev.fmode.cn`
+        // host = `http://127.0.0.1:1337`
+        let url = `${host}/api/`+path
+        console.log(url,reqOpts)
+        const response = await fetch(url,reqOpts);
+        let json = await response.json();
+        return json
+    }
 }

+ 262 - 0
FilmDraw-app/src/lib/story.ts

@@ -0,0 +1,262 @@
+// import pdf from 'pdf-parse';
+// import fs from 'fs';
+import { CloudApi, CloudObject, CloudQuery } from 'src/lib/ncloud';
+import mammoth from "mammoth";
+import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
+import { Document } from '@langchain/core/documents';
+
+import * as tf from '@tensorflow/tfjs-core';
+// import "@tensorflow/tfjs-backend-cpu";
+// import '@tensorflow/tfjs-backend-webgpu';
+import '@tensorflow/tfjs-backend-webgl';
+// import '@tensorflow/tfjs-backend-wasm';
+import { TensorFlowEmbeddings } from "@langchain/community/embeddings/tensorflow";
+
+/**
+ * A source document ("story") moving through the retrieval pipeline:
+ * load text -> split into chunks -> embed chunks -> persist to the cloud.
+ */
+export class AgentStory{
+
+    // Persisted "Story" cloud object; set once save() has completed
+    story:CloudObject|undefined
+    // Document title
+    title:string|undefined = ""
+    // Document tags
+    tags:Array<string>|undefined
+    // Source url of the file
+    url:string|undefined = ""
+    // Full plain-text content of the document
+    content:string|undefined = ""
+    // SHA-256 hex digest of the downloaded file (set by loadDocx); lookup key in save()
+    hash:string|undefined = ""
+    // Chunks produced by splitter(); embedings() attaches a vector512 to each
+    docList:Array<Document|any> = []
+
+    constructor(metadata:{
+        url:string,
+        title?:string,
+        tags?:Array<string>
+    }){
+        this.url = metadata.url
+        this.title = metadata.title
+        this.tags = metadata.tags
+        // Backend selection is asynchronous and a constructor cannot await it;
+        // catch the rejection so a failed setup is logged rather than unhandled.
+        setBackend().catch(err=>console.error(err))
+    }
+
+    /**
+     * Creates or updates the "Story" cloud object identified by this.hash.
+     * Does nothing when no hash has been computed yet.
+     */
+    async save(){
+        if(!this.hash){ return }
+        let query = new CloudQuery("Story");
+        query.equalTo("hash",this.hash);
+        let story = await query.first();
+        if(!story?.id){
+            story = new CloudObject("Story");
+        }
+        story.set({
+            title: this.title,
+            url: this.url,
+            content: this.content,
+            hash: this.hash,
+            tags:this.tags
+        })
+        this.story = await story.save();
+    }
+
+    /**
+     * Loads the document text from `url`: .docx files are parsed in the browser,
+     * anything else (or a .docx that yielded no text) goes to the "agent/loader" endpoint.
+     * @returns the loaded text, or null when nothing could be loaded
+     */
+    async loader(url:string){
+        let api = new CloudApi();
+
+        let result;
+        if(url?.endsWith(".docx")){
+            result = await this.loadDocx(url)
+        }
+        // loadDocx always returns an object, so test the extracted text itself;
+        // otherwise the backend fallback could never run for .docx files.
+        if(!result?.data){
+            result = await api.fetch("agent/loader",{url:url})
+        }
+        this.content = result?.data || null
+        if(this.content){
+            this.url = url
+        }
+        // Wait for persistence so this.story exists before embedings() links chunks to it.
+        // A failed save is logged and must not discard the text that was loaded.
+        try {
+            await this.save();
+        } catch (err) {
+            console.error(err);
+        }
+        return this.content
+    }
+
+    /**
+     * Downloads a .docx file, extracts its raw text with mammoth and records the
+     * file's SHA-256 digest in this.hash.
+     * @returns `{data}` with the extracted text ("" when download or extraction failed)
+     */
+    async loadDocx(url:string){
+        let data:any
+        const response = await fetch(url);
+        if(!response.ok){
+            // Do not parse or hash an error page as if it were the document
+            console.error(`Failed to fetch file: ${response.status} ${response.statusText}`);
+            return {data:""}
+        }
+
+        const arrayBuffer:any = await response.arrayBuffer();
+
+        let text;
+        try {
+            text = await mammoth.extractRawText({arrayBuffer:arrayBuffer}); // in the browser the arrayBuffer is passed directly
+        } catch (err) {
+            console.error(err);
+        }
+
+        this.hash = await arrayBufferToHASH(arrayBuffer)
+
+        // let html = mammoth.convertToHtml(buffer)
+        data = text?.value || "";
+        // Collapse every run of consecutive newlines into a single newline
+        data = data.replaceAll(/\n+/g,"\n")
+        return {data}
+    }
+
+    /**
+     * Splits this.content into chunks and stores them in docList.
+     * @param options chunkSize (default 500) and chunkOverlap (default 150)
+     * @returns the chunk list, or undefined when there is no content
+     */
+    async splitter(options?:{
+        chunkSize:number,
+        chunkOverlap:number
+    }){
+        if(!this.content) return
+        // Default: recursive character text splitter
+        let splitter = new RecursiveCharacterTextSplitter({
+            chunkSize: options?.chunkSize || 500,
+            // ?? rather than ||: an explicit overlap of 0 is valid and must be kept
+            chunkOverlap: options?.chunkOverlap ?? 150,
+        });
+
+        let docOutput = await splitter.splitDocuments([
+            new Document({ pageContent: this.content }),
+        ]);
+        console.log(docOutput)
+        this.docList = docOutput
+        return this.docList
+    }
+
+    /**
+     * Extracts an embedding vector for every chunk in docList, attaches it as
+     * `vector512` and persists each chunk as a "Document" cloud object.
+     * Persistence is best-effort: failures are logged and do not reject.
+     * @see
+     * https://js.langchain.com/docs/integrations/text_embedding/tensorflow/
+     * @returns the embedding vectors, or undefined when docList is empty
+     */
+    //  TensorFlow embedding vector(512) NOT NULL -- NOTE: 512 for Tensorflow
+    //  OpenAI embedding vector(1536) NOT NULL -- NOTE: 1536 for ChatGPT
+    async embedings(){
+        if(!this.docList?.length){return}
+        const embeddings = new TensorFlowEmbeddings();
+        let documentRes = await embeddings.embedDocuments(this.docList?.map(item=>item.pageContent));
+        console.log(documentRes);
+
+        // Persist the vectors. map + allSettled (instead of forEach with an async
+        // callback) lets this method actually wait for the writes and see their errors.
+        let outcomes = await Promise.allSettled(documentRes.map(async (vector512:any,index)=>{
+            /**
+             * metadata
+             * pageContent
+             */
+            let document = this.docList[index]
+            this.docList[index].vector512 = vector512
+            let hash = await arrayBufferToHASH(stringToArrayBuffer(document?.pageContent))
+            let query = new CloudQuery("Document");
+            query.equalTo("hash",hash);
+            let docObj = await query.first()
+            if(!docObj?.id){
+                docObj = new CloudObject("Document");
+            }
+            docObj.set({
+                metadata:document?.metadata,
+                pageContent:document?.pageContent,
+                vector512:vector512,
+                hash:hash,
+                story:this.story?.toPointer(),
+            })
+            await docObj.save();
+        }))
+        outcomes.forEach(outcome=>{
+            if(outcome.status === "rejected"){
+                console.error(outcome.reason);
+            }
+        })
+        return documentRes;
+    }
+
+    /**
+     * Destroys every "Document" cloud object that the query on this story's id
+     * returns, and waits for all deletions. No-op when the story was never saved.
+     */
+    async destoryAllDocument(){
+        if(this.story?.id){
+            let query = new CloudQuery("Document");
+            query.equalTo("story",this.story?.id);
+            let docList = await query.find();
+            await Promise.all(docList.map(doc=>doc.destroy()))
+        }
+
+    }
+}
+
+/**
+ * Downloads `url` and resolves to its bytes as a Buffer.
+ * @throws Error carrying the HTTP status when the response is not ok
+ */
+export async function fetchFileBuffer(url: string): Promise<Buffer> {
+    const res = await fetch(url);
+    if (res.ok) {
+        const bytes = await res.arrayBuffer();
+        return Buffer.from(bytes);
+    }
+    throw new Error(`Failed to fetch file: ${res.status} ${res.statusText}`);
+}
+
+/**
+ * Selects a TensorFlow.js backend for the current browser and waits until
+ * TensorFlow.js is ready.
+ *
+ * Candidates are collected in order of preference (webgpu, webgl, wasm) from what
+ * the browser supports. Browser support alone is not enough, because the backend
+ * package must also be imported (this file only imports tfjs-backend-webgl), so
+ * each candidate is tried in turn and one that cannot be activated is skipped.
+ */
+async function setBackend(){
+    let candidates:Array<string> = []
+
+    // WebGPU is exposed as navigator.gpu
+    if ((navigator as any).gpu) {
+        candidates.push("webgpu")
+    }
+
+    // WebGL is supported when a canvas can hand out a context
+    let glcanvas = document.createElement('canvas');
+    if (glcanvas.getContext('webgl') || glcanvas.getContext('experimental-webgl')) {
+        candidates.push("webgl")
+    }
+
+    if (typeof WebAssembly === 'object' && typeof WebAssembly.instantiate === 'function') {
+        candidates.push("wasm")
+    }
+
+    for (const backend of candidates) {
+        try {
+            // setBackend resolves to false when the backend failed to initialize
+            if (await tf.setBackend(backend)) {
+                break
+            }
+        } catch (err) {
+            // e.g. no backend registered under this name; fall through to the next one
+            console.warn(`TensorFlow.js backend "${backend}" is unavailable`, err)
+        }
+    }
+    await tf.ready();
+    return
+  }
+
+  /**
+   * Computes the SHA-256 digest of the given buffer with the SubtleCrypto API
+   * and returns it as a lowercase hexadecimal string (two digits per byte).
+   */
+  export async function arrayBufferToHASH(arrayBuffer:any) {
+    const digest = await crypto.subtle.digest('SHA-256', arrayBuffer);
+    let hex = '';
+    for (const byte of new Uint8Array(digest)) {
+        hex += byte.toString(16).padStart(2, '0');
+    }
+    return hex;
+}
+/**
+ * Encodes the string with TextEncoder (UTF-8) and returns the ArrayBuffer
+ * backing the encoded bytes.
+ */
+export function stringToArrayBuffer(str:string) {
+    const utf8Bytes = new TextEncoder().encode(str);
+    return utf8Bytes.buffer;
+}
+/** Embeds a single query with TensorFlowEmbeddings and resolves to the resulting vector. */
+export async function EmbedQuery(str:any):Promise<Array<number>>{
+    const embedder = new TensorFlowEmbeddings();
+    return embedder.embedQuery(str);
+}
+
+/**
+ * Ranks documents by cosine similarity to a query vector.
+ *
+ * Each rankable document gets its score written to `doc.similarity`. Documents
+ * that cannot be scored are left out of the result instead of aborting the whole
+ * ranking: those with no `vector512`, a vector of a different length, or a zero vector.
+ *
+ * @param vector1 query embedding
+ * @param docList documents carrying an embedding in `vector512`
+ * @returns a new array of the rankable documents, highest similarity first
+ */
+export function RetriveAllDocument(vector1: Array<number>, docList: Array<any>): Array<any> {
+    const ranked: Array<any> = [];
+    (docList || []).forEach(doc => {
+        const vector512 = doc?.vector512;
+        if (vector512 == null || vector512.length !== vector1.length) {
+            return; // not embedded yet, or embedded with a different model
+        }
+        try {
+            doc.similarity = cosineSimilarity(vector1, vector512);
+        } catch (err) {
+            return; // zero vector: cosine similarity is undefined
+        }
+        ranked.push(doc);
+    });
+
+    // Sort the copy so the caller's array keeps its order
+    ranked.sort((a, b) => b.similarity - a.similarity);
+
+    return ranked;
+}
+/** Sums the element-wise products of vectorA and vectorB, walking the indices of vectorA. */
+function dotProduct(vectorA: number[], vectorB: number[]): number {
+    let total = 0;
+    vectorA.forEach((component, position) => {
+        total = total + component * vectorB[position];
+    });
+    return total;
+}
+
+/** Euclidean length of the vector: square root of the sum of squared components. */
+function magnitude(vector: number[]): number {
+    let sumOfSquares = 0;
+    vector.forEach(component => {
+        sumOfSquares = sumOfSquares + component * component;
+    });
+    return Math.sqrt(sumOfSquares);
+}
+
+/**
+ * Cosine similarity of two vectors: their dot product divided by the product
+ * of their magnitudes.
+ * @throws Error when either vector has magnitude 0
+ */
+function cosineSimilarity(vectorA: number[], vectorB: number[]): number {
+    const numerator = dotProduct(vectorA, vectorB);
+    const lengthA = magnitude(vectorA);
+    const lengthB = magnitude(vectorB);
+
+    const bothNonZero = lengthA !== 0 && lengthB !== 0;
+    if (!bothNonZero) {
+        throw new Error("One or both vectors are zero vectors, cannot compute cosine similarity.");
+    }
+
+    const denominator = lengthA * lengthB;
+    return numerator / denominator;
+}