/**
 * npm i gpt-tokenizer -S
 *
 * Demo script: loads a .docx document as HTML, splits it into chunks with
 * LangChain's recursive text splitter, and tokenizes the first chunk with
 * gpt-tokenizer. A PDF loader helper is also provided.
 */
const {
  encode,
  encodeChat,
  decode,
  isWithinTokenLimit,
  encodeGenerator,
  decodeGenerator,
  decodeAsyncGenerator,
} = require('gpt-tokenizer');
const {
  SupportedTextSplitterLanguages,
  RecursiveCharacterTextSplitter,
  TokenTextSplitter,
} = require("langchain/text_splitter");
// const { OpenAIEmbeddings } = require("langchain/embeddings/openai");
const mammoth = require("mammoth");
const fs = require('fs');
const pdf = require('pdf-parse');

/**
 * Entry point: converts the sample .docx to HTML, splits the HTML into
 * chunks, then logs the first chunk and its encoded tokens.
 *
 * @returns {Promise<void>} Resolves after the first chunk has been logged
 *   (or after a warning when no chunk was produced); rejects if loading or
 *   splitting fails.
 */
async function main() {
  // Extract text chunks from the document text.
  const html = await docsLoader("../data/pgvector.docx");
  const splitter = RecursiveCharacterTextSplitter.fromLanguage("html", {
    chunkSize: 4096,
    chunkOverlap: 20,
  });
  const output = await splitter.createDocuments([html]);
  // console.log(output);

  // Guard against an empty result so we do not dereference output[0] below.
  if (output.length === 0) {
    console.warn("No text chunks were produced from the document.");
    return;
  }

  const content = output[0].pageContent;
  console.log(content);

  // Encode text into tokens
  const tokens = encode(content);
  console.log(tokens);

  // Embedding step is currently disabled.
  // NOTE(review): `embed_query` looks like the Python method name; the JS
  // API is presumably `embedQuery` and returns a promise — confirm before
  // re-enabling.
  // const embeddings = new OpenAIEmbeddings({});
  // let vector = embeddings.embed_query(content)
}

/**
 * Converts a .docx file to HTML using mammoth and logs the generated HTML
 * together with any conversion messages.
 *
 * @param {string} path - Path of the .docx file, passed to mammoth as `{ path }`.
 * @returns {Promise<string>} The generated HTML (`result.value`).
 *   Rejects with mammoth's error if the conversion fails, instead of leaving
 *   the caller waiting on a promise that never settles.
 */
async function docsLoader(path) {
  // `value` is the generated HTML; `messages` holds any messages, such as
  // warnings raised during conversion.
  const { value: html, messages } = await mammoth.convertToHtml({ path });
  console.log(html);
  console.log(messages);
  return html;
}

/**
 * Reads a PDF file, parses it with pdf-parse, and logs the parsed fields.
 *
 * @param {string} path - Path of the PDF file to read.
 * @returns {Promise<object>} The object produced by pdf-parse (the fields
 *   logged below are read from it). Rejects if the file cannot be read or
 *   the PDF cannot be parsed.
 */
async function pdfLoader(path) {
  // Non-blocking read; a failure surfaces as a rejection of this function.
  const dataBuffer = await fs.promises.readFile(path);
  const data = await pdf(dataBuffer);

  // number of pages
  console.log(data.numpages);
  // number of rendered pages
  console.log(data.numrender);
  // PDF info
  console.log(data.info);
  // PDF metadata
  console.log(data.metadata);
  // PDF.js version
  // check https://mozilla.github.io/pdf.js/getting_started/
  console.log(data.version);
  // PDF text
  console.log(data.text);

  return data;
}

// Run the demo; report failures instead of leaving a floating promise.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});