| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980 | 
const {
    SupportedTextSplitterLanguages,
    RecursiveCharacterTextSplitter,
    TokenTextSplitter,
} = require("langchain/text_splitter");
const mammoth = require("mammoth");
const fs = require('fs');
const pdf = require('pdf-parse');

/**
 * Demo entry point: loads a PDF and a DOCX file, splits their contents into
 * chunks with two different LangChain text splitters, and logs the results.
 *
 * @returns {Promise<void>} Settles once both documents have been processed;
 *     rejects if loading or splitting either document fails.
 */
async function main() {
    // Loader step: extract the plain text of the PDF.
    const data = await pdfLoader("../data/pgvector.pdf");
    const text = data.text;
    console.log(text);

    // Split the PDF text by token count.
    const tokenSplitter = new TokenTextSplitter({
        encodingName: "gpt2",
        chunkSize: 500,
        chunkOverlap: 0,
    });
    const pdfChunks = await tokenSplitter.createDocuments([text]);
    console.log(pdfChunks);

    // Convert the DOCX to HTML, then split it into chunks with the
    // HTML-aware recursive splitter.
    const html = await docsLoader("../data/pgvector.docx");
    // Distinct names are required here: redeclaring `splitter`/`output` with
    // `const` in the same scope is a SyntaxError.
    const htmlSplitter = RecursiveCharacterTextSplitter.fromLanguage("html", {
        chunkSize: 4096,
        chunkOverlap: 20,
    });
    const htmlChunks = await htmlSplitter.createDocuments([html]);

    console.log(htmlChunks);
    console.log(JSON.stringify(htmlChunks[0]));
}

// Function declarations are hoisted, so the loaders below are already
// available here. Handle the rejection so a failure is reported and reflected
// in the exit code instead of being left as a floating promise.
main().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});

/**
 * Converts a .docx file to HTML using mammoth.
 *
 * @param {string} path - Path to the .docx file.
 * @returns {Promise<string>} The generated HTML.
 *     Rejects if the conversion fails (previously the error was only logged
 *     and the returned promise never settled, hanging the caller).
 */
async function docsLoader(path) {
    const result = await mammoth.convertToHtml({ path: path });
    const html = result.value; // The generated HTML
    const messages = result.messages; // Any messages, such as warnings during conversion
    console.log(html);
    console.log(messages);
    return html;
}

/**
 * Reads a PDF file and parses it with pdf-parse, logging the parsed fields.
 *
 * @param {string} path - Path to the PDF file.
 * @returns {Promise<object>} The parse result from pdf-parse; `main` reads
 *     its `text` property. Rejects if the file cannot be read or parsed.
 */
async function pdfLoader(path) {
    // Non-blocking read; a failure surfaces as a rejection of this function.
    const dataBuffer = await fs.promises.readFile(path);
    const data = await pdf(dataBuffer);

    // number of pages
    console.log(data.numpages);
    // number of rendered pages
    console.log(data.numrender);
    // PDF info
    console.log(data.info);
    // PDF metadata
    console.log(data.metadata);
    // PDF.js version
    // check https://mozilla.github.io/pdf.js/getting_started/
    console.log(data.version);
    // PDF text
    console.log(data.text);

    return data;
}
 |