const cheerio = require('cheerio');
const { GushiHrefList } = require("./href");

// Load the Parse JS SDK.
const Parse = require("parse/node");
Parse.initialize("dev"); // set the applicationId
Parse.serverURL = "http://web2023.fmode.cn:9999/parse"; // set the serverURL

// CSS selector prefixes shared by every field scraped from a poem page.
const PAGE_SELECTOR = 'body > div.container.basic-page > div > div.more-container.col-md-8';
const HEADER_SELECTOR = `${PAGE_SELECTOR} > div:nth-child(1)`;
const DETAIL_SELECTOR = `${PAGE_SELECTOR} > div:nth-child(5)`;

/**
 * Scrapes every URL in GushiHrefList and stores each poem that is not yet
 * present in the "Shige" class.
 *
 * URLs are processed one at a time on purpose: the duplicate check followed by
 * an insert is not atomic, so running pages concurrently could store the same
 * poem twice. A failure on one URL is logged and does not stop the run.
 *
 * @returns {Promise<void>} Resolves once every URL has been processed.
 */
async function main() {
    // For a quick smoke run, iterate over [GushiHrefList[0]] instead.
    for (const href of GushiHrefList) {
        try {
            await saveGushiFromHref(href);
        } catch (err) {
            console.error("失败:", href, err);
        }
    }
}

/**
 * Scrapes a single poem page and saves it unless it is incomplete or already stored.
 *
 * @param {string} href - URL of the poem page.
 * @returns {Promise<void>}
 */
async function saveGushiFromHref(href) {
    const gushi = await getGushiDataFromHref(href);
    // Pages without a title or an author are treated as failed scrapes and skipped.
    if (!gushi?.title || !gushi?.author) return;

    // Duplicate check.
    const exists = await checkExists(gushi);
    if (exists?.id) return;

    // Insert the new record and wait for the save so errors are not lost.
    const Shige = Parse.Object.extend("Shige");
    const sg = new Shige();
    sg.set(gushi);
    await sg.save();
}

/**
 * Looks up a stored poem with the same title, author and dynasty.
 *
 * @param {{title?: string, author?: string, dynasty?: string}} gushi - Scraped poem data.
 * @returns {Promise<Parse.Object|undefined>} The first matching "Shige" object, if any.
 */
async function checkExists(gushi) {
    const query = new Parse.Query("Shige");
    query.equalTo("title", gushi?.title);
    query.equalTo("author", gushi?.author);
    query.equalTo("dynasty", gushi?.dynasty);
    // A single round trip is enough; the query result is returned directly.
    return query.first();
}

/**
 * Downloads a poem page and extracts its fields with cheerio.
 *
 * @param {string} href - URL of the poem page.
 * @returns {Promise<object>} An object with title, author, dynasty (text) and
 *   content, intro, yiwen, note, review (inner HTML, null when the element is
 *   missing); an empty object when the page could not be downloaded.
 */
async function getGushiDataFromHref(href) {
    let html;
    try {
        const response = await fetch(href);
        // fetch only rejects on network errors, so HTTP error statuses are checked explicitly.
        if (!response.ok) {
            console.log("失败:", href);
            return {};
        }
        html = await response.text();
    } catch (err) {
        console.log("失败:", href);
        return {};
    }

    const $ = cheerio.load(html);
    const gushi = {
        title: $(`${HEADER_SELECTOR} > h1`).text(),
        author: $(`${HEADER_SELECTOR} > div.author-simple-info > span:nth-child(3) > a`).text(),
        dynasty: $(`${HEADER_SELECTOR} > div.author-simple-info > span:nth-child(1) > a`).text(),
        content: $(`${HEADER_SELECTOR} > div.shici-content.check-more`).html(),
        intro: $(`${DETAIL_SELECTOR} > div:nth-child(2)`).html(),
        yiwen: $(`${DETAIL_SELECTOR} > div:nth-child(4)`).html(),
        note: $(`${DETAIL_SELECTOR} > div:nth-child(6)`).html(),
        review: $(`${DETAIL_SELECTOR} > div:nth-child(8)`).html(),
    };
    console.log(href, gushi);
    return gushi;
}

// Paginated scrape-and-save entry point; report any unexpected failure instead
// of leaving the promise floating.
main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});