| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 | 
							- const cheerio = require('cheerio');
 
- const { GushiHrefList } = require("./href")
 
- // Import the Parse JS SDK (Node.js build)
 
- const Parse = require("parse/node");
 
- Parse.initialize("dev"); // set the applicationId
 
- Parse.serverURL = "http://web2023.fmode.cn:9999/parse"; // set the serverURL
 
- // Crawl the data page by page and save it; main is the async function declared below (hoisted)
 
- main()
 
/**
 * Crawls every href in `GushiHrefList` and stores each poem that is not
 * already present as a new "Shige" object.
 *
 * Hrefs are processed one at a time and every step is awaited, so:
 *  - the returned promise settles only after all work has finished;
 *  - the duplicate check for one href sees the records saved for earlier
 *    hrefs (concurrent processing could let two identical poems both pass
 *    the check before either was saved);
 *  - a failure on one href is logged and does not abort the remaining ones.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const list = GushiHrefList;
    // The subclass does not depend on the item, so create it once.
    const Shige = Parse.Object.extend("Shige");

    for (const href of list) {
        try {
            const gushi = await getGushiDataFromHref(href);
            // Skip pages where the essential fields could not be extracted.
            if (!gushi?.title || !gushi?.author) continue;

            // De-duplicate against what is already stored.
            const exists = await checkExists(gushi);
            if (exists?.id) continue;

            // Insert the new record and wait for the save to complete.
            const sg = new Shige();
            sg.set(gushi);
            await sg.save();
        } catch (err) {
            // Keep going: one bad page or failed save must not stop the crawl.
            console.log("失败:", href, err);
        }
    }
}
 
/**
 * Looks for an already-stored "Shige" record with the same title, author
 * and dynasty as the given poem.
 *
 * The query is issued exactly once; running it twice only doubled the
 * network round trips, because the first result was discarded.
 *
 * @param {{title?: string, author?: string, dynasty?: string}} gushi
 *   Poem data; a missing object or field is passed to the query as undefined.
 * @returns {Promise<*>} Whatever `Parse.Query#first` resolves to for the
 *   three equality constraints (callers test `?.id` on the result).
 */
async function checkExists(gushi) {
    const query = new Parse.Query("Shige");
    query.equalTo("title", gushi?.title);
    query.equalTo("author", gushi?.author);
    query.equalTo("dynasty", gushi?.dynasty);
    return query.first();
}
 
/**
 * Downloads one poem page and extracts its fields with cheerio.
 *
 * Returns an empty object (after logging the href) when the request fails,
 * when the server answers with a non-2xx status, or when the body cannot be
 * read, so that an error page is never parsed as if it were a poem.
 *
 * @param {string} href URL of the poem page.
 * @returns {Promise<object>} `{}` on failure; otherwise an object with
 *   `title`, `author`, `dynasty` (element text) and `content`, `intro`,
 *   `yiwen`, `note`, `review` (element inner HTML, `null` when the element
 *   is absent from the page).
 */
async function getGushiDataFromHref(href) {
    let html;
    try {
        const response = await fetch(href);
        if (!response.ok) {
            // fetch() only rejects on network errors; HTTP errors must be checked explicitly.
            console.log("失败:", href, response.status);
            return {};
        }
        html = await response.text();
    } catch (err) {
        console.log("失败:", href);
        return {};
    }

    const $ = cheerio.load(html);

    // Shared selector prefixes; the concatenated selectors are identical to the
    // fully spelled-out ones this function has always used.
    const base = "body > div.container.basic-page > div > div.more-container.col-md-8";
    const header = `${base} > div:nth-child(1)`;
    const details = `${base} > div:nth-child(5)`;

    const gushi = {
        title: $(`${header} > h1`).text(),
        author: $(`${header} > div.author-simple-info > span:nth-child(3) > a`).text(),
        dynasty: $(`${header} > div.author-simple-info > span:nth-child(1) > a`).text(),
        content: $(`${header} > div.shici-content.check-more`).html(),
        // The remaining sections are the 2nd/4th/6th/8th children of the fifth container div.
        intro: $(`${details} > div:nth-child(2)`).html(),
        yiwen: $(`${details} > div:nth-child(4)`).html(),
        note: $(`${details} > div:nth-child(6)`).html(),
        review: $(`${details} > div:nth-child(8)`).html(),
    };

    console.log(href, gushi);
    return gushi;
}
 
 
  |