| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172 |
- {
- "name": "review-sentiment-analysis",
- "displayName": "评论情感分析",
- "description": "对评论语料库进行情感分析,分类正面/负面/中性,提取情感关键短语并按ASIN聚合",
- "category": "review-analysis",
- "version": "1.3.0",
- "type": "analysis",
- "parameters": {
- "reviewCorpus": {
- "type": "object",
- "required": true,
- "description": "review-batch-collection 输出的评论语料"
- },
- "language": {
- "type": "string",
- "required": false,
- "default": "en",
- "description": "评论语言"
- },
- "sentimentThreshold": {
- "type": "object",
- "required": false,
- "default": { "positiveMin": 0.6, "negativeMax": 0.4 },
- "properties": {
- "positiveMin": { "type": "number" },
- "negativeMax": { "type": "number" }
- },
- "description": "情感分类阈值"
- }
- },
- "pipeline": [
- {
- "step": 1,
- "name": "星级情感分类",
- "type": "compute",
- "logic": "sentimentClassify(reviewCorpus.reviews, { language, sentimentThreshold })",
- "algorithm": {
- "primaryRule": "基于星级快速分类: star>=4→positive, star<=2→negative, star==3→neutral",
- "secondaryRule": "可选NLP情感分析微调,处理4★但内容消极/2★但内容积极的边缘情况",
- "implementation": "遍历reviews数组,根据star字段分类,边缘情况可选用NLP微调"
- },
- "output": "classifiedReviews"
- },
- {
- "step": 2,
- "name": "按ASIN聚合情感指标",
- "type": "compute",
- "logic": "aggregateByAsin(classifiedReviews)",
- "algorithm": {
- "perAsin": "{ positivePct, neutralPct, negativePct, total }",
- "percentCalc": "Math.round((count/total)*1000)/10 → 保留一位小数",
- "implementation": "groupBy(asin) → 每组统计positive/neutral/negative数量和百分比"
- },
- "output": "asinAggregation"
- },
- {
- "step": 3,
- "name": "提取全局情感热词",
- "type": "compute",
- "logic": "extractGlobalHotPhrases(classifiedReviews)",
- "algorithm": {
- "tokenize": "分词 + 停用词过滤(the/a/an/is/it/i/my/to/and/of/for/in...)",
- "freqCount": "统计词频,按频率降序排列",
- "phraseExtract": "提取高频短语(2-3词组合),如'scent throw', 'oil refill'",
- "splitBySentiment": "分别统计正面/负面评论中的高频词",
- "implementation": "split+toLowerCase+停用词过滤 → Map<word,count> → 按count降序取topN"
- },
- "output": "globalHotPhrases"
- },
- {
- "step": 4,
- "name": "情感短语识别+情感词云",
- "type": "compute",
- "logic": "extractSentimentKeywordCloud(classifiedReviews)",
- "algorithm": {
- "sentimentPhraseRules": {
- "description": "Regex模式匹配多词情感短语,短语优先于单词",
- "negativePhrases": [
- "too small", "too big", "too short", "too long", "too tight", "too thin",
- "poor quality", "bad quality", "cheaply made",
- "fell apart", "doesn't fit", "not worth", "waste of money",
- "not as described/pictured/shown/advertised",
- "wrong size", "wrong color", "bad smell", "chemical smell",
- "sent back", "returned it", "ripped/torn", "stained"
- ],
- "positivePhrases": [
- "well made", "high quality", "great quality", "good quality",
- "perfect fit", "fits perfectly/great/well",
- "highly recommend", "fast shipping",
- "great value/price/deal", "worth the money/penny",
- "loved it", "very comfortable/soft/warm",
- "true to size", "exceeded expectations",
- "beautiful/gorgeous", "super soft", "pleasantly surprised"
- ]
- },
- "sentimentWordDictionary": {
- "positive": ["comfortable", "durable", "sturdy", "soft", "affordable", "flattering", "recommend", "amazing", "awesome", "excellent", "fantastic", "stylish", "elegant", "versatile", "lightweight", "breathable"],
- "negative": ["disappointed", "disappointing", "flimsy", "cheap", "broke", "broken", "defective", "damaged", "uncomfortable", "itchy", "scratchy", "rough", "terrible", "horrible", "awful", "worst", "misleading", "refund", "shrunk", "faded", "wrinkled", "unraveled", "fraying", "pilling", "transparent", "see-through", "smelly", "stiff"]
- },
- "mergeAndOutput": {
- "phraseMinCount": 2,
- "wordMinCount": 3,
- "perSentiment": "正面和负面各取top10",
- "weight": "Math.round(count/maxCount * 100) → 0-100整数"
- }
- },
- "output": "sentimentKeywordCloud"
- }
- ],
- "response": {
- "type": "object",
- "properties": {
- "reviewSentiments": {
- "type": "array",
- "description": "带情感标签的评论列表(即步骤1输出的classifiedReviews)",
- "items": {
- "type": "object",
- "properties": {
- "asin": { "type": "string" },
- "star": { "type": "integer" },
- "sentiment": { "type": "string", "enum": ["positive", "negative", "neutral"] },
- "title": { "type": "string" },
- "content": { "type": "string" }
- }
- }
- },
- "asinAggregation": {
- "type": "object",
- "description": "按ASIN聚合的情感指标",
- "additionalProperties": {
- "type": "object",
- "properties": {
- "total": { "type": "integer" },
- "positivePct": { "type": "number" },
- "neutralPct": { "type": "number" },
- "negativePct": { "type": "number" }
- }
- }
- },
- "globalHotPhrases": {
- "type": "object",
- "description": "全局情感热词",
- "properties": {
- "positive": { "type": "array", "items": { "type": "object", "properties": { "phrase": { "type": "string" }, "frequency": { "type": "integer" } } } },
- "negative": { "type": "array", "items": { "type": "object", "properties": { "phrase": { "type": "string" }, "frequency": { "type": "integer" } } } }
- }
- },
- "sentimentKeywordCloud": {
- "type": "array",
- "description": "带情感标签的关键词云(短语+单词)",
- "items": {
- "type": "object",
- "properties": {
- "text": { "type": "string" },
- "weight": { "type": "integer", "description": "0-100权重" },
- "sentiment": { "type": "string", "enum": ["positive", "negative"] }
- }
- }
- }
- }
- },
- "algorithmNotes": {
- "starRatings": "Sorftime返回的星级字段(fiveStartRatings等)是百分比值,不是计数,加权评分=sum(star*pct)/sum(pct)",
- "stopWords": "English停用词表: the/a/an/is/it/i/my/to/and/of/for/in/this/that/was/with/but/have/not/are"
- },
- "timeout": 300000,
- "retry": {
- "maxAttempts": 2,
- "delay": 1000,
- "backoffMultiplier": 2
- }
- }
|