api-config.json 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. {
  2. "name": "review-sentiment-analysis",
  3. "displayName": "评论情感分析",
  4. "description": "对评论语料库进行情感分析,分类正面/负面/中性,提取情感关键短语并按ASIN/品牌聚合",
  5. "category": "review-analysis",
  6. "version": "1.3.0",
  7. "type": "analysis",
  8. "parameters": {
  9. "reviewCorpus": {
  10. "type": "object",
  11. "required": true,
  12. "description": "review-batch-collection 输出的评论语料"
  13. },
  14. "language": {
  15. "type": "string",
  16. "required": false,
  17. "default": "en",
  18. "description": "评论语言"
  19. },
  20. "sentimentThreshold": {
  21. "type": "object",
  22. "required": false,
  23. "default": { "positiveMin": 0.6, "negativeMax": 0.4 },
  24. "properties": {
  25. "positiveMin": { "type": "number" },
  26. "negativeMax": { "type": "number" }
  27. },
  28. "description": "情感分类阈值"
  29. }
  30. },
  31. "pipeline": [
  32. {
  33. "step": 1,
  34. "name": "星级情感分类",
  35. "type": "compute",
  36. "logic": "sentimentClassify(reviewCorpus.reviews, { language, sentimentThreshold })",
  37. "algorithm": {
  38. "primaryRule": "基于星级快速分类: star>=4→positive, star<=2→negative, star==3→neutral",
  39. "secondaryRule": "可选NLP情感分析微调,处理4★但内容消极/2★但内容积极的边缘情况",
  40. "implementation": "遍历reviews数组,根据star字段分类,边缘情况可选用NLP微调"
  41. },
  42. "output": "classifiedReviews"
  43. },
  44. {
  45. "step": 2,
  46. "name": "按ASIN聚合情感指标",
  47. "type": "compute",
  48. "logic": "aggregateByAsin(classifiedReviews)",
  49. "algorithm": {
  50. "perAsin": "{ positivePct, neutralPct, negativePct, total }",
  51. "percentCalc": "Math.round((count/total)*1000)/10 → 保留一位小数",
  52. "implementation": "groupBy(asin) → 每组统计positive/neutral/negative数量和百分比"
  53. },
  54. "output": "asinAggregation"
  55. },
  56. {
  57. "step": 3,
  58. "name": "提取全局情感热词",
  59. "type": "compute",
  60. "logic": "extractGlobalHotPhrases(classifiedReviews)",
  61. "algorithm": {
  62. "tokenize": "分词 + 停用词过滤(the/a/an/is/it/i/my/to/and/of/for/in...)",
  63. "freqCount": "统计词频,按频率降序排列",
  64. "phraseExtract": "提取高频短语(2-3词组合),如'scent throw', 'oil refill'",
  65. "splitBysentiment": "分别统计正面/负面评论中的高频词",
  66. "implementation": "split+toLowerCase+停用词过滤 → 生成单词及2-3词短语 → 按正面/负面分别统计Map<term,count> → 各自按count降序取topN"
  67. },
  68. "output": "globalHotPhrases"
  69. },
  70. {
  71. "step": 4,
  72. "name": "情感短语识别+情感词云",
  73. "type": "compute",
  74. "logic": "extractSentimentKeywordCloud(classifiedReviews)",
  75. "algorithm": {
  76. "sentimentPhraseRules": {
  77. "description": "Regex模式匹配多词情感短语,短语优先于单词",
  78. "negativePhrases": [
  79. "too small", "too big", "too short", "too long", "too tight", "too thin",
  80. "poor quality", "bad quality", "cheaply made",
  81. "fell apart", "doesn't fit", "not worth", "waste of money",
  82. "not as described/pictured/shown/advertised",
  83. "wrong size", "wrong color", "bad smell", "chemical smell",
  84. "sent back", "returned it", "ripped/torn", "stained"
  85. ],
  86. "positivePhrases": [
  87. "well made", "high quality", "great quality", "good quality",
  88. "perfect fit", "fits perfectly/great/well",
  89. "highly recommend", "fast shipping",
  90. "great value/price/deal", "worth the money/penny",
  91. "loved it", "very comfortable/soft/warm",
  92. "true to size", "exceeded expectations",
  93. "beautiful/gorgeous", "super soft", "pleasantly surprised"
  94. ]
  95. },
  96. "sentimentWordDictionary": {
  97. "positive": ["comfortable", "durable", "sturdy", "soft", "affordable", "flattering", "recommend", "amazing", "awesome", "excellent", "fantastic", "stylish", "elegant", "versatile", "lightweight", "breathable"],
  98. "negative": ["disappointed", "disappointing", "flimsy", "cheap", "broke", "broken", "defective", "damaged", "uncomfortable", "itchy", "scratchy", "rough", "terrible", "horrible", "awful", "worst", "misleading", "refund", "shrunk", "faded", "wrinkled", "unraveled", "fraying", "pilling", "transparent", "see-through", "smelly", "stiff"]
  99. },
  100. "mergeAndOutput": {
  101. "phraseMinCount": 2,
  102. "wordMinCount": 3,
  103. "perSentiment": "正面和负面各取top10",
  104. "weight": "Math.round(count/maxCount * 100) → 0-100整数"
  105. }
  106. },
  107. "output": "sentimentKeywordCloud"
  108. }
  109. ],
  110. "response": {
  111. "type": "object",
  112. "properties": {
  113. "reviewSentiments": {
  114. "type": "array",
  115. "description": "带情感标签的评论列表",
  116. "items": {
  117. "type": "object",
  118. "properties": {
  119. "asin": { "type": "string" },
  120. "star": { "type": "integer" },
  121. "sentiment": { "type": "string", "enum": ["positive", "negative", "neutral"] },
  122. "title": { "type": "string" },
  123. "content": { "type": "string" }
  124. }
  125. }
  126. },
  127. "asinAggregation": {
  128. "type": "object",
  129. "description": "按ASIN聚合的情感指标",
  130. "additionalProperties": {
  131. "type": "object",
  132. "properties": {
  133. "total": { "type": "integer" },
  134. "positivePct": { "type": "number" },
  135. "neutralPct": { "type": "number" },
  136. "negativePct": { "type": "number" }
  137. }
  138. }
  139. },
  140. "globalHotPhrases": {
  141. "type": "object",
  142. "description": "全局情感热词",
  143. "properties": {
  144. "positive": { "type": "array", "items": { "type": "object", "properties": { "phrase": { "type": "string" }, "frequency": { "type": "integer" } } } },
  145. "negative": { "type": "array", "items": { "type": "object", "properties": { "phrase": { "type": "string" }, "frequency": { "type": "integer" } } } }
  146. }
  147. },
  148. "sentimentKeywordCloud": {
  149. "type": "array",
  150. "description": "带情感标签的关键词云(短语+单词)",
  151. "items": {
  152. "type": "object",
  153. "properties": {
  154. "text": { "type": "string" },
  155. "weight": { "type": "integer", "description": "0-100权重" },
  156. "sentiment": { "type": "string", "enum": ["positive", "negative"] }
  157. }
  158. }
  159. }
  160. }
  161. },
  162. "algorithmNotes": {
  163. "starRatings": "Sorftime返回的星级字段(fiveStartRatings等)是百分比值,不是计数,加权评分=sum(star*pct)/sum(pct)",
  164. "stopWords": "English停用词表: the/a/an/is/it/i/my/to/and/of/for/in/this/that/was/with/but/have/not/are"
  165. },
  166. "timeout": 300000,
  167. "retry": {
  168. "maxAttempts": 2,
  169. "delay": 1000,
  170. "backoffMultiplier": 2
  171. }
  172. }