-
Notifications
You must be signed in to change notification settings - Fork 901
/
demo.py
executable file
·158 lines (124 loc) · 5.86 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#=========================================================================
#
# Copyright (c) 2017 <> All Rights Reserved
#
#
# File: /Users/hain/ai/Synonyms/demo.py
# Author: Hai Liang Wang
# Date: 2017-09-28:22:23:34
#
#=========================================================================
"""
"""
from __future__ import print_function
from __future__ import division
__copyright__ = "Copyright (c) (2017-2022) Chatopera Inc. All Rights Reserved"
__author__ = "Hai Liang Wang"
__date__ = "2017-09-28:22:23:34"
import os
import sys
curdir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, curdir)
if sys.version_info[0] < 3:
reload(sys)
sys.setdefaultencoding("utf-8")
# raise "Must be using Python 3"
#
import synonyms # https://github.com/chatopera/Synonyms
import numpy
import unittest
compare_ = lambda x,y,z: "%s vs %s: %f" % (x, y, synonyms.compare(x, y, seg=z)) + "\n" +"*"* 30 + "\n"
# run testcase: python /Users/hain/ai/Synonyms/demo.py Test.testExample
class Test(unittest.TestCase):
'''
'''
def setUp(self):
pass
def tearDown(self):
pass
def test_wordseg(self):
print("test_wordseg")
print(synonyms.seg("中文近义词工具包"))
def test_word_vector(self):
print("test_word_vector")
word = "三国"
print(word, "向量", synonyms.v(word))
def test_diff(self):
print("test_diff")
result = []
# 30个 评测词对中的左侧词
left = ['轿车', '宝石', '旅游', '男孩子', '海岸', '庇护所', '魔术师', '中午', '火炉', '食物', '鸟', '鸟', '工具', '兄弟', '起重机', '小伙子',
'旅行', '和尚', '墓地', '食物', '海岸', '森林', '岸边', '和尚', '海岸', '小伙子', '琴弦', '玻璃', '中午', '公鸡']
# 30个 评测词对中的右侧词
right = ['汽车', '宝物', '游历', '小伙子', '海滨', '精神病院', '巫师', '正午', '炉灶', '水果', '公鸡', '鹤', '器械', '和尚', '器械', '兄弟',
'轿车', '圣贤', '林地', '公鸡', '丘陵', '墓地', '林地', '奴隶', '森林', '巫师', '微笑', '魔术师', '绳子', '航行']
# 人工评定的相似度列表。
human = [0.98, 0.96, 0.96, 0.94, 0.925, 0.9025, 0.875, 0.855, 0.7775, 0.77, 0.7625, 0.7425, 0.7375, 0.705, 0.42, 0.415,
0.29, 0.275, 0.2375,
0.2225, 0.2175, 0.21, 0.1575, 0.1375, 0.105, 0.105, 0.0325, 0.0275, 0.02, 0.02]
result.append("# synonyms 分数评测 [(v%s)](https://pypi.python.org/pypi/synonyms/%s)" % (synonyms.__version__, synonyms.__version__))
result.append("| %s | %s | %s | %s |" % ("词1", "词2", "synonyms", "人工评定"))
result.append("| --- | --- | --- | --- |")
for x,y,z in zip(left, right, human):
result.append("| %s | %s | %s | %s |" % (x, y, synonyms.compare(x, y), z))
for x in result: print(x)
with open(os.path.join(curdir, "VALUATION.md"), "w") as fout:
for x in result: fout.write(x + "\n")
def test_similarity(self):
'''
Generate sentence similarity
'''
sen1 = "旗帜引领方向"
sen2 = "道路决定命运"
r = synonyms.compare(sen1, sen2, seg=True)
print("旗帜引领方向 vs 道路决定命运:", r)
# assert r == 0.0, "the similarity should be zero"
sen1 = "旗帜引领方向"
sen2 = "旗帜指引道路"
r = synonyms.compare(sen1, sen2, seg=True)
print("旗帜引领方向 vs 旗帜指引道路:", r)
# assert r > 0, "the similarity should be bigger then zero"
sen1 = "发生历史性变革"
sen2 = "发生历史性变革"
r = synonyms.compare(sen1, sen2, seg=True)
print("发生历史性变革 vs 发生历史性变革:", r)
# assert r > 0, "the similarity should be bigger then zero"
sen1 = "骨折"
sen2 = "巴赫"
r = synonyms.compare(sen1, sen2, seg=True)
print("%s vs %s" % (sen1, sen2), r)
sen1 = "你们好呀"
sen2 = "大家好"
r = synonyms.compare(sen1, sen2, seg=False)
print("%s vs %s" % (sen1, sen2), r)
def test_swap_sent(self):
print("test_swap_sent")
s1 = synonyms.compare("教学", "老师")
s2 = synonyms.compare("老师", "教学")
print('"教学", "老师": %s ' % s1)
print('"老师", "教学": %s ' % s2)
assert s1 == s2, "Scores should be the same after swap sents"
def test_nearby(self):
synonyms.display("奥运") # synonyms.display calls synonyms.nearby
synonyms.display("北新桥") # synonyms.display calls synonyms.nearby
def test_badcase_1(self):
synonyms.display("人脸") # synonyms.display calls synonyms.nearby
def test_basecase_2(self):
print("test_basecase_2")
sen1 = "今天天气"
sen2 = "今天天气怎么样"
r = synonyms.compare(sen1, sen2, seg=True)
def test_analyse_extract_tags(self):
'''
使用 Tag 方式获得关键词
https://github.com/fxsjy/jieba/tree/v0.39
'''
sentence = "华为芯片被断供,源于美国关于华为的修订版禁令生效——9月15日以来,台积电、高通、三星等华为的重要合作伙伴,只要没有美国的相关许可证,都无法供应芯片给华为,而中芯国际等国产芯片企业,也因采用美国技术,而无法供货给华为。目前华为部分型号的手机产品出现货少的现象,若该形势持续下去,华为手机业务将遭受重创。"
keywords = synonyms.keywords(sentence, topK=5, withWeight=False, allowPOS=())
print("[test_analyse_extract_tags] keywords %s" % keywords)
def test():
unittest.main()
if __name__ == '__main__':
test()