1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
"""
synonymiser.py
copyright 2021 Richard Knight
"""
from db import Word, Synonym
from peewee import fn
import sys
# Letters treated as vowels when choosing between the articles "a" and "an".
vowels = list("aeiou")
# recapitalise new_word to match the capitalisation of word
def _recapitalise(word, new_word):
if word[0].upper() == word[0]:
if len(word) > 1 and word[1].upper() == word[1]:
return new_word.upper()
else:
return new_word.capitalize()
else:
return new_word
# sorting types
class SORTING:
    """Namespace of integer codes selecting how synonyms are ordered."""

    # Shuffled, alphabetical, or no explicit ordering respectively.
    RANDOM, ALPHA, NONE = 0, 2, 3
# get synonyms
def get_synonyms(word, limit=1, sorting=SORTING.RANDOM, offensives=False):
    """Look up synonyms of word and return them cased like word.

    Args:
        word: the word to look up; matched case-insensitively.
        limit: maximum number of synonyms to return.
        sorting: one of the SORTING constants. RANDOM shuffles the results,
            ALPHA orders them by the synonym's text, and any other value
            applies no explicit ordering.
        offensives: when True, synonyms flagged as offensive may be
            returned; when False (the default) they are filtered out.

    Returns:
        A list of synonym strings recapitalised to follow word, or
        [word] when the word is unknown or has no matching synonyms.
    """
    base_word = Word.select().where(
        fn.LOWER(Word.word) == word.lower()
    ).first()
    if base_word is None:
        return [word]

    query = Synonym.select(Synonym, Word).join(
        Word, on=(Synonym.synonym == Word.id)
    ).where(Synonym.base == base_word)
    if not offensives:
        # Exclude offensive words unless the caller opted in. The previous
        # condition was inverted and filtered only when offensives was True.
        # peewee needs "==" here to build the SQL expression, not "is".
        query = query.where(Word.offensive == False)  # noqa: E712

    if sorting == SORTING.RANDOM:
        query = query.order_by(fn.RANDOM())
    elif sorting == SORTING.ALPHA:
        query = query.order_by(Word.word)

    # Materialise once instead of issuing a separate exists() query first.
    synonyms = [
        _recapitalise(word, row.synonym.word) for row in query.limit(limit)
    ]
    return synonyms or [word]
# correct a/an articles so they agree with the word that follows
def _rephrase(line):
    """Return a copy of the token list with "a"/"an" matched to the next token.

    A lowercase "a" followed by a token starting with a vowel becomes "an",
    and a lowercase "an" followed by a token starting with anything else
    becomes "a". The last token is never changed, and an article followed
    by an empty token is left alone.

    Args:
        line: list of word tokens.

    Returns:
        A new list of the same length; the input list is not modified.
    """
    fixed = list(line)
    for i, (token, following) in enumerate(zip(line, line[1:])):
        # Splitting on single spaces yields empty tokens for consecutive
        # spaces; there is no first letter to inspect, so skip them rather
        # than raise IndexError.
        if not following:
            continue
        starts_with_vowel = following[0].lower() in vowels
        if token == "a" and starts_with_vowel:
            fixed[i] = "an"
        elif token == "an" and not starts_with_vowel:
            fixed[i] = "a"
    return fixed
# synonymise a line randomly
def synonymise(line, offensives=False):
    """Replace each longer word of line with one synonym and return the text.

    The line is split on single spaces. Tokens shorter than four characters
    are kept as they are; every other token is swapped for the first result
    of get_synonyms. The a/an articles are then adjusted by _rephrase and
    the tokens are re-joined with single spaces.

    Args:
        line: the text to rewrite.
        offensives: forwarded to get_synonyms.

    Returns:
        The rewritten text as a single string.
    """
    replaced = [
        token if len(token) < 4
        else get_synonyms(token, offensives=offensives)[0]
        for token in line.split(" ")
    ]
    return " ".join(_rephrase(replaced))
# command line run
def _cmdline():
    """Parse command-line options and print synonyms to standard output.

    Three modes, checked in this order:
      * standard input is not a terminal: all of stdin is read and passed
        through synonymise; positional arguments, --limit and --sorting are
        not used in this mode.
      * the single positional argument contains a space: it is passed
        through synonymise as a phrase.
      * otherwise: the argument is looked up with get_synonyms and each
        synonym is printed on its own line.

    Exits through parser.error when stdin is a terminal and the number of
    positional arguments is not exactly one.
    """
    from optparse import OptionParser

    parser = OptionParser(usage="usage: %prog [options] word")
    parser.add_option(
        "-l", "--limit",
        dest="limit",
        help="maximum number of synonyms to retreive [default: %default]",
        default=999, type="int", metavar="LIMIT"
    )
    parser.add_option(
        "-o", "--offensives",
        dest="offensives",
        help="show words marked as offensive [default: %default]",
        default=False, action="store_true"
    )
    parser.add_option(
        "-s", "--sorting",
        dest="sorting",
        help="sort order of retreived synonyms (random, alpha, none) [default: %default]",
        default="random", type="string", metavar="SORTING"
    )
    options, args = parser.parse_args()

    using_stdin = not sys.stdin.isatty()
    if len(args) != 1 and not using_stdin:
        parser.error("word(s) to synonymise required")

    # Any value other than "random" or "alpha" means no explicit ordering.
    sorting = {
        "random": SORTING.RANDOM,
        "alpha": SORTING.ALPHA,
    }.get(options.sorting, SORTING.NONE)

    # print(...) with a single argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    if using_stdin:
        print(synonymise(sys.stdin.read(), options.offensives))
    elif len(args[0].split(" ")) > 1:
        print(synonymise(args[0], options.offensives))
    else:
        synonyms = get_synonyms(
            args[0], options.limit, sorting, options.offensives
        )
        for synonym in synonyms:
            print(synonym)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    _cmdline()
|