about | summary | refs | log | tree | commit | diff
path: root/synonymiser/synonymiser.py
diff options
context:
space:
mode:
Diffstat (limited to 'synonymiser/synonymiser.py')
-rw-r--r-- synonymiser/synonymiser.py | 127
1 files changed, 127 insertions, 0 deletions
diff --git a/synonymiser/synonymiser.py b/synonymiser/synonymiser.py
new file mode 100644
index 0000000..14951a7
--- /dev/null
+++ b/synonymiser/synonymiser.py
@@ -0,0 +1,127 @@
+"""
+ synonymiser.py
+ copyright 2021 Richard Knight
+"""
+from db import Word, Synonym
+from peewee import fn
+import sys
+
+
# vowel letters consulted when switching between "a" and "an"
vowels = list("aeiou")
+
+
+# recapitalise new_word as to that of word
+def _recapitalise(word, new_word):
+ if word[0].upper() == word[0]:
+ if len(word) > 1 and word[1].upper() == word[1]:
+ return new_word.upper()
+ else:
+ return new_word.capitalize()
+ else:
+ return new_word
+
+
# sorting types
class SORTING:
    """Identifiers for the result orderings accepted by get_synonyms()."""

    RANDOM = 0  # order results randomly
    ALPHA = 2   # order results alphabetically by word
    NONE = 3    # apply no explicit ordering
+
+
# get synonyms
def get_synonyms(word, limit=1, sorting=SORTING.RANDOM, offensives=False):
    """Look up synonyms of word in the database.

    word       -- the word to look up (matched case-insensitively)
    limit      -- maximum number of synonyms to return
    sorting    -- one of the SORTING constants; any value other than
                  SORTING.RANDOM or SORTING.ALPHA applies no ordering
    offensives -- when True, synonyms flagged as offensive are included;
                  when False (the default) they are filtered out

    Returns a list of synonyms re-cased to match word. When the word is not
    in the database, or it has no matching synonyms, the result is [word].
    """
    base_word = Word.select().where(fn.LOWER(Word.word) == word.lower())
    if not base_word.exists():
        return [word]

    conditions = (Synonym.base == base_word.get())
    if not offensives:
        # Offensive words are opt-in. The previous condition was inverted:
        # it hid them only when the caller had asked for them.
        conditions = conditions & (Word.offensive == False)  # noqa: E712

    query = Synonym.select(Synonym, Word).join(
        Word, on=(Synonym.synonym == Word.id)
    ).where(conditions)

    if sorting == SORTING.RANDOM:
        query = query.order_by(fn.RANDOM())
    elif sorting == SORTING.ALPHA:
        query = query.order_by(Word.word)

    query = query.limit(limit)
    if not query.exists():
        return [word]
    return [_recapitalise(word, q.synonym.word) for q in query]
+
+
# rephrase a/an prepends
def _rephrase(line):
    """Fix up "a"/"an" articles so they agree with the token that follows.

    line -- a sequence of string tokens (one sentence split into words)

    Returns a new list in which a lower-case "a" followed by a token starting
    with a vowel becomes "an", and a lower-case "an" followed by a token not
    starting with a vowel becomes "a". All other tokens are copied unchanged.
    """
    new = []
    for i, v in enumerate(line):
        nv = v
        following = line[i + 1] if i + 1 < len(line) else ""
        # Splitting on single spaces yields empty tokens for consecutive
        # spaces; indexing [0] on one of those used to raise IndexError, so
        # the article is left alone when there is nothing to inspect.
        if following:
            starts_with_vowel = following[0].lower() in vowels
            if v == "a" and starts_with_vowel:
                nv = "an"
            elif v == "an" and not starts_with_vowel:
                nv = "a"
        new.append(nv)
    return new
+
+
# synonymise a line randomly
def synonymise(line, offensives=False):
    """Replace the longer words of line with a synonym each.

    line       -- text to process; it is split on single spaces
    offensives -- passed through to get_synonyms()

    Tokens shorter than four characters are kept as they are; every other
    token is swapped for the first result of get_synonyms(). The tokens are
    then passed through _rephrase() and joined back with single spaces.
    """
    replaced = [
        token if len(token) < 4
        else get_synonyms(token, offensives=offensives)[0]
        for token in line.split(" ")
    ]
    return " ".join(_rephrase(replaced))
+
+
# command line run
def _cmdline():
    """Parse the command line and print synonyms to standard output.

    Input selection:
      * when stdin is not a terminal, everything read from stdin is run
        through synonymise() and printed
      * otherwise exactly one positional argument is required; if it
        contains a space it is run through synonymise(), else it is looked
        up with get_synonyms() and each result is printed on its own line

    The --limit and --sorting options only affect the single-word lookup.
    A --sorting value other than "random" or "alpha" selects SORTING.NONE.
    """
    from optparse import OptionParser
    usage = "usage: %prog [options] word"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-l", "--limit",
        dest="limit",
        help="maximum number of synonyms to retreive [default: %default]",
        default=999, type="int", metavar="LIMIT"
    )
    parser.add_option(
        "-o", "--offensives",
        dest="offensives",
        help="show words marked as offensive [default: %default]",
        default=False, action="store_true"
    )
    parser.add_option(
        "-s", "--sorting",
        dest="sorting",
        help="sort order of retreived synonyms (random, alpha, none) [default: %default]",
        default="random", type="string", metavar="SORTING"
    )

    (options, args) = parser.parse_args()
    using_stdin = not sys.stdin.isatty()
    if len(args) != 1 and not using_stdin:
        parser.error("word(s) to synonymise required")

    sorting = SORTING.NONE
    if options.sorting == "random":
        sorting = SORTING.RANDOM
    elif options.sorting == "alpha":
        sorting = SORTING.ALPHA

    # print(...) with a single argument behaves the same on Python 2 and 3;
    # the bare print statement used before is a SyntaxError on Python 3.
    if using_stdin:
        print(synonymise(sys.stdin.read(), options.offensives))
    elif len(args[0].split(" ")) > 1:
        print(synonymise(args[0], options.offensives))
    else:
        synonyms = get_synonyms(args[0], options.limit, sorting, options.offensives)
        for synonym in synonyms:
            print(synonym)
+
# run the command line interface only when executed as a script
if __name__ == "__main__":
    _cmdline()
+