diff options
Diffstat (limited to 'synonymiser/synonymiser.py')
-rw-r--r-- | synonymiser/synonymiser.py | 127 |
1 files changed, 127 insertions, 0 deletions
"""
    synonymiser.py
    copyright 2021 Richard Knight
"""
from db import Word, Synonym
from peewee import fn
import sys


# vowels for the changing of a/an
vowels = ["a", "e", "i", "o", "u"]


# recapitalise new_word as to that of word
def _recapitalise(word, new_word):
    """Return new_word with the capitalisation pattern of word.

    * first two characters of word are upper case -> new_word.upper()
    * only the first character is upper case      -> new_word.capitalize()
    * anything else (including an empty word)     -> new_word unchanged

    Only cased letters count as "upper case": a leading digit or
    punctuation mark no longer triggers recapitalisation, and an empty
    word no longer raises IndexError.
    """
    if not word[:1].isupper():
        return new_word
    if word[1:2].isupper():
        return new_word.upper()
    return new_word.capitalize()


# sorting types
class SORTING:
    RANDOM = 0  # order_by(fn.RANDOM())
    ALPHA = 2   # order_by(Word.word)
    NONE = 3    # no explicit ordering is applied


# get synonyms
def get_synonyms(word, limit=1, sorting=SORTING.RANDOM, offensives=False):
    """Look up synonyms of word.

    word       -- the word to look up; matched case-insensitively
    limit      -- maximum number of synonyms to return
    sorting    -- one of the SORTING constants
    offensives -- when true, words flagged as offensive may be returned;
                  when false (the default) they are filtered out

    Returns a list of synonyms recapitalised like word, or [word] when
    the word is unknown or has no (permitted) synonyms.
    """
    base_word = Word.select().where(
        fn.LOWER(Word.word) == word.lower()
    ).first()
    if base_word is None:
        return [word]

    query = Synonym.select(Synonym, Word).join(
        Word, on=(Synonym.synonym == Word.id)
    ).where(Synonym.base == base_word)

    # The filter used to be applied the wrong way round: it hid offensive
    # words only when the caller had asked to see them.
    if not offensives:
        # peewee builds SQL from "==", so "is False" cannot be used here;
        # successive where() calls are AND-ed together.
        query = query.where(Word.offensive == False)  # noqa: E712

    if sorting == SORTING.RANDOM:
        query = query.order_by(fn.RANDOM())
    elif sorting == SORTING.ALPHA:
        query = query.order_by(Word.word)

    # Fetch once instead of issuing a separate exists() query first.
    synonyms = [
        _recapitalise(word, row.synonym.word) for row in query.limit(limit)
    ]
    return synonyms or [word]


# rephrase a/an prepends
def _rephrase(line):
    """Fix up "a"/"an" in a list of words to agree with the following word.

    A lower-case "a" followed by a word starting with a vowel becomes
    "an"; a lower-case "an" followed by a word not starting with a vowel
    becomes "a".  Empty tokens (which splitting on single spaces yields
    for repeated or trailing spaces) are skipped rather than indexed, so
    they no longer raise IndexError.  Returns a new list.
    """
    new = []
    for i, v in enumerate(line):
        nv = v
        following = line[i + 1] if i + 1 < len(line) else ""
        if following:
            before_vowel = following[0].lower() in vowels
            if v == "a" and before_vowel:
                nv = "an"
            elif v == "an" and not before_vowel:
                nv = "a"
        new.append(nv)
    return new


# synonymise a line randomly
def synonymise(line, offensives=False):
    """Replace each word of line with one randomly chosen synonym.

    The line is split on single spaces.  Tokens shorter than four
    characters are kept as they are; every other token is replaced by
    the first result of get_synonyms() (which is the token itself when
    nothing is found).  a/an are then adjusted by _rephrase() and the
    tokens are joined with single spaces again.
    """
    output = [
        word if len(word) < 4
        else get_synonyms(word, offensives=offensives)[0]
        for word in line.split(" ")
    ]
    return " ".join(_rephrase(output))


# command line run
def _cmdline():
    """Command line entry point.

    When stdin is not a terminal its whole content is synonymised and
    printed (any positional argument is then ignored).  Otherwise exactly
    one argument is required: an argument containing spaces is
    synonymised as a phrase, a single word has its synonyms listed one
    per line, honouring --limit and --sorting.
    """
    from optparse import OptionParser
    usage = "usage: %prog [options] word"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-l", "--limit",
        dest="limit",
        help="maximum number of synonyms to retreive [default: %default]",
        default=999, type="int", metavar="LIMIT"
    )
    parser.add_option(
        "-o", "--offensives",
        dest="offensives",
        help="show words marked as offensive [default: %default]",
        default=False, action="store_true"
    )
    parser.add_option(
        "-s", "--sorting",
        dest="sorting",
        help="sort order of retreived synonyms (random, alpha, none) [default: %default]",
        default="random", type="string", metavar="SORTING"
    )

    (options, args) = parser.parse_args()
    using_stdin = not sys.stdin.isatty()
    if len(args) != 1 and not using_stdin:
        parser.error("word(s) to synonymise required")

    # Any unrecognised sorting name falls back to "no ordering".
    sorting = {
        "random": SORTING.RANDOM,
        "alpha": SORTING.ALPHA,
    }.get(options.sorting, SORTING.NONE)

    # print(...) with a single argument behaves the same on Python 2 and 3.
    if using_stdin:
        print(synonymise(sys.stdin.read(), options.offensives))
    elif len(args[0].split(" ")) > 1:
        print(synonymise(args[0], options.offensives))
    else:
        synonyms = get_synonyms(
            args[0], options.limit, sorting, options.offensives
        )
        for synonym in synonyms:
            print(synonym)


if __name__ == "__main__":
    _cmdline()