diff options
author | Richard <q@1bpm.net> | 2021-03-14 15:55:50 +0000 |
---|---|---|
committer | Richard <q@1bpm.net> | 2021-03-14 15:55:50 +0000 |
commit | ccc8a42c42985b3227c2d9f3a27f967dcaa7d70e (patch) | |
tree | 4864e5a5fabed8d5883c13ac469fcf1d00b6bb04 /synonymiser | |
download | synonymiser-ccc8a42c42985b3227c2d9f3a27f967dcaa7d70e.tar.gz synonymiser-ccc8a42c42985b3227c2d9f3a27f967dcaa7d70e.tar.bz2 synonymiser-ccc8a42c42985b3227c2d9f3a27f967dcaa7d70e.zip |
Diffstat (limited to 'synonymiser')
-rw-r--r-- | synonymiser/.htaccess | 7 | ||||
-rw-r--r-- | synonymiser/config.dist.py | 5 | ||||
-rw-r--r-- | synonymiser/db.py | 34 | ||||
-rw-r--r-- | synonymiser/synonymiser.py | 127 |
4 files changed, 173 insertions, 0 deletions
diff --git a/synonymiser/.htaccess b/synonymiser/.htaccess
new file mode 100644
index 0000000..1127c93
--- /dev/null
+++ b/synonymiser/.htaccess
@@ -0,0 +1,7 @@
+Options +ExecCGI
+AddHandler cgi-script .py
+<Files /app/main.py>
+    Options +ExecCGI
+    AddHandler cgi-script .py
+    Require all granted
+</Files>
diff --git a/synonymiser/config.dist.py b/synonymiser/config.dist.py
new file mode 100644
index 0000000..c6434ee
--- /dev/null
+++ b/synonymiser/config.dist.py
@@ -0,0 +1,5 @@
+db_host = ""
+db_name = ""
+db_user = ""
+db_password = ""
+
diff --git a/synonymiser/db.py b/synonymiser/db.py
new file mode 100644
index 0000000..e8c830a
--- /dev/null
+++ b/synonymiser/db.py
@@ -0,0 +1,34 @@
+"""
+    db.py
+    copyright 2021 Richard Knight
+"""
+from peewee import *
+from playhouse.postgres_ext import *
+import config
+
+_db = PostgresqlExtDatabase(
+    config.db_name,
+    **{
+        "host": config.db_host,
+        "user": config.db_user,
+        "password": config.db_password
+    }
+)
+
+
+class BaseModel(Model):
+    class Meta:
+        database = _db
+
+
+class Word(BaseModel):
+    word = TextField()
+    offensive = BooleanField()
+    class Meta:
+        db_table = "word"
+
+
+class Synonym(BaseModel):
+    base = ForeignKeyField(Word, related_name="base_word")
+    synonym = ForeignKeyField(Word, related_name="syn_word")
+
diff --git a/synonymiser/synonymiser.py b/synonymiser/synonymiser.py
new file mode 100644
index 0000000..14951a7
--- /dev/null
+++ b/synonymiser/synonymiser.py
@@ -0,0 +1,127 @@
+"""
+    synonymiser.py
+    copyright 2021 Richard Knight
+"""
+from db import Word, Synonym
+from peewee import fn
+import sys
+
+
+# vowels for the changing of a/an
+vowels = ["a", "e", "i", "o", "u"];
+
+
+# recapitalise new_word as to that of word
+def _recapitalise(word, new_word):
+    if word[0].upper() == word[0]:
+        if len(word) > 1 and word[1].upper() == word[1]:
+            return new_word.upper()
+        else:
+            return new_word.capitalize()
+    else:
+        return new_word
+
+
+# sorting types
+class SORTING:
+    RANDOM=0
+    ALPHA=2
+    NONE=3
+
+
+# get synonyms
+def get_synonyms(word, limit=1, sorting=SORTING.RANDOM, offensives=False):
+    base_word = Word.select().where(fn.LOWER(Word.word) == word.lower())
+    if not base_word.exists():
+        return [word]
+    query = Synonym.select(Synonym, Word).join(
+        Word, on=(Synonym.synonym==Word.id)
+    ).where(
+        (Synonym.base == base_word.get())
+        & ((Word.offensive == False) if offensives else True)
+    )
+
+    if sorting == SORTING.RANDOM:
+        query = query.order_by(fn.RANDOM())
+    elif sorting == SORTING.ALPHA:
+        query = query.order_by(Word.word)
+
+    query = query.limit(limit)
+    if not query.exists():
+        return [word]
+    return [_recapitalise(word, q.synonym.word) for q in query]
+
+
+# rephrase a/an prepends
+def _rephrase(line):
+    new = list()
+    for i, v in enumerate(line):
+        nv = v
+        if i+1 < len(line):
+            if v == "a" and line[i+1][0].lower() in vowels:
+                nv = "an"
+            if v == "an" and line[i+1][0].lower() not in vowels:
+                nv = "a"
+        new.append(nv)
+    return new
+
+
+# synonymise a line randomly
+def synonymise(line, offensives=False):
+    output = list()
+    words = line.split(" ")
+    for word in words:
+        if len(word) < 4:
+            output.append(word)
+        else:
+            output.append(get_synonyms(word, offensives=offensives)[0])
+    return " ".join(_rephrase(output))
+
+
+# command line run
+def _cmdline():
+    from optparse import OptionParser
+    usage = "usage: %prog [options] word"
+    parser = OptionParser(usage=usage)
+    parser.add_option(
+        "-l", "--limit",
+        dest="limit",
+        help="maximum number of synonyms to retreive [default: %default]",
+        default=999, type="int", metavar="LIMIT"
+    )
+    parser.add_option(
+        "-o", "--offensives",
+        dest="offensives",
+        help="show words marked as offensive [default: %default]",
+        default=False, action="store_true"
+    )
+    parser.add_option(
+        "-s", "--sorting",
+        dest="sorting",
+        help="sort order of retreived synonyms (random, alpha, none) [default: %default]",
+        default="random", type="string", metavar="SORTING"
+    )
+
+    (options, args) = parser.parse_args()
+    using_stdin = not sys.stdin.isatty()
+    if len(args) != 1 and not using_stdin:
+        parser.error("word(s) to synonymise required")
+
+    sorting = SORTING.NONE
+    if options.sorting == "random":
+        sorting = SORTING.RANDOM
+    elif options.sorting == "alpha":
+        sorting = SORTING.ALPHA
+
+    if using_stdin:
+        print synonymise(sys.stdin.read(), options.offensives)
+    elif len(args[0].split(" ")) > 1:
+        print synonymise(args[0], options.offensives)
+    else:
+        synonyms = get_synonyms(args[0], options.limit, sorting, options.offensives)
+        for synonym in synonyms:
+            print synonym
+
+if __name__ == "__main__":
+    _cmdline()
+
|