Commit c181a7ba authored by Jim Hoekstra's avatar Jim Hoekstra 👋🏻
Browse files

merge with master

parents fc410b18 8023cfeb
import gensim.downloader as api import gensim.downloader as api
import stringdist as sdi
class AssociatedWords: class AssociatedWords:
def __init__(self): def __init__(self):
self.N_RESULTS = 7 self.N_RESULTS = 10
print('loading model') print("\n Word2Vec model is loading.This can take a couple of minutes.")
self.model = api.load('glove-twitter-200') self.model = api.load('glove-twitter-200')
print('done loading model') print("\n Word2Vec model is ready. Enjoy!!!")
self.base_word = None self.base_word = None
self.gensim_result = None self.gensim_result = None
...@@ -24,10 +25,12 @@ class AssociatedWords: ...@@ -24,10 +25,12 @@ class AssociatedWords:
self.filtered_results = [result_tuple for result_tuple in self.gensim_result if self.passes_filter(result_tuple[0])] self.filtered_results = [result_tuple for result_tuple in self.gensim_result if self.passes_filter(result_tuple[0])]
self.words = [result_tuple[0] for result_tuple in self.filtered_results] self.words = [result_tuple[0] for result_tuple in self.filtered_results]
self.scores = [result_tuple[0] for result_tuple in self.filtered_results] self.scores = [result_tuple[0] for result_tuple in self.filtered_results]
def passes_filter(self, word):
    """Return True when *word* may be kept as an associated word.

    A candidate is rejected (False) when any of the following holds,
    checked in this order:

    * it contains the substring ``'www'``;
    * ``word in self.base_word`` is true (for a string base word this
      means the candidate is a substring of it);
    * ``sdi.rdlevenshtein_norm(word, self.base_word)`` is below 0.5.
      NOTE(review): presumably a normalized restricted Damerau-Levenshtein
      distance, so small values mean "nearly the same spelling" -- confirm
      against the stringdist documentation.
    """
    if 'www' in word:
        return False
    if word in self.base_word:
        return False
    # Only reached for candidates that survived the cheap string checks.
    if sdi.rdlevenshtein_norm(word, self.base_word) < 0.5:
        return False
    return True
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment