Commit 8023cfeb authored by Bianco Martinez, Julian
Browse files

Merge branch 'EBJ_string_dist' into 'master'

Adding restricted Damerau-Levenshtein distance to the code.

See merge request !3
parents fa62bdf5 3571451d
import gensim.downloader as api
import stringdist as sdi
class AssociatedWords:
def __init__(self):
    """Set the result limit and load the pretrained word-vector model.

    Loading goes through ``gensim.downloader`` (imported as ``api``), so
    progress messages are printed before and after the call.
    """
    # The earlier value (7) was immediately overwritten by this one, so
    # only the effective setting is kept.
    # NOTE(review): presumably the number of results requested per query;
    # confirm against the code that reads N_RESULTS.
    self.N_RESULTS = 10
    print("\n Word2Vec model is loading.This can take a couple of minutes.")
    self.model = api.load('glove-twitter-200')
    print("\n Word2Vec model is ready. Enjoy!!!")
......@@ -25,10 +25,12 @@ class AssociatedWords:
# Keep only the result tuples whose word (element 0) passes the filter.
# NOTE(review): assumes every entry of self.gensim_result is a
# (word, score) tuple, as gensim's most_similar returns; confirm where
# gensim_result is assigned.
self.filtered_results = [
    result_tuple
    for result_tuple in self.gensim_result
    if self.passes_filter(result_tuple[0])
]
self.words = [result_tuple[0] for result_tuple in self.filtered_results]
# Scores are element 1 of each tuple; index 0 would just repeat the words.
self.scores = [result_tuple[1] for result_tuple in self.filtered_results]
def passes_filter(self, word):
    """Return True if ``word`` is acceptable as an associated word.

    A word is rejected (False) when any of the following holds:

    * it contains the substring ``'www'``;
    * it is a substring of ``self.base_word``;
    * its normalized restricted Damerau-Levenshtein distance to
      ``self.base_word`` is below 0.5, i.e. it is too similar in spelling.

    Args:
        word: Candidate word to check.

    Returns:
        bool: False if the word is filtered out, True otherwise.
    """
    if 'www' in word:
        return False
    # Substring test: also rejects the base word itself.
    if word in self.base_word:
        return False
    # Checked last so the cheaper tests above can short-circuit it.
    if sdi.rdlevenshtein_norm(word, self.base_word) < 0.5:
        return False
    return True
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment