import json
import os
import random
import time
from tqdm import tqdm
import pickle
from scipy.spatial.distance import cosine
from flair.data import Sentence
from flair.embeddings import WordEmbeddings, DocumentPoolEmbeddings
# Shared sentence encoder used by every chatbot method: it mean-pools the
# 'en' word embeddings of a text into a single document vector.
embeddings = DocumentPoolEmbeddings(
    [WordEmbeddings("en")],
    pooling="mean",
)
class chatbot:
    """Nearest-neighbour intent matcher built on pooled word embeddings.

    All methods are static and use the module-level ``embeddings`` object
    to turn text into vectors.
    """

    @staticmethod
    def _embed_text(text):
        """Return the document embedding of *text* as a NumPy array."""
        sentence = Sentence(text)
        embeddings.embed(sentence)
        # .cpu() is a no-op for CPU tensors, but .numpy() refuses tensors that
        # live on another device, so move the data first.
        return sentence.embedding.detach().cpu().numpy()

    @staticmethod
    def prepare_embeddings(input_file, output_file):
        """Embed every example sentence of every intent and pickle the result.

        Args:
            input_file: Path to a JSON file mapping each intent name to a
                list of example sentences.
            output_file: Path of the pickle file to write. Missing parent
                directories are created.

        The pickled object is a dict mapping each intent name to a list of
        embedding arrays, one per example, in the order of the input file.
        """
        with open(input_file, encoding="utf-8") as file:
            intent_dict = json.load(file)

        embedded_intent_dict = {}
        for intent, examples in tqdm(intent_dict.items()):
            embedded_intent_dict[intent] = [
                chatbot._embed_text(example) for example in examples
            ]

        # A bare file name has an empty dirname and os.makedirs("") raises,
        # so only create the directory when there actually is one.
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(output_file, "wb") as file:
            pickle.dump(embedded_intent_dict, file)

    @staticmethod
    def answer(message, embeddings_file, anwsers_file, verbose=False):
        """Return a random canned answer for the intent closest to *message*.

        Args:
            message: Text to classify.
            embeddings_file: Path to a pickle written by
                ``prepare_embeddings``.
            anwsers_file: Path to a JSON file mapping each intent name to a
                list of possible answers.
            verbose: When truthy, print the chosen intent and the elapsed
                time.

        Returns:
            One randomly chosen answer for the best-matching intent, or the
            string ``"Error intent not in dict"`` when the selected intent
            has no entry in the answers file.
        """
        start = time.time()

        # SECURITY: unpickling can execute arbitrary code. Only load
        # embedding files that come from a trusted source.
        with open(embeddings_file, "rb") as file:
            embedded_dict = pickle.load(file)

        message_vector = chatbot._embed_text(message)

        # The score is a cosine distance (lower is closer). Starting at 1 means
        # only examples with a distance strictly below 1 can be selected;
        # if none qualifies, best_intent stays "" and the lookup below
        # normally falls through to the error string.
        best_intent = ""
        best_score = 1
        for intent, examples in embedded_dict.items():
            for example in examples:
                score = cosine(message_vector, example)
                if score < best_score:
                    best_score = score
                    best_intent = intent

        if verbose:
            print(f"best inent: {best_intent}")

        with open(anwsers_file, encoding="utf-8") as file:
            anwsers_dict = json.load(file)

        if best_intent not in anwsers_dict:
            return "Error intent not in dict"

        if verbose:
            print(f'answer time: {time.time()-start}')
        return random.choice(anwsers_dict[best_intent])
if __name__ == "__main__":
    # Interactive console session: read a message, print the bot's reply.
    # Ctrl+C or end-of-input at the prompt ends the session cleanly instead
    # of terminating with a traceback.
    while True:
        try:
            input_message = input("Wiadomość: ")
        except (EOFError, KeyboardInterrupt):
            print()
            break
        reply = chatbot.answer(
            input_message,
            embeddings_file='embedded_intents/test1.pkl',
            anwsers_file='answers/test1.json',
        )
        print(f"Bot:{reply}")