import json
import os
import sys
import logging
from datetime import datetime, timedelta
from io import BytesIO

import openai
from llama_index import VectorStoreIndex, Document, StorageContext, load_index_from_storage

from adm.constants import CTS as cts
from adm.storage import MediaStorage
from adm.services import ParameterService

logger = logging.getLogger('dsp')


class Chatbot:
    """Vector-store chatbot over chat documents kept in MediaStorage.

    On construction, configures the OpenAI API key from ParameterService and
    attempts to reload a previously persisted llama_index index from
    ``./storage``; if none exists, the index is built lazily by :meth:`train`.
    """

    def __init__(self) -> None:
        objParameter = ParameterService()
        openai.api_key = objParameter.getParameterByKey("OPENAI_API_KEY").value

        self.media_storage = MediaStorage()
        # Prefix under which chat documents live in the media store.
        self.docs_path = 'chatbot/docs/'

        # Try to reload a previously persisted index. A missing ./storage
        # directory is expected on first run, so failure is only logged.
        self.ai_storage_context = None
        self.ai_index = None
        try:
            self.ai_storage_context = StorageContext.from_defaults(persist_dir='./storage')
            self.ai_index = load_index_from_storage(self.ai_storage_context)
        except Exception as error:
            # Narrowed from BaseException (which also swallowed
            # KeyboardInterrupt/SystemExit). Bug fix: the original
            # 'Chatbot() init: ' + error raised TypeError (str + Exception);
            # let logging format the exception lazily instead.
            logger.info('Chatbot() init: %s', error)

    def train(self, chat=None):
        """Build or extend the vector index, then start an interactive REPL.

        :param chat: optional dict representing a single chat. When given, it
            is persisted to the media store and inserted into the index; when
            omitted, every document under ``docs_path`` is (re)loaded.

        The index is persisted to ``./storage`` so __init__ can reload it on
        the next run, then an interactive react chat REPL is started.
        """
        if chat:
            docs = [self._doc_from_chat(chat)]
        else:
            docs = self._docs_from_storage()

        ai_documents = [Document(**doc) for doc in docs]

        # Create the index on first use; otherwise insert incrementally.
        if self.ai_index is None:
            self.ai_index = VectorStoreIndex.from_documents(ai_documents, show_progress=True)
        else:
            for ai_doc in ai_documents:
                self.ai_index.insert(ai_doc)

        # Persist so a later Chatbot() picks the updated index back up.
        self.ai_index.storage_context.persist()

        chat_engine = self.ai_index.as_chat_engine(chat_mode='react', verbose=True)
        chat_engine.chat_repl()

    def _doc_from_chat(self, chat):
        """Persist one chat dict to the media store; return its document spec."""
        # Fill in defaults so every stored chat is self-describing.
        # NOTE(review): utcnow() kept to preserve the existing naive-ISO id
        # format, although it is deprecated in Python 3.12+.
        chat.setdefault('id', datetime.utcnow().replace(microsecond=0).isoformat())
        chat.setdefault('type', 'chat')
        chat.setdefault('category', 'uncategorized')

        blob_name = self.docs_path + 'chat-' + chat['id'] + '.json'
        text = json.dumps(chat, ensure_ascii=False)
        # Bug fix: os.path.join(location, '/', blob_name) discarded `location`
        # entirely — an absolute '/' component makes join restart from it.
        path_bucket = os.path.join(self.media_storage.location, blob_name)
        self.media_storage.save(path_bucket, BytesIO(text.encode("utf-8")))

        return {
            'id_': chat['id'],
            'text': text,
            'metadata': {
                'filename': blob_name,
                'category': chat['category'],
            },
        }

    def _docs_from_storage(self):
        """Load every non-empty blob under ``docs_path`` into document specs."""
        docs = []
        for blob in self.media_storage.list(self.docs_path):
            text = blob.download_as_bytes().decode('utf-8')
            if not text:
                continue
            if text.startswith('{'):
                obj = json.loads(text)
                # Fall back to the blob name when the JSON carries no id
                # (the original obj['id'] raised KeyError in that case).
                doc_id = obj.get('id', blob.name)
                category = obj.setdefault('category', 'uncategorized')
            else:
                # Bug fix: the original kept plain text as a str but then
                # applied dict operations (obj['category'] = ..., obj['id']),
                # which raised TypeError for every non-JSON blob.
                doc_id = blob.name
                category = 'uncategorized'
            docs.append({
                'id_': doc_id,
                'text': text,
                'metadata': {
                    'filename': blob.name,
                    'category': category,
                },
            })
        return docs