| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- import json
- import os
- import sys
- import logging
- from datetime import datetime, timedelta
- from io import BytesIO
- import openai
- from llama_index import VectorStoreIndex, Document, StorageContext, load_index_from_storage
- from adm.constants import CTS as cts
- from adm.storage import MediaStorage
- from adm.services import ParameterService
- logger = logging.getLogger('dsp')
class Chatbot():
    """Maintain a llama_index vector index built from chat documents.

    On construction the OpenAI API key is read from ``ParameterService``,
    a ``MediaStorage`` handle is created, and an index previously persisted
    under ``./storage`` is loaded when one is available.
    """

    def __init__(self) -> None:
        """Configure OpenAI and storage, then try to load the persisted index.

        When loading fails, the error is logged and ``ai_index`` stays
        ``None`` so that ``train`` builds a fresh index.
        """
        parameters = ParameterService()
        openai.api_key = parameters.getParameterByKey("OPENAI_API_KEY").value
        self.media_storage = MediaStorage()
        self.docs_path = 'chatbot/docs/'
        # load index
        self.ai_storage_context = None
        self.ai_index = None
        try:
            self.ai_storage_context = StorageContext.from_defaults(persist_dir='./storage')
            self.ai_index = load_index_from_storage(self.ai_storage_context)
        except Exception as error:
            # Best effort: a missing or unreadable index must not abort
            # construction. The exception is passed as a logging argument;
            # concatenating it to a str would raise TypeError here.
            logger.info('Chatbot() init: %s', error)

    def train(self, chat=None):
        """Index documents, persist the index and start the chat REPL.

        Args:
            chat: Optional mapping describing a single chat. When truthy it
                is completed with default ``id``/``type``/``category`` keys
                (the mapping is modified in place), saved to media storage
                and added to the index. When falsy, every non-empty blob
                listed under ``self.docs_path`` is indexed instead.
        """
        if chat:
            docs = [self._store_chat(chat)]
        else:
            docs = self._load_stored_docs()

        # parse documents
        ai_documents = [Document(**doc) for doc in docs]

        # create/add to index
        if self.ai_index is None:
            self.ai_index = VectorStoreIndex.from_documents(ai_documents, show_progress=True)
        else:
            for ai_doc in ai_documents:
                self.ai_index.insert(ai_doc)

        # save index to disk
        self.ai_index.storage_context.persist()

        chat_engine = self.ai_index.as_chat_engine(
            chat_mode='react',
            verbose=True,
        )
        # NOTE(review): chat_repl() is an interactive loop; confirm it is
        # intended to run at the end of every train() call.
        chat_engine.chat_repl()

    def _store_chat(self, chat):
        """Fill in default keys on *chat*, save it as JSON and return its doc dict."""
        chat.setdefault('id', datetime.utcnow().replace(microsecond=0).isoformat())
        chat.setdefault('type', 'chat')
        chat.setdefault('category', 'uncategorized')

        blob_name = f"{self.docs_path}chat-{chat['id']}.json"
        text = json.dumps(chat, ensure_ascii=False)

        # NOTE(review): because '/' is an absolute path segment,
        # os.path.join() discards media_storage.location here and yields
        # '/' + blob_name. Kept as-is; confirm the intended bucket path.
        path_bucket = os.path.join(self.media_storage.location, '/', blob_name)
        self.media_storage.save(path_bucket, BytesIO(text.encode("utf-8")))

        return {
            'id_': chat['id'],
            'text': text,
            'metadata': {
                'filename': blob_name,
                'category': chat['category'],
            },
        }

    def _load_stored_docs(self):
        """Return a doc dict for every non-empty blob under ``self.docs_path``."""
        docs = []
        for blob in self.media_storage.list(self.docs_path):
            text = blob.download_as_bytes().decode('utf-8')
            if text:
                docs.append(self._doc_from_text(blob.name, text))
        return docs

    @staticmethod
    def _doc_from_text(filename, text):
        """Build the ``Document`` keyword dict for one stored blob.

        Text starting with ``{`` is parsed as JSON to read its ``id`` and
        ``category``. Plain text, unparsable JSON, or JSON lacking those
        keys falls back to *filename* as the id and ``'uncategorized'`` as
        the category, so one odd blob cannot abort the whole training run.
        The raw *text* is always what gets indexed.
        """
        fields = {}
        if text.startswith('{'):
            try:
                fields = json.loads(text)
            except ValueError:
                # Not valid JSON after all: index it as plain text.
                fields = {}
        return {
            'id_': fields.get('id', filename),
            'text': text,
            'metadata': {
                'filename': filename,
                'category': fields.get('category', 'uncategorized'),
            },
        }
|