main.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. import json
  2. import os
  3. import sys
  4. import logging
  5. from datetime import datetime, timedelta
  6. from io import BytesIO
  7. import openai
  8. from llama_index import VectorStoreIndex, Document, StorageContext, load_index_from_storage
  9. from adm.constants import CTS as cts
  10. from adm.storage import MediaStorage
  11. from adm.services import ParameterService
  12. logger = logging.getLogger('dsp')
  13. class Chatbot():
  def __init__(self) -> None:
      """Configure the OpenAI key, media storage, and any persisted index.

      Reads the ``OPENAI_API_KEY`` parameter through ``ParameterService`` and
      assigns it to ``openai.api_key``, keeps a ``MediaStorage`` instance on
      ``self.media_storage``, and then tries to load a previously persisted
      index from ``./storage``.

      Loading the index is best-effort: if it fails, the error is logged and
      ``self.ai_index`` stays ``None``.
      """
      parameters = ParameterService()
      openai.api_key = parameters.getParameterByKey("OPENAI_API_KEY").value

      self.media_storage = MediaStorage()
      self.docs_path = 'chatbot/docs/'

      # load index
      self.ai_storage_context = None
      self.ai_index = None
      try:
          self.ai_storage_context = StorageContext.from_defaults(persist_dir='./storage')
          self.ai_index = load_index_from_storage(self.ai_storage_context)
      except Exception as error:
          # Pass the exception as a logging argument: concatenating it to the
          # message with ``+`` raises TypeError and would turn this
          # best-effort load into a constructor crash.
          logger.info('Chatbot() init: %s', error)
  def train(self, chat=None):
      """Add documents to the vector index, persist it, and open a chat engine.

      Args:
          chat: Optional dict describing a single chat. When given (and
              truthy), missing ``id``, ``type`` and ``category`` keys are
              filled in on the dict itself, the dict is saved as JSON through
              ``self.media_storage``, and only that document is indexed.
              Otherwise every non-empty blob listed under ``self.docs_path``
              is indexed.

      Blobs whose text is not a JSON object are indexed as plain text, using
      the blob name as document id and ``'uncategorized'`` as category.
      """
      docs = []
      if chat:
          # use chat to build up on existing store
          # save chat as doc in storage
          chat.setdefault('id', datetime.utcnow().replace(microsecond=0).isoformat())
          chat.setdefault('type', 'chat')
          chat.setdefault('category', 'uncategorized')

          blob_name = self.docs_path + 'chat-' + chat['id'] + '.json'
          text = json.dumps(chat, ensure_ascii=False)
          # NOTE(review): on POSIX, os.path.join drops every segment that
          # precedes an absolute one, so the '/' here discards
          # media_storage.location. Kept unchanged because the expected
          # storage key layout is not visible here -- confirm.
          path_bucket = os.path.join(self.media_storage.location, '/', blob_name)
          self.media_storage.save(path_bucket, BytesIO(text.encode("utf-8")))
          docs.append({
              'id_': chat['id'],
              'text': text,
              'metadata': {
                  'filename': blob_name,
                  'category': chat['category'],
              },
          })
      else:
          for blob in self.media_storage.list(self.docs_path):
              text = blob.download_as_bytes().decode('utf-8')
              if not text:
                  continue

              # Only JSON objects carry their own id/category; anything else
              # is treated as plain text with defaults taken from the blob.
              fields = {}
              if text.startswith('{'):
                  try:
                      fields = json.loads(text)
                  except ValueError as error:
                      logger.warning(
                          'Chatbot() train: invalid JSON in %s: %s',
                          blob.name,
                          error,
                      )
              docs.append({
                  'id_': fields.get('id', blob.name),
                  'text': text,
                  'metadata': {
                      'filename': blob.name,
                      'category': fields.get('category', 'uncategorized'),
                  },
              })

      # parse documents
      ai_documents = [Document(**doc) for doc in docs]

      # create/add to index
      if self.ai_index is None:
          self.ai_index = VectorStoreIndex.from_documents(ai_documents, show_progress=True)
      else:
          for ai_doc in ai_documents:
              self.ai_index.insert(ai_doc)

      # save index to disk
      self.ai_index.storage_context.persist()

      # NOTE(review): chat_repl() looks like an interactive loop; confirm it
      # is meant to run as part of training rather than being a debug
      # leftover.
      chat_engine = self.ai_index.as_chat_engine(
          chat_mode='react',
          verbose=True,
      )
      chat_engine.chat_repl()