2 Commits 5c313e3c25 ... 8ba8b2a988

Author SHA1 Message Date
  donzito 8ba8b2a988 Merge branch 'main' of http://git.mobees.com.br/MOBEES-BR/mobees-ai 2 years ago
  donzito 11350328d6 Chatbot train with pills 2 years ago
5 changed files with 103 additions and 29 deletions
  1. + 2 - 1    .vscode/launch.json
  2. + 12 - 1   adm/management/commands/trainChatbot.py
  3. + 5 - 0    adm/storage.py
  4. + 83 - 26  chatbot/main.py
  5. + 1 - 1    requirements.txt

+ 2 - 1
.vscode/launch.json

@@ -21,7 +21,8 @@
       "request": "launch",
       "program": "${workspaceFolder}/manage.py",
       "args": [
-          "trainChatbot"
+          "trainChatbot",
+          "Olá >>> Olá {{name}}! Eu sou a Mel, bot da operação Mobees. Prazer! Fui criada para ajudar no relacionamento e manutenção dos equipamentos, e deixar nossa Parceria em fina sintonia! 😉"
       ],
       "env": {
           // "ENV_NAME": "production"

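Note on the launch change: VS Code passes each entry of "args" to manage.py as one argv element, so the whole greeting (spaces and emoji included) reaches the trainChatbot command below as a single positional argument.
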
+ 12 - 1
adm/management/commands/trainChatbot.py

@@ -3,7 +3,18 @@ from chatbot.main import Chatbot
 
 
 class Command(BaseCommand):
+
+    def add_arguments(self, parser):
+        parser.add_argument('interaction', type=str, help='Chatbot interaction to train')
     
     def handle(self, *args, **options):
+        # split on '>>>': the first part is the user message, the rest
+        # are support replies; strip stray whitespace around the separator
+        parts = [part.strip() for part in options['interaction'].split('>>>')]
+        chat = {
+            'chat': ['User: ' + parts[0]]
+        }
+        for content in parts[1:]:
+            chat['chat'].append('Support: ' + content)
         
-        Chatbot().train()
+        Chatbot().train(chat)

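The same command can be exercised from code. A minimal sketch, assuming a configured Django project; call_command is Django's standard API for invoking management commands, and the interaction string follows the '>>>' convention parsed above:

    from django.core.management import call_command

    # everything before '>>>' becomes the 'User:' turn, everything after
    # becomes a 'Support:' turn, producing e.g.
    #   {'chat': ['User: Olá', 'Support: Olá {{name}}! Eu sou a Mel.']}
    call_command('trainChatbot', 'Olá >>> Olá {{name}}! Eu sou a Mel.')
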
+ 5 - 0
adm/storage.py

@@ -67,3 +67,8 @@ class MediaStorage():
     
     def generate_filename(self,filename):
         return filename
+    
+    def list(self, prefix=None):
+        # Return the blobs stored under the given prefix (subdirectory) of the bucket
+        blobs = list(self.storage.bucket.list_blobs(prefix=prefix))
+        return blobs

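A usage sketch for the new list() helper, assuming the django-storages Google Cloud Storage backend (self.storage.bucket is then a google.cloud.storage Bucket, and list_blobs(prefix=...) filters server-side):

    from adm.storage import MediaStorage

    # list every training document under the chatbot docs prefix
    for blob in MediaStorage().list(prefix='chatbot/docs/'):
        print(blob.name, blob.size)  # each item is a google.cloud.storage Blob
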
+ 83 - 26
chatbot/main.py

@@ -2,47 +2,104 @@
 import json
 import os
 import sys
+import logging
+from datetime import datetime, timedelta
+from io import BytesIO
+
 import openai
-from llama_index import GPTVectorStoreIndex, download_loader
-# from llama_index.chat_engine import SimpleChatEngine
-from datetime import datetime
-from django.db import connection, models
-from django.db.models import Q
-import re
+from llama_index import VectorStoreIndex, Document, StorageContext, load_index_from_storage
 
 from adm.constants import CTS as cts
 from adm.storage import MediaStorage
 from adm.services import ParameterService
 
+logger = logging.getLogger('dsp')
+
 class Chatbot():
     def __init__(self) -> None:
         objParameter = ParameterService()
         openai.api_key = objParameter.getParameterByKey("OPENAI_API_KEY").value
         media_storage = MediaStorage()
-        self.bucket = media_storage.storage.bucket.name
-        self.storage = media_storage.storage
-        self.credentials = json.dumps(media_storage.credentials)
-        # self.docs_path = objParameter.getParameterByKey("CHATBOT_DOCS_PATH").value
-        self.docs_path = '/chatbot/docs'
-        self.endpoint = 'https://' + cts.GCP_ST_ACCESS_KEY_ID
+        self.media_storage = media_storage
+        self.docs_path = 'chatbot/docs/'
+
+        # load index
+        self.ai_storage_context = None
+        self.ai_index = None
+        try:
+            self.ai_storage_context = StorageContext.from_defaults(persist_dir='./storage')
+            self.ai_index = load_index_from_storage(self.ai_storage_context)
+        except BaseException as error:
+            # no persisted index yet (or loading failed); train() will build one
+            logger.info('Chatbot() init: %s', error)
         pass
 
-    def train (self):
-        OpendalGcsReader = download_loader("OpendalGcsReader")
-        
-        loader = OpendalGcsReader(
-            bucket=self.bucket,
-            path=self.docs_path,
-            endpoint=self.endpoint,
-            credentials=self.credentials,
-        )
-        documents = loader.load_data()
+    def train(self, chat=None):
+        docs = []
+        if chat:
+            # use chat to build up on existing store
+            # save chat as doc in storage
+            if 'id' not in chat:
+                chat['id'] = datetime.utcnow().replace(microsecond=0).isoformat()
+            if 'type' not in chat:
+                chat['type'] = 'chat'
+            if 'category' not in chat:
+                chat['category'] = 'uncategorized'
+
+            blob_name = self.docs_path + 'chat-' + chat['id'] + '.json'
+            text = json.dumps(chat, ensure_ascii=False)
+            
+            # note: os.path.join(a, '/', b) would discard a and yield '/b'
+            path_bucket = os.path.join(self.media_storage.location, blob_name)
+            self.media_storage.save(path_bucket, BytesIO(text.encode("utf-8")))
 
-        # construct the index with the txt document
-        index = GPTVectorStoreIndex.from_documents(documents)
+            docs.append({
+                'id_': chat['id'],
+                'text': text,
+                'metadata': {
+                    'filename': blob_name, 
+                    'category': chat['category']
+                }
+            })
+
+        else:
+            blobs = self.media_storage.list(self.docs_path)
+            for blob in blobs:
+                text = blob.download_as_bytes().decode('utf-8')
+                if len(text) > 0:
+                    # plain-text blobs are not JSON; give them a minimal
+                    # metadata wrapper so they index the same way as chats
+                    obj = json.loads(text) if text.startswith('{') else {}
+                    if 'id' not in obj:
+                        obj['id'] = blob.name
+                    if 'category' not in obj:
+                        obj['category'] = 'uncategorized'
+
+                    docs.append({
+                        'id_': obj['id'],
+                        'text': text,
+                        'metadata': {
+                            'filename': blob.name,
+                            'category': obj['category']
+                        }
+                    })
+
+        # parse documents
+        ai_documents = []
+        for doc in docs:
+            ai_documents.append(Document(**doc))
+
+        # create/add to index
+        if self.ai_index is None:
+            self.ai_index = VectorStoreIndex.from_documents(ai_documents, show_progress=True)
+        else:
+            for ai_doc in ai_documents:
+                self.ai_index.insert(ai_doc)
+        
+        # save index to disk
+        self.ai_index.storage_context.persist()
 
-        chat_engine = index.as_chat_engine(
-            chat_mode='condense_question', 
+        chat_engine = self.ai_index.as_chat_engine(
+            chat_mode='react', 
             verbose=True
         )
 

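Taken together, the reworked train() now supports two paths; a hedged sketch of both, with the chat payload shaped the way trainChatbot.py builds it:

    from chatbot.main import Chatbot

    bot = Chatbot()  # __init__ tries to load a persisted index from ./storage

    # incremental: persist one chat to the bucket and insert it into the index
    bot.train({'chat': ['User: Olá', 'Support: Olá {{name}}! Eu sou a Mel.']})

    # full pass: with no argument, every blob under chatbot/docs/ is re-read
    # and indexed, and the index is persisted back to disk
    bot.train()
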
+ 1 - 1
requirements.txt

@@ -30,6 +30,6 @@ google-auth-httplib2
 google-auth-oauthlib
 googleapis-common-protos
 google-cloud-storage
-llama-index==0.7.4
+llama-index==0.7.6
 llama_hub
 openai