3
0

3 Commits 5c313e3c25 ... 8f289bb37f

Autor SHA1 Mensagem Data
  donzito 8f289bb37f Chatbot training v0 há 2 anos atrás
  donzito 8ba8b2a988 Merge branch 'main' of http://git.mobees.com.br/MOBEES-BR/mobees-ai há 2 anos atrás
  donzito 11350328d6 Chatbot train with pills há 2 anos atrás

+ 8 - 3
.vscode/launch.json

@@ -10,10 +10,14 @@
       "request": "launch",
       "program": "${workspaceFolder}/manage.py",
       "args": [
-        "runserver"
+          "runserver",
+          "7003",
       ],
+      "env": {
+        // "ENV_NAME": "production"
+      },
       "django": true,
-      "justMyCode": true
+      "justMyCode" : false
     },
     {
       "name": "Command",
@@ -21,7 +25,8 @@
       "request": "launch",
       "program": "${workspaceFolder}/manage.py",
       "args": [
-          "trainChatbot"
+          "trainChatbot",
+          "Não recebi o pagamento ainda >>> Você deve ter 160 horas de exibição de anúncios nas cidades que a Mobees já está presente, até a data do Fechamento. Caso você tenha completado as 160 horas, o pagamento acontece sempre no dia 10 do mês seguinte à data do Fechamento do ciclo – até às 23:59."
       ],
       "env": {
           // "ENV_NAME": "production"

BIN
adm/__pycache__/constants.cpython-39.pyc


BIN
adm/__pycache__/storage.cpython-39.pyc


+ 2 - 0
adm/constants.py

@@ -28,6 +28,8 @@ class CTS():
     GCP_QUEUES_FST = settings.GCP_QUEUES_FST
     GCP_QUEUES_FUNC = settings.GCP_QUEUES_FUNC
     GCP_DOC_AI_PROCESSOR_ID = settings.GCP_DOC_AI_PROCESSOR_ID
+
+    CHATBOT_STORAGE_PATH = 'chatbot/'
     
     ADMIN_PROJECT_ID_GCP = "mobees-prd"
     ADMIN_TOPIC_GCP = "admin"

BIN
adm/management/commands/__pycache__/trainChatbot.cpython-39.pyc


+ 14 - 3
adm/management/commands/trainChatbot.py

@@ -1,9 +1,20 @@
 from django.core.management.base import BaseCommand, CommandError
-from chatbot.main import Chatbot
-
+from chatbot.main import instance as Chatbot
+from adm.constants import CTS
 
 class Command(BaseCommand):
+
+    def add_arguments(self, parser):
+        parser.add_argument('interaction', type=str, help='Chatbot interaction to train')
     
     def handle(self, *args, **options):
+        interaction = options['interaction']
+        interaction = interaction.split('>>>')
+        chat = {
+            'chat': ['Driver: ' + interaction[0]]
+        }
+        for content in interaction[1:]:
+            text = 'Support: '+ content
+            chat['chat'].append(text)
         
-        Chatbot().train()
+        Chatbot.train(chat)

BIN
adm/mysql/__pycache__/base.cpython-39.pyc


+ 5 - 0
adm/storage.py

@@ -67,3 +67,8 @@ class MediaStorage():
     
     def generate_filename(self,filename):
         return filename
+    
+    def list(self, prefix=None):
+        # Get blobs in specific subdirectory
+        blobs = list(self.storage.bucket.list_blobs(prefix=prefix))
+        return blobs

BIN
chatbot/__pycache__/__init__.cpython-39.pyc


BIN
chatbot/__pycache__/main.cpython-39.pyc


BIN
chatbot/__pycache__/urls.cpython-39.pyc


BIN
chatbot/__pycache__/views.cpython-39.pyc


+ 130 - 33
chatbot/main.py

@@ -2,48 +2,145 @@
 import json
 import os
 import sys
+import logging
+from datetime import datetime, timedelta
+from io import BytesIO
+from django.utils.text import slugify
+
 import openai
-from llama_index import GPTVectorStoreIndex, download_loader
-# from llama_index.chat_engine import SimpleChatEngine
-from datetime import datetime
-from django.db import connection, models
-from django.db.models import Q
-import re
-
-from adm.constants import CTS as cts
+from llama_index import VectorStoreIndex, Document, StorageContext, load_index_from_storage
+from llama_index.prompts  import Prompt
+from langchain.prompts.chat import (ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate)
+
+
+from adm.constants import CTS
 from adm.storage import MediaStorage
 from adm.services import ParameterService
 
+logger = logging.getLogger('dsp')
+
 class Chatbot():
     def __init__(self) -> None:
         objParameter = ParameterService()
         openai.api_key = objParameter.getParameterByKey("OPENAI_API_KEY").value
         media_storage = MediaStorage()
-        self.bucket = media_storage.storage.bucket.name
-        self.storage = media_storage.storage
-        self.credentials = json.dumps(media_storage.credentials)
-        # self.docs_path = objParameter.getParameterByKey("CHATBOT_DOCS_PATH").value
-        self.docs_path = '/chatbot/docs'
-        self.endpoint = 'https://' + cts.GCP_ST_ACCESS_KEY_ID
+        self.media_storage = media_storage
+        self.storage_path = CTS.CHATBOT_STORAGE_PATH
+
+        # load index
+        self.ai_storage_context = None
+        self.ai_index = None
+        try:
+            self.ai_storage_context = StorageContext.from_defaults(persist_dir='./storage')
+            self.ai_index = load_index_from_storage(self.ai_storage_context)
+        except BaseException as error:
+            # exc_type, exc_obj, exc_tb = sys.exc_info()
+            # fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
+            # strErro = "ERRO: %s | %s | %s | %s" % (error, exc_type, fname, exc_tb.tb_lineno)
+            logger.info('Chatbot() init: '+str(error))
+            pass
         pass
 
-    def train (self):
-        OpendalGcsReader = download_loader("OpendalGcsReader")
+    def train (self, chat=None):
+        docs = []
+        if chat:
+            # use chat to build up on existing store
+            # save chat as doc in storage
+            if not 'id' in chat:
+                question = chat['chat'][0].split(':')[-1]
+                chat['id'] = slugify(question)
+            if not 'type' in chat:
+                chat['type'] = 'instructions'
+            if not 'category' in chat:
+                chat['category'] = 'uncategorized'
+
+            blob_name = chat['id']
+            blob_ext = '.json'
+            file_name = blob_name + blob_ext
+            blob_dir = 'instructions/'
+            blob_path = self.storage_path + blob_dir + file_name
+            text = json.dumps(chat, ensure_ascii=False)
+            
+            path_bucket = os.path.join(self.media_storage.location, '/', blob_path)
+            self.media_storage.save(path_bucket, BytesIO(text.encode("utf-8")))
+
+            docs.append({
+                'id_': blob_name,
+                'text': text,
+                'metadata': {
+                    'filename': file_name, 
+                    'category': chat['category']
+                }
+            })
+
+        else:
+            blob_dir = 'docs/'
+            blobs = self.media_storage.list(self.storage_path + blob_dir)
+            for blob in blobs:
+                text = blob.download_as_bytes().decode('utf-8')
+                if len(text)>0:
+                    obj = json.loads(text) if text.startswith('{') else text
+
+                    if not 'category' in obj:  
+                        obj['category'] = 'uncategorized'
+                    
+                    file_name = blob.name.split('/')[-1]
+
+                    docs.append({
+                        'id_': obj['id'],
+                        'text': text,
+                        'metadata': {
+                            'filename': file_name, 
+                            'category': obj['category']
+                        }
+                    })
+
+        # parse documents
+        ai_documents = []
+        for doc in docs:
+            ai_documents.append(Document(**doc))
+
+        # create/add to index
+        if self.ai_index is None:
+            self.ai_index = VectorStoreIndex.from_documents(ai_documents, show_progress=True)
+        else:
+            for ai_doc in ai_documents:
+                self.ai_index.insert(ai_doc)
         
-        loader = OpendalGcsReader(
-            bucket=self.bucket,
-            path=self.docs_path,
-            endpoint=self.endpoint,
-            credentials=self.credentials,
-        )
-        documents = loader.load_data()
-
-        # construct the index with the txt document
-        index = GPTVectorStoreIndex.from_documents(documents)
-
-        chat_engine = index.as_chat_engine(
-            chat_mode='condense_question', 
-            verbose=True
-        )
-
-        chat_engine.chat_repl()
+        # save index to disk
+        self.ai_index.storage_context.persist()
+
+    def chat (self, question=None):
+        status = 400
+
+        pre_prompt = [
+            SystemMessagePromptTemplate.from_template(
+                "Act as an IT Support Level 1 for the company Mobees. Your name is Mel. You must answer the company's Drivers questions in Brazilian Portuguese, as they only speak this language. Mobees Drivers access support through a chatbot interface in the company's app, which is the only channel for support. If you're not sure how to answer, respond with only the following `<assignee:support>` (this will let our system know it should forward the issue to our support team and take it from there). "
+            ),
+            HumanMessagePromptTemplate.from_template(
+                "Context information is below.\n"
+                "---------------------\n"
+                "{context_str}\n"
+                "---------------------\n"
+                "Given the context information and prior knowledge, "
+                "answer the question: {query_str}\n"
+            ),
+        ]
+        prompt_template = ChatPromptTemplate.from_messages(pre_prompt)
+        text_qa_template = Prompt.from_langchain_prompt(prompt_template)
+
+        answer = self.ai_index.as_query_engine(
+            text_qa_template=text_qa_template
+        ).query(question)
+        print(answer)
+
+        status = 200
+        data = { 'answer': answer.response }
+        return status, data
+
+        # chat_engine = self.ai_index.as_chat_engine(
+        #     verbose=True
+        # )
+        # chat_engine.chat_repl()
+
+instance = Chatbot()

+ 5 - 18
chatbot/urls.py

@@ -1,21 +1,8 @@
-"""mobeesia URL Configuration
-
-The `urlpatterns` list routes URLs to views. For more information please see:
-    https://docs.djangoproject.com/en/3.0/topics/http/urls/
-Examples:
-Function views
-    1. Add an import:  from my_app import views
-    2. Add a URL to urlpatterns:  path('', views.home, name='home')
-Class-based views
-    1. Add an import:  from other_app.views import Home
-    2. Add a URL to urlpatterns:  path('', Home.as_view(), name='home')
-Including another URLconf
-    1. Import the include() function: from django.urls import include, path
-    2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
-"""
-from django.contrib import admin
 from django.urls import path
+from django.conf.urls import url
+from . import views
 
 urlpatterns = [
-    path('train', admin.site.urls),
-]
+    path('train', views.ChatbotView.as_view()),
+    path('chat', views.ChatbotView.as_view()),
+]

+ 35 - 1
chatbot/views.py

@@ -1,3 +1,37 @@
+from pickle import decode_long
 from django.shortcuts import render
+from rest_framework.views import APIView, Response
+from django.http import HttpResponse
+from chatbot.main import instance as Chatbot
+
+class ChatbotView (APIView):
+
+    # def get (self, request):
+    #     status, data = InsightsBdo().list(request)
+
+    #     return Response(status=status, data=data)
+
+    def post (self, request):
+        # get the chatbot's answer to the posted question
+        question = request.data.get('question')
+
+        status, data = Chatbot.chat(question)
+
+        return Response(status=status, data=data)
+    
+    # def put (self, request, id=None):
+    #     # set insight exec score
+    #     etlUtil = ETLData()
+    #     score = etlUtil.getRequestData(request, ['score'])
+
+    #     status, data = InsightsBdo().score(request, id, score)
+
+    #     return Response(status=status, data=data)
+    
+    # def delete (self, request, id=None):
+    #     # delete insight
+        
+    #     status, data = InsightsBdo().delete(request, id)
+
+    #     return Response(status=status, data=data)
 
-# Create your views here.

Diff do ficheiro suprimidas por serem muito extensas
+ 5918 - 0
content.json


BIN
mobeesia/__pycache__/urls.cpython-39.pyc


+ 100 - 0
mobeesia/mobeesia.log

@@ -0,0 +1,100 @@
+ERROR | 2023-07-14 15:45:03 | Internal Server Error: /chatbot/chat
+Traceback (most recent call last):
+  File "/usr/local/lib/python3.9/site-packages/django/core/handlers/exception.py", line 34, in inner
+    response = get_response(request)
+  File "/usr/local/lib/python3.9/site-packages/django/core/handlers/base.py", line 115, in _get_response
+    response = self.process_exception_by_middleware(e, request)
+  File "/usr/local/lib/python3.9/site-packages/django/core/handlers/base.py", line 113, in _get_response
+    response = wrapped_callback(request, *callback_args, **callback_kwargs)
+  File "/usr/local/lib/python3.9/site-packages/django/views/decorators/csrf.py", line 54, in wrapped_view
+    return view_func(*args, **kwargs)
+  File "/usr/local/lib/python3.9/site-packages/django/views/generic/base.py", line 71, in view
+    return self.dispatch(request, *args, **kwargs)
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/views.py", line 505, in dispatch
+    response = self.handle_exception(exc)
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/views.py", line 465, in handle_exception
+    self.raise_uncaught_exception(exc)
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/views.py", line 476, in raise_uncaught_exception
+    raise exc
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/views.py", line 502, in dispatch
+    response = handler(request, *args, **kwargs)
+  File "/Users/donzito/Documents/Projetos/mobees/dev/mobees-ai/chatbot/views.py", line 18, in post
+    status, data = Chatbot().chat(request, question)
+TypeError: 'Chatbot' object is not callable
+ERROR | 2023-07-14 15:47:59 | Internal Server Error: /chatbot/chat
+Traceback (most recent call last):
+  File "/usr/local/lib/python3.9/site-packages/django/core/handlers/exception.py", line 34, in inner
+    response = get_response(request)
+  File "/usr/local/lib/python3.9/site-packages/django/core/handlers/base.py", line 115, in _get_response
+    response = self.process_exception_by_middleware(e, request)
+  File "/usr/local/lib/python3.9/site-packages/django/core/handlers/base.py", line 113, in _get_response
+    response = wrapped_callback(request, *callback_args, **callback_kwargs)
+  File "/usr/local/lib/python3.9/site-packages/django/views/decorators/csrf.py", line 54, in wrapped_view
+    return view_func(*args, **kwargs)
+  File "/usr/local/lib/python3.9/site-packages/django/views/generic/base.py", line 71, in view
+    return self.dispatch(request, *args, **kwargs)
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/views.py", line 505, in dispatch
+    response = self.handle_exception(exc)
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/views.py", line 465, in handle_exception
+    self.raise_uncaught_exception(exc)
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/views.py", line 476, in raise_uncaught_exception
+    raise exc
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/views.py", line 502, in dispatch
+    response = handler(request, *args, **kwargs)
+  File "/Users/donzito/Documents/Projetos/mobees/dev/mobees-ai/chatbot/views.py", line 18, in post
+    status, data = Chatbot.chat(request, question)
+TypeError: chat() takes from 1 to 2 positional arguments but 3 were given
+ERROR | 2023-07-14 15:50:02 | Internal Server Error: /chatbot/chat
+Traceback (most recent call last):
+  File "/usr/local/lib/python3.9/site-packages/django/core/handlers/exception.py", line 34, in inner
+    response = get_response(request)
+  File "/usr/local/lib/python3.9/site-packages/django/core/handlers/base.py", line 145, in _get_response
+    response = self.process_exception_by_middleware(e, request)
+  File "/usr/local/lib/python3.9/site-packages/django/core/handlers/base.py", line 143, in _get_response
+    response = response.render()
+  File "/usr/local/lib/python3.9/site-packages/django/template/response.py", line 105, in render
+    self.content = self.rendered_content
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/response.py", line 70, in rendered_content
+    ret = renderer.render(self.data, accepted_media_type, context)
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/renderers.py", line 100, in render
+    ret = json.dumps(
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/utils/json.py", line 25, in dumps
+    return json.dumps(*args, **kwargs)
+  File "/usr/local/Cellar/python@3.9/3.9.17_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/__init__.py", line 234, in dumps
+    return cls(
+  File "/usr/local/Cellar/python@3.9/3.9.17_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py", line 199, in encode
+    chunks = self.iterencode(o, _one_shot=True)
+  File "/usr/local/Cellar/python@3.9/3.9.17_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py", line 257, in iterencode
+    return _iterencode(o, 0)
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/utils/encoders.py", line 67, in default
+    return super().default(obj)
+  File "/usr/local/Cellar/python@3.9/3.9.17_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py", line 179, in default
+    raise TypeError(f'Object of type {o.__class__.__name__} '
+TypeError: Object of type Response is not JSON serializable
+ERROR | 2023-07-14 15:56:00 | Internal Server Error: /chatbot/chat
+Traceback (most recent call last):
+  File "/usr/local/lib/python3.9/site-packages/django/core/handlers/exception.py", line 34, in inner
+    response = get_response(request)
+  File "/usr/local/lib/python3.9/site-packages/django/core/handlers/base.py", line 145, in _get_response
+    response = self.process_exception_by_middleware(e, request)
+  File "/usr/local/lib/python3.9/site-packages/django/core/handlers/base.py", line 143, in _get_response
+    response = response.render()
+  File "/usr/local/lib/python3.9/site-packages/django/template/response.py", line 105, in render
+    self.content = self.rendered_content
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/response.py", line 70, in rendered_content
+    ret = renderer.render(self.data, accepted_media_type, context)
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/renderers.py", line 100, in render
+    ret = json.dumps(
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/utils/json.py", line 25, in dumps
+    return json.dumps(*args, **kwargs)
+  File "/usr/local/Cellar/python@3.9/3.9.17_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/__init__.py", line 234, in dumps
+    return cls(
+  File "/usr/local/Cellar/python@3.9/3.9.17_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py", line 199, in encode
+    chunks = self.iterencode(o, _one_shot=True)
+  File "/usr/local/Cellar/python@3.9/3.9.17_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py", line 257, in iterencode
+    return _iterencode(o, 0)
+  File "/usr/local/lib/python3.9/site-packages/rest_framework/utils/encoders.py", line 67, in default
+    return super().default(obj)
+  File "/usr/local/Cellar/python@3.9/3.9.17_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py", line 179, in default
+    raise TypeError(f'Object of type {o.__class__.__name__} '
+TypeError: Object of type Response is not JSON serializable

BIN
mobeesia/settings/__pycache__/default.cpython-39.pyc


+ 2 - 3
mobeesia/urls.py

@@ -1,7 +1,6 @@
 
-from django.conf.urls import url
-from django.conf.urls import include
+from django.urls import include, path
 
 urlpatterns = [
-    url(r'^', include('chatbot.urls')),
+    path('chatbot/', include('chatbot.urls')),
 ]

+ 1 - 1
requirements.txt

@@ -30,6 +30,6 @@ google-auth-httplib2
 google-auth-oauthlib
 googleapis-common-protos
 google-cloud-storage
-llama-index==0.7.4
+llama-index==0.7.8
 llama_hub
 openai

Diff do ficheiro suprimidas por serem muito extensas
+ 0 - 0
storage/docstore.json


+ 1 - 0
storage/graph_store.json

@@ -0,0 +1 @@
+{"graph_dict": {}}

+ 1 - 0
storage/index_store.json

@@ -0,0 +1 @@
+{"index_store/data": {"84d83981-5f1c-436e-980e-cb4d2b5bfc2f": {"__type__": "vector_store", "__data__": "{\"index_id\": \"84d83981-5f1c-436e-980e-cb4d2b5bfc2f\", \"summary\": null, \"nodes_dict\": {\"05521fae-9547-47df-91f0-7728692e5ba2\": \"05521fae-9547-47df-91f0-7728692e5ba2\", \"24d8c798-5015-4e04-b07d-7e910256f3b9\": \"24d8c798-5015-4e04-b07d-7e910256f3b9\", \"5537300b-8c48-4203-8273-a68153d1403e\": \"5537300b-8c48-4203-8273-a68153d1403e\", \"6f47f1de-ca31-44df-bcd8-8dc87ba58de2\": \"6f47f1de-ca31-44df-bcd8-8dc87ba58de2\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}

Diff do ficheiro suprimidas por serem muito extensas
+ 0 - 0
storage/vector_store.json


Alguns ficheiros não foram mostrados porque muitos ficheiros mudaram neste diff